As of the date shown above, I work for Dr. Kate Ross on a condensed matter physics team at Colorado State University. As part of my work, I was asked to create a data visualization tool for a Neutron Scattering Experiment conducted by the graduate student Gavin Hester. The plan is to build this tool using Python and the Dash Framework. The Dash Framework integrates Python's Plotly API with the Flask web hosting library.
In this notebook I have created some Plotly plots. As a result, in order to run this notebook, you may need to install the plotly-extension
for JupyterLab or Jupyter Notebook (depending on which program you are using).
# General Processing
import pandas as pd
import numpy as np
# Visualization
import plotly.graph_objs as go
import plotly.express as px
import plotly.io as pio
import matplotlib.pyplot as plt
# Standard Library
from typing import List
%matplotlib inline
# Print NumPy floats with two digits of precision and wrap array output at 150 characters.
np.set_printoptions(linewidth=150, precision=2)
The code below allows for the user to create a display object for displaying multiple pandas dataframes next to one another.
class display(object):
    """Display HTML representation of multiple objects, side by side.

    Each positional argument is a *string*: the name of a module-level
    variable (or any expression) to show. The string is used as the caption
    and is evaluated to obtain the object that gets rendered.

    NOTE(review): the strings are passed to ``eval``, which executes arbitrary
    code. Only pass names you typed yourself, never external input.
    """

    template = """<div style="float: left; padding: 10px;">
<p style='font-family:"Courier New", Courier, monospace'>{0}</p>{1}
</div>"""

    def __init__(self, *args: str):
        self.args = args

    @staticmethod
    def _resolve(name: str):
        """Return the object that the expression *name* refers to.

        The expression is evaluated against the module globals only, so a
        variable that happens to be called ``a`` or ``self`` is looked up in
        the notebook namespace instead of colliding with a local of this class.
        """
        return eval(name, globals())

    def _repr_html_(self):
        """Return one floating ``<div>`` per name: caption plus DataFrame HTML."""
        return '\n'.join(
            self.template.format(name, pd.DataFrame(self._resolve(name))._repr_html_())
            for name in self.args
        )

    def __repr__(self):
        """Return the plain-text fallback: each name followed by its repr."""
        return '\n\n'.join(
            name + '\n' + repr(self._resolve(name))
            for name in self.args
        )
Define the data file to analyze
# Relative path of the CSV file that this notebook analyzes.
data_file = "../data/1K0Slice_Integratedpm0p1.csv"
Read in the Data
# Column labels are supplied here, so every row of the file is treated as data
# and the three columns are exposed as df.x, df.y and df.z.
column_names = ["x", "y", "z"]
df = pd.read_csv(data_file, names=column_names)
Size of the Data
# Report how many rows and columns were loaded.
rows = len(df.index)
columns = len(df.columns)
print("Data Rows: ", rows)
print("Data Columns:", columns)
Inspect Elements
# Slice off the first and last five rows. They are bound to module-level names
# because `display` is handed those names as strings.
first_five_elements = df.iloc[:5]
last_five_elements = df.iloc[-5:]
display("first_five_elements", "last_five_elements")
Get the unique element counts
# Print the number of distinct values found in each column.
for key in df.columns:
    distinct_values = df[key].unique()
    print(f"unique {key} count:", len(distinct_values))
Let's Check!
I am going to make some plots of the raw data to see if I can make sense of it.
# Every x value plotted against its row position.
ax = plt.gca()
ax.scatter(range(len(df.x)), df.x, s=1, marker="o")
ax.set_title("X-Data")
ax.set_xlabel("x-index")
ax.set_ylabel("x-value")
plt.show()

# Same plot restricted to the first 2000 rows, to make the repeating pattern visible.
ax = plt.gca()
ax.scatter(range(2000), df.x[:2000], s=1, marker="o")
ax.set_title("X-Data (First 2k Values)")
ax.set_xlabel("x-index")
ax.set_ylabel("x-value")
plt.show()
There appear to be $x$ coordinates that get repeated roughly every 300 elements. This interval likely occurs every 361 elements, because that's how many unique $x$ elements I found earlier! LET'S CHECK!
If this is true, then every grouping of 361 consecutive elements will be the same as the first grouping of 361 elements.
# Check that the x column is a single block of values repeated end to end.
# The block length is the number of distinct x values, read from the data
# instead of being typed in by hand.
x_values = df.x.to_numpy()
period = len(df.x.unique())
# Position i must hold the same value as position (i % period) of the first block.
expected = x_values[np.arange(len(x_values)) % period]
same_elements = bool(np.array_equal(x_values, expected))
print(same_elements)
Now I will inspect the range of the $x$ coordinates.
# Smallest and largest x coordinate; the distinct values are computed once and reused.
x_unique = df.x.unique()
print(f"Range(x) = [{x_unique.min()},{x_unique.max()}]")
# Every y value plotted against its row position.
ax = plt.gca()
ax.scatter(range(len(df.y)), df.y, s=1, marker="o")
ax.set_title("Y-Data")
ax.set_xlabel("y-index")
ax.set_ylabel("y-value")
plt.show()
The $y$ values seem to have a stepwise pattern, but let's make a smaller plot to verify this.
plt.scatter(range(2000), df.y[:2000], s=1, marker='o');
# Caption the zoomed-in y plot on the current axes, then render it.
ax = plt.gca()
ax.set_title("Y-Data (First 2k Values)")
ax.set_ylabel("y-value")
ax.set_xlabel("y-index")
plt.show()
$y$ seems to stay constant for roughly every 300 values. It appears that in our data, a particular $y$ value is chosen and then roughly 300 unique $x$ elements are chosen.

Let's Check!
# Measure the length of every run of consecutive identical y values.
# A new run starts wherever a value differs from the one directly before it,
# so the cumulative sum of those change points gives each row its run number.
run_id = (df.y != df.y.shift()).cumsum()
# Run lengths in order of appearance. The final run is included, so it also
# takes part in the uniformity check below.
all_counts = df.y.groupby(run_id).size().tolist()

# Check that every run has the same length, and report that length.
print("Did all unique consecutive elements show up the same number of times?", len(set(all_counts)) == 1)
print("How many times did each unique element show up consecutively?", all_counts[-1])
Now I will inspect the range of the $y$ coordinates.
# Smallest and largest y coordinate. The label names y because the values
# printed come from the y column.
y_unique = df.y.unique()
print(f"Range(y) = [{y_unique.min()},{y_unique.max()}]")
%matplotlib inline
# Colour each point by the number of the block it falls in, where a block is
# as many consecutive rows as there are distinct x values.
points_per_block = len(df.x.unique())
block_number = np.arange(len(df.x)) // points_per_block
plt.scatter(df.y, df.x, c=block_number, s=5, marker="o")
plt.colorbar()
plt.title("X-Data Vs. Y-Data")
plt.xlabel("y-data")
plt.ylabel("x-data");
Next, the $z$ value intensity data:
plt.scatter(range(len(df.z)), df.z,s=1)
# Caption the z scatter plot (title first, then the two axis labels).
plt.title("z-Data")
plt.xlabel("Z index")
plt.ylabel("z-data");
Now that I feel comfortable saying that the $x$ and $y$ values are grid coordinates for the intensity values that are represented by $z$, let's go ahead and make some plots!
To get a better grasp on the data, I will go ahead and make a scatter plot.
# 3-D scatter of the raw (x, y, z) points, drawn as markers only.
marker_style = dict(
    color="#FFBAD2",
    line=dict(width=0.01),
)
scatter_trace = go.Scatter3d(
    x=df.x,
    y=df.y,
    z=df.z,
    mode="markers",
    marker=marker_style,
)
data = [scatter_trace]
fig = go.Figure(data)
fig.show()
[…] $y$ coordinates.

I need to come up with a way of handling the elastic line, so that it does not wash out all of the data. In order to fix this issue, I will set a maximum value in the $z$ data, so that the elastic line appears as a single color.
# Average the z values for each of the y coordinates to see if it is obvious
# where the elastic line is in the data.
#
# The flat z column is folded into rows of len(unique x) consecutive values,
# the same layout the heatmap later in this notebook uses, and each row is
# averaged. Fixing the column count rather than the row count keeps one row
# per y value even if the grid is not square.
# NOTE(review): assumes each y value owns one consecutive run of rows, as the
# earlier checks in this notebook suggest. Confirm against the data.
n_x_values = len(df.x.unique())
avg_z_by_y = np.array(df.z).reshape((-1, n_x_values)).mean(axis=1)

data = go.Scatter(y=avg_z_by_y)
layout = go.Layout(
    xaxis=go.layout.XAxis(title="Y Index"),
    yaxis=go.layout.YAxis(title="Average Z"),
    title=go.layout.Title(text="Average Z value for each Y value"),
)
fig = go.Figure(data=data, layout=layout)
fig.show()
[…] $y$ index.

I will now create a histogram of the $z$ data that occurs after the $22^{\text{nd}}$ $y$ value.
# The 22nd distinct y value (position 21 of the distinct values).
y_no_elastic = df.y.unique()[21]

# Keep only the z values whose y lies above that cut, i.e. the rows that are
# treated as being outside the elastic line.
above_cut = df.y > y_no_elastic
z_no_elastic = df.loc[above_cut, "z"]
df_z_no_elastic = pd.DataFrame({"z": np.array(z_no_elastic)})

# Histogram of the remaining intensities.
px.histogram(
    df_z_no_elastic,
    x="z",
    title="Intensity Distribution Without Elastic Line",
)
[…] $z$ value to cap the data that comes well before these outliers, so as not to wash out the rest of the heatmap that I will be making.

I will now recreate the same histogram without any of the data in the large bin that occurs near zero to better view the data.
# Blank out (set to NaN) every z that lies in the closed interval from 0 to the
# upper edge of the bin near zero, so that bin no longer dominates the histogram.
zero_bin_upper = 100 * (10 ** -6)
in_zero_bin = df_z_no_elastic.z.between(0, zero_bin_upper)
dz = df_z_no_elastic.copy().mask(in_zero_bin)
px.histogram(
    dz,
    x="z",
    title="Intensity Distribution Without Elastic Line And without data z=[0-100u]",
)
I will now create a cumulative histogram that depicts what percentage of the data is less than a given $z$ value.
# Cumulative histogram of the masked intensities, normalised to percent.
cumulative_title = "Cumulative Intensity Distribution Without Elastic Line And without data z=[0-100u]"
px.histogram(
    dz,
    x="z",
    nbins=350,
    histnorm="percent",
    cumulative=True,
    title=cumulative_title,
)
$z = 0.025$ is larger than 99.92% of the data (not including the elastic line, or the zero line) and it appears to be smaller than most of the outliers, so I will take it to be a good boundary value to set as a maximum when I create the heat map.

Creating a New Z value to test things out
# Bounds used to cap the intensities.
min_val = 0
max_val = 0.025

# Capped copy of the z column: values below min_val become min_val and values
# above max_val become max_val. df.z itself is left untouched.
new_z = df.z.clip(lower=min_val, upper=max_val)

# Show the five largest and five smallest capped values next to each other.
largest_values = new_z.sort_values(ascending=False).iloc[:5].to_numpy()
smallest_values = new_z.sort_values(ascending=True).iloc[:5].to_numpy()
pd.DataFrame({"Largest Z Values": largest_values, "Smallest Z Values": smallest_values})
Below, I have defined a colorscale to use for the heatmap.
# Eleven evenly spaced stops from 0 to 1, each paired with the colour shown at that stop.
_stops = (0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1)
_stop_colors = (
    'rgb(0, 0, 0)',       # black
    'rgb(153, 51, 255)',  # purple
    'rgb(51, 51, 255)',   # blue
    'rgb(51, 153, 255)',  # light blue
    'rgb(51, 255, 255)',  # teal
    'rgb(51, 255, 153)',  # light green
    'rgb(51, 255, 51)',   # green
    'rgb(153, 255, 51)',  # yellow green
    'rgb(255, 255, 51)',  # yellow
    'rgb(255, 153, 51)',  # orange
    'rgb(255, 51, 51)',   # red
)
# Each entry is a [stop, colour] pair.
colorscale = [[stop, color] for stop, color in zip(_stops, _stop_colors)]
I will now create the heatmap. I will also throw in an example line that I will be using to scan over the image.
# Fold the capped z column into a grid whose rows each hold as many
# consecutive values as there are distinct x values.
n_x_values = len(df.x.unique())
z_grid = np.array(new_z).reshape(-1, n_x_values)
heat_trace = go.Heatmap(
    z=z_grid,
    colorscale=colorscale,
    showscale=True,  # show the colorbar
)

# Example scan line: a segment at x = 20 running from row 0 to the last y index.
last_y_index = len(df.y.unique()) - 1
line_trace = go.Scatter(
    x=[20, 20],
    y=[0, last_y_index],
    marker={"color": "red"},
)

data = [heat_trace, line_trace]
heat_fig = go.Figure(data)
# Give the figure a title placed at x=0.5 in paper coordinates and caption both
# axes, then render it. Margins and paper background are not set here.
figure_title = go.layout.Title(text="Neutron Scattering Heatmap", xref="paper", x=0.5)
x_axis = go.layout.XAxis(title="X Axis Text")
y_axis = go.layout.YAxis(title="Y Axis Text")
heat_fig.update_layout(
    title=figure_title,
    xaxis=x_axis,
    yaxis=y_axis,
)
heat_fig.show()