diff --git a/pixtools/clusters/noise_analysis_SD_kmeans_Clustering.py b/pixtools/clusters/noise_analysis_SD_kmeans_Clustering.py new file mode 100644 index 0000000..0da334e --- /dev/null +++ b/pixtools/clusters/noise_analysis_SD_kmeans_Clustering.py @@ -0,0 +1,156 @@ +# First import required packages +import sys +import json +from sklearn.cluster import KMeans + +import numpy as np +import pandas as pd +import seaborn as sns +import matplotlib.pyplot as plt +import probeinterface as pi + + +def meta_spikeglx(exp, session): + """ + Simply extracts channel depth from probe metadata. + + exp: exp class containing mouse IDs + + session: specific recording session to extract information from + """ + meta = exp[session].files[0]["spike_meta"] + data_path = exp[session].find_file(meta) + data = pi.read_spikeglx(data_path) + + return data + + +def noise_per_channeldepth(myexp): + """ + Function extracts the noise for each channel, combining this into a dataframe + + myexp: the experiment defined in base.py, will extract the depth information from here. + """ + noise = pd.DataFrame( + columns=["session", "project", "SDs", "x", "y"] + ) # Create the empty array to hold the noise information + depths = meta_spikeglx(myexp, 0) + depths = depths.to_dataframe() + coords = depths[ + ["x", "y"] + ] # Create a dataframe containing the generic x and y coords. + tot_noise = [] + + # Iterate through each session, taking the noise for each file and loading them into one continuous data frame. + for s, session in enumerate(myexp): + for i in range(len(session.files)): + path = session.processed / f"noise_{i}.json" + with path.open() as fd: + ses_noise = json.load(fd) + + chan_noises = [] + for j, SD in enumerate( + ses_noise["SDs"][0:-1] + ): # This will iterate over first 384 channels, and exclude the sync channel + x = coords["x"].iloc[j] + y = coords["y"].iloc[j] + noise_row = pd.DataFrame.from_records( + {"session": [session.name], "SDs": [SD], "x": x, "y": y} + ) + chan_noises.append(noise_row) + + # Take all datafrom channel noises for a session, then concatenate + noise = pd.concat(chan_noises) + tot_noise.append(noise) # Take all channel noises and add to a master file + df2 = pd.concat( + tot_noise + ) # Convert this master file, containing every sessions noise data into a dataframe + + return df2 + + +def elbowplot(data, myexp): + + """ + + This function takes data formatted according to noise_per_channeldepth(), containing the noise values for all channels + Will iterate through each experimental session, producing the appropriate graph. Should take the optimal number of clusters as the point at which the elbow bends. + This point is defined as the boundary where additional clusters no longer explain much more variance in the data. + + data: The dataframe, as formatted by noise_per_channeldepth() + + myexp: The experiment, defined in base.py containing the session information. + + """ + + for s, session in enumerate(myexp): + name = session.name + ses_data = data.loc[data["session"] == name] + df3 = ses_data["SDs"].values.reshape( + -1, 1 + ) # Just gives all noise values, for each session + Sum_of_squares = [] # create an empty list to store these in. 
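# Each candidate k below is fitted with KMeans and its inertia_ (the
# within-cluster sum of squared distances to the closest centroid) is recorded;
# the "elbow" is the k beyond which the inertia stops falling sharply.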
+ + k = range(1, 10) + for num_clusters in k: + kmeans = KMeans(n_clusters=num_clusters) + kmeans.fit(df3) + Sum_of_squares.append(kmeans.inertia_) + + fig, ax = plt.subplots() + + # This code will plot the elbow graph to give an overview of the variance in the data explained by the varying the number of clusters + # This gives the distance from the centroids, as a measure of the variability explained + # We want this to drop off indicating that there is no remaining data explained by further centroid inclusion + + # Figure has two rows, one columns, this is the first plot + plt.plot(k, Sum_of_squares, "bx-") # bx gives blue x as each point. + plt.xlabel("Putative Number of Clusters") + plt.ylabel("Sum of Squares Distances/Inertia") + plt.title( + f"Determining Optimal Number of Clusters for Analysis - Session {name}" + ) + + f = plt.gca() + return f + + +def clusterplot(data, myexp, cluster_num): + """ + Function takes the noise per channel and depth information, produced by noise_per_channeldepth() and produces a clusterplot. + Clustering is performed by K-means analysis, elbow plot should be produced by elbowplot() to determine optimal cluster number. + + data: data produced by noise_per_channel_depth() containing channel ID, coordinate, and recording noise for each session in the exp class + + myexp: the exp class containing mouse IDs + + cluster_num: the number of clusters to produce through the k-means analysis, determined by qualitative analysis of elbow plots. (where the "bow" of the line occurs) + + """ + + # First define k-means parameters for clustering + kmeans = KMeans( + init="random", # Initiate the iterative analysis with random centres + n_clusters=cluster_num, # How many clusters to bin the data into, based on the elbow analysis! + n_init=10, # Number of centroids to generate initially + max_iter=300, # Max number of iterations before ceasing analysis + random_state=42, # The random number seed for centroid generation, can really be anything for our purposes + ) + + for s, session in enumerate(myexp): + name = session.name + + ses = data.loc[data["session"] == name] + sd = ses["SDs"].values.reshape(-1, 1) + y_means = kmeans.fit_predict(sd) + + # Now plot the kmeans analysis + # Remember we use our original data (ses) but use the clustering analysis to generate the labels + plt.scatter(ses["y"], ses["SDs"], c=y_means, cmap="viridis") + + plt.xlabel("Probe Channel Y-Coordinate") + plt.ylabel("Channel Noise (SD)") + plt.title(f"{name} Channel Noise k-Mean Clustering Analysis") + + f = plt.gca() + return f diff --git a/pixtools/clusters/unit_depths.py b/pixtools/clusters/unit_depths.py index 4df3881..01ee26d 100644 --- a/pixtools/clusters/unit_depths.py +++ b/pixtools/clusters/unit_depths.py @@ -19,17 +19,17 @@ def unit_depths(exp): for s, session in enumerate(exp): session_depths = {} - + rec_num=0 for rec_num, probe_depth in enumerate(session.get_probe_depth()): rec_depths = {} - rec_info = info[s][rec_num] + rec_info = info[s] id_key = 'id' if 'id' in rec_info else 'cluster_id' # Depends on KS version for unit in rec_info[id_key]: unit_info = rec_info.loc[rec_info[id_key] == unit].iloc[0].to_dict() rec_depths[unit] = probe_depth - unit_info["depth"] - session_depths[rec_num] = pd.DataFrame(rec_depths, index=["depth"]) + session_depths[0] = pd.DataFrame(rec_depths, index=["depth"]) depths.append(pd.concat(session_depths, axis=1, names=["rec_num", "unit"])) diff --git a/pixtools/responsiveness/CI_Analysis_pointplot.py b/pixtools/responsiveness/CI_Analysis_pointplot.py new 
file mode 100644 index 0000000..e4d3e1c --- /dev/null +++ b/pixtools/responsiveness/CI_Analysis_pointplot.py @@ -0,0 +1,81 @@ +def significance_extraction(CI): + """ + This function takes the output of the get_aligned_spike_rate_CI method under the myexp class and extracts any significant values, returning a dataframe in the same format. + + CI: The dataframe created by the CI calculation previously mentioned + + """ + + sig = [] + keys=[] + rec_num = 0 + + #This loop iterates through each column, storing the data as un, and the location as s + for s, unit in CI.items(): + #Now iterate through each recording, and unit + #Take any significant values and append them to lists. + if unit.loc[2.5] > 0 or unit.loc[97.5] < 0: + sig.append(unit) #Append the percentile information for this column to a list + keys.append(s) #append the information containing the point at which the iteration currently stands + + + #Now convert this list to a dataframe, using the information stored in the keys list to index it + sigs = pd.concat( + sig, axis = 1, copy = False, + keys=keys, + names=["session", "unit", "rec_num"] + ) + + return sigs + +def percentile_plot(CIs, sig_CIs, exp, sig_only = False, dir_ascending = False): + """ + + This function takes the CI data and significant values and plots them relative to zero. + May specify if percentiles should be plotted in ascending or descending order. + + CIs: The output of the get_aligned_spike_rate_CI function, i.e., bootstrapped confidence intervals for spike rates relative to two points. + + sig_CIs: The output of the significance_extraction function, i.e., the units from the bootstrapping analysis whose confidence intervals do not straddle zero + + exp: The experimental session to analyse, defined in base.py + + sig_only: Whether to plot only the significant values obtained from the bootstrapping analysis (True/False) + + dir_ascending: Whether to plot the values in ascending order (True/False) + + """ + #First sort the data into long form for the full dataset, by percentile + CIs_long = CIs.reset_index().melt("percentile").sort_values("value", ascending= dir_ascending) + CIs_long = CIs_long.reset_index() + CIs_long["index"] = pd.Series(range(0, CIs_long.shape[0]))#reset the index column to allow ordered plotting + + #Now select if we want only significant values plotted, else raise an error. + if sig_only is True: + CIs_long_sig = sig_CIs.reset_index().melt("percentile").sort_values("value", ascending=dir_ascending) + CIs_long_sig = CIs_long_sig.reset_index() + CIs_long_sig["index"] = pd.Series(range(0, CIs_long_sig.shape[0])) + + data = CIs_long_sig + + elif sig_only is False: + data = CIs_long + + else: + raise TypeError("Sig_only argument must be a boolean operator (True/False)") + + #Plot this data for the experimental sessions as a pointplot. + for s, session in enumerate(exp): + name = session.name + + p = sns.pointplot( + x="unit", y = "value", data = data.loc[(data.session == s)], + order = data.loc[(data.session == s)]["unit"].unique(), join = False, legend = None) #Plots in the order of the units as previously set, uses unique values to prevent double plotting + + p.set_xlabel("Unit") + p.set_ylabel("Confidence Interval") + p.set(xticklabels=[]) + p.axhline(0) + plt.suptitle("\n".join(wrap(f"Confidence Intervals By Unit - Grasp vs. Baseline - Session {name}"))) #Wraps the title of the plot to fit on the page. 
+ + plt.show() \ No newline at end of file diff --git a/pixtools/responsiveness/ci_analysis_pointplot.py b/pixtools/responsiveness/ci_analysis_pointplot.py new file mode 100644 index 0000000..e4d3e1c --- /dev/null +++ b/pixtools/responsiveness/ci_analysis_pointplot.py @@ -0,0 +1,81 @@ +def significance_extraction(CI): + """ + This function takes the output of the get_aligned_spike_rate_CI method under the myexp class and extracts any significant values, returning a dataframe in the same format. + + CI: The dataframe created by the CI calculation previously mentioned + + """ + + sig = [] + keys=[] + rec_num = 0 + + #This loop iterates through each column, storing the data as un, and the location as s + for s, unit in CI.items(): + #Now iterate through each recording, and unit + #Take any significant values and append them to lists. + if unit.loc[2.5] > 0 or unit.loc[97.5] < 0: + sig.append(unit) #Append the percentile information for this column to a list + keys.append(s) #append the information containing the point at which the iteration currently stands + + + #Now convert this list to a dataframe, using the information stored in the keys list to index it + sigs = pd.concat( + sig, axis = 1, copy = False, + keys=keys, + names=["session", "unit", "rec_num"] + ) + + return sigs + +def percentile_plot(CIs, sig_CIs, exp, sig_only = False, dir_ascending = False): + """ + + This function takes the CI data and significant values and plots them relative to zero. + May specify if percentiles should be plotted in ascending or descending order. + + CIs: The output of the get_aligned_spike_rate_CI function, i.e., bootstrapped confidence intervals for spike rates relative to two points. + + sig_CIs: The output of the significance_extraction function, i.e., the units from the bootstrapping analysis whose confidence intervals do not straddle zero + + exp: The experimental session to analyse, defined in base.py + + sig_only: Whether to plot only the significant values obtained from the bootstrapping analysis (True/False) + + dir_ascending: Whether to plot the values in ascending order (True/False) + + """ + #First sort the data into long form for the full dataset, by percentile + CIs_long = CIs.reset_index().melt("percentile").sort_values("value", ascending= dir_ascending) + CIs_long = CIs_long.reset_index() + CIs_long["index"] = pd.Series(range(0, CIs_long.shape[0]))#reset the index column to allow ordered plotting + + #Now select if we want only significant values plotted, else raise an error. + if sig_only is True: + CIs_long_sig = sig_CIs.reset_index().melt("percentile").sort_values("value", ascending=dir_ascending) + CIs_long_sig = CIs_long_sig.reset_index() + CIs_long_sig["index"] = pd.Series(range(0, CIs_long_sig.shape[0])) + + data = CIs_long_sig + + elif sig_only is False: + data = CIs_long + + else: + raise TypeError("Sig_only argument must be a boolean operator (True/False)") + + #Plot this data for the experimental sessions as a pointplot. + for s, session in enumerate(exp): + name = session.name + + p = sns.pointplot( + x="unit", y = "value", data = data.loc[(data.session == s)], + order = data.loc[(data.session == s)]["unit"].unique(), join = False, legend = None) #Plots in the order of the units as previously set, uses unique values to prevent double plotting + + p.set_xlabel("Unit") + p.set_ylabel("Confidence Interval") + p.set(xticklabels=[]) + p.axhline(0) + plt.suptitle("\n".join(wrap(f"Confidence Intervals By Unit - Grasp vs. 
Baseline - Session {name}"))) #Wraps the title of the plot to fit on the page. + + plt.show() \ No newline at end of file diff --git a/pixtools/responsiveness.py b/pixtools/responsiveness/responsiveness.py similarity index 100% rename from pixtools/responsiveness.py rename to pixtools/responsiveness/responsiveness.py diff --git a/projects/Aidan_Analysis/FullAnalysis/.vscode/launch.json b/projects/Aidan_Analysis/FullAnalysis/.vscode/launch.json new file mode 100644 index 0000000..1fbdcdc --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/.vscode/launch.json @@ -0,0 +1,18 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Python: Current File", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "stopOnEntry": true, + "justMyCode": false, + "editor.bracketPairColorization.independentColorPoolPerBracketType": true + } + ] +} \ No newline at end of file diff --git a/projects/Aidan_Analysis/FullAnalysis/.vscode/settings.json b/projects/Aidan_Analysis/FullAnalysis/.vscode/settings.json new file mode 100644 index 0000000..de288e1 --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "python.formatting.provider": "black" +} \ No newline at end of file diff --git a/projects/Aidan_Analysis/FullAnalysis/CI_Analysis.py b/projects/Aidan_Analysis/FullAnalysis/CI_Analysis.py new file mode 100644 index 0000000..46c1398 --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/CI_Analysis.py @@ -0,0 +1,223 @@ +#This script draws a confidence interval comparison between a baseline and some event. This is calculated as baseline - average firing rate +#Will use a modified version of a preexisting function in base.py (align_spike_rate_CI) +#Import required packages +from argparse import Action +import sys +from tokenize import group +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +from matplotlib import cm +from pandas import melt +import seaborn as sns +from textwrap import wrap + +sys.path.append("/home/s1735718/PixelsAnalysis/pixels-analysis") +from pixtools.utils import Subplots2D +#sys.path.insert(0, "/home/s1735718/PixelsAnalysis/pixels") #use the local copy of base.py +from base import * + +#First select units to analyse, will take all units in the brain, i.e. 
across the full 3500um recording range +units = myexp.select_units( + group="good", + max_depth=3500, + name="unit" +) +#Now let us run the align spike rate CI and save this as CIs + +# CIs = myexp.get_aligned_spike_rate_CI( +# label=ActionLabels.correct_left | ActionLabels.correct_right, +# event=Events.led_off, +# start=-0.400, #Start the analysis 400ms before LED turns off +# step=0.400, #Make this analysis in one step, not looking after or before +# end=0.000, #Finish the analysis for each unit when the grasp is made + +# bl_label=ActionLabels.correct_left | ActionLabels.correct_right, +# bl_event=Events.led_on, #The baseline will examine the ITI (i.e., time before the trial began) +# bl_start=-4.000, #Will catch every trial's ITI while avoiding the tail end of the previous trial +# bl_end=0.000, + +# ss=20, #The size of each sample to take +# CI=95, #Confidence interval to analyse to +# bs=10000, #The number of times to take a pseudorandom sample for bootstrapping +# units=units +# ) + +#Also create a dataset containing only values that do not straddle zero. +#Could check if first and last percentile are different from zero +#cis[0] > 0 or cis[-1] < 0 - if true then suggests significance! +#Will be easier if this is in wide form, can simply take it by column + +#First will iterate through the sessions for CI +#Remember the indexes are as follows: +#Session, Unit, rec_num (always zero), percentile (2.5, 50, or 97.5) + +def significance_extraction(CI): + """ + This function takes the output of the get_aligned_spike_rate_CI function and extracts any significant values, returning a dataframe in the same format. + + CI: The dataframe created by the CI calculation previously mentioned + + """ + + sig = [] + keys=[] + rec_num = 0 + + #This loop iterates through each column, storing the data as unit, and the location as s + for s, unit in CI.items(): + #Now iterate through each recording, and unit + #Take any significant values and append them to lists. + if unit.loc[2.5] >= 0 or unit.loc[97.5] <= 0: + sig.append(unit) #Append the percentile information for this column to a list + keys.append(s) #append the information containing the point at which the iteration currently stands + + + #Now convert this list to a dataframe, using the information stored in the keys list to index it + sigs = pd.concat( + sig, axis = 1, copy = False, + keys=keys, + names=["session", "unit", "rec_num"] + ) + + return sigs + +#sigs = significance_extraction(CIs) + +#Now we have successfully derived confidence intervals, we may plot these as a scatter for each unit, with a line denoting the critical point +#TODO: Add code to plot a vertical line at the point where percentiles cross zero. Allows for a visual representation of proportion. + +def percentile_plot(CIs, sig_CIs, exp, sig_only = False, dir_ascending = False): + """ + + This function takes the CI data and significant values and plots them relative to zero. + May specify if percentiles should be plotted in ascending or descending order. + + CIs: The output of the get_aligned_spike_rate_CI function, i.e., bootstrapped confidence intervals for spike rates relative to two points. 
+ + sig_CIs: The output of the significance_extraction function, i.e., the units from the bootstrapping analysis whose confidence intervals do not straddle zero + + exp: The experimental session to analyse, defined in base.py + + sig_only: Whether to plot only the significant values obtained from the bootstrapping analysis (True/False) + + dir_ascending: Whether to plot the values in ascending order (True/False) + + NB: Remember to change the title of the graph if making a different comparison! + + """ + #First sort the data into long form for both datasets, by percentile + CIs_long = CIs.reset_index().melt("percentile").sort_values("value", ascending= dir_ascending) + CIs_long = CIs_long.reset_index() + CIs_long["index"] = pd.Series(range(0, CIs_long.shape[0]))#reset the index column to allow ordered plotting + + CIs_long_sig = sig_CIs.reset_index().melt("percentile").sort_values("value", ascending=dir_ascending) + CIs_long_sig = CIs_long_sig.reset_index() + CIs_long_sig["index"] = pd.Series(range(0, CIs_long_sig.shape[0])) + + #Now select if we want only significant values plotted, else raise an error. + if sig_only is True: + data = CIs_long_sig + + elif sig_only is False: + data = CIs_long + + else: + raise TypeError("Sig_only argument must be a boolean operator (True/False)") + + + #Plot this data for the experimental sessions as a pointplot. + for s, session in enumerate(exp): + name = session.name + + p = sns.pointplot( + x="unit", y = "value", data = data.loc[(data.session == s)], + order = data.loc[(data.session == s)]["unit"].unique(), join = False, legend = None) #Plots in the order of the units as previously set, uses unique values to prevent double plotting + + p.set_xlabel("Unit") + p.set_ylabel("Confidence Interval") + p.set(xticklabels=[]) + p.axhline(0) + plt.suptitle("\n".join(wrap(f"Confidence Intervals By Unit - Grasp vs. Baseline - Session {name}"))) #Wraps the title of the plot to fit on the page. + + plt.show() + + +#Can also make a pie chart detailing the proportions of the whole collection of units that were signifcant +#Can simply compare the lengths of the unique values in the unit columns of the dataframes +#Convert this to two bar charts, one displaying proportion, like this. And another displaying actual values. +#Proportion on a scale of zero to one +#Will append these two seperate dataframes + +# colors = plt.get_cmap("Pastel1") +# prop = [] +# count = [] +# ses_key = [] + +# for s, session in enumerate(myexp): +# #First will create the dataframe of proportion vs total. (zero to one) + +# name = session.name + +# ses_data = CIs[s] +# sig_data = sigs[s] + + +# #Now calculate the proportion of significant to nonsignificant units +# sig_count = len(sig_data.columns) +# nonsig_count = len(ses_data.columns) - sig_count + +# #This divides the group of units by the total number in a session to give the proportion of units on a scale of zero to one +# sig_prop = sig_count/len(ses_data.columns) +# nonsig_prop = nonsig_count/len(ses_data.columns) + +# #Now append these values to prop, and save key for this iteration +# prop.append([sig_prop, nonsig_prop]) +# ses_key.append(name) #This will hold the name of the sesision the proportions were calc. 
from + +# #Then shall create another dataframe containing the actual number of sig vs nonsig units +# count.append([sig_count, nonsig_count]) + +# #Now concatenate these lists as a dataframe +# counts = pd.DataFrame( +# count, copy = False, +# columns = ["sig count", "nonsig count"] +# ) + +# props = pd.DataFrame( +# prop, copy=False, +# columns = ["sig proportion", "nonsig proportion"] +# ) + +# counts["session"] = ses_key +# props["session"] = ses_key + + +# #Finally, plot this data as two seperate stacked bar charts +# fig, ax = plt.subplots(nrows = 1, ncols = 2) #One row, two columns + + +# props.plot.bar( +# stacked = True, x = "session", ax = ax[0], legend = False, +# xlabel="Session No.", ylabel = "Proportion of Total Units", +# color={"sig proportion":"cornflowerblue", "nonsig proportion":"pink"} +# ) + + +# counts.plot.bar( +# stacked = True, x = "session", ax = ax[1], legend = False, +# xlabel = "Session No.", ylabel = "Number of Units", +# color={"sig count":"cornflowerblue", "nonsig count":"pink"} +# ) + +# ax[0].tick_params('x', labelrotation=45) +# ax[1].tick_params('x', labelrotation=45) + +# plt.setp(ax[0].xaxis.get_majorticklabels(), ha='right') #Prevent the ticks from moving after rotating +# plt.setp(ax[1].xaxis.get_majorticklabels(), ha='right') +# plt.legend(["Responsive", "Non-Responsive"], loc=[1.1,0.5]) +# plt.suptitle("\n".join(wrap("Proportion of Responsive to Non-Responsive Units - Compared Across 400ms Pre-Grasp vs. 4s ITI"))) + +# utils.save("/home/s1735718/Figures/AllSession_BarChart_Significant") + + diff --git a/projects/Aidan_Analysis/FullAnalysis/CI_Analysis_responsiveness.py b/projects/Aidan_Analysis/FullAnalysis/CI_Analysis_responsiveness.py new file mode 100644 index 0000000..8d593df --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/CI_Analysis_responsiveness.py @@ -0,0 +1,148 @@ +#This file will produce graphs detailing the relative responsiveness of units. Will have three scatters: +#Did not significantly change (unresponsive), increased in activity, decreased in activity. 
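# Classification rule applied further down: a unit is labelled "up" when the lower
# (2.5th percentile) bound of its bootstrapped CI sits at or above zero, "down" when
# the upper (97.5th percentile) bound sits at or below zero, and "nochange" when the
# interval straddles zero. For example, a (2.5, 50, 97.5) percentile triple of
# (0.4, 1.1, 2.0) is "up", while (-0.6, 0.1, 0.9) is "nochange".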
+#Can import data from CI Analysis + +from matplotlib.pyplot import title, ylim +import numpy as np +import pandas as pd + +import sys +from base import * +from CI_Analysis import significance_extraction +from CI_Analysis import percentile_plot + +sys.path.append("/home/s1735718/PixelsAnalysis/pixels-analysis") +sys.path.insert(0, "/home/s1735718/PixelsAnalysis/pixels/pixels") +from pixtools.utils import Subplots2D #use the local copy of base.py +from pixtools import utils + + +#First select the units that shall be analysed +units = myexp.select_units( + group="good", + max_depth=3500, + name="unit" +) + +#Then align spike rates to trial, and gennerate confidence intervals +#Will once again compare peak reach time to a 4s ITI period acting as baseline +CIs = myexp.get_aligned_spike_rate_CI( + label=ActionLabels.correct_left | ActionLabels.correct_right, + event=Events.led_off, + start=-0.400, #Start the analysis 400ms before LED turns off + step=0.400, #Make this analysis in one step, not looking after or before + end=0.000, #Finish the analysis for each unit when the grasp is made + + bl_label=ActionLabels.correct_left | ActionLabels.correct_right, + bl_event=Events.led_on, #The baseline will examine the ITI (i.e., time before the trial began) + bl_start=-4.000, #Will catch every trial's ITI while avoiding the tail end of the previous trial + bl_end=0.000, + + ss=20, #The size of each sample to take + CI=95, #Confidence interval to analyse to + bs=10000, #The number of times to take a pseudorandom sample for bootstrapping + units=units +) + +#Now take this information and pass it to the significance extraction function +sigs = significance_extraction(CIs) + +#Split this data into upregulated and downregulated units +#Define a function to split units by responsiveness +def responsiveness(CI, sigs_only = True): + + """ + This function takes the confidence intervals computed by the aligned + spike rate CI method and returns the responsiveness type (Up, Down, No Sig Change) + + If sigs_only is true (default) then this function will not return those values that did not significantly change from zero + + CI: The data produced by the aligned spike rate CI method of myexp + + sigs_only: Whether to return no change values + + """ + + #first begin by extracting significant values + sigs = significance_extraction(CI) + + #Then create empty lists to store the data + units = [] + keys = [] + rec_num = 0 + + #If we only want the significant units: + if sigs_only == True: + #Check if the 2.5 percentile is greater than or equal to zero (i.e., upregulated is true) + #Append this as a new row + upregulated = (sigs.loc[2.5] >= 0).rename(index={2.5: "change"}) + sigs.loc["upregulated"] = upregulated #True or False that unit was upregulated + + return sigs + + #Or if we want every unit regardless of significance: + elif sigs_only == False: + #Check the entire dataframe rather than iterating, using a boolean operator + + CI.loc["change"] = "nochange" #Add a row of entirely no change values (default) + + upregulated = (CI.loc[2.5] >= 0).rename(index={2.5: "change"}) #Find all upregulated values + downregulated = (CI.loc[97.5] <= 0).rename(index={2.5: "change"}) #And the same for downregulated values + + CI.loc["change"][upregulated] = "up" #Replace these values with up + CI.loc["change"][downregulated] = "down" + print(CI) + return CI + + +df = responsiveness(CIs, sigs_only=False) + +#Now transpose this to long form information and plot this by depth +#To get depth for a unit, use get cluster info, here 
cluster_id is the unit, and depth is stated +#Get cluster_id == unit and extract depth. +info = myexp.get_cluster_info() +depths = [] +keys = [] +units = [] +skipped = [] + + +#Now iterate through the list, finding the depth of each unit + +for s, unit in df.items(): + + ses_info = info[s[0]] + + if len(ses_info["depth"].loc[ses_info["cluster_id"] == s[1]]) == 0: + skipped.append(s[1]) + continue #This line checks if the unit is absent from cluster_id and skips it if it is, will also append this to a list for checking later + + units.append(unit) + key = list(s) + depths = ses_info["depth"].loc[ses_info["cluster_id"] == s[1]].item() + key.append(depths) + keys.append(tuple(key)) + + +df2 = pd.concat( + units, axis = 1, copy = False, + keys = keys, names = ["session", "unit", "rec_num", "depth"] +) + + +#Once the units are known alongside their depths, can plot the relative proportion of response types +#Will use a bar plot for this +#Iterate over session in df2, plotting the chart for each + +fig, axs = plt.subplots(2) +plt.subplots_adjust(top = 1, hspace=0.5) +for s, session in enumerate(df2): + name = myexp[session[0]].name + sns.countplot( + x = df2[session[0]].xs("change"), data = df2[session[0]], orient='vertical', + palette="pastel", ax = axs[session[0]] + ).set_title(f"{name} Count of Unit Responsiveness") + +#Now save this figure including all sessions used. +names = "_".join(s.name for s in myexp) +utils.save(f"/home/s1735718/Figures/{names}_count_of_unit_responsiveness", nosize=True) \ No newline at end of file diff --git a/projects/Aidan_Analysis/FullAnalysis/FanoFactorCalc.py b/projects/Aidan_Analysis/FullAnalysis/FanoFactorCalc.py new file mode 100644 index 0000000..f8c77dd --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/FanoFactorCalc.py @@ -0,0 +1,1007 @@ +# First import required packages +from argparse import Action +from curses import raw +import math + +from matplotlib.pyplot import axvline, xlim +from base import * +import numpy as np +import pandas as pd +import matplotlib as plt +from pixtools.utils import Subplots2D +from rasterbase import * +from tqdm import tqdm +from sklearn.linear_model import LinearRegression +from pixtools.utils import Subplots2D +from pixels import ioutils + +from functions import ( + event_times, + per_trial_raster, + per_trial_binning, + per_unit_spike_rate, + bin_data_average, +) + +#%% +# First, select only units from correct trials, of good quality, and within m2 +units = myexp.select_units(group="good", name="m2", min_depth=200, max_depth=1200) + +l23_units = myexp.select_units( + group="good", name="layerII/III", min_depth=200, max_depth=375 +) +l5_units = myexp.select_units(group="good", name="layerV", min_depth=375, max_depth=815) +l6_units = myexp.select_units( + group="good", name="layerVI", min_depth=815, max_depth=1200 +) + +# Now align these firing rates to the point at which the trial began +duration = 8 + +# Specifically, to calculate FANO FACTOR we need to get the exact spike times. 
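# Sanity check of the quantity computed below: within one time bin, a unit's Fano
# factor is the across-trial variance of its spike count divided by the across-trial
# mean count (Poisson-like firing gives values near 1). A toy illustration with
# made-up counts, not data from this experiment:
#     toy_counts = np.array([3, 5, 2, 4, 6])  # hypothetical spikes per trial in one bin
#     toy_counts.var() / toy_counts.mean()    # -> 0.5, i.e. sub-Poisson variability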
+spike_times = myexp.align_trials( + ActionLabels.clean, + Events.reach_onset, + "spike_times", + duration=duration, + units=units, +) +spike_times = spike_times.reorder_levels( + ["session", "trial", "unit"], axis=1 +) # reorder the levels + +l23_spike_times = myexp.align_trials( + ActionLabels.clean, + Events.reach_onset, + "spike_times", + duration=duration, + units=l23_units, +) +l23_spike_times = l23_spike_times.reorder_levels( + ["session", "trial", "unit"], axis=1 +) # reorder the levels + +l5_spike_times = myexp.align_trials( + ActionLabels.clean, + Events.reach_onset, + "spike_times", + duration=duration, + units=l5_units, +) +l5_spike_times = l5_spike_times.reorder_levels( + ["session", "trial", "unit"], axis=1 +) # reorder the levels + +l6_spike_times = myexp.align_trials( + ActionLabels.clean, + Events.reach_onset, + "spike_times", + duration=duration, + units=l6_units, +) +l6_spike_times = l6_spike_times.reorder_levels( + ["session", "trial", "unit"], axis=1 +) # reorder the levels + +firing_rates = myexp.align_trials( + ActionLabels.clean, Events.reach_onset, "spike_rate", duration=duration, units=units +) +firing_rate_means = per_trial_binning( + firing_rates, myexp, timespan=duration, bin_size=0.1 +) +firing_rate_means = bin_data_average(firing_rate_means) +firing_rate_means = firing_rate_means.mean() +# l23_firing_rates = myexp.align_trials( +# ActionLabels.clean, +# Events.reach_onset, +# "spike_rate", +# duration=duration, +# units=l23_units, +# ) +# l23_firing_rates = l23_firing_rates.reorder_levels( +# ["session", "unit", "trial"], axis=1 +# ) # reorder the levels + +# l5_firing_rates = myexp.align_trials( +# ActionLabels.clean, +# Events.reach_onset, +# "spike_rate", +# duration=duration, +# units=l5_units, +# ) +# l5_firing_rates = l5_firing_rates.reorder_levels( +# ["session", "unit", "trial"], axis=1 +# ) # reorder the levels + +# l6_firing_rates = myexp.align_trials( +# ActionLabels.clean, +# Events.reach_onset, +# "spike_rate", +# duration=duration, +# units=l6_units, +# ) +# l6_firing_rates = l6_firing_rates.reorder_levels( +# ["session", "unit", "trial"], axis=1 +# ) # reorder the levels + +# Take average firing rates +# l23_firing_rates = per_trial_binning( +# l23_firing_rates, myexp, timespan=duration, bin_size=0.1 +# ) +# l23_means = bin_data_average(l23_firing_rates) +# l23_means = l23_means.mean() + +# l5_firing_rates = per_trial_binning( +# l5_firing_rates, myexp, timespan=duration, bin_size=0.1 +# ) +# l5_means = bin_data_average(l5_firing_rates) +# l5_means = l5_means.mean() + +# l6_firing_rates = per_trial_binning( +# l6_firing_rates, myexp, timespan=duration, bin_size=0.1 +# ) +# l6_means = bin_data_average(l6_firing_rates) +# l6_means = l6_means.mean() + +# From this, must now calculate the point at which the LED turned off in each trial. +# Will allow plotting of off time. +off_times = event_times("led_off", myexp) # time after LED on when trial finished + +# %% +# Now shall define a function to calculate per Trial FF + + +def cross_trial_FF( + spike_times, + exp, + duration, + bin_size, + cache_name, + ff_iterations=50, + mean_matched=False, + raw_values=False, +): + """ + This function will take raw spike times calculated through the exp.align_trials method and calculate the fano factor for each trial, cross units + or for individual units, if mean matched is False. 
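    In the mean-matched case, each time bin's Fano factor is estimated as the slope of a regression through the origin of across-trial spike-count variance on spike-count mean, computed over the mean-matched population of units (after Churchland et al. 2010).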
+ + ------------------------------------------------------------------------ + |NB: Output of the function is saved to an h5 file as "FF_Calculation" | + ------------------------------------------------------------------------ + + spike_times: The raw spike time data produced by the align_trials method + + exp: the experimental cohort defined in base.py (through the Experiment class) + + duration: the window of the aligned data (in s) + + bin_size: the size of the bins to split the trial duration by (in s) + + name: the name to cache the results under + + mean_matched: Whether to apply mean matching adjustment (as defined by Churchland 2010) to correct for large changes in unit mean firing rates across trials + If this is False, the returned dataframe will be of unadjusted single unit FFs + + raw_values: Whether to return the raw values rather than mean FFs + + ff_iterations: The number of times to repeat the random mean-matched sampling when calculating population fano factor (default 50) + + """ + # First create bins that the data shall be categorised into + duration = duration * 1000 # convert s to ms + bin_size = bin_size * 1000 + bins = np.arange((-duration / 2), (duration / 2), bin_size) + all_FF = pd.DataFrame() + + # First split data by session + for s, session in enumerate(myexp): + ses_data = spike_times[s] + ses_data = ses_data.reorder_levels(["unit", "trial"], axis=1) + name = session.name + session_FF = {} + session_se = {} + session_ci = {} + + repeat_FF = pd.DataFrame() + repeat_se = pd.DataFrame() + repeat_ci = pd.DataFrame() + + # Bin the session data + ses_vals = ses_data.values + bin_ids = np.digitize(ses_vals[~np.isnan(ses_vals)], bins, right=True) + + # Then by unit + unit_bin_means = [] + unit_bin_var = [] + + print(f"beginning mean/variance calculation for session {name}") + keys = [] + for t in tqdm(ses_data.columns.get_level_values("unit").unique()): + unit_data = ses_data[t] # take each unit values for all trials + unit_data = ( + unit_data.melt().dropna() + ) # Pivot this data and drop missing values + + # Once these values have been isolated, can begin FF calculation. 
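# np.digitize labels each spike time with the index of the time bin it falls into,
# so the per-trial event counts can then be grouped and summarised bin by bin.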
+ unit_data["bin"] = np.digitize(unit_data["value"], bins, right=True) + + bin_count_means = [] + bin_count_var = [] + + for b in np.unique(bin_ids): + bin_data = unit_data.loc[unit_data["bin"] == b] + + # Calculate the count mean of this bin, i.e., the average number of events per trial + bin_count_mean = ( + bin_data.groupby("trial").count().mean()["value"] + ) # the average number of spikes per unit in this bin + + # If there are any bins where no spikes occurred (i.e, empty dataframes) then return zero + if np.isnan(bin_count_mean): + bin_count_mean = 0 + + bin_count_means.append(bin_count_mean) + + # Now calculate variance of the event count + bin_var = np.var(bin_data.groupby("trial").count()["value"]) + + # Again return zero if no events + if np.isnan(bin_var): + bin_var = 0 + + bin_count_var.append(bin_var) + + # These values will make up a single point on the scatterplots used to calculate cross-unit FF + # Append to list containing values for each unit, across trials + + unit_bin_means.append(bin_count_means) + unit_bin_var.append(bin_count_var) + keys.append(t) + + # Now convert this to a dataframe + unit_bin_means = pd.DataFrame.from_records(unit_bin_means, index=keys) + unit_bin_means.index.name = "unit" + unit_bin_means.columns.name = "bin" + + unit_bin_var = pd.DataFrame.from_records(unit_bin_var, index=keys) + unit_bin_var.index.name = "unit" + unit_bin_var.columns.name = "bin" + + # can now use this information to calculate FF, and include any potential adjustments needed. + # To do so, shall create a linear regression to calculate FF, for each bin + + if mean_matched is True: + bin_heights = {} + """ + First calculate the greatest common distribution for this session's data + 1. Calculate the distribution of mean counts for each timepoint (binned trial time) + 2. Take the smallest height of each bin across timepoints, this will form the height of the GCD's corresponding bin + 3. Save this GCD to allow further mean matching + """ + + # Take each bin, and unit, then calculate the distribution of mean counts + for b in unit_bin_means.columns: + + # Isolate bin values, across units + bin_means = unit_bin_means[b] + + # Calculate distribution for this time, across units + p = np.histogram(bin_means, bins=9) + + # Save the heights of these bins for each bin + bin_heights.update( + {b: p[0]} + ) # append to a dictionary, where the key is the bin + + # Convert this dictionary to a dataframe, containing the heights of each timepoints distribution + bin_heights = pd.DataFrame.from_dict( + bin_heights, orient="index" + ) # the index of this dataframe will be the binned timepoints + gcd_heights = ( + bin_heights.min() + ) # get the minimum value for each "bin" of the histogram across timepoints + gcd_heights.index += 1 + + # Now may begin mean matching to the gcd + # Take each bin (i.e., timepoint) and calculate the distribution of points + # TODO: Update this analysis to utilise a rolling bin approach. 
I.e, take data at 100ms intervals, then shift the bin 10ms right + + print(f"Initiating Mean Matched Fano Factor Calculation for Session {name}") + for iteration in tqdm(range(ff_iterations)): + np.random.seed(iteration) + + for b in unit_bin_means.columns: + timepoint_means = pd.DataFrame(unit_bin_means[b]) + timepoint_edges = pd.DataFrame( + np.histogram(timepoint_means, bins=9)[1] + ) # the left edges of the histogram used to create a distribution + timepoint_var = unit_bin_var[b] + + # split data into bins + binned_data = pd.cut( + np.ndarray.flatten(timepoint_means.values), + np.ndarray.flatten(timepoint_edges.values), + labels=timepoint_edges.index[1:], + include_lowest=True, + ) # seperate raw data into the distribution bins + + timepoint_means["bin"] = binned_data + + # Isolate a single bin from the timepoint histogram + timepoint_FF_mean = {} + timepoint_FF_var = {} + + for i in range(len(timepoint_edges.index)): + if i == 0: + continue + # Bins begin at one, skip first iteration + repeat = True + bin_data = timepoint_means.loc[timepoint_means["bin"] == i] + + while repeat == True: + + # Check if bin height matches gcd height - if it does, append timepoint data to list + + if bin_data.count()["bin"] == gcd_heights[i]: + timepoint_FF_mean.update( + {i: bin_data[bin_data.columns[0]]} + ) # append the bin, and unit/mean count + repeat = False + + # If not, remove randomly points from the dataset one at a time, until they match + else: + dropped_data = np.random.choice( + bin_data.index, 1, replace=False + ) + bin_data = bin_data.drop( + dropped_data + ) # remove one datapoint, then continue with check + + ##Now extract the associated variance for the timepoint + timepoint_FF_mean = pd.DataFrame.from_dict(timepoint_FF_mean) + timepoint_FF_mean = timepoint_FF_mean.melt( + ignore_index=False + ).dropna() + timepoint_FF_var = timepoint_var[ + timepoint_var.index.isin(timepoint_FF_mean.index) + ] + + # Using the count mean and variance of the count, construct a linear regression for the population + # Ensure the origin of the linear model is set as zero + + # Set up data in correct shape + x = np.array(timepoint_FF_mean["value"]).reshape( + (-1, 1) + ) # mean count, the x axis + y = np.array(timepoint_FF_var) + + # Now fit model + timepoint_model = sm.OLS(y, x).fit() + + # The slope (i.e., coefficient of variation) of this model + ff_score = timepoint_model.params[0] + + # Extract the confidence interval of this model fit + ff_se = timepoint_model.bse[0] + ff_ci = timepoint_model.conf_int()[0] + + session_FF.update({b: ff_score}) + session_se.update({b: ff_se}) + session_ci.update({b: tuple(ff_ci)}) + + # Convert the session's FF to dataframe, then add to a master + session_FF = pd.DataFrame(session_FF, index=[0]) + session_se = pd.DataFrame(session_se, index=[0]) + session_ci = pd.DataFrame(session_ci.items()) + + repeat_FF = pd.concat([repeat_FF, session_FF], axis=0) + repeat_se = pd.concat([repeat_se, session_se], axis=0) + repeat_ci = pd.concat([repeat_ci, session_ci], axis=0) + + session_FF = {} + session_se = {} + session_ci = {} + + # Take the average across repeats for each session, add this to a final dataframe for session, timepoint, mean_FF, SE + + for i, cols in enumerate(repeat_FF.iteritems()): + timepoint_vals = repeat_FF.describe()[i] + timepoint_se = repeat_se.describe()[i] + + ci_bins = repeat_ci[0].tolist() + timepoint_ci = pd.DataFrame(repeat_ci[1].tolist()) + timepoint_ci = timepoint_ci.rename( + columns={0: "lower_ci", 1: "upper_ci"} + ).reset_index(drop=True) + 
timepoint_ci.insert(0, "bin", ci_bins) + + mean = timepoint_vals["mean"] + se = timepoint_se["mean"] + lower_ci = timepoint_ci.loc[timepoint_ci["bin"] == i].describe()[ + "lower_ci" + ]["mean"] + upper_ci = timepoint_ci.loc[timepoint_ci["bin"] == i].describe()[ + "upper_ci" + ]["mean"] + + if raw_values == False: + vals = pd.DataFrame( + { + "session": name, + "bin": i, + "mean FF": mean, + "SE": se, + "lower_ci": lower_ci, + "upper_ci": upper_ci, + }, + index=[0], + ) + all_FF = pd.concat([all_FF, vals], ignore_index=True) + elif raw_values == True: + repeat_FF = repeat_FF.melt() + repeat_se = repeat_se.melt() + + repeat_FF["session"] = name + repeat_FF = repeat_FF.set_index("session") + repeat_FF = repeat_FF.rename( + columns={"value": "FF", "variable": "bin"} + ) + + repeat_se["session"] = name + repeat_se = repeat_se.set_index("session") + repeat_FF["se"] = repeat_se["value"] + repeat_FF["lower_ci"] = timepoint_ci["lower_ci"] + repeat_FF["upper_ci"] = timepoint_ci["upper_ci"] + + all_FF = pd.concat([all_FF, repeat_FF]) + break + + elif mean_matched is False: + # Remember, this will return the individual FF for each unit, not the population level FF + # calculate raw FF for each unit + session_FF = pd.DataFrame(columns=["bin", "FF", "session", "unit"]) + for unit in unit_bin_means.index: + unit_FF = pd.DataFrame( + unit_bin_var.loc[unit] / unit_bin_means.loc[unit] + ).rename( + columns={unit: "FF"} + ) # return all bins for this unit + unit_FF = unit_FF.reset_index() + unit_FF["session"] = name + unit_FF["unit"] = unit + + session_FF = pd.concat([session_FF, unit_FF]) + + # Reorder columns, then return data + session_FF = session_FF[["session", "unit", "bin", "FF"]] + session_FF.reset_index(drop=True) + all_FF = pd.concat([all_FF, session_FF], axis=0) + + ioutils.write_hdf5( + f"/data/aidan/{cache_name}_FF_Calculation_mean_matched_{mean_matched}_raw{raw_values}.h5", + all_FF, + ) + return all_FF + + +# test = cross_trial_FF( +# spike_times=spike_times, +# exp=myexp, +# duration=4, +# bin_size=0.1, +# cache_name="per_unit_FF", +# mean_matched=True, +# ff_iterations=2, +# ) + + +# %% +# Plot individual unit FFs for each session overlaid on firing rates +# Split this by unit type + +# l23_per_unit_FF = cross_trial_FF( +# spike_times=l23_spike_times, +# exp=myexp, +# duration=4, +# bin_size=0.1, +# cache_name="per_unit_FF", +# mean_matched=False, +# ff_iterations=50, +# ) + +# l5_per_unit_FF = cross_trial_FF( +# spike_times=l5_spike_times, +# exp=myexp, +# duration=4, +# bin_size=0.1, +# cache_name="per_unit_FF", +# mean_matched=False, +# ff_iterations=50, +# ) + +# l6_per_unit_FF = cross_trial_FF( +# spike_times=l6_spike_times, +# exp=myexp, +# duration=4, +# bin_size=0.1, +# cache_name="per_unit_FF", +# mean_matched=False, +# ff_iterations=50, +# ) +# # Concatenate data +# l23_per_unit_FF["layer"] = "layer 2/3" +# l5_per_unit_FF["layer"] = "layer 5" +# l6_per_unit_FF["layer"] = "layer 6" + +# per_unit_FF = pd.concat( +# [l23_per_unit_FF, l5_per_unit_FF, l6_per_unit_FF], ignore_index=True +# # ) + +# # First plot layer 2/3 +# for s, session in enumerate(myexp): +# name = session.name +# units = l23_per_unit_FF["unit"].unique() +# subplots = Subplots2D(units, sharex=False) +# layer = "23" +# palette = sns.color_palette() +# plt.rcParams["figure.figsize"] = (10, 10) +# ses_fr = l23_firing_rates[s] + +# for i, un in enumerate(units): +# unit_data = l23_per_unit_FF.loc[l23_per_unit_FF["unit"] == un] +# unit_data["bin/10"] = unit_data["bin"] / 10 - ( +# duration / 2 +# ) # convert 100ms bins to 
seconds relative to action +# ax = subplots.axes_flat[i] +# ax1 = ax.twinx() # secondary axis for fr + +# bins = np.unique( +# l23_per_unit_FF.loc[l23_per_unit_FF["session"] == name]["bin"].values +# ) + +# val_data = ses_fr[un].stack().reset_index() +# val_data["y"] = val_data[0] +# # num_samples = len(ses_fr[values[0]].columns) + +# p = sns.lineplot( +# data=unit_data, x="bin/10", y="FF", ax=ax, linewidth=0.5, color="red" +# ) +# p.axvline( +# len(np.unique(bins)) / 2, +# color="green", +# ls="--", +# ) +# # p.autoscale(enable=True, tight=False) +# p.set_xticks([]) +# p.set_yticks([]) +# p.get_yaxis().get_label().set_visible(False) +# p.get_xaxis().get_label().set_visible(False) +# ax.set_xlim(bins[0], bins[-1]) + +# q = sns.lineplot( +# data=val_data, +# x="time", +# y="y", +# ci="sd", +# ax=ax1, +# linewidth=0.5, +# ) +# # q.autoscale(enable=True, tight=False) +# q.set_yticks([]) +# q.set_xticks([]) +# ax1.set_xlim(-duration / 2, duration / 2) +# # peak = ses_fr[value].values.mean(axis=1).max() +# q.get_yaxis().get_label().set_visible(False) +# q.get_xaxis().get_label().set_visible(False) +# q.axvline(c=palette[1], ls="--", linewidth=0.5) +# q.set_box_aspect(1) + +# to_label = subplots.axes_flat[0] +# to_label.get_yaxis().get_label().set_visible(True) +# to_label.get_xaxis().get_label().set_visible(True) +# to_label.set_xticks([]) + +# legend = subplots.legend + +# legend.text( +# 0, +# 0.3, +# "Unit ID", +# transform=legend.transAxes, +# color=palette[0], +# ) +# legend.set_visible(True) +# ticks = list(unit_data["bin"].unique()) +# legend.get_xaxis().set_ticks( +# ticks=[ticks[0], ticks[round(len(ticks) / 2)], ticks[-1]] +# ) +# legend.set_xlabel("100ms Bin") +# legend.get_yaxis().set_visible(False) +# legend.set_box_aspect(1) + +# # Overlay firing rates +# values = ses_fr.columns.get_level_values("unit").unique() +# values.values.sort() +# # num_samples = len(ses_fr[values[0]].columns) + +# plt.suptitle(f"Session {name} Per Unit Fano Factor + Firing Rate") + +# utils.save( +# f"/home/s1735718/Figures/{name}_per_unit_FanoFactor_l{layer}", nosize=True +# ) + +# for s, session in enumerate(myexp): +# name = session.name +# units = l5_per_unit_FF["unit"].unique() +# subplots = Subplots2D(units, sharex=False) +# layer = "5" +# palette = sns.color_palette() +# plt.rcParams["figure.figsize"] = (10, 10) +# ses_fr = l5_firing_rates[s] + +# for i, un in enumerate(units): +# unit_data = l5_per_unit_FF.loc[l5_per_unit_FF["unit"] == un] +# unit_data["bin/10"] = unit_data["bin"] / 10 - ( +# duration / 2 +# ) # convert 100ms bins to seconds relative to action +# ax = subplots.axes_flat[i] +# ax1 = ax.twinx() # secondary axis for fr + +# bins = np.unique( +# l5_per_unit_FF.loc[l5_per_unit_FF["session"] == name]["bin"].values +# ) + +# val_data = ses_fr[un].stack().reset_index() +# val_data["y"] = val_data[0] +# # num_samples = len(ses_fr[values[0]].columns) + +# p = sns.lineplot( +# data=unit_data, x="bin/10", y="FF", ax=ax, linewidth=0.5, color="red" +# ) +# p.axvline( +# len(np.unique(bins)) / 2, +# color="green", +# ls="--", +# ) +# # p.autoscale(enable=True, tight=False) +# p.set_xticks([]) +# p.set_yticks([]) +# p.get_yaxis().get_label().set_visible(False) +# p.get_xaxis().get_label().set_visible(False) +# ax.set_xlim(bins[0], bins[-1]) + +# q = sns.lineplot( +# data=val_data, +# x="time", +# y="y", +# ci="sd", +# ax=ax1, +# linewidth=0.5, +# ) +# # q.autoscale(enable=True, tight=False) +# q.set_yticks([]) +# q.set_xticks([]) +# ax1.set_xlim(-duration / 2, duration / 2) +# # peak = 
ses_fr[value].values.mean(axis=1).max() +# q.get_yaxis().get_label().set_visible(False) +# q.get_xaxis().get_label().set_visible(False) +# q.axvline(c=palette[1], ls="--", linewidth=0.5) +# q.set_box_aspect(1) + +# to_label = subplots.axes_flat[0] +# to_label.get_yaxis().get_label().set_visible(True) +# to_label.get_xaxis().get_label().set_visible(True) +# to_label.set_xticks([]) + +# legend = subplots.legend + +# legend.text( +# 0, +# 0.3, +# "Unit ID", +# transform=legend.transAxes, +# color=palette[0], +# ) +# legend.set_visible(True) +# ticks = list(unit_data["bin"].unique()) +# legend.get_xaxis().set_ticks( +# ticks=[ticks[0], ticks[round(len(ticks) / 2)], ticks[-1]] +# ) +# legend.set_xlabel("100ms Bin") +# legend.get_yaxis().set_visible(False) +# legend.set_box_aspect(1) + +# # Overlay firing rates +# values = ses_fr.columns.get_level_values("unit").unique() +# values.values.sort() +# # num_samples = len(ses_fr[values[0]].columns) + +# plt.suptitle(f"Session {name} Per Unit Fano Factor + Firing Rate") + +# utils.save( +# f"/home/s1735718/Figures/{name}_per_unit_FanoFactor_l{layer}", nosize=True +# ) + +# for s, session in enumerate(myexp): +# name = session.name +# units = l6_per_unit_FF["unit"].unique() +# subplots = Subplots2D(units, sharex=False) +# layer = "6" +# palette = sns.color_palette() +# plt.rcParams["figure.figsize"] = (10, 10) +# ses_fr = l6_firing_rates[s] + +# for i, un in enumerate(units): +# unit_data = l6_per_unit_FF.loc[l6_per_unit_FF["unit"] == un] +# unit_data["bin/10"] = unit_data["bin"] / 10 - ( +# duration / 2 +# ) # convert 100ms bins to seconds relative to action +# ax = subplots.axes_flat[i] +# ax1 = ax.twinx() # secondary axis for fr + +# bins = np.unique( +# l6_per_unit_FF.loc[l6_per_unit_FF["session"] == name]["bin"].values +# ) + +# val_data = ses_fr[un].stack().reset_index() +# val_data["y"] = val_data[0] +# # num_samples = len(ses_fr[values[0]].columns) + +# p = sns.lineplot( +# data=unit_data, x="bin/10", y="FF", ax=ax, linewidth=0.5, color="red" +# ) +# p.axvline( +# len(np.unique(bins)) / 2, +# color="green", +# ls="--", +# ) +# # p.autoscale(enable=True, tight=False) +# p.set_xticks([]) +# p.set_yticks([]) +# p.get_yaxis().get_label().set_visible(False) +# p.get_xaxis().get_label().set_visible(False) +# ax.set_xlim(bins[0], bins[-1]) + +# q = sns.lineplot( +# data=val_data, +# x="time", +# y="y", +# ci="sd", +# ax=ax1, +# linewidth=0.5, +# ) +# # q.autoscale(enable=True, tight=False) +# q.set_yticks([]) +# q.set_xticks([]) +# ax1.set_xlim(-duration / 2, duration / 2) +# # peak = ses_fr[value].values.mean(axis=1).max() +# q.get_yaxis().get_label().set_visible(False) +# q.get_xaxis().get_label().set_visible(False) +# q.axvline(c=palette[1], ls="--", linewidth=0.5) +# q.set_box_aspect(1) + +# to_label = subplots.axes_flat[0] +# to_label.get_yaxis().get_label().set_visible(True) +# to_label.get_xaxis().get_label().set_visible(True) +# to_label.set_xticks([]) + +# legend = subplots.legend + +# legend.text( +# 0, +# 0.3, +# "Unit ID", +# transform=legend.transAxes, +# color=palette[0], +# ) +# legend.set_visible(True) +# ticks = list(unit_data["bin"].unique()) +# legend.get_xaxis().set_ticks( +# ticks=[ticks[0], ticks[round(len(ticks) / 2)], ticks[-1]] +# ) +# legend.set_xlabel("100ms Bin") +# legend.get_yaxis().set_visible(False) +# legend.set_box_aspect(1) + +# # Overlay firing rates +# values = ses_fr.columns.get_level_values("unit").unique() +# values.values.sort() +# # num_samples = len(ses_fr[values[0]].columns) + +# plt.suptitle(f"Session 
{name} Per Unit Fano Factor + Firing Rate") + +# utils.save( +# f"/home/s1735718/Figures/{name}_per_unit_FanoFactor_l{layer}", nosize=True +# ) + +# # %% +# # Plot population FF changes + +# population_FF_l23 = cross_trial_FF( +# spike_times=l23_spike_times, +# exp=myexp, +# duration=4, +# bin_size=0.1, +# cache_name="l23", +# mean_matched=True, +# ff_iterations=50, +# ) +# population_FF_l5 = cross_trial_FF( +# spike_times=l5_spike_times, +# exp=myexp, +# duration=4, +# bin_size=0.1, +# cache_name="l5", +# mean_matched=True, +# ff_iterations=50, +# ) +# population_FF_l6 = cross_trial_FF( +# spike_times=l6_spike_times, +# exp=myexp, +# duration=4, +# bin_size=0.1, +# cache_name="l6", +# mean_matched=True, +# ff_iterations=50, +# ) + +#%% +# # Plot lines for each layer +# fig, ax = plt.subplots(2, figsize=(10, 10)) +# bin_size = 0.1 +# ax1 = ax[0] +# # First layer 2/3 +# mean = population_FF_l23["mean FF"] +# se = population_FF_l23["SE"] +# x = (population_FF_l23["bin"] * bin_size) - 2 +# lower = mean - se +# upper = mean + se + +# ax1.plot(x, mean, label="Layer 2/3", color="blue") +# ax1.plot(x, lower, color="tab:blue", alpha=0.1) +# ax1.plot(x, upper, color="tab:blue", alpha=0.1) +# ax1.fill_between(x, lower, upper, alpha=0.2) + +# # Then layer 5 +# mean = population_FF_l5["mean FF"] +# se = population_FF_l5["SE"] +# x = (population_FF_l5["bin"] * bin_size) - 2 +# lower = mean - se +# upper = mean + se + +# ax1.plot(x, mean, label="Layer 5", color="orange") +# ax1.plot(x, lower, color="tab:orange", alpha=0.1) +# ax1.plot(x, upper, color="tab:orange", alpha=0.1) +# ax1.fill_between(x, lower, upper, alpha=0.2) + +# # Finally, layer 6 +# mean = population_FF_l6["mean FF"] +# se = population_FF_l6["SE"] +# x = (population_FF_l6["bin"] * bin_size) - 2 +# lower = mean - se +# upper = mean + se + +# ax1.plot(x, mean, label="Layer 6", color="green") +# ax1.plot(x, lower, color="tab:green", alpha=0.1) +# ax1.plot(x, upper, color="tab:green", alpha=0.1) +# ax1.fill_between(x, lower, upper, alpha=0.2) + +# # Now set axes +# ax1.set_ylabel("Mean Matched Fano Factor") +# ax1.set_xticklabels([]) +# ax1.axvline(x=0, color="black", ls="--") +# ax1.set_ylim(0) +# ax1.legend() + + +# plt.suptitle( +# f"Population Level Fano Factor - Aligned to Reach Onset - Session {myexp[0].name}", +# y=0.9, +# ) + +# # Now plot firing rates +# ax2 = ax[1] + +# ax2.plot(x, l23_means, color="blue") +# ax2.plot(x, l5_means, color="orange") +# ax2.plot(x, l6_means, color="green") +# ax2.axvline(x=0, color="black", ls="--") +# ax2.set_title("Population Level Firing Rate") +# ax2.set_ylabel("Mean Firing Rate (Hz)") +# ax2.set_xlabel("Time Relative to Reach Onset (s)") +# ax2.set_ylim(0) +# utils.save(f"/home/s1735718/Figures/{myexp[0].name}_population_FanoFactor", nosize=True) +# plt.show() +# %% +# Plot undivided fano factor for the population +population_FF = cross_trial_FF( + spike_times=spike_times, + exp=myexp, + duration=8, + bin_size=0.1, + cache_name="population", + mean_matched=True, + ff_iterations=50, +) +# Check significance for each bin + + +def fano_fac_sig(population_FF): + """ + Function checks for significance in fano factor output, assuming the data is NOT raw, and mean-matched. + Returns a list of significance values (sig or nonsig) for each bin, compared to the previous timepoint. + + population_FF: the data obtained from the cross_trial_FF() function, where mean_matched is True, and raw_data is False. 
+ """ + significance = [] + for i, items in enumerate(population_FF.iterrows()): + if i == 0: + continue + + bin_upper = items[1]["upper_ci"] + bin_lower = items[1]["lower_ci"] + + previous_upper = population_FF.loc[population_FF["bin"] == i - 1]["upper_ci"] + previous_lower = population_FF.loc[population_FF["bin"] == i - 1]["lower_ci"] + + # Compare bins for overlap + if (bin_upper <= previous_lower.item()) or (bin_lower >= previous_upper.item()): + significance.append("significant") + + else: + significance.append("nonsignificant") + + return significance + +significance = fano_fac_sig(population_FF) +#%% +# Plot firing rates and fano factor +name = myexp[0].name +bin_size = 0.1 +bins = per_trial_binning(firing_rates, myexp, timespan=duration, bin_size=0.1) +bins = bin_data_average(bins).describe() +std = [] +for i in bins.iteritems(): + std.append(i[1]["std"]) + +fig, ax = plt.subplots(2, figsize=(10, 10)) +ax1 = ax[0] +ax2 = ax[1] + +mean = population_FF["mean FF"] +se = population_FF["SE"] +upper = mean + se +lower = mean - se +x = (population_FF["bin"] * bin_size) - 4 + +ax1.plot(x, mean, color="purple") +ax1.plot(x, lower, color="tab:purple", alpha=0.1) +ax1.plot(x, upper, color="tab:purple", alpha=0.1) +ax1.fill_between(x, lower, upper, alpha=0.2) + +ax1.set_ylabel("Mean Matched Fano Factor") +ax1.set_xlabel("Duration Relative to Reach Onset (s)") +ax1.axvline(x=0, color="black", ls="--") +ax1.set_ylim(0) +ax1.set_title(f"Population Level Fano Factor - session {name}") + +# Plot firing rate +lower = firing_rate_means - std +upper = firing_rate_means + std + +ax2.plot(x, firing_rate_means) +ax2.plot(x, lower, color="tab:purple", alpha=0.1) +ax2.plot(x, upper, color="tab:purple", alpha=0.1) +ax2.fill_between(x, lower, upper, alpha=0.2) + +ax2.set_title("Population Level Firing Rate") +ax2.set_ylabel("Mean Firing Rate (Hz)") +ax2.set_xlabel("Time Relative to Reach Onset (s)") + +ax2.axvline(x=0, color="black", ls="--") + +utils.save( + f"/home/s1735718/Figures/{myexp[0].name}_population_FanoFactor_unsplit", nosize=True +) +plt.show() +# %% diff --git a/projects/Aidan_Analysis/FullAnalysis/FanoFactor_Epochs.py b/projects/Aidan_Analysis/FullAnalysis/FanoFactor_Epochs.py new file mode 100644 index 0000000..ad5887f --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/FanoFactor_Epochs.py @@ -0,0 +1,37 @@ +# This file will calculate the fano factor relative to different alignment points. +# Then compare the changes observed. 
+# First import required packages +from argparse import Action +import math + +from matplotlib.pyplot import axvline, xlim +from base import * +import numpy as np +import pandas as pd +import matplotlib as plt +from pixtools.utils import Subplots2D +from rasterbase import * +from tqdm import tqdm +from sklearn.linear_model import LinearRegression +from pixtools.utils import Subplots2D +from pixels import ioutils +import statsmodels.api as sm +from statsmodels.formula.api import ols + +from functions import ( + event_times, + per_trial_raster, + per_trial_binning, + per_unit_spike_rate, + bin_data_average, + cross_trial_FF, + fano_fac_sig, +) + +# Import previously calculated FF values +population_FF = ioutils.read_hdf5( + "/data/aidan/per_unit_FF_FF_Calculation_mean_matched_True_rawTrue.h5" +) +population_FF = population_FF.melt() +population_FF.rename(columns={"variable": "bin", "value": "FF"}, inplace=True) +# Calculate any significant changes in FF across time: using the SE produced from the model diff --git a/projects/Aidan_Analysis/FullAnalysis/VR46_SpikeRate_ShallowDeep.py b/projects/Aidan_Analysis/FullAnalysis/VR46_SpikeRate_ShallowDeep.py new file mode 100644 index 0000000..1ad7a8f --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/VR46_SpikeRate_ShallowDeep.py @@ -0,0 +1,72 @@ +#This file shall split 211024_VR46 into two groups by depth, then plot spike firing rate by unit +#First import required packages +import sys +import matplotlib.pyplot as plt + + +from matplotlib.pyplot import figure +from distutils.command.sdist import sdist +from base import * +from textwrap import wrap +sys.path.append("/home/s1735718/PixelsAnalysis/pixels-analysis") +from pixtools import spike_rate + + + +#Now select the specific recording to be analysed +ses = myexp.get_session_by_name("211024_VR46") + +#And now split session into two groups +shallow = ses.select_units( + group="good", + max_depth=500, + uncurated=False, + name="shallow" +) + +deep = ses.select_units( + group="good", + min_depth=550, + max_depth=1200, + uncurated=False, + name="deep" +) + +#And then define the hits +hits_s = ses.align_trials( + ActionLabels.correct, + Events.led_on, + "spike_rate", + duration=4, + units=shallow +) + +hits_d = ses.align_trials( + ActionLabels.correct, + Events.led_on, + "spike_rate", + duration=4, + units=deep +) + +#Using these units we may now plot seperate spike rate graphs +#First for shallow + +name=ses.name +ci = 95 + +spike_rate.per_unit_spike_rate(hits_s, ci=ci) +plt.suptitle( + "\n".join(wrap(f"Session {name} - per-unit (<500 Depth) across-trials firing rate (aligned to LED on)")), +) + + +#And then for deep units + +spike_rate.per_unit_spike_rate(hits_d, ci=ci) +plt.suptitle( + "\n".join(wrap(f"Session {name} - per-unit (500 < Depth < 1200) across-trials firing rate (aligned to LED on)")), +) + + +plt.show() diff --git a/projects/Aidan_Analysis/FullAnalysis/base.py b/projects/Aidan_Analysis/FullAnalysis/base.py new file mode 100644 index 0000000..24ef39c --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/base.py @@ -0,0 +1,82 @@ +# This file sets up the required data/paths for subsequent data processing +# TODO: add pixtools to path permenantly so I don't have to keep importing from a local copy +# TODO: run files through black to format + +# Package required to change path +from pathlib import Path + +import sys +import numpy as np +import matplotlib.pyplot as plt +import seaborn as sns + +fig_dir = Path( + 
"/home/s1735718/PixelsAnalysis/pixels-analysis/projects/Aidan_Analysis/FullAnalysis/Figures" +) + +sys.path.append( + "/opt/neuropixels/pixels" +) # Adds the location of the pixtools folder to the python path +sys.path.insert(0, "/home/s1735718/PixelsAnalysis/pixels-analysis/") +from pixels import Experiment +from pixels.behaviours.reach import Reach, ActionLabels, Events +from pixtools import utils + +interim_dir = "/data/visuomotor_control/interim" + +# Step 1: Load an experiment +# +# An experiment handles a group of mice that were trained in the same behaviour. It +# stores data and metadata for all included sessions belonging to the list of mice +# provided. The Experiment class is passed the mouse or list of mice, the class +# definition for the behaviour they were trained in (imported from pixels.behaviours), +# and the paths where it can find recording data (the folder containing 'raw', 'interim' +# and 'processed' folders) and training metadata. + +# Will import master data from visuomotor control/neuropixels + +myexp = Experiment( + [ + # "VR46", + # "VR47", + # "VR49", + # "VR50", + # "VR52", + # "VR53", + # "VR55", + # "VR56", + # "VR58", + "VR59", + ], # This can be a list + Reach, # We want the reach behaviour + "~/duguidlab/visuomotor_control/neuropixels", # Where is the main data saved + "~/duguidlab/CuedBehaviourAnalysis/Data/TrainingJSON", # Where is the metadata for the recording saved + interim_dir="/data/visuomotor_control/interim", +) + + +# This depth range covers all mice +m2_depth = { + "min_depth": 200, + "max_depth": 1200, +} + + +def get_pyramidals(exp): + rep = "-".join(str(s) for s in m2_depth.values()) + + return exp.select_units( + **m2_depth, + min_spike_width=0.4, + name=f"{rep}_pyramidals", + ) + + +def get_interneurons(exp): + rep = "-".join(str(s) for s in m2_depth.values()) + + return exp.select_units( + **m2_depth, + max_spike_width=0.35, + name=f"{rep}_interneurons", + ) diff --git a/projects/Aidan_Analysis/FullAnalysis/bin_delta_depth_plot.py b/projects/Aidan_Analysis/FullAnalysis/bin_delta_depth_plot.py new file mode 100644 index 0000000..02820cf --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/bin_delta_depth_plot.py @@ -0,0 +1,512 @@ +###Using the calculated bin changes from the FR anova, this script will plot the depths of individual neuronal units against the change in firing rate (delta) +# Delta calculated from start of drop in FR to end (i.e., largest change in bin) + +#%% +# First import required packages: +import enum +from xml.etree.ElementInclude import include +from matplotlib.pyplot import legend, title, ylabel, ylim +import matplotlib.lines as mlines +from pyrsistent import b +import statsmodels.api as sm +from statsmodels.formula.api import ols +import numpy as np +import pandas as pd +import seaborn as sns +import sys +from base import * +from channeldepth import meta_spikeglx +from tqdm import tqdm # for progress bar + +from functions import ( + per_trial_binning, + bin_data_average, + event_times, + per_trial_spike_rate, +) +from reach import Cohort + + +from pixtools.utils import Subplots2D +from pixtools import utils +from pixtools import spike_rate +from pixels import ioutils +import statsmodels.stats.multicomp as mc +from statsmodels.graphics.factorplots import interaction_plot +from bioinfokit.analys import stat +from channeldepth import meta_spikeglx +from pixtools import clusters +from textwrap import wrap + +#%% +# Select units from given session +units = myexp.select_units(group="good", name="m2", min_depth=200, 
max_depth=1200) + +duration = 2 +per_unit_aligned = myexp.align_trials( + ActionLabels.correct, Events.led_off, "spike_rate", duration=duration, units=units +) + +#%% +# Using import functions, bin firing rate data +bin_data = per_trial_binning(per_unit_aligned, myexp, timespan=2, bin_size=0.1) +bin_data.reorder_levels(["session", "unit", "trial"], axis=1) + +multicomp_bin_results = [] +multicomp_unit_results = [] +multicomp_int_results = [] + + +# +###Run linear model in a distributed manner### +###NB: As the dataset is too large to run within memory constraints, will remove the final 4s of data. + + +def within_unit_GLM(bin_data, myexp): + + """ + Function takes binned data (with levels in order session -> unit -> trial) and performs an ANOVA on each session + Returns the multiple comparisons and ANOVA results for the interaction, also prints anova results + Following parameters are used: + + IV = Firing Rate, DV = Bin, Unit (containing many trials) + + Model is as follows: + + firing_rate ~ C(bin) + C(unit) + C(bin):C(unit) + + bin_data: the output of the per_trial_binning function reordered to swap unit and trial level hierarchy + + myexp: experimental cohort defined in base.py + + + NB: This function runs the model as a distributed set of calculations!! + i.e., it slices the data and runs several smaller calculations before concatenating results + + """ + # store data as a resillient distributed dataset + # will do this using pyspark + + # Now split this data into sessions + results = [] + for s, session in enumerate(myexp): + + name = session.name + ses_data = bin_data[s] + + # Now shall average the data per bin + ses_avgs = bin_data_average(ses_data) + + # Then melt data to prep for analysis + ses_avgs = ses_avgs.reset_index().melt(id_vars=["unit", "trial"]) + ses_avgs.rename({"value": "firing_rate"}, axis=1, inplace=True) + + ##Now shall construct a linear model to allow for analysis + # IV: Firing Rate + # DVs: Bin (within subjects), Unit (Within subjects - trials averaged out) + + # Now run the GLM analysis + model = ols( + "firing_rate ~ C(bin) + C(unit) + C(bin):C(unit)", data=ses_avgs + ).fit() + + output = sm.stats.anova_lm(model, typ=2) # Type II SS as this is faster + print(f"ANOVA Results - Session {name}") + print(output) + results.append(output) + + ####Run Multiple Comparisons#### + # Main effect of bin + res = stat() + + print(f"starting multiple comparisons for session {name}") + # Interaction + res.tukey_hsd( + df=ses_avgs, + res_var="firing_rate", + xfac_var=["bin", "unit"], + anova_model="firing_rate ~ C(bin) + C(unit) + C(bin):C(unit)", + phalpha=0.05, + ss_typ=2, + ) + int_effect = res.tukey_summary + multicomp_int_results.append(int_effect) + results = pd.DataFrame(results) + return multicomp_int_results, results + + +glm, output = within_unit_GLM(bin_data, myexp) + +#Cache the multicomps and results +for s, session in enumerate(myexp): + name = session.name + ses_output = output[s] + ses_output = pd.DataFrame(ses_output) + ioutils.write_hdf5(f"/data/aidan/glm_output_session_{name}.h5", ses_output) + + ses_multicomps = glm[s] + ses_multicomps = pd.DataFrame(ses_multicomps) + + ioutils.write_hdf5(f"/data/aidan/glm_multicomps_session_{name}.h5", ses_multicomps) + +#For later retrieval +# %% +# Using this data, now shall determine the largest delta between bins for each unit +# Split data into seperate dataframes for each session +def unit_delta(glm, myexp, bin_data, bin_duration, sig_only, percentage_change): + """ + Function takes the output of the multiple 
comparisons calculated by within_unit_GLM() and calculates the larges change in firing rate (or other IV measured by the GLM) between bins, per unit + This requires both experimental cohort data, and bin_data calculated by per_trial_binning() and passed through reorder levels (as "session", "unit", "trial") + Function returns a list of lists, in a hierarchy of [session[unit deltas]] + + Using this data, a depth plot may be created displaying the relative delta by unit location in pM2 + + glm: the result of the ANOVA and subsequent Tukey adjusted multiple comparisons performed by within_unit_GLM + NB: THIS IS A COMPUTATIONALLY EXPENSIVE FUNCTION, DO NOT RUN IT ON MORE THAN ONE SESSION AT A TIME + + myexp: the experimental cohort defined in base.py + + bin_data: the binned raw data computed by the per_trial_binning function + + bin_duration: the length of the bins in the data + + sig_only: whether to return only the greatest delta of significant bins or not + + percentage_change: whether to return deltas as a percentage change + + """ + ses_deltas = [] + for s, session in enumerate(myexp): + + final_deltas = [] + unit_deltas = [] + sigunit_comps = [] + ses_comps = glm[s] + + if sig_only is True: + # Return only significant comparisons + ses_comps = ses_comps.loc[ses_comps["p-value"] < 0.05] + + # Determine list of units remaining in comparison + units = [] + for i in ses_comps["group1"]: + units.append(i[1]) + units = np.array(units) + units = np.unique(units) + + # Now iterate through comparisons, by unit, Saving the significant comparisons to allow later calculation of delta + + for i in tqdm(units): + unit_comps = ses_comps[ + ses_comps["group1"].apply(lambda x: True if i in x else False) + ] # Lambda function checks if the value is in the tuple for group one + unit_comps = unit_comps[ + unit_comps["group2"].apply(lambda x: True if i in x else False) + ] + unit_comps.reset_index(inplace=True) + + # find row with largest difference + if unit_comps.empty: # skip if empty + continue + + unit_comps = unit_comps.sort_values("Diff", ascending=False) + ##Right around here I need to work out a way to sort by adjacent bins only + # Extract only adjacent bins, then take the largest value with this set. 
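            # Editorial note on the adjacency check below: with bin labels of the
            # form "start, end" (e.g. "-0.9, -0.8"), truly contiguous bins share an
            # endpoint, i.e. g2_bin1 == g1_bin2. The condition actually used,
            # g2_bin1 == g1_bin2 + bin_duration, instead keeps pairs separated by
            # one full bin; if strictly adjacent bins are intended, comparing
            # g2_bin1 to g1_bin2 (via round() or math.isclose to avoid float
            # equality problems) may be what is wanted.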
+ for item in unit_comps.iterrows(): + # row = unit_comps["Diff"].idxmax() + row = item[1] + g1_bin, unit_num = item[1]["group1"] + g1_bin1 = round(float(g1_bin.split()[0][0:-1]), 1) + g1_bin2 = round(float(g1_bin.split()[1][0:]), 1) + + g2_bin, unit_num = item[1]["group2"] + g2_bin1 = round(float(g2_bin.split()[0][0:-1]), 1) + g2_bin2 = round(float(g2_bin.split()[1][0:]), 1) + + # Check if bins are sequential, will return the largest value where bins are next to eachother + if g2_bin1 == g1_bin2 + bin_duration: + + sigunit_comps.append([row]) + else: + continue + + # Now that we know the units with significant comparisons, take these bins from raw firing rate averages + ses_avgs = bin_data_average(bin_data[s]) + # Iterate through our significant comparisons, calculating the actual delta firing rate + for i in range(len(sigunit_comps)): + sig_comp = sigunit_comps[i] + sig_comp = pd.DataFrame(sig_comp) # convert list to dataframe + + unit = [x[1] for x in sig_comp["group1"]] + ses_unit = ses_avgs.loc[ + ses_avgs.index.get_level_values("unit") == int(unit[0]) + ].mean() # get all rows where our units firing rate is present + + # Get the bins that the signficant comparison looked at + bin_val1 = [x[0] for x in sig_comp["group1"]][0] + bin_val2 = [x[0] for x in sig_comp["group2"]][0] + + # Return the percentage change from initial state rather than raw value + if percentage_change == True: + change = ses_unit[bin_val2] - ses_unit[bin_val1] + delta = (change / ses_unit[bin_val1]) * 100 + if ses_unit[bin_val1] == 0: + continue # Skip this value, if it changes from a value of zero, this is infinite + unit_deltas.append([int(unit[0]), delta]) + # Finally, get the delta value across these bins for the given unit + elif percentage_change == False: + delta = ses_unit[bin_val2] - ses_unit[bin_val1] + unit_deltas.append([int(unit[0]), delta]) + + # Iterate through unit_deltas and remove any duplicate units + # Keeping only the units of largest values + unit_deltas = pd.DataFrame(unit_deltas, columns=["unit", "delta"]) + for j in unit_deltas["unit"].unique(): + vals = unit_deltas.loc[unit_deltas["unit"] == j] + vals = vals.sort_values("delta", key=abs, ascending=False) + + final_deltas.append(vals.iloc[0].values.tolist()) + + ses_deltas.append(final_deltas) + + return ses_deltas + + +sig_deltas = unit_delta( + glm, myexp, bin_data, bin_duration=0.1, sig_only=True, percentage_change=False +) +all_deltas = unit_delta( + glm, myexp, bin_data, bin_duration=0.1, sig_only=False, percentage_change=False +) + +# %% +# Using this delta information, the approximate depths of these units may be determined +# First, update the unit depths function +def unit_depths(exp): + """ + Parameters + ========== + exp : pixels.Experiment + Your experiment. 
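
    Returns
    =======
    (Editorial addition) A single-row ("depth") dataframe whose columns form a
    (session, unit) MultiIndex keyed by session name; each value is the session's
    probe depth minus the unit's "depth" field from the cluster info, which the
    plotting code below treats as the unit's depth in the tissue (in um).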
+ """ + info = exp.get_cluster_info() + depths = [] + keys = [] + + for s, session in enumerate(exp): + name = session.name + session_depths = {} + rec_num = 0 + for rec_num, probe_depth in enumerate(session.get_probe_depth()): + rec_depths = {} + rec_info = info[s] + id_key = "id" if "id" in rec_info else "cluster_id" # Depends on KS version + + for unit in rec_info[id_key]: + unit_info = rec_info.loc[rec_info[id_key] == unit].iloc[0].to_dict() + rec_depths[unit] = probe_depth - unit_info["depth"] + + session_depths = pd.DataFrame(rec_depths, index=["depth"]) + keys.append(name) + session_depths = pd.concat([session_depths], axis=1, names="unit") + depths.append(session_depths) + + return pd.concat(depths, axis=1, names=["session", "unit"], keys=keys) + + +all_depths = unit_depths(myexp) +# %% +# Sort pyramidal and interneuronal units for the plot below +# select all pyramidal neurons +pyramidal_units = get_pyramidals(myexp) + +# and all interneurons +interneuron_units = get_interneurons(myexp) + +pyramidal_aligned = myexp.align_trials( + ActionLabels.correct, + Events.led_off, + "spike_rate", + duration=2, + units=pyramidal_units, +) + +interneuron_aligned = myexp.align_trials( + ActionLabels.correct, + Events.led_off, + "spike_rate", + duration=2, + units=interneuron_units, +) + + +#%% +# Now extract the depths of each of our largest delta units from this +# Plot the points as interneurons or pyramidals + +plt.rcParams.update({"font.size": 20}) +for s, session in enumerate(myexp): + name = session.name + ses_deltas = sig_deltas[s] + + ses_deltas = pd.DataFrame(ses_deltas, columns=["unit", "delta"]) + + ses_depths = all_depths[name] + # Find depths of associated units + unit_depth = ses_depths[ses_deltas["unit"]].melt() + ses_deltas = pd.concat([ses_deltas, unit_depth["value"]], axis=1) + ses_deltas.rename(columns={"value": "depth"}, inplace=True) + + # Do the same for nonsignificant units + ses_nosig_deltas = all_deltas[s] + ses_nosig_deltas = pd.DataFrame(ses_nosig_deltas, columns=["unit", "delta"]) + + # Find depths of associated units + unit_nosig_depth = ses_depths[ses_nosig_deltas["unit"]].melt() + ses_nosig_deltas = pd.concat([ses_nosig_deltas, unit_nosig_depth["value"]], axis=1) + ses_nosig_deltas.rename(columns={"value": "depth"}, inplace=True) + # Now remove any significant values + ses_nosig_deltas = ses_nosig_deltas[ + ~ses_nosig_deltas["unit"].isin(ses_deltas["unit"]) + ].dropna() + ses_nosig_deltas = ses_nosig_deltas.reset_index(drop=True) + + ##Then determine the type of neuron these units are (i.e., pyramidal or interneuron) + pyr_units = pyramidal_aligned[s].melt()["unit"].unique() + int_units = interneuron_aligned[s].melt()["unit"].unique() + dicts = {} + + unit_types = [] + for i, item in enumerate(ses_deltas["unit"]): + if item in pyr_units: + unit_types.append("pyramidal") + if item in int_units: + unit_types.append("interneuron") + else: + continue + + nosig_unit_types = [] + for i, item in enumerate(ses_nosig_deltas["unit"]): + if item in pyr_units: + nosig_unit_types.append("pyramidal") + if item in int_units: + nosig_unit_types.append("interneuron") + else: + continue + + # Append unit_types as a new column + # Will try and append this as a new column, to see where the extra value lies + unit_types = pd.DataFrame(unit_types) + ses_deltas = pd.concat([ses_deltas, unit_types], ignore_index=True, axis=1) + ses_deltas.rename( + columns={0: "unit", 1: "delta", 2: "depth", 3: "type"}, inplace=True + ) + + nosig_unit_types = pd.DataFrame(nosig_unit_types) + 
ses_nosig_deltas = pd.concat( + [ses_nosig_deltas, nosig_unit_types], ignore_index=True, axis=1 + ) + ses_nosig_deltas.rename( + columns={0: "unit", 1: "delta", 2: "depth", 3: "type"}, inplace=True + ) + + # Finally, plot a graph of deltas relative to zero, by depth + p = sns.scatterplot( + x="delta", + y="depth", + data=ses_nosig_deltas, + s=100, + linewidth=1, + style="type", + color="#972c7f", + alpha=0.5, + ) + + p2 = sns.scatterplot( + x="delta", + y="depth", + data=ses_deltas, + s=100, + linewidth=1, + style="type", # uncomment when neuron type is fixed + color="#221150", + ) + + plt.axvline(x=0, color="green", ls="--") + + plt.xlim(-50, 50) + plt.ylim(0, 1300) + + plt.xticks(fontsize=25) + plt.yticks(fontsize=25) + plt.xlabel("Δ Firing Rate (Hz)", fontsize=20) + plt.ylabel("Depth of Recorded Neuron (μm)", fontsize=20) + plt.suptitle( + "\n".join( + wrap( + f"Largest Trial Firing Rate Change by pM2 Depth - Session {name}", + width=30, + ) + ), + y=1.1, + fontsize=20, + ) + + # Create lengend manually + # Entries for legend listed below as handles: + sig_dot = mlines.Line2D( + [], + [], + color="#221150", + marker="o", + linestyle="None", + markersize=10, + label="Significant", + ) + nonsig_dot = mlines.Line2D( + [], + [], + color="#972c7f", + marker="o", + linestyle="None", + alpha=0.5, + markersize=10, + label="Non-Significant", + ) + grey_dot = mlines.Line2D( + [], + [], + color="grey", + marker="o", + linestyle="None", + markersize=10, + label="Pyramidal", + ) + grey_cross = mlines.Line2D( + [], + [], + color="grey", + marker="X", + lw=5, + linestyle="None", + markersize=10, + label="Interneuronal", + ) + + plt.legend( + handles=[sig_dot, nonsig_dot, grey_dot, grey_cross], + bbox_to_anchor=(1.7, 1), + title="Significance (p < 0.05)", + fontsize=15, + ) + + # Now invert y-axis and save + plt.gca().invert_yaxis() + + utils.save( + f"/home/s1735718/Figures/{name}_DeltaFR_byDepth", + nosize=True, + ) + plt.show() + +# %% diff --git a/projects/Aidan_Analysis/FullAnalysis/bin_firingrate_ANOVA.py b/projects/Aidan_Analysis/FullAnalysis/bin_firingrate_ANOVA.py new file mode 100644 index 0000000..efca6b6 --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/bin_firingrate_ANOVA.py @@ -0,0 +1,361 @@ +################################################# +# This script will analyse the standard deviation of the data, as a measure of variance +# Rather than confidence interval size. +# Will be done by a comparison across bins and session +# IV: Standard Deviation of Firing Rate +# DV: Bin (Within Subjects), Session (Between Subjects) +################################################# + +# Import req. 
packages + +import enum +from xml.etree.ElementInclude import include +from matplotlib.pyplot import title, ylabel, ylim +import statsmodels.api as sm +from statsmodels.formula.api import ols +import numpy as np +import pandas as pd +import seaborn as sns +import sys +from base import * + + +from functions import ( + per_trial_binning, + bin_data_average, + event_times, + per_trial_spike_rate, +) +from reach import Cohort + + +sys.path.append("/home/s1735718/PixelsAnalysis/pixels-analysis") +sys.path.insert(0, "/home/s1735718/PixelsAnalysis/pixels/pixels") +from pixtools.utils import Subplots2D # use the local copy of base.py +from pixtools import utils +from pixtools import spike_rate +from pixels import ioutils +import statsmodels.stats.multicomp as mc +from statsmodels.graphics.factorplots import interaction_plot +from bioinfokit.analys import stat + +#%% +# Select Units From Cache (m2 only) +units = myexp.select_units(group="good", name="m2", min_depth=200, max_depth=1200) + +duration = 2 +trial_aligned = myexp.align_trials( + ActionLabels.correct, Events.led_off, "spike_rate", duration=duration, units=units +) + +#%% +# Bin firing rate data +# +bin_data = per_trial_binning(trial_aligned, myexp, timespan=2, bin_size=0.1) + + +def bin_data_SD(bin_data): + """ + Function is similar to bin_data_mean. + + This function takes the dataframe produced by per_trial_binning() including bins as a multiindex + and calculates the standard deviation of each bin's activity within a given time period + This returns the SD of the firing rate of each unit (i.e., per session and time) + + bin_data: the dataframe produced by per_trial_binning, with predefined bins + + NB: To return a transposed version of the data (i.e., with bin averages as rows rather than columns) + add ".T" to function call. 
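
    Returns (editorial addition): a dataframe indexed by (session, trial) with one
    column per bin label; each value is the standard deviation over every sample
    in that bin, i.e. the SD is pooled across units and time points within each
    trial and bin rather than computed per unit.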
+ """ + + # First extract the values of each bin in bin_data + bins = bin_data.index.get_level_values( + "bin" + ) # a list of the bins created by the previous function + bin_names = ( + bins.unique() + ) # Take the unique values present, giving us the list of bin categories + ses_bins = {} + for s, session in enumerate(myexp): + name = session.name + + ses_data = bin_data[s] + trial_bins = {} + + # Trial number will give the following loop the number of times to iterate + trial_number = ses_data.columns.get_level_values("trial").unique() + # Now, take each trial and average across bins + for t in trial_number: + trial_data = ses_data[t] + individual_bins = {} + for b in bin_names: + + bin_vals = trial_data.loc[trial_data.index.get_level_values("bin") == b] + bin_st_dev = np.std( + bin_vals.values + ) # take standard deviation for all values in this trials bin + individual_bins.update({b: bin_st_dev}) + trial_bins.update({t: individual_bins}) + + ses_bins.update({name: trial_bins}) + + # Merge these SDs as a dataframe + binned_sd = pd.DataFrame.from_dict( + {(i, j): ses_bins[i][j] for i in ses_bins.keys() for j in ses_bins[i].keys()}, + orient="index", + ) + + # Take the mean of the SDs for each trial Bin + + binned_sd.index.rename(["session", "trial"], inplace=True) + binned_sd.columns.names = ["bin"] + + # Return the data + return binned_sd + + +bin_sd = bin_data_SD(bin_data) + + +# %% +# Now take SD data and and convert to long form to prepare for ANOVA analysis +long_sd = pd.melt( + bin_sd.reset_index(), id_vars=["session", "trial"] +) # Preserve the indexes as comparator variables +long_sd.rename( + {"value": "firing_rate_sd"}, axis=1, inplace=True +) # rename value column to sd for readability + +#%% +####################################### +# Then shall run a mixed factorial ANOVA +# IV: Standard Deviation of Firing Rate +# DV: Bin (Within Subjects - 100 levels), Session (Between Subjects - 2 levels for VR46 [n otherwise]) +####################################### + +# Assemble model +model = ols( + "firing_rate_sd ~ C(bin) + C(session) + C(bin):C(session)", data=long_sd +).fit() + +output = sm.stats.anova_lm( + model, typ=2 +) # Run test on model, sample sizes are equal between groups here (neurons stay the same across trials) so type 2 SS used +print(output) # Print output + +####Run multiple comparisons#### +# For main effect of bin +res = stat() +res.tukey_hsd( + df=long_sd, + res_var="firing_rate_sd", + xfac_var="bin", + anova_model="firing_rate_sd ~ C(bin) + C(session) + C(bin):C(session)", + phalpha=0.05, + ss_typ=2, +) +bin_main_effect = res.tukey_summary + +# For main effect of session +res = stat() +res.tukey_hsd( + df=long_sd, + res_var="firing_rate_sd", + xfac_var="session", + anova_model="firing_rate_sd ~ C(bin) + C(session) + C(bin):C(session)", + phalpha=0.05, + ss_typ=2, +) +ses_main_effect = res.tukey_summary + +# For interaction +res = stat() +res.tukey_hsd( + df=long_sd, + res_var="firing_rate_sd", + xfac_var=["bin", "session"], + anova_model="firing_rate_sd ~ C(bin) + C(session) + C(bin):C(session)", + phalpha=0.05, + ss_typ=2, +) +interaction = res.tukey_summary + +# Determine sig. 
comparisons +sig_bin_comps = bin_main_effect.loc[bin_main_effect["p-value"] < 0.05] +sig_ses_comps = ses_main_effect.loc[ses_main_effect["p-value"] < 0.05] +sig_int = interaction.loc[interaction["p-value"] < 0.05] + +####Now plot the findings of this analysis#### +fig = interaction_plot( + x=long_sd["bin"], trace=long_sd["session"], response=long_sd["firing_rate_sd"] +) +plt.show() +# From this it is clear there is a drop around the centre of bins + +##Check assumptions +# First residual plot (QQ) +sm.qqplot(res.anova_std_residuals, line="45") +plt.xlabel("Theoretical Quantiles") +plt.ylabel("Standardized Residuals") +# plt.show() + +# Then Distribution plot +plt.hist(res.anova_model_out.resid, bins="auto", histtype="bar", ec="k") +plt.xlabel("Residuals") +plt.ylabel("Frequency") +# plt.show() + +import scipy.stats as stats + +w, pvalue = stats.shapiro(res.anova_model_out.resid) +# print(w, pvalue) + +# Model assumptions are met. +# %% +############################################# +# Run the same analysis on firing rate to paint the other half of the picture. +# Calculate mean firing rate for the data +bin_avgs = bin_data_average(bin_data) + +bins = bin_avgs.columns +ses_bins = {} + +for s, session in enumerate(myexp): + name = session.name + + ses_data = bin_avgs.loc[bin_avgs.index.get_level_values("session") == s] + ses_trials = ses_data.index.get_level_values("trial") + trial_bins = {} + # Now, take each trial and average across bins + for t in ses_trials: + trial_data = ses_data.loc[ses_data.index.get_level_values("trial") == t].mean( + axis=0 + ) + trial_bins.update({t: trial_data}) + + ses_bins.update({name: trial_bins}) + +# Merge these averages as a dataframe +means = pd.DataFrame.from_dict( + {(i, j): ses_bins[i][j] for i in ses_bins.keys() for j in ses_bins[i].keys()}, + orient="index", +) + +means.index.rename(["session", "trial"], inplace=True) +means.columns.names = ["bin"] + +long_mean = pd.melt( + means.reset_index(), id_vars=["session", "trial"] +) # Preserve the indexes as comparator variables +long_mean.rename( + {"value": "mean_firing_rate"}, axis=1, inplace=True +) # rename value column to sd for readability + +#%% +# Now run analysis for firing rate, per session +# Do this manually, its faster + +model = ols( + "mean_firing_rate ~ C(bin) + C(session) + C(bin):C(session)", data=long_mean +).fit() + +output = sm.stats.anova_lm( + model, typ=2 +) # Run test on model, sample sizes are equal between groups here (neurons stay the same across trials) so type 2 SS used +print(output) # Print output + +####Run multiple comparisons#### +# For main effect of bin +res = stat() +res.tukey_hsd( + df=long_mean, + res_var="mean_firing_rate", + xfac_var="bin", + anova_model="mean_firing_rate ~ C(bin) + C(session) + C(bin):C(session)", + phalpha=0.05, + ss_typ=2, +) +bin_main_effect_fr = res.tukey_summary + +# For main effect of session +res = stat() +res.tukey_hsd( + df=long_mean, + res_var="mean_firing_rate", + xfac_var="session", + anova_model="mean_firing_rate ~ C(bin) + C(session) + C(bin):C(session)", + phalpha=0.05, + ss_typ=2, +) +ses_main_effect_fr = res.tukey_summary + +# For interaction +res = stat() +res.tukey_hsd( + df=long_mean, + res_var="mean_firing_rate", + xfac_var=["bin", "session"], + anova_model="mean_firing_rate ~ C(bin) + C(session) + C(bin):C(session)", + phalpha=0.05, + ss_typ=2, +) +interaction_fr = res.tukey_summary + +# Determine sig. 
comparisons +sig_bin_comps_fr = bin_main_effect_fr.loc[bin_main_effect_fr["p-value"] < 0.05] +sig_ses_comps_fr = ses_main_effect_fr.loc[ses_main_effect_fr["p-value"] < 0.05] +sig_int_fr = interaction_fr.loc[interaction_fr["p-value"] < 0.05] + +# %% +######Now determine where sig. changes occur in adjacent bins###### +##To do so, will only select columns 1s before grasp. +def pairwise_significance_extraction(pairwise_data): + + """ + This function will return pairwise comparisons for the 1s preceeding grasp. + To return only significant values, feed it data with sig. isolated already (i.e., where p-value column < 0.05) + + pairwise_data: the result of the multiple comparisons performed earlier in the script, by tukey_hsd() + + """ + + df = pairwise_data.loc[ + ( + (pairwise_data["group1"] == "-1.0, -0.9") + | (pairwise_data["group1"] == "-0.9, -0.8") + | (pairwise_data["group1"] == "-0.8, -0.7") + | (pairwise_data["group1"] == "-0.7, -0.6") + | (pairwise_data["group1"] == "-0.6, -0.5") + | (pairwise_data["group1"] == "-0.5, -0.4") + | (pairwise_data["group1"] == "-0.4, -0.3") + | (pairwise_data["group1"] == "-0.3, -0.2") + | (pairwise_data["group1"] == "-0.2, -0.1") + | (pairwise_data["group1"] == "-0.1, 0.0005") + ) + & ( + (pairwise_data["group2"] == "-1.0, -0.9") + | (pairwise_data["group2"] == "-0.9, -0.8") + | (pairwise_data["group2"] == "-0.8, -0.7") + | (pairwise_data["group2"] == "-0.7, -0.6") + | (pairwise_data["group2"] == "-0.6, -0.5") + | (pairwise_data["group2"] == "-0.5, -0.4") + | (pairwise_data["group2"] == "-0.4, -0.3") + | (pairwise_data["group2"] == "-0.3, -0.2") + | (pairwise_data["group2"] == "-0.2, -0.1") + | (pairwise_data["group2"] == "-0.1, 0.0005") + ) + ] + + return df + + +# Return only comparisons in the 1s leading up to grasp +# In SD data, significant change occurs in the 0.6s leading up to grasp +bin_sd_comps = pairwise_significance_extraction(sig_bin_comps) +bin_fr_comps = pairwise_significance_extraction(sig_bin_comps_fr) +###################################### +# TODO: +# Run this on remainder of sessions +# run on all session in one Cohort +# plot significant changes to graph +###################################### diff --git a/projects/Aidan_Analysis/FullAnalysis/bin_variance_analysis.py b/projects/Aidan_Analysis/FullAnalysis/bin_variance_analysis.py new file mode 100644 index 0000000..0a687be --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/bin_variance_analysis.py @@ -0,0 +1,751 @@ +#%% +# First shall import required packages + +import enum +from xml.etree.ElementInclude import include +from matplotlib.pyplot import title, ylabel, ylim +import numpy as np +import pandas as pd +import seaborn as sns +import sys +from base import * + + +print("starting function import") +# Will import functions from the master file to speed this process and avoid un-needed bootstrapping +from functions import ( + per_trial_binning, + bin_data_average, + bin_data_SD, + bin_data_bootstrap, + bs_pertrial_bincomp, +) + +print("import function complete") +from CI_Analysis import significance_extraction +from CI_Analysis import percentile_plot +from functions import event_times, per_trial_spike_rate +from textwrap import wrap + +sys.path.append("/home/s1735718/PixelsAnalysis/pixels-analysis") +sys.path.insert(0, "/home/s1735718/PixelsAnalysis/pixels/pixels") +from pixtools.utils import Subplots2D # use the local copy of base.py +from pixtools import utils +from pixtools import spike_rate +from pixels import ioutils + + +# Now select the units that shall be analysed +# 
Will only analyse M2 for the purposes of this script +units = myexp.select_units(group="good", name="m2", min_depth=200, max_depth=1200) + +# Then align trials to LED off (i.e, end of trial) with the time window determined by the max trial length +duration = 10 +trial_aligned = myexp.align_trials( + ActionLabels.correct, Events.led_off, "spike_rate", duration=duration, units=units +) + +#%% +################################################################################################## +##Run functions defined in unit_binning_analysis to perform the binning and boostrapping of data## +################################################################################################## + +print("bootstrapping initiated") + +bin_data = per_trial_binning(trial_aligned, myexp, timespan=10, bin_size=0.1) + +bin_avgs = bin_data_average(bin_data) + +bootstrap = bin_data_bootstrap( + bin_avgs, CI=95, ss=20, bootstrap=10000, name="fullbootstrap", cache_overwrite=True +) + +# bin_comp = bs_pertrial_bincomp(bootstrap, myexp) + +#%% +################################################################################################################## +# As an alternative to the bootstrapping approach taken before I will now take average firing rates across trials +# All trials will be combined into a single dataset and binned +# Each bin will contain a series of average firing rates across trials +# May then bootstrap this dataset to see how firing rate changes with time +################################################################################################################## + +# First, average firing rate for each session across units/trials (to give a vals for each trial per bin) +# i.e., each bin will have a single value representing the trials average firing rate at that point +# To do this, will average across units, in bin_avgs + +bins = bin_avgs.columns +ses_bins = {} + +for s, session in enumerate(myexp): + name = session.name + + ses_data = bin_avgs.loc[bin_avgs.index.get_level_values("session") == s] + ses_trials = ses_data.index.get_level_values("trial") + trial_bins = {} + # Now, take each trial and average across bins + for t in ses_trials: + trial_data = ses_data.loc[ses_data.index.get_level_values("trial") == t].mean( + axis=0 + ) + trial_bins.update({t: trial_data}) + + ses_bins.update({name: trial_bins}) + +# Merge these averages as a dataframe +means = pd.DataFrame.from_dict( + {(i, j): ses_bins[i][j] for i in ses_bins.keys() for j in ses_bins[i].keys()}, + orient="index", +) + +means.index.rename(["session", "trial"], inplace=True) +means.columns.names = ["bin"] + +# %% +# Define all functions to be used in this script +def cross_trial_bootstrap( + bin_data, CI=95, ss=20, bootstrap=10000, cache_name=None, cache_overwrite=False +): + """ + This function takes the averages across trials for a given bin as previously calculated + and samples across values, to give a series of percentiles for average firing rate in a given bin + + bin_data: the dataframe produced by bin_data_average, with bin times represented as columns + + CI: the confidence interval to calculate (default is 95%) + + ss: the sample size to randomly select from the averages (default is 20) + + bootstrap: the number of times a random sample will be taken to create a population dataset (default is 10,000) + + cache_name: the name to save the output under in cache, speeds the process if this has been run before + + cache_overwrite: whether to overwrite the saved cache with new data, (default is False) + """ + + 
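    # Editorial note: the cache-read branch below builds its path from `name`,
    # which is not defined at that point in this function (only `names` is set
    # later, per session). The write at the end of the function uses `cache_name`,
    # so the read presumably should too; otherwise a previously cached result is
    # never found, or a stale module-level `name` is picked up instead.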
##Stages of this analysis are as follows: + # 1. Take individual bin + # 2. Calculate confidence interval via bootstapping + # 3. Store all bin CIs + + # Check if the function has been run before, and saved to a chache + # NB: This function caches under the name of the first session in the list! + + if cache_name is not None: + # Read in data from previous run + output = myexp.sessions[0].interim / "cache" / (name + ".h5") + if output.exists() and cache_overwrite is not True: + df = ioutils.read_hdf5(myexp.sessions[0].interim / "cache" / (name + ".h5")) + return df + + # First define confidence intervals + lower = (100 - CI) / 2 # Lower confidence interval bound (default 2.5%) + upper = 100 - lower # Upper bound (default 97.5%) + percentiles = [lower, 50, upper] # The defined percentiles to analyse + cis = [] # empty list to store calculated intervals + + bin_values = bin_data.columns + keys = [] + + # Loop through each of the bin averages and calculate their confidence interval + for s, session in enumerate(myexp): + + names = session.name + ses_bins = bin_data[bin_data.index.get_level_values("session") == names] + trials = ses_bins.index.get_level_values( + "trial" + ).unique() # get the trials for a given session + + # Sample from every trial within a bin + for b in bin_values: + + # Take every trial within a bin + bin_avgs = ses_bins[b] + samples = np.array( + [[np.random.choice(bin_avgs.values, size=ss)] for j in range(bootstrap)] + ) + + # Use these samples to calculate the percentiles for each bin + medians = np.median(samples, axis=2) # Median of the samples taken + results = np.percentile( + medians, percentiles, axis=0 + ) # The percentiles, relative to the median + cis.append(pd.DataFrame(results)) + keys.append( + (names, b) + ) # Set the keys for the dataframe to be bin, session, trial + + # Now save these calculated confidence intervals to a dataframe + df = pd.concat(cis, axis=1, names=["session", "bin"], keys=keys) + df.set_index( + pd.Index(percentiles, name="percentile"), inplace=True + ) # Set the name of the index to percentiles + df.columns = df.columns.droplevel( + level=2 + ) # remove the redundant units row, as they have been averaged out + + # Finally, save this df to cache to speed up future runs + if cache_name is not None: + ioutils.write_hdf5( + myexp.sessions[0].interim / "cache" / (cache_name + ".h5"), df + ) + return df + + return df + + +def cross_trial_bincomp(bs_data, myexp): + """ + This function imports percentile data produced from bootstrapping, and compares across bins to determine if a significant change from previous states occurs + This process is repeated across trials to allow plotting of significance. + Returns a dataframe with bins classified as either increasing, decreasing or displaying little change from previous timepoint. + + bs_data: the percentile information produced by bin_data_bootstrap() + + myexp: the experimental cohort defined in base.py + """ + + total_bin_response = {} + # First import the data and split it by session, and trial. + for s, session in enumerate(myexp): + name = session.name + ses_data = bs_data[name] + ses_bin_response = {} + for t, cols in enumerate(ses_data): + + # Now that we have all the bins for a given trial, + # t count value in enumerate loop will allow comparison to previous iteration + bins = ses_data.columns + + # compare each column's 2.5% (lower) to previous column's 97.5% + # to check for sig. 
increase in activity + previous_bin = [] + for b, bins in enumerate(ses_data): + + # First store the initial bin to previous_bin + if len(previous_bin) == 0: + previous_bin.append(ses_data[bins]) + continue + + # Once this has been done, will compare across bins + current_bin = ses_data[bins] + + # First check if there is a sig. increase - i.e., if current 2.5 and previous 97.5 do not overlap + if current_bin[2.5] >= previous_bin[0][97.5]: + ses_bin_response.update({bins: "increased"}) + + # Now check if there is a sig. decrease + elif current_bin[97.5] <= previous_bin[0][2.5]: + ses_bin_response.update({bins: "decreased"}) + + # And finally if there is no change at all + else: + ses_bin_response.update({bins: "nochange"}) + + # The replace previous bin with current bin + previous_bin = [] + previous_bin.append(current_bin) + + # Now append data for each trial to a larget list + total_bin_response.update({name: ses_bin_response}) + + # Convert nested dictionary to dataframe + df = pd.DataFrame.from_dict( + total_bin_response, + orient="index", + ) + # df.index.rename(["session"], inplace=True) + df.columns.names = ["bin"] + df.index.rename("session", inplace=True) + return df + + +def percentile_range(bs_data): + """ + Function takes percentiles for given bins and calculates the range, adding this as a new index row + + bs_data: a bootstrapped dataset containing 2.5, 50, and 97.5% percentiles as index + with data organised as columns for some given bin + """ + + # Run through the set col by col + ranges = [] + + for col in bs_data: + column = bs_data[col] + perc_delta = column[97.5] - column[2.5] # Range of the percentiles + ranges.append(perc_delta) + + # Now append this as a new row + bs_data.loc["range"] = ranges + return bs_data + + +def variance_boostrapping( + bs_data, + myexp, + CI=95, + ss=20, + bootstrap=10000, + cache_name=None, + cache_overwrite=False, +): + """ + Similar to previous functions, this performs a boostrapping analysis per bin throughout a sessions trials. + However, it instead analyses if the size of the variance (ie. the range between percentiles calculated previously) differs significantly between bins + This will act as a proxy for the synchronicity of neuronal activity for a given bin. + + bs_data: the data calculated within each trial by bin_data_bootstrap (NOT Cross data bootstrap!) + + myexp: the experimental cohort defined in base.py + + CI: the confidence interval to calculate (default is 95%) + + ss: the sample size to randomly select from the averages (default is 20) + + bootstrap: the number of times a random sample will be taken to create a population dataset (default is 10,000) + + cache_name: the name to save the output under in cache, speeds the process if this has been run before + + cache_overwrite: whether to overwrite the saved cache with new data, (default is False) + + """ + # Check if the function has been run before, and saved to a chache + # NB: This function caches under the name of the first session in the list! 
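    # Editorial note: `legend_names.append(name)` in the loop below relies on the
    # module-level `legend_names` list defined in the plotting cells; it is not
    # created inside this function, so calling it before that list exists would
    # raise a NameError. The inner loop also rebinds `bin_data`, shadowing the
    # module-level dataframe of the same name.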
+ if cache_name is not None: + # Read in data from previous run + output = myexp.sessions[0].interim / "cache" / (cache_name + ".h5") + if output.exists() and cache_overwrite is not True: + df = ioutils.read_hdf5( + myexp.sessions[0].interim / "cache" / (cache_name + ".h5") + ) + return df + + # First define confidence intervals + lower = (100 - CI) / 2 # Lower confidence interval bound (default 2.5%) + upper = 100 - lower # Upper bound (default 97.5%) + percentiles = [lower, 50, upper] # The defined percentiles to analyse + cis = [] # empty list to store calculated intervals + + keys = [] + + for s, session in enumerate(myexp): + ses_data = bs_data[s] + ses_data = ses_data.loc[ses_data.index == "range"] + ses_data = ses_data.melt() + + name = session.name + legend_names.append(name) + bin_vals = ses_data.bin.unique() + + # Sample across all trial ranges for a given bin + for b in bin_vals: + bin_data = ses_data.loc[ses_data.bin == b] + samples = np.array( + [[np.random.choice(bin_data.value, size=ss)] for j in range(bootstrap)] + ) + + # Save results of single bin boostrapping + medians = np.median(samples, axis=2) # Median of the samples taken + results = np.percentile( + medians, percentiles, axis=0 + ) # The percentiles, relative to the median + cis.append(pd.DataFrame(results)) + + keys.append((name, b)) + + # Now save these calculated confidence intervals to a dataframe + df = pd.concat(cis, axis=1, names=["session", "bin"], keys=keys) + df.set_index( + pd.Index(percentiles, name="percentile"), inplace=True + ) # Set the name of the index to percentiles + df.columns = df.columns.droplevel( + level=2 + ) # remove the redundant units row, as they have been averaged out + + if cache_name is not None: + ioutils.write_hdf5( + myexp.sessions[0].interim / "cache" / (cache_name + ".h5"), df + ) + return df + + return df + + +#%% +# Now that the firing rates have been averaged, may perform a bootstrapping analysis per bin + +crosstrialbs = cross_trial_bootstrap( + means, + CI=95, + ss=20, + bootstrap=10000, + cache_name="crosstrialbs", + cache_overwrite=False, +) + +#%% +# Using this boostrapped firing rate, will plot average firing rate across trials, then compare bootstrap CIs +# Specifically, compare size of the variance. 
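# Editorial note: `within_trial_variance_change` is used by the per-trial plots
# and by variance_boostrapping() further down, but its definition just below (via
# percentile_range(bootstrap)) is commented out; it needs to be uncommented, or
# the equivalent result loaded from cache, for those later cells to run.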
+ +# cross_trial_variance_change = percentile_range( +# crosstrialbs +# ) # How the variance changes with time averaged across all trials for a session +# within_trial_variance_change = percentile_range( +# bootstrap +# ) # How the variance changes with time within each trial + +sd_change = bin_data_SD(bin_data) + + +# Plot this change in variance to see how it changes with time, relative to LED on +# First, for every trial in a session averaged, how does the variance change with time +legend_names = [] +plt.rcParams.update({"font.size": 12}) +for s, session in enumerate(myexp): + + name = session.name + legend_names.append(name) + ses_data = sd_change.loc[sd_change.index.get_level_values("session") == name] + ses_means = ses_data.mean(axis=0) + + bin_duration = duration / len(sd_change.columns) + p = sns.lineplot(data=ses_means, palette="hls") + + plt.tick_params( + axis="x", # changes apply to the x-axis + which="both", # both major and minor ticks are affected + bottom=True, # ticks along the bottom edge are on + top=False, # ticks along the top edge are off + labelbottom=False, + ) # labels along the bottom edge are off + +# Plot signficance annotations +x1 = "-0.6, -0.5" # first col +x2 = "-0.1, 0.0005" # last col +p.axvspan( + x1, + x2, + color="red", + alpha=0.1, +) + +# Adjust labels/legend +plt.ylabel("Average Variance in Firing Rate (SD)") +plt.xlabel( + f"Trial Duration Elapsed Relative to LED Off (as {bin_duration}s Bins)", labelpad=20 +) +plt.legend( + labels=legend_names, + bbox_to_anchor=(1.1, 1.1), + title="Session (RecordingDate_MouseID)", +) +plt.suptitle("Average Trial Firing Rate Variance") +plt.axvline( + x=round((len(ses_data.columns) / 2)), ls="--", color="green" +) # Plot vertical line at midpoint, representing the event the data is aligned to + +utils.save( + f"/home/s1735718/Figures/AllSession_Average_Variance_Change", + nosize=True, +) +plt.show() + +#%% +# Now plot average firing rate relative to led off to go with this + +legend_names = [] +for s, session in enumerate(myexp): + + ax = subplots.axes_flat[s] + name = session.name + + legend_names.append(name) + ses_data = means.loc[means.index.get_level_values("session") == name].mean( + axis=0 + ) # average firing rate for each bin in a given session + bin_duration = duration / len(ses_data) + + p = sns.lineplot( + x=ses_data.index, y=ses_data.values, palette="hls" # bins # firing rates + ) + + plt.tick_params( + axis="x", # changes apply to the x-axis + which="both", # both major and minor ticks are affected + bottom=True, # ticks along the bottom edge are on + top=False, # ticks along the top edge are off + labelbottom=False, + ) # labels along the bottom edge are off + + # Plot significance indicator + x1 = "-0.6, -0.5" # first col + x2 = "-0.1, 0.0005" # last col + p.axvspan( + x1, + x2, + color="red", + alpha=0.1, + ) + +# Adjust labels +plt.ylabel("Average Firing Rate (Hz)") +plt.xlabel( + f"Trial Duration Elapsed Relative to LED Off (as {bin_duration}s Bins)", labelpad=20 +) +plt.legend( + labels=legend_names, + bbox_to_anchor=(1.1, 1.1), + title="Session (RecordingDate_MouseID)", +) +plt.suptitle("Average Change In Firing Rate Relative to Grasp") +plt.axvline( + x=round((len(ses_data.index) / 2)), ls="--", color="green" +) # Plot vertical line at midpoint, representing the event the data is aligned to + +utils.save( + f"/home/s1735718/Figures/AllSession_Average_FiringRate_Change", + nosize=True, +) +#%% +# Then, plot every trial, with average variance per bin (i.e., after boostrapping across unit 
activity) +legend_names = [] +for s, session in enumerate(myexp): + + name = session.name + legend_names.append(name) + ses_data = within_trial_variance_change[s] + + # Determien the number of trials per session to plot as individual graphs + ses_values = ses_data.columns.get_level_values("trial").unique() + + subplots = Subplots2D( + ses_values, sharex=True, sharey=True + ) # determines the number of subplots to create (as many as there are trials in the session) + + for i, value in enumerate(ses_values): + trial_data = ses_data[value] + ax = subplots.axes_flat[i] + + p = sns.lineplot( + x=trial_data.columns, + y=trial_data.loc["range"].values, + ax=ax, + ci=None, + markers=None, + ) + + p.tick_params( + axis="x", # changes apply to the x-axis + which="both", # both major and minor ticks are affected + bottom=False, # ticks along the bottom edge are off + top=False, # ticks along the top edge are off + labelbottom=False, + ) # labels along the bottom edge are off + + p.get_yaxis().get_label().set_visible(False) + p.get_xaxis().get_label().set_visible(False) + p.axvline( + x=round((len(trial_data.columns) / 2)), ls="--", color="green" + ) # Plot vertical line at midpoint, representing the event the data is aligned to + + # Finally, return the legend. + legend = subplots.legend + legend.text(0.3, 0.5, "Trial", transform=legend.transAxes) + legend.text( + -0.2, + -0.5, + "Variance in Firing Rate (95% CI)", + transform=legend.transAxes, + rotation=90, + ) # y axis label + legend.text( + 0, + -0.3, + f"Trial Time Elapsed Relative to LED Off (as {bin_duration}s Bins)", + transform=legend.transAxes, + ) + + legend.set_visible(True) + legend.get_xaxis().set_visible(False) + legend.get_yaxis().set_visible(False) + legend.set_facecolor("white") + legend.set_box_aspect(1) + + plt.gcf().set_size_inches(20, 20) + plt.show() + + +#%% +# Will now compare the change in variance between bins as the trial progresses. 
+# i.e., via another boostrapping anaysis +# Once again do this by session + +cross_trial_variance = variance_boostrapping( + within_trial_variance_change, + myexp, + CI=95, + ss=20, + bootstrap=10000, + cache_name="variance_range_bootstrap", + cache_overwrite=False, +) +# %% +# Run this boostrapping of variance size through the comparison +comp_cross_trial = cross_trial_bincomp(cross_trial_variance, myexp) + +# Then plot changing variance with inclusion of lines denoting significance +for s, session in enumerate(myexp): + name = session.name + legend_names.append(name) + ses_data = within_trial_variance_change[s] + ses_comps = comp_cross_trial.loc[name] + + # Get only range values, convert to longform + ses_data = ses_data.loc[ses_data.index == "range"] + ses_data = ses_data.melt() + + # Now plot all bin values for each trial as a set of lines + p = sns.lineplot( + data=ses_data, + x="bin", + y="value", + hue="trial", + estimator="mean", + lw=0.7, + ) + + p.tick_params( + axis="x", # changes apply to the x-axis + which="both", # both major and minor ticks are affected + bottom=False, # ticks along the bottom edge are off + top=False, # ticks along the top edge are off + labelbottom=False, + ) # labels along the bottom edge are off + + p.axvline( + x=round((len(ses_data.bin.unique()) / 2)), ls="--", color="green" + ) # Plot vertical line at midpoint, representing the event the data is aligned to + + # iterate through the comparison data and where there is a significnat change, plot a shaded area + for i, cols in enumerate(ses_comps): + current_bin = ses_comps.index[i] + previous_bin = ses_comps.index[i - 1] + + if cols == "increased" or cols == "decreased": + p.axvspan( + current_bin, + previous_bin, + color="red", + alpha=0.1, + hatch=r"/o", + ) + + plt.suptitle(f"Size of Variance per Trial for Session {name}", y=0.9) + plt.ylabel("Variance in Firing Rate (95% CI)") + plt.xlabel(f"Trial Time Elapsed Relative to LED Off ({bin_duration}s Bins)") + plt.gcf().set_size_inches(10, 10) + utils.save( + f"/home/s1735718/Figures/{myexp[s].name}variance_change", + nosize=True, + ) + plt.show() + +# %% +####Plot individual session FRs +legend_names = [] +subplots = Subplots2D( + myexp, sharex=True, sharey=True +) # determines the number of subplots to create (as many as there are seaaions in the session) +plt.rcParams.update({"font.size": 30}) + +for s, session in enumerate(myexp): + name = session.name + ax = subplots.axes_flat[s] + + legend_names.append(name) + ses_data = means.loc[ + means.index.get_level_values("session") == name + ] # average firing rate for each bin in a given session + ses_data = pd.melt(ses_data.reset_index(), id_vars=["session", "trial"]) + bin_duration = duration / len(ses_data["bin"].unique()) + + p = sns.lineplot( + x=ses_data["bin"], + y=ses_data["value"], + palette="hls", + ax=ax, # bins # firing rates + ci="sd", + ) + + p.tick_params( + axis="x", # changes apply to the x-axis + which="both", # both major and minor ticks are affected + bottom=True, # ticks along the bottom edge are on + top=False, # ticks along the top edge are off + labelbottom=False, + ) # labels along the bottom edge are off + + p.get_yaxis().get_label().set_visible(False) + # Plot significance indicator + if name == "211027_VR49": # plot smaller bin for here + x1 = "-0.2, -0.1" # first col + x2 = "-0.1, 0.0005" # last col + p.axvspan(x1, x2, color="red", alpha=0.1) + p.axvline(x=round((len(ses_data["bin"].unique()) / 2)), ls="--", color="green") + p.text(1, 14, name) + 
p.get_xaxis().set_visible(False) + continue + + # plot other significance values + x1 = "-0.6, -0.5" # first col + x2 = "-0.1, 0.0005" # last col + p.axvspan(x1, x2, color="red", alpha=0.1) + # Adjust labels + + p.get_xaxis().set_visible(False) + p.axvline( + x=round((len(ses_data["bin"].unique()) / 2)), ls="--", color="green" + ) # Plot vertical line at midpoint, representing the event the data is aligned to + + # plot session number + p.text(1, 14, name) + + +# Finally, return the legend. +legend = subplots.legend + +legend.text( + -0.3, + 0, + "\n".join(wrap("Average Firing Rate (Hz)", width=20)), + transform=legend.transAxes, + rotation=90, +) # y axis label +legend.text( + 0, + -0.4, + "\n".join( + wrap( + f"Trial Time Elapsed Relative to LED Off (as {bin_duration}s Bins)", + width=25, + ) + ), + transform=legend.transAxes, +) + +legend.set_visible(True) +legend.get_xaxis().set_visible(False) +legend.get_yaxis().set_visible(False) +legend.set_facecolor("white") +legend.set_box_aspect(1) + +plt.suptitle("Average Firing Rate Per Session", y=0.95) +plt.gcf().set_size_inches(25, 25) +plt.rcParams.update({"font.size": 30}) +utils.save( + f"/home/s1735718/Figures/AllSession_Average_FiringRate_Change_individualPlots", + nosize=True, +) + +# %% diff --git a/projects/Aidan_Analysis/FullAnalysis/cell_atlas_neuron_distribution_M2.csv b/projects/Aidan_Analysis/FullAnalysis/cell_atlas_neuron_distribution_M2.csv new file mode 100644 index 0000000..469be32 --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/cell_atlas_neuron_distribution_M2.csv @@ -0,0 +1,2 @@ +Cells,Neurons,Excitatory,Inhibitory,REST,PV+,SST+,VIP+,Glia,Oligodendrocytes,Astrocytes,Microglia +1380708,465259,301973,163286,73770,36328,30289,22899,915449,527419,135943,252087, \ No newline at end of file diff --git a/projects/Aidan_Analysis/FullAnalysis/channeldepth.py b/projects/Aidan_Analysis/FullAnalysis/channeldepth.py new file mode 100644 index 0000000..731eec6 --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/channeldepth.py @@ -0,0 +1,22 @@ +#This file will read in channel depth information from the available metadata +#First import required packages + +from base import * +import probeinterface as pi + +from reach import Session + +#Find the metadata containing the depth of the channels used in the recording +#Remember must index into experiment and then files to find the spike metadata +#Shall create a function to do this +def meta_spikeglx(exp, session): + meta=exp[session].files[0]["spike_meta"] + data_path= myexp[session].find_file(meta) + data=pi.read_spikeglx(data_path) + + return data + + + + + diff --git a/projects/Aidan_Analysis/FullAnalysis/chisquare_by_unit_type.py b/projects/Aidan_Analysis/FullAnalysis/chisquare_by_unit_type.py new file mode 100644 index 0000000..a65da2a --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/chisquare_by_unit_type.py @@ -0,0 +1,99 @@ +#%% +# First import required packages +from argparse import Action +from base import * +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import seaborn as sns +import scipy.stats as sc +import sys +from channeldepth import meta_spikeglx +import statsmodels.api as sm +from statsmodels.formula.api import ols +from reach import Cohort +from xml.etree.ElementInclude import include +from matplotlib.pyplot import legend, title, ylabel, ylim +import matplotlib.lines as mlines +from tqdm import tqdm +from pixels import ioutils + +from functions import ( + event_times, + unit_depths, + per_trial_binning, + event_times, + 
within_unit_GLM, + bin_data_average, + # unit_delta, #for now use a local copy +) + +#%% +# First select units based on spike width analysis +all_units = myexp.select_units(group="good", name="m2", min_depth=200, max_depth=1200) + +pyramidals = get_pyramidals(myexp) +interneurons = get_interneurons(myexp) + + +# Then import the known neuronal type distributions from csv +known_units = pd.read_csv("cell_atlas_neuron_distribution_M2.csv", index_col=False) + +#%% +# Now align Trials +per_unit_aligned = myexp.align_trials( + ActionLabels.correct, Events.led_off, "spike_rate", duration=2, units=all_units +) + +left_aligned = myexp.align_trials( + ActionLabels.correct_left, Events.led_off, "spike_rate", duration=2, units=all_units +) + +right_aligned = myexp.align_trials( + ActionLabels.correct_right, + Events.led_off, + "spike_rate", + duration=2, + units=all_units, +) + +pyramidal_aligned = myexp.align_trials( + ActionLabels.correct, Events.led_off, "spike_rate", duration=2, units=pyramidals +) +interneuron_aligned = myexp.align_trials( + ActionLabels.correct, Events.led_off, "spike_rate", duration=2, units=interneurons +) + +# Now compare counts of neuropixel data to known data +counts = pd.DataFrame(columns=["Session", "Neurons", "Excitatory", "Inhibitory"]) +for s, session in enumerate(myexp): + + name = session.name + ses_pyr_count = len(pyramidals[s]) + ses_int_count = len(interneurons[s]) + total = ses_pyr_count + ses_int_count + neurons = pd.Series( + [name, total, ses_pyr_count, ses_int_count], index=counts.columns + ) + + counts.loc[s] = neurons # append as new row + +# Now calculate percentage proportions +counts["excitatory_percentage"] = (counts.Excitatory / counts.Neurons) * 100 +counts["inhibitory_percentage"] = (counts.Inhibitory / counts.Neurons) * 100 +median_counts = counts.median() + +known_counts = known_units[["Neurons", "Excitatory", "Inhibitory"]] +known_counts["excitatory_percentage"] = ( + known_counts.Excitatory / known_counts.Neurons +) * 100 +known_counts["inhibitory_percentage"] = ( + known_counts.Inhibitory / known_counts.Neurons +) * 100 +known_median = known_counts.median() + +# Now perform a chi-squared analysis to check across distributions +sc.chisquare( + median_counts[["excitatory_percentage", "inhibitory_percentage"]], + f_exp=known_median[["excitatory_percentage", "inhibitory_percentage"]], +) diff --git a/projects/Aidan_Analysis/FullAnalysis/dprime_training.py b/projects/Aidan_Analysis/FullAnalysis/dprime_training.py new file mode 100644 index 0000000..9406333 --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/dprime_training.py @@ -0,0 +1,50 @@ +# Produces graphs of d' training progress in all mice across three month periods +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import seaborn as sns +import sys + +sys.path.append("/home/s1735718/PixelsAnalysis/pixels-analysis") +sys.path.insert(0, "/home/s1735718/PixelsAnalysis/pixels/pixels") +from pixels.behaviours import Behaviour +from reach import Cohort +from pixels import ioutils +from pixtools import utils + +#%% +# create the cohort class +cohort = Cohort.init_from_files( + data_dir="/home/s1735718/duguidlab/CuedBehaviourAnalysis/Data/TrainingJSON", + mouse_ids=[ + "VR46", + "VR49", + "VR52", + "VR55", + "VR59", + ], +) + +# now get the training results from file +results = pd.DataFrame(cohort.get_results()) +trials = pd.DataFrame(cohort.get_trials()) +# NB: D' may be incorrect where days have multiple sessions! May have to calculate manually. 
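+# A minimal sketch of one way d' could be recomputed manually for days with
+# multiple sessions, assuming hit/miss/false-alarm/correct-rejection counts can
+# be pooled per day (these argument names are illustrative and are not columns
+# returned by cohort.get_results()):
+from scipy.stats import norm
+
+
+def manual_dprime(hits, misses, false_alarms, correct_rejections):
+    """d' = Z(hit rate) - Z(false-alarm rate), with a 0.5-count (log-linear) correction."""
+    hit_rate = (hits + 0.5) / (hits + misses + 1)
+    fa_rate = (false_alarms + 0.5) / (false_alarms + correct_rejections + 1)
+    return norm.ppf(hit_rate) - norm.ppf(fa_rate)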
+# Or exclude from the plotting (CANT DO THIS FOR THE MAIN DISS ONLY TO SHOW TRAINING PROGRESSION) + +#%% +# Plot linegraph of mouse and d' by day +plt.rcParams.update({"font.size": 35}) +p = sns.lineplot(x="day", y="d_prime", hue="mouse_id", data=results, lw=3) + +p.axhline(y=0, ls="--", color="grey", lw=3) +p.axhline(y=1.5, ls="--", color="green", lw=3) +plt.xlabel("Training Day") +plt.ylabel("d-Prime Score (d')") +plt.suptitle("Mouse d-Prime Score Throughout Training Period") + +plt.gcf().set_size_inches(15,10) +utils.save( + "/home/s1735718/Figures/cohort_dprime", + nosize=True, +) +# %% diff --git a/projects/Aidan_Analysis/FullAnalysis/firingrate_histogram_toLEDOFF.py b/projects/Aidan_Analysis/FullAnalysis/firingrate_histogram_toLEDOFF.py new file mode 100644 index 0000000..8396d3c --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/firingrate_histogram_toLEDOFF.py @@ -0,0 +1,107 @@ +# This will plot a figure aligned to LED off with the following features: +# A series of indicators displaying when LED came on across trials for each unit, plotted as a histogram +# The confidence interval for the spike rate. + +# First shall import the required packages +from argparse import Action +import sys +from cv2 import bitwise_and +import matplotlib.pyplot as plt +import pandas as pd +import numpy as np +import seaborn as sns + +from base import * +from rugplotbase import * +from matplotlib.pyplot import figure +from distutils.command.sdist import sdist +from textwrap import wrap + + +##Now import the main experimental data. Here all good units from cortex## +unit = myexp.select_units(group="good", uncurated=False, max_depth=1500) + +# Now must align these trials to LED off for the main spike rate data +hits_off = myexp.align_trials( + ActionLabels.correct, + Events.led_off, + "spike_rate", + duration=6, # May need to increase this depending on how far back the LED came on + units=unit, +) + +##Now will obtain the data containing time at which the LED turned on and duration## + +# I shall iterate through the trial taking the difference between the event on (LED_on) and event off (LED_off) +# This will give a duration of events and allow plotting +# First iterate through the experimental data, both sessions and get the action labels and events +# Importantly, this will be structured as a function to allow me to run this easily in future + + +##The following function will return the timepoints for the requested event type for a given experiment +# +def event_times(event, myexp): + """ + + This function will give the timepoints for the specified event across experimental sessions + + event: the specific event to search for, must be input within quotes ("") + + myexp: the experiment defined in base.py + + NB: Setting the event to LED_off when the action label is correct (whose start point therefore defines LED_on as zero) will return the DURATION of the event! 
+ """ + times = [] # Give an empty list to store times + + for ses in myexp: + sessiontimepoints = ( + [] + ) # Give an empty list to store this sessions times in before appending to the master list + als = ses.get_action_labels() # Get the action labels for this session + + for rec_num in range( + len(als) + ): # This will run through all recording numbers in the above action label data + actions = als[rec_num][:, 0] + events = als[rec_num][:, 1] + + # Call all trials where the mouse was correct to search + start = np.where(np.bitwise_and(actions, ActionLabels.correct))[0] + + for trial in start: + # Now iterate through these correct trials and return all times for selected event + event_time = np.where( + np.bitwise_and( + events[trial : trial + 10000], getattr(Events, event) + ) + )[0] + sessiontimepoints.append(event_time[0]) + + times.append(sessiontimepoints) + + return times + + +##Now let us define the event times, and durations for the experiment +led_duration = event_times("led_off", myexp) + + +# Combine these as a dictionary, now have all on times across sessions. Each representing an individual trial +rugdata = {ses.name: pd.DataFrame(led_duration[s]) for s, ses in enumerate(myexp)} + +rugdata = pd.concat(rugdata, axis=1) +rugdata = ( + -rugdata / 1000 +) # Converts the positive values to a negative second, to signify the duration before the LED off + +##Now will plot the base information, combined with a rugplot addition +# To do so, must edit the _plot function, imported from rugplotbase +for s, session in enumerate(myexp): + name = session.name + + per_unit_spike_rate(hits_off[s], rugdata[name], ci="sd") + plt.suptitle( + f"session {name} - per-unit across trial firing rate, aligned to LED off" + ) + +plt.show() diff --git a/projects/Aidan_Analysis/FullAnalysis/firingrate_to_LED_on.py b/projects/Aidan_Analysis/FullAnalysis/firingrate_to_LED_on.py new file mode 100644 index 0000000..c11b759 --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/firingrate_to_LED_on.py @@ -0,0 +1,52 @@ +from argparse import Action +import sys +from cv2 import bitwise_and +import matplotlib.pyplot as plt +import pandas as pd +import numpy as np +import seaborn as sns + +from base import * +from rugplotbase import * +from matplotlib.pyplot import figure +from distutils.command.sdist import sdist +from textwrap import wrap + +sys.path.append("/home/s1735718/PixelsAnalysis/pixels-analysis") +from pixtools import spike_rate +from pixtools import utils + + +##Now import the main experimental data. Here all good units from cortex## +unit = myexp.select_units( + group="good", + uncurated=False, + max_depth=1500, + name="cortex" +) + +#Now must align these trials to LED on for the main spike rate data +hits_off = myexp.align_trials( + ActionLabels.correct, + Events.led_on, + "spike_rate", + duration=8, + units=unit +) + +#Now plot average firing rate per unit, across trials. 
Will only examine the 4 seconds preceeding +ci = "sd" + +for s, session in enumerate(myexp): + + fig = plt.figure() + name = session.name + spike_rate.per_unit_spike_rate(hits_off[s], ci=ci) + fig.suptitle( + f"session {name} - per-unit firing rate 4s before LED on (Trial Beginning)" + ) + plt.gcf().set_size_inches(20, 20) + utils.save(f"/home/s1735718/Figures/{myexp[s].name}_spikerate_LED_on", nosize=True) + #plt.show() + + diff --git a/projects/Aidan_Analysis/FullAnalysis/functions.py b/projects/Aidan_Analysis/FullAnalysis/functions.py new file mode 100644 index 0000000..6f2ac0d --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/functions.py @@ -0,0 +1,1789 @@ +""" +This is the master file containing any functions used in my analyses. +Most require that experiments are initially defined in base.py. +sys.path.append allows the local importing of a copy of the pixels analysis repo, though this may be added to path otherwise. + +Notes +---------------------------------------------------------- +| +|Some functions utilise the depreciated rec_num argument! +| +| +| +----------------------------------------------------------- +""" + +from base import * # Import our experiment and metadata needed +import pandas as pd +import numpy as np +import seaborn as sns +import matplotlib.pyplot as plt +import matplotlib.lines as mlines + +from pyrsistent import b +from xml.etree.ElementInclude import include +import sys +import random +import probeinterface as pi +from reach import Session +from argparse import Action +from cv2 import bitwise_and +import matplotlib.pyplot as plt +from textwrap import wrap +from tqdm import tqdm +from channeldepth import meta_spikeglx + +import statsmodels.api as sm +from statsmodels.formula.api import ols +import statsmodels.stats.multicomp as mc +from statsmodels.graphics.factorplots import interaction_plot +from bioinfokit.analys import stat + +sys.path.append("/home/s1735718/PixelsAnalysis/pixels-analysis") +from pixtools.utils import Subplots2D +from mpl_toolkits.axes_grid1 import make_axes_locatable +from pixtools import utils +from pixtools import spike_rate +from pixels import ioutils + + +def _raster(per, data, sample, start, subplots, label): + per_values = data.columns.get_level_values(per).unique() + per_values.values.sort() + + if per == "trial": + data = data.reorder_levels(["trial", "unit"], axis=1) + other = "unit" + else: + other = "trial" + + # units if plotting per trial, trials if plotting per unit + samp_values = list(data.columns.get_level_values(other).unique().values) + if sample and sample < len(samp_values): + sample = random.sample(samp_values, sample) + else: + sample = samp_values + + if subplots is None: + subplots = Subplots2D(per_values) + + palette = sns.color_palette("pastel") # Set the colours for the graphs produced + + for i, value in enumerate(per_values): + val_data = data[value][ + sample + ] # Requires I index into the data when assembling plots (see plots.py) + val_data.columns = np.arange(len(sample)) + start + val_data = val_data.stack().reset_index(level=1) + + p = sns.scatterplot( + data=val_data, + x=0, + y="level_1", + ax=subplots.axes_flat[i], + s=0.5, + legend=None, + ) + + p.set_yticks([]) + p.set_xticks([]) + p.autoscale(enable=True, tight=True) + p.get_yaxis().get_label().set_visible(False) + p.get_xaxis().get_label().set_visible(False) + p.text( + 0.95, + 0.95, + value, + horizontalalignment="right", + verticalalignment="top", + transform=subplots.axes_flat[i].transAxes, + color="0.3", + ) + 
p.axvline(c=palette[2], ls="--", linewidth=0.5) + + if not subplots.axes_flat[i].yaxis_inverted(): + subplots.axes_flat[i].invert_yaxis() + + if ( + label + ): # Chane our axis labels, here to trial number and time from the reach (which the data is aligned to!) + to_label = subplots.to_label + to_label.get_yaxis().get_label().set_visible(True) + to_label.set_ylabel("Trial Number") + to_label.get_xaxis().get_label().set_visible(True) + to_label.set_xlabel("Time from reach (s)") + + # plot legend subplot + legend = subplots.legend + legend.text( + 0, + 0.9, + "Trial number" if per == "trial" else "Unit ID", + transform=legend.transAxes, + color="0.3", + ) + legend.set_visible(True) + legend.get_xaxis().set_visible(False) + legend.get_yaxis().set_visible(False) + legend.set_facecolor("white") + + return subplots + + +def within_unit_GLM(bin_data, myexp): + + """ + Function takes binned data (with levels in order session -> unit -> trial) and performs an ANOVA on each session + Returns the multiple comparisons for the interaction and prints anova results + Following parameters are used: + + IV = Firing Rate, DV = Bin, Unit (containing many trials) + + Model is as follows: + + firing_rate ~ C(bin) + C(unit) + C(bin):C(unit) + + bin_data: the output of the per_trial_binning function reordered to swap unit and trial level hierarchy + + myexp: experimental cohort defined in base.py + + + NB: This function runs the model as a distributed set of calculations!! + i.e., it slices the data and runs several smaller calculations before concatenating results + + """ + # store data as a resillient distributed dataset + # will do this using pyspark + + multicomp_bin_results = [] + multicomp_unit_results = [] + multicomp_int_results = [] + + # Now split this data into sessions + for s, session in tqdm(enumerate(myexp)): + + sessions = bin_data.columns.get_level_values(0).unique() + name = session.name + ses_data = bin_data[sessions[s]] + + # Now shall average the data per bin + ses_avgs = bin_data_average(ses_data) + + # Then melt data to prep for analysis + ses_avgs = ses_avgs.reset_index().melt(id_vars=["unit", "trial"]) + ses_avgs.rename({"value": "firing_rate"}, axis=1, inplace=True) + + ##Now shall construct a linear model to allow for analysis + # IV: Firing Rate + # DVs: Bin (within subjects), Unit (Within subjects - trials averaged out) + + # Now run the GLM analysis + model = ols( + "firing_rate ~ C(bin) + C(unit) + C(bin):C(unit)", data=ses_avgs + ).fit() + + output = sm.stats.anova_lm(model, typ=2) # Type II SS as this is faster + print(f"ANOVA Results - Session {name}") + print(output) + + ####Run Multiple Comparisons#### + # Main effect of bin + res = stat() + + print(f"starting multiple comparisons for session {name}") + # Interaction + res.tukey_hsd( + df=ses_avgs, + res_var="firing_rate", + xfac_var=["bin", "unit"], + anova_model="firing_rate ~ C(bin) + C(unit) + C(bin):C(unit)", + phalpha=0.05, + ss_typ=2, + ) + int_effect = res.tukey_summary + multicomp_int_results.append(int_effect) + return multicomp_int_results + + +# Using this function we may now plot the graphs we desire! +# First let us define a function to plot spikes per unit + + +def per_unit_raster(data, sample=None, start=0, subplots=None, label=True): + """ + Plots a spike raster for every unit in the given data with trials as rows. + + data is a multi-dimensional dataframe as returned from + Experiment.align_trials(, , 'spike_times') indexed into to get the + session and recording. 
+ """ + return _raster("unit", data, sample, start, subplots, label) + + +# And per trial +def per_trial_raster(data, sample=None, start=0, subplots=None, label=True): + """ + Plots a spike raster for every trial in the given data with units as rows. + + data is a multi-dimensional dataframe as returned from + Experiment.align_trials(, , 'spike_times') indexed into to get the + session and recording. + """ + return _raster("trial", data, sample, start, subplots, label) + + +# Now finally define a function that allows plotting a single unit as a raster +def single_unit_raster(data, ax=None, sample=None, start=0, unit_id=None): + # units if plotting per trial, trials if plotting per unit + samp_values = list(data.columns.get_level_values("trial").unique().values) + if sample and sample < len(samp_values): + sample = random.sample(samp_values, sample) + else: + sample = samp_values + + val_data = data[sample] + val_data.columns = np.arange(len(sample)) + start + val_data = val_data.stack().reset_index(level=1) + + if not ax: + ax = plt.gca() + + p = sns.scatterplot( + data=val_data, + x=0, + y="level_1", + ax=ax, + s=1.5, + legend=None, + edgecolor=None, + ) + p.set_yticks([]) + p.set_xticks([]) + p.autoscale(enable=True, tight=True) + p.get_yaxis().get_label().set_visible(False) + p.get_xaxis().get_label().set_visible(False) + + palette = sns.color_palette() + p.axvline(c="black", ls="--", linewidth=0.5) + + if not ax.yaxis_inverted(): + ax.invert_yaxis() + + if unit_id is not None: + p.text( + 0.95, + 0.95, + unit_id, + horizontalalignment="right", + verticalalignment="top", + transform=ax.transAxes, + color="0.3", + ) + + +def _plot(level, data, rugdata, ci, subplots, label): + values = data.columns.get_level_values(level).unique() + values.values.sort() + if level == "trial": + data = data.reorder_levels(["trial", "unit"], axis=1) + + # no. units if plotting per trial, no. 
trials if plotting per unit + num_samples = len(data[values[0]].columns) + + if subplots is None: + subplots = Subplots2D(values) + + palette = sns.color_palette() + + for i, value in enumerate(values): + val_data = data[value].stack().reset_index() + val_data["y"] = val_data[0] + ax = subplots.axes_flat[i] + divider = make_axes_locatable(ax) + + r = sns.rugplot( + ax=ax, + palette=palette, + data=rugdata, + x=rugdata[0], + height=0.1, + legend=False, + expand_margins=True, + ) + r.autoscale(enable=False, tight=False) + + p = sns.lineplot( + data=val_data, + x="time", + y="y", + ci=ci, + ax=ax, + linewidth=0.5, + ) + p.autoscale(enable=True, tight=False) + p.set_yticks([]) + p.set_xticks([]) + peak = data[value].values.mean(axis=1).max() + p.text( + 0.05, + 0.95, + "%.1f" % peak, + horizontalalignment="left", + verticalalignment="top", + transform=ax.transAxes, + color="grey", + ) + p.text( + 0.95, + 0.95, + value, + horizontalalignment="right", + verticalalignment="top", + transform=ax.transAxes, + color=palette[0], + ) + p.get_yaxis().get_label().set_visible(False) + p.get_xaxis().get_label().set_visible(False) + p.axvline(c=palette[1], ls="--", linewidth=0.5) + p.set_box_aspect(1) + + if label: + to_label = subplots.to_label + to_label.get_yaxis().get_label().set_visible(True) + to_label.set_ylabel("Firing rate (Hz)") + to_label.get_xaxis().get_label().set_visible(True) + to_label.set_xlabel("Time from push (s)") + to_label.set_xticks(data.index) + # to_label.set_xticklabels([- duration / 2, 0, duration / 2]) + + # plot legend subplot + legend = subplots.legend + legend.text( + 0, + 0.7, + "Peak of mean", + transform=legend.transAxes, + color="grey", + ) + legend.text( + 0, + 0.3, + "Trial number" if level == "trial" else "Unit ID", + transform=legend.transAxes, + color=palette[0], + ) + legend.set_visible(True) + legend.get_xaxis().set_visible(False) + legend.get_yaxis().set_visible(False) + legend.set_facecolor("white") + legend.set_box_aspect(1) + + return subplots + + +def per_unit_spike_rate(data, rugdata, ci=95, subplots=None, label=True): + """ + Plots a histogram for every unit in the given session showing spike rate. + + Parameters + ---------- + + data : pandas.DataFrame + A dataframe for a single session as returned from Behaviour.align_trials (or + Experiment.align_trials but indexed into for session and rec_num). + + rugdata : pandas.DataFrame + A dataframe for a single session containing the onset of the event (led_on) to plot as a rug. + + ci : int or 'sd', optional + Confidence intervals to plot as envelope around line. Default is 95 i.e. 95% + confidence intervals. Also accepts "'sd'" which will draw standard deviation + envelope, or None to plot no envelope. + """ + return _plot("unit", data, rugdata, ci, subplots, label) + + +def per_trial_spike_rate(data, rugdata, ci=95, subplots=None, label=True): + """ + Plots a histogram for every trial in the given session showing spike rate. + + Parameters + ---------- + + data : pandas.DataFrame + A dataframe for a single session as returned from Behaviour.align_trials (or + Experiment.align_trials but indexed into for session and rec_num). + + ci : int or 'sd', optional + Confidence intervals to plot as envelope around line. Default is 95 i.e. 95% + confidence intervals. Also accepts "'sd'" which will draw standard deviation + envelope, or None to plot no envelope. 
+ """ + return _plot("trial", data, rugdata, ci, subplots, label) + + +def event_times(event, myexp): + """ + + This function will give the timepoints for the specified event across experimental sessions + + event: the specific event to search for, must be input within quotes ("") + + myexp: the experiment defined in base.py + + NB: Setting the event to LED_off when the action label is correct (whose start point therefore defines LED_on as zero) will return the DURATION of the event! + """ + times = [] # Give an empty list to store times + + for ses in myexp: + sessiontimepoints = ( + [] + ) # Give an empty list to store this sessions times in before appending to the master list + als = ses.get_action_labels() # Get the action labels for this session + + for rec_num in range( + len(als) + ): # This will run through all recording numbers in the above action label data + actions = als[rec_num][:, 0] + events = als[rec_num][:, 1] + + # Call all trials where the mouse was correct to search + start = np.where(np.bitwise_and(actions, ActionLabels.correct))[0] + + for trial in start: + # Now iterate through these correct trials and return all times for selected event + event_time = np.where( + np.bitwise_and( + events[trial : trial + 10000], getattr(Events, event) + ) + )[0] + sessiontimepoints.append(event_time[0]) + + times.append(sessiontimepoints) + + return times + + +def meta_spikeglx(exp, session): + """ + This function finds the metadata containing the depth of the channels used in the recording + Remember must index into experiment and then files to find the spike metadata + + exp: myexp, the class defined in base.py + + session: the session to analyse, in the form of an integer (i.e., 0 for first session, 1 for second etc.) + """ + + meta = exp[session].files[0]["spike_meta"] + data_path = myexp[session].find_file(meta) + data = pi.read_spikeglx(data_path) + + return data + + +def significance_extraction(CI): + """ + This function takes the output of the get_aligned_spike_rate_CI function and extracts any significant values, returning a dataframe in the same format. + + CI: The dataframe created by the CI calculation previously mentioned + + """ + + sig = [] + keys = [] + rec_num = 0 + + # This loop iterates through each column, storing the data as un, and the location as s + for s, unit in CI.items(): + # Now iterate through each recording, and unit + # Take any significant values and append them to lists. + if unit.loc[2.5] > 0 or unit.loc[97.5] < 0: + sig.append( + unit + ) # Append the percentile information for this column to a list + keys.append( + s + ) # append the information containing the point at which the iteration currently stands + + # Now convert this list to a dataframe, using the information stored in the keys list to index it + sigs = pd.concat( + sig, axis=1, copy=False, keys=keys, names=["session", "unit", "rec_num"] + ) + + return sigs + + +def percentile_plot(CIs, sig_CIs, exp, sig_only=False, dir_ascending=False): + """ + + This function takes the CI data and significant values and plots them relative to zero. + May specify if percentiles should be plotted in ascending or descending order. + + CIs: The output of the get_aligned_spike_rate_CI function, i.e., bootstrapped confidence intervals for spike rates relative to two points. 
+ + sig_CIs: The output of the significance_extraction function, i.e., the units from the bootstrapping analysis whose confidence intervals do not straddle zero + + exp: The experimental session to analyse, defined in base.py + + sig_only: Whether to plot only the significant values obtained from the bootstrapping analysis (True/False) + + dir_ascending: Whether to plot the values in ascending order (True/False) + + """ + # First sort the data into long form for both datasets, by percentile + CIs_long = ( + CIs.reset_index() + .melt("percentile") + .sort_values("value", ascending=dir_ascending) + ) + CIs_long = CIs_long.reset_index() + CIs_long["index"] = pd.Series( + range(0, CIs_long.shape[0]) + ) # reset the index column to allow ordered plotting + + CIs_long_sig = ( + sig_CIs.reset_index() + .melt("percentile") + .sort_values("value", ascending=dir_ascending) + ) + CIs_long_sig = CIs_long_sig.reset_index() + CIs_long_sig["index"] = pd.Series(range(0, CIs_long_sig.shape[0])) + + # Now select if we want only significant values plotted, else raise an error. + if sig_only is True: + data = CIs_long_sig + + elif sig_only is False: + data = CIs_long + + else: + raise TypeError("Sig_only argument must be a boolean operator (True/False)") + + # Plot this data for the experimental sessions as a pointplot. + for s, session in enumerate(exp): + name = session.name + + p = sns.pointplot( + x="unit", + y="value", + data=data.loc[(data.session == s)], + order=data.loc[(data.session == s)]["unit"].unique(), + join=False, + legend=None, + ) # Plots in the order of the units as previously set, uses unique values to prevent double plotting + + p.set_xlabel("Unit") + p.set_ylabel("Confidence Interval") + p.set(xticklabels=[]) + p.axhline(0) + plt.suptitle( + "\n".join( + wrap( + f"Confidence Intervals By Unit - Grasp vs. Baseline - Session {name}" + ) + ) + ) # Wraps the title of the plot to fit on the page. + + plt.show() + + +def per_trial_binning(trial, myexp, timespan, bin_size): + """ + Function takes data aligned by trial (i.e., using myexp align_trials method), + bins them, then returns the dataframe with an added bin index + + The end result of this is an index structured as (time, 'lower bin, upper bin') + + trial: The data aligned to an event according to myexp.align_trials + + myexp: The experimental cohort defined in base.py + + timespan: the total time data was aligned to in myexp.align_trials (i.e., duration) in seconds + + bin size: the size of the bins in seconds + + """ + + # First reorder data hierarchy + reorder = trial.reorder_levels(["session", "trial", "unit"], axis=1) + + # Perform binning on whole dataset + # First calculate the number of bins to create based on total duration and bin length + bin_number = int(timespan / bin_size) + + values = reorder.columns.get_level_values( + "trial" + ).unique() # Get trials for session + + # Add a new level containing the bin data for the dataframe + bin_vals = pd.cut(reorder.index, bin_number, include_lowest=True, precision=1) + bin_vals = bin_vals.astype(str) # Convert the interval series to a list of strings + + # Remove brackets from bin_vals + bin_vals_str = [] + for t in bin_vals: + t = t.strip("()[]") + bin_vals_str.append(t) + + # This will be added as an index, before time. 
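+    # e.g. with timespan=2 and bin_size=0.1 the intervals look like "(-1.0, -0.9]",
+    # which the strip above reduces to the string "-1.0, -0.9"; these strings are
+    # what get appended as the new "bin" index level below "time".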
+ full_data = reorder.assign( + bin=bin_vals_str + ) # Add bin values temporarily as a column + full_data.set_index( + "bin", append=True, inplace=True + ) # Convert this column to a new index, below time + return full_data + + +def bin_data_average(bin_data): + """ + This function takes the dataframe produced by per_trial_binning() including bins as a multiindex + and takes the average of each bin's activity within a given time period + This returns the average firing rate of each unit (i.e., per session and time) + + bin_data: the dataframe produced by per_trial_binning, with predefined bins + + NB: To return a transposed version of the data (i.e., with bin averages as rows rather than columns) + add ".T" to function call. + """ + + # First extract the values of each bin in bin_data + bins = bin_data.index.get_level_values( + "bin" + ) # a list of the bins created by the previous function + bin_names = ( + bins.unique() + ) # Take the unique values present, giving us the list of bin categories + + # Take averages for each bin, across all units and trials then concatenate into a dataframe, where each column is a bin + binned_avgs = pd.concat( + (bin_data.loc[bins == b].mean(axis=0) for b in bin_names), + axis=1, + keys=bin_names, + ) + binned_avgs = pd.DataFrame(binned_avgs) + + # Return the data + return binned_avgs + + +def bin_data_bootstrap( + bin_data, CI=95, ss=20, bootstrap=10000, name=None, cache_overwrite=False +): + """ + This function takes the data, with bin averages calculated by bin_data_average and calculates boostrapped confidence intervals + To do so, it takes a random sample many times over and uses this data to calculate percentiles + + bin_data: the dataframe produced by bin_data_average, with bin times represented as columns + + CI: the confidence interval to calculate (default is 95%) + + ss: the sample size to randomly select from the averages (default is 20) + + bootstrap: the number of times a random sample will be taken to create a population dataset (default is 10,000) + + name: the name to save the output under in cache, speeds the process if this has been run before + + cache_overwrite: whether to overwrite the saved cache with new data, (default is False) + """ + + ##Stages of this analysis are as follows: + # 1. Take individual bin + # 2. Calculate confidence interval via bootstapping + # 3. Store all bin CIs + + # Check if the function has been run before, and saved to a chache + # NB: This function caches under the name of the first session in the list! 
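+    # (the cached dataframe is written to <first session>.interim/cache/<name>.h5,
+    # and reruns with the same name return it unless cache_overwrite=True)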
+ if name is not None: + # Read in data from previous run + output = myexp.sessions[0].interim / "cache" / (name + ".h5") + if output.exists() and cache_overwrite is not True: + df = ioutils.read_hdf5(myexp.sessions[0].interim / "cache" / (name + ".h5")) + return df + + # First define confidence intervals + lower = (100 - CI) / 2 # Lower confidence interval bound (default 2.5%) + upper = 100 - lower # Upper bound (default 97.5%) + percentiles = [lower, 50, upper] # The defined percentiles to analyse + cis = [] # empty list to store calculated intervals + + bin_values = bin_data.columns + keys = [] + + # Loop through each of the bin averages and calculate their confidence interval + for s, session in enumerate(myexp): + names = session.name + ses_bins = bin_data[bin_data.index.get_level_values("session") == s] + trials = ses_bins.index.get_level_values( + "trial" + ).unique() # get the trials for a given session + + for t in trials: + trial_bins = ses_bins[ses_bins.index.get_level_values("trial") == t] + + # Take every bin within a trial + for b in bin_values: + bin_avg = trial_bins[b] + samples = np.array( + [ + [np.random.choice(bin_avg.values, size=ss)] + for j in range(bootstrap) + ] + ) + + # Use these samples to calculate the percentiles for each bin + medians = np.median(samples, axis=2) # Median of the samples taken + results = np.percentile( + medians, percentiles, axis=0 + ) # The percentiles, relative to the median + cis.append(pd.DataFrame(results)) + keys.append( + (s, t, b) + ) # Set the keys for the dataframe to be bin, session, trial + + # Now save these calculated confidence intervals to a dataframe + df = pd.concat(cis, axis=1, names=["session", "trial", "bin"], keys=keys) + df.set_index( + pd.Index(percentiles, name="percentile"), inplace=True + ) # Set the name of the index to percentiles + df.columns = df.columns.droplevel( + level=3 + ) # remove the redundant units row, as they have been averaged out + + # Finally, save this df to cache to speed up future runs + if name is not None: + ioutils.write_hdf5(myexp.sessions[0].interim / "cache" / (name + ".h5"), df) + return df + + return df + + +def bs_pertrial_bincomp(bs_data, myexp): + """ + This function imports percentile data produced from bootstrapping, and compares across bins to determine if a significant change from previous states occurs + This process is repeated across trials to allow plotting of significance. + Returns a dataframe with bins classified as either increasing, decreasing or displaying little change from previous timepoint. + + bs_data: the percentile information produced by bin_data_bootstrap() + + myexp: the experimental cohort defined in base.py + """ + + total_bin_response = {} + # First import the data and split it by session, and trial. + for s, session in enumerate(myexp): + name = session.name + ses_data = bs_data[0] + ses_bin_response = {} + for t, cols in enumerate(ses_data): + trial_data = ses_data[ + cols[0] + ] # All bins, and associated percentiles for a given trial + + # Now that we have all the bins for a given trial, + # t count value in enumerate loop will allow comparison to previous iteration + bins = trial_data.columns + + # compare each column's 2.5% (lower) to previous column's 97.5% + # to check for sig. 
increase in activity + previous_bin = [] + trial_bin_response = {} # store as a dictionary + for b, bins in enumerate(trial_data): + + # First store the initial bin to previous_bin + if len(previous_bin) == 0: + previous_bin.append(trial_data[bins]) + continue + + # Once this has been done, will compare across bins + current_bin = trial_data[bins] + + # First check if there is a sig. increase - i.e., if current 2.5 and previous 97.5 do not overlap + if current_bin[2.5] >= previous_bin[0][97.5]: + trial_bin_response.update({bins: "increased"}) + + # Now check if there is a sig. decrease + elif current_bin[97.5] <= previous_bin[0][2.5]: + trial_bin_response.update({bins: "decreased"}) + + # And finally if there is no change at all + else: + trial_bin_response.update({bins: "nochange"}) + + # The replace previous bin with current bin + previous_bin = [] + previous_bin.append(current_bin) + + # Now append data for each trial to a larget list + ses_bin_response.update( + {cols[0]: trial_bin_response} + ) # contains every trial for a given session + total_bin_response.update({name: ses_bin_response}) + + # Convert nested dictionary to dataframe + df = pd.DataFrame.from_dict( + { + (i, j): total_bin_response[i][j] + for i in total_bin_response.keys() + for j in total_bin_response[i].keys() + }, + orient="index", + ) + df.index.rename(["session", "trial"], inplace=True) + df.columns.names = ["bin"] + return df + + +def bs_graph(aligned_data, binned_data, bin_comparisons, myexp): + """ + Function plots the binned data graph including period of LED onset for each trial within a given session. + + aligned_data: the dataset produced by align_trials + + binned_data: the dataset produced by per_trial_binning + + bin_comparisons: the dataset containing comparisons between bins, produced by bs_pertrial_bincomp + + myexp: the experimental cohort defined in base.py + + """ + for s, session in enumerate(myexp): + name = session.name + + ses_data = binned_data[s] + ses_values = ses_data.columns.get_level_values( + "trial" + ).unique() # Get all trial values for the session + ses_start_times = start[ + name + ] # LED on time for the session, will leave trial numbers in ses_values unsorted to ensure they align + + ses_bin_comps = bin_comparisons[ + bin_comparisons.index.get_level_values("session") == name + ] + + num_trials = len( + ses_data[ses_values[0]].columns + ) # How many trials shall be plotted + subplots = Subplots2D( + ses_values, sharex=True, sharey=True + ) # determines the number of subplots to create (as many as there are trials in the session) + + for i, value in enumerate(ses_values): + data = ses_data[value].stack().reset_index() + data["y"] = data[0] # Add a new column containing y axis values + ax = subplots.axes_flat[i] # Plot this on one graph + + # bin responses for the trial to be plotted + trial_bins = ses_bin_comps[ + ses_bin_comps.index.get_level_values("trial") == i + ] + + # Now create the actual lineplot + p = sns.lineplot(x="time", y="y", data=data, ax=ax, ci="sd") + + # Now for each of these plots, add the LED on-off time as a shaded area + p.axvspan(ses_start_times[i], 0, color="green", alpha=0.3) + + ##Now, within each trial, add shaded areas representing the binned timescales + for i, cols in enumerate(ses_bin_comps.columns): + shaded_bin = data["time"].loc[data["bin"] == ses_bin_comps.columns[i]] + bin_significance = trial_bins[cols] + + # Then plot an indicator of significance to these bins + if bin_significance.values == "nochange": + p.axvspan( + shaded_bin.values[0], + 
shaded_bin.values[-1], + color="gray", + alpha=0, + hatch=r"//", + ) # Change this alpha value to above zero to plot nonsignificant bins. + + if ( + bin_significance.values == "increased" + or bin_significance.values == "decreased" + ): + p.axvspan( + shaded_bin.values[0], + shaded_bin.values[-1], + color="red", + alpha=0.1, + hatch=r"/o", + ) + + # Now remove axis labels + p.get_yaxis().get_label().set_visible(False) + p.get_xaxis().get_label().set_visible(False) + + # Then set the x limit to be one second post trial completion + p.set(xlim=(-5, 1)) + + # Now plot the legend for each of these figures + legend = subplots.legend + + legend.text(0.3, 0.5, "Trial", transform=legend.transAxes) + legend.text( + -0.2, 0, "Firing Rate (Hz)", transform=legend.transAxes, rotation=90 + ) # y axis label + legend.text( + 0, -0.3, "Time Relative to LED Off (s)", transform=legend.transAxes + ) # x axis label + + legend.set_visible(True) + legend.get_xaxis().set_visible(False) + legend.get_yaxis().set_visible(False) + legend.set_facecolor("white") + legend.set_box_aspect(1) + + # Display the plot + plt.gcf().set_size_inches(20, 20) + + # Uncomment to save figures as pdf. + utils.save( + f"/home/s1735718/Figures/{myexp[s].name}_spikerate_LED_Range_PerTrial", + nosize=True, + ) + + +def cross_trial_bootstrap( + bin_data, CI=95, ss=20, bootstrap=10000, cache_name=None, cache_overwrite=False +): + """ + This function takes the averages across trials for a given bin as previously calculated + and samples across values, to give a series of percentiles for average firing rate in a given bin + + bin_data: the dataframe produced by bin_data_average, with bin times represented as columns + + CI: the confidence interval to calculate (default is 95%) + + ss: the sample size to randomly select from the averages (default is 20) + + bootstrap: the number of times a random sample will be taken to create a population dataset (default is 10,000) + + cache_name: the name to save the output under in cache, speeds the process if this has been run before + + cache_overwrite: whether to overwrite the saved cache with new data, (default is False) + """ + + ##Stages of this analysis are as follows: + # 1. Take individual bin + # 2. Calculate confidence interval via bootstapping + # 3. Store all bin CIs + + # Check if the function has been run before, and saved to a chache + # NB: This function caches under the name of the first session in the list! 
+ + if cache_name is not None: + # Read in data from previous run + output = myexp.sessions[0].interim / "cache" / (name + ".h5") + if output.exists() and cache_overwrite is not True: + df = ioutils.read_hdf5(myexp.sessions[0].interim / "cache" / (name + ".h5")) + return df + + # First define confidence intervals + lower = (100 - CI) / 2 # Lower confidence interval bound (default 2.5%) + upper = 100 - lower # Upper bound (default 97.5%) + percentiles = [lower, 50, upper] # The defined percentiles to analyse + cis = [] # empty list to store calculated intervals + + bin_values = bin_data.columns + keys = [] + + # Loop through each of the bin averages and calculate their confidence interval + for s, session in enumerate(myexp): + + names = session.name + ses_bins = bin_data[bin_data.index.get_level_values("session") == names] + trials = ses_bins.index.get_level_values( + "trial" + ).unique() # get the trials for a given session + + # Sample from every trial within a bin + for b in bin_values: + + # Take every trial within a bin + bin_avgs = ses_bins[b] + samples = np.array( + [[np.random.choice(bin_avgs.values, size=ss)] for j in range(bootstrap)] + ) + + # Use these samples to calculate the percentiles for each bin + medians = np.median(samples, axis=2) # Median of the samples taken + results = np.percentile( + medians, percentiles, axis=0 + ) # The percentiles, relative to the median + cis.append(pd.DataFrame(results)) + keys.append( + (names, b) + ) # Set the keys for the dataframe to be bin, session, trial + + # Now save these calculated confidence intervals to a dataframe + df = pd.concat(cis, axis=1, names=["session", "bin"], keys=keys) + df.set_index( + pd.Index(percentiles, name="percentile"), inplace=True + ) # Set the name of the index to percentiles + df.columns = df.columns.droplevel( + level=2 + ) # remove the redundant units row, as they have been averaged out + + # Finally, save this df to cache to speed up future runs + if cache_name is not None: + ioutils.write_hdf5( + myexp.sessions[0].interim / "cache" / (cache_name + ".h5"), df + ) + return df + + return df + + +def cross_trial_bincomp(bs_data, myexp): + """ + This function imports percentile data produced from bootstrapping, and compares across bins to determine if a significant change from previous states occurs + This process is repeated across trials to allow plotting of significance. + Returns a dataframe with bins classified as either increasing, decreasing or displaying little change from previous timepoint. + + bs_data: the percentile information produced by bin_data_bootstrap() + + myexp: the experimental cohort defined in base.py + """ + + total_bin_response = {} + # First import the data and split it by session, and trial. + for s, session in enumerate(myexp): + name = session.name + ses_data = bs_data[name] + ses_bin_response = {} + for t, cols in enumerate(ses_data): + + # Now that we have all the bins for a given trial, + # t count value in enumerate loop will allow comparison to previous iteration + bins = ses_data.columns + + # compare each column's 2.5% (lower) to previous column's 97.5% + # to check for sig. increase in activity + previous_bin = [] + for b, bins in enumerate(ses_data): + + # First store the initial bin to previous_bin + if len(previous_bin) == 0: + previous_bin.append(ses_data[bins]) + continue + + # Once this has been done, will compare across bins + current_bin = ses_data[bins] + + # First check if there is a sig. 
increase - i.e., if current 2.5 and previous 97.5 do not overlap + if current_bin[2.5] >= previous_bin[0][97.5]: + ses_bin_response.update({bins: "increased"}) + + # Now check if there is a sig. decrease + elif current_bin[97.5] <= previous_bin[0][2.5]: + ses_bin_response.update({bins: "decreased"}) + + # And finally if there is no change at all + else: + ses_bin_response.update({bins: "nochange"}) + + # The replace previous bin with current bin + previous_bin = [] + previous_bin.append(current_bin) + + # Now append data for each trial to a larget list + total_bin_response.update({name: ses_bin_response}) + + # Convert nested dictionary to dataframe + df = pd.DataFrame.from_dict( + total_bin_response, + orient="index", + ) + # df.index.rename(["session"], inplace=True) + df.columns.names = ["bin"] + df.index.rename("session", inplace=True) + return df + + +def percentile_range(bs_data): + """ + Function takes percentiles for given bins and calculates the range, adding this as a new index row + + bs_data: a bootstrapped dataset containing 2.5, 50, and 97.5% percentiles as index + with data organised as columns for some given bin + """ + + # Run through the set col by col + ranges = [] + + for col in bs_data: + column = bs_data[col] + perc_delta = column[97.5] - column[2.5] # Range of the percentiles + ranges.append(perc_delta) + + # Now append this as a new row + bs_data.loc["range"] = ranges + return bs_data + + +def variance_boostrapping( + bs_data, + myexp, + CI=95, + ss=20, + bootstrap=10000, + cache_name=None, + cache_overwrite=False, +): + """ + Similar to previous functions, this performs a boostrapping analysis per bin throughout a sessions trials. + However, it instead analyses if the size of the variance (ie. the range between percentiles calculated previously) differs significantly between bins + This will act as a proxy for the synchronicity of neuronal activity for a given bin. + + bs_data: the data calculated within each trial by bin_data_bootstrap (NOT Cross data bootstrap!) + + myexp: the experimental cohort defined in base.py + + CI: the confidence interval to calculate (default is 95%) + + ss: the sample size to randomly select from the averages (default is 20) + + bootstrap: the number of times a random sample will be taken to create a population dataset (default is 10,000) + + cache_name: the name to save the output under in cache, speeds the process if this has been run before + + cache_overwrite: whether to overwrite the saved cache with new data, (default is False) + + """ + # Check if the function has been run before, and saved to a chache + # NB: This function caches under the name of the first session in the list! 
+ if cache_name is not None: + # Read in data from previous run + output = myexp.sessions[0].interim / "cache" / (cache_name + ".h5") + if output.exists() and cache_overwrite is not True: + df = ioutils.read_hdf5( + myexp.sessions[0].interim / "cache" / (cache_name + ".h5") + ) + return df + + # First define confidence intervals + lower = (100 - CI) / 2 # Lower confidence interval bound (default 2.5%) + upper = 100 - lower # Upper bound (default 97.5%) + percentiles = [lower, 50, upper] # The defined percentiles to analyse + cis = [] # empty list to store calculated intervals + + keys = [] + + for s, session in enumerate(myexp): + ses_data = bs_data[s] + ses_data = ses_data.loc[ses_data.index == "range"] + ses_data = ses_data.melt() + + name = session.name + legend_names.append(name) + bin_vals = ses_data.bin.unique() + + # Sample across all trial ranges for a given bin + for b in bin_vals: + bin_data = ses_data.loc[ses_data.bin == b] + samples = np.array( + [[np.random.choice(bin_data.value, size=ss)] for j in range(bootstrap)] + ) + + # Save results of single bin boostrapping + medians = np.median(samples, axis=2) # Median of the samples taken + results = np.percentile( + medians, percentiles, axis=0 + ) # The percentiles, relative to the median + cis.append(pd.DataFrame(results)) + + keys.append((name, b)) + + # Now save these calculated confidence intervals to a dataframe + df = pd.concat(cis, axis=1, names=["session", "bin"], keys=keys) + df.set_index( + pd.Index(percentiles, name="percentile"), inplace=True + ) # Set the name of the index to percentiles + df.columns = df.columns.droplevel( + level=2 + ) # remove the redundant units row, as they have been averaged out + + if cache_name is not None: + ioutils.write_hdf5( + myexp.sessions[0].interim / "cache" / (cache_name + ".h5"), df + ) + return df + + return df + + +def bin_data_SD(bin_data): + """ + Function is similar to bin_data_mean. + + This function takes the dataframe produced by per_trial_binning() including bins as a multiindex + and calculates the standard deviation of each bin's activity within a given time period + This returns the SD of the firing rate of each unit (i.e., per session and time) + + bin_data: the dataframe produced by per_trial_binning, with predefined bins + + NB: To return a transposed version of the data (i.e., with bin averages as rows rather than columns) + add ".T" to function call. 
+ """ + + # First extract the values of each bin in bin_data + bins = bin_data.index.get_level_values( + "bin" + ) # a list of the bins created by the previous function + bin_names = ( + bins.unique() + ) # Take the unique values present, giving us the list of bin categories + ses_bins = {} + for s, session in enumerate(myexp): + name = session.name + + ses_data = bin_data[s] + trial_bins = {} + + # Trial number will give the following loop the number of times to iterate + trial_number = ses_data.columns.get_level_values("trial").unique() + # Now, take each trial and average across bins + for t in trial_number: + trial_data = ses_data[t] + individual_bins = {} + for b in bin_names: + + bin_vals = trial_data.loc[trial_data.index.get_level_values("bin") == b] + bin_st_dev = np.std( + bin_vals.values + ) # take standard deviation for all values in this trials bin + individual_bins.update({b: bin_st_dev}) + trial_bins.update({t: individual_bins}) + + ses_bins.update({name: trial_bins}) + + # Merge these SDs as a dataframe + binned_sd = pd.DataFrame.from_dict( + {(i, j): ses_bins[i][j] for i in ses_bins.keys() for j in ses_bins[i].keys()}, + orient="index", + ) + + # Take the mean of the SDs for each trial Bin + + binned_sd.index.rename(["session", "trial"], inplace=True) + binned_sd.columns.names = ["bin"] + + # Return the data + return binned_sd + + +def unit_depths(exp): + """ + Parameters + ========== + exp : pixels.Experiment + Your experiment. + + This is an updated version of the pixtools function + """ + info = exp.get_cluster_info() + depths = [] + keys = [] + + for s, session in enumerate(exp): + name = session.name + session_depths = {} + rec_num = 0 + for rec_num, probe_depth in enumerate(session.get_probe_depth()): + rec_depths = {} + rec_info = info[s] + id_key = "id" if "id" in rec_info else "cluster_id" # Depends on KS version + + for unit in rec_info[id_key]: + unit_info = rec_info.loc[rec_info[id_key] == unit].iloc[0].to_dict() + rec_depths[unit] = probe_depth - unit_info["depth"] + + session_depths = pd.DataFrame(rec_depths, index=["depth"]) + keys.append(name) + session_depths = pd.concat([session_depths], axis=1, names="unit") + depths.append(session_depths) + + return pd.concat(depths, axis=1, names=["session", "unit"], keys=keys) + + +def unit_delta(glm, myexp, bin_data, bin_duration, sig_only, percentage_change): + """ + Function takes the output of the multiple comparisons calculated by within_unit_GLM() and calculates the larges change in firing rate (or other IV measured by the GLM) between bins, per unit + This requires both experimental cohort data, and bin_data calculated by per_trial_binning() and passed through reorder levels (as "session", "unit", "trial") + Function returns a list of lists, in a hierarchy of [session[unit deltas]] + + Using this data, a depth plot may be created displaying the relative delta by unit location in pM2 + + glm: the result of the ANOVA and subsequent Tukey adjusted multiple comparisons performed by within_unit_GLM + NB: THIS IS A COMPUTATIONALLY EXPENSIVE FUNCTION, DO NOT RUN IT ON MORE THAN ONE SESSION AT A TIME + + myexp: the experimental cohort defined in base.py + + bin_data: the binned raw data computed by the per_trial_binning function + + bin_duration: the length of the bins in the data + + sig_only: whether to return only the greatest delta of significant bins or not + + percentage_change: whether to return deltas as a percentage change + + """ + ses_deltas = [] + for s, session in enumerate(myexp): + + final_deltas = [] + 
unit_deltas = [] + sigunit_comps = [] + ses_comps = glm[s] + + if sig_only is True: + # Return only significant comparisons + ses_comps = ses_comps.loc[ses_comps["p-value"] < 0.05] + + # Determine list of units remaining in comparison + units = [] + for i in ses_comps["group1"]: + units.append(i[1]) + units = np.array(units) + units = np.unique(units) + + # Now iterate through comparisons, by unit, Saving the significant comparisons to allow later calculation of delta + + for i in tqdm(units): + unit_comps = ses_comps[ + ses_comps["group1"].apply(lambda x: True if i in x else False) + ] # Lambda function checks if the value is in the tuple for group one + unit_comps = unit_comps[ + unit_comps["group2"].apply(lambda x: True if i in x else False) + ] + unit_comps.reset_index(inplace=True) + + # find row with largest difference + if unit_comps.empty: # skip if empty + continue + + unit_comps = unit_comps.sort_values("Diff", ascending=False) + ##Right around here I need to work out a way to sort by adjacent bins only + # Extract only adjacent bins, then take the largest value with this set. + for item in unit_comps.iterrows(): + # row = unit_comps["Diff"].idxmax() + row = item[1] + g1_bin, unit_num = item[1]["group1"] + g1_bin1 = round(float(g1_bin.split()[0][0:-1]), 1) + g1_bin2 = round(float(g1_bin.split()[1][0:]), 1) + + g2_bin, unit_num = item[1]["group2"] + g2_bin1 = round(float(g2_bin.split()[0][0:-1]), 1) + g2_bin2 = round(float(g2_bin.split()[1][0:]), 1) + + # Check if bins are sequential, will return the largest value where bins are next to eachother + if g2_bin1 == g1_bin2 + bin_duration: + + sigunit_comps.append([row]) + else: + continue + + # Now that we know the units with significant comparisons, take these bins from raw firing rate averages + ses_avgs = bin_data_average(bin_data[s]) + # Iterate through our significant comparisons, calculating the actual delta firing rate + for i in range(len(sigunit_comps)): + sig_comp = sigunit_comps[i] + sig_comp = pd.DataFrame(sig_comp) # convert list to dataframe + + unit = [x[1] for x in sig_comp["group1"]] + ses_unit = ses_avgs.loc[ + ses_avgs.index.get_level_values("unit") == int(unit[0]) + ].mean() # get all rows where our units firing rate is present + + # Get the bins that the signficant comparison looked at + bin_val1 = [x[0] for x in sig_comp["group1"]][0] + bin_val2 = [x[0] for x in sig_comp["group2"]][0] + + # Return the percentage change from initial state rather than raw value + if percentage_change == True: + change = ses_unit[bin_val2] - ses_unit[bin_val1] + delta = (change / ses_unit[bin_val1]) * 100 + if ses_unit[bin_val1] == 0: + continue # Skip this value, if it changes from a value of zero, this is infinite + unit_deltas.append([int(unit[0]), delta]) + # Finally, get the delta value across these bins for the given unit + elif percentage_change == False: + delta = ses_unit[bin_val2] - ses_unit[bin_val1] + unit_deltas.append([int(unit[0]), delta]) + + # Iterate through unit_deltas and remove any duplicate units + # Keeping only the units of largest values + unit_deltas = pd.DataFrame(unit_deltas, columns=["unit", "delta"]) + for j in unit_deltas["unit"].unique(): + vals = unit_deltas.loc[unit_deltas["unit"] == j] + vals = vals.sort_values("delta", key=abs, ascending=False) + + final_deltas.append(vals.iloc[0].values.tolist()) + + ses_deltas.append(final_deltas) + + return ses_deltas + + +def cross_trial_FF( + spike_times, + exp, + duration, + bin_size, + cache_name, + ff_iterations=50, + mean_matched=False, + 
raw_values=False,
+):
+    """
+    This function takes the raw spike times produced by the exp.align_trials method and calculates the Fano factor for each time bin,
+    either across the unit population (when mean_matched is True) or for individual units (when mean_matched is False).
+
+    ------------------------------------------------------------------------
+    |NB: Output of the function is saved to an h5 file as "FF_Calculation" |
+    ------------------------------------------------------------------------
+
+    spike_times: The raw spike time data produced by the align_trials method
+
+    exp: the experimental cohort defined in base.py (through the Experiment class)
+
+    duration: the window of the aligned data (in s)
+
+    bin_size: the size of the bins to split the trial duration by (in s)
+
+    cache_name: the name to cache the results under
+
+    mean_matched: Whether to apply the mean-matching adjustment (as defined by Churchland 2010) to correct for large changes in unit mean firing rates across trials.
+                  If this is False, the returned dataframe will contain unadjusted single-unit FFs
+
+    raw_values: Whether to return the raw values rather than mean FFs
+
+    ff_iterations: The number of times to repeat the random mean-matched sampling when calculating the population Fano factor (default 50)
+
+    """
+    # First create the bins that the data will be categorised into
+    duration = duration * 1000  # convert s to ms
+    bin_size = bin_size * 1000
+    bins = np.arange((-duration / 2), (duration / 2), bin_size)
+    all_FF = pd.DataFrame()
+
+    # First split data by session
+    for s, session in enumerate(exp):  # iterate over the exp argument rather than the global myexp
+        ses_data = spike_times[s]
+        ses_data = ses_data.reorder_levels(["unit", "trial"], axis=1)
+        name = session.name
+        session_FF = {}
+        session_se = {}
+        session_ci = {}
+
+        repeat_FF = pd.DataFrame()
+        repeat_se = pd.DataFrame()
+        repeat_ci = pd.DataFrame()
+
+        # Bin the session data
+        ses_vals = ses_data.values
+        bin_ids = np.digitize(ses_vals[~np.isnan(ses_vals)], bins, right=True)
+
+        # Then by unit
+        unit_bin_means = []
+        unit_bin_var = []
+
+        print(f"beginning mean/variance calculation for session {name}")
+        keys = []
+        for t in tqdm(ses_data.columns.get_level_values("unit").unique()):
+            unit_data = ses_data[t]  # take each unit's values for all trials
+            unit_data = (
+                unit_data.melt().dropna()
+            )  # Pivot this data and drop missing values
+
+            # Once these values have been isolated, we can begin the FF calculation.
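+            # The Fano factor of a bin is the variance of the per-trial spike count
+            # divided by its mean, so the loop below accumulates both statistics for
+            # every time bin of this unit. For example, counts of [3, 5, 4, 4] across
+            # four trials give a mean of 4.0 and a variance of 0.5, i.e. FF = 0.125.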
+ unit_data["bin"] = np.digitize(unit_data["value"], bins, right=True) + + bin_count_means = [] + bin_count_var = [] + + for b in np.unique(bin_ids): + bin_data = unit_data.loc[unit_data["bin"] == b] + + # Calculate the count mean of this bin, i.e., the average number of events per trial + bin_count_mean = ( + bin_data.groupby("trial").count().mean()["value"] + ) # the average number of spikes per unit in this bin + + # If there are any bins where no spikes occurred (i.e, empty dataframes) then return zero + if np.isnan(bin_count_mean): + bin_count_mean = 0 + + bin_count_means.append(bin_count_mean) + + # Now calculate variance of the event count + bin_var = np.var(bin_data.groupby("trial").count()["value"]) + + # Again return zero if no events + if np.isnan(bin_var): + bin_var = 0 + + bin_count_var.append(bin_var) + + # These values will make up a single point on the scatterplots used to calculate cross-unit FF + # Append to list containing values for each unit, across trials + + unit_bin_means.append(bin_count_means) + unit_bin_var.append(bin_count_var) + keys.append(t) + + # Now convert this to a dataframe + unit_bin_means = pd.DataFrame.from_records(unit_bin_means, index=keys) + unit_bin_means.index.name = "unit" + unit_bin_means.columns.name = "bin" + + unit_bin_var = pd.DataFrame.from_records(unit_bin_var, index=keys) + unit_bin_var.index.name = "unit" + unit_bin_var.columns.name = "bin" + + # can now use this information to calculate FF, and include any potential adjustments needed. + # To do so, shall create a linear regression to calculate FF, for each bin + + if mean_matched is True: + bin_heights = {} + """ + First calculate the greatest common distribution for this session's data + 1. Calculate the distribution of mean counts for each timepoint (binned trial time) + 2. Take the smallest height of each bin across timepoints, this will form the height of the GCD's corresponding bin + 3. Save this GCD to allow further mean matching + """ + + # Take each bin, and unit, then calculate the distribution of mean counts + for b in unit_bin_means.columns: + + # Isolate bin values, across units + bin_means = unit_bin_means[b] + + # Calculate distribution for this time, across units + p = np.histogram(bin_means, bins=9) + + # Save the heights of these bins for each bin + bin_heights.update( + {b: p[0]} + ) # append to a dictionary, where the key is the bin + + # Convert this dictionary to a dataframe, containing the heights of each timepoints distribution + bin_heights = pd.DataFrame.from_dict( + bin_heights, orient="index" + ) # the index of this dataframe will be the binned timepoints + gcd_heights = ( + bin_heights.min() + ) # get the minimum value for each "bin" of the histogram across timepoints + gcd_heights.index += 1 + + # Now may begin mean matching to the gcd + # Take each bin (i.e., timepoint) and calculate the distribution of points + # TODO: Update this analysis to utilise a rolling bin approach. 
I.e, take data at 100ms intervals, then shift the bin 10ms right + + print(f"Initiating Mean Matched Fano Factor Calculation for Session {name}") + for iteration in tqdm(range(ff_iterations)): + np.random.seed(iteration) + + for b in unit_bin_means.columns: + timepoint_means = pd.DataFrame(unit_bin_means[b]) + timepoint_edges = pd.DataFrame( + np.histogram(timepoint_means, bins=9)[1] + ) # the left edges of the histogram used to create a distribution + timepoint_var = unit_bin_var[b] + + # split data into bins + binned_data = pd.cut( + np.ndarray.flatten(timepoint_means.values), + np.ndarray.flatten(timepoint_edges.values), + labels=timepoint_edges.index[1:], + include_lowest=True, + ) # seperate raw data into the distribution bins + + timepoint_means["bin"] = binned_data + + # Isolate a single bin from the timepoint histogram + timepoint_FF_mean = {} + timepoint_FF_var = {} + + for i in range(len(timepoint_edges.index)): + if i == 0: + continue + # Bins begin at one, skip first iteration + repeat = True + bin_data = timepoint_means.loc[timepoint_means["bin"] == i] + + while repeat == True: + + # Check if bin height matches gcd height - if it does, append timepoint data to list + + if bin_data.count()["bin"] == gcd_heights[i]: + timepoint_FF_mean.update( + {i: bin_data[bin_data.columns[0]]} + ) # append the bin, and unit/mean count + repeat = False + + # If not, remove randomly points from the dataset one at a time, until they match + else: + dropped_data = np.random.choice( + bin_data.index, 1, replace=False + ) + bin_data = bin_data.drop( + dropped_data + ) # remove one datapoint, then continue with check + + ##Now extract the associated variance for the timepoint + timepoint_FF_mean = pd.DataFrame.from_dict(timepoint_FF_mean) + timepoint_FF_mean = timepoint_FF_mean.melt( + ignore_index=False + ).dropna() + timepoint_FF_var = timepoint_var[ + timepoint_var.index.isin(timepoint_FF_mean.index) + ] + + # Using the count mean and variance of the count, construct a linear regression for the population + # Ensure the origin of the linear model is set as zero + + # Set up data in correct shape + x = np.array(timepoint_FF_mean["value"]).reshape( + (-1, 1) + ) # mean count, the x axis + y = np.array(timepoint_FF_var) + + # Now fit model + timepoint_model = sm.OLS(y, x).fit() + + # The slope (i.e., coefficient of variation) of this model + ff_score = timepoint_model.params[0] + + # Extract the confidence interval of this model fit + ff_se = timepoint_model.bse[0] + ff_ci = timepoint_model.conf_int()[0] + + session_FF.update({b: ff_score}) + session_se.update({b: ff_se}) + session_ci.update({b: tuple(ff_ci)}) + + # Convert the session's FF to dataframe, then add to a master + session_FF = pd.DataFrame(session_FF, index=[0]) + session_se = pd.DataFrame(session_se, index=[0]) + session_ci = pd.DataFrame(session_ci.items()) + + repeat_FF = pd.concat([repeat_FF, session_FF], axis=0) + repeat_se = pd.concat([repeat_se, session_se], axis=0) + repeat_ci = pd.concat([repeat_ci, session_ci], axis=0) + + session_FF = {} + session_se = {} + session_ci = {} + + # Take the average across repeats for each session, add this to a final dataframe for session, timepoint, mean_FF, SE + + for i, cols in enumerate(repeat_FF.iteritems()): + timepoint_vals = repeat_FF.describe()[i] + timepoint_se = repeat_se.describe()[i] + + ci_bins = repeat_ci[0].tolist() + timepoint_ci = pd.DataFrame(repeat_ci[1].tolist()) + timepoint_ci = timepoint_ci.rename( + columns={0: "lower_ci", 1: "upper_ci"} + ).reset_index(drop=True) + 
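+            # Re-attach the bin labels stored in ci_bins so the lower/upper confidence
+            # intervals can be grouped and averaged per timepoint below.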
timepoint_ci.insert(0, "bin", ci_bins) + + mean = timepoint_vals["mean"] + se = timepoint_se["mean"] + lower_ci = timepoint_ci.loc[timepoint_ci["bin"] == i].describe()[ + "lower_ci" + ]["mean"] + upper_ci = timepoint_ci.loc[timepoint_ci["bin"] == i].describe()[ + "upper_ci" + ]["mean"] + + if raw_values == False: + vals = pd.DataFrame( + { + "session": name, + "bin": i, + "mean FF": mean, + "SE": se, + "lower_ci": lower_ci, + "upper_ci": upper_ci, + }, + index=[0], + ) + all_FF = pd.concat([all_FF, vals], ignore_index=True) + elif raw_values == True: + repeat_FF = repeat_FF.melt() + repeat_se = repeat_se.melt() + + repeat_FF["session"] = name + repeat_FF = repeat_FF.set_index("session") + repeat_FF = repeat_FF.rename( + columns={"value": "FF", "variable": "bin"} + ) + + repeat_se["session"] = name + repeat_se = repeat_se.set_index("session") + repeat_FF["se"] = repeat_se["value"] + repeat_FF["lower_ci"] = timepoint_ci["lower_ci"] + repeat_FF["upper_ci"] = timepoint_ci["upper_ci"] + + all_FF = pd.concat([all_FF, repeat_FF]) + break + + elif mean_matched is False: + # Remember, this will return the individual FF for each unit, not the population level FF + # calculate raw FF for each unit + session_FF = pd.DataFrame(columns=["bin", "FF", "session", "unit"]) + for unit in unit_bin_means.index: + unit_FF = pd.DataFrame( + unit_bin_var.loc[unit] / unit_bin_means.loc[unit] + ).rename( + columns={unit: "FF"} + ) # return all bins for this unit + unit_FF = unit_FF.reset_index() + unit_FF["session"] = name + unit_FF["unit"] = unit + + session_FF = pd.concat([session_FF, unit_FF]) + + # Reorder columns, then return data + session_FF = session_FF[["session", "unit", "bin", "FF"]] + session_FF.reset_index(drop=True) + all_FF = pd.concat([all_FF, session_FF], axis=0) + + ioutils.write_hdf5( + f"/data/aidan/{cache_name}_FF_Calculation_mean_matched_{mean_matched}_raw{raw_values}.h5", + all_FF, + ) + return all_FF + + +def fano_fac_sig(population_FF): + """ + Function checks for significance in fano factor output, assuming the data is NOT raw, and mean-matched. + Returns a list of significance values (sig or nonsig) for each bin, compared to the previous timepoint. + + population_FF: the data obtained from the cross_trial_FF() function, where mean_matched is True, and raw_data is False. 
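+
+    NB: a bin is reported as "significant" when its confidence interval does not overlap
+    that of the preceding bin, so the returned list holds one entry per bin after the
+    first (this assumes the dataframe contains a single session ordered by bin).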
+ """ + significance = [] + for i, items in enumerate(population_FF.iterrows()): + if i == 0: + continue + + bin_upper = items[1]["upper_ci"] + bin_lower = items[1]["lower_ci"] + + previous_upper = population_FF.loc[population_FF["bin"] == i - 1]["upper_ci"] + previous_lower = population_FF.loc[population_FF["bin"] == i - 1]["lower_ci"] + + # Compare bins for overlap + if (bin_upper <= previous_lower.item()) or (bin_lower >= previous_upper.item()): + significance.append("significant") + + else: + significance.append("nonsignificant") + + return significance diff --git a/projects/Aidan_Analysis/FullAnalysis/granulardepths_VR46_VR59.csv b/projects/Aidan_Analysis/FullAnalysis/granulardepths_VR46_VR59.csv new file mode 100755 index 0000000..7df34be --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/granulardepths_VR46_VR59.csv @@ -0,0 +1,12 @@ +MouseID,LayerV_Depth +VR46,0.26 +VR47,0.285 +VR49,0.341 +VR50,0.34 +VR59,0.439 +VR52,0.222 +VR55,0.584 +VR53,0.505 +VR56,0.297 +VR58,0.262 +VR54,0.417 diff --git a/projects/Aidan_Analysis/FullAnalysis/neuron_types.py b/projects/Aidan_Analysis/FullAnalysis/neuron_types.py new file mode 100644 index 0000000..0652360 --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/neuron_types.py @@ -0,0 +1,211 @@ +#%% +##This script simply produces a pie chart of the distribution of neuron types +# Import required packages +import enum +from xml.etree.ElementInclude import include +from cv2 import multiply, rotate +from matplotlib.pyplot import colormaps, title, ylabel, ylim +import numpy as np +import pandas as pd +import seaborn as sns + +import sys +from base import * +from channeldepth import * +from functions import event_times, per_trial_spike_rate +from functions import unit_depths + +sys.path.append("/home/s1735718/PixelsAnalysis/pixels-analysis") +sys.path.insert(0, "/home/s1735718/PixelsAnalysis/pixels/pixels") +from pixtools.utils import Subplots2D # use the local copy of base.py +from pixtools import utils +from pixtools import spike_rate +from pixels import ioutils +from pixels import PixelsError +from matplotlib.axes import Axes + +##Now select units for pyramidal and interneurons +all_units = myexp.select_units(group="good", name="m2", max_depth=1200, min_depth=200) + +# select all pyramidal neurons +pyramidal_units = get_pyramidals(myexp) + +# and all interneurons +interneuron_units = get_interneurons(myexp) + + +# Then align trials +all_aligned = myexp.align_trials( + ActionLabels.correct, Events.led_off, "spike_rate", duration=10, units=all_units +) + +pyramidal_aligned = myexp.align_trials( + ActionLabels.correct, + Events.led_off, + "spike_rate", + duration=10, + units=pyramidal_units, +) + +interneuron_aligned = myexp.align_trials( + ActionLabels.correct, + Events.led_off, + "spike_rate", + duration=10, + units=interneuron_units, +) + +#%% +# Now plot a chart containing a count of all neuron types per session +# To do so, will require a count of neuronal units, only need to do this once +ses_counts = [] +raw_data = [] +for s, session in enumerate(myexp): + name = session.name + + ses_pyramidals = pyramidal_aligned[s] + ses_interns = interneuron_aligned[s] + + pyr_count = pd.DataFrame( + [ + name, + "pyramidal", + len(ses_pyramidals.columns.get_level_values("unit").unique()), + ] + ).T + + pyr_num = pd.DataFrame( + [ + name, + "pyramidal", + ses_pyramidals.columns.get_level_values("unit").unique(), + ] + ).T + ses_counts.append(pyr_count) + raw_data.append(pyr_num) + + int_count = pd.DataFrame( + [ + name, + "interneuron", + 
len(ses_interns.columns.get_level_values("unit").unique()), + ] + ).T + + int_num = pd.DataFrame( + [ + name, + "interneuron", + ses_interns.columns.get_level_values("unit").unique(), + ] + ).T + + ses_counts.append(int_count) + raw_data.append(int_num) + +ses_counts = pd.concat(ses_counts) +ses_counts.rename(columns={0: "session", 1: "neuron", 2: "count"}, inplace=True) + +ses_raw_data = pd.concat(raw_data) +ses_raw_data.rename(columns={0: "session", 1: "neuron", 2: "unit"}, inplace=True) +ses_raw_data = ses_raw_data.explode("unit").reset_index(drop=True) +#%% +# plot these as stacked bar +# plt.rcParams.update({"font.size": 30}) + +# plot_dat = ( +# ses_raw_data.groupby(["session", "neuron"]) +# .size() +# .reset_index() +# .pivot(columns="neuron", index="session") +# ) +# plot_dat.plot(kind="bar", stacked=True, color=["pink", "cornflowerblue"]) +# plt.legend( +# labels=["Interneuron", "Pyramidal"], +# bbox_to_anchor=(1.22, 1.1), +# title="Neuronal Type", +# ) + +# plt.suptitle("Proportion of Neuronal Types Within pM2") +# plt.xlabel("Recording Session & Mouse ID (SessionDate_MouseID)") +# plt.ylabel("Number of Recorded Neurons") +# plt.xticks(rotation=45) +# plt.gcf().set_size_inches(10, 10) + + +# Uncomment below to save as pdf +# utils.save( +# f"/home/s1735718/Figures/NeuronalTypeCountPlot", +# nosize=True, +# ) + + +#%% + +####Will also plot a depth graph of neuron type#### +####Like the previous graph but Y is depth rather than count, x is session again#### +##Get all unit depths for the experiment +depths = unit_depths(myexp) +all_unit_depths = [] + +for s, session in enumerate(myexp): + name = session.name + ses_depths = depths[name] + ses_pyramidals = pyramidal_aligned[s] + ses_int = interneuron_aligned[s] + + pyr_depths = ses_depths[ses_pyramidals.columns.get_level_values("unit").unique()] + int_depths = ses_depths[ses_int.columns.get_level_values("unit").unique()] + + # melt then merge these datasets to allow plotting + pyr_depths = pyr_depths.melt() + pyr_depths["type"] = "pyramidal" + + int_depths = int_depths.melt() + int_depths["type"] = "interneuron" + + units = pd.concat([pyr_depths, int_depths]).reset_index(drop=True) + units["session"] = name + + all_unit_depths.append(units) + +all_unit_depths = pd.concat(all_unit_depths, axis=0).reset_index(drop=True) +# %% +# Plot these depths by session and type +plt.rcParams.update({"font.size": 50}) + +p = sns.stripplot( + x="session", + y="value", + hue="type", + data=all_unit_depths, + s=15, + linewidth=1, + palette="bright", +) + +plt.xticks(rotation=90) +plt.xlabel("Recording Session") +plt.ylabel("pM2 Depth (μm)") +plt.ylim(0) +plt.gca().invert_yaxis() # invert the y axis, make understanding easier + + +# Plot lines showing lower bound of pM2 + +plt.axhline(y=1200, color="black", ls="--") +plt.text(1.55, 1200, "pM2 Lower Bound") + +plt.legend(bbox_to_anchor=(1, 1), title="Neuron Type") +plt.gcf().set_size_inches(15, 15) +plt.suptitle("Neuronal Types by Location in pM2") + +# Save Figure then Show +utils.save( + "/home/s1735718/Figures/neurontype_by_depth", + nosize=True, +) + +plt.show() + +# %% diff --git a/projects/Aidan_Analysis/FullAnalysis/noisebase.py b/projects/Aidan_Analysis/FullAnalysis/noisebase.py new file mode 100644 index 0000000..39c845e --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/noisebase.py @@ -0,0 +1,50 @@ +import json + +import datetime +import matplotlib.pyplot as plt +import seaborn as sns +import pandas as pd + +from pixels import Experiment +from pixels.behaviours.leverpush import 
LeverPush +from pixels.behaviours.pushpull import PushPull +from pixels.behaviours.reach import Reach +from pixels.behaviours.no_behaviour import NoBehaviour + +import sys +sys.path.append("/home/s1735718/PixelsAnalysis/pixels-analysis") +from pixtools import utils +from pixtools import clusters + +from base import * + + +noise_tests = Experiment( + ["noisetest1"], + NoBehaviour, + "~/duguidlab/visuomotor_control/neuropixels", +) + +noise_test_unchanged = Experiment( + ["noisetest_unchanged"], + NoBehaviour, + "~/duguidlab/visuomotor_control/neuropixels", +) + +noise_test_nopi = Experiment( + ["noisetest_nopi"], + NoBehaviour, + "~/duguidlab/visuomotor_control/neuropixels", +) + +noise_test_no_caps = Experiment( + "VR50", + Reach, + "~/duguidlab/visuomotor_control/neuropixels", +) + +#This contains the list of experiments we want to plot noise for, here only interested in the reaching task +#Remember to change this in base.py +exps = { + "reaching": myexp +} \ No newline at end of file diff --git a/projects/Aidan_Analysis/FullAnalysis/noiseplot_By_Channel_Stripplot.py b/projects/Aidan_Analysis/FullAnalysis/noiseplot_By_Channel_Stripplot.py new file mode 100644 index 0000000..327c120 --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/noiseplot_By_Channel_Stripplot.py @@ -0,0 +1,92 @@ +# This script will assess the noise in the recordings from the cortex, to allow us to decide which recordings to use +# Import required packages and data from base files +# NB: I changed how unit_depths() works in my local copy to remove the reference to rec_num + + + +from turtle import fd + +from matplotlib.pyplot import suptitle, xlabel +from base import * +from noisebase import * +from channeldepth import * +from matplotlib.cm import ScalarMappable + + +import numpy as np +import pandas as pd +import seaborn as sns + + +# Now select the experiment we want to use +# Here we are taking the data from the 17th. + + +# Now from this data, give us all units that are good and within the cortex +units = myexp.select_units(group="good", max_depth=1200, name="m2") + +# #Now that we have the units selected, let us assess the noise +myexp.assess_noise() + +# now read these dataframes in from the json files just created +# If we want to change the data read in, simply change the experimental call in noisebase! +# May also add more experiments to the list here if desired! + +noise = [] +for s, session in enumerate(myexp): + for i in range(len(session.files)): + path = session.processed / f"noise_{i}.json" + with path.open() as fd: + ses_noise = json.load(fd) + date = datetime.datetime.strptime(session.name[:6], "%y%m%d") + for SD in ses_noise["SDs"]: + noise.append((session.name, date, name, SD)) + + +df = pd.DataFrame(noise, columns=["session", "date", "project", "SDs", "median SD"]) + +# Now read in the metadata containing channel coordinates and convert it to dataframe format + +depth1 = meta_spikeglx( + myexp, 0 +) # give us data for session1, session two will be the same so perhaps we can simply clone this?? 
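+# meta_spikeglx() returns the probe layout read by probeinterface; its dataframe form is
+# assumed below to carry one row per channel with the x/y electrode coordinates, which is
+# what the (currently commented-out) concatenation with df2 further down would rely on.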
+depth1 = depth1.to_dataframe() + +depth = pd.concat([depth1, depth1.copy()], axis=0) + +# ##Can use the code below to print the individual datapoints rather than just the median, with each point representing the noise from each channel## +# #First requires we convert the dataframe to longform (Check this once the fixed noise assessment has completed running) + +df2 = df.set_index("date")["SDs"].apply(pd.Series).stack() +df2 = df2.reset_index() +df2.columns = ["date", "channel", "SDs"] +print(df2) +# #Now add the required coordinates from the metadata +# df2=pd.concat([df2, depth], axis=1).reset_index() +# print(df2) + + +# Channel depth is simply organised in order of the noise json, so let us simply use the channel column in df2 as a binned category +fig, ax = plt.subplots() +p = sns.stripplot( + x="date", y="SDs", data=df2, palette="Spectral", ax=ax, hue="channel" +).set(title="Noise for Subject VR59") + +ax.set_xticklabels( + [t.get_text().split("T")[0] for t in ax.get_xticklabels()] +) # Remove zeroes from date line +ax.legend_.remove() # Remove the huge autogenerated legend + +# Now will add a colour bar to change hue based on channel coordinates + + +# This code adds a legend for the colourmap +cmap = plt.get_cmap("Spectral") +norm = plt.Normalize(df2["channel"].min(), df2["channel"].max()) +sm = ScalarMappable(norm=norm, cmap=cmap) +sm.set_array([]) +cbar = fig.colorbar(sm, ax=ax) +cbar.ax.set_title('"Channel Num"') + + +plt.show() diff --git a/projects/Aidan_Analysis/FullAnalysis/noiseplot_SD_Clustering.py b/projects/Aidan_Analysis/FullAnalysis/noiseplot_SD_Clustering.py new file mode 100644 index 0000000..634bc3b --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/noiseplot_SD_Clustering.py @@ -0,0 +1,251 @@ +#%% +# Unlike other noiseplot file (by channel) this file will cluster the SDs, allowing for a mean square analysis +# If there are distinct clusters able to be seperated by depth, it will indicate that there is a clear relationship to noise +# TODO: Test Functionality after noise analysis is complete, push to pixtools. +# First import required packages + +import json +import datetime +from turtle import fd + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import seaborn as sns +from pixels import Experiment +from pixels.behaviours.leverpush import LeverPush +from pixels.behaviours.no_behaviour import NoBehaviour +from pixels.behaviours.pushpull import PushPull +from pixels.behaviours.reach import Reach +from sklearn.cluster import KMeans + +from base import * +from channeldepth import * +from tqdm import tqdm +import os +from pathlib import Path + +import sys + +sys.path.append("/home/s1735718/PixelsAnalysis/pixels-analysis") +sys.path.insert(0, "/home/s1735718/PixelsAnalysis/pixels/pixels") +from pixtools.utils import Subplots2D +from pixtools import clusters, utils + +myexp.assess_noise() + + +def noise_per_channeldepth(myexp): + """ + Function extracts the noise for each channel, combining this into a dataframe + + myexp: the experiment defined in base.py, will extract the depth information from here. + """ + noise = pd.DataFrame( + columns=["session", "project", "SDs", "x", "y"] + ) # Create the empty array to hold the noise information + depths = meta_spikeglx(myexp, 0) + depths = depths.to_dataframe() + coords = depths[ + ["x", "y"] + ] # Create a dataframe containing the generic x and y coords. + tot_noise = [] + + # Iterate through each session, taking the noise for each file and loading them into one continuous data frame. 
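+    # Each noise_<i>.json is assumed to hold an "SDs" list with one value per channel
+    # (384 recording channels plus a final sync channel, which is excluded below).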
+ for s, session in enumerate(myexp): + for i in tqdm(range(len(session.files))): + path = session.processed / f"noise_{i}.json" + with path.open() as fd: + ses_noise = json.load(fd) + + chan_noises = [] + for j, SD in enumerate( + ses_noise["SDs"][0:-1] + ): # This will iterate over first 384 channels, and exclude the sync channel + x = coords["x"].iloc[j] + y = coords["y"].iloc[j] + noise_row = pd.DataFrame.from_records( + {"session": [session.name], "SDs": [SD], "x": x, "y": y} + ) + chan_noises.append(noise_row) + + # Take all datafrom channel noises for a session, then concatenate + noise = pd.concat(chan_noises) + tot_noise.append(noise) # Take all channel noises and add to a master file + df2 = pd.concat( + tot_noise + ) # Convert this master file, containing every sessions noise data into a dataframe + + return df2 + + +df2 = noise_per_channeldepth(myexp) + +# Now that we have both noise and depth information for every channel, we may perform a means clustering +# Initiate the kmeans class, describing the parameters of our analysis +# First double check how many clusters are required to best describe the data +# Reshape the array to allow clustering + + +# Now determine the optimal number of clusters +def elbowplot(data, myexp): + + """ + + This function takes data formatted according to the function above, containing the noise values for all channels + Will iterate through each experimental session, producing the appropriate graph. Should take the optimal number of clusters as the point at which the elbow bends. + This point is defined as the boundary where additional clusters no longer explain much more variance in the data. + + data: The dataframe, as formatted by noise_per_channel() + + myexp: The experiment, defined in base.py containing the session information. + + """ + + for s, session in enumerate(myexp): + name = session.name + ses_data = data.loc[data["session"] == name] + df3 = ses_data["SDs"].values.reshape( + -1, 1 + ) # Just gives all noise values, for each session + Sum_of_squares = [] # create an empty list to store these in. + + k = range(1, 10) + for num_clusters in k: + kmeans = KMeans(n_clusters=num_clusters) + kmeans.fit(df3) + Sum_of_squares.append(kmeans.inertia_) + + fig, ax = plt.subplots() + + # This code will plot the elbow graph to give an overview of the variance in the data explained by the varying the number of clusters + # This gives the distance from the centroids, as a measure of the variability explained + # We want this to drop off indicating that there is no remaining data explained by further centroid inclusion + + # Figure has two rows, one columns, this is the first plot + plt.plot(k, Sum_of_squares, "bx-") # bx gives blue x as each point. + plt.xlabel("Putative Number of Clusters") + plt.ylabel("Sum of Squares Distances/Inertia") + plt.title( + f"Determining Optimal Number of Clusters for Analysis - Session {name}" + ) + utils.save(f"/home/s1735718/Figures/{myexp[s].name}_elbow_plot") + plt.show() + + +elbowplot(df2, myexp) + +# Must now define kmeans parameters based on elbow analysis! +# Seems that 2 clusters is still optimal across datasets + +kmeans = KMeans( + init="random", # Initiate the iterative analysis with random centres + n_clusters=2, # How many clusters to bin the data into, based on the elbow analysis! 
+ n_init=10, # Number of centroids to generate initially + max_iter=300, # Max number of iterations before ceasing analysis + random_state=42, # The random number seed for centroid generation, can really be anything for our purposes +) + +# Plot the kmeans clustering by depth (y coord) with hue representing generated clusters. +for s, session in enumerate(myexp): + name = session.name + + ses = df2.loc[df2["session"] == name] + df3 = ses["SDs"].values.reshape(-1, 1) + x_kmeans = kmeans.fit(df3) + y_means = kmeans.fit_predict(df3) + + # Now plot the kmeans analysis + # Remember we use our original data (df2) but use the df3 analysis to generate the labels + plt.scatter(ses["y"], ses["SDs"], c=y_means, cmap="viridis") + + plt.xlabel("Probe Channel Y-Coordinate") + plt.ylabel("Channel Noise (SD)") + plt.title(f"{myexp[s].name} Channel Noise k-Mean Clustering Analysis") + + # Save figures to folder + utils.save(f"/home/s1735718/Figures/{myexp[s].name}_noise_clustering") + plt.show() + +# Extract the k means values and plot these as a histogram +# Where we want to plot only values within the brain across sessions +all_means = pd.DataFrame() +all_data = pd.DataFrame() +for s, session in enumerate(myexp): + name = session.name + + ses = df2.loc[df2["session"] == name] + df3 = ses["SDs"].values.reshape(-1, 1) + x_kmeans = kmeans.fit(df3) + y_means = kmeans.fit_predict(df3) + + # Add this classification to the main session info + ses["cluster"] = y_means + + # Determine which cluster is on average deeper (lower y value) + # This will be the within brain cluster + mean = ses.groupby("cluster").mean() + mins = mean.idxmin()["y"] # give the cluster where depth is average higher + + # Take mean of this cluster + inbrain = ses.loc[ses["cluster"] == mins] + inbrain_mean = inbrain.mean()["SDs"] + + ses_means = pd.DataFrame({"session": [name], "mean_SD": [inbrain_mean]}) + + # Now concatenate this into a single dataframe + all_means = pd.concat([all_means, ses_means], ignore_index=True, axis=0) + all_data = pd.concat([all_data, inbrain], ignore_index=True, axis=0) +# use this data to plot a histogram of only values considered "within brain" by k-means clustering + +# plot histogram +p = sns.histplot(data=all_means, x="mean_SD") + +#%% +# Now will determine the depth of the boundary between inbrain and outbrain clusters for each session +# Will extract the y coordinate for these points, then convert to real depth +brain_boundaries = pd.DataFrame(columns=["session", "y coordinate", "probe depth"]) +for s, session in enumerate(myexp): + name = session.name + + ses_data = all_data.loc[all_data["session"] == name] + + # Sort data to find boundary + ses_data.sort_values("y", ascending=False, inplace=True) + + # Append values to dataframe + ses_vals = pd.DataFrame(ses_data.iloc[0][["session", "y"]].values).T + ses_vals = ses_vals.rename(columns={0: "session", 1: "y coordinate"}) + + # Add probe depth as a new column + #ses_vals["probe depth"] = session.get_probe_depth()[0] + # Calculate actual depth + brain_boundaries = pd.concat( + [brain_boundaries, ses_vals], ignore_index=True, axis=0 + ) + + +# %% +# Take file below that contains probe depths, rename it to depths.txt.histology +# Recreate it using y coordinate value for each mouse, nothing else +# DO NOT UNCOMMENT, WILL OVERWRITE SAVED OLD VERSION + +# session.processed /"depths.txt" + +# for s, session in enumerate(myexp): +# name = session.name + +# ses_data = brain_boundaries.loc[brain_boundaries["session"] == name] + +# # open path, rename file +# 
#os.rename(Path(session.processed / "depth.txt"), Path(session.processed / "depth.txt.histology")) + +# # Create depths.txt +# with open(Path(session.processed / "depth.txt"), "w") as f: + +# #Write the probe y value to the file +# f.write(str(ses_data["y coordinate"].values[0])) +# f.write("\n") + + +# %% diff --git a/projects/Aidan_Analysis/FullAnalysis/params.py b/projects/Aidan_Analysis/FullAnalysis/params.py new file mode 100644 index 0000000..6e983f2 --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/params.py @@ -0,0 +1,6 @@ +dat_path = '/home/mcolliga/duguidlab/visuomotor_control/neuropixels/processed/220217_VR59/sorted_stream_0/temp_wh.dat' +n_channels_dat = 384 +dtype = 'int16' +offset = 0 +sample_rate = 30000 +hp_filtered = True \ No newline at end of file diff --git a/projects/Aidan_Analysis/FullAnalysis/phaseI_Visualisation.py b/projects/Aidan_Analysis/FullAnalysis/phaseI_Visualisation.py new file mode 100644 index 0000000..71eb618 --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/phaseI_Visualisation.py @@ -0,0 +1,451 @@ +# Import required packages +from argparse import Action +from base import * +import matplotlib.pyplot as plt +import matplotlib.patches as mpatches +from matplotlib.ticker import MultipleLocator +import matplotlib as mpl +import numpy as np +import pandas as pd +import seaborn as sns +import scipy.stats as sc +import sys +from channeldepth import meta_spikeglx +import statsmodels.api as sm +from statsmodels.formula.api import ols +from reach import Cohort +from xml.etree.ElementInclude import include +from matplotlib.pyplot import legend, title, ylabel, ylim +import matplotlib.lines as mlines +from tqdm import tqdm +from pixels import ioutils +from pixtools import utils +from textwrap import wrap + +from functions import ( + event_times, + unit_depths, + per_trial_binning, + event_times, + within_unit_GLM, + bin_data_average, + # unit_delta, #for now use a local copy +) + +#%% +# import datafile calculated previously +data = ioutils.read_hdf5("/data/aidan/concatenated_session_information.h5") +# test = ioutils.read_hdf5("/data/aidan/concatenated_session_information_test.h5") +# Add layer V data +layer_vals = pd.read_csv("granulardepths_VR46_VR59.csv") +#%% +# Visualise the data as a depth plot, showing units above and below layer V boundary for each session +fig, ax = plt.subplots(2, 1, sharex=True) +fig.set_size_inches(10, 10) + +p = sns.swarmplot( + data=data, x="depth", y="session", hue="Depth Classification", ax=ax[1] +) +p.set_xlim(0) +ax[1].legend(loc="upper right", bbox_to_anchor=(0, 1.7), title="Layer Class") + +# Plot the overall distribution of points across depths. 
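+# (One KDE per depth classification on the upper axis; it shares its x axis with the
+# swarm plot below so the two panels line up over the same depth range.)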
+q = sns.kdeplot( + data=data, x="depth", ax=ax[0], hue="Depth Classification", legend=False +) +q.set_ylabel("") +q.set_yticklabels("") +q.set(yticks=[]) + +plt.subplots_adjust(wspace=0, hspace=0) +plt.suptitle("Distribution of Recorded Units Across pM2") + +#%% +# Plot the proportion of each neuron type by up/downregulation +# As a stacked bar chart +# first isolate up/downreg units +delta_types = [] +for i in data.iterrows(): + if i[1]["delta"] > 0: + delta_types.append("upregulated") + + if i[1]["delta"] < 0: + delta_types.append("downregulated") + +data.insert(3, "delta type", delta_types) +# now plot these proportions, as a percentage +vals = data[["session", "delta type", "delta significance"]] +change = [] +for i in vals.iterrows(): + if i[1]["delta significance"] == "non-significant": + change.append("no-change") + if (i[1]["delta significance"] == "significant") & ( + i[1]["delta type"] == "downregulated" + ): + change.append("downregulated") + if (i[1]["delta significance"] == "significant") & ( + i[1]["delta type"] == "upregulated" + ): + change.append("upregulated") +vals["change"] = change +vals.drop(columns=["delta type", "delta significance"]) + +vals = ( + vals.groupby(["session", "change"]) + .size() + .reset_index() + .pivot(columns="change", index="session", values=0) +) +vals["upregulated"] = vals["upregulated"].fillna(0) +vals["downregulated"] = vals["downregulated"].fillna(0) +vals["no-change"] = vals["no-change"].fillna(0) +val_props = pd.DataFrame(index=vals.index) +val_props["upreg_%"] = ( + vals["upregulated"] + / (vals["upregulated"] + vals["downregulated"] + vals["no-change"]) +) * 100 +val_props["downreg_%"] = ( + vals["downregulated"] + / (vals["upregulated"] + vals["downregulated"] + vals["no-change"]) +) * 100 +val_props["nochange_%"] = ( + vals["no-change"] + / (vals["upregulated"] + vals["downregulated"] + vals["no-change"]) +) * 100 +val_props.plot(kind="bar", stacked=True) + +plt.suptitle("Proportion of Responsive Units in pM2") +utils.save(f"/home/s1735718/Figures/AllSessionUnitResponsiveness", nosize=True) + +#%% +# Now shall plot delta firing rates by depth, as a violin plot +# NB: this may require binning depth to prevent pseudoreplication when collapsing across sessions +# Plot two seperate subplots for each session them merge, for up and downregulated data +pyr = data.loc[data["type"] == "pyramidal"] +inter = data.loc[data["type"] == "interneuron"] +pyr["all"] = 1 +inter["all"] = 1 +# first plot pyramidals +p = sns.violinplot( + data=pyr, + y="depth", + x="session", + hue="delta type", + split=True, + palette={"upregulated": "orange", "downregulated": "blue"}, +) +for i, violin in enumerate(p.findobj(mpl.collections.PolyCollection)): + violin.set_hatch("//") + +p2 = sns.violinplot( + data=inter, + y="depth", + x="session", + hue="delta type", + split=True, + palette={"upregulated": "orange", "downregulated": "blue"}, +) + +plt.setp(p.collections, alpha=0.3) + +# Now set legend +down = mpatches.Patch(facecolor="blue", alpha=0.5, label="Downregulated") + +up = mpatches.Patch(facecolor="orange", alpha=0.5, label="Upregulated") + +pyr_leg = mpatches.Patch(facecolor="grey", hatch="//", alpha=0.5, label="Pyramidals") + +inter_leg = mpatches.Patch(facecolor="grey", alpha=0.5, label="Interneurons") +plt.legend( + handles=[down, up, pyr_leg, inter_leg], + bbox_to_anchor=(1, 1), + title="Activity Change/Cell Type", + fontsize=15, +) + +plt.suptitle("Proportion of Significantly Changing Units During Grasp") +plt.gca().set_ylim(0, 1200) 
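+# Inverting the y axis below puts deeper units lower in the figure; the 0-1200 um limit
+# matches the max_depth used when selecting pM2 units earlier in this analysis.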
+plt.gca().invert_yaxis() +plt.gca().set_ylabel("pM2 Depth (um)") +plt.gca().set_xlabel("Session") +plt.xticks(rotation=90) + + +plt.show() + + +# Now plot the point at which layer V occurs + +# %% +# A similar plot that may be of use is to instead plot a scatter with histogram overlaid +# Reuse scatter plot code, adjust it + +for s, session in enumerate(myexp): + name = session.name + ses_data = data.loc[data["session"] == name] + sigs = ses_data.loc[ses_data["delta significance"] == "significant"] + nosigs = ses_data.loc[ses_data["delta significance"] == "non-significant"] + + fig = plt.figure() + ax = fig.add_subplot(111, label="1") + + # Finally, plot a graph of deltas relative to zero, by depth + sns.scatterplot( + x="delta", + y="depth", + data=sigs, + # size="average firing rate", + # sizes=(40, 400), + s=100, + linewidth=1, + style="type", + color="#972c7f", + alpha=0.5, + ax=ax, + legend=False, + ) + + ###################################################################### + # ##Now overlay a kde plot over these points + # ##Uncomment to plot this version + # up_kde = sns.kdeplot( + # data=sigs.loc[sigs["delta type"] == "upregulated"], + # y="depth" + # ).get_lines()[0].get_data() + + # #Extract density data, and scale it to the order of magnitude plotted (i.e., x10^5) + # #Plot these values + + # up_kde=pd.DataFrame(up_kde, index=["density", "depth"]) + # up_kde.loc["density"] = up_kde.loc["density"].apply(lambda x: x*30000) + # up_kde = up_kde.T + # sns.scatterplot(data = up_kde, x="density", y = "depth") + + # down_kde = sns.kdeplot( + # data=sigs.loc[sigs["delta type"] == "downregulated"], + # y="depth" + # ).get_lines()[1].get_data() + + # down_kde=pd.DataFrame(down_kde, index=["density", "depth"]) + # down_kde.loc["density"] = down_kde.loc["density"].apply(lambda x: x*-30000) #times by -1 to allow inverted plotting of kde plot + # down_kde = down_kde.T + # sns.scatterplot(data = down_kde, x="density", y = "depth") + + ###################################################################### + + ##Uncomment below to plot histogram over points. 
+ # ax2=fig.add_subplot(111, label="2", frame_on=False) + # ax3 = fig.add_subplot(111, label="2", frame_on=False) + # up_hist = sns.histplot( + # data=sigs.loc[sigs["delta type"] == "upregulated"], y = "depth", binwidth=100, binrange=(0,1300), ax=ax2, legend=False + # ) + + # ax2.xaxis.tick_top() + # ax2.set_xlim(-3,3) + # ax2.axes.xaxis.set_visible(False) + # ax2.axes.yaxis.set_visible(False) + # ax2.set_ylabel("") + + # down_hist = sns.histplot( + # data=sigs.loc[sigs["delta type"] == "downregulated"], y = "depth", binwidth=100, binrange=(0,1300), ax=ax3, legend=False + # ) + + # ax3.invert_yaxis() + # ax3.xaxis.tick_top() + # ax3.set_xlim(3,-3) + # ax3.axes.xaxis.set_visible(False) + # ax3.axes.yaxis.set_visible(False) + + ###################################################################### + + # p2 = sns.scatterplot( + # x="delta", + # y="depth", + # data=nosigs, + # size="average firing rate", + # sizes=(40, 400), + # s=100, + # linewidth=1, + # style="type", + # color="#221150", + # ) + + ax.axvline(x=0, color="green", ls="--") + + ax.set_xlim(-60, 60) + ax.set_ylim(0, 1300) + + ax.set_xlabel("Δ Firing Rate (Hz)", fontsize=20) + ax.set_ylabel("Depth of Recorded Neuron (μm)", fontsize=20) + plt.xticks(fontsize=25) + plt.yticks(fontsize=25) + plt.suptitle( + "\n".join( + wrap( + f"Largest Trial Firing Rate Change by pM2 Depth - Session {name}", + width=30, + ) + ), + y=1.1, + fontsize=20, + ) + + # Create lengend manually + # Entries for legend listed below as handles: + sig_dot = mlines.Line2D( + [], + [], + color="#221150", + marker="o", + linestyle="None", + markersize=10, + label="Significant", + ) + nonsig_dot = mlines.Line2D( + [], + [], + color="#972c7f", + marker="o", + linestyle="None", + alpha=0.5, + markersize=10, + label="Non-Significant", + ) + grey_dot = mlines.Line2D( + [], + [], + color="grey", + marker="o", + linestyle="None", + markersize=10, + label="Pyramidal", + ) + grey_cross = mlines.Line2D( + [], + [], + color="grey", + marker="X", + lw=5, + linestyle="None", + markersize=10, + label="Interneuronal", + ) + + plt.legend( + handles=[ # sig_dot, + # nonsig_dot, + grey_dot, + grey_cross, + ], + bbox_to_anchor=(1.7, 1), + title="Significance (p < 0.05)", + fontsize=15, + ) + + # Now invert y-axis and save + ax.invert_yaxis() + + # utils.save( + # f"/home/s1735718/Figures/{name}_DeltaFR_byDepth", + # nosize=True, + # ) + plt.show() + +# %% +# Plot only the KDE values + +for s, session in enumerate(myexp): + name = session.name + ses_data = data.loc[data["session"] == name] + sigs = ses_data.loc[ses_data["delta significance"] == "significant"] + nosigs = ses_data.loc[ses_data["delta significance"] == "non-significant"] + + fig = plt.figure() + ax = fig.add_subplot(111, label="1") + ax2 = fig.add_subplot(111, label = "2", frame_on=False) + ###################################################################### + # ##Now overlay a kde plot over these points + # ##Uncomment to plot this version + up_kde = sns.kdeplot( + data=sigs.loc[sigs["delta type"] == "upregulated"], + y="depth", ax = ax, color = "blue" + ) + + down_kde = sns.kdeplot( + data=sigs.loc[sigs["delta type"] == "downregulated"], + y="depth", ax = ax2, color = "orange" + ) + + ax2.set_xlim(-0.002, 0.002) + ax2.invert_xaxis() + ax2.axes.xaxis.set_visible(False) + ax2.axes.yaxis.set_visible(False) + + + plt.axvline(x=0, color="green", ls="--") + ax.set_xlim(-0.002, 0.002) + ax.set_ylim(0, 1300) + ax.invert_yaxis() + ax2.invert_yaxis() + + ax.set_xlabel("Likelihood of Unit Presence", fontsize=20) + 
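+    # ax and ax2 occupy the same figure position, so these labels describe both the
+    # upregulated density (opening rightwards) and the mirrored downregulated density
+    # (opening leftwards once ax2's x axis is inverted).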
ax.set_ylabel("Depth Across pM2 (μm)", fontsize=20) + ax.tick_params(axis='both', which='major', labelsize=20) + ax.yaxis.set_ticks(np.arange(0, 1300, 200)) + + plt.suptitle( + "\n".join( + wrap( + f"Kernel Density Estimate of Responsive Unit Type - Session {name}", + width=30, + ) + ), + y=1.1, + fontsize=20, + ) + + + + # Create lengend manually + # Entries for legend listed below as handles: + upreg = mlines.Line2D( + [], + [], + color="blue", + marker="o", + linestyle="None", + markersize=10, + label="Upregulated Units", + ) + + downreg = mlines.Line2D( + [], + [], + color="orange", + marker="o", + linestyle="None", + markersize=10, + label="Downregulated Units", + ) + + plt.legend( + handles=[ + upreg, + downreg, + ], + bbox_to_anchor=(1.7, 1), + title="Significance (p < 0.05)", + fontsize=15, + ) + + # Now invert y-axis and save + plt.gca().invert_yaxis() + + # utils.save( + # f"/home/s1735718/Figures/{name}_DeltaFR_byDepth", + # nosize=True, + # ) + plt.show() + +# %% +# Furthermore, will now plot a ball and stick graph showing the actual changes in firing rates and the point at which these deltas occur diff --git a/projects/Aidan_Analysis/FullAnalysis/phaseI_pm2_map.py b/projects/Aidan_Analysis/FullAnalysis/phaseI_pm2_map.py new file mode 100644 index 0000000..3bec38f --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/phaseI_pm2_map.py @@ -0,0 +1,575 @@ +""" +This file contains the first section of my full analysis, it will aim to do the following: + +1. Determine if the distribution of pyramidal to interneuron type alignes with known data. This will give an idea of the rate of undersampling by the probe +2. Concatenate as much information surrounding unit activity as possible. i.e., type, activity, statistically significant changes, depth etc. 
+ This will be concatenated and saved as a dataframe with the following structure: + + --------------------------------------------------------------------------------- + |Session | Unit No | Delta Change | Depth (um) | Supra/Infragranular | Neuron Type| Delta Significance| + --------------------------------------------------------------------------------- +""" + +# First import required packages +from argparse import Action +from base import * +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import seaborn as sns +import scipy.stats as sc +import sys +from channeldepth import meta_spikeglx +import statsmodels.api as sm +from statsmodels.formula.api import ols +from reach import Cohort +from xml.etree.ElementInclude import include +from matplotlib.pyplot import legend, title, ylabel, ylim +import matplotlib.lines as mlines +from tqdm import tqdm +from pixels import ioutils + +from functions import ( + event_times, + unit_depths, + per_trial_binning, + event_times, + within_unit_GLM, + bin_data_average, + # unit_delta, #for now use a local copy +) + +#%% +# First select units based on spike width analysis +all_units = myexp.select_units(group="good", name="m2", min_depth=200, max_depth=1200) + +pyramidals = get_pyramidals(myexp) +interneurons = get_interneurons(myexp) + + +# Then import the known neuronal type distributions from csv +known_units = pd.read_csv("cell_atlas_neuron_distribution_M2.csv", index_col=False) + +#%% +# Now align Trials +per_unit_aligned = myexp.align_trials( + ActionLabels.correct, Events.led_off, "spike_rate", duration=2, units=all_units +) + +left_aligned = myexp.align_trials( + ActionLabels.correct_left, Events.led_off, "spike_rate", duration=2, units=all_units +) + +right_aligned = myexp.align_trials( + ActionLabels.correct_right, + Events.led_off, + "spike_rate", + duration=2, + units=all_units, +) + +pyramidal_aligned = myexp.align_trials( + ActionLabels.correct, Events.led_off, "spike_rate", duration=2, units=pyramidals +) +interneuron_aligned = myexp.align_trials( + ActionLabels.correct, Events.led_off, "spike_rate", duration=2, units=interneurons +) + +#%% +##Now that we have determined the optimal sessions to take from each mouse +# Create a dataframe of neuron type and depth + +# First bin data, then shift hierachy +bin_data = per_trial_binning(per_unit_aligned, myexp, timespan=2, bin_size=0.1) +bin_data.reorder_levels(["session", "unit", "trial"], axis=1) + +# Then perform an ANOVA +glm = within_unit_GLM(bin_data, myexp) + +# Finally take average firing rate +fr_avgs = per_unit_aligned.mean() +fr_avgs = fr_avgs.groupby(["session", "unit"]).mean() + +#%% +# This cell is to serve as an interactive method of altering the deltas function +# The aim is to allow selection of the type of delta +# I.e., first, or largest + + +def unit_delta(glm, myexp, bin_data, sig_only, delta_type, percentage_change): + """ + Function takes the output of the multiple comparisons calculated by within_unit_GLM() and calculates the change in firing rate (or other IV measured by the GLM) between bins, per unit + This requires both experimental cohort data, and bin_data calculated by per_trial_binning() and passed through reorder levels (as "session", "unit", "trial") + Function returns a list of lists, in a hierarchy of [session[unit deltas]] + + Using this data, a depth plot may be created displaying the relative delta by unit location in pM2. 
+ + glm: the result of the ANOVA and subsequent Tukey adjusted multiple comparisons performed by within_unit_GLM + NB: THIS IS A COMPUTATIONALLY EXPENSIVE FUNCTION, DO NOT RUN IT ON MORE THAN ONE SESSION AT A TIME + + myexp: the experimental cohort defined in base.py + + bin_data: the binned raw data computed by the per_trial_binning function + + delta_type: the type of delta change to return, i.e., "largest", "first", or "last" + + sig_only: whether to return only the greatest delta of significant bins or not + + percentage_change: whether to return deltas as a percentage change + + """ + ses_deltas = [] + print("beginning unit delta calculation") + for s, session in enumerate(myexp): + + unit_deltas = [] + sigunit_comps = [] + ses_comps = glm[s] + + if sig_only is True: + # Return only significant comparisons + ses_comps = ses_comps.loc[ses_comps["p-value"] < 0.05] + + # Determine list of units remaining in comparison + units = [] + for i in ses_comps["group1"]: + units.append(i[1]) + units = np.array(units) + units = np.unique(units) + + # Now iterate through comparisons, by unit, Saving the significant comparisons to allow later calculation of delta + + for i in tqdm(units): + unit_comps = ses_comps[ + ses_comps["group1"].apply(lambda x: True if i in x else False) + ] # Lambda function checks if the value is in the tuple for group one + unit_comps = unit_comps[ + unit_comps["group2"].apply(lambda x: True if i in x else False) + ] + unit_comps.reset_index(inplace=True) + + # Now determine which delta to obtain + try: + delta_type == "largest" or delta_type == "first" or delta_type == "last" + + if delta_type == "largest": + + # find row with largest difference + if unit_comps.empty: # skip if empty + continue + + row = unit_comps["Diff"].idxmax() + sigunit_comps.append(unit_comps[unit_comps.index == row]) + + if delta_type == "first": + + if unit_comps.empty: + continue + + row = unit_comps[ + unit_comps.index == unit_comps.index[0] + ] # get the first delta that occurs + sigunit_comps.append(row) # append this to the list + + if delta_type == "last": + + if unit_comps.empty: + continue + + row = unit_comps[unit_comps.index == unit_comps.index[-1]] + + except: + print("Invalid Delta_Type Specified") + break + + # Now that we know the units with significant comparisons, take these bins from raw firing rate averages + ses_avgs = bin_data_average(bin_data[s]) + + # Iterate through our significant comparisons, calculating the actual delta firing rate + for i in range(len(sigunit_comps)): + sig_comp = sigunit_comps[i] + + unit = [x[1] for x in sig_comp["group1"]] + ses_unit = ses_avgs.loc[ + ses_avgs.index.get_level_values("unit") == int(unit[0]) + ].mean() # get all rows where our units firing rate is present + + # Get the bins that the signficant comparison looked at + bin_val1 = [x[0] for x in sig_comp["group1"]][0] + bin_val2 = [x[0] for x in sig_comp["group2"]][0] + + # Return the percentage change from initial state rather than raw value + if percentage_change == True: + change = ses_unit[bin_val2] - ses_unit[bin_val1] + delta = (change / ses_unit[bin_val1]) * 100 + if ses_unit[bin_val1] == 0: + continue # Skip this value, if it changes from a value of zero, this is infinite + print(delta) + unit_deltas.append( + [ + int(unit[0]), + delta, + ses_unit[bin_val2], + ses_unit[bin_val1], + bin_val2, + bin_val1, + ] + ) + # Finally, get the delta value across these bins for the given unit + elif percentage_change == False: + delta = ses_unit[bin_val2] - ses_unit[bin_val1] + 
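+                    # Raw difference between the two compared bins, as opposed to the
+                    # percentage change computed in the branch above.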
unit_deltas.append( + [ + int(unit[0]), + delta, + ses_unit[bin_val2], + ses_unit[bin_val1], + bin_val2, + bin_val1, + ] + ) + + ses_deltas.append(unit_deltas) + + return ses_deltas + + +largest_sig_deltas = unit_delta( + glm, myexp, bin_data, sig_only=True, delta_type="largest", percentage_change=False +) + +deltas = unit_delta( + glm, myexp, bin_data, sig_only=False, delta_type="largest", percentage_change=False +) +#%% +# Now concatenate this information into a single dataframe +# TODO: Add the ability to extract the bin beginnings and ends including firing rates to this function. + +depths = unit_depths(myexp) +layer_info = pd.read_csv("granulardepths_VR46_VR59.csv") + + +def unit_info_concatenation( + average_fr, + sig_deltas, + all_deltas, + depths, + layer_info, + pyramidal_aligned, + interneuron_aligned, + myexp, +): + + """ + This function takes all previously calculated information surrounding unit activity, type, depth, etc. and concatenates it as a single dataframe + Requires all of these values are previously determined using predefined functions shown + + average_fr: Average firing rate for each unit, across trials, organised in long form as a multiindexed dataframe (session->unit) + + sig_deltas: Significant delta changes to firing rate (or SD) determined by within_unit_glm + + all_deltas: All delta changes, regardless of significance, determined by the same within_unit_glm function + + depths: Depths of all units across sessions, determined by unit_depths + + layer_info: Dataframe containing the granular layer boundaries defined through histological analyses + + pyramidal_aligned: Only pyramidal units, aligned to the same event, determined by myexp.align_trials + + interneuron_aligned: Only interneuronal units, aligned to the same event + + myexp: The experimental cohort defined in base.py + + """ + unit_info = pd.DataFrame() + for s, session in enumerate(myexp): + name = session.name + ses_deltas = sig_deltas[s] + + ses_deltas = pd.DataFrame( + ses_deltas, + columns=[ + "unit", + "delta", + "start value", + "end value", + "start bin", + "end bin", + ], + ) + ses_depths = depths[name] + + # Determine depths of units + unit_depth = ses_depths[ses_deltas["unit"]].melt() + ses_deltas = pd.concat([ses_deltas, unit_depth["value"]], axis=1) + ses_deltas.rename(columns={"value": "depth"}, inplace=True) + + # Do the same for nonsignificant units + ses_nosig_deltas = all_deltas[s] + ses_nosig_deltas = pd.DataFrame( + ses_nosig_deltas, + columns=[ + "unit", + "delta", + "start value", + "end value", + "start bin", + "end bin", + ], + ) + + # Find depths of associated units + unit_nosig_depth = ses_depths[ses_nosig_deltas["unit"]].melt() + ses_nosig_deltas = pd.concat( + [ses_nosig_deltas, unit_nosig_depth["value"]], axis=1 + ) + ses_nosig_deltas.rename(columns={"value": "depth"}, inplace=True) + # Now remove any significant values + ses_nosig_deltas = ses_nosig_deltas[ + ~ses_nosig_deltas["unit"].isin(ses_deltas["unit"]) + ].dropna() + ses_nosig_deltas = ses_nosig_deltas.reset_index(drop=True) + + ##Then determine the type of neuron these units are (i.e., pyramidal or interneuron) + pyr_units = pyramidal_aligned[s].melt()["unit"].unique() + int_units = interneuron_aligned[s].melt()["unit"].unique() + dicts = {} + + unit_types = [] + for item_sig in ses_deltas["unit"]: + if item_sig in pyr_units: + unit_types.append("pyramidal") + if item_sig in int_units: + unit_types.append("interneuron") + if item_sig not in pyr_units and item_sig not in int_units: + unit_types.append("neuron 
unclassified") + + nosig_unit_types = [] + for item in ses_nosig_deltas["unit"]: + if item in pyr_units: + nosig_unit_types.append("pyramidal") + if item in int_units: + nosig_unit_types.append("interneuron") + if item not in pyr_units and item not in int_units: + nosig_unit_types.append("neuron unclassified") + + # Append unit_types as a new column + # Will try and append this as a new column, to see where the extra value lies + unit_types = pd.DataFrame(unit_types) + ses_deltas = pd.concat([ses_deltas, unit_types], ignore_index=True, axis=1) + ses_deltas.rename( + columns={ + 0: "unit", + 1: "bin1 val", + 2: "bin2 val", + 3: "delta", + 4: "start bin", + 5: "end bin", + 6: "depth", + 7: "type", + }, + inplace=True, + ) + ses_deltas["delta significance"] = "significant" + ses_deltas.insert(0, "session", "") + ses_deltas["session"] = name + + nosig_unit_types = pd.DataFrame(nosig_unit_types) + ses_nosig_deltas = pd.concat( + [ses_nosig_deltas, nosig_unit_types], ignore_index=True, axis=1 + ) + ses_nosig_deltas.rename( + columns={ + 0: "unit", + 1: "bin1 val", + 2: "bin2 val", + 3: "delta", + 4: "start bin", + 5: "end bin", + 6: "depth", + 7: "type", + }, + inplace=True, + ) + ses_nosig_deltas["delta significance"] = "non-significant" + ses_nosig_deltas.insert(0, "session", "") + ses_nosig_deltas["session"] = name + # Concatenate sig and nonsig deltas as unit types + session_info = pd.concat( + [ses_deltas, ses_nosig_deltas], ignore_index=True, axis=0 + ) + + # Now iterate through session info by row, classifying depth into supra or infragranular + # Will use the layer_info to do so + mouse_id = session.name[-4:] + mouse_boundary = ( + layer_info.loc[layer_info["MouseID"] == mouse_id]["LayerV_Depth"].item() + * 1000 + ) # get value and convert from mm to um + + unit_depth_class = [] + for index, row in session_info.iterrows(): + if row["depth"] < mouse_boundary: + unit_depth_class.append("Supragranular") + + if row["depth"] > mouse_boundary: + unit_depth_class.append("Infragranular") + + # Append this to the session_info as a new column (after depth) + session_info.insert(4, "Depth Classification", unit_depth_class) + + # Now add firing rates + ses_rates = average_fr[s].reindex( + index=session_info["unit"] + ) # reorder this to match out data + session_info.insert( + len(session_info.columns), "average firing rate", ses_rates.values + ) + + # Finally, concatenate this info for the session into the master dataframe to be returned + unit_info = pd.concat([unit_info, session_info], axis=0, ignore_index=True) + + return unit_info + + +data = unit_info_concatenation( + fr_avgs, + largest_sig_deltas, + deltas, + depths, + layer_info, + pyramidal_aligned=pyramidal_aligned, + interneuron_aligned=interneuron_aligned, + myexp=myexp, +) + +# %% +# Now finally, will export this data as a saved file using utils.save +ioutils.write_hdf5("/data/aidan/concatenated_session_information.h5", data) + + +# %% + + +# def tester( +# average_fr, +# sig_deltas, +# all_deltas, +# depths, +# layer_info, +# myexp, +# ): + +# """ +# This function takes all previously calculated information surrounding unit activity, type, depth, etc. 
and concatenates it as a single dataframe +# Requires all of these values are previously determined using predefined functions shown + +# average_fr: Average firing rate for each unit, across trials, organised in long form as a multiindexed dataframe (session->unit) + +# sig_deltas: Significant delta changes to firing rate (or SD) determined by within_unit_glm + +# all_deltas: All delta changes, regardless of significance, determined by the same within_unit_glm function + +# depths: Depths of all units across sessions, determined by unit_depths + +# layer_info: Dataframe containing the granular layer boundaries defined through histological analyses + +# pyramidal_aligned: Only pyramidal units, aligned to the same event, determined by myexp.align_trials + +# interneuron_aligned: Only interneuronal units, aligned to the same event + +# myexp: The experimental cohort defined in base.py + +# """ +# unit_info = pd.DataFrame() +# for s, session in enumerate(myexp): +# name = session.name +# ses_deltas = sig_deltas[s] + +# ses_deltas = pd.DataFrame(ses_deltas, columns=["unit", "delta"]) +# ses_depths = depths[name] + +# # Determine depths of units +# unit_depth = ses_depths[ses_deltas["unit"]].melt() +# ses_deltas = pd.concat([ses_deltas, unit_depth["value"]], axis=1) +# ses_deltas.rename(columns={"value": "depth"}, inplace=True) + +# # Do the same for nonsignificant units +# ses_nosig_deltas = all_deltas[s] +# ses_nosig_deltas = pd.DataFrame(ses_nosig_deltas, columns=["unit", "delta"]) + +# # Find depths of associated units +# unit_nosig_depth = ses_depths[ses_nosig_deltas["unit"]].melt() +# ses_nosig_deltas = pd.concat( +# [ses_nosig_deltas, unit_nosig_depth["value"]], axis=1 +# ) +# ses_nosig_deltas.rename(columns={"value": "depth"}, inplace=True) +# # Now remove any significant values +# ses_nosig_deltas = ses_nosig_deltas[ +# ~ses_nosig_deltas["unit"].isin(ses_deltas["unit"]) +# ].dropna() +# ses_nosig_deltas = ses_nosig_deltas.reset_index(drop=True) + +# # Append unit_types as a new column +# # Will try and append this as a new column, to see where the extra value lies +# unit_types = pd.DataFrame(unit_types) +# ses_deltas.rename(columns={0: "unit", 1: "delta", 2: "depth"}, inplace=True) +# ses_deltas["delta significance"] = "significant" +# ses_deltas.insert(0, "session", "") +# ses_deltas["session"] = name + +# nosig_unit_types = pd.DataFrame(nosig_unit_types) +# ses_nosig_deltas.rename( +# columns={0: "unit", 1: "delta", 2: "depth"}, inplace=True +# ) +# ses_nosig_deltas["delta significance"] = "non-significant" +# ses_nosig_deltas.insert(0, "session", "") +# ses_nosig_deltas["session"] = name +# # Concatenate sig and nonsig deltas as unit types +# session_info = pd.concat( +# [ses_deltas, ses_nosig_deltas], ignore_index=True, axis=0 +# ) + +# # Now iterate through session info by row, classifying depth into supra or infragranular +# # Will use the layer_info to do so +# mouse_id = session.name[-4:] +# mouse_boundary = ( +# layer_info.loc[layer_info["MouseID"] == mouse_id]["LayerV_Depth"].item() +# * 1000 +# ) # get value and convert from mm to um + +# unit_depth_class = [] +# for index, row in session_info.iterrows(): +# if row["depth"] < mouse_boundary: +# unit_depth_class.append("Supragranular") + +# if row["depth"] > mouse_boundary: +# unit_depth_class.append("Infragranular") + +# # Append this to the session_info as a new column (after depth) +# session_info.insert(4, "Depth Classification", unit_depth_class) + +# # Now add firing rates +# ses_rates = average_fr[s].reindex( +# 
index=session_info["unit"] +# ) # reorder this to match out data +# session_info.insert( +# len(session_info.columns), "average firing rate", ses_rates.values +# ) + +# # Finally, concatenate this info for the session into the master dataframe to be returned +# unit_info = pd.concat([unit_info, session_info], axis=0, ignore_index=True) + +# return unit_info + + +# test = tester( +# fr_avgs, +# largest_sig_deltas, +# deltas, +# depths, +# layer_info, +# myexp=myexp, +# ) +ioutils.write_hdf5("/data/aidan/concatenated_session_information_test.h5", test) +# %% diff --git a/projects/Aidan_Analysis/FullAnalysis/processing.py b/projects/Aidan_Analysis/FullAnalysis/processing.py new file mode 100644 index 0000000..3b1662e --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/processing.py @@ -0,0 +1,25 @@ +#Import our experimental data from base + +from base import * + +#The first time this is run it will take a while!! There is a lot of data to go through and many steps## +#It will pull from raw data and output to processed data. + +#First we will select only the recording from the 17th and assign this to a variable + + +#First run Kilosort on data to prepare for phy curation +#Remember to open phy after to generate the cluster info file!! +myexp.sort_spikes() + +# #Then process behavioural data +myexp.process_behaviour() + +# # #Then alicn, crop, and downsample spike data +myexp.process_spikes() + +# # #Now do the same with local field potential data +myexp.process_lfp() + +#Opetionally analyse the noise of the recordings +myexp.assess_noise() \ No newline at end of file diff --git a/projects/Aidan_Analysis/FullAnalysis/raster_plots_VR59.py b/projects/Aidan_Analysis/FullAnalysis/raster_plots_VR59.py new file mode 100644 index 0000000..66b8c10 --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/raster_plots_VR59.py @@ -0,0 +1,57 @@ +#First import experimental data from base.py +from cProfile import label + +from matplotlib.pyplot import subplots +from base import * + + +#Now add the pixtools directory to the path to allow us to use the modules within +sys.path.append("/home/s1735718/PixelsAnalysis/pixels-analysis") +from pixtools.utils import Subplots2D + + +#Then import the data to allow raster plotting +from rasterbase import * + +#Now let us plot the raster info for the 17th, per UNIT + + +unit1 = myexp.select_units( + group="good", + max_depth=1500, + name="cortex" +) + + + +#Now take all spike timees for all left hand events +session1_L = myexp.align_trials( + ActionLabels.correct_left, + Events.led_on, + "spike_times", + duration=4, + units = unit1 +) + +#And now as right hand +session1_R = myexp.align_trials( + ActionLabels.correct_right, + Events.led_on, + "spike_times", + duration=4, + units = unit1 +) + + +#Now plot left and right as subplots, remember we are taking session 2 here (by indexing into session) +subplot1 = per_unit_raster(session1_L[1], sample=None, start=0, subplots=None, label=True) + +per_unit_raster( + session1_R[1], + sample=None, + start=0, + subplots=subplot1, + label=True +) + +plt.show() diff --git a/projects/Aidan_Analysis/FullAnalysis/rasterbase.py b/projects/Aidan_Analysis/FullAnalysis/rasterbase.py new file mode 100644 index 0000000..eebf078 --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/rasterbase.py @@ -0,0 +1,165 @@ +#First import required packages from base. 
+ + +from base import * #Import our experiment and metadata needed +import sys +import seaborn as sns +import matplotlib.pyplot as plt +import numpy as np +import random +from math import ceil + +#Add the location of pixtools to the path +# sys.path.append("/home/s1735718/PixelsAnalysis/pixels-analysis") #Adds the location of the pixtools folder to the path +# from pixtools.utils import Subplots2D #Allows us to use the custom class designed to give the figs/axes from a set of subplots that will fit the data. This generates as many subplots as required in a square a shape as possible + +#Now import our sorted data functions + + +#Defines a function that will return plots of EITHER units per trial or trials per unit (i.e., inverted datasets) +#To do so, specify "trial" or "unit" as the per value +def _raster(per, data, sample, start, subplots, label): + per_values = data.columns.get_level_values(per).unique() + per_values.values.sort() + + if per == "trial": + data = data.reorder_levels(["trial", "unit"], axis=1) + other = "unit" + else: + other = "trial" + + # units if plotting per trial, trials if plotting per unit + samp_values = list(data.columns.get_level_values(other).unique().values) + if sample and sample < len(samp_values): + sample = random.sample(samp_values, sample) + else: + sample = samp_values + + if subplots is None: + subplots = Subplots2D(per_values) + + palette = sns.color_palette("pastel") #Set the colours for the graphs produced + + for i, value in enumerate(per_values): + val_data = data[value][sample] #Requires I index into the data when assembling plots (see plots.py) + val_data.columns = np.arange(len(sample)) + start + val_data = val_data.stack().reset_index(level=1) + + p = sns.scatterplot( + data=val_data, + x=0, + y='level_1', + ax=subplots.axes_flat[i], + s=0.5, + legend=None, + ) + p.set_yticks([]) + p.set_xticks([]) + p.autoscale(enable=True, tight=True) + p.get_yaxis().get_label().set_visible(False) + p.get_xaxis().get_label().set_visible(False) + p.text( + 0.95, 0.95, + value, + horizontalalignment='right', + verticalalignment='top', + transform=subplots.axes_flat[i].transAxes, + color='0.3', + ) + p.axvline(c=palette[2], ls='--', linewidth=0.5) + + if not subplots.axes_flat[i].yaxis_inverted(): + subplots.axes_flat[i].invert_yaxis() + + if label: #Chane our axis labels, here to trial number and time from the reach (which the data is aligned to!) + to_label = subplots.to_label + to_label.get_yaxis().get_label().set_visible(True) + to_label.set_ylabel('Trial Number') + to_label.get_xaxis().get_label().set_visible(True) + to_label.set_xlabel('Time from reach (s)') + + # plot legend subplot + legend = subplots.legend + legend.text( + 0, 0.9, + 'Trial number' if per == 'trial' else 'Unit ID', + transform=legend.transAxes, + color='0.3', + ) + legend.set_visible(True) + legend.get_xaxis().set_visible(False) + legend.get_yaxis().set_visible(False) + legend.set_facecolor('white') + + return subplots + +#Using this function we may now plot the graphs we desire! +#First let us define a function to plot spikes per unit + +def per_unit_raster(data, sample=None, start=0, subplots=None, label=True): + """ + Plots a spike raster for every unit in the given data with trials as rows. + + data is a multi-dimensional dataframe as returned from + Experiment.align_trials(, , 'spike_times') indexed into to get the + session and recording. 
+ """ + return _raster("unit", data, sample, start, subplots, label) + +#And per trial +def per_trial_raster(data, sample=None, start=0, subplots=None, label=True): + """ + Plots a spike raster for every trial in the given data with units as rows. + + data is a multi-dimensional dataframe as returned from + Experiment.align_trials(, , 'spike_times') indexed into to get the + session and recording. + """ + return _raster("trial", data, sample, start, subplots, label) + +#Now finally define a function that allows plotting a single unit as a raster +def single_unit_raster(data, ax=None, sample=None, start=0, unit_id=None): + # units if plotting per trial, trials if plotting per unit + samp_values = list(data.columns.get_level_values('trial').unique().values) + if sample and sample < len(samp_values): + sample = random.sample(samp_values, sample) + else: + sample = samp_values + + val_data = data[sample] + val_data.columns = np.arange(len(sample)) + start + val_data = val_data.stack().reset_index(level=1) + + if not ax: + ax = plt.gca() + + p = sns.scatterplot( + data=val_data, + x=0, + y='level_1', + ax=ax, + s=1.5, + legend=None, + edgecolor=None, + ) + p.set_yticks([]) + p.set_xticks([]) + p.autoscale(enable=True, tight=True) + p.get_yaxis().get_label().set_visible(False) + p.get_xaxis().get_label().set_visible(False) + + palette = sns.color_palette() + p.axvline(c='black', ls='--', linewidth=0.5) + + if not ax.yaxis_inverted(): + ax.invert_yaxis() + + if unit_id is not None: + p.text( + 0.95, 0.95, + unit_id, + horizontalalignment='right', + verticalalignment='top', + transform=ax.transAxes, + color='0.3', + ) diff --git a/projects/Aidan_Analysis/FullAnalysis/rugplotbase.py b/projects/Aidan_Analysis/FullAnalysis/rugplotbase.py new file mode 100644 index 0000000..c16c6ac --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/rugplotbase.py @@ -0,0 +1,150 @@ +#This file contains the modified plot function to return a rugplot and firing rate +from base import * +import seaborn as sns +import matplotlib.pyplot as plt +import numpy as np +from math import ceil +import sys + +sys.path.append("/home/s1735718/PixelsAnalysis/pixels-analysis") +from pixtools.utils import Subplots2D +from mpl_toolkits.axes_grid1 import make_axes_locatable + + +def _plot(level, data, rugdata, ci, subplots, label): + values = data.columns.get_level_values(level).unique() + values.values.sort() + if level == "trial": + data = data.reorder_levels(["trial", "unit"], axis=1) + + # no. units if plotting per trial, no. 
trials if plotting per unit + num_samples = len(data[values[0]].columns) + + if subplots is None: + subplots = Subplots2D(values) + + palette = sns.color_palette() + + for i, value in enumerate(values): + val_data = data[value].stack().reset_index() + val_data['y'] = val_data[0] + ax = subplots.axes_flat[i] + divider = make_axes_locatable(ax) + + r = sns.rugplot( + ax = ax, + palette=palette, + data=rugdata, + x=rugdata[0], + height=.1, + legend=False, + expand_margins=True + ) + r.autoscale(enable = False, tight=False) + + + p = sns.lineplot( + data=val_data, + x='time', + y='y', + ci=ci, + ax=ax, + linewidth=0.5, + ) + p.autoscale(enable=True, tight=False) + p.set_yticks([]) + p.set_xticks([]) + peak = data[value].values.mean(axis=1).max() + p.text( + 0.05, 0.95, + "%.1f" % peak, + horizontalalignment='left', + verticalalignment='top', + transform=ax.transAxes, + color='grey', + ) + p.text( + 0.95, 0.95, + value, + horizontalalignment='right', + verticalalignment='top', + transform=ax.transAxes, + color=palette[0], + ) + p.get_yaxis().get_label().set_visible(False) + p.get_xaxis().get_label().set_visible(False) + p.axvline(c=palette[1], ls='--', linewidth=0.5) + p.set_box_aspect(1) + + + if label: + to_label = subplots.to_label + to_label.get_yaxis().get_label().set_visible(True) + to_label.set_ylabel('Firing rate (Hz)') + to_label.get_xaxis().get_label().set_visible(True) + to_label.set_xlabel('Time from push (s)') + to_label.set_xticks(data.index) + #to_label.set_xticklabels([- duration / 2, 0, duration / 2]) + + # plot legend subplot + legend = subplots.legend + legend.text( + 0, 0.7, + 'Peak of mean', + transform=legend.transAxes, + color='grey', + ) + legend.text( + 0, 0.3, + 'Trial number' if level == 'trial' else 'Unit ID', + transform=legend.transAxes, + color=palette[0], + ) + legend.set_visible(True) + legend.get_xaxis().set_visible(False) + legend.get_yaxis().set_visible(False) + legend.set_facecolor('white') + legend.set_box_aspect(1) + + return subplots + + +def per_unit_spike_rate(data, rugdata, ci=95, subplots=None, label=True): + """ + Plots a histogram for every unit in the given session showing spike rate. + + Parameters + ---------- + + data : pandas.DataFrame + A dataframe for a single session as returned from Behaviour.align_trials (or + Experiment.align_trials but indexed into for session and rec_num). + + rugdata : pandas.DataFrame + A dataframe for a single session containing the onset of the event (led_on) to plot as a rug. + + ci : int or 'sd', optional + Confidence intervals to plot as envelope around line. Default is 95 i.e. 95% + confidence intervals. Also accepts "'sd'" which will draw standard deviation + envelope, or None to plot no envelope. + """ + return _plot("unit", data, rugdata, ci, subplots, label) + + +def per_trial_spike_rate(data, rugdata, ci=95, subplots=None, label=True): + """ + Plots a histogram for every trial in the given session showing spike rate. + + Parameters + ---------- + + data : pandas.DataFrame + A dataframe for a single session as returned from Behaviour.align_trials (or + Experiment.align_trials but indexed into for session and rec_num). + + ci : int or 'sd', optional + Confidence intervals to plot as envelope around line. Default is 95 i.e. 95% + confidence intervals. Also accepts "'sd'" which will draw standard deviation + envelope, or None to plot no envelope. 
+ """ + return _plot("trial", data, rugdata, ci, subplots, label) \ No newline at end of file diff --git a/projects/Aidan_Analysis/FullAnalysis/spikerate_plots_VR59.py b/projects/Aidan_Analysis/FullAnalysis/spikerate_plots_VR59.py new file mode 100644 index 0000000..faec577 --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/spikerate_plots_VR59.py @@ -0,0 +1,48 @@ +#Let us now plot the spike rates for the desired experiment, here aligned to the LED on +#Take all correct trials from both left and right sides +#Remember this all requires data be processed beforehand (use processing.py!) + +from distutils.command.sdist import sdist +from base import * + + +sys.path.append("/home/s1735718/PixelsAnalysis/pixels-analysis") #Adds the location of the pixtools folder to the path +from pixtools import spike_rate #Allows us to use the custom class designed to give the figs/axes from a set of subplots that will fit the data. This generates as many subplots as required in a square a shape as possible + +#Seperate VR59 into two recording sessions, for the 17th and 18th to allow comparison + + +#now get units from these +units = myexp.select_units( + group="good", + max_depth=1500, + name="cortex", + uncurated=False +) + + +#Now that we have selected the required units, let us plot thespike rates according to LED on +hits =myexp.align_trials( + ActionLabels.correct, + Events.led_on, + "spike_rate", + duration=4, + units=units +) + +#First set the confidence interval for the graph (This significantly increases time to run!) +#ci = 95 + +#Now run the code to enumerate over the exp. +#The code will plot the spike rates of every unit selected above aligned to the defined event +#For every session + +for s, session in enumerate(myexp): + name = session.name + + spike_rate.per_unit_spike_rate(hits[s], ci="sd") + plt.suptitle( + f"Session {name} - per-unit across-trials firing rate (aligned to LED on)", + ) + plt.set_size_inches(10, 10) + plt.show() #Plot this information diff --git a/projects/Aidan_Analysis/FullAnalysis/training_summary.py b/projects/Aidan_Analysis/FullAnalysis/training_summary.py new file mode 100644 index 0000000..afb436a --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/training_summary.py @@ -0,0 +1,208 @@ +#!/usr/bin/env python3 +""" +Example script plotting metrics to illustrate how well a cohort of mice are training +across sessions. + +Usage: + training_summary.py /path/to/data_dir mouse1 mouse2 ... + +""" + +import sys +from pathlib import Path +from statistics import NormalDist + +import numpy as np +import matplotlib.pyplot as plt +import pandas as pd +import seaborn as sns +import typer +from matplotlib.ticker import MultipleLocator + +from reach import Cohort +from reach.session import Outcomes, Targets + + +def main( + data_dir: Path = typer.Argument(..., help="Folder containing task data."), + mouse_ids: list[str] = typer.Argument(..., help="The IDs of mice to summarise."), + no_collapse_days: bool = typer.Option( + False, "-n", help="Don't merge sessions with the same date" + ), +) -> None: + """ + Plot training summaries for a cohort of mice using matplotlib. 
+ """ + cohort = Cohort.init_from_files( + data_dir=data_dir, + mouse_ids=mouse_ids, + ) + + sns.set_style("darkgrid") + results = pd.DataFrame(cohort.get_results(not no_collapse_days)) + trials = pd.DataFrame(cohort.get_trials(not no_collapse_days)) + + _, axes = plt.subplots(6, 1, sharex=True) + + # NaN-ify results for any sessions that didn't have any trials + results.loc[ + results["trials"] == 0, + ["correct_r", "correct_l", "incorrect_l", "incorrect_r", "d_prime", "trials"], + ] = np.nan + + # Number of trials + sns.lineplot( + data=results, + x="day", + y="trials", + hue="mouse_id", + legend="brief", + ax=axes[0], + markers=True, + ) + axes[0].set_ylabel("No.\ntrials", rotation="horizontal", ha="right") + axes[0].set_ylim(bottom=0) + + # Number of correct trials + sns.lineplot( + data=results, + x="day", + y="correct_l", + hue="mouse_id", + legend=False, + ax=axes[1], + markers=True, + ) + sns.lineplot( + data=results, + x="day", + y="correct_r", + hue="mouse_id", + legend=False, + ax=axes[1], + markers=True, + ) + axes[1].set_ylabel("No.\ncorrect", rotation="horizontal", ha="right") + + # Hit rate + results["Hit rate"] = (results["correct_l"] + results["correct_r"]) / results[ + "trials" + ] + sns.lineplot( + data=results, + x="day", + y="Hit rate", + hue="mouse_id", + legend=False, + ax=axes[2], + markers=True, + ) + axes[2].set_ylabel("Hit rate", rotation="horizontal", ha="right") + + # Number of incorrect trials + sns.lineplot( + data=results, + x="day", + y="incorrect_l", + hue="mouse_id", + legend=False, + ax=axes[3], + markers=True, + ) + sns.lineplot( + data=results, + x="day", + y="incorrect_r", + hue="mouse_id", + legend=False, + ax=axes[3], + markers=True, + ) + axes[3].set_ylabel("No.\nincorrect", rotation="horizontal", ha="right") + + # d' + if not no_collapse_days and results["d_prime"].isna().any(): + # We have to re-calcuate d' for days that had multiple (merged) sessions using + # the trial data. + def z(p): + return -NormalDist().inv_cdf(p) + + for _, data in results.loc[results["d_prime"].isna()].iterrows(): + day_trials = trials.loc[ + (trials.day == data.day) & (trials.mouse_id == data.mouse_id) + ] + # Trials preceded by an incorrect trial are considered to be correction trials, + # and are therefore ignored. Remove missed trials to help identify these. 
+ day_trials = day_trials.loc[ + (day_trials.outcome == Outcomes.CORRECT) + | (day_trials.outcome == Outcomes.INCORRECT) + ] + + lefts = [] + rights = [] + prev = None + for _, trial in day_trials.iterrows(): + outcome = trial.outcome + if prev != Outcomes.INCORRECT: + if trial.spout == Targets.LEFT: + lefts.append(outcome) + else: + rights.append(outcome) + prev = outcome + + H = (lefts.count(Outcomes.CORRECT) + 0.5) / (len(lefts) + 1) + FA = (rights.count(Outcomes.INCORRECT) + 0.5) / (len(rights) + 1) + d_prime = z(FA) - z(H) + results.loc[ + (results.mouse_id == data.mouse_id) & (results.day == data.day), + "d_prime", + ] = d_prime + + assert not results["d_prime"].isna().any() + + axes[4].axhline(0, color="#aaaaaa", alpha=0.5, ls="--") + # 1.5 here is an arbitrary threshold of ability to discriminate + axes[4].axhline(1.5, color="#aaaaaa", alpha=0.5, ls=":") + sns.lineplot( + data=results, + x="day", + y="d_prime", + hue="mouse_id", + legend=False, + ax=axes[4], + markers=True, + ) + axes[4].set_ylabel("d'", rotation="horizontal", ha="right") + + # Spout position + axes[5].axhline(7, color="#aaaaaa", alpha=0.5, ls="--") + sns.lineplot( + data=trials, + x="day", + y="spout_position_0", + hue="mouse_id", + legend=False, + ax=axes[5], + markers=True, + ) + sns.lineplot( + data=trials, + x="day", + y="spout_position_1", + hue="mouse_id", + legend=False, + ax=axes[5], + markers=True, + linestyle="dashed", + ) + axes[5].set_ylim(bottom=0, top=8) + axes[5].set_ylabel("Spout\nposition\n(mm)", rotation="horizontal", ha="right") + + axes[5].xaxis.set_major_locator(MultipleLocator(2)) + + plt.suptitle("Training summary for: " + ", ".join(mouse_ids)) + plt.show() + + +if __name__ == "__main__": + typer.run(main) diff --git a/projects/Aidan_Analysis/FullAnalysis/unit_binning_analysis.py b/projects/Aidan_Analysis/FullAnalysis/unit_binning_analysis.py new file mode 100644 index 0000000..7c0362b --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/unit_binning_analysis.py @@ -0,0 +1,486 @@ +################################################################################################################# +# As described in my notes, this analysis shall perform the following: +# 1. Take data from entire trials across units (i.e., aligned to LED off) +# 2. Normalise each trials data as a percentage of the total time elapsed +# 3. Bin this data into 10% bins +# 4. Compare each bin to the previous, with the exception of the first bin, where no comparison shall take place +# 5. Create a histogram of the number of significant units across general trial space (i.e., as a normalised %) +################################################################################################################# + +##TODO: Split the two analyses in this file into seperate scripts, will make things easier to manage +##TODO: Rerun this code on only M2 and see if this changes findings. 
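+# A minimal sketch, added for illustration and never called, of the "normalise then
+# bin into 10 % bins" idea in steps 2-3 above, using an assumed toy firing-rate Series.
+# Note that per_trial_binning() below instead cuts absolute time into fixed-width bins;
+# this helper only shows the percentage-of-trial variant.
+def _percentage_bin_demo():
+    import numpy as np
+    import pandas as pd
+
+    # Fake spike-rate trace spanning an arbitrary 10 s trial
+    rate = pd.Series(np.random.rand(100), index=np.linspace(-10.0, 0.0, 100))
+    # Express each timestamp as a percentage of the trial's duration
+    pct = (rate.index - rate.index.min()) / (rate.index.max() - rate.index.min()) * 100
+    # Cut into ten equal-width bins and average the rate within each bin
+    bins = pd.cut(pct, np.arange(0, 101, 10), include_lowest=True)
+    return rate.groupby(bins).mean()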
+ +#%% +# First shall import required packages +import enum +from xml.etree.ElementInclude import include +from matplotlib.pyplot import title, ylabel, ylim +import numpy as np +import pandas as pd +import seaborn as sns + +import sys +from base import * +from CI_Analysis import significance_extraction +from CI_Analysis import percentile_plot +from functions import event_times, per_trial_spike_rate + +sys.path.append("/home/s1735718/PixelsAnalysis/pixels-analysis") +sys.path.insert(0, "/home/s1735718/PixelsAnalysis/pixels/pixels") +from pixtools.utils import Subplots2D # use the local copy of base.py +from pixtools import utils +from pixtools import spike_rate +from pixels import ioutils + + +# Now select the units that shall be analysed + + +units = myexp.select_units(group="good", name="all_layers", max_depth=3500) + +# select all pyramidal neurons +# pyramidal_units = myexp.select_units( +# group="good", min_depth=200, max_depth=1200, name="pyramidals", min_spike_width=0.4 +# ) + +# interneuron_units = myexp.select_units( +# group="good", +# min_depth=200, +# max_depth=1200, +# name="interneurons", +# max_spike_width=0.35, +# ) + +# Then align trials to LED off (i.e, end of trial) with the time window determined by the max trial length +duration = 10 +trial_aligned = myexp.align_trials( + ActionLabels.correct, Events.led_off, "spike_rate", duration=duration, units=units +) + +# pyramidal_aligned = myexp.align_trials( +# ActionLabels.correct, +# Events.led_off, +# "spike_rate", +# duration=10, +# units=pyramidal_units, +# ) + +# interneuron_aligned = myexp.align_trials( +# ActionLabels.correct, +# Events.led_off, +# "spike_rate", +# duration=10, +# units=interneuron_units, +# ) + +#%% +##Now shall bin the data within each trial into 100 ms bins +# Will create a function to do so: + + +def per_trial_binning(trial, myexp, timespan, bin_size): + """ + Function takes data aligned by trial (i.e., using myexp align_trials method), + bins them, then returns the dataframe with an added bin index + + The end result of this is an index structured as (time, 'lower bin, upper bin') + + trial: The data aligned to an event according to myexp.align_trials + + myexp: The experimental cohort defined in base.py + + timespan: the total time data was aligned to in myexp.align_trials (i.e., duration) in seconds + + bin size: the size of the bins in seconds + + """ + + # First reorder data hierarchy + reorder = trial.reorder_levels(["session", "trial", "unit"], axis=1) + + # Perform binning on whole dataset + # First calculate the number of bins to create based on total duration and bin length + bin_number = int(timespan / bin_size) + + values = reorder.columns.get_level_values( + "trial" + ).unique() # Get trials for session + + # Add a new level containing the bin data for the dataframe + bin_vals = pd.cut(reorder.index, bin_number, include_lowest=True, precision=1) + bin_vals = bin_vals.astype(str) # Convert the interval series to a list of strings + + # Remove brackets from bin_vals + bin_vals_str = [] + for t in bin_vals: + t = t.strip("()[]") + bin_vals_str.append(t) + + # This will be added as an index, before time. 
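+ # (added note) bin_vals_str holds one label per time point, e.g. roughly "-5.0, -4.9"
+ # for a 100 ms bin: the interval's lower and upper edges with the brackets stripped.
+ # Assigning it as a column and then promoting it to an index level files every row
+ # under its bin while keeping the original time index.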
+ full_data = reorder.assign( + bin=bin_vals_str + ) # Add bin values temporarily as a column + full_data.set_index( + "bin", append=True, inplace=True + ) # Convert this column to a new index, below time + return full_data + + +#%% +##Now shall create a function to take this bin information and conduct a boostrapping analysis +def bin_data_average(bin_data): + """ + This function takes the dataframe produced by per_trial_binning() including bins as a multiindex + and takes the average of each bin's activity within a given time period + This returns the average firing rate of each unit (i.e., per session and time) + + bin_data: the dataframe produced by per_trial_binning, with predefined bins + + NB: To return a transposed version of the data (i.e., with bin averages as rows rather than columns) + add ".T" to function call. + """ + + # First extract the values of each bin in bin_data + bins = bin_data.index.get_level_values( + "bin" + ) # a list of the bins created by the previous function + bin_names = ( + bins.unique() + ) # Take the unique values present, giving us the list of bin categories + + # Take averages for each bin, across all units and trials then concatenate into a dataframe, where each column is a bin + binned_avgs = pd.concat( + (bin_data.loc[bins == b].mean(axis=0) for b in bin_names), + axis=1, + keys=bin_names, + ) + binned_avgs = pd.DataFrame(binned_avgs) + + # Return the data + return binned_avgs + + +#%% +##Now shall create a function to take these averaged bin values and perform a bootstrapping analysis +def bin_data_bootstrap( + bin_data, CI=95, ss=20, bootstrap=10000, name=None, cache_overwrite=False +): + """ + This function takes the data, with bin averages calculated by bin_data_average and calculates boostrapped confidence intervals + To do so, it takes a random sample many times over and uses this data to calculate percentiles + + bin_data: the dataframe produced by bin_data_average, with bin times represented as columns + + CI: the confidence interval to calculate (default is 95%) + + ss: the sample size to randomly select from the averages (default is 20) + + bootstrap: the number of times a random sample will be taken to create a population dataset (default is 10,000) + + name: the name to save the output under in cache, speeds the process if this has been run before + + cache_overwrite: whether to overwrite the saved cache with new data, (default is False) + """ + + ##Stages of this analysis are as follows: + # 1. Take individual bin + # 2. Calculate confidence interval via bootstapping + # 3. Store all bin CIs + + # Check if the function has been run before, and saved to a chache + # NB: This function caches under the name of the first session in the list! 
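+ # (added note) Conceptually, for every bin of every trial the loop below does
+ #     samples = np.random.choice(bin_avg.values, size=(bootstrap, ss))
+ #     medians = np.median(samples, axis=1)
+ #     lower, median, upper = np.percentile(medians, [2.5, 50, 97.5])
+ # i.e. a bootstrap of medians whose percentiles give the confidence interval.
+ # The cache check below skips the whole computation when an HDF5 file with the
+ # same `name` already exists in the first session's interim/cache folder.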
+ if name is not None: + # Read in data from previous run + output = myexp.sessions[0].interim / "cache" / (name + ".h5") + if output.exists() and cache_overwrite is not True: + df = ioutils.read_hdf5(myexp.sessions[0].interim / "cache" / (name + ".h5")) + return df + + # First define confidence intervals + lower = (100 - CI) / 2 # Lower confidence interval bound (default 2.5%) + upper = 100 - lower # Upper bound (default 97.5%) + percentiles = [lower, 50, upper] # The defined percentiles to analyse + cis = [] # empty list to store calculated intervals + + bin_values = bin_data.columns + keys = [] + + # Loop through each of the bin averages and calculate their confidence interval + for s, session in enumerate(myexp): + names = session.name + ses_bins = bin_data[bin_data.index.get_level_values("session") == s] + trials = ses_bins.index.get_level_values( + "trial" + ).unique() # get the trials for a given session + + for t in trials: + trial_bins = ses_bins[ses_bins.index.get_level_values("trial") == t] + + # Take every bin within a trial + for b in bin_values: + bin_avg = trial_bins[b] + samples = np.array( + [ + [np.random.choice(bin_avg.values, size=ss)] + for j in range(bootstrap) + ] + ) + + # Use these samples to calculate the percentiles for each bin + medians = np.median(samples, axis=2) # Median of the samples taken + results = np.percentile( + medians, percentiles, axis=0 + ) # The percentiles, relative to the median + cis.append(pd.DataFrame(results)) + keys.append( + (s, t, b) + ) # Set the keys for the dataframe to be bin, session, trial + + # Now save these calculated confidence intervals to a dataframe + df = pd.concat(cis, axis=1, names=["session", "trial", "bin"], keys=keys) + df.set_index( + pd.Index(percentiles, name="percentile"), inplace=True + ) # Set the name of the index to percentiles + df.columns = df.columns.droplevel( + level=3 + ) # remove the redundant units row, as they have been averaged out + + # Finally, save this df to cache to speed up future runs + if name is not None: + ioutils.write_hdf5(myexp.sessions[0].interim / "cache" / (name + ".h5"), df) + return df + + return df + + +#%% +##Finally, will create a function to take these percentiles and compare them. +# Specifically, per trial, each bin will be compared to the one previous +# If 2.5% and previous 97.5% overlap then there is a significant change +def bs_pertrial_bincomp(bs_data, myexp): + """ + This function imports percentile data produced from bootstrapping, and compares across bins to determine if a significant change from previous states occurs + This process is repeated across trials to allow plotting of significance. + Returns a dataframe with bins classified as either increasing, decreasing or displaying little change from previous timepoint. + + bs_data: the percentile information produced by bin_data_bootstrap() + + myexp: the experimental cohort defined in base.py + """ + + total_bin_response = {} + # First import the data and split it by session, and trial. + for s, session in enumerate(myexp): + name = session.name + ses_data = bs_data[0] + ses_bin_response = {} + for t, cols in enumerate(ses_data): + trial_data = ses_data[ + cols[0] + ] # All bins, and associated percentiles for a given trial + + # Now that we have all the bins for a given trial, + # t count value in enumerate loop will allow comparison to previous iteration + bins = trial_data.columns + + # compare each column's 2.5% (lower) to previous column's 97.5% + # to check for sig. 
increase in activity + previous_bin = [] + trial_bin_response = {} # store as a dictionary + for b, bins in enumerate(trial_data): + + # First store the initial bin to previous_bin + if len(previous_bin) == 0: + previous_bin.append(trial_data[bins]) + continue + + # Once this has been done, will compare across bins + current_bin = trial_data[bins] + + # First check if there is a sig. increase - i.e., if current 2.5 and previous 97.5 do not overlap + if current_bin[2.5] >= previous_bin[0][97.5]: + trial_bin_response.update({bins: "increased"}) + + # Now check if there is a sig. decrease + elif current_bin[97.5] <= previous_bin[0][2.5]: + trial_bin_response.update({bins: "decreased"}) + + # And finally if there is no change at all + else: + trial_bin_response.update({bins: "nochange"}) + + # The replace previous bin with current bin + previous_bin = [] + previous_bin.append(current_bin) + + # Now append data for each trial to a larget list + ses_bin_response.update( + {cols[0]: trial_bin_response} + ) # contains every trial for a given session + total_bin_response.update({name: ses_bin_response}) + + # Convert nested dictionary to dataframe + df = pd.DataFrame.from_dict( + { + (i, j): total_bin_response[i][j] + for i in total_bin_response.keys() + for j in total_bin_response[i].keys() + }, + orient="index", + ) + df.index.rename(["session", "trial"], inplace=True) + df.columns.names = ["bin"] + return df + + +#%% +##Run these functions to perform the binning and boostrapping of data## +# Uncomment to run +# bin_data = per_trial_binning(trial_aligned, myexp, timespan=10, bin_size=0.1) +# bin_avgs = bin_data_average(bin_data) +# bootstrap = bin_data_bootstrap( +# bin_avgs, CI=95, ss=20, bootstrap=10000, name="fullbootstrap", cache_overwrite=False +# ) +# bin_comp = bs_pertrial_bincomp(bootstrap, myexp) + +# %% + +#%% +# #### Now may plot changing spikerates for each trial including the point at which LED turned on (i.e., the point where trial began) +# # First determine the times LED turned on for each trial +ledon = event_times("led_on", myexp) +ledon = pd.DataFrame(ledon) + +# Then when LED turned off +ledoff = event_times("led_off", myexp) +ledoff = pd.DataFrame(ledoff) + +# Then determine the time relative to LED off (alignment point) where led turned on for each trial +start = ledon - ledoff +start = {ses.name: start.loc[s] for s, ses in enumerate(myexp)} +start = pd.concat(start, axis=1) +start = start / 1000 # Convert from ms to s +start.index.name = "trial" # Rename the index to trial number (ascending order) + +# Now check what the max length of the trial is +vals = start.values.copy() +vals = vals.astype(int) # Convert nan values to integer (a very large negative value) +vals[vals < -90000000] = 0 # Convert all large negative values to zero +print(np.abs(vals).max()) # Check the max value with nan removed + + +## Plot a single graph for every session +def bs_graph(aligned_data, binned_data, bin_comparisons, myexp): + """ + Function plots the binned data graph including period of LED onset for each trial within a given session. 
+ + aligned_data: the dataset produced by align_trials + + binned_data: the dataset produced by per_trial_binning + + bin_comparisons: the dataset containing comparisons between bins, produced by bs_pertrial_bincomp + + myexp: the experimental cohort defined in base.py + + """ + for s, session in enumerate(myexp): + name = session.name + + ses_data = binned_data[s] + ses_values = ses_data.columns.get_level_values( + "trial" + ).unique() # Get all trial values for the session + ses_start_times = start[ + name + ] # LED on time for the session, will leave trial numbers in ses_values unsorted to ensure they align + + ses_bin_comps = bin_comparisons[ + bin_comparisons.index.get_level_values("session") == name + ] + + num_trials = len( + ses_data[ses_values[0]].columns + ) # How many trials shall be plotted + subplots = Subplots2D( + ses_values, sharex=True, sharey=True + ) # determines the number of subplots to create (as many as there are trials in the session) + + for i, value in enumerate(ses_values): + data = ses_data[value].stack().reset_index() + data["y"] = data[0] # Add a new column containing y axis values + ax = subplots.axes_flat[i] # Plot this on one graph + + # bin responses for the trial to be plotted + trial_bins = ses_bin_comps[ + ses_bin_comps.index.get_level_values("trial") == i + ] + + # Now create the actual lineplot + p = sns.lineplot(x="time", y="y", data=data, ax=ax, ci="sd") + + # Now for each of these plots, add the LED on-off time as a shaded area + p.axvspan(ses_start_times[i], 0, color="green", alpha=0.3) + + ##Now, within each trial, add shaded areas representing the binned timescales + for i, cols in enumerate(ses_bin_comps.columns): + shaded_bin = data["time"].loc[data["bin"] == ses_bin_comps.columns[i]] + bin_significance = trial_bins[cols] + + # Then plot an indicator of significance to these bins + if bin_significance.values == "nochange": + p.axvspan( + shaded_bin.values[0], + shaded_bin.values[-1], + color="gray", + alpha=0, + hatch=r"//", + ) # Change this alpha value to above zero to plot nonsignificant bins. + + if ( + bin_significance.values == "increased" + or bin_significance.values == "decreased" + ): + p.axvspan( + shaded_bin.values[0], + shaded_bin.values[-1], + color="red", + alpha=0.1, + hatch=r"/o", + ) + + # Now remove axis labels + p.get_yaxis().get_label().set_visible(False) + p.get_xaxis().get_label().set_visible(False) + + # Then set the x limit to be one second post trial completion + p.set(xlim=(-5, 1)) + + # Now plot the legend for each of these figures + legend = subplots.legend + + legend.text(0.3, 0.5, "Trial", transform=legend.transAxes) + legend.text( + -0.2, 0, "Firing Rate (Hz)", transform=legend.transAxes, rotation=90 + ) # y axis label + legend.text( + 0, -0.3, "Time Relative to LED Off (s)", transform=legend.transAxes + ) # x axis label + + legend.set_visible(True) + legend.get_xaxis().set_visible(False) + legend.get_yaxis().set_visible(False) + legend.set_facecolor("white") + legend.set_box_aspect(1) + + # Display the plot + plt.gcf().set_size_inches(20, 20) + + # Uncomment to save figures as pdf. 
+ utils.save( + f"/home/s1735718/Figures/{myexp[s].name}_spikerate_LED_Range_PerTrial", + nosize=True, + ) + + +# Uncomment below to plot this +# bs_graph(trial_aligned, bin_data, bin_comp, myexp) diff --git a/projects/Aidan_Analysis/FullAnalysis/unit_depth_plot_VR59.py b/projects/Aidan_Analysis/FullAnalysis/unit_depth_plot_VR59.py new file mode 100644 index 0000000..85698cc --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/unit_depth_plot_VR59.py @@ -0,0 +1,36 @@ +#Import data from base + +from base import * + +#Import required packages +import numpy as np +import pandas as pd +import seaborn as sns + +sys.path.append("/home/s1735718/PixelsAnalysis/pixels-analysis") +from pixtools import clusters #A series of functions that will allow plotting of depth profiles + +#Select which units from the data we would like to plot +#To get an overview of the data, let us select all units of good quality and plot their depth +#Select only from recording from the 17th +session1 = myexp.get_session_by_name("220217_VR59") #It appears this is the line that is screwing with things, why does it make an unindexable item?? + +units = session1.select_units( + group="good", + uncurated = False, +) + +#Now plot a graph of increasing depth vs unit +#First select the depth profile to analyse, function will then automatically plot the required graph as a scatter +#May need to alter this to remove use of rec_num!# Or perhaps simply set rec_num to 0? + +rec_num = 0 + +depth = clusters.depth_profile( + myexp, + group="good", + curated = True +) + + +plt.show() \ No newline at end of file