diff --git a/pixtools/clusters/noise_analysis_SD_kmeans_Clustering.py b/pixtools/clusters/noise_analysis_SD_kmeans_Clustering.py new file mode 100644 index 0000000..0da334e --- /dev/null +++ b/pixtools/clusters/noise_analysis_SD_kmeans_Clustering.py @@ -0,0 +1,156 @@ +# First import required packages +import sys +import json +from sklearn.cluster import KMeans + +import numpy as np +import pandas as pd +import seaborn as sns +import matplotlib.pyplot as plt +import probeinterface as pi + + +def meta_spikeglx(exp, session): + """ + Simply extracts channel depth from probe metadata. + + exp: exp class containing mouse IDs + + session: specific recording session to extract information from + """ + meta = exp[session].files[0]["spike_meta"] + data_path = exp[session].find_file(meta) + data = pi.read_spikeglx(data_path) + + return data + + +def noise_per_channeldepth(myexp): + """ + Function extracts the noise for each channel, combining this into a dataframe + + myexp: the experiment defined in base.py, will extract the depth information from here. + """ + noise = pd.DataFrame( + columns=["session", "project", "SDs", "x", "y"] + ) # Create the empty array to hold the noise information + depths = meta_spikeglx(myexp, 0) + depths = depths.to_dataframe() + coords = depths[ + ["x", "y"] + ] # Create a dataframe containing the generic x and y coords. + tot_noise = [] + + # Iterate through each session, taking the noise for each file and loading them into one continuous data frame. + for s, session in enumerate(myexp): + for i in range(len(session.files)): + path = session.processed / f"noise_{i}.json" + with path.open() as fd: + ses_noise = json.load(fd) + + chan_noises = [] + for j, SD in enumerate( + ses_noise["SDs"][0:-1] + ): # This will iterate over first 384 channels, and exclude the sync channel + x = coords["x"].iloc[j] + y = coords["y"].iloc[j] + noise_row = pd.DataFrame.from_records( + {"session": [session.name], "SDs": [SD], "x": x, "y": y} + ) + chan_noises.append(noise_row) + + # Take all datafrom channel noises for a session, then concatenate + noise = pd.concat(chan_noises) + tot_noise.append(noise) # Take all channel noises and add to a master file + df2 = pd.concat( + tot_noise + ) # Convert this master file, containing every sessions noise data into a dataframe + + return df2 + + +def elbowplot(data, myexp): + + """ + + This function takes data formatted according to noise_per_channeldepth(), containing the noise values for all channels + Will iterate through each experimental session, producing the appropriate graph. Should take the optimal number of clusters as the point at which the elbow bends. + This point is defined as the boundary where additional clusters no longer explain much more variance in the data. + + data: The dataframe, as formatted by noise_per_channeldepth() + + myexp: The experiment, defined in base.py containing the session information. + + """ + + for s, session in enumerate(myexp): + name = session.name + ses_data = data.loc[data["session"] == name] + df3 = ses_data["SDs"].values.reshape( + -1, 1 + ) # Just gives all noise values, for each session + Sum_of_squares = [] # create an empty list to store these in. 
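# Each candidate k below is fitted with KMeans and its inertia_ (the
# within-cluster sum of squared distances to the closest centroid) is recorded;
# the "elbow" is the k beyond which the inertia stops falling sharply.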
+ + k = range(1, 10) + for num_clusters in k: + kmeans = KMeans(n_clusters=num_clusters) + kmeans.fit(df3) + Sum_of_squares.append(kmeans.inertia_) + + fig, ax = plt.subplots() + + # This code will plot the elbow graph to give an overview of the variance in the data explained by the varying the number of clusters + # This gives the distance from the centroids, as a measure of the variability explained + # We want this to drop off indicating that there is no remaining data explained by further centroid inclusion + + # Figure has two rows, one columns, this is the first plot + plt.plot(k, Sum_of_squares, "bx-") # bx gives blue x as each point. + plt.xlabel("Putative Number of Clusters") + plt.ylabel("Sum of Squares Distances/Inertia") + plt.title( + f"Determining Optimal Number of Clusters for Analysis - Session {name}" + ) + + f = plt.gca() + return f + + +def clusterplot(data, myexp, cluster_num): + """ + Function takes the noise per channel and depth information, produced by noise_per_channeldepth() and produces a clusterplot. + Clustering is performed by K-means analysis, elbow plot should be produced by elbowplot() to determine optimal cluster number. + + data: data produced by noise_per_channel_depth() containing channel ID, coordinate, and recording noise for each session in the exp class + + myexp: the exp class containing mouse IDs + + cluster_num: the number of clusters to produce through the k-means analysis, determined by qualitative analysis of elbow plots. (where the "bow" of the line occurs) + + """ + + # First define k-means parameters for clustering + kmeans = KMeans( + init="random", # Initiate the iterative analysis with random centres + n_clusters=cluster_num, # How many clusters to bin the data into, based on the elbow analysis! + n_init=10, # Number of centroids to generate initially + max_iter=300, # Max number of iterations before ceasing analysis + random_state=42, # The random number seed for centroid generation, can really be anything for our purposes + ) + + for s, session in enumerate(myexp): + name = session.name + + ses = data.loc[data["session"] == name] + sd = ses["SDs"].values.reshape(-1, 1) + y_means = kmeans.fit_predict(sd) + + # Now plot the kmeans analysis + # Remember we use our original data (ses) but use the clustering analysis to generate the labels + plt.scatter(ses["y"], ses["SDs"], c=y_means, cmap="viridis") + + plt.xlabel("Probe Channel Y-Coordinate") + plt.ylabel("Channel Noise (SD)") + plt.title(f"{name} Channel Noise k-Mean Clustering Analysis") + + f = plt.gca() + return f diff --git a/pixtools/clusters/unit_depths.py b/pixtools/clusters/unit_depths.py index 4df3881..01ee26d 100644 --- a/pixtools/clusters/unit_depths.py +++ b/pixtools/clusters/unit_depths.py @@ -19,17 +19,17 @@ def unit_depths(exp): for s, session in enumerate(exp): session_depths = {} - + rec_num=0 for rec_num, probe_depth in enumerate(session.get_probe_depth()): rec_depths = {} - rec_info = info[s][rec_num] + rec_info = info[s] id_key = 'id' if 'id' in rec_info else 'cluster_id' # Depends on KS version for unit in rec_info[id_key]: unit_info = rec_info.loc[rec_info[id_key] == unit].iloc[0].to_dict() rec_depths[unit] = probe_depth - unit_info["depth"] - session_depths[rec_num] = pd.DataFrame(rec_depths, index=["depth"]) + session_depths[0] = pd.DataFrame(rec_depths, index=["depth"]) depths.append(pd.concat(session_depths, axis=1, names=["rec_num", "unit"])) diff --git a/pixtools/responsiveness/CI_Analysis_pointplot.py b/pixtools/responsiveness/CI_Analysis_pointplot.py new 
file mode 100644 index 0000000..e4d3e1c --- /dev/null +++ b/pixtools/responsiveness/CI_Analysis_pointplot.py @@ -0,0 +1,81 @@ +def significance_extraction(CI): + """ + This function takes the output of the get_aligned_spike_rate_CI method under the myexp class and extracts any significant values, returning a dataframe in the same format. + + CI: The dataframe created by the CI calculation previously mentioned + + """ + + sig = [] + keys=[] + rec_num = 0 + + #This loop iterates through each column, storing the data as un, and the location as s + for s, unit in CI.items(): + #Now iterate through each recording, and unit + #Take any significant values and append them to lists. + if unit.loc[2.5] > 0 or unit.loc[97.5] < 0: + sig.append(unit) #Append the percentile information for this column to a list + keys.append(s) #append the information containing the point at which the iteration currently stands + + + #Now convert this list to a dataframe, using the information stored in the keys list to index it + sigs = pd.concat( + sig, axis = 1, copy = False, + keys=keys, + names=["session", "unit", "rec_num"] + ) + + return sigs + +def percentile_plot(CIs, sig_CIs, exp, sig_only = False, dir_ascending = False): + """ + + This function takes the CI data and significant values and plots them relative to zero. + May specify if percentiles should be plotted in ascending or descending order. + + CIs: The output of the get_aligned_spike_rate_CI function, i.e., bootstrapped confidence intervals for spike rates relative to two points. + + sig_CIs: The output of the significance_extraction function, i.e., the units from the bootstrapping analysis whose confidence intervals do not straddle zero + + exp: The experimental session to analyse, defined in base.py + + sig_only: Whether to plot only the significant values obtained from the bootstrapping analysis (True/False) + + dir_ascending: Whether to plot the values in ascending order (True/False) + + """ + #First sort the data into long form for the full dataset, by percentile + CIs_long = CIs.reset_index().melt("percentile").sort_values("value", ascending= dir_ascending) + CIs_long = CIs_long.reset_index() + CIs_long["index"] = pd.Series(range(0, CIs_long.shape[0]))#reset the index column to allow ordered plotting + + #Now select if we want only significant values plotted, else raise an error. + if sig_only is True: + CIs_long_sig = sig_CIs.reset_index().melt("percentile").sort_values("value", ascending=dir_ascending) + CIs_long_sig = CIs_long_sig.reset_index() + CIs_long_sig["index"] = pd.Series(range(0, CIs_long_sig.shape[0])) + + data = CIs_long_sig + + elif sig_only is False: + data = CIs_long + + else: + raise TypeError("Sig_only argument must be a boolean operator (True/False)") + + #Plot this data for the experimental sessions as a pointplot. + for s, session in enumerate(exp): + name = session.name + + p = sns.pointplot( + x="unit", y = "value", data = data.loc[(data.session == s)], + order = data.loc[(data.session == s)]["unit"].unique(), join = False, legend = None) #Plots in the order of the units as previously set, uses unique values to prevent double plotting + + p.set_xlabel("Unit") + p.set_ylabel("Confidence Interval") + p.set(xticklabels=[]) + p.axhline(0) + plt.suptitle("\n".join(wrap(f"Confidence Intervals By Unit - Grasp vs. Baseline - Session {name}"))) #Wraps the title of the plot to fit on the page. 
+ + plt.show() \ No newline at end of file diff --git a/pixtools/responsiveness/ci_analysis_pointplot.py b/pixtools/responsiveness/ci_analysis_pointplot.py new file mode 100644 index 0000000..e4d3e1c --- /dev/null +++ b/pixtools/responsiveness/ci_analysis_pointplot.py @@ -0,0 +1,81 @@ +def significance_extraction(CI): + """ + This function takes the output of the get_aligned_spike_rate_CI method under the myexp class and extracts any significant values, returning a dataframe in the same format. + + CI: The dataframe created by the CI calculation previously mentioned + + """ + + sig = [] + keys=[] + rec_num = 0 + + #This loop iterates through each column, storing the data as un, and the location as s + for s, unit in CI.items(): + #Now iterate through each recording, and unit + #Take any significant values and append them to lists. + if unit.loc[2.5] > 0 or unit.loc[97.5] < 0: + sig.append(unit) #Append the percentile information for this column to a list + keys.append(s) #append the information containing the point at which the iteration currently stands + + + #Now convert this list to a dataframe, using the information stored in the keys list to index it + sigs = pd.concat( + sig, axis = 1, copy = False, + keys=keys, + names=["session", "unit", "rec_num"] + ) + + return sigs + +def percentile_plot(CIs, sig_CIs, exp, sig_only = False, dir_ascending = False): + """ + + This function takes the CI data and significant values and plots them relative to zero. + May specify if percentiles should be plotted in ascending or descending order. + + CIs: The output of the get_aligned_spike_rate_CI function, i.e., bootstrapped confidence intervals for spike rates relative to two points. + + sig_CIs: The output of the significance_extraction function, i.e., the units from the bootstrapping analysis whose confidence intervals do not straddle zero + + exp: The experimental session to analyse, defined in base.py + + sig_only: Whether to plot only the significant values obtained from the bootstrapping analysis (True/False) + + dir_ascending: Whether to plot the values in ascending order (True/False) + + """ + #First sort the data into long form for the full dataset, by percentile + CIs_long = CIs.reset_index().melt("percentile").sort_values("value", ascending= dir_ascending) + CIs_long = CIs_long.reset_index() + CIs_long["index"] = pd.Series(range(0, CIs_long.shape[0]))#reset the index column to allow ordered plotting + + #Now select if we want only significant values plotted, else raise an error. + if sig_only is True: + CIs_long_sig = sig_CIs.reset_index().melt("percentile").sort_values("value", ascending=dir_ascending) + CIs_long_sig = CIs_long_sig.reset_index() + CIs_long_sig["index"] = pd.Series(range(0, CIs_long_sig.shape[0])) + + data = CIs_long_sig + + elif sig_only is False: + data = CIs_long + + else: + raise TypeError("Sig_only argument must be a boolean operator (True/False)") + + #Plot this data for the experimental sessions as a pointplot. + for s, session in enumerate(exp): + name = session.name + + p = sns.pointplot( + x="unit", y = "value", data = data.loc[(data.session == s)], + order = data.loc[(data.session == s)]["unit"].unique(), join = False, legend = None) #Plots in the order of the units as previously set, uses unique values to prevent double plotting + + p.set_xlabel("Unit") + p.set_ylabel("Confidence Interval") + p.set(xticklabels=[]) + p.axhline(0) + plt.suptitle("\n".join(wrap(f"Confidence Intervals By Unit - Grasp vs. 
Baseline - Session {name}"))) #Wraps the title of the plot to fit on the page. + + plt.show() \ No newline at end of file diff --git a/pixtools/responsiveness.py b/pixtools/responsiveness/responsiveness.py similarity index 100% rename from pixtools/responsiveness.py rename to pixtools/responsiveness/responsiveness.py diff --git a/projects/Aidan_Analysis/FullAnalysis/.vscode/launch.json b/projects/Aidan_Analysis/FullAnalysis/.vscode/launch.json new file mode 100644 index 0000000..1fbdcdc --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/.vscode/launch.json @@ -0,0 +1,18 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Python: Current File", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "stopOnEntry": true, + "justMyCode": false, + "editor.bracketPairColorization.independentColorPoolPerBracketType": true + } + ] +} \ No newline at end of file diff --git a/projects/Aidan_Analysis/FullAnalysis/.vscode/settings.json b/projects/Aidan_Analysis/FullAnalysis/.vscode/settings.json new file mode 100644 index 0000000..de288e1 --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "python.formatting.provider": "black" +} \ No newline at end of file diff --git a/projects/Aidan_Analysis/FullAnalysis/CI_Analysis.py b/projects/Aidan_Analysis/FullAnalysis/CI_Analysis.py new file mode 100644 index 0000000..46c1398 --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/CI_Analysis.py @@ -0,0 +1,223 @@ +#This script draws a confidence interval comparison between a baseline and some event. This is calculated as baseline - average firing rate +#Will use a modified version of a preexisting function in base.py (align_spike_rate_CI) +#Import required packages +from argparse import Action +import sys +from tokenize import group +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +from matplotlib import cm +from pandas import melt +import seaborn as sns +from textwrap import wrap + +sys.path.append("/home/s1735718/PixelsAnalysis/pixels-analysis") +from pixtools.utils import Subplots2D +#sys.path.insert(0, "/home/s1735718/PixelsAnalysis/pixels") #use the local copy of base.py +from base import * + +#First select units to analyse, will take all units in the brain, i.e. 
across the full 3500um recording range +units = myexp.select_units( + group="good", + max_depth=3500, + name="unit" +) +#Now let us run the align spike rate CI and save this as CIs + +# CIs = myexp.get_aligned_spike_rate_CI( +# label=ActionLabels.correct_left | ActionLabels.correct_right, +# event=Events.led_off, +# start=-0.400, #Start the analysis 400ms before LED turns off +# step=0.400, #Make this analysis in one step, not looking after or before +# end=0.000, #Finish the analysis for each unit when the grasp is made + +# bl_label=ActionLabels.correct_left | ActionLabels.correct_right, +# bl_event=Events.led_on, #The baseline will examine the ITI (i.e., time before the trial began) +# bl_start=-4.000, #Will catch every trial's ITI while avoiding the tail end of the previous trial +# bl_end=0.000, + +# ss=20, #The size of each sample to take +# CI=95, #Confidence interval to analyse to +# bs=10000, #The number of times to take a pseudorandom sample for bootstrapping +# units=units +# ) + +#Also create a dataset containing only values that do not straddle zero. +#Could check if first and last percentile are different from zero +#cis[0] > 0 or cis[-1] < 0 - if true then suggests significance! +#Will be easier if this is in wide form, can simply take it by column + +#First will iterate through the sessions for CI +#Remember the indexes are as follows: +#Session, Unit, rec_num (always zero), percentile (2.5, 50, or 97.5) + +def significance_extraction(CI): + """ + This function takes the output of the get_aligned_spike_rate_CI function and extracts any significant values, returning a dataframe in the same format. + + CI: The dataframe created by the CI calculation previously mentioned + + """ + + sig = [] + keys=[] + rec_num = 0 + + #This loop iterates through each column, storing the data as unit, and the location as s + for s, unit in CI.items(): + #Now iterate through each recording, and unit + #Take any significant values and append them to lists. + if unit.loc[2.5] >= 0 or unit.loc[97.5] <= 0: + sig.append(unit) #Append the percentile information for this column to a list + keys.append(s) #append the information containing the point at which the iteration currently stands + + + #Now convert this list to a dataframe, using the information stored in the keys list to index it + sigs = pd.concat( + sig, axis = 1, copy = False, + keys=keys, + names=["session", "unit", "rec_num"] + ) + + return sigs + +#sigs = significance_extraction(CIs) + +#Now we have successfully derived confidence intervals, we may plot these as a scatter for each unit, with a line denoting the critical point +#TODO: Add code to plot a vertical line at the point where percentiles cross zero. Allows for a visual representation of proportion. + +def percentile_plot(CIs, sig_CIs, exp, sig_only = False, dir_ascending = False): + """ + + This function takes the CI data and significant values and plots them relative to zero. + May specify if percentiles should be plotted in ascending or descending order. + + CIs: The output of the get_aligned_spike_rate_CI function, i.e., bootstrapped confidence intervals for spike rates relative to two points. 
+ + sig_CIs: The output of the significance_extraction function, i.e., the units from the bootstrapping analysis whose confidence intervals do not straddle zero + + exp: The experimental session to analyse, defined in base.py + + sig_only: Whether to plot only the significant values obtained from the bootstrapping analysis (True/False) + + dir_ascending: Whether to plot the values in ascending order (True/False) + + NB: Remember to change the title of the graph if making a different comparison! + + """ + #First sort the data into long form for both datasets, by percentile + CIs_long = CIs.reset_index().melt("percentile").sort_values("value", ascending= dir_ascending) + CIs_long = CIs_long.reset_index() + CIs_long["index"] = pd.Series(range(0, CIs_long.shape[0]))#reset the index column to allow ordered plotting + + CIs_long_sig = sig_CIs.reset_index().melt("percentile").sort_values("value", ascending=dir_ascending) + CIs_long_sig = CIs_long_sig.reset_index() + CIs_long_sig["index"] = pd.Series(range(0, CIs_long_sig.shape[0])) + + #Now select if we want only significant values plotted, else raise an error. + if sig_only is True: + data = CIs_long_sig + + elif sig_only is False: + data = CIs_long + + else: + raise TypeError("Sig_only argument must be a boolean operator (True/False)") + + + #Plot this data for the experimental sessions as a pointplot. + for s, session in enumerate(exp): + name = session.name + + p = sns.pointplot( + x="unit", y = "value", data = data.loc[(data.session == s)], + order = data.loc[(data.session == s)]["unit"].unique(), join = False, legend = None) #Plots in the order of the units as previously set, uses unique values to prevent double plotting + + p.set_xlabel("Unit") + p.set_ylabel("Confidence Interval") + p.set(xticklabels=[]) + p.axhline(0) + plt.suptitle("\n".join(wrap(f"Confidence Intervals By Unit - Grasp vs. Baseline - Session {name}"))) #Wraps the title of the plot to fit on the page. + + plt.show() + + +#Can also make a pie chart detailing the proportions of the whole collection of units that were signifcant +#Can simply compare the lengths of the unique values in the unit columns of the dataframes +#Convert this to two bar charts, one displaying proportion, like this. And another displaying actual values. +#Proportion on a scale of zero to one +#Will append these two seperate dataframes + +# colors = plt.get_cmap("Pastel1") +# prop = [] +# count = [] +# ses_key = [] + +# for s, session in enumerate(myexp): +# #First will create the dataframe of proportion vs total. (zero to one) + +# name = session.name + +# ses_data = CIs[s] +# sig_data = sigs[s] + + +# #Now calculate the proportion of significant to nonsignificant units +# sig_count = len(sig_data.columns) +# nonsig_count = len(ses_data.columns) - sig_count + +# #This divides the group of units by the total number in a session to give the proportion of units on a scale of zero to one +# sig_prop = sig_count/len(ses_data.columns) +# nonsig_prop = nonsig_count/len(ses_data.columns) + +# #Now append these values to prop, and save key for this iteration +# prop.append([sig_prop, nonsig_prop]) +# ses_key.append(name) #This will hold the name of the sesision the proportions were calc. 
from + +# #Then shall create another dataframe containing the actual number of sig vs nonsig units +# count.append([sig_count, nonsig_count]) + +# #Now concatenate these lists as a dataframe +# counts = pd.DataFrame( +# count, copy = False, +# columns = ["sig count", "nonsig count"] +# ) + +# props = pd.DataFrame( +# prop, copy=False, +# columns = ["sig proportion", "nonsig proportion"] +# ) + +# counts["session"] = ses_key +# props["session"] = ses_key + + +# #Finally, plot this data as two seperate stacked bar charts +# fig, ax = plt.subplots(nrows = 1, ncols = 2) #One row, two columns + + +# props.plot.bar( +# stacked = True, x = "session", ax = ax[0], legend = False, +# xlabel="Session No.", ylabel = "Proportion of Total Units", +# color={"sig proportion":"cornflowerblue", "nonsig proportion":"pink"} +# ) + + +# counts.plot.bar( +# stacked = True, x = "session", ax = ax[1], legend = False, +# xlabel = "Session No.", ylabel = "Number of Units", +# color={"sig count":"cornflowerblue", "nonsig count":"pink"} +# ) + +# ax[0].tick_params('x', labelrotation=45) +# ax[1].tick_params('x', labelrotation=45) + +# plt.setp(ax[0].xaxis.get_majorticklabels(), ha='right') #Prevent the ticks from moving after rotating +# plt.setp(ax[1].xaxis.get_majorticklabels(), ha='right') +# plt.legend(["Responsive", "Non-Responsive"], loc=[1.1,0.5]) +# plt.suptitle("\n".join(wrap("Proportion of Responsive to Non-Responsive Units - Compared Across 400ms Pre-Grasp vs. 4s ITI"))) + +# utils.save("/home/s1735718/Figures/AllSession_BarChart_Significant") + + diff --git a/projects/Aidan_Analysis/FullAnalysis/CI_Analysis_responsiveness.py b/projects/Aidan_Analysis/FullAnalysis/CI_Analysis_responsiveness.py new file mode 100644 index 0000000..8d593df --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/CI_Analysis_responsiveness.py @@ -0,0 +1,148 @@ +#This file will produce graphs detailing the relative responsiveness of units. Will have three scatters: +#Did not significantly change (unresponsive), increased in activity, decreased in activity. 
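# Classification rule applied further down: a unit is labelled "up" when the lower
# (2.5th percentile) bound of its bootstrapped CI sits at or above zero, "down" when
# the upper (97.5th percentile) bound sits at or below zero, and "nochange" when the
# interval straddles zero. For example, a (2.5, 50, 97.5) percentile triple of
# (0.4, 1.1, 2.0) is "up", while (-0.6, 0.1, 0.9) is "nochange".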
+#Can import data from CI Analysis + +from matplotlib.pyplot import title, ylim +import numpy as np +import pandas as pd + +import sys +from base import * +from CI_Analysis import significance_extraction +from CI_Analysis import percentile_plot + +sys.path.append("/home/s1735718/PixelsAnalysis/pixels-analysis") +sys.path.insert(0, "/home/s1735718/PixelsAnalysis/pixels/pixels") +from pixtools.utils import Subplots2D #use the local copy of base.py +from pixtools import utils + + +#First select the units that shall be analysed +units = myexp.select_units( + group="good", + max_depth=3500, + name="unit" +) + +#Then align spike rates to trial, and gennerate confidence intervals +#Will once again compare peak reach time to a 4s ITI period acting as baseline +CIs = myexp.get_aligned_spike_rate_CI( + label=ActionLabels.correct_left | ActionLabels.correct_right, + event=Events.led_off, + start=-0.400, #Start the analysis 400ms before LED turns off + step=0.400, #Make this analysis in one step, not looking after or before + end=0.000, #Finish the analysis for each unit when the grasp is made + + bl_label=ActionLabels.correct_left | ActionLabels.correct_right, + bl_event=Events.led_on, #The baseline will examine the ITI (i.e., time before the trial began) + bl_start=-4.000, #Will catch every trial's ITI while avoiding the tail end of the previous trial + bl_end=0.000, + + ss=20, #The size of each sample to take + CI=95, #Confidence interval to analyse to + bs=10000, #The number of times to take a pseudorandom sample for bootstrapping + units=units +) + +#Now take this information and pass it to the significance extraction function +sigs = significance_extraction(CIs) + +#Split this data into upregulated and downregulated units +#Define a function to split units by responsiveness +def responsiveness(CI, sigs_only = True): + + """ + This function takes the confidence intervals computed by the aligned + spike rate CI method and returns the responsiveness type (Up, Down, No Sig Change) + + If sigs_only is true (default) then this function will not return those values that did not significantly change from zero + + CI: The data produced by the aligned spike rate CI method of myexp + + sigs_only: Whether to return no change values + + """ + + #first begin by extracting significant values + sigs = significance_extraction(CI) + + #Then create empty lists to store the data + units = [] + keys = [] + rec_num = 0 + + #If we only want the significant units: + if sigs_only == True: + #Check if the 2.5 percentile is greater than or equal to zero (i.e., upregulated is true) + #Append this as a new row + upregulated = (sigs.loc[2.5] >= 0).rename(index={2.5: "change"}) + sigs.loc["upregulated"] = upregulated #True or False that unit was upregulated + + return sigs + + #Or if we want every unit regardless of significance: + elif sigs_only == False: + #Check the entire dataframe rather than iterating, using a boolean operator + + CI.loc["change"] = "nochange" #Add a row of entirely no change values (default) + + upregulated = (CI.loc[2.5] >= 0).rename(index={2.5: "change"}) #Find all upregulated values + downregulated = (CI.loc[97.5] <= 0).rename(index={2.5: "change"}) #And the same for downregulated values + + CI.loc["change"][upregulated] = "up" #Replace these values with up + CI.loc["change"][downregulated] = "down" + print(CI) + return CI + + +df = responsiveness(CIs, sigs_only=False) + +#Now transpose this to long form information and plot this by depth +#To get depth for a unit, use get cluster info, here 
cluster_id is the unit, and depth is stated +#Get cluster_id == unit and extract depth. +info = myexp.get_cluster_info() +depths = [] +keys = [] +units = [] +skipped = [] + + +#Now iterate through the list, finding the depth of each unit + +for s, unit in df.items(): + + ses_info = info[s[0]] + + if len(ses_info["depth"].loc[ses_info["cluster_id"] == s[1]]) == 0: + skipped.append(s[1]) + continue #This line checks if the unit is absent from cluster_id and skips it if it is, will also append this to a list for checking later + + units.append(unit) + key = list(s) + depths = ses_info["depth"].loc[ses_info["cluster_id"] == s[1]].item() + key.append(depths) + keys.append(tuple(key)) + + +df2 = pd.concat( + units, axis = 1, copy = False, + keys = keys, names = ["session", "unit", "rec_num", "depth"] +) + + +#Once the units are known alongside their depths, can plot the relative proportion of response types +#Will use a bar plot for this +#Iterate over session in df2, plotting the chart for each + +fig, axs = plt.subplots(2) +plt.subplots_adjust(top = 1, hspace=0.5) +for s, session in enumerate(df2): + name = myexp[session[0]].name + sns.countplot( + x = df2[session[0]].xs("change"), data = df2[session[0]], orient='vertical', + palette="pastel", ax = axs[session[0]] + ).set_title(f"{name} Count of Unit Responsiveness") + +#Now save this figure including all sessions used. +names = "_".join(s.name for s in myexp) +utils.save(f"/home/s1735718/Figures/{names}_count_of_unit_responsiveness", nosize=True) \ No newline at end of file diff --git a/projects/Aidan_Analysis/FullAnalysis/FanoFactorCalc.py b/projects/Aidan_Analysis/FullAnalysis/FanoFactorCalc.py new file mode 100644 index 0000000..f8c77dd --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/FanoFactorCalc.py @@ -0,0 +1,1007 @@ +# First import required packages +from argparse import Action +from curses import raw +import math + +from matplotlib.pyplot import axvline, xlim +from base import * +import numpy as np +import pandas as pd +import matplotlib as plt +from pixtools.utils import Subplots2D +from rasterbase import * +from tqdm import tqdm +from sklearn.linear_model import LinearRegression +from pixtools.utils import Subplots2D +from pixels import ioutils + +from functions import ( + event_times, + per_trial_raster, + per_trial_binning, + per_unit_spike_rate, + bin_data_average, +) + +#%% +# First, select only units from correct trials, of good quality, and within m2 +units = myexp.select_units(group="good", name="m2", min_depth=200, max_depth=1200) + +l23_units = myexp.select_units( + group="good", name="layerII/III", min_depth=200, max_depth=375 +) +l5_units = myexp.select_units(group="good", name="layerV", min_depth=375, max_depth=815) +l6_units = myexp.select_units( + group="good", name="layerVI", min_depth=815, max_depth=1200 +) + +# Now align these firing rates to the point at which the trial began +duration = 8 + +# Specifically, to calculate FANO FACTOR we need to get the exact spike times. 
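# Sanity check of the quantity computed below: within one time bin, a unit's Fano
# factor is the across-trial variance of its spike count divided by the across-trial
# mean count (Poisson-like firing gives values near 1). A toy illustration with
# made-up counts, not data from this experiment:
#     toy_counts = np.array([3, 5, 2, 4, 6])  # hypothetical spikes per trial in one bin
#     toy_counts.var() / toy_counts.mean()    # -> 0.5, i.e. sub-Poisson variability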
+spike_times = myexp.align_trials( + ActionLabels.clean, + Events.reach_onset, + "spike_times", + duration=duration, + units=units, +) +spike_times = spike_times.reorder_levels( + ["session", "trial", "unit"], axis=1 +) # reorder the levels + +l23_spike_times = myexp.align_trials( + ActionLabels.clean, + Events.reach_onset, + "spike_times", + duration=duration, + units=l23_units, +) +l23_spike_times = l23_spike_times.reorder_levels( + ["session", "trial", "unit"], axis=1 +) # reorder the levels + +l5_spike_times = myexp.align_trials( + ActionLabels.clean, + Events.reach_onset, + "spike_times", + duration=duration, + units=l5_units, +) +l5_spike_times = l5_spike_times.reorder_levels( + ["session", "trial", "unit"], axis=1 +) # reorder the levels + +l6_spike_times = myexp.align_trials( + ActionLabels.clean, + Events.reach_onset, + "spike_times", + duration=duration, + units=l6_units, +) +l6_spike_times = l6_spike_times.reorder_levels( + ["session", "trial", "unit"], axis=1 +) # reorder the levels + +firing_rates = myexp.align_trials( + ActionLabels.clean, Events.reach_onset, "spike_rate", duration=duration, units=units +) +firing_rate_means = per_trial_binning( + firing_rates, myexp, timespan=duration, bin_size=0.1 +) +firing_rate_means = bin_data_average(firing_rate_means) +firing_rate_means = firing_rate_means.mean() +# l23_firing_rates = myexp.align_trials( +# ActionLabels.clean, +# Events.reach_onset, +# "spike_rate", +# duration=duration, +# units=l23_units, +# ) +# l23_firing_rates = l23_firing_rates.reorder_levels( +# ["session", "unit", "trial"], axis=1 +# ) # reorder the levels + +# l5_firing_rates = myexp.align_trials( +# ActionLabels.clean, +# Events.reach_onset, +# "spike_rate", +# duration=duration, +# units=l5_units, +# ) +# l5_firing_rates = l5_firing_rates.reorder_levels( +# ["session", "unit", "trial"], axis=1 +# ) # reorder the levels + +# l6_firing_rates = myexp.align_trials( +# ActionLabels.clean, +# Events.reach_onset, +# "spike_rate", +# duration=duration, +# units=l6_units, +# ) +# l6_firing_rates = l6_firing_rates.reorder_levels( +# ["session", "unit", "trial"], axis=1 +# ) # reorder the levels + +# Take average firing rates +# l23_firing_rates = per_trial_binning( +# l23_firing_rates, myexp, timespan=duration, bin_size=0.1 +# ) +# l23_means = bin_data_average(l23_firing_rates) +# l23_means = l23_means.mean() + +# l5_firing_rates = per_trial_binning( +# l5_firing_rates, myexp, timespan=duration, bin_size=0.1 +# ) +# l5_means = bin_data_average(l5_firing_rates) +# l5_means = l5_means.mean() + +# l6_firing_rates = per_trial_binning( +# l6_firing_rates, myexp, timespan=duration, bin_size=0.1 +# ) +# l6_means = bin_data_average(l6_firing_rates) +# l6_means = l6_means.mean() + +# From this, must now calculate the point at which the LED turned off in each trial. +# Will allow plotting of off time. +off_times = event_times("led_off", myexp) # time after LED on when trial finished + +# %% +# Now shall define a function to calculate per Trial FF + + +def cross_trial_FF( + spike_times, + exp, + duration, + bin_size, + cache_name, + ff_iterations=50, + mean_matched=False, + raw_values=False, +): + """ + This function will take raw spike times calculated through the exp.align_trials method and calculate the fano factor for each trial, cross units + or for individual units, if mean matched is False. 
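    In the mean-matched case, each time bin's Fano factor is estimated as the slope of a regression through the origin of across-trial spike-count variance on spike-count mean, computed over the mean-matched population of units (after Churchland et al. 2010).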
+ + ------------------------------------------------------------------------ + |NB: Output of the function is saved to an h5 file as "FF_Calculation" | + ------------------------------------------------------------------------ + + spike_times: The raw spike time data produced by the align_trials method + + exp: the experimental cohort defined in base.py (through the Experiment class) + + duration: the window of the aligned data (in s) + + bin_size: the size of the bins to split the trial duration by (in s) + + name: the name to cache the results under + + mean_matched: Whether to apply mean matching adjustment (as defined by Churchland 2010) to correct for large changes in unit mean firing rates across trials + If this is False, the returned dataframe will be of unadjusted single unit FFs + + raw_values: Whether to return the raw values rather than mean FFs + + ff_iterations: The number of times to repeat the random mean-matched sampling when calculating population fano factor (default 50) + + """ + # First create bins that the data shall be categorised into + duration = duration * 1000 # convert s to ms + bin_size = bin_size * 1000 + bins = np.arange((-duration / 2), (duration / 2), bin_size) + all_FF = pd.DataFrame() + + # First split data by session + for s, session in enumerate(myexp): + ses_data = spike_times[s] + ses_data = ses_data.reorder_levels(["unit", "trial"], axis=1) + name = session.name + session_FF = {} + session_se = {} + session_ci = {} + + repeat_FF = pd.DataFrame() + repeat_se = pd.DataFrame() + repeat_ci = pd.DataFrame() + + # Bin the session data + ses_vals = ses_data.values + bin_ids = np.digitize(ses_vals[~np.isnan(ses_vals)], bins, right=True) + + # Then by unit + unit_bin_means = [] + unit_bin_var = [] + + print(f"beginning mean/variance calculation for session {name}") + keys = [] + for t in tqdm(ses_data.columns.get_level_values("unit").unique()): + unit_data = ses_data[t] # take each unit values for all trials + unit_data = ( + unit_data.melt().dropna() + ) # Pivot this data and drop missing values + + # Once these values have been isolated, can begin FF calculation. 
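# np.digitize labels each spike time with the index of the time bin it falls into,
# so the per-trial event counts can then be grouped and summarised bin by bin.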
+ unit_data["bin"] = np.digitize(unit_data["value"], bins, right=True) + + bin_count_means = [] + bin_count_var = [] + + for b in np.unique(bin_ids): + bin_data = unit_data.loc[unit_data["bin"] == b] + + # Calculate the count mean of this bin, i.e., the average number of events per trial + bin_count_mean = ( + bin_data.groupby("trial").count().mean()["value"] + ) # the average number of spikes per unit in this bin + + # If there are any bins where no spikes occurred (i.e, empty dataframes) then return zero + if np.isnan(bin_count_mean): + bin_count_mean = 0 + + bin_count_means.append(bin_count_mean) + + # Now calculate variance of the event count + bin_var = np.var(bin_data.groupby("trial").count()["value"]) + + # Again return zero if no events + if np.isnan(bin_var): + bin_var = 0 + + bin_count_var.append(bin_var) + + # These values will make up a single point on the scatterplots used to calculate cross-unit FF + # Append to list containing values for each unit, across trials + + unit_bin_means.append(bin_count_means) + unit_bin_var.append(bin_count_var) + keys.append(t) + + # Now convert this to a dataframe + unit_bin_means = pd.DataFrame.from_records(unit_bin_means, index=keys) + unit_bin_means.index.name = "unit" + unit_bin_means.columns.name = "bin" + + unit_bin_var = pd.DataFrame.from_records(unit_bin_var, index=keys) + unit_bin_var.index.name = "unit" + unit_bin_var.columns.name = "bin" + + # can now use this information to calculate FF, and include any potential adjustments needed. + # To do so, shall create a linear regression to calculate FF, for each bin + + if mean_matched is True: + bin_heights = {} + """ + First calculate the greatest common distribution for this session's data + 1. Calculate the distribution of mean counts for each timepoint (binned trial time) + 2. Take the smallest height of each bin across timepoints, this will form the height of the GCD's corresponding bin + 3. Save this GCD to allow further mean matching + """ + + # Take each bin, and unit, then calculate the distribution of mean counts + for b in unit_bin_means.columns: + + # Isolate bin values, across units + bin_means = unit_bin_means[b] + + # Calculate distribution for this time, across units + p = np.histogram(bin_means, bins=9) + + # Save the heights of these bins for each bin + bin_heights.update( + {b: p[0]} + ) # append to a dictionary, where the key is the bin + + # Convert this dictionary to a dataframe, containing the heights of each timepoints distribution + bin_heights = pd.DataFrame.from_dict( + bin_heights, orient="index" + ) # the index of this dataframe will be the binned timepoints + gcd_heights = ( + bin_heights.min() + ) # get the minimum value for each "bin" of the histogram across timepoints + gcd_heights.index += 1 + + # Now may begin mean matching to the gcd + # Take each bin (i.e., timepoint) and calculate the distribution of points + # TODO: Update this analysis to utilise a rolling bin approach. 
I.e, take data at 100ms intervals, then shift the bin 10ms right + + print(f"Initiating Mean Matched Fano Factor Calculation for Session {name}") + for iteration in tqdm(range(ff_iterations)): + np.random.seed(iteration) + + for b in unit_bin_means.columns: + timepoint_means = pd.DataFrame(unit_bin_means[b]) + timepoint_edges = pd.DataFrame( + np.histogram(timepoint_means, bins=9)[1] + ) # the left edges of the histogram used to create a distribution + timepoint_var = unit_bin_var[b] + + # split data into bins + binned_data = pd.cut( + np.ndarray.flatten(timepoint_means.values), + np.ndarray.flatten(timepoint_edges.values), + labels=timepoint_edges.index[1:], + include_lowest=True, + ) # seperate raw data into the distribution bins + + timepoint_means["bin"] = binned_data + + # Isolate a single bin from the timepoint histogram + timepoint_FF_mean = {} + timepoint_FF_var = {} + + for i in range(len(timepoint_edges.index)): + if i == 0: + continue + # Bins begin at one, skip first iteration + repeat = True + bin_data = timepoint_means.loc[timepoint_means["bin"] == i] + + while repeat == True: + + # Check if bin height matches gcd height - if it does, append timepoint data to list + + if bin_data.count()["bin"] == gcd_heights[i]: + timepoint_FF_mean.update( + {i: bin_data[bin_data.columns[0]]} + ) # append the bin, and unit/mean count + repeat = False + + # If not, remove randomly points from the dataset one at a time, until they match + else: + dropped_data = np.random.choice( + bin_data.index, 1, replace=False + ) + bin_data = bin_data.drop( + dropped_data + ) # remove one datapoint, then continue with check + + ##Now extract the associated variance for the timepoint + timepoint_FF_mean = pd.DataFrame.from_dict(timepoint_FF_mean) + timepoint_FF_mean = timepoint_FF_mean.melt( + ignore_index=False + ).dropna() + timepoint_FF_var = timepoint_var[ + timepoint_var.index.isin(timepoint_FF_mean.index) + ] + + # Using the count mean and variance of the count, construct a linear regression for the population + # Ensure the origin of the linear model is set as zero + + # Set up data in correct shape + x = np.array(timepoint_FF_mean["value"]).reshape( + (-1, 1) + ) # mean count, the x axis + y = np.array(timepoint_FF_var) + + # Now fit model + timepoint_model = sm.OLS(y, x).fit() + + # The slope (i.e., coefficient of variation) of this model + ff_score = timepoint_model.params[0] + + # Extract the confidence interval of this model fit + ff_se = timepoint_model.bse[0] + ff_ci = timepoint_model.conf_int()[0] + + session_FF.update({b: ff_score}) + session_se.update({b: ff_se}) + session_ci.update({b: tuple(ff_ci)}) + + # Convert the session's FF to dataframe, then add to a master + session_FF = pd.DataFrame(session_FF, index=[0]) + session_se = pd.DataFrame(session_se, index=[0]) + session_ci = pd.DataFrame(session_ci.items()) + + repeat_FF = pd.concat([repeat_FF, session_FF], axis=0) + repeat_se = pd.concat([repeat_se, session_se], axis=0) + repeat_ci = pd.concat([repeat_ci, session_ci], axis=0) + + session_FF = {} + session_se = {} + session_ci = {} + + # Take the average across repeats for each session, add this to a final dataframe for session, timepoint, mean_FF, SE + + for i, cols in enumerate(repeat_FF.iteritems()): + timepoint_vals = repeat_FF.describe()[i] + timepoint_se = repeat_se.describe()[i] + + ci_bins = repeat_ci[0].tolist() + timepoint_ci = pd.DataFrame(repeat_ci[1].tolist()) + timepoint_ci = timepoint_ci.rename( + columns={0: "lower_ci", 1: "upper_ci"} + ).reset_index(drop=True) + 
timepoint_ci.insert(0, "bin", ci_bins) + + mean = timepoint_vals["mean"] + se = timepoint_se["mean"] + lower_ci = timepoint_ci.loc[timepoint_ci["bin"] == i].describe()[ + "lower_ci" + ]["mean"] + upper_ci = timepoint_ci.loc[timepoint_ci["bin"] == i].describe()[ + "upper_ci" + ]["mean"] + + if raw_values == False: + vals = pd.DataFrame( + { + "session": name, + "bin": i, + "mean FF": mean, + "SE": se, + "lower_ci": lower_ci, + "upper_ci": upper_ci, + }, + index=[0], + ) + all_FF = pd.concat([all_FF, vals], ignore_index=True) + elif raw_values == True: + repeat_FF = repeat_FF.melt() + repeat_se = repeat_se.melt() + + repeat_FF["session"] = name + repeat_FF = repeat_FF.set_index("session") + repeat_FF = repeat_FF.rename( + columns={"value": "FF", "variable": "bin"} + ) + + repeat_se["session"] = name + repeat_se = repeat_se.set_index("session") + repeat_FF["se"] = repeat_se["value"] + repeat_FF["lower_ci"] = timepoint_ci["lower_ci"] + repeat_FF["upper_ci"] = timepoint_ci["upper_ci"] + + all_FF = pd.concat([all_FF, repeat_FF]) + break + + elif mean_matched is False: + # Remember, this will return the individual FF for each unit, not the population level FF + # calculate raw FF for each unit + session_FF = pd.DataFrame(columns=["bin", "FF", "session", "unit"]) + for unit in unit_bin_means.index: + unit_FF = pd.DataFrame( + unit_bin_var.loc[unit] / unit_bin_means.loc[unit] + ).rename( + columns={unit: "FF"} + ) # return all bins for this unit + unit_FF = unit_FF.reset_index() + unit_FF["session"] = name + unit_FF["unit"] = unit + + session_FF = pd.concat([session_FF, unit_FF]) + + # Reorder columns, then return data + session_FF = session_FF[["session", "unit", "bin", "FF"]] + session_FF.reset_index(drop=True) + all_FF = pd.concat([all_FF, session_FF], axis=0) + + ioutils.write_hdf5( + f"/data/aidan/{cache_name}_FF_Calculation_mean_matched_{mean_matched}_raw{raw_values}.h5", + all_FF, + ) + return all_FF + + +# test = cross_trial_FF( +# spike_times=spike_times, +# exp=myexp, +# duration=4, +# bin_size=0.1, +# cache_name="per_unit_FF", +# mean_matched=True, +# ff_iterations=2, +# ) + + +# %% +# Plot individual unit FFs for each session overlaid on firing rates +# Split this by unit type + +# l23_per_unit_FF = cross_trial_FF( +# spike_times=l23_spike_times, +# exp=myexp, +# duration=4, +# bin_size=0.1, +# cache_name="per_unit_FF", +# mean_matched=False, +# ff_iterations=50, +# ) + +# l5_per_unit_FF = cross_trial_FF( +# spike_times=l5_spike_times, +# exp=myexp, +# duration=4, +# bin_size=0.1, +# cache_name="per_unit_FF", +# mean_matched=False, +# ff_iterations=50, +# ) + +# l6_per_unit_FF = cross_trial_FF( +# spike_times=l6_spike_times, +# exp=myexp, +# duration=4, +# bin_size=0.1, +# cache_name="per_unit_FF", +# mean_matched=False, +# ff_iterations=50, +# ) +# # Concatenate data +# l23_per_unit_FF["layer"] = "layer 2/3" +# l5_per_unit_FF["layer"] = "layer 5" +# l6_per_unit_FF["layer"] = "layer 6" + +# per_unit_FF = pd.concat( +# [l23_per_unit_FF, l5_per_unit_FF, l6_per_unit_FF], ignore_index=True +# # ) + +# # First plot layer 2/3 +# for s, session in enumerate(myexp): +# name = session.name +# units = l23_per_unit_FF["unit"].unique() +# subplots = Subplots2D(units, sharex=False) +# layer = "23" +# palette = sns.color_palette() +# plt.rcParams["figure.figsize"] = (10, 10) +# ses_fr = l23_firing_rates[s] + +# for i, un in enumerate(units): +# unit_data = l23_per_unit_FF.loc[l23_per_unit_FF["unit"] == un] +# unit_data["bin/10"] = unit_data["bin"] / 10 - ( +# duration / 2 +# ) # convert 100ms bins to 
seconds relative to action +# ax = subplots.axes_flat[i] +# ax1 = ax.twinx() # secondary axis for fr + +# bins = np.unique( +# l23_per_unit_FF.loc[l23_per_unit_FF["session"] == name]["bin"].values +# ) + +# val_data = ses_fr[un].stack().reset_index() +# val_data["y"] = val_data[0] +# # num_samples = len(ses_fr[values[0]].columns) + +# p = sns.lineplot( +# data=unit_data, x="bin/10", y="FF", ax=ax, linewidth=0.5, color="red" +# ) +# p.axvline( +# len(np.unique(bins)) / 2, +# color="green", +# ls="--", +# ) +# # p.autoscale(enable=True, tight=False) +# p.set_xticks([]) +# p.set_yticks([]) +# p.get_yaxis().get_label().set_visible(False) +# p.get_xaxis().get_label().set_visible(False) +# ax.set_xlim(bins[0], bins[-1]) + +# q = sns.lineplot( +# data=val_data, +# x="time", +# y="y", +# ci="sd", +# ax=ax1, +# linewidth=0.5, +# ) +# # q.autoscale(enable=True, tight=False) +# q.set_yticks([]) +# q.set_xticks([]) +# ax1.set_xlim(-duration / 2, duration / 2) +# # peak = ses_fr[value].values.mean(axis=1).max() +# q.get_yaxis().get_label().set_visible(False) +# q.get_xaxis().get_label().set_visible(False) +# q.axvline(c=palette[1], ls="--", linewidth=0.5) +# q.set_box_aspect(1) + +# to_label = subplots.axes_flat[0] +# to_label.get_yaxis().get_label().set_visible(True) +# to_label.get_xaxis().get_label().set_visible(True) +# to_label.set_xticks([]) + +# legend = subplots.legend + +# legend.text( +# 0, +# 0.3, +# "Unit ID", +# transform=legend.transAxes, +# color=palette[0], +# ) +# legend.set_visible(True) +# ticks = list(unit_data["bin"].unique()) +# legend.get_xaxis().set_ticks( +# ticks=[ticks[0], ticks[round(len(ticks) / 2)], ticks[-1]] +# ) +# legend.set_xlabel("100ms Bin") +# legend.get_yaxis().set_visible(False) +# legend.set_box_aspect(1) + +# # Overlay firing rates +# values = ses_fr.columns.get_level_values("unit").unique() +# values.values.sort() +# # num_samples = len(ses_fr[values[0]].columns) + +# plt.suptitle(f"Session {name} Per Unit Fano Factor + Firing Rate") + +# utils.save( +# f"/home/s1735718/Figures/{name}_per_unit_FanoFactor_l{layer}", nosize=True +# ) + +# for s, session in enumerate(myexp): +# name = session.name +# units = l5_per_unit_FF["unit"].unique() +# subplots = Subplots2D(units, sharex=False) +# layer = "5" +# palette = sns.color_palette() +# plt.rcParams["figure.figsize"] = (10, 10) +# ses_fr = l5_firing_rates[s] + +# for i, un in enumerate(units): +# unit_data = l5_per_unit_FF.loc[l5_per_unit_FF["unit"] == un] +# unit_data["bin/10"] = unit_data["bin"] / 10 - ( +# duration / 2 +# ) # convert 100ms bins to seconds relative to action +# ax = subplots.axes_flat[i] +# ax1 = ax.twinx() # secondary axis for fr + +# bins = np.unique( +# l5_per_unit_FF.loc[l5_per_unit_FF["session"] == name]["bin"].values +# ) + +# val_data = ses_fr[un].stack().reset_index() +# val_data["y"] = val_data[0] +# # num_samples = len(ses_fr[values[0]].columns) + +# p = sns.lineplot( +# data=unit_data, x="bin/10", y="FF", ax=ax, linewidth=0.5, color="red" +# ) +# p.axvline( +# len(np.unique(bins)) / 2, +# color="green", +# ls="--", +# ) +# # p.autoscale(enable=True, tight=False) +# p.set_xticks([]) +# p.set_yticks([]) +# p.get_yaxis().get_label().set_visible(False) +# p.get_xaxis().get_label().set_visible(False) +# ax.set_xlim(bins[0], bins[-1]) + +# q = sns.lineplot( +# data=val_data, +# x="time", +# y="y", +# ci="sd", +# ax=ax1, +# linewidth=0.5, +# ) +# # q.autoscale(enable=True, tight=False) +# q.set_yticks([]) +# q.set_xticks([]) +# ax1.set_xlim(-duration / 2, duration / 2) +# # peak = 
ses_fr[value].values.mean(axis=1).max() +# q.get_yaxis().get_label().set_visible(False) +# q.get_xaxis().get_label().set_visible(False) +# q.axvline(c=palette[1], ls="--", linewidth=0.5) +# q.set_box_aspect(1) + +# to_label = subplots.axes_flat[0] +# to_label.get_yaxis().get_label().set_visible(True) +# to_label.get_xaxis().get_label().set_visible(True) +# to_label.set_xticks([]) + +# legend = subplots.legend + +# legend.text( +# 0, +# 0.3, +# "Unit ID", +# transform=legend.transAxes, +# color=palette[0], +# ) +# legend.set_visible(True) +# ticks = list(unit_data["bin"].unique()) +# legend.get_xaxis().set_ticks( +# ticks=[ticks[0], ticks[round(len(ticks) / 2)], ticks[-1]] +# ) +# legend.set_xlabel("100ms Bin") +# legend.get_yaxis().set_visible(False) +# legend.set_box_aspect(1) + +# # Overlay firing rates +# values = ses_fr.columns.get_level_values("unit").unique() +# values.values.sort() +# # num_samples = len(ses_fr[values[0]].columns) + +# plt.suptitle(f"Session {name} Per Unit Fano Factor + Firing Rate") + +# utils.save( +# f"/home/s1735718/Figures/{name}_per_unit_FanoFactor_l{layer}", nosize=True +# ) + +# for s, session in enumerate(myexp): +# name = session.name +# units = l6_per_unit_FF["unit"].unique() +# subplots = Subplots2D(units, sharex=False) +# layer = "6" +# palette = sns.color_palette() +# plt.rcParams["figure.figsize"] = (10, 10) +# ses_fr = l6_firing_rates[s] + +# for i, un in enumerate(units): +# unit_data = l6_per_unit_FF.loc[l6_per_unit_FF["unit"] == un] +# unit_data["bin/10"] = unit_data["bin"] / 10 - ( +# duration / 2 +# ) # convert 100ms bins to seconds relative to action +# ax = subplots.axes_flat[i] +# ax1 = ax.twinx() # secondary axis for fr + +# bins = np.unique( +# l6_per_unit_FF.loc[l6_per_unit_FF["session"] == name]["bin"].values +# ) + +# val_data = ses_fr[un].stack().reset_index() +# val_data["y"] = val_data[0] +# # num_samples = len(ses_fr[values[0]].columns) + +# p = sns.lineplot( +# data=unit_data, x="bin/10", y="FF", ax=ax, linewidth=0.5, color="red" +# ) +# p.axvline( +# len(np.unique(bins)) / 2, +# color="green", +# ls="--", +# ) +# # p.autoscale(enable=True, tight=False) +# p.set_xticks([]) +# p.set_yticks([]) +# p.get_yaxis().get_label().set_visible(False) +# p.get_xaxis().get_label().set_visible(False) +# ax.set_xlim(bins[0], bins[-1]) + +# q = sns.lineplot( +# data=val_data, +# x="time", +# y="y", +# ci="sd", +# ax=ax1, +# linewidth=0.5, +# ) +# # q.autoscale(enable=True, tight=False) +# q.set_yticks([]) +# q.set_xticks([]) +# ax1.set_xlim(-duration / 2, duration / 2) +# # peak = ses_fr[value].values.mean(axis=1).max() +# q.get_yaxis().get_label().set_visible(False) +# q.get_xaxis().get_label().set_visible(False) +# q.axvline(c=palette[1], ls="--", linewidth=0.5) +# q.set_box_aspect(1) + +# to_label = subplots.axes_flat[0] +# to_label.get_yaxis().get_label().set_visible(True) +# to_label.get_xaxis().get_label().set_visible(True) +# to_label.set_xticks([]) + +# legend = subplots.legend + +# legend.text( +# 0, +# 0.3, +# "Unit ID", +# transform=legend.transAxes, +# color=palette[0], +# ) +# legend.set_visible(True) +# ticks = list(unit_data["bin"].unique()) +# legend.get_xaxis().set_ticks( +# ticks=[ticks[0], ticks[round(len(ticks) / 2)], ticks[-1]] +# ) +# legend.set_xlabel("100ms Bin") +# legend.get_yaxis().set_visible(False) +# legend.set_box_aspect(1) + +# # Overlay firing rates +# values = ses_fr.columns.get_level_values("unit").unique() +# values.values.sort() +# # num_samples = len(ses_fr[values[0]].columns) + +# plt.suptitle(f"Session 
{name} Per Unit Fano Factor + Firing Rate") + +# utils.save( +# f"/home/s1735718/Figures/{name}_per_unit_FanoFactor_l{layer}", nosize=True +# ) + +# # %% +# # Plot population FF changes + +# population_FF_l23 = cross_trial_FF( +# spike_times=l23_spike_times, +# exp=myexp, +# duration=4, +# bin_size=0.1, +# cache_name="l23", +# mean_matched=True, +# ff_iterations=50, +# ) +# population_FF_l5 = cross_trial_FF( +# spike_times=l5_spike_times, +# exp=myexp, +# duration=4, +# bin_size=0.1, +# cache_name="l5", +# mean_matched=True, +# ff_iterations=50, +# ) +# population_FF_l6 = cross_trial_FF( +# spike_times=l6_spike_times, +# exp=myexp, +# duration=4, +# bin_size=0.1, +# cache_name="l6", +# mean_matched=True, +# ff_iterations=50, +# ) + +#%% +# # Plot lines for each layer +# fig, ax = plt.subplots(2, figsize=(10, 10)) +# bin_size = 0.1 +# ax1 = ax[0] +# # First layer 2/3 +# mean = population_FF_l23["mean FF"] +# se = population_FF_l23["SE"] +# x = (population_FF_l23["bin"] * bin_size) - 2 +# lower = mean - se +# upper = mean + se + +# ax1.plot(x, mean, label="Layer 2/3", color="blue") +# ax1.plot(x, lower, color="tab:blue", alpha=0.1) +# ax1.plot(x, upper, color="tab:blue", alpha=0.1) +# ax1.fill_between(x, lower, upper, alpha=0.2) + +# # Then layer 5 +# mean = population_FF_l5["mean FF"] +# se = population_FF_l5["SE"] +# x = (population_FF_l5["bin"] * bin_size) - 2 +# lower = mean - se +# upper = mean + se + +# ax1.plot(x, mean, label="Layer 5", color="orange") +# ax1.plot(x, lower, color="tab:orange", alpha=0.1) +# ax1.plot(x, upper, color="tab:orange", alpha=0.1) +# ax1.fill_between(x, lower, upper, alpha=0.2) + +# # Finally, layer 6 +# mean = population_FF_l6["mean FF"] +# se = population_FF_l6["SE"] +# x = (population_FF_l6["bin"] * bin_size) - 2 +# lower = mean - se +# upper = mean + se + +# ax1.plot(x, mean, label="Layer 6", color="green") +# ax1.plot(x, lower, color="tab:green", alpha=0.1) +# ax1.plot(x, upper, color="tab:green", alpha=0.1) +# ax1.fill_between(x, lower, upper, alpha=0.2) + +# # Now set axes +# ax1.set_ylabel("Mean Matched Fano Factor") +# ax1.set_xticklabels([]) +# ax1.axvline(x=0, color="black", ls="--") +# ax1.set_ylim(0) +# ax1.legend() + + +# plt.suptitle( +# f"Population Level Fano Factor - Aligned to Reach Onset - Session {myexp[0].name}", +# y=0.9, +# ) + +# # Now plot firing rates +# ax2 = ax[1] + +# ax2.plot(x, l23_means, color="blue") +# ax2.plot(x, l5_means, color="orange") +# ax2.plot(x, l6_means, color="green") +# ax2.axvline(x=0, color="black", ls="--") +# ax2.set_title("Population Level Firing Rate") +# ax2.set_ylabel("Mean Firing Rate (Hz)") +# ax2.set_xlabel("Time Relative to Reach Onset (s)") +# ax2.set_ylim(0) +# utils.save(f"/home/s1735718/Figures/{myexp[0].name}_population_FanoFactor", nosize=True) +# plt.show() +# %% +# Plot undivided fano factor for the population +population_FF = cross_trial_FF( + spike_times=spike_times, + exp=myexp, + duration=8, + bin_size=0.1, + cache_name="population", + mean_matched=True, + ff_iterations=50, +) +# Check significance for each bin + + +def fano_fac_sig(population_FF): + """ + Function checks for significance in fano factor output, assuming the data is NOT raw, and mean-matched. + Returns a list of significance values (sig or nonsig) for each bin, compared to the previous timepoint. + + population_FF: the data obtained from the cross_trial_FF() function, where mean_matched is True, and raw_data is False. 
+ """ + significance = [] + for i, items in enumerate(population_FF.iterrows()): + if i == 0: + continue + + bin_upper = items[1]["upper_ci"] + bin_lower = items[1]["lower_ci"] + + previous_upper = population_FF.loc[population_FF["bin"] == i - 1]["upper_ci"] + previous_lower = population_FF.loc[population_FF["bin"] == i - 1]["lower_ci"] + + # Compare bins for overlap + if (bin_upper <= previous_lower.item()) or (bin_lower >= previous_upper.item()): + significance.append("significant") + + else: + significance.append("nonsignificant") + + return significance + +significance = fano_fac_sig(population_FF) +#%% +# Plot firing rates and fano factor +name = myexp[0].name +bin_size = 0.1 +bins = per_trial_binning(firing_rates, myexp, timespan=duration, bin_size=0.1) +bins = bin_data_average(bins).describe() +std = [] +for i in bins.iteritems(): + std.append(i[1]["std"]) + +fig, ax = plt.subplots(2, figsize=(10, 10)) +ax1 = ax[0] +ax2 = ax[1] + +mean = population_FF["mean FF"] +se = population_FF["SE"] +upper = mean + se +lower = mean - se +x = (population_FF["bin"] * bin_size) - 4 + +ax1.plot(x, mean, color="purple") +ax1.plot(x, lower, color="tab:purple", alpha=0.1) +ax1.plot(x, upper, color="tab:purple", alpha=0.1) +ax1.fill_between(x, lower, upper, alpha=0.2) + +ax1.set_ylabel("Mean Matched Fano Factor") +ax1.set_xlabel("Duration Relative to Reach Onset (s)") +ax1.axvline(x=0, color="black", ls="--") +ax1.set_ylim(0) +ax1.set_title(f"Population Level Fano Factor - session {name}") + +# Plot firing rate +lower = firing_rate_means - std +upper = firing_rate_means + std + +ax2.plot(x, firing_rate_means) +ax2.plot(x, lower, color="tab:purple", alpha=0.1) +ax2.plot(x, upper, color="tab:purple", alpha=0.1) +ax2.fill_between(x, lower, upper, alpha=0.2) + +ax2.set_title("Population Level Firing Rate") +ax2.set_ylabel("Mean Firing Rate (Hz)") +ax2.set_xlabel("Time Relative to Reach Onset (s)") + +ax2.axvline(x=0, color="black", ls="--") + +utils.save( + f"/home/s1735718/Figures/{myexp[0].name}_population_FanoFactor_unsplit", nosize=True +) +plt.show() +# %% diff --git a/projects/Aidan_Analysis/FullAnalysis/FanoFactor_Epochs.py b/projects/Aidan_Analysis/FullAnalysis/FanoFactor_Epochs.py new file mode 100644 index 0000000..ad5887f --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/FanoFactor_Epochs.py @@ -0,0 +1,37 @@ +# This file will calculate the fano factor relative to different alignment points. +# Then compare the changes observed. 
+# First import required packages +from argparse import Action +import math + +from matplotlib.pyplot import axvline, xlim +from base import * +import numpy as np +import pandas as pd +import matplotlib as plt +from pixtools.utils import Subplots2D +from rasterbase import * +from tqdm import tqdm +from sklearn.linear_model import LinearRegression +from pixtools.utils import Subplots2D +from pixels import ioutils +import statsmodels.api as sm +from statsmodels.formula.api import ols + +from functions import ( + event_times, + per_trial_raster, + per_trial_binning, + per_unit_spike_rate, + bin_data_average, + cross_trial_FF, + fano_fac_sig, +) + +# Import previously calculated FF values +population_FF = ioutils.read_hdf5( + "/data/aidan/per_unit_FF_FF_Calculation_mean_matched_True_rawTrue.h5" +) +population_FF = population_FF.melt() +population_FF.rename(columns={"variable": "bin", "value": "FF"}, inplace=True) +# Calculate any significant changes in FF across time: using the SE produced from the model diff --git a/projects/Aidan_Analysis/FullAnalysis/VR46_SpikeRate_ShallowDeep.py b/projects/Aidan_Analysis/FullAnalysis/VR46_SpikeRate_ShallowDeep.py new file mode 100644 index 0000000..1ad7a8f --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/VR46_SpikeRate_ShallowDeep.py @@ -0,0 +1,72 @@ +#This file shall split 211024_VR46 into two groups by depth, then plot spike firing rate by unit +#First import required packages +import sys +import matplotlib.pyplot as plt + + +from matplotlib.pyplot import figure +from distutils.command.sdist import sdist +from base import * +from textwrap import wrap +sys.path.append("/home/s1735718/PixelsAnalysis/pixels-analysis") +from pixtools import spike_rate + + + +#Now select the specific recording to be analysed +ses = myexp.get_session_by_name("211024_VR46") + +#And now split session into two groups +shallow = ses.select_units( + group="good", + max_depth=500, + uncurated=False, + name="shallow" +) + +deep = ses.select_units( + group="good", + min_depth=550, + max_depth=1200, + uncurated=False, + name="deep" +) + +#And then define the hits +hits_s = ses.align_trials( + ActionLabels.correct, + Events.led_on, + "spike_rate", + duration=4, + units=shallow +) + +hits_d = ses.align_trials( + ActionLabels.correct, + Events.led_on, + "spike_rate", + duration=4, + units=deep +) + +#Using these units we may now plot seperate spike rate graphs +#First for shallow + +name=ses.name +ci = 95 + +spike_rate.per_unit_spike_rate(hits_s, ci=ci) +plt.suptitle( + "\n".join(wrap(f"Session {name} - per-unit (<500 Depth) across-trials firing rate (aligned to LED on)")), +) + + +#And then for deep units + +spike_rate.per_unit_spike_rate(hits_d, ci=ci) +plt.suptitle( + "\n".join(wrap(f"Session {name} - per-unit (500 < Depth < 1200) across-trials firing rate (aligned to LED on)")), +) + + +plt.show() diff --git a/projects/Aidan_Analysis/FullAnalysis/base.py b/projects/Aidan_Analysis/FullAnalysis/base.py new file mode 100644 index 0000000..24ef39c --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/base.py @@ -0,0 +1,82 @@ +# This file sets up the required data/paths for subsequent data processing +# TODO: add pixtools to path permenantly so I don't have to keep importing from a local copy +# TODO: run files through black to format + +# Package required to change path +from pathlib import Path + +import sys +import numpy as np +import matplotlib.pyplot as plt +import seaborn as sns + +fig_dir = Path( + 
"/home/s1735718/PixelsAnalysis/pixels-analysis/projects/Aidan_Analysis/FullAnalysis/Figures" +) + +sys.path.append( + "/opt/neuropixels/pixels" +) # Adds the location of the pixtools folder to the python path +sys.path.insert(0, "/home/s1735718/PixelsAnalysis/pixels-analysis/") +from pixels import Experiment +from pixels.behaviours.reach import Reach, ActionLabels, Events +from pixtools import utils + +interim_dir = "/data/visuomotor_control/interim" + +# Step 1: Load an experiment +# +# An experiment handles a group of mice that were trained in the same behaviour. It +# stores data and metadata for all included sessions belonging to the list of mice +# provided. The Experiment class is passed the mouse or list of mice, the class +# definition for the behaviour they were trained in (imported from pixels.behaviours), +# and the paths where it can find recording data (the folder containing 'raw', 'interim' +# and 'processed' folders) and training metadata. + +# Will import master data from visuomotor control/neuropixels + +myexp = Experiment( + [ + # "VR46", + # "VR47", + # "VR49", + # "VR50", + # "VR52", + # "VR53", + # "VR55", + # "VR56", + # "VR58", + "VR59", + ], # This can be a list + Reach, # We want the reach behaviour + "~/duguidlab/visuomotor_control/neuropixels", # Where is the main data saved + "~/duguidlab/CuedBehaviourAnalysis/Data/TrainingJSON", # Where is the metadata for the recording saved + interim_dir="/data/visuomotor_control/interim", +) + + +# This depth range covers all mice +m2_depth = { + "min_depth": 200, + "max_depth": 1200, +} + + +def get_pyramidals(exp): + rep = "-".join(str(s) for s in m2_depth.values()) + + return exp.select_units( + **m2_depth, + min_spike_width=0.4, + name=f"{rep}_pyramidals", + ) + + +def get_interneurons(exp): + rep = "-".join(str(s) for s in m2_depth.values()) + + return exp.select_units( + **m2_depth, + max_spike_width=0.35, + name=f"{rep}_interneurons", + ) diff --git a/projects/Aidan_Analysis/FullAnalysis/bin_delta_depth_plot.py b/projects/Aidan_Analysis/FullAnalysis/bin_delta_depth_plot.py new file mode 100644 index 0000000..02820cf --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/bin_delta_depth_plot.py @@ -0,0 +1,512 @@ +###Using the calculated bin changes from the FR anova, this script will plot the depths of individual neuronal units against the change in firing rate (delta) +# Delta calculated from start of drop in FR to end (i.e., largest change in bin) + +#%% +# First import required packages: +import enum +from xml.etree.ElementInclude import include +from matplotlib.pyplot import legend, title, ylabel, ylim +import matplotlib.lines as mlines +from pyrsistent import b +import statsmodels.api as sm +from statsmodels.formula.api import ols +import numpy as np +import pandas as pd +import seaborn as sns +import sys +from base import * +from channeldepth import meta_spikeglx +from tqdm import tqdm # for progress bar + +from functions import ( + per_trial_binning, + bin_data_average, + event_times, + per_trial_spike_rate, +) +from reach import Cohort + + +from pixtools.utils import Subplots2D +from pixtools import utils +from pixtools import spike_rate +from pixels import ioutils +import statsmodels.stats.multicomp as mc +from statsmodels.graphics.factorplots import interaction_plot +from bioinfokit.analys import stat +from channeldepth import meta_spikeglx +from pixtools import clusters +from textwrap import wrap + +#%% +# Select units from given session +units = myexp.select_units(group="good", name="m2", min_depth=200, 
max_depth=1200) + +duration = 2 +per_unit_aligned = myexp.align_trials( + ActionLabels.correct, Events.led_off, "spike_rate", duration=duration, units=units +) + +#%% +# Using import functions, bin firing rate data +bin_data = per_trial_binning(per_unit_aligned, myexp, timespan=2, bin_size=0.1) +bin_data.reorder_levels(["session", "unit", "trial"], axis=1) + +multicomp_bin_results = [] +multicomp_unit_results = [] +multicomp_int_results = [] + + +# +###Run linear model in a distributed manner### +###NB: As the dataset is too large to run within memory constraints, will remove the final 4s of data. + + +def within_unit_GLM(bin_data, myexp): + + """ + Function takes binned data (with levels in order session -> unit -> trial) and performs an ANOVA on each session + Returns the multiple comparisons and ANOVA results for the interaction, also prints anova results + Following parameters are used: + + IV = Firing Rate, DV = Bin, Unit (containing many trials) + + Model is as follows: + + firing_rate ~ C(bin) + C(unit) + C(bin):C(unit) + + bin_data: the output of the per_trial_binning function reordered to swap unit and trial level hierarchy + + myexp: experimental cohort defined in base.py + + + NB: This function runs the model as a distributed set of calculations!! + i.e., it slices the data and runs several smaller calculations before concatenating results + + """ + # store data as a resillient distributed dataset + # will do this using pyspark + + # Now split this data into sessions + results = [] + for s, session in enumerate(myexp): + + name = session.name + ses_data = bin_data[s] + + # Now shall average the data per bin + ses_avgs = bin_data_average(ses_data) + + # Then melt data to prep for analysis + ses_avgs = ses_avgs.reset_index().melt(id_vars=["unit", "trial"]) + ses_avgs.rename({"value": "firing_rate"}, axis=1, inplace=True) + + ##Now shall construct a linear model to allow for analysis + # IV: Firing Rate + # DVs: Bin (within subjects), Unit (Within subjects - trials averaged out) + + # Now run the GLM analysis + model = ols( + "firing_rate ~ C(bin) + C(unit) + C(bin):C(unit)", data=ses_avgs + ).fit() + + output = sm.stats.anova_lm(model, typ=2) # Type II SS as this is faster + print(f"ANOVA Results - Session {name}") + print(output) + results.append(output) + + ####Run Multiple Comparisons#### + # Main effect of bin + res = stat() + + print(f"starting multiple comparisons for session {name}") + # Interaction + res.tukey_hsd( + df=ses_avgs, + res_var="firing_rate", + xfac_var=["bin", "unit"], + anova_model="firing_rate ~ C(bin) + C(unit) + C(bin):C(unit)", + phalpha=0.05, + ss_typ=2, + ) + int_effect = res.tukey_summary + multicomp_int_results.append(int_effect) + results = pd.DataFrame(results) + return multicomp_int_results, results + + +glm, output = within_unit_GLM(bin_data, myexp) + +#Cache the multicomps and results +for s, session in enumerate(myexp): + name = session.name + ses_output = output[s] + ses_output = pd.DataFrame(ses_output) + ioutils.write_hdf5(f"/data/aidan/glm_output_session_{name}.h5", ses_output) + + ses_multicomps = glm[s] + ses_multicomps = pd.DataFrame(ses_multicomps) + + ioutils.write_hdf5(f"/data/aidan/glm_multicomps_session_{name}.h5", ses_multicomps) + +#For later retrieval +# %% +# Using this data, now shall determine the largest delta between bins for each unit +# Split data into seperate dataframes for each session +def unit_delta(glm, myexp, bin_data, bin_duration, sig_only, percentage_change): + """ + Function takes the output of the multiple 
comparisons calculated by within_unit_GLM() and calculates the larges change in firing rate (or other IV measured by the GLM) between bins, per unit + This requires both experimental cohort data, and bin_data calculated by per_trial_binning() and passed through reorder levels (as "session", "unit", "trial") + Function returns a list of lists, in a hierarchy of [session[unit deltas]] + + Using this data, a depth plot may be created displaying the relative delta by unit location in pM2 + + glm: the result of the ANOVA and subsequent Tukey adjusted multiple comparisons performed by within_unit_GLM + NB: THIS IS A COMPUTATIONALLY EXPENSIVE FUNCTION, DO NOT RUN IT ON MORE THAN ONE SESSION AT A TIME + + myexp: the experimental cohort defined in base.py + + bin_data: the binned raw data computed by the per_trial_binning function + + bin_duration: the length of the bins in the data + + sig_only: whether to return only the greatest delta of significant bins or not + + percentage_change: whether to return deltas as a percentage change + + """ + ses_deltas = [] + for s, session in enumerate(myexp): + + final_deltas = [] + unit_deltas = [] + sigunit_comps = [] + ses_comps = glm[s] + + if sig_only is True: + # Return only significant comparisons + ses_comps = ses_comps.loc[ses_comps["p-value"] < 0.05] + + # Determine list of units remaining in comparison + units = [] + for i in ses_comps["group1"]: + units.append(i[1]) + units = np.array(units) + units = np.unique(units) + + # Now iterate through comparisons, by unit, Saving the significant comparisons to allow later calculation of delta + + for i in tqdm(units): + unit_comps = ses_comps[ + ses_comps["group1"].apply(lambda x: True if i in x else False) + ] # Lambda function checks if the value is in the tuple for group one + unit_comps = unit_comps[ + unit_comps["group2"].apply(lambda x: True if i in x else False) + ] + unit_comps.reset_index(inplace=True) + + # find row with largest difference + if unit_comps.empty: # skip if empty + continue + + unit_comps = unit_comps.sort_values("Diff", ascending=False) + ##Right around here I need to work out a way to sort by adjacent bins only + # Extract only adjacent bins, then take the largest value with this set. 
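            # Editorial note on the adjacency check below: with bin labels of the
            # form "start, end" (e.g. "-0.9, -0.8"), truly contiguous bins share an
            # endpoint, i.e. g2_bin1 == g1_bin2. The condition actually used,
            # g2_bin1 == g1_bin2 + bin_duration, instead keeps pairs separated by
            # one full bin; if strictly adjacent bins are intended, comparing
            # g2_bin1 to g1_bin2 (via round() or math.isclose to avoid float
            # equality problems) may be what is wanted.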
+ for item in unit_comps.iterrows(): + # row = unit_comps["Diff"].idxmax() + row = item[1] + g1_bin, unit_num = item[1]["group1"] + g1_bin1 = round(float(g1_bin.split()[0][0:-1]), 1) + g1_bin2 = round(float(g1_bin.split()[1][0:]), 1) + + g2_bin, unit_num = item[1]["group2"] + g2_bin1 = round(float(g2_bin.split()[0][0:-1]), 1) + g2_bin2 = round(float(g2_bin.split()[1][0:]), 1) + + # Check if bins are sequential, will return the largest value where bins are next to eachother + if g2_bin1 == g1_bin2 + bin_duration: + + sigunit_comps.append([row]) + else: + continue + + # Now that we know the units with significant comparisons, take these bins from raw firing rate averages + ses_avgs = bin_data_average(bin_data[s]) + # Iterate through our significant comparisons, calculating the actual delta firing rate + for i in range(len(sigunit_comps)): + sig_comp = sigunit_comps[i] + sig_comp = pd.DataFrame(sig_comp) # convert list to dataframe + + unit = [x[1] for x in sig_comp["group1"]] + ses_unit = ses_avgs.loc[ + ses_avgs.index.get_level_values("unit") == int(unit[0]) + ].mean() # get all rows where our units firing rate is present + + # Get the bins that the signficant comparison looked at + bin_val1 = [x[0] for x in sig_comp["group1"]][0] + bin_val2 = [x[0] for x in sig_comp["group2"]][0] + + # Return the percentage change from initial state rather than raw value + if percentage_change == True: + change = ses_unit[bin_val2] - ses_unit[bin_val1] + delta = (change / ses_unit[bin_val1]) * 100 + if ses_unit[bin_val1] == 0: + continue # Skip this value, if it changes from a value of zero, this is infinite + unit_deltas.append([int(unit[0]), delta]) + # Finally, get the delta value across these bins for the given unit + elif percentage_change == False: + delta = ses_unit[bin_val2] - ses_unit[bin_val1] + unit_deltas.append([int(unit[0]), delta]) + + # Iterate through unit_deltas and remove any duplicate units + # Keeping only the units of largest values + unit_deltas = pd.DataFrame(unit_deltas, columns=["unit", "delta"]) + for j in unit_deltas["unit"].unique(): + vals = unit_deltas.loc[unit_deltas["unit"] == j] + vals = vals.sort_values("delta", key=abs, ascending=False) + + final_deltas.append(vals.iloc[0].values.tolist()) + + ses_deltas.append(final_deltas) + + return ses_deltas + + +sig_deltas = unit_delta( + glm, myexp, bin_data, bin_duration=0.1, sig_only=True, percentage_change=False +) +all_deltas = unit_delta( + glm, myexp, bin_data, bin_duration=0.1, sig_only=False, percentage_change=False +) + +# %% +# Using this delta information, the approximate depths of these units may be determined +# First, update the unit depths function +def unit_depths(exp): + """ + Parameters + ========== + exp : pixels.Experiment + Your experiment. 
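
    Returns
    =======
    (Editorial addition) A single-row ("depth") dataframe whose columns form a
    (session, unit) MultiIndex keyed by session name; each value is the session's
    probe depth minus the unit's "depth" field from the cluster info, which the
    plotting code below treats as the unit's depth in the tissue (in um).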
+ """ + info = exp.get_cluster_info() + depths = [] + keys = [] + + for s, session in enumerate(exp): + name = session.name + session_depths = {} + rec_num = 0 + for rec_num, probe_depth in enumerate(session.get_probe_depth()): + rec_depths = {} + rec_info = info[s] + id_key = "id" if "id" in rec_info else "cluster_id" # Depends on KS version + + for unit in rec_info[id_key]: + unit_info = rec_info.loc[rec_info[id_key] == unit].iloc[0].to_dict() + rec_depths[unit] = probe_depth - unit_info["depth"] + + session_depths = pd.DataFrame(rec_depths, index=["depth"]) + keys.append(name) + session_depths = pd.concat([session_depths], axis=1, names="unit") + depths.append(session_depths) + + return pd.concat(depths, axis=1, names=["session", "unit"], keys=keys) + + +all_depths = unit_depths(myexp) +# %% +# Sort pyramidal and interneuronal units for the plot below +# select all pyramidal neurons +pyramidal_units = get_pyramidals(myexp) + +# and all interneurons +interneuron_units = get_interneurons(myexp) + +pyramidal_aligned = myexp.align_trials( + ActionLabels.correct, + Events.led_off, + "spike_rate", + duration=2, + units=pyramidal_units, +) + +interneuron_aligned = myexp.align_trials( + ActionLabels.correct, + Events.led_off, + "spike_rate", + duration=2, + units=interneuron_units, +) + + +#%% +# Now extract the depths of each of our largest delta units from this +# Plot the points as interneurons or pyramidals + +plt.rcParams.update({"font.size": 20}) +for s, session in enumerate(myexp): + name = session.name + ses_deltas = sig_deltas[s] + + ses_deltas = pd.DataFrame(ses_deltas, columns=["unit", "delta"]) + + ses_depths = all_depths[name] + # Find depths of associated units + unit_depth = ses_depths[ses_deltas["unit"]].melt() + ses_deltas = pd.concat([ses_deltas, unit_depth["value"]], axis=1) + ses_deltas.rename(columns={"value": "depth"}, inplace=True) + + # Do the same for nonsignificant units + ses_nosig_deltas = all_deltas[s] + ses_nosig_deltas = pd.DataFrame(ses_nosig_deltas, columns=["unit", "delta"]) + + # Find depths of associated units + unit_nosig_depth = ses_depths[ses_nosig_deltas["unit"]].melt() + ses_nosig_deltas = pd.concat([ses_nosig_deltas, unit_nosig_depth["value"]], axis=1) + ses_nosig_deltas.rename(columns={"value": "depth"}, inplace=True) + # Now remove any significant values + ses_nosig_deltas = ses_nosig_deltas[ + ~ses_nosig_deltas["unit"].isin(ses_deltas["unit"]) + ].dropna() + ses_nosig_deltas = ses_nosig_deltas.reset_index(drop=True) + + ##Then determine the type of neuron these units are (i.e., pyramidal or interneuron) + pyr_units = pyramidal_aligned[s].melt()["unit"].unique() + int_units = interneuron_aligned[s].melt()["unit"].unique() + dicts = {} + + unit_types = [] + for i, item in enumerate(ses_deltas["unit"]): + if item in pyr_units: + unit_types.append("pyramidal") + if item in int_units: + unit_types.append("interneuron") + else: + continue + + nosig_unit_types = [] + for i, item in enumerate(ses_nosig_deltas["unit"]): + if item in pyr_units: + nosig_unit_types.append("pyramidal") + if item in int_units: + nosig_unit_types.append("interneuron") + else: + continue + + # Append unit_types as a new column + # Will try and append this as a new column, to see where the extra value lies + unit_types = pd.DataFrame(unit_types) + ses_deltas = pd.concat([ses_deltas, unit_types], ignore_index=True, axis=1) + ses_deltas.rename( + columns={0: "unit", 1: "delta", 2: "depth", 3: "type"}, inplace=True + ) + + nosig_unit_types = pd.DataFrame(nosig_unit_types) + 
ses_nosig_deltas = pd.concat( + [ses_nosig_deltas, nosig_unit_types], ignore_index=True, axis=1 + ) + ses_nosig_deltas.rename( + columns={0: "unit", 1: "delta", 2: "depth", 3: "type"}, inplace=True + ) + + # Finally, plot a graph of deltas relative to zero, by depth + p = sns.scatterplot( + x="delta", + y="depth", + data=ses_nosig_deltas, + s=100, + linewidth=1, + style="type", + color="#972c7f", + alpha=0.5, + ) + + p2 = sns.scatterplot( + x="delta", + y="depth", + data=ses_deltas, + s=100, + linewidth=1, + style="type", # uncomment when neuron type is fixed + color="#221150", + ) + + plt.axvline(x=0, color="green", ls="--") + + plt.xlim(-50, 50) + plt.ylim(0, 1300) + + plt.xticks(fontsize=25) + plt.yticks(fontsize=25) + plt.xlabel("Δ Firing Rate (Hz)", fontsize=20) + plt.ylabel("Depth of Recorded Neuron (μm)", fontsize=20) + plt.suptitle( + "\n".join( + wrap( + f"Largest Trial Firing Rate Change by pM2 Depth - Session {name}", + width=30, + ) + ), + y=1.1, + fontsize=20, + ) + + # Create lengend manually + # Entries for legend listed below as handles: + sig_dot = mlines.Line2D( + [], + [], + color="#221150", + marker="o", + linestyle="None", + markersize=10, + label="Significant", + ) + nonsig_dot = mlines.Line2D( + [], + [], + color="#972c7f", + marker="o", + linestyle="None", + alpha=0.5, + markersize=10, + label="Non-Significant", + ) + grey_dot = mlines.Line2D( + [], + [], + color="grey", + marker="o", + linestyle="None", + markersize=10, + label="Pyramidal", + ) + grey_cross = mlines.Line2D( + [], + [], + color="grey", + marker="X", + lw=5, + linestyle="None", + markersize=10, + label="Interneuronal", + ) + + plt.legend( + handles=[sig_dot, nonsig_dot, grey_dot, grey_cross], + bbox_to_anchor=(1.7, 1), + title="Significance (p < 0.05)", + fontsize=15, + ) + + # Now invert y-axis and save + plt.gca().invert_yaxis() + + utils.save( + f"/home/s1735718/Figures/{name}_DeltaFR_byDepth", + nosize=True, + ) + plt.show() + +# %% diff --git a/projects/Aidan_Analysis/FullAnalysis/bin_firingrate_ANOVA.py b/projects/Aidan_Analysis/FullAnalysis/bin_firingrate_ANOVA.py new file mode 100644 index 0000000..efca6b6 --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/bin_firingrate_ANOVA.py @@ -0,0 +1,361 @@ +################################################# +# This script will analyse the standard deviation of the data, as a measure of variance +# Rather than confidence interval size. +# Will be done by a comparison across bins and session +# IV: Standard Deviation of Firing Rate +# DV: Bin (Within Subjects), Session (Between Subjects) +################################################# + +# Import req. 
packages + +import enum +from xml.etree.ElementInclude import include +from matplotlib.pyplot import title, ylabel, ylim +import statsmodels.api as sm +from statsmodels.formula.api import ols +import numpy as np +import pandas as pd +import seaborn as sns +import sys +from base import * + + +from functions import ( + per_trial_binning, + bin_data_average, + event_times, + per_trial_spike_rate, +) +from reach import Cohort + + +sys.path.append("/home/s1735718/PixelsAnalysis/pixels-analysis") +sys.path.insert(0, "/home/s1735718/PixelsAnalysis/pixels/pixels") +from pixtools.utils import Subplots2D # use the local copy of base.py +from pixtools import utils +from pixtools import spike_rate +from pixels import ioutils +import statsmodels.stats.multicomp as mc +from statsmodels.graphics.factorplots import interaction_plot +from bioinfokit.analys import stat + +#%% +# Select Units From Cache (m2 only) +units = myexp.select_units(group="good", name="m2", min_depth=200, max_depth=1200) + +duration = 2 +trial_aligned = myexp.align_trials( + ActionLabels.correct, Events.led_off, "spike_rate", duration=duration, units=units +) + +#%% +# Bin firing rate data +# +bin_data = per_trial_binning(trial_aligned, myexp, timespan=2, bin_size=0.1) + + +def bin_data_SD(bin_data): + """ + Function is similar to bin_data_mean. + + This function takes the dataframe produced by per_trial_binning() including bins as a multiindex + and calculates the standard deviation of each bin's activity within a given time period + This returns the SD of the firing rate of each unit (i.e., per session and time) + + bin_data: the dataframe produced by per_trial_binning, with predefined bins + + NB: To return a transposed version of the data (i.e., with bin averages as rows rather than columns) + add ".T" to function call. 
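
    Returns (editorial addition): a dataframe indexed by (session, trial) with one
    column per bin label; each value is the standard deviation over every sample
    in that bin, i.e. the SD is pooled across units and time points within each
    trial and bin rather than computed per unit.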
+ """ + + # First extract the values of each bin in bin_data + bins = bin_data.index.get_level_values( + "bin" + ) # a list of the bins created by the previous function + bin_names = ( + bins.unique() + ) # Take the unique values present, giving us the list of bin categories + ses_bins = {} + for s, session in enumerate(myexp): + name = session.name + + ses_data = bin_data[s] + trial_bins = {} + + # Trial number will give the following loop the number of times to iterate + trial_number = ses_data.columns.get_level_values("trial").unique() + # Now, take each trial and average across bins + for t in trial_number: + trial_data = ses_data[t] + individual_bins = {} + for b in bin_names: + + bin_vals = trial_data.loc[trial_data.index.get_level_values("bin") == b] + bin_st_dev = np.std( + bin_vals.values + ) # take standard deviation for all values in this trials bin + individual_bins.update({b: bin_st_dev}) + trial_bins.update({t: individual_bins}) + + ses_bins.update({name: trial_bins}) + + # Merge these SDs as a dataframe + binned_sd = pd.DataFrame.from_dict( + {(i, j): ses_bins[i][j] for i in ses_bins.keys() for j in ses_bins[i].keys()}, + orient="index", + ) + + # Take the mean of the SDs for each trial Bin + + binned_sd.index.rename(["session", "trial"], inplace=True) + binned_sd.columns.names = ["bin"] + + # Return the data + return binned_sd + + +bin_sd = bin_data_SD(bin_data) + + +# %% +# Now take SD data and and convert to long form to prepare for ANOVA analysis +long_sd = pd.melt( + bin_sd.reset_index(), id_vars=["session", "trial"] +) # Preserve the indexes as comparator variables +long_sd.rename( + {"value": "firing_rate_sd"}, axis=1, inplace=True +) # rename value column to sd for readability + +#%% +####################################### +# Then shall run a mixed factorial ANOVA +# IV: Standard Deviation of Firing Rate +# DV: Bin (Within Subjects - 100 levels), Session (Between Subjects - 2 levels for VR46 [n otherwise]) +####################################### + +# Assemble model +model = ols( + "firing_rate_sd ~ C(bin) + C(session) + C(bin):C(session)", data=long_sd +).fit() + +output = sm.stats.anova_lm( + model, typ=2 +) # Run test on model, sample sizes are equal between groups here (neurons stay the same across trials) so type 2 SS used +print(output) # Print output + +####Run multiple comparisons#### +# For main effect of bin +res = stat() +res.tukey_hsd( + df=long_sd, + res_var="firing_rate_sd", + xfac_var="bin", + anova_model="firing_rate_sd ~ C(bin) + C(session) + C(bin):C(session)", + phalpha=0.05, + ss_typ=2, +) +bin_main_effect = res.tukey_summary + +# For main effect of session +res = stat() +res.tukey_hsd( + df=long_sd, + res_var="firing_rate_sd", + xfac_var="session", + anova_model="firing_rate_sd ~ C(bin) + C(session) + C(bin):C(session)", + phalpha=0.05, + ss_typ=2, +) +ses_main_effect = res.tukey_summary + +# For interaction +res = stat() +res.tukey_hsd( + df=long_sd, + res_var="firing_rate_sd", + xfac_var=["bin", "session"], + anova_model="firing_rate_sd ~ C(bin) + C(session) + C(bin):C(session)", + phalpha=0.05, + ss_typ=2, +) +interaction = res.tukey_summary + +# Determine sig. 
comparisons +sig_bin_comps = bin_main_effect.loc[bin_main_effect["p-value"] < 0.05] +sig_ses_comps = ses_main_effect.loc[ses_main_effect["p-value"] < 0.05] +sig_int = interaction.loc[interaction["p-value"] < 0.05] + +####Now plot the findings of this analysis#### +fig = interaction_plot( + x=long_sd["bin"], trace=long_sd["session"], response=long_sd["firing_rate_sd"] +) +plt.show() +# From this it is clear there is a drop around the centre of bins + +##Check assumptions +# First residual plot (QQ) +sm.qqplot(res.anova_std_residuals, line="45") +plt.xlabel("Theoretical Quantiles") +plt.ylabel("Standardized Residuals") +# plt.show() + +# Then Distribution plot +plt.hist(res.anova_model_out.resid, bins="auto", histtype="bar", ec="k") +plt.xlabel("Residuals") +plt.ylabel("Frequency") +# plt.show() + +import scipy.stats as stats + +w, pvalue = stats.shapiro(res.anova_model_out.resid) +# print(w, pvalue) + +# Model assumptions are met. +# %% +############################################# +# Run the same analysis on firing rate to paint the other half of the picture. +# Calculate mean firing rate for the data +bin_avgs = bin_data_average(bin_data) + +bins = bin_avgs.columns +ses_bins = {} + +for s, session in enumerate(myexp): + name = session.name + + ses_data = bin_avgs.loc[bin_avgs.index.get_level_values("session") == s] + ses_trials = ses_data.index.get_level_values("trial") + trial_bins = {} + # Now, take each trial and average across bins + for t in ses_trials: + trial_data = ses_data.loc[ses_data.index.get_level_values("trial") == t].mean( + axis=0 + ) + trial_bins.update({t: trial_data}) + + ses_bins.update({name: trial_bins}) + +# Merge these averages as a dataframe +means = pd.DataFrame.from_dict( + {(i, j): ses_bins[i][j] for i in ses_bins.keys() for j in ses_bins[i].keys()}, + orient="index", +) + +means.index.rename(["session", "trial"], inplace=True) +means.columns.names = ["bin"] + +long_mean = pd.melt( + means.reset_index(), id_vars=["session", "trial"] +) # Preserve the indexes as comparator variables +long_mean.rename( + {"value": "mean_firing_rate"}, axis=1, inplace=True +) # rename value column to sd for readability + +#%% +# Now run analysis for firing rate, per session +# Do this manually, its faster + +model = ols( + "mean_firing_rate ~ C(bin) + C(session) + C(bin):C(session)", data=long_mean +).fit() + +output = sm.stats.anova_lm( + model, typ=2 +) # Run test on model, sample sizes are equal between groups here (neurons stay the same across trials) so type 2 SS used +print(output) # Print output + +####Run multiple comparisons#### +# For main effect of bin +res = stat() +res.tukey_hsd( + df=long_mean, + res_var="mean_firing_rate", + xfac_var="bin", + anova_model="mean_firing_rate ~ C(bin) + C(session) + C(bin):C(session)", + phalpha=0.05, + ss_typ=2, +) +bin_main_effect_fr = res.tukey_summary + +# For main effect of session +res = stat() +res.tukey_hsd( + df=long_mean, + res_var="mean_firing_rate", + xfac_var="session", + anova_model="mean_firing_rate ~ C(bin) + C(session) + C(bin):C(session)", + phalpha=0.05, + ss_typ=2, +) +ses_main_effect_fr = res.tukey_summary + +# For interaction +res = stat() +res.tukey_hsd( + df=long_mean, + res_var="mean_firing_rate", + xfac_var=["bin", "session"], + anova_model="mean_firing_rate ~ C(bin) + C(session) + C(bin):C(session)", + phalpha=0.05, + ss_typ=2, +) +interaction_fr = res.tukey_summary + +# Determine sig. 
comparisons +sig_bin_comps_fr = bin_main_effect_fr.loc[bin_main_effect_fr["p-value"] < 0.05] +sig_ses_comps_fr = ses_main_effect_fr.loc[ses_main_effect_fr["p-value"] < 0.05] +sig_int_fr = interaction_fr.loc[interaction_fr["p-value"] < 0.05] + +# %% +######Now determine where sig. changes occur in adjacent bins###### +##To do so, will only select columns 1s before grasp. +def pairwise_significance_extraction(pairwise_data): + + """ + This function will return pairwise comparisons for the 1s preceeding grasp. + To return only significant values, feed it data with sig. isolated already (i.e., where p-value column < 0.05) + + pairwise_data: the result of the multiple comparisons performed earlier in the script, by tukey_hsd() + + """ + + df = pairwise_data.loc[ + ( + (pairwise_data["group1"] == "-1.0, -0.9") + | (pairwise_data["group1"] == "-0.9, -0.8") + | (pairwise_data["group1"] == "-0.8, -0.7") + | (pairwise_data["group1"] == "-0.7, -0.6") + | (pairwise_data["group1"] == "-0.6, -0.5") + | (pairwise_data["group1"] == "-0.5, -0.4") + | (pairwise_data["group1"] == "-0.4, -0.3") + | (pairwise_data["group1"] == "-0.3, -0.2") + | (pairwise_data["group1"] == "-0.2, -0.1") + | (pairwise_data["group1"] == "-0.1, 0.0005") + ) + & ( + (pairwise_data["group2"] == "-1.0, -0.9") + | (pairwise_data["group2"] == "-0.9, -0.8") + | (pairwise_data["group2"] == "-0.8, -0.7") + | (pairwise_data["group2"] == "-0.7, -0.6") + | (pairwise_data["group2"] == "-0.6, -0.5") + | (pairwise_data["group2"] == "-0.5, -0.4") + | (pairwise_data["group2"] == "-0.4, -0.3") + | (pairwise_data["group2"] == "-0.3, -0.2") + | (pairwise_data["group2"] == "-0.2, -0.1") + | (pairwise_data["group2"] == "-0.1, 0.0005") + ) + ] + + return df + + +# Return only comparisons in the 1s leading up to grasp +# In SD data, significant change occurs in the 0.6s leading up to grasp +bin_sd_comps = pairwise_significance_extraction(sig_bin_comps) +bin_fr_comps = pairwise_significance_extraction(sig_bin_comps_fr) +###################################### +# TODO: +# Run this on remainder of sessions +# run on all session in one Cohort +# plot significant changes to graph +###################################### diff --git a/projects/Aidan_Analysis/FullAnalysis/bin_variance_analysis.py b/projects/Aidan_Analysis/FullAnalysis/bin_variance_analysis.py new file mode 100644 index 0000000..0a687be --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/bin_variance_analysis.py @@ -0,0 +1,751 @@ +#%% +# First shall import required packages + +import enum +from xml.etree.ElementInclude import include +from matplotlib.pyplot import title, ylabel, ylim +import numpy as np +import pandas as pd +import seaborn as sns +import sys +from base import * + + +print("starting function import") +# Will import functions from the master file to speed this process and avoid un-needed bootstrapping +from functions import ( + per_trial_binning, + bin_data_average, + bin_data_SD, + bin_data_bootstrap, + bs_pertrial_bincomp, +) + +print("import function complete") +from CI_Analysis import significance_extraction +from CI_Analysis import percentile_plot +from functions import event_times, per_trial_spike_rate +from textwrap import wrap + +sys.path.append("/home/s1735718/PixelsAnalysis/pixels-analysis") +sys.path.insert(0, "/home/s1735718/PixelsAnalysis/pixels/pixels") +from pixtools.utils import Subplots2D # use the local copy of base.py +from pixtools import utils +from pixtools import spike_rate +from pixels import ioutils + + +# Now select the units that shall be analysed +# 
Will only analyse M2 for the purposes of this script +units = myexp.select_units(group="good", name="m2", min_depth=200, max_depth=1200) + +# Then align trials to LED off (i.e, end of trial) with the time window determined by the max trial length +duration = 10 +trial_aligned = myexp.align_trials( + ActionLabels.correct, Events.led_off, "spike_rate", duration=duration, units=units +) + +#%% +################################################################################################## +##Run functions defined in unit_binning_analysis to perform the binning and boostrapping of data## +################################################################################################## + +print("bootstrapping initiated") + +bin_data = per_trial_binning(trial_aligned, myexp, timespan=10, bin_size=0.1) + +bin_avgs = bin_data_average(bin_data) + +bootstrap = bin_data_bootstrap( + bin_avgs, CI=95, ss=20, bootstrap=10000, name="fullbootstrap", cache_overwrite=True +) + +# bin_comp = bs_pertrial_bincomp(bootstrap, myexp) + +#%% +################################################################################################################## +# As an alternative to the bootstrapping approach taken before I will now take average firing rates across trials +# All trials will be combined into a single dataset and binned +# Each bin will contain a series of average firing rates across trials +# May then bootstrap this dataset to see how firing rate changes with time +################################################################################################################## + +# First, average firing rate for each session across units/trials (to give a vals for each trial per bin) +# i.e., each bin will have a single value representing the trials average firing rate at that point +# To do this, will average across units, in bin_avgs + +bins = bin_avgs.columns +ses_bins = {} + +for s, session in enumerate(myexp): + name = session.name + + ses_data = bin_avgs.loc[bin_avgs.index.get_level_values("session") == s] + ses_trials = ses_data.index.get_level_values("trial") + trial_bins = {} + # Now, take each trial and average across bins + for t in ses_trials: + trial_data = ses_data.loc[ses_data.index.get_level_values("trial") == t].mean( + axis=0 + ) + trial_bins.update({t: trial_data}) + + ses_bins.update({name: trial_bins}) + +# Merge these averages as a dataframe +means = pd.DataFrame.from_dict( + {(i, j): ses_bins[i][j] for i in ses_bins.keys() for j in ses_bins[i].keys()}, + orient="index", +) + +means.index.rename(["session", "trial"], inplace=True) +means.columns.names = ["bin"] + +# %% +# Define all functions to be used in this script +def cross_trial_bootstrap( + bin_data, CI=95, ss=20, bootstrap=10000, cache_name=None, cache_overwrite=False +): + """ + This function takes the averages across trials for a given bin as previously calculated + and samples across values, to give a series of percentiles for average firing rate in a given bin + + bin_data: the dataframe produced by bin_data_average, with bin times represented as columns + + CI: the confidence interval to calculate (default is 95%) + + ss: the sample size to randomly select from the averages (default is 20) + + bootstrap: the number of times a random sample will be taken to create a population dataset (default is 10,000) + + cache_name: the name to save the output under in cache, speeds the process if this has been run before + + cache_overwrite: whether to overwrite the saved cache with new data, (default is False) + """ + + 
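    # Editorial note: the cache-read branch below builds its path from `name`,
    # which is not defined at that point in this function (only `names` is set
    # later, per session). The write at the end of the function uses `cache_name`,
    # so the read presumably should too; otherwise a previously cached result is
    # never found, or a stale module-level `name` is picked up instead.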
##Stages of this analysis are as follows: + # 1. Take individual bin + # 2. Calculate confidence interval via bootstapping + # 3. Store all bin CIs + + # Check if the function has been run before, and saved to a chache + # NB: This function caches under the name of the first session in the list! + + if cache_name is not None: + # Read in data from previous run + output = myexp.sessions[0].interim / "cache" / (name + ".h5") + if output.exists() and cache_overwrite is not True: + df = ioutils.read_hdf5(myexp.sessions[0].interim / "cache" / (name + ".h5")) + return df + + # First define confidence intervals + lower = (100 - CI) / 2 # Lower confidence interval bound (default 2.5%) + upper = 100 - lower # Upper bound (default 97.5%) + percentiles = [lower, 50, upper] # The defined percentiles to analyse + cis = [] # empty list to store calculated intervals + + bin_values = bin_data.columns + keys = [] + + # Loop through each of the bin averages and calculate their confidence interval + for s, session in enumerate(myexp): + + names = session.name + ses_bins = bin_data[bin_data.index.get_level_values("session") == names] + trials = ses_bins.index.get_level_values( + "trial" + ).unique() # get the trials for a given session + + # Sample from every trial within a bin + for b in bin_values: + + # Take every trial within a bin + bin_avgs = ses_bins[b] + samples = np.array( + [[np.random.choice(bin_avgs.values, size=ss)] for j in range(bootstrap)] + ) + + # Use these samples to calculate the percentiles for each bin + medians = np.median(samples, axis=2) # Median of the samples taken + results = np.percentile( + medians, percentiles, axis=0 + ) # The percentiles, relative to the median + cis.append(pd.DataFrame(results)) + keys.append( + (names, b) + ) # Set the keys for the dataframe to be bin, session, trial + + # Now save these calculated confidence intervals to a dataframe + df = pd.concat(cis, axis=1, names=["session", "bin"], keys=keys) + df.set_index( + pd.Index(percentiles, name="percentile"), inplace=True + ) # Set the name of the index to percentiles + df.columns = df.columns.droplevel( + level=2 + ) # remove the redundant units row, as they have been averaged out + + # Finally, save this df to cache to speed up future runs + if cache_name is not None: + ioutils.write_hdf5( + myexp.sessions[0].interim / "cache" / (cache_name + ".h5"), df + ) + return df + + return df + + +def cross_trial_bincomp(bs_data, myexp): + """ + This function imports percentile data produced from bootstrapping, and compares across bins to determine if a significant change from previous states occurs + This process is repeated across trials to allow plotting of significance. + Returns a dataframe with bins classified as either increasing, decreasing or displaying little change from previous timepoint. + + bs_data: the percentile information produced by bin_data_bootstrap() + + myexp: the experimental cohort defined in base.py + """ + + total_bin_response = {} + # First import the data and split it by session, and trial. + for s, session in enumerate(myexp): + name = session.name + ses_data = bs_data[name] + ses_bin_response = {} + for t, cols in enumerate(ses_data): + + # Now that we have all the bins for a given trial, + # t count value in enumerate loop will allow comparison to previous iteration + bins = ses_data.columns + + # compare each column's 2.5% (lower) to previous column's 97.5% + # to check for sig. 
increase in activity + previous_bin = [] + for b, bins in enumerate(ses_data): + + # First store the initial bin to previous_bin + if len(previous_bin) == 0: + previous_bin.append(ses_data[bins]) + continue + + # Once this has been done, will compare across bins + current_bin = ses_data[bins] + + # First check if there is a sig. increase - i.e., if current 2.5 and previous 97.5 do not overlap + if current_bin[2.5] >= previous_bin[0][97.5]: + ses_bin_response.update({bins: "increased"}) + + # Now check if there is a sig. decrease + elif current_bin[97.5] <= previous_bin[0][2.5]: + ses_bin_response.update({bins: "decreased"}) + + # And finally if there is no change at all + else: + ses_bin_response.update({bins: "nochange"}) + + # The replace previous bin with current bin + previous_bin = [] + previous_bin.append(current_bin) + + # Now append data for each trial to a larget list + total_bin_response.update({name: ses_bin_response}) + + # Convert nested dictionary to dataframe + df = pd.DataFrame.from_dict( + total_bin_response, + orient="index", + ) + # df.index.rename(["session"], inplace=True) + df.columns.names = ["bin"] + df.index.rename("session", inplace=True) + return df + + +def percentile_range(bs_data): + """ + Function takes percentiles for given bins and calculates the range, adding this as a new index row + + bs_data: a bootstrapped dataset containing 2.5, 50, and 97.5% percentiles as index + with data organised as columns for some given bin + """ + + # Run through the set col by col + ranges = [] + + for col in bs_data: + column = bs_data[col] + perc_delta = column[97.5] - column[2.5] # Range of the percentiles + ranges.append(perc_delta) + + # Now append this as a new row + bs_data.loc["range"] = ranges + return bs_data + + +def variance_boostrapping( + bs_data, + myexp, + CI=95, + ss=20, + bootstrap=10000, + cache_name=None, + cache_overwrite=False, +): + """ + Similar to previous functions, this performs a boostrapping analysis per bin throughout a sessions trials. + However, it instead analyses if the size of the variance (ie. the range between percentiles calculated previously) differs significantly between bins + This will act as a proxy for the synchronicity of neuronal activity for a given bin. + + bs_data: the data calculated within each trial by bin_data_bootstrap (NOT Cross data bootstrap!) + + myexp: the experimental cohort defined in base.py + + CI: the confidence interval to calculate (default is 95%) + + ss: the sample size to randomly select from the averages (default is 20) + + bootstrap: the number of times a random sample will be taken to create a population dataset (default is 10,000) + + cache_name: the name to save the output under in cache, speeds the process if this has been run before + + cache_overwrite: whether to overwrite the saved cache with new data, (default is False) + + """ + # Check if the function has been run before, and saved to a chache + # NB: This function caches under the name of the first session in the list! 
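    # Editorial note: `legend_names.append(name)` in the loop below relies on the
    # module-level `legend_names` list defined in the plotting cells; it is not
    # created inside this function, so calling it before that list exists would
    # raise a NameError. The inner loop also rebinds `bin_data`, shadowing the
    # module-level dataframe of the same name.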
+ if cache_name is not None: + # Read in data from previous run + output = myexp.sessions[0].interim / "cache" / (cache_name + ".h5") + if output.exists() and cache_overwrite is not True: + df = ioutils.read_hdf5( + myexp.sessions[0].interim / "cache" / (cache_name + ".h5") + ) + return df + + # First define confidence intervals + lower = (100 - CI) / 2 # Lower confidence interval bound (default 2.5%) + upper = 100 - lower # Upper bound (default 97.5%) + percentiles = [lower, 50, upper] # The defined percentiles to analyse + cis = [] # empty list to store calculated intervals + + keys = [] + + for s, session in enumerate(myexp): + ses_data = bs_data[s] + ses_data = ses_data.loc[ses_data.index == "range"] + ses_data = ses_data.melt() + + name = session.name + legend_names.append(name) + bin_vals = ses_data.bin.unique() + + # Sample across all trial ranges for a given bin + for b in bin_vals: + bin_data = ses_data.loc[ses_data.bin == b] + samples = np.array( + [[np.random.choice(bin_data.value, size=ss)] for j in range(bootstrap)] + ) + + # Save results of single bin boostrapping + medians = np.median(samples, axis=2) # Median of the samples taken + results = np.percentile( + medians, percentiles, axis=0 + ) # The percentiles, relative to the median + cis.append(pd.DataFrame(results)) + + keys.append((name, b)) + + # Now save these calculated confidence intervals to a dataframe + df = pd.concat(cis, axis=1, names=["session", "bin"], keys=keys) + df.set_index( + pd.Index(percentiles, name="percentile"), inplace=True + ) # Set the name of the index to percentiles + df.columns = df.columns.droplevel( + level=2 + ) # remove the redundant units row, as they have been averaged out + + if cache_name is not None: + ioutils.write_hdf5( + myexp.sessions[0].interim / "cache" / (cache_name + ".h5"), df + ) + return df + + return df + + +#%% +# Now that the firing rates have been averaged, may perform a bootstrapping analysis per bin + +crosstrialbs = cross_trial_bootstrap( + means, + CI=95, + ss=20, + bootstrap=10000, + cache_name="crosstrialbs", + cache_overwrite=False, +) + +#%% +# Using this boostrapped firing rate, will plot average firing rate across trials, then compare bootstrap CIs +# Specifically, compare size of the variance. 
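# Editorial note: `within_trial_variance_change` is used by the per-trial plots
# and by variance_boostrapping() further down, but its definition just below (via
# percentile_range(bootstrap)) is commented out; it needs to be uncommented, or
# the equivalent result loaded from cache, for those later cells to run.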
+ +# cross_trial_variance_change = percentile_range( +# crosstrialbs +# ) # How the variance changes with time averaged across all trials for a session +# within_trial_variance_change = percentile_range( +# bootstrap +# ) # How the variance changes with time within each trial + +sd_change = bin_data_SD(bin_data) + + +# Plot this change in variance to see how it changes with time, relative to LED on +# First, for every trial in a session averaged, how does the variance change with time +legend_names = [] +plt.rcParams.update({"font.size": 12}) +for s, session in enumerate(myexp): + + name = session.name + legend_names.append(name) + ses_data = sd_change.loc[sd_change.index.get_level_values("session") == name] + ses_means = ses_data.mean(axis=0) + + bin_duration = duration / len(sd_change.columns) + p = sns.lineplot(data=ses_means, palette="hls") + + plt.tick_params( + axis="x", # changes apply to the x-axis + which="both", # both major and minor ticks are affected + bottom=True, # ticks along the bottom edge are on + top=False, # ticks along the top edge are off + labelbottom=False, + ) # labels along the bottom edge are off + +# Plot signficance annotations +x1 = "-0.6, -0.5" # first col +x2 = "-0.1, 0.0005" # last col +p.axvspan( + x1, + x2, + color="red", + alpha=0.1, +) + +# Adjust labels/legend +plt.ylabel("Average Variance in Firing Rate (SD)") +plt.xlabel( + f"Trial Duration Elapsed Relative to LED Off (as {bin_duration}s Bins)", labelpad=20 +) +plt.legend( + labels=legend_names, + bbox_to_anchor=(1.1, 1.1), + title="Session (RecordingDate_MouseID)", +) +plt.suptitle("Average Trial Firing Rate Variance") +plt.axvline( + x=round((len(ses_data.columns) / 2)), ls="--", color="green" +) # Plot vertical line at midpoint, representing the event the data is aligned to + +utils.save( + f"/home/s1735718/Figures/AllSession_Average_Variance_Change", + nosize=True, +) +plt.show() + +#%% +# Now plot average firing rate relative to led off to go with this + +legend_names = [] +for s, session in enumerate(myexp): + + ax = subplots.axes_flat[s] + name = session.name + + legend_names.append(name) + ses_data = means.loc[means.index.get_level_values("session") == name].mean( + axis=0 + ) # average firing rate for each bin in a given session + bin_duration = duration / len(ses_data) + + p = sns.lineplot( + x=ses_data.index, y=ses_data.values, palette="hls" # bins # firing rates + ) + + plt.tick_params( + axis="x", # changes apply to the x-axis + which="both", # both major and minor ticks are affected + bottom=True, # ticks along the bottom edge are on + top=False, # ticks along the top edge are off + labelbottom=False, + ) # labels along the bottom edge are off + + # Plot significance indicator + x1 = "-0.6, -0.5" # first col + x2 = "-0.1, 0.0005" # last col + p.axvspan( + x1, + x2, + color="red", + alpha=0.1, + ) + +# Adjust labels +plt.ylabel("Average Firing Rate (Hz)") +plt.xlabel( + f"Trial Duration Elapsed Relative to LED Off (as {bin_duration}s Bins)", labelpad=20 +) +plt.legend( + labels=legend_names, + bbox_to_anchor=(1.1, 1.1), + title="Session (RecordingDate_MouseID)", +) +plt.suptitle("Average Change In Firing Rate Relative to Grasp") +plt.axvline( + x=round((len(ses_data.index) / 2)), ls="--", color="green" +) # Plot vertical line at midpoint, representing the event the data is aligned to + +utils.save( + f"/home/s1735718/Figures/AllSession_Average_FiringRate_Change", + nosize=True, +) +#%% +# Then, plot every trial, with average variance per bin (i.e., after boostrapping across unit 
activity) +legend_names = [] +for s, session in enumerate(myexp): + + name = session.name + legend_names.append(name) + ses_data = within_trial_variance_change[s] + + # Determien the number of trials per session to plot as individual graphs + ses_values = ses_data.columns.get_level_values("trial").unique() + + subplots = Subplots2D( + ses_values, sharex=True, sharey=True + ) # determines the number of subplots to create (as many as there are trials in the session) + + for i, value in enumerate(ses_values): + trial_data = ses_data[value] + ax = subplots.axes_flat[i] + + p = sns.lineplot( + x=trial_data.columns, + y=trial_data.loc["range"].values, + ax=ax, + ci=None, + markers=None, + ) + + p.tick_params( + axis="x", # changes apply to the x-axis + which="both", # both major and minor ticks are affected + bottom=False, # ticks along the bottom edge are off + top=False, # ticks along the top edge are off + labelbottom=False, + ) # labels along the bottom edge are off + + p.get_yaxis().get_label().set_visible(False) + p.get_xaxis().get_label().set_visible(False) + p.axvline( + x=round((len(trial_data.columns) / 2)), ls="--", color="green" + ) # Plot vertical line at midpoint, representing the event the data is aligned to + + # Finally, return the legend. + legend = subplots.legend + legend.text(0.3, 0.5, "Trial", transform=legend.transAxes) + legend.text( + -0.2, + -0.5, + "Variance in Firing Rate (95% CI)", + transform=legend.transAxes, + rotation=90, + ) # y axis label + legend.text( + 0, + -0.3, + f"Trial Time Elapsed Relative to LED Off (as {bin_duration}s Bins)", + transform=legend.transAxes, + ) + + legend.set_visible(True) + legend.get_xaxis().set_visible(False) + legend.get_yaxis().set_visible(False) + legend.set_facecolor("white") + legend.set_box_aspect(1) + + plt.gcf().set_size_inches(20, 20) + plt.show() + + +#%% +# Will now compare the change in variance between bins as the trial progresses. 
+# i.e., via another boostrapping anaysis +# Once again do this by session + +cross_trial_variance = variance_boostrapping( + within_trial_variance_change, + myexp, + CI=95, + ss=20, + bootstrap=10000, + cache_name="variance_range_bootstrap", + cache_overwrite=False, +) +# %% +# Run this boostrapping of variance size through the comparison +comp_cross_trial = cross_trial_bincomp(cross_trial_variance, myexp) + +# Then plot changing variance with inclusion of lines denoting significance +for s, session in enumerate(myexp): + name = session.name + legend_names.append(name) + ses_data = within_trial_variance_change[s] + ses_comps = comp_cross_trial.loc[name] + + # Get only range values, convert to longform + ses_data = ses_data.loc[ses_data.index == "range"] + ses_data = ses_data.melt() + + # Now plot all bin values for each trial as a set of lines + p = sns.lineplot( + data=ses_data, + x="bin", + y="value", + hue="trial", + estimator="mean", + lw=0.7, + ) + + p.tick_params( + axis="x", # changes apply to the x-axis + which="both", # both major and minor ticks are affected + bottom=False, # ticks along the bottom edge are off + top=False, # ticks along the top edge are off + labelbottom=False, + ) # labels along the bottom edge are off + + p.axvline( + x=round((len(ses_data.bin.unique()) / 2)), ls="--", color="green" + ) # Plot vertical line at midpoint, representing the event the data is aligned to + + # iterate through the comparison data and where there is a significnat change, plot a shaded area + for i, cols in enumerate(ses_comps): + current_bin = ses_comps.index[i] + previous_bin = ses_comps.index[i - 1] + + if cols == "increased" or cols == "decreased": + p.axvspan( + current_bin, + previous_bin, + color="red", + alpha=0.1, + hatch=r"/o", + ) + + plt.suptitle(f"Size of Variance per Trial for Session {name}", y=0.9) + plt.ylabel("Variance in Firing Rate (95% CI)") + plt.xlabel(f"Trial Time Elapsed Relative to LED Off ({bin_duration}s Bins)") + plt.gcf().set_size_inches(10, 10) + utils.save( + f"/home/s1735718/Figures/{myexp[s].name}variance_change", + nosize=True, + ) + plt.show() + +# %% +####Plot individual session FRs +legend_names = [] +subplots = Subplots2D( + myexp, sharex=True, sharey=True +) # determines the number of subplots to create (as many as there are seaaions in the session) +plt.rcParams.update({"font.size": 30}) + +for s, session in enumerate(myexp): + name = session.name + ax = subplots.axes_flat[s] + + legend_names.append(name) + ses_data = means.loc[ + means.index.get_level_values("session") == name + ] # average firing rate for each bin in a given session + ses_data = pd.melt(ses_data.reset_index(), id_vars=["session", "trial"]) + bin_duration = duration / len(ses_data["bin"].unique()) + + p = sns.lineplot( + x=ses_data["bin"], + y=ses_data["value"], + palette="hls", + ax=ax, # bins # firing rates + ci="sd", + ) + + p.tick_params( + axis="x", # changes apply to the x-axis + which="both", # both major and minor ticks are affected + bottom=True, # ticks along the bottom edge are on + top=False, # ticks along the top edge are off + labelbottom=False, + ) # labels along the bottom edge are off + + p.get_yaxis().get_label().set_visible(False) + # Plot significance indicator + if name == "211027_VR49": # plot smaller bin for here + x1 = "-0.2, -0.1" # first col + x2 = "-0.1, 0.0005" # last col + p.axvspan(x1, x2, color="red", alpha=0.1) + p.axvline(x=round((len(ses_data["bin"].unique()) / 2)), ls="--", color="green") + p.text(1, 14, name) + 
p.get_xaxis().set_visible(False) + continue + + # plot other significance values + x1 = "-0.6, -0.5" # first col + x2 = "-0.1, 0.0005" # last col + p.axvspan(x1, x2, color="red", alpha=0.1) + # Adjust labels + + p.get_xaxis().set_visible(False) + p.axvline( + x=round((len(ses_data["bin"].unique()) / 2)), ls="--", color="green" + ) # Plot vertical line at midpoint, representing the event the data is aligned to + + # plot session number + p.text(1, 14, name) + + +# Finally, return the legend. +legend = subplots.legend + +legend.text( + -0.3, + 0, + "\n".join(wrap("Average Firing Rate (Hz)", width=20)), + transform=legend.transAxes, + rotation=90, +) # y axis label +legend.text( + 0, + -0.4, + "\n".join( + wrap( + f"Trial Time Elapsed Relative to LED Off (as {bin_duration}s Bins)", + width=25, + ) + ), + transform=legend.transAxes, +) + +legend.set_visible(True) +legend.get_xaxis().set_visible(False) +legend.get_yaxis().set_visible(False) +legend.set_facecolor("white") +legend.set_box_aspect(1) + +plt.suptitle("Average Firing Rate Per Session", y=0.95) +plt.gcf().set_size_inches(25, 25) +plt.rcParams.update({"font.size": 30}) +utils.save( + f"/home/s1735718/Figures/AllSession_Average_FiringRate_Change_individualPlots", + nosize=True, +) + +# %% diff --git a/projects/Aidan_Analysis/FullAnalysis/cell_atlas_neuron_distribution_M2.csv b/projects/Aidan_Analysis/FullAnalysis/cell_atlas_neuron_distribution_M2.csv new file mode 100644 index 0000000..469be32 --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/cell_atlas_neuron_distribution_M2.csv @@ -0,0 +1,2 @@ +Cells,Neurons,Excitatory,Inhibitory,REST,PV+,SST+,VIP+,Glia,Oligodendrocytes,Astrocytes,Microglia +1380708,465259,301973,163286,73770,36328,30289,22899,915449,527419,135943,252087, \ No newline at end of file diff --git a/projects/Aidan_Analysis/FullAnalysis/channeldepth.py b/projects/Aidan_Analysis/FullAnalysis/channeldepth.py new file mode 100644 index 0000000..731eec6 --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/channeldepth.py @@ -0,0 +1,22 @@ +#This file will read in channel depth information from the available metadata +#First import required packages + +from base import * +import probeinterface as pi + +from reach import Session + +#Find the metadata containing the depth of the channels used in the recording +#Remember must index into experiment and then files to find the spike metadata +#Shall create a function to do this +def meta_spikeglx(exp, session): + meta=exp[session].files[0]["spike_meta"] + data_path= myexp[session].find_file(meta) + data=pi.read_spikeglx(data_path) + + return data + + + + + diff --git a/projects/Aidan_Analysis/FullAnalysis/chisquare_by_unit_type.py b/projects/Aidan_Analysis/FullAnalysis/chisquare_by_unit_type.py new file mode 100644 index 0000000..a65da2a --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/chisquare_by_unit_type.py @@ -0,0 +1,99 @@ +#%% +# First import required packages +from argparse import Action +from base import * +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import seaborn as sns +import scipy.stats as sc +import sys +from channeldepth import meta_spikeglx +import statsmodels.api as sm +from statsmodels.formula.api import ols +from reach import Cohort +from xml.etree.ElementInclude import include +from matplotlib.pyplot import legend, title, ylabel, ylim +import matplotlib.lines as mlines +from tqdm import tqdm +from pixels import ioutils + +from functions import ( + event_times, + unit_depths, + per_trial_binning, + event_times, + 
within_unit_GLM, + bin_data_average, + # unit_delta, #for now use a local copy +) + +#%% +# First select units based on spike width analysis +all_units = myexp.select_units(group="good", name="m2", min_depth=200, max_depth=1200) + +pyramidals = get_pyramidals(myexp) +interneurons = get_interneurons(myexp) + + +# Then import the known neuronal type distributions from csv +known_units = pd.read_csv("cell_atlas_neuron_distribution_M2.csv", index_col=False) + +#%% +# Now align Trials +per_unit_aligned = myexp.align_trials( + ActionLabels.correct, Events.led_off, "spike_rate", duration=2, units=all_units +) + +left_aligned = myexp.align_trials( + ActionLabels.correct_left, Events.led_off, "spike_rate", duration=2, units=all_units +) + +right_aligned = myexp.align_trials( + ActionLabels.correct_right, + Events.led_off, + "spike_rate", + duration=2, + units=all_units, +) + +pyramidal_aligned = myexp.align_trials( + ActionLabels.correct, Events.led_off, "spike_rate", duration=2, units=pyramidals +) +interneuron_aligned = myexp.align_trials( + ActionLabels.correct, Events.led_off, "spike_rate", duration=2, units=interneurons +) + +# Now compare counts of neuropixel data to known data +counts = pd.DataFrame(columns=["Session", "Neurons", "Excitatory", "Inhibitory"]) +for s, session in enumerate(myexp): + + name = session.name + ses_pyr_count = len(pyramidals[s]) + ses_int_count = len(interneurons[s]) + total = ses_pyr_count + ses_int_count + neurons = pd.Series( + [name, total, ses_pyr_count, ses_int_count], index=counts.columns + ) + + counts.loc[s] = neurons # append as new row + +# Now calculate percentage proportions +counts["excitatory_percentage"] = (counts.Excitatory / counts.Neurons) * 100 +counts["inhibitory_percentage"] = (counts.Inhibitory / counts.Neurons) * 100 +median_counts = counts.median() + +known_counts = known_units[["Neurons", "Excitatory", "Inhibitory"]] +known_counts["excitatory_percentage"] = ( + known_counts.Excitatory / known_counts.Neurons +) * 100 +known_counts["inhibitory_percentage"] = ( + known_counts.Inhibitory / known_counts.Neurons +) * 100 +known_median = known_counts.median() + +# Now perform a chi-squared analysis to check across distributions +sc.chisquare( + median_counts[["excitatory_percentage", "inhibitory_percentage"]], + f_exp=known_median[["excitatory_percentage", "inhibitory_percentage"]], +) diff --git a/projects/Aidan_Analysis/FullAnalysis/dprime_training.py b/projects/Aidan_Analysis/FullAnalysis/dprime_training.py new file mode 100644 index 0000000..9406333 --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/dprime_training.py @@ -0,0 +1,50 @@ +# Produces graphs of d' training progress in all mice across three month periods +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import seaborn as sns +import sys + +sys.path.append("/home/s1735718/PixelsAnalysis/pixels-analysis") +sys.path.insert(0, "/home/s1735718/PixelsAnalysis/pixels/pixels") +from pixels.behaviours import Behaviour +from reach import Cohort +from pixels import ioutils +from pixtools import utils + +#%% +# create the cohort class +cohort = Cohort.init_from_files( + data_dir="/home/s1735718/duguidlab/CuedBehaviourAnalysis/Data/TrainingJSON", + mouse_ids=[ + "VR46", + "VR49", + "VR52", + "VR55", + "VR59", + ], +) + +# now get the training results from file +results = pd.DataFrame(cohort.get_results()) +trials = pd.DataFrame(cohort.get_trials()) +# NB: D' may be incorrect where days have multiple sessions! May have to calculate manually. 
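+# A minimal sketch of one way d' could be recomputed manually for days with
+# multiple sessions, assuming hit/miss/false-alarm/correct-rejection counts can
+# be pooled per day (these argument names are illustrative and are not columns
+# returned by cohort.get_results()):
+from scipy.stats import norm
+
+
+def manual_dprime(hits, misses, false_alarms, correct_rejections):
+    """d' = Z(hit rate) - Z(false-alarm rate), with a 0.5-count (log-linear) correction."""
+    hit_rate = (hits + 0.5) / (hits + misses + 1)
+    fa_rate = (false_alarms + 0.5) / (false_alarms + correct_rejections + 1)
+    return norm.ppf(hit_rate) - norm.ppf(fa_rate)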
+# Or exclude from the plotting (CANT DO THIS FOR THE MAIN DISS ONLY TO SHOW TRAINING PROGRESSION) + +#%% +# Plot linegraph of mouse and d' by day +plt.rcParams.update({"font.size": 35}) +p = sns.lineplot(x="day", y="d_prime", hue="mouse_id", data=results, lw=3) + +p.axhline(y=0, ls="--", color="grey", lw=3) +p.axhline(y=1.5, ls="--", color="green", lw=3) +plt.xlabel("Training Day") +plt.ylabel("d-Prime Score (d')") +plt.suptitle("Mouse d-Prime Score Throughout Training Period") + +plt.gcf().set_size_inches(15,10) +utils.save( + "/home/s1735718/Figures/cohort_dprime", + nosize=True, +) +# %% diff --git a/projects/Aidan_Analysis/FullAnalysis/firingrate_histogram_toLEDOFF.py b/projects/Aidan_Analysis/FullAnalysis/firingrate_histogram_toLEDOFF.py new file mode 100644 index 0000000..8396d3c --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/firingrate_histogram_toLEDOFF.py @@ -0,0 +1,107 @@ +# This will plot a figure aligned to LED off with the following features: +# A series of indicators displaying when LED came on across trials for each unit, plotted as a histogram +# The confidence interval for the spike rate. + +# First shall import the required packages +from argparse import Action +import sys +from cv2 import bitwise_and +import matplotlib.pyplot as plt +import pandas as pd +import numpy as np +import seaborn as sns + +from base import * +from rugplotbase import * +from matplotlib.pyplot import figure +from distutils.command.sdist import sdist +from textwrap import wrap + + +##Now import the main experimental data. Here all good units from cortex## +unit = myexp.select_units(group="good", uncurated=False, max_depth=1500) + +# Now must align these trials to LED off for the main spike rate data +hits_off = myexp.align_trials( + ActionLabels.correct, + Events.led_off, + "spike_rate", + duration=6, # May need to increase this depending on how far back the LED came on + units=unit, +) + +##Now will obtain the data containing time at which the LED turned on and duration## + +# I shall iterate through the trial taking the difference between the event on (LED_on) and event off (LED_off) +# This will give a duration of events and allow plotting +# First iterate through the experimental data, both sessions and get the action labels and events +# Importantly, this will be structured as a function to allow me to run this easily in future + + +##The following function will return the timepoints for the requested event type for a given experiment +# +def event_times(event, myexp): + """ + + This function will give the timepoints for the specified event across experimental sessions + + event: the specific event to search for, must be input within quotes ("") + + myexp: the experiment defined in base.py + + NB: Setting the event to LED_off when the action label is correct (whose start point therefore defines LED_on as zero) will return the DURATION of the event! 
+ """ + times = [] # Give an empty list to store times + + for ses in myexp: + sessiontimepoints = ( + [] + ) # Give an empty list to store this sessions times in before appending to the master list + als = ses.get_action_labels() # Get the action labels for this session + + for rec_num in range( + len(als) + ): # This will run through all recording numbers in the above action label data + actions = als[rec_num][:, 0] + events = als[rec_num][:, 1] + + # Call all trials where the mouse was correct to search + start = np.where(np.bitwise_and(actions, ActionLabels.correct))[0] + + for trial in start: + # Now iterate through these correct trials and return all times for selected event + event_time = np.where( + np.bitwise_and( + events[trial : trial + 10000], getattr(Events, event) + ) + )[0] + sessiontimepoints.append(event_time[0]) + + times.append(sessiontimepoints) + + return times + + +##Now let us define the event times, and durations for the experiment +led_duration = event_times("led_off", myexp) + + +# Combine these as a dictionary, now have all on times across sessions. Each representing an individual trial +rugdata = {ses.name: pd.DataFrame(led_duration[s]) for s, ses in enumerate(myexp)} + +rugdata = pd.concat(rugdata, axis=1) +rugdata = ( + -rugdata / 1000 +) # Converts the positive values to a negative second, to signify the duration before the LED off + +##Now will plot the base information, combined with a rugplot addition +# To do so, must edit the _plot function, imported from rugplotbase +for s, session in enumerate(myexp): + name = session.name + + per_unit_spike_rate(hits_off[s], rugdata[name], ci="sd") + plt.suptitle( + f"session {name} - per-unit across trial firing rate, aligned to LED off" + ) + +plt.show() diff --git a/projects/Aidan_Analysis/FullAnalysis/firingrate_to_LED_on.py b/projects/Aidan_Analysis/FullAnalysis/firingrate_to_LED_on.py new file mode 100644 index 0000000..c11b759 --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/firingrate_to_LED_on.py @@ -0,0 +1,52 @@ +from argparse import Action +import sys +from cv2 import bitwise_and +import matplotlib.pyplot as plt +import pandas as pd +import numpy as np +import seaborn as sns + +from base import * +from rugplotbase import * +from matplotlib.pyplot import figure +from distutils.command.sdist import sdist +from textwrap import wrap + +sys.path.append("/home/s1735718/PixelsAnalysis/pixels-analysis") +from pixtools import spike_rate +from pixtools import utils + + +##Now import the main experimental data. Here all good units from cortex## +unit = myexp.select_units( + group="good", + uncurated=False, + max_depth=1500, + name="cortex" +) + +#Now must align these trials to LED on for the main spike rate data +hits_off = myexp.align_trials( + ActionLabels.correct, + Events.led_on, + "spike_rate", + duration=8, + units=unit +) + +#Now plot average firing rate per unit, across trials. 
Will only examine the 4 seconds preceeding +ci = "sd" + +for s, session in enumerate(myexp): + + fig = plt.figure() + name = session.name + spike_rate.per_unit_spike_rate(hits_off[s], ci=ci) + fig.suptitle( + f"session {name} - per-unit firing rate 4s before LED on (Trial Beginning)" + ) + plt.gcf().set_size_inches(20, 20) + utils.save(f"/home/s1735718/Figures/{myexp[s].name}_spikerate_LED_on", nosize=True) + #plt.show() + + diff --git a/projects/Aidan_Analysis/FullAnalysis/functions.py b/projects/Aidan_Analysis/FullAnalysis/functions.py new file mode 100644 index 0000000..6f2ac0d --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/functions.py @@ -0,0 +1,1789 @@ +""" +This is the master file containing any functions used in my analyses. +Most require that experiments are initially defined in base.py. +sys.path.append allows the local importing of a copy of the pixels analysis repo, though this may be added to path otherwise. + +Notes +---------------------------------------------------------- +| +|Some functions utilise the depreciated rec_num argument! +| +| +| +----------------------------------------------------------- +""" + +from base import * # Import our experiment and metadata needed +import pandas as pd +import numpy as np +import seaborn as sns +import matplotlib.pyplot as plt +import matplotlib.lines as mlines + +from pyrsistent import b +from xml.etree.ElementInclude import include +import sys +import random +import probeinterface as pi +from reach import Session +from argparse import Action +from cv2 import bitwise_and +import matplotlib.pyplot as plt +from textwrap import wrap +from tqdm import tqdm +from channeldepth import meta_spikeglx + +import statsmodels.api as sm +from statsmodels.formula.api import ols +import statsmodels.stats.multicomp as mc +from statsmodels.graphics.factorplots import interaction_plot +from bioinfokit.analys import stat + +sys.path.append("/home/s1735718/PixelsAnalysis/pixels-analysis") +from pixtools.utils import Subplots2D +from mpl_toolkits.axes_grid1 import make_axes_locatable +from pixtools import utils +from pixtools import spike_rate +from pixels import ioutils + + +def _raster(per, data, sample, start, subplots, label): + per_values = data.columns.get_level_values(per).unique() + per_values.values.sort() + + if per == "trial": + data = data.reorder_levels(["trial", "unit"], axis=1) + other = "unit" + else: + other = "trial" + + # units if plotting per trial, trials if plotting per unit + samp_values = list(data.columns.get_level_values(other).unique().values) + if sample and sample < len(samp_values): + sample = random.sample(samp_values, sample) + else: + sample = samp_values + + if subplots is None: + subplots = Subplots2D(per_values) + + palette = sns.color_palette("pastel") # Set the colours for the graphs produced + + for i, value in enumerate(per_values): + val_data = data[value][ + sample + ] # Requires I index into the data when assembling plots (see plots.py) + val_data.columns = np.arange(len(sample)) + start + val_data = val_data.stack().reset_index(level=1) + + p = sns.scatterplot( + data=val_data, + x=0, + y="level_1", + ax=subplots.axes_flat[i], + s=0.5, + legend=None, + ) + + p.set_yticks([]) + p.set_xticks([]) + p.autoscale(enable=True, tight=True) + p.get_yaxis().get_label().set_visible(False) + p.get_xaxis().get_label().set_visible(False) + p.text( + 0.95, + 0.95, + value, + horizontalalignment="right", + verticalalignment="top", + transform=subplots.axes_flat[i].transAxes, + color="0.3", + ) + 
p.axvline(c=palette[2], ls="--", linewidth=0.5) + + if not subplots.axes_flat[i].yaxis_inverted(): + subplots.axes_flat[i].invert_yaxis() + + if ( + label + ): # Chane our axis labels, here to trial number and time from the reach (which the data is aligned to!) + to_label = subplots.to_label + to_label.get_yaxis().get_label().set_visible(True) + to_label.set_ylabel("Trial Number") + to_label.get_xaxis().get_label().set_visible(True) + to_label.set_xlabel("Time from reach (s)") + + # plot legend subplot + legend = subplots.legend + legend.text( + 0, + 0.9, + "Trial number" if per == "trial" else "Unit ID", + transform=legend.transAxes, + color="0.3", + ) + legend.set_visible(True) + legend.get_xaxis().set_visible(False) + legend.get_yaxis().set_visible(False) + legend.set_facecolor("white") + + return subplots + + +def within_unit_GLM(bin_data, myexp): + + """ + Function takes binned data (with levels in order session -> unit -> trial) and performs an ANOVA on each session + Returns the multiple comparisons for the interaction and prints anova results + Following parameters are used: + + IV = Firing Rate, DV = Bin, Unit (containing many trials) + + Model is as follows: + + firing_rate ~ C(bin) + C(unit) + C(bin):C(unit) + + bin_data: the output of the per_trial_binning function reordered to swap unit and trial level hierarchy + + myexp: experimental cohort defined in base.py + + + NB: This function runs the model as a distributed set of calculations!! + i.e., it slices the data and runs several smaller calculations before concatenating results + + """ + # store data as a resillient distributed dataset + # will do this using pyspark + + multicomp_bin_results = [] + multicomp_unit_results = [] + multicomp_int_results = [] + + # Now split this data into sessions + for s, session in tqdm(enumerate(myexp)): + + sessions = bin_data.columns.get_level_values(0).unique() + name = session.name + ses_data = bin_data[sessions[s]] + + # Now shall average the data per bin + ses_avgs = bin_data_average(ses_data) + + # Then melt data to prep for analysis + ses_avgs = ses_avgs.reset_index().melt(id_vars=["unit", "trial"]) + ses_avgs.rename({"value": "firing_rate"}, axis=1, inplace=True) + + ##Now shall construct a linear model to allow for analysis + # IV: Firing Rate + # DVs: Bin (within subjects), Unit (Within subjects - trials averaged out) + + # Now run the GLM analysis + model = ols( + "firing_rate ~ C(bin) + C(unit) + C(bin):C(unit)", data=ses_avgs + ).fit() + + output = sm.stats.anova_lm(model, typ=2) # Type II SS as this is faster + print(f"ANOVA Results - Session {name}") + print(output) + + ####Run Multiple Comparisons#### + # Main effect of bin + res = stat() + + print(f"starting multiple comparisons for session {name}") + # Interaction + res.tukey_hsd( + df=ses_avgs, + res_var="firing_rate", + xfac_var=["bin", "unit"], + anova_model="firing_rate ~ C(bin) + C(unit) + C(bin):C(unit)", + phalpha=0.05, + ss_typ=2, + ) + int_effect = res.tukey_summary + multicomp_int_results.append(int_effect) + return multicomp_int_results + + +# Using this function we may now plot the graphs we desire! +# First let us define a function to plot spikes per unit + + +def per_unit_raster(data, sample=None, start=0, subplots=None, label=True): + """ + Plots a spike raster for every unit in the given data with trials as rows. + + data is a multi-dimensional dataframe as returned from + Experiment.align_trials(, , 'spike_times') indexed into to get the + session and recording. 
+ """ + return _raster("unit", data, sample, start, subplots, label) + + +# And per trial +def per_trial_raster(data, sample=None, start=0, subplots=None, label=True): + """ + Plots a spike raster for every trial in the given data with units as rows. + + data is a multi-dimensional dataframe as returned from + Experiment.align_trials(, , 'spike_times') indexed into to get the + session and recording. + """ + return _raster("trial", data, sample, start, subplots, label) + + +# Now finally define a function that allows plotting a single unit as a raster +def single_unit_raster(data, ax=None, sample=None, start=0, unit_id=None): + # units if plotting per trial, trials if plotting per unit + samp_values = list(data.columns.get_level_values("trial").unique().values) + if sample and sample < len(samp_values): + sample = random.sample(samp_values, sample) + else: + sample = samp_values + + val_data = data[sample] + val_data.columns = np.arange(len(sample)) + start + val_data = val_data.stack().reset_index(level=1) + + if not ax: + ax = plt.gca() + + p = sns.scatterplot( + data=val_data, + x=0, + y="level_1", + ax=ax, + s=1.5, + legend=None, + edgecolor=None, + ) + p.set_yticks([]) + p.set_xticks([]) + p.autoscale(enable=True, tight=True) + p.get_yaxis().get_label().set_visible(False) + p.get_xaxis().get_label().set_visible(False) + + palette = sns.color_palette() + p.axvline(c="black", ls="--", linewidth=0.5) + + if not ax.yaxis_inverted(): + ax.invert_yaxis() + + if unit_id is not None: + p.text( + 0.95, + 0.95, + unit_id, + horizontalalignment="right", + verticalalignment="top", + transform=ax.transAxes, + color="0.3", + ) + + +def _plot(level, data, rugdata, ci, subplots, label): + values = data.columns.get_level_values(level).unique() + values.values.sort() + if level == "trial": + data = data.reorder_levels(["trial", "unit"], axis=1) + + # no. units if plotting per trial, no. 
trials if plotting per unit + num_samples = len(data[values[0]].columns) + + if subplots is None: + subplots = Subplots2D(values) + + palette = sns.color_palette() + + for i, value in enumerate(values): + val_data = data[value].stack().reset_index() + val_data["y"] = val_data[0] + ax = subplots.axes_flat[i] + divider = make_axes_locatable(ax) + + r = sns.rugplot( + ax=ax, + palette=palette, + data=rugdata, + x=rugdata[0], + height=0.1, + legend=False, + expand_margins=True, + ) + r.autoscale(enable=False, tight=False) + + p = sns.lineplot( + data=val_data, + x="time", + y="y", + ci=ci, + ax=ax, + linewidth=0.5, + ) + p.autoscale(enable=True, tight=False) + p.set_yticks([]) + p.set_xticks([]) + peak = data[value].values.mean(axis=1).max() + p.text( + 0.05, + 0.95, + "%.1f" % peak, + horizontalalignment="left", + verticalalignment="top", + transform=ax.transAxes, + color="grey", + ) + p.text( + 0.95, + 0.95, + value, + horizontalalignment="right", + verticalalignment="top", + transform=ax.transAxes, + color=palette[0], + ) + p.get_yaxis().get_label().set_visible(False) + p.get_xaxis().get_label().set_visible(False) + p.axvline(c=palette[1], ls="--", linewidth=0.5) + p.set_box_aspect(1) + + if label: + to_label = subplots.to_label + to_label.get_yaxis().get_label().set_visible(True) + to_label.set_ylabel("Firing rate (Hz)") + to_label.get_xaxis().get_label().set_visible(True) + to_label.set_xlabel("Time from push (s)") + to_label.set_xticks(data.index) + # to_label.set_xticklabels([- duration / 2, 0, duration / 2]) + + # plot legend subplot + legend = subplots.legend + legend.text( + 0, + 0.7, + "Peak of mean", + transform=legend.transAxes, + color="grey", + ) + legend.text( + 0, + 0.3, + "Trial number" if level == "trial" else "Unit ID", + transform=legend.transAxes, + color=palette[0], + ) + legend.set_visible(True) + legend.get_xaxis().set_visible(False) + legend.get_yaxis().set_visible(False) + legend.set_facecolor("white") + legend.set_box_aspect(1) + + return subplots + + +def per_unit_spike_rate(data, rugdata, ci=95, subplots=None, label=True): + """ + Plots a histogram for every unit in the given session showing spike rate. + + Parameters + ---------- + + data : pandas.DataFrame + A dataframe for a single session as returned from Behaviour.align_trials (or + Experiment.align_trials but indexed into for session and rec_num). + + rugdata : pandas.DataFrame + A dataframe for a single session containing the onset of the event (led_on) to plot as a rug. + + ci : int or 'sd', optional + Confidence intervals to plot as envelope around line. Default is 95 i.e. 95% + confidence intervals. Also accepts "'sd'" which will draw standard deviation + envelope, or None to plot no envelope. + """ + return _plot("unit", data, rugdata, ci, subplots, label) + + +def per_trial_spike_rate(data, rugdata, ci=95, subplots=None, label=True): + """ + Plots a histogram for every trial in the given session showing spike rate. + + Parameters + ---------- + + data : pandas.DataFrame + A dataframe for a single session as returned from Behaviour.align_trials (or + Experiment.align_trials but indexed into for session and rec_num). + + ci : int or 'sd', optional + Confidence intervals to plot as envelope around line. Default is 95 i.e. 95% + confidence intervals. Also accepts "'sd'" which will draw standard deviation + envelope, or None to plot no envelope. 
+ """ + return _plot("trial", data, rugdata, ci, subplots, label) + + +def event_times(event, myexp): + """ + + This function will give the timepoints for the specified event across experimental sessions + + event: the specific event to search for, must be input within quotes ("") + + myexp: the experiment defined in base.py + + NB: Setting the event to LED_off when the action label is correct (whose start point therefore defines LED_on as zero) will return the DURATION of the event! + """ + times = [] # Give an empty list to store times + + for ses in myexp: + sessiontimepoints = ( + [] + ) # Give an empty list to store this sessions times in before appending to the master list + als = ses.get_action_labels() # Get the action labels for this session + + for rec_num in range( + len(als) + ): # This will run through all recording numbers in the above action label data + actions = als[rec_num][:, 0] + events = als[rec_num][:, 1] + + # Call all trials where the mouse was correct to search + start = np.where(np.bitwise_and(actions, ActionLabels.correct))[0] + + for trial in start: + # Now iterate through these correct trials and return all times for selected event + event_time = np.where( + np.bitwise_and( + events[trial : trial + 10000], getattr(Events, event) + ) + )[0] + sessiontimepoints.append(event_time[0]) + + times.append(sessiontimepoints) + + return times + + +def meta_spikeglx(exp, session): + """ + This function finds the metadata containing the depth of the channels used in the recording + Remember must index into experiment and then files to find the spike metadata + + exp: myexp, the class defined in base.py + + session: the session to analyse, in the form of an integer (i.e., 0 for first session, 1 for second etc.) + """ + + meta = exp[session].files[0]["spike_meta"] + data_path = myexp[session].find_file(meta) + data = pi.read_spikeglx(data_path) + + return data + + +def significance_extraction(CI): + """ + This function takes the output of the get_aligned_spike_rate_CI function and extracts any significant values, returning a dataframe in the same format. + + CI: The dataframe created by the CI calculation previously mentioned + + """ + + sig = [] + keys = [] + rec_num = 0 + + # This loop iterates through each column, storing the data as un, and the location as s + for s, unit in CI.items(): + # Now iterate through each recording, and unit + # Take any significant values and append them to lists. + if unit.loc[2.5] > 0 or unit.loc[97.5] < 0: + sig.append( + unit + ) # Append the percentile information for this column to a list + keys.append( + s + ) # append the information containing the point at which the iteration currently stands + + # Now convert this list to a dataframe, using the information stored in the keys list to index it + sigs = pd.concat( + sig, axis=1, copy=False, keys=keys, names=["session", "unit", "rec_num"] + ) + + return sigs + + +def percentile_plot(CIs, sig_CIs, exp, sig_only=False, dir_ascending=False): + """ + + This function takes the CI data and significant values and plots them relative to zero. + May specify if percentiles should be plotted in ascending or descending order. + + CIs: The output of the get_aligned_spike_rate_CI function, i.e., bootstrapped confidence intervals for spike rates relative to two points. 
+ + sig_CIs: The output of the significance_extraction function, i.e., the units from the bootstrapping analysis whose confidence intervals do not straddle zero + + exp: The experimental session to analyse, defined in base.py + + sig_only: Whether to plot only the significant values obtained from the bootstrapping analysis (True/False) + + dir_ascending: Whether to plot the values in ascending order (True/False) + + """ + # First sort the data into long form for both datasets, by percentile + CIs_long = ( + CIs.reset_index() + .melt("percentile") + .sort_values("value", ascending=dir_ascending) + ) + CIs_long = CIs_long.reset_index() + CIs_long["index"] = pd.Series( + range(0, CIs_long.shape[0]) + ) # reset the index column to allow ordered plotting + + CIs_long_sig = ( + sig_CIs.reset_index() + .melt("percentile") + .sort_values("value", ascending=dir_ascending) + ) + CIs_long_sig = CIs_long_sig.reset_index() + CIs_long_sig["index"] = pd.Series(range(0, CIs_long_sig.shape[0])) + + # Now select if we want only significant values plotted, else raise an error. + if sig_only is True: + data = CIs_long_sig + + elif sig_only is False: + data = CIs_long + + else: + raise TypeError("Sig_only argument must be a boolean operator (True/False)") + + # Plot this data for the experimental sessions as a pointplot. + for s, session in enumerate(exp): + name = session.name + + p = sns.pointplot( + x="unit", + y="value", + data=data.loc[(data.session == s)], + order=data.loc[(data.session == s)]["unit"].unique(), + join=False, + legend=None, + ) # Plots in the order of the units as previously set, uses unique values to prevent double plotting + + p.set_xlabel("Unit") + p.set_ylabel("Confidence Interval") + p.set(xticklabels=[]) + p.axhline(0) + plt.suptitle( + "\n".join( + wrap( + f"Confidence Intervals By Unit - Grasp vs. Baseline - Session {name}" + ) + ) + ) # Wraps the title of the plot to fit on the page. + + plt.show() + + +def per_trial_binning(trial, myexp, timespan, bin_size): + """ + Function takes data aligned by trial (i.e., using myexp align_trials method), + bins them, then returns the dataframe with an added bin index + + The end result of this is an index structured as (time, 'lower bin, upper bin') + + trial: The data aligned to an event according to myexp.align_trials + + myexp: The experimental cohort defined in base.py + + timespan: the total time data was aligned to in myexp.align_trials (i.e., duration) in seconds + + bin size: the size of the bins in seconds + + """ + + # First reorder data hierarchy + reorder = trial.reorder_levels(["session", "trial", "unit"], axis=1) + + # Perform binning on whole dataset + # First calculate the number of bins to create based on total duration and bin length + bin_number = int(timespan / bin_size) + + values = reorder.columns.get_level_values( + "trial" + ).unique() # Get trials for session + + # Add a new level containing the bin data for the dataframe + bin_vals = pd.cut(reorder.index, bin_number, include_lowest=True, precision=1) + bin_vals = bin_vals.astype(str) # Convert the interval series to a list of strings + + # Remove brackets from bin_vals + bin_vals_str = [] + for t in bin_vals: + t = t.strip("()[]") + bin_vals_str.append(t) + + # This will be added as an index, before time. 
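+    # e.g. with timespan=2 and bin_size=0.1 the intervals look like "(-1.0, -0.9]",
+    # which the strip above reduces to the string "-1.0, -0.9"; these strings are
+    # what get appended as the new "bin" index level below "time".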
+ full_data = reorder.assign( + bin=bin_vals_str + ) # Add bin values temporarily as a column + full_data.set_index( + "bin", append=True, inplace=True + ) # Convert this column to a new index, below time + return full_data + + +def bin_data_average(bin_data): + """ + This function takes the dataframe produced by per_trial_binning() including bins as a multiindex + and takes the average of each bin's activity within a given time period + This returns the average firing rate of each unit (i.e., per session and time) + + bin_data: the dataframe produced by per_trial_binning, with predefined bins + + NB: To return a transposed version of the data (i.e., with bin averages as rows rather than columns) + add ".T" to function call. + """ + + # First extract the values of each bin in bin_data + bins = bin_data.index.get_level_values( + "bin" + ) # a list of the bins created by the previous function + bin_names = ( + bins.unique() + ) # Take the unique values present, giving us the list of bin categories + + # Take averages for each bin, across all units and trials then concatenate into a dataframe, where each column is a bin + binned_avgs = pd.concat( + (bin_data.loc[bins == b].mean(axis=0) for b in bin_names), + axis=1, + keys=bin_names, + ) + binned_avgs = pd.DataFrame(binned_avgs) + + # Return the data + return binned_avgs + + +def bin_data_bootstrap( + bin_data, CI=95, ss=20, bootstrap=10000, name=None, cache_overwrite=False +): + """ + This function takes the data, with bin averages calculated by bin_data_average and calculates boostrapped confidence intervals + To do so, it takes a random sample many times over and uses this data to calculate percentiles + + bin_data: the dataframe produced by bin_data_average, with bin times represented as columns + + CI: the confidence interval to calculate (default is 95%) + + ss: the sample size to randomly select from the averages (default is 20) + + bootstrap: the number of times a random sample will be taken to create a population dataset (default is 10,000) + + name: the name to save the output under in cache, speeds the process if this has been run before + + cache_overwrite: whether to overwrite the saved cache with new data, (default is False) + """ + + ##Stages of this analysis are as follows: + # 1. Take individual bin + # 2. Calculate confidence interval via bootstapping + # 3. Store all bin CIs + + # Check if the function has been run before, and saved to a chache + # NB: This function caches under the name of the first session in the list! 
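+    # (the cached dataframe is written to <first session>.interim/cache/<name>.h5,
+    # and reruns with the same name return it unless cache_overwrite=True)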
+ if name is not None: + # Read in data from previous run + output = myexp.sessions[0].interim / "cache" / (name + ".h5") + if output.exists() and cache_overwrite is not True: + df = ioutils.read_hdf5(myexp.sessions[0].interim / "cache" / (name + ".h5")) + return df + + # First define confidence intervals + lower = (100 - CI) / 2 # Lower confidence interval bound (default 2.5%) + upper = 100 - lower # Upper bound (default 97.5%) + percentiles = [lower, 50, upper] # The defined percentiles to analyse + cis = [] # empty list to store calculated intervals + + bin_values = bin_data.columns + keys = [] + + # Loop through each of the bin averages and calculate their confidence interval + for s, session in enumerate(myexp): + names = session.name + ses_bins = bin_data[bin_data.index.get_level_values("session") == s] + trials = ses_bins.index.get_level_values( + "trial" + ).unique() # get the trials for a given session + + for t in trials: + trial_bins = ses_bins[ses_bins.index.get_level_values("trial") == t] + + # Take every bin within a trial + for b in bin_values: + bin_avg = trial_bins[b] + samples = np.array( + [ + [np.random.choice(bin_avg.values, size=ss)] + for j in range(bootstrap) + ] + ) + + # Use these samples to calculate the percentiles for each bin + medians = np.median(samples, axis=2) # Median of the samples taken + results = np.percentile( + medians, percentiles, axis=0 + ) # The percentiles, relative to the median + cis.append(pd.DataFrame(results)) + keys.append( + (s, t, b) + ) # Set the keys for the dataframe to be bin, session, trial + + # Now save these calculated confidence intervals to a dataframe + df = pd.concat(cis, axis=1, names=["session", "trial", "bin"], keys=keys) + df.set_index( + pd.Index(percentiles, name="percentile"), inplace=True + ) # Set the name of the index to percentiles + df.columns = df.columns.droplevel( + level=3 + ) # remove the redundant units row, as they have been averaged out + + # Finally, save this df to cache to speed up future runs + if name is not None: + ioutils.write_hdf5(myexp.sessions[0].interim / "cache" / (name + ".h5"), df) + return df + + return df + + +def bs_pertrial_bincomp(bs_data, myexp): + """ + This function imports percentile data produced from bootstrapping, and compares across bins to determine if a significant change from previous states occurs + This process is repeated across trials to allow plotting of significance. + Returns a dataframe with bins classified as either increasing, decreasing or displaying little change from previous timepoint. + + bs_data: the percentile information produced by bin_data_bootstrap() + + myexp: the experimental cohort defined in base.py + """ + + total_bin_response = {} + # First import the data and split it by session, and trial. + for s, session in enumerate(myexp): + name = session.name + ses_data = bs_data[0] + ses_bin_response = {} + for t, cols in enumerate(ses_data): + trial_data = ses_data[ + cols[0] + ] # All bins, and associated percentiles for a given trial + + # Now that we have all the bins for a given trial, + # t count value in enumerate loop will allow comparison to previous iteration + bins = trial_data.columns + + # compare each column's 2.5% (lower) to previous column's 97.5% + # to check for sig. 
increase in activity + previous_bin = [] + trial_bin_response = {} # store as a dictionary + for b, bins in enumerate(trial_data): + + # First store the initial bin to previous_bin + if len(previous_bin) == 0: + previous_bin.append(trial_data[bins]) + continue + + # Once this has been done, will compare across bins + current_bin = trial_data[bins] + + # First check if there is a sig. increase - i.e., if current 2.5 and previous 97.5 do not overlap + if current_bin[2.5] >= previous_bin[0][97.5]: + trial_bin_response.update({bins: "increased"}) + + # Now check if there is a sig. decrease + elif current_bin[97.5] <= previous_bin[0][2.5]: + trial_bin_response.update({bins: "decreased"}) + + # And finally if there is no change at all + else: + trial_bin_response.update({bins: "nochange"}) + + # The replace previous bin with current bin + previous_bin = [] + previous_bin.append(current_bin) + + # Now append data for each trial to a larget list + ses_bin_response.update( + {cols[0]: trial_bin_response} + ) # contains every trial for a given session + total_bin_response.update({name: ses_bin_response}) + + # Convert nested dictionary to dataframe + df = pd.DataFrame.from_dict( + { + (i, j): total_bin_response[i][j] + for i in total_bin_response.keys() + for j in total_bin_response[i].keys() + }, + orient="index", + ) + df.index.rename(["session", "trial"], inplace=True) + df.columns.names = ["bin"] + return df + + +def bs_graph(aligned_data, binned_data, bin_comparisons, myexp): + """ + Function plots the binned data graph including period of LED onset for each trial within a given session. + + aligned_data: the dataset produced by align_trials + + binned_data: the dataset produced by per_trial_binning + + bin_comparisons: the dataset containing comparisons between bins, produced by bs_pertrial_bincomp + + myexp: the experimental cohort defined in base.py + + """ + for s, session in enumerate(myexp): + name = session.name + + ses_data = binned_data[s] + ses_values = ses_data.columns.get_level_values( + "trial" + ).unique() # Get all trial values for the session + ses_start_times = start[ + name + ] # LED on time for the session, will leave trial numbers in ses_values unsorted to ensure they align + + ses_bin_comps = bin_comparisons[ + bin_comparisons.index.get_level_values("session") == name + ] + + num_trials = len( + ses_data[ses_values[0]].columns + ) # How many trials shall be plotted + subplots = Subplots2D( + ses_values, sharex=True, sharey=True + ) # determines the number of subplots to create (as many as there are trials in the session) + + for i, value in enumerate(ses_values): + data = ses_data[value].stack().reset_index() + data["y"] = data[0] # Add a new column containing y axis values + ax = subplots.axes_flat[i] # Plot this on one graph + + # bin responses for the trial to be plotted + trial_bins = ses_bin_comps[ + ses_bin_comps.index.get_level_values("trial") == i + ] + + # Now create the actual lineplot + p = sns.lineplot(x="time", y="y", data=data, ax=ax, ci="sd") + + # Now for each of these plots, add the LED on-off time as a shaded area + p.axvspan(ses_start_times[i], 0, color="green", alpha=0.3) + + ##Now, within each trial, add shaded areas representing the binned timescales + for i, cols in enumerate(ses_bin_comps.columns): + shaded_bin = data["time"].loc[data["bin"] == ses_bin_comps.columns[i]] + bin_significance = trial_bins[cols] + + # Then plot an indicator of significance to these bins + if bin_significance.values == "nochange": + p.axvspan( + shaded_bin.values[0], + 
shaded_bin.values[-1], + color="gray", + alpha=0, + hatch=r"//", + ) # Change this alpha value to above zero to plot nonsignificant bins. + + if ( + bin_significance.values == "increased" + or bin_significance.values == "decreased" + ): + p.axvspan( + shaded_bin.values[0], + shaded_bin.values[-1], + color="red", + alpha=0.1, + hatch=r"/o", + ) + + # Now remove axis labels + p.get_yaxis().get_label().set_visible(False) + p.get_xaxis().get_label().set_visible(False) + + # Then set the x limit to be one second post trial completion + p.set(xlim=(-5, 1)) + + # Now plot the legend for each of these figures + legend = subplots.legend + + legend.text(0.3, 0.5, "Trial", transform=legend.transAxes) + legend.text( + -0.2, 0, "Firing Rate (Hz)", transform=legend.transAxes, rotation=90 + ) # y axis label + legend.text( + 0, -0.3, "Time Relative to LED Off (s)", transform=legend.transAxes + ) # x axis label + + legend.set_visible(True) + legend.get_xaxis().set_visible(False) + legend.get_yaxis().set_visible(False) + legend.set_facecolor("white") + legend.set_box_aspect(1) + + # Display the plot + plt.gcf().set_size_inches(20, 20) + + # Uncomment to save figures as pdf. + utils.save( + f"/home/s1735718/Figures/{myexp[s].name}_spikerate_LED_Range_PerTrial", + nosize=True, + ) + + +def cross_trial_bootstrap( + bin_data, CI=95, ss=20, bootstrap=10000, cache_name=None, cache_overwrite=False +): + """ + This function takes the averages across trials for a given bin as previously calculated + and samples across values, to give a series of percentiles for average firing rate in a given bin + + bin_data: the dataframe produced by bin_data_average, with bin times represented as columns + + CI: the confidence interval to calculate (default is 95%) + + ss: the sample size to randomly select from the averages (default is 20) + + bootstrap: the number of times a random sample will be taken to create a population dataset (default is 10,000) + + cache_name: the name to save the output under in cache, speeds the process if this has been run before + + cache_overwrite: whether to overwrite the saved cache with new data, (default is False) + """ + + ##Stages of this analysis are as follows: + # 1. Take individual bin + # 2. Calculate confidence interval via bootstapping + # 3. Store all bin CIs + + # Check if the function has been run before, and saved to a chache + # NB: This function caches under the name of the first session in the list! 
+ + if cache_name is not None: + # Read in data from previous run + output = myexp.sessions[0].interim / "cache" / (name + ".h5") + if output.exists() and cache_overwrite is not True: + df = ioutils.read_hdf5(myexp.sessions[0].interim / "cache" / (name + ".h5")) + return df + + # First define confidence intervals + lower = (100 - CI) / 2 # Lower confidence interval bound (default 2.5%) + upper = 100 - lower # Upper bound (default 97.5%) + percentiles = [lower, 50, upper] # The defined percentiles to analyse + cis = [] # empty list to store calculated intervals + + bin_values = bin_data.columns + keys = [] + + # Loop through each of the bin averages and calculate their confidence interval + for s, session in enumerate(myexp): + + names = session.name + ses_bins = bin_data[bin_data.index.get_level_values("session") == names] + trials = ses_bins.index.get_level_values( + "trial" + ).unique() # get the trials for a given session + + # Sample from every trial within a bin + for b in bin_values: + + # Take every trial within a bin + bin_avgs = ses_bins[b] + samples = np.array( + [[np.random.choice(bin_avgs.values, size=ss)] for j in range(bootstrap)] + ) + + # Use these samples to calculate the percentiles for each bin + medians = np.median(samples, axis=2) # Median of the samples taken + results = np.percentile( + medians, percentiles, axis=0 + ) # The percentiles, relative to the median + cis.append(pd.DataFrame(results)) + keys.append( + (names, b) + ) # Set the keys for the dataframe to be bin, session, trial + + # Now save these calculated confidence intervals to a dataframe + df = pd.concat(cis, axis=1, names=["session", "bin"], keys=keys) + df.set_index( + pd.Index(percentiles, name="percentile"), inplace=True + ) # Set the name of the index to percentiles + df.columns = df.columns.droplevel( + level=2 + ) # remove the redundant units row, as they have been averaged out + + # Finally, save this df to cache to speed up future runs + if cache_name is not None: + ioutils.write_hdf5( + myexp.sessions[0].interim / "cache" / (cache_name + ".h5"), df + ) + return df + + return df + + +def cross_trial_bincomp(bs_data, myexp): + """ + This function imports percentile data produced from bootstrapping, and compares across bins to determine if a significant change from previous states occurs + This process is repeated across trials to allow plotting of significance. + Returns a dataframe with bins classified as either increasing, decreasing or displaying little change from previous timepoint. + + bs_data: the percentile information produced by bin_data_bootstrap() + + myexp: the experimental cohort defined in base.py + """ + + total_bin_response = {} + # First import the data and split it by session, and trial. + for s, session in enumerate(myexp): + name = session.name + ses_data = bs_data[name] + ses_bin_response = {} + for t, cols in enumerate(ses_data): + + # Now that we have all the bins for a given trial, + # t count value in enumerate loop will allow comparison to previous iteration + bins = ses_data.columns + + # compare each column's 2.5% (lower) to previous column's 97.5% + # to check for sig. increase in activity + previous_bin = [] + for b, bins in enumerate(ses_data): + + # First store the initial bin to previous_bin + if len(previous_bin) == 0: + previous_bin.append(ses_data[bins]) + continue + + # Once this has been done, will compare across bins + current_bin = ses_data[bins] + + # First check if there is a sig. 
increase - i.e., if current 2.5 and previous 97.5 do not overlap + if current_bin[2.5] >= previous_bin[0][97.5]: + ses_bin_response.update({bins: "increased"}) + + # Now check if there is a sig. decrease + elif current_bin[97.5] <= previous_bin[0][2.5]: + ses_bin_response.update({bins: "decreased"}) + + # And finally if there is no change at all + else: + ses_bin_response.update({bins: "nochange"}) + + # The replace previous bin with current bin + previous_bin = [] + previous_bin.append(current_bin) + + # Now append data for each trial to a larget list + total_bin_response.update({name: ses_bin_response}) + + # Convert nested dictionary to dataframe + df = pd.DataFrame.from_dict( + total_bin_response, + orient="index", + ) + # df.index.rename(["session"], inplace=True) + df.columns.names = ["bin"] + df.index.rename("session", inplace=True) + return df + + +def percentile_range(bs_data): + """ + Function takes percentiles for given bins and calculates the range, adding this as a new index row + + bs_data: a bootstrapped dataset containing 2.5, 50, and 97.5% percentiles as index + with data organised as columns for some given bin + """ + + # Run through the set col by col + ranges = [] + + for col in bs_data: + column = bs_data[col] + perc_delta = column[97.5] - column[2.5] # Range of the percentiles + ranges.append(perc_delta) + + # Now append this as a new row + bs_data.loc["range"] = ranges + return bs_data + + +def variance_boostrapping( + bs_data, + myexp, + CI=95, + ss=20, + bootstrap=10000, + cache_name=None, + cache_overwrite=False, +): + """ + Similar to previous functions, this performs a boostrapping analysis per bin throughout a sessions trials. + However, it instead analyses if the size of the variance (ie. the range between percentiles calculated previously) differs significantly between bins + This will act as a proxy for the synchronicity of neuronal activity for a given bin. + + bs_data: the data calculated within each trial by bin_data_bootstrap (NOT Cross data bootstrap!) + + myexp: the experimental cohort defined in base.py + + CI: the confidence interval to calculate (default is 95%) + + ss: the sample size to randomly select from the averages (default is 20) + + bootstrap: the number of times a random sample will be taken to create a population dataset (default is 10,000) + + cache_name: the name to save the output under in cache, speeds the process if this has been run before + + cache_overwrite: whether to overwrite the saved cache with new data, (default is False) + + """ + # Check if the function has been run before, and saved to a chache + # NB: This function caches under the name of the first session in the list! 
+ if cache_name is not None: + # Read in data from previous run + output = myexp.sessions[0].interim / "cache" / (cache_name + ".h5") + if output.exists() and cache_overwrite is not True: + df = ioutils.read_hdf5( + myexp.sessions[0].interim / "cache" / (cache_name + ".h5") + ) + return df + + # First define confidence intervals + lower = (100 - CI) / 2 # Lower confidence interval bound (default 2.5%) + upper = 100 - lower # Upper bound (default 97.5%) + percentiles = [lower, 50, upper] # The defined percentiles to analyse + cis = [] # empty list to store calculated intervals + + keys = [] + + for s, session in enumerate(myexp): + ses_data = bs_data[s] + ses_data = ses_data.loc[ses_data.index == "range"] + ses_data = ses_data.melt() + + name = session.name + legend_names.append(name) + bin_vals = ses_data.bin.unique() + + # Sample across all trial ranges for a given bin + for b in bin_vals: + bin_data = ses_data.loc[ses_data.bin == b] + samples = np.array( + [[np.random.choice(bin_data.value, size=ss)] for j in range(bootstrap)] + ) + + # Save results of single bin boostrapping + medians = np.median(samples, axis=2) # Median of the samples taken + results = np.percentile( + medians, percentiles, axis=0 + ) # The percentiles, relative to the median + cis.append(pd.DataFrame(results)) + + keys.append((name, b)) + + # Now save these calculated confidence intervals to a dataframe + df = pd.concat(cis, axis=1, names=["session", "bin"], keys=keys) + df.set_index( + pd.Index(percentiles, name="percentile"), inplace=True + ) # Set the name of the index to percentiles + df.columns = df.columns.droplevel( + level=2 + ) # remove the redundant units row, as they have been averaged out + + if cache_name is not None: + ioutils.write_hdf5( + myexp.sessions[0].interim / "cache" / (cache_name + ".h5"), df + ) + return df + + return df + + +def bin_data_SD(bin_data): + """ + Function is similar to bin_data_mean. + + This function takes the dataframe produced by per_trial_binning() including bins as a multiindex + and calculates the standard deviation of each bin's activity within a given time period + This returns the SD of the firing rate of each unit (i.e., per session and time) + + bin_data: the dataframe produced by per_trial_binning, with predefined bins + + NB: To return a transposed version of the data (i.e., with bin averages as rows rather than columns) + add ".T" to function call. 
+ """ + + # First extract the values of each bin in bin_data + bins = bin_data.index.get_level_values( + "bin" + ) # a list of the bins created by the previous function + bin_names = ( + bins.unique() + ) # Take the unique values present, giving us the list of bin categories + ses_bins = {} + for s, session in enumerate(myexp): + name = session.name + + ses_data = bin_data[s] + trial_bins = {} + + # Trial number will give the following loop the number of times to iterate + trial_number = ses_data.columns.get_level_values("trial").unique() + # Now, take each trial and average across bins + for t in trial_number: + trial_data = ses_data[t] + individual_bins = {} + for b in bin_names: + + bin_vals = trial_data.loc[trial_data.index.get_level_values("bin") == b] + bin_st_dev = np.std( + bin_vals.values + ) # take standard deviation for all values in this trials bin + individual_bins.update({b: bin_st_dev}) + trial_bins.update({t: individual_bins}) + + ses_bins.update({name: trial_bins}) + + # Merge these SDs as a dataframe + binned_sd = pd.DataFrame.from_dict( + {(i, j): ses_bins[i][j] for i in ses_bins.keys() for j in ses_bins[i].keys()}, + orient="index", + ) + + # Take the mean of the SDs for each trial Bin + + binned_sd.index.rename(["session", "trial"], inplace=True) + binned_sd.columns.names = ["bin"] + + # Return the data + return binned_sd + + +def unit_depths(exp): + """ + Parameters + ========== + exp : pixels.Experiment + Your experiment. + + This is an updated version of the pixtools function + """ + info = exp.get_cluster_info() + depths = [] + keys = [] + + for s, session in enumerate(exp): + name = session.name + session_depths = {} + rec_num = 0 + for rec_num, probe_depth in enumerate(session.get_probe_depth()): + rec_depths = {} + rec_info = info[s] + id_key = "id" if "id" in rec_info else "cluster_id" # Depends on KS version + + for unit in rec_info[id_key]: + unit_info = rec_info.loc[rec_info[id_key] == unit].iloc[0].to_dict() + rec_depths[unit] = probe_depth - unit_info["depth"] + + session_depths = pd.DataFrame(rec_depths, index=["depth"]) + keys.append(name) + session_depths = pd.concat([session_depths], axis=1, names="unit") + depths.append(session_depths) + + return pd.concat(depths, axis=1, names=["session", "unit"], keys=keys) + + +def unit_delta(glm, myexp, bin_data, bin_duration, sig_only, percentage_change): + """ + Function takes the output of the multiple comparisons calculated by within_unit_GLM() and calculates the larges change in firing rate (or other IV measured by the GLM) between bins, per unit + This requires both experimental cohort data, and bin_data calculated by per_trial_binning() and passed through reorder levels (as "session", "unit", "trial") + Function returns a list of lists, in a hierarchy of [session[unit deltas]] + + Using this data, a depth plot may be created displaying the relative delta by unit location in pM2 + + glm: the result of the ANOVA and subsequent Tukey adjusted multiple comparisons performed by within_unit_GLM + NB: THIS IS A COMPUTATIONALLY EXPENSIVE FUNCTION, DO NOT RUN IT ON MORE THAN ONE SESSION AT A TIME + + myexp: the experimental cohort defined in base.py + + bin_data: the binned raw data computed by the per_trial_binning function + + bin_duration: the length of the bins in the data + + sig_only: whether to return only the greatest delta of significant bins or not + + percentage_change: whether to return deltas as a percentage change + + """ + ses_deltas = [] + for s, session in enumerate(myexp): + + final_deltas = [] + 
unit_deltas = [] + sigunit_comps = [] + ses_comps = glm[s] + + if sig_only is True: + # Return only significant comparisons + ses_comps = ses_comps.loc[ses_comps["p-value"] < 0.05] + + # Determine list of units remaining in comparison + units = [] + for i in ses_comps["group1"]: + units.append(i[1]) + units = np.array(units) + units = np.unique(units) + + # Now iterate through comparisons, by unit, Saving the significant comparisons to allow later calculation of delta + + for i in tqdm(units): + unit_comps = ses_comps[ + ses_comps["group1"].apply(lambda x: True if i in x else False) + ] # Lambda function checks if the value is in the tuple for group one + unit_comps = unit_comps[ + unit_comps["group2"].apply(lambda x: True if i in x else False) + ] + unit_comps.reset_index(inplace=True) + + # find row with largest difference + if unit_comps.empty: # skip if empty + continue + + unit_comps = unit_comps.sort_values("Diff", ascending=False) + ##Right around here I need to work out a way to sort by adjacent bins only + # Extract only adjacent bins, then take the largest value with this set. + for item in unit_comps.iterrows(): + # row = unit_comps["Diff"].idxmax() + row = item[1] + g1_bin, unit_num = item[1]["group1"] + g1_bin1 = round(float(g1_bin.split()[0][0:-1]), 1) + g1_bin2 = round(float(g1_bin.split()[1][0:]), 1) + + g2_bin, unit_num = item[1]["group2"] + g2_bin1 = round(float(g2_bin.split()[0][0:-1]), 1) + g2_bin2 = round(float(g2_bin.split()[1][0:]), 1) + + # Check if bins are sequential, will return the largest value where bins are next to eachother + if g2_bin1 == g1_bin2 + bin_duration: + + sigunit_comps.append([row]) + else: + continue + + # Now that we know the units with significant comparisons, take these bins from raw firing rate averages + ses_avgs = bin_data_average(bin_data[s]) + # Iterate through our significant comparisons, calculating the actual delta firing rate + for i in range(len(sigunit_comps)): + sig_comp = sigunit_comps[i] + sig_comp = pd.DataFrame(sig_comp) # convert list to dataframe + + unit = [x[1] for x in sig_comp["group1"]] + ses_unit = ses_avgs.loc[ + ses_avgs.index.get_level_values("unit") == int(unit[0]) + ].mean() # get all rows where our units firing rate is present + + # Get the bins that the signficant comparison looked at + bin_val1 = [x[0] for x in sig_comp["group1"]][0] + bin_val2 = [x[0] for x in sig_comp["group2"]][0] + + # Return the percentage change from initial state rather than raw value + if percentage_change == True: + change = ses_unit[bin_val2] - ses_unit[bin_val1] + delta = (change / ses_unit[bin_val1]) * 100 + if ses_unit[bin_val1] == 0: + continue # Skip this value, if it changes from a value of zero, this is infinite + unit_deltas.append([int(unit[0]), delta]) + # Finally, get the delta value across these bins for the given unit + elif percentage_change == False: + delta = ses_unit[bin_val2] - ses_unit[bin_val1] + unit_deltas.append([int(unit[0]), delta]) + + # Iterate through unit_deltas and remove any duplicate units + # Keeping only the units of largest values + unit_deltas = pd.DataFrame(unit_deltas, columns=["unit", "delta"]) + for j in unit_deltas["unit"].unique(): + vals = unit_deltas.loc[unit_deltas["unit"] == j] + vals = vals.sort_values("delta", key=abs, ascending=False) + + final_deltas.append(vals.iloc[0].values.tolist()) + + ses_deltas.append(final_deltas) + + return ses_deltas + + +def cross_trial_FF( + spike_times, + exp, + duration, + bin_size, + cache_name, + ff_iterations=50, + mean_matched=False, + 
raw_values=False,
+):
+    """
+    This function takes the raw spike times produced by the exp.align_trials method and calculates the Fano factor for each time bin,
+    either across the unit population (when mean_matched is True) or for individual units (when mean_matched is False).
+
+    ------------------------------------------------------------------------
+    |NB: Output of the function is saved to an h5 file as "FF_Calculation" |
+    ------------------------------------------------------------------------
+
+    spike_times: The raw spike time data produced by the align_trials method
+
+    exp: the experimental cohort defined in base.py (through the Experiment class)
+
+    duration: the window of the aligned data (in s)
+
+    bin_size: the size of the bins to split the trial duration by (in s)
+
+    cache_name: the name to cache the results under
+
+    mean_matched: Whether to apply the mean-matching adjustment (as defined by Churchland 2010) to correct for large changes in unit mean firing rates across trials.
+                  If this is False, the returned dataframe will contain unadjusted single-unit FFs
+
+    raw_values: Whether to return the raw values rather than mean FFs
+
+    ff_iterations: The number of times to repeat the random mean-matched sampling when calculating the population Fano factor (default 50)
+
+    """
+    # First create the bins that the data will be categorised into
+    duration = duration * 1000  # convert s to ms
+    bin_size = bin_size * 1000
+    bins = np.arange((-duration / 2), (duration / 2), bin_size)
+    all_FF = pd.DataFrame()
+
+    # First split data by session
+    for s, session in enumerate(exp):  # iterate over the exp argument rather than the global myexp
+        ses_data = spike_times[s]
+        ses_data = ses_data.reorder_levels(["unit", "trial"], axis=1)
+        name = session.name
+        session_FF = {}
+        session_se = {}
+        session_ci = {}
+
+        repeat_FF = pd.DataFrame()
+        repeat_se = pd.DataFrame()
+        repeat_ci = pd.DataFrame()
+
+        # Bin the session data
+        ses_vals = ses_data.values
+        bin_ids = np.digitize(ses_vals[~np.isnan(ses_vals)], bins, right=True)
+
+        # Then by unit
+        unit_bin_means = []
+        unit_bin_var = []
+
+        print(f"beginning mean/variance calculation for session {name}")
+        keys = []
+        for t in tqdm(ses_data.columns.get_level_values("unit").unique()):
+            unit_data = ses_data[t]  # take each unit's values for all trials
+            unit_data = (
+                unit_data.melt().dropna()
+            )  # Pivot this data and drop missing values
+
+            # Once these values have been isolated, we can begin the FF calculation.
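+            # The Fano factor of a bin is the variance of the per-trial spike count
+            # divided by its mean, so the loop below accumulates both statistics for
+            # every time bin of this unit. For example, counts of [3, 5, 4, 4] across
+            # four trials give a mean of 4.0 and a variance of 0.5, i.e. FF = 0.125.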
+ unit_data["bin"] = np.digitize(unit_data["value"], bins, right=True) + + bin_count_means = [] + bin_count_var = [] + + for b in np.unique(bin_ids): + bin_data = unit_data.loc[unit_data["bin"] == b] + + # Calculate the count mean of this bin, i.e., the average number of events per trial + bin_count_mean = ( + bin_data.groupby("trial").count().mean()["value"] + ) # the average number of spikes per unit in this bin + + # If there are any bins where no spikes occurred (i.e, empty dataframes) then return zero + if np.isnan(bin_count_mean): + bin_count_mean = 0 + + bin_count_means.append(bin_count_mean) + + # Now calculate variance of the event count + bin_var = np.var(bin_data.groupby("trial").count()["value"]) + + # Again return zero if no events + if np.isnan(bin_var): + bin_var = 0 + + bin_count_var.append(bin_var) + + # These values will make up a single point on the scatterplots used to calculate cross-unit FF + # Append to list containing values for each unit, across trials + + unit_bin_means.append(bin_count_means) + unit_bin_var.append(bin_count_var) + keys.append(t) + + # Now convert this to a dataframe + unit_bin_means = pd.DataFrame.from_records(unit_bin_means, index=keys) + unit_bin_means.index.name = "unit" + unit_bin_means.columns.name = "bin" + + unit_bin_var = pd.DataFrame.from_records(unit_bin_var, index=keys) + unit_bin_var.index.name = "unit" + unit_bin_var.columns.name = "bin" + + # can now use this information to calculate FF, and include any potential adjustments needed. + # To do so, shall create a linear regression to calculate FF, for each bin + + if mean_matched is True: + bin_heights = {} + """ + First calculate the greatest common distribution for this session's data + 1. Calculate the distribution of mean counts for each timepoint (binned trial time) + 2. Take the smallest height of each bin across timepoints, this will form the height of the GCD's corresponding bin + 3. Save this GCD to allow further mean matching + """ + + # Take each bin, and unit, then calculate the distribution of mean counts + for b in unit_bin_means.columns: + + # Isolate bin values, across units + bin_means = unit_bin_means[b] + + # Calculate distribution for this time, across units + p = np.histogram(bin_means, bins=9) + + # Save the heights of these bins for each bin + bin_heights.update( + {b: p[0]} + ) # append to a dictionary, where the key is the bin + + # Convert this dictionary to a dataframe, containing the heights of each timepoints distribution + bin_heights = pd.DataFrame.from_dict( + bin_heights, orient="index" + ) # the index of this dataframe will be the binned timepoints + gcd_heights = ( + bin_heights.min() + ) # get the minimum value for each "bin" of the histogram across timepoints + gcd_heights.index += 1 + + # Now may begin mean matching to the gcd + # Take each bin (i.e., timepoint) and calculate the distribution of points + # TODO: Update this analysis to utilise a rolling bin approach. 
I.e, take data at 100ms intervals, then shift the bin 10ms right + + print(f"Initiating Mean Matched Fano Factor Calculation for Session {name}") + for iteration in tqdm(range(ff_iterations)): + np.random.seed(iteration) + + for b in unit_bin_means.columns: + timepoint_means = pd.DataFrame(unit_bin_means[b]) + timepoint_edges = pd.DataFrame( + np.histogram(timepoint_means, bins=9)[1] + ) # the left edges of the histogram used to create a distribution + timepoint_var = unit_bin_var[b] + + # split data into bins + binned_data = pd.cut( + np.ndarray.flatten(timepoint_means.values), + np.ndarray.flatten(timepoint_edges.values), + labels=timepoint_edges.index[1:], + include_lowest=True, + ) # seperate raw data into the distribution bins + + timepoint_means["bin"] = binned_data + + # Isolate a single bin from the timepoint histogram + timepoint_FF_mean = {} + timepoint_FF_var = {} + + for i in range(len(timepoint_edges.index)): + if i == 0: + continue + # Bins begin at one, skip first iteration + repeat = True + bin_data = timepoint_means.loc[timepoint_means["bin"] == i] + + while repeat == True: + + # Check if bin height matches gcd height - if it does, append timepoint data to list + + if bin_data.count()["bin"] == gcd_heights[i]: + timepoint_FF_mean.update( + {i: bin_data[bin_data.columns[0]]} + ) # append the bin, and unit/mean count + repeat = False + + # If not, remove randomly points from the dataset one at a time, until they match + else: + dropped_data = np.random.choice( + bin_data.index, 1, replace=False + ) + bin_data = bin_data.drop( + dropped_data + ) # remove one datapoint, then continue with check + + ##Now extract the associated variance for the timepoint + timepoint_FF_mean = pd.DataFrame.from_dict(timepoint_FF_mean) + timepoint_FF_mean = timepoint_FF_mean.melt( + ignore_index=False + ).dropna() + timepoint_FF_var = timepoint_var[ + timepoint_var.index.isin(timepoint_FF_mean.index) + ] + + # Using the count mean and variance of the count, construct a linear regression for the population + # Ensure the origin of the linear model is set as zero + + # Set up data in correct shape + x = np.array(timepoint_FF_mean["value"]).reshape( + (-1, 1) + ) # mean count, the x axis + y = np.array(timepoint_FF_var) + + # Now fit model + timepoint_model = sm.OLS(y, x).fit() + + # The slope (i.e., coefficient of variation) of this model + ff_score = timepoint_model.params[0] + + # Extract the confidence interval of this model fit + ff_se = timepoint_model.bse[0] + ff_ci = timepoint_model.conf_int()[0] + + session_FF.update({b: ff_score}) + session_se.update({b: ff_se}) + session_ci.update({b: tuple(ff_ci)}) + + # Convert the session's FF to dataframe, then add to a master + session_FF = pd.DataFrame(session_FF, index=[0]) + session_se = pd.DataFrame(session_se, index=[0]) + session_ci = pd.DataFrame(session_ci.items()) + + repeat_FF = pd.concat([repeat_FF, session_FF], axis=0) + repeat_se = pd.concat([repeat_se, session_se], axis=0) + repeat_ci = pd.concat([repeat_ci, session_ci], axis=0) + + session_FF = {} + session_se = {} + session_ci = {} + + # Take the average across repeats for each session, add this to a final dataframe for session, timepoint, mean_FF, SE + + for i, cols in enumerate(repeat_FF.iteritems()): + timepoint_vals = repeat_FF.describe()[i] + timepoint_se = repeat_se.describe()[i] + + ci_bins = repeat_ci[0].tolist() + timepoint_ci = pd.DataFrame(repeat_ci[1].tolist()) + timepoint_ci = timepoint_ci.rename( + columns={0: "lower_ci", 1: "upper_ci"} + ).reset_index(drop=True) + 
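+            # Re-attach the bin labels stored in ci_bins so the lower/upper confidence
+            # intervals can be grouped and averaged per timepoint below.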
timepoint_ci.insert(0, "bin", ci_bins) + + mean = timepoint_vals["mean"] + se = timepoint_se["mean"] + lower_ci = timepoint_ci.loc[timepoint_ci["bin"] == i].describe()[ + "lower_ci" + ]["mean"] + upper_ci = timepoint_ci.loc[timepoint_ci["bin"] == i].describe()[ + "upper_ci" + ]["mean"] + + if raw_values == False: + vals = pd.DataFrame( + { + "session": name, + "bin": i, + "mean FF": mean, + "SE": se, + "lower_ci": lower_ci, + "upper_ci": upper_ci, + }, + index=[0], + ) + all_FF = pd.concat([all_FF, vals], ignore_index=True) + elif raw_values == True: + repeat_FF = repeat_FF.melt() + repeat_se = repeat_se.melt() + + repeat_FF["session"] = name + repeat_FF = repeat_FF.set_index("session") + repeat_FF = repeat_FF.rename( + columns={"value": "FF", "variable": "bin"} + ) + + repeat_se["session"] = name + repeat_se = repeat_se.set_index("session") + repeat_FF["se"] = repeat_se["value"] + repeat_FF["lower_ci"] = timepoint_ci["lower_ci"] + repeat_FF["upper_ci"] = timepoint_ci["upper_ci"] + + all_FF = pd.concat([all_FF, repeat_FF]) + break + + elif mean_matched is False: + # Remember, this will return the individual FF for each unit, not the population level FF + # calculate raw FF for each unit + session_FF = pd.DataFrame(columns=["bin", "FF", "session", "unit"]) + for unit in unit_bin_means.index: + unit_FF = pd.DataFrame( + unit_bin_var.loc[unit] / unit_bin_means.loc[unit] + ).rename( + columns={unit: "FF"} + ) # return all bins for this unit + unit_FF = unit_FF.reset_index() + unit_FF["session"] = name + unit_FF["unit"] = unit + + session_FF = pd.concat([session_FF, unit_FF]) + + # Reorder columns, then return data + session_FF = session_FF[["session", "unit", "bin", "FF"]] + session_FF.reset_index(drop=True) + all_FF = pd.concat([all_FF, session_FF], axis=0) + + ioutils.write_hdf5( + f"/data/aidan/{cache_name}_FF_Calculation_mean_matched_{mean_matched}_raw{raw_values}.h5", + all_FF, + ) + return all_FF + + +def fano_fac_sig(population_FF): + """ + Function checks for significance in fano factor output, assuming the data is NOT raw, and mean-matched. + Returns a list of significance values (sig or nonsig) for each bin, compared to the previous timepoint. + + population_FF: the data obtained from the cross_trial_FF() function, where mean_matched is True, and raw_data is False. 
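+
+    NB: a bin is reported as "significant" when its confidence interval does not overlap
+    that of the preceding bin, so the returned list holds one entry per bin after the
+    first (this assumes the dataframe contains a single session ordered by bin).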
+ """ + significance = [] + for i, items in enumerate(population_FF.iterrows()): + if i == 0: + continue + + bin_upper = items[1]["upper_ci"] + bin_lower = items[1]["lower_ci"] + + previous_upper = population_FF.loc[population_FF["bin"] == i - 1]["upper_ci"] + previous_lower = population_FF.loc[population_FF["bin"] == i - 1]["lower_ci"] + + # Compare bins for overlap + if (bin_upper <= previous_lower.item()) or (bin_lower >= previous_upper.item()): + significance.append("significant") + + else: + significance.append("nonsignificant") + + return significance diff --git a/projects/Aidan_Analysis/FullAnalysis/granulardepths_VR46_VR59.csv b/projects/Aidan_Analysis/FullAnalysis/granulardepths_VR46_VR59.csv new file mode 100755 index 0000000..7df34be --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/granulardepths_VR46_VR59.csv @@ -0,0 +1,12 @@ +MouseID,LayerV_Depth +VR46,0.26 +VR47,0.285 +VR49,0.341 +VR50,0.34 +VR59,0.439 +VR52,0.222 +VR55,0.584 +VR53,0.505 +VR56,0.297 +VR58,0.262 +VR54,0.417 diff --git a/projects/Aidan_Analysis/FullAnalysis/neuron_types.py b/projects/Aidan_Analysis/FullAnalysis/neuron_types.py new file mode 100644 index 0000000..0652360 --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/neuron_types.py @@ -0,0 +1,211 @@ +#%% +##This script simply produces a pie chart of the distribution of neuron types +# Import required packages +import enum +from xml.etree.ElementInclude import include +from cv2 import multiply, rotate +from matplotlib.pyplot import colormaps, title, ylabel, ylim +import numpy as np +import pandas as pd +import seaborn as sns + +import sys +from base import * +from channeldepth import * +from functions import event_times, per_trial_spike_rate +from functions import unit_depths + +sys.path.append("/home/s1735718/PixelsAnalysis/pixels-analysis") +sys.path.insert(0, "/home/s1735718/PixelsAnalysis/pixels/pixels") +from pixtools.utils import Subplots2D # use the local copy of base.py +from pixtools import utils +from pixtools import spike_rate +from pixels import ioutils +from pixels import PixelsError +from matplotlib.axes import Axes + +##Now select units for pyramidal and interneurons +all_units = myexp.select_units(group="good", name="m2", max_depth=1200, min_depth=200) + +# select all pyramidal neurons +pyramidal_units = get_pyramidals(myexp) + +# and all interneurons +interneuron_units = get_interneurons(myexp) + + +# Then align trials +all_aligned = myexp.align_trials( + ActionLabels.correct, Events.led_off, "spike_rate", duration=10, units=all_units +) + +pyramidal_aligned = myexp.align_trials( + ActionLabels.correct, + Events.led_off, + "spike_rate", + duration=10, + units=pyramidal_units, +) + +interneuron_aligned = myexp.align_trials( + ActionLabels.correct, + Events.led_off, + "spike_rate", + duration=10, + units=interneuron_units, +) + +#%% +# Now plot a chart containing a count of all neuron types per session +# To do so, will require a count of neuronal units, only need to do this once +ses_counts = [] +raw_data = [] +for s, session in enumerate(myexp): + name = session.name + + ses_pyramidals = pyramidal_aligned[s] + ses_interns = interneuron_aligned[s] + + pyr_count = pd.DataFrame( + [ + name, + "pyramidal", + len(ses_pyramidals.columns.get_level_values("unit").unique()), + ] + ).T + + pyr_num = pd.DataFrame( + [ + name, + "pyramidal", + ses_pyramidals.columns.get_level_values("unit").unique(), + ] + ).T + ses_counts.append(pyr_count) + raw_data.append(pyr_num) + + int_count = pd.DataFrame( + [ + name, + "interneuron", + 
len(ses_interns.columns.get_level_values("unit").unique()), + ] + ).T + + int_num = pd.DataFrame( + [ + name, + "interneuron", + ses_interns.columns.get_level_values("unit").unique(), + ] + ).T + + ses_counts.append(int_count) + raw_data.append(int_num) + +ses_counts = pd.concat(ses_counts) +ses_counts.rename(columns={0: "session", 1: "neuron", 2: "count"}, inplace=True) + +ses_raw_data = pd.concat(raw_data) +ses_raw_data.rename(columns={0: "session", 1: "neuron", 2: "unit"}, inplace=True) +ses_raw_data = ses_raw_data.explode("unit").reset_index(drop=True) +#%% +# plot these as stacked bar +# plt.rcParams.update({"font.size": 30}) + +# plot_dat = ( +# ses_raw_data.groupby(["session", "neuron"]) +# .size() +# .reset_index() +# .pivot(columns="neuron", index="session") +# ) +# plot_dat.plot(kind="bar", stacked=True, color=["pink", "cornflowerblue"]) +# plt.legend( +# labels=["Interneuron", "Pyramidal"], +# bbox_to_anchor=(1.22, 1.1), +# title="Neuronal Type", +# ) + +# plt.suptitle("Proportion of Neuronal Types Within pM2") +# plt.xlabel("Recording Session & Mouse ID (SessionDate_MouseID)") +# plt.ylabel("Number of Recorded Neurons") +# plt.xticks(rotation=45) +# plt.gcf().set_size_inches(10, 10) + + +# Uncomment below to save as pdf +# utils.save( +# f"/home/s1735718/Figures/NeuronalTypeCountPlot", +# nosize=True, +# ) + + +#%% + +####Will also plot a depth graph of neuron type#### +####Like the previous graph but Y is depth rather than count, x is session again#### +##Get all unit depths for the experiment +depths = unit_depths(myexp) +all_unit_depths = [] + +for s, session in enumerate(myexp): + name = session.name + ses_depths = depths[name] + ses_pyramidals = pyramidal_aligned[s] + ses_int = interneuron_aligned[s] + + pyr_depths = ses_depths[ses_pyramidals.columns.get_level_values("unit").unique()] + int_depths = ses_depths[ses_int.columns.get_level_values("unit").unique()] + + # melt then merge these datasets to allow plotting + pyr_depths = pyr_depths.melt() + pyr_depths["type"] = "pyramidal" + + int_depths = int_depths.melt() + int_depths["type"] = "interneuron" + + units = pd.concat([pyr_depths, int_depths]).reset_index(drop=True) + units["session"] = name + + all_unit_depths.append(units) + +all_unit_depths = pd.concat(all_unit_depths, axis=0).reset_index(drop=True) +# %% +# Plot these depths by session and type +plt.rcParams.update({"font.size": 50}) + +p = sns.stripplot( + x="session", + y="value", + hue="type", + data=all_unit_depths, + s=15, + linewidth=1, + palette="bright", +) + +plt.xticks(rotation=90) +plt.xlabel("Recording Session") +plt.ylabel("pM2 Depth (μm)") +plt.ylim(0) +plt.gca().invert_yaxis() # invert the y axis, make understanding easier + + +# Plot lines showing lower bound of pM2 + +plt.axhline(y=1200, color="black", ls="--") +plt.text(1.55, 1200, "pM2 Lower Bound") + +plt.legend(bbox_to_anchor=(1, 1), title="Neuron Type") +plt.gcf().set_size_inches(15, 15) +plt.suptitle("Neuronal Types by Location in pM2") + +# Save Figure then Show +utils.save( + "/home/s1735718/Figures/neurontype_by_depth", + nosize=True, +) + +plt.show() + +# %% diff --git a/projects/Aidan_Analysis/FullAnalysis/noisebase.py b/projects/Aidan_Analysis/FullAnalysis/noisebase.py new file mode 100644 index 0000000..39c845e --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/noisebase.py @@ -0,0 +1,50 @@ +import json + +import datetime +import matplotlib.pyplot as plt +import seaborn as sns +import pandas as pd + +from pixels import Experiment +from pixels.behaviours.leverpush import 
LeverPush +from pixels.behaviours.pushpull import PushPull +from pixels.behaviours.reach import Reach +from pixels.behaviours.no_behaviour import NoBehaviour + +import sys +sys.path.append("/home/s1735718/PixelsAnalysis/pixels-analysis") +from pixtools import utils +from pixtools import clusters + +from base import * + + +noise_tests = Experiment( + ["noisetest1"], + NoBehaviour, + "~/duguidlab/visuomotor_control/neuropixels", +) + +noise_test_unchanged = Experiment( + ["noisetest_unchanged"], + NoBehaviour, + "~/duguidlab/visuomotor_control/neuropixels", +) + +noise_test_nopi = Experiment( + ["noisetest_nopi"], + NoBehaviour, + "~/duguidlab/visuomotor_control/neuropixels", +) + +noise_test_no_caps = Experiment( + "VR50", + Reach, + "~/duguidlab/visuomotor_control/neuropixels", +) + +#This contains the list of experiments we want to plot noise for, here only interested in the reaching task +#Remember to change this in base.py +exps = { + "reaching": myexp +} \ No newline at end of file diff --git a/projects/Aidan_Analysis/FullAnalysis/noiseplot_By_Channel_Stripplot.py b/projects/Aidan_Analysis/FullAnalysis/noiseplot_By_Channel_Stripplot.py new file mode 100644 index 0000000..327c120 --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/noiseplot_By_Channel_Stripplot.py @@ -0,0 +1,92 @@ +# This script will assess the noise in the recordings from the cortex, to allow us to decide which recordings to use +# Import required packages and data from base files +# NB: I changed how unit_depths() works in my local copy to remove the reference to rec_num + + + +from turtle import fd + +from matplotlib.pyplot import suptitle, xlabel +from base import * +from noisebase import * +from channeldepth import * +from matplotlib.cm import ScalarMappable + + +import numpy as np +import pandas as pd +import seaborn as sns + + +# Now select the experiment we want to use +# Here we are taking the data from the 17th. + + +# Now from this data, give us all units that are good and within the cortex +units = myexp.select_units(group="good", max_depth=1200, name="m2") + +# #Now that we have the units selected, let us assess the noise +myexp.assess_noise() + +# now read these dataframes in from the json files just created +# If we want to change the data read in, simply change the experimental call in noisebase! +# May also add more experiments to the list here if desired! + +noise = [] +for s, session in enumerate(myexp): + for i in range(len(session.files)): + path = session.processed / f"noise_{i}.json" + with path.open() as fd: + ses_noise = json.load(fd) + date = datetime.datetime.strptime(session.name[:6], "%y%m%d") + for SD in ses_noise["SDs"]: + noise.append((session.name, date, name, SD)) + + +df = pd.DataFrame(noise, columns=["session", "date", "project", "SDs", "median SD"]) + +# Now read in the metadata containing channel coordinates and convert it to dataframe format + +depth1 = meta_spikeglx( + myexp, 0 +) # give us data for session1, session two will be the same so perhaps we can simply clone this?? 
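+# meta_spikeglx() returns the probe layout read by probeinterface; its dataframe form is
+# assumed below to carry one row per channel with the x/y electrode coordinates, which is
+# what the (currently commented-out) concatenation with df2 further down would rely on.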
+depth1 = depth1.to_dataframe() + +depth = pd.concat([depth1, depth1.copy()], axis=0) + +# ##Can use the code below to print the individual datapoints rather than just the median, with each point representing the noise from each channel## +# #First requires we convert the dataframe to longform (Check this once the fixed noise assessment has completed running) + +df2 = df.set_index("date")["SDs"].apply(pd.Series).stack() +df2 = df2.reset_index() +df2.columns = ["date", "channel", "SDs"] +print(df2) +# #Now add the required coordinates from the metadata +# df2=pd.concat([df2, depth], axis=1).reset_index() +# print(df2) + + +# Channel depth is simply organised in order of the noise json, so let us simply use the channel column in df2 as a binned category +fig, ax = plt.subplots() +p = sns.stripplot( + x="date", y="SDs", data=df2, palette="Spectral", ax=ax, hue="channel" +).set(title="Noise for Subject VR59") + +ax.set_xticklabels( + [t.get_text().split("T")[0] for t in ax.get_xticklabels()] +) # Remove zeroes from date line +ax.legend_.remove() # Remove the huge autogenerated legend + +# Now will add a colour bar to change hue based on channel coordinates + + +# This code adds a legend for the colourmap +cmap = plt.get_cmap("Spectral") +norm = plt.Normalize(df2["channel"].min(), df2["channel"].max()) +sm = ScalarMappable(norm=norm, cmap=cmap) +sm.set_array([]) +cbar = fig.colorbar(sm, ax=ax) +cbar.ax.set_title('"Channel Num"') + + +plt.show() diff --git a/projects/Aidan_Analysis/FullAnalysis/noiseplot_SD_Clustering.py b/projects/Aidan_Analysis/FullAnalysis/noiseplot_SD_Clustering.py new file mode 100644 index 0000000..634bc3b --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/noiseplot_SD_Clustering.py @@ -0,0 +1,251 @@ +#%% +# Unlike other noiseplot file (by channel) this file will cluster the SDs, allowing for a mean square analysis +# If there are distinct clusters able to be seperated by depth, it will indicate that there is a clear relationship to noise +# TODO: Test Functionality after noise analysis is complete, push to pixtools. +# First import required packages + +import json +import datetime +from turtle import fd + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import seaborn as sns +from pixels import Experiment +from pixels.behaviours.leverpush import LeverPush +from pixels.behaviours.no_behaviour import NoBehaviour +from pixels.behaviours.pushpull import PushPull +from pixels.behaviours.reach import Reach +from sklearn.cluster import KMeans + +from base import * +from channeldepth import * +from tqdm import tqdm +import os +from pathlib import Path + +import sys + +sys.path.append("/home/s1735718/PixelsAnalysis/pixels-analysis") +sys.path.insert(0, "/home/s1735718/PixelsAnalysis/pixels/pixels") +from pixtools.utils import Subplots2D +from pixtools import clusters, utils + +myexp.assess_noise() + + +def noise_per_channeldepth(myexp): + """ + Function extracts the noise for each channel, combining this into a dataframe + + myexp: the experiment defined in base.py, will extract the depth information from here. + """ + noise = pd.DataFrame( + columns=["session", "project", "SDs", "x", "y"] + ) # Create the empty array to hold the noise information + depths = meta_spikeglx(myexp, 0) + depths = depths.to_dataframe() + coords = depths[ + ["x", "y"] + ] # Create a dataframe containing the generic x and y coords. + tot_noise = [] + + # Iterate through each session, taking the noise for each file and loading them into one continuous data frame. 
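+    # Each noise_<i>.json is assumed to hold an "SDs" list with one value per channel
+    # (384 recording channels plus a final sync channel, which is excluded below).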
+ for s, session in enumerate(myexp): + for i in tqdm(range(len(session.files))): + path = session.processed / f"noise_{i}.json" + with path.open() as fd: + ses_noise = json.load(fd) + + chan_noises = [] + for j, SD in enumerate( + ses_noise["SDs"][0:-1] + ): # This will iterate over first 384 channels, and exclude the sync channel + x = coords["x"].iloc[j] + y = coords["y"].iloc[j] + noise_row = pd.DataFrame.from_records( + {"session": [session.name], "SDs": [SD], "x": x, "y": y} + ) + chan_noises.append(noise_row) + + # Take all datafrom channel noises for a session, then concatenate + noise = pd.concat(chan_noises) + tot_noise.append(noise) # Take all channel noises and add to a master file + df2 = pd.concat( + tot_noise + ) # Convert this master file, containing every sessions noise data into a dataframe + + return df2 + + +df2 = noise_per_channeldepth(myexp) + +# Now that we have both noise and depth information for every channel, we may perform a means clustering +# Initiate the kmeans class, describing the parameters of our analysis +# First double check how many clusters are required to best describe the data +# Reshape the array to allow clustering + + +# Now determine the optimal number of clusters +def elbowplot(data, myexp): + + """ + + This function takes data formatted according to the function above, containing the noise values for all channels + Will iterate through each experimental session, producing the appropriate graph. Should take the optimal number of clusters as the point at which the elbow bends. + This point is defined as the boundary where additional clusters no longer explain much more variance in the data. + + data: The dataframe, as formatted by noise_per_channel() + + myexp: The experiment, defined in base.py containing the session information. + + """ + + for s, session in enumerate(myexp): + name = session.name + ses_data = data.loc[data["session"] == name] + df3 = ses_data["SDs"].values.reshape( + -1, 1 + ) # Just gives all noise values, for each session + Sum_of_squares = [] # create an empty list to store these in. + + k = range(1, 10) + for num_clusters in k: + kmeans = KMeans(n_clusters=num_clusters) + kmeans.fit(df3) + Sum_of_squares.append(kmeans.inertia_) + + fig, ax = plt.subplots() + + # This code will plot the elbow graph to give an overview of the variance in the data explained by the varying the number of clusters + # This gives the distance from the centroids, as a measure of the variability explained + # We want this to drop off indicating that there is no remaining data explained by further centroid inclusion + + # Figure has two rows, one columns, this is the first plot + plt.plot(k, Sum_of_squares, "bx-") # bx gives blue x as each point. + plt.xlabel("Putative Number of Clusters") + plt.ylabel("Sum of Squares Distances/Inertia") + plt.title( + f"Determining Optimal Number of Clusters for Analysis - Session {name}" + ) + utils.save(f"/home/s1735718/Figures/{myexp[s].name}_elbow_plot") + plt.show() + + +elbowplot(df2, myexp) + +# Must now define kmeans parameters based on elbow analysis! +# Seems that 2 clusters is still optimal across datasets + +kmeans = KMeans( + init="random", # Initiate the iterative analysis with random centres + n_clusters=2, # How many clusters to bin the data into, based on the elbow analysis! 
+ n_init=10, # Number of centroids to generate initially + max_iter=300, # Max number of iterations before ceasing analysis + random_state=42, # The random number seed for centroid generation, can really be anything for our purposes +) + +# Plot the kmeans clustering by depth (y coord) with hue representing generated clusters. +for s, session in enumerate(myexp): + name = session.name + + ses = df2.loc[df2["session"] == name] + df3 = ses["SDs"].values.reshape(-1, 1) + x_kmeans = kmeans.fit(df3) + y_means = kmeans.fit_predict(df3) + + # Now plot the kmeans analysis + # Remember we use our original data (df2) but use the df3 analysis to generate the labels + plt.scatter(ses["y"], ses["SDs"], c=y_means, cmap="viridis") + + plt.xlabel("Probe Channel Y-Coordinate") + plt.ylabel("Channel Noise (SD)") + plt.title(f"{myexp[s].name} Channel Noise k-Mean Clustering Analysis") + + # Save figures to folder + utils.save(f"/home/s1735718/Figures/{myexp[s].name}_noise_clustering") + plt.show() + +# Extract the k means values and plot these as a histogram +# Where we want to plot only values within the brain across sessions +all_means = pd.DataFrame() +all_data = pd.DataFrame() +for s, session in enumerate(myexp): + name = session.name + + ses = df2.loc[df2["session"] == name] + df3 = ses["SDs"].values.reshape(-1, 1) + x_kmeans = kmeans.fit(df3) + y_means = kmeans.fit_predict(df3) + + # Add this classification to the main session info + ses["cluster"] = y_means + + # Determine which cluster is on average deeper (lower y value) + # This will be the within brain cluster + mean = ses.groupby("cluster").mean() + mins = mean.idxmin()["y"] # give the cluster where depth is average higher + + # Take mean of this cluster + inbrain = ses.loc[ses["cluster"] == mins] + inbrain_mean = inbrain.mean()["SDs"] + + ses_means = pd.DataFrame({"session": [name], "mean_SD": [inbrain_mean]}) + + # Now concatenate this into a single dataframe + all_means = pd.concat([all_means, ses_means], ignore_index=True, axis=0) + all_data = pd.concat([all_data, inbrain], ignore_index=True, axis=0) +# use this data to plot a histogram of only values considered "within brain" by k-means clustering + +# plot histogram +p = sns.histplot(data=all_means, x="mean_SD") + +#%% +# Now will determine the depth of the boundary between inbrain and outbrain clusters for each session +# Will extract the y coordinate for these points, then convert to real depth +brain_boundaries = pd.DataFrame(columns=["session", "y coordinate", "probe depth"]) +for s, session in enumerate(myexp): + name = session.name + + ses_data = all_data.loc[all_data["session"] == name] + + # Sort data to find boundary + ses_data.sort_values("y", ascending=False, inplace=True) + + # Append values to dataframe + ses_vals = pd.DataFrame(ses_data.iloc[0][["session", "y"]].values).T + ses_vals = ses_vals.rename(columns={0: "session", 1: "y coordinate"}) + + # Add probe depth as a new column + #ses_vals["probe depth"] = session.get_probe_depth()[0] + # Calculate actual depth + brain_boundaries = pd.concat( + [brain_boundaries, ses_vals], ignore_index=True, axis=0 + ) + + +# %% +# Take file below that contains probe depths, rename it to depths.txt.histology +# Recreate it using y coordinate value for each mouse, nothing else +# DO NOT UNCOMMENT, WILL OVERWRITE SAVED OLD VERSION + +# session.processed /"depths.txt" + +# for s, session in enumerate(myexp): +# name = session.name + +# ses_data = brain_boundaries.loc[brain_boundaries["session"] == name] + +# # open path, rename file +# 
#os.rename(Path(session.processed / "depth.txt"), Path(session.processed / "depth.txt.histology")) + +# # Create depths.txt +# with open(Path(session.processed / "depth.txt"), "w") as f: + +# #Write the probe y value to the file +# f.write(str(ses_data["y coordinate"].values[0])) +# f.write("\n") + + +# %% diff --git a/projects/Aidan_Analysis/FullAnalysis/params.py b/projects/Aidan_Analysis/FullAnalysis/params.py new file mode 100644 index 0000000..6e983f2 --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/params.py @@ -0,0 +1,6 @@ +dat_path = '/home/mcolliga/duguidlab/visuomotor_control/neuropixels/processed/220217_VR59/sorted_stream_0/temp_wh.dat' +n_channels_dat = 384 +dtype = 'int16' +offset = 0 +sample_rate = 30000 +hp_filtered = True \ No newline at end of file diff --git a/projects/Aidan_Analysis/FullAnalysis/phaseI_Visualisation.py b/projects/Aidan_Analysis/FullAnalysis/phaseI_Visualisation.py new file mode 100644 index 0000000..71eb618 --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/phaseI_Visualisation.py @@ -0,0 +1,451 @@ +# Import required packages +from argparse import Action +from base import * +import matplotlib.pyplot as plt +import matplotlib.patches as mpatches +from matplotlib.ticker import MultipleLocator +import matplotlib as mpl +import numpy as np +import pandas as pd +import seaborn as sns +import scipy.stats as sc +import sys +from channeldepth import meta_spikeglx +import statsmodels.api as sm +from statsmodels.formula.api import ols +from reach import Cohort +from xml.etree.ElementInclude import include +from matplotlib.pyplot import legend, title, ylabel, ylim +import matplotlib.lines as mlines +from tqdm import tqdm +from pixels import ioutils +from pixtools import utils +from textwrap import wrap + +from functions import ( + event_times, + unit_depths, + per_trial_binning, + event_times, + within_unit_GLM, + bin_data_average, + # unit_delta, #for now use a local copy +) + +#%% +# import datafile calculated previously +data = ioutils.read_hdf5("/data/aidan/concatenated_session_information.h5") +# test = ioutils.read_hdf5("/data/aidan/concatenated_session_information_test.h5") +# Add layer V data +layer_vals = pd.read_csv("granulardepths_VR46_VR59.csv") +#%% +# Visualise the data as a depth plot, showing units above and below layer V boundary for each session +fig, ax = plt.subplots(2, 1, sharex=True) +fig.set_size_inches(10, 10) + +p = sns.swarmplot( + data=data, x="depth", y="session", hue="Depth Classification", ax=ax[1] +) +p.set_xlim(0) +ax[1].legend(loc="upper right", bbox_to_anchor=(0, 1.7), title="Layer Class") + +# Plot the overall distribution of points across depths. 
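+# (One KDE per depth classification on the upper axis; it shares its x axis with the
+# swarm plot below so the two panels line up over the same depth range.)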
+q = sns.kdeplot( + data=data, x="depth", ax=ax[0], hue="Depth Classification", legend=False +) +q.set_ylabel("") +q.set_yticklabels("") +q.set(yticks=[]) + +plt.subplots_adjust(wspace=0, hspace=0) +plt.suptitle("Distribution of Recorded Units Across pM2") + +#%% +# Plot the proportion of each neuron type by up/downregulation +# As a stacked bar chart +# first isolate up/downreg units +delta_types = [] +for i in data.iterrows(): + if i[1]["delta"] > 0: + delta_types.append("upregulated") + + if i[1]["delta"] < 0: + delta_types.append("downregulated") + +data.insert(3, "delta type", delta_types) +# now plot these proportions, as a percentage +vals = data[["session", "delta type", "delta significance"]] +change = [] +for i in vals.iterrows(): + if i[1]["delta significance"] == "non-significant": + change.append("no-change") + if (i[1]["delta significance"] == "significant") & ( + i[1]["delta type"] == "downregulated" + ): + change.append("downregulated") + if (i[1]["delta significance"] == "significant") & ( + i[1]["delta type"] == "upregulated" + ): + change.append("upregulated") +vals["change"] = change +vals.drop(columns=["delta type", "delta significance"]) + +vals = ( + vals.groupby(["session", "change"]) + .size() + .reset_index() + .pivot(columns="change", index="session", values=0) +) +vals["upregulated"] = vals["upregulated"].fillna(0) +vals["downregulated"] = vals["downregulated"].fillna(0) +vals["no-change"] = vals["no-change"].fillna(0) +val_props = pd.DataFrame(index=vals.index) +val_props["upreg_%"] = ( + vals["upregulated"] + / (vals["upregulated"] + vals["downregulated"] + vals["no-change"]) +) * 100 +val_props["downreg_%"] = ( + vals["downregulated"] + / (vals["upregulated"] + vals["downregulated"] + vals["no-change"]) +) * 100 +val_props["nochange_%"] = ( + vals["no-change"] + / (vals["upregulated"] + vals["downregulated"] + vals["no-change"]) +) * 100 +val_props.plot(kind="bar", stacked=True) + +plt.suptitle("Proportion of Responsive Units in pM2") +utils.save(f"/home/s1735718/Figures/AllSessionUnitResponsiveness", nosize=True) + +#%% +# Now shall plot delta firing rates by depth, as a violin plot +# NB: this may require binning depth to prevent pseudoreplication when collapsing across sessions +# Plot two seperate subplots for each session them merge, for up and downregulated data +pyr = data.loc[data["type"] == "pyramidal"] +inter = data.loc[data["type"] == "interneuron"] +pyr["all"] = 1 +inter["all"] = 1 +# first plot pyramidals +p = sns.violinplot( + data=pyr, + y="depth", + x="session", + hue="delta type", + split=True, + palette={"upregulated": "orange", "downregulated": "blue"}, +) +for i, violin in enumerate(p.findobj(mpl.collections.PolyCollection)): + violin.set_hatch("//") + +p2 = sns.violinplot( + data=inter, + y="depth", + x="session", + hue="delta type", + split=True, + palette={"upregulated": "orange", "downregulated": "blue"}, +) + +plt.setp(p.collections, alpha=0.3) + +# Now set legend +down = mpatches.Patch(facecolor="blue", alpha=0.5, label="Downregulated") + +up = mpatches.Patch(facecolor="orange", alpha=0.5, label="Upregulated") + +pyr_leg = mpatches.Patch(facecolor="grey", hatch="//", alpha=0.5, label="Pyramidals") + +inter_leg = mpatches.Patch(facecolor="grey", alpha=0.5, label="Interneurons") +plt.legend( + handles=[down, up, pyr_leg, inter_leg], + bbox_to_anchor=(1, 1), + title="Activity Change/Cell Type", + fontsize=15, +) + +plt.suptitle("Proportion of Significantly Changing Units During Grasp") +plt.gca().set_ylim(0, 1200) 
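+# Inverting the y axis below puts deeper units lower in the figure; the 0-1200 um limit
+# matches the max_depth used when selecting pM2 units earlier in this analysis.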
+plt.gca().invert_yaxis() +plt.gca().set_ylabel("pM2 Depth (um)") +plt.gca().set_xlabel("Session") +plt.xticks(rotation=90) + + +plt.show() + + +# Now plot the point at which layer V occurs + +# %% +# A similar plot that may be of use is to instead plot a scatter with histogram overlaid +# Reuse scatter plot code, adjust it + +for s, session in enumerate(myexp): + name = session.name + ses_data = data.loc[data["session"] == name] + sigs = ses_data.loc[ses_data["delta significance"] == "significant"] + nosigs = ses_data.loc[ses_data["delta significance"] == "non-significant"] + + fig = plt.figure() + ax = fig.add_subplot(111, label="1") + + # Finally, plot a graph of deltas relative to zero, by depth + sns.scatterplot( + x="delta", + y="depth", + data=sigs, + # size="average firing rate", + # sizes=(40, 400), + s=100, + linewidth=1, + style="type", + color="#972c7f", + alpha=0.5, + ax=ax, + legend=False, + ) + + ###################################################################### + # ##Now overlay a kde plot over these points + # ##Uncomment to plot this version + # up_kde = sns.kdeplot( + # data=sigs.loc[sigs["delta type"] == "upregulated"], + # y="depth" + # ).get_lines()[0].get_data() + + # #Extract density data, and scale it to the order of magnitude plotted (i.e., x10^5) + # #Plot these values + + # up_kde=pd.DataFrame(up_kde, index=["density", "depth"]) + # up_kde.loc["density"] = up_kde.loc["density"].apply(lambda x: x*30000) + # up_kde = up_kde.T + # sns.scatterplot(data = up_kde, x="density", y = "depth") + + # down_kde = sns.kdeplot( + # data=sigs.loc[sigs["delta type"] == "downregulated"], + # y="depth" + # ).get_lines()[1].get_data() + + # down_kde=pd.DataFrame(down_kde, index=["density", "depth"]) + # down_kde.loc["density"] = down_kde.loc["density"].apply(lambda x: x*-30000) #times by -1 to allow inverted plotting of kde plot + # down_kde = down_kde.T + # sns.scatterplot(data = down_kde, x="density", y = "depth") + + ###################################################################### + + ##Uncomment below to plot histogram over points. 
+ # ax2=fig.add_subplot(111, label="2", frame_on=False) + # ax3 = fig.add_subplot(111, label="2", frame_on=False) + # up_hist = sns.histplot( + # data=sigs.loc[sigs["delta type"] == "upregulated"], y = "depth", binwidth=100, binrange=(0,1300), ax=ax2, legend=False + # ) + + # ax2.xaxis.tick_top() + # ax2.set_xlim(-3,3) + # ax2.axes.xaxis.set_visible(False) + # ax2.axes.yaxis.set_visible(False) + # ax2.set_ylabel("") + + # down_hist = sns.histplot( + # data=sigs.loc[sigs["delta type"] == "downregulated"], y = "depth", binwidth=100, binrange=(0,1300), ax=ax3, legend=False + # ) + + # ax3.invert_yaxis() + # ax3.xaxis.tick_top() + # ax3.set_xlim(3,-3) + # ax3.axes.xaxis.set_visible(False) + # ax3.axes.yaxis.set_visible(False) + + ###################################################################### + + # p2 = sns.scatterplot( + # x="delta", + # y="depth", + # data=nosigs, + # size="average firing rate", + # sizes=(40, 400), + # s=100, + # linewidth=1, + # style="type", + # color="#221150", + # ) + + ax.axvline(x=0, color="green", ls="--") + + ax.set_xlim(-60, 60) + ax.set_ylim(0, 1300) + + ax.set_xlabel("Δ Firing Rate (Hz)", fontsize=20) + ax.set_ylabel("Depth of Recorded Neuron (μm)", fontsize=20) + plt.xticks(fontsize=25) + plt.yticks(fontsize=25) + plt.suptitle( + "\n".join( + wrap( + f"Largest Trial Firing Rate Change by pM2 Depth - Session {name}", + width=30, + ) + ), + y=1.1, + fontsize=20, + ) + + # Create lengend manually + # Entries for legend listed below as handles: + sig_dot = mlines.Line2D( + [], + [], + color="#221150", + marker="o", + linestyle="None", + markersize=10, + label="Significant", + ) + nonsig_dot = mlines.Line2D( + [], + [], + color="#972c7f", + marker="o", + linestyle="None", + alpha=0.5, + markersize=10, + label="Non-Significant", + ) + grey_dot = mlines.Line2D( + [], + [], + color="grey", + marker="o", + linestyle="None", + markersize=10, + label="Pyramidal", + ) + grey_cross = mlines.Line2D( + [], + [], + color="grey", + marker="X", + lw=5, + linestyle="None", + markersize=10, + label="Interneuronal", + ) + + plt.legend( + handles=[ # sig_dot, + # nonsig_dot, + grey_dot, + grey_cross, + ], + bbox_to_anchor=(1.7, 1), + title="Significance (p < 0.05)", + fontsize=15, + ) + + # Now invert y-axis and save + ax.invert_yaxis() + + # utils.save( + # f"/home/s1735718/Figures/{name}_DeltaFR_byDepth", + # nosize=True, + # ) + plt.show() + +# %% +# Plot only the KDE values + +for s, session in enumerate(myexp): + name = session.name + ses_data = data.loc[data["session"] == name] + sigs = ses_data.loc[ses_data["delta significance"] == "significant"] + nosigs = ses_data.loc[ses_data["delta significance"] == "non-significant"] + + fig = plt.figure() + ax = fig.add_subplot(111, label="1") + ax2 = fig.add_subplot(111, label = "2", frame_on=False) + ###################################################################### + # ##Now overlay a kde plot over these points + # ##Uncomment to plot this version + up_kde = sns.kdeplot( + data=sigs.loc[sigs["delta type"] == "upregulated"], + y="depth", ax = ax, color = "blue" + ) + + down_kde = sns.kdeplot( + data=sigs.loc[sigs["delta type"] == "downregulated"], + y="depth", ax = ax2, color = "orange" + ) + + ax2.set_xlim(-0.002, 0.002) + ax2.invert_xaxis() + ax2.axes.xaxis.set_visible(False) + ax2.axes.yaxis.set_visible(False) + + + plt.axvline(x=0, color="green", ls="--") + ax.set_xlim(-0.002, 0.002) + ax.set_ylim(0, 1300) + ax.invert_yaxis() + ax2.invert_yaxis() + + ax.set_xlabel("Likelihood of Unit Presence", fontsize=20) + 
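+    # ax and ax2 occupy the same figure position, so these labels describe both the
+    # upregulated density (opening rightwards) and the mirrored downregulated density
+    # (opening leftwards once ax2's x axis is inverted).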
ax.set_ylabel("Depth Across pM2 (μm)", fontsize=20) + ax.tick_params(axis='both', which='major', labelsize=20) + ax.yaxis.set_ticks(np.arange(0, 1300, 200)) + + plt.suptitle( + "\n".join( + wrap( + f"Kernel Density Estimate of Responsive Unit Type - Session {name}", + width=30, + ) + ), + y=1.1, + fontsize=20, + ) + + + + # Create lengend manually + # Entries for legend listed below as handles: + upreg = mlines.Line2D( + [], + [], + color="blue", + marker="o", + linestyle="None", + markersize=10, + label="Upregulated Units", + ) + + downreg = mlines.Line2D( + [], + [], + color="orange", + marker="o", + linestyle="None", + markersize=10, + label="Downregulated Units", + ) + + plt.legend( + handles=[ + upreg, + downreg, + ], + bbox_to_anchor=(1.7, 1), + title="Significance (p < 0.05)", + fontsize=15, + ) + + # Now invert y-axis and save + plt.gca().invert_yaxis() + + # utils.save( + # f"/home/s1735718/Figures/{name}_DeltaFR_byDepth", + # nosize=True, + # ) + plt.show() + +# %% +# Furthermore, will now plot a ball and stick graph showing the actual changes in firing rates and the point at which these deltas occur diff --git a/projects/Aidan_Analysis/FullAnalysis/phaseI_pm2_map.py b/projects/Aidan_Analysis/FullAnalysis/phaseI_pm2_map.py new file mode 100644 index 0000000..3bec38f --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/phaseI_pm2_map.py @@ -0,0 +1,575 @@ +""" +This file contains the first section of my full analysis, it will aim to do the following: + +1. Determine if the distribution of pyramidal to interneuron type alignes with known data. This will give an idea of the rate of undersampling by the probe +2. Concatenate as much information surrounding unit activity as possible. i.e., type, activity, statistically significant changes, depth etc. 
+ This will be concatenated and saved as a dataframe with the following structure: + + --------------------------------------------------------------------------------- + |Session | Unit No | Delta Change | Depth (um) | Supra/Infragranular | Neuron Type| Delta Significance| + --------------------------------------------------------------------------------- +""" + +# First import required packages +from argparse import Action +from base import * +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import seaborn as sns +import scipy.stats as sc +import sys +from channeldepth import meta_spikeglx +import statsmodels.api as sm +from statsmodels.formula.api import ols +from reach import Cohort +from xml.etree.ElementInclude import include +from matplotlib.pyplot import legend, title, ylabel, ylim +import matplotlib.lines as mlines +from tqdm import tqdm +from pixels import ioutils + +from functions import ( + event_times, + unit_depths, + per_trial_binning, + event_times, + within_unit_GLM, + bin_data_average, + # unit_delta, #for now use a local copy +) + +#%% +# First select units based on spike width analysis +all_units = myexp.select_units(group="good", name="m2", min_depth=200, max_depth=1200) + +pyramidals = get_pyramidals(myexp) +interneurons = get_interneurons(myexp) + + +# Then import the known neuronal type distributions from csv +known_units = pd.read_csv("cell_atlas_neuron_distribution_M2.csv", index_col=False) + +#%% +# Now align Trials +per_unit_aligned = myexp.align_trials( + ActionLabels.correct, Events.led_off, "spike_rate", duration=2, units=all_units +) + +left_aligned = myexp.align_trials( + ActionLabels.correct_left, Events.led_off, "spike_rate", duration=2, units=all_units +) + +right_aligned = myexp.align_trials( + ActionLabels.correct_right, + Events.led_off, + "spike_rate", + duration=2, + units=all_units, +) + +pyramidal_aligned = myexp.align_trials( + ActionLabels.correct, Events.led_off, "spike_rate", duration=2, units=pyramidals +) +interneuron_aligned = myexp.align_trials( + ActionLabels.correct, Events.led_off, "spike_rate", duration=2, units=interneurons +) + +#%% +##Now that we have determined the optimal sessions to take from each mouse +# Create a dataframe of neuron type and depth + +# First bin data, then shift hierachy +bin_data = per_trial_binning(per_unit_aligned, myexp, timespan=2, bin_size=0.1) +bin_data.reorder_levels(["session", "unit", "trial"], axis=1) + +# Then perform an ANOVA +glm = within_unit_GLM(bin_data, myexp) + +# Finally take average firing rate +fr_avgs = per_unit_aligned.mean() +fr_avgs = fr_avgs.groupby(["session", "unit"]).mean() + +#%% +# This cell is to serve as an interactive method of altering the deltas function +# The aim is to allow selection of the type of delta +# I.e., first, or largest + + +def unit_delta(glm, myexp, bin_data, sig_only, delta_type, percentage_change): + """ + Function takes the output of the multiple comparisons calculated by within_unit_GLM() and calculates the change in firing rate (or other IV measured by the GLM) between bins, per unit + This requires both experimental cohort data, and bin_data calculated by per_trial_binning() and passed through reorder levels (as "session", "unit", "trial") + Function returns a list of lists, in a hierarchy of [session[unit deltas]] + + Using this data, a depth plot may be created displaying the relative delta by unit location in pM2. 
+ + glm: the result of the ANOVA and subsequent Tukey adjusted multiple comparisons performed by within_unit_GLM + NB: THIS IS A COMPUTATIONALLY EXPENSIVE FUNCTION, DO NOT RUN IT ON MORE THAN ONE SESSION AT A TIME + + myexp: the experimental cohort defined in base.py + + bin_data: the binned raw data computed by the per_trial_binning function + + delta_type: the type of delta change to return, i.e., "largest", "first", or "last" + + sig_only: whether to return only the greatest delta of significant bins or not + + percentage_change: whether to return deltas as a percentage change + + """ + ses_deltas = [] + print("beginning unit delta calculation") + for s, session in enumerate(myexp): + + unit_deltas = [] + sigunit_comps = [] + ses_comps = glm[s] + + if sig_only is True: + # Return only significant comparisons + ses_comps = ses_comps.loc[ses_comps["p-value"] < 0.05] + + # Determine list of units remaining in comparison + units = [] + for i in ses_comps["group1"]: + units.append(i[1]) + units = np.array(units) + units = np.unique(units) + + # Now iterate through comparisons, by unit, Saving the significant comparisons to allow later calculation of delta + + for i in tqdm(units): + unit_comps = ses_comps[ + ses_comps["group1"].apply(lambda x: True if i in x else False) + ] # Lambda function checks if the value is in the tuple for group one + unit_comps = unit_comps[ + unit_comps["group2"].apply(lambda x: True if i in x else False) + ] + unit_comps.reset_index(inplace=True) + + # Now determine which delta to obtain + try: + delta_type == "largest" or delta_type == "first" or delta_type == "last" + + if delta_type == "largest": + + # find row with largest difference + if unit_comps.empty: # skip if empty + continue + + row = unit_comps["Diff"].idxmax() + sigunit_comps.append(unit_comps[unit_comps.index == row]) + + if delta_type == "first": + + if unit_comps.empty: + continue + + row = unit_comps[ + unit_comps.index == unit_comps.index[0] + ] # get the first delta that occurs + sigunit_comps.append(row) # append this to the list + + if delta_type == "last": + + if unit_comps.empty: + continue + + row = unit_comps[unit_comps.index == unit_comps.index[-1]] + + except: + print("Invalid Delta_Type Specified") + break + + # Now that we know the units with significant comparisons, take these bins from raw firing rate averages + ses_avgs = bin_data_average(bin_data[s]) + + # Iterate through our significant comparisons, calculating the actual delta firing rate + for i in range(len(sigunit_comps)): + sig_comp = sigunit_comps[i] + + unit = [x[1] for x in sig_comp["group1"]] + ses_unit = ses_avgs.loc[ + ses_avgs.index.get_level_values("unit") == int(unit[0]) + ].mean() # get all rows where our units firing rate is present + + # Get the bins that the signficant comparison looked at + bin_val1 = [x[0] for x in sig_comp["group1"]][0] + bin_val2 = [x[0] for x in sig_comp["group2"]][0] + + # Return the percentage change from initial state rather than raw value + if percentage_change == True: + change = ses_unit[bin_val2] - ses_unit[bin_val1] + delta = (change / ses_unit[bin_val1]) * 100 + if ses_unit[bin_val1] == 0: + continue # Skip this value, if it changes from a value of zero, this is infinite + print(delta) + unit_deltas.append( + [ + int(unit[0]), + delta, + ses_unit[bin_val2], + ses_unit[bin_val1], + bin_val2, + bin_val1, + ] + ) + # Finally, get the delta value across these bins for the given unit + elif percentage_change == False: + delta = ses_unit[bin_val2] - ses_unit[bin_val1] + 
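+                    # Raw difference between the two compared bins, as opposed to the
+                    # percentage change computed in the branch above.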
unit_deltas.append( + [ + int(unit[0]), + delta, + ses_unit[bin_val2], + ses_unit[bin_val1], + bin_val2, + bin_val1, + ] + ) + + ses_deltas.append(unit_deltas) + + return ses_deltas + + +largest_sig_deltas = unit_delta( + glm, myexp, bin_data, sig_only=True, delta_type="largest", percentage_change=False +) + +deltas = unit_delta( + glm, myexp, bin_data, sig_only=False, delta_type="largest", percentage_change=False +) +#%% +# Now concatenate this information into a single dataframe +# TODO: Add the ability to extract the bin beginnings and ends including firing rates to this function. + +depths = unit_depths(myexp) +layer_info = pd.read_csv("granulardepths_VR46_VR59.csv") + + +def unit_info_concatenation( + average_fr, + sig_deltas, + all_deltas, + depths, + layer_info, + pyramidal_aligned, + interneuron_aligned, + myexp, +): + + """ + This function takes all previously calculated information surrounding unit activity, type, depth, etc. and concatenates it as a single dataframe + Requires all of these values are previously determined using predefined functions shown + + average_fr: Average firing rate for each unit, across trials, organised in long form as a multiindexed dataframe (session->unit) + + sig_deltas: Significant delta changes to firing rate (or SD) determined by within_unit_glm + + all_deltas: All delta changes, regardless of significance, determined by the same within_unit_glm function + + depths: Depths of all units across sessions, determined by unit_depths + + layer_info: Dataframe containing the granular layer boundaries defined through histological analyses + + pyramidal_aligned: Only pyramidal units, aligned to the same event, determined by myexp.align_trials + + interneuron_aligned: Only interneuronal units, aligned to the same event + + myexp: The experimental cohort defined in base.py + + """ + unit_info = pd.DataFrame() + for s, session in enumerate(myexp): + name = session.name + ses_deltas = sig_deltas[s] + + ses_deltas = pd.DataFrame( + ses_deltas, + columns=[ + "unit", + "delta", + "start value", + "end value", + "start bin", + "end bin", + ], + ) + ses_depths = depths[name] + + # Determine depths of units + unit_depth = ses_depths[ses_deltas["unit"]].melt() + ses_deltas = pd.concat([ses_deltas, unit_depth["value"]], axis=1) + ses_deltas.rename(columns={"value": "depth"}, inplace=True) + + # Do the same for nonsignificant units + ses_nosig_deltas = all_deltas[s] + ses_nosig_deltas = pd.DataFrame( + ses_nosig_deltas, + columns=[ + "unit", + "delta", + "start value", + "end value", + "start bin", + "end bin", + ], + ) + + # Find depths of associated units + unit_nosig_depth = ses_depths[ses_nosig_deltas["unit"]].melt() + ses_nosig_deltas = pd.concat( + [ses_nosig_deltas, unit_nosig_depth["value"]], axis=1 + ) + ses_nosig_deltas.rename(columns={"value": "depth"}, inplace=True) + # Now remove any significant values + ses_nosig_deltas = ses_nosig_deltas[ + ~ses_nosig_deltas["unit"].isin(ses_deltas["unit"]) + ].dropna() + ses_nosig_deltas = ses_nosig_deltas.reset_index(drop=True) + + ##Then determine the type of neuron these units are (i.e., pyramidal or interneuron) + pyr_units = pyramidal_aligned[s].melt()["unit"].unique() + int_units = interneuron_aligned[s].melt()["unit"].unique() + dicts = {} + + unit_types = [] + for item_sig in ses_deltas["unit"]: + if item_sig in pyr_units: + unit_types.append("pyramidal") + if item_sig in int_units: + unit_types.append("interneuron") + if item_sig not in pyr_units and item_sig not in int_units: + unit_types.append("neuron 
unclassified") + + nosig_unit_types = [] + for item in ses_nosig_deltas["unit"]: + if item in pyr_units: + nosig_unit_types.append("pyramidal") + if item in int_units: + nosig_unit_types.append("interneuron") + if item not in pyr_units and item not in int_units: + nosig_unit_types.append("neuron unclassified") + + # Append unit_types as a new column + # Will try and append this as a new column, to see where the extra value lies + unit_types = pd.DataFrame(unit_types) + ses_deltas = pd.concat([ses_deltas, unit_types], ignore_index=True, axis=1) + ses_deltas.rename( + columns={ + 0: "unit", + 1: "bin1 val", + 2: "bin2 val", + 3: "delta", + 4: "start bin", + 5: "end bin", + 6: "depth", + 7: "type", + }, + inplace=True, + ) + ses_deltas["delta significance"] = "significant" + ses_deltas.insert(0, "session", "") + ses_deltas["session"] = name + + nosig_unit_types = pd.DataFrame(nosig_unit_types) + ses_nosig_deltas = pd.concat( + [ses_nosig_deltas, nosig_unit_types], ignore_index=True, axis=1 + ) + ses_nosig_deltas.rename( + columns={ + 0: "unit", + 1: "bin1 val", + 2: "bin2 val", + 3: "delta", + 4: "start bin", + 5: "end bin", + 6: "depth", + 7: "type", + }, + inplace=True, + ) + ses_nosig_deltas["delta significance"] = "non-significant" + ses_nosig_deltas.insert(0, "session", "") + ses_nosig_deltas["session"] = name + # Concatenate sig and nonsig deltas as unit types + session_info = pd.concat( + [ses_deltas, ses_nosig_deltas], ignore_index=True, axis=0 + ) + + # Now iterate through session info by row, classifying depth into supra or infragranular + # Will use the layer_info to do so + mouse_id = session.name[-4:] + mouse_boundary = ( + layer_info.loc[layer_info["MouseID"] == mouse_id]["LayerV_Depth"].item() + * 1000 + ) # get value and convert from mm to um + + unit_depth_class = [] + for index, row in session_info.iterrows(): + if row["depth"] < mouse_boundary: + unit_depth_class.append("Supragranular") + + if row["depth"] > mouse_boundary: + unit_depth_class.append("Infragranular") + + # Append this to the session_info as a new column (after depth) + session_info.insert(4, "Depth Classification", unit_depth_class) + + # Now add firing rates + ses_rates = average_fr[s].reindex( + index=session_info["unit"] + ) # reorder this to match out data + session_info.insert( + len(session_info.columns), "average firing rate", ses_rates.values + ) + + # Finally, concatenate this info for the session into the master dataframe to be returned + unit_info = pd.concat([unit_info, session_info], axis=0, ignore_index=True) + + return unit_info + + +data = unit_info_concatenation( + fr_avgs, + largest_sig_deltas, + deltas, + depths, + layer_info, + pyramidal_aligned=pyramidal_aligned, + interneuron_aligned=interneuron_aligned, + myexp=myexp, +) + +# %% +# Now finally, will export this data as a saved file using utils.save +ioutils.write_hdf5("/data/aidan/concatenated_session_information.h5", data) + + +# %% + + +# def tester( +# average_fr, +# sig_deltas, +# all_deltas, +# depths, +# layer_info, +# myexp, +# ): + +# """ +# This function takes all previously calculated information surrounding unit activity, type, depth, etc. 
and concatenates it as a single dataframe +# Requires all of these values are previously determined using predefined functions shown + +# average_fr: Average firing rate for each unit, across trials, organised in long form as a multiindexed dataframe (session->unit) + +# sig_deltas: Significant delta changes to firing rate (or SD) determined by within_unit_glm + +# all_deltas: All delta changes, regardless of significance, determined by the same within_unit_glm function + +# depths: Depths of all units across sessions, determined by unit_depths + +# layer_info: Dataframe containing the granular layer boundaries defined through histological analyses + +# pyramidal_aligned: Only pyramidal units, aligned to the same event, determined by myexp.align_trials + +# interneuron_aligned: Only interneuronal units, aligned to the same event + +# myexp: The experimental cohort defined in base.py + +# """ +# unit_info = pd.DataFrame() +# for s, session in enumerate(myexp): +# name = session.name +# ses_deltas = sig_deltas[s] + +# ses_deltas = pd.DataFrame(ses_deltas, columns=["unit", "delta"]) +# ses_depths = depths[name] + +# # Determine depths of units +# unit_depth = ses_depths[ses_deltas["unit"]].melt() +# ses_deltas = pd.concat([ses_deltas, unit_depth["value"]], axis=1) +# ses_deltas.rename(columns={"value": "depth"}, inplace=True) + +# # Do the same for nonsignificant units +# ses_nosig_deltas = all_deltas[s] +# ses_nosig_deltas = pd.DataFrame(ses_nosig_deltas, columns=["unit", "delta"]) + +# # Find depths of associated units +# unit_nosig_depth = ses_depths[ses_nosig_deltas["unit"]].melt() +# ses_nosig_deltas = pd.concat( +# [ses_nosig_deltas, unit_nosig_depth["value"]], axis=1 +# ) +# ses_nosig_deltas.rename(columns={"value": "depth"}, inplace=True) +# # Now remove any significant values +# ses_nosig_deltas = ses_nosig_deltas[ +# ~ses_nosig_deltas["unit"].isin(ses_deltas["unit"]) +# ].dropna() +# ses_nosig_deltas = ses_nosig_deltas.reset_index(drop=True) + +# # Append unit_types as a new column +# # Will try and append this as a new column, to see where the extra value lies +# unit_types = pd.DataFrame(unit_types) +# ses_deltas.rename(columns={0: "unit", 1: "delta", 2: "depth"}, inplace=True) +# ses_deltas["delta significance"] = "significant" +# ses_deltas.insert(0, "session", "") +# ses_deltas["session"] = name + +# nosig_unit_types = pd.DataFrame(nosig_unit_types) +# ses_nosig_deltas.rename( +# columns={0: "unit", 1: "delta", 2: "depth"}, inplace=True +# ) +# ses_nosig_deltas["delta significance"] = "non-significant" +# ses_nosig_deltas.insert(0, "session", "") +# ses_nosig_deltas["session"] = name +# # Concatenate sig and nonsig deltas as unit types +# session_info = pd.concat( +# [ses_deltas, ses_nosig_deltas], ignore_index=True, axis=0 +# ) + +# # Now iterate through session info by row, classifying depth into supra or infragranular +# # Will use the layer_info to do so +# mouse_id = session.name[-4:] +# mouse_boundary = ( +# layer_info.loc[layer_info["MouseID"] == mouse_id]["LayerV_Depth"].item() +# * 1000 +# ) # get value and convert from mm to um + +# unit_depth_class = [] +# for index, row in session_info.iterrows(): +# if row["depth"] < mouse_boundary: +# unit_depth_class.append("Supragranular") + +# if row["depth"] > mouse_boundary: +# unit_depth_class.append("Infragranular") + +# # Append this to the session_info as a new column (after depth) +# session_info.insert(4, "Depth Classification", unit_depth_class) + +# # Now add firing rates +# ses_rates = average_fr[s].reindex( +# 
index=session_info["unit"] +# ) # reorder this to match out data +# session_info.insert( +# len(session_info.columns), "average firing rate", ses_rates.values +# ) + +# # Finally, concatenate this info for the session into the master dataframe to be returned +# unit_info = pd.concat([unit_info, session_info], axis=0, ignore_index=True) + +# return unit_info + + +# test = tester( +# fr_avgs, +# largest_sig_deltas, +# deltas, +# depths, +# layer_info, +# myexp=myexp, +# ) +ioutils.write_hdf5("/data/aidan/concatenated_session_information_test.h5", test) +# %% diff --git a/projects/Aidan_Analysis/FullAnalysis/processing.py b/projects/Aidan_Analysis/FullAnalysis/processing.py new file mode 100644 index 0000000..3b1662e --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/processing.py @@ -0,0 +1,25 @@ +#Import our experimental data from base + +from base import * + +#The first time this is run it will take a while!! There is a lot of data to go through and many steps## +#It will pull from raw data and output to processed data. + +#First we will select only the recording from the 17th and assign this to a variable + + +#First run Kilosort on data to prepare for phy curation +#Remember to open phy after to generate the cluster info file!! +myexp.sort_spikes() + +# #Then process behavioural data +myexp.process_behaviour() + +# # #Then alicn, crop, and downsample spike data +myexp.process_spikes() + +# # #Now do the same with local field potential data +myexp.process_lfp() + +#Opetionally analyse the noise of the recordings +myexp.assess_noise() \ No newline at end of file diff --git a/projects/Aidan_Analysis/FullAnalysis/raster_plots_VR59.py b/projects/Aidan_Analysis/FullAnalysis/raster_plots_VR59.py new file mode 100644 index 0000000..66b8c10 --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/raster_plots_VR59.py @@ -0,0 +1,57 @@ +#First import experimental data from base.py +from cProfile import label + +from matplotlib.pyplot import subplots +from base import * + + +#Now add the pixtools directory to the path to allow us to use the modules within +sys.path.append("/home/s1735718/PixelsAnalysis/pixels-analysis") +from pixtools.utils import Subplots2D + + +#Then import the data to allow raster plotting +from rasterbase import * + +#Now let us plot the raster info for the 17th, per UNIT + + +unit1 = myexp.select_units( + group="good", + max_depth=1500, + name="cortex" +) + + + +#Now take all spike timees for all left hand events +session1_L = myexp.align_trials( + ActionLabels.correct_left, + Events.led_on, + "spike_times", + duration=4, + units = unit1 +) + +#And now as right hand +session1_R = myexp.align_trials( + ActionLabels.correct_right, + Events.led_on, + "spike_times", + duration=4, + units = unit1 +) + + +#Now plot left and right as subplots, remember we are taking session 2 here (by indexing into session) +subplot1 = per_unit_raster(session1_L[1], sample=None, start=0, subplots=None, label=True) + +per_unit_raster( + session1_R[1], + sample=None, + start=0, + subplots=subplot1, + label=True +) + +plt.show() diff --git a/projects/Aidan_Analysis/FullAnalysis/rasterbase.py b/projects/Aidan_Analysis/FullAnalysis/rasterbase.py new file mode 100644 index 0000000..eebf078 --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/rasterbase.py @@ -0,0 +1,165 @@ +#First import required packages from base. 
+ + +from base import * #Import our experiment and metadata needed +import sys +import seaborn as sns +import matplotlib.pyplot as plt +import numpy as np +import random +from math import ceil + +#Add the location of pixtools to the path +# sys.path.append("/home/s1735718/PixelsAnalysis/pixels-analysis") #Adds the location of the pixtools folder to the path +# from pixtools.utils import Subplots2D #Allows us to use the custom class designed to give the figs/axes from a set of subplots that will fit the data. This generates as many subplots as required in a square a shape as possible + +#Now import our sorted data functions + + +#Defines a function that will return plots of EITHER units per trial or trials per unit (i.e., inverted datasets) +#To do so, specify "trial" or "unit" as the per value +def _raster(per, data, sample, start, subplots, label): + per_values = data.columns.get_level_values(per).unique() + per_values.values.sort() + + if per == "trial": + data = data.reorder_levels(["trial", "unit"], axis=1) + other = "unit" + else: + other = "trial" + + # units if plotting per trial, trials if plotting per unit + samp_values = list(data.columns.get_level_values(other).unique().values) + if sample and sample < len(samp_values): + sample = random.sample(samp_values, sample) + else: + sample = samp_values + + if subplots is None: + subplots = Subplots2D(per_values) + + palette = sns.color_palette("pastel") #Set the colours for the graphs produced + + for i, value in enumerate(per_values): + val_data = data[value][sample] #Requires I index into the data when assembling plots (see plots.py) + val_data.columns = np.arange(len(sample)) + start + val_data = val_data.stack().reset_index(level=1) + + p = sns.scatterplot( + data=val_data, + x=0, + y='level_1', + ax=subplots.axes_flat[i], + s=0.5, + legend=None, + ) + p.set_yticks([]) + p.set_xticks([]) + p.autoscale(enable=True, tight=True) + p.get_yaxis().get_label().set_visible(False) + p.get_xaxis().get_label().set_visible(False) + p.text( + 0.95, 0.95, + value, + horizontalalignment='right', + verticalalignment='top', + transform=subplots.axes_flat[i].transAxes, + color='0.3', + ) + p.axvline(c=palette[2], ls='--', linewidth=0.5) + + if not subplots.axes_flat[i].yaxis_inverted(): + subplots.axes_flat[i].invert_yaxis() + + if label: #Chane our axis labels, here to trial number and time from the reach (which the data is aligned to!) + to_label = subplots.to_label + to_label.get_yaxis().get_label().set_visible(True) + to_label.set_ylabel('Trial Number') + to_label.get_xaxis().get_label().set_visible(True) + to_label.set_xlabel('Time from reach (s)') + + # plot legend subplot + legend = subplots.legend + legend.text( + 0, 0.9, + 'Trial number' if per == 'trial' else 'Unit ID', + transform=legend.transAxes, + color='0.3', + ) + legend.set_visible(True) + legend.get_xaxis().set_visible(False) + legend.get_yaxis().set_visible(False) + legend.set_facecolor('white') + + return subplots + +#Using this function we may now plot the graphs we desire! +#First let us define a function to plot spikes per unit + +def per_unit_raster(data, sample=None, start=0, subplots=None, label=True): + """ + Plots a spike raster for every unit in the given data with trials as rows. + + data is a multi-dimensional dataframe as returned from + Experiment.align_trials(, , 'spike_times') indexed into to get the + session and recording. 
+ """ + return _raster("unit", data, sample, start, subplots, label) + +#And per trial +def per_trial_raster(data, sample=None, start=0, subplots=None, label=True): + """ + Plots a spike raster for every trial in the given data with units as rows. + + data is a multi-dimensional dataframe as returned from + Experiment.align_trials(, , 'spike_times') indexed into to get the + session and recording. + """ + return _raster("trial", data, sample, start, subplots, label) + +#Now finally define a function that allows plotting a single unit as a raster +def single_unit_raster(data, ax=None, sample=None, start=0, unit_id=None): + # units if plotting per trial, trials if plotting per unit + samp_values = list(data.columns.get_level_values('trial').unique().values) + if sample and sample < len(samp_values): + sample = random.sample(samp_values, sample) + else: + sample = samp_values + + val_data = data[sample] + val_data.columns = np.arange(len(sample)) + start + val_data = val_data.stack().reset_index(level=1) + + if not ax: + ax = plt.gca() + + p = sns.scatterplot( + data=val_data, + x=0, + y='level_1', + ax=ax, + s=1.5, + legend=None, + edgecolor=None, + ) + p.set_yticks([]) + p.set_xticks([]) + p.autoscale(enable=True, tight=True) + p.get_yaxis().get_label().set_visible(False) + p.get_xaxis().get_label().set_visible(False) + + palette = sns.color_palette() + p.axvline(c='black', ls='--', linewidth=0.5) + + if not ax.yaxis_inverted(): + ax.invert_yaxis() + + if unit_id is not None: + p.text( + 0.95, 0.95, + unit_id, + horizontalalignment='right', + verticalalignment='top', + transform=ax.transAxes, + color='0.3', + ) diff --git a/projects/Aidan_Analysis/FullAnalysis/rugplotbase.py b/projects/Aidan_Analysis/FullAnalysis/rugplotbase.py new file mode 100644 index 0000000..c16c6ac --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/rugplotbase.py @@ -0,0 +1,150 @@ +#This file contains the modified plot function to return a rugplot and firing rate +from base import * +import seaborn as sns +import matplotlib.pyplot as plt +import numpy as np +from math import ceil +import sys + +sys.path.append("/home/s1735718/PixelsAnalysis/pixels-analysis") +from pixtools.utils import Subplots2D +from mpl_toolkits.axes_grid1 import make_axes_locatable + + +def _plot(level, data, rugdata, ci, subplots, label): + values = data.columns.get_level_values(level).unique() + values.values.sort() + if level == "trial": + data = data.reorder_levels(["trial", "unit"], axis=1) + + # no. units if plotting per trial, no. 
trials if plotting per unit + num_samples = len(data[values[0]].columns) + + if subplots is None: + subplots = Subplots2D(values) + + palette = sns.color_palette() + + for i, value in enumerate(values): + val_data = data[value].stack().reset_index() + val_data['y'] = val_data[0] + ax = subplots.axes_flat[i] + divider = make_axes_locatable(ax) + + r = sns.rugplot( + ax = ax, + palette=palette, + data=rugdata, + x=rugdata[0], + height=.1, + legend=False, + expand_margins=True + ) + r.autoscale(enable = False, tight=False) + + + p = sns.lineplot( + data=val_data, + x='time', + y='y', + ci=ci, + ax=ax, + linewidth=0.5, + ) + p.autoscale(enable=True, tight=False) + p.set_yticks([]) + p.set_xticks([]) + peak = data[value].values.mean(axis=1).max() + p.text( + 0.05, 0.95, + "%.1f" % peak, + horizontalalignment='left', + verticalalignment='top', + transform=ax.transAxes, + color='grey', + ) + p.text( + 0.95, 0.95, + value, + horizontalalignment='right', + verticalalignment='top', + transform=ax.transAxes, + color=palette[0], + ) + p.get_yaxis().get_label().set_visible(False) + p.get_xaxis().get_label().set_visible(False) + p.axvline(c=palette[1], ls='--', linewidth=0.5) + p.set_box_aspect(1) + + + if label: + to_label = subplots.to_label + to_label.get_yaxis().get_label().set_visible(True) + to_label.set_ylabel('Firing rate (Hz)') + to_label.get_xaxis().get_label().set_visible(True) + to_label.set_xlabel('Time from push (s)') + to_label.set_xticks(data.index) + #to_label.set_xticklabels([- duration / 2, 0, duration / 2]) + + # plot legend subplot + legend = subplots.legend + legend.text( + 0, 0.7, + 'Peak of mean', + transform=legend.transAxes, + color='grey', + ) + legend.text( + 0, 0.3, + 'Trial number' if level == 'trial' else 'Unit ID', + transform=legend.transAxes, + color=palette[0], + ) + legend.set_visible(True) + legend.get_xaxis().set_visible(False) + legend.get_yaxis().set_visible(False) + legend.set_facecolor('white') + legend.set_box_aspect(1) + + return subplots + + +def per_unit_spike_rate(data, rugdata, ci=95, subplots=None, label=True): + """ + Plots a histogram for every unit in the given session showing spike rate. + + Parameters + ---------- + + data : pandas.DataFrame + A dataframe for a single session as returned from Behaviour.align_trials (or + Experiment.align_trials but indexed into for session and rec_num). + + rugdata : pandas.DataFrame + A dataframe for a single session containing the onset of the event (led_on) to plot as a rug. + + ci : int or 'sd', optional + Confidence intervals to plot as envelope around line. Default is 95 i.e. 95% + confidence intervals. Also accepts "'sd'" which will draw standard deviation + envelope, or None to plot no envelope. + """ + return _plot("unit", data, rugdata, ci, subplots, label) + + +def per_trial_spike_rate(data, rugdata, ci=95, subplots=None, label=True): + """ + Plots a histogram for every trial in the given session showing spike rate. + + Parameters + ---------- + + data : pandas.DataFrame + A dataframe for a single session as returned from Behaviour.align_trials (or + Experiment.align_trials but indexed into for session and rec_num). + + ci : int or 'sd', optional + Confidence intervals to plot as envelope around line. Default is 95 i.e. 95% + confidence intervals. Also accepts "'sd'" which will draw standard deviation + envelope, or None to plot no envelope. 
+ """ + return _plot("trial", data, rugdata, ci, subplots, label) \ No newline at end of file diff --git a/projects/Aidan_Analysis/FullAnalysis/spikerate_plots_VR59.py b/projects/Aidan_Analysis/FullAnalysis/spikerate_plots_VR59.py new file mode 100644 index 0000000..faec577 --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/spikerate_plots_VR59.py @@ -0,0 +1,48 @@ +#Let us now plot the spike rates for the desired experiment, here aligned to the LED on +#Take all correct trials from both left and right sides +#Remember this all requires data be processed beforehand (use processing.py!) + +from distutils.command.sdist import sdist +from base import * + + +sys.path.append("/home/s1735718/PixelsAnalysis/pixels-analysis") #Adds the location of the pixtools folder to the path +from pixtools import spike_rate #Allows us to use the custom class designed to give the figs/axes from a set of subplots that will fit the data. This generates as many subplots as required in a square a shape as possible + +#Seperate VR59 into two recording sessions, for the 17th and 18th to allow comparison + + +#now get units from these +units = myexp.select_units( + group="good", + max_depth=1500, + name="cortex", + uncurated=False +) + + +#Now that we have selected the required units, let us plot thespike rates according to LED on +hits =myexp.align_trials( + ActionLabels.correct, + Events.led_on, + "spike_rate", + duration=4, + units=units +) + +#First set the confidence interval for the graph (This significantly increases time to run!) +#ci = 95 + +#Now run the code to enumerate over the exp. +#The code will plot the spike rates of every unit selected above aligned to the defined event +#For every session + +for s, session in enumerate(myexp): + name = session.name + + spike_rate.per_unit_spike_rate(hits[s], ci="sd") + plt.suptitle( + f"Session {name} - per-unit across-trials firing rate (aligned to LED on)", + ) + plt.set_size_inches(10, 10) + plt.show() #Plot this information diff --git a/projects/Aidan_Analysis/FullAnalysis/training_summary.py b/projects/Aidan_Analysis/FullAnalysis/training_summary.py new file mode 100644 index 0000000..afb436a --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/training_summary.py @@ -0,0 +1,208 @@ +#!/usr/bin/env python3 +""" +Example script plotting metrics to illustrate how well a cohort of mice are training +across sessions. + +Usage: + training_summary.py /path/to/data_dir mouse1 mouse2 ... + +""" + +import sys +from pathlib import Path +from statistics import NormalDist + +import numpy as np +import matplotlib.pyplot as plt +import pandas as pd +import seaborn as sns +import typer +from matplotlib.ticker import MultipleLocator + +from reach import Cohort +from reach.session import Outcomes, Targets + + +def main( + data_dir: Path = typer.Argument(..., help="Folder containing task data."), + mouse_ids: list[str] = typer.Argument(..., help="The IDs of mice to summarise."), + no_collapse_days: bool = typer.Option( + False, "-n", help="Don't merge sessions with the same date" + ), +) -> None: + """ + Plot training summaries for a cohort of mice using matplotlib. 
+ """ + cohort = Cohort.init_from_files( + data_dir=data_dir, + mouse_ids=mouse_ids, + ) + + sns.set_style("darkgrid") + results = pd.DataFrame(cohort.get_results(not no_collapse_days)) + trials = pd.DataFrame(cohort.get_trials(not no_collapse_days)) + + _, axes = plt.subplots(6, 1, sharex=True) + + # NaN-ify results for any sessions that didn't have any trials + results.loc[ + results["trials"] == 0, + ["correct_r", "correct_l", "incorrect_l", "incorrect_r", "d_prime", "trials"], + ] = np.nan + + # Number of trials + sns.lineplot( + data=results, + x="day", + y="trials", + hue="mouse_id", + legend="brief", + ax=axes[0], + markers=True, + ) + axes[0].set_ylabel("No.\ntrials", rotation="horizontal", ha="right") + axes[0].set_ylim(bottom=0) + + # Number of correct trials + sns.lineplot( + data=results, + x="day", + y="correct_l", + hue="mouse_id", + legend=False, + ax=axes[1], + markers=True, + ) + sns.lineplot( + data=results, + x="day", + y="correct_r", + hue="mouse_id", + legend=False, + ax=axes[1], + markers=True, + ) + axes[1].set_ylabel("No.\ncorrect", rotation="horizontal", ha="right") + + # Hit rate + results["Hit rate"] = (results["correct_l"] + results["correct_r"]) / results[ + "trials" + ] + sns.lineplot( + data=results, + x="day", + y="Hit rate", + hue="mouse_id", + legend=False, + ax=axes[2], + markers=True, + ) + axes[2].set_ylabel("Hit rate", rotation="horizontal", ha="right") + + # Number of incorrect trials + sns.lineplot( + data=results, + x="day", + y="incorrect_l", + hue="mouse_id", + legend=False, + ax=axes[3], + markers=True, + ) + sns.lineplot( + data=results, + x="day", + y="incorrect_r", + hue="mouse_id", + legend=False, + ax=axes[3], + markers=True, + ) + axes[3].set_ylabel("No.\nincorrect", rotation="horizontal", ha="right") + + # d' + if not no_collapse_days and results["d_prime"].isna().any(): + # We have to re-calcuate d' for days that had multiple (merged) sessions using + # the trial data. + def z(p): + return -NormalDist().inv_cdf(p) + + for _, data in results.loc[results["d_prime"].isna()].iterrows(): + day_trials = trials.loc[ + (trials.day == data.day) & (trials.mouse_id == data.mouse_id) + ] + # Trials preceded by an incorrect trial are considered to be correction trials, + # and are therefore ignored. Remove missed trials to help identify these. 
+ day_trials = day_trials.loc[ + (day_trials.outcome == Outcomes.CORRECT) + | (day_trials.outcome == Outcomes.INCORRECT) + ] + + lefts = [] + rights = [] + prev = None + for _, trial in day_trials.iterrows(): + outcome = trial.outcome + if prev != Outcomes.INCORRECT: + if trial.spout == Targets.LEFT: + lefts.append(outcome) + else: + rights.append(outcome) + prev = outcome + + H = (lefts.count(Outcomes.CORRECT) + 0.5) / (len(lefts) + 1) + FA = (rights.count(Outcomes.INCORRECT) + 0.5) / (len(rights) + 1) + d_prime = z(FA) - z(H) + results.loc[ + (results.mouse_id == data.mouse_id) & (results.day == data.day), + "d_prime", + ] = d_prime + + assert not results["d_prime"].isna().any() + + axes[4].axhline(0, color="#aaaaaa", alpha=0.5, ls="--") + # 1.5 here is an arbitrary threshold of ability to discriminate + axes[4].axhline(1.5, color="#aaaaaa", alpha=0.5, ls=":") + sns.lineplot( + data=results, + x="day", + y="d_prime", + hue="mouse_id", + legend=False, + ax=axes[4], + markers=True, + ) + axes[4].set_ylabel("d'", rotation="horizontal", ha="right") + + # Spout position + axes[5].axhline(7, color="#aaaaaa", alpha=0.5, ls="--") + sns.lineplot( + data=trials, + x="day", + y="spout_position_0", + hue="mouse_id", + legend=False, + ax=axes[5], + markers=True, + ) + sns.lineplot( + data=trials, + x="day", + y="spout_position_1", + hue="mouse_id", + legend=False, + ax=axes[5], + markers=True, + linestyle="dashed", + ) + axes[5].set_ylim(bottom=0, top=8) + axes[5].set_ylabel("Spout\nposition\n(mm)", rotation="horizontal", ha="right") + + axes[5].xaxis.set_major_locator(MultipleLocator(2)) + + plt.suptitle("Training summary for: " + ", ".join(mouse_ids)) + plt.show() + + +if __name__ == "__main__": + typer.run(main) diff --git a/projects/Aidan_Analysis/FullAnalysis/unit_binning_analysis.py b/projects/Aidan_Analysis/FullAnalysis/unit_binning_analysis.py new file mode 100644 index 0000000..7c0362b --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/unit_binning_analysis.py @@ -0,0 +1,486 @@ +################################################################################################################# +# As described in my notes, this analysis shall perform the following: +# 1. Take data from entire trials across units (i.e., aligned to LED off) +# 2. Normalise each trials data as a percentage of the total time elapsed +# 3. Bin this data into 10% bins +# 4. Compare each bin to the previous, with the exception of the first bin, where no comparison shall take place +# 5. Create a histogram of the number of significant units across general trial space (i.e., as a normalised %) +################################################################################################################# + +##TODO: Split the two analyses in this file into seperate scripts, will make things easier to manage +##TODO: Rerun this code on only M2 and see if this changes findings. 
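+# A minimal sketch, added for illustration and never called, of the "normalise then
+# bin into 10 % bins" idea in steps 2-3 above, using an assumed toy firing-rate Series.
+# Note that per_trial_binning() below instead cuts absolute time into fixed-width bins;
+# this helper only shows the percentage-of-trial variant.
+def _percentage_bin_demo():
+    import numpy as np
+    import pandas as pd
+
+    # Fake spike-rate trace spanning an arbitrary 10 s trial
+    rate = pd.Series(np.random.rand(100), index=np.linspace(-10.0, 0.0, 100))
+    # Express each timestamp as a percentage of the trial's duration
+    pct = (rate.index - rate.index.min()) / (rate.index.max() - rate.index.min()) * 100
+    # Cut into ten equal-width bins and average the rate within each bin
+    bins = pd.cut(pct, np.arange(0, 101, 10), include_lowest=True)
+    return rate.groupby(bins).mean()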
+ +#%% +# First shall import required packages +import enum +from xml.etree.ElementInclude import include +from matplotlib.pyplot import title, ylabel, ylim +import numpy as np +import pandas as pd +import seaborn as sns + +import sys +from base import * +from CI_Analysis import significance_extraction +from CI_Analysis import percentile_plot +from functions import event_times, per_trial_spike_rate + +sys.path.append("/home/s1735718/PixelsAnalysis/pixels-analysis") +sys.path.insert(0, "/home/s1735718/PixelsAnalysis/pixels/pixels") +from pixtools.utils import Subplots2D # use the local copy of base.py +from pixtools import utils +from pixtools import spike_rate +from pixels import ioutils + + +# Now select the units that shall be analysed + + +units = myexp.select_units(group="good", name="all_layers", max_depth=3500) + +# select all pyramidal neurons +# pyramidal_units = myexp.select_units( +# group="good", min_depth=200, max_depth=1200, name="pyramidals", min_spike_width=0.4 +# ) + +# interneuron_units = myexp.select_units( +# group="good", +# min_depth=200, +# max_depth=1200, +# name="interneurons", +# max_spike_width=0.35, +# ) + +# Then align trials to LED off (i.e, end of trial) with the time window determined by the max trial length +duration = 10 +trial_aligned = myexp.align_trials( + ActionLabels.correct, Events.led_off, "spike_rate", duration=duration, units=units +) + +# pyramidal_aligned = myexp.align_trials( +# ActionLabels.correct, +# Events.led_off, +# "spike_rate", +# duration=10, +# units=pyramidal_units, +# ) + +# interneuron_aligned = myexp.align_trials( +# ActionLabels.correct, +# Events.led_off, +# "spike_rate", +# duration=10, +# units=interneuron_units, +# ) + +#%% +##Now shall bin the data within each trial into 100 ms bins +# Will create a function to do so: + + +def per_trial_binning(trial, myexp, timespan, bin_size): + """ + Function takes data aligned by trial (i.e., using myexp align_trials method), + bins them, then returns the dataframe with an added bin index + + The end result of this is an index structured as (time, 'lower bin, upper bin') + + trial: The data aligned to an event according to myexp.align_trials + + myexp: The experimental cohort defined in base.py + + timespan: the total time data was aligned to in myexp.align_trials (i.e., duration) in seconds + + bin size: the size of the bins in seconds + + """ + + # First reorder data hierarchy + reorder = trial.reorder_levels(["session", "trial", "unit"], axis=1) + + # Perform binning on whole dataset + # First calculate the number of bins to create based on total duration and bin length + bin_number = int(timespan / bin_size) + + values = reorder.columns.get_level_values( + "trial" + ).unique() # Get trials for session + + # Add a new level containing the bin data for the dataframe + bin_vals = pd.cut(reorder.index, bin_number, include_lowest=True, precision=1) + bin_vals = bin_vals.astype(str) # Convert the interval series to a list of strings + + # Remove brackets from bin_vals + bin_vals_str = [] + for t in bin_vals: + t = t.strip("()[]") + bin_vals_str.append(t) + + # This will be added as an index, before time. 
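+ # (added note) bin_vals_str holds one label per time point, e.g. roughly "-5.0, -4.9"
+ # for a 100 ms bin: the interval's lower and upper edges with the brackets stripped.
+ # Assigning it as a column and then promoting it to an index level files every row
+ # under its bin while keeping the original time index.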
+ full_data = reorder.assign( + bin=bin_vals_str + ) # Add bin values temporarily as a column + full_data.set_index( + "bin", append=True, inplace=True + ) # Convert this column to a new index, below time + return full_data + + +#%% +##Now shall create a function to take this bin information and conduct a boostrapping analysis +def bin_data_average(bin_data): + """ + This function takes the dataframe produced by per_trial_binning() including bins as a multiindex + and takes the average of each bin's activity within a given time period + This returns the average firing rate of each unit (i.e., per session and time) + + bin_data: the dataframe produced by per_trial_binning, with predefined bins + + NB: To return a transposed version of the data (i.e., with bin averages as rows rather than columns) + add ".T" to function call. + """ + + # First extract the values of each bin in bin_data + bins = bin_data.index.get_level_values( + "bin" + ) # a list of the bins created by the previous function + bin_names = ( + bins.unique() + ) # Take the unique values present, giving us the list of bin categories + + # Take averages for each bin, across all units and trials then concatenate into a dataframe, where each column is a bin + binned_avgs = pd.concat( + (bin_data.loc[bins == b].mean(axis=0) for b in bin_names), + axis=1, + keys=bin_names, + ) + binned_avgs = pd.DataFrame(binned_avgs) + + # Return the data + return binned_avgs + + +#%% +##Now shall create a function to take these averaged bin values and perform a bootstrapping analysis +def bin_data_bootstrap( + bin_data, CI=95, ss=20, bootstrap=10000, name=None, cache_overwrite=False +): + """ + This function takes the data, with bin averages calculated by bin_data_average and calculates boostrapped confidence intervals + To do so, it takes a random sample many times over and uses this data to calculate percentiles + + bin_data: the dataframe produced by bin_data_average, with bin times represented as columns + + CI: the confidence interval to calculate (default is 95%) + + ss: the sample size to randomly select from the averages (default is 20) + + bootstrap: the number of times a random sample will be taken to create a population dataset (default is 10,000) + + name: the name to save the output under in cache, speeds the process if this has been run before + + cache_overwrite: whether to overwrite the saved cache with new data, (default is False) + """ + + ##Stages of this analysis are as follows: + # 1. Take individual bin + # 2. Calculate confidence interval via bootstapping + # 3. Store all bin CIs + + # Check if the function has been run before, and saved to a chache + # NB: This function caches under the name of the first session in the list! 
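+ # (added note) Conceptually, for every bin of every trial the loop below does
+ #     samples = np.random.choice(bin_avg.values, size=(bootstrap, ss))
+ #     medians = np.median(samples, axis=1)
+ #     lower, median, upper = np.percentile(medians, [2.5, 50, 97.5])
+ # i.e. a bootstrap of medians whose percentiles give the confidence interval.
+ # The cache check below skips the whole computation when an HDF5 file with the
+ # same `name` already exists in the first session's interim/cache folder.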
+ if name is not None: + # Read in data from previous run + output = myexp.sessions[0].interim / "cache" / (name + ".h5") + if output.exists() and cache_overwrite is not True: + df = ioutils.read_hdf5(myexp.sessions[0].interim / "cache" / (name + ".h5")) + return df + + # First define confidence intervals + lower = (100 - CI) / 2 # Lower confidence interval bound (default 2.5%) + upper = 100 - lower # Upper bound (default 97.5%) + percentiles = [lower, 50, upper] # The defined percentiles to analyse + cis = [] # empty list to store calculated intervals + + bin_values = bin_data.columns + keys = [] + + # Loop through each of the bin averages and calculate their confidence interval + for s, session in enumerate(myexp): + names = session.name + ses_bins = bin_data[bin_data.index.get_level_values("session") == s] + trials = ses_bins.index.get_level_values( + "trial" + ).unique() # get the trials for a given session + + for t in trials: + trial_bins = ses_bins[ses_bins.index.get_level_values("trial") == t] + + # Take every bin within a trial + for b in bin_values: + bin_avg = trial_bins[b] + samples = np.array( + [ + [np.random.choice(bin_avg.values, size=ss)] + for j in range(bootstrap) + ] + ) + + # Use these samples to calculate the percentiles for each bin + medians = np.median(samples, axis=2) # Median of the samples taken + results = np.percentile( + medians, percentiles, axis=0 + ) # The percentiles, relative to the median + cis.append(pd.DataFrame(results)) + keys.append( + (s, t, b) + ) # Set the keys for the dataframe to be bin, session, trial + + # Now save these calculated confidence intervals to a dataframe + df = pd.concat(cis, axis=1, names=["session", "trial", "bin"], keys=keys) + df.set_index( + pd.Index(percentiles, name="percentile"), inplace=True + ) # Set the name of the index to percentiles + df.columns = df.columns.droplevel( + level=3 + ) # remove the redundant units row, as they have been averaged out + + # Finally, save this df to cache to speed up future runs + if name is not None: + ioutils.write_hdf5(myexp.sessions[0].interim / "cache" / (name + ".h5"), df) + return df + + return df + + +#%% +##Finally, will create a function to take these percentiles and compare them. +# Specifically, per trial, each bin will be compared to the one previous +# If 2.5% and previous 97.5% overlap then there is a significant change +def bs_pertrial_bincomp(bs_data, myexp): + """ + This function imports percentile data produced from bootstrapping, and compares across bins to determine if a significant change from previous states occurs + This process is repeated across trials to allow plotting of significance. + Returns a dataframe with bins classified as either increasing, decreasing or displaying little change from previous timepoint. + + bs_data: the percentile information produced by bin_data_bootstrap() + + myexp: the experimental cohort defined in base.py + """ + + total_bin_response = {} + # First import the data and split it by session, and trial. + for s, session in enumerate(myexp): + name = session.name + ses_data = bs_data[0] + ses_bin_response = {} + for t, cols in enumerate(ses_data): + trial_data = ses_data[ + cols[0] + ] # All bins, and associated percentiles for a given trial + + # Now that we have all the bins for a given trial, + # t count value in enumerate loop will allow comparison to previous iteration + bins = trial_data.columns + + # compare each column's 2.5% (lower) to previous column's 97.5% + # to check for sig. 
increase in activity + previous_bin = [] + trial_bin_response = {} # store as a dictionary + for b, bins in enumerate(trial_data): + + # First store the initial bin to previous_bin + if len(previous_bin) == 0: + previous_bin.append(trial_data[bins]) + continue + + # Once this has been done, will compare across bins + current_bin = trial_data[bins] + + # First check if there is a sig. increase - i.e., if current 2.5 and previous 97.5 do not overlap + if current_bin[2.5] >= previous_bin[0][97.5]: + trial_bin_response.update({bins: "increased"}) + + # Now check if there is a sig. decrease + elif current_bin[97.5] <= previous_bin[0][2.5]: + trial_bin_response.update({bins: "decreased"}) + + # And finally if there is no change at all + else: + trial_bin_response.update({bins: "nochange"}) + + # The replace previous bin with current bin + previous_bin = [] + previous_bin.append(current_bin) + + # Now append data for each trial to a larget list + ses_bin_response.update( + {cols[0]: trial_bin_response} + ) # contains every trial for a given session + total_bin_response.update({name: ses_bin_response}) + + # Convert nested dictionary to dataframe + df = pd.DataFrame.from_dict( + { + (i, j): total_bin_response[i][j] + for i in total_bin_response.keys() + for j in total_bin_response[i].keys() + }, + orient="index", + ) + df.index.rename(["session", "trial"], inplace=True) + df.columns.names = ["bin"] + return df + + +#%% +##Run these functions to perform the binning and boostrapping of data## +# Uncomment to run +# bin_data = per_trial_binning(trial_aligned, myexp, timespan=10, bin_size=0.1) +# bin_avgs = bin_data_average(bin_data) +# bootstrap = bin_data_bootstrap( +# bin_avgs, CI=95, ss=20, bootstrap=10000, name="fullbootstrap", cache_overwrite=False +# ) +# bin_comp = bs_pertrial_bincomp(bootstrap, myexp) + +# %% + +#%% +# #### Now may plot changing spikerates for each trial including the point at which LED turned on (i.e., the point where trial began) +# # First determine the times LED turned on for each trial +ledon = event_times("led_on", myexp) +ledon = pd.DataFrame(ledon) + +# Then when LED turned off +ledoff = event_times("led_off", myexp) +ledoff = pd.DataFrame(ledoff) + +# Then determine the time relative to LED off (alignment point) where led turned on for each trial +start = ledon - ledoff +start = {ses.name: start.loc[s] for s, ses in enumerate(myexp)} +start = pd.concat(start, axis=1) +start = start / 1000 # Convert from ms to s +start.index.name = "trial" # Rename the index to trial number (ascending order) + +# Now check what the max length of the trial is +vals = start.values.copy() +vals = vals.astype(int) # Convert nan values to integer (a very large negative value) +vals[vals < -90000000] = 0 # Convert all large negative values to zero +print(np.abs(vals).max()) # Check the max value with nan removed + + +## Plot a single graph for every session +def bs_graph(aligned_data, binned_data, bin_comparisons, myexp): + """ + Function plots the binned data graph including period of LED onset for each trial within a given session. 
+ + aligned_data: the dataset produced by align_trials + + binned_data: the dataset produced by per_trial_binning + + bin_comparisons: the dataset containing comparisons between bins, produced by bs_pertrial_bincomp + + myexp: the experimental cohort defined in base.py + + """ + for s, session in enumerate(myexp): + name = session.name + + ses_data = binned_data[s] + ses_values = ses_data.columns.get_level_values( + "trial" + ).unique() # Get all trial values for the session + ses_start_times = start[ + name + ] # LED on time for the session, will leave trial numbers in ses_values unsorted to ensure they align + + ses_bin_comps = bin_comparisons[ + bin_comparisons.index.get_level_values("session") == name + ] + + num_trials = len( + ses_data[ses_values[0]].columns + ) # How many trials shall be plotted + subplots = Subplots2D( + ses_values, sharex=True, sharey=True + ) # determines the number of subplots to create (as many as there are trials in the session) + + for i, value in enumerate(ses_values): + data = ses_data[value].stack().reset_index() + data["y"] = data[0] # Add a new column containing y axis values + ax = subplots.axes_flat[i] # Plot this on one graph + + # bin responses for the trial to be plotted + trial_bins = ses_bin_comps[ + ses_bin_comps.index.get_level_values("trial") == i + ] + + # Now create the actual lineplot + p = sns.lineplot(x="time", y="y", data=data, ax=ax, ci="sd") + + # Now for each of these plots, add the LED on-off time as a shaded area + p.axvspan(ses_start_times[i], 0, color="green", alpha=0.3) + + ##Now, within each trial, add shaded areas representing the binned timescales + for i, cols in enumerate(ses_bin_comps.columns): + shaded_bin = data["time"].loc[data["bin"] == ses_bin_comps.columns[i]] + bin_significance = trial_bins[cols] + + # Then plot an indicator of significance to these bins + if bin_significance.values == "nochange": + p.axvspan( + shaded_bin.values[0], + shaded_bin.values[-1], + color="gray", + alpha=0, + hatch=r"//", + ) # Change this alpha value to above zero to plot nonsignificant bins. + + if ( + bin_significance.values == "increased" + or bin_significance.values == "decreased" + ): + p.axvspan( + shaded_bin.values[0], + shaded_bin.values[-1], + color="red", + alpha=0.1, + hatch=r"/o", + ) + + # Now remove axis labels + p.get_yaxis().get_label().set_visible(False) + p.get_xaxis().get_label().set_visible(False) + + # Then set the x limit to be one second post trial completion + p.set(xlim=(-5, 1)) + + # Now plot the legend for each of these figures + legend = subplots.legend + + legend.text(0.3, 0.5, "Trial", transform=legend.transAxes) + legend.text( + -0.2, 0, "Firing Rate (Hz)", transform=legend.transAxes, rotation=90 + ) # y axis label + legend.text( + 0, -0.3, "Time Relative to LED Off (s)", transform=legend.transAxes + ) # x axis label + + legend.set_visible(True) + legend.get_xaxis().set_visible(False) + legend.get_yaxis().set_visible(False) + legend.set_facecolor("white") + legend.set_box_aspect(1) + + # Display the plot + plt.gcf().set_size_inches(20, 20) + + # Uncomment to save figures as pdf. 
+ utils.save( + f"/home/s1735718/Figures/{myexp[s].name}_spikerate_LED_Range_PerTrial", + nosize=True, + ) + + +# Uncomment below to plot this +# bs_graph(trial_aligned, bin_data, bin_comp, myexp) diff --git a/projects/Aidan_Analysis/FullAnalysis/unit_depth_plot_VR59.py b/projects/Aidan_Analysis/FullAnalysis/unit_depth_plot_VR59.py new file mode 100644 index 0000000..85698cc --- /dev/null +++ b/projects/Aidan_Analysis/FullAnalysis/unit_depth_plot_VR59.py @@ -0,0 +1,36 @@ +#Import data from base + +from base import * + +#Import required packages +import numpy as np +import pandas as pd +import seaborn as sns + +sys.path.append("/home/s1735718/PixelsAnalysis/pixels-analysis") +from pixtools import clusters #A series of functions that will allow plotting of depth profiles + +#Select which units from the data we would like to plot +#To get an overview of the data, let us select all units of good quality and plot their depth +#Select only from recording from the 17th +session1 = myexp.get_session_by_name("220217_VR59") #It appears this is the line that is screwing with things, why does it make an unindexable item?? + +units = session1.select_units( + group="good", + uncurated = False, +) + +#Now plot a graph of increasing depth vs unit +#First select the depth profile to analyse, function will then automatically plot the required graph as a scatter +#May need to alter this to remove use of rec_num!# Or perhaps simply set rec_num to 0? + +rec_num = 0 + +depth = clusters.depth_profile( + myexp, + group="good", + curated = True +) + + +plt.show() \ No newline at end of file