Updated pixels analysis #1

Open
Aidan-MT wants to merge 20 commits into DuguidLab:master from Aidan-MT:master
Commits (20)
e0f43d7  First Commit
1bdfe1e  Moved confidence interval function to new folder
c714062  Updated noise analysis to run as a function.
5d752d2  Formatted files, updated and created figures
2b890cd  Created file containing noise clustering function/plot
0002ff3  Updated CI analysis function
3bb076c  Updated data wrangling in function, should only run when required
f9f7256  Updated functions to utilise multiple experimental sessions and gener…
ab5255f  Moved figures
c7f5f7b  Added a number of new scripts to my analyses folder concerning the fi…
be089ce  Merge branch 'DuguidLab:master' into master
f543b03  Deleted Superfluous Files
8aaa5ec  Deleted Duplicate File
9e45ffb  Updated statistical tests and added Fano Fac calc
7883abd  Merge branch 'DuguidLab:master' into master
8ede878  Updated K-Means Clustering to Include Plot Func.
7c37a03  Added plot function to SD clustering
266c567  Updated functions to return axes
49d6914  Updated imports, removed typos/comments
cbb794e  Updated functions script with fano factor calculations, added two new…
pixtools/clusters/noise_analysis_SD_kmeans_Clustering.py
156 changes: 156 additions & 0 deletions
@@ -0,0 +1,156 @@
# First import required packages
import sys
import json
from sklearn.cluster import KMeans

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import probeinterface as pi


def meta_spikeglx(exp, session):
    """
    Simply extracts channel depth from probe metadata.

    exp: exp class containing mouse IDs

    session: specific recording session to extract information from
    """
    meta = exp[session].files[0]["spike_meta"]
    data_path = exp[session].find_file(meta)
    data = pi.read_spikeglx(data_path)

    return data


def noise_per_channeldepth(myexp):
    """
    Extracts the noise for each channel, combining this into a dataframe.

    myexp: the experiment defined in base.py; the depth information is extracted from here.
    """
    noise = pd.DataFrame(
        columns=["session", "project", "SDs", "x", "y"]
    )  # Create the empty dataframe to hold the noise information
    depths = meta_spikeglx(myexp, 0)
    depths = depths.to_dataframe()
    coords = depths[
        ["x", "y"]
    ]  # Create a dataframe containing the generic x and y coords.
    tot_noise = []

    # Iterate through each session, taking the noise for each file and loading them into one continuous data frame.
    for s, session in enumerate(myexp):
        for i in range(len(session.files)):
            path = session.processed / f"noise_{i}.json"
            with path.open() as fd:
                ses_noise = json.load(fd)

            chan_noises = []
            for j, SD in enumerate(
                ses_noise["SDs"][0:-1]
            ):  # This will iterate over the first 384 channels and exclude the sync channel
                x = coords["x"].iloc[j]
                y = coords["y"].iloc[j]
                noise_row = pd.DataFrame.from_records(
                    {"session": [session.name], "SDs": [SD], "x": [x], "y": [y]}
                )
                chan_noises.append(noise_row)

            # Take all the channel noises for a session, then concatenate
            noise = pd.concat(chan_noises)
            tot_noise.append(noise)  # Take all channel noises and add to a master list
    df2 = pd.concat(
        tot_noise
    )  # Convert this master list, containing every session's noise data, into a dataframe

    return df2


def elbowplot(data, myexp):
    """
    This function takes data formatted according to noise_per_channeldepth(), containing the noise values for all channels.
    It iterates through each experimental session, producing the appropriate graph. Take the optimal number of clusters as the point at which the elbow bends.
    This point is defined as the boundary where additional clusters no longer explain much more variance in the data.

    data: the dataframe, as formatted by noise_per_channeldepth()

    myexp: the experiment, defined in base.py, containing the session information
    """

    for s, session in enumerate(myexp):
        name = session.name
        ses_data = data.loc[data["session"] == name]
        df3 = ses_data["SDs"].values.reshape(
            -1, 1
        )  # Just gives all noise values, for each session
        Sum_of_squares = []  # Create an empty list to store these in.

        k = range(1, 10)
        for num_clusters in k:
            kmeans = KMeans(n_clusters=num_clusters)
            kmeans.fit(df3)
            Sum_of_squares.append(kmeans.inertia_)

        fig, ax = plt.subplots()

        # This plots the elbow graph to give an overview of the variance in the data explained by varying the number of clusters
        # This gives the distance from the centroids, as a measure of the variability explained
        # We want this to drop off, indicating that no remaining data is explained by further centroid inclusion

        plt.plot(k, Sum_of_squares, "bx-")  # bx gives a blue x at each point.
        plt.xlabel("Putative Number of Clusters")
        plt.ylabel("Sum of Squares Distances/Inertia")
        plt.title(
            f"Determining Optimal Number of Clusters for Analysis - Session {name}"
        )

    f = plt.gca()
    return f


def clusterplot(data, myexp, cluster_num):
    """
    Takes the noise and depth information per channel, produced by noise_per_channeldepth(), and produces a clusterplot.
    Clustering is performed by k-means analysis; an elbow plot should first be produced by elbowplot() to determine the optimal cluster number.

    data: data produced by noise_per_channeldepth(), containing channel ID, coordinate, and recording noise for each session in the exp class

    myexp: the exp class containing mouse IDs

    cluster_num: the number of clusters to produce through the k-means analysis, determined by qualitative analysis of the elbow plots (where the "bow" of the line occurs)
    """

    # First define k-means parameters for clustering
    kmeans = KMeans(
        init="random",  # Initiate the iterative analysis with random centres
        n_clusters=cluster_num,  # How many clusters to bin the data into, based on the elbow analysis!
        n_init=10,  # Number of initialisations to run with different centroid seeds
        max_iter=300,  # Max number of iterations before ceasing analysis
        random_state=42,  # The random number seed for centroid generation; can really be anything for our purposes
    )

    for s, session in enumerate(myexp):
        name = session.name

        ses = data.loc[data["session"] == name]
        sd = ses["SDs"].values.reshape(-1, 1)
        y_means = kmeans.fit_predict(sd)

        # Now plot the k-means analysis
        # Remember we use our original data (ses) but use the clustering analysis to generate the labels
        plt.scatter(ses["y"], ses["SDs"], c=y_means, cmap="viridis")

        plt.xlabel("Probe Channel Y-Coordinate")
        plt.ylabel("Channel Noise (SD)")
        plt.title(f"{name} Channel Noise k-Means Clustering Analysis")

    f = plt.gca()
    return f
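For reference, a minimal usage sketch of these functions. This is hypothetical: it assumes myexp is the experiment object defined in base.py, and that the module is importable from the path shown above; neither import is part of this diff.

import matplotlib.pyplot as plt

from base import myexp  # hypothetical import; the exp object the docstrings refer to
from pixtools.clusters.noise_analysis_SD_kmeans_Clustering import (
    noise_per_channeldepth,
    elbowplot,
    clusterplot,
)

# Gather per-channel noise (SDs) plus x/y probe coordinates for every session.
noise = noise_per_channeldepth(myexp)

# Inspect the elbow plot to choose a cluster count for each session.
elbowplot(noise, myexp)
plt.show()

# cluster_num is illustrative here; read it off the elbow plot for the real data.
clusterplot(noise, myexp, cluster_num=3)
plt.show()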
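The elbow heuristic that elbowplot relies on can also be seen on synthetic data. Below is a standalone sketch, independent of the pixels data and using scikit-learn's make_blobs, showing the inertia drop-off that marks a reasonable cluster count:

import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs

# Synthetic one-dimensional "noise" values drawn from three underlying clusters.
X, _ = make_blobs(n_samples=300, centers=3, n_features=1, random_state=42)

ks = range(1, 10)
inertias = []
for k in ks:
    # inertia_ is the sum of squared distances to the nearest centroid
    inertias.append(KMeans(n_clusters=k, n_init=10, random_state=42).fit(X).inertia_)

# Inertia falls steeply up to k=3, then flattens: the bend ("elbow") marks the
# point where additional centroids stop explaining much more variance.
plt.plot(ks, inertias, "bx-")
plt.xlabel("Number of Clusters")
plt.ylabel("Sum of Squared Distances/Inertia")
plt.show()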
@@ -0,0 +1,81 @@
# Imports required by the functions below
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from textwrap import wrap


def significance_extraction(CI):
    """
    This function takes the output of the get_aligned_spike_rate_CI method under the myexp class and extracts any significant values, returning a dataframe in the same format.

    CI: the dataframe created by the CI calculation previously mentioned
    """

    sig = []
    keys = []
    rec_num = 0

    # This loop iterates through each column, storing the data as unit and the location as s
    for s, unit in CI.items():
        # Now iterate through each recording and unit
        # Take any significant values and append them to lists.
        if unit.loc[2.5] > 0 or unit.loc[97.5] < 0:
            sig.append(unit)  # Append the percentile information for this column to a list
            keys.append(s)  # Append the column key marking the point at which the iteration currently stands

    # Now convert this list to a dataframe, using the information stored in the keys list to index it
    sigs = pd.concat(
        sig, axis=1, copy=False,
        keys=keys,
        names=["session", "unit", "rec_num"]
    )

    return sigs


def percentile_plot(CIs, sig_CIs, exp, sig_only=False, dir_ascending=False):
    """
    This function takes the CI data and significant values and plots them relative to zero.
    May specify if percentiles should be plotted in ascending or descending order.

    CIs: the output of the get_aligned_spike_rate_CI function, i.e., bootstrapped confidence intervals for spike rates relative to two points

    sig_CIs: the output of the significance_extraction function, i.e., the units from the bootstrapping analysis whose confidence intervals do not straddle zero

    exp: the experimental session to analyse, defined in base.py

    sig_only: whether to plot only the significant values obtained from the bootstrapping analysis (True/False)

    dir_ascending: whether to plot the values in ascending order (True/False)
    """
    # First sort the data into long form for the full dataset, by percentile
    CIs_long = CIs.reset_index().melt("percentile").sort_values("value", ascending=dir_ascending)
    CIs_long = CIs_long.reset_index()
    CIs_long["index"] = pd.Series(range(0, CIs_long.shape[0]))  # Reset the index column to allow ordered plotting

    # Now select if we want only significant values plotted; otherwise raise an error.
    if sig_only is True:
        CIs_long_sig = sig_CIs.reset_index().melt("percentile").sort_values("value", ascending=dir_ascending)
        CIs_long_sig = CIs_long_sig.reset_index()
        CIs_long_sig["index"] = pd.Series(range(0, CIs_long_sig.shape[0]))

        data = CIs_long_sig

    elif sig_only is False:
        data = CIs_long

    else:
        raise TypeError("sig_only argument must be a boolean (True/False)")

    # Plot this data for the experimental sessions as a pointplot.
    for s, session in enumerate(exp):
        name = session.name

        p = sns.pointplot(
            x="unit", y="value", data=data.loc[(data.session == s)],
            order=data.loc[(data.session == s)]["unit"].unique(), join=False, legend=None)  # Plots in the order of the units as previously set; unique values prevent double plotting

        p.set_xlabel("Unit")
        p.set_ylabel("Confidence Interval")
        p.set(xticklabels=[])
        p.axhline(0)
        plt.suptitle("\n".join(wrap(f"Confidence Intervals By Unit - Grasp vs. Baseline - Session {name}")))  # Wraps the title of the plot to fit on the page.

        plt.show()
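To make the expected input concrete, here is a small synthetic sketch. It assumes, as the code above implies, that the CI dataframe has rows indexed by percentile (2.5, 50, 97.5) and columns forming a (session, unit, rec_num) MultiIndex; the numbers are invented for illustration, and significance_extraction is the function defined above.

import pandas as pd

cols = pd.MultiIndex.from_tuples(
    [(0, 101, 0), (0, 102, 0), (1, 201, 0)],
    names=["session", "unit", "rec_num"],
)
CI = pd.DataFrame(
    [[0.4, -1.2, -3.0],   # 2.5th percentile
     [1.1,  0.1, -1.5],   # median
     [2.0,  1.3, -0.2]],  # 97.5th percentile
    index=pd.Index([2.5, 50.0, 97.5], name="percentile"),
    columns=cols,
)

sigs = significance_extraction(CI)
# Unit 101 (interval entirely above zero) and unit 201 (entirely below zero)
# are kept; unit 102 straddles zero and is dropped.
print(sigs.columns.tolist())

The full and significant dataframes would then be passed on as percentile_plot(CI, sigs, myexp, sig_only=True), with myexp defined in base.py as above.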
@@ -0,0 +1,81 @@
# Imports required by the functions below
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from textwrap import wrap


def significance_extraction(CI):
    """
    This function takes the output of the get_aligned_spike_rate_CI method under the myexp class and extracts any significant values, returning a dataframe in the same format.

    CI: the dataframe created by the CI calculation previously mentioned
    """

    sig = []
    keys = []
    rec_num = 0

    # This loop iterates through each column, storing the data as unit and the location as s
    for s, unit in CI.items():
        # Now iterate through each recording and unit
        # Take any significant values and append them to lists.
        if unit.loc[2.5] > 0 or unit.loc[97.5] < 0:
            sig.append(unit)  # Append the percentile information for this column to a list
            keys.append(s)  # Append the column key marking the point at which the iteration currently stands

    # Now convert this list to a dataframe, using the information stored in the keys list to index it
    sigs = pd.concat(
        sig, axis=1, copy=False,
        keys=keys,
        names=["session", "unit", "rec_num"]
    )

    return sigs


def percentile_plot(CIs, sig_CIs, exp, sig_only=False, dir_ascending=False):
    """
    This function takes the CI data and significant values and plots them relative to zero.
    May specify if percentiles should be plotted in ascending or descending order.

    CIs: the output of the get_aligned_spike_rate_CI function, i.e., bootstrapped confidence intervals for spike rates relative to two points

    sig_CIs: the output of the significance_extraction function, i.e., the units from the bootstrapping analysis whose confidence intervals do not straddle zero

    exp: the experimental session to analyse, defined in base.py

    sig_only: whether to plot only the significant values obtained from the bootstrapping analysis (True/False)

    dir_ascending: whether to plot the values in ascending order (True/False)
    """
    # First sort the data into long form for the full dataset, by percentile
    CIs_long = CIs.reset_index().melt("percentile").sort_values("value", ascending=dir_ascending)
    CIs_long = CIs_long.reset_index()
    CIs_long["index"] = pd.Series(range(0, CIs_long.shape[0]))  # Reset the index column to allow ordered plotting

    # Now select if we want only significant values plotted; otherwise raise an error.
    if sig_only is True:
        CIs_long_sig = sig_CIs.reset_index().melt("percentile").sort_values("value", ascending=dir_ascending)
        CIs_long_sig = CIs_long_sig.reset_index()
        CIs_long_sig["index"] = pd.Series(range(0, CIs_long_sig.shape[0]))

        data = CIs_long_sig

    elif sig_only is False:
        data = CIs_long

    else:
        raise TypeError("sig_only argument must be a boolean (True/False)")

    # Plot this data for the experimental sessions as a pointplot.
    for s, session in enumerate(exp):
        name = session.name

        p = sns.pointplot(
            x="unit", y="value", data=data.loc[(data.session == s)],
            order=data.loc[(data.session == s)]["unit"].unique(), join=False, legend=None)  # Plots in the order of the units as previously set; unique values prevent double plotting

        p.set_xlabel("Unit")
        p.set_ylabel("Confidence Interval")
        p.set(xticklabels=[])
        p.axhline(0)
        plt.suptitle("\n".join(wrap(f"Confidence Intervals By Unit - Grasp vs. Baseline - Session {name}")))  # Wraps the title of the plot to fit on the page.

        plt.show()
File renamed without changes.
@@ -0,0 +1,18 @@
{
    // Use IntelliSense to learn about possible attributes.
    // Hover to view descriptions of existing attributes.
    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
    "version": "0.2.0",
    "configurations": [
        {
            "name": "Python: Current File",
            "type": "python",
            "request": "launch",
            "program": "${file}",
            "console": "integratedTerminal",
            "stopOnEntry": true,
            "justMyCode": false,
            "editor.bracketPairColorization.independentColorPoolPerBracketType": true
        }
    ]
}
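As an aside on this configuration: it debugs whichever file is currently open (program: ${file}), pauses on the first line (stopOnEntry), and steps into library code (justMyCode: false). The editor.bracketPairColorization.independentColorPoolPerBracketType key looks like an editor setting that belongs in settings.json rather than a launch attribute, so VS Code will likely ignore or flag it here.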
@@ -0,0 +1,3 @@
{
    "python.formatting.provider": "black"
}
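This setting points the VS Code Python extension at black for formatting, which presumably produced the "Formatted files, updated and created figures" commit above; black itself must be installed in the environment (for example with pip install black) for it to take effect.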
Review comment:
It seems that the functions in this module are duplicated elsewhere in one of your own scripts. What's the difference between them?