In this notebook we read in the master XML file made using NapaTrackMater and create N vectors, each of dimension Delta times Attributes. Here N is the number of tracks present in the chosen region R, Delta is the length of the chosen time interval {t - t_minus, t + t_plus} (that is, t_minus + t_plus), and Attributes are the morphological and dynamic properties associated with the cells in the tracks. We concatenate the attribute components over the chosen time interval to create a (Delta times Attributes)-dimensional vector for each track, and we collect these vectors in a pandas DataFrame for further analysis.

%gui qt5
from napatrackmater.Trackvector import TrackVector
from pathlib import Path
from ipywidgets import interactive, widgets
from IPython.display import display
import napari 
from tifffile import imread
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
from scipy.spatial.distance import pdist
from scipy.cluster.hierarchy import linkage, fcluster
import matplotlib.pyplot as plt
2023-11-18 14:08:24.729227: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-11-18 14:08:25.358781: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
Cell In[1], line 2
      1 get_ipython().run_line_magic('gui', 'qt5')
----> 2 from napatrackmater.Trackvector import TrackVector
      3 from pathlib import Path
      4 from ipywidgets import interactive, widgets

File ~/anaconda3/envs/naparienv/lib/python3.10/site-packages/napatrackmater/__init__.py:2
      1 from .pretrained import register_model, register_aliases, clear_models_and_aliases
----> 2 from .clustering import Clustering
      3 from .DeepEmbeddedClustering import DeepEmbeddedClustering
      4 from .CloudAutoEncoder import CloudAutoEncoder

File ~/anaconda3/envs/naparienv/lib/python3.10/site-packages/napatrackmater/clustering.py:1
----> 1 from kapoorlabs_lightning.lightning_trainer import AutoLightningModel
      2 from cellshape_helper.vendor.pytorch_geometric_files import read_off, sample_points
      3 import numpy as np

ModuleNotFoundError: No module named 'kapoorlabs_lightning'
# Folder that holds every input file of this analysis. All paths below are
# derived from it, so pointing the notebook at another dataset only requires
# changing this one line. It stays a plain string because it is also handed to
# plot_vectors() as the output folder.
base_dir = 'C:/Users/rando/Downloads/Mari_project/'
data_dir = Path(base_dir)

# Master XML written by NapaTrackMater plus the spot/track/edge CSV tables
# passed to TrackVector below.
master_xml_path = data_dir / 'master_test_tracks.xml'
spot_csv_path = data_dir / 'test_spots.csv'
track_csv_path = data_dir / 'test_tracks.csv'
edges_csv_path = data_dir / 'test_edges.csv'

# Base name used by plot_vectors() when it writes the cluster table.
plot_data_save_name = 'test_plot_data.csv'

# When True, a napari viewer is opened and the raw image is loaded so the
# tracks can be displayed; otherwise TrackVector receives no viewer/image.
show_tracks = False
if show_tracks:
    viewer = napari.Viewer()
    # as_posix() keeps the forward-slash form of the path on every platform.
    image = imread((data_dir / 'gt' / 'rawk.tif').as_posix())
else:
    viewer = None
    image = None

track_vectors = TrackVector(
    viewer,
    image,
    master_xml_path,
    spot_csv_path,
    track_csv_path,
    edges_csv_path,
    show_tracks=show_tracks,
)
def track_setter(deltat, deltax, deltay):
    """Copy the selected time/x/y ranges onto the module-level ``track_vectors``.

    Each argument is an indexable range whose first element is used as the
    lower bound and whose last element is used as the upper bound.
    """
    # (lower attribute, upper attribute, selected range), applied in order.
    bound_targets = (
        ('t_minus', 't_plus', deltat),
        ('x_start', 'x_end', deltax),
        ('y_start', 'y_end', deltay),
    )
    for lower_name, upper_name, selected in bound_targets:
        setattr(track_vectors, lower_name, selected[0])
        setattr(track_vectors, upper_name, selected[-1])
    
def plot_vectors(vector_dicts, cluster_labels, base_dir):
    """Show a 3D scatter of clustered vectors and save the plotted points as CSV.

    Parameters
    ----------
    vector_dicts : sequence of dict
        One mapping per plotted point; the keys ``'t'``, ``'y'`` and ``'x'``
        are read from each of them.
    cluster_labels : sequence of int
        Cluster label of each entry of ``vector_dicts`` (same order and
        length). Labels are used as colormap and marker indices, so they must
        be non-negative integers.
    base_dir : str
        Folder in which the CSV table is written.

    Raises
    ------
    ValueError
        If the inputs are empty or their lengths differ.

    Notes
    -----
    The CSV name is built from the module-level ``plot_data_save_name``.
    """
    # Coerce once: comparing a plain Python list with ``==`` yields a single
    # bool instead of an element-wise mask, which would select no points.
    cluster_labels = np.asarray(cluster_labels)
    if len(vector_dicts) != len(cluster_labels):
        raise ValueError(
            f'Got {len(vector_dicts)} vectors but {len(cluster_labels)} cluster labels'
        )

    largest_label = int(cluster_labels.max())  # ValueError on empty input
    print(f'Number of clusters: {largest_label}')

    # Single table that drives both the plot and the CSV export.
    plot_data = pd.DataFrame({
        't': [vector['t'] for vector in vector_dicts],
        'y': [vector['y'] for vector in vector_dicts],
        'x': [vector['x'] for vector in vector_dicts],
        'cluster_label': cluster_labels,
    })

    # One colormap entry per possible label; +1 leaves room for a label 0.
    cmap = plt.get_cmap('jet', largest_label + 1)
    markers = ['o', 's', '^', 'D', 'v', 'P']  # cycled when there are more clusters

    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')

    for cluster_label in np.unique(cluster_labels):
        members = plot_data[plot_data['cluster_label'] == cluster_label]
        label_index = int(cluster_label)
        ax.scatter(
            members['t'].to_numpy(),
            members['y'].to_numpy(),
            members['x'].to_numpy(),
            # ``color=`` rather than ``c=``: a bare RGBA tuple passed as ``c``
            # is indistinguishable from 4 values to be colormapped whenever a
            # cluster happens to contain 4 points.
            color=cmap(label_index),
            label=f'Cluster {cluster_label}',
            marker=markers[label_index % len(markers)],
        )

    ax.set_xlabel('Time (t)')
    ax.set_ylabel('Y Coordinate')
    ax.set_zlabel('X Coordinate')
    ax.legend()

    plt.show()

    # NOTE(review): with the current 'test_plot_data.csv' setting this yields
    # 'test_plot_data.csv_clusters.csv'. The name is kept unchanged because
    # other code may already read that file; confirm before cleaning it up.
    csv_file_path = os.path.join(base_dir, plot_data_save_name + '_clusters.csv')
    plot_data.to_csv(csv_file_path, index=False)
    

def recreate_plot_from_csv(csv_path):
    """Redraw the 3D cluster scatter from a CSV with t, y, x, cluster_label columns.

    Parameters
    ----------
    csv_path : str or path-like
        CSV file providing the columns ``'t'``, ``'y'``, ``'x'`` and
        ``'cluster_label'``. Labels are used as 1-based colormap indices, so
        they must be positive integers.

    Raises
    ------
    ValueError
        If the file contains no rows (there is no largest label to size the
        colormap with).
    """
    plot_data = pd.read_csv(csv_path)
    cluster_labels = plot_data['cluster_label']

    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')

    # One colour per label; labels start at 1, hence the ``- 1`` below.
    cmap = plt.get_cmap('tab20', max(cluster_labels))
    for cluster_label in np.unique(cluster_labels):
        # Boolean mask instead of positional indices fed into label-based
        # Series lookups, which only agree for a default RangeIndex.
        members = plot_data[cluster_labels == cluster_label]
        ax.scatter(
            members['t'].to_numpy(),
            members['y'].to_numpy(),
            members['x'].to_numpy(),
            # ``color=`` rather than ``c=``: a bare RGBA tuple passed as ``c``
            # is ambiguous with 4 values to be colormapped when a cluster has
            # exactly 4 points.
            color=cmap(int(cluster_label) - 1),
            label=f'Cluster {cluster_label}',
            marker='o',
        )

    ax.set_xlabel('Time (t)')
    ax.set_ylabel('Y Coordinate')
    ax.set_zlabel('X Coordinate')
    ax.legend()

    plt.show()
def cosine_similarity_without_tzyx(vector1, vector2):
    """Return the cosine distance (1 - cosine similarity) of two vectors.

    The first six components of each vector are dropped before comparing.
    If either remaining vector has zero norm the result is 0.0.
    """
    features_a, features_b = vector1[6:], vector2[6:]

    length_a = np.linalg.norm(features_a)
    length_b = np.linalg.norm(features_b)

    # A zero-length vector has no direction; avoid dividing by zero.
    if any(length == 0 for length in (length_a, length_b)):
        return 0.0

    cosine = np.dot(features_a, features_b) / (length_a * length_b)
    return 1.0 - cosine
# Settings common to the three range sliders below.
shared_slider_options = dict(
    step=1,
    disabled=False,
    continuous_update=False,
    orientation='horizontal',
    readout=True,
    readout_format='d',
)

# Each slider starts out selecting its full range as reported by track_vectors.
delta_time_slider = widgets.IntRangeSlider(
    value=[track_vectors.tstart, track_vectors.tend],
    min=track_vectors.tstart,
    max=track_vectors.tend,
    description='Delta Time',
    **shared_slider_options,
)
delta_x_slider = widgets.IntRangeSlider(
    value=[track_vectors.xmin, track_vectors.xmax],
    min=track_vectors.xmin,
    max=track_vectors.xmax,
    description='Delta X',
    **shared_slider_options,
)
delta_y_slider = widgets.IntRangeSlider(
    value=[track_vectors.ymin, track_vectors.ymax],
    min=track_vectors.ymin,
    max=track_vectors.ymax,
    description='Delta Y',
    **shared_slider_options,
)

# Moving a slider calls track_setter with the newly selected ranges.
track_vector_widgets = interactive(
    track_setter,
    deltat=delta_time_slider,
    deltax=delta_x_slider,
    deltay=delta_y_slider,
)

track_vector_widgets
# Refresh track_vectors.current_shape_dynamic_vectors.
# NOTE(review): presumably this applies the ranges chosen with the sliders;
# confirm against TrackVector.
track_vectors._interactive_function()
current_shape_dynamic_vectors = track_vectors.current_shape_dynamic_vectors

# Column order of the table; it must match the attribute order of the vectors.
shape_dynamic_columns = ['Track ID', 't', 'z', 'y', 'x', 'Dividing', 'Number_Dividing',  'Radius', 'Volume', 'Eccentricity Comp First', 'Eccentricity Comp Second', 'Surface Area', 'Speed', 'Motion_Angle', 'Acceleration', 'Distance_Cell_mask', 'Radial_Angle', 'Cell_Axis_Mask']

per_entry_frames = []
for attribute_sequences in current_shape_dynamic_vectors:
    # Each entry is a collection of equally long sequences, one per column
    # above; transposing turns them into one table row per position.
    attribute_matrix = np.asarray(list(attribute_sequences))
    per_entry_frames.append(
        pd.DataFrame(np.transpose(attribute_matrix), columns=shape_dynamic_columns)
    )

if not per_entry_frames:
    raise ValueError(
        'track_vectors.current_shape_dynamic_vectors is empty; '
        'no shape/dynamic vectors are available to build the table'
    )

# Concatenate once instead of growing the table inside the loop.
global_shape_dynamic_dataframe = pd.concat(per_entry_frames, ignore_index=True)
global_shape_dynamic_dataframe = global_shape_dynamic_dataframe.set_index('Track ID')
# Order rows by time, breaking ties by track id. Two consecutive sort_values
# calls would not achieve this because the default sort is not stable.
global_shape_dynamic_dataframe = global_shape_dynamic_dataframe.sort_values(by=['t', 'Track ID'])
analysis_vectors = {}

# 'Track ID' may live in the index rather than in a column (set_index was
# applied when the table was built); bring it back as a column so the lookups
# below do not raise KeyError. The global table itself is left untouched.
if 'Track ID' in global_shape_dynamic_dataframe.columns:
    track_table = global_shape_dynamic_dataframe
else:
    track_table = global_shape_dynamic_dataframe.reset_index()

# Attributes kept for each time point of a track (everything but the id).
analysis_columns = ['t', 'z', 'y', 'x', 'Dividing', 'Number_Dividing', 'Radius', 'Volume', 'Eccentricity Comp First', 'Eccentricity Comp Second', 'Surface Area', 'Speed', 'Motion_Angle', 'Acceleration', 'Distance_Cell_mask', 'Radial_Angle', 'Cell_Axis_Mask']

unique_track_ids = track_table['Track ID'].unique()
# sort=False keeps the tracks in order of first appearance, i.e. the same
# order as unique_track_ids.
for track_id, track_data in track_table.groupby('Track ID', sort=False):
    track_vector = track_data.sort_values(by='t')[analysis_columns]
    track_vector_list = track_vector.to_dict(orient='records')
    analysis_vectors[track_id] = track_vector_list

# Each track is represented by its earliest record only.
vector_dicts = [track_records[0] for track_records in analysis_vectors.values()]
vector_array = np.array([list(vector.values()) for vector in vector_dicts])

# Condensed pairwise distance matrix using the custom cosine distance.
cosine_distance = pdist(vector_array, metric=cosine_similarity_without_tzyx)

linkage_matrix = linkage(cosine_distance, method='average')

# Tracks whose linkage distance is below this value end up in the same cluster.
threshold = 0.02

cluster_labels = fcluster(linkage_matrix, threshold, criterion='distance')

plot_vectors(vector_dicts, cluster_labels, base_dir)