Source code for neurochat.nc_clust

# -*- coding: utf-8 -*-
"""
This module implements NClust Class for NeuroChaT software.

@author: Md Nurul Islam; islammn at tcd dot ie

"""
import logging

import numpy as np

from neurochat.nc_base import NBase
from neurochat.nc_spike import NSpike

from neurochat.nc_utils import bhatt, find, hellinger

import scipy as sc
from sklearn.decomposition import PCA


[docs]class NClust(NBase): """ This class facilitates clustering-related operations. Although no clustering algorithm is implemented in this class, it can be subclassed to create such algorithms. Many of the functions in this class are delegated to the spike attr. Attributes ---------- spike : NSpike An object of NSpike() class. """ def __init__(self, **kwargs): """ Create an NClust object. Parameters ---------- **kwargs : Keyword arguments spike: NSpike object, If directly passed an NSpike object, this is stored. Otherwise if spike is not NSpike or spike is not a kwarg, self.spike = NSpike(**kwargs) Returns ------- None """ spike = kwargs.get('spike', None) self.wavetime = [] self.UPSAMPLED = False self.ALLIGNED = False self.NULL_CHAN_REMOVED = False if isinstance(spike, NSpike): self.spike = spike else: self.spike = NSpike(**kwargs) super().__init__(**kwargs)
[docs] def get_unit_tags(self): """ Return tags of the spiking waveforms from clustering. Parameters ---------- None Returns ------- None """ return self.spike.get_unit_tags()
[docs] def set_unit_tags(self, new_tags=None): """ Return tags of the spiking waveforms from clustering. Parameters ---------- new_tags : ndarray Array that contains the tags for spike-waveforms which is based on the cluster number. Returns ------- None """ self.spike.set_unit_tags(new_tags)
[docs] def get_unit_list(self): """ Return the list of units in a spike dataset. Parameters ---------- None Returns ------- list List of units """ return self.spike.get_unit_list()
def _set_unit_list(self): """ Set the unit list. Delegates to NSpike._set_unit_list() Parameters ---------- None Returns ------- None See also -------- nc_spike.NSPike()._set_unit_list """ self.spike._set_unit_list()
[docs] def get_timestamp(self, unit_no=None): """ Return the timestamps of the spike-waveforms of specified unit. Parameters ---------- unit_no : int Unit whose timestamps are to be returned Returns ------- ndarray Timestamps of the spiking waveforms """ self.spike.get_timestamp(unit_no=unit_no)
[docs] def get_unit_spikes_count(self, unit_no=None): """ Return the total number of spikes in a specified unit. Parameters ---------- unit_no : int Unit whose count is returned Returns ------- int Total number of spikes in the unit """ return self.spike.get_unit_spikes_count(unit_no=unit_no)
[docs] def get_waveform(self): """ Return the waveforms in the spike dataset. Parameters ---------- None Returns ------- dict Each key represents one channel of the electrode group. Each value represents the waveforms of the spikes in a matrix form (no_samples x no_spikes) """ return self.spike.get_waveform()
def _set_waveform(self, spike_waves=[]): """ Set the waveforms of the spike dataset. Parameters ---------- spike_waves : dict Each key represents one channel of the electrode group. Each value represents the waveforms of the spikes in a matrix form (no_samples x no_spikes) Returns ------- None """ self.spike._set_waveform(spike_waves=spike_waves)
[docs] def get_unit_waves(self, unit_no=None): """ Return spike waveforms of a specific unit. Parameters ---------- unit_no : int Unit whose waveforms are returned Returns ------- dict Spike waveforms in each channel of the electrode group """ return self.spike.get_unit_waves(unit_no=unit_no)
# For multi-unit analysis, # {'SpikeName': cell_no} pairs should be used as function input
[docs] def load(self, filename=None, system=None): """ Load spike dataset from the file. Parameters ---------- filename: str Name of the spike file system : str Name of the recording format or system. Returns ------- None See Also -------- nc_spike.NSpike().load() """ self.spike.load(filename=filename, system=system)
[docs] def add_spike(self, spike=None, **kwargs): """ Add new spike node to current NSpike() object. Parameters ---------- spike : NSpike NSPike object. If None, new object is created Returns ------- `:obj:NSpike` A new NSpike() object """ return self.spike.add_spike(spike=spike, **kwargs)
[docs] def load_spike(self, names=None): """ Load datasets of the spike nodes. The name of each node is used for obtaining the filenames. Parameters ---------- names : list of str Names of the nodes to load. If None, current NSpike() object is loaded Returns ------- None """ self.spike.load_spike(names=names)
[docs] def wave_property(self): """ Calculate different waveform properties for currently set unit. Delegates to NSpike().wave_property() Parameters ---------- None Returns ------- dict Graphical data of the analysis See also -------- NSpike().wave_property() """ return self.spike.wave_property()
[docs] def isi(self, bins='auto', bound=None, density=False): """ Calculate the ISI histogram of the spike train. Delegates to NSpike().isi() Parameters ---------- bins : str or int Number of ISI histogram bins. If 'auto', NumPy default is used bound : int Length of the ISI histogram in msec density : bool If true, normalized histogram is calculated Returns ------- dict Graphical data of the analysis See also -------- NSpike().isi() """ return self.spike.isi(bins=bins, bound=bound, density=density)
[docs] def isi_corr(self, **kwargs): """ Analysis of ISI autocorrelation histogram. Delegates to NSpike().isi_auto_corr() Parameters ---------- **kwargs Keyword arguments Returns ------- dict Graphical data of the analysis See also -------- nc_spike.NSpike().isi_corr """ return self.spike.isi_corr(**kwargs)
[docs] def psth(self, event_stamp, **kwargs): """ Calculate peri-stimulus time histogram (PSTH). Delegates to NSpike().psth() Parameters ---------- event_stamp : ndarray Event timestamps **kwargs Keyword arguments Returns ------- dict Graphical data of the analysis See also -------- nc_spike.NSpike().psth() """ return self.spike.psth(event_stamp, **kwargs)
[docs] def burst(self, burst_thresh=5, ibi_thresh=50): """ Burst analysis of spike-train. Delegates to NSpike().burst() Parameters ---------- burst_thresh : int Minimum ISI between consecutive spikes in a burst ibi_thresh : int Minimum inter-burst interval between two bursting groups of spikes Returns ------- None See also -------- nc_spike.NSpike().burst """ self.spike.burst(burst_thresh=burst_thresh, ibi_thresh=ibi_thresh)
[docs] def get_total_spikes(self): """ Return total number of spikes in the recording. Parameters ---------- None Returns ------- int Total number of spikes """ return self.spike.get_total_spikes()
[docs] def get_total_channels(self): """ Return total number of electrode channels in the spike data file. Parameters ---------- None Returns ------- int Total number of electrode channels """ return self.spike.get_total_channels()
[docs] def get_channel_ids(self): """ Return the identities of individual channels. Parameters ---------- None Returns ------- list Identities of individual channels """ return self.spike.get_channel_ids()
[docs] def get_timebase(self): """ Return the timebase for spike event timestamps. Parameters ---------- None Returns ------- int Timebase for spike event timestamps """ return self.spike.get_timebase()
[docs] def get_sampling_rate(self): """ Return the sampling rate of spike waveforms. Parameters ---------- None Returns ------- int Sampling rate for spike waveforms """ return self.spike.get_sampling_rate()
[docs] def get_samples_per_spike(self): """ Return the number of bytes to represent each timestamp. Parameters ---------- None Returns ------- int Number of bytes to represent timestamps """ return self.spike.get_samples_per_spike()
[docs] def get_wave_timestamp(self): """ Return the temporal resolution to represent samples of spike-waves. Parameters ---------- None Returns ------- int Number of bytes to represent timestamps """ # return as microsecond # fs downsampled so that the time is given in microsecond fs = self.spike.get_sampling_rate() / 10**6 return 1 / fs
[docs] def save_to_hdf5(self): """ Store NSpike() object to HDF5 file. Delegates to NSPike().save_to_hdf5() Parameters ---------- None Returns ------- None Also see -------- nc_hdf.Nhdf().save_spike() """ self.spike.save_to_hdf5()
[docs] def get_feat(self, npc=2): """ Return the spike-waveform features. Parameters ---------- npc : int Number of principle components in each channel. Returns ------- feat : ndarray Matrix of size (number_spike X number_features) """ if not self.NULL_CHAN_REMOVED: self.remove_null_chan() if not self.ALLIGNED: self.align_wave_peak() trough, trough_loc = self.get_min_wave_chan() peak, peak_chan, peak_loc = self.get_max_wave_chan() pc = self.get_wave_pc(npc=npc) shape = (self.get_total_spikes(), 1) feat = np.concatenate( (peak.reshape(shape), trough.reshape(shape), pc), axis=1) return feat
[docs] def get_feat_by_unit(self, unit_no=None): """ Return the spike-waveform features for a particular unit. Parameters ---------- unit_no : int Unit of interest Returns ------- feat : ndarray Matrix of size (number_spike X number_features) """ if unit_no in self.get_unit_list(): feat = self.get_feat() return feat[self.get_unit_tags() == unit_no, :] else: logging.error('Specified unit does not exist in the spike dataset')
[docs] def get_wave_peaks(self): """ Return the peaks of the spike-waveforms. Parameters ---------- None Returns ------- (peak, peak_loc) : (ndarray, ndarray) peak: Spike waveform peaks in all the electrode channels Shape is (num_waves X num_channels) peak_loc : Index of peak locations """ wave = self.get_waveform() peak = np.zeros((self.get_total_spikes(), len(wave.keys()))) peak_loc = np.zeros( (self.get_total_spikes(), len(wave.keys())), dtype=int) for i, key in enumerate(wave.keys()): peak[:, i] = np.amax(wave[key], axis=1) peak_loc[:, i] = np.argmax(wave[key], axis=1) return peak, peak_loc
[docs] def get_max_wave_chan(self): """ Return the maximum of waveform peaks among the electrode groups. Parameters ---------- None Returns ------- (max_wave_val, max_wave_chan, peak_loc) : (ndarray, ndarray, ndarray) max_wave_val : ndarray Maximum value of the peaks of the waveforms max_wave_chan : ndarray Channel of the electrode group where a spike waveform is strongest peak_loc : ndarray Peak location in the channel with strongest waveform """ peak, peak_loc = self.get_wave_peaks() max_wave_chan = np.argmax(peak, axis=1) max_wave_val = np.amax(peak, axis=1) return ( max_wave_val, max_wave_chan, peak_loc[np.arange(len(peak_loc)), max_wave_chan])
[docs] def align_wave_peak(self, reach=300, factor=2): """ Align the waves by their peaks. Parameters ---------- reach : int Maximum allowed time-shift in microsecond factors : int Resampling factor Returns ------- None """ if not self.UPSAMPLED: self.resample_wave(factor=factor) if not self.ALLIGNED: # maximum 300microsecond allowed for shift shift = round(reach / self.get_wave_timestamp()) # NC waves are stored in waves['ch1'], waves['ch2'] etc. ways wave = self.get_waveform() maxInd = shift + self.get_max_wave_chan()[2] shift_ind = int(np.median(maxInd)) - maxInd shift_ind[np.abs(shift_ind) > shift] = 0 stacked_chan = np.empty(( self.get_total_spikes(), self.get_samples_per_spike(), self.get_total_channels())) keys = [] i = 0 for key, val in wave.items(): stacked_chan[:, :, i] = val keys.append(key) i += 1 stacked_chan = np.lib.pad( stacked_chan, [(0, 0), (shift, shift), (0, 0)], 'edge') stacked_chan = np.array([ np.roll(stacked_chan[i, :, :], shift_ind[i], axis=0)[ shift: shift + self.get_samples_per_spike()] for i in np.arange(shift_ind.size)]) for i, key in enumerate(keys): wave[key] = stacked_chan[:, :, i] self._set_waveform(wave) self.ALLIGNED = True
[docs] def get_wave_min(self): """ Return the minimum values of the spike-waveforms. Parameters ---------- None Returns ------- (min_w, min_loc) : (ndarray, ndarray) min_w : ndarray Minimum value of the waveforms min_loc : ndarray Index of minimum value """ wave = self.get_waveform() min_w = np.zeros((self.get_total_spikes(), len(wave.keys()))) min_loc = np.zeros((self.get_total_spikes(), len(wave.keys()))) for i, key in enumerate(wave.keys()): min_w[:, i] = np.amin(wave[key], axis=1) min_loc[:, i] = np.argmin(wave[key], axis=1) return min_w, min_loc
[docs] def get_min_wave_chan(self): """ Return the maximum of waveform peaks among the electrode groups. Parameters ---------- None Returns ------- (min_val, min_index) : (ndarray, ndarray) min_val : ndarray Minimum value of the waveform at channels with peak value min_index : ndarray Index of minimum values """ max_wave_chan = self.get_max_wave_chan()[1] trough, trough_loc = self.get_wave_min() return ( trough[np.arange(len(max_wave_chan)), max_wave_chan], trough_loc[np.arange(len(max_wave_chan)), max_wave_chan])
[docs] def get_wave_pc(self, npc=2): """ Return the Principle Components of the waveforms. Parameters ---------- npc : int Number of principle components from waveforms of each channel Returns ------- pc : ndarray Principle components (num_waves X npc*num_channels) """ wave = self.get_waveform() pc = np.array([]) for key, w in wave.items(): pca = PCA(n_components=5) w_new = pca.fit_transform(w) pc_var = pca.explained_variance_ratio_ if npc and npc < w_new.shape[1]: w_new = w_new[:, :npc] else: w_new = w_new[:, 0:( find(np.cumsum(pc_var) >= 0.95, 1, 'first')[0] + 1)] if not len(pc): pc = w_new else: pc = np.append(pc, w_new, axis=1) return pc
[docs] def get_wavetime(self): """ Return the timestamps of the waveforms, not the spiking-event. Parameters ---------- None Returns ------- Timestamps of the spike-waveforms """ # calculate the wavetime from the sampling rate and number of sample # returns in microsecond nsamp = self.spike.get_samples_per_spike() timestamp = self.get_wave_timestamp() return np.arange(0, (nsamp) * timestamp, timestamp)
[docs] def resample_wavetime(self, factor=2): """ Resample the timestamps of spike-waveforms. Parameters ---------- factor : int Resampling factor Returns ------- Resampled timestamps """ wavetime = self.get_wavetime() timestamp = self.get_wave_timestamp() return np.arange(0, wavetime[-1], timestamp / factor)
[docs] def resample_wave(self, factor=2): """ Resample spike waveforms using spline interpolation. Parameters ---------- factor : int Resampling factor Returns ------- wave : dict Upsampled waveforms uptime ndarray Upsampled wave timestamps """ # resample wave using spline interpolation using the resampled_time if not self.UPSAMPLED: wavetime = self.get_wavetime() uptime = self.resample_wavetime(factor=factor) wave = self.get_waveform() for key, w in wave.items(): f = sc.interpolate.interp1d( wavetime, w, axis=1, kind='quadratic') wave[key] = f(uptime) self.spike._set_sampling_rate(self.get_sampling_rate() * factor) self.spike._set_samples_per_spike(uptime.size) self.UPSAMPLED = True return wave, uptime else: logging.warning( 'You can upsample only once. ' + 'Please reload data from source file ' + 'for changing sampling factor!')
[docs] def get_wave_energy(self): """ Energy of the spike waveforms. This is measured as the summation of the square of samples. Parameters ---------- None Returns ------- energy : ndarray Energy of spikes (num_spike X num_channels) """ wave = self.get_waveform() energy = np.zeros((self.get_total_spikes(), len(wave.keys()))) for i, key in enumerate(wave.keys()): # taken the energy in mV2 energy[:, i] = (np.sum(np.square(wave[key]), 1) / 10**6) return energy
[docs] def get_max_energy_chan(self): """ Return the maximum energy of the spike waveforms. Parameters ---------- None Returns ------- ndarray Maximum energy of the spikes """ energy = self.get_wave_energy() return np.argmax(energy, axis=1)
[docs] def remove_null_chan(self): """ Remove the channel from the electrode group that has no spike in it. Parameters ---------- None Returns ------- off_chan : int Channel number that has been removed """ # simply detect in which channel everything is zero, # which means it's a reference channel or nothing is recorded here wave = self.get_waveform() off_chan = [] for key, w in wave.items(): if np.abs(w).sum() == 0: off_chan.append(key) if off_chan: for key in off_chan: del wave[key] self._set_waveform(wave) self.NULL_CHAN_REMOVED = True return off_chan
[docs] def cluster_separation(self, unit_no=0): """ Measure the separation of a specific unit from other clusters. This is performed quantitatively using the following: 1. Bhattacharyya coefficient 2. Hellinger distance Parameters ---------- unit_no : int Unit of interest. If '0', pairwise comparison of all units are returned. Returns ------- (bc, dh) : (ndarray, ndarray) bc : ndarray Bhattacharyya coefficient dh : ndarray Hellinger distance """ # if unit_no==0 all units, matrix output for pairwise comparison, # else maximum BC for the specified unit feat = self.get_feat() unit_list = self.get_unit_list() n_units = len(unit_list) if unit_no == 0: bc = np.zeros([n_units, n_units]) dh = np.zeros([n_units, n_units]) for c1 in np.arange(n_units): for c2 in np.arange(n_units): X1 = feat[self.get_unit_tags() == unit_list[c1], :] X2 = feat[self.get_unit_tags() == unit_list[c2], :] bc[c1, c2] = bhatt(X1, X2)[0] dh[c1, c2] = hellinger(X1, X2) unit_list = self.get_unit_list() return bc, dh else: bc = np.zeros(n_units) dh = np.zeros(n_units) X1 = feat[self.get_unit_tags() == unit_no, :] for c2 in np.arange(n_units): if c2 == unit_no: bc[c2] = 0 dh[c2] = 1 else: X2 = feat[self.get_unit_tags() == unit_list[c2], :] bc[c2] = bhatt(X1, X2)[0] dh[c2] = hellinger(X1, X2) idx = find(np.array(unit_list) != unit_no) return bc[idx], dh[idx]
[docs] def cluster_similarity(self, nclust=None, unit_1=None, unit_2=None): """ Measure the similarity or distance of units in a cluster. This is performed on one unit in a spike dataset to cluster of another unit in another dataset. This is performed quantitatively using the following: 1. Bhattacharyya coefficient 2. Hellinger distance Parameters ---------- nclust : Nclust NClust object whose unit is under comparison unit_1 : int Unit of current Nclust object unit_2 : int Unit of another NClust object under comparison Returns ------- (bc, dh) : (ndarray, ndarray) bc : ndarray Bhattacharyya coefficient dh : ndarray Hellinger distance """ if isinstance(nclust, NClust): if ((unit_1 in self.get_unit_list()) and (unit_2 in nclust.get_unit_list())): X1 = self.get_feat_by_unit(unit_no=unit_1) X2 = nclust.get_feat_by_unit(unit_no=unit_2) bc = bhatt(X1, X2)[0] dh = hellinger(X1, X2) return bc, dh