diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000000000000000000000000000000000000..b476ec030cb13c55f2dac43f21bd387a013ef43e
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,32 @@
+The Clear BSD License
+
+Copyright (c) [year] [fullname]
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted (subject to the limitations in the disclaimer
+below) provided that the following conditions are met:
+
+     * Redistributions of source code must retain the above copyright notice,
+     this list of conditions and the following disclaimer.
+
+     * Redistributions in binary form must reproduce the above copyright
+     notice, this list of conditions and the following disclaimer in the
+     documentation and/or other materials provided with the distribution.
+
+     * Neither the name of the copyright holder nor the names of its
+     contributors may be used to endorse or promote products derived from this
+     software without specific prior written permission.
+
+NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
+THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
diff --git a/ReadMe.txt b/ReadMe.txt
new file mode 100644
index 0000000000000000000000000000000000000000..9cea39bcab4d0cc30305d3732547cde9a3f5394e
--- /dev/null
+++ b/ReadMe.txt
@@ -0,0 +1,26 @@
+System requirements:
+This software was written and tested on a desktop computer (macOS) running Matlab 2019+
+
+Installation guide:
+Go to the main folder in Matlab and run tda_msc_rsfmri_runmapper.m to run Mapper on the data, and tda_msc_rsfmri_anamapper.m to generate the results in the manuscript
+
+Demo:
+Within the tda_msc_rsfmri_anamapper.m file, each cell can be run independently to generate individual figures
+
+Instructions for use:
+You can run each cell individually in the Matlab file tda_msc_rsfmri_anamapper.m
+
+The data can be downloaded from the individual data websites mentioned in the paper.
+
+
+License:
+The work is covered under the BSD 3-Clause Clear License
+More information here - https://choosealicense.com/licenses/bsd-3-clause-clear/
+
+Cite:
+Please cite the following papers if you use this code:
+1. Saggar, M., Shine, J. M., Liegeois, R., Dosenbach, N.U.F., Fair, D. (under review) Precision dynamical mapping using topological data analysis reveals a unique (hub-like) transition state at rest in highly sampled individuals. Nature Communications
+Preprint: https://www.biorxiv.org/content/10.1101/2021.08.05.455149v2
+2. Geniesse, C., Sporns, O., Petri, G., Saggar, M. (2019). Generating dynamical neuroimaging spatiotemporal representations (DyNeuSR) using topological data analysis. Network Neuroscience. http://dx.doi.org/10.1162/netn_a_00093
+3. Saggar, M., Sporns, O., Gonzalez-Castillo, J., Bandettini, P.A., Carlsson, G., Glover, G., Reiss, A.L.
(2018) Towards a new approach to visualize and quantify brain's dynamical organization using topological data analysis. Nature Communications. http://dx.doi.org/10.1038/s41467-018-03664-4 +4. Saggar, Manish. "Systems and Methods for Mapping Neuronal Circuitry and Clinical Applications Thereof." U.S. Patent Application 16/171,255, filed April 25, 2019 diff --git a/createPKNNG_bdl.m b/createPKNNG_bdl.m new file mode 100644 index 0000000000000000000000000000000000000000..1d41dc7fb9cb51dbaf20d4cfe4233dd2eac28f89 --- /dev/null +++ b/createPKNNG_bdl.m @@ -0,0 +1,95 @@ +% WGGGG +% +% aim - to code neighborhood based filter function in Matlab +% author - saggar@stanford.edu (7.11.2019) +% +% +% output: knnGraph (as a table, binary and weighted versions) +% +% inspired by the paper on penalized KNN graph in Baya & Granitto 2011 BMC +% Bioinformatics +% +% Version History +% - [7.11.19] Wrote it to perform knn graph construction with outlier +% removal and penalized edges to construct a single connected component +% +% +function [knnGraphTbl, knnGraph_dense_bin, knnGraph_dense_wtd, knnGraph_dense_bin_conn, knnGraph_dense_wtd_conn]= createPKNNG_bdl(distMat, num_k) + % create neighborhood graph + knnGraphTbl = zeros(size(distMat,1), num_k); + knnGraph_bin = zeros(size(distMat)); + knnGraph_wtd = zeros(size(distMat)); + for n = 1:1:size(distMat,1) + [tmp idx] = sort(distMat(n,:),'ascend'); + knnGraphTbl(n,:) = idx(2:2+num_k-1); + for l = 1:1:num_k + knnGraph_bin(n, knnGraphTbl(n,l)) = 1; + knnGraph_wtd(n, knnGraphTbl(n,l)) = distMat(n,knnGraphTbl(n,l)); + + end + end + + % remove outliers in knn graph to find densely connected subgraphs + % first remove non-reciprocal connections + knnGraph_dense_bin = knnGraph_bin; + knnGraph_dense_wtd = knnGraph_wtd; + for n = 1:1:size(distMat,1) + tmp = knnGraphTbl(n,:); + for l = 1:1:num_k + if ~ismember(n,knnGraphTbl(tmp(l),:)) + knnGraph_dense_bin(n,tmp(l)) = 0; + knnGraph_dense_bin(tmp(l),n) = 0; + + knnGraph_dense_wtd(n,tmp(l)) = 0; + knnGraph_dense_wtd(tmp(l),n) = 0; + + end + + end + + end + + % connect disconnected components of the graph + g = graph(knnGraph_dense_wtd); + knnGraph_dense_wtd_conn = knnGraph_dense_wtd; + knnGraph_dense_bin_conn = knnGraph_dense_bin; + bins = conncomp(g); + nComp = max(bins); + if nComp > 1 + for c = 1:1:nComp + for d = c+1:1:nComp + nodes_c = find(bins==c); + nodes_d = find(bins==d); + max_edge_c = max(max(knnGraph_dense_wtd(nodes_c, nodes_c))); + max_edge_d = max(max(knnGraph_dense_wtd(nodes_d, nodes_d))); + if length(nodes_d) == 1 + [val, best_nodes_c] = min(distMat(nodes_d, nodes_c)); + knnGraph_dense_wtd_conn(nodes_d, best_nodes_c) = val * exp(val/max_edge_c); + knnGraph_dense_wtd_conn(best_nodes_c, nodes_d) = knnGraph_dense_wtd_conn(nodes_d, best_nodes_c); + knnGraph_dense_bin_conn(nodes_d, best_nodes_c) = 1; + knnGraph_dense_bin_conn(best_nodes_c, nodes_d) = 1; + elseif length(nodes_c) == 1 + [val, best_nodes_d] = min(distMat(nodes_c, nodes_d)); + knnGraph_dense_wtd_conn(nodes_c, best_nodes_d) = val * exp(val/max_edge_d); + knnGraph_dense_wtd_conn(best_nodes_d, nodes_c) = knnGraph_dense_wtd_conn(nodes_c, best_nodes_d); + knnGraph_dense_bin_conn(nodes_c, best_nodes_d) = 1; + knnGraph_dense_bin_conn(best_nodes_d, nodes_c) = 1; + else % find the best pair of nodes between nodes_c and nodes_d that can be connected + tmp = distMat(nodes_c, nodes_d); + val = min(tmp(:)); + [nodes_c_min, nodes_d_min] = find(tmp==val); + nodes_c_min = nodes_c(nodes_c_min); + nodes_d_min = nodes_d(nodes_d_min); + 
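+                    % the new bridging edge is weighted as val * exp(val/max_edge), so that
+                    % longer inter-component links are penalized relative to the longest
+                    % intra-component edge (PKNNG scheme of Baya & Granitto, 2011)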
knnGraph_dense_wtd_conn(nodes_c_min, nodes_d_min) = val * exp(val/max(max_edge_c,max_edge_d));
+                    knnGraph_dense_wtd_conn(nodes_d_min, nodes_c_min) = knnGraph_dense_wtd_conn(nodes_c_min, nodes_d_min);
+                    knnGraph_dense_bin_conn(nodes_c_min, nodes_d_min) = 1;
+                    knnGraph_dense_bin_conn(nodes_d_min, nodes_c_min) = 1;
+                end
+
+            end
+
+        end
+    end
+
+
+end
\ No newline at end of file
diff --git a/mapper2d_bdl_hex_binning.m b/mapper2d_bdl_hex_binning.m
new file mode 100644
index 0000000000000000000000000000000000000000..48ae1bc5f183ada3cc3a6dbd4d32ca1fbb0ca12f
--- /dev/null
+++ b/mapper2d_bdl_hex_binning.m
@@ -0,0 +1,194 @@
+%WGGGG
+%
+% aim - to code mapper 2d in matlab
+% author - saggar@stanford.edu (7.11.2019)
+%
+%
+% output: adja, adja_pruned, pts_in_vertex,
+% and pts_in_vertex_pruned
+%
+% inspired by TDAMapper R
+% and Gurjeet Singh's original paper - Singh et al. 2007
+%
+% Version History
+% - [7.11.19] Originally wrote it to check across 4 directions in the binning space
+% for finding overlap across bins. Thus, constrained the nodes to have a max
+% degree of 4. [TDAMapper R style]
+% - [7.15.19] Modified to include the other 4 diagonal directions for
+% finding overlapping bins. Thus, constrained the nodes to a max degree of 8.
+% - [7.16.19] Todo: After talking with Samir C. we will now try to expand
+% the binning strategy from 2D bins to several other styles.
+% - [7.17.19] Update [non-metric version] - instead of looking over the neighbors for overlap, we
+% now just connect levels irrespective of neighborhood status - as long
+% as they share a data point
+% - [7.22.19] Update [non-metric version] - added pruning of
+% nodes/vertices - thus vertices with the same set of data points are merged
+% into one vertex.
+% - [11.25.19] Adding hex-binning to the Mapper code
+%
+function [adja, adja_pruned, pts_in_vertex, pts_in_vertex_pruned] = mapper2d_bdl_hex_binning(distMat, filter_values, num_intervals, percent_overlap, num_bins_clustering, nsides)
+
+    %initialize variables
+    vertex_index = 0;
+    if ~exist('nsides','var')
+        nsides = 6; % 6 for hexagonal bins, 4 for regular (square) bins
+ end + + % indexed from 1 to the number of vertices + level_of_vertex = []; + pts_in_vertex = []; + + % indexed from 1 to the number of levels + pts_in_level = {}; + vts_in_level = {}; + + filter_min_1 = floor(min(filter_values(:,1))); + filter_max_1 = ceil(max(filter_values(:,1))); + filter_min_2 = floor(min(filter_values(:,2))); + filter_max_2 = ceil(max(filter_values(:,2))); + + interval_length_1 = (filter_max_1 - filter_min_1) / (num_intervals(1) - (num_intervals(1) - 1) * percent_overlap/100 ); + interval_length_2 = (filter_max_2 - filter_min_2) / (num_intervals(2) - (num_intervals(2) - 1) * percent_overlap/100 ); + + step_size_1 = interval_length_1 * (1 - percent_overlap/100); + step_size_2 = interval_length_2 * (1 - percent_overlap/100); + + num_levels = num_intervals(1) * num_intervals(2); + + level_indices_1 = repmat(1:num_intervals(1), [1,num_intervals(2)]); + level_indices_2 = reshape(repmat(1:num_intervals(2), num_intervals(1),1),[num_levels,1])'; + %figure; + for level = 1:1:num_levels + level_1 = level_indices_1(level); + level_2 = level_indices_2(level); + + min_val_level_1 = filter_min_1 + (level_1-1)*step_size_1; + min_val_level_2 = filter_min_2 + (level_2-1)*step_size_2; + + max_val_level_1 = min_val_level_1 + interval_length_1; + max_val_level_2 = min_val_level_2 + interval_length_2; + + % step 1: find center of min & max and create hexagon ther + % step 2: find pts that are within that hexagon + % step 3: we will start intersecting + center_level_1 = (min_val_level_1+max_val_level_1)./2; + center_level_2 = (min_val_level_2+max_val_level_2)./2; + hex_poly = nsidedpoly(nsides, 'Center', [center_level_1, center_level_2], 'SideLength', (interval_length_1+interval_length_2)./2); + pts_level = isinterior(hex_poly, filter_values); + %plot(hex_poly); hold on; + %pts_level = (filter_values(:,1) >= min_val_level_1) & (filter_values(:,2) >= min_val_level_2) & (filter_values(:,1) <= max_val_level_1) & (filter_values(:,2) <= max_val_level_2); + + num_pts_level = sum(pts_level); + pts_in_level{level} = find(pts_level); + + if num_pts_level == 0 + %fprintf(1,'Level set is empty\n'); + vts_in_level{level} = -1; + continue; + elseif num_pts_level == 1 + %fprintf(1,'Level set has only 1 pt\n'); + num_vts_level = 1; + cluster_indices_within_level = [1]; + vts_in_level{level} = vertex_index + (1:num_vts_level); + elseif num_pts_level > 1 + %fprintf(1,'Level set has only %d pts\n', num_pts_level); + level_distMat = distMat(pts_level, pts_level); + level_max_dist = max(level_distMat(:)); + [Z, cutoff] = find_cluster_cutoff(level_distMat, num_bins_clustering); + if cutoff <0 + fprintf(2,'cutoff <0\n'); + cutoff = 1; + + end + cluster_indices_within_level = cluster(Z, 'cutoff',cutoff, 'criterion','distance'); + num_vts_level = max(cluster_indices_within_level); + vts_in_level{level} = vertex_index + (1:num_vts_level); + end + + for j = 1:1:num_vts_level + vertex_index = vertex_index + 1; + level_of_vertex(vertex_index) = level; + pts_in_vertex{vertex_index} = pts_in_level{level}(cluster_indices_within_level==j); + + end + + end + %scatter(filter_values(:,1), filter_values(:,2), 15,'k'); + % pruning nodes/vertices that have same number of points in them + pts_in_vertex_pruned = pts_in_vertex; + for i = 1:1:vertex_index + for j = i:1:vertex_index + if i == j + continue; + end + k1 = pts_in_vertex{i}; + k2 = pts_in_vertex{j}; + if isequal(sort(k1),sort(k2)) + %fprintf(1,'found one vertex for pruning i=%d, j=%d\n', i,j); + pts_in_vertex_pruned{j} = []; + end + end + end + tmp = {}; + k = 1; + for 
i = 1:1:vertex_index + if ~isempty(pts_in_vertex_pruned{i}) + tmp{k} = pts_in_vertex_pruned{i}; + k = k+1; + end + end + pts_in_vertex_pruned = tmp; + fprintf(1,'Pruning done\n'); + + vertex_index_pruned = length(pts_in_vertex_pruned); + adja_pruned = zeros(vertex_index_pruned, vertex_index_pruned); + for i = 1:1:vertex_index_pruned + for j = i:1:vertex_index_pruned + if i == j + continue; + end + k1 = pts_in_vertex_pruned{i}; + k2 = pts_in_vertex_pruned{j}; + adja_pruned(i,j) = (length(intersect(k1,k2))>0); + adja_pruned(j,i) = adja_pruned(i,j); + end + end + fprintf(1,'Prunned Mapper Done\n'); + + + adja = zeros(vertex_index, vertex_index); +% for i = 1:1:vertex_index +% for j = i:1:vertex_index +% if i == j +% continue; +% end +% k1 = pts_in_vertex{i}; +% k2 = pts_in_vertex{j}; +% adja(i,j) = (length(intersect(k1,k2))>0); +% adja(j,i) = adja(i,j); +% end +% end +% fprintf(1,'Mapper Done\n'); + +end + +function [Z, cutoff] = find_cluster_cutoff(distMat, num_bins_clustering) + Z = linkage(distMat(tril(true(length(distMat)),-1))'); + if length(Z(:,3)) == 1 % only two points hence one cluster + fprintf(1,'Only two points found for clustering\n'); + cutoff = Inf; + return; + end + + lens = [Z(:,3)' max(max(distMat))]; + [numBins, bc] = hist(lens, num_bins_clustering); + z = find(numBins==0); + if (sum(z) == 0) + cutoff = Inf; + return; + else + cutoff = bc(z(1)); % pickup the smallest index of z + end + + +end diff --git a/tda_msc_rsfmri_anamapper.m b/tda_msc_rsfmri_anamapper.m new file mode 100644 index 0000000000000000000000000000000000000000..db6a03629075f5c2c420f247b206eaeb98ac6fda --- /dev/null +++ b/tda_msc_rsfmri_anamapper.m @@ -0,0 +1,545 @@ +%WGGGG +% +% +% Aims - +% +% [1] analyzes Mapper output and create figures from the paper below +% +% Please cite: +% Saggar, M., Shine, J.M., Liegeois, R., Dosenbach, N.U.F., Fair, D. 2021. +% Precision dynamical mapping using topological data analysis reveals a +% unique hub-like transition state at rest. 
BioRxiv +% +% date - 5.31.2021 +% author - saggar@stanford.edu +% + +%% generating figure 2, evidence of hub nodes in the real as compared to null data +clear; close all; clc; + +data_folder = 'output'; +session = 'odd'; +metric = 'euclidean'; +numNull = 25; +doi = [21, 36]; % degree of interest (hump observed in the degree dist plot) +p=[]; +prop_doi = []; +prop_doi_ar = []; +prop_doi_pr = []; +N_re = []; +N_ar = []; +N_pr = []; +output_name = 'mapperout'; +for null_n = 1:1:numNull + null_n + for s = 1:1:10 + mapperout = load(sprintf('%s/sub-MSC%02d/sub-MSC%02d_%s_runs_mat_metric_%s_%s.mat', data_folder, s, s, session, metric, output_name)); + try + mapperout_ar = load(sprintf('%s/sub-MSC%02d/sub-MSC%02d_AR_null%03d_%s_runs_mat_metric_%s_%s.mat', data_folder, s, s, null_n, session, metric, output_name)); + catch + continue; + end + try + mapperout_pr = load(sprintf('%s/sub-MSC%02d/sub-MSC%02d_PR_null%03d_%s_runs_mat_metric_%s_%s.mat', data_folder, s, s, null_n, session, metric, output_name)); + catch + continue; + end + + % calculating degree-distribution for the mapper generated graph + deg_ar = degrees_und(mapperout_ar.nodeBynode); + deg_pr = degrees_und(mapperout_pr.nodeBynode); + deg_re = degrees_und(mapperout.nodeBynode); + + [N_re(null_n, s,:), ~] = histcounts(deg_re, 'Normalization', 'probability', 'BinLimits', [0 50], 'BinWidth',1); + [N_ar(null_n, s,:), ~] = histcounts(deg_ar, 'Normalization', 'probability', 'BinLimits', [0 50], 'BinWidth',1); + [N_pr(null_n, s,:), ~] = histcounts(deg_pr, 'Normalization', 'probability', 'BinLimits', [0 50], 'BinWidth',1); + + prop_doi(null_n, s) = sum((deg_re>doi(1)))./length(deg_re); + prop_doi_ar(null_n, s) = sum((deg_ar>doi(1)))./length(deg_ar); + prop_doi_pr(null_n, s) = sum((deg_pr>doi(1)))./length(deg_pr); + end +end + +figure; +plot(squeeze(mean(N_re,1)));hold on +plot(squeeze(mean(N_ar,1))); +plot(squeeze(mean(N_pr,1))); +legend('Real','AR','PR'); + +%% generating figure 3 & 5, annotating graphs using resting state networks and topographic gradient +% In the current version we create metaInfo data that can be visualized outside +% Matlab using our python implementation at https://braindynamicslab.github.io/dyneusr/ +% Matlab version for similar viz. is coming soon... 
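+% A minimal sketch (not part of the original pipeline) of how the metaInfo_s
+% struct array built in this cell could be dumped to JSON for use outside
+% Matlab (e.g., alongside the DyNeuSR visualizations mentioned above); assumes
+% MATLAB's built-in jsonencode (R2016b+) and a hypothetical output file name:
+%   fid = fopen('sub-MSC01_metaInfo.json', 'w');   % run after this cell completes
+%   fprintf(fid, '%s', jsonencode(metaInfo_s(1)));
+%   fclose(fid);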
+ +clear; close all; clc; +cmap = [ +254 4 2 +254 206 153 +255 251 0 +30 253 4 +253 176 245 +68 185 217 +0 0 0 +91 22 137 +18 255 235 +184 127 191 +255 255 255]./255; +nclust = 10; +num_k = 30; +res_val = 30; +gain_val = 70; + +data_folder = 'output'; +session = 'odd'; +metric = 'euclidean'; +net_names_allelse = {'DMN','VIS','FRP','DAN','MOT','VAN','SAL','COP','SM','AUD'}; +net_names_s4 = {'DMN','VIS','FRP','DAN','MOT', 'COP','SM','AUD'}; % subj4 has van and sal unavailable +net_names_s10 = {'DMN','VIS','FRP','DAN','MOT','VAN', 'COP','SM','AUD'}; % subj10 has sal unavailable + +json_thrval_pos = 0.5; +json_saveOrNot = 1; +winnerTakeAll = 0; + +doi = [21, 36]; % degree of interest (hump observed in the degree dist plot) + +corrProp_sub = []; + +for s = 1%:1:10 + sbj_name = sprintf('sub-MSC%02d-Session-%s', s, session) + mapperout = load(sprintf('%s/sub-MSC%02d/sub-MSC%02d_%s_runs_mat_metric_%s_mapperout_may31_2021.mat', data_folder, s, s, session, metric)); + + % degree annotation + deg = degrees_und(mapperout.nodeBynode); + + % doi_nodes + doi_nodes = (deg>doi(1)); + + % plotting graphs using d3.js + if s==4 + net_names = net_names_s4; + % pushing the metaInfo as networks 7 and 8 are missing in Sub04 + metaInfoNetData = zeros(size(mapperout.networkDataVertex,1),10); + metaInfoNetData(:,1:5) = mapperout.networkDataVertex(:,1:5); + metaInfoNetData(:,8:10) = mapperout.networkDataVertex(:,6:8); + elseif s==10 + net_names = net_names_s10; + % pushing the metaInfo as network 8 is missing in Sub10 + metaInfoNetData = zeros(size(mapperout.networkDataVertex,1),10); + metaInfoNetData(:,1:6) = mapperout.networkDataVertex(:,1:6); + metaInfoNetData(:,8:10) = mapperout.networkDataVertex(:,7:9); + + else + net_names = net_names_allelse; + metaInfoNetData = mapperout.networkDataVertex; + end + + [entropy1] = calcEntropy(mapperout.nodeTpMat, mapperout.nodeBynode, metaInfoNetData); + [hubness95, hubness99] = computeHubness(mapperout.nodeBynode, doi, deg); + [dom, corrProp] = calcDominance(mapperout.nodeTpMat, mapperout.nodeBynode, metaInfoNetData, json_thrval_pos); + + % write json for proportional rsn based piecharts + metaInfo.value = metaInfoNetData; + metaInfo.deg = 100*zscoreScaling(deg); + metaInfo.ses = mapperout.sess_id; + metaInfo.ent = 100*zscoreScaling(entropy1); + metaInfo.doi = doi_nodes+1; %adding 1 for plotting purposes + metaInfo.hubs99 = hubness99 + 1; %adding 1 for plotting purposes + metaInfo.dom = dom; + + + corrProp_sub(s,:) = corrProp(:); + metaInfo_s(s) = metaInfo; +end + +%% attempting to D3.js like force layouts in Matlab. +% please note - we didn't generate pie-charts in Matlab, hence the hub nodes +% don't look uniformly distributed. 
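+% (this cell reuses the mapperout and metaInfo variables left in the workspace
+% by the previous cell, so run that cell first)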
+% RSN based notation +figure; plot(graph(mapperout.nodeBynode),'Layout', 'force','UseGravity','on','MarkerSize',2+5*log10(sum(mapperout.nodeTpMat,2)),'nodecdata',metaInfo.dom); +colormap(cmap); +% SD based notation - Topographic gradient +figure; plot(graph(mapperout.nodeBynode),'Layout', 'force','UseGravity','on','MarkerSize',2+5*log10(sum(mapperout.nodeTpMat,2)),'nodecdata',metaInfo.ent); +caxis([0 60]) +%% generating figure 4: subject specificity +clear; close all; clc; +set(groot, 'DefaultTextInterpreter', 'none') + +sub_spec = zeros(22,121); +load('output/metaInfo_s_msc_odd.mat') +sub_spec(1:2:20,:) = corrProp_sub; +sub_spec(21,:) = mean(corrProp_sub); +metaInfo_s_odd = metaInfo_s; + +load('output/metaInfo_s_msc_even.mat') +sub_spec(2:2:20,:) = corrProp_sub; +sub_spec(22,:) = mean(corrProp_sub); +metaInfo_s_even = metaInfo_s; + +[r,p]=corrplot(sub_spec'); +imagesc(r); + +%% generating figure 6: traversal in time domain +clear; close all; clc; +set(groot, 'DefaultTextInterpreter', 'none') + +net_names_allelse = {'DMN','VIS','FRP','DAN','MOT','VAN','SAL','COP','SM','AUD'}; +net_names_s4 = {'DMN','VIS','FRP','DAN','MOT', 'COP','SM','AUD'}; +net_names_s10 = {'DMN','VIS','FRP','DAN','MOT','VAN', 'COP','SM','AUD'}; + +data_folder = 'output'; +session = 'odd'; +metric = 'euclidean'; +json_thrval_pos = 0.5; +load(sprintf('output/metaInfo_s_msc_%s',session),'metaInfo_s','corrProp_sub') + +doi = [21, 36]; % degree of interest +cmap = [ +254 4 2 +254 206 153 +255 251 0 +30 253 4 +253 176 245 +68 185 217 +0 0 0 +91 22 137 +18 255 235 +184 127 191 +255 255 255]./255; + +ciu_bin_s = {}; +ciu_s = {}; +tr_color_s = {}; + +for s = 1:1:10 + sbj_name = sprintf('sub-MSC%02d-Session-%s', s, session) + mapperout = load(sprintf('%s/sub-MSC%02d/sub-MSC%02d_%s_runs_mat_metric_%s_mapperout_may31_2021.mat', data_folder, s, s, session, metric)); + + % getting fd frames for proper Markov chain estimation + fd_files = dir(sprintf('data/sub-MSC%02d/*TMASK.txt',s)); + + if strcmp(session,'odd') == 1 + ses_idx = 1:2:10 + elseif strcmp(session,'even') == 1 + ses_idx = 2:2:10 + end + + tr_label = []; + sess_label = []; + k_itr = 1; + fd_across_ses = []; + for ses = ses_idx + fd = load([fd_files(ses).folder '/' fd_files(ses).name]); + + fd_across_ses = [fd_across_ses; fd]; + + % fd(end) = -1; + if fd(end) == 1 %last frame of a session is good, then make it 0 so that we can tag end of session + fd(end) = -1; + end + tr_label = [tr_label; fd]; + sess_label = [sess_label; ones(length(fd),1)*k_itr]; + k_itr = k_itr + 1; + end + + % plotting graphs using d3.js + if s==4 + net_names = net_names_s4; + elseif s==10 + net_names = net_names_s10; + else + net_names = net_names_allelse; + end + + nodes_hub = find(metaInfo_s(s).hubs99==2); % 2 hubs, 1 non hubs + nodes_nothub = find(metaInfo_s(s).hubs99==1); + dom = metaInfo_s(s).dom; + + hub_trs_wtd = sum(mapperout.nodeTpMat(nodes_hub,:))./length(nodes_hub); + nothub_trs_wtd = sum(mapperout.nodeTpMat(nodes_nothub,:))./length(nodes_nothub); + rsn_trs_wtd = []; + net_names{end+1} = 'NOTA'; + for net = 1:1:length(net_names) + nodes_dom = find(dom==net); + nodes_dom = setdiff(nodes_dom, nodes_hub); %excluding hub nodes + rsn_trs_wtd(net,:) = sum(mapperout.nodeTpMat(nodes_dom,:))./length(nodes_dom); + + end + + % colorful - across all rsn and hub nodes + [i,ciu]=max([hub_trs_wtd; rsn_trs_wtd]); + tmp = tr_label; + tmp2 = tr_label; + tmp(tr_label~=0) = ciu; + tmp2(tr_label~=0) = ciu; + tmp(tr_label==-1) = []; + [tm2, mc2] = runMC2(tmp, 0, 'ciu'); % removes bad/stiching frames while 
counting for transitions + ciu_s{s} = tmp; + + mc_s(s) = mc2; + tmp3 = tmp2'; + tmp3(tmp3==0) = []; % remove badframes. + tr_color_s{s}= tmp3; +end + +clc; close all; +figure; set(gcf,'color','w','Position',[ 1 744 2560 448]); + +for s=1:1:10 + ax = subtightplot(10,1,s), imagesc(tr_color_s{s}(1:600)); + if s==4 + map = getColorMap([{'HUB'},net_names_s4,{'NOTA'}]); + elseif s==10 + map = getColorMap([{'HUB'},net_names_s10,{'NOTA'}]); + else + map = getColorMap([{'HUB'},net_names_allelse,{'NOTA'}]); + end + colormap(ax,map); + set(gca, 'FontSize',20) +end + +%% generating figure 6E + +clear; clc; close all; +tmp = load('output/mc_even_hub99.mat'); +ev = reshape(tmp.mc_all, [10,144]); +tmp = load('output/mc_odd_hub99.mat'); +od = reshape(tmp.mc_all, [10,144]); + +mc_nodiag = []; + +for s = 1:1:10 + tmp1 = squeeze(od(s,:,:)); + mc_nodiag = [mc_nodiag; tmp1(:)']; + + tmp1 = squeeze(ev(s,:,:)); + mc_nodiag = [mc_nodiag; tmp1(:)']; + +end + +hypo_mat = [1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 + ]; + +[r,p]=corrplot(mc_nodiag','type','spearman','testR','on'); +close all; +r = suppress_diag(r); +hypo_mat2 = suppress_diag(hypo_mat); +anova1(fisherz([r(hypo_mat2>0);r(hypo_mat2==0)]), [ones(20,1); 2*ones(380,1)]) +figure; nhist({r(hypo_mat2>0) r(hypo_mat2==0)}) + + +%% utility functions +function new_map = getColorMap(net_names_needed) + + net_names = {'HUB','DMN','VIS','FRP','DAN','MOT','VAN','SAL','COP','SM','AUD','NOTA'}; + map = [ + 201,201,201; + 254,3,2; + 255,250,0; + 255,206,153; + 30,253,3; + 254,176,245; + 71,185,216; + 0,0,0; + 92,22,137; + 16,255,235; + 195,122,193; + 255,255,255; + ]; + map = map./255; + new_map = []; + %new_map = [new_map; map(1,:)]; + for n = 1:1:length(net_names_needed) + new_map = [new_map; map(find(ismember(net_names,net_names_needed{n})),:)]; + end + + +end +function [tm, mc] = runMC2(ciu, plotFlag, plotTitle) + % modifying the code to account for frame dropouts and session + % boundaries + + nciu = max(ciu);%sum(unique(ciu)>0); + tm = zeros(nciu, nciu) + 1e-10; % a small value is added for subjects which don't have all networks. 
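+    % accumulate first-order transition counts between consecutive frames; any
+    % pair involving a censored/boundary frame (label <= 0) is skipped so that
+    % transitions are not counted across those gaps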
+ for i = 1:1:length(ciu)-1 + j = i + 1; + if ciu(i) <= 0 || ciu(j) <=0 + %fprintf(1,'skipping due to 0\n'); + continue; + else + tm(ciu(i),ciu(j)) = tm(ciu(i),ciu(j)) + 1; + end + + end + + mc = dtmc(tm); + + if plotFlag == 1 + figure; set(gcf, 'Position',[596 496 1722 408]); + subplot(1,3,1),graphplot(mc,'ColorNodes',true,'ColorEdges',true,'LabelEdges',true); + subplot(1,3,2),eigplot(mc); + X = redistribute(mc,25); + subplot(1,3,3),distplot(mc,X,'Type','Histogram','FrameRate',0.01); + suptitle(replace(plotTitle,'_','-')); + end + +end +function [hubness95, hubness99] = computeHubness(mat, doi, deg) + g = graph(mat); + closeness_matlab = centrality(g, 'closeness'); + + hubness99 = zeros(size(mat,1),1); + hubness99(intersect(find(deg>doi(1)), find(closeness_matlab>prctile(closeness_matlab,99)))) = 1; + + hubness95 = zeros(size(mat,1),1); + hubness95(intersect(find(deg>doi(1)), find(closeness_matlab>prctile(closeness_matlab,95)))) = 1; + +end +function scaled = zscoreScaling(value) + scaled = zscore(value); + scaled = scaled./max(scaled); +end +function [dom1, corrProp] = calcDominance(nodeTpMat, nodeBynode, netData, thr) + numMetaGroups = size(netData,2)+1; %last one is for neither of the 11 networks + numNodes = size(nodeBynode, 1); + prop = zeros(numNodes, numMetaGroups); + for n = 1:1:numNodes + trs = find(nodeTpMat(n,:)); + + for tr = trs + if any(netData(tr,:)>thr) + prop(n, netData(tr,:)>thr) = prop(n, netData(tr,:)>thr) + 1; + else + prop(n, numMetaGroups) = prop(n, numMetaGroups) + 1; + end + end + + + end + + [~, dom1] = max(prop, [], 2); + corrProp = corr(prop); +end +function [entropy1] = calcEntropy(nodeTpMat, nodeBynode, netData) + + numNodes = size(nodeBynode, 1); + entropy1 = []; + for n = 1:1:numNodes + trs = find(nodeTpMat(n,:)); + if length(trs) > 1 + mean_nets = mean(netData(trs,:)); + entropy1(n) = std(mean_nets); + else + entropy1(n) = -1; + end + end +end +function parsave(myfile, sbj_name, runType, metricType, nodeTpMat, nodeBynode, tpMat, parcelData, networkDataVertex, ses_idx, sess_id, filter) + matObj = matfile(myfile, 'Writable', true); + matObj.nodeTpMat = nodeTpMat; + matObj.nodeBynode = nodeBynode; + matObj.tpMat = tpMat; + matObj.parcelData = parcelData; + matObj.networkDataVertex = networkDataVertex; + matObj.sbj_name = sbj_name; + matObj.ses_idx = ses_idx; + matObj.sess_id = sess_id; + matObj.metricType = metricType; + matObj.runType = runType; + matObj.filter = filter; +end +function [nodeTpMat, nodeBynode, tpMat, filter] = runBDLMapper_wrapper(parcelData, metricType) + + % run mapper + data_z = parcelData; + nclust = 10; + num_bin_clusters = nclust; + num_k = 30; + res_val = 30; + gain_val = 70; + + [nodeTpMat, nodeBynode, tpMat, filter] = runBDLMapper(data_z, metricType, res_val, gain_val, num_k, num_bin_clusters); + +end + +function [nodeTpMat, nodeBynode, tpMat, filter] = runBDLMapper(data, metricType, res_val, gain_val, num_k, num_bin_clusters) + + X = data; + + + resolution = [res_val res_val]; + gain = gain_val; + + fprintf(1,'Estimating distance matrix\n'); + tic + distMat = estimateDistance(X, metricType); + toc + + fprintf(1,'Estimating knn graph\n'); + tic + % create knn graph, estimate geodesic distances, embed using cmdscale and apply mapper + [knnGraphTbl, knnGraph_dense_bin, knnGraph_dense_wtd, knnGraph_dense_bin_conn, knnGraph_dense_wtd_conn]= createPKNNG_bdl(distMat, num_k); + + knn_g_wtd = graph(knnGraph_dense_bin_conn); + + % estimate geodesic distances + dist_geo_wtd = round(distances(knn_g_wtd,'Method','positive')); + toc + + 
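+    % the geodesic distance matrix computed above is embedded with classical MDS
+    % (cmdscale) below; its first two coordinates serve as the 2-D Mapper filter (lens)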
fprintf(1,'Estimating embedding\n'); + tic + + % embed using cmdscale + [y,e] = cmdscale(dist_geo_wtd); + + filter = [y(:,1), y(:,2)]; + toc + + fprintf(1,'Running mapper\n'); + tic + + + + [adja, adja_pruned, pts_in_vertex, pts_in_vertex_pruned] = mapper2d_bdl_hex_binning(distMat, filter, resolution, gain, num_bin_clusters, 3); + + toc + + fprintf(1,'Creating final output\n'); + tic + + % creating matrices for d3 visualization + numNodes = length(pts_in_vertex_pruned); + numTp = size(X,1); + nodeTpMat = zeros(numNodes, numTp); + for node = 1:1:numNodes + tmp = pts_in_vertex_pruned{node}; + nodeTpMat(node, tmp) = 1; + end + + nodeBynode = adja_pruned; + tpMat = getMatTp_wtd(nodeBynode, nodeTpMat); + fprintf(1,'Done\n'); + toc + +end +function distMat = estimateDistance(X, metricType) + distMat = squareform(pdist(X, metricType)); +end \ No newline at end of file diff --git a/tda_msc_rsfmri_runmapper.m b/tda_msc_rsfmri_runmapper.m new file mode 100644 index 0000000000000000000000000000000000000000..18581c82da445ac134f7b83392f98ac9b451814e --- /dev/null +++ b/tda_msc_rsfmri_runmapper.m @@ -0,0 +1,209 @@ +%WGGGG +% +% +% aims - +% [1] Runs Mapper on odd and even runs of MSC data and +% saves the mapperout file that can be used later for analysis and viz. +% [2] Runs corresponding null models for comparison. +% +% Please cite: +% Saggar, M., Shine, J.M., Liegeois, R., Dosenbach, N.U.F., Fair, D. 2021. +% Precision dynamical mapping using topological data analysis reveals a +% unique hub-like transition state at rest. BioRxiv +% +% date - 5.31.2021 +% author - saggar@stanford.edu +% +%% load data +cd ~/data +clear; clc; close all; +metricType = 'euclidean'; +runType = 'odd'; +output_name = 'mapperout'; + +for s = 1:1:10 + sbj_name = sprintf('sub-MSC%02d',s); + cd(sbj_name); + + ses_files = dir('vc*.dtseries.nii'); + ses_files = {ses_files.name}; + fd_files = dir('*TMASK.txt'); + fd_files = {fd_files.name}; + + parcel_ids = cifti_read(sprintf('%s_parcels.dtseries.nii', sbj_name)); + parcel_ids = parcel_ids.cdata; + vertex_net = cifti_read(sprintf('%s_parcel_networks.dscalar.nii', sbj_name)); + vertex_net = vertex_net.cdata; + + vertexNetworksIds = unique(vertex_net); + parcelNetworkIds = unique(parcel_ids); + + net_ids_in_parcels = {}; + for net = 1:1:length(vertexNetworksIds) + tmp = unique(parcel_ids(find(vertex_net == vertexNetworksIds(net)))); + [c,ia,ib] = intersect(parcelNetworkIds, tmp); + net_ids_in_parcels{net} = ia; + end + parcel_order=[]; + for n=1:1:length(net_ids_in_parcels) + parcel_order= [parcel_order; net_ids_in_parcels{n}]; + end + networkDataVertex = []; + parcelData = []; + parcelData_noTmask = []; + + X_odd = []; + + sess_id = []; + if strcmp(runType,'odd') == 1 + ses_idx = 1:2:10 + elseif strcmp(runType,'even') == 1 + ses_idx = 2:2:10 + end + + fd_all_ses = []; + for ses = ses_idx + ses_files{ses} + st = cifti_read(ses_files{ses}); + data = st.cdata; + fd = load(fd_files{ses}); + fd_all_ses = [fd_all_ses; fd]; + + data_notMask = data; + data = data(:,fd>0); + + X_odd = [X_odd; zscore(data')]; + + sess_id = [sess_id; ones(size(data,2),1)*ses]; + + % create Network based mean time series for vertexwise data + tmp = []; + tmp2 = []; + for p = 1:1:length(vertexNetworksIds) + tmp = find(vertex_net == vertexNetworksIds(p)); + tmp2(p,:) = mean(data(tmp,:),1); + end + tmp2 = zscore(tmp2'); + tmp2(:,1) = []; + if s==4 % accounting for two misssing networks in s4 + tmp2 = [tmp2(:,1), tmp2(:,2), tmp2(:,3), tmp2(:,5), tmp2(:,6), tmp2(:,9), mean(tmp2(:,[10,11]),2), tmp2(:,12)]; + elseif 
s==10 % accounting for one misssing network in s10 + tmp2 = [tmp2(:,1), tmp2(:,2), tmp2(:,3), tmp2(:,5), tmp2(:,6), tmp2(:,7), tmp2(:,9), mean(tmp2(:,[10,11]),2), tmp2(:,12)]; + else + tmp2 = [tmp2(:,1), tmp2(:,2), tmp2(:,3), tmp2(:,5), tmp2(:,6), tmp2(:,7), tmp2(:,8), tmp2(:,9), mean(tmp2(:,[10,11]),2), tmp2(:,12)]; + end + + networkDataVertex = [networkDataVertex; tmp2]; + + % create parcel based mean timeseries + tmp = []; + tmp2 = []; + tmp3 = []; + for p = 1:1:length(parcelNetworkIds) + tmp = find(parcel_ids == parcelNetworkIds(p)); + tmp2(p,:) = mean(data(tmp,:),1); + tmp3(p,:) = mean(data_notMask(tmp,:),1); + end + tmp2 = zscore(tmp2'); + tmp3 = zscore(tmp3'); + parcelData = [parcelData; tmp2]; + parcelData_noTmask = [parcelData_noTmask; tmp3]; + + + end + + % real data + [nodeTpMat, nodeBynode, tpMat, filter] = runBDLMapper_wrapper(parcelData, metricType); + myfile = sprintf('%s_%s_runs_mat_metric_%s_%s.mat', sbj_name, runType, metricType, output_name); + parsave(myfile, sbj_name, runType, metricType, nodeTpMat, nodeBynode, tpMat, parcelData, parcel_order, networkDataVertex, ses_idx, sess_id, filter); + + % null data + num_surr = 25; + surr_pr = []; + ar_model_order = 1; + % using CBIG code to generate null datasets. + surr_ar = CBIG_RL2017_get_AR_surrogate(parcelData, num_surr, ar_model_order, 'gaussian', length(fd_all_ses), parcelData_noTmask); + surr_pr = CBIG_RL2017_get_PR_surrogate(parcelData, num_surr); + + for it = 1:1:num_surr + it + parcelData_null = squeeze(surr_pr(:,:,it)); + sbj_name_null = sprintf('%s_PR_null%03d',sbj_name,it); + + + %creating metaInfo for null data + parcelDataNetworks_null = []; + tmp = []; + tmp2 = []; + for net = 1:1:length(vertexNetworksIds) + tmp2(:,net) = mean(parcelData_null(:,net_ids_in_parcels{net}),2); + end + tmp2 = zscore(tmp2); + tmp2(:,1) = []; + if s==4 + tmp2 = [tmp2(:,1), tmp2(:,2), tmp2(:,3), tmp2(:,5), tmp2(:,6), tmp2(:,9), mean(tmp2(:,[10,11]),2), tmp2(:,12)]; + elseif s==10 + tmp2 = [tmp2(:,1), tmp2(:,2), tmp2(:,3), tmp2(:,5), tmp2(:,6), tmp2(:,7), tmp2(:,9), mean(tmp2(:,[10,11]),2), tmp2(:,12)]; + else + tmp2 = [tmp2(:,1), tmp2(:,2), tmp2(:,3), tmp2(:,5), tmp2(:,6), tmp2(:,7), tmp2(:,8), tmp2(:,9), mean(tmp2(:,[10,11]),2), tmp2(:,12)]; + end + parcelDataNetworks_null = [parcelDataNetworks_null; tmp2]; + + + try + [nodeTpMat_null, nodeBynode_null, tpMat_null, filter_null] = runBDLMapper_wrapper(parcelData_null, metricType); + catch + fprintf(2,'Error due to CMD SCale in PR it%03d', it); + continue; + end + + myfile = sprintf('%s_%s_runs_mat_metric_%s_%s', sbj_name_null, runType, metricType, output_name); + parsave(myfile, sbj_name_null, runType, metricType, nodeTpMat_null, nodeBynode_null, tpMat_null, parcelData_null, parcel_order, parcelDataNetworks_null, ses_idx, sess_id, filter_null) + + close all; + end + + + for it = 1:1:num_surr + it + parcelData_null_ar = squeeze(surr_ar(:,:,it)); + + % Jul 22, 2021 - adding temporal mask to null to see if that + % changes degree distribution + parcelData_null_ar = parcelData_null_ar(fd_all_ses>0,:); + + sbj_name_null = sprintf('%s_AR_null%03d',sbj_name,it); + + %creating metaInfo for null data + parcelDataNetworks_null_ar = []; + tmp = []; + tmp2 = []; + for net = 1:1:length(vertexNetworksIds) + tmp2(:,net) = mean(parcelData_null_ar(:,net_ids_in_parcels{net}),2); + end + tmp2 = zscore(tmp2); + tmp2(:,1) = []; + if s==4 + tmp2 = [tmp2(:,1), tmp2(:,2), tmp2(:,3), tmp2(:,5), tmp2(:,6), tmp2(:,9), mean(tmp2(:,[10,11]),2), tmp2(:,12)]; + elseif s==10 + tmp2 = [tmp2(:,1), tmp2(:,2), tmp2(:,3), 
tmp2(:,5), tmp2(:,6), tmp2(:,7), tmp2(:,9), mean(tmp2(:,[10,11]),2), tmp2(:,12)]; + else + tmp2 = [tmp2(:,1), tmp2(:,2), tmp2(:,3), tmp2(:,5), tmp2(:,6), tmp2(:,7), tmp2(:,8), tmp2(:,9), mean(tmp2(:,[10,11]),2), tmp2(:,12)]; + end + parcelDataNetworks_null_ar = [parcelDataNetworks_null_ar; tmp2]; + + try + [nodeTpMat_null_ar, nodeBynode_null_ar, tpMat_null_ar, filter_null_ar] = runBDLMapper_wrapper(parcelData_null_ar, metricType); + catch + fprintf(2,'Error due to CMD SCale in AR it%03d', it); + continue; + end + + myfile = sprintf('%s_%s_runs_mat_metric_%s_%s', sbj_name_null, runType, metricType, output_name); + parsave(myfile, sbj_name_null, runType, metricType, nodeTpMat_null_ar, nodeBynode_null_ar, tpMat_null_ar, parcelData_null_ar, parcel_order, parcelDataNetworks_null_ar, ses_idx, sess_id, filter_null_ar) + + close all; + end + toc +end +
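+
+% Note: this script calls parsave and runBDLMapper_wrapper, which appear as
+% local functions of tda_msc_rsfmri_anamapper.m; local functions at the bottom
+% of one script are not visible to other scripts, so those helpers must also be
+% available on the MATLAB path (or defined below) for this script to run. If
+% they are not provided elsewhere in the repository, a minimal parsave sketch
+% matching the call above (note the extra parcel_order argument) could look
+% like this -- an assumption, not the authors' original implementation:
+function parsave(myfile, sbj_name, runType, metricType, nodeTpMat, nodeBynode, tpMat, parcelData, parcel_order, networkDataVertex, ses_idx, sess_id, filter)
+    % write Mapper outputs to a .mat file via matfile (safe inside parallel loops)
+    matObj = matfile(myfile, 'Writable', true);
+    matObj.nodeTpMat = nodeTpMat;
+    matObj.nodeBynode = nodeBynode;
+    matObj.tpMat = tpMat;
+    matObj.parcelData = parcelData;
+    matObj.parcel_order = parcel_order;
+    matObj.networkDataVertex = networkDataVertex;
+    matObj.sbj_name = sbj_name;
+    matObj.ses_idx = ses_idx;
+    matObj.sess_id = sess_id;
+    matObj.metricType = metricType;
+    matObj.runType = runType;
+    matObj.filter = filter;
+end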