introduction
This tutorial is made for users who want to quickly use CACIMAR to identify conserved cell types based on scRNA-seq data. The input for this part is a Seurat object for each species.
library(CACIMAR)
We load the test data, a subsetted Seurat object for mouse and one for zebrafish as obtained from the paper, in order to demonstrate the API and utility of CACIMAR.
# Load the example data file bundled in the package's extdata directory.
# mustWork = TRUE makes a missing file fail right here with a clear error,
# instead of system.file() returning "" and load() failing on an empty path.
load(system.file("extdata", "cross_species_small.rda", package = "CACIMAR", mustWork = TRUE))
To identify conserved cell types and their corresponding markers, it is necessary to first identify markers specific to each individual cell type. CACIMAR calculates the power of markers separately in each species to avoid the batch effect between species.
# Compute the marker power table for the first species (mouse, `mm_small`).
marker_spec1 <- Identify_Markers(mm_small)
## [1] "Resting MG"
## [1] "Activated MG"
## [1] "Rods"
## [1] "GABAergic AC"
## [1] "Glycinergic AC"
## [1] "Resting MG" "0" "100"
## [1] "Activated MG" "0" "100"
## [1] "Rods" "0" "100"
## [1] "GABAergic AC" "0" "100"
## [1] "Glycinergic AC" "0" "100"
# Compute the marker power table for the second species (zebrafish, `zf_small`).
marker_spec2 <- Identify_Markers(zf_small)
## [1] "Resting MG"
## [1] "Activated MG"
## [1] "Rods"
## [1] "GABAergic AC"
## [1] "Glycinergic AC"
## [1] "Resting MG" "0" "100"
## [1] "Activated MG" "0" "100"
## [1] "Rods" "0" "100"
## [1] "GABAergic AC" "0" "100"
## [1] "Glycinergic AC" "0" "100"
Then, we identify conserved cell types based on the marker power in each cluster. ‘marker_spec1’ and ‘marker_spec2’ are data.frames of marker power, and each should contain three columns: gene, power, and cluster.
The output of this part is ‘conserved_celltype’, a list containing three elements. The first element is the detailed conservation analysis, including the total number of genes in each cell type pair, the number of conserved genes in each cell type pair, and the names of the conserved genes in each cell type pair. The second element is the conservation score among all cell type pairs.
# Compare the marker power tables of the two species. "mm" and "zf" are the
# species labels passed for the first and second marker table respectively.
conserved_celltype <- Identify_ConservedCellTypes(
  OrthG_Mm_Zf, marker_spec1, marker_spec2, "mm", "zf"
)

# The second list element holds the conservation scores between cell types.
CSCT_plot <- conserved_celltype[[2]]

# Keep "mm" cell types as rows and "zf" cell types as columns.
# The patterns are anchored with "^" so that only the species prefix is
# matched; an unanchored pattern would also pick up a cell type of the other
# species whose name merely contains "mm" or "zf". drop = FALSE keeps the
# two-dimensional shape even when a single row or column remains.
CSCT_plot <- CSCT_plot[
  grep("^mm", rownames(CSCT_plot)),
  grep("^zf", colnames(CSCT_plot)),
  drop = FALSE
]

### show the CSCT value with a heatmap
Heatmap_Cor(
  CSCT_plot,
  cluster_cols = FALSE,
  cluster_rows = FALSE,
  Color1 = c(
    rgb(102 / 255, 46 / 255, 115 / 255),
    rgb(31 / 255, 153 / 255, 139 / 255),
    rgb(251 / 255, 232 / 255, 48 / 255)
  )
)
Using CSCT as the distance matrix, we can further construct cell type phylogenetic trees.
# Use the full score matrix (second element of the result list) as the
# distance matrix for the tree.
dist_matrix <- conserved_celltype[[2]]

# The first two characters of every row name are taken as the species label.
species.vector <- substr(rownames(dist_matrix), 1, 2)

species_CellType_Tree <- Plot_Species_CellType_Tree(
  dist_matrix = dist_matrix,
  species.vector = species.vector,
  hcluster.method = "average",
  geom_nodepoint = 0,
  layout.tree = "rectangular",
  offset = 0.02,
  tiplab.size = 5,
  tippoint.shape = 22,
  tippoint.shape.size = 4
)

# Display the plot stored in the returned object.
species_CellType_Tree$Plot
Based on the CSCT above, we further identify conserved cell types.
# Derive the conserved cell type pairs from the mm-by-zf CSCT matrix.
# Note: this reassigns `conserved_celltype`; from here on it holds the vector
# of pair names rather than the list returned by Identify_ConservedCellTypes().
conserved_celltype <- identify_conserved_pair(CSCT_plot)
print(conserved_celltype)
## [1] "mmActivated MG-zfActivated MG" "mmGABAergic AC-zfGABAergic AC"
## [3] "mmRods-zfRods"
Then we can identify conserved markers among these conserved cell type pairs.
# Look up the markers shared within each conserved cell type pair found above.
conserved_markers <- identify_conserved_marker(
  OrthG_Mm_Zf,
  marker_spec1,
  marker_spec2,
  Species_name1 = "mm",
  Species_name2 = "zf",
  conserved_celltype_pair = conserved_celltype
)
# Preview the first rows of the result.
head(conserved_markers)
## mm_conserved_marker zf_conserved_marker mm_cluster
## mm_zf_1T1G721 ENSMUSG00000008683 ENSDARG00000010160 Activated MG
## mm_zf_1T1G1070 ENSMUSG00000017009 ENSDARG00000059906 Activated MG
## mm_zf_1T1G1177 ENSMUSG00000018593 ENSDARG00000019353 Activated MG
## mm_zf_1T1G1832 ENSMUSG00000021250 ENSDARG00000031683 Activated MG
## mm_zf_1T1G2558...5 ENSMUSG00000023944 ENSDARG00000029150 Activated MG
## mm_zf_1T1G3476 ENSMUSG00000026701 ENSDARG00000043511 Activated MG
## zf_cluster
## mm_zf_1T1G721 Activated MG
## mm_zf_1T1G1070 Activated MG
## mm_zf_1T1G1177 Activated MG
## mm_zf_1T1G1832 Activated MG
## mm_zf_1T1G2558...5 Activated MG
## mm_zf_1T1G3476 Activated MG
Additionally, it is possible to conduct unbiased analysis of conserved markers in all pairs of cell types.
# Build every mouse-by-zebrafish cell type pair.
# combn() on the concatenated cluster vector returns a 2-row matrix with one
# pair per *column* and also mixes clusters within a species, so indexing it
# with [, 1] and [, 2] only ever produced two pairs. expand.grid() enumerates
# the full cross product of mouse clusters and zebrafish clusters instead.
combination <- expand.grid(
  mm = paste0("mm", unique(as.character(marker_spec1$cluster))),
  zf = paste0("zf", unique(as.character(marker_spec2$cluster))),
  stringsAsFactors = FALSE
)

# Pair labels follow the "<mm cell type>-<zf cell type>" form used for the
# conserved pairs, e.g. "mmRods-zfRods".
unbiased_markers <- identify_conserved_marker(
  OrthG_Mm_Zf,
  marker_spec1,
  marker_spec2,
  Species_name1 = "mm",
  Species_name2 = "zf",
  conserved_celltype_pair = paste0(combination$mm, "-", combination$zf)
)
head(unbiased_markers)
## mm_conserved_marker zf_conserved_marker mm_cluster zf_cluster
## mm_zf_1T1G1177 ENSMUSG00000018593 ENSDARG00000019353 Resting MG Resting MG
## mm_zf_1T1G1531 ENSMUSG00000020423 ENSDARG00000020298 Resting MG Resting MG
## mm_zf_1T1G1832 ENSMUSG00000021250 ENSDARG00000031683 Resting MG Resting MG
## mm_zf_1T1G2279 ENSMUSG00000022528 ENSDARG00000006514 Resting MG Resting MG
## mm_zf_1T1G3476 ENSMUSG00000026701 ENSDARG00000043511 Resting MG Resting MG
## mm_zf_1T1G6196 ENSMUSG00000035805 ENSDARG00000063026 Resting MG Resting MG
Finally, we provide built-in functions to visualize conserved markers in conserved cell types across species.
# Pick the rows of the mouse marker table named by the conserved mouse markers
# and keep columns 4 to 9.
# NOTE(review): this assumes the row names of marker_spec1 are gene IDs - confirm.
marker_spec_visualize1 <- marker_spec1[conserved_markers$mm_conserved_marker, 4:9]
# Sort the rows by the first retained column before plotting.
marker_spec_visualize1 <- marker_spec_visualize1[order(marker_spec_visualize1[, 1]), ]
Plot_MarkersHeatmap(
  marker_spec_visualize1,
  cellheight = 2.5,
  cellwidth = 10,
  border_color = NA,
  show_colnames = TRUE
)
# Pick the rows of the zebrafish marker table named by the conserved zebrafish
# markers and keep columns 4 to 9.
# NOTE(review): this assumes the row names of marker_spec2 are gene IDs - confirm.
marker_spec_visualize2 <- marker_spec2[conserved_markers$zf_conserved_marker, 4:9]
# Sort the rows by the first retained column before plotting.
marker_spec_visualize2 <- marker_spec_visualize2[order(marker_spec_visualize2[, 1]), ]
Plot_MarkersHeatmap(
  marker_spec_visualize2,
  cellheight = 2.5,
  cellwidth = 10,
  border_color = NA,
  show_colnames = TRUE
)