细胞通讯分析流程与方法 |

作者： Biomarker

发布于 2022年11月24日

分类：时空组学

细胞通讯分析流程与方法：细胞通讯分析的基础是基因表达数据和配体-受体数据库信息，例如转录组数据表明A、B细胞分别表达了基因α和β，通过数据库查询α和β是配体-受体关系，则认为A-B通过α-β途径进行了通讯。

输入数据格式要求：

（1）细胞注释文件（meta.txt，两列，分别为细胞id和细胞类型）

（2）基因表达矩阵（counts.txt）

#下载软件（按顺序安装）

conda create -n cellphonedb2 python=3.7

conda install Cython

conda install h5py

conda install scikit-learn

conda install r

conda install rpy2

pip install https://pypi.tuna.tsinghua.edu.cn/simple cellphonedb

pip install -i https://pypi.tuna.tsinghua.edu.cn/simple markupsafe==2.0.1

#R包

install.packages(‘ggplot2’)

install.packages(‘heatmap’)

#data1—> R

#使用RCTD注释的空间数据进行后续分析

results <- myRCTD@results

results_df <- results$results_df

barcodes = rownames(results_df[results_df$spot_class != “reject” & puck@nUMI >= 1,])

my_table = puck@coords[barcodes,]

my_table$class = results_df[barcodes,]$first_type

meta = my_table %>% select(class) %>% rownames_to_column(var = ‘cell’) %>% dplyr::rename(cell_type = class)

write.table(meta ,file = ‘/xxx/Test/meta.txt’, sep = ‘\t’, quote = F, row.names = T)

#data2—> R

expr <- Read10X(‘/xxx/BSTViewer_project/subdata/L13_heAuto/’,cell.column = 1)

object <- CreateSeuratObject(counts = expr,assay = “Spatial”)

counts = object[[‘Spatial’]]@counts %>% as.data.frame() %>% rownames_to_column(var = ‘Gene’)

write.table(counts ,file = ‘/xxx/Test/counts.txt’, sep = ‘\t’, quote = F, row.names = T)

#鼠源基因转化

#CellPhoneDB 只能识别人类基因，如果是鼠源基因需要进行同源基因的转化

#下载人鼠同源基因对应关系

http://asia.ensembl.org/biomart/martview/b9f8cc0248e4714ba8e0484f0cbe4f02

#细胞通讯分析流程与方法参考步骤

https://www.jianshu.com/p/922a7f2c21fc

#下载的对应关系表应该有四列

Gene_stable_ID | Gene_name | Mouse_gene_stable_ID | Mouse_gene_name

#将对应关系(human_mouse_gene.txt)替换到 data2 矩阵—>Shell

awk -F ‘\t’ ‘BEGIN{OFS == ‘\t’}ARGIND==1{a[$4]=$1}ARGIND==2{if(FNR ==1){print}else{$1 = a[$1];print}}’ human_mouse_gene.txt counts.txt | sed ‘/^ /d’ | sed ‘1s/^/Gene\t/’ | sed ‘s/\s/\t/g’ > counts_new.txt

#从远程存储库中列出cellphonedb可用数据库版本:

cellphonedb database list_remote

#查看本地数据库

cellphonedb database list_local

#下载远程数据库

cellphonedb database download –version <version_spec|latest>

#运行cellphonedb —> Shell

cellphonedb method statistical_analysis –output-path test_output meta.txt counts_new.txt

#输出文件

deconvoluted.txt | pvalues.txt | significant_means.txt

test.count_network.txt | means.txt | test.interaction_count.txt

#气泡图—> Shell

cellphonedb plot dot_plot –pvalues-path test_output/pvalues.txt –means-path test_output/means.txt –output-path test_output –output-name test.dotplot.pdf

#仅绘制自己关注的细胞和互作—> Shell

cellphonedb plot dot_plot –pvalues-path test_output/pvalues.txt –means-path test_output/means.txt –output-path test_output –output-name test.dotplot2.pdf –rows test_output/row.txt –columns test_output/col.txt

注：横坐标是细胞类型互作，纵坐标是蛋白互作，点越大表示 p 值越小，颜色代表平均表达量

#热图—> Shell

cellphonedb plot heatmap_plot –pvalues-path test_output/pvalues.txt –output-path test_output –pvalue 0.05 –count-name test.heatmap_count.pdf –log-name test.heatmap_log_count.pdf –count-network-name test.count_network.txt –interaction-count-name test.interaction_count.txt meta.txt

注：横纵坐标表示细胞类型，颜色深蓝色到紫红色代表互作数由低到高

#细胞通讯分析流程与方法：细胞类型网络图—> R

#这里借用另一款细胞通讯分析软件——CellChat(https://github.com/sqjin/CellChat)的绘图方法

#加载R包

library(igraph)

library(tidyr)

library(stats)

library(reshape2)

library(Matrix)

library(grDevices)

library(ggplot2)

#使用CellChat作者提供的绘制互作网络图的function

scPalette <- function(n) {

colorSpace <- c(‘#E41A1C’,’#377EB8′,’#4DAF4A’,’#984EA3′,’#F29403′,’#F781BF’,’#BC9DCC’,’#A65628′,’#54B0E4′,’#222F75′,’#1B9E77′,’#B2DF8A’,

‘#E3BE00′,’#FB9A99′,’#E7298A’,’#910241′,’#00CDD1′,’#A6CEE3′,’#CE1261′,’#5E4FA2′,’#8CA77B’,’#00441B’,’#DEDC00′,’#B3DE69′,’#8DD3C7′,’#999999′)

if (n <= length(colorSpace)) {

colors <- colorSpace[1:n]

} else {

colors <- grDevices::colorRampPalette(colorSpace)(n)

}

return(colors)

}

netVisual_circle <-function(net, color.use = NULL,title.name = NULL, sources.use = NULL, targets.use = NULL, idents.use = NULL, remove.isolate = FALSE, top = 1,

weight.scale = FALSE, vertex.weight = 20, vertex.weight.max = NULL, vertex.size.max = NULL, vertex.label.cex=1,vertex.label.color= “black”,

edge.weight.max = NULL, edge.width.max=8, alpha.edge = 0.6, label.edge = FALSE,edge.label.color=’black’,edge.label.cex=0.8,

edge.curved=0.2,shape=’circle’,layout=in_circle(), margin=0.2, vertex.size = NULL,

arrow.width=1,arrow.size = 0.2){

if (!is.null(vertex.size)) {

warning(“‘vertex.size’ is deprecated. Use `vertex.weight`”)

}

if (is.null(vertex.size.max)) {

if (length(unique(vertex.weight)) == 1) {

vertex.size.max <- 5

} else {

vertex.size.max <- 15

}

options(warn = -1)

thresh <- stats::quantile(net, probs = 1-top)

net[net < thresh] <- 0

if ((!is.null(sources.use)) | (!is.null(targets.use)) | (!is.null(idents.use)) ) {

if (is.null(rownames(net))) {

stop(“The input weighted matrix should have rownames!”)

}

cells.level <- rownames(net)

df.net <- reshape2::melt(net, value.name = “value”)

colnames(df.net)[1:2] <- c(“source”,”target”)

# keep the interactions associated with sources and targets of interest

if (!is.null(sources.use)){

if (is.numeric(sources.use)) {

sources.use <- cells.level[sources.use]

}

df.net <- subset(df.net, source %in% sources.use)

}

if (!is.null(targets.use)){

if (is.numeric(targets.use)) {

targets.use <- cells.level[targets.use]

}

df.net <- subset(df.net, target %in% targets.use)

}

if (!is.null(idents.use)) {

if (is.numeric(idents.use)) {

idents.use <- cells.level[idents.use]

}

df.net <- filter(df.net, (source %in% idents.use) | (target %in% idents.use))

}

df.net$source <- factor(df.net$source, levels = cells.level)

df.net$target <- factor(df.net$target, levels = cells.level)

df.net$value[is.na(df.net$value)] <- 0

net <- tapply(df.net[[“value”]], list(df.net[[“source”]], df.net[[“target”]]), sum)

}

net[is.na(net)] <- 0

if (remove.isolate) {

idx1 <- which(Matrix::rowSums(net) == 0)

idx2 <- which(Matrix::colSums(net) == 0)

idx <- intersect(idx1, idx2)

net <- net[-idx, ]

net <- net[, -idx]

}

g <- graph_from_adjacency_matrix(net, mode = “directed”, weighted = T)

edge.start <- igraph::ends(g, es=igraph::E(g), names=FALSE)

coords<-layout_(g,layout)

if(nrow(coords)!=1){

coords_scale=scale(coords)

}else{

coords_scale<-coords

}

if (is.null(color.use)) {

color.use = scPalette(length(igraph::V(g)))

}

if (is.null(vertex.weight.max)) {

vertex.weight.max <- max(vertex.weight)

}

vertex.weight <- vertex.weight/vertex.weight.max*vertex.size.max+5

loop.angle<-ifelse(coords_scale[igraph::V(g),1]>0,-atan(coords_scale[igraph::V(g),2]/coords_scale[igraph::V(g),1]),pi-atan(coords_scale[igraph::V(g),2]/coords_scale[igraph::V(g),1]))

igraph::V(g)$size<-vertex.weight

igraph::V(g)$color<-color.use[igraph::V(g)]

igraph::V(g)$frame.color <- color.use[igraph::V(g)]

igraph::V(g)$label.color <- vertex.label.color

igraph::V(g)$label.cex<-vertex.label.cex

if(label.edge){

igraph::E(g)$label<-igraph::E(g)$weight

igraph::E(g)$label <- round(igraph::E(g)$label, digits = 1)

}

if (is.null(edge.weight.max)) {

edge.weight.max <- max(igraph::E(g)$weight)

}

if (weight.scale == TRUE) {

#E(g)$width<-0.3+edge.width.max/(max(E(g)$weight)-min(E(g)$weight))*(E(g)$weight-min(E(g)$weight))

igraph::E(g)$width<- 0.3+igraph::E(g)$weight/edge.weight.max*edge.width.max

}else{

igraph::E(g)$width<-0.3+edge.width.max*igraph::E(g)$weight

}

igraph::E(g)$arrow.width<-arrow.width

igraph::E(g)$arrow.size<-arrow.size

igraph::E(g)$label.color<-edge.label.color

igraph::E(g)$label.cex<-edge.label.cex

igraph::E(g)$color<- grDevices::adjustcolor(igraph::V(g)$color[edge.start[,1]],alpha.edge)

if(sum(edge.start[,2]==edge.start[,1])!=0){

igraph::E(g)$loop.angle[which(edge.start[,2]==edge.start[,1])]<-loop.angle[edge.start[which(edge.start[,2]==edge.start[,1]),1]]

}

radian.rescale <- function(x, start=0, direction=1) {

c.rotate <- function(x) (x + start) %% (2 * pi) * direction

c.rotate(scales::rescale(x, c(0, 2 * pi), range(x)))

}

label.locs <- radian.rescale(x=1:length(igraph::V(g)), direction=-1, start=0)

label.dist <- vertex.weight/max(vertex.weight)+2

plot(g,edge.curved=edge.curved,vertex.shape=shape,layout=coords_scale,margin=margin, vertex.label.dist=label.dist,

vertex.label.degree=label.locs, vertex.label.family=”Helvetica”, edge.label.family=”Helvetica”) # “sans”

if (!is.null(title.name)) {

text(0,1.5,title.name, cex = 1.1)

}

# https://www.andrewheiss.com/blog/2016/12/08/save-base-graphics-as-pseudo-objects-in-r/

# grid.echo()

# gg <- grid.grab()

gg <- recordPlot()

return(gg)

}

#整体互作网络图绘制

#读取cellphonedb的分析数据

count_net <- read.delim(“/xxx/test_output/test.count_network.txt”, check.names = FALSE)

#数据格式处理

count_inter <- count_net

count_inter$count <- count_inter$count/100

count_inter <- spread(count_inter, TARGET, count)

rownames(count_inter) <- count_inter$SOURCE

count_inter <- count_inter[, -1]

count_inter <- as.matrix(count_inter)

#绘图

netVisual_circle(count_inter,weight.scale = T)

#细胞通讯分析流程与方法：每种细胞与其他细胞的互作

par(mfrow = c(1,3), xpd=TRUE)

for (i in 1:nrow(count_inter)) {

mat2 <- matrix(0, nrow = nrow(count_inter), ncol = ncol(count_inter), dimnames = dimnames(count_inter))

mat2[i, ] <- count_inter[i, ]

netVisual_circle(mat2,

weight.scale = T,

edge.weight.max = max(count_inter),

title.name = rownames(count_inter)[i],

arrow.size=0.2)

}