Codeplot
国家基因库单细胞训练营 -- 单细胞RNA数据高级分析

单细胞RNA-seq数据下游分析

![](https://southpublic-database.obs.cn-south-1.myhuaweicloud.com/Notebook/training_camp/image/scRNA/tutorial.jpg)

参考

一、下游分析

1. 基因富集分析

  差异基因功能富集分析,如GO分析、KEGG分析。

  • GO分析:依托于GO数据库,将基因功能分为三类,细胞组分(cellular component, CC)、分子功能(molecular function, MF)、生物过程(biological process, BP);

  • KEGG分析:作为一个综合型数据库,核心为KEGG PATHWAY及KEGG ORTHOLOGY。

In []:

#加载富集分析需要的Rlibrary(Seurat)
library(MAST)
library(ggrepel)
library(R.utils)
library(clusterProfiler)
library(org.Hs.eg.db)

In []:

# 加载基础课程保存的RDS文件
combined <- readRDS("/opt/example/scRNA-Example3/Example3/combined.rds")

In [3]:

# 寻找差异基因
# test.use="MAST" 适用于单细胞数据的zero-inflated negative binomial models
deg <- FindMarkers(combined, ident.1 = "B naive", ident.2 = "CD4 Naive", logfc.threshold = 0,min.pct = 0, test.use="MAST")

head(deg)

Out [3]:

Done!

Combining coefficients and standard errors

Calculating log-fold changes

Calculating likelihood ratio tests

Refitting on reduced model...

Done!

, , , , , , , , , , , , , , ,
A data.frame: 6 × 5
p_valavg_log2FCpct.1pct.2p_val_adj
<dbl><dbl><dbl><dbl><dbl>
CD74 0.000000e+004.9244580.9970.500 0.000000e+00
HLA-DRA3.265305e-2523.9779850.9710.0811.113632e-247
MS4A13.841225e-2273.9499510.9640.0491.310050e-222
IGHM1.158145e-2254.9755380.8930.1443.949855e-221
HLA-DRB18.390452e-2163.5679750.9320.0732.861564e-211
CD79A5.464371e-2063.0180280.9450.0651.863624e-201
,

In [4]:

# 根据q value和log2FC进行筛选差异基因
# sign_deg <- subset(deg, p_val_adj<0.05 & abs(avg_log2FC)>0.15)

degdf <- deg
degdf$sign <- ifelse(degdf$p_val_adj < 0.05 & abs(degdf$avg_log2FC) > 0.15,rownames(degdf),NA)
# degdf$symbol <- rownames(degdf)

P.Value_t = 0.05
degdf$change = ifelse(degdf$p_val_adj < P.Value_t & degdf$avg_log2FC < -0.15,"down",ifelse(degdf$p_val_adj < P.Value_t & degdf$avg_log2FC > 0.15,"up","stable"))

p<-ggplot(degdf, aes(avg_log2FC,  -log10(p_val_adj))) +
    geom_point(alpha=0.4, size=3, aes(color=change)) +
    ylab("-log10(Pvalue)")+
    scale_color_manual(values=c("blue", "grey","red"))+
    geom_hline(yintercept = -log10(P.Value_t),lty=4,col="black",lwd=0.3) +
    geom_vline(xintercept = 0.15,lty=2,col="black",lwd=0.3)+
    geom_vline(xintercept = -0.15,lty=2,col="black",lwd=0.3)+
    geom_label_repel(aes(label=sign), fontface="bold", color="grey50", box.padding=unit(0.35, "lines"), point.padding=unit(0.5, "lines"), segment.colour = "grey50")+
    theme_bw()
p

Out [4]:
Warning message:
"Removed 32925 rows containing missing values (geom_label_repel)."
Warning message:
"ggrepel: 1158 unlabeled data points (too many overlaps). Consider increasing max.overlaps"

img
In [5]:

## 获取上下调基因
gene_up=rownames(deg[deg$avg_log2FC > 0.15,])
# gene_down=rownames(deg[deg$avg_log2FC < -0.15,])

## 把SYMBOL改为ENTREZID
library(org.Hs.eg.db)
gene_up=as.character(na.omit(AnnotationDbi::select(org.Hs.eg.db,
                                                keys = gene_up,
                                                columns = 'ENTREZID',
                                                keytype = 'SYMBOL')[,2]))
# gene_down=as.character(na.omit(AnnotationDbi::select(org.Hs.eg.db,
#                                                   keys = gene_down,
#                                                   columns = 'ENTREZID',
#                                                   keytype = 'SYMBOL')[,2]))

head(gene_up)

Out [5]:
'select()' returned 1:1 mapping between keys and columns

, ,
  1. '972'
  2. '3122'
  3. '931'
  4. '3507'
  5. '3123'
  6. '973'
,

In [6]:

## GO富集
##CC表示细胞组分,MF表示分子功能,BP表示生物学过程,ALL表示同时富集三种过程。

geneset <- gene_up
deg_type <- "up"

# ego_ALL <- enrichGO(gene = geneset,
#                 OrgDb=org.Hs.eg.db,
#                 keyType = "ENTREZID",
#                 ont = "ALL",
#                 pAdjustMethod = "BH",
#                 minGSSize = 1,
#                 pvalueCutoff = 0.05,
#                 qvalueCutoff = 0.05,
#                 readable = TRUE)

ego_CC <- enrichGO(gene = geneset,
                OrgDb=org.Hs.eg.db,
                keyType = "ENTREZID",
                ont = "CC",
                pAdjustMethod = "BH",
                minGSSize = 1,
                pvalueCutoff = 0.05,
                qvalueCutoff = 0.05,
                readable = TRUE)
ego_BP <- enrichGO(gene = geneset,
                OrgDb=org.Hs.eg.db,
                keyType = "ENTREZID",
                ont = "BP",
                pAdjustMethod = "BH",
                minGSSize = 1,
                pvalueCutoff = 0.05,
                qvalueCutoff = 0.05,
                readable = TRUE)

ego_MF <- enrichGO(gene = geneset,
                 OrgDb=org.Hs.eg.db,
                 keyType = "ENTREZID",
                 ont = "MF",
                 pAdjustMethod = "BH",
                 minGSSize = 1,
                 pvalueCutoff = 0.05,
                 qvalueCutoff = 0.05,
                 readable = TRUE)

head(ego_MF)

p1 <-dotplot(ego_CC)
p1
, , , , , , , , , , , , , , ,
A data.frame: 6 × 9
IDDescriptionGeneRatioBgRatiopvaluep.adjustqvaluegeneIDCount
<chr><chr><chr><chr><dbl><dbl><dbl><chr><int>
GO:0032395GO:0032395MHC class II receptor activity 9/1817 10/18337 8.243216e-091.520049e-051.420436e-05HLA-DRA/HLA-DRB1/HLA-DQA1/HLA-DPA1/HLA-DQA2/HLA-DQB1/HLA-DOB/HLA-DOA/HLA-DQB2 9
GO:0140297GO:0140297DNA-binding transcription factor binding72/1817376/183373.173680e-082.770471e-052.588915e-05CIITA/MEF2C/HDAC9/HHEX/JUN/TLE1/PRKCB/RB1/JUND/MEF2A/NCOA3/PARP1/NOTCH2/SPI1/ANXA4/TCF3/SMAD3/PTMA/JUP/FOS/TLE3/NFKBIA/TAF7/SMARCB1/BCL10/RBBP8/HES1/HIF1A/THRAP3/KLF4/TMF1/ATF4/GTF2I/BAZ2A/MED25/JMJD1C/DDIT3/NR4A2/TOB2/MED30/GABPA/RERE/USF1/HIRA/CREB1/USF2/DNAJA1/BUD31/CHD4/HDAC3/NRIP1/TP53BP1/SRI/TWIST1/GTF2A1/ZNF516/SMARCE1/KAT5/ACTB/CNOT1/ATF2/EP300/PIAS1/SPEN/ZFPM2/TAF4B/TAF4/TACC1/CALR/GTF2A2/CDK5RAP3/SETD3 72
GO:0030695GO:0030695GTPase regulator activity 84/1817467/183374.507274e-082.770471e-052.588915e-05RALGPS2/FGD2/USP6NL/TBC1D9/TRIO/LRRK2/DENND5B/JUN/RASGRP3/SH3BP5/VAV3/ARHGAP24/DENND3/RABEP2/ARHGAP17/FARP2/VAV2/SIPA1L3/GDI2/ITSN2/RAPGEF1/TBC1D22A/RGS2/SMAP2/SIPA1L1/IQGAP1/TAGAP/LAMTOR5/IQSEC1/ARHGEF12/RABGEF1/TBC1D1/ARHGEF7/SRGAP2/DENND4B/RASGEF1B/RAP1GDS1/NF1/EVI5/PREX1/RABEP1/CHML/RP2/LAMTOR1/BNIP2/RGL2/NCKAP1L/SIPA1/TBC1D12/ARHGAP31/ARAP2/HPS1/RALGPS1/GNB5/RAPGEF5/FARP1/SH2D3C/ARHGAP22/FAM13B/DNMBP/TBC1D9B/LAMTOR2/CPEB2/RANGRF/DEPDC5/SGSM3/RUNDC1/RABGAP1L/CYTH3/RABGAP1/DENND11/PSD4/CYTH4/ARHGEF2/ARHGAP32/CCDC88A/RIC1/FGD4/RCC1L/PLXNB1/CHM/ARFGEF2/ARFGAP3/ARHGAP684
GO:0003712GO:0003712transcription coregulator activity 82/1817480/183376.120712e-072.365351e-042.210343e-04CIITA/PDLIM1/POU2AF1/HDAC9/BASP1/TLE1/PRKCB/TRIM38/PAWR/RB1/PARP15/PARP14/NCOA3/DTX1/CBFA2T3/BTG1/ARID5B/JUP/TLE3/JAZF1/SMARCB1/ZMIZ2/BCL10/LIMD1/AKIRIN2/RBBP8/THRAP3/NME2/TMF1/NFKBIB/RAP2C/TRIM22/SS18/ARID3A/JMJD1C/SERTAD2/TRIM5/TOB2/MED30/EWSR1/RERE/ATF7IP/SIAH2/MAML3/KDM7A/BUD31/BTG2/BCOR/ZBED1/HDAC3/TRIM13/NRIP1/TP53BP1/SMARCE1/MED22/TAF5L/KAT5/CENPJ/SUB1/RBFOX2/CDYL/BCLAF1/TRIB3/NRG1/RUVBL1/EP300/KAT7/PIAS1/SPEN/SIN3B/ZFPM2/NCOA5/TACC1/WWOX/CIR1/DCAF6/RYBP/MED12L/SETD3/TADA1/RNF20/KDM5B 82
GO:0017124GO:0017124SH3 domain binding 32/1817128/183376.413642e-072.365351e-042.210343e-04LYN/NCF1/ADAM19/CYBA/PTPN6/SH3BP5/GRB2/KHDRBS2/ARHGAP17/UVRAG/DTX1/HIP1R/SH3BGRL/RAPGEF1/SH3BP2/CCDC6/VASP/RUFY1/USP8/OSTF1/INPP5D/ABI1/PTPN12/EPS15/ARHGAP31/INPPL1/ADAM17/FUT8/ZNF106/KHDRBS1/CD2AP/ARHGAP6 32
GO:0023026GO:0023026MHC class II protein complex binding 10/181717/18337 9.008519e-072.768618e-042.587184e-04CD74/HLA-DRA/MS4A1/HLA-DRB1/HLA-DMB/HLA-DMA/HLA-DOB/HLA-DOA/CD81/HSP90AB1 10
,

img
In [7]:

# GO term 柱形图
display_number = c(10,10,10)
ego_result_BP <- as.data.frame(ego_BP)[1:display_number[1], ]
ego_result_CC <- as.data.frame(ego_CC)[1:display_number[2], ]
ego_result_MF <- as.data.frame(ego_MF)[1:display_number[3], ]
              
##将以上我们摘取的部分通路重新组合成数据框
go_enrich_df <- data.frame(
ID=c(ego_result_BP$ID, ego_result_CC$ID, ego_result_MF$ID), Description=c(ego_result_BP$Description,ego_result_CC$Description,ego_result_MF$Description),
GeneNumber=c(ego_result_BP$Count, ego_result_CC$Count, ego_result_MF$Count),
type=factor(c(rep("biological process", display_number[1]), 
              rep("cellular component", display_number[2]),
              rep("molecular function", display_number[3])), 
              levels=c("biological process", "cellular component","molecular function" )))
    
##通路的名字太长,选取通路的前10个单词作为通路的名字
for(i in 1:nrow(go_enrich_df)){
  description_splite=strsplit(go_enrich_df$Description[i],split = " ")
  description_collapse=paste(description_splite[[1]][1:10],collapse = " ") 
  go_enrich_df$Description[i]=description_collapse
  go_enrich_df$Description=gsub(pattern = "NA","",go_enrich_df$Description)
}
    
##绘制GO柱状图
go_enrich_df$type_order=factor(rev(as.integer(rownames(go_enrich_df))),labels=rev(go_enrich_df$Description))
COLS <- c("#66C3A5", "#8DA1CB", "#FD8D62")
p2<- ggplot(data=go_enrich_df, aes(x=type_order,y=GeneNumber, fill=type)) + 
  geom_bar(stat="identity", width=0.8) + 
  scale_fill_manual(values = COLS) + 
  coord_flip() + 
  xlab("GO term") + 
  ylab("Gene_Number") + 
  labs(title = paste0("The Most Enriched GO Terms ",deg_type))+
  theme_bw()
p2

img
In [8]:

##KEGG富集
# 设置下载方法
R.utils::setOption("clusterProfiler.download.method",'auto')

kk <- enrichKEGG(gene = geneset,keyType = "kegg",organism= "hsa", qvalueCutoff = 0.05, pvalueCutoff=0.05)
head(kk)

Out [8]:
Reading KEGG annotation online:

Reading KEGG annotation online:

, , , , , , , , , , , , , , ,
A data.frame: 6 × 9
IDDescriptionGeneRatioBgRatiopvaluep.adjustqvaluegeneIDCount
<chr><chr><chr><chr><dbl><dbl><dbl><chr><int>
hsa04662hsa04662B cell receptor signaling pathway 35/91082/8163 2.444735e-137.920943e-116.330578e-11973/974/933/6850/4067/930/5336/29760/695/971/2213/10859/118788/27071/3725/25780/5579/5777/2885/10451/5530/7410/1380/2353/975/4792/8915/23547/4893/4793/3635/5879/5880/3636/5594 35
hsa05140hsa05140Leishmaniasis 31/91077/8163 3.633393e-115.886097e-094.704288e-093122/3123/3117/3115/3113/3127/3118/3119/3109/3108/3112/1536/653361/3111/1535/4689/3725/5579/5777/2212/1378/3460/3654/2353/4792/4793/3459/3676/3717/4615/5594 31
hsa04672hsa04672Intestinal immune network for IgA production22/91049/8163 2.217436e-092.394831e-071.913998e-073122/3123/3117/3115/3113/3127/3118/3119/115650/3109/3108/3112/3111/958/7852/23495/23308/942/3676/8741/608/3600 22
hsa05169hsa05169Epstein-Barr virus infection 50/910202/81632.846010e-082.305268e-061.842417e-063122/3123/3117/3115/3113/3127/3118/3119/3109/6850/3108/4067/930/5336/29760/3112/695/3111/2208/958/3725/5925/3654/1380/5971/4938/4792/1026/4791/3280/953/3454/4793/7188/10018/5879/51426/11047/6773/3665/6892/54205/4615/5710/29110/7874/317/5719/811/9541 50
hsa04144hsa04144Endocytosis 57/910251/81638.081430e-084.791643e-063.829578e-066643/7852/157/10094/23325/8724/11031/10193/4088/387/8395/27131/64744/9922/25978/10097/10109/80230/9101/382/9135/57132/4218/80223/58513/2060/5371/116984/10095/55737/22841/830/128866/7037/8218/377/23362/10093/253725/5868/375/8766/9265/7046/23550/27128/1173/147179/274/9525/9765/11021/11311/10564/26286/10617/664257
hsa04666hsa04666Fc gamma R-mediated phagocytosis 30/91097/8163 1.025810e-074.791643e-063.829578e-066850/4067/5336/653361/2213/10094/4082/5579/10451/2212/7410/5581/8395/3055/8613/7408/10097/10109/382/3635/5879/5880/3636/5580/2934/10095/10093/9846/5594/274 30
,

In [9]:

# KEGG dotplot
p<-dotplot(kk)
p

img
In [10]:

##KEGG 柱状图
hh <- as.data.frame(kk)

# 转换rownames为数字
rownames(hh) <- 1:nrow(hh)
hh$order=factor(rev(as.integer(rownames(hh))),labels = rev(hh$Description))

p4 <- ggplot(hh,aes(y=order,x=Count,fill=p.adjust))+
  geom_bar(stat = "identity",width=0.7)+
  scale_fill_gradient(low = "red",high ="blue" )+
  labs(title = paste0("KEGG Pathways Enrichment ",deg_type),
       x = "Gene numbers", 
       y = "Pathways")+
  theme(axis.title.x = element_text(face = "bold",size = 8),
       axis.title.y = element_text(face = "bold",size = 8),
       legend.title = element_text(face = "bold",size = 8),
       axis.text.y = element_text(size=3))
  #theme_bw()
p4

img

2. 拟时序分析

  为什么要做拟时序分析?

  • 不同细胞表达谱连续变化的路径用低维坐标展示;

  • 根据路径推测发育、发展过程中潜在的时间关系;

  • 进一步鉴定细胞的状态。

  拟时序分析能够将高维数据映射到一维,可称为伪时间。这些伪时间可能反应个体在生物过程中的相对进展,如疾病进展、细胞发育等,使我们能够在没有明确的时间数据的情况下理解测量特征的(伪)时间行为。因此,通过建立个体的相对排序,我们可以定义一系列的分子状态,构成感兴趣的过程轨迹。参考

  截止2019年,轨迹推断的方法比较:参考

  Monocle3使用了近似PAGA的算法,先在UMAP空间建立基于细胞的KNN邻图,根据Louvain算法进行聚类。发现不同簇比预期中存在更多连接,这些连接被保留了下来。这种近似PAGA算法的结果可能会由一个或者多个部分组成,每一个部分都会单独进入下一步的轨迹计算,形成轨迹图。参考

In []:

#加载所需包
library(monocle3)
library(SeuratWrappers) # 可将seurat对象转换为其他对象格式
library(dplyr)

In [12]:

DimPlot(combined, reduction = "umap", label = TRUE, label.size = 5,pt.size = 0.5)+ NoLegend()

img
In [13]:

# 获取目标细胞的object
B.obj <- subset(combined, idents = c("NK", "CD8 TEM","CD4 TCM","CD4 Naive"))
B.obj
table(B.obj@active.ident)

In [14]:

# 针对sub object 进行处理
B.obj <- NormalizeData(B.obj)
B.obj <- FindVariableFeatures(B.obj)
B.obj <- ScaleData(B.obj)
B.obj <- RunPCA(B.obj)
B.obj <- FindNeighbors(B.obj, dims = 1:20)
B.obj <- FindClusters(B.obj, resolution=0.9)
B.obj <- RunUMAP(B.obj, dims = 1:20, n.neighbors = 50)

Out [14]:
Centering and scaling data matrix

PC_ 1
Positive: LTB, LEF1, IL7R, CAMK4, MAL, SERINC5, FOXP1, PDE7A, PDE3B, INPP4B
BACH2, MAML2, PRKCA, MALAT1, PAG1, ARHGAP15, ANK3, SMCHD1, TTN, NELL2
CSGALNACT1, FOXO1, LINC01619, CDC14A, DOCK10, LRRN3, RNF157, BCL2, RETREG1, THEMIS
Negative: NKG7, GZMA, FCGR3A, GNLY, CCL5, FGFBP2, GZMB, GZMH, KLRD1, KLRC2
LGALS1, SYNE1, TGFBR3, SAMD3, LINC02384, CEP78, NCAM1, MYOM2, SYNE2, ACTG1
AOAH, CD247, MCTP2, GPR141, ENC1, PTGDS, SLCO4C1, DTHD1, VAV3, JAZF1
PC_ 2
Positive: IL7R, LTB, MAL, TRBC2, JUN, FOS, GZMK, LRRN3, CD8B, RETREG1
TRBC1, NELL2, LEF1, TUBA1B, SCML1, TNFRSF10D, CFH, TNFSF8, TRBV14, ACTG1
RAB3IP, LINC02446, DNAH6, CYP2U1, TEX14, SIRPG, GPR157, IL2RA, ADD2, EPHA1-AS1
Negative: CD247, UTRN, SYNE1, AC016831.7, RAP1GAP2, AOAH, SKAP1, RABGAP1L, SYNE2, KLRD1
ARHGAP26, GNLY, TGFBR3, ANKRD44, KLF12, GZMB, LRBA, SMYD3, NCOA1, MALAT1
ARID1B, PITPNC1, SAMD3, LINC00298, FCGR3A, AKT3, FGFBP2, TNIK, NCALD, NKG7
PC_ 3
Positive: JUN, FOS, TEX14, GZMK, SLC4A10, PHACTR2, AL136456.1, ME1, IL7R, AC016831.7
ADAM12, NIBAN1, IL12RB2, PLCB1, CYTH3, PTPRM, ADAMTSL4-AS1, CFH, IFNG-AS1, IKZF2
SYNE2, RUNX2, COLQ, GPRIN3, PTPN13, SPIDR, TMEM67, RPS6KA5, RABGAP1L, SBF2
Negative: FGFBP2, LEF1, FCGR3A, GZMB, TXK, GZMH, LRRN3, GNLY, AK5, MALAT1
PTGDS, TRBC1, MAL, CD247, LINC02384, KLRC2, ADTRP, KLRD1, FOXP1, BACH2
CA6, SLCO4C1, AC084198.2, ACTG1, SCML1, PDE7A, TTN, CLN5, GPR160, GPR141
PC_ 4
Positive: DPP10, AC013652.1, NAALADL2, FOS, TUBA1B, PTPRD, ACTG1, MYOM2, JUN, LGALS1
AL807742.1, KIAA1217, AC007402.1, KCNJ6, LRRC4C, MAGI2, FCGR3A, MIR99AHG, TRBC2, VEPH1
LINC02694, GZMA, KYNU, NKG7, PTPRK, AC104123.1, CTNNA3, AL355672.1, AC009975.1, AL031073.2
Negative: RAP1GAP2, MALAT1, ARL15, SMYD3, PPP2R2B, PLCB1, SKAP1, VPS13B, ANKRD44, AC016831.7
XRN1, ARHGAP26, AOAH, LINC00298, UBR1, SAP130, FAF1, JAZF1, GMDS-DT, NFAT5
ARID1B, PIK3C2A, UTRN, EXOC4, CDKAL1, FTO, PITPNC1, PTGDS, TOM1L2, SYNE1
PC_ 5
Positive: TSHZ2, F5, MCF2L2, NIBAN1, AL589693.1, LINC02694, INPP4B, MIAT, FAAH2, SYNE2
KLHL5, ANK3, LGALS1, ETV6, CSGALNACT1, TRBC1, BICD1, KANK1, GZMH, FTO
CDC14A, SGPP2, SCMH1, ELMOD3, IL2RA, HIVEP3, ZC2HC1A, AL353660.1, SLC16A1-AS1, SDCCAG8
Negative: CD8B, AOAH, LINC02446, ZMAT4, PTPRK, PLCB1, TXK, TEX14, LRRN3, FOS
IL12RB2, L3MBTL4, AREG, SYK, SLC4A10, CA6, PITPNC1, TBC1D32, MCTP2, LINC02100
S100B, PPP1R12B, ATP8B4, NELL2, TBC1D5, IRGM, SBF2, EPHA1-AS1, MTUS1, KCNQ5

Computing nearest neighbor graph

Computing SNN

Out [14]:
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 2479
Number of edges: 113894

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.6712
Number of communities: 6
Elapsed time: 0 seconds

Out [14]:
Warning message:
"The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session"
15:35:07 UMAP embedding parameters a = 0.9922 b = 1.112

15:35:08 Read 2479 rows and found 20 numeric columns

15:35:08 Using Annoy for neighbor search, n_neighbors = 50

15:35:08 Building Annoy index with metric = cosine, n_trees = 50

0% 10 20 30 40 50 60 70 80 90 100%

[----|----|----|----|----|----|----|----|----|----|

|

15:35:08 Writing NN index file to temp file C:\Users\songyumo\AppData\Local\Temp\RtmpKutiEm\file206851c039ea

15:35:08 Searching Annoy index using 1 thread, search_k = 5000

15:35:09 Annoy recall = 100%

15:35:11 Commencing smooth kNN distance calibration using 1 thread

15:35:13 Initializing from normalized Laplacian + noise

15:35:14 Commencing optimization for 500 epochs, with 176258 positive edges

15:35:22 Optimization finished

In [15]:

table(B.obj@active.ident)
head(B.obj@meta.data)
DimPlot(B.obj, group.by="predicted_celltype")
, , , , , , , , , , , , , , ,
A data.frame: 6 × 12
orig.identnCount_RNAnFeature_RNApercent.mtpANN_0.25_0.01_88doublet_infopANN_0.25_0.005_98splitRNA_snn_res.0.5seurat_clusterspredicted_celltypeRNA_snn_res.0.9
<chr><dbl><int><dbl><dbl><chr><dbl><chr><fct><fct><fct><fct>
CELL5918_N1_1SeuratProject1072819593.1319910.08695652SingletNApbmc122CD4 Naive2
CELL2801_N1_1SeuratProject 992918233.8070300.04347826SingletNApbmc122CD4 Naive2
CELL468_N1_1SeuratProject 606113512.4418410.21739130SingletNApbmc120CD4 Naive0
CELL782_N2_1SeuratProject 611317842.3229180.04347826SingletNApbmc105NK 5
CELL2658_N1_1SeuratProject 918418964.2356270.00000000SingletNApbmc133CD8 TEM 3
CELL6637_N1_1SeuratProject 820016962.7195120.21739130SingletNApbmc120CD4 Naive0
,

img
In [16]:

# 将seurat object转换为cds object
cds <- as.cell_data_set(B.obj)
cds <- estimate_size_factors(cds)
cds <- cluster_cells(cds = cds, reduction_method = "UMAP")

Out [16]:
Warning message:
"Monocle 3 trajectories require cluster partitions, which Seurat does not calculate. Please run 'cluster_cells' on your cell_data_set object"

In [17]:

# 计算轨迹
cds <- learn_graph(cds, use_partition = TRUE)
plot_cells(
    cds, 
    color_cells_by = "seurat_clusters", 
    label_groups_by_cluster=TRUE,
    label_leaves=TRUE, 
    label_branch_points=TRUE,
    graph_label_size=3
)

Out [17]:
|======================================================================| 100%

Out [17]:
Warning message in igraph::graph.dfs(stree_ori, root = root_cell, neimode = "all", :
"Argument neimode' is deprecated; use mode' instead"