library(clusterProfiler)
library(dplyr)
## choose organism ----
Organism <- 'Human' # 'Human' or 'Mouse'
ifelse(Organism == 'Human', library(org.Hs.eg.db), library(org.Mm.eg.db))
## prepare data ----
data(geneList, package="DOSE")
gene <- names(geneList)[abs(geneList) > 2]
## KEGG enrich ----
kk <- enrichKEGG(
gene = gene,
organism = ifelse(Organism == 'Human', 'hsa', 'mmu'), # human: hsa, mouse: mmu
pvalueCutoff = 0.05,
# qvalueCutoff = 2,
)
kk_result_p <- subset(kk@result, pvalue < 0.05)
View(kk_result_p)
# write.csv(kk_result_p, file = '_.csv')
# kk_result_p <- read.csv(file = '_.csv', row.names = 1)
# delete suffix ' - Mus musculus (house mouse)' when organism is mouse
if(Organism == 'Mouse') {
kk_result_p$Description <- kk_result_p$Description |> stringr::str_sub(end = -30L)
}
## remove specific terms ----
## remove disease-related terms
tmp <- kk_result_p[stringr::str_detect(kk_result_p$category, 'disease', negate = T), ]
## remove 'Alcoholism'
tmp <- tmp |> dplyr::arrange(fold_enrichment) |> filter(Description != 'Alcoholism')
## bar plot ----
ggplot(tmp,
aes(
x = reorder(Descroption, -pvalue),
y = -log10(pvalue),
fill = FoldEnrichment
)) +
geom_bar(stat = "identity", width = 0.8) +
scale_fill_gradient(low = "#95cefa",
high = "#234a6d",
name = 'FoldEnrichment') +
coord_flip() + theme_bw() +
theme(panel.grid = element_blank())
if(F) {
ggsave(
filename = '_.pdf',
width = 200,
height = 100,
units = 'mm',
dpi = 300
)
}Enrichment Analysis
本文包括ORA分析和GSEA分析,均以最常用的KEGG为例。参考clusterProfiler说明文档。
过代表分析(ORA)
ORA需输入一个基因ID向量。KEGG支持的ID包括Entrez ID等,如果使用蛋白质谱数据则需转换ID。
使用setReadable()函数将富集结果的GeneID列转换为Gene Symbol.
kk <- setReadable(kk, OrgDb = org.Hs.eg.db, keyType="ENTREZID")
## The geneID column is translated to symbol
head(kk, 3)基因集富集分析(GSEA)
GSEA需要一个值为log2FC,names属性为gene id的向量,而且log2FC为降序排列。
library(clusterProfiler)
library(dplyr)
library(enrichplot)
# GSEA ----
f <- file.path('_.xls')
gene_diff <- readr::read_tsv(f)
gene_diff
## choose organism ----
Organism <- 'Mouse' # 'Human' or 'Mouse'
ifelse(Organism == 'Human', library(org.Hs.eg.db), library(org.Mm.eg.db))
## select columns for GSEA ----
gene_gsea <- gene_diff |>
dplyr::select(gene_id, gene_symbol, log2FC) |>
dplyr::arrange(desc(log2FC))
gene_list <- gene_gsea$log2FC
names(gene_list) <- gene_gsea$gene_id
## GSEA KEGG ----
res_KEGG <- gseKEGG(
gene_list,
organism = ifelse(Organism == 'Human', 'hsa', 'mmu'),
pvalueCutoff = 0.05,
pAdjustMethod = "BH"
)
GSEA_result <- subset(res_KEGG@result, pvalue < 0.05)
View(GSEA_result)
# readr::write_csv(GSEA_result, file = '_.csv')
## ridge plot ----
ridgeplot(
res_KEGG,
showCategory = 20,
fill = "p.adjust",
#填充色 "pvalue", "p.adjust", "qvalue"
core_enrichment = TRUE,
#是否只使用 core_enriched gene
label_format = 30,
orderBy = "NES",
decreasing = FALSE
) +
theme(axis.text.y = element_text(size = 8))
## line chart & hits plot ----
# select a specific term
n <- 1
gsea_description <- res_KEGG@result$Description[n]
if(Organism == 'Mouse') {
gsea_description <- gsea_description |> stringr::str_sub(end = -30L)
}
gseaplot2(
res_KEGG,
geneSetID = n,
title = gsea_description,
subplots = 1:2, # choose subplots, including 1 line chart, 2 hits plot and 3 ?
base_size = 10
)
if(F) {
ggsave(
filename = paste0('_', gsea_description, '.pdf'),
width = 100,
height = 80,
units = 'mm',
dpi = 300
)
}