library(survival)
library(survivalROC)
library(survminer)
library(dplyr)
td
# columns: 4
# Patient_ID, Gene, Status, Time
sROC <- survivalROC(
Stime = td$Time,
status = td$Status,
marker = td$Gene,
predict.time = 24,
# 想要考察的时间段
method = "KM"
)
## calculate Yoden index
result <- sROC |>
mutate(Youden = TP - FP) |>
slice_max(order_by = Youden, n = 1, with_ties = FALSE) |>
mutate(
max_Youden = Youden,
cut.value = cut.values,
.keep = "none"
)
print(
paste0(
"最大约登指数 ", result$max_Youden,
",此时基因表达量为 ", result$cut.value
)
)
td <- td |> mutate(LowerThanCutOff = Gene <= result$cut.value) |> count(LowerThanCutOff)
fit <- survfit(data = td , Surv(time, Status) ~ LowerThanCutOff)
# 查看生存率50%对应的时间
# summary(fit)$table[, "median"]
p_value <- pairwise_survdiff(data = td, Surv(time, Status) ~ LowerThanCutOff)
p_value$p.value
# KM plot ----
p <- ggsurvplot(
fit,
data = td,
pval = TRUE,
pval.method = F,
pval.coord = c(0.05, 0.05), # p value location
pval.size = 5,
conf.int = TRUE, # 置信区间
risk.table = F, # 添加风险表
legend = c(0.8, 0.9), # legend location
legend.title = "Expression",
legend.labs = c("High", "Low"),
font.legend = 14,
font.main = c(16, "bold", "darkblue"),
font.tickslab = 12,
ylab = "Survival",
font.x = 16,
font.y = 16,
ggtheme = theme_survminer(), # 更改ggplot2的主题
palette = c("red", "cyan3") #定义颜色
)
p
## save plot ----
# KM plot
filename <- paste0( format(Sys.time(), "%H_%M_%S"), '_KM_plot.pdf')
ggsave(plot = p$plot, filename = filename, width = 225, height = 200, units = 'mm')
# risk table
filename <- paste0(format(Sys.time(), "%H_%M_%S"), '_risk_table.pdf')
ggsave(plot = p$table, filename = filename, width = 225, height = 50, units = 'mm')Statistics
生存分析
现有患者某基因表达量、患者生存状态和生存时间,要求用约登指数最大时的基因表达量作为cutoff值进行生存分析。
四格表卡方检验
有任意一个期望频数小于5时应使用Fisher精确检验。
不推荐用Excel做卡方检验,因为需要手动计算理论频数和判断是否需要进行连续性矫正。
df <- matrix(c(28, 34, 9, 3), nrow = 2, ncol = 2)
result <- chisq.test(df)
if (any(result$expected < 5)) {
print('Fisher 精确检验')
fisher.test(df) # fisher 精确检验
} else {
result
}IC50
library(ggplot2)
### this is the data ###
# data from four experiments
conc <- c(5.00E-07, 1.00E-06, 1.00E-05,
5.00E-07, 1.00E-06, 5.00E-06, 1.00E-05, 2.00E-05,
5.00E-07, 1.00E-06, 2.50E-06, 5.00E-06, 1.00E-05,
5.00E-07, 1.00E-06, 2.50E-06, 5.00E-06, 1.00E-05)
dead.cells <- c(34.6, 47.7, 81.7,
37.6, 55.7, 89.1, 84.3, 85.2,
34.4, 46.1, 76.2, 84.3, 84.1,
24.5, 26.1, 60.6, 82.7, 87)
# transform the data to make it postive and put into a data frame for fitting
data <- as.data.frame(conc) # create the data frame
data$dead.cells <- dead.cells
data$nM <- data$conc * 1000000000
data$log.nM <- log10(data$nM)
### fit the data ###
# make sure logconc remains positive, otherwise multiply to keep positive values
# (such as in this example where the iconc is multiplied by 1000
fit <- nls(
dead.cells ~ bot + (top - bot) / (1 + (log.nM / LD50) ^ slope),
data = data,
start = list(
bot = 20,
top = 95,
LD50 = 3,
slope = -12
)
)
m <- coef(fit)
val <- format((10 ^ m[3]), dig = 1)
### ggplot the results ###
p <- ggplot(data = data, # specify the data frame with data
aes(x = nM, y = dead.cells)) + # specify x and y
geom_point() + # make a scatter plot
scale_x_log10(breaks = c(500, 1000, 2500, 5000, 10000, 20000)) +
xlab("Drug concentration (nM)") + # label x-axis
ylab("Dead cells (% of cells)") + # label y-axis
ggtitle("Drug Dose Response and LD50") + # add a title
theme_bw() + # a simple theme
expand_limits(y = c(20, 100)) # customise the y-axis
# Add the line to graph using methods.args (New: Jan 2016)
p <- p + geom_smooth(
method = "nls",
method.args = list(
formula = y ~ bot + (top - bot) / (1 + (x / LD50) ^ slope),
start = list(
bot = 20,
top = 95,
LD50 = 3,
slope = -12
)
),
se = FALSE
)
# Add LD50 to the graph.
p <- p +
annotate(geom = "text", x = 7000, y = 60, label = "LD50(nM): ", color = "red") +
annotate(geom = "text", x = 9800, y = 60, label = val, color = "red")
p # show the plot