Statistics

生存分析

现有患者某基因表达量、患者生存状态和生存时间，要求用约登指数最大时的基因表达量作为cutoff值进行生存分析。

library(survival)
library(survivalROC)
library(survminer)
library(dplyr)

td
# columns: 4
# Patient_ID, Gene, Status, Time

sROC <- survivalROC(
  Stime = td$Time,
  status = td$Status,
  marker = td$Gene,
  predict.time = 24,
  # 想要考察的时间段
  method = "KM"
)

## calculate Yoden index
result <- sROC |>
  mutate(Youden = TP - FP) |>
  slice_max(order_by = Youden, n = 1, with_ties = FALSE) |> 
  mutate(
    max_Youden = Youden,
    cut.value = cut.values,
    .keep = "none"
  )

print(
  paste0(
    "最大约登指数 ", result$max_Youden,
    "，此时基因表达量为 ", result$cut.value
  )
)

td <- td |> mutate(LowerThanCutOff = Gene <= result$cut.value) |> count(LowerThanCutOff)
fit <- survfit(data = td , Surv(time, Status) ~ LowerThanCutOff)

# 查看生存率50%对应的时间
# summary(fit)$table[, "median"]

p_value <- pairwise_survdiff(data = td, Surv(time, Status) ~ LowerThanCutOff)
p_value$p.value

# KM plot ----
p <- ggsurvplot(
  fit,
  data = td,
  pval = TRUE,
  pval.method = F,
  pval.coord = c(0.05, 0.05), # p value location
  pval.size = 5,
  conf.int = TRUE, # 置信区间
  risk.table = F, # 添加风险表
  legend = c(0.8, 0.9), # legend location
  legend.title = "Expression",
  legend.labs = c("High", "Low"),
  font.legend = 14,
  font.main = c(16, "bold", "darkblue"),
  font.tickslab = 12,
  ylab = "Survival",
  font.x = 16,
  font.y = 16,
  ggtheme = theme_survminer(), # 更改ggplot2的主题
  palette = c("red", "cyan3") #定义颜色
)
p

## save plot ----
# KM plot
filename <- paste0( format(Sys.time(), "%H_%M_%S"), '_KM_plot.pdf')
ggsave(plot = p$plot, filename = filename, width = 225, height = 200, units = 'mm')

# risk table
filename <- paste0(format(Sys.time(), "%H_%M_%S"), '_risk_table.pdf')
ggsave(plot = p$table, filename = filename, width = 225, height = 50, units = 'mm')

四格表卡方检验

有任意一个期望频数小于5时应使用Fisher精确检验。

不推荐用Excel做卡方检验，因为需要手动计算理论频数和判断是否需要进行连续性矫正。

df <- matrix(c(28, 34, 9, 3), nrow = 2, ncol = 2)

result <- chisq.test(df)

if (any(result$expected < 5)) {
  print('Fisher 精确检验')
  fisher.test(df) # fisher 精确检验
} else {
  result
}

IC50

library(ggplot2)   

### this is the data ###    
# data from four experiments   
conc <- c(5.00E-07, 1.00E-06, 1.00E-05,    
          5.00E-07, 1.00E-06, 5.00E-06, 1.00E-05, 2.00E-05,    
          5.00E-07, 1.00E-06, 2.50E-06, 5.00E-06, 1.00E-05,    
          5.00E-07, 1.00E-06, 2.50E-06, 5.00E-06, 1.00E-05)   
dead.cells <- c(34.6, 47.7, 81.7,    
                37.6, 55.7, 89.1, 84.3, 85.2,    
                34.4, 46.1, 76.2, 84.3, 84.1,    
                24.5, 26.1, 60.6, 82.7, 87)   

# transform the data to make it postive and put into a data frame for fitting    
data <- as.data.frame(conc) # create the data frame   
data$dead.cells <- dead.cells   
data$nM <- data$conc * 1000000000   
data$log.nM <- log10(data$nM)    

### fit the data ###   
# make sure logconc remains positive, otherwise multiply to keep positive values   
# (such as in this example where the iconc is multiplied by 1000   

fit <- nls(
  dead.cells ~ bot + (top - bot) / (1 + (log.nM / LD50) ^ slope),
  data = data,
  start = list(
    bot = 20,
    top = 95,
    LD50 = 3,
    slope = -12
  )
)
m <- coef(fit)
val <- format((10 ^ m[3]), dig = 1)

### ggplot the results ###
p <- ggplot(data = data, # specify the data frame with data
            aes(x = nM, y = dead.cells)) + # specify x and y
  geom_point() + # make a scatter plot
  scale_x_log10(breaks = c(500, 1000, 2500, 5000, 10000, 20000)) +
  xlab("Drug concentration (nM)") + # label x-axis
  ylab("Dead cells (% of cells)") + # label y-axis
  ggtitle("Drug Dose Response and LD50") + # add a title
  theme_bw() + # a simple theme
  expand_limits(y = c(20, 100)) # customise the y-axis

# Add the line to graph using methods.args (New: Jan 2016)
p <- p + geom_smooth(
  method = "nls",
  method.args = list(
    formula = y ~ bot + (top - bot) / (1 + (x / LD50) ^ slope),
    start = list(
      bot = 20,
      top = 95,
      LD50 = 3,
      slope = -12
    )
  ),
  se = FALSE
)

# Add LD50 to the graph.
p <- p +
  annotate(geom = "text", x = 7000, y = 60, label = "LD50(nM): ", color = "red") +
  annotate(geom = "text", x = 9800, y = 60, label = val, color = "red")

p # show the plot