将counts数值转化为TPM
1、新建一个Rproject
将这些counts文件复制到Rproject目录下的Counts文件夹中(代码从 ./Counts/ 读取);如果Rproject目录下还没有TPM文件夹,请同时新建一个,用于保存输出结果(代码写入 ./TPM/)。

2、新建一个R程序
将下列代码复制到其中。
# Convert featureCounts count tables to TPM ----
#
# Reads every `*.txt` featureCounts table found in ./Counts/, converts the
# read counts in the last column of each table to TPM, and writes one
# `<sample>_tpm_output.txt` file (columns: Geneid, <sample>_tpm) into ./TPM/.
#
# NOTE: the original script started with `rm(list = ls())`. It is omitted on
# purpose: wiping the caller's workspace is a side effect this script does not
# need, because every variable it uses is assigned below.

#' Convert raw read counts to TPM (transcripts per million)
#'
#' @param counts Numeric vector of read counts, one value per gene.
#' @param gene_lengths Numeric vector of gene lengths in bases, same length
#'   and order as `counts`.
#' @return Numeric vector of TPM values, same length as `counts`. The values
#'   sum to 1e6 whenever at least one gene has a non-zero count.
counts_to_tpm <- function(counts, gene_lengths) {
  # Normalise by gene length first (reads per kilobase) ...
  reads_per_kb <- counts / (gene_lengths / 1000)
  # ... then scale by the sum of the length-normalised rates. Dividing by the
  # raw read total instead (as the previous code did) yields RPKM, not TPM.
  reads_per_kb / sum(reads_per_kb) * 1e6
}

# Folder holding the featureCounts tables, and folder receiving the TPM files.
counts_folder <- "./Counts/"
tpm_folder <- "./TPM"

# All counts tables to convert.
count_files <- list.files(counts_folder, pattern = "\\.txt$", full.names = TRUE)

if (length(count_files) == 0) {
  warning("No counts tables (*.txt) found in ", counts_folder, call. = FALSE)
} else {
  # write.table() does not create missing directories, so make sure the output
  # folder exists before the first file is written.
  dir.create(tpm_folder, showWarnings = FALSE, recursive = TRUE)
}

for (count_file in count_files) {
  # The first line of the file is skipped (skip = 1) so that the second line
  # is used as the column header. Fields are split on tabs only and
  # quote/comment parsing is disabled so unusual gene IDs are read verbatim.
  counts_data <- read.table(
    count_file,
    header = TRUE,
    skip = 1,
    sep = "\t",
    quote = "",
    comment.char = "",
    stringsAsFactors = FALSE,
    check.names = FALSE
  )

  # Derive the sample name from the file name alone. basename() removes the
  # directory part however list.files() joined it. The unescaped `.` between
  # "hisat2" and "counts" matches any single character, so both
  # "_hisat2.counts.txt" and "_hisat2_counts.txt" suffixes are stripped.
  sample_name <- sub("(.*)_hisat2.counts\\.txt", "\\1", basename(count_file))
  sample_label <- gsub("_counts", "", sample_name)

  # The counts of the sample are taken from the last column of the table.
  sample_counts <- counts_data[[ncol(counts_data)]]
  tpm_values <- counts_to_tpm(sample_counts, counts_data$Length)

  # Build a data frame (not a cbind() matrix) so that the TPM column stays
  # numeric instead of being coerced to character next to the gene IDs.
  tpm_data <- data.frame(
    Geneid = counts_data[[1]],
    tpm = tpm_values,
    stringsAsFactors = FALSE
  )
  names(tpm_data) <- c("Geneid", paste0(sample_label, "_tpm"))

  # One tab-separated output file per sample.
  tpm_file <- file.path(tpm_folder, paste0(sample_label, "_tpm_output.txt"))
  write.table(
    tpm_data,
    file = tpm_file,
    sep = "\t",
    quote = FALSE,
    col.names = TRUE,
    row.names = FALSE
  )
}

# 3、合并TPM文件 (Step 3: merge the per-sample TPM files)
# Merge the per-sample TPM tables into one matrix ----
#
# Reads every `*.txt` table found in ./TPM/, joins them on the "Geneid"
# column, and writes the combined table to all_TPM.txt in the working
# directory.

#' Read several TPM tables and merge them by gene ID
#'
#' @param tpm_files Character vector of paths to tab-separated tables, each
#'   with a header line and a "Geneid" column.
#' @return A data frame holding the union of all gene IDs (`all = TRUE`), one
#'   column per input column; a gene missing from a table gets `NA` there.
#'   `NULL` when `tpm_files` is empty.
merge_tpm_files <- function(tpm_files) {
  tables <- lapply(
    tpm_files,
    read.table,
    header = TRUE,
    sep = "\t",
    quote = "",
    comment.char = "",
    stringsAsFactors = FALSE,
    # Keep sample column names verbatim; the default would rewrite names that
    # are not syntactic R identifiers (e.g. "1-A_tpm" becomes "X1.A_tpm").
    check.names = FALSE
  )
  # Fold the list of tables into one, a full outer join at each step.
  Reduce(
    function(merged, current) {
      merge(merged, current, by = "Geneid", all = TRUE)
    },
    tables
  )
}

# Folder holding the per-sample TPM tables.
TPMs_folder <- "./TPM/"

# All TPM tables to merge.
TPM_files <- list.files(TPMs_folder, pattern = "\\.txt$", full.names = TRUE)

if (length(TPM_files) == 0) {
  # Nothing to merge: skip writing instead of producing an empty output file.
  warning("No TPM tables (*.txt) found in ", TPMs_folder, call. = FALSE)
} else {
  merged_data <- merge_tpm_files(TPM_files)

  # Write the merged table as a tab-separated file.
  write.table(
    merged_data,
    file = "all_TPM.txt",
    sep = "\t",
    quote = FALSE,
    col.names = TRUE,
    row.names = FALSE
  )
}

# all_TPM.txt文件即为所有unique基因在各样本中的TPM表达量数值。
# (all_TPM.txt holds the TPM value of every unique gene in each sample.)