Split and Combine PDFs in R and Python
Program
Given 100 PDF files, the first page of each one needs to be extracted, and those pages then combined into a single 100-page PDF.
R code using qpdf package:
# Extract the first page of every PDF in `input_path` and merge those pages,
# in input order, into `output.pdf` inside `temp_dir`.
input_path <- "/Users/apple/Documents/test/input"
temp_dir <- "/Users/apple/Documents/test/output"
library(qpdf)

# Restrict the listing to PDFs so stray non-PDF files are never handed to qpdf;
# full.names = TRUE returns ready-to-use paths.
all_file_path <- list.files(input_path, pattern = "\\.pdf$",
                            ignore.case = TRUE, full.names = TRUE)
if (length(all_file_path) == 0) {
  stop("No PDF files found in ", input_path)
}

# Make sure the destination folder exists before writing into it.
dir.create(temp_dir, showWarnings = FALSE, recursive = TRUE)

# Keep the single-page file names in a vector instead of re-listing temp_dir:
# a re-listing is sorted as text (temp1, temp10, temp100, temp2, ...), which
# scrambles the page order, and it would also pick up an output.pdf left over
# from an earlier run.
temp_file_path <- file.path(temp_dir,
                            paste0("temp", seq_along(all_file_path), ".pdf"))

# seq_along() is safe for any length, unlike 1:length(x).
for (i in seq_along(all_file_path)) {
  pdf_subset(all_file_path[i], pages = 1, output = temp_file_path[i])
}

pdf_combine(temp_file_path, output = file.path(temp_dir, "output.pdf"))
JIKE@无声 wrote a Python program for me and it worked very well.
import os
from pathlib import Path
from PyPDF2 import PdfFileMerger, PdfFileReader, PdfFileWriter
def walk_target_folder(target_folder_path):
    """Collect the paths of all PDF files under a folder, recursively.

    Args:
        target_folder_path: Root folder to search.

    Returns:
        A sorted list of path strings, one per file whose name ends in
        ``.pdf`` (compared case-insensitively). The list is empty when
        nothing matches or the folder cannot be walked.
    """
    pdf_file_list = []
    for root, _dirs, files in os.walk(target_folder_path):
        for file_name in files:
            # Compare in lower case so "REPORT.PDF" is found as well.
            if file_name.lower().endswith('.pdf'):
                pdf_file_list.append(os.path.join(root, file_name))
    # os.walk yields entries in an unspecified order; sort so the caller gets
    # the same sequence on every run.
    pdf_file_list.sort()
    return pdf_file_list
def main(target_folder_path, save_file_path):
    """Merge the first page of every PDF under a folder into a single PDF.

    The PDF paths come from ``walk_target_folder``. A file that cannot be
    read is reported on stdout and skipped; the remaining pages are still
    written.

    Args:
        target_folder_path: Folder searched for source PDFs.
        save_file_path: Path of the combined PDF to write.

    Returns:
        None. When ``target_folder_path`` does not exist, a message is
        printed and nothing is written.
    """
    if not os.path.exists(target_folder_path):
        print('文件夹地址不存在')
        return
    pdf_file_list = walk_target_folder(target_folder_path)
    pdf_writer = PdfFileWriter()
    for index, pdf_file_path in enumerate(pdf_file_list):
        # Reset for each file. If PdfFileReader() itself raises, the handler
        # below must not touch an unbound name (first file) or the previous
        # file's reader (later files).
        reader = None
        try:
            reader = PdfFileReader(pdf_file_path)
            # Take the first page.
            page = reader.getPage(0)
            print('读取第{}个pdf'.format(index + 1))
            pdf_writer.addPage(page)
            print('加入PdfFileWriter')
        except Exception as e:
            # Best-effort per file: report the failure and carry on. The
            # encryption state is only known if the reader was constructed.
            encrypted = reader.isEncrypted if reader is not None else '未知'
            print('==============================\n读取第{}个pdf 失败 文件加密状态:{}'.format(index + 1, encrypted))
            print('Pdf: {} 执行失败'.format(pdf_file_path))
            print('错误信息: ' + str(e) + '\n==============================\n')
    print('开始写文件...')
    with Path(save_file_path).open(mode="wb") as output_file:
        pdf_writer.write(output_file)
    print("导出成功")
if __name__ == '__main__':
    # Folder that holds the source PDFs.
    target_folder_path = '/Users/apple/Documents/PDFs'
    # Destination path of the combined PDF.
    save_file_path = '/Users/apple/Desktop/combine.pdf'
    main(
        target_folder_path=target_folder_path,
        save_file_path=save_file_path,
    )