import pdfplumber
import pandas as pd


def readPdf(path, pageNumber):
    """Return the text of a single PDF page with its last line removed.

    The last extracted line is dropped unconditionally because it is
    assumed to be the page-number footer; a page whose text is a single
    line therefore yields an empty string.

    Args:
        path: Location of the PDF, passed straight to ``pdfplumber.open``.
        pageNumber: Zero-based index into ``pdf.pages`` (0 is the first
            page of the document).

    Returns:
        The page text as a string, lines joined with ``'\\n'``, without
        the final line. An empty string when the page has no text.

    Raises:
        IndexError: Presumably when ``pageNumber`` is outside the range of
            ``pdf.pages`` (it is used as a plain subscript).
    """
    with pdfplumber.open(path) as pdf:
        page = pdf.pages[pageNumber]
        # extract_text() may return None for a page with no extractable
        # text (some pdfplumber versions do); fall back to '' so the
        # split below does not raise AttributeError.
        text = page.extract_text() or ''

    # Drop the final line, which is assumed to be the page number printed
    # at the bottom of the page.
    return '\n'.join(text.split('\n')[:-1])