12345678910111213 |
- import pdfplumber
- import pandas as pd
def readPdf(path: str, pageNumber: int) -> str:
    """Return the text of a single PDF page with its last line removed.

    The last extracted line is dropped because it is expected to be the
    page number printed at the bottom of the page.

    Args:
        path: Location of the PDF file, passed straight to ``pdfplumber.open``.
        pageNumber: Index into ``pdf.pages``; zero-based, so ``0`` selects
            the first page of the document.

    Returns:
        The page text without its final line. An empty string is returned
        when the page yields no text or only a single line.
    """
    with pdfplumber.open(path) as pdf:
        page = pdf.pages[pageNumber]
        # extract_text() may give None for a page without a text layer
        # (behaviour differs between pdfplumber releases); fall back to ''
        # so the caller gets an empty result instead of an AttributeError.
        text = page.extract_text() or ''
    # Keep everything before the last newline, i.e. drop the final line
    # (the page number). With no newline at all the result is ''.
    return text.rpartition('\n')[0]
|