資源簡介
pdf轉txt,可以實現批量轉換,僅針對不需要保留pdf格式的需求。
代碼片段和文件信息
#!/usr/bin/python
#-*-?coding:?utf-8?-*-
import?os
import?re
from?pdfminer.pdfinterp?import?PDFResourceManager?PDFPageInterpreter
from?pdfminer.pdfpage?import?PDFPage
from?pdfminer.converter?import?TextConverter
from?pdfminer.layout?import?LAParams
#將一個pdf轉換成txt
def?pdfTotxt1(filepathoutpath):
????try:
????????fp?=?file(filepath?‘rb‘)
????????outfp=file(outpath‘w‘)
????????#創(chuàng)建一個(gè)PDF資源管理器對象來存儲共享資源
????????#caching?=?False不緩存
????????rsrcmgr?=?PDFResourceManager(caching?=?False)
????????#?創(chuàng)建一個(gè)PDF設(shè)備對象
????????laparams?=?LAParams()
????????device?=?TextConverter(rsrcmgr?outfp?codec=‘utf-8‘?laparams=laparamsimagewriter=None)
????????#創(chuàng)建一個(gè)PDF解析器對象
????????interpreter?=?PDFPageInterpreter(rsrcmgr?device)
????????for?page?in?PDFPage.get_pages(fp?pagenos?=?set()maxpages=0
???????????????????????????????????
評論
共有 條評論