Sometimes a PDF book is split into many small PDF files. If they can be concatenated into a single PDF file, the book becomes much more convenient to read. pyPdf can do that job; it probably has more features for manipulating PDF files than I use here. Concatenation is simple with the pyPdf library.
Below is the code, for my records. There is a small bug: every time the program removes the existing output file, the removal seems to have no effect, and so far I have not found the reason. That is why I left this code, with its little problem, sitting on my disk instead of pasting it to the blog earlier.
import os
import glob
from pyPdf import PdfFileWriter, PdfFileReader
def appendPdf(input, startPage, endPage, output):
    '''
    Copy a range of pages from a reader onto a writer.

    input is a PdfFileReader (anything with getPage(pageNum) works).
    output is a PdfFileWriter (anything with addPage(page) works).
    Pages startPage .. endPage - 1 are appended in order; endPage itself is
    excluded, so passing input.numPages as endPage copies through the last
    page. Nothing is appended when startPage >= endPage.
    Returns None.
    '''
    # NOTE: the parameter name "input" shadows the builtin, but it is part of
    # the public signature and is kept so keyword callers do not break.
    # A plain loop is used because the calls are made only for their side
    # effect; no result list is needed.
    for pageNum in range(startPage, endPage):
        output.addPage(input.getPage(pageNum))
def getOrderedListFiles(parentDir):
    '''
    Return the entry names of a directory as a sorted list.

    parentDir is the directory to list.
    The result holds bare entry names exactly as os.listdir reports them
    (not absolute paths), so callers must resolve them against parentDir.
    An empty directory yields an empty list.
    '''
    return sorted(os.listdir(parentDir))
def generatePdf(outputFile, output):
    '''
    Write the assembled document to outputFile, replacing any existing file.

    outputFile is the path of the PDF to create.
    output is a PdfFileWriter (anything with a write(stream) method works);
    it is handed a binary stream opened for writing.
    Any exception raised by output.write() propagates to the caller after
    the stream has been closed.
    '''
    # Remove a previous output first so a fresh file is created instead of
    # the old one being truncated in place.
    if os.path.exists(outputFile):
        os.unlink(outputFile)
    # open() replaces the Python-2-only file() builtin, and the with block
    # closes the stream even when output.write() raises.
    with open(outputFile, 'wb') as outputStream:
        output.write(outputStream)
def main(parent='/home/will/myworld/workspace/pytest/src/pythonbasic/fileoperation/testfiles',
         outputFile='test1.pdf'):
    '''
    Concatenate every PDF in a directory into one output PDF.

    parent is the directory holding the input PDFs; the process changes its
    working directory to it.
    outputFile is the path of the combined PDF, resolved relative to parent
    when it is not absolute.
    Inputs are taken in sorted file-name order. Entries whose name does not
    end in ".pdf" (case-insensitive) are skipped, and so is outputFile itself.
    '''
    os.chdir(parent)
    outputPath = os.path.abspath(outputFile)
    output = PdfFileWriter()
    # Input streams are kept open until the output has been written.
    # NOTE(review): this assumes pyPdf still reads from the source streams
    # while writing -- confirm against the pyPdf documentation.
    streams = []
    try:
        # Every listed file is visited; the earlier range(1, len(files))
        # silently dropped the first file in sorted order.
        for name in getOrderedListFiles(parent):
            if not name.lower().endswith('.pdf'):
                continue
            # A previous run's output sits in this same directory. Feeding it
            # back in as an input made the old pages reappear in the new
            # file, as if the old output had never been removed.
            if os.path.abspath(name) == outputPath:
                continue
            stream = open(name, 'rb')
            streams.append(stream)
            reader = PdfFileReader(stream)
            appendPdf(reader, 0, reader.numPages, output)
        generatePdf(outputFile, output)
    finally:
        for stream in streams:
            stream.close()
# Script entry point: run the concatenation only when executed directly,
# so importing this module has no side effects.
if __name__ == '__main__':
    main()
    # The parenthesised form prints the same text on Python 2 and is also
    # valid Python 3, unlike the bare print statement.
    print('OK')