Программирование -> Классическое ->
Python 3
Convert doc, docx, pdf, image
1. Convert doc, docx to pdf (возможно, программа конвертирует и другие форматы в pdf)
import os
import comtypes.client
wdFormatPDF = 17
in_file = os.path.abspath('file.doc') # Исходный файл
out_file = os.path.abspath('file.pdf') # Новый файл
word = comtypes.client.CreateObject('Word.Application')
doc = word.Documents.Open(in_file)
doc.SaveAs(out_file, FileFormat=wdFormatPDF)
doc.Close()
word.Quit()
Исходный код подсмотрел на этом сайте.
2. Convert docx to pdf
from docx2pdf import convert
docx_file = 'input.docx'
pdf_file = 'output.pdf'
convert(docx_file, pdf_file)
3. Convert pdf to docx
from pdf2docx import parse
pdf_file = 'sample.pdf'
docx_file = 'sample.docx'
parse(pdf_file, docx_file)
Исходный код подсмотрел на этом сайте.
4. Convert pdf to image
import fitz
import os
file_path = "example.pdf" # исходный файл
doc = fitz.open(file_path) # открываем документ
for i, page in enumerate(doc):
if not os.path.exists("images"): # если папки нет
os.mkdir("images") # создаем папку, в которую сохраним картинки
pix = page.get_pixmap(dpi=200) # преобразуем страницу в изображение
pix.save(f"images/page_{i}.png") # сохраняем в папку
5. Convert image to pdf
from PIL import Image
from glob import glob
import os
files = glob("images/*.png") # открываем картинки из папки images
iml = []
print(f"{files=}")
files = sorted(files)
for img in files:
imgs = Image.open(img)
rgb_im = imgs.convert('RGB') # во избежание ошибок: не удается сохранить режим RGBA
iml.append(rgb_im)
pdf = "output.pdf"
print(iml)
image = iml[0]
iml.pop(0)
image.save(pdf, "PDF" , resolution=100.0, save_all=True, append_images=iml)
os.system(pdf)
6. Ставим защиту от редактирования pdf - только чтение - read only
from hashlib import md5
from PyPDF4 import PdfFileReader, PdfFileWriter
from PyPDF4.generic import NameObject, DictionaryObject, ArrayObject, \
NumberObject, ByteStringObject
from PyPDF4.pdf import _alg33, _alg34, _alg35
from PyPDF4.utils import b_
def encrypt(writer_obj: PdfFileWriter, user_pwd, owner_pwd=None, use_128bit=True):
"""
Encrypt this PDF file with the PDF Standard encryption handler.
:param str user_pwd: The "user password", which allows for opening
and reading the PDF file with the restrictions provided.
:param str owner_pwd: The "owner password", which allows for
opening the PDF files without any restrictions. By default,
the owner password is the same as the user password.
:param bool use_128bit: flag as to whether to use 128bit
encryption. When false, 40bit encryption will be used. By default,
this flag is on.
"""
import time, random
if owner_pwd == None:
owner_pwd = user_pwd
if use_128bit:
V = 2
rev = 3
keylen = int(128 / 8)
else:
V = 1
rev = 2
keylen = int(40 / 8)
# permit copy and printing only:
P = -44
O = ByteStringObject(_alg33(owner_pwd, user_pwd, rev, keylen))
ID_1 = ByteStringObject(md5(b_(repr(time.time()))).digest())
ID_2 = ByteStringObject(md5(b_(repr(random.random()))).digest())
writer_obj._ID = ArrayObject((ID_1, ID_2))
if rev == 2:
U, key = _alg34(user_pwd, O, P, ID_1)
else:
assert rev == 3
U, key = _alg35(user_pwd, rev, keylen, O, P, ID_1, False)
encrypt = DictionaryObject()
encrypt[NameObject("/Filter")] = NameObject("/Standard")
encrypt[NameObject("/V")] = NumberObject(V)
if V == 2:
encrypt[NameObject("/Length")] = NumberObject(keylen * 8)
encrypt[NameObject("/R")] = NumberObject(rev)
encrypt[NameObject("/O")] = ByteStringObject(O)
encrypt[NameObject("/U")] = ByteStringObject(U)
encrypt[NameObject("/P")] = NumberObject(P)
writer_obj._encrypt = writer_obj._addObject(encrypt)
writer_obj._encrypt_key = key
unmeta = PdfFileReader('Input.pdf')
writer = PdfFileWriter()
writer.appendPagesFromReader(unmeta)
encrypt(writer, '', '123')
with open('Output', 'wb') as fp:
writer.write(fp)
Исходный код подсмотрел на этом сайте.
7. Пишем программу для получения отсканированного pdf из word
a) Создадим форму. В нее пользователь будет перетаскивать файлы, которые нужно конвертировать.
from PyQt5.QtWidgets import QApplication, QMainWindow, QLabel, QWidget, QVBoxLayout, QListWidget
class MainWindow(QMainWindow):
def __init__(self):
super().__init__()
self.setWindowTitle('Drag & Drop')
# Даем разрешение на Drop
self.setAcceptDrops(True)
self.list_files = QListWidget()
self.label_total_files = QLabel()
main_layout = QVBoxLayout()
main_layout.addWidget(QLabel('Drag and drop the file:'))
main_layout.addWidget(self.list_files)
main_layout.addWidget(self.label_total_files)
central_widget = QWidget()
central_widget.setLayout(main_layout)
self.setCentralWidget(central_widget)
self._update_states()
def _update_states(self):
self.label_total_files.setText('Files: {}'.format(self.list_files.count()))
def dragEnterEvent(self, event):
# Тут выполняются проверки и дается (или нет) разрешение на Drop
mime = event.mimeData()
# Если перемещаются ссылки
if mime.hasUrls():
# Разрешаем
event.acceptProposedAction()
def dropEvent(self, event):
# Обработка события Drop
for url in event.mimeData().urls():
file_name = url.toLocalFile()
self.list_files.addItem(file_name)
self._update_states()
return super().dropEvent(event)
if __name__ == '__main__':
app = QApplication([])
mw = MainWindow()
mw.resize(500, 350)
mw.show()
app.exec()
Исходный код подсмотрел на этом сайте.
b) Добавим в эту форму код для конвертирования с сохранением результатов в папку исходника.
from PyQt5.QtWidgets import QApplication, QMainWindow, QLabel, QWidget, QVBoxLayout, QListWidget
import comtypes.client
import fitz
from PIL import Image
from glob import glob
import os
import pathlib
class MainWindow(QMainWindow):
def __init__(self):
super().__init__()
self.setWindowTitle('Drag & Drop')
# Даем разрешение на Drop
self.setAcceptDrops(True)
self.list_files = QListWidget()
self.label_total_files = QLabel()
main_layout = QVBoxLayout()
main_layout.addWidget(QLabel('Drag and drop the file:'))
main_layout.addWidget(self.list_files)
main_layout.addWidget(self.label_total_files)
central_widget = QWidget()
central_widget.setLayout(main_layout)
self.setCentralWidget(central_widget)
self._update_states()
def _update_states(self):
self.label_total_files.setText('Files: {}'.format(self.list_files.count()))
def dragEnterEvent(self, event):
# Тут выполняются проверки и дается (или нет) разрешение на Drop
mime = event.mimeData()
# Если перемещаются ссылки
if mime.hasUrls():
# Разрешаем
event.acceptProposedAction()
def dropEvent(self, event):
# Обработка события Drop
for url in event.mimeData().urls():
file_name = url.toLocalFile()
self.list_files.addItem(file_name)
# Работа с файлами
fpath = pathlib.Path(file_name)
way = os.path.dirname(file_name)
docx_file = file_name
pdf_file = way + "/" + fpath.stem + ".pdf"
images_directory = way + "/images"
a = False
# проверка расширения
if os.path.splitext(docx_file)[1] != '.pdf':
# doc or docx to pdf
wdFormatPDF = 17
in_file = os.path.abspath(docx_file)
out_file = os.path.abspath(pdf_file)
word = comtypes.client.CreateObject('Word.Application')
doc = word.Documents.Open(in_file)
doc.SaveAs(out_file, FileFormat=wdFormatPDF)
doc.Close()
word.Quit()
else:
# pdf no convert
pdf_file = docx_file
a = True
# pdf to png
doc = fitz.open(pdf_file) # open document
for i, page in enumerate(doc):
if not os.path.exists(images_directory):
os.mkdir(images_directory) # создаем папку
pix = page.get_pixmap(dpi=200) # render page to an image
pix.save(images_directory + "/" + fpath.stem + f"_{i}.png")
# png to pdf
files = glob(images_directory + "/" + fpath.stem + "_*.png")
iml = []
print(f"{files=}")
files = sorted(files)
for img in files:
imgs = Image.open(img)
rgb_im = imgs.convert('RGB') # to prevent errors: cannot save mode RGBA
iml.append(rgb_im)
if a == False: pdf = pdf_file
else: pdf = pdf_file + ".pdf"
print(iml)
image = iml[0]
iml.pop(0)
image.save(pdf, "PDF", resolution=100.0, save_all=True, append_images=iml)
self._update_states()
return super().dropEvent(event)
if __name__ == '__main__':
app = QApplication([])
mw = MainWindow()
mw.resize(500, 350)
mw.show()
app.exec()
c) Добавим защиту от редактирования, пароль для разблокировки "123"
from PyQt5.QtWidgets import QApplication, QMainWindow, QLabel, QWidget, QVBoxLayout, QListWidget
import comtypes.client
import fitz
from PIL import Image
from glob import glob
import os
import pathlib
# Protection
from hashlib import md5
from PyPDF4 import PdfFileReader, PdfFileWriter
from PyPDF4.generic import NameObject, DictionaryObject, ArrayObject, \
NumberObject, ByteStringObject
from PyPDF4.pdf import _alg33, _alg34, _alg35
from PyPDF4.utils import b_
class MainWindow(QMainWindow):
def __init__(self):
super().__init__()
self.setWindowTitle('Drag & Drop')
# Даем разрешение на Drop
self.setAcceptDrops(True)
self.list_files = QListWidget()
self.label_total_files = QLabel()
main_layout = QVBoxLayout()
main_layout.addWidget(QLabel('Drag and drop the file:'))
main_layout.addWidget(self.list_files)
main_layout.addWidget(self.label_total_files)
central_widget = QWidget()
central_widget.setLayout(main_layout)
self.setCentralWidget(central_widget)
self._update_states()
def _update_states(self):
self.label_total_files.setText('Files: {}'.format(self.list_files.count()))
def dragEnterEvent(self, event):
# Тут выполняются проверки и дается (или нет) разрешение на Drop
mime = event.mimeData()
# Если перемещаются ссылки
if mime.hasUrls():
# Разрешаем
event.acceptProposedAction()
def dropEvent(self, event):
# Обработка события Drop
for url in event.mimeData().urls():
file_name = url.toLocalFile()
self.list_files.addItem(file_name)
# Работа с файлами
fpath = pathlib.Path(file_name)
way = os.path.dirname(file_name)
docx_file = file_name
pdf_file = way + "/" + fpath.stem + ".pdf"
images_directory = way + "/images"
a = False
# проверка расширения
if os.path.splitext(docx_file)[1] != '.pdf':
# doc or docx to pdf
wdFormatPDF = 17
in_file = os.path.abspath(docx_file)
out_file = os.path.abspath(pdf_file)
word = comtypes.client.CreateObject('Word.Application')
doc = word.Documents.Open(in_file)
doc.SaveAs(out_file, FileFormat=wdFormatPDF)
doc.Close()
word.Quit()
else:
# pdf no convert
pdf_file = docx_file
a = True
# pdf to png
doc = fitz.open(pdf_file) # open document
for i, page in enumerate(doc):
if not os.path.exists(images_directory):
os.mkdir(images_directory) # создаем папку
pix = page.get_pixmap(dpi=200) # render page to an image
pix.save(images_directory + "/" + fpath.stem + f"_{i}.png")
# png to pdf
files = glob(images_directory + "/" + fpath.stem + "_*.png")
iml = []
print(f"{files=}")
files = sorted(files)
for img in files:
imgs = Image.open(img)
rgb_im = imgs.convert('RGB') # to prevent errors: cannot save mode RGBA
iml.append(rgb_im)
if a == False: pdf = pdf_file
else: pdf = pdf_file + ".pdf"
print(iml)
image = iml[0]
iml.pop(0)
image.save(pdf, "PDF", resolution=100.0, save_all=True, append_images=iml)
# Protection
def encrypt(writer_obj: PdfFileWriter, user_pwd, owner_pwd=None, use_128bit=True):
"""
Encrypt this PDF file with the PDF Standard encryption handler.
:param str user_pwd: The "user password", which allows for opening
and reading the PDF file with the restrictions provided.
:param str owner_pwd: The "owner password", which allows for
opening the PDF files without any restrictions. By default,
the owner password is the same as the user password.
:param bool use_128bit: flag as to whether to use 128bit
encryption. When false, 40bit encryption will be used. By default,
this flag is on.
"""
import time, random
if owner_pwd == None:
owner_pwd = user_pwd
if use_128bit:
V = 2
rev = 3
keylen = int(128 / 8)
else:
V = 1
rev = 2
keylen = int(40 / 8)
# permit copy and printing only:
P = -44
O = ByteStringObject(_alg33(owner_pwd, user_pwd, rev, keylen))
ID_1 = ByteStringObject(md5(b_(repr(time.time()))).digest())
ID_2 = ByteStringObject(md5(b_(repr(random.random()))).digest())
writer_obj._ID = ArrayObject((ID_1, ID_2))
if rev == 2:
U, key = _alg34(user_pwd, O, P, ID_1)
else:
assert rev == 3
U, key = _alg35(user_pwd, rev, keylen, O, P, ID_1, False)
encrypt = DictionaryObject()
encrypt[NameObject("/Filter")] = NameObject("/Standard")
encrypt[NameObject("/V")] = NumberObject(V)
if V == 2:
encrypt[NameObject("/Length")] = NumberObject(keylen * 8)
encrypt[NameObject("/R")] = NumberObject(rev)
encrypt[NameObject("/O")] = ByteStringObject(O)
encrypt[NameObject("/U")] = ByteStringObject(U)
encrypt[NameObject("/P")] = NumberObject(P)
writer_obj._encrypt = writer_obj._addObject(encrypt)
writer_obj._encrypt_key = key
unmeta = PdfFileReader(pdf)
writer = PdfFileWriter()
writer.appendPagesFromReader(unmeta)
encrypt(writer, '', '123')
with open(pdf, 'wb') as fp:
writer.write(fp)
self._update_states()
return super().dropEvent(event)
if __name__ == '__main__':
app = QApplication([])
mw = MainWindow()
mw.resize(500, 350)
mw.show()
app.exec()
При работе с файловыми путями пользовался материалами с этого сайта и с этого.
Для создания конечной программы .exe использовал эту статью.
Саму программу можно скачать здесь.