From 5fa1c78548d774070f862c1285e09394e5922216 Mon Sep 17 00:00:00 2001 From: Valeriy Mukhtarulin Date: Tue, 1 Jun 2021 10:43:45 -0400 Subject: [PATCH 1/6] Handle missing fonts, clean extracted images --- core/constants.py | 14 ++--- core/document.py | 129 +++++++++++++++++++++++++++++----------------- core/resources.py | 79 +++++++++++++++------------- core/surface.py | 117 +++++++++++++++++++++++------------------ ofd_test.py | 6 +-- 5 files changed, 203 insertions(+), 142 deletions(-) diff --git a/core/constants.py b/core/constants.py index 5e5d18b..d413c03 100644 --- a/core/constants.py +++ b/core/constants.py @@ -1,8 +1,8 @@ UNITS = { - 'mm': 1 / 25.4, - 'cm': 1 / 2.54, - 'in': 1, - 'pt': 1 / 72., - 'pc': 1 / 6., - 'px': None, -} \ No newline at end of file + "mm": 1 / 25.4, + "cm": 1 / 2.54, + "in": 1, + "pt": 1 / 72.0, + "pc": 1 / 6.0, + "px": None, +} diff --git a/core/document.py b/core/document.py index e23a9a6..56c89e1 100644 --- a/core/document.py +++ b/core/document.py @@ -1,34 +1,39 @@ import io import os import traceback +from typing import Optional from zipfile import PyZipFile import cssselect2 from defusedxml import ElementTree from .constants import UNITS -from .resources import res_add_font, res_add_multimedia, MultiMedias, Images +from .resources import res_add_font, res_add_multimedia from .surface import * +from pathlib import Path +import tempfile +import shutil class OFDFile(object): """ OFD Ref:GBT_33190-2016_电子文件存储与交换格式版式文档.pdf """ + #: contains OFD file header data header = None #: references to document's resources resources = None - zf:PyZipFile + zf: PyZipFile def __init__(self, fobj): self.zf = fobj if isinstance(fobj, PyZipFile) else PyZipFile(fobj) # for info in self._zf.infolist(): # print(info) - self.node_tree = self.read_node('OFD.xml') + self.node_tree = self.read_node("OFD.xml") # parse node - self.document_node = self.read_node(self.node_tree['DocBody']['DocRoot'].text) + self.document_node = self.read_node(self.node_tree["DocBody"]["DocRoot"].text) self.document = OFDDocument(self.zf, self.document_node) # print_node_recursive(self.document_node) @@ -38,68 +43,96 @@ def read_node(self, location): root = cssselect2.ElementWrapper.from_xml_root(tree) return Node(root) - def draw_document(self, doc_num=0): + def draw_document(self, doc_num=0, destination: Optional[str] = None): document = self.document + destination = destination or "." + destination = Path(destination) + destination.mkdir(exist_ok=True, parents=True) paths = [] - for page in document.pages: - surface = Surface(page, os.path.split(self.zf.filename)[-1].strip('.ofd')) - paths.append(surface.draw(page)) + for i, page in enumerate(document.pages): + surface = Surface(page, os.path.split(self.zf.filename)[-1].strip(".ofd")) + paths.append( + surface.draw(page, destination / Path(f"{surface.filename}_{i}.png")) + ) + shutil.rmtree(self.document.work_folder, ignore_errors=True) return paths -class OFDDocument(object): +class OFDDocument(object): def __init__(self, _zf, node, n=0): self.pages = [] self._zf = _zf - self.name = f'Doc_{n}' + self.work_folder = tempfile.mkdtemp() + self.name = f"Doc_{n}" self.node = node - self.physical_box = [float(i) for i in node['CommonData']['PageArea']['PhysicalBox'].text.split(' ')] + self.physical_box = [ + float(i) + for i in node["CommonData"]["PageArea"]["PhysicalBox"].text.split(" ") + ] self._parse_res() # print('Resources:', Fonts, Images) # assert len(node['CommonData']['TemplatePage']) == len(node['Pages']['Page']) - if isinstance(node['Pages']['Page'], list): - sorted_pages = sorted(node['Pages']['Page'], key=lambda x: int(x.attr['ID'])) + if isinstance(node["Pages"]["Page"], list): + sorted_pages = sorted( + node["Pages"]["Page"], key=lambda x: int(x.attr["ID"]) + ) else: - sorted_pages = [node['Pages']['Page']] + sorted_pages = [node["Pages"]["Page"]] sorted_tpls = [] - if 'TemplatePage' in node['CommonData']: - if isinstance(node['CommonData']['TemplatePage'], list): - sorted_tpls = sorted(node['CommonData']['TemplatePage'], key=lambda x: int(x.attr['ID'])) + if "TemplatePage" in node["CommonData"]: + if isinstance(node["CommonData"]["TemplatePage"], list): + sorted_tpls = sorted( + node["CommonData"]["TemplatePage"], key=lambda x: int(x.attr["ID"]) + ) else: - sorted_tpls = [node['CommonData']['TemplatePage']] + sorted_tpls = [node["CommonData"]["TemplatePage"]] seal_node = None - if f'{self.name}/Signs/Sign_0/SignedValue.dat' in _zf.namelist(): - seal_file = OFDFile(io.BytesIO(_zf.read(f'{self.name}/Signs/Sign_0/SignedValue.dat'))) + if f"{self.name}/Signs/Sign_0/SignedValue.dat" in _zf.namelist(): + seal_file = OFDFile( + io.BytesIO(_zf.read(f"{self.name}/Signs/Sign_0/SignedValue.dat")) + ) seal_node = seal_file.document.pages[0].page_node for i, p in enumerate(sorted_pages): - document = _zf.read(self.name + '/' + sorted_pages[i].attr['BaseLoc']) + document = _zf.read(self.name + "/" + sorted_pages[i].attr["BaseLoc"]) tree = ElementTree.fromstring(document) root = cssselect2.ElementWrapper.from_xml_root(tree) page_node = Node(root) tpl_node = None if i < len(sorted_tpls): - document = _zf.read(self.name + '/' + sorted_tpls[i].attr['BaseLoc']) + document = _zf.read(self.name + "/" + sorted_tpls[i].attr["BaseLoc"]) tree = ElementTree.fromstring(document) root = cssselect2.ElementWrapper.from_xml_root(tree) tpl_node = Node(root) - self.pages.append(OFDPage(self, f'Page_{i}', page_node, tpl_node, seal_node if i == 0 else None)) + self.pages.append( + OFDPage( + self, + f"Page_{i}", + page_node, + tpl_node, + seal_node if i == 0 else None, + ) + ) def _parse_res(self): - if 'DocumentRes' in self.node['CommonData']: - node = Node.from_zp_location(self._zf, f"{self.name}/{self.node['CommonData']['DocumentRes'].text}") + if "DocumentRes" in self.node["CommonData"]: + node = Node.from_zp_location( + self._zf, f"{self.name}/{self.node['CommonData']['DocumentRes'].text}" + ) self._parse_res_node(node) - if 'PublicRes' in self.node['CommonData']: - node = Node.from_zp_location(self._zf, f"{self.name}/{self.node['CommonData']['PublicRes'].text}") + if "PublicRes" in self.node["CommonData"]: + node = Node.from_zp_location( + self._zf, f"{self.name}/{self.node['CommonData']['PublicRes'].text}" + ) self._parse_res_node(node) def _parse_res_node(self, node): if node.tag in RESOURCE_TAGS: try: - RESOURCE_TAGS[node.tag](node, self._zf) + RESOURCE_TAGS[node.tag](node, self._zf, self.work_folder) except Exception as e: # Error in point parsing, do nothing print_node_recursive(node) @@ -112,20 +145,20 @@ def _parse_res_node(self, node): class OFDPage(object): - def __init__(self, parent: OFDDocument, name, page_node, tpl_node, seal_node): self.parent = parent - self.name = f'{parent.name}_{name}' + self.name = f"{parent.name}_{name}" self.physical_box = self.parent.physical_box - if 'Area' in page_node and 'PhysicalBox' in page_node['Area']: - self.physical_box = [float(i) for i in page_node['Area']['PhysicalBox'].text.split(' ')] + if "Area" in page_node and "PhysicalBox" in page_node["Area"]: + self.physical_box = [ + float(i) for i in page_node["Area"]["PhysicalBox"].text.split(" ") + ] self.tpl_node = tpl_node self.page_node = page_node self.seal_node = seal_node class Surface(object): - def __init__(self, page, name, dpi=192): self.page = page self.dpi = dpi @@ -133,7 +166,7 @@ def __init__(self, page, name, dpi=192): @property def pixels_per_mm(self): - return self.dpi * UNITS['mm'] + return self.dpi * UNITS["mm"] def cairo_draw(self, cr, node): # Only draw known tags @@ -151,13 +184,13 @@ def cairo_draw(self, cr, node): # Only draw known tags self.cairo_draw(cr, child) - def draw(self, page): + def draw(self, page, path: Optional[str] = None) -> str: # 计算A4 210mm 192dpi 下得到的宽高 physical_width = self.page.physical_box[2] physical_height = self.page.physical_box[3] width = int(physical_width * self.pixels_per_mm) height = int(physical_height * self.pixels_per_mm) - print(f'create cairo surface, width: {width}, height: {height}') + # print(f"create cairo surface, width: {width}, height: {height}") cairo_surface = cairo.ImageSurface(cairo.FORMAT_ARGB32, width, height) self.cr = cairo.Context(cairo_surface) @@ -177,21 +210,21 @@ def draw(self, page): self.cr.translate(90, 8) self.cairo_draw(self.cr, self.page.seal_node) - path = f'{self.filename}_{page.name}.png' + path = path or f"{self.filename}_{page.name}.png" cairo_surface.write_to_png(path) cairo_surface.finish() return path CAIRO_TAGS = { - 'PathObject': cairo_path, - 'TextObject': cairo_text, - 'ImageObject': cairo_image, + "PathObject": cairo_path, + "TextObject": cairo_text, + "ImageObject": cairo_image, } RESOURCE_TAGS = { - 'Font': res_add_font, - 'MultiMedia': res_add_multimedia, + "Font": res_add_font, + "MultiMedia": res_add_multimedia, } @@ -203,9 +236,11 @@ def __init__(self, element): self.children = [] self.text = node.text - self.tag = (element.local_name - if element.namespace_url in ('', 'http://www.ofdspec.org/2016') else - '{%s}%s' % (element.namespace_url, element.local_name)) + self.tag = ( + element.local_name + if element.namespace_url in ("", "http://www.ofdspec.org/2016") + else "{%s}%s" % (element.namespace_url, element.local_name) + ) self.attr = node.attrib for child in element.iter_children(): child_node = Node(child) @@ -228,10 +263,10 @@ def from_zp_location(zf, location): return Node(root) def __repr__(self): - return f'Tag: {self.tag}, Attr: {self.attr}, Text: {self.text}' + return f"Tag: {self.tag}, Attr: {self.attr}, Text: {self.text}" def print_node_recursive(node, depth=0): - print(' ' * depth, node) + print(" " * depth, node) for child in node.children: print_node_recursive(child, depth=depth + 1) diff --git a/core/resources.py b/core/resources.py index e73509f..7ddc219 100644 --- a/core/resources.py +++ b/core/resources.py @@ -2,7 +2,7 @@ import gi gi.require_version("Gtk", "3.0") -gi.require_version('PangoCairo', '1.0') +gi.require_version("PangoCairo", "1.0") from gi.repository import PangoCairo import cairo from subprocess import Popen, PIPE @@ -14,14 +14,21 @@ font_map = PangoCairo.font_map_get_default() Cairo_Font_Family_Names = [f.get_name() for f in font_map.list_families()] # print(Cairo_Font_Family_Names) -print([f.get_name() for f in font_map.list_families() if - 'sun' in f.get_name().lower() or 'cour' in f.get_name().lower() or 'kai' in f.get_name().lower()]) +# print( +# [ +# f.get_name() +# for f in font_map.list_families() +# if "sun" in f.get_name().lower() +# or "cour" in f.get_name().lower() +# or "kai" in f.get_name().lower() +# ] +# ) OFD_FONT_MAP = { - '楷体': ['KaiTi', 'Kai'], - 'KaiTi': ['KaiTi', 'Kai'], - '宋体': ['SimSun', 'FangSong', 'STSong'], - 'Courier New': ['Courier New', 'Courier'], + "楷体": ["KaiTi", "Kai"], + "KaiTi": ["KaiTi", "Kai"], + "宋体": ["SimSun", "FangSong", "STSong"], + "Courier New": ["Courier New", "Courier"], } @@ -29,18 +36,19 @@ class ResNotFoundException(Exception): """ 资源文件找不到 """ + pass class Font(object): - ID = '' - FontName = '' - FamilyName = '' + ID = "" + FontName = "" + FamilyName = "" def __init__(self, attr): - self.ID = attr['ID'] if 'ID' in attr else '' - self.FontName = attr['FontName'] if 'FontName' in attr else '' - self.FamilyName = attr['FamilyName'] if 'FamilyName' in attr else '' + self.ID = attr["ID"] if "ID" in attr else "" + self.FontName = attr["FontName"] if "FontName" in attr else "" + self.FamilyName = attr["FamilyName"] if "FamilyName" in attr else "" def get_font_family(self): # fixme: 印章的Font只有FontName, 沒有FamilyName @@ -49,18 +57,18 @@ def get_font_family(self): for c in candidates: if c in Cairo_Font_Family_Names: return c - raise ResNotFoundException(f'OFD字体文件[{self.FontName}] 找不到') + # raise ResNotFoundException(f"Can't find '{self.FontName}' font file") return self.FontName def __repr__(self): - return f'ID:{self.ID}, FontName:{self.FontName} FamilyName:{self.FamilyName}, System:{self.get_font_family()}' + return f"ID:{self.ID}, FontName:{self.FontName} FamilyName:{self.FamilyName}, System:{self.get_font_family()}" class MultiMedia(object): def __init__(self, node): - self.ID = node.attr['ID'] - self.Type = node.attr['Type'] - self.location = node['MediaFile'].text + self.ID = node.attr["ID"] + self.Type = node.attr["Type"] + self.location = node["MediaFile"].text @staticmethod def parse_from_node(node): @@ -68,23 +76,22 @@ def parse_from_node(node): class Image(MultiMedia): - def __init__(self, node, _zf): + def __init__(self, node, _zf, work_folder: str): super().__init__(node) self.png_location = None - self.Format = node.attr['Format'] if 'Format' in node.attr else '' - suffix = self.location.split('.')[-1] - if suffix == 'jb2': - # print('tempdir', tempfile.gettempdir()) + self.Format = node.attr["Format"] if "Format" in node.attr else "" + suffix = self.location.split(".")[-1] + if suffix == "jb2": jb2_path = [loc for loc in _zf.namelist() if self.location in loc][0] - png_path = jb2_path.replace('.jb2', '.png') - x_path = _zf.extract(jb2_path) - if platform.system() == 'Windows': - Popen(['./bin/jbig2dec', '-o', png_path, x_path], stdout=PIPE) + x_path = _zf.extract(jb2_path, path=work_folder) + png_path = x_path.replace(".jb2", ".png") + + if platform.system() == "Windows": + Popen(["./bin/jbig2dec", "-o", png_path, x_path], stdout=PIPE) else: - Popen(['jbig2dec', '-o', png_path, x_path], stdout=PIPE) + Popen(["jbig2dec", "-o", png_path, x_path], stdout=PIPE) - # print(f'jbig2dec {png_path}', output.stdout.read()) self.png_location = png_path def get_cairo_surface(self): @@ -93,14 +100,14 @@ def get_cairo_surface(self): return None def __repr__(self): - return f'Image ID:{self.ID}, Format:{self.Format}' + return f"Image ID:{self.ID}, Format:{self.Format}" -def res_add_font(node, _zf): - Fonts[node.attr['ID']] = Font(node.attr) +def res_add_font(node, _zf, work_folder): + Fonts[node.attr["ID"]] = Font(node.attr) -def res_add_multimedia(node, _zf): - if node.attr['Type'] == 'Image': - image = Image(node, _zf) - Images[node.attr['ID']] = image +def res_add_multimedia(node, _zf, work_folder): + if node.attr["Type"] == "Image": + image = Image(node, _zf, work_folder) + Images[node.attr["ID"]] = image diff --git a/core/surface.py b/core/surface.py index 3c793bc..b9f708e 100644 --- a/core/surface.py +++ b/core/surface.py @@ -4,19 +4,26 @@ from .resources import Fonts, Images gi.require_version("Gtk", "3.0") -gi.require_version('PangoCairo', '1.0') +gi.require_version("PangoCairo", "1.0") from gi.repository import Pango, PangoCairo import cairo SCALE_192 = 7.559 SCALE_128 = 5.039 -COMMANDS = set('SMLQBAC') +COMMANDS = set("SMLQBAC") COMMAND_RE = re.compile(r"([SMLQBAC])") FLOAT_RE = re.compile(r"[-+]?[0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?") font_map = PangoCairo.font_map_get_default() -print([f.get_name() for f in font_map.list_families() if - 'song' in f.get_name().lower() or 'cour' in f.get_name().lower() or 'kai' in f.get_name().lower()]) +# print( +# [ +# f.get_name() +# for f in font_map.list_families() +# if "song" in f.get_name().lower() +# or "cour" in f.get_name().lower() +# or "kai" in f.get_name().lower() +# ] +# ) def _tokenize_path(pathdef): @@ -27,7 +34,9 @@ def _tokenize_path(pathdef): yield token -def _draw_AbbreviatedData(draw, boundary, path, fillColor=(128, 128, 128), lineWidth=2, scale=SCALE_192): +def _draw_AbbreviatedData( + draw, boundary, path, fillColor=(128, 128, 128), lineWidth=2, scale=SCALE_192 +): x_start = boundary[0] y_start = boundary[1] current_pos = (x_start, y_start) @@ -39,21 +48,21 @@ def _draw_AbbreviatedData(draw, boundary, path, fillColor=(128, 128, 128), lineW if elements[-1] in COMMANDS: command = elements.pop() else: - raise Exception('操作符违法') + raise Exception("操作符违法") - if command == 'M': + if command == "M": x = scale * float(elements.pop()) y = scale * float(elements.pop()) pos = (x_start + x, y_start + y) current_pos = pos - elif command == 'L': + elif command == "L": x = scale * float(elements.pop()) y = scale * float(elements.pop()) pos = (x_start + x, y_start + y) draw.line(current_pos + pos, fill=fillColor, width=lineWidth) - elif command == 'B': + elif command == "B": pass @@ -70,21 +79,21 @@ def _cairo_draw_path(cr, boundary, path): if elements[-1] in COMMANDS: command = elements.pop() else: - raise Exception('操作符违法') + raise Exception("操作符违法") - if command == 'M': + if command == "M": x = float(elements.pop()) y = float(elements.pop()) cr.move_to(x, y) - elif command == 'L': + elif command == "L": x = float(elements.pop()) y = float(elements.pop()) # pos = (x_start + x, y_start + y) cr.line_to(x, y) # draw.line(current_pos + pos, fill=fillColor, width=lineWidth) - elif command == 'B': + elif command == "B": x1 = float(elements.pop()) y1 = float(elements.pop()) x2 = float(elements.pop()) @@ -92,16 +101,16 @@ def _cairo_draw_path(cr, boundary, path): x3 = float(elements.pop()) y3 = float(elements.pop()) cr.curve_to(x1, y1, x2, y2, x3, y3) - elif command == 'A': + elif command == "A": # cr.arc() pass - elif command == 'Q': + elif command == "Q": x1 = float(elements.pop()) y1 = float(elements.pop()) x2 = float(elements.pop()) y2 = float(elements.pop()) cr.curve_to(x1, y1, x1, y1, x2, y2) - elif command == 'C': + elif command == "C": pass @@ -110,7 +119,7 @@ def _trans_Delta(elements, scale=SCALE_192): elements.reverse() while elements: e = elements.pop() - if e == 'g': + if e == "g": c = int(elements.pop()) v = float(elements.pop()) parsed += c * [v * scale] @@ -121,17 +130,21 @@ def _trans_Delta(elements, scale=SCALE_192): def cairo_path(cr, node): - lineWidth = float(node.attr['LineWidth']) if 'LineWidth' in node.attr else 0.5 - boundary = [float(i) for i in node.attr['Boundary'].split(' ')] + lineWidth = float(node.attr["LineWidth"]) if "LineWidth" in node.attr else 0.5 + boundary = [float(i) for i in node.attr["Boundary"].split(" ")] ctm = None - if 'CTM' in node.attr: - ctm = [float(i) for i in node.attr['CTM'].split(' ')] + if "CTM" in node.attr: + ctm = [float(i) for i in node.attr["CTM"].split(" ")] fillColor = [0, 0, 0] - if 'FillColor' in node: - fillColor = [float(i) / 255. for i in node['FillColor'].attr['Value'].split(' ')] + if "FillColor" in node: + fillColor = [ + float(i) / 255.0 for i in node["FillColor"].attr["Value"].split(" ") + ] strokeColor = [0, 0, 0] - if 'StrokeColor' in node: - strokeColor = [float(i) / 255. for i in node['StrokeColor'].attr['Value'].split(' ')] + if "StrokeColor" in node: + strokeColor = [ + float(i) / 255.0 for i in node["StrokeColor"].attr["Value"].split(" ") + ] # print('draw path', boundary, fillColor, strokeColor) cr.save() if ctm: @@ -145,7 +158,7 @@ def cairo_path(cr, node): else: cr.translate(boundary[0], boundary[1]) - AbbreviatedData = node['AbbreviatedData'].text + AbbreviatedData = node["AbbreviatedData"].text cr.set_source_rgba(*strokeColor) cr.set_line_width(lineWidth) _cairo_draw_path(cr, boundary, AbbreviatedData) @@ -154,41 +167,45 @@ def cairo_path(cr, node): def cairo_text(cr, node): - boundary = [float(i) for i in node.attr['Boundary'].split(' ')] + boundary = [float(i) for i in node.attr["Boundary"].split(" ")] ctm = None - if 'CTM' in node.attr: - ctm = [float(i) for i in node.attr['CTM'].split(' ')] - font_id = node.attr['Font'] + if "CTM" in node.attr: + ctm = [float(i) for i in node.attr["CTM"].split(" ")] + font_id = node.attr["Font"] font_family = get_font_from_id(font_id).get_font_family() - font_size = float(node.attr['Size']) / 1.3 + font_size = float(node.attr["Size"]) / 1.3 fillColor = [0, 0, 0] - if 'FillColor' in node: - fillColor = [float(i) / 255. for i in node['FillColor'].attr['Value'].split(' ')] + if "FillColor" in node: + fillColor = [ + float(i) / 255.0 for i in node["FillColor"].attr["Value"].split(" ") + ] strokeColor = [0, 0, 0] - if 'StrokeColor' in node: - strokeColor = [float(i) / 255. for i in node['StrokeColor'].attr['Value'].split(' ')] + if "StrokeColor" in node: + strokeColor = [ + float(i) / 255.0 for i in node["StrokeColor"].attr["Value"].split(" ") + ] - TextCode = node['TextCode'] + TextCode = node["TextCode"] text = TextCode.text # print(f'cario text {text}, {font_id}') deltaX = None deltaY = None - if 'DeltaX' in TextCode.attr: - deltaX = _trans_Delta(TextCode.attr['DeltaX'].split(' '), scale=1) + if "DeltaX" in TextCode.attr: + deltaX = _trans_Delta(TextCode.attr["DeltaX"].split(" "), scale=1) if deltaX and len(deltaX) + 1 != len(text): # raise Exception('TextCode DeltaX 与字符个数不符') - deltaX = deltaX[:len(text)-1] + deltaX = deltaX[: len(text) - 1] - if 'DeltaY' in TextCode.attr: - deltaY = _trans_Delta(TextCode.attr['DeltaY'].split(' '), scale=1) + if "DeltaY" in TextCode.attr: + deltaY = _trans_Delta(TextCode.attr["DeltaY"].split(" "), scale=1) if deltaY and len(deltaY) + 1 != len(text): # raise Exception('TextCode DeltaY 与字符个数不符') - deltaY = deltaY[:len(text)-1] + deltaY = deltaY[: len(text) - 1] - X = float(TextCode.attr['X']) - Y = float(TextCode.attr['Y']) + X = float(TextCode.attr["X"]) + Y = float(TextCode.attr["Y"]) for idx, rune in enumerate(text): cr.save() # cr.identity_matrix() @@ -221,11 +238,11 @@ def cairo_text(cr, node): def cairo_image(cr, node): - resource_id = node.attr['ResourceID'] - boundary = [float(i) for i in node.attr['Boundary'].split(' ')] + resource_id = node.attr["ResourceID"] + boundary = [float(i) for i in node.attr["Boundary"].split(" ")] ctm = None - if 'CTM' in node.attr: - ctm = [float(i) for i in node.attr['CTM'].split(' ')] + if "CTM" in node.attr: + ctm = [float(i) for i in node.attr["CTM"].split(" ")] img_surface = get_res_image(resource_id).get_cairo_surface() cr.save() @@ -235,7 +252,9 @@ def cairo_image(cr, node): x, y = cr.get_matrix().transform_point(x, y) # 画图片是fillparent,自己重新计算缩放matrix, 同时缩放基础点x,y - matrix = cairo.Matrix(width / img_surface.get_width(), 0, 0, height / img_surface.get_height(), 0, 0) + matrix = cairo.Matrix( + width / img_surface.get_width(), 0, 0, height / img_surface.get_height(), 0, 0 + ) cr.identity_matrix() cr.set_matrix(matrix) matrix.invert() diff --git a/ofd_test.py b/ofd_test.py index 708f82a..6359a32 100644 --- a/ofd_test.py +++ b/ofd_test.py @@ -1,11 +1,11 @@ from core.document import OFDFile import os -folder = 'ofds' +folder = "ofds" for path in os.listdir(folder): - if not path.endswith('.ofd'): + if not path.endswith(".ofd"): continue - print('read file', path) + print("read file", path) file_path = os.path.join(folder, path) doc = OFDFile(file_path) doc.draw_document() From 98e2e5ce4cd1f45df7a011277d1dd740090bc0c7 Mon Sep 17 00:00:00 2001 From: Matthew Eng Date: Tue, 27 Aug 2024 10:36:00 -0700 Subject: [PATCH 2/6] Don't pass work_folder anymore --- core/document.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/document.py b/core/document.py index 0175e69..8f0bbc7 100644 --- a/core/document.py +++ b/core/document.py @@ -175,7 +175,7 @@ def _parse_res(self): def _parse_res_node(self, node): if node.tag in RESOURCE_TAGS: try: - RESOURCE_TAGS[node.tag](node, self._zf, self.work_folder) + RESOURCE_TAGS[node.tag](node, self._zf) except Exception as e: # Error in point parsing, do nothing print_node_recursive(node) From 71b07914cf9acd3fe8b4356344c44734bfb34509 Mon Sep 17 00:00:00 2001 From: Matthew Eng Date: Tue, 27 Aug 2024 10:37:43 -0700 Subject: [PATCH 3/6] Remove work_folder --- core/resources.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/core/resources.py b/core/resources.py index 77efd72..3dad335 100644 --- a/core/resources.py +++ b/core/resources.py @@ -79,7 +79,7 @@ def parse_from_node(node): class Image(MultiMedia): - def __init__(self, node, _zf, work_folder: str): + def __init__(self, node, _zf): super().__init__(node) self.png_location = None self.Format = node.attr["Format"] if "Format" in node.attr else "" @@ -87,7 +87,8 @@ def __init__(self, node, _zf, work_folder: str): if suffix == "jb2": jb2_path = [loc for loc in _zf.namelist() if self.location in loc][0] - x_path = _zf.extract(jb2_path, path=work_folder) + tmp_folder = os.path.basename(_zf.filename).replace(".ofd", "") + x_path = _zf.extract(jb2_path, tmp_folder) png_path = x_path.replace(".jb2", ".png") if platform.system() == "Windows": Popen(["./bin/jbig2dec", "-o", png_path, x_path], stdout=PIPE) From 03241806113056e126613660992f8024dd2ff85d Mon Sep 17 00:00:00 2001 From: Matthew Eng Date: Tue, 27 Aug 2024 10:38:23 -0700 Subject: [PATCH 4/6] Remove work_folder --- core/document.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/core/document.py b/core/document.py index 8f0bbc7..3fec951 100644 --- a/core/document.py +++ b/core/document.py @@ -59,7 +59,6 @@ def draw_document(self, doc_num=0, destination: Optional[str] = None): for i, page in enumerate(document.pages): surface = Surface(page, os.path.split(self.zf.filename)[-1].strip(".ofd")) paths.append(surface.draw(page, destination / Path(f"{surface.filename}_{i}.png"))) - shutil.rmtree(self.document.work_folder, ignore_errors=True) return paths @@ -67,7 +66,6 @@ class OFDDocument(object): def __init__(self, _zf, node, n=0): self.pages = [] self._zf = _zf - self.work_folder = tempfile.mkdtemp() self.name = f"Doc_{n}" self.node = node try: From 10e55e34d436497f9944942c56d795d86caf609f Mon Sep 17 00:00:00 2001 From: Matthew Eng Date: Tue, 24 Sep 2024 10:27:20 -0700 Subject: [PATCH 5/6] Add import --- core/document.py | 1 + 1 file changed, 1 insertion(+) diff --git a/core/document.py b/core/document.py index 3fec951..4ba81f6 100644 --- a/core/document.py +++ b/core/document.py @@ -4,6 +4,7 @@ import tempfile import traceback from pathlib import Path +from typing import Optional from zipfile import BadZipFile, PyZipFile import cssselect2 From 0fa63d0bdecf3b7fa5969963fb03c35228e44bfb Mon Sep 17 00:00:00 2001 From: Valeriy Mukhtarulin Date: Fri, 18 Oct 2024 12:45:36 -0300 Subject: [PATCH 6/6] Update requirements.txt --- requirements.txt | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index 7ff4dd2..34e4e81 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,2 @@ -defusedxml -cssselect2 -pillow +cssselect2~=0.4 +defusedxml~=0.7