From cb7e9cb391d8d35073b74c4afc4eac66ea0a2fad Mon Sep 17 00:00:00 2001 From: Daniel Robotics Date: Mon, 19 May 2025 14:01:39 +1000 Subject: [PATCH] The project structure has been changed --- .gitignore | 2 + app.py | 57 +- image_processing/DrawioProcessing.py | 671 --------------- image_processing/ImageProcessing.py | 535 ------------ image_processing/__init__.py | 11 - image_processing/enumerates.py | 23 - pages/analysis_page.py | 182 ---- pages/image_processing_page.py | 296 ------- pages/settings_page.py | 159 ---- pyproject.toml | 37 + {pages => science_helper}/__init__.py | 0 science_helper/image_processing/__init__.py | 14 + .../image_processing/drawio_processing.py | 689 +++++++++++++++ science_helper/image_processing/enumerates.py | 51 ++ .../image_processing/fonts}/Arial.ttf | Bin .../image_processing/fonts}/Calibri.ttf | Bin .../image_processing/fonts}/Garamond.ttf | Bin .../image_processing/fonts}/Georgia.ttf | Bin .../image_processing/fonts}/Helvetica.ttf | Bin .../image_processing/fonts}/Roboto.ttf | Bin .../image_processing/fonts}/Tahoma.ttf | Bin .../fonts}/Times New Roman.ttf | Bin .../image_processing/fonts}/Verdana.ttf | Bin .../image_processing/image_design.py | 306 ++++--- science_helper/image_processing/processing.py | 784 ++++++++++++++++++ .../search_vak_articles/__init__.py | 20 + .../search_vak_articles/downloader.py | 90 ++ science_helper/search_vak_articles/filters.py | 34 + .../search_vak_articles/nomenclature.py | 95 +++ .../search_vak_articles/pdf_parser.py | 179 ++++ science_helper/utils/setting.py | 106 +++ setting.py | 62 -- test/test_drawio_image_design.py | 95 ++- test/test_image_design.py | 107 ++- utils/__init__.py | 23 - utils/downloader.py | 112 --- utils/pdf_parser.py | 114 --- web/pages/__init__.py | 0 web/pages/analysis_page.py | 293 +++++++ web/pages/image_processing_page.py | 427 ++++++++++ web/pages/settings_page.py | 171 ++++ {static => web/static}/favicon.ico | Bin {static => web/static}/logo.png | Bin 43 files 
changed, 3331 insertions(+), 2414 deletions(-) delete mode 100644 image_processing/DrawioProcessing.py delete mode 100644 image_processing/ImageProcessing.py delete mode 100644 image_processing/__init__.py delete mode 100644 image_processing/enumerates.py delete mode 100644 pages/analysis_page.py delete mode 100644 pages/image_processing_page.py delete mode 100644 pages/settings_page.py create mode 100644 pyproject.toml rename {pages => science_helper}/__init__.py (100%) create mode 100644 science_helper/image_processing/__init__.py create mode 100644 science_helper/image_processing/drawio_processing.py create mode 100644 science_helper/image_processing/enumerates.py rename {fonts => science_helper/image_processing/fonts}/Arial.ttf (100%) rename {fonts => science_helper/image_processing/fonts}/Calibri.ttf (100%) rename {fonts => science_helper/image_processing/fonts}/Garamond.ttf (100%) rename {fonts => science_helper/image_processing/fonts}/Georgia.ttf (100%) rename {fonts => science_helper/image_processing/fonts}/Helvetica.ttf (100%) rename {fonts => science_helper/image_processing/fonts}/Roboto.ttf (100%) rename {fonts => science_helper/image_processing/fonts}/Tahoma.ttf (100%) rename {fonts => science_helper/image_processing/fonts}/Times New Roman.ttf (100%) rename {fonts => science_helper/image_processing/fonts}/Verdana.ttf (100%) rename image_processing/imageDesign.py => science_helper/image_processing/image_design.py (55%) create mode 100644 science_helper/image_processing/processing.py create mode 100644 science_helper/search_vak_articles/__init__.py create mode 100644 science_helper/search_vak_articles/downloader.py create mode 100644 science_helper/search_vak_articles/filters.py create mode 100644 science_helper/search_vak_articles/nomenclature.py create mode 100644 science_helper/search_vak_articles/pdf_parser.py create mode 100644 science_helper/utils/setting.py delete mode 100644 setting.py delete mode 100644 utils/__init__.py delete mode 100644 
utils/downloader.py delete mode 100644 utils/pdf_parser.py create mode 100644 web/pages/__init__.py create mode 100644 web/pages/analysis_page.py create mode 100644 web/pages/image_processing_page.py create mode 100644 web/pages/settings_page.py rename {static => web/static}/favicon.ico (100%) rename {static => web/static}/logo.png (100%) diff --git a/.gitignore b/.gitignore index 75f1cfd..c9285fc 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,8 @@ __pycache__ .venv venv +.ruff_cache +.vscode data/*.json data/*.pdf diff --git a/app.py b/app.py index db82894..4db2daf 100644 --- a/app.py +++ b/app.py @@ -1,56 +1,59 @@ from pathlib import Path + from nicegui import ui -from pages.analysis_page import science_articles_page -from pages.settings_page import settings_page -from pages.image_processing_page import image_processing_page -# ──────────────────────────────── -# Глобальное состояние -# ──────────────────────────────── +from web.pages.analysis_page import science_articles_page +from web.pages.image_processing_page import image_processing_page +from web.pages.settings_page import settings_page + + data_ready = False -def check_data_ready(): - required_files = ['specializations.json', 'vak_articles.json', 'whitelist_articles.json'] - return all((Path('data') / name).exists() for name in required_files) -def update_data_status(): - global data_ready +def check_data_ready(): # noqa: D103 + required_files = ["specializations.json", "vak_articles.json", "whitelist_articles.json"] + return all((Path("data") / name).exists() for name in required_files) + + +def update_data_status(): # noqa: D103 + global data_ready # noqa: PLW0603 data_ready = check_data_ready() -# ──────────────────────────────── -# Интерфейс -# ──────────────────────────────── -@ui.page('/') -def main_page(): - update_data_status() # при заходе на страницу проверим наличие данных - with ui.header().classes('items-center justify-between p-4 shadow-md'): - with ui.row().classes('items-center 
gap-4'): - with ui.link(target='/'): - ui.image('static/logo.png').classes('w-12 h-12') - ui.label('Science Helper').classes('text-xl font-bold') +@ui.page("/") +def main_page(): # noqa: D103 + update_data_status() # при заходе на страницу проверим наличие данных + with ui.header().classes("items-center justify-between p-4 shadow-md"): + with ui.row().classes("items-center gap-4"): + with ui.link(target="/"): + ui.image("./web/static/logo.png").classes("w-12 h-12") + ui.label("Science Helper").classes("text-xl font-bold") - with ui.tabs().classes('w-full') as tabs: + with ui.tabs().classes("w-full") as tabs: tab1 = ui.tab("Обработчик изображений") - tab2 = ui.tab('Парсинг статей') - tab3 = ui.tab('Настройки') + tab2 = ui.tab("Парсинг статей") + tab3 = ui.tab("Настройки") - with ui.tab_panels(tabs, value=tab1).classes('w-full'): + with ui.tab_panels(tabs, value=tab1).classes("w-full"): with ui.tab_panel(tab1): + @ui.refreshable def image_processing_panel(): image_processing_page() + image_processing_panel() with ui.tab_panel(tab2): + @ui.refreshable def science_panel(): science_articles_page() + science_panel() with ui.tab_panel(tab3): settings_page(update_data_status, science_panel.refresh) -ui.run(title="ScienceHelper", favicon="./static/favicon.ico") +ui.run(title="ScienceHelper", favicon="./web/static/favicon.ico") diff --git a/image_processing/DrawioProcessing.py b/image_processing/DrawioProcessing.py deleted file mode 100644 index c030cd0..0000000 --- a/image_processing/DrawioProcessing.py +++ /dev/null @@ -1,671 +0,0 @@ -import io -import uuid -import base64 -import xml.etree.ElementTree as ET - -from PIL import Image -from pathlib import Path -from typing import Optional, Union -from image_processing.enumerates import * -from image_processing.ImageProcessing import ImageProcessing - - -class DrawioImageDesign(ImageProcessing): - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self._images = self._load_images(self._images_path) - 
- self._root = None - self._xml_root = None - self._create_drawio_structure() - - # Методы родительского класса - def _draw_border(self): - """ - Generates a style string for the image border in draw.io format. - - Returns: - str: A style string that defines the border color and stroke width - based on the object's `_border_fill` and the maximum of `_border_size`. - Example: "imageBorder=#000;strokeWidth=10;" - - Used in: - - Setting the border style for mxCell elements representing images. - """ - return f"imageBorder={self._border_fill};strokeWidth={max(self._border_size)};" - - def _add_numbering(self, - image_w: int, - image_h: int, - label: str = "", - parent_id: str = "1"): - - """ - Adds a numbering label (e.g., index or identifier) to an image element - as an `mxCell` with geometry and styling for draw.io. - - Args: - image_w (int): Width of the image in pixels. - image_h (int): Height of the image in pixels. - label (str, optional): The text label to display (e.g., a number or letter). Defaults to "". - parent_id (str, optional): The ID of the parent `mxCell` group or image. Defaults to "1". - - Behavior: - - Computes a position for the label based on `_signature_pos` (e.g., top-right). - - Applies an offset using `_border_size`. - - Creates a styled `mxCell` for the label and positions it using `mxGeometry`. - - Used in: - - `preprocessing_image()` for attaching index/label annotations to image blocks. 
- """ - - x0, y0, _, _ = self._get_positions(image_w, image_h) - offset = max(self._border_size) - key = self._signature_pos.value if isinstance(self._signature_pos, SignaturePosition) else self._signature_pos - - pos_map = { - SignaturePosition.TOP_LEFT.value: (-offset, -offset), - SignaturePosition.TOP_RIGHT.value: (offset, -offset), - SignaturePosition.BOTTOM_LEFT.value: (-offset, offset), - SignaturePosition.BOTTOM_RIGHT.value: (offset, offset) - } - - dx, dy = pos_map.get(key, (0, 0)) - x0 += dx - y0 += dy - - cell = self._create_mx_cell( - id=self._generate_id(suffix="-numbering"), - value=self._get_numbering_text(label), - style=self._get_numbering_style(), - vertex="1", - parent=parent_id - ) - - self._create_mx_geometry( - cell, - x=str(x0), - y=str(y0), - width=str(self._signature_size[0]), - height=str(self._signature_size[1]) - ) - - - def _add_axes(self, - image_w: int, - image_h: int, - label_x: str, - label_y: str, - parent_id: str = "1"): - - """ - Adds X and Y axes with corresponding labels to an image element. - - Args: - image_w (int): Width of the image in pixels. - image_h (int): Height of the image in pixels. - label_x (str): Text label for the X axis. - label_y (str): Text label for the Y axis. - parent_id (str, optional): The ID of the parent `mxCell` group (usually the image). Defaults to "1". - - Behavior: - - Calculates the total size required to draw the axes including labels. - - Creates a group `mxCell` to contain the axes. - - Draws two axis lines using `_create_axis`: - - X axis: horizontal line from (0, height) to (axis_length, height) - - Y axis: vertical line from (0, height) to (0, height - axis_length) - - Adds text labels near the ends of each axis using `_add_label`. - - Used in: - - `preprocessing_image()` when axis display is enabled (`_draw_axis=True`). 
- """ - - offset_x, offset_y = (self._axis_offset, self._axis_offset) if isinstance(self._axis_offset, int) else self._axis_offset - - label_x_width = len(label_x) * self._axis_font_size * 0.6 - label_y_height = len(label_y) * self._axis_font_size * 0.6 - - width = self._axis_length + 5 + label_x_width - height = self._axis_length + 5 + label_y_height - - group_id = self._generate_id(suffix="-axisgroup") - style = self._get_text_style() - - group_cell = self._create_mx_cell( - id=group_id, - value="", - style="group", - vertex="1", - connectable="0", - parent=parent_id - ) - self._create_mx_geometry( - group_cell, - x=str(offset_x), - y=str(image_h - offset_y - height), - width=str(width), - height=str(height) - ) - - x0, y0 = 0, height - x_end = self._axis_length - y_end = height - self._axis_length - - xaxis_id = self._generate_id(suffix="-xaxis") - yaxis_id = self._generate_id(suffix="-yaxis") - xlabel_id = self._generate_id(suffix="-xlabel") - ylabel_id = self._generate_id(suffix="-ylabel") - - self._create_axis(xaxis_id, x0, y0, x_end, y0, group_id) - self._create_axis(yaxis_id, x0, y0, x0, y_end, group_id) - - self._add_label(xlabel_id, label_x, x_end + 5, y0 - 10, len(label_x), group_id, style) - self._add_label(ylabel_id, label_y, x0 + 5, y_end - self._axis_font_size, len(label_y), group_id, style) - - def _layout_images(self, - layout: str = "row", - spacing: int = 10, - grid_cols: Optional[int] = None, - grid_rows: Optional[int] = None): - """ - Arranges multiple images in a specified layout (row, column, or grid) - and groups them into a single parent mxCell in the draw.io structure. - - Args: - layout (str, optional): Layout mode: "row", "column", or "grid". Defaults to "row". - spacing (int, optional): Spacing in pixels between images. Defaults to 10. - grid_cols (Optional[int], optional): Number of columns in grid layout. Used only if layout is "grid". - grid_rows (Optional[int], optional): Number of rows in grid layout. Used only if layout is "grid". 
- - Behavior: - - Calculates (x, y) positions for each image depending on layout type: - - "row": horizontally aligned images. - - "column": vertically stacked images. - - "grid": images arranged in a 2D grid. - - Calls `preprocessing_image()` for each image with its computed position. - - Wraps all images in a group `mxCell` with geometry sized to fit all children. - - Raises: - ValueError: If an unsupported layout type is provided. - - Used in: - - `united_images()` to render the full composed diagram from the image list. - """ - image_w, image_h = self._images[0].size - positions = [] - - if layout == LayoutMode.ROW.value: - for i in range(len(self._images)): - x = i * (image_w + spacing) - y = 0 - positions.append((x, y)) - - elif layout == LayoutMode.COLUMN.value: - for i in range(len(self._images)): - x = 0 - y = i * (image_h + spacing) - positions.append((x, y)) - elif layout == LayoutMode.GRID.value: - n = len(self._images) - cols = grid_cols or int(n ** 0.5) - rows = grid_rows or ((n + cols - 1) // cols) - for idx in range(n): - col = idx % cols - row = idx // cols - x = col * (image_w + spacing) - y = row * (image_h + spacing) - positions.append((x, y)) - else: - raise ValueError(f"Unknown layout type: {layout}") - - - group_id = self._generate_id(suffix="-group") - - group_cell = self._create_mx_cell( - id=group_id, - value="", - style="group", - vertex="1", - connectable="0", - parent="1" - ) - - - all_right = [] - all_bottom = [] - for i, (x, y) in enumerate(positions): - self.preprocessing_image(index=i, - position_x=x, - position_y=y, - parent_id=group_id) - all_right.append(x + image_w) - all_bottom.append(y + image_h) - - self._create_mx_geometry( - group_cell, - x="30", - y="30", - width=str(max(all_right)), - height=str(max(all_bottom)) - ) - - - def preprocessing_image(self, - index: int, - width: int | None = None, - height: int | None = None, - position_x: int = 0, - position_y: int = 0, - parent_id: str = "1"): - """ - Processes a single 
image from the internal image list by resizing, - encoding, and embedding it into the draw.io diagram as an `mxCell`. - - Args: - index (int): Index of the image in the `_images` list. - width (int | None, optional): Target width for resizing. If None, original width is preserved. - height (int | None, optional): Target height for resizing. If None, original height is preserved. - position_x (int, optional): X-coordinate of the image within the parent container. Defaults to 0. - position_y (int, optional): Y-coordinate of the image within the parent container. Defaults to 0. - parent_id (str, optional): ID of the parent `mxCell` group. Defaults to "1". - - Behavior: - - Resizes the image proportionally if dimensions are provided. - - Converts the image to base64 and embeds it into a styled `mxCell`. - - Adds geometry based on specified position and image size. - - Optionally adds: - - A numbering label (`_add_numbering`) if `_signature` and `_signature_label` are enabled. - - Coordinate axes (`_add_axes`) if `_draw_axis` is enabled. - - Raises: - IndexError: If the provided index is out of bounds. - - Used in: - - `_layout_images()` and other high-level composition methods. 
- """ - if index >= len(self._images): - raise IndexError(f"Index {index} outside the range of the image list") - - image = self._resize_proportional(self._images[index], width, height) - image_w, image_h = image.size - - image_base64 = self._image_to_base64(image) - cell_id = self._generate_id(suffix=f"-{index+1}") - - style = ("shape=image;", - "verticalLabelPosition=bottom;", - "labelBackgroundColor=default;", - "verticalAlign=top;", - "aspect=fixed;", - "imageAspect=0;", - f"image=data:image/png,{image_base64};", - self._draw_border() - ) - - cell = self._create_mx_cell( - id=cell_id, - value="", - style="".join(style), - vertex="1", - parent=parent_id - ) - - self._create_mx_geometry( - cell, - x=str(position_x), - y=str(position_y), - width=str(image_w), - height=str(image_h) - ) - - if self._signature and self._signature_label: - self._add_numbering(image_w=image_w, image_h=image_h, - label=self._get_label(index=index), - parent_id=cell_id) - - if self._draw_axis: - lx, ly = self._axis_labels - lx, ly = (lx[index], ly[index]) if isinstance(lx, tuple) else (lx, ly) - self._add_axes(image_w, image_h, label_x=lx, label_y=ly, parent_id=cell_id) - - - def united_images(self, - layout: Union[str, LayoutMode] = "row", - spacing: int = 10, - grid_cols: Optional[int] = None, - grid_rows: Optional[int] = None, - width: int = None, - height: int = None): - """ - Composes all loaded images into a single layout group and generates - the corresponding draw.io structure. - - Args: - layout (Union[str, LayoutMode], optional): Layout mode for arranging images. - Can be "row", "column", or "grid". Defaults to "row". - spacing (int, optional): Spacing between images in pixels. Defaults to 10. - grid_cols (Optional[int], optional): Number of columns in grid layout. Only used if layout is "grid". - grid_rows (Optional[int], optional): Number of rows in grid layout. Only used if layout is "grid". - width (int, optional): If set, resizes all images to this width before layout. 
- height (int, optional): If set, resizes all images to this height before layout. - - Behavior: - - Optionally resizes all images to the specified `width` and `height`. - - Passes control to `_layout_images()` to arrange the images based on the selected layout mode. - - Used in: - - `export_to_drawio()` to generate the final diagram for export. - """ - layout = layout.value if isinstance(layout, LayoutMode) else layout - - if width or height: - self._images = [self._resize_proportional(img, width=width, height=height) for img in self._images] - - self._layout_images(layout=layout, - spacing=spacing, - grid_cols=grid_cols, - grid_rows=grid_rows) - - # методы этого класса - def _create_drawio_structure(self): - """ - Initializes the root XML structure for a draw.io diagram. - - Behavior: - - Creates the top-level element with the draw.io host attribute. - - Adds a element with a unique ID and a predefined name ("Обработчик изображений"). - - Constructs the and its container. - - Adds two base `mxCell` elements with IDs "0" and "1", where: - - ID "0" is the invisible root of all elements. - - ID "1" serves as the main container for the user-defined content. - - Used in: - - Constructor (`__init__`) to prepare an empty draw.io-compatible structure. - - Required before adding any cells, images, or layout groups. - """ - self._root = ET.Element("mxfile", host="ScienceHelper") - - diagram_id = self._generate_id(prefix="", suffix="") - diagram = ET.SubElement(self._root, "diagram", - name="Обработчик изображений", - id=diagram_id) - - model = ET.SubElement(diagram, "mxGraphModel") - self._xml_root = ET.SubElement(model, "root") - - ET.SubElement(self._xml_root, "mxCell", id="0") - ET.SubElement(self._xml_root, "mxCell", id="1", parent="0") - - def _create_mx_cell(self, **attrs) -> ET.Element: - """ - Creates and appends an element to the draw.io XML structure. 
- - Args: - **attrs: Arbitrary keyword arguments representing XML attributes - for the element (e.g., id, value, style, parent, vertex, edge). - - Returns: - ET.Element: The newly created element. - - Behavior: - - Appends the element to the internal `_xml_root` container. - - Used in: - - Most rendering methods to define images, groups, arrows, and text labels. - """ - return ET.SubElement(self._xml_root, "mxCell", **attrs) - - def _create_mx_geometry(self, parent: ET.Element, **attrs) -> ET.Element: - """ - Creates and appends an element to a given element. - - Args: - parent (ET.Element): The parent element to which the geometry is attached. - **attrs: Arbitrary keyword arguments representing attributes of the element - (e.g., x, y, width, height, relative). - - Returns: - ET.Element: The newly created element. - - Behavior: - - Sets the "as" attribute to "geometry", indicating its role in the draw.io structure. - - Used to define the position and size of an . - - Used in: - - Image blocks, labels, axes, and other visual elements requiring placement. - """ - geom = ET.SubElement(parent, "mxGeometry", **attrs) - geom.set("as", "geometry") - return geom - - def _create_axis(self, - id: str, - x0: float, - y0: float, - x1: float, - y1: float, - parent_id: str): - """ - Creates a visual axis (as an edge with an arrow) and appends it to the draw.io XML structure. - - Args: - id (str): Unique ID for the axis mxCell. - x0 (float): X-coordinate of the axis starting point. - y0 (float): Y-coordinate of the axis starting point. - x1 (float): X-coordinate of the axis ending point. - y1 (float): Y-coordinate of the axis ending point. - parent_id (str): ID of the parent mxCell group. - - Behavior: - - Creates an edge-style `mxCell` with a thin arrowhead and custom stroke width. - - Adds an `mxGeometry` block with relative positioning. - - Defines `mxPoint` elements for the source and target coordinates of the axis line. 
- - Used in: - - `_add_axes()` to render X and Y directional lines next to images. - """ - cell = self._create_mx_cell( - id=id, - value="", - style=f"endArrow=blockThin;html=1;rounded=0;strokeWidth={self._axis_width}", - edge="1", - parent=parent_id - ) - - geom = self._create_mx_geometry(cell, width="50", height="50", relative="1") - - source = ET.SubElement(geom, "mxPoint", x=str(x0), y=str(y0)) - target = ET.SubElement(geom, "mxPoint", x=str(x1), y=str(y1)) - - source.set("as", "sourcePoint") - target.set("as", "targetPoint") - - def _add_label(self, - cell_id: str, - text: str, - x: float, - y: float, - width: int, - parent_id: str, - style: str): - """ - Adds a text label as an `mxCell` element to the draw.io diagram. - - Args: - cell_id (str): Unique ID for the label mxCell. - text (str): The text content to display. - x (float): X-coordinate of the label position. - y (float): Y-coordinate of the label position. - width (int): Logical width of the text (multiplied by font size to determine pixel width). - parent_id (str): ID of the parent mxCell (e.g., an axis group). - style (str): The style string for the label (e.g., font, color, alignment). - - Behavior: - - Creates a vertex `mxCell` containing the label text. - - Applies style and attaches it to the given parent cell. - - Defines the geometry (position and size) based on coordinates and scaled text width. - - Used in: - - `_add_axes()` for axis labels. - - Any other diagram element that needs textual annotation. - """ - - cell = self._create_mx_cell( - id=cell_id, value=text, - style=style, vertex="1", parent=parent_id - ) - self._create_mx_geometry(cell, - x=str(x), y=str(y), - width=str(width * self._axis_font_size * 0.6), height="20" - ) - - def _get_text_style(self) -> str: - """ - Constructs a style string for text labels in draw.io format. - - Returns: - str: A concatenated style string that defines appearance and behavior of text elements. 
- Includes font settings, alignment, autosizing, and no stroke or fill colors. - - Example: - "text;html=1;align=left;verticalAlign=middle;resizable=0;...;fontFamily=Arial;fontSize=12;" - - Behavior: - - Enables HTML rendering for text. - - Sets text alignment to left and vertically centered. - - Disables resizing and connections. - - Ensures clean appearance with no border or background fill. - - Applies current font family and axis font size. - - Used in: - - `_add_label()` to style axis or annotation text elements. - """ - return "".join(( - "text;", "html=1;", "align=left;", "verticalAlign=middle;", - "resizable=0;", "points=[];", "autosize=1;", - "strokeColor=none;", "fillColor=none;", - f"fontFamily={self._font_family};", - f"fontColor=#000;", f"fontSize={self._axis_font_size};" - )) - - def _get_numbering_style(self) -> str: - """ - Generates a style string for numbering labels in draw.io format. - - Returns: - str: A style string that defines the visual appearance of a numbering label. - Includes background color, font size, and HTML rendering. - - Example: - "rounded=0;whiteSpace=wrap;html=1;strokeColor=none;fillColor=black;fontSize=24;" - - Behavior: - - Disables rounded corners and stroke outlines. - - Enables HTML text rendering and word wrapping. - - Applies background fill color using `_signature_color`. - - Sets font size from `_signature_font_size`. - - Used in: - - `_add_numbering()` to style index or label annotations on images. - """ - return "".join(( - "rounded=0;", "whiteSpace=wrap;", "html=1;", "strokeColor=none;", - f"fillColor={self._signature_color};", - f"fontSize={self._signature_font_size};" - )) - - def _get_numbering_text(self, label: str) -> str: - """ - Generates an HTML-formatted string for a numbering label in draw.io. - - Args: - label (str): The label text to be displayed (e.g., a number or character). - - Returns: - str: An HTML string using a tag with the configured font family and text color. 
- Example: '1' - - Behavior: - - Uses the current `_font_family` and `_signature_label_color` to format the label. - - Intended for use with draw.io's HTML rendering in `mxCell.value`. - - Used in: - - `_add_numbering()` when embedding label text into image annotations. - """ - return f'{label}' - - def export_to_drawio(self, file: str | Path, **kwargs): - """ - Exports the composed diagram structure to a .drawio-compatible XML file. - - Args: - file (str | Path): The output file path where the XML content will be saved. - **kwargs: Additional keyword arguments passed to `united_images()` - (e.g., layout, spacing, width, height). - - Behavior: - - Calls `united_images()` to arrange and prepare the diagram content. - - Serializes the internal `_root` XML tree into indented draw.io format. - - Writes the final XML string to the specified file with UTF-8 encoding. - - Notes: - - The output file can be opened directly in draw.io or diagrams.net. - - The layout and formatting of images are controlled via kwargs. - - Used in: - - External scripts or UI to generate and save a final visual diagram. - """ - self.united_images(**kwargs) - - tree = ET.ElementTree(self._root) - ET.indent(tree, space=" ", level=0) - tree.write(file, encoding="utf-8", xml_declaration=True) - - @staticmethod - def _generate_id(prefix: str = "E__", - suffix: str = "-1") -> str: - """ - Generates a unique ID string for use in draw.io element attributes. - - Args: - prefix (str, optional): Prefix to prepend to the ID. Defaults to "E__". - suffix (str, optional): Suffix to append to the ID. Defaults to "-1". - - Returns: - str: A unique string composed of the prefix, a base64-encoded UUID segment, - and the suffix. Example: "E__abc123xyz-1" - - Behavior: - - Uses the first 9 bytes of a UUID4 as the base for the ID. - - Encodes it using URL-safe base64 and removes padding. - - Used in: - - Element creation functions to assign distinct and consistent IDs to mxCells. 
- """ - uid = uuid.uuid4().bytes[:9] - base64_id = base64.urlsafe_b64encode(uid).decode("ascii").rstrip("=") - return f"{prefix}{base64_id}{suffix}" - - @staticmethod - def _image_to_base64(image: Image.Image) -> str: - """ - Converts a PIL Image to a base64-encoded PNG string. - - Args: - image (Image.Image): The PIL Image object to encode. - - Returns: - str: A base64-encoded string representing the image in PNG format. - Suitable for embedding directly in draw.io XML as a data URI. - - Behavior: - - Saves the image to an in-memory bytes buffer in PNG format. - - Encodes the buffer to base64 and decodes it to an ASCII string. - - Used in: - - `preprocessing_image()` to embed images into the `mxCell` style attribute. - """ - buffer = io.BytesIO() - image.save(buffer, format="PNG") - return base64.b64encode(buffer.getvalue()).decode("ascii") diff --git a/image_processing/ImageProcessing.py b/image_processing/ImageProcessing.py deleted file mode 100644 index a13334f..0000000 --- a/image_processing/ImageProcessing.py +++ /dev/null @@ -1,535 +0,0 @@ -from pathlib import Path -from typing import List, Optional, Tuple, Union -from PIL import Image, ImageOps, ImageDraw, ImageFont - -from image_processing.enumerates import * - - -def to_roman(n: int) -> str: - val = [ - 1000, 900, 500, 400, - 100, 90, 50, 40, - 10, 9, 5, 4, 1 - ] - syms = [ - 'M', 'CM', 'D', 'CD', - 'C', 'XC', 'L', 'XL', - 'X', 'IX', 'V', 'IV', 'I' - ] - roman = '' - for i in range(len(val)): - count = n // val[i] - roman += syms[i] * count - n -= val[i] * count - return roman - - -def get_label(index: int, mode: str | LabelMode = LabelMode.CYRILLIC_LOWER) -> str: - mode = mode.value if isinstance(mode, LabelMode) else mode - - match mode: - case "latin_lower": return chr(ord('a') + index) - case "latin_upper": return chr(ord('A') + index) - case "cyrillic_lower" | "cyrillic_upper": - base = ord('а') if mode == "cyrillic_lower" else ord('А') - if index >= 32: - raise ValueError(f"Индекс {index} выходит за 
пределы кириллического алфавита") - return chr(base + index) - case "arabic": return str(index + 1) - case "roman": return to_roman(index + 1) - case _: - available = ", ".join(m.value for m in LabelMode) - raise ValueError(f"Неверный режим: '{mode}'. Доступные режимы: {available}") - - -class ImageProcessing: - - def __init__(self, - images_path: Union[str, Path], - border_size: Union[int, Tuple[int, int, int, int], None] = 10, - border_fill: Union[str, Tuple[int, int, int]] = "black", - signature: bool = True, - signature_label: Union[str, Tuple[str], LabelMode, None] = "latin_lower", - signature_label_color: str = "white", - signature_pos: Union[str, SignaturePosition] = "top-left", - signature_size: Tuple[int, int] = (40, 40), - signature_color: str = "black", - signature_font_size: int = 24, - draw_axis: bool = False, - axis_labels: Union[Tuple[str, str], Tuple[Tuple[str], Tuple[str]]] = ("X", "Y"), - axis_offset: Union[int, Tuple[int, int]] = 20, - axis_length: int = 60, - axis_width: int = 3, - axis_font_size: int = 24, - font_family: str = "Arial", - ): - - """ - Initialize the class for processing and composing images with optional - borders, labels, and axis annotations. - - Args: - images_path (Union[str, Path]): Path to the folder containing image files (PNG, JPG, JPEG). - border_size (Union[int, Tuple[int, int, int, int], None], optional): - Border size around each image. Can be: - - int: uniform border on all sides, - - tuple: (left, top, right, bottom), - - None: no border. - Defaults to 10. - border_fill (Union[str, Tuple[int, int, int]], optional): - Color of the border. Can be a string color name (e.g., "black") or an RGB tuple. Defaults to "black". - signature (bool, optional): Whether to add a label/numbering on each image. Defaults to True. - signature_label (Union[str, Tuple[str], LabelMode, None], optional): - Labeling mode. 
Can be: - - str: mode name ("latin_lower", "roman", etc.), - - tuple of strings: custom labels per image, - - LabelMode enum, - - None: no labels. - Defaults to "latin_lower". - signature_label_color (str, optional): Color of the label text. Defaults to "white". - signature_pos (SignaturePosition, optional): Position of the label on the image (top-left, bottom-right, etc.). Defaults to SignaturePosition.TOP_LEFT. - signature_size (Tuple[int, int], optional): Size of the label box (width, height). Defaults to (40, 40). - signature_color (str, optional): Background color of the label box. Defaults to "black". - signature_font_size (int, optional): Font size for labels annotations. Defaults to 24. - draw_axis (bool, optional): Whether to draw X and Y axes on each image. Defaults to False. - axis_labels (Union[Tuple[str, str], Tuple[Tuple[str], Tuple[str]]], optional): - Labels for X and Y axes. Can be: - - Tuple of two strings: global labels, - - Tuple of two tuples: per-image labels. - Defaults to ("X", "Y"). - axis_offset (int, optional): Distance in pixels from the image edge to the axis origin. Defaults to 20. - axis_length (int, optional): Length of the drawn axes in pixels. Defaults to 60. - axis_font_size (int, optional): Font size for axis annotations. Defaults to 24. - - Raises: - TypeError: If any of the arguments are of incorrect type. - ValueError: If label or axis settings are out of bounds or improperly defined. 
- """ - - self._signature_font_size = signature_font_size - self._axis_font_size = axis_font_size - self._font_family = font_family - - self.signature = signature - self.draw_axis = draw_axis - self.images_path = images_path - self.border_size = border_size - self.border_fill = border_fill - self.axis_labels = axis_labels - self.axis_offset = axis_offset - self.axis_length = axis_length - self.axis_width = axis_width - self.signature_pos = signature_pos - self.signature_size = signature_size - self.signature_color = signature_color - self.signature_label = signature_label - self.signature_label_color = signature_label_color - - # Magic methods - def __str__(self) -> str: - return ( - f"ImagesDesign(\n" - f" images_path={self.images_path},\n" - f" border_size={self.border_size}, border_fill={self.border_fill},\n" - f" signature={self.signature}, signature_label={self.signature_label},\n" - f" signature_pos={self.signature_pos}, signature_size={self.signature_size},\n" - f" signature_font_size={self._signature_font_size},\n" - f" draw_axis={self.draw_axis}, axis_labels={self.axis_labels},\n" - f" axis_offset={self.axis_offset}, axis_length={self.axis_length},\n" - f" axis_font_size={self._axis_font_size}\n" - f")" - ) - - def __repr__(self) -> str: - return ( - f"ImagesDesign(images_path={repr(self.images_path)}, " - f"border_size={repr(self.border_size)}, border_fill={repr(self.border_fill)}, " - f"signature={self.signature}, signature_label={repr(self.signature_label)}, " - f"signature_label_color={repr(self.signature_label_color)}, " - f"signature_pos={repr(self.signature_pos)}, signature_size={repr(self.signature_size)}, " - f"signature_font_size={self._signature_font_size}, signature_color={repr(self.signature_color)}, " - f"draw_axis={self.draw_axis}, axis_labels={repr(self.axis_labels)}, " - f"axis_offset={self.axis_offset}, axis_length={self.axis_length}, axis_font_size={self._axis_font_size})" - f"" - ) - - - def __len__(self) -> int: - return 
len(self._images) - - def __getitem__(self, index: int) -> Image.Image: - if not isinstance(index, int): - raise TypeError("The index must be an integer") - return self._images[index] - - def __setitem__(self, index: int, value: Image.Image) -> None: - if not isinstance(index, int): - raise TypeError("The index must be an integer") - if not isinstance(value, Image.Image): - raise TypeError("The value must be an instance of PIL.Image.Image") - if not (0 <= index < len(self._images)): - raise IndexError(f"Index {index} outside the range of the image list") - - self._images[index] = value - - def __delitem__(self, index: int) -> None: - if not isinstance(index, int): - raise TypeError("The index must be an integer") - if not (0 <= index < len(self._images)): - raise IndexError(f"Index {index} outside the range of the image list") - del self._images[index] - - def __contains__(self, item: Image.Image) -> bool: - if not isinstance(item, Image.Image): - raise TypeError("Verification is only possible for objects of the PIL.Image type.Image") - return item in self._images - - def __iter__(self): - return iter(self._images) - - def __call__(self, - layout: Union[str, LayoutMode] = "row", - spacing: int = 10, - bg_color: str = "white", - grid_cols: Optional[int] = None, - grid_rows: Optional[int] = None) -> Image.Image: - - return self.united_images(layout=layout, - spacing=spacing, - bg_color=bg_color, - grid_cols=grid_cols, - grid_rows=grid_rows) - - # Validation and conversion - @staticmethod - def _validate_path(path): - if not isinstance(path, (str, Path)): - raise TypeError("images_path must be a str or Path") - return Path(path) - - @staticmethod - def _validate_border_size(border_size): - if isinstance(border_size, int): - return (border_size,) * 4 - if isinstance(border_size, (tuple, list)) and len(border_size) == 4: - return tuple(border_size) - if border_size is None: - return (0,) * 4 - raise TypeError("border_size must be int, tuple of 4 elements, or None") - - 
@staticmethod - def _validate_color(color): - if isinstance(color, str): - return color - if isinstance(color, tuple) and all(isinstance(c, int) for c in color): - return color - raise TypeError("Color must be a string or tuple of ints") - - @staticmethod - def _validate_tuple_pair(value, name): - if isinstance(value, tuple) and len(value) == 2: - return value - raise TypeError(f"{name} must be a tuple of length 2") - - @staticmethod - def _validate_font_family(font_family): - font_files = sorted([f.stem for f in Path("./fonts/").glob('*.ttf') if f.is_file()]) - - if font_family in font_files: - return font_family - - return "Arial" - - @classmethod - def from_images(cls, images: List[Image.Image], **kwargs): - obj = cls(images_path='.', **kwargs) - obj._images = images - return obj - - # Properties with setters and getters - @property - def images_path(self): - return self._images_path - - @images_path.setter - def images_path(self, value): - self._images_path = self._validate_path(value) - - @property - def border_size(self): - return self._border_size - - @border_size.setter - def border_size(self, value): - self._border_size = self._validate_border_size(value) - - @property - def border_fill(self): - return self._border_fill - - @border_fill.setter - def border_fill(self, value): - self._border_fill = self._validate_color(value) - - @property - def signature(self): - return self._signature - - @signature.setter - def signature(self, value): - if not isinstance(value, bool): - raise TypeError("signature must be a boolean") - self._signature = value - - @property - def signature_label(self): - return self._signature_label - - @signature_label.setter - def signature_label(self, value): - if not (isinstance(value, (str, tuple, LabelMode)) or value is None): - raise TypeError("signature_label must be str, tuple, LabelMode or None") - self._signature_label = value - - @property - def signature_label_color(self): - return self._signature_label_color - - 
@signature_label_color.setter - def signature_label_color(self, value): - self._signature_label_color = self._validate_color(value) - - @property - def signature_pos(self): - return self._signature_pos - - @signature_pos.setter - def signature_pos(self, value): - if not isinstance(value, (SignaturePosition, str)): - raise TypeError("signature_pos must be a SignaturePosition or string") - self._signature_pos = value - - @property - def signature_size(self): - return self._signature_size - - @signature_size.setter - def signature_size(self, value): - self._signature_size = self._validate_tuple_pair(value, "signature_size") - - @property - def signature_color(self): - return self._signature_color - - @signature_color.setter - def signature_color(self, value): - self._signature_color = self._validate_color(value) - - @property - def draw_axis(self): - return self._draw_axis - - @draw_axis.setter - def draw_axis(self, value): - if not isinstance(value, bool): - raise TypeError("draw_axis must be a boolean") - self._draw_axis = value - - @property - def axis_labels(self): - return self._axis_labels - - @axis_labels.setter - def axis_labels(self, value): - if isinstance(value, tuple) and len(value) == 2: - self._axis_labels = value - else: - raise TypeError("axis_labels must be a tuple of two strings or tuples") - - @property - def axis_offset(self): - return self._axis_offset - - @axis_offset.setter - def axis_offset(self, value): - if not isinstance(value, (int, tuple)): - raise TypeError("axis_offset must be an integer") - self._axis_offset = value - - @property - def axis_length(self): - return self._axis_length - - @axis_length.setter - def axis_length(self, value): - if not isinstance(value, int): - raise TypeError("axis_length must be an integer") - self._axis_length = value - - @property - def axis_width(self): - return self._axis_width - - @axis_width.setter - def axis_width(self, value): - if not isinstance(value, int): - raise TypeError("axis_width must be an 
integer") - self._axis_width = value - - @property - def signature_font_size(self): - return self._signature_font_size - - @signature_font_size.setter - def signature_font_size(self, value): - if not isinstance(value, int): - raise TypeError("signature_font_size must be an integer") - self._signature_font_size = value - - - @property - def axis_font_size(self): - return self._axis_font_size - - @axis_font_size.setter - def axis_font_size(self, value): - if not isinstance(value, int): - raise TypeError("axis_font_size must be an integer") - self._axis_font_size = value - - @property - def font_family(self): - return self._font_family - - @font_family.setter - def font_family(self, value): - if not isinstance(value, str): - raise TypeError("font_family must be a string") - - self._font_family = self._validate_font_family(value) - - # Assistant methods - - def _get_label(self, index): - valid_modes = {m.value for m in LabelMode} - - label_mode = self._signature_label - if isinstance(label_mode, (str, LabelMode)) and (getattr(label_mode, 'value', label_mode) in valid_modes): - label = get_label(index, label_mode) - elif isinstance(label_mode, tuple): - if index >= len(label_mode): - raise IndexError(f"The signature for the index {index} was not found in the transmitted tuple") - label = label_mode[index] - elif isinstance(label_mode, str): - label = label_mode - else: - raise ValueError(f"Incorrect signature format: {label_mode}") - - return label - - def _get_positions(self, - image_w: int, - image_h: int) -> list | tuple: - - rect_w, rect_h = self._signature_size - left, top, right, bottom = self._border_size - positions = { - "top-left": (left, top, left + rect_w, top + rect_h), - "top-right": (image_w - right - rect_w, top, image_w - right, top + rect_h), - "bottom-left": (left, image_h - bottom - rect_h, left + rect_w, image_h - bottom), - "bottom-right": (image_w - right - rect_w, image_h - bottom - rect_h, image_w - right, image_h - bottom), - } - - key = 
self._signature_pos.value if isinstance(self._signature_pos, SignaturePosition) else self._signature_pos - rect_position = positions.get(key) - - if not rect_position: - raise ValueError("rect_corner должен быть одним из: top-left, top-right, bottom-left, bottom-right") - - return rect_position - - def _load_images(self, folder) -> List[Image.Image]: - """ - Loads all image files from the specified folder with supported extensions. - - This method searches for files with `.png`, `.jpg`, and `.jpeg` extensions in the given folder, - opens them using PIL, and returns a list of loaded images. - - Args: - folder (Union[str, Path]): Path to the folder containing the images. - - Returns: - List[PIL.Image.Image]: A list of loaded images. - - Notes: - - Only files with extensions "*.png", "*.jpg", "*.jpeg" (case-sensitive) are loaded. - - If the folder is empty or contains no supported image formats, an empty list is returned. - - The input path is internally converted to `Path` using `pathlib`. - - Raises: - FileNotFoundError: If the specified folder does not exist. - PIL.UnidentifiedImageError: If an image file cannot be opened by PIL. - """ - - folder = Path(folder) - images = [] - for ext in ("*.png", "*.jpg", "*.jpeg"): - images.extend([Image.open(p) for p in folder.glob(ext)]) - - return images - - def _resize_proportional(self, - img: Image.Image, - width: int = None, - height: int = None) -> Image.Image: - """ - Resize an image proportionally based on the specified width or height. - - This method adjusts the image size while preserving its aspect ratio - if only `width` or `height` is provided. If both `width` and `height` are - given, the image is resized exactly to that size (aspect ratio may be distorted). - If neither is provided, the original image is returned unchanged. - - Args: - img (PIL.Image.Image): The input image to be resized. - width (int, optional): Target width. If specified alone, height will be adjusted proportionally. 
- height (int, optional): Target height. If specified alone, width will be adjusted proportionally. - - Returns: - PIL.Image.Image: A resized image according to the specified dimensions. - """ - w, h = img.size - - if width and not height: - ratio = width / w - new_size = (width, int(h * ratio)) - elif height and not width: - ratio = height / h - new_size = (int(w * ratio), height) - elif width and height: - new_size = (width, height) - else: - return img - - return img.resize(new_size, Image.LANCZOS) - - def append(self, image: Image.Image): - if not isinstance(image, Image.Image): - raise TypeError("The value must be an instance of PIL.Image.Image") - self._images.append(image) - - def _draw_border(self): pass - - def _add_numbering(self): pass - - def _add_axes(self): pass - - def _layout_images(self): pass - - # The implementer method - def preprocessing_image(self): pass - - def united_images(self): pass - - - \ No newline at end of file diff --git a/image_processing/__init__.py b/image_processing/__init__.py deleted file mode 100644 index 3502538..0000000 --- a/image_processing/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -from __future__ import annotations -from image_processing.enumerates import * -from image_processing.imageDesign import ImagesDesign -from image_processing.DrawioProcessing import DrawioImageDesign - - -__all__ = ["LayoutMode", - "LabelMode", - "SignaturePosition", - "ImagesDesign", - "DrawioImageDesign"] \ No newline at end of file diff --git a/image_processing/enumerates.py b/image_processing/enumerates.py deleted file mode 100644 index 4a1417a..0000000 --- a/image_processing/enumerates.py +++ /dev/null @@ -1,23 +0,0 @@ -from enum import Enum - - -class LayoutMode(Enum): - ROW = "row" - COLUMN = "column" - GRID = "grid" - - -class SignaturePosition(Enum): - TOP_LEFT = "top-left" - TOP_RIGHT = "top-right" - BOTTOM_LEFT = "bottom-left" - BOTTOM_RIGHT = "bottom-right" - - -class LabelMode(Enum): - LATIN_LOWER = "latin_lower" - LATIN_UPPER = 
"latin_upper" - CYRILLIC_LOWER = "cyrillic_lower" - CYRILLIC_UPPER = "cyrillic_upper" - ARABIC = "arabic" - ROMAN = "roman" \ No newline at end of file diff --git a/pages/analysis_page.py b/pages/analysis_page.py deleted file mode 100644 index bfb877f..0000000 --- a/pages/analysis_page.py +++ /dev/null @@ -1,182 +0,0 @@ -import json -import setting -import pandas as pd - -from nicegui import ui -from pathlib import Path -from tempfile import TemporaryDirectory -from utils import filter_rows_by_specialty, bool_to_YesNo, load_json - -temp_dir = TemporaryDirectory() - -def science_articles_page() -> None: - data_dir = Path(setting.MAIN_DIRECTORY) / setting.DATA_DIRECTORY - req = [data_dir / setting.SPECIALIZATION_NAME, - data_dir / 'vak_articles.json', - data_dir / 'whitelist_articles.json'] - - if not all(p.exists() for p in req): - with ui.column().classes('w-full items-center gap-4'): - ui.label('Анализ научных журналов').classes('text-xl') - ui.label('❌ Не найдены необходимые данные.').style('color:#e53935') - ui.markdown('- Специализации\n- Журналы ВАК\n- Белый список') - return - - taxonomy = json.loads(req[0].read_text(encoding='utf-8')) - get_cat = lambda: ['Выбрать...'] + [c['category_name'] for c in taxonomy] - get_sub = lambda c: ['Выбрать...'] + [s['subcategory_name'] - for s in next((x for x in taxonomy if x['category_name'] == c['label']), {}) - .get('sub_category', [])] - - def get_specs(cat, sub): - c = next((x for x in taxonomy if x['category_name'] == cat['label']), None) - sub = next((s for s in c['sub_category'] if s['subcategory_name'] == sub['label']), None) if c else None - return sub['values'] if sub else [] - - def codes(selected): - return sorted({'.'.join(s['label'].split('.')[:3]) for s in selected}) - - def stringify_lists(rows): - return [{k: ', '.join(v) if isinstance(v, list) else v for k, v in r.items()} for r in rows] - - with ui.column().classes('w-full items-center gap-4'): - ui.label('Анализ научных журналов').classes('text-xl') - 
- cat_sel = ui.select(get_cat(), label='Категория').classes('w-96') - sub_box = ui.column().classes('w-96 hidden') - spec_box = ui.column().classes('w-96 hidden') - - run_btn = ui.button('АНАЛИЗИРОВАТЬ').props('color=primary').classes('hidden') - spin = ui.spinner(size='lg').props('color=primary').classes('hidden mt-2') - - btn_row = ui.row().classes('w-full gap-3') - tbl_wrap = ui.column().classes('w-full') - dl_btn = ui.button('⬇ Скачать Excel').props('color=secondary').classes('mt-2 self-start hidden') - - active = {'btn': None} - - def highlight(b): - if active['btn']: - active['btn'].props('color=secondary').update() - b.props('color=primary').update() - active['btn'] = b - - def show_table(rows, columns, xlsx_path): - tbl_wrap.clear() - with tbl_wrap: - ui.table(columns=columns, rows=rows, pagination=10, row_key=columns[0]['field']) \ - .classes('w-full').style('table-layout:fixed;word-break:break-word;') - - dl_btn.classes(remove='hidden') - dl_btn.on('click', lambda: ui.download(str(xlsx_path), filename=xlsx_path.name)) - - async def run(): - if not specs_selected: - ui.notify('Выберите хотя бы одну специализацию'); return - - run_btn.disable(); spin.classes(remove='hidden') - btn_row.clear(); tbl_wrap.clear() - - vak_articles = load_json(data_dir / 'vak_articles.json') - vak_filters = filter_rows_by_specialty(vak_articles, codes(specs_selected)) - whitelist = load_json(data_dir / 'whitelist_articles.json') - - data = [] - for it in vak_filters: - hit = next((w for w in whitelist if it['issn'] in w['issns']), None) - if hit: - data.append({ - 'ВАК ID': it['N'], - 'Наименование журнала': ', '.join(hit['title']), - 'issns': ', '.join(hit['issns']), - 'Специализации': ', '.join(it['specialties']), - 'Уровень журнала': hit['level'], - 'WOS': bool_to_YesNo(hit['wos_cc']['value']), - 'Scopus': bool_to_YesNo(hit['scopus']['value']), - 'RSCI': bool_to_YesNo(hit['rsci']['value']), - }) - - xlsx_data = Path(temp_dir.name) / 'data.xlsx'; 
pd.DataFrame(data).to_excel(xlsx_data, index=False) - xlsx_filters = Path(temp_dir.name) / 'filters.xlsx'; pd.DataFrame(vak_filters).to_excel(xlsx_filters, index=False) - xlsx_articles = Path(temp_dir.name) / 'articles.xlsx'; pd.DataFrame(vak_articles).to_excel(xlsx_articles, index=False) - - cols_data = [ - {'name': 'ВАК ID', 'label': 'ID', 'field': 'ВАК ID', - 'align': 'center', 'headerClasses': 'text-center', - 'style': 'width:70px; white-space:normal;'}, - - {'name': 'Наименование журнала', 'label': 'Журнал', - 'field': 'Наименование журнала', 'headerClasses': 'text-center', - 'style': 'max-width:280px; white-space:normal;'}, - - {'name': 'issns', 'label': 'ISSN', 'field': 'issns', - 'align': 'center', 'headerClasses': 'text-center', - 'style': 'width:120px; white-space:normal;'}, - - {'name': 'Специализации', 'label': 'Специализации', 'field': 'Специализации', - 'headerClasses': 'text-center', 'style': 'max-width:320px; white-space:normal;'}, - - {'name': 'Уровень журнала', 'label': 'Уровень', 'field': 'Уровень журнала', - 'headerClasses': 'text-center', 'style': 'width:90px;'}, - - {'name': 'WOS', 'label': 'WOS', 'field': 'WOS', - 'align': 'center', 'headerClasses': 'text-center', 'style': 'width:70px;'}, - - {'name': 'Scopus', 'label': 'Scopus', 'field': 'Scopus', - 'align': 'center', 'headerClasses': 'text-center', 'style': 'width:70px;'}, - - {'name': 'RSCI', 'label': 'RSCI', 'field': 'RSCI', - 'align': 'center', 'headerClasses': 'text-center', 'style': 'width:70px;'}, - ] - - cols_simple = [ - {'name': 'N', 'label': 'ID', 'field': 'N', - 'align': 'center', 'headerClasses': 'text-center', 'style': 'width:60px;'}, - - {'name': 'title', 'label': 'Название', 'field': 'title', - 'headerClasses': 'text-center', 'style': 'max-width:320px; white-space:normal;'}, - - {'name': 'issn', 'label': 'ISSN', 'field': 'issn', - 'align': 'center', 'headerClasses': 'text-center', 'style': 'width:120px;'}, - - {'name': 'specialties', 'label': 'Специализации', 'field': 
'specialties', - 'headerClasses': 'text-center', 'style': 'max-width:340px; white-space:normal;'}, - ] - - with btn_row: - b_art = ui.button('ВАК-статьи', on_click=lambda: [show_table(stringify_lists(vak_articles), cols_simple, xlsx_articles), highlight(b_art)]).props('color=secondary') - b_flt = ui.button('Фильтр', on_click=lambda: [show_table(stringify_lists(vak_filters), cols_simple, xlsx_filters), highlight(b_flt)]).props('color=secondary') - b_res = ui.button('Результат', on_click=lambda: [show_table(data, cols_data, xlsx_data), highlight(b_res)]).props('color=secondary') - - highlight(b_res) - show_table(data, cols_data, xlsx_data) - spin.classes(add='hidden'); run_btn.enable() - - specs_selected: list[dict] = [] - - def show_spec(opts): - spec_box.clear() - if not opts: - return - spec_box.classes(remove='hidden') - run_btn.classes(remove='hidden') - specs_selected.clear() - ui.select(opts, label='Научные специальности', multiple=True).classes('w-96') \ - .on('update:model-value', lambda e: (specs_selected.clear(), specs_selected.extend(e.args))) - - def sub_changed(cat): - if cat['label'] == 'Выбрать...': - sub_box.classes('hidden'); spec_box.clear(); return - sub_box.classes(remove='hidden'); spec_box.classes(add='hidden'); run_btn.classes(add='hidden') - sub_box.clear() - def on_sub(e): - sub = e.args - if sub['label'] == 'Выбрать...': - spec_box.clear(); return - show_spec(get_specs(cat, sub)) - with sub_box: - ui.select(get_sub(cat), label='Подкатегория').classes('w-96') \ - .on('update:model-value', on_sub) - - cat_sel.on('update:model-value', lambda e: sub_changed(e.args)) - run_btn.on('click', run) \ No newline at end of file diff --git a/pages/image_processing_page.py b/pages/image_processing_page.py deleted file mode 100644 index 9bbeeb8..0000000 --- a/pages/image_processing_page.py +++ /dev/null @@ -1,296 +0,0 @@ -import io -import base64 -import xml.etree.ElementTree as ET - -from PIL import Image -from nicegui import ui -from pathlib import Path 
-from typing import Tuple, Union -from tempfile import TemporaryDirectory -from image_processing import ImagesDesign, SignaturePosition, LabelMode, LayoutMode, DrawioImageDesign - -united_params = { - 'layout': 'row', - 'spacing': 10, - 'bg_color': '#ffffff', - 'grid_cols': None, - 'grid_rows': None, - 'width': 512, - 'height': None, -} - -united_controls = {} - -valid_layouts = set([mode.value for mode in LayoutMode]) - -tmp_dir = TemporaryDirectory() -design = ImagesDesign(images_path=tmp_dir.name) - -font_dir = Path('./fonts') -font_files = sorted([f.stem for f in font_dir.glob('*.ttf') if f.is_file()]) -signature_label_options = [mode.value for mode in LabelMode] -signature_pos_options = [mode.value for mode in SignaturePosition] - -download_link = ui.html('').classes('hidden') -download_drawio_link = ui.html('').classes('hidden') - -def image_processing_page(): - with ui.column().classes('w-full items-center justify-center gap-4'): - image_slot = ui.image().classes('w-1/2 rounded-xl shadow-lg') - - with ui.dialog() as upload_dialog, ui.card().classes('p-6'): - ui.label('Загрузить изображения').classes('text-lg font-semibold') - ui.upload( - on_upload=lambda e: handle_upload(e, upload_dialog, image_slot, download_link), - auto_upload=True, multiple=True, max_file_size=5 * 1024 * 1024 - ).props('accept=.png,.jpg,.jpeg').classes('max-w-full') - ui.button('Закрыть', on_click=upload_dialog.close).props('flat color=secondary') - - with ui.row().classes('gap-4'): - ui.button('📤 Загрузить', on_click=upload_dialog.open).props('color=primary') - ui.button('🗑 Очистить', on_click=lambda: clear_images(image_slot)).props('color=negative') - ui.button('📥 Скачать .png', on_click=download_png).props('color=primary') \ - .bind_visibility_from(image_slot, 'visible') - ui.button('📥 Скачать .drawio', on_click=download_drawio).props('color=accent') \ - .bind_visibility_from(image_slot, 'visible') - - download_link - download_drawio_link - - with ui.expansion('Параметры обработки', 
icon='settings'): - with ui.grid(columns=4).classes('gap-4 w-full'): - def safe_int(val, default=0): - try: - return int(val) - except ValueError: - return default - - def safe_font(val: str, fallback: int = 12) -> int: - v = safe_int(val, fallback) - if v <= 0: - ui.notify("Размер шрифта должен быть положительным", type="warning") - return fallback - return v - - ui.input('Размер рамки', value=str(design.border_size), - on_change=lambda e: update_param('border_size', safe_int(e.value), image_slot)).props('type=number min=0') - ui.color_input(label='Цвет рамки', value='#000000', - on_change=lambda e: update_param('border_fill', e.value, image_slot)) - - ui.checkbox('Добавлять подпись', value=design.signature, - on_change=lambda e: update_param('signature', e.value, image_slot)) - ui.select(signature_label_options, value=design.signature_label, - label='Тип подписи', - on_change=lambda e: update_param('signature_label', e.value, image_slot)) - ui.color_input(label='Цвет надписи', value='#fff', - on_change=lambda e: update_param('signature_label_color', e.value, image_slot)) - ui.select(signature_pos_options, value=design.signature_pos, - label='Позиция подписи', - on_change=lambda e: update_param('signature_pos', e.value, image_slot)) - ui.input('Размер подписи (ширина)', value=str(design.signature_size[0]), - on_change=lambda e: update_param('signature_size', (safe_int(e.value), design.signature_size[1]), image_slot)).props('type=number min=0') - ui.input('Размер подписи (высота)', value=str(design.signature_size[1]), - on_change=lambda e: update_param('signature_size', (design.signature_size[0], safe_int(e.value)), image_slot)).props('type=number min=0') - ui.color_input(label='Цвет подписи (фон)', value='#000', - on_change=lambda e: update_param('signature_color', e.value, image_slot)) - ui.input('Размер шрифта подписи', value=str(design.signature_font_size), - on_change=lambda e: update_param('signature_font_size', safe_int(e.value), 
image_slot)).props('type=number min=3') - - ui.checkbox('Показывать оси', value=design.draw_axis, - on_change=lambda e: update_param('draw_axis', e.value, image_slot)) - ui.input('Подписи оси X', value=design.axis_labels[0] if isinstance(design.axis_labels[0], str) else ','.join(design.axis_labels[0]), - on_change=lambda e: update_axis_labels('x', e.value, image_slot)) - ui.input('Подписи оси Y', value=design.axis_labels[1] if isinstance(design.axis_labels[1], str) else ','.join(design.axis_labels[1]), - on_change=lambda e: update_axis_labels('y', e.value, image_slot)) - ui.input('Смещение по X', value=str(design.axis_offset[0] if isinstance(design.axis_offset, tuple) else design.axis_offset), - on_change=lambda e: update_axis_offset('x', e.value, image_slot)).props('type=number min=0') - ui.input('Смещение по Y', value=str(design.axis_offset[1] if isinstance(design.axis_offset, tuple) else design.axis_offset), - on_change=lambda e: update_axis_offset('y', e.value, image_slot)).props('type=number min=0') - ui.input('Длина осей', value=str(design.axis_length), - on_change=lambda e: update_param('axis_length', safe_int(e.value), image_slot)).props('type=number min=1') - ui.input('Толщина осей', value=str(design.axis_width), - on_change=lambda e: update_param('axis_width', safe_int(e.value), image_slot)).props('type=number min=1') - ui.input('Размер шрифта осей', value=str(design.axis_font_size), - on_change=lambda e: update_param('axis_font_size', safe_int(e.value), image_slot)).props('type=number min=3') - ui.select(font_files or ['Arial'], value=design.font_family, - label='Шрифт', - on_change=lambda e: update_param('font_family', e.value, image_slot)) - - with ui.expansion('Параметры компоновки', icon='grid_on'): - with ui.grid(columns=4).classes('gap-4 w-full'): - layout_select = ui.select(['row', 'column', 'grid'], value=united_params['layout'], label='Расположение') - layout_select.on('update:model-value', lambda e: update_united('layout', e.args, image_slot)) 
- - spacing_input = ui.input('Отступ между изображениями', value=str(united_params['spacing'])).props('type=number') - spacing_input.on('change', lambda e: update_united('spacing', safe_int(e.args), image_slot)) - - bg_color_picker = ui.color_input('Цвет фона', value=united_params['bg_color']) - bg_color_picker.on('change', lambda e: update_united('bg_color', e.args, image_slot)) - - grid_cols_input = ui.input('Число колонок (grid)', value='').props('type=number') - grid_cols_input.on('change', lambda e: update_united('grid_cols', safe_int(e.args) if e.args else None, image_slot)) - - grid_rows_input = ui.input('Число строк (grid)', value='').props('type=number') - grid_rows_input.on('change', lambda e: update_united('grid_rows', safe_int(e.args) if e.args else None, image_slot)) - - width_input = ui.input('Ширина изображения', value=str(united_params['width'])).props('type=number') - width_input.on('change', lambda e: update_united('width', safe_int(e.args), image_slot)) - - height_input = ui.input('Высота изображения', value='').props('type=number') - height_input.on('change', lambda e: update_united('height', safe_int(e.args) if e.args else None, image_slot)) - -# Обработчики -def update_axis_offset(axis: str, value: str, image_slot): - try: - offset = int(value) - current = design.axis_offset - if isinstance(current, int): - current = (current, current) - - if axis == 'x': - new_offset = (offset, current[1]) - else: - new_offset = (current[0], offset) - - update_param('axis_offset', new_offset, image_slot) - except ValueError: - ui.notify(f"Смещение по оси {axis.upper()} должно быть числом", type='warning') - -def update_axis_labels(axis: str, text: str, image_slot): - try: - values = [v.strip() for v in text.split(',') if v.strip()] - if not values: - ui.notify(f"Поле оси {axis.upper()} пусто", type='warning') - return - - parsed_value: Union[str, Tuple[str, ...]] = ( - values[0] if len(values) == 1 else tuple(values) - ) - - if isinstance(parsed_value, tuple) 
and len(parsed_value) != len(design): - ui.notify(f"Количество подписей для оси {axis.upper()} должно быть {len(design)}", type='negative') - return - - current_x, current_y = design.axis_labels - if axis == 'x': - design.axis_labels = (parsed_value, current_y) - else: - design.axis_labels = (current_x, parsed_value) - - update_output(image_slot) - except Exception as ex: - ui.notify(f"Ошибка при установке подписей осей: {ex}", type='negative') - -def handle_upload(e, dialog, image_slot, download_link): - allowed_ext = ('.png', '.jpg', '.jpeg') - if not e.name.lower().endswith(allowed_ext): - ui.notify("Неподдерживаемый формат", type='negative') - return - e.content.seek(0) - img = Image.open(io.BytesIO(e.content.read())).convert("RGB") - design.append(img) - ui.notify(f"{e.name} загружен", type="positive") - dialog.close() - update_output(image_slot) - -def clear_images(image_slot): - design._images.clear() - image_slot.set_source("") - ui.notify("Изображения очищены", type="info") - -def update_param(name, value, image_slot): - setattr(design, name, value) - update_output(image_slot) - -def update_united(name, value, image_slot): - if name == 'layout': - if isinstance(value, dict) and 'label' in value: - value = value['label'] - if value not in valid_layouts: - ui.notify(f"Недопустимое значение layout: {value}", type='negative') - return - united_params[name] = value - if len(design) > 1: - update_output(image_slot) - -def update_output(image_slot): - if not len(design): - return - result = design.united_images( - layout=united_params['layout'], - spacing=united_params['spacing'], - bg_color=united_params['bg_color'], - grid_cols=united_params['grid_cols'], - grid_rows=united_params['grid_rows'], - width=united_params['width'], - height=united_params['height'], - ) - buffer = io.BytesIO() - result.save(buffer, format="PNG") - buffer.seek(0) - b64 = base64.b64encode(buffer.getvalue()).decode() - image_slot.set_source(f'data:image/png;base64,{b64}') - 
download_link.set_content(f''' - - ''') - -def download_png(): - if not len(design): - ui.notify("Нет изображений для сохранения", type="warning") - return - - try: - result = design.united_images( - layout=united_params['layout'], - spacing=united_params['spacing'], - bg_color=united_params['bg_color'], - grid_cols=united_params['grid_cols'], - grid_rows=united_params['grid_rows'], - width=united_params['width'], - height=united_params['height'], - ) - output_path = Path(tmp_dir.name) / "result.png" - result.save(output_path, format="PNG") - ui.download(str(output_path), filename="result.png") - except Exception as e: - ui.notify(f"Ошибка при сохранении PNG: {e}", type="negative") - -def download_drawio(): - - if not len(design): - ui.notify("Нет изображений для сохранения", type="warning") - return - try: - drawio = DrawioImageDesign(images_path=tmp_dir.name) - drawio._images = design._images.copy() - - drawio.border_size = design.border_size - drawio.border_fill = design.border_fill - drawio.signature = design.signature - drawio.signature_label = design.signature_label - drawio.signature_label_color = design.signature_label_color - drawio.signature_color = design.signature_color - drawio.signature_font_size = design.signature_font_size - drawio.signature_size = design.signature_size - drawio.signature_pos = design.signature_pos - drawio.axis_labels = design.axis_labels - drawio.axis_length = design.axis_length - drawio.axis_width = design.axis_width - drawio.axis_font_size = design.axis_font_size - drawio.axis_offset = design.axis_offset - drawio.font_family = design.font_family - drawio.draw_axis = design.draw_axis - - output_path = Path(tmp_dir.name) / "result.drawio" - - drawio.export_to_drawio(file=output_path, - layout=united_params['layout'], - spacing=united_params['spacing'], - grid_cols=united_params['grid_cols'], - grid_rows=united_params['grid_rows'], - width=united_params['width'], - height=united_params['height']) - - ui.download(str(output_path), 
filename="result.drawio") - except Exception as e: - ui.notify(f"Ошибка при сохранении drawio: {e}", type="negative") diff --git a/pages/settings_page.py b/pages/settings_page.py deleted file mode 100644 index f847f02..0000000 --- a/pages/settings_page.py +++ /dev/null @@ -1,159 +0,0 @@ -import asyncio -import setting - -from nicegui import ui -from pathlib import Path -from utils import ( - download_pdf_if_needed, - get_nomenclature_scientific_specialties, - save_to_json, - parse_vak_pdf, - dict_from_web -) - -# ────────────────────────────────────────────────────────── -# Глобальные переменные‑контейнеры -# ────────────────────────────────────────────────────────── -regex_values: dict[str, ui.input] = {} -web_values: dict[str, ui.input] = {} -dir_values: dict[str, ui.input] = {} - -admin_container: ui.column | None = None -use_admin_checkbox: ui.checkbox | None = None - -# кнопки, которые надо блокировать во время фоновых задач -save_btn: ui.button | None = None -spec_btn: ui.button | None = None -download_btn: ui.button | None = None - -# ────────────────────────────────────────────────────────── -# Вспомогательные функции UI -# ────────────────────────────────────────────────────────── - -def _toggle_buttons(state: bool) -> None: - for btn in (save_btn, spec_btn, download_btn): - if btn is not None: - if state: - btn.enable() - btn.props(remove='loading') - else: - btn.disable() - btn.props('loading') - -def admin_setting(state: bool) -> None: - global admin_container - if not admin_container: - return - admin_container.clear() - if state: - with admin_container: - ui.input('Логин', value=setting.ADMIN_LOGIN ).classes('w-full').props('readonly') - ui.input('Пароль', value=setting.ADMIN_PASSWORD, password=True).classes('w-full').props('readonly') - -# ────────────────────────────────────────────────────────── -# Долгие процессы (sync helpers) -# ────────────────────────────────────────────────────────── - -def _download_and_save_specializations_sync() -> None: - 
data = get_nomenclature_scientific_specialties(setting.SPECIALIZATION_URL) - save_to_json(data, Path(setting.MAIN_DIRECTORY) / setting.DATA_DIRECTORY / setting.SPECIALIZATION_NAME) - -aasync = asyncio.to_thread - -# ────────────────────────────────────────────────────────── -# Callbacks -# ────────────────────────────────────────────────────────── - -async def on_load_specializations(update_data_status: callable, refresh_analysis: callable) -> None: - _toggle_buttons(False) - try: - await aasync(_download_and_save_specializations_sync) - ui.notify('Специализации успешно загружены', timeout=300) - update_data_status() - refresh_analysis() - finally: - _toggle_buttons(True) - - -async def _download_and_parse_pdf() -> bool: - await aasync( - download_pdf_if_needed, - url=setting.VAK_LIST_URL, - output_dir=Path(setting.MAIN_DIRECTORY) / setting.DATA_DIRECTORY, - config_path='config.ini', - ) - path = Path(setting.MAIN_DIRECTORY) / setting.DATA_DIRECTORY - vak_path = path / setting.FILENAME - if vak_path.is_file(): - rows = await aasync(parse_vak_pdf, vak_path) - save_to_json(rows, path / 'vak_articles.json') - await aasync(dict_from_web, setting.WHITE_LIST_URL, path / "whitelist_articles.json") - return True - else: - return False - - -async def on_download_pdf(update_data_status: callable, refresh_analysis: callable) -> None: - _toggle_buttons(False) - try: - success = await _download_and_parse_pdf() - if success: - ui.notify('Журналы успешно загружены', timeout=300) - update_data_status() - refresh_analysis() - else: - ui.notify('Файл PDF не найден или недоступен', timeout=10) - finally: - _toggle_buttons(True) - - -# ────────────────────────────────────────────────────────── -# Страница настроек -# ────────────────────────────────────────────────────────── - -def settings_page(update_data_status: callable, refresh_analysis: callable) -> None: - global admin_container, use_admin_checkbox, save_btn, spec_btn, download_btn - - with ui.row().classes('w-full 
justify-center'): - with ui.row().classes('w-10/12 justify-between'): - with ui.column().classes('w-1/4'): - ui.label('REGEX настройки').classes('text-lg font-bold') - for key, val in setting.config['REGEX'].items(): - regex_values[key] = ui.input(label=key, value=val).classes('w-full') - - with ui.column().classes('w-1/4'): - ui.label('WEB настройки').classes('text-lg font-bold') - web_values['white_list_url'] = ui.input('Ссылка на белый список (json)', value=setting.WHITE_LIST_URL).classes('w-full') - web_values['vak_list_url'] = ui.input('Ссылка на список ВАК (pdf)', value=setting.VAK_LIST_URL ).classes('w-full') - web_values['spec_url'] = ui.input('Ссылка на специализации', value=setting.SPECIALIZATION_URL).classes('w-full') - - state = True if setting.USE_ADMIN.strip().lower() == "true" else False - use_admin_checkbox = ui.checkbox('Использовать админ панель?', value=state, on_change=lambda e: admin_setting(e.value)).classes('w-full') - admin_container = ui.column().classes('gap-2 mt-2') - admin_setting(state) - - with ui.column().classes('w-1/4'): - ui.label('Настройки директории').classes('text-lg font-bold') - dir_values['main_dir'] = ui.input('Основная директория', value=setting.MAIN_DIRECTORY ).classes('w-full') - dir_values['data_dir'] = ui.input('Директория с данными', value=setting.DATA_DIRECTORY ).classes('w-full') - dir_values['spec_file'] = ui.input('Имя файла для специализаций', value=setting.SPECIALIZATION_NAME).classes('w-full') - dir_values['file_name'] = ui.input('Имя файла ВАК', value=setting.FILENAME).classes('w-full').props('readonly') - - def on_save() -> None: - config = { - 'regex': {k: v.value for k, v in regex_values.items()}, - 'web': {k: v.value for k, v in web_values.items()}, - 'directories': {k: v.value for k, v in dir_values.items()}, - 'admin': { - 'enabled': use_admin_checkbox.value, - 'login': setting.ADMIN_LOGIN, - 'password': setting.ADMIN_PASSWORD, - }, - } - setting.save_config(config) - ui.notify('Настройки обновлены', 
timeout=3) - - with ui.row().classes('w-full justify-center mt-6 gap-4'): - save_btn = ui.button('ОБНОВИТЬ НАСТРОЙКИ', on_click=on_save).props('color=primary') - spec_btn = ui.button('Загрузить специализации', on_click=lambda: on_load_specializations(update_data_status, refresh_analysis)).props('color=primary') - download_btn = ui.button('Загрузить журналы', on_click=lambda: on_download_pdf(update_data_status, refresh_analysis)).props('color=primary') diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..e7341df --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,37 @@ +[tool.ruff] +line-length = 100 +target-version = "py311" +exclude = ["venv", ".venv", "build", "dist", "data", "__pycache__"] + +[tool.ruff.lint] +select = [ + "E", + "F", + "B", + "I", + "N", + "D", + "UP", + "C90", + "PL", + "RUF", +] +ignore = [ + "D100", + "D104", + "RUF001", + "RUF002", +] + +[tool.ruff.lint.isort] +known-first-party = ["science_helper"] +force-sort-within-sections = true +lines-after-imports = 2 +combine-as-imports = true + +[tool.ruff.lint.pydocstyle] +convention = "pep257" + +[tool.ruff.format] +quote-style = "double" +docstring-code-format = true diff --git a/pages/__init__.py b/science_helper/__init__.py similarity index 100% rename from pages/__init__.py rename to science_helper/__init__.py diff --git a/science_helper/image_processing/__init__.py b/science_helper/image_processing/__init__.py new file mode 100644 index 0000000..b56744d --- /dev/null +++ b/science_helper/image_processing/__init__.py @@ -0,0 +1,14 @@ +from __future__ import annotations + +from science_helper.image_processing.drawio_processing import DrawioImageDesign +from science_helper.image_processing.enumerates import LabelMode, LayoutMode, SignaturePosition +from science_helper.image_processing.image_design import ImagesDesign + + +__all__ = [ + "DrawioImageDesign", + "ImagesDesign", + "LabelMode", + "LayoutMode", + "SignaturePosition", +] diff --git 
import base64
import io
from pathlib import Path
import uuid
import xml.etree.ElementTree as ET

from PIL import Image

from science_helper.image_processing.enumerates import LayoutMode, SignaturePosition
from science_helper.image_processing.processing import ImageProcessing


class DrawioImageDesign(ImageProcessing):
    """Build a draw.io (mxGraph XML) diagram out of a list of images.

    The class extends `ImageProcessing`: every image is embedded as a base64-encoded PNG
    inside an `<mxCell>`, the cells are arranged in a row, a column or a grid, and each
    image can optionally carry a numbering label and a pair of coordinate axes.

    Usage:
        >>> designer = DrawioImageDesign(images_path="images/")
        >>> designer.export_to_drawio("result.drawio", layout="grid", spacing=20)

    Notes:
        - Cells are appended to one XML tree created in `__init__`; calling
          `united_images()` / `export_to_drawio()` repeatedly on the same instance keeps
          adding cells to that tree.
        - Layout positions are derived from the size of the first image only.
    """

    # Rough width of one glyph as a fraction of the font size (used to size text cells).
    _CHAR_WIDTH_RATIO = 0.6
    # Gap in pixels between an axis arrow tip and its text label.
    _AXIS_LABEL_GAP = 5

    def __init__(self, *args, **kwargs) -> None:
        """Initialise the base class, load the images and create the empty diagram.

        Args:
            *args: Positional arguments forwarded to `ImageProcessing`.
            **kwargs: Keyword arguments forwarded to `ImageProcessing`.

        Exceptions raised by the base initialiser or by `_load_images` propagate unchanged.
        """
        super().__init__(*args, **kwargs)
        self._images = self._load_images(self._images_path)

        self._root = None
        self._xml_root = None
        self._create_drawio_structure()

    # Overrides of parent-class methods
    def _border_width(self):
        """Return the widest configured border.

        `_border_size` is treated as a sequence of per-side sizes; a plain number is
        accepted as well, mirroring how `_axis_offset` is handled in `_add_axes`.
        """
        size = self._border_size
        return size if isinstance(size, (int, float)) else max(size)

    def _draw_border(self) -> str:
        """Return the draw.io style fragment describing the image border.

        Returns:
            str: For example ``"imageBorder=black;strokeWidth=10;"``, built from
            `_border_fill` and the widest value of `_border_size`.
        """
        return f"imageBorder={self._border_fill};strokeWidth={self._border_width()};"

    def _add_numbering(
        self, image_w: int, image_h: int, label: str = "", parent_id: str = "1"
    ) -> None:
        """Attach a numbering label cell to an image cell.

        Args:
            image_w (int): Width of the image in pixels.
            image_h (int): Height of the image in pixels.
            label (str, optional): Text to display. Defaults to "".
            parent_id (str, optional): ID of the parent `<mxCell>`. Defaults to "1".

        The base position comes from `_get_positions()`; it is then pushed outwards by the
        border width in the direction of the configured corner. An unrecognised
        `_signature_pos` leaves the base position unchanged.
        """
        x0, y0, _, _ = self._get_positions(image_w, image_h)
        offset = self._border_width()
        key = (
            self._signature_pos.value
            if isinstance(self._signature_pos, SignaturePosition)
            else self._signature_pos
        )

        pos_map = {
            SignaturePosition.TOP_LEFT.value: (-offset, -offset),
            SignaturePosition.TOP_RIGHT.value: (offset, -offset),
            SignaturePosition.BOTTOM_LEFT.value: (-offset, offset),
            SignaturePosition.BOTTOM_RIGHT.value: (offset, offset),
        }
        dx, dy = pos_map.get(key, (0, 0))

        cell = self._create_mx_cell(
            id=self._generate_id(suffix="-numbering"),
            value=self._get_numbering_text(label),
            style=self._get_numbering_style(),
            vertex="1",
            parent=parent_id,
        )
        self._create_mx_geometry(
            cell,
            x=str(x0 + dx),
            y=str(y0 + dy),
            width=str(self._signature_size[0]),
            height=str(self._signature_size[1]),
        )

    def _add_axes(
        self, image_w: int, image_h: int, label_x: str, label_y: str, parent_id: str = "1"
    ) -> None:
        """Add X and Y axes with text labels, grouped in one cell, to an image cell.

        Args:
            image_w (int): Width of the image in pixels (currently unused).
            image_h (int): Height of the image in pixels.
            label_x (str): Label text for the X axis.
            label_y (str): Label text for the Y axis.
            parent_id (str, optional): ID of the parent `<mxCell>`. Defaults to "1".

        The group is placed `_axis_offset` away from the left and bottom edges of the
        image. Its size is the axis length plus a gap plus an estimate of the label
        extent (character count * font size * `_CHAR_WIDTH_RATIO`).
        """
        if isinstance(self._axis_offset, int):
            offset_x = offset_y = self._axis_offset
        else:
            offset_x, offset_y = self._axis_offset

        label_x_width = len(label_x) * self._axis_font_size * self._CHAR_WIDTH_RATIO
        label_y_height = len(label_y) * self._axis_font_size * self._CHAR_WIDTH_RATIO

        width = self._axis_length + self._AXIS_LABEL_GAP + label_x_width
        height = self._axis_length + self._AXIS_LABEL_GAP + label_y_height

        group_id = self._generate_id(suffix="-axisgroup")
        style = self._get_text_style()

        group_cell = self._create_mx_cell(
            id=group_id, value="", style="group", vertex="1", connectable="0", parent=parent_id
        )
        self._create_mx_geometry(
            group_cell,
            x=str(offset_x),
            y=str(image_h - offset_y - height),
            width=str(width),
            height=str(height),
        )

        # Inside the group the origin sits in the bottom-left corner.
        x0, y0 = 0, height
        x_end = self._axis_length
        y_end = height - self._axis_length

        self._create_axis(self._generate_id(suffix="-xaxis"), x0, y0, x_end, y0, group_id)
        self._create_axis(self._generate_id(suffix="-yaxis"), x0, y0, x0, y_end, group_id)

        self._add_label(
            self._generate_id(suffix="-xlabel"),
            label_x,
            x_end + self._AXIS_LABEL_GAP,
            y0 - 10,  # presumably centres the fixed 20 px high label on the X axis
            len(label_x),
            group_id,
            style,
        )
        self._add_label(
            self._generate_id(suffix="-ylabel"),
            label_y,
            x0 + self._AXIS_LABEL_GAP,
            y_end - self._axis_font_size,
            len(label_y),
            group_id,
            style,
        )

    @staticmethod
    def _resolve_grid_cols(count: int, grid_cols: int | None, grid_rows: int | None) -> int:
        """Return the number of grid columns for `count` images.

        `grid_cols` wins when given; otherwise the column count is derived from
        `grid_rows`; with neither, `int(sqrt(count))` is used. The result is at least 1.
        """
        if grid_cols:
            cols = grid_cols
        elif grid_rows:
            cols = (count + grid_rows - 1) // grid_rows  # ceil(count / grid_rows)
        else:
            cols = int(count**0.5)
        return max(1, cols)

    def _layout_images(
        self,
        layout: str | LayoutMode = "row",
        spacing: int = 10,
        grid_cols: int | None = None,
        grid_rows: int | None = None,
    ) -> None:
        """Place every image into the diagram according to the chosen layout.

        Args:
            layout (str | LayoutMode, optional): "row", "column" or "grid". Defaults to "row".
            spacing (int, optional): Gap in pixels between images. Defaults to 10.
            grid_cols (int | None, optional): Number of grid columns. Takes precedence
                over `grid_rows`.
            grid_rows (int | None, optional): Number of grid rows; used to derive the
                column count when `grid_cols` is not given.

        Raises:
            IndexError: If there are no images to lay out.
            ValueError: If `layout` is not one of the supported modes.

        All images are wrapped in one group cell whose size covers the laid-out cells.
        Cell positions are computed from the size of the first image.
        """
        layout = layout.value if isinstance(layout, LayoutMode) else layout
        count = len(self._images)
        if not count:
            raise IndexError("Cannot lay out images: the image list is empty")

        image_w, image_h = self._images[0].size
        step_x = image_w + spacing
        step_y = image_h + spacing

        if layout == LayoutMode.ROW.value:
            positions = [(i * step_x, 0) for i in range(count)]
        elif layout == LayoutMode.COLUMN.value:
            positions = [(0, i * step_y) for i in range(count)]
        elif layout == LayoutMode.GRID.value:
            cols = self._resolve_grid_cols(count, grid_cols, grid_rows)
            positions = [((i % cols) * step_x, (i // cols) * step_y) for i in range(count)]
        else:
            raise ValueError(f"Unknown layout type: {layout}")

        group_id = self._generate_id(suffix="-group")
        group_cell = self._create_mx_cell(
            id=group_id, value="", style="group", vertex="1", connectable="0", parent="1"
        )

        for index, (x, y) in enumerate(positions):
            self.preprocessing_image(index=index, position_x=x, position_y=y, parent_id=group_id)

        group_w = max(x for x, _ in positions) + image_w
        group_h = max(y for _, y in positions) + image_h
        self._create_mx_geometry(group_cell, x="30", y="30", width=str(group_w), height=str(group_h))

    @staticmethod
    def _pick_axis_label(labels, index: int):
        """Return the label for image `index`: per-image sequences are indexed, text is shared."""
        return labels[index] if isinstance(labels, (tuple, list)) else labels

    def preprocessing_image(  # noqa: PLR0913
        self,
        index: int,
        width: int | None = None,
        height: int | None = None,
        position_x: int = 0,
        position_y: int = 0,
        parent_id: str = "1",
    ) -> None:
        """Embed one image, with its optional label and axes, into the diagram.

        Args:
            index (int): Index of the image in the internal list.
            width (int | None): Target width passed to `_resize_proportional`.
            height (int | None): Target height passed to `_resize_proportional`.
            position_x (int, optional): X coordinate of the image cell. Defaults to 0.
            position_y (int, optional): Y coordinate of the image cell. Defaults to 0.
            parent_id (str, optional): ID of the parent `<mxCell>`. Defaults to "1".

        Raises:
            IndexError: If `index` is past the end of the image list.

        The numbering label is added when both `_signature` and `_signature_label` are
        truthy; axes are added when `_draw_axis` is truthy. Each axis label setting may
        independently be one shared string or a per-image sequence.
        """
        if index >= len(self._images):
            raise IndexError(f"Index {index} outside the range of the image list")

        image = self._resize_proportional(self._images[index], width, height)
        image_w, image_h = image.size
        cell_id = self._generate_id(suffix=f"-{index + 1}")

        style = "".join(
            (
                "shape=image;",
                "verticalLabelPosition=bottom;",
                "labelBackgroundColor=default;",
                "verticalAlign=top;",
                "aspect=fixed;",
                "imageAspect=0;",
                # ';' separates style entries, so the data URI carries no ';base64' marker.
                f"image=data:image/png,{self._image_to_base64(image)};",
                self._draw_border(),
            )
        )

        cell = self._create_mx_cell(id=cell_id, value="", style=style, vertex="1", parent=parent_id)
        self._create_mx_geometry(
            cell, x=str(position_x), y=str(position_y), width=str(image_w), height=str(image_h)
        )

        if self._signature and self._signature_label:
            self._add_numbering(
                image_w=image_w,
                image_h=image_h,
                label=self._get_label(index=index),
                parent_id=cell_id,
            )

        if self._draw_axis:
            labels_x, labels_y = self._axis_labels
            self._add_axes(
                image_w,
                image_h,
                label_x=self._pick_axis_label(labels_x, index),
                label_y=self._pick_axis_label(labels_y, index),
                parent_id=cell_id,
            )

    def united_images(  # noqa: PLR0913
        self,
        layout: str | LayoutMode = "row",
        spacing: int = 10,
        grid_cols: int | None = None,
        grid_rows: int | None = None,
        width: int | None = None,
        height: int | None = None,
    ) -> None:
        """Resize all images if requested and lay them out in the diagram.

        Args:
            layout (str | LayoutMode, optional): "row", "column" or "grid". Defaults to "row".
            spacing (int, optional): Gap in pixels between images. Defaults to 10.
            grid_cols (int | None, optional): Number of grid columns.
            grid_rows (int | None, optional): Number of grid rows.
            width (int | None, optional): Target width for every image.
            height (int | None, optional): Target height for every image.

        When `width` or `height` is given, the stored images are replaced by their
        resized versions before the layout is computed.
        """
        layout = layout.value if isinstance(layout, LayoutMode) else layout

        if width or height:
            self._images = [
                self._resize_proportional(img, width=width, height=height) for img in self._images
            ]

        self._layout_images(
            layout=layout, spacing=spacing, grid_cols=grid_cols, grid_rows=grid_rows
        )

    # Methods specific to this class
    def _create_drawio_structure(self) -> None:
        """Create the empty draw.io document skeleton.

        Builds `<mxfile>` -> `<diagram>` -> `<mxGraphModel>` -> `<root>` and adds the two
        base cells: id "0" and id "1" (child of "0"), the latter being the default parent
        used by the other methods. `self._root` holds `<mxfile>`, `self._xml_root` holds
        `<root>`.
        """
        self._root = ET.Element("mxfile", host="ScienceHelper")

        diagram_id = self._generate_id(prefix="", suffix="")
        diagram = ET.SubElement(self._root, "diagram", name="Обработчик изображений", id=diagram_id)

        model = ET.SubElement(diagram, "mxGraphModel")
        self._xml_root = ET.SubElement(model, "root")

        ET.SubElement(self._xml_root, "mxCell", id="0")
        ET.SubElement(self._xml_root, "mxCell", id="1", parent="0")

    def _create_mx_cell(self, **attrs) -> ET.Element:
        """Append an `<mxCell>` with the given attributes to the diagram root.

        Args:
            **attrs: XML attributes of the cell (id, value, style, parent, vertex, edge, ...).

        Returns:
            ET.Element: The new `<mxCell>`, attached directly to `self._xml_root`.
        """
        return ET.SubElement(self._xml_root, "mxCell", **attrs)

    def _create_mx_geometry(self, parent: ET.Element, **attrs) -> ET.Element:
        """Attach an `<mxGeometry as="geometry">` child to a cell.

        Args:
            parent (ET.Element): The `<mxCell>` that receives the geometry.
            **attrs: Geometry attributes (x, y, width, height, relative, ...).

        Returns:
            ET.Element: The created `<mxGeometry>` element.
        """
        geom = ET.SubElement(parent, "mxGeometry", **attrs)
        # "as" is a Python keyword, so it cannot be passed through **attrs.
        geom.set("as", "geometry")
        return geom

    def _create_axis(  # noqa: PLR0913
        self,
        id: str,
        x0: float,
        y0: float,
        x1: float,
        y1: float,
        parent_id: str,
    ) -> None:
        """Add an arrow-headed edge from (x0, y0) to (x1, y1).

        Args:
            id (str): ID of the created edge cell.
            x0 (float): X coordinate of the start point.
            y0 (float): Y coordinate of the start point.
            x1 (float): X coordinate of the end point.
            y1 (float): Y coordinate of the end point.
            parent_id (str): ID of the parent cell.

        The stroke width comes from `_axis_width`; the two points are stored as
        `<mxPoint>` children marked "sourcePoint" and "targetPoint".
        """
        cell = self._create_mx_cell(
            id=id,
            value="",
            style=f"endArrow=blockThin;html=1;rounded=0;strokeWidth={self._axis_width}",
            edge="1",
            parent=parent_id,
        )
        geom = self._create_mx_geometry(cell, width="50", height="50", relative="1")

        source = ET.SubElement(geom, "mxPoint", x=str(x0), y=str(y0))
        source.set("as", "sourcePoint")
        target = ET.SubElement(geom, "mxPoint", x=str(x1), y=str(y1))
        target.set("as", "targetPoint")

    def _add_label(  # noqa: PLR0913
        self,
        cell_id: str,
        text: str,
        x: float,
        y: float,
        width: int,
        parent_id: str,
        style: str,
    ) -> None:
        """Add a text cell at (x, y).

        Args:
            cell_id (str): ID of the created cell.
            text (str): Text to display.
            x (float): X coordinate of the cell.
            y (float): Y coordinate of the cell.
            width (int): Number of characters; the cell width is estimated as
                `width * _axis_font_size * _CHAR_WIDTH_RATIO`.
            parent_id (str): ID of the parent cell.
            style (str): draw.io style string for the cell.

        The cell height is fixed at 20.
        """
        cell = self._create_mx_cell(
            id=cell_id, value=text, style=style, vertex="1", parent=parent_id
        )
        self._create_mx_geometry(
            cell,
            x=str(x),
            y=str(y),
            width=str(width * self._axis_font_size * self._CHAR_WIDTH_RATIO),
            height="20",
        )

    def _get_text_style(self) -> str:
        """Return the draw.io style string used for axis labels.

        The font family comes from `_font_family` and the size from `_axis_font_size`;
        the remaining entries are fixed.
        """
        return "".join(
            (
                "text;",
                "html=1;",
                "align=left;",
                "verticalAlign=middle;",
                "resizable=0;",
                "points=[];",
                "autosize=1;",
                "strokeColor=none;",
                "fillColor=none;",
                f"fontFamily={self._font_family};",
                "fontColor=#000;",
                f"fontSize={self._axis_font_size};",
            )
        )

    def _get_numbering_style(self) -> str:
        """Return the draw.io style string used for numbering labels.

        The fill colour comes from `_signature_color` and the font size from
        `_signature_font_size`.
        """
        return "".join(
            (
                "rounded=0;",
                "whiteSpace=wrap;",
                "html=1;",
                "strokeColor=none;",
                f"fillColor={self._signature_color};",
                f"fontSize={self._signature_font_size};",
            )
        )

    def _get_numbering_text(self, label: str) -> str:
        """Return the HTML value of a numbering label.

        Args:
            label (str): Label content (e.g. a letter or a number).

        Returns:
            str: `label` wrapped in a `<font>` tag whose face is `_font_family` and whose
            colour is `_signature_label_color`. The numbering style enables `html=1`, so
            draw.io renders this markup.
        """
        return (
            f'<font face="{self._font_family}" '
            f'color="{self._signature_label_color}">{label}</font>'
        )

    def export_to_drawio(self, file: str | Path, **kwargs) -> None:
        """Build the diagram and write it to a `.drawio` XML file.

        Args:
            file (str | Path): Path of the output file.
            **kwargs: Forwarded to `united_images()` (layout, spacing, grid_cols,
                grid_rows, width, height).

        Raises:
            IndexError: If there are no images.
            ValueError: If the layout mode is not supported.

        The file is written in UTF-8 with an XML declaration.
        """
        self.united_images(**kwargs)

        tree = ET.ElementTree(self._root)
        ET.indent(tree, space=" ", level=0)
        tree.write(file, encoding="utf-8", xml_declaration=True)

    @staticmethod
    def _generate_id(prefix: str = "E__", suffix: str = "-1") -> str:
        """Return a random ID for a draw.io XML element.

        Args:
            prefix (str, optional): Text placed before the random part. Defaults to "E__".
            suffix (str, optional): Text placed after the random part. Defaults to "-1".

        Returns:
            str: `prefix`, then the first 9 bytes of a UUID4 in URL-safe base64 with any
            padding stripped, then `suffix`.
        """
        uid = uuid.uuid4().bytes[:9]
        base64_id = base64.urlsafe_b64encode(uid).decode("ascii").rstrip("=")
        return f"{prefix}{base64_id}{suffix}"

    @staticmethod
    def _image_to_base64(image: Image.Image) -> str:
        """Return the image encoded as PNG and then as an ASCII base64 string.

        Args:
            image (Image.Image): The image to convert.

        Returns:
            str: Base64 text of the PNG bytes (no data-URI prefix).
        """
        buffer = io.BytesIO()
        image.save(buffer, format="PNG")
        return base64.b64encode(buffer.getvalue()).decode("ascii")
+ """ + + LATIN_LOWER = "latin_lower" + LATIN_UPPER = "latin_upper" + CYRILLIC_LOWER = "cyrillic_lower" + CYRILLIC_UPPER = "cyrillic_upper" + ARABIC = "arabic" + ROMAN = "roman" diff --git a/fonts/Arial.ttf b/science_helper/image_processing/fonts/Arial.ttf similarity index 100% rename from fonts/Arial.ttf rename to science_helper/image_processing/fonts/Arial.ttf diff --git a/fonts/Calibri.ttf b/science_helper/image_processing/fonts/Calibri.ttf similarity index 100% rename from fonts/Calibri.ttf rename to science_helper/image_processing/fonts/Calibri.ttf diff --git a/fonts/Garamond.ttf b/science_helper/image_processing/fonts/Garamond.ttf similarity index 100% rename from fonts/Garamond.ttf rename to science_helper/image_processing/fonts/Garamond.ttf diff --git a/fonts/Georgia.ttf b/science_helper/image_processing/fonts/Georgia.ttf similarity index 100% rename from fonts/Georgia.ttf rename to science_helper/image_processing/fonts/Georgia.ttf diff --git a/fonts/Helvetica.ttf b/science_helper/image_processing/fonts/Helvetica.ttf similarity index 100% rename from fonts/Helvetica.ttf rename to science_helper/image_processing/fonts/Helvetica.ttf diff --git a/fonts/Roboto.ttf b/science_helper/image_processing/fonts/Roboto.ttf similarity index 100% rename from fonts/Roboto.ttf rename to science_helper/image_processing/fonts/Roboto.ttf diff --git a/fonts/Tahoma.ttf b/science_helper/image_processing/fonts/Tahoma.ttf similarity index 100% rename from fonts/Tahoma.ttf rename to science_helper/image_processing/fonts/Tahoma.ttf diff --git a/fonts/Times New Roman.ttf b/science_helper/image_processing/fonts/Times New Roman.ttf similarity index 100% rename from fonts/Times New Roman.ttf rename to science_helper/image_processing/fonts/Times New Roman.ttf diff --git a/fonts/Verdana.ttf b/science_helper/image_processing/fonts/Verdana.ttf similarity index 100% rename from fonts/Verdana.ttf rename to science_helper/image_processing/fonts/Verdana.ttf diff --git 
a/image_processing/imageDesign.py b/science_helper/image_processing/image_design.py similarity index 55% rename from image_processing/imageDesign.py rename to science_helper/image_processing/image_design.py index 9ea9e32..0a805db 100644 --- a/image_processing/imageDesign.py +++ b/science_helper/image_processing/image_design.py @@ -1,94 +1,140 @@ import math from pathlib import Path -from typing import List, Optional, Tuple, Union -from PIL import Image, ImageOps, ImageDraw, ImageFont -from image_processing.enumerates import * -from image_processing.ImageProcessing import get_label, ImageProcessing - +from PIL import Image, ImageDraw, ImageFont, ImageOps + +from science_helper.image_processing.enumerates import LabelMode, LayoutMode, SignaturePosition +from science_helper.image_processing.processing import ImageProcessing + class ImagesDesign(ImageProcessing): + """Extended image composition class with support for layout, labels, borders, and axes. + + This class inherits from `ImageProcessing` and adds functionality to process and + combine multiple images into a single composite image. It supports various layout + strategies (row, column, grid), optional axis drawing, label numbering, and custom + border styles. + + The user can control all aspects of the visualization via class parameters, + including font style, position of labels, and layout dimensions. + + Typical usage: + design = ImagesDesign(images_path="path/to/images", signature=True, draw_axis=True) + combined = design.united_images(layout="grid", spacing=20, grid_cols=3) + combined.save("output.png") + + Raises: + TypeError: If input parameters are of invalid types. + ValueError: If configuration values are invalid or inconsistent. 
+ """ + + def __init__( # noqa: PLR0913 + self, + images_path: str | Path, + border_size: int | tuple[int, int, int, int] | None = 10, + border_fill: str | tuple[int, int, int] = "black", + signature: bool = True, + signature_label: str | tuple[str] | LabelMode | None = "latin_lower", + signature_label_color: str = "white", + signature_pos: str | SignaturePosition = "top-left", + signature_size: tuple[int, int] = (40, 40), + signature_color: str = "black", + signature_font_size: int = 24, + draw_axis: bool = False, + axis_labels: tuple[str, str] | tuple[tuple[str], tuple[str]] = ("X", "Y"), + axis_offset: int | tuple[int, int] = 20, + axis_length: int = 60, + axis_width: int = 3, + axis_font_size: int = 24, + font_family: str = "Arial", + ): + """Initialize an ImagesDesign instance for structured image layout and annotation. + + This constructor sets up the configuration for image processing, including: + borders, signature labels, coordinate axes, and layout behavior. It loads fonts + and images immediately upon initialization. 
- def __init__(self, - images_path: Union[str, Path], - border_size: Union[int, Tuple[int, int, int, int], None] = 10, - border_fill: Union[str, Tuple[int, int, int]] = "black", - signature: bool = True, - signature_label: Union[str, Tuple[str], LabelMode, None] = "latin_lower", - signature_label_color: str = "white", - signature_pos: Union[str, SignaturePosition] = "top-left", - signature_size: Tuple[int, int] = (40, 40), - signature_color: str = "black", - signature_font_size: int = 24, - draw_axis: bool = False, - axis_labels: Union[Tuple[str, str], Tuple[Tuple[str], Tuple[str]]] = ("X", "Y"), - axis_offset: Union[int, Tuple[int, int]] = 20, - axis_length: int = 60, - axis_width: int = 3, - axis_font_size: int = 24, - font_family: str = "Arial", - ): - - super().__init__(images_path, - border_size, - border_fill, - signature, - signature_label, - signature_label_color, - signature_pos, - signature_size, - signature_color, - signature_font_size, - draw_axis, - axis_labels, - axis_offset, - axis_length, - axis_width, - axis_font_size, - font_family) + Args: + images_path (Union[str, Path]): Path to the folder containing input images. + border_size (Union[int, Tuple[int, int, int, int], None]): Border around each image. + border_fill (Union[str, Tuple[int, int, int]]): Border color (string name or RGB tuple). + signature (bool): Whether to draw labels on each image. + signature_label (Union[str, Tuple[str], LabelMode, None]): Labeling mode or labels. + signature_label_color (str): Color of the label text. + signature_pos (Union[str, SignaturePosition]): Position of the label on the image. + signature_size (Tuple[int, int]): Dimensions (width, height) of the label box. + signature_color (str): Background color of the label box. + signature_font_size (int): Font size of the signature label text. + draw_axis (bool): Whether to draw coordinate axes (X, Y) on each image. + axis_labels (Union[Tuple[str, str], Tuple[Tuple[str], Tuple[str]]]): Axis labels. 
+ axis_offset (Union[int, Tuple[int, int]]): Offset from image edge to axis origin. + axis_length (int): Length of the drawn axes in pixels. + axis_width (int): Width of the axis lines. + axis_font_size (int): Font size for axis label text. + font_family (str): Name of the font (must be available as .ttf in the fonts folder). + + Raises: + TypeError: If any argument is of the wrong type. + ValueError: If label configuration or axis values are invalid. + """ + super().__init__( + images_path, + border_size, + border_fill, + signature, + signature_label, + signature_label_color, + signature_pos, + signature_size, + signature_color, + signature_font_size, + draw_axis, + axis_labels, + axis_offset, + axis_length, + axis_width, + axis_font_size, + font_family, + ) self._load_fonts() self._images = self._load_images(self.images_path) - # Assistant methods def _load_fonts(self): try: - self._signature_font = ImageFont.truetype(f"./fonts/{self._font_family}.ttf", self._signature_font_size) - except IOError: + self._signature_font = ImageFont.truetype( + self.FONT_FAMILY_PATH / f"{self._font_family}.ttf", self._signature_font_size + ) + except OSError: print("error") self._signature_font = ImageFont.load_default() try: - self._axis_font = ImageFont.truetype(f"./fonts/{self._font_family}.ttf", self._axis_font_size) - except IOError: + self._axis_font = ImageFont.truetype( + self.FONT_FAMILY_PATH / f"{self._font_family}.ttf", self._axis_font_size + ) + except OSError: self._axis_font = ImageFont.load_default() - + def _draw_border(self, image: Image.Image) -> Image.Image: - """ - Adds a border around the given image using the configured border size and color. + """Add a border around the image using the configured size and color. - If the border size is non-zero, the image will be expanded using the specified padding and fill color. - Otherwise, the original image is returned unmodified. 
+ If a non-zero border size is specified, the image is expanded using padding + with the defined color. If the border size is zero, the original image is returned. Args: image (PIL.Image.Image): The input image to which the border will be applied. Returns: - PIL.Image.Image: The image with the border applied, or the original image if the border size is zero. - - Notes: - - Border size and color are taken from `self._border_size` and `self._border_fill`. - - This method does not modify the original image but returns a new one. - """ - + PIL.Image.Image: The image with a border applied, or the original image if no border is set. + """ # noqa: E501 if self._border_size: return ImageOps.expand(image, border=self._border_size, fill=self._border_fill) return image - def _add_numbering(self, img: Image.Image, label: Optional[str]) -> Image.Image: - """ - Draws a label (e.g. a number or letter) inside a colored rectangle on the image. + def _add_numbering(self, img: Image.Image, label: str | None) -> Image.Image: + """Draws a label (e.g. a number or letter) inside a colored rectangle on the image. The label is positioned inside a rectangle at one of the predefined corners of the image, with padding taken into account from the border size. The rectangle serves as a background @@ -110,8 +156,7 @@ def _add_numbering(self, img: Image.Image, label: Optional[str]) -> Image.Image: - Rectangle background color: `self._signature_color`. - Font and text color are defined by `self._font` and `self._signature_label_color`. - The function modifies the image in-place and returns the same reference. - """ - + """ # noqa: E501 if not label: return img @@ -131,8 +176,7 @@ def _add_numbering(self, img: Image.Image, label: Optional[str]) -> Image.Image: return img def _add_axes(self, img: Image.Image, label_x: str, label_y: str) -> Image.Image: - """ - Draws coordinate axes (X and Y) with arrowheads and labels on the given image. 
+ """Draws coordinate axes (X and Y) with arrowheads and labels on the given image. This method draws two perpendicular axes: - X-axis: From left to right near the bottom edge. @@ -151,52 +195,63 @@ def _add_axes(self, img: Image.Image, label_x: str, label_y: str) -> Image.Image Notes: - The method uses `self.axis_font_size` and `self._font` to style the labels. - Axes are drawn in black with fixed dimensions (offset = 20, length = 60 pixels). - """ + """ # noqa: E501 img = img.copy() draw = ImageDraw.Draw(img) w, h = img.size # Handle axis_offset as int or tuple - offset_x, offset_y = (self._axis_offset, self._axis_offset) \ - if isinstance(self._axis_offset, int) else self._axis_offset + offset_x, offset_y = ( + (self._axis_offset, self._axis_offset) + if isinstance(self._axis_offset, int) + else self._axis_offset + ) arrow_length = self._axis_width * 2.5 - arrow_half = self._axis_width * 1.2 + arrow_half = self._axis_width * 1.2 x0, y0 = offset_x, h - offset_y # X-axis end_x = x0 + self._axis_length - arrow_length - draw.line((x0, y0, end_x, y0), fill="black", width= self._axis_width) - draw.polygon([ - (end_x + arrow_length, y0), - (end_x, y0 - arrow_half), - (end_x, y0 + arrow_half) - ], fill="black") - draw.text((end_x + arrow_length + 5, y0 - self._axis_font_size // 2), label_x, font=self._axis_font, fill="black") + draw.line((x0, y0, end_x, y0), fill="black", width=self._axis_width) + draw.polygon( + [(end_x + arrow_length, y0), (end_x, y0 - arrow_half), (end_x, y0 + arrow_half)], + fill="black", + ) + draw.text( + (end_x + arrow_length + 5, y0 - self._axis_font_size // 2), + label_x, + font=self._axis_font, + fill="black", + ) # Y-axis end_y = y0 - self.axis_length + arrow_length - draw.line((x0, y0, x0, end_y), fill="black", width= self._axis_width) - draw.polygon([ - (x0, end_y - arrow_length), - (x0 - arrow_half, end_y), - (x0 + arrow_half, end_y) - ], fill="black") - draw.text((x0 + 5, end_y - arrow_length - self._axis_font_size), label_y, 
font=self._axis_font, fill="black") + draw.line((x0, y0, x0, end_y), fill="black", width=self._axis_width) + draw.polygon( + [(x0, end_y - arrow_length), (x0 - arrow_half, end_y), (x0 + arrow_half, end_y)], + fill="black", + ) + draw.text( + (x0 + 5, end_y - arrow_length - self._axis_font_size), + label_y, + font=self._axis_font, + fill="black", + ) return img - - def _layout_images(self, - images: List[Image.Image], - layout: Union[str, LayoutMode], - spacing: int, - bg_color: str, - cols: Optional[int], - rows: Optional[int]) -> Image.Image: - """ - Arrange a list of processed images into a single composite image using the specified layout mode. + def _layout_images( # noqa: PLR0913 + self, + images: list[Image.Image], + layout: str | LayoutMode, + spacing: int, + bg_color: str, + cols: int | None, + rows: int | None, + ) -> Image.Image: + """Arrange a list of processed images into a single composite image using the specified layout mode. Supported layout modes: - "row": Arrange images in a single horizontal row. @@ -216,9 +271,8 @@ def _layout_images(self, Raises: ValueError: If `layout` is not one of the supported values ("row", "column", "grid"). 
- """ - - w_list, h_list = zip(*(img.size for img in images)) + """ # noqa: E501 + w_list, h_list = zip(*(img.size for img in images), strict=False) layout = layout.value if isinstance(layout, LayoutMode) else layout if layout == "row": @@ -253,10 +307,11 @@ def _layout_images(self, max_w = max(w_list) max_h = max(h_list) - canvas = Image.new("RGB", ( - cols * max_w + spacing * (cols - 1), - rows * max_h + spacing * (rows - 1) - ), color=bg_color) + canvas = Image.new( + "RGB", + (cols * max_w + spacing * (cols - 1), rows * max_h + spacing * (rows - 1)), + color=bg_color, + ) for idx, img in enumerate(images): row, col = divmod(idx, cols) @@ -268,12 +323,11 @@ def _layout_images(self, raise ValueError("layout должен быть 'row', 'column' или 'grid'") # The implementer method - def preprocessing_image(self, - index: int, - width: int = None, - height: int = None) -> Image.Image: - """ - Process a single image by optional resizing, adding a border, label, and axes. + def preprocessing_image(self, + index: int, + width: int | None = None, + height: int | None = None) -> Image.Image: + """Process a single image by optional resizing, adding a border, label, and axes. This method applies the following transformations to an image at the specified index: 1. Resizes the image proportionally if `width` or `height` is provided. @@ -292,8 +346,7 @@ def preprocessing_image(self, Raises: IndexError: If the index is outside the bounds of the image list or a label index is out of range. ValueError: If the signature label is in an unsupported format. 
- """ - + """ # noqa: E501 if index >= len(self._images): raise IndexError(f"Index {index} outside the range of the image list") @@ -316,18 +369,17 @@ def preprocessing_image(self, return proc - - def united_images(self, - layout: Union[str, LayoutMode] = "row", - spacing: int = 10, - bg_color: str = "white", - grid_cols: Optional[int] = None, - grid_rows: Optional[int] = None, - width: int = None, - height: int = None) -> Image.Image: - - """ - Compose and return a single image from a collection of images with optional borders, labels, and axes. + def united_images( # noqa: PLR0913 + self, + layout: str | LayoutMode = "row", + spacing: int = 10, + bg_color: str = "white", + grid_cols: int | None = None, + grid_rows: int | None = None, + width: int | None = None, + height: int | None = None, + ) -> Image.Image: + """Compose and return a single image from a collection of images with optional borders, labels, and axes. The method applies the following operations to each image (in order): 1. Adds a border if specified. @@ -336,7 +388,7 @@ def united_images(self, Finally, the processed images are arranged into a single image using the selected layout. Args: - layout (Union[str, LayoutMode], optional): + layout (Union[str, LayoutMode], optional): Layout mode for arranging images. Can be: - "row": images in a single horizontal row, - "column": images in a vertical column, @@ -351,14 +403,13 @@ def united_images(self, PIL.Image.Image: A single composed image with all processed individual images arranged according to the layout. Raises: - ValueError: + ValueError: - If the list of images is empty. - If `layout` is not one of "row", "column", "grid". - If label index is out of range for a provided tuple. - TypeError: + TypeError: - If label or axis configuration is of unsupported type. 
- """ - + """ # noqa: E501 if not self._images: raise ValueError("The list of images is empty") @@ -368,6 +419,3 @@ def united_images(self, images.append(proc) return self._layout_images(images, layout, spacing, bg_color, grid_cols, grid_rows) - - - \ No newline at end of file diff --git a/science_helper/image_processing/processing.py b/science_helper/image_processing/processing.py new file mode 100644 index 0000000..a2efcba --- /dev/null +++ b/science_helper/image_processing/processing.py @@ -0,0 +1,784 @@ +from pathlib import Path + +from PIL import Image + +from science_helper.image_processing.enumerates import LabelMode, LayoutMode, SignaturePosition + + +def to_roman(n: int) -> str: + """Convert an integer to its Roman numeral representation. + + Supports values from 1 and above, using standard Roman numeral symbols: + M, D, C, L, X, V, and I. The algorithm performs a greedy decomposition + using predefined value-symbol pairs. + + Args: + n (int): The integer number to convert. Must be ≥ 1. + + Returns: + str: The Roman numeral representation of the input number. + + Raises: + ValueError: If `n` is less than 1. + """ + val = [1000, 900, 500, 400, 100, 90, 50, 40, 10, 9, 5, 4, 1] + syms = ["M", "CM", "D", "CD", "C", "XC", "L", "XL", "X", "IX", "V", "IV", "I"] + roman = "" + for i in range(len(val)): + count = n // val[i] + roman += syms[i] * count + n -= val[i] * count + return roman + + +def get_label(index: int, mode: str | LabelMode = LabelMode.CYRILLIC_LOWER) -> str: + """Return a label string based on the specified mode and index. + + Supports multiple label modes including Latin, Cyrillic, Arabic numerals, + and Roman numerals. The index defines the position in the corresponding label set. + + Args: + index (int): The index of the label (starting from 0). + mode (str | LabelMode, optional): The labeling mode to use. + Can be a string or `LabelMode` enum. Defaults to `LabelMode.CYRILLIC_LOWER`. 
+
+    Returns:
+        str: A label corresponding to the given index and mode.
+
+    Raises:
+        ValueError: If the index is outside the alphabet (Latin: 0-25, Cyrillic: 0-31).
+        ValueError: If the provided mode is not recognized.
+
+    Supported modes:
+        - 'latin_lower' → 'a', 'b', ...
+        - 'latin_upper' → 'A', 'B', ...
+        - 'cyrillic_lower' → 'а', 'б', ...
+        - 'cyrillic_upper' → 'А', 'Б', ...
+        - 'arabic' → '1', '2', ...
+        - 'roman' → 'I', 'II', ...
+    """
+    mode = mode.value if isinstance(mode, LabelMode) else mode
+
+    match mode:
+        case "latin_lower" | "latin_upper":
+            if not 0 <= index < 26:  # noqa: PLR2004
+                raise ValueError(f"The {index} index goes beyond the Latin alphabet")
+            return chr(ord("a" if mode == "latin_lower" else "A") + index)
+        case "cyrillic_lower" | "cyrillic_upper":
+            base = ord("а") if mode == "cyrillic_lower" else ord("А")
+            if not 0 <= index < 32:  # noqa: PLR2004
+                raise ValueError(f"The {index} index goes beyond the Cyrillic alphabet")
+            return chr(base + index)
+        case "arabic":
+            return str(index + 1)
+        case "roman":
+            return to_roman(index + 1)
+        case _:
+            available = ", ".join(m.value for m in LabelMode)
+            raise ValueError(f"Неверный режим: '{mode}'. Доступные режимы: {available}")
+
+
+class ImageProcessing:
+    """A class for processing, composing, and annotating a collection of images.
+
+    This class provides functionality for:
+    - Loading images from a folder
+    - Drawing borders around images
+    - Adding numbered labels or custom text
+    - Drawing coordinate axes (X, Y)
+    - Exporting the final composition as a single image
+
+    Attributes:
+        FONT_FAMILY_PATH (Path): Path to the directory containing available `.ttf` fonts.
+
+    Examples:
+        >>> processor = ImageProcessing(images_path="data/images", signature=True)
+        >>> composed = processor()
+        >>> composed.show()
+    """
+
+    FONT_FAMILY_PATH = Path(__file__).resolve().parent / "fonts"  # bundled next to this module
+
+    def __init__(  # noqa: PLR0913
+        self,
+        images_path: str | Path,
+        border_size: int | tuple[int, int, int, int] | None = 10,
+        border_fill: str | tuple[int, int, int] = "black",
+        signature: bool = True,
+        signature_label: str | tuple[str] | LabelMode | None = "latin_lower",
+        signature_label_color: str = "white",
+        signature_pos: str | SignaturePosition = "top-left",
+        signature_size: tuple[int, int] = (40, 40),
+        signature_color: str = "black",
+        signature_font_size: int = 24,
+        draw_axis: bool = False,
+        axis_labels: tuple[str, str] | tuple[tuple[str], tuple[str]] = ("X", "Y"),
+        axis_offset: int | tuple[int, int] = 20,
+        axis_length: int = 60,
+        axis_width: int = 3,
+        axis_font_size: int = 24,
+        font_family: str = "Arial",
+    ):
+        """Initialize the class for processing and composing images with optional borders, labels, and axis annotations.
+
+        Args:
+            images_path (Union[str, Path]): Path to the folder containing image files (PNG, JPG, JPEG).
+            border_size (Union[int, Tuple[int, int, int, int], None], optional):
+                Border size around each image. Can be:
+                - int: uniform border on all sides,
+                - tuple: (left, top, right, bottom),
+                - None: no border.
+                Defaults to 10.
+            border_fill (Union[str, Tuple[int, int, int]], optional):
+                Color of the border. Can be a string color name (e.g., "black") or an RGB tuple. Defaults to "black".
+            signature (bool, optional): Whether to add a label/numbering on each image. Defaults to True.
+            signature_label (Union[str, Tuple[str], LabelMode, None], optional):
+                Labeling mode. Can be:
+                - str: mode name ("latin_lower", "roman", etc.),
+                - tuple of strings: custom labels per image,
+                - LabelMode enum,
+                - None: no labels.
+                Defaults to "latin_lower".
+ signature_label_color (str, optional): Color of the label text. Defaults to "white". + signature_pos (SignaturePosition, optional): Position of the label on the image (top-left, bottom-right, etc.). Defaults to SignaturePosition.TOP_LEFT. + signature_size (Tuple[int, int], optional): Size of the label box (width, height). Defaults to (40, 40). + signature_color (str, optional): Background color of the label box. Defaults to "black". + signature_font_size (int, optional): Font size for labels annotations. Defaults to 24. + draw_axis (bool, optional): Whether to draw X and Y axes on each image. Defaults to False. + axis_labels (Union[Tuple[str, str], Tuple[Tuple[str], Tuple[str]]], optional): + Labels for X and Y axes. Can be: + - Tuple of two strings: global labels, + - Tuple of two tuples: per-image labels. + Defaults to ("X", "Y"). + axis_offset (int, optional): Distance in pixels from the image edge to the axis origin. Defaults to 20. + axis_length (int, optional): Length of the drawn axes in pixels. Defaults to 60. + axis_font_size (int, optional): Font size for axis annotations. Defaults to 24. + + Raises: + TypeError: If any of the arguments are of incorrect type. + ValueError: If label or axis settings are out of bounds or improperly defined. 
+        """  # noqa: E501
+        self._signature_font_size = signature_font_size
+        self._axis_font_size = axis_font_size
+        self._font_family = font_family
+
+        self.signature = signature
+        self.draw_axis = draw_axis
+        self.images_path = images_path
+        self.border_size = border_size
+        self.border_fill = border_fill
+        self.axis_labels = axis_labels
+        self.axis_offset = axis_offset
+        self.axis_length = axis_length
+        self.axis_width = axis_width
+        self.signature_pos = signature_pos
+        self.signature_size = signature_size
+        self.signature_color = signature_color
+        self.signature_label = signature_label
+        self.signature_label_color = signature_label_color
+
+    # Magic methods
+    def __str__(self) -> str:
+        """Return a human-readable, multi-line summary headed by the runtime class name."""
+        return (
+            f"{type(self).__name__}(\n"
+            f" images_path={self.images_path},\n"
+            f" border_size={self.border_size}, border_fill={self.border_fill},\n"
+            f" signature={self.signature}, signature_label={self.signature_label},\n"
+            f" signature_pos={self.signature_pos}, signature_size={self.signature_size},\n"
+            f" signature_font_size={self._signature_font_size},\n"
+            f" draw_axis={self.draw_axis}, axis_labels={self.axis_labels},\n"
+            f" axis_offset={self.axis_offset}, axis_length={self.axis_length},\n"
+            f" axis_font_size={self._axis_font_size}\n"
+            f")"
+        )
+
+    def __repr__(self) -> str:
+        """Return a debugging representation headed by the runtime class name."""
+        return (
+            f"{type(self).__name__}(images_path={self.images_path!r}, "
+            f"border_size={self.border_size!r}, border_fill={self.border_fill!r}, "
+            f"signature={self.signature}, signature_label={self.signature_label!r}, "
+            f"signature_label_color={self.signature_label_color!r}, "
+            f"signature_pos={self.signature_pos!r}, signature_size={self.signature_size!r}, "
+            f"signature_font_size={self._signature_font_size}, "
+            f"signature_color={self.signature_color!r}, "
+            f"draw_axis={self.draw_axis}, axis_labels={self.axis_labels!r}, "
+            f"axis_offset={self.axis_offset},
axis_length={self.axis_length}, " + f"axis_font_size={self._axis_font_size})" + f"" + ) + + def __len__(self) -> int: + """Return the number of images in the collection.""" + return len(self._images) + + def __getitem__(self, index: int) -> Image.Image: + """Return the image at the specified index. + + Args: + index (int): Index of the image to retrieve. + + Returns: + Image.Image: The image at the specified index. + + Raises: + TypeError: If the index is not an integer. + """ + if not isinstance(index, int): + raise TypeError("The index must be an integer") + return self._images[index] + + def __setitem__(self, index: int, value: Image.Image) -> None: + """Set an image at the specified index in the internal list. + + Args: + index (int): The index at which to set the image. + value (Image.Image): The image to insert at the specified index. + + Raises: + TypeError: If the index is not an integer or value is not a PIL.Image.Image. + IndexError: If the index is out of bounds. + """ + if not isinstance(index, int): + raise TypeError("The index must be an integer") + if not isinstance(value, Image.Image): + raise TypeError("The value must be an instance of PIL.Image.Image") + if not (0 <= index < len(self._images)): + raise IndexError(f"Index {index} outside the range of the image list") + + self._images[index] = value + + def __delitem__(self, index: int) -> None: + """Delete the image at the specified index from the internal list. + + Args: + index (int): The index of the image to delete. + + Raises: + TypeError: If the index is not an integer. + IndexError: If the index is out of bounds. + """ + if not isinstance(index, int): + raise TypeError("The index must be an integer") + if not (0 <= index < len(self._images)): + raise IndexError(f"Index {index} outside the range of the image list") + del self._images[index] + + def __contains__(self, item: Image.Image) -> bool: + """Check whether the given image exists in the internal list. 
+ + Args: + item (Image.Image): The image to search for. + + Returns: + bool: True if the image is found, False otherwise. + + Raises: + TypeError: If the provided item is not an instance of PIL.Image.Image. + """ + if not isinstance(item, Image.Image): + raise TypeError("Verification is only possible for objects of the PIL.Image type.Image") + return item in self._images + + def __iter__(self): + """Return an iterator over the internal list of images. + + Returns: + Iterator[Image.Image]: An iterator over the stored images. + """ + return iter(self._images) + + def __call__( + self, + layout: str | LayoutMode = "row", + spacing: int = 10, + bg_color: str = "white", + grid_cols: int | None = None, + grid_rows: int | None = None, + ) -> Image.Image: + """Compose and return a single image from the internal collection. + + This method allows the object to be called like a function, delegating to `united_images`. + + Args: + layout (str | LayoutMode, optional): Layout mode for arranging images ("row", "grid", etc.). Defaults to "row". + spacing (int, optional): Spacing in pixels between images. Defaults to 10. + bg_color (str, optional): Background color for the final image. Defaults to "white". + grid_cols (int | None, optional): Number of columns in grid layout. Required for grid mode. Defaults to None. + grid_rows (int | None, optional): Number of rows in grid layout. Required for grid mode. Defaults to None. + + Returns: + Image.Image: The composed image containing all processed images. 
+ """ # noqa: E501 + return self.united_images( + layout=layout, + spacing=spacing, + bg_color=bg_color, + grid_cols=grid_cols, + grid_rows=grid_rows, + ) + + # Validation and conversion + @staticmethod + def _validate_font_family(path: Path, font_family): + font_files = sorted([f.stem for f in path.glob("*.ttf") if f.is_file()]) + + if font_family in font_files: + return font_family + + return "Arial" + + @staticmethod + def _validate_path(path): + if not isinstance(path, str | Path): + raise TypeError("images_path must be a str or Path") + return Path(path) + + @staticmethod + def _validate_border_size(border_size): + if isinstance(border_size, int): + return (border_size,) * 4 + if isinstance(border_size, tuple | list) and len(border_size) == 4: # noqa: PLR2004 + return tuple(border_size) + if border_size is None: + return (0,) * 4 + raise TypeError("border_size must be int, tuple of 4 elements, or None") + + @staticmethod + def _validate_color(color): + if isinstance(color, str): + return color + if isinstance(color, tuple) and all(isinstance(c, int) for c in color): + return color + raise TypeError("Color must be a string or tuple of ints") + + @staticmethod + def _validate_tuple_pair(value, name): + if isinstance(value, tuple) and len(value) == 2: # noqa: PLR2004 + return value + raise TypeError(f"{name} must be a tuple of length 2") + + @classmethod + def from_images(cls, images: list[Image.Image], **kwargs): + """Create an instance of the class from a list of images. + + This method allows creating an `ImageProcessing` object directly from + a list of `PIL.Image.Image` instances without loading them from disk. + + Args: + images (list[Image.Image]): A list of PIL images to initialize the object with. + **kwargs: Additional keyword arguments forwarded to the class constructor. + + Returns: + ImageProcessing: An initialized instance containing the provided images. 
+ """ + obj = cls(images_path=".", **kwargs) + obj._images = images + return obj + + # Properties with setters and getters + @property + def images_path(self): + """Get the path to the directory containing the input images. + + Returns: + Path: The folder path where images are stored. + """ + return self._images_path + + @images_path.setter + def images_path(self, value): + self._images_path = self._validate_path(value) + + @property + def border_size(self): + """Get the border size applied to each image. + + Returns: + Tuple[int, int, int, int]: The border size as (left, top, right, bottom). + """ + return self._border_size + + @border_size.setter + def border_size(self, value): + self._border_size = self._validate_border_size(value) + + @property + def border_fill(self): + """Get the color of the image border. + + Returns: + Union[str, Tuple[int, int, int]]: The border color as a string or RGB tuple. + """ + return self._border_fill + + @border_fill.setter + def border_fill(self, value): + self._border_fill = self._validate_color(value) + + @property + def signature(self): + """Check whether signature labels are enabled. + + Returns: + bool: True if labels are to be drawn on images. + """ + return self._signature + + @signature.setter + def signature(self, value): + if not isinstance(value, bool): + raise TypeError("signature must be a boolean") + self._signature = value + + @property + def signature_label(self): + """Get the label format or custom labels for image annotations. + + Returns: + Union[str, Tuple[str], LabelMode, None]: The labeling strategy or labels. + """ + return self._signature_label + + @signature_label.setter + def signature_label(self, value): + if not (isinstance(value, str | tuple | LabelMode) or value is None): + raise TypeError("signature_label must be str, tuple, LabelMode or None") + self._signature_label = value + + @property + def signature_label_color(self): + """Get the text color used for signature labels. 
+ + Returns: + str: The label text color. + """ + return self._signature_label_color + + @signature_label_color.setter + def signature_label_color(self, value): + self._signature_label_color = self._validate_color(value) + + @property + def signature_pos(self): + """Get the position where the label is drawn on the image. + + Returns: + Union[str, SignaturePosition]: The label position (e.g., "top-left"). + """ + return self._signature_pos + + @signature_pos.setter + def signature_pos(self, value): + if not isinstance(value, SignaturePosition | str): + raise TypeError("signature_pos must be a SignaturePosition or string") + self._signature_pos = value + + @property + def signature_size(self): + """Get the dimensions of the signature label box. + + Returns: + Tuple[int, int]: Width and height of the label box. + """ + return self._signature_size + + @signature_size.setter + def signature_size(self, value): + self._signature_size = self._validate_tuple_pair(value, "signature_size") + + @property + def signature_color(self): + """Get the background color of the signature label box. + + Returns: + Union[str, Tuple[int, int, int]]: The box background color. + """ + return self._signature_color + + @signature_color.setter + def signature_color(self, value): + self._signature_color = self._validate_color(value) + + @property + def draw_axis(self): + """Check whether coordinate axes are drawn on each image. + + Returns: + bool: True if axes are enabled. + """ + return self._draw_axis + + @draw_axis.setter + def draw_axis(self, value): + if not isinstance(value, bool): + raise TypeError("draw_axis must be a boolean") + self._draw_axis = value + + @property + def axis_labels(self): + """Get labels for the X and Y axes. + + Returns: + Union[Tuple[str, str], Tuple[Tuple[str], Tuple[str]]]: Axis label(s). 
+ """ + return self._axis_labels + + @axis_labels.setter + def axis_labels(self, value): + if isinstance(value, tuple) and len(value) == 2: # noqa: PLR2004 + self._axis_labels = value + else: + raise TypeError("axis_labels must be a tuple of two strings or tuples") + + @property + def axis_offset(self): + """Get the offset from the image edge where axes begin. + + Returns: + Union[int, Tuple[int, int]]: Axis origin offset. + """ + return self._axis_offset + + @axis_offset.setter + def axis_offset(self, value): + if not isinstance(value, int | tuple): + raise TypeError("axis_offset must be an integer") + self._axis_offset = value + + @property + def axis_length(self): + """Get the length of the X and Y axes. + + Returns: + int: The length of each axis in pixels. + """ + return self._axis_length + + @axis_length.setter + def axis_length(self, value): + if not isinstance(value, int): + raise TypeError("axis_length must be an integer") + self._axis_length = value + + @property + def axis_width(self): + """Get the thickness of the axes. + + Returns: + int: Line width in pixels. + """ + return self._axis_width + + @axis_width.setter + def axis_width(self, value): + if not isinstance(value, int): + raise TypeError("axis_width must be an integer") + self._axis_width = value + + @property + def signature_font_size(self): + """Get the font size used for signature labels. + + Returns: + int: Font size in points. + """ + return self._signature_font_size + + @signature_font_size.setter + def signature_font_size(self, value): + if not isinstance(value, int): + raise TypeError("signature_font_size must be an integer") + self._signature_font_size = value + + @property + def axis_font_size(self): + """Get the font size used for axis labels. + + Returns: + int: Font size in points. 
+ """ + return self._axis_font_size + + @axis_font_size.setter + def axis_font_size(self, value): + if not isinstance(value, int): + raise TypeError("axis_font_size must be an integer") + self._axis_font_size = value + + @property + def font_family(self): + """Get the font family used for rendering labels. + + Returns: + str: Name of the font family (must match a .ttf file). + """ + return self._font_family + + @font_family.setter + def font_family(self, value): + if not isinstance(value, str): + raise TypeError("font_family must be a string") + + self._font_family = self._validate_font_family(self.FONT_FAMILY_PATH, value) + + # Assistant methods + def _get_label(self, index): + valid_modes = {m.value for m in LabelMode} + + label_mode = self._signature_label + if isinstance(label_mode, str | LabelMode) and ( + getattr(label_mode, "value", label_mode) in valid_modes + ): + label = get_label(index, label_mode) + elif isinstance(label_mode, tuple): + if index >= len(label_mode): + raise IndexError( + f"The signature for the index {index} was not found in the transmitted tuple" + ) + label = label_mode[index] + elif isinstance(label_mode, str): + label = label_mode + else: + raise ValueError(f"Incorrect signature format: {label_mode}") + + return label + + def _get_positions(self, image_w: int, image_h: int) -> list | tuple: + rect_w, rect_h = self._signature_size + left, top, right, bottom = self._border_size + positions = { + "top-left": (left, top, left + rect_w, top + rect_h), + "top-right": (image_w - right - rect_w, top, image_w - right, top + rect_h), + "bottom-left": (left, image_h - bottom - rect_h, left + rect_w, image_h - bottom), + "bottom-right": ( + image_w - right - rect_w, + image_h - bottom - rect_h, + image_w - right, + image_h - bottom, + ), + } + + key = ( + self._signature_pos.value + if isinstance(self._signature_pos, SignaturePosition) + else self._signature_pos + ) + rect_position = positions.get(key) + + if not rect_position: + raise 
ValueError( + "rect_corner должен быть одним из: top-left, top-right, bottom-left, bottom-right" + ) + + return rect_position + + def _load_images(self, folder) -> list[Image.Image]: + """Load all image files from the specified folder with supported extensions. + + This method searches for files with `.png`, `.jpg`, and `.jpeg` extensions in the given folder, + opens them using PIL, and returns a list of loaded images. + + Args: + folder (Union[str, Path]): Path to the folder containing the images. + + Returns: + List[PIL.Image.Image]: A list of loaded images. + + Notes: + - Only files with extensions "*.png", "*.jpg", "*.jpeg" (case-sensitive) are loaded. + - If the folder is empty or contains no supported image formats, an empty list is returned. + - The input path is internally converted to `Path` using `pathlib`. + + Raises: + FileNotFoundError: If the specified folder does not exist. + PIL.UnidentifiedImageError: If an image file cannot be opened by PIL. + """ # noqa: E501 + folder = Path(folder) + images = [] + for ext in ("*.png", "*.jpg", "*.jpeg"): + images.extend([Image.open(p) for p in folder.glob(ext)]) + + return images + + def _resize_proportional( + self, img: Image.Image, width: int | None = None, height: int | None = None + ) -> Image.Image: + """Resize an image proportionally based on the specified width or height. + + This method adjusts the image size while preserving its aspect ratio + if only `width` or `height` is provided. If both `width` and `height` are + given, the image is resized exactly to that size (aspect ratio may be distorted). + If neither is provided, the original image is returned unchanged. + + Args: + img (PIL.Image.Image): The input image to be resized. + width (int, optional): Target width. If specified alone, height will be adjusted proportionally. + height (int, optional): Target height. If specified alone, width will be adjusted proportionally. 
+ + Returns: + PIL.Image.Image: A resized image according to the specified dimensions. + """ # noqa: E501 + w, h = img.size + + if width and not height: + ratio = width / w + new_size = (width, int(h * ratio)) + elif height and not width: + ratio = height / h + new_size = (int(w * ratio), height) + elif width and height: + new_size = (width, height) + else: + return img + + return img.resize(new_size, Image.LANCZOS) + + def append(self, image: Image.Image): + """Append a single image to the internal image list. + + Validates that the input is an instance of `PIL.Image.Image` before adding it + to the internal list of images. + + Args: + image (Image.Image): The image to be added. + + Raises: + TypeError: If the provided object is not an instance of `PIL.Image.Image`. + """ + if not isinstance(image, Image.Image): + raise TypeError("The value must be an instance of PIL.Image.Image") + self._images.append(image) + + def _draw_border(self): + pass + + def _add_numbering(self): + pass + + def _add_axes(self): + pass + + def _layout_images(self): + pass + + # The implementer method + def preprocessing_image(self): + """Preprocess a single image by applying configured transformations. + + This may include adding borders, labels, and axis annotations. + Intended to be called internally for each image before composition. + """ + pass + + def united_images(self): + """Compose all loaded images into a single combined image. + + The composition respects layout, spacing, and visual options such as + borders, labels, and axes. Configuration is taken from instance attributes. + + Returns: + Image.Image: The final combined image. 
+ """ + pass diff --git a/science_helper/search_vak_articles/__init__.py b/science_helper/search_vak_articles/__init__.py new file mode 100644 index 0000000..11d45dd --- /dev/null +++ b/science_helper/search_vak_articles/__init__.py @@ -0,0 +1,20 @@ +from __future__ import annotations + +from .downloader import PDFDownloader +from .filters import filter_rows_by_specialty +from .nomenclature import NomenclatureParser +from .pdf_parser import PDFParser, load_json, save_to_json + + +def bool_to_yes_no(val: bool) -> str: + return "Да" if val else "Нет" + +__all__ = [ + "NomenclatureParser", + "PDFDownloader", + "PDFParser", + "bool_to_YesNo", + "filter_rows_by_specialty", + "load_json", + "save_to_json" +] \ No newline at end of file diff --git a/science_helper/search_vak_articles/downloader.py b/science_helper/search_vak_articles/downloader.py new file mode 100644 index 0000000..692c051 --- /dev/null +++ b/science_helper/search_vak_articles/downloader.py @@ -0,0 +1,90 @@ +import configparser +from pathlib import Path +from urllib.parse import parse_qs, urlparse + +from fake_useragent import UserAgent +import requests + +from science_helper.search_vak_articles.pdf_parser import save_to_json + + +class PDFDownloader: + """Class for downloading PDF files and fetching JSON data from the web. + + This class supports: + - Downloading a PDF file from a URL if it does not already exist. + - Automatically updating a configuration file with the latest downloaded filename. + - Fetching JSON data from a URL and saving it to a local `.json` file. + """ + + def __init__(self, output_dir: str = ".", config_path: str = "config.ini", timeout: int = 60): + """Initialize the PDF downloader instance. + + Args: + output_dir (str): Directory where files will be saved. Defaults to the current directory. + config_path (str): Path to the configuration INI file. Defaults to "config.ini". + timeout (int): Timeout in seconds for HTTP requests. Defaults to 60. 
+ """ # noqa: E501 + self.output_dir = Path(output_dir) + self.config_path = config_path + self.timeout = timeout + + def download_pdf_if_needed(self, url: str) -> Path: + """Download a PDF file from the URL if it is not already downloaded. + + The URL must contain a `?name=...` parameter that specifies the filename. + If the file already exists, it will not be downloaded again. + The method also updates the `[DIRECTORY]` section in the configuration file with the new filename. + + Args: + url (str): A URL pointing to a downloadable PDF, containing a `name` query parameter. + + Returns: + Path: Path to the downloaded or existing PDF file. + + Raises: + ValueError: If the `name` parameter is missing in the URL. + requests.HTTPError: If the HTTP request fails. + """ # noqa: E501 + parsed = urlparse(url) + params = parse_qs(parsed.query) + name = params.get("name", [None])[0] + if not name: + raise ValueError("URL не содержит параметра ?name=...") + + filename = f"{name}.pdf" + filepath = self.output_dir / filename + + if filepath.exists(): + print(f"[✓] Файл уже существует: {filepath}") + else: + headers = {"User-Agent": UserAgent(os="Linux").random} + resp = requests.get(url, headers=headers, timeout=self.timeout, verify=False) + resp.raise_for_status() + filepath.write_bytes(resp.content) + print(f"[↓] Скачано: {filepath}") + + config = configparser.ConfigParser() + config.read(self.config_path, encoding="utf-8") + config["DIRECTORY"]["filename"] = filename + with open(self.config_path, "w", encoding="utf-8") as f: + config.write(f) + + return filepath + + def dict_from_web(self, url: str, output_file: str) -> None: + """Fetch JSON data from the given URL and save it to a `.json` file. + + Args: + url (str): A URL that returns JSON content. + output_file (str): Filename for the output JSON file. ".json" extension will be appended if missing. + + Raises: + requests.RequestException: If the request fails or times out. 
+ """ # noqa: E501 + if not output_file.endswith(".json"): + output_file += ".json" + + r = requests.get(url, timeout=self.timeout, verify=False) + if r.status_code == 200: # noqa: PLR2004 + save_to_json(r.json(), self.output_dir / output_file) diff --git a/science_helper/search_vak_articles/filters.py b/science_helper/search_vak_articles/filters.py new file mode 100644 index 0000000..fc98222 --- /dev/null +++ b/science_helper/search_vak_articles/filters.py @@ -0,0 +1,34 @@ +from typing import Any + + +def fizbuz(spec_line: str, targets: list[str]) -> bool: + """Check if a specialty line starts with any of the target prefixes. + + Args: + spec_line (str): A single specialty string. + targets (list[str]): A list of target prefixes to match. + + Returns: + bool: True if the spec_line starts with any of the target values, False otherwise. + """ + return any(spec_line.startswith(t) for t in targets) + + +def filter_rows_by_specialty( + rows: list[dict[str, Any]], targets: list[str] +) -> list[dict[str, Any]]: + """Filter a list of rows by matching specialty prefixes. + + Each row is expected to contain a "specialties" field with a list of strings. + If the `targets` list contains "all" (case-insensitive), no filtering is applied. + + Args: + rows (list[dict[str, Any]]): List of dictionaries representing rows with a "specialties" key. + targets (list[str]): List of specialty prefixes to filter by. + + Returns: + list[dict[str, Any]]: Filtered list of rows where at least one specialty matches any target prefix. 
+ """ # noqa: E501 + if not targets or any(t.lower() == "all" for t in targets): + return rows + return [r for r in rows if any(fizbuz(sp, targets) for sp in r["specialties"])] diff --git a/science_helper/search_vak_articles/nomenclature.py b/science_helper/search_vak_articles/nomenclature.py new file mode 100644 index 0000000..0b2b9fb --- /dev/null +++ b/science_helper/search_vak_articles/nomenclature.py @@ -0,0 +1,95 @@ +from bs4 import BeautifulSoup +import requests + + +class NomenclatureParser: + """Parser for extracting specialty categories and subcategories from a web-based HTML table. + + This class is designed to download and parse a nomenclature table from a specified URL + and convert it into a structured list of dictionaries. + + Attributes: + timeout (int): Timeout in seconds for HTTP requests. + """ + + def __init__(self, timeout: int = 60): + """Initialize the parser with a specified request timeout. + + Args: + timeout (int, optional): Timeout in seconds for HTTP requests. Defaults to 60. + """ + self.timeout = timeout + + def get_specialties(self, url: str) -> list[dict]: # noqa: C901 + """Parse the HTML page from the given URL to extract specialties structure. + + The expected structure is a table with merged cells (rowspan) representing + category names, subcategories, and nested specialty values. + + Args: + url (str): The URL of the page containing the HTML table. + + Returns: + list[dict]: A list of dictionaries representing categories with subcategories + and their associated values. Example structure: + [ + { + "category_name": "Engineering Sciences", + "sub_category": [ + { + "subcategory_name": "Mechanical Engineering", + "values": ["2.1.1", "2.1.2"] + }, + ... + ] + }, + ... + ] + + Notes: + - Returns an empty list if the request fails or the table is not found. + - The parser assumes a specific structure with 2 to 4 columns per row. 
+ """ + r = requests.get(url=url, timeout=self.timeout, verify=False) + if r.status_code != 200: # noqa: PLR2004 + return [] + + output = [] + current_main, current_sub = None, None + + soup = BeautifulSoup(r.text, "html.parser") + table = soup.find("table") + if not table: + return [] + + rows = table.find_all("tr") + for row in rows[1:]: + cells = row.find_all("td") + if not cells: + continue + + if len(cells) == 4: # noqa: PLR2004 + if cells[0].has_attr("rowspan"): + current_main = cells[0].get_text(strip=True) + output.append({"category_name": current_main, "sub_category": []}) + if cells[1].has_attr("rowspan"): + current_sub = cells[1].get_text(strip=True) + output[-1]["sub_category"].append( + {"subcategory_name": current_sub, "values": [cells[2].get_text(strip=True)]} + ) + else: + output[-1]["sub_category"][-1]["values"].append(cells[2].get_text(strip=True)) + + elif len(cells) == 3: # noqa: PLR2004 + if cells[0].has_attr("rowspan"): + current_sub = cells[0].get_text(strip=True) + output[-1]["sub_category"].append( + {"subcategory_name": current_sub, "values": [cells[1].get_text(strip=True)]} + ) + else: + output[-1]["sub_category"][-1]["values"].append(cells[1].get_text(strip=True)) + + elif len(cells) == 2: # noqa: PLR2004 + output[-1]["sub_category"][-1]["values"].append(cells[0].get_text(strip=True)) + + return output diff --git a/science_helper/search_vak_articles/pdf_parser.py b/science_helper/search_vak_articles/pdf_parser.py new file mode 100644 index 0000000..7387cbd --- /dev/null +++ b/science_helper/search_vak_articles/pdf_parser.py @@ -0,0 +1,179 @@ +import json +import pathlib +import re +from typing import Any + +import PyPDF2 + +from science_helper.utils.setting import RE_DATE, RE_ISSN_RAW, RE_ROW_START, RE_SPEC_CODE + + +class PDFParser: + """Parses a structured PDF file containing journal metadata such as title, ISSN, and specialties. 
+ + This class provides functionality for reading a PDF file, extracting textual content, + parsing it into sections, and extracting structured metadata from each section. + + Attributes: + path (Path): Path to the PDF file to be parsed. + """ # noqa: E501 + + def __init__(self, path: str | pathlib.Path): + """Initialize the parser with the path to a PDF file. + + Args: + path (str | Path): Path to the PDF file. + """ + self.path = pathlib.Path(path) + + def _read_pdf_text(self) -> str: + """Read and extract all text content from the PDF file. + + Returns: + str: Combined text from all pages of the PDF with normalized line endings. + """ + reader = PyPDF2.PdfReader(self.path) + return "\n".join(page.extract_text() or "" for page in reader.pages).replace("\r", "") + + def _split_sections(self, raw: str) -> list[str]: + """Split the raw text into sections using a regular expression pattern. + + Each section is assumed to start with a recognizable numeric identifier + based on the `RE_ROW_START` pattern. + + Args: + raw (str): Raw text extracted from the PDF. + + Returns: + list[str]: List of text segments corresponding to logical journal entries. + """ + idx = [m.start() for m in RE_ROW_START.finditer(raw)] + [len(raw)] + return [raw[idx[i] : idx[i + 1]] for i in range(len(idx) - 1)] + + def _normalize_issn(self, raw: str) -> str: + """Normalize ISSN text to a consistent format (uppercase, hyphenated). + + Args: + raw (str): Raw ISSN string. + + Returns: + str: Normalized ISSN string. + """ + raw = raw.replace("Х", "X").replace("х", "x") + raw = re.sub(r"[\-‑–—−]", "-", raw) + raw = re.sub(r"\s*-\s*", "-", raw) + return raw.upper() + + def _split_specialties(self, tail: str) -> list[str]: + """Split a string containing specialty codes into a clean list of values. + + Args: + tail (str): Text segment that may contain multiple specialty codes. + + Returns: + list[str]: List of extracted and cleaned specialty codes. 
+ """ + specs = [] + matches = list(RE_SPEC_CODE.finditer(tail)) + if not matches: + raw_parts = [s.strip() for s in tail.split(",") if s.strip()] + else: + raw_parts = [] + for i, m in enumerate(matches): + start = m.start() + end = matches[i + 1].start() if i + 1 < len(matches) else len(tail) + raw_parts.append(tail[start:end].strip().lstrip(",; )")) + for seg in raw_parts: + cleaned_seg = seg.replace(",", " ") + cleaned_seg = re.sub(r"\s{2,}", " ", cleaned_seg) + if cleaned_seg: + specs.append(cleaned_seg) + return specs + + def _parse_section(self, sec: str) -> dict[str, Any] | None: + """Parse a single section of text into structured metadata. + + Args: + sec (str): Text section representing a single journal entry. + + Returns: + dict[str, Any] | None: Dictionary with keys: + - "N": Entry number + - "title": Journal title + - "issn": Normalized ISSN (if present) + - "specialties": List of specialty codes (if present) + Returns None if the section is not properly formatted. + """ + clean = sec.replace("\r", "").replace("\n", " ") + m_num = RE_ROW_START.match(clean) + if not m_num: + return None + n = int(m_num.group(1)) + body = clean[m_num.end() :].strip() + + issn_iter = list(RE_ISSN_RAW.finditer(body)) + if issn_iter: + last = issn_iter[-1] + issn = self._normalize_issn(last.group(0)) + before, after = body[: last.start()].strip(), body[last.end() :].strip() + else: + issn, before, after = "", body, "" + + specialties = [] + if after: + tail = RE_DATE.sub("", after).strip() + tail = RE_ISSN_RAW.sub("", tail).strip() + specialties = self._split_specialties(tail) + + title = re.sub(r"\s{2,}", " ", before).strip() + return {"N": n, "title": title, "issn": issn, "specialties": specialties} + + def parse(self) -> list[dict[str, Any]]: + """Read the PDF file and parses it into a list of structured records. + + Returns: + list[dict[str, Any]]: List of dictionaries, each representing a parsed journal entry. 
+ + Raises: + FileNotFoundError: If the PDF file does not exist. + """ + if not self.path.exists(): + raise FileNotFoundError(f"Файл {self.path} не найден") + raw = self._read_pdf_text() + sections = self._split_sections(raw) + rows = [r for r in (self._parse_section(s) for s in sections) if r] + return sorted(rows, key=lambda d: d["N"]) + + +def save_to_json( + rows: list[dict[str, Any]], out_path: str | pathlib.Path | None = None +) -> pathlib.Path: + """Save a list of dictionaries to a JSON file. + + If `out_path` is not provided, the file is saved as 'vak_articles.json' + in the current working directory. + + Args: + rows (list[dict[str, Any]]): The list of dictionaries to save. + out_path (str | pathlib.Path | None): The path to save the JSON file to. + + Returns: + pathlib.Path: The path to the saved JSON file. + """ + out_path = pathlib.Path(out_path or "vak_articles.json") + with out_path.open("w", encoding="utf-8") as f: + json.dump(rows, f, ensure_ascii=False, indent=2) + return out_path + + +def load_json(in_path: str | pathlib.Path) -> list[dict[str, Any]]: + """Load and parse a JSON file into a list of dictionaries. + + Args: + in_path (str | pathlib.Path): Path to the JSON file. + + Returns: + list[dict[str, Any]]: The parsed content of the JSON file. 
+ """ + with in_path.open("r", encoding="utf-8") as f: + return json.load(f) diff --git a/science_helper/utils/setting.py b/science_helper/utils/setting.py new file mode 100644 index 0000000..a0feba6 --- /dev/null +++ b/science_helper/utils/setting.py @@ -0,0 +1,106 @@ +import configparser +from pathlib import Path +import re + + +# Путь до конфигурационного файла +CONFIG_PATH = Path("config.ini") + +# Чтение конфигурации +config = configparser.ConfigParser() +config.read(CONFIG_PATH, encoding="utf-8") + +# Регулярные выражения +RE_ROW_START = re.compile(config["REGEX"]["RE_ROW_START"], re.MULTILINE) +RE_ISSN_RAW = re.compile(config["REGEX"]["RE_ISSN_RAW"]) +RE_DATE = re.compile(config["REGEX"]["RE_DATE"], re.IGNORECASE) +RE_SPEC_CODE = re.compile(config["REGEX"]["RE_SPEC_CODE"]) +RE_INNER_SPACE = re.compile(config["REGEX"]["RE_INNER_SPACE"]) + +# Ссылки на онлайн ресурсы +WHITE_LIST_URL = config["WEB"]["WHITE_LIST_URL"] +VAK_LIST_URL = config["WEB"]["VAK_LIST_URL"] +SPECIALIZATION_URL = config["WEB"]["SPECIALIZATION_URL"] + +# Настройки админ панели +USE_ADMIN = config["WEB.INTERFACE"]["use_admin"] +ADMIN_LOGIN = config["WEB.INTERFACE"]["admin_login"] +ADMIN_PASSWORD = config["WEB.INTERFACE"]["admin_password"] + +# Настройки директории +MAIN_DIRECTORY = config["DIRECTORY"]["MAIN_DIRECTORY"] +DATA_DIRECTORY = config["DIRECTORY"]["DATA_DIRECTORY"] +SPECIALIZATION_NAME = config["DIRECTORY"]["SPECIALIZATION_NAME"] +FILENAME = config["DIRECTORY"]["filename"] + + +def save_config(config): + """Save the provided configuration dictionary to an INI file. + + The function writes structured configuration data into an INI file, + organizing it into sections such as REGEX, WEB, WEB.INTERFACE, and DIRECTORY. + + Args: + config (dict): A nested dictionary containing configuration values. 
+ Expected structure: + { + "regex": { + "re_row_start": str, + "re_issn_raw": str, + "re_date": str, + "re_spec_code": str, + "re_inner_space": str + }, + "web": { + "white_list_url": str, + "vak_list_url": str, + "spec_url": str + }, + "admin": { + "enabled": bool, + "login": str, + "password": str + }, + "directories": { + "main_dir": str, + "data_dir": str, + "spec_file": str, + "file_name": str + } + } + + Raises: + KeyError: If any of the expected keys are missing from the config. + OSError: If writing to the configuration file fails. + """ + config_parser = configparser.ConfigParser() + + config_parser["REGEX"] = { + "re_row_start": config["regex"]["re_row_start"], + "re_issn_raw": config["regex"]["re_issn_raw"], + "re_date": config["regex"]["re_date"], + "re_spec_code": config["regex"]["re_spec_code"], + "re_inner_space": config["regex"]["re_inner_space"], + } + + config_parser["WEB"] = { + "white_list_url": config["web"]["white_list_url"], + "vak_list_url": config["web"]["vak_list_url"], + "SPECIALIZATION_URL": config["web"]["spec_url"], + } + + config_parser["WEB.INTERFACE"] = { + "use_admin": str(config["admin"]["enabled"]), + "admin_login": config["admin"]["login"], + "admin_password": config["admin"]["password"], + } + + config_parser["DIRECTORY"] = { + "MAIN_DIRECTORY": config["directories"]["main_dir"], + "DATA_DIRECTORY": config["directories"]["data_dir"], + "SPECIALIZATION_NAME": config["directories"]["spec_file"], + "FILENAME": config["directories"]["file_name"], + } + + with CONFIG_PATH.open("w", encoding="utf-8") as f: + config_parser.write(f) diff --git a/setting.py b/setting.py deleted file mode 100644 index 1c2cdb8..0000000 --- a/setting.py +++ /dev/null @@ -1,62 +0,0 @@ -import re -import configparser - -config = configparser.ConfigParser() -config.read("config.ini", encoding="utf-8") - -# Регулярные выражения -RE_ROW_START = re.compile(config["REGEX"]["RE_ROW_START"], re.MULTILINE) -RE_ISSN_RAW = re.compile(config["REGEX"]["RE_ISSN_RAW"]) 
-RE_DATE = re.compile(config["REGEX"]["RE_DATE"], re.IGNORECASE) -RE_SPEC_CODE = re.compile(config["REGEX"]["RE_SPEC_CODE"]) -RE_INNER_SPACE = re.compile(config["REGEX"]["RE_INNER_SPACE"]) - -# Ссылки на онлайн ресурсы -WHITE_LIST_URL = config['WEB']["WHITE_LIST_URL"] -VAK_LIST_URL = config["WEB"]["VAK_LIST_URL"] -SPECIALIZATION_URL = config["WEB"]["SPECIALIZATION_URL"] - -# Настройки админ панели -USE_ADMIN = config["WEB.INTERFACE"]["use_admin"] -ADMIN_LOGIN = config["WEB.INTERFACE"]["admin_login"] -ADMIN_PASSWORD = config["WEB.INTERFACE"]["admin_password"] - -# Настройки директории -MAIN_DIRECTORY = config["DIRECTORY"]["MAIN_DIRECTORY"] -DATA_DIRECTORY = config["DIRECTORY"]["DATA_DIRECTORY"] -SPECIALIZATION_NAME = config["DIRECTORY"]["SPECIALIZATION_NAME"] -FILENAME = config["DIRECTORY"]["filename"] - - -def save_config(config): - config_parser = configparser.ConfigParser() - - config_parser['REGEX'] = { - 're_row_start': config['regex']['re_row_start'], - 're_issn_raw': config['regex']['re_issn_raw'], - 're_date': config['regex']['re_date'], - 're_spec_code': config['regex']['re_spec_code'], - 're_inner_space': config['regex']['re_inner_space'], - } - - config_parser['WEB'] = { - 'white_list_url': config['web']['white_list_url'], - 'vak_list_url': config['web']['vak_list_url'], - 'SPECIALIZATION_URL': config['web']['spec_url'], - } - - config_parser['WEB.INTERFACE'] = { - 'use_admin': str(config['admin']['enabled']), - 'admin_login': config['admin']['login'], - 'admin_password': config['admin']['password'], - } - - config_parser['DIRECTORY'] = { - 'MAIN_DIRECTORY': config['directories']['main_dir'], - 'DATA_DIRECTORY': config['directories']['data_dir'], - 'SPECIALIZATION_NAME': config['directories']['spec_file'], - 'FILENAME': config["directories"]["file_name"] - } - - with open('config.ini', 'w', encoding='utf-8') as configfile: - config_parser.write(configfile) \ No newline at end of file diff --git a/test/test_drawio_image_design.py 
b/test/test_drawio_image_design.py index 9802c5d..14ab92c 100644 --- a/test/test_drawio_image_design.py +++ b/test/test_drawio_image_design.py @@ -1,12 +1,12 @@ -import pytest -import xml.etree.ElementTree as ET from PIL import Image -from image_processing.enumerates import SignaturePosition, LabelMode -from image_processing import DrawioImageDesign +import pytest + +from science_helper.image_processing import DrawioImageDesign +from science_helper.image_processing.enumerates import LabelMode, SignaturePosition -class MockDrawioImageDesign(DrawioImageDesign): - def __init__(self): +class MockDrawioImageDesign(DrawioImageDesign): # noqa: D101 + def __init__(self): # noqa: D107 self._images_path = "" self._images = [Image.new("RGB", (320, 270), "white")] @@ -42,24 +42,32 @@ def _load_images(self, path): @pytest.fixture -def design(): +def design(): # noqa: D103 d = MockDrawioImageDesign() - d._signature_label = LabelMode.CYRILLIC_LOWER # Исправление ошибки сигнатуры + d._signature_label = LabelMode.CYRILLIC_LOWER return d -def test_add_numbering_creates_cell(design): +def test_add_numbering_creates_cell(design): # noqa: D103 design._add_numbering(image_w=320, image_h=270, label="1", parent_id="testparent") - cell = next((e for e in design._xml_root if e.attrib.get("parent") == "testparent" and e.attrib.get("id", "").endswith("-numbering")), None) + cell = next( + ( + e + for e in design._xml_root + if e.attrib.get("parent") == "testparent" + and e.attrib.get("id", "").endswith("-numbering") + ), + None, + ) assert cell is not None assert cell.attrib.get("vertex") == "1" assert "html=1" in cell.attrib.get("style", "") - assert ' Path: - # --- Извлекаем имя файла по параметру `name=...` - parsed = urlparse(url) - params = parse_qs(parsed.query) - name = params.get("name", [None])[0] - if not name: - raise ValueError("URL не содержит параметра ?name=...") - - filename = f"{name}.pdf" - filepath = Path(output_dir) / filename - - # --- Пропускаем загрузку, если файл уже 
def dict_from_web(url: str, output_dir: str = "./", timeout: int = 60) -> dict:
    """Fetch a JSON document from *url*, store it on disk and return it.

    Args:
        url: Address of the JSON resource.
        output_dir: Forwarded unchanged as the second argument of
            ``save_to_json``.
            NOTE(review): despite the name, ``save_to_json`` appears to expect
            a file path here - confirm against its definition and callers.
        timeout: Request timeout in seconds.

    Returns:
        The decoded JSON payload, or an empty dict when the server does not
        answer with HTTP 200.  (Previously the success path fell through and
        returned ``None`` despite the ``dict`` annotation.)
    """
    # NOTE(review): certificate verification is disabled, as in the original
    # code; confirm whether the remote host really requires this.
    response = requests.get(url, timeout=timeout, verify=False)
    if response.status_code != 200:
        return {}

    payload = response.json()
    save_to_json(payload, output_dir)
    return payload
output[-1]["sub_category"].append(sub_entry) - else: - output[-1]["sub_category"][-1]["values"].append(cells[1].get_text(strip=True)) - - elif len(cells) == 2: - output[-1]["sub_category"][-1]["values"].append(cells[0].get_text(strip=True)) - - return output \ No newline at end of file diff --git a/utils/pdf_parser.py b/utils/pdf_parser.py deleted file mode 100644 index 291d605..0000000 --- a/utils/pdf_parser.py +++ /dev/null @@ -1,114 +0,0 @@ -import json -import re -import pathlib -from typing import List, Dict, Any -from setting import (RE_ROW_START, - RE_ISSN_RAW, - RE_DATE, - RE_SPEC_CODE, - RE_INNER_SPACE) - -import PyPDF2 - - -def _read_pdf_text(pdf_path: pathlib.Path | str) -> str: - reader = PyPDF2.PdfReader(str(pdf_path)) - return "\n".join(page.extract_text() or "" for page in reader.pages).replace("\r", "") - - -def _split_sections(raw: str) -> List[str]: - idx = [m.start() for m in RE_ROW_START.finditer(raw)] + [len(raw)] - return [raw[idx[i]:idx[i+1]] for i in range(len(idx)-1)] - - -def _normalize_issn(raw: str) -> str: - raw = raw.replace("Х", "X").replace("х", "x") - raw = re.sub(r"[\-‑–—−]", "-", raw) - raw = re.sub(r"\s*-\s*", "-", raw) - return raw.upper() - -def _split_specialties(tail: str) -> List[str]: - specs: List[str] = [] - matches = list(RE_SPEC_CODE.finditer(tail)) - if not matches: - raw_parts = [s.strip() for s in tail.split(',') if s.strip()] - else: - raw_parts = [] - for i, m in enumerate(matches): - start = m.start() - end = matches[i+1].start() if i+1 < len(matches) else len(tail) - raw_parts.append(tail[start:end].strip().lstrip(',; )')) - - for seg in raw_parts: - seg = seg.replace(',', ' ') - seg = re.sub(r"\s{2,}", " ", seg) - if seg: - specs.append(seg) - return specs - - -def _parse_section(sec: str) -> Dict[str, Any] | None: - clean = sec.replace("\r", "").replace("\n", " ") - m_num = RE_ROW_START.match(clean) - if not m_num: - return None - n = int(m_num.group(1)) - body = clean[m_num.end():].strip() - - issn_iter = 
def filter_rows_by_specialty(rows: List[Dict[str, Any]], targets: List[str]) -> List[Dict[str, Any]]:
    """Keep the rows that have at least one specialty starting with a target prefix.

    An empty *targets* list, or one containing "all" in any letter case,
    disables filtering: the very same *rows* object is returned.
    """
    filtering_disabled = not targets or any(t.lower() == "all" for t in targets)
    if filtering_disabled:
        return rows

    kept = []
    for row in rows:
        for specialty in row["specialties"]:
            if any(specialty.startswith(prefix) for prefix in targets):
                kept.append(row)
                break  # one matching specialty is enough for this row
    return kept
def science_articles_page() -> None:  # noqa: C901, PLR0915
    """Render the scientific-journal analysis page.

    Three JSON files are required in ``MAIN_DIRECTORY/DATA_DIRECTORY``: the
    specialization taxonomy, ``vak_articles.json`` and
    ``whitelist_articles.json``.  If any is missing only an error notice is
    shown.  Otherwise the page offers cascading category / subcategory /
    specialty selectors and a button that filters the VAK list by the chosen
    specialty codes, matches it against the white list by ISSN, shows the
    three resulting tables and lets the user download the visible one as
    an Excel file.
    """
    data_dir = Path(setting.MAIN_DIRECTORY) / setting.DATA_DIRECTORY
    required_files = [
        data_dir / setting.SPECIALIZATION_NAME,
        data_dir / "vak_articles.json",
        data_dir / "whitelist_articles.json",
    ]

    if not all(p.exists() for p in required_files):
        with ui.column().classes("w-full items-center gap-4"):
            ui.label("Анализ научных журналов").classes("text-xl")
            ui.label("❌ Не найдены необходимые данные.").style("color:#e53935")
            ui.markdown("- Специализации\n- Журналы ВАК\n- Белый список")
        return

    taxonomy = json.loads(required_files[0].read_text(encoding="utf-8"))

    def get_cat():
        return ["Выбрать..."] + [c["category_name"] for c in taxonomy]

    def get_sub(c):
        category = next((x for x in taxonomy if x["category_name"] == c["label"]), {})
        return ["Выбрать..."] + [
            s["subcategory_name"] for s in category.get("sub_category", [])
        ]

    def get_specs(cat, sub):
        c = next((x for x in taxonomy if x["category_name"] == cat["label"]), None)
        sub = (
            next((s for s in c["sub_category"] if s["subcategory_name"] == sub["label"]), None)
            if c
            else None
        )
        return sub["values"] if sub else []

    def codes(selected):
        # The specialty code is the first three dot-separated parts of the label.
        return sorted({".".join(s["label"].split(".")[:3]) for s in selected})

    def stringify_lists(rows):
        return [{k: ", ".join(v) if isinstance(v, list) else v for k, v in r.items()} for r in rows]

    def column(field, label, style, *, center=False):
        # Table column definition; ``name`` and ``field`` are always the same here.
        definition = {
            "name": field,
            "label": label,
            "field": field,
            "headerClasses": "text-center",
            "style": style,
        }
        if center:
            definition["align"] = "center"
        return definition

    cols_data = [
        column("ВАК ID", "ID", "width:70px; white-space:normal;", center=True),
        column("Наименование журнала", "Журнал", "max-width:280px; white-space:normal;"),
        column("issns", "ISSN", "width:120px; white-space:normal;", center=True),
        column("Специализации", "Специализации", "max-width:320px; white-space:normal;"),
        column("Уровень журнала", "Уровень", "width:90px;"),
        column("WOS", "WOS", "width:70px;", center=True),
        column("Scopus", "Scopus", "width:70px;", center=True),
        column("RSCI", "RSCI", "width:70px;", center=True),
    ]
    cols_simple = [
        column("N", "ID", "width:60px;", center=True),
        column("title", "Название", "max-width:320px; white-space:normal;"),
        column("issn", "ISSN", "width:120px;", center=True),
        column("specialties", "Специализации", "max-width:340px; white-space:normal;"),
    ]

    with ui.column().classes("w-full items-center gap-4"):
        ui.label("Анализ научных журналов").classes("text-xl")

        cat_sel = ui.select(get_cat(), label="Категория").classes("w-96")
        sub_box = ui.column().classes("w-96 hidden")
        spec_box = ui.column().classes("w-96 hidden")

        run_btn = ui.button("АНАЛИЗИРОВАТЬ").props("color=primary").classes("hidden")
        spin = ui.spinner(size="lg").props("color=primary").classes("hidden mt-2")

        btn_row = ui.row().classes("w-full gap-3")
        tbl_wrap = ui.column().classes("w-full")
        dl_btn = (
            ui.button("⬇ Скачать Excel").props("color=secondary").classes("mt-2 self-start hidden")
        )

    active = {"btn": None}
    shown = {"xlsx": None}  # Excel file that backs the table currently on screen
    specs_selected: list[dict] = []

    def highlight(b):
        if active["btn"]:
            active["btn"].props("color=secondary").update()
        b.props("color=primary").update()
        active["btn"] = b

    def download_shown():
        path = shown["xlsx"]
        if path is not None:
            ui.download(str(path), filename=path.name)

    # Registered exactly once.  Previously every show_table() call added one
    # more click handler, so a single click started several downloads
    # (including files of tables that were no longer displayed).
    dl_btn.on("click", download_shown)

    def show_table(rows, columns, xlsx_path):
        tbl_wrap.clear()
        with tbl_wrap:
            ui.table(
                columns=columns, rows=rows, pagination=10, row_key=columns[0]["field"]
            ).classes("w-full").style("table-layout:fixed;word-break:break-word;")

        shown["xlsx"] = xlsx_path
        dl_btn.classes(remove="hidden")

    async def run():
        if not specs_selected:
            ui.notify("Выберите хотя бы одну специализацию")
            return

        run_btn.disable()
        spin.classes(remove="hidden")
        try:
            btn_row.clear()
            tbl_wrap.clear()

            vak_articles = load_json(data_dir / "vak_articles.json")
            vak_filters = filter_rows_by_specialty(vak_articles, codes(specs_selected))
            whitelist = load_json(data_dir / "whitelist_articles.json")

            data = []
            for it in vak_filters:
                hit = next((w for w in whitelist if it["issn"] in w["issns"]), None)
                if hit:
                    data.append(
                        {
                            "ВАК ID": it["N"],
                            "Наименование журнала": ", ".join(hit["title"]),
                            "issns": ", ".join(hit["issns"]),
                            "Специализации": ", ".join(it["specialties"]),
                            "Уровень журнала": hit["level"],
                            "WOS": bool_to_yes_no(hit["wos_cc"]["value"]),
                            "Scopus": bool_to_yes_no(hit["scopus"]["value"]),
                            "RSCI": bool_to_yes_no(hit["rsci"]["value"]),
                        }
                    )

            xlsx_dir = Path(temp_dir.name)
            xlsx_data = xlsx_dir / "data.xlsx"
            xlsx_filters = xlsx_dir / "filters.xlsx"
            xlsx_articles = xlsx_dir / "articles.xlsx"
            pd.DataFrame(data).to_excel(xlsx_data, index=False)
            pd.DataFrame(vak_filters).to_excel(xlsx_filters, index=False)
            pd.DataFrame(vak_articles).to_excel(xlsx_articles, index=False)

            with btn_row:
                b_art = ui.button(
                    "ВАК-статьи",
                    on_click=lambda: [
                        show_table(stringify_lists(vak_articles), cols_simple, xlsx_articles),
                        highlight(b_art),
                    ],
                ).props("color=secondary")
                b_flt = ui.button(
                    "Фильтр",
                    on_click=lambda: [
                        show_table(stringify_lists(vak_filters), cols_simple, xlsx_filters),
                        highlight(b_flt),
                    ],
                ).props("color=secondary")
                b_res = ui.button(
                    "Результат",
                    on_click=lambda: [show_table(data, cols_data, xlsx_data), highlight(b_res)],
                ).props("color=secondary")

            highlight(b_res)
            show_table(data, cols_data, xlsx_data)
        finally:
            # Always give the controls back, even when loading or exporting fails.
            spin.classes(add="hidden")
            run_btn.enable()

    def reset_specs():
        # Drop the specialty selector together with everything that depends on
        # it, so a stale selection can never be analysed.
        specs_selected.clear()
        spec_box.clear()
        spec_box.classes(add="hidden")
        run_btn.classes(add="hidden")

    def on_specs_changed(e):
        specs_selected.clear()
        specs_selected.extend(e.args or [])

    def show_spec(opts):
        reset_specs()
        if not opts:
            return
        spec_box.classes(remove="hidden")
        run_btn.classes(remove="hidden")
        # Create the selector inside spec_box explicitly; otherwise it lands in
        # whatever container the triggering event belongs to and
        # spec_box.clear() cannot remove it again.
        with spec_box:
            ui.select(opts, label="Научные специальности", multiple=True).classes("w-96").on(
                "update:model-value", on_specs_changed
            )

    def sub_changed(cat):
        reset_specs()
        sub_box.clear()
        if cat["label"] == "Выбрать...":
            sub_box.classes(add="hidden")
            return
        sub_box.classes(remove="hidden")

        def on_sub(e):
            sub = e.args
            if sub["label"] == "Выбрать...":
                reset_specs()
                return
            show_spec(get_specs(cat, sub))

        with sub_box:
            ui.select(get_sub(cat), label="Подкатегория").classes("w-96").on(
                "update:model-value", on_sub
            )

    cat_sel.on("update:model-value", lambda e: sub_changed(e.args))
    run_btn.on("click", run)
ui.row().classes("gap-4"): + ui.button("📤 Загрузить", on_click=upload_dialog.open).props("color=primary") + ui.button("🗑 Очистить", on_click=lambda: clear_images(image_slot)).props( + "color=negative" + ) + ui.button("📥 Скачать .png", on_click=download_png).props( + "color=primary" + ).bind_visibility_from(image_slot, "visible") + ui.button("📥 Скачать .drawio", on_click=download_drawio).props( + "color=accent" + ).bind_visibility_from(image_slot, "visible") + + download_link # noqa: B018 + download_drawio_link # noqa: B018 + + with ui.expansion("Параметры обработки", icon="settings"): + with ui.grid(columns=4).classes("gap-4 w-full"): + + def safe_int(val, default=0): + try: + return int(val) + except ValueError: + return default + + def safe_font(val: str, fallback: int = 12) -> int: + v = safe_int(val, fallback) + if v <= 0: + ui.notify("Размер шрифта должен быть положительным", type="warning") + return fallback + return v + + ui.input( + "Размер рамки", + value=str(design.border_size), + on_change=lambda e: update_param("border_size", safe_int(e.value), image_slot), + ).props("type=number min=0") + ui.color_input( + label="Цвет рамки", + value="#000000", + on_change=lambda e: update_param("border_fill", e.value, image_slot), + ) + + ui.checkbox( + "Добавлять подпись", + value=design.signature, + on_change=lambda e: update_param("signature", e.value, image_slot), + ) + ui.select( + signature_label_options, + value=design.signature_label, + label="Тип подписи", + on_change=lambda e: update_param("signature_label", e.value, image_slot), + ) + ui.color_input( + label="Цвет надписи", + value="#fff", + on_change=lambda e: update_param("signature_label_color", e.value, image_slot), + ) + ui.select( + signature_pos_options, + value=design.signature_pos, + label="Позиция подписи", + on_change=lambda e: update_param("signature_pos", e.value, image_slot), + ) + ui.input( + "Размер подписи (ширина)", + value=str(design.signature_size[0]), + on_change=lambda e: update_param( 
+ "signature_size", (safe_int(e.value), design.signature_size[1]), image_slot + ), + ).props("type=number min=0") + ui.input( + "Размер подписи (высота)", + value=str(design.signature_size[1]), + on_change=lambda e: update_param( + "signature_size", (design.signature_size[0], safe_int(e.value)), image_slot + ), + ).props("type=number min=0") + ui.color_input( + label="Цвет подписи (фон)", + value="#000", + on_change=lambda e: update_param("signature_color", e.value, image_slot), + ) + ui.input( + "Размер шрифта подписи", + value=str(design.signature_font_size), + on_change=lambda e: update_param( + "signature_font_size", safe_int(e.value), image_slot + ), + ).props("type=number min=3") + + ui.checkbox( + "Показывать оси", + value=design.draw_axis, + on_change=lambda e: update_param("draw_axis", e.value, image_slot), + ) + ui.input( + "Подписи оси X", + value=design.axis_labels[0] + if isinstance(design.axis_labels[0], str) + else ",".join(design.axis_labels[0]), + on_change=lambda e: update_axis_labels("x", e.value, image_slot), + ) + ui.input( + "Подписи оси Y", + value=design.axis_labels[1] + if isinstance(design.axis_labels[1], str) + else ",".join(design.axis_labels[1]), + on_change=lambda e: update_axis_labels("y", e.value, image_slot), + ) + ui.input( + "Смещение по X", + value=str( + design.axis_offset[0] + if isinstance(design.axis_offset, tuple) + else design.axis_offset + ), + on_change=lambda e: update_axis_offset("x", e.value, image_slot), + ).props("type=number min=0") + ui.input( + "Смещение по Y", + value=str( + design.axis_offset[1] + if isinstance(design.axis_offset, tuple) + else design.axis_offset + ), + on_change=lambda e: update_axis_offset("y", e.value, image_slot), + ).props("type=number min=0") + ui.input( + "Длина осей", + value=str(design.axis_length), + on_change=lambda e: update_param("axis_length", safe_int(e.value), image_slot), + ).props("type=number min=1") + ui.input( + "Толщина осей", + value=str(design.axis_width), + 
on_change=lambda e: update_param("axis_width", safe_int(e.value), image_slot), + ).props("type=number min=1") + ui.input( + "Размер шрифта осей", + value=str(design.axis_font_size), + on_change=lambda e: update_param( + "axis_font_size", safe_int(e.value), image_slot + ), + ).props("type=number min=3") + ui.select( + font_files or ["Arial"], + value=design.font_family, + label="Шрифт", + on_change=lambda e: update_param("font_family", e.value, image_slot), + ) + + with ui.expansion("Параметры компоновки", icon="grid_on"): + with ui.grid(columns=4).classes("gap-4 w-full"): + layout_select = ui.select( + ["row", "column", "grid"], value=united_params["layout"], label="Расположение" + ) + layout_select.on( + "update:model-value", lambda e: update_united("layout", e.args, image_slot) + ) + + spacing_input = ui.input( + "Отступ между изображениями", value=str(united_params["spacing"]) + ).props("type=number") + spacing_input.on( + "change", lambda e: update_united("spacing", safe_int(e.args), image_slot) + ) + + bg_color_picker = ui.color_input("Цвет фона", value=united_params["bg_color"]) + bg_color_picker.on( + "change", lambda e: update_united("bg_color", e.args, image_slot) + ) + + grid_cols_input = ui.input("Число колонок (grid)", value="").props("type=number") + grid_cols_input.on( + "change", + lambda e: update_united( + "grid_cols", safe_int(e.args) if e.args else None, image_slot + ), + ) + + grid_rows_input = ui.input("Число строк (grid)", value="").props("type=number") + grid_rows_input.on( + "change", + lambda e: update_united( + "grid_rows", safe_int(e.args) if e.args else None, image_slot + ), + ) + + width_input = ui.input( + "Ширина изображения", value=str(united_params["width"]) + ).props("type=number") + width_input.on( + "change", lambda e: update_united("width", safe_int(e.args), image_slot) + ) + + height_input = ui.input("Высота изображения", value="").props("type=number") + height_input.on( + "change", + lambda e: update_united( + "height", 
def handle_upload(e, dialog, image_slot, download_link):
    """Add one uploaded picture to the shared design and refresh the preview.

    Args:
        e: Upload event; ``e.name`` is the file name and ``e.content`` a
            binary file-like object holding the data.
        dialog: Upload dialog, closed after a successful upload.
        image_slot: Preview element passed on to ``update_output``.
        download_link: Unused; kept so existing callers keep working.

    Files with an unsupported extension, and files whose content cannot be
    decoded as an image, are rejected with a notification.
    """
    allowed_ext = (".png", ".jpg", ".jpeg")
    if not e.name.lower().endswith(allowed_ext):
        ui.notify("Неподдерживаемый формат", type="negative")
        return

    e.content.seek(0)
    try:
        # Pillow reads straight from the file-like object; convert() forces
        # the decode, so a corrupt file fails here and not later on.
        img = Image.open(e.content).convert("RGB")
    except OSError:
        # Covers PIL.UnidentifiedImageError (an OSError subclass) as well as
        # truncated or otherwise broken files.
        ui.notify(f"Не удалось прочитать изображение: {e.name}", type="negative")
        return

    design.append(img)
    ui.notify(f"{e.name} загружен", type="positive")
    dialog.close()
    update_output(image_slot)
def update_united(name, value, image_slot):
    """Store one layout parameter and redraw when there is something to combine.

    For ``name == "layout"`` the value may arrive as a select-event dict, in
    which case its ``"label"`` entry is used; a value outside
    ``valid_layouts`` is rejected with a notification and nothing is stored.
    The preview is refreshed only when the design holds more than one image.
    """
    is_layout = name == "layout"
    if is_layout and isinstance(value, dict) and "label" in value:
        value = value["label"]
    if is_layout and value not in valid_layouts:
        ui.notify(f"Недопустимое значение layout: {value}", type="negative")
        return

    united_params[name] = value
    if len(design) <= 1:
        return
    update_output(image_slot)
+ drawio.signature = design.signature + drawio.signature_label = design.signature_label + drawio.signature_label_color = design.signature_label_color + drawio.signature_color = design.signature_color + drawio.signature_font_size = design.signature_font_size + drawio.signature_size = design.signature_size + drawio.signature_pos = design.signature_pos + drawio.axis_labels = design.axis_labels + drawio.axis_length = design.axis_length + drawio.axis_width = design.axis_width + drawio.axis_font_size = design.axis_font_size + drawio.axis_offset = design.axis_offset + drawio.font_family = design.font_family + drawio.draw_axis = design.draw_axis + + output_path = Path(tmp_dir.name) / "result.drawio" + + drawio.export_to_drawio( + file=output_path, + layout=united_params["layout"], + spacing=united_params["spacing"], + grid_cols=united_params["grid_cols"], + grid_rows=united_params["grid_rows"], + width=united_params["width"], + height=united_params["height"], + ) + + ui.download(str(output_path), filename="result.drawio") + except Exception as e: + ui.notify(f"Ошибка при сохранении drawio: {e}", type="negative") diff --git a/web/pages/settings_page.py b/web/pages/settings_page.py new file mode 100644 index 0000000..de65664 --- /dev/null +++ b/web/pages/settings_page.py @@ -0,0 +1,171 @@ +import asyncio +from pathlib import Path + +from nicegui import ui + +from science_helper.search_vak_articles import ( + NomenclatureParser, + PDFDownloader, + PDFParser, + save_to_json, +) +from science_helper.utils import setting + + +regex_values: dict[str, ui.input] = {} +web_values: dict[str, ui.input] = {} +dir_values: dict[str, ui.input] = {} + +admin_container: ui.column | None = None +use_admin_checkbox: ui.checkbox | None = None + +save_btn: ui.button | None = None +spec_btn: ui.button | None = None +download_btn: ui.button | None = None + + +def _toggle_buttons(state: bool) -> None: + for btn in (save_btn, spec_btn, download_btn): + if btn: + if state: + btn.enable() + 
async def _download_and_parse_pdf() -> str | None:
    """Download the VAK PDF, parse it to JSON and fetch the white list.

    The blocking download and parse calls run in worker threads.  Returns the
    PDF's file name, or ``None`` when no PDF file is available.
    """
    data_root = Path(setting.MAIN_DIRECTORY) / setting.DATA_DIRECTORY
    pdf_downloader = PDFDownloader(output_dir=data_root, config_path="config.ini")

    pdf_file = await asyncio.to_thread(
        pdf_downloader.download_pdf_if_needed, setting.VAK_LIST_URL
    )
    if not (pdf_file and pdf_file.is_file()):
        return None

    rows = await asyncio.to_thread(PDFParser(pdf_file).parse)
    save_to_json(rows, data_root / "vak_articles.json")
    await asyncio.to_thread(
        pdf_downloader.dict_from_web, setting.WHITE_LIST_URL, "whitelist_articles.json"
    )
    return pdf_file.name
def settings_page(update_data_status: callable, refresh_analysis: callable) -> None:
    """Render the settings page.

    Shows the regex, web and directory settings as editable inputs plus the
    read-only admin credentials, and three actions: save the settings, load
    the specializations, download the journal lists.

    Args:
        update_data_status: Callback passed to the two download actions.
        refresh_analysis: Callback passed to the two download actions.
    """
    global admin_container, use_admin_checkbox, save_btn, spec_btn, download_btn  # noqa: PLW0603

    # NOTE(review): the nesting of the admin controls inside the "WEB" column
    # is reconstructed from the widget classes - confirm against the layout.
    with ui.row().classes("w-full justify-center"):
        with ui.row().classes("w-10/12 justify-between"):
            with ui.column().classes("w-1/4"):
                ui.label("REGEX настройки").classes("text-lg font-bold")
                for key, val in setting.config["REGEX"].items():
                    regex_values[key] = ui.input(label=key, value=val).classes("w-full")

            with ui.column().classes("w-1/4"):
                ui.label("WEB настройки").classes("text-lg font-bold")
                web_values["white_list_url"] = ui.input(
                    "Ссылка на белый список (json)", value=setting.WHITE_LIST_URL
                ).classes("w-full")
                web_values["vak_list_url"] = ui.input(
                    "Ссылка на список ВАК (pdf)", value=setting.VAK_LIST_URL
                ).classes("w-full")
                web_values["spec_url"] = ui.input(
                    "Ссылка на специализации", value=setting.SPECIALIZATION_URL
                ).classes("w-full")

                # USE_ADMIN comes from the INI file as text.
                state = setting.USE_ADMIN.strip().lower() == "true"
                use_admin_checkbox = ui.checkbox(
                    "Использовать админ панель?",
                    value=state,
                    on_change=lambda e: admin_setting(e.value),
                ).classes("w-full")
                admin_container = ui.column().classes("gap-2 mt-2")
                admin_setting(state)

            with ui.column().classes("w-1/4"):
                ui.label("Настройки директории").classes("text-lg font-bold")
                dir_values["main_dir"] = ui.input(
                    "Основная директория", value=setting.MAIN_DIRECTORY
                ).classes("w-full")
                dir_values["data_dir"] = ui.input(
                    "Директория с данными", value=setting.DATA_DIRECTORY
                ).classes("w-full")
                dir_values["spec_file"] = ui.input(
                    "Имя файла для специализаций", value=setting.SPECIALIZATION_NAME
                ).classes("w-full")
                dir_values["file_name"] = (
                    ui.input("Имя файла ВАК", value=setting.FILENAME)
                    .classes("w-full")
                    .props("readonly")
                )

    def on_save() -> None:
        config = {
            "regex": {k: v.value for k, v in regex_values.items()},
            "web": {k: v.value for k, v in web_values.items()},
            "directories": {k: v.value for k, v in dir_values.items()},
            "admin": {
                "enabled": use_admin_checkbox.value,
                "login": setting.ADMIN_LOGIN,
                "password": setting.ADMIN_PASSWORD,
            },
        }
        try:
            setting.save_config(config)
        except (KeyError, OSError) as err:
            # save_config documents exactly these two failure modes; report
            # them instead of letting the click handler fail silently.
            ui.notify(f"Не удалось сохранить настройки: {err}", type="negative")
            return
        # NOTE(review): ui.notify forwards ``timeout`` to Quasar's Notify,
        # which takes milliseconds; the previous value 3 would dismiss the
        # message almost immediately.
        ui.notify("Настройки обновлены", timeout=3000)

    with ui.row().classes("w-full justify-center mt-6 gap-4"):
        save_btn = ui.button("ОБНОВИТЬ НАСТРОЙКИ", on_click=on_save).props("color=primary")
        spec_btn = ui.button(
            "Загрузить специализации",
            on_click=lambda: on_load_specializations(update_data_status, refresh_analysis),
        ).props("color=primary")
        download_btn = ui.button(
            "Загрузить журналы",
            on_click=lambda: on_download_pdf(update_data_status, refresh_analysis),
        ).props("color=primary")