From 266582fdad88df76385e78d0795fe1a7ef9648e1 Mon Sep 17 00:00:00 2001 From: Mateh Elismar Date: Fri, 10 May 2024 15:14:57 -0400 Subject: [PATCH 1/9] feat: implement realtime functionality. --- pyproject.toml | 1 + requirements.txt | 7 ++++ src/alfred/base/constants.py | 10 ++++++ src/alfred/channel/__init__.py | 63 ++++++++++++++++++++++++++++++++++ 4 files changed, 81 insertions(+) create mode 100644 src/alfred/channel/__init__.py diff --git a/pyproject.toml b/pyproject.toml index fe626b0..24bef50 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,6 +19,7 @@ dependencies = [ "pydantic >= 2.0", "pydantic-settings >= 2.0", "requests >= 2.30", + "python-socketio >= 5.11" ] [project.urls] diff --git a/requirements.txt b/requirements.txt index c0657b0..4c78517 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,9 @@ annotated-types==0.6.0 +bidict==0.23.1 build==1.2.1 certifi==2024.2.2 charset-normalizer==3.3.2 +h11==0.14.0 idna==3.7 importlib_metadata==7.1.0 packaging==24.0 @@ -10,8 +12,13 @@ pydantic-settings==2.2.1 pydantic_core==2.18.2 pyproject_hooks==1.1.0 python-dotenv==1.0.1 +python-engineio==4.9.0 +python-socketio==5.11.2 requests==2.31.0 +simple-websocket==1.0.0 tomli==2.0.1 typing_extensions==4.11.0 urllib3==2.2.1 +websocket-client==1.8.0 +wsproto==1.2.0 zipp==3.18.1 diff --git a/src/alfred/base/constants.py b/src/alfred/base/constants.py index 41b6108..f089d52 100644 --- a/src/alfred/base/constants.py +++ b/src/alfred/base/constants.py @@ -1,3 +1,5 @@ +from enum import Enum + from src.alfred.http.typed import ResponseType # Response type/header mapping @@ -6,3 +8,11 @@ ResponseType.TEXT: "text/plain", ResponseType.XML: "application/xml", } + + +class EventName(Enum): + """ + Enumeration of event names. 
+ """ + JOB_EVENT = "job_event" + FILE_EVENT = "file_event" diff --git a/src/alfred/channel/__init__.py b/src/alfred/channel/__init__.py new file mode 100644 index 0000000..ae518c7 --- /dev/null +++ b/src/alfred/channel/__init__.py @@ -0,0 +1,63 @@ +# 3rd Party Imports +import socketio + +from src.alfred.base.config import ConfigurationDict +from src.alfred.base.constants import EventName +from src.alfred.http.typed import AuthConfiguration +from src.alfred.utils import logging + + +class AlfredChannel: + def __init__(self, config: ConfigurationDict, auth_config: AuthConfiguration, verbose=False): + self.socket = socketio.Client() + self.verbose = verbose + self.config = config + self.auth_config = auth_config + + # Initialize logger + self.logger = logging.getLogger("alfred-python") + + # Establish connection with verbose output if enabled + if self.verbose: + self.logger.debug("Attempting to establish a connection...") + self.socket.connect(f"{config.get('base_url')}?apiKey={auth_config.get('api_key')}") + + self.socket.on('connect', self.on_connect) + self.socket.on('disconnect', self.on_disconnect) + self.socket.on('connect_error', self.on_connect_error) + + def on_connect(self): + if self.verbose: + self.logger.debug("Connected successfully to:", self.config.get("base_url")) + + def on_disconnect(self): + if self.verbose: + self.logger.debug("Disconnected from the server.") + + def on_connect_error(self, err): + if self.verbose: + self.logger.debug("Connection error:", err) + self.disconnect() + raise Exception(f"Failed to connect to {self.config.get('base_url')}: {err}") + + def _callback(self, event: str, callback): + def handle_event(data): + if self.verbose: + self.logger.debug(f"Event {event} received:", data) + callback(data) + + self.socket.on(event, handle_event) + + def on_file_event(self, callback): + self._callback(EventName.FILE_EVENT.value, callback) + + def on_job_event(self, callback): + self._callback(EventName.JOB_EVENT.value, callback) + + 
def on(self, event: str, callback): + self._callback(event, callback) + + def disconnect(self): + if self.verbose: + self.logger.debug("Closing connection...") + self.socket.disconnect() From bec45958670e5b3341fafb5d40e25521c73b9aea Mon Sep 17 00:00:00 2001 From: Mateh Elismar Date: Fri, 10 May 2024 16:19:09 -0400 Subject: [PATCH 2/9] docs: add docstring to class methods. - refactor: rename socket client to not reflect the protocol. --- src/alfred/base/config.py | 4 +- src/alfred/channel/__init__.py | 71 +++++++++++++++++++++++++++++----- 2 files changed, 64 insertions(+), 11 deletions(-) diff --git a/src/alfred/base/config.py b/src/alfred/base/config.py index d834023..5bafaa0 100644 --- a/src/alfred/base/config.py +++ b/src/alfred/base/config.py @@ -4,6 +4,7 @@ class ConfigurationDict(TypedDict): base_url: Text + realtime_url: Text version: int @@ -15,7 +16,7 @@ class Configuration: @staticmethod def default() -> ConfigurationDict: """ - Returns default client configuration. Currently targets Alfred V1. + Returns default client configuration. Currently, targets Alfred V1. """ return Configuration.v1() @@ -28,4 +29,5 @@ def v1(overrides: Optional[OverridesDict] = None) -> ConfigurationDict: return { "version": 1, "base_url": overrides.get("base_url", "https://app.tagshelf.com"), + "realtime_url": overrides.get("realtime_url", "https://sockets.tagshelf.io"), } diff --git a/src/alfred/channel/__init__.py b/src/alfred/channel/__init__.py index ae518c7..c49dae1 100644 --- a/src/alfred/channel/__init__.py +++ b/src/alfred/channel/__init__.py @@ -7,57 +7,108 @@ from src.alfred.utils import logging -class AlfredChannel: +class AlfredRealtimeClient: def __init__(self, config: ConfigurationDict, auth_config: AuthConfiguration, verbose=False): + """ + Initializes the AlfredRealtimeClient class. + + Args: + config (ConfigurationDict): The configuration dictionary. + auth_config (AuthConfiguration): The authentication configuration. 
+ verbose (bool, optional): Whether to print verbose output. Defaults to False. + """ self.socket = socketio.Client() self.verbose = verbose self.config = config self.auth_config = auth_config + self.base_url = config.get("realtime_url") # Initialize logger self.logger = logging.getLogger("alfred-python") - # Establish connection with verbose output if enabled - if self.verbose: - self.logger.debug("Attempting to establish a connection...") - self.socket.connect(f"{config.get('base_url')}?apiKey={auth_config.get('api_key')}") - + # Subscribe to connection life-cycle events. self.socket.on('connect', self.on_connect) self.socket.on('disconnect', self.on_disconnect) self.socket.on('connect_error', self.on_connect_error) + # Establish connection with verbose output if enabled + if self.verbose: + self.logger.debug("Attempting to establish a connection...") + self.socket.connect(f"{self.base_url}?apiKey={auth_config.get('api_key')}") + def on_connect(self): + """ + Handles the 'connect' event. + """ if self.verbose: - self.logger.debug("Connected successfully to:", self.config.get("base_url")) + self.logger.debug(f"Connected successfully to: {self.base_url}") def on_disconnect(self): + """ + Handles the 'disconnect' event. + """ if self.verbose: self.logger.debug("Disconnected from the server.") def on_connect_error(self, err): + """ + Handles the 'connect_error' event. + + Args: + err (str): The error message. + """ if self.verbose: - self.logger.debug("Connection error:", err) + self.logger.debug("Connection error: %s", err) self.disconnect() - raise Exception(f"Failed to connect to {self.config.get('base_url')}: {err}") + raise Exception(f"Failed to connect to {self.base_url}: {err}") def _callback(self, event: str, callback): + """ + Wrapper function to subscribe a specific event. + + Args: + event (str): The event name. + callback (function): The callback function to handle the event. 
+ """ def handle_event(data): if self.verbose: - self.logger.debug(f"Event {event} received:", data) + self.logger.debug(f"Event {event} received: %s", data) callback(data) self.socket.on(event, handle_event) def on_file_event(self, callback): + """ + Handles the 'file_event' event. + + Args: + callback (function): The callback function to handle the event. + """ self._callback(EventName.FILE_EVENT.value, callback) def on_job_event(self, callback): + """ + Handles the 'job_event' event. + + Args: + callback (function): The callback function to handle the event. + """ self._callback(EventName.JOB_EVENT.value, callback) def on(self, event: str, callback): + """ + Handles a specific event. + + Args: + event (str): The event name. + callback (function): The callback function to handle the event. + """ self._callback(event, callback) def disconnect(self): + """ + Disconnects client from the server. + """ if self.verbose: self.logger.debug("Closing connection...") self.socket.disconnect() From 524070a9c6a55c83189c3efd446351cff03d268a Mon Sep 17 00:00:00 2001 From: Mateh Elismar Date: Fri, 10 May 2024 16:48:31 -0400 Subject: [PATCH 3/9] fix: add ConnectionError exception. --- src/alfred/exceptions/__init__.py | 7 +++++++ src/alfred/{channel => realtime}/__init__.py | 7 ++++++- 2 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 src/alfred/exceptions/__init__.py rename src/alfred/{channel => realtime}/__init__.py (92%) diff --git a/src/alfred/exceptions/__init__.py b/src/alfred/exceptions/__init__.py new file mode 100644 index 0000000..446f135 --- /dev/null +++ b/src/alfred/exceptions/__init__.py @@ -0,0 +1,7 @@ +class ConnectionError(Exception): + """ + Raised when a connection error occurs. 
+ """ + def __init__(self, message="A connection error occurred"): + self.message = message + super().__init__(self.message) diff --git a/src/alfred/channel/__init__.py b/src/alfred/realtime/__init__.py similarity index 92% rename from src/alfred/channel/__init__.py rename to src/alfred/realtime/__init__.py index c49dae1..6a0a5b3 100644 --- a/src/alfred/channel/__init__.py +++ b/src/alfred/realtime/__init__.py @@ -1,6 +1,7 @@ # 3rd Party Imports import socketio +import src.alfred.exceptions from src.alfred.base.config import ConfigurationDict from src.alfred.base.constants import EventName from src.alfred.http.typed import AuthConfiguration @@ -34,7 +35,11 @@ def __init__(self, config: ConfigurationDict, auth_config: AuthConfiguration, ve # Establish connection with verbose output if enabled if self.verbose: self.logger.debug("Attempting to establish a connection...") - self.socket.connect(f"{self.base_url}?apiKey={auth_config.get('api_key')}") + try: + self.socket.connect(f"{self.base_url}?apiKey={auth_config.get('api_key')}") + except Exception as err: + raise src.alfred.exceptions.ConnectionError(f"Could not establish connection with server: {err}") + def on_connect(self): """ From b2b1edae7f7255df1be2262751647c5389440c07 Mon Sep 17 00:00:00 2001 From: Mateh Elismar Date: Fri, 10 May 2024 16:49:45 -0400 Subject: [PATCH 4/9] chore: change debug message for established connection. --- src/alfred/realtime/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/alfred/realtime/__init__.py b/src/alfred/realtime/__init__.py index 6a0a5b3..9693ff7 100644 --- a/src/alfred/realtime/__init__.py +++ b/src/alfred/realtime/__init__.py @@ -46,7 +46,7 @@ def on_connect(self): Handles the 'connect' event. 
""" if self.verbose: - self.logger.debug(f"Connected successfully to: {self.base_url}") + self.logger.debug(f"Successfully connected to: {self.base_url}") def on_disconnect(self): """ From f7dc451b40d5d75950147d09ea9d0ee4344f8cda Mon Sep 17 00:00:00 2001 From: Mateh Elismar Date: Fri, 10 May 2024 16:55:14 -0400 Subject: [PATCH 5/9] chore: add websocket-client dependency to pyproject.toml. --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 24bef50..6eaa243 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,8 @@ dependencies = [ "pydantic >= 2.0", "pydantic-settings >= 2.0", "requests >= 2.30", - "python-socketio >= 5.11" + "python-socketio >= 5.11", + "websocket-client >= 1.8" ] [project.urls] From 9332a336d65c4eac3f99b868ccdcacf452851ef3 Mon Sep 17 00:00:00 2001 From: Mateh Elismar Date: Fri, 10 May 2024 17:10:39 -0400 Subject: [PATCH 6/9] docs: add realtime section to readme.md. --- README.md | 40 +++++++++++++++++++++++++++++++++ src/alfred/realtime/__init__.py | 4 ++-- 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index d6404a0..7e28b31 100644 --- a/README.md +++ b/README.md @@ -171,6 +171,46 @@ In this SDK, we implement automatic retries to enhance the reliability of networ For non-idempotent methods like POST and PATCH, the SDK does not perform retries by default because doing so could potentially result in unwanted side effects or duplicate operations. If you need to enable retries for these methods under specific circumstances, please handle them cautiously in your application logic. +## Real-time Events + +The `alfred-python` library provides a way to listen to events emitted by Alfred IPA in real-time through a websockets implementation. This feature is particularly useful when you need to monitor the progress of a Job, File, or any other event that occurs within the Alfred platform. 
To see more information visit our [official documentation](https://docs.tagshelf.dev). + +### Getting started + +To get started, you need to create an instance of the `AlfredRealTimeClient` class. + +```python +from src.alfred.realtime import AlfredRealTimeClient +from src.alfred.base.config import Configuration +from src.alfred.http.typed import AuthConfiguration + +config = Configuration.v1() + +auth_config = AuthConfiguration({ + "api_key": "AXXXXXXXXXXXXXXXXXXXXXX" +}) + +client = AlfredRealTimeClient(config, auth_config, verbose=True) +``` + +### File Events +These events are specifically designed to respond to a variety of actions or status changes related to Files. To see more details about File events, visit our [official documentation](https://docs.tagshelf.dev/event-api/fileevents). +```python +client.on_file_event(lambda data: print(data)) +``` + +### Job Events +Alfred performs asynchronous document classification, extraction, and indexing on a variety of file types. The events detailed here offer insights into how a Job progresses, fails, retries, or completes its tasks. To see more details about Job events, visit our [official documentation](https://docs.tagshelf.dev/event-api/jobevents). + +```python +client.on_job_event(lambda data: print(data)) +``` + +### Custom Events +This enables you to select the specific event you wish to monitor. It's particularly beneficial when new events are introduced that have not yet received official support within the library. 
+```python +client.on("custom-event", lambda data: print(data)) +``` ## Development Setup ### Setting up the development environment diff --git a/src/alfred/realtime/__init__.py b/src/alfred/realtime/__init__.py index 9693ff7..f37bf56 100644 --- a/src/alfred/realtime/__init__.py +++ b/src/alfred/realtime/__init__.py @@ -8,10 +8,10 @@ from src.alfred.utils import logging -class AlfredRealtimeClient: +class AlfredRealTimeClient: def __init__(self, config: ConfigurationDict, auth_config: AuthConfiguration, verbose=False): """ - Initializes the AlfredRealtimeClient class. + Initializes the AlfredRealTimeClient class. Args: config (ConfigurationDict): The configuration dictionary. From 8a658774b6b6cd890d82aee056f33652003ce628 Mon Sep 17 00:00:00 2001 From: Mateh Elismar Date: Fri, 10 May 2024 17:24:42 -0400 Subject: [PATCH 7/9] fix: make private methods "private" --- src/alfred/realtime/__init__.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/src/alfred/realtime/__init__.py b/src/alfred/realtime/__init__.py index f37bf56..ceb1f50 100644 --- a/src/alfred/realtime/__init__.py +++ b/src/alfred/realtime/__init__.py @@ -28,9 +28,9 @@ def __init__(self, config: ConfigurationDict, auth_config: AuthConfiguration, ve self.logger = logging.getLogger("alfred-python") # Subscribe to connection life-cycle events. 
- self.socket.on('connect', self.on_connect) - self.socket.on('disconnect', self.on_disconnect) - self.socket.on('connect_error', self.on_connect_error) + self.socket.on('connect', self.__on_connect) + self.socket.on('disconnect', self.__on_disconnect) + self.socket.on('connect_error', self.__on_connect_error) # Establish connection with verbose output if enabled if self.verbose: @@ -40,22 +40,21 @@ def __init__(self, config: ConfigurationDict, auth_config: AuthConfiguration, ve except Exception as err: raise src.alfred.exceptions.ConnectionError(f"Could not establish connection with server: {err}") - - def on_connect(self): + def __on_connect(self): """ Handles the 'connect' event. """ if self.verbose: self.logger.debug(f"Successfully connected to: {self.base_url}") - def on_disconnect(self): + def __on_disconnect(self): """ Handles the 'disconnect' event. """ if self.verbose: self.logger.debug("Disconnected from the server.") - def on_connect_error(self, err): + def __on_connect_error(self, err): """ Handles the 'connect_error' event. @@ -67,7 +66,7 @@ def on_connect_error(self, err): self.disconnect() raise Exception(f"Failed to connect to {self.base_url}: {err}") - def _callback(self, event: str, callback): + def __callback(self, event: str, callback): """ Wrapper function to subscribe a specific event. @@ -89,7 +88,7 @@ def on_file_event(self, callback): Args: callback (function): The callback function to handle the event. """ - self._callback(EventName.FILE_EVENT.value, callback) + self.__callback(EventName.FILE_EVENT.value, callback) def on_job_event(self, callback): """ @@ -98,7 +97,7 @@ def on_job_event(self, callback): Args: callback (function): The callback function to handle the event. """ - self._callback(EventName.JOB_EVENT.value, callback) + self.__callback(EventName.JOB_EVENT.value, callback) def on(self, event: str, callback): """ @@ -108,7 +107,7 @@ def on(self, event: str, callback): event (str): The event name. 
callback (function): The callback function to handle the event. """ - self._callback(event, callback) + self.__callback(event, callback) def disconnect(self): """ From 33f5bd57eb173e303bc3c4b37ef16912de683720 Mon Sep 17 00:00:00 2001 From: Mateh Elismar Date: Mon, 13 May 2024 10:54:26 -0400 Subject: [PATCH 8/9] fix: initialize logger inside AlfredRealtimeClient so it can log important stages of the connection. --- src/alfred/realtime/__init__.py | 20 +++++++++++--------- src/alfred/typings/misc.py | 4 ++-- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/src/alfred/realtime/__init__.py b/src/alfred/realtime/__init__.py index ceb1f50..d9ee08f 100644 --- a/src/alfred/realtime/__init__.py +++ b/src/alfred/realtime/__init__.py @@ -5,7 +5,7 @@ from src.alfred.base.config import ConfigurationDict from src.alfred.base.constants import EventName from src.alfred.http.typed import AuthConfiguration -from src.alfred.utils import logging +from src.alfred.utils import logging, setup_logger class AlfredRealTimeClient: @@ -26,6 +26,12 @@ def __init__(self, config: ConfigurationDict, auth_config: AuthConfiguration, ve # Initialize logger self.logger = logging.getLogger("alfred-python") + if self.logger.level == logging.NOTSET: + setup_logger({ + "level": "DEBUG" if verbose else "INFO", + "name": "alfred-python" + }) + print(self.logger.level) # Subscribe to connection life-cycle events. self.socket.on('connect', self.__on_connect) @@ -44,15 +50,13 @@ def __on_connect(self): """ Handles the 'connect' event. """ - if self.verbose: - self.logger.debug(f"Successfully connected to: {self.base_url}") + self.logger.info(f"Successfully connected to: {self.base_url}") def __on_disconnect(self): """ Handles the 'disconnect' event. 
""" - if self.verbose: - self.logger.debug("Disconnected from the server.") + self.logger.info("Disconnected from the server.") def __on_connect_error(self, err): """ @@ -61,8 +65,7 @@ def __on_connect_error(self, err): Args: err (str): The error message. """ - if self.verbose: - self.logger.debug("Connection error: %s", err) + self.logger.info("Connection error: %s", err) self.disconnect() raise Exception(f"Failed to connect to {self.base_url}: {err}") @@ -113,6 +116,5 @@ def disconnect(self): """ Disconnects client from the server. """ - if self.verbose: - self.logger.debug("Closing connection...") + self.logger.info("Closing connection...") self.socket.disconnect() diff --git a/src/alfred/typings/misc.py b/src/alfred/typings/misc.py index 90a4920..b855fab 100644 --- a/src/alfred/typings/misc.py +++ b/src/alfred/typings/misc.py @@ -1,10 +1,10 @@ # Native imports -from typing import TypedDict +from typing import TypedDict, Union # Typed dictionaries class LoggingOptions(TypedDict): - level: int + level: Union[str, int] name: str format: str papertrail_host: str From 1b999784456a00555e8c957a8834dec84081136d Mon Sep 17 00:00:00 2001 From: MiguelTapTagshelf Date: Mon, 13 May 2024 14:55:44 -0400 Subject: [PATCH 9/9] Revamped documentation --- README.md | 332 +++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 227 insertions(+), 105 deletions(-) diff --git a/README.md b/README.md index 7e28b31..68b6409 100644 --- a/README.md +++ b/README.md @@ -2,13 +2,48 @@ Welcome to the `alfred-python` SDK, the official Python library for interfacing with Alfred, your intelligent process automation platform. This SDK provides a simple and efficient way to integrate Alfred's capabilities into your Python applications. +## Alfred + +Alfred is a powerful document processing platform that enables you to extract, index, and search through large document collections with ease. 
It offers a wide range of features, including: + +- **Job Management**: Provides a robust job management system that allows you to schedule and monitor document processing jobs. + +- **Tagging**: Tag documents based on their content, making it easy to organize and search through large document collections. + +- **Extraction**: Can extract specific data from PDFs, images, and other documents with ease using its powerful extraction engine. + +- **Indexing**: Powerful indexing engine that can index and search through millions of documents in seconds. + +- **Integration**: Alfred can be easily integrated into your existing applications using its powerful API and SDKs. + +- **Scalability**: Alfred is designed to scale with your needs, whether you're processing thousands of documents a day or millions. + +### Features + +- **Comprehensive Authentication Support**: Seamlessly handles OAuth, HMAC, and API key authentication methods, simplifying the process of connecting to the Alfred API. + +- **Domain-Specific Operations**: Offers specialized support for File and Job operations, enabling developers to intuitively manage and interact with API resources. + +- **Cross-Platform Compatibility**: Designed to be fully compatible across .NET Core, .NET Standard, and .NET Framework 4.7.2, ensuring broad usability in diverse development environments. + +- **Minimal Dependencies**: Crafted to minimize external dependencies, facilitating an easier integration and deployment process with reduced conflict risk. + ## Prerequisites -- Python v3.8+ +- Python v3.8+ installed on your development machine. +- An active Alfred API key for authentication. 
+ +## Installation + +To integrate the Alfred Python library into your Python project, install the package via PyPI: -## Usage +```bash +pip install alfred-python +``` + +# Getting Started -Check out this simple example to get up and running: +Check out this simple example to get up and running (this script creates a new session for uploading local files and uses API key authentication): ```python from alfred.rest import AlfredClient @@ -19,64 +54,130 @@ auth_config = {"api_key": "AXXXXXXXXXXXXXXXXXXXXXX"} client = AlfredClient(config, auth_config) -values = client.data_points.get_values("XXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXX") -print(values) +result = client.sessions.create() +print(result) ``` -### Sessions +## Initialize the Client (Step 1) + +Begin by creating an instance of the Alfred client using your preferred authentication method. + +### Authentication Methods + +The following examples demonstrate how to initialize the Alfred client with different authentication methods: + +- For API key authentication, use the following method with the API key: + + ```python + auth_config = {"api_key": "AXXXXXXXXXXXXXXXXXXXXXX"} + ``` + +- For OAuth authentication, specify the method and credentials explicitly. +- For HMAC authentication, provide the secret key and public key. + +## Sessions (Step 2) + +Then create a session to be able to upload files and interact with jobs. A Session is a mechanism designed for asynchronous file uploads. It serves as a container or grouping for files that are uploaded at different times or from various sources, but are all part of a single Job. To see more information visit our [official documentation](https://docs.tagshelf.dev/enpoints/deferred-session).
-#### Get session by ID +### Get existing session by ID ```python # Get a session by ID -result = client.sessions.get("XXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXX") -print(result) -``` +>>> result = client.sessions.get("XXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXX") +>>> print(result) + +{'id': '3386f840-74e2-4bd8-92a7-57e829e46d05', 'creation_date': '2024-05-10T20:32:43.85', 'update_date': '2024-05-10T20:32:43.85', 'status': 'open', 'user_name': 'API Key 1', 'company_id': '286e2ed0-3626-4faa-a745-8ebf3488fbd7', 'job_id': None} +``` -#### Create session +### Create a new session ```python # Create a session -result = client.sessions.create() -print(result) +>>> result = client.sessions.create() +>>> print(result) + +{'session_id': '3386f840-74e2-4bd8-92a7-57e829e46d05'} ``` -### Jobs +## Files (Step 3) -A Job represents a single unit of work that group one or more Files within Alfred. To see more information visit our [official documentation](https://docs.tagshelf.dev/enpoints/job). +After creating and having an open session, upload the files you want to process in your jobs. -#### Get job by ID +File is an individual document or data unit undergoing specialized operations tailored for document analysis and management. To see more information visit our [official documentation](https://docs.tagshelf.dev/enpoints/file). + +You can upload a file by 2 different methods: +- Uploading a local file from your local machine. +- Uploading a remote file from a remote location by its URL.
+ +### Get file by ID ```python -# Get a job by ID -result = client.jobs.get("XXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXX") -print(result) +# Get a file by ID +>>> result = client.files.get("XXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXX") +>>> print(result) + +{'id': '9f8447ef-4090-4a29-ac7c-7ae2f23ca110', 'creation_date': '2024-05-13T16:22:22.593', 'update_date': '2024-05-13T16:22:22.593', 'file_name': 'CertificacionMeta.pdf', 'file_name_without_extension': 'CertificacionMeta', 'blob_name': '9f8447ef-4090-4a29-ac7c-7ae2f23ca110', 'blob_url': 'https://testbox.blob.core.windows.net/tsc-286e2ed0-3626-4faa-a745-8ebf3488fbd7-files/9f8447ef-4090-4a29-ac7c-7ae2f23ca110', 'user_name': None, 'md5_hash': 'GiZkvwxF0QwxbALzdPX6gA==', 'content_type': 'application/octet-stream', 'channel': 'api', 'should_be_classified': True, 'classifier': None, 'classification_score': 0.0, 'status': 'uploaded', 'input_type': 'single_unit', 'is_duplicate': False, 'is_duplicate_by_values': False, 'duplicate_origin_id': None, 'tag_id': None, 'is_parent': False, 'parent_id': None, 'deferred_session_id': None, 'tag_name': None, 'company_id': '286e2ed0-3626-4faa-a745-8ebf3488fbd7', 'file_size': 285901, 'proposed_tag_id': None, 'proposed_tag_variance': 0.0, 'classification_score_above_deviation': True, 'confirmed_tag_id': None, 'confirmed_by': None, 'manual_classification': False, 'metadata': '', 'page_count': 1, 'page_number': -1} ``` -#### Create job +### Download file by ID ```python -job = { - "session_id": "session-id", - "propagate_metadata": True, - "merge": True, - "decompose": True, - "metadata": { - "key": "value", - }, - "channel": "channel", - "parent_file_prefix": "prefix", - "page_rotation": 90, - "container": "container", - "file_name": "file-name", - "file_names": ["file-name-1", "file-name-2"], -} +# Download a file by ID +>>> result = client.files.download("XXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXX") +>>> with open(result.get("original_name"), "wb") as f: +>>> f.write(result.get("file").getvalue()) +``` + +### 
Upload remote file +```python +# Upload a remote file +>>> result = client.files.upload({ +>>> "url": "", +>>> "metadata": {} +>>> }) +>>> print(result) +``` + +### Upload a local file + +```python +>>> with open("", "rb") as upload_file: +>>> result = client.files.upload_file({ +>>> "file": upload_file, +>>> "session_id": "XXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXX", +>>> "metadata": {} +>>> }) +>>> print(result) + +{'file_id': '9f8447ef-4090-4a29-ac7c-7ae2f23ca110'} +``` + +## Jobs (Step 4) + +A Job represents a single unit of work that groups one or more Files within Alfred. To see more information visit our [official documentation](https://docs.tagshelf.dev/enpoints/job). + +### Get job by ID + +```python +# Get a job by ID +>>> result = client.jobs.get("XXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXX") +>>> print(result) + +{'id': 'fc7f1ca9-2486-4ccb-8229-b658c3c73050', 'creation_date': '2024-05-13T17:41:20.133', 'has_job_request_info': False, 'job_request_date': None, 'update_date': '2024-05-13T17:41:25.087', 'company_id': '286e2ed0-3626-4faa-a745-8ebf3488fbd7', 'bulk_id': None, 'deferred_session_id': '5557f5c2-164b-4126-a55f-3603f2c5ee8b', 'user_name': 'API Key 1', 'channel': 'api', 'source': None, 'container': None, 'remote_file_name': None, 'remote_file_names': None, 'merge': False, 'decompose': False, 'propagate_metadata': False, 'parent_file_prefix': None, 'decomposed_page_rotation': -1, 'metadata': None, 'file_count': 1, 'file_sources_count': 1, 'metadata_objects_count': 0, 'finished_files': 1, 'files': [{'id': 'da09e1c9-35e6-4b29-83d9-2c2af27ee9e1', 'creation_date': '2024-05-13T17:41:19.727', 'update_date': '2024-05-13T17:41:27.383', 'file_name': 'CertificacionMeta-6.pdf', 'tag_name': '', 'is_parent': False, 'is_children': False, 'status': 'finished'}], 'retries': 0, 'exceeded_retries': False, 'file_urls': ['https://testbox.blob.core.windows.net/tsc-286e2ed0-3626-4faa-a745-8ebf3488fbd7-files/da09e1c9-35e6-4b29-83d9-2c2af27ee9e1'], 'error_messages': [], 'stage': 'completed', 
'priority': 'normal', 'input_source_type': 'deferred_upload', 'start_date': '2024-05-13T17:41:20.21', 'email_from': None, 'email_subject': None, 'email_body': None} +``` + +### Create new job + +```python # Create a job -result = client.jobs.create(job) -print(result) +>>> job = {"session_id": "3386f840-74e2-4bd8-92a7-57e829e46d05"} +>>> result = client.jobs.create(job) +>>> print(result) + +{'job_id': '4c7d6041-2293-42c6-991d-4c9e9af6f9d0'} ``` Here is a description for each valid argument when creating a job: @@ -95,87 +196,43 @@ Here is a description for each valid argument when creating a job: | file_name | string | Unique name of the file within an object storage source. | | file_names | string[] | Array of unique names of the files within an object storage source. | -### Files - -File is an individual document or data unit undergoing specialized operations tailored for document analysis and management. To see more information visit our [official documentation](https://docs.tagshelf.dev/enpoints/file). 
- -#### Get file by ID - -```python -# Get a file by ID -result = client.files.get("XXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXX") -print(result) -``` - -#### Download file by ID - -```python -# Download a file by ID -result = client.files.download("XXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXX") - -with open(result.get("original_name"), "wb") as f: - f.write(result.get("file").getvalue()) -``` - -#### Upload remote file - -```python -# Upload a remote file -result = client.files.upload({ - "url": "", - "metadata": {} -}) -print(result) -``` - -#### Upload a local file - ```python -with open("", "rb") as upload_file: - result = client.files.upload_file({ - "file": upload_file, - "session_id": "XXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXX", - "metadata": {} - }) - print(result) +job = { + "session_id": "session-id", + "propagate_metadata": True, + "merge": True, + "decompose": True, + "metadata": { + "key": "value", + }, + "channel": "channel", + "parent_file_prefix": "prefix", + "page_rotation": 90, + "container": "container", + "file_name": "file-name", + "file_names": ["file-name-1", "file-name-2"], +} ``` -### Data Points +## Data Points (Step 5) Data Points are the core of Alfred's platform and represent data that you want to extract. To see more information visit our [official documentation](https://docs.tagshelf.dev/enpoints/metadata). > [!IMPORTANT] > Data Points where previously known as Metadata. -#### Get Data Point by File ID +### Get Data Point by File ID ```python # Get a data point by file ID -result = client.data_points.get_values("XXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXX") -print(result) +>>> result = client.data_points.get_values("XXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXX") +>>> print(result) ``` -## Configuration - -This section provides detailed instructions and guidelines for configuring the SDK to interface effectively with the target API. - -### Retry Policy - -In this SDK, we implement automatic retries to enhance the reliability of network requests. 
However, to maintain the integrity of data transactions, retries are only enabled for HTTP methods that are considered idempotent. Idempotent methods are those that can be called multiple times without different outcomes. Thus, retries are applied only to the following HTTP methods: - -- `GET`: Retrieves data from the server without changing any state. -- `PUT`: Updates a resource in a way that it can be repeatedly updated without changing the outcome beyond the initial application. -- `DELETE`: Removes a resource and subsequent deletions of the same resource are redundant. -- `HEAD`: Fetches metadata about a resource without side-effects. -- `OPTIONS`: Retrieves supported communication options for a given URL or server without causing any side effects. - -For non-idempotent methods like POST and PATCH, the SDK does not perform retries by default because doing so could potentially result in unwanted side effects or duplicate operations. If you need to enable retries for these methods under specific circumstances, please handle them cautiously in your application logic. - -## Real-time Events - +# Real-time Events The `alfred-python` library provides a way to listen to events emitted by Alfred IPA in real-time through a websockets implementation. This feature is particularly useful when you need to monitor the progress of a Job, File, or any other event that occurs within the Alfred platform. To see more information visit our [official documentation](https://docs.tagshelf.dev). -### Getting started +## Initializing Instance To get started, you need to create an instance of the `AlfredRealTimeClient` class. @@ -193,24 +250,77 @@ auth_config = AuthConfiguration({ client = AlfredRealTimeClient(config, auth_config, verbose=True) ``` -### File Events +## File Events These events are specifically designed to respond to a variety of actions or status changes related to Files.
To see more details about File events, visit our [official documentation](https://docs.tagshelf.dev/event-api/fileevents). + ```python client.on_file_event(lambda data: print(data)) ``` -### Job Events +### File Event List + +| Event Type | Description | | | | +|------------------------------|---------------------------------------------------------|---|---|---| +| FileAddToJobEvent | Triggered when a file is added to a job for processing. | | | | +| FileCategoryCreateEvent | Occurs when a new category is created for a file. | | | | +| FileCategoryDeleteEvent | Signals the deletion of a file's category. | | | | +| FileChangeTagEvent | Indicates a change in the tag associated with a file. | | | | +| FileDoneEvent | Marks the completion of file processing. | | | | +| FileExtractedDataCreateEvent | Triggered when new data is extracted from a file. | | | | +| FileExtractedDataDeleteEvent | Occurs when extracted data from a file is deleted. | | | | +| FileFailedEvent | Indicates a failure in file processing. | | | | +| FileMoveEvent | Signals the movement of a file within the system. | | | | +| FileMoveToPendingEvent | Triggered when a file is moved to a pending state. | | | | +| FileMoveToRecycleBinEvent | Indicates movement of a file to the recycle bin. | | | | +| FilePropertyCreateEvent | Reflects the creation of a file property. | | | | +| FilePropertyDeleteEvent | Signals the deletion of a file property. | | | | +| FileRemoveTagEvent | Signals the removal of a tag from a file. | | | | +| FileStatusUpdateEvent | Indicates an update in the file's status. | | | | +| FileUpdateEvent | Triggered when a file is updated in any manner. | | | | + +## Job Events Alfred performs asynchronous document classification, extraction, and indexing on a variety of file types. The events detailed here offer insights into how a Job progresses, fails, retries, or completes its tasks. 
To see more details about Job events, visit our [official documentation](https://docs.tagshelf.dev/event-api/jobevents). ```python client.on_job_event(lambda data: print(data)) ``` -### Custom Events +### Job Event List + +| Event Name | Description | | | | +|--------------------------|-----------------------------------------------------------------------|---|---|---| +| JobCreateEvent | Triggered when a new job is instantiated for file operations. | | | | +| JobExceededRetriesEvent | Fires when job exceeds maximum retry attempts for a stage. | | | | +| JobFailedEvent | Occurs when a job halts due to an unrecoverable error. | | | | +| JobFinishedEvent | Triggered when job successfully completes all workflow stages. | | | | +| JobInvalidEvent | Fires when job fails initial validation of input files or parameters. | | | | +| JobRetryEvent | Triggered when job retries a stage after a recoverable failure. | | | | +| JobStageUpdateEvent | Occurs when job transitions from one workflow stage to another. | | | | +| JobStartEvent | Triggered when job begins its workflow and state machine. | | | | + +## Custom Events This enables you to select the specific event you wish to monitor. It's particularly beneficial when new events are introduced that have not yet received official support within the library. + ```python client.on("custom-event", lambda data: print(data)) ``` + +# Configuration + +This section provides detailed instructions and guidelines for configuring the SDK to interface effectively with the target API. + +## Retry Policy + +In this SDK, we implement automatic retries to enhance the reliability of network requests. However, to maintain the integrity of data transactions, retries are only enabled for HTTP methods that are considered idempotent. Idempotent methods are those that can be called multiple times without different outcomes.
Thus, retries are applied only to the following HTTP methods: + +- `GET`: Retrieves data from the server without changing any state. +- `PUT`: Updates a resource in a way that it can be repeatedly updated without changing the outcome beyond the initial application. +- `DELETE`: Removes a resource and subsequent deletions of the same resource are redundant. +- `HEAD`: Fetches metadata about a resource without side-effects. +- `OPTIONS`: Retrieves supported communication options for a given URL or server without causing any side effects. + +For non-idempotent methods like POST and PATCH, the SDK does not perform retries by default because doing so could potentially result in unwanted side effects or duplicate operations. If you need to enable retries for these methods under specific circumstances, please handle them cautiously in your application logic. + ## Development Setup ### Setting up the development environment @@ -286,3 +396,15 @@ python -m build ``` This command will produce a source distribution (`tar.gz`) and a wheel file (`whl`) in the `dist/` directory. These files are what you would upload to a package index like PyPI, or distribute to other developers. + +# Contributing + +Contributions to improve this library are welcome. Please feel free to fork the repository, make your changes, and submit a pull request for review. + +# License + +This project is licensed under the MIT License - see the [LICENSE](https://github.com/tagshelfsrl/dotnet-alfred-api-wrapper/blob/feature/AL-887/LICENSE) file for details. + +# Acknowledgements + +Thanks to all the contributors who invest their time into making this library better. \ No newline at end of file