Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Justfile
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@ cli *ARGS:
init FIELD:
uv run osa field init {{FIELD}}

# Wipe all local OSA data (database, vectors, cache)
wipe:
uv run osa local clean --force

# === Complete Workflow ===

# Set up everything for first time (start DB + run migrations)
Expand Down
5 changes: 0 additions & 5 deletions ingestors/__init__.py

This file was deleted.

6 changes: 0 additions & 6 deletions ingestors/geo_entrez/__init__.py

This file was deleted.

6 changes: 3 additions & 3 deletions osa/application/api/rest/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from osa.config import Config, configure_logging
from osa.domain.shared.error import OSAError
from osa.infrastructure.event.worker import BackgroundWorker
from osa.infrastructure.ingest.discovery import validate_ingestors_at_startup
from osa.infrastructure.source.discovery import validate_sources_at_startup
from osa.util.di.fastapi import setup_dishka

logger = logging.getLogger(__name__)
Expand All @@ -37,8 +37,8 @@ def create_app() -> FastAPI:
configure_logging(config.logging)
logger.info("Starting OSA server: %s v%s", config.server.name, config.server.version)

# Validate ingestor configs at startup (fail fast with clear errors)
validate_ingestors_at_startup(config.ingestors)
# Validate source configs at startup (fail fast with clear errors)
validate_sources_at_startup(config.sources)

app_instance = FastAPI(
title=config.server.name,
Expand Down
4 changes: 2 additions & 2 deletions osa/application/di.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from osa.domain.validation.util.di import ValidationProvider
from osa.infrastructure.event.di import EventProvider
from osa.infrastructure.index.di import IndexProvider
from osa.infrastructure.ingest.di import IngestProvider
from osa.infrastructure.source.di import SourceProvider
from osa.infrastructure.oci import OciProvider
from osa.infrastructure.persistence import PersistenceProvider
from osa.util.di.scope import Scope
Expand All @@ -22,7 +22,7 @@ def create_container() -> AsyncContainer:
PersistenceProvider(),
OciProvider(),
IndexProvider(),
IngestProvider(),
SourceProvider(),
EventProvider(),
DepositionProvider(),
ValidationProvider(),
Expand Down
12 changes: 6 additions & 6 deletions osa/cli/commands/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@
# logging:
# level: "INFO"

# GEO Ingestor - pulls from NCBI Gene Expression Omnibus via Entrez API
ingestors:
- ingestor: geo-entrez
# GEO Source - pulls from NCBI Gene Expression Omnibus via Entrez API
sources:
- source: geo-entrez
config:
record_type: gse # gse (~250k all) or gds (~5k curated)
email: your@email.com # Required by NCBI - please update this
Expand Down Expand Up @@ -77,9 +77,9 @@
# logging:
# level: "INFO"

# Add your ingestors here:
# ingestors:
# - ingestor: geo-entrez
# Add your sources here:
# sources:
# - source: geo-entrez
# config:
# email: your@email.com

Expand Down
28 changes: 14 additions & 14 deletions osa/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,41 +34,41 @@ class IndexConfig(BaseModel):


# =============================================================================
# Ingest Configuration
# Source Configuration
# =============================================================================


class IngestSchedule(BaseModel):
"""Schedule configuration for an ingestor."""
class SourceSchedule(BaseModel):
"""Schedule configuration for a source."""

cron: str # Cron expression (e.g., "0 * * * *" for hourly)
limit: int | None = None # Optional limit per scheduled run


class InitialRun(BaseModel):
"""Initial run configuration for an ingestor."""
"""Initial run configuration for a source."""

enabled: bool = False
limit: int | None = 10 # Limit records for initial run
since: datetime | None = None # Optional: bootstrap from specific date


class IngestConfig(BaseModel):
"""Configuration for an ingestor.
class SourceConfig(BaseModel):
"""Configuration for a source.

The `config` field is validated at runtime based on the ingestor type,
allowing external ingestors to define their own config schemas.
The `config` field is validated at runtime based on the source type,
allowing external sources to define their own config schemas.
"""

ingestor: str # "geo-entrez", etc. - matches entry point name
config: dict[str, Any] = {} # Validated at runtime by ingestor's config_class
schedule: IngestSchedule | None = None # Optional: if set, runs on schedule
source: str # "geo-entrez", etc. - matches entry point name
config: dict[str, Any] = {} # Validated at runtime by source's config_class
schedule: SourceSchedule | None = None # Optional: if set, runs on schedule
initial_run: InitialRun | None = None # Optional: if set, runs on startup

@property
def name(self) -> str:
"""The ingestor name (same as ingestor type for now)."""
return self.ingestor
"""The source name (same as source type for now)."""
return self.source


# =============================================================================
Expand Down Expand Up @@ -147,7 +147,7 @@ class Config(BaseSettings):
database: DatabaseConfig = DatabaseConfig()
logging: LoggingConfig = LoggingConfig()
indexes: list[IndexConfig] = [] # list of index configs
ingestors: list[IngestConfig] = [] # list of ingestor configs
sources: list[SourceConfig] = [] # list of source configs

model_config = {
"env_prefix": "OSA_",
Expand Down
2 changes: 1 addition & 1 deletion osa/domain/deposition/command/submit.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ async def run(self, cmd: SubmitDeposition) -> DepositionSubmitted:
event = DepositionSubmittedEvent(
id=EventId(uuid4()),
deposition_id=cmd.srn,
metadata={}, # Empty metadata for direct submission (not from ingest)
metadata={}, # Empty metadata for direct submission (not from source)
)
await self.outbox.append(event)

Expand Down
2 changes: 1 addition & 1 deletion osa/domain/deposition/model/aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class Deposition(Aggregate, Generic[T]):
metadata: T
files: list[DepositionFile] = []
record_srn: RecordSRN | None = None
provenance: dict[str, Any] = {} # Ingestor info, source tracking
provenance: dict[str, Any] = {} # Source info, provenance tracking

def remove_all_files(self) -> None:
self.files = []
1 change: 0 additions & 1 deletion osa/domain/ingest/__init__.py

This file was deleted.

6 changes: 0 additions & 6 deletions osa/domain/ingest/event/__init__.py

This file was deleted.

6 changes: 0 additions & 6 deletions osa/domain/ingest/listener/__init__.py

This file was deleted.

22 changes: 0 additions & 22 deletions osa/domain/ingest/listener/ingest_listener.py

This file was deleted.

65 changes: 0 additions & 65 deletions osa/domain/ingest/listener/initial_ingest_listener.py

This file was deleted.

5 changes: 0 additions & 5 deletions osa/domain/ingest/model/__init__.py

This file was deleted.

29 changes: 0 additions & 29 deletions osa/domain/ingest/model/registry.py

This file was deleted.

5 changes: 0 additions & 5 deletions osa/domain/ingest/schedule/__init__.py

This file was deleted.

53 changes: 0 additions & 53 deletions osa/domain/ingest/schedule/ingest_schedule.py

This file was deleted.

5 changes: 0 additions & 5 deletions osa/domain/ingest/service/__init__.py

This file was deleted.

Loading
Loading