#!/usr/bin/env python3
"""Sync the version recorded in project metadata files to a single source of truth.

The canonical client version lives in ``synapseclient/synapsePythonClient``
(the ``latestVersion`` field). The ARPA-H BDF metadata files duplicate that
version, which can easily drift. This script keeps them in lockstep so the
version only has to be maintained in one place.

Files kept in sync:
  * bdf.yaml       -> top-level ``version:``
  * codemeta.json  -> ``"version"``
  * CITATION.cff   -> top-level ``version:``

Usage:
  python .github/scripts/sync_version_metadata.py            # report drift only
  python .github/scripts/sync_version_metadata.py --write    # rewrite to match
  python .github/scripts/sync_version_metadata.py --check    # exit 1 on drift

``--write`` and ``--check`` are mutually exclusive.
"""
import argparse
import json
import re
import sys
from pathlib import Path
from typing import Optional

# This file lives at <repo>/.github/scripts/, so the repository root is three
# ``parent`` hops up from the resolved script path.
REPO_ROOT = Path(__file__).resolve().parent.parent.parent
VERSION_FILE = REPO_ROOT / "synapseclient" / "synapsePythonClient"

# A column-0 ``version:`` line in a YAML file. Group 1 captures any trailing
# whitespace/comment so that rewriting the value does not delete the comment
# (and an otherwise matching line is not reported as drift because of it).
_YAML_VERSION_LINE = r"(?m)^version:[^\n#]*?([ \t]*(?:#[^\n]*)?)$"

# The first ``"version": "..."`` pair in a JSON file. Group 1 keeps the key and
# the original spacing after the colon.
_JSON_VERSION_FIELD = r'("version":\s*)"[^"]*"'


def latest_version() -> str:
    """Return ``latestVersion`` from the canonical version file.

    Raises:
        OSError: if ``VERSION_FILE`` cannot be read.
        json.JSONDecodeError: if it is not valid JSON.
        KeyError: if it has no ``latestVersion`` key.
    """
    return json.loads(VERSION_FILE.read_text(encoding="utf-8"))["latestVersion"]


def _replace(path: Path, pattern: str, replacement: str, write: bool) -> bool:
    """Apply a single regex substitution to ``path``.

    Args:
        path: File to inspect (read as UTF-8).
        pattern: Regular expression locating the version field.
        replacement: ``re.sub`` replacement template for the first match.
        write: When true, persist the substituted text if it differs.

    Returns:
        True if the substituted text differs from the file's current text.

    Raises:
        ValueError: if ``pattern`` does not match anywhere in the file.
    """
    text = path.read_text(encoding="utf-8")
    new_text, count = re.subn(pattern, replacement, text, count=1)
    if count == 0:
        raise ValueError(f"Could not find a version field to update in {path.name}")
    changed = new_text != text
    if changed and write:
        path.write_text(new_text, encoding="utf-8")
    return changed


def sync(version: str, write: bool, root: Optional[Path] = None) -> list[str]:
    """Sync all metadata files to ``version``.

    Args:
        version: The version string every metadata file should carry.
        write: When true, rewrite drifted files; otherwise only detect drift.
        root: Directory holding the metadata files. Defaults to ``REPO_ROOT``.

    Returns:
        Names of the files whose version field differed from ``version``,
        in the order bdf.yaml, codemeta.json, CITATION.cff.

    Raises:
        ValueError: if an existing metadata file has no recognisable version field.
    """
    base = REPO_ROOT if root is None else root
    # ``re.sub`` expands backslash escapes in the replacement template; double
    # any backslash so the version is always inserted literally.
    literal = version.replace("\\", "\\\\")
    yaml_replacement = rf'version: "{literal}"\g<1>'
    json_replacement = rf'\g<1>"{literal}"'
    targets = [
        # (path, regex, replacement) — each touches only the version value.
        (base / "bdf.yaml", _YAML_VERSION_LINE, yaml_replacement),
        (base / "codemeta.json", _JSON_VERSION_FIELD, json_replacement),
        (base / "CITATION.cff", _YAML_VERSION_LINE, yaml_replacement),
    ]
    drifted = []
    for path, pattern, replacement in targets:
        # Metadata files that are absent are skipped rather than treated as drift.
        if not path.exists():
            continue
        if _replace(path, pattern, replacement, write):
            drifted.append(path.name)
    return drifted


def main(argv: Optional[list[str]] = None) -> int:
    """Command-line entry point.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        Process exit status: 1 when ``--check`` is given and at least one file
        is out of sync, otherwise 0.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    # ``--check`` promises that nothing is modified, so it cannot be combined
    # with ``--write``; argparse rejects the combination with exit status 2.
    mode = parser.add_mutually_exclusive_group()
    mode.add_argument(
        "--write", action="store_true", help="Rewrite metadata files to match."
    )
    mode.add_argument(
        "--check",
        action="store_true",
        help="Exit non-zero if any file is out of sync (no changes made).",
    )
    args = parser.parse_args(argv)

    version = latest_version()
    drifted = sync(version, write=args.write)

    if not drifted:
        print(f"Version metadata already in sync with {version}.")
        return 0

    if args.write:
        print(f"Updated {', '.join(drifted)} to version {version}.")
        return 0

    print(
        f"Version metadata out of sync with {version}: {', '.join(drifted)}.\n"
        "Run: python .github/scripts/sync_version_metadata.py --write"
    )
    return 1 if args.check else 0


if __name__ == "__main__":
    sys.exit(main())
#
# When a pull request bumps the canonical version (or edits a metadata file),
# this workflow rewrites the metadata versions to match and commits the fix
# back onto the same PR branch. For pull requests from forks (where it cannot
# push) it instead fails with instructions, so drift can never merge silently.
name: Sync version metadata

on:
  pull_request:
    paths:
      - "synapseclient/synapsePythonClient"
      - "bdf.yaml"
      - "codemeta.json"
      - "CITATION.cff"
      - ".github/scripts/sync_version_metadata.py"
      - ".github/workflows/sync-version-metadata.yml"
  workflow_dispatch:

permissions:
  contents: write

jobs:
  sync:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          # Same-repo PRs: check out the PR branch itself so the fix can be
          # pushed back to it. Fork PRs and manual runs: pass an empty ref so
          # checkout uses its default; a fork's branch name does not exist in
          # this repository (or names an unrelated branch here).
          ref: ${{ github.event.pull_request.head.repo.full_name == github.repository && github.head_ref || '' }}

      - uses: actions/setup-python@v5
        with:
          python-version: "3.x"

      - name: Sync metadata versions to latestVersion
        # Only rewrite when the result can be pushed. Rewriting on a fork PR
        # would make the later --check step see an already-synced working
        # tree and pass no matter what the PR actually contains.
        if: >-
          github.event_name == 'workflow_dispatch' ||
          github.event.pull_request.head.repo.full_name == github.repository
        run: python .github/scripts/sync_version_metadata.py --write

      - name: Commit any changes back to the PR branch
        # Only same-repo branches grant a writable token; forks cannot be pushed to.
        if: >-
          github.event_name == 'workflow_dispatch' ||
          github.event.pull_request.head.repo.full_name == github.repository
        run: |
          # Look only at the metadata files: unrelated untracked files must not
          # trigger a commit attempt that would fail with nothing staged.
          if git diff --quiet -- bdf.yaml codemeta.json CITATION.cff; then
            echo "Version metadata already in sync."
          else
            git config user.name "github-actions[bot]"
            git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
            git add bdf.yaml codemeta.json CITATION.cff
            git commit -m "Sync version metadata to latestVersion [skip ci]"
            git push
          fi

      - name: Fail if a fork PR left metadata out of sync
        # Runs against the untouched checkout (the --write step is skipped for
        # forks), so real drift makes this step exit 1 with instructions.
        if: >-
          github.event_name == 'pull_request' &&
          github.event.pull_request.head.repo.full_name != github.repository
        run: python .github/scripts/sync_version_metadata.py --check
+ +domains: + - "biomedical-data-management" + - "research-data-sharing" + - "collaborative-research" + - "data-provenance" + - "open-science" + +llm: + usesLlm: false + bringOwnKey: false + +input: + - name: "Local research data files" + dataCategory: "general-biomedical-data" + dataClass: "other" + dataType: "various" + dataStandard: + - "Synapse" + format: + - "various" + proprietary: false + - name: "Tabular metadata and annotations" + dataCategory: "general-biomedical-data" + dataClass: "tabular" + dataType: "various" + dataStandard: + - "Synapse" + format: + - "csv" + - "tsv" + proprietary: false + +output: + - name: "Synapse entities (Projects, Folders, Files, Tables)" + dataCategory: "general-biomedical-data" + dataClass: "structured" + dataType: "various" + dataStandard: + - "Synapse" + format: + - "json" + proprietary: false + - name: "Downloaded data files" + dataCategory: "general-biomedical-data" + dataClass: "other" + dataType: "various" + dataStandard: + - "Synapse" + format: + - "various" + proprietary: false + +collaborations: + - name: "Synapse platform" + description: "The client communicates with the Synapse platform, operated by Sage Bionetworks, through its public REST API." + tools: + - "Synapse REST API" + collaboration_purpose: "Data storage, sharing, governance, and provenance tracking on Synapse." + +# TODO: confirm funding source, agreement/award number, and link with the Sage program lead. 
+funding: + source: "TODO: confirm funding source" + agreement: "TODO: confirm agreement / award number" + link: "https://example.org/TODO-confirm-funding-link" + +maturity: + beginning_maturity: 7 + current_maturity: 8 # TODO: confirm official BDF TRL with Sage program lead + final_maturity: 9 + +license: + license_type: "Apache-2.0" + link: "https://github.com/Sage-Bionetworks/synapsePythonClient/blob/master/LICENSE" + notes: "Apache License 2.0" + +credit: + - name: "Sage Bionetworks" + email: + - "platform@sagebase.org" # TODO: confirm preferred BDF contact email + url: "https://sagebionetworks.org" + role: "Maintainer" + org: "Sage Bionetworks" + note: "Maintained by the Synapse Engineering Team at Sage Bionetworks." + +target_users: + primary_user: + user_type: "researcher" + technical_literacy_level: + - "intermediate" + - "advanced" + biomedical_literacy_level: + - "intermediate" + - "expert" + secondary_user: + user_type: "data-curator" + technical_literacy_level: + - "advanced" + biomedical_literacy_level: + - "intermediate" + +media: + - link: "https://python-docs.synapse.org" diff --git a/codemeta.json b/codemeta.json new file mode 100644 index 000000000..3d21a9211 --- /dev/null +++ b/codemeta.json @@ -0,0 +1,35 @@ +{ + "@context": "https://doi.org/10.5063/schema/codemeta-2.0", + "@type": "SoftwareSourceCode", + "name": "Synapse Python Client (synapseclient)", + "description": "A Python client for Sage Bionetworks' Synapse, a collaborative, open-source research platform that allows teams to share data, track analyses, and collaborate. 
The client can be used as a library for software that communicates with Synapse or as a command-line utility.", + "version": "4.13.0", + "codeRepository": "https://github.com/Sage-Bionetworks/synapsePythonClient", + "url": "https://python-docs.synapse.org", + "downloadUrl": "https://pypi.org/project/synapseclient/", + "programmingLanguage": "Python", + "runtimePlatform": "Python 3", + "applicationCategory": "Biomedical data management", + "keywords": [ + "synapse", + "biomedical-data-management", + "research-data-sharing", + "collaborative-research", + "data-provenance", + "open-science" + ], + "license": "https://spdx.org/licenses/Apache-2.0", + "author": [ + { + "@type": "Organization", + "name": "Sage Bionetworks", + "url": "https://sagebionetworks.org", + "email": "dpe@sagebase.org" + } + ], + "maintainer": { + "@type": "Organization", + "name": "Sage Bionetworks", + "url": "https://sagebionetworks.org" + } +}