-
Notifications
You must be signed in to change notification settings - Fork 75
Add BDF Toolbox metadata (bdf.yaml + codemeta.json + CITATION.cff) #1403
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from all commits
db0e851
c8c8175
551aeef
3d74f4e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,98 @@ | ||
| #!/usr/bin/env python3 | ||
| """Sync the version recorded in project metadata files to a single source of truth. | ||
|
|
||
| The canonical client version lives in ``synapseclient/synapsePythonClient`` | ||
| (the ``latestVersion`` field). The ARPA-H BDF metadata files duplicate that | ||
| version, which can easily drift. This script keeps them in lockstep so the | ||
| version only has to be maintained in one place. | ||
|
|
||
| Files kept in sync: | ||
| * bdf.yaml -> top-level ``version:`` | ||
| * codemeta.json -> ``"version"`` | ||
| * CITATION.cff -> top-level ``version:`` | ||
|
|
||
| Usage: | ||
| python .github/scripts/sync_version_metadata.py # report drift only | ||
| python .github/scripts/sync_version_metadata.py --write # rewrite to match | ||
| python .github/scripts/sync_version_metadata.py --check # exit 1 on drift | ||
| """ | ||
| import argparse | ||
| import json | ||
| import re | ||
| import sys | ||
| from pathlib import Path | ||
|
|
||
# Repository root. The usage examples place this script under
# ``.github/scripts/``, so the root is two levels above the script's directory.
REPO_ROOT = Path(__file__).resolve().parents[2]
# Canonical version file; its ``latestVersion`` field is the source of truth.
VERSION_FILE = REPO_ROOT.joinpath("synapseclient", "synapsePythonClient")
|
|
||
|
|
||
def latest_version() -> str:
    """Read the canonical version file and return its ``latestVersion`` value.

    The file is decoded as UTF-8 and parsed as JSON. A missing
    ``latestVersion`` key raises ``KeyError``.
    """
    raw = VERSION_FILE.read_text(encoding="utf-8")
    metadata = json.loads(raw)
    return metadata["latestVersion"]
|
|
||
|
|
||
def _replace(path: Path, pattern: str, replacement: str, write: bool) -> bool:
    """Substitute the first match of ``pattern`` in ``path`` with ``replacement``.

    Args:
        path: UTF-8 text file to inspect.
        pattern: Regular expression that locates the version field.
        replacement: Replacement template handed to the regex engine.
        write: When true, save the result back to ``path`` if it differs.

    Returns:
        True when the substituted text differs from the file's current text.

    Raises:
        ValueError: If ``pattern`` does not match anywhere in the file.
    """
    original = path.read_text(encoding="utf-8")
    updated, hits = re.compile(pattern).subn(replacement, original, count=1)
    if not hits:
        raise ValueError(f"Could not find a version field to update in {path.name}")
    # A match whose replacement equals the existing text means "already in sync".
    if updated == original:
        return False
    if write:
        path.write_text(updated, encoding="utf-8")
    return True
|
|
||
|
|
||
def sync(version: str, write: bool) -> list[str]:
    """Bring every metadata file's version field in line with ``version``.

    Args:
        version: Version string the metadata files should carry.
        write: When true, rewrite files that differ; otherwise only detect.

    Returns:
        Names of the files whose version field did not already match, in
        the order they were examined. Files that do not exist are skipped.
    """
    # bdf.yaml and CITATION.cff share the same top-level ``version:`` line shape.
    yaml_pattern = r"(?m)^version:[^\n]*$"
    yaml_line = f'version: "{version}"'
    # file name -> (regex, replacement); each rule touches only the version value.
    rules = {
        "bdf.yaml": (yaml_pattern, yaml_line),
        "codemeta.json": (r'("version":\s*)"[^"]*"', rf'\1"{version}"'),
        "CITATION.cff": (yaml_pattern, yaml_line),
    }
    out_of_sync = []
    for file_name, (pattern, replacement) in rules.items():
        target = REPO_ROOT / file_name
        if target.exists() and _replace(target, pattern, replacement, write):
            out_of_sync.append(target.name)
    return out_of_sync
|
|
||
|
|
||
def main() -> int:
    """Parse command-line flags, run the sync, and report the outcome.

    Returns:
        Process exit status: 1 when ``--check`` is given (without ``--write``)
        and at least one file is out of sync; 0 in every other case.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        # The module docstring contains a file list and usage examples; the
        # default formatter would collapse them into one re-wrapped paragraph.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--write", action="store_true", help="Rewrite metadata files to match."
    )
    parser.add_argument(
        "--check",
        action="store_true",
        help="Exit non-zero if any file is out of sync (no changes made).",
    )
    args = parser.parse_args()

    version = latest_version()
    drifted = sync(version, write=args.write)

    if not drifted:
        print(f"Version metadata already in sync with {version}.")
        return 0

    if args.write:
        print(f"Updated {', '.join(drifted)} to version {version}.")
        return 0

    print(
        f"Version metadata out of sync with {version}: {', '.join(drifted)}.\n"
        "Run: python .github/scripts/sync_version_metadata.py --write"
    )
    # Without --check, drift is reported but does not fail the run.
    return 1 if args.check else 0
|
|
||
|
|
||
| if __name__ == "__main__": | ||
| sys.exit(main()) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,60 @@ | ||
| # Keeps the version in the BDF metadata files (bdf.yaml, codemeta.json, | ||
| # CITATION.cff) in sync with the single source of truth: | ||
| # synapseclient/synapsePythonClient ("latestVersion"). | ||
| # | ||
| # When a pull request bumps the canonical version (or edits a metadata file), | ||
| # this workflow rewrites the metadata versions to match and commits the fix | ||
| # back onto the same PR branch. For pull requests from forks (where it cannot | ||
| # push) it instead fails with instructions, so drift can never merge silently. | ||
| name: Sync version metadata | ||
|
|
||
| on: | ||
| pull_request: | ||
| paths: | ||
| - "synapseclient/synapsePythonClient" | ||
| - "bdf.yaml" | ||
| - "codemeta.json" | ||
| - "CITATION.cff" | ||
| - ".github/scripts/sync_version_metadata.py" | ||
| - ".github/workflows/sync-version-metadata.yml" | ||
| workflow_dispatch: | ||
|
|
||
| permissions: | ||
| contents: write | ||
|
|
||
jobs:
  sync:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          # Fetch the PR head from the repository that owns it. A fork's branch
          # does not exist in the base repository, so resolving head_ref there
          # would fail or pick up an unrelated branch of the same name. Outside
          # pull requests this falls back to the current repository.
          repository: ${{ github.event.pull_request.head.repo.full_name || github.repository }}
          ref: ${{ github.head_ref }}

      - uses: actions/setup-python@v5
        with:
          python-version: "3.x"

      - name: Fail if a fork PR left metadata out of sync
        # Must run against the untouched checkout: running --write first would
        # fix the working tree and make this check pass unconditionally.
        if: >-
          github.event_name == 'pull_request' &&
          github.event.pull_request.head.repo.full_name != github.repository
        run: python .github/scripts/sync_version_metadata.py --check

      - name: Sync metadata versions to latestVersion
        # Only rewrite when the result can be pushed back (same-repo branches).
        if: >-
          github.event_name == 'workflow_dispatch' ||
          github.event.pull_request.head.repo.full_name == github.repository
        run: python .github/scripts/sync_version_metadata.py --write

      - name: Commit any changes back to the PR branch
        # Only same-repo branches grant a writable token; forks cannot be pushed to.
        if: >-
          github.event_name == 'workflow_dispatch' ||
          github.event.pull_request.head.repo.full_name == github.repository
        run: |
          if [ -n "$(git status --porcelain)" ]; then
            git config user.name "github-actions[bot]"
            git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
            git add bdf.yaml codemeta.json CITATION.cff
            git commit -m "Sync version metadata to latestVersion [skip ci]"
            git push
          else
            echo "Version metadata already in sync."
          fi
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,12 @@ | ||
| cff-version: 1.2.0 | ||
| message: "If you use this software, please cite it using the metadata from this file." | ||
| title: "Synapse Python Client (synapseclient)" | ||
| type: software | ||
| authors: | ||
| # TODO: add individual authors (names / ORCIDs) in addition to the organization below. | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Which individual authors should be going into this section? Really it would be all of @Sage-Bionetworks/dpe |
||
| - name: "Sage Bionetworks" | ||
| website: "https://sagebionetworks.org" | ||
| repository-code: "https://github.com/Sage-Bionetworks/synapsePythonClient" | ||
| url: "https://python-docs.synapse.org" | ||
| license: Apache-2.0 | ||
| version: "4.13.0" | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,119 @@ | ||
| # ARPA-H BDF Toolbox metadata for the Synapse Python Client. | ||
| # Conforms to the ARPA-H-BDF/bdfkb-schema `SystemMetadata` class | ||
| # (src/bdfkb_schema/schema/bdfkb_schema.yaml, tree_root: SystemMetadata). | ||
| # Validate with: | ||
| # linkml-validate -s <path>/bdfkb_schema.yaml -C SystemMetadata bdf.yaml | ||
| version: "4.13.0" | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Another version value to maintain - Ideally it can come from https://github.com/Sage-Bionetworks/synapsePythonClient/blob/develop/synapseclient/synapsePythonClient |
||
|
|
||
| tool: | ||
| name: "Synapse Python Client (synapseclient)" | ||
| repo_url: "https://github.com/Sage-Bionetworks/synapsePythonClient" | ||
| api_url: "https://repo-prod.prod.sagebase.org/repo/v1" | ||
| package_url: "https://pypi.org/project/synapseclient/" | ||
| tool_homepage: "https://www.synapse.org" | ||
| documentation_url: "https://python-docs.synapse.org" | ||
| tool_type: "Library" | ||
| tool_notes: "Python client library and command-line interface (CLI) for the Synapse collaborative research platform. Provides synchronous and asynchronous (OOP) APIs for managing Projects, Folders, Files, Tables, and provenance." | ||
|
|
||
| domains: | ||
| - "biomedical-data-management" | ||
| - "research-data-sharing" | ||
| - "collaborative-research" | ||
| - "data-provenance" | ||
| - "open-science" | ||
|
|
||
| llm: | ||
| usesLlm: false | ||
| bringOwnKey: false | ||
|
|
||
| input: | ||
| - name: "Local research data files" | ||
| dataCategory: "general-biomedical-data" | ||
| dataClass: "other" | ||
| dataType: "various" | ||
| dataStandard: | ||
| - "Synapse" | ||
| format: | ||
| - "various" | ||
| proprietary: false | ||
| - name: "Tabular metadata and annotations" | ||
| dataCategory: "general-biomedical-data" | ||
| dataClass: "tabular" | ||
| dataType: "various" | ||
| dataStandard: | ||
| - "Synapse" | ||
| format: | ||
| - "csv" | ||
| - "tsv" | ||
| proprietary: false | ||
|
|
||
| output: | ||
| - name: "Synapse entities (Projects, Folders, Files, Tables)" | ||
| dataCategory: "general-biomedical-data" | ||
| dataClass: "structured" | ||
| dataType: "various" | ||
| dataStandard: | ||
| - "Synapse" | ||
| format: | ||
| - "json" | ||
| proprietary: false | ||
| - name: "Downloaded data files" | ||
| dataCategory: "general-biomedical-data" | ||
| dataClass: "other" | ||
| dataType: "various" | ||
| dataStandard: | ||
| - "Synapse" | ||
| format: | ||
| - "various" | ||
| proprietary: false | ||
|
|
||
| collaborations: | ||
| - name: "Synapse platform" | ||
| description: "The client communicates with the Synapse platform, operated by Sage Bionetworks, through its public REST API." | ||
| tools: | ||
| - "Synapse REST API" | ||
| collaboration_purpose: "Data storage, sharing, governance, and provenance tracking on Synapse." | ||
|
|
||
| # TODO: confirm funding source, agreement/award number, and link with the Sage program lead. | ||
| funding: | ||
| source: "TODO: confirm funding source" | ||
| agreement: "TODO: confirm agreement / award number" | ||
| link: "https://example.org/TODO-confirm-funding-link" | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do these need to be fixed? |
||
|
|
||
| maturity: | ||
| beginning_maturity: 7 | ||
| current_maturity: 8 # TODO: confirm official BDF TRL with Sage program lead | ||
| final_maturity: 9 | ||
|
|
||
| license: | ||
| license_type: "Apache-2.0" | ||
| link: "https://github.com/Sage-Bionetworks/synapsePythonClient/blob/master/LICENSE" | ||
| notes: "Apache License 2.0" | ||
|
|
||
| credit: | ||
| - name: "Sage Bionetworks" | ||
| email: | ||
| - "platform@sagebase.org" # TODO: confirm preferred BDF contact email | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should this be dpe@sagebase.org? |
||
| url: "https://sagebionetworks.org" | ||
| role: "Maintainer" | ||
| org: "Sage Bionetworks" | ||
| note: "Maintained by the Synapse Engineering Team at Sage Bionetworks." | ||
|
|
||
| target_users: | ||
| primary_user: | ||
| user_type: "researcher" | ||
| technical_literacy_level: | ||
| - "intermediate" | ||
| - "advanced" | ||
| biomedical_literacy_level: | ||
| - "intermediate" | ||
| - "expert" | ||
| secondary_user: | ||
| user_type: "data-curator" | ||
| technical_literacy_level: | ||
| - "advanced" | ||
| biomedical_literacy_level: | ||
| - "intermediate" | ||
|
|
||
| media: | ||
| - link: "https://python-docs.synapse.org" | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,35 @@ | ||
| { | ||
| "@context": "https://doi.org/10.5063/schema/codemeta-2.0", | ||
| "@type": "SoftwareSourceCode", | ||
| "name": "Synapse Python Client (synapseclient)", | ||
| "description": "A Python client for Sage Bionetworks' Synapse, a collaborative, open-source research platform that allows teams to share data, track analyses, and collaborate. The client can be used as a library for software that communicates with Synapse or as a command-line utility.", | ||
| "version": "4.13.0", | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is going to be another annoying spot to maintain a version that can easily diverge from the actual deployed version. Is there a way to dynamically bring this version in from https://github.com/Sage-Bionetworks/synapsePythonClient/blob/develop/synapseclient/synapsePythonClient so we only maintain it in a single place? |
||
| "codeRepository": "https://github.com/Sage-Bionetworks/synapsePythonClient", | ||
| "url": "https://python-docs.synapse.org", | ||
| "downloadUrl": "https://pypi.org/project/synapseclient/", | ||
| "programmingLanguage": "Python", | ||
| "runtimePlatform": "Python 3", | ||
| "applicationCategory": "Biomedical data management", | ||
| "keywords": [ | ||
| "synapse", | ||
| "biomedical-data-management", | ||
| "research-data-sharing", | ||
| "collaborative-research", | ||
| "data-provenance", | ||
| "open-science" | ||
| ], | ||
| "license": "https://spdx.org/licenses/Apache-2.0", | ||
| "author": [ | ||
| { | ||
| "@type": "Organization", | ||
| "name": "Sage Bionetworks", | ||
| "url": "https://sagebionetworks.org", | ||
| "email": "dpe@sagebase.org" | ||
| } | ||
| ], | ||
| "maintainer": { | ||
| "@type": "Organization", | ||
| "name": "Sage Bionetworks", | ||
| "url": "https://sagebionetworks.org" | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is good in theory, but it doesn't match the typical workflow that we follow for updating the version and going through the release process.
https://sagebionetworks.jira.com/wiki/spaces/SYNPY/pages/643498030/Synapse+Python+Client+Staging+Validation+Production
Some AI suggestions I was seeing:
synapsePythonClient/.github/workflows/build.yml
Lines 396 to 403 in 7c60563
@allaway I see in https://github.com/ARPA-H-BDF/bdfkb-schema/blob/6afe39430894512131b7f86f75ac165e8db65551/src/bdfkb_schema/schema/bdfkb_schema.yaml#L101-L104 that version is a required field. However, what I am not certain about is whether the version in source control has to be correct, or just the version in the packaged artifact at https://pypi.org/project/synapseclient/.
If source control has to be correct, then I think the most straightforward way is to just update our release process to do this manually in 3 spots.
If source control doesn't have to be correct then packaging the artifact with the correct version/files would be nice for maintainability.