From 9313aa77b65ab829657b821becc12b828bde7cb7 Mon Sep 17 00:00:00 2001 From: Fabio Andrijauskas Date: Mon, 30 Mar 2026 10:07:07 -0700 Subject: [PATCH] Using Pelican API to list OSDF Facilities --- campus-contributions/osdf.py | 138 +++++++++++++++++++++++------------ 1 file changed, 93 insertions(+), 45 deletions(-) diff --git a/campus-contributions/osdf.py b/campus-contributions/osdf.py index 296c8b5..e6b1ab8 100755 --- a/campus-contributions/osdf.py +++ b/campus-contributions/osdf.py @@ -1,64 +1,112 @@ -#!/usr/bin/env python +""" +osdf_facilities.py -from __future__ import print_function +Lists unique institutions that have a registered cache or origin in OSDF. -import collections -import operator +Sources: + Registry : https://osdf-registry.osg-htc.org/api/v1.0/registry_ui/servers + Institutions: https://topology-institutions.osg-htc.org/api/institution_ids + +Usage: + python osdf_facilities.py # all (cache + origin) + python osdf_facilities.py --type cache # cache only + python osdf_facilities.py --type origin # origin only +""" + +import argparse +import json import sys -import os +import urllib.request +import urllib.error -try: - from urllib.request import urlopen -except ImportError: - from urllib2 import urlopen -import xml.etree.ElementTree as et +REGISTRY_URL = "https://osdf-registry.osg-htc.org/api/v1.0/registry_ui/servers" +INSTITUTIONS_URL = "https://topology-institutions.osg-htc.org/api/institution_ids" -_topology_host = "topology.opensciencegrid.org" -_rgsummary_url = 'https://{host}/rgsummary/xml'.format(host=_topology_host) -_active_params = [ - ('active', 'on'), # filter resources by "Active" field - ('active_value', '1' ), # require Active: true - ('disable', 'on'), # filter resources by "Disable" field - ('disable_value', '0' ), # require Disable: false -] -_xml_url = "%s?%s" % (_rgsummary_url, '&'.join(map('='.join, _active_params))) +def fetch_json(url: str) -> list | dict: + try: + req = urllib.request.Request(url, headers={"Accept": "application/json"}) + with urllib.request.urlopen(req, timeout=30) as resp: + return json.loads(resp.read().decode()) + except urllib.error.URLError as e: + print(f"ERROR: could not reach {url}\n {e}", file=sys.stderr) + sys.exit(1) -_osdf_service_types = [ - "XRootD cache server", - "XRootD origin server", - "Pelican cache", - "Pelican origin", -] +def build_institution_map(institutions: list) -> dict: + """Map institution ID URL -> human-readable name.""" + return { + inst["id"]: inst["name"] + for inst in institutions + if inst.get("id") and inst.get("name") + } -def getxml(): - return urlopen(_xml_url).read() +def get_facilities(server_type: str | None) -> list[str]: + """ + Return a sorted, deduplicated list of institution names + that have at least one approved cache or origin registration. + server_type: None = both, "cache" = caches only, "origin" = origins only + """ + registry = fetch_json(REGISTRY_URL) + institutions = fetch_json(INSTITUTIONS_URL) -def get_osdf_facilities(xmltxt=None): - if xmltxt is None: - xmltxt = getxml() - xmltree = et.fromstring(xmltxt) - return set( - rg.find("Facility").find("Name").text - for rg in xmltree.findall('ResourceGroup') - for r in rg.find('Resources').findall('Resource') - for s in r.find("Services").findall("Service") - if s.find("Name").text in _osdf_service_types - ) + institution_map = build_institution_map(institutions) + + seen = set() + for server in registry: + is_cache = server.get("is_cache", False) + is_origin = server.get("is_origin", False) + + # Apply type filter + if server_type == "cache" and not is_cache: + continue + if server_type == "origin" and not is_origin: + continue + if not is_cache and not is_origin: + continue + + # Walk registrations to find an approved one with an institution + for reg in server.get("registration", []): + meta = reg.get("admin_metadata") or {} + status = meta.get("status", "") + + if status.lower() != "approved": + continue + + institution_id = meta.get("institution", "") + institution = institution_map.get(institution_id, "").strip() + + if institution: + seen.add(institution) + break # one match per server is enough + + return sorted(seen, key=str.casefold) def main(): - facilities = sorted(get_osdf_facilities()) - n = len(facilities) - print("%d OSDF Facilit%s:" % (n, "y" if n == 1 else "ies")) - for f in sorted(facilities): - print(" - %s" % f) + parser = argparse.ArgumentParser( + description="List OSDF institutions with registered cache or origin servers." + ) + parser.add_argument( + "--type", + choices=["cache", "origin"], + default=None, + metavar="TYPE", + help="Filter by server type: 'cache' or 'origin' (default: all)", + ) + args = parser.parse_args() + type_label = args.type.capitalize() if args.type else "Cache & Origin" -if __name__ == '__main__': - main() + facilities = get_facilities(args.type) + + print(f"{len(facilities)} OSDF Facilities:") + for name in facilities: + print(f"- {name}") + +if __name__ == "__main__": + main()