Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
138 changes: 93 additions & 45 deletions campus-contributions/osdf.py
Original file line number Diff line number Diff line change
@@ -1,64 +1,112 @@
#!/usr/bin/env python
"""
osdf.py

from __future__ import print_function
Lists unique institutions that have a registered cache or origin in OSDF.
Comment thread
biozit marked this conversation as resolved.

import collections
import operator
Sources:
Registry : https://osdf-registry.osg-htc.org/api/v1.0/registry_ui/servers
Institutions: https://topology-institutions.osg-htc.org/api/institution_ids

Usage:
python osdf.py # all (cache + origin)
python osdf.py --type cache # cache only
python osdf.py --type origin # origin only
"""

import argparse
import json
import sys
import os
import urllib.request
import urllib.error

try:
from urllib.request import urlopen
except ImportError:
from urllib2 import urlopen

import xml.etree.ElementTree as et
# Registry endpoint listing registered servers. get_facilities() reads each
# entry's "is_cache", "is_origin" and "registration" fields.
REGISTRY_URL = "https://osdf-registry.osg-htc.org/api/v1.0/registry_ui/servers"
# Institution endpoint. build_institution_map() reads each entry's "id" and
# "name" fields to translate institution IDs into display names.
INSTITUTIONS_URL = "https://topology-institutions.osg-htc.org/api/institution_ids"


# Host that serves the topology "rgsummary" XML feed.
_topology_host = "topology.opensciencegrid.org"
_rgsummary_url = "https://%s/rgsummary/xml" % _topology_host

# Query-string pairs restricting the feed to active, non-disabled resources.
_active_params = [
    ("active", "on"),         # filter resources by "Active" field
    ("active_value", "1"),    # require Active: true
    ("disable", "on"),        # filter resources by "Disable" field
    ("disable_value", "0"),   # require Disable: false
]

# Full request URL: the base endpoint plus "key=value" pairs joined with "&".
_xml_url = "{base}?{query}".format(
    base=_rgsummary_url,
    query="&".join(
        "{0}={1}".format(key, value) for key, value in _active_params
    ),
)
def fetch_json(url: str) -> list | dict:
try:
req = urllib.request.Request(url, headers={"Accept": "application/json"})
with urllib.request.urlopen(req, timeout=30) as resp:
return json.loads(resp.read().decode())
except urllib.error.URLError as e:
print(f"ERROR: could not reach {url}\n {e}", file=sys.stderr)
sys.exit(1)


# Service names treated as OSDF services; get_osdf_facilities() keeps a
# facility when any of its resources offers a service with one of these names.
_osdf_service_types = [
"XRootD cache server",
"XRootD origin server",
"Pelican cache",
"Pelican origin",
]
def build_institution_map(institutions: list) -> dict:
    """Map institution ID URL -> human-readable name.

    Entries that are not JSON objects, or that lack a non-empty ``"id"`` or
    ``"name"``, are skipped, so one malformed record cannot abort the run.

    Args:
        institutions: Decoded institution records, each expected to be a
            dict with ``"id"`` and ``"name"`` keys.  ``None`` is treated as
            an empty list.

    Returns:
        A dict keyed by institution ID whose values are the matching names.
    """
    return {
        inst["id"]: inst["name"]
        for inst in institutions or ()
        # A JSON null or scalar in the payload has no .get(); skip it.
        if isinstance(inst, dict) and inst.get("id") and inst.get("name")
    }


def getxml():
    """Download the rgsummary XML document addressed by ``_xml_url``.

    Returns:
        The raw response body exactly as read from the server.
    """
    # Imported locally so this function is self-contained.  closing() is used
    # instead of a bare ``with urlopen(...)`` because the urllib2 fallback
    # response is not itself a context manager.
    from contextlib import closing

    # Close the response deterministically and bound the wait so a stalled
    # server cannot hang the script forever.
    with closing(urlopen(_xml_url, timeout=30)) as resp:
        return resp.read()
def get_facilities(server_type: str | None) -> list[str]:
    """
    Return a sorted, deduplicated list of institution names
    that have at least one approved cache or origin registration.

    The server list is downloaded from REGISTRY_URL and institution IDs are
    translated to names with the records from INSTITUTIONS_URL.  A server
    counts once: the first of its registrations that is approved and whose
    institution ID resolves to a non-blank name.

    server_type: None = both, "cache" = caches only, "origin" = origins only
    """
    registry = fetch_json(REGISTRY_URL)
    institutions = fetch_json(INSTITUTIONS_URL)

    institution_map = build_institution_map(institutions)

    seen = set()
    for server in registry:
        is_cache = server.get("is_cache", False)
        is_origin = server.get("is_origin", False)

        # Apply type filter
        if server_type == "cache" and not is_cache:
            continue
        if server_type == "origin" and not is_origin:
            continue
        if not is_cache and not is_origin:
            continue

        # Walk registrations to find an approved one with an institution.
        # ``or`` (not a .get default) is used throughout because a key that
        # is present with a JSON null would otherwise come back as None.
        for reg in server.get("registration") or []:
            meta = reg.get("admin_metadata") or {}
            status = meta.get("status") or ""

            if status.lower() != "approved":
                continue

            institution_id = meta.get("institution") or ""
            institution = (institution_map.get(institution_id) or "").strip()

            if institution:
                seen.add(institution)
                break  # one match per server is enough

    # Case-insensitive order; the raw name breaks ties so the result does not
    # depend on set iteration order.
    return sorted(seen, key=lambda name: (name.casefold(), name))


def get_osdf_facilities(xmltxt=None):
    """Return the set of facility names that offer an OSDF service.

    A facility is included when any resource in one of its resource groups
    lists a service whose name appears in ``_osdf_service_types``.  Resource
    groups without a facility name, resources or services are skipped.

    xmltxt: rgsummary XML text to parse; when None it is downloaded with
        getxml().
    """
    if xmltxt is None:
        xmltxt = getxml()
    xmltree = et.fromstring(xmltxt)

    facilities = set()
    for rg in xmltree.findall("ResourceGroup"):
        # findtext()/path lookups return None or nothing for missing
        # elements, where chained .find() calls would raise AttributeError.
        facility = rg.findtext("Facility/Name")
        if not facility:
            continue
        service_names = (
            service.findtext("Name")
            for service in rg.findall("Resources/Resource/Services/Service")
        )
        if any(name in _osdf_service_types for name in service_names):
            facilities.add(facility)
    return facilities


def main():
    """Command-line entry point.

    Parses the optional ``--type`` filter ("cache" or "origin"; omitted means
    both), looks up the matching facilities with get_facilities(), and prints
    a count header followed by one facility name per line.
    """
    parser = argparse.ArgumentParser(
        description="List OSDF institutions with registered cache or origin servers."
    )
    parser.add_argument(
        "--type",
        choices=["cache", "origin"],
        default=None,
        metavar="TYPE",
        help="Filter by server type: 'cache' or 'origin' (default: all)",
    )
    args = parser.parse_args()

    facilities = get_facilities(args.type)

    # Use the singular form for exactly one result ("1 OSDF Facility:").
    n = len(facilities)
    print(f"{n} OSDF Facilit{'y' if n == 1 else 'ies'}:")
    for name in facilities:
        print(f"- {name}")


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()