From 1dc58200fa351de170903eaa7b15dabade9162ef Mon Sep 17 00:00:00 2001 From: Federico Ceratto Date: Thu, 9 Jul 2020 18:14:55 +0100 Subject: [PATCH] Prio 0.3 --- af/prio/debian/changelog | 6 ++ af/prio/debian/install | 1 + af/prio/debian/prio.service | 2 +- af/prio/etc/nginx/sites-available/prio.conf | 16 ++++++ af/prio/etc/ooni/prio.conf | 4 +- af/prio/prio.py | 62 ++++++++++++++------- 6 files changed, 67 insertions(+), 24 deletions(-) create mode 100644 af/prio/debian/install create mode 100644 af/prio/etc/nginx/sites-available/prio.conf diff --git a/af/prio/debian/changelog b/af/prio/debian/changelog index 27c9d621..f2632b2b 100644 --- a/af/prio/debian/changelog +++ b/af/prio/debian/changelog @@ -1,3 +1,9 @@ +prio (0.3) unstable; urgency=medium + + * Refactor, improve metrics + + -- Federico Ceratto Thu, 09 Jul 2020 18:12:53 +0100 + prio (0.2) unstable; urgency=medium * Update conf diff --git a/af/prio/debian/install b/af/prio/debian/install new file mode 100644 index 00000000..ad0876f3 --- /dev/null +++ b/af/prio/debian/install @@ -0,0 +1 @@ +etc/* /etc diff --git a/af/prio/debian/prio.service b/af/prio/debian/prio.service index 415ed0af..f14afffe 100644 --- a/af/prio/debian/prio.service +++ b/af/prio/debian/prio.service @@ -10,7 +10,7 @@ Type=simple RestartSec=2s WorkingDirectory=/var/lib/prio -WatchdogSec=600s +#WatchdogSec=600s User=prio Group=prio diff --git a/af/prio/etc/nginx/sites-available/prio.conf b/af/prio/etc/nginx/sites-available/prio.conf new file mode 100644 index 00000000..87c52222 --- /dev/null +++ b/af/prio/etc/nginx/sites-available/prio.conf @@ -0,0 +1,16 @@ +# deployed by prio .deb package +log_format prio_fmt '"$request" $status'; +server { + listen 8080; + listen [::]:8080; + server_name mia-ps-test.ooni.nu orchestrate.ooni.io; + + access_log syslog:server=unix:/dev/log,tag=prio,severity=info prio_fmt; + + location /api/v1/test-list/urls { + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_pass http://127.0.0.1:8788; + } + +} diff --git a/af/prio/etc/ooni/prio.conf b/af/prio/etc/ooni/prio.conf index a6570ed8..56d1d5a9 100644 --- a/af/prio/etc/ooni/prio.conf +++ b/af/prio/etc/ooni/prio.conf @@ -1,9 +1,9 @@ [DEFAULT] apiport = 8788 -dbhost = amsmetadb.ooni.nu +dbhost = hkgmetadb.infra.ooni.io dbname = metadb # already public dbpassword = yEqgNr2eXvgG255iEBxVeP dbport = 5432 dbuser = shovel -refresh_interval_s = 300 +refresh_interval_s = 600 diff --git a/af/prio/prio.py b/af/prio/prio.py index 3bf3e33c..0873baf5 100755 --- a/af/prio/prio.py +++ b/af/prio/prio.py @@ -13,11 +13,8 @@ country codes are in the `cc` column, lowercase, with the exception of ZZ Outputs: - Files in /var/lib/analysis - Node exporter / prometheus metrics - Dedicated unlogged database tables and charts - tables: - currently_blocked + API + Statsd metrics Special country code values: ZZ: unknown @@ -32,6 +29,7 @@ import time from systemd.journal import JournalHandler # debdeps: python3-systemd +import statsd # debdeps: python3-statsd from bottle import route import bottle @@ -41,13 +39,18 @@ conf = None test_items = {} -last_update_time = 0 +next_update_time = None log = logging.getLogger("prio") log.addHandler(JournalHandler(SYSLOG_IDENTIFIER="prio")) +log.setLevel(logging.DEBUG) + + +metrics = statsd.StatsClient("localhost", 8125, prefix="prio") def connect_db(c): + log.info("Connecting to %s %d", c.dbhost, c.dbport) conn = psycopg2.connect( dbname=c.dbname, user=c.dbuser, @@ -58,15 +61,15 @@ def connect_db(c): return conn -# @metrics.timer("update_url_prioritization") +@metrics.timer("update_url_prioritization") def update_url_prioritization(): - """ + """Fetch URL prioritization from database and update lookup dict """ log.info("Started update_url_prioritization") conn = connect_db(conf) cur = conn.cursor(cursor_factory=RealDictCursor) - log.info("Regenerating URL prioritization file") + log.info("Regenerating URL prioritization data") sql = """SELECT priority, domain, url, cc, category_code FROM citizenlab""" cur.execute(sql) entries = list(cur.fetchall()) @@ -82,13 +85,18 @@ def update_url_prioritization(): ccode = e["category_code"] entries_by_country[country].setdefault(ccode, []).append(e) - # merge ZZ into each country - zz = entries_by_country.pop("ZZ") + # merge ZZ into each country: in this way, when we look for entries for a + # given cc we'll find both global and country-specific URLs + zz = entries_by_country["ZZ"] for ccode, country_dict in entries_by_country.items(): for category_code, test_items in zz.items(): country_dict.setdefault(category_code, []).extend(test_items) - log.info("Update done") + log.info( + "Update done: %d countries, %d global URLs", + len(entries_by_country) - 1, + len(entries_by_country["ZZ"]), + ) return entries_by_country @@ -113,11 +121,12 @@ def algo_chao(s: List, k: int) -> List: return r +@metrics.timer("generate_test_list") def generate_test_list(country_code: str, category_codes: str, limit: int): - global test_items, last_update_time + global test_items, next_update_time - if last_update_time < time.time() - 100: # conf.refresh_interval: - last_update_time = time.time() + if next_update_time < time.time(): + next_update_time = time.time() + float(conf.refresh_interval_s) try: test_items = update_url_prioritization() except Exception as e: @@ -127,6 +136,8 @@ def generate_test_list(country_code: str, category_codes: str, limit: int): if category_codes: category_codes = [c.strip().upper() for c in category_codes.split(",")] + for cat in category_codes: + metrics.incr(f"category_code_requested[category={cat}]") else: category_codes = candidates_d.keys() @@ -135,8 +146,6 @@ def generate_test_list(country_code: str, category_codes: str, limit: int): s = candidates_d.get(ccode, []) candidates.extend(s) - log.info("%d candidates", len(candidates)) - if limit == -1: limit = 100 limit = min(limit, len(candidates)) @@ -151,6 +160,9 @@ def generate_test_list(country_code: str, category_codes: str, limit: int): "country_code": "XX" if entry["cc"] == "ZZ" else entry["cc"], } ) + metrics.incr("total_urls_served", count=len(out)) + metrics.incr(f"country_code_requested[cc={country_code}]") + log.info("Serving %d URLs", len(out)) return out @@ -160,9 +172,12 @@ def list_urls(): https://orchestrate.ooni.io/api/v1/test-list/urls?country_code=IT """ try: - country_code = bottle.request.query.country_code.upper() or "ZZ" - category_codes = bottle.request.query.category_code - limit = int(bottle.request.query.limit or -1) + q = bottle.request.query + # look for country_code or probe_cc or default to "ZZ" which + # represents the global list + country_code = q.country_code.upper() or q.probe_cc.upper() or "ZZ" + category_codes = q.category_codes + limit = int(q.limit or -1) test_items = generate_test_list(country_code, category_codes, limit) out = { "metadata": { @@ -181,13 +196,18 @@ def list_urls(): def main(): - global conf + global conf, test_items, next_update_time conffile = "/etc/ooni/prio.conf" cp = ConfigParser() with open(conffile) as f: cp.read_file(f) d = cp.defaults() # parsed values from DEFAULT section conf = namedtuple("Conf", d.keys())(*d.values()) + log.info("Refresh interval: %s", conf.refresh_interval_s) + + test_items = update_url_prioritization() + next_update_time = time.time() + float(conf.refresh_interval_s) + bottle.run(host="localhost", port=conf.apiport)