Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions af/prio/debian/changelog
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
prio (0.3) unstable; urgency=medium

* Refactor, improve metrics

-- Federico Ceratto <federico@debian.org> Thu, 09 Jul 2020 18:12:53 +0100

prio (0.2) unstable; urgency=medium

* Update conf
Expand Down
1 change: 1 addition & 0 deletions af/prio/debian/install
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
etc/* /etc
2 changes: 1 addition & 1 deletion af/prio/debian/prio.service
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ Type=simple
RestartSec=2s
WorkingDirectory=/var/lib/prio

WatchdogSec=600s
#WatchdogSec=600s

User=prio
Group=prio
Expand Down
16 changes: 16 additions & 0 deletions af/prio/etc/nginx/sites-available/prio.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# deployed by prio .deb package
log_format prio_fmt '"$request" $status';
server {
listen 8080;
listen [::]:8080;
server_name mia-ps-test.ooni.nu orchestrate.ooni.io;

access_log syslog:server=unix:/dev/log,tag=prio,severity=info prio_fmt;

location /api/v1/test-list/urls {
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_pass http://127.0.0.1:8788;
}

}
4 changes: 2 additions & 2 deletions af/prio/etc/ooni/prio.conf
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
[DEFAULT]
apiport = 8788
dbhost = amsmetadb.ooni.nu
dbhost = hkgmetadb.infra.ooni.io
dbname = metadb
# already public
dbpassword = yEqgNr2eXvgG255iEBxVeP
dbport = 5432
dbuser = shovel
refresh_interval_s = 300
refresh_interval_s = 600
62 changes: 41 additions & 21 deletions af/prio/prio.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,8 @@
country codes are in the `cc` column, lowercase, with the exception of ZZ

Outputs:
Files in /var/lib/analysis
Node exporter / prometheus metrics
Dedicated unlogged database tables and charts
tables:
currently_blocked
API
Statsd metrics

Special country code values:
ZZ: unknown
Expand All @@ -32,6 +29,7 @@
import time

from systemd.journal import JournalHandler # debdeps: python3-systemd
import statsd # debdeps: python3-statsd

from bottle import route
import bottle
Expand All @@ -41,13 +39,18 @@

conf = None
test_items = {}
last_update_time = 0
next_update_time = None

log = logging.getLogger("prio")
log.addHandler(JournalHandler(SYSLOG_IDENTIFIER="prio"))
log.setLevel(logging.DEBUG)


metrics = statsd.StatsClient("localhost", 8125, prefix="prio")


def connect_db(c):
log.info("Connecting to %s %d", c.dbhost, c.dbport)
conn = psycopg2.connect(
dbname=c.dbname,
user=c.dbuser,
Expand All @@ -58,15 +61,15 @@ def connect_db(c):
return conn


# @metrics.timer("update_url_prioritization")
@metrics.timer("update_url_prioritization")
def update_url_prioritization():
"""
"""Fetch URL prioritization from database and update lookup dict
"""
log.info("Started update_url_prioritization")
conn = connect_db(conf)
cur = conn.cursor(cursor_factory=RealDictCursor)

log.info("Regenerating URL prioritization file")
log.info("Regenerating URL prioritization data")
sql = """SELECT priority, domain, url, cc, category_code FROM citizenlab"""
cur.execute(sql)
entries = list(cur.fetchall())
Expand All @@ -82,13 +85,18 @@ def update_url_prioritization():
ccode = e["category_code"]
entries_by_country[country].setdefault(ccode, []).append(e)

# merge ZZ into each country
zz = entries_by_country.pop("ZZ")
# merge ZZ into each country: in this way, when we look for entries for a
# given cc we'll find both global and country-specific URLs
zz = entries_by_country["ZZ"]
for ccode, country_dict in entries_by_country.items():
for category_code, test_items in zz.items():
country_dict.setdefault(category_code, []).extend(test_items)

log.info("Update done")
log.info(
"Update done: %d countries, %d global URLs",
len(entries_by_country) - 1,
len(entries_by_country["ZZ"]),
)
return entries_by_country


Expand All @@ -113,11 +121,12 @@ def algo_chao(s: List, k: int) -> List:
return r


@metrics.timer("generate_test_list")
def generate_test_list(country_code: str, category_codes: str, limit: int):
global test_items, last_update_time
global test_items, next_update_time

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if there is a nicer way to do this without having global variables.


if last_update_time < time.time() - 100: # conf.refresh_interval:
last_update_time = time.time()
if next_update_time < time.time():
next_update_time = time.time() + float(conf.refresh_interval_s)
try:
test_items = update_url_prioritization()
except Exception as e:
Expand All @@ -127,6 +136,8 @@ def generate_test_list(country_code: str, category_codes: str, limit: int):

if category_codes:
category_codes = [c.strip().upper() for c in category_codes.split(",")]
for cat in category_codes:
metrics.incr(f"category_code_requested[category={cat}]")
else:
category_codes = candidates_d.keys()

Expand All @@ -135,8 +146,6 @@ def generate_test_list(country_code: str, category_codes: str, limit: int):
s = candidates_d.get(ccode, [])
candidates.extend(s)

log.info("%d candidates", len(candidates))

if limit == -1:
limit = 100
limit = min(limit, len(candidates))
Expand All @@ -151,6 +160,9 @@ def generate_test_list(country_code: str, category_codes: str, limit: int):
"country_code": "XX" if entry["cc"] == "ZZ" else entry["cc"],
}
)
metrics.incr("total_urls_served", count=len(out))
metrics.incr(f"country_code_requested[cc={country_code}]")

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the country_code guaranteed to always be uppercase or lowercase? Do you maybe want to normalise it at the beginning?

log.info("Serving %d URLs", len(out))
return out


Expand All @@ -160,9 +172,12 @@ def list_urls():
https://orchestrate.ooni.io/api/v1/test-list/urls?country_code=IT
"""
try:
country_code = bottle.request.query.country_code.upper() or "ZZ"
category_codes = bottle.request.query.category_code
limit = int(bottle.request.query.limit or -1)
q = bottle.request.query
# look for country_code or probe_cc or default to "ZZ" which
# represents the global list
country_code = q.country_code.upper() or q.probe_cc.upper() or "ZZ"
category_codes = q.category_codes
limit = int(q.limit or -1)
test_items = generate_test_list(country_code, category_codes, limit)
out = {
"metadata": {
Expand All @@ -181,13 +196,18 @@ def list_urls():


def main():
    """Load the configuration, prime the URL cache and start the API.

    Reads the DEFAULT section of /etc/ooni/prio.conf into the module-level
    ``conf`` namedtuple, performs a first ``update_url_prioritization()``
    and stores its result in ``test_items``, sets ``next_update_time`` to
    ``refresh_interval_s`` seconds from now, then starts bottle on
    localhost at the configured ``apiport``.
    """
    global conf, test_items, next_update_time
    conffile = "/etc/ooni/prio.conf"
    cp = ConfigParser()
    with open(conffile) as f:
        cp.read_file(f)
    d = cp.defaults()  # parsed values from DEFAULT section
    conf = namedtuple("Conf", d.keys())(*d.values())
    log.info("Refresh interval: %s", conf.refresh_interval_s)

    # Fill the lookup dict and schedule the next refresh before serving:
    # next_update_time starts out as None at module level and
    # generate_test_list() compares it against time.time().
    test_items = update_url_prioritization()
    # float() because the value is read from the config file as text.
    next_update_time = time.time() + float(conf.refresh_interval_s)

    bottle.run(host="localhost", port=conf.apiport)


Expand Down