From 2ab357a1e4a1067477c5dd52cb614d31253000b3 Mon Sep 17 00:00:00 2001 From: Pratyush Niraula Date: Sun, 26 Apr 2026 21:19:06 -0500 Subject: [PATCH 1/2] misinfo with updates --- pipelines/misinfo_checker.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/pipelines/misinfo_checker.py b/pipelines/misinfo_checker.py index 9b191bd..d2a2dc0 100644 --- a/pipelines/misinfo_checker.py +++ b/pipelines/misinfo_checker.py @@ -1058,20 +1058,19 @@ def _calculate_narrative_risk(claims_data: list[dict]) -> dict: } -def process_narratives_table(batch_size: int = 50): +def process_narratives_table(batch_size: int = 9999, force_reprocess: bool = False): """Enrich narratives with category and risk score based on associated claims.""" client = get_supabase_client() # Get narratives that need enrichment (category is "Uncategorized" or risk is 5.0) - resp = ( - client.table("narratives") - .select( - "narrative_id, narrative_label, narrative_description, narrative_category" - ) - .or_("narrative_category.eq.Uncategorized,narrative_risk_score.eq.5.0") - .limit(batch_size) - .execute() + query = client.table("narratives").select( + "narrative_id, narrative_label, narrative_description, narrative_category" ) + + if not force_reprocess: + query = query.or_("narrative_category.eq.Uncategorized,narrative_risk_score.eq.5.0") + + resp = query.limit(batch_size).execute() rows = resp.data if not rows: @@ -1161,12 +1160,13 @@ def process_narratives_table(batch_size: int = 50): sys.exit(0) if len(sys.argv) > 1 and sys.argv[1] == "5": - batch = 50 + batch = 9999 if "--batch" in sys.argv: idx = sys.argv.index("--batch") if idx + 1 < len(sys.argv): batch = int(sys.argv[idx + 1]) - process_narratives_table(batch_size=batch) + force = "--force" in sys.argv + process_narratives_table(batch_size=batch, force_reprocess=force) sys.exit(0) write_json = "--json" in sys.argv From 310e1b775362fdd4174249349030f16e8b91a686 Mon Sep 17 00:00:00 2001 From: Pratyush Niraula Date: Sun, 26 Apr 2026 21:19:59 -0500 Subject: [PATCH 2/2] ruff fixes --- pipelines/misinfo_checker.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pipelines/misinfo_checker.py b/pipelines/misinfo_checker.py index d2a2dc0..912b30d 100644 --- a/pipelines/misinfo_checker.py +++ b/pipelines/misinfo_checker.py @@ -1066,10 +1066,12 @@ def process_narratives_table(batch_size: int = 9999, force_reprocess: bool = Fal query = client.table("narratives").select( "narrative_id, narrative_label, narrative_description, narrative_category" ) - + if not force_reprocess: - query = query.or_("narrative_category.eq.Uncategorized,narrative_risk_score.eq.5.0") - + query = query.or_( + "narrative_category.eq.Uncategorized,narrative_risk_score.eq.5.0" + ) + resp = query.limit(batch_size).execute() rows = resp.data