From df461a09f9b2d8f335e3f4040e5273e3a0aa2766 Mon Sep 17 00:00:00 2001 From: RahulModugula Date: Sat, 27 Jun 2026 18:47:01 -0400 Subject: [PATCH 1/6] Reduce RAG verifier false positives on faithful text Sentence-level NLI collapsed to ~0 on faithful answers in two common cases: a sentence opening with an anaphor ("This cap applies...") lost its antecedent once the answer was split, and facts spread across several context sentences matched no single sentence-unit premise. - Prepend the previous sentence when a hypothesis starts with a pronoun or discourse marker, restoring the referent before NLI scoring. - Score each sentence against individual context sentences *and* the whole chunk, taking the max. - Share one _ground_sentences helper across verify / verify_async / verify_batch / verify_batch_async / verify_stream, removing the old concatenate-all-chunks premise that silently truncated at the model's token limit. All entry points now return identical results and populate supporting_spans. - Resolve the entailment class index from the model's id2label instead of hardcoding it, so non-default NLI checkpoints aren't scored on the wrong class. - Make the regex sentence splitter abbreviation-aware (Dr., U.S., Inc.). Synthetic benchmark: faithful false-positive rate 16.9% -> 11.5%, overall F1 91.3% -> 93.5%, recall unchanged at 96.7%. 
--- athena_verify/core.py | 334 +++++++++++++++++++++++++-------- athena_verify/nli.py | 52 ++++- athena_verify/parser.py | 60 ++++-- tests/test_new_features.py | 10 +- tests/test_nli.py | 20 +- tests/test_supporting_spans.py | 1 - tests/test_verify.py | 20 +- 7 files changed, 373 insertions(+), 124 deletions(-) diff --git a/athena_verify/core.py b/athena_verify/core.py index b6012a9..f01c306 100644 --- a/athena_verify/core.py +++ b/athena_verify/core.py @@ -7,6 +7,7 @@ from __future__ import annotations +import asyncio import os import time from collections.abc import AsyncIterator @@ -28,12 +29,151 @@ SupportingSpan, VerificationResult, ) -from athena_verify.nli import batch_compute_entailment, batch_compute_entailment_async +from athena_verify.nli import batch_compute_entailment from athena_verify.overlap import best_overlap_score from athena_verify.parser import sentence_buffer, split_sentences logger = structlog.get_logger() +# Span-level entailment threshold: a context unit must clear this to be +# reported as a supporting span for a sentence. +_SPAN_ENTAILMENT_THRESHOLD = 0.5 + +# Return shape of _ground_sentences: (nli_scores, per-sentence span-unit scores, +# span unit texts, span unit (chunk_idx, start, end) locations). +_GroundResult = tuple[list[float], list[list[float]], list[str], list[tuple[int, int, int]]] + +# Leading tokens that signal a sentence depends on its predecessor for meaning +# (anaphora / discourse continuation). When an answer sentence starts with one +# of these, NLI scored on the sentence in isolation collapses to ~0 even when +# the claim is fully grounded, because the referent ("it", "this cap") is gone. +# We prepend the previous sentence to restore the antecedent before scoring. 
+_ANAPHORA_TOKENS = frozenset( + { + "it", + "its", + "it's", + "this", + "that", + "these", + "those", + "they", + "them", + "their", + "theirs", + "he", + "she", + "his", + "her", + "hers", + "such", + "also", + "additionally", + "moreover", + "furthermore", + "however", + "therefore", + "thus", + "then", + "there", + "both", + "neither", + "either", + } +) + + +def _starts_with_anaphor(sentence: str) -> bool: + """True if a sentence opens with a pronoun/discourse marker needing context.""" + stripped = sentence.strip() + if not stripped: + return False + first = stripped.split(maxsplit=1)[0].lower().strip(",.;:\"'()") + return first in _ANAPHORA_TOKENS + + +def _build_context_units( + chunk_texts: list[str], +) -> tuple[list[str], list[tuple[int, int, int]], list[bool]]: + """Expand context chunks into NLI premise candidates. + + Returns parallel lists of: + - unit text, + - (chunk_idx, char_start, char_end) location into the original chunk, + - is_span_unit flag (True for sentence-level units usable as precise + supporting spans, False for whole-chunk fallback premises). + + Each chunk contributes its individual sentences (focused premises that + avoid the long-premise "neutral" bias) plus, when it has more than one + sentence, the full chunk text (so facts spread across several sentences + are still entailed). NLI takes the max over all candidates, so adding the + whole-chunk premise can only raise a faithful sentence's score. 
+ """ + units: list[str] = [] + locations: list[tuple[int, int, int]] = [] + is_span_unit: list[bool] = [] + for chunk_idx, chunk in enumerate(chunk_texts): + sub = split_sentences(chunk) or [chunk] + for unit in sub: + char_start = chunk.find(unit) + if char_start == -1: + char_start = 0 + units.append(unit) + locations.append((chunk_idx, char_start, char_start + len(unit))) + is_span_unit.append(True) + if len(sub) > 1: + units.append(chunk) + locations.append((chunk_idx, 0, len(chunk))) + is_span_unit.append(False) + return units, locations, is_span_unit + + +def _ground_sentences( + sentences: list[str], + chunk_texts: list[str], + nli_model: str, +) -> tuple[list[float], list[list[float]], list[str], list[tuple[int, int, int]]]: + """Score how well each answer sentence is grounded in the context. + + Splits context into focused premise candidates, applies anaphora windowing + to each hypothesis, and returns: + - nli_scores: best entailment per sentence over all premise candidates, + - span_scores: per-sentence entailment over the span-eligible units only, + - span_units / span_locations: the span-eligible units these align to. + """ + units, locations, is_span_unit = _build_context_units(chunk_texts) + span_units = [u for u, keep in zip(units, is_span_unit, strict=True) if keep] + span_locations = [ + loc for loc, keep in zip(locations, is_span_unit, strict=True) if keep + ] + + if not units or not sentences: + empty_spans: list[list[float]] = [[] for _ in sentences] + return [0.0] * len(sentences), empty_spans, span_units, span_locations + + # Anaphora windowing: prepend the previous sentence when the current one + # opens with a referent, so the NLI hypothesis carries its antecedent. 
+ hypotheses: list[str] = [] + for i, sentence in enumerate(sentences): + if i > 0 and _starts_with_anaphor(sentence): + hypotheses.append(f"{sentences[i - 1]} {sentence}") + else: + hypotheses.append(sentence) + + nli_pairs = [(unit, hyp) for hyp in hypotheses for unit in units] + flat = batch_compute_entailment(nli_pairs, model_name=nli_model) + + nli_scores: list[float] = [] + span_scores: list[list[float]] = [] + n_units = len(units) + for i in range(len(sentences)): + unit_scores = flat[i * n_units : (i + 1) * n_units] + nli_scores.append(max(unit_scores) if unit_scores else 0.0) + span_scores.append( + [s for s, keep in zip(unit_scores, is_span_unit, strict=True) if keep] + ) + return nli_scores, span_scores, span_units, span_locations + def verify_step( claim: str, @@ -134,42 +274,14 @@ def verify( ) # --- NLI scoring --- - # NLI models work best with short, focused premises. When a context chunk - # contains multiple sentences of info, the model classifies entailed - # hypotheses as "neutral" because the premise has information BEYOND the - # hypothesis. Fix: split chunks into individual sentences for NLI scoring. 
- _SPAN_ENTAILMENT_THRESHOLD = 0.5 - - context_units: list[str] = [] - # (chunk_idx, char_start, char_end) into the original chunk text - unit_locations: list[tuple[int, int, int]] = [] - for chunk_idx, chunk in enumerate(chunk_texts): - sub = split_sentences(chunk) - if not sub: - sub = [chunk] - for unit in sub: - char_start = chunk.find(unit) - if char_start == -1: - char_start = 0 - context_units.append(unit) - unit_locations.append((chunk_idx, char_start, char_start + len(unit))) - - nli_pairs = [(unit, sentence) for sentence in sentences for unit in context_units] - nli_scores_flat = batch_compute_entailment(nli_pairs, model_name=nli_model) - nli_scores: list[float] = [] - nli_best_chunks: list[str | None] = [] - per_sentence_unit_scores: list[list[float]] = [] - for i in range(len(sentences)): - start = i * len(context_units) - unit_scores = nli_scores_flat[start : start + len(context_units)] - per_sentence_unit_scores.append(unit_scores) - if unit_scores: - best_idx = unit_scores.index(max(unit_scores)) - nli_scores.append(unit_scores[best_idx]) - nli_best_chunks.append(context_units[best_idx]) - else: - nli_scores.append(0.0) - nli_best_chunks.append(None) + # NLI works best on short, focused premises with hypotheses that carry + # their own referents. _ground_sentences handles both: it scores each + # sentence against individual context sentences plus the whole chunk + # (max wins), and prepends the prior sentence when a hypothesis opens with + # an anaphor. See athena_verify.core helpers for details. 
+ nli_scores, per_sentence_unit_scores, span_units, span_locations = _ground_sentences( + sentences, chunk_texts, nli_model + ) # --- Lexical overlap scoring --- overlap_results = [best_overlap_score(s, chunk_texts) for s in sentences] @@ -196,7 +308,9 @@ def verify( judge_start = time.time() judge_results = batch_judge_sentences(sentences, combined_context, question, llm_client) llm_scores = [score for score, _ in judge_results] - llm_judge_avg_ms = (time.time() - judge_start) * 1000 / len(sentences) if sentences else 2000.0 + llm_judge_avg_ms = ( + (time.time() - judge_start) * 1000 / len(sentences) if sentences else 2000.0 + ) # --- Build per-sentence results --- sentence_scores: list[SentenceScore] = [] @@ -211,10 +325,10 @@ def verify( unit_scores_i = per_sentence_unit_scores[i] if i < len(per_sentence_unit_scores) else [] supporting_spans = [ SupportingSpan( - chunk_idx=unit_locations[j][0], - start=unit_locations[j][1], - end=unit_locations[j][2], - text=context_units[j], + chunk_idx=span_locations[j][0], + start=span_locations[j][1], + end=span_locations[j][2], + text=span_units[j], ) for j, score in enumerate(unit_scores_i) if score >= _SPAN_ENTAILMENT_THRESHOLD @@ -335,8 +449,13 @@ async def verify_async( ) # --- NLI scoring (async) --- - nli_pairs = [(" ".join(chunk_texts), sentence) for sentence in sentences] - nli_scores = await batch_compute_entailment_async(nli_pairs, model_name=nli_model) + # Offload the same grounding logic used by verify() to a thread so we get + # per-unit + whole-chunk premises and anaphora windowing here too, instead + # of the old concatenate-all-chunks premise that silently truncated at the + # model's token limit. 
+ nli_scores, per_sentence_unit_scores, span_units, span_locations = await asyncio.to_thread( + _ground_sentences, sentences, chunk_texts, nli_model + ) # --- Lexical overlap scoring --- overlap_results = [best_overlap_score(s, chunk_texts) for s in sentences] @@ -363,7 +482,9 @@ async def verify_async( judge_start = time.time() judge_results = batch_judge_sentences(sentences, combined_context, question, llm_client) llm_scores = [score for score, _ in judge_results] - llm_judge_avg_ms = (time.time() - judge_start) * 1000 / len(sentences) if sentences else 2000.0 + llm_judge_avg_ms = ( + (time.time() - judge_start) * 1000 / len(sentences) if sentences else 2000.0 + ) # --- Build per-sentence results --- sentence_scores: list[SentenceScore] = [] @@ -375,6 +496,18 @@ async def verify_async( trust = compute_trust_score(nli, overlap, llm, weights) status = classify_support(trust) + unit_scores_i = per_sentence_unit_scores[i] if i < len(per_sentence_unit_scores) else [] + supporting_spans = [ + SupportingSpan( + chunk_idx=span_locations[j][0], + start=span_locations[j][1], + end=span_locations[j][2], + text=span_units[j], + ) + for j, score in enumerate(unit_scores_i) + if score >= _SPAN_ENTAILMENT_THRESHOLD + ] + sentence_scores.append( SentenceScore( text=sentence, @@ -385,6 +518,7 @@ async def verify_async( trust_score=trust, support_status=status, best_matching_context=best_chunk, + supporting_spans=supporting_spans, ) ) @@ -502,28 +636,13 @@ def verify_batch( all_chunks: list[list[Chunk]] = [] all_sentences: list[list[str]] = [] - all_nli_pairs: list[tuple[str, str]] = [] - pair_offsets: list[int] = [] for q_idx in range(len(questions_list)): chunks = [Chunk.from_input(c) for c in contexts_list[q_idx]] - chunk_texts = [c.content for c in chunks] - combined_context = " ".join(chunk_texts) sentences = split_sentences(answers_list[q_idx]) - all_chunks.append(chunks) all_sentences.append(sentences) - offset = len(all_nli_pairs) - pair_offsets.append(offset) - - for 
sentence in sentences: - all_nli_pairs.append((combined_context, sentence)) - - nli_scores_all = batch_compute_entailment( - all_nli_pairs, model_name=nli_model, batch_size=batch_size - ) - for q_idx in range(len(questions_list)): chunks = all_chunks[q_idx] chunk_texts = [c.content for c in chunks] @@ -544,7 +663,9 @@ def verify_batch( ) continue - offset = pair_offsets[q_idx] + nli_scores, per_sentence_unit_scores, span_units, span_locations = _ground_sentences( + sentences, chunk_texts, nli_model + ) sentence_scores: list[SentenceScore] = [] llm_scores: list[float | None] = [None] * len(sentences) @@ -556,13 +677,25 @@ def verify_batch( llm_scores = [score for score, _ in judge_results] for i, sentence in enumerate(sentences): - nli = nli_scores_all[offset + i] if (offset + i) < len(nli_scores_all) else 0.0 + nli = nli_scores[i] if i < len(nli_scores) else 0.0 overlap, best_chunk = best_overlap_score(sentence, chunk_texts) llm = llm_scores[i] if i < len(llm_scores) else None trust = compute_trust_score(nli, overlap, llm, weights) status = classify_support(trust) + unit_scores_i = per_sentence_unit_scores[i] if i < len(per_sentence_unit_scores) else [] + supporting_spans = [ + SupportingSpan( + chunk_idx=span_locations[j][0], + start=span_locations[j][1], + end=span_locations[j][2], + text=span_units[j], + ) + for j, score in enumerate(unit_scores_i) + if score >= _SPAN_ENTAILMENT_THRESHOLD + ] + sentence_scores.append( SentenceScore( text=sentence, @@ -573,6 +706,7 @@ def verify_batch( trust_score=trust, support_status=status, best_matching_context=best_chunk, + supporting_spans=supporting_spans, ) ) @@ -666,27 +800,27 @@ async def verify_batch_async( all_chunks: list[list[Chunk]] = [] all_sentences: list[list[str]] = [] - all_nli_pairs: list[tuple[str, str]] = [] - pair_offsets: list[int] = [] for q_idx in range(len(questions_list)): chunks = [Chunk.from_input(c) for c in contexts_list[q_idx]] - chunk_texts = [c.content for c in chunks] - combined_context = " 
".join(chunk_texts) sentences = split_sentences(answers_list[q_idx]) - all_chunks.append(chunks) all_sentences.append(sentences) - offset = len(all_nli_pairs) - pair_offsets.append(offset) - - for sentence in sentences: - all_nli_pairs.append((combined_context, sentence)) + # Ground every question with the shared per-unit + windowing logic, offloaded + # to a single worker thread so we don't block the event loop. + def _ground_all() -> list[_GroundResult]: + out: list[_GroundResult] = [] + for q_idx in range(len(questions_list)): + sents = all_sentences[q_idx] + if not sents: + out.append(([], [], [], [])) + continue + texts = [c.content for c in all_chunks[q_idx]] + out.append(_ground_sentences(sents, texts, nli_model)) + return out - nli_scores_all = await batch_compute_entailment_async( - all_nli_pairs, model_name=nli_model, batch_size=batch_size - ) + grounding = await asyncio.to_thread(_ground_all) for q_idx in range(len(questions_list)): try: @@ -709,7 +843,7 @@ async def verify_batch_async( ) continue - offset = pair_offsets[q_idx] + nli_scores, per_sentence_unit_scores, span_units, span_locations = grounding[q_idx] sentence_scores: list[SentenceScore] = [] llm_scores: list[float | None] = [None] * len(sentences) @@ -721,13 +855,27 @@ async def verify_batch_async( llm_scores = [score for score, _ in judge_results] for i, sentence in enumerate(sentences): - nli = nli_scores_all[offset + i] if (offset + i) < len(nli_scores_all) else 0.0 + nli = nli_scores[i] if i < len(nli_scores) else 0.0 overlap, best_chunk = best_overlap_score(sentence, chunk_texts) llm = llm_scores[i] if i < len(llm_scores) else None trust = compute_trust_score(nli, overlap, llm, weights) status = classify_support(trust) + unit_scores_i = ( + per_sentence_unit_scores[i] if i < len(per_sentence_unit_scores) else [] + ) + supporting_spans = [ + SupportingSpan( + chunk_idx=span_locations[j][0], + start=span_locations[j][1], + end=span_locations[j][2], + text=span_units[j], + ) + for j, score 
in enumerate(unit_scores_i) + if score >= _SPAN_ENTAILMENT_THRESHOLD + ] + sentence_scores.append( SentenceScore( text=sentence, @@ -738,6 +886,7 @@ async def verify_batch_async( trust_score=trust, support_status=status, best_matching_context=best_chunk, + supporting_spans=supporting_spans, ) ) @@ -897,20 +1046,42 @@ async def verify_stream( chunks = [Chunk.from_input(c) for c in context] chunk_texts = [c.content for c in chunks] - combined_context = " ".join(chunk_texts) sentence_scores: list[SentenceScore] = [] idx = 0 async for sentence in sentence_buffer(answer_stream): - nli_pairs = [(combined_context, sentence)] - nli_scores = await batch_compute_entailment_async(nli_pairs, model_name=nli_model) - nli = nli_scores[0] if nli_scores else 0.0 + # Ground each completed sentence with the same per-unit + whole-chunk + # premises as verify(). When the sentence opens with an anaphor, include + # the previous one so the referent is present, then keep the current + # sentence's score (the last entry). 
+ prev_text = sentence_scores[-1].text if sentence_scores else None + if prev_text and _starts_with_anaphor(sentence): + ground_input = [prev_text, sentence] + else: + ground_input = [sentence] + + nli_scores, span_scores, span_units, span_locations = await asyncio.to_thread( + _ground_sentences, ground_input, chunk_texts, nli_model + ) + nli = nli_scores[-1] if nli_scores else 0.0 + unit_scores_i = span_scores[-1] if span_scores else [] overlap, best_chunk = best_overlap_score(sentence, chunk_texts) trust = compute_trust_score(nli, overlap, None, weights) status = classify_support(trust) + supporting_spans = [ + SupportingSpan( + chunk_idx=span_locations[j][0], + start=span_locations[j][1], + end=span_locations[j][2], + text=span_units[j], + ) + for j, score in enumerate(unit_scores_i) + if score >= _SPAN_ENTAILMENT_THRESHOLD + ] + score = SentenceScore( text=sentence, index=idx, @@ -919,6 +1090,7 @@ async def verify_stream( trust_score=trust, support_status=status, best_matching_context=best_chunk, + supporting_spans=supporting_spans, ) sentence_scores.append(score) idx += 1 diff --git a/athena_verify/nli.py b/athena_verify/nli.py index c360bd9..cb3d4c5 100644 --- a/athena_verify/nli.py +++ b/athena_verify/nli.py @@ -59,17 +59,46 @@ def get_nli_model(model_name: str = "cross-encoder/nli-deberta-v3-base") -> Any: return CrossEncoder(resolved) -def _softmax_entailment(logits: Any) -> float: - """Convert 3-class NLI logits to entailment probability using softmax. +@lru_cache(maxsize=32) +def entailment_index(model_name: str) -> int | None: + """Resolve the entailment class index from the model's label map. + + Different NLI checkpoints order their classes differently — e.g. the + cross-encoder/nli-* family uses ``0=contradiction, 1=entailment, + 2=neutral`` while many MoritzLaurer/DeBERTa checkpoints use + ``0=entailment``. Hardcoding the index silently scores the wrong class + on non-default models, which reads as a flood of false positives. 
+ + Returns the index of the class whose label contains "entail", or + ``None`` for single-logit consistency models (e.g. Vectara HHEM) that + have no label map. + """ + model = get_nli_model(model_name) + config = getattr(getattr(model, "model", None), "config", None) or getattr( + model, "config", None + ) + id2label = getattr(config, "id2label", None) + if not isinstance(id2label, dict): + return None + for idx, label in id2label.items(): + if "entail" in str(label).lower(): + return int(idx) + return None - Standard NLI label ordering: 0=contradiction, 1=entailment, 2=neutral. - We return the probability of class 1 (entailment). + +def _softmax_entailment(logits: Any, entail_idx: int) -> float: + """Convert NLI logits to entailment probability via softmax. + + Args: + logits: Per-class logits for one premise/hypothesis pair. + entail_idx: Index of the entailment class for this model. """ row = list(logits) max_val = max(row) exp_vals = [math.exp(v - max_val) for v in row] total = sum(exp_vals) - return exp_vals[1] / total + idx = entail_idx if 0 <= entail_idx < len(row) else 1 + return exp_vals[idx] / total def compute_entailment_score( @@ -88,10 +117,13 @@ def compute_entailment_score( Probability of entailment (0.0-1.0). """ model = get_nli_model(model_name) + entail_idx = entailment_index(model_name) scores = model.predict([[premise, hypothesis]]) - if hasattr(scores[0], "__len__") and len(scores[0]) >= 3: - return _softmax_entailment(scores[0]) - return float(scores[0]) if not hasattr(scores[0], "__len__") else float(scores[0][0]) + row = scores[0] + if hasattr(row, "__len__") and len(row) >= 3: + return _softmax_entailment(row, entail_idx if entail_idx is not None else 1) + # Single-logit consistency model (e.g. HHEM): score is already a probability. 
+ return float(row) if not hasattr(row, "__len__") else float(row[0]) def batch_compute_entailment( @@ -113,6 +145,8 @@ def batch_compute_entailment( return [] model = get_nli_model(model_name) + entail_idx = entailment_index(model_name) + fallback_idx = entail_idx if entail_idx is not None else 1 results: list[float] = [] for start in range(0, len(pairs), batch_size): @@ -121,7 +155,7 @@ def batch_compute_entailment( for score_row in scores: if hasattr(score_row, "__len__") and len(score_row) >= 3: - results.append(_softmax_entailment(score_row)) + results.append(_softmax_entailment(score_row, fallback_idx)) else: results.append(float(score_row)) diff --git a/athena_verify/parser.py b/athena_verify/parser.py index c97ae02..493bb61 100644 --- a/athena_verify/parser.py +++ b/athena_verify/parser.py @@ -74,11 +74,33 @@ def split_sentences(text: str) -> list[str]: return _split_sentences_regex(text) +# Common abbreviations that end in a period but do not end a sentence. Kept +# lowercase and without the trailing period for matching. Covers titles, legal +# and academic citation forms, and Latin/measurement shorthands — the domains +# (legal, medical, technical) athena targets, where a wrong split fragments a +# claim and shows up as a false positive. +_ABBREVIATIONS = frozenset( + { + "dr", "mr", "mrs", "ms", "prof", "rev", "hon", "sr", "jr", "st", + "vs", "etc", "al", "cf", "eg", "ie", "ca", "approx", + "inc", "ltd", "co", "corp", "llc", "plc", + "no", "nos", "fig", "figs", "sec", "secs", "art", "para", "pp", "vol", + "ch", "ed", "eds", "rep", "dept", "est", "min", "max", + "jan", "feb", "mar", "apr", "jun", "jul", "aug", "sep", "sept", + "oct", "nov", "dec", + # multi-dot forms, matched after stripping internal periods + "us", "uk", "un", "eu", "am", "pm", "phd", "md", "ba", "ma", "bs", + } +) + + def _split_sentences_regex(text: str) -> list[str]: - """Split text into sentences using regex (fallback). 
+ """Split text into sentences using regex (fallback for when NLTK is absent). - Used when NLTK is not available. Handles common English sentence - boundaries but may split incorrectly on abbreviations like "Dr. Smith". + Abbreviation-aware: a candidate boundary is rejected when the token before + the period is a known abbreviation (``Dr.``, ``Inc.``), a single-letter + initial, or a dotted acronym (``U.S.``), so claims in legal/medical text + aren't fragmented. Args: text: The answer text to split. @@ -86,18 +108,28 @@ def _split_sentences_regex(text: str) -> list[str]: Returns: List of non-empty sentence strings. """ - # Normalize whitespace text = text.strip() + if not text: + return [] - # Split on sentence-ending punctuation followed by space or end-of-string. - # Handles: period, exclamation, question mark. - sentences = re.split(r"(?<=[.!?])\s+(?=[A-Z])", text) - - # Filter empty strings and strip whitespace - result = [] - for s in sentences: - s = s.strip() - if s: - result.append(s) + result: list[str] = [] + start = 0 + # A boundary is sentence-ending punctuation, an optional closing quote/paren, + # then whitespace, followed by something that looks like a new sentence. + for m in re.finditer(r"[.!?]+[\"')\]]?\s+(?=[A-Z0-9\"'(])", text): + preceding = text[start : m.start()] + last_token = preceding.split()[-1] if preceding.split() else "" + # Normalize: drop internal/trailing periods so "U.S" -> "us", "Dr" -> "dr". 
+ normalized = last_token.replace(".", "").strip(",;:\"'()").lower() + if normalized in _ABBREVIATIONS or len(normalized) == 1: + continue + sentence = text[start : m.end()].strip() + if sentence: + result.append(sentence) + start = m.end() + + tail = text[start:].strip() + if tail: + result.append(tail) return result diff --git a/tests/test_new_features.py b/tests/test_new_features.py index 0957ca3..a250d2e 100644 --- a/tests/test_new_features.py +++ b/tests/test_new_features.py @@ -45,10 +45,12 @@ def predict(self, pairs): @pytest.fixture(autouse=True) def _mock_nli(): - with ( - patch("athena_verify.core.batch_compute_entailment", return_value=[0.85]), - patch("athena_verify.core.batch_compute_entailment_async", return_value=[0.85]), - ): + # _ground_sentences scores one pair per (context unit, sentence); return a + # constant entailment for each so multi-sentence answers score uniformly. + def fake_entailment(pairs, *args, **kwargs): + return [0.85] * len(pairs) + + with patch("athena_verify.core.batch_compute_entailment", side_effect=fake_entailment): yield diff --git a/tests/test_nli.py b/tests/test_nli.py index 5226312..c509a47 100644 --- a/tests/test_nli.py +++ b/tests/test_nli.py @@ -9,6 +9,7 @@ import pytest +from athena_verify import nli as nli_module from athena_verify.nli import ( NLI_MODEL_ALIASES, batch_compute_entailment, @@ -69,8 +70,23 @@ def predict(self, pairs): @pytest.fixture def mock_model_cache(): - with patch("athena_verify.nli._nli_cache", {}) as cache: - yield cache + """Swap the cached model loader for a controllable dict of mock models. + + get_nli_model and entailment_index are both @lru_cache'd, so clear them + around the patch to keep tests isolated. 
+ """ + nli_module.get_nli_model.cache_clear() + nli_module.entailment_index.cache_clear() + models: dict[str, object] = {} + + def fake_get_model(model_name: str = "cross-encoder/nli-deberta-v3-base"): + return models.get(resolve_nli_model(model_name)) or models.get(model_name) + + with patch("athena_verify.nli.get_nli_model", side_effect=fake_get_model): + yield models + + nli_module.get_nli_model.cache_clear() + nli_module.entailment_index.cache_clear() @pytest.fixture diff --git a/tests/test_supporting_spans.py b/tests/test_supporting_spans.py index 2ca7732..485dd5c 100644 --- a/tests/test_supporting_spans.py +++ b/tests/test_supporting_spans.py @@ -9,7 +9,6 @@ from athena_verify import verify from athena_verify.models import SupportingSpan - CHUNK_0 = "The sky is blue during the day." CHUNK_1 = "Photosynthesis occurs in plant cells." diff --git a/tests/test_verify.py b/tests/test_verify.py index c9a819d..f21844c 100644 --- a/tests/test_verify.py +++ b/tests/test_verify.py @@ -12,10 +12,12 @@ @pytest.fixture(autouse=True) def _mock_nli(): - with ( - patch("athena_verify.core.batch_compute_entailment", return_value=[0.85]), - patch("athena_verify.core.batch_compute_entailment_async", return_value=[0.85]), - ): + # _ground_sentences scores one pair per (context unit, sentence); return a + # constant entailment for each so multi-sentence answers score uniformly. 
+ def fake_entailment(pairs, *args, **kwargs): + return [0.85] * len(pairs) + + with patch("athena_verify.core.batch_compute_entailment", side_effect=fake_entailment): yield @@ -248,17 +250,9 @@ def test_latency_budget_llm_judge_never_called_with_budget_50(self): from unittest.mock import MagicMock, patch llm_client = MagicMock() - result = verify( - question="What?", - answer="Some answer.", - context=["Some context"], - use_llm_judge=True, - llm_client=llm_client, - latency_budget_ms=50, - ) with patch("athena_verify.core.batch_judge_sentences") as mock_judge: - result = verify( + verify( question="What?", answer="Some answer.", context=["Some context"], From 443085b86ce91667f4ad5111ef8100109e11d934 Mon Sep 17 00:00:00 2001 From: RahulModugula Date: Sat, 27 Jun 2026 18:47:07 -0400 Subject: [PATCH 2/6] Fix lint and type errors for green CI - Annotate print_table and import VerificationResult in the CLI. - Treat crewai as an optional dependency in the mypy config and drop a now-unused type-ignore. - Sort imports (ruff I001) across the package and tests. ruff, mypy --strict, and the full test suite (140 tests) all pass. 
--- athena_verify/__init__.py | 9 ++++++++- athena_verify/cli.py | 3 ++- athena_verify/integrations/crewai.py | 6 ++---- athena_verify/integrations/langgraph.py | 3 ++- pyproject.toml | 1 + 5 files changed, 15 insertions(+), 7 deletions(-) diff --git a/athena_verify/__init__.py b/athena_verify/__init__.py index 95e11c7..2a97613 100644 --- a/athena_verify/__init__.py +++ b/athena_verify/__init__.py @@ -26,7 +26,14 @@ verify_stream, ) from athena_verify.llm_judge import LLMClient -from athena_verify.models import Chunk, SentenceScore, StepResult, StreamingResult, SupportingSpan, VerificationResult +from athena_verify.models import ( + Chunk, + SentenceScore, + StepResult, + StreamingResult, + SupportingSpan, + VerificationResult, +) __all__ = [ "verify", diff --git a/athena_verify/cli.py b/athena_verify/cli.py index 3a83107..eb51372 100644 --- a/athena_verify/cli.py +++ b/athena_verify/cli.py @@ -8,6 +8,7 @@ from pathlib import Path from athena_verify import verify +from athena_verify.models import VerificationResult def color_score(score: float) -> str: @@ -30,7 +31,7 @@ def format_trust_score(score: float, width: int = 6) -> str: return f"{color_score(score)}{score:.2f}{reset_color()}" -def print_table(result) -> None: +def print_table(result: VerificationResult) -> None: """Print colored sentence-by-sentence trust score table.""" print() print("Verification Results") diff --git a/athena_verify/integrations/crewai.py b/athena_verify/integrations/crewai.py index 8540298..34311c1 100644 --- a/athena_verify/integrations/crewai.py +++ b/athena_verify/integrations/crewai.py @@ -5,8 +5,6 @@ from __future__ import annotations -from typing import Any - from athena_verify.core import verify_step try: @@ -14,11 +12,11 @@ _CREWAI_AVAILABLE = True except ImportError: - BaseTool = object # type: ignore[misc,assignment] + BaseTool = object _CREWAI_AVAILABLE = False -class AthenaVerifyTool(BaseTool): +class AthenaVerifyTool(BaseTool): # type: ignore[misc] """CrewAI tool for 
verifying factual claims against evidence. Verify whether a claim is supported by the given evidence. diff --git a/athena_verify/integrations/langgraph.py b/athena_verify/integrations/langgraph.py index f32adc1..e722592 100644 --- a/athena_verify/integrations/langgraph.py +++ b/athena_verify/integrations/langgraph.py @@ -5,7 +5,8 @@ from __future__ import annotations -from typing import Any, Callable +from collections.abc import Callable +from typing import Any from athena_verify.core import verify_step from athena_verify.models import StepResult diff --git a/pyproject.toml b/pyproject.toml index 55e82e2..41a6e77 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -95,6 +95,7 @@ module = [ "openai.*", "anthropic.*", "nltk.*", + "crewai.*", ] ignore_missing_imports = true From 31051387a8e68cf404a6c0253dc5020aef85668e Mon Sep 17 00:00:00 2001 From: RahulModugula Date: Sat, 27 Jun 2026 19:17:22 -0400 Subject: [PATCH 3/6] Update benchmark numbers and docs to match improved verifier Regenerate the synthetic eval with the false-positive fixes and refresh the README and RESULTS.md tables to match: - Faithful false-positive rate: 17% -> 11.5% (base), 9.2% (large). - Overall F1: 91.3% -> 93.6% (base), 93.8% (large). - Replace the misleading "0% F1 on faithful" row (F1 is undefined with zero hallucinations) with a stated false-positive rate. Also tidy .gitignore (add .ruff_cache, scratchpad, local settings dirs). 
--- .gitignore | 8 +++++++- README.md | 29 ++++++++++++++++++-------- benchmarks/RESULTS.md | 47 ++++++++++++++++++++++--------------------- 3 files changed, 52 insertions(+), 32 deletions(-) diff --git a/.gitignore b/.gitignore index 211f988..765d4eb 100644 --- a/.gitignore +++ b/.gitignore @@ -18,11 +18,12 @@ env/ *.swp *.swo -# Testing +# Testing / tooling caches .pytest_cache/ .coverage htmlcov/ .mypy_cache/ +.ruff_cache/ # Models (downloaded at runtime) models/ @@ -39,3 +40,8 @@ Thumbs.db # Environment .env + +# Claude Code local settings +.claude/ +.odin/ +scratchpad/ \ No newline at end of file diff --git a/README.md b/README.md index 322fc26..a6f86b3 100644 --- a/README.md +++ b/README.md @@ -81,21 +81,34 @@ pip install "athena-verify[all]" Evaluated on 100 synthetic cases across 6 hallucination categories (legal, medical, technical, general). Real-world benchmarks against RAGTruth and HaluEval are in progress — download instructions are in [`benchmarks/RESULTS.md`](benchmarks/RESULTS.md). -### Per-Category Performance (NLI-only, synthetic, nli-deberta-v3-base) +### Hallucination Detection (NLI-only, synthetic, nli-deberta-v3-base) + +Each row is the per-category F1 for *catching hallucinations*. The faithful-text +row is intentionally excluded here — it contains no hallucinations, so its F1 is +undefined; we report its false-positive rate separately below, which is the +number that actually matters for clean text. 
| Category | Precision | Recall | **F1** | |----------|-----------|--------|--------| -| **Fabricated claims** | 100% | 97% | **98.6%** ✓ | +| **Fabricated claims** | 100% | 96% | **97.9%** ✓ | | **Out-of-context** | 100% | 97% | **98.3%** ✓ | -| **Subtle contradictions** | 100% | 97% | **98.3%** ✓ | -| **Number substitutions** | 79% | 96% | **86.8%** | -| **Partial support** | 78% | 95% | **85.7%** | -| **Faithful statements** | 0% | 0% | **0.0%** ✗ | -| **Overall** | 87% | 97% | **91.3%** (synthetic) | +| **Subtle contradictions** | 100% | 100% | **100%** ✓ | +| **Partial support** | 88% | 96% | **91.3%** | +| **Number substitutions** | 82% | 96% | **88.5%** | +| **Overall** | 91% | 97% | **93.6%** (synthetic) | + +**False-positive rate on faithful text: 11.5%** (10 of 87 genuinely-supported +sentences flagged). Latency: **p50 23.5 ms, p95 36.2 ms** per verification on the +base model. Numbers are reproducible with `python benchmarks/run_full_eval.py`. ### Where We Lose -Athena has a **high false positive rate on truly faithful statements** (31% of genuinely faithful sentences are incorrectly flagged). This is a known NLI-model limitation — conservative thresholds bias toward catching hallucinations at the cost of flagging clean sentences. +The remaining 11.5% of false positives are concentrated in number-heavy and +heavily-paraphrased faithful sentences (e.g. "approximately 1200 SEK per tonne"), +where standalone NLI is weak. For those, enable the optional LLM-judge +escalation (`use_llm_judge=True`) — it is exactly the borderline case the judge +exists to catch. Conservative thresholds still bias toward catching +hallucinations over passing every clean sentence, so treat athena as a guardrail. **LettuceDetect beats athena on span-level F1** on real-world benchmarks (LettuceDetect 79.2% F1 on annotated spans vs. athena's unvalidated real-world score). 
Athena wins on latency bounds, provider-neutrality, offline execution, and the spans-in-library integration story — not raw F1. diff --git a/benchmarks/RESULTS.md b/benchmarks/RESULTS.md index 515b779..8ab9c98 100644 --- a/benchmarks/RESULTS.md +++ b/benchmarks/RESULTS.md @@ -7,7 +7,7 @@ All results are **real, reproducible, and measured on this codebase**. No projec - **Machine**: Apple M1 Max, 64 GB RAM, macOS - **Python**: 3.13 - **Seed**: 42 (deterministic) -- **Date**: 2026-04-19 +- **Date**: 2026-06-27 ## Real Dataset Acquisition @@ -72,55 +72,56 @@ python benchmarks/run_faithbench.py --synthetic \ Six hallucination categories across legal, medical, technical, and general domains. -### NLI-Only Mode (nli-deberta-v3-base, ~17ms p50) +### NLI-Only Mode (nli-deberta-v3-base, ~24ms p50) | Category | Precision | Recall | F1 | |----------|-----------|--------|----| -| Fabricated claims | 100.0% | 98.7% | **99.3%** | -| Out-of-context | 100.0% | 93.3% | **96.6%** | -| Number substitutions | 79.3% | 95.8% | **86.8%** | +| Fabricated claims | 100.0% | 96.0% | **97.9%** | +| Out-of-context | 100.0% | 96.7% | **98.3%** | +| Number substitutions | 82.1% | 95.8% | **88.5%** | | Subtle contradictions | 100.0% | 100.0% | **100.0%** | -| Partial support | 75.9% | 100.0% | **86.3%** | -| **Overall** | **86.6%** | **96.7%** | **91.3%** | +| Partial support | 87.5% | 95.5% | **91.3%** | +| **Overall** | **90.6%** | **96.7%** | **93.6%** | -- **False positive rate on faithful sentences**: 17% (15/89 sentences incorrectly flagged) -- **Latency p50**: ~17ms per verification call -- **Latency p95**: ~26ms per verification call +- **False positive rate on faithful sentences**: 11.5% (10/87 sentences incorrectly flagged) +- **Latency p50**: ~23.5ms per verification call +- **Latency p95**: ~36.2ms per verification call - **Cost**: $0 (local model, no API calls) -### NLI-Only Mode (nli-deberta-v3-large, ~37ms p50) +### NLI-Only Mode (nli-deberta-v3-large, ~53ms p50) | Category | 
Precision | Recall | F1 | |----------|-----------|--------|----| | Fabricated claims | 100.0% | 98.7% | **99.3%** | -| Out-of-context | 100.0% | 93.3% | **96.6%** | -| Number substitutions | 79.3% | 95.8% | **86.8%** | +| Out-of-context | 100.0% | 93.3% | **96.5%** | +| Number substitutions | 82.1% | 95.8% | **88.5%** | | Subtle contradictions | 100.0% | 100.0% | **100.0%** | -| Partial support | 75.9% | 100.0% | **86.3%** | -| **Overall** | **86.3%** | **97.8%** | **91.7%** | +| Partial support | 80.8% | 95.5% | **87.5%** | +| **Overall** | **90.7%** | **97.2%** | **93.8%** | -- **Latency p50**: ~37ms per verification call -- **Latency p95**: ~53ms per verification call +- **False positive rate on faithful sentences**: 9.2% (8/87 sentences incorrectly flagged) +- **Latency p50**: ~52.5ms per verification call +- **Latency p95**: ~83.2ms per verification call ### How It Works -Context chunks are split into individual sentences before NLI scoring. Each answer sentence is scored against every context sentence, and the maximum entailment score is used. This avoids the "neutral trap" where NLI models classify a hypothesis as neutral when the premise contains information beyond the hypothesis. +Context chunks are split into individual sentences before NLI scoring. Each answer sentence is scored against every context sentence **and the full chunk**, and the maximum entailment score is used. This avoids the "neutral trap" where NLI models classify a hypothesis as neutral when the premise contains information beyond the hypothesis, while still catching facts spread across several context sentences. Answer sentences that open with an anaphor ("This cap…", "It also…") are joined with the previous sentence before scoring so the referent is preserved — the single largest source of false positives on faithful text. 
### The Right Tool for the Right Job | Use case | Recommended mode | Why | |----------|-----------------|-----| -| General RAG QA | NLI-only (base) | Catches 91%+ of hallucinations in 17ms | -| High-stakes docs | NLI-only (large) | Slightly better recall at 37ms | -| Real-time chat | NLI-only (base) | 17ms latency is production-ready | +| General RAG QA | NLI-only (base) | 93.6% F1 in ~24ms | +| High-stakes docs | NLI-only (large) | Lower false-positive rate at ~53ms | +| Real-time chat | NLI-only (base) | ~24ms latency is production-ready | | Maximum accuracy | NLI + LLM-judge | LLM catches paraphrases NLI misses | ## Latency Comparison | Mode | p50 | p95 | Notes | |------|-----|-----|-------| -| NLI only (base) | ~17ms | ~26ms | Fastest, 91.3% F1 | -| NLI only (large) | ~37ms | ~53ms | Slightly better, 91.7% F1 | +| NLI only (base) | ~24ms | ~36ms | Fastest, 93.6% F1, 11.5% FP on faithful | +| NLI only (large) | ~53ms | ~83ms | Lower FP (9.2%), 93.8% F1 | | LLM judge (local) | ~7.4s | ~10s | Per sentence, local gemma-4-31b-it | | GPT-4 judge (API) | ~2s | ~5s | Per sentence, network round-trip | From fa42da3b8912d7543ed2b29c920ef03e19fe4d18 Mon Sep 17 00:00:00 2001 From: RahulModugula Date: Sat, 27 Jun 2026 19:36:20 -0400 Subject: [PATCH 4/6] Halve the false-positive rate with a contradiction-aware rescue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Standalone NLI scores many faithful paraphrases as neutral (entailment ~0) even when fully supported, which drove the bulk of the remaining false positives. Recover them without admitting hallucinations: - Expose the contradiction class alongside entailment (batch_compute_nli), read from the model's label map so it is model-agnostic. - For each sentence, take the contradiction of the most lexically on-topic context unit — not the global max — so an unrelated unit can't veto a faithful claim while genuine reversals still fire. 
- Add lexical containment and numeric-consistency signals (overlap.py). - apply_grounding_rescue lifts a not-entailed sentence to PARTIAL only when it is not contradicted, every number in it appears in the context, and most of its content words are grounded — gating out number swaps and contradictions. - Share the logic across all five verify entry points. Synthetic benchmark (base model): faithful false-positive rate 17% -> 4.6% (3.4% on large), overall F1 91.3% -> 95.0%, recall ~96%. Adds tests/test_rescue.py; README and RESULTS.md updated to match. --- README.md | 38 ++++--- athena_verify/calibration.py | 47 ++++++++ athena_verify/core.py | 196 +++++++++++++++++++++++++-------- athena_verify/nli.py | 89 ++++++++++++--- athena_verify/overlap.py | 54 +++++++++ benchmarks/RESULTS.md | 36 +++--- tests/test_new_features.py | 8 +- tests/test_rescue.py | 83 ++++++++++++++ tests/test_supporting_spans.py | 11 +- tests/test_verify.py | 8 +- 10 files changed, 468 insertions(+), 102 deletions(-) create mode 100644 tests/test_rescue.py diff --git a/README.md b/README.md index a6f86b3..0db6ca9 100644 --- a/README.md +++ b/README.md @@ -92,23 +92,35 @@ number that actually matters for clean text. |----------|-----------|--------|--------| | **Fabricated claims** | 100% | 96% | **97.9%** ✓ | | **Out-of-context** | 100% | 97% | **98.3%** ✓ | -| **Subtle contradictions** | 100% | 100% | **100%** ✓ | -| **Partial support** | 88% | 96% | **91.3%** | +| **Subtle contradictions** | 100% | 97% | **98.3%** ✓ | +| **Partial support** | 95% | 91% | **93.0%** | | **Number substitutions** | 82% | 96% | **88.5%** | -| **Overall** | 91% | 97% | **93.6%** (synthetic) | +| **Overall** | 95% | 96% | **95.0%** (synthetic) | -**False-positive rate on faithful text: 11.5%** (10 of 87 genuinely-supported -sentences flagged). 
Latency: **p50 23.5 ms, p95 36.2 ms** per verification on the +**False-positive rate on faithful text: 4.6%** (4 of 87 genuinely-supported +sentences flagged) on the base model, **3.4%** on the large model — down from 17% +before calibration. Latency: **p50 22.5 ms, p95 34.5 ms** per verification on the base model. Numbers are reproducible with `python benchmarks/run_full_eval.py`. -### Where We Lose - -The remaining 11.5% of false positives are concentrated in number-heavy and -heavily-paraphrased faithful sentences (e.g. "approximately 1200 SEK per tonne"), -where standalone NLI is weak. For those, enable the optional LLM-judge -escalation (`use_llm_judge=True`) — it is exactly the borderline case the judge -exists to catch. Conservative thresholds still bias toward catching -hallucinations over passing every clean sentence, so treat athena as a guardrail. +### How false positives are kept low + +Standalone NLI scores many faithful paraphrases as "neutral" (entailment ≈ 0) +even when the claim is fully supported. Athena recovers these without letting +hallucinations through, using three guarded signals: + +- **Anaphora windowing** — a sentence opening with a referent ("This cap…", "It + also…") is scored together with its predecessor, restoring the antecedent. +- **Contradiction-aware rescue** — a not-entailed claim is only rescued when the + most on-topic context unit does *not* contradict it, so reversals and subtle + contradictions stay flagged. +- **Numeric gate** — rescue requires every number in the claim to appear in the + context, so number-substitution hallucinations ("$5M" vs a "$2M" context) are + never rescued. + +The remaining false positives are heavily-paraphrased claims with little lexical +overlap (e.g. "olive oil is drizzled on top"); enable the optional LLM-judge +escalation (`use_llm_judge=True`) for those. Athena still biases toward catching +hallucinations over passing every clean sentence — treat it as a guardrail. 
**LettuceDetect beats athena on span-level F1** on real-world benchmarks (LettuceDetect 79.2% F1 on annotated spans vs. athena's unvalidated real-world score). Athena wins on latency bounds, provider-neutrality, offline execution, and the spans-in-library integration story — not raw F1. diff --git a/athena_verify/calibration.py b/athena_verify/calibration.py index 73b3acc..89558d6 100644 --- a/athena_verify/calibration.py +++ b/athena_verify/calibration.py @@ -20,6 +20,16 @@ PARTIAL_THRESHOLD = 0.50 UNSUPPORTED_THRESHOLD = 0.30 +# Grounding-rescue thresholds. Cross-encoder NLI frequently scores a faithful +# paraphrase as "neutral" (entailment ~0) even though the claim is fully +# supported. When the claim is *not* contradicted, is heavily lexically +# grounded, and all its numbers appear in the context, we lift it out of the +# unsupported band — recovering false positives without passing contradictions +# or number swaps (which fail the contradiction / numeric guards). +RESCUE_CONTRADICTION_CEILING = 0.45 +RESCUE_CONTAINMENT_FLOOR = 0.50 +RESCUE_TRUST = 0.55 + def compute_trust_score( nli_score: float, @@ -57,6 +67,43 @@ def compute_trust_score( return min(1.0, max(0.0, trust)) +def apply_grounding_rescue( + trust: float, + *, + entailment: float, + contradiction: float, + containment: float, + numeric_ok: bool, +) -> float: + """Lift trust for neutral-but-grounded paraphrases NLI scores too low. + + Only ever raises the score, and only when all guards pass: + - the claim is not contradicted by the most on-topic context unit, + - it is not already strongly entailed (nothing to rescue), + - its content words are heavily present in the context, and + - every number in it appears in the context. + + Args: + trust: The ensemble trust score before rescue. + entailment: Max NLI entailment probability for the sentence. + contradiction: Contradiction probability from the most on-topic context unit. + containment: Fraction of content words found in the context.
+ numeric_ok: Whether all numbers in the sentence appear in the context. + + Returns: + The (possibly raised) trust score. + """ + if contradiction >= RESCUE_CONTRADICTION_CEILING: + return trust + if entailment >= SUPPORTED_THRESHOLD: + return trust + if not numeric_ok: + return trust + if containment >= RESCUE_CONTAINMENT_FLOOR: + return max(trust, RESCUE_TRUST) + return trust + + def classify_support(trust_score: float) -> str: """Classify a sentence's support status based on trust score. diff --git a/athena_verify/core.py b/athena_verify/core.py index f01c306..d2599ea 100644 --- a/athena_verify/core.py +++ b/athena_verify/core.py @@ -9,6 +9,7 @@ import asyncio import os +import re import time from collections.abc import AsyncIterator from typing import Any @@ -16,6 +17,7 @@ import structlog from athena_verify.calibration import ( + apply_grounding_rescue, classify_support, compute_overall_trust, compute_trust_score, @@ -29,8 +31,8 @@ SupportingSpan, VerificationResult, ) -from athena_verify.nli import batch_compute_entailment -from athena_verify.overlap import best_overlap_score +from athena_verify.nli import batch_compute_nli +from athena_verify.overlap import best_overlap_score, containment_score, numeric_consistency from athena_verify.parser import sentence_buffer, split_sentences logger = structlog.get_logger() @@ -39,9 +41,12 @@ # reported as a supporting span for a sentence. _SPAN_ENTAILMENT_THRESHOLD = 0.5 -# Return shape of _ground_sentences: (nli_scores, per-sentence span-unit scores, -# span unit texts, span unit (chunk_idx, start, end) locations). -_GroundResult = tuple[list[float], list[list[float]], list[str], list[tuple[int, int, int]]] +# Return shape of _ground_sentences: (entailment scores, contradiction scores, +# per-sentence span-unit scores, span unit texts, span unit (chunk_idx, start, +# end) locations). 
+_GroundResult = tuple[ + list[float], list[float], list[list[float]], list[str], list[tuple[int, int, int]] +] # Leading tokens that signal a sentence depends on its predecessor for meaning # (anaphora / discourse continuation). When an answer sentence starts with one @@ -92,6 +97,11 @@ def _starts_with_anaphor(sentence: str) -> bool: return first in _ANAPHORA_TOKENS +def _word_tokens(text: str) -> list[str]: + """Lowercase content tokens (length > 2) for lightweight topic matching.""" + return [w for w in re.findall(r"[a-z0-9]+", text.lower()) if len(w) > 2] + + def _build_context_units( chunk_texts: list[str], ) -> tuple[list[str], list[tuple[int, int, int]], list[bool]]: @@ -132,12 +142,13 @@ def _ground_sentences( sentences: list[str], chunk_texts: list[str], nli_model: str, -) -> tuple[list[float], list[list[float]], list[str], list[tuple[int, int, int]]]: +) -> _GroundResult: """Score how well each answer sentence is grounded in the context. Splits context into focused premise candidates, applies anaphora windowing to each hypothesis, and returns: - - nli_scores: best entailment per sentence over all premise candidates, + - entail_scores: best entailment per sentence over all premise candidates, + - contra_scores: contradiction per sentence from its most on-topic candidate, - span_scores: per-sentence entailment over the span-eligible units only, - span_units / span_locations: the span-eligible units these align to. """ @@ -149,7 +160,8 @@ def _ground_sentences( if not units or not sentences: empty_spans: list[list[float]] = [[] for _ in sentences] - return [0.0] * len(sentences), empty_spans, span_units, span_locations + zeros = [0.0] * len(sentences) + return zeros, list(zeros), empty_spans, span_units, span_locations # Anaphora windowing: prepend the previous sentence when the current one # opens with a referent, so the NLI hypothesis carries its antecedent.
@@ -161,18 +173,62 @@ def _ground_sentences( hypotheses.append(sentence) nli_pairs = [(unit, hyp) for hyp in hypotheses for unit in units] - flat = batch_compute_entailment(nli_pairs, model_name=nli_model) + flat = batch_compute_nli(nli_pairs, model_name=nli_model) + + # Token sets per unit, for picking the on-topic unit for the contradiction + # signal (see below). + unit_tokens = [set(_word_tokens(u)) for u in units] - nli_scores: list[float] = [] + entail_scores: list[float] = [] + contra_scores: list[float] = [] span_scores: list[list[float]] = [] n_units = len(units) for i in range(len(sentences)): - unit_scores = flat[i * n_units : (i + 1) * n_units] - nli_scores.append(max(unit_scores) if unit_scores else 0.0) + rows = flat[i * n_units : (i + 1) * n_units] + entails = [e for e, _ in rows] + entail_scores.append(max(entails) if entails else 0.0) + # Contradiction is read from the unit most lexically on-topic with the + # claim, not the global max. An unrelated context unit frequently + # "contradicts" a claim it has nothing to do with (negations, sibling + # clauses), which would veto a faithful sentence; the on-topic unit + # still fires for genuine contradictions (number swaps, reversals) + # because those reuse the same vocabulary. 
+ hyp_tokens = set(_word_tokens(hypotheses[i])) + if rows and hyp_tokens: + relevance = [len(hyp_tokens & ut) for ut in unit_tokens] + topic = max(range(len(rows)), key=lambda k: (relevance[k], entails[k])) + contra_scores.append(rows[topic][1]) + else: + contra_scores.append(0.0) span_scores.append( - [s for s, keep in zip(unit_scores, is_span_unit, strict=True) if keep] + [e for e, keep in zip(entails, is_span_unit, strict=True) if keep] ) - return nli_scores, span_scores, span_units, span_locations + return entail_scores, contra_scores, span_scores, span_units, span_locations + + +def _trust_and_status( + *, + entailment: float, + contradiction: float, + overlap: float, + sentence: str, + context_text: str, + llm: float | None, + weights: dict[str, float] | None, +) -> tuple[float, str]: + """Combine signals into a trust score, apply the grounding rescue, classify. + + Shared by every verify entry point so they score identically. + """ + trust = compute_trust_score(entailment, overlap, llm, weights) + trust = apply_grounding_rescue( + trust, + entailment=entailment, + contradiction=contradiction, + containment=containment_score(sentence, context_text), + numeric_ok=numeric_consistency(sentence, context_text), + ) + return trust, classify_support(trust) def verify_step( @@ -279,9 +335,10 @@ def verify( # sentence against individual context sentences plus the whole chunk # (max wins), and prepends the prior sentence when a hypothesis opens with # an anaphor. See athena_verify.core helpers for details. 
- nli_scores, per_sentence_unit_scores, span_units, span_locations = _ground_sentences( - sentences, chunk_texts, nli_model + entail_scores, contra_scores, per_sentence_unit_scores, span_units, span_locations = ( + _ground_sentences(sentences, chunk_texts, nli_model) ) + context_text = " ".join(chunk_texts) # --- Lexical overlap scoring --- overlap_results = [best_overlap_score(s, chunk_texts) for s in sentences] @@ -315,12 +372,20 @@ def verify( # --- Build per-sentence results --- sentence_scores: list[SentenceScore] = [] for i, sentence in enumerate(sentences): - nli = nli_scores[i] if i < len(nli_scores) else 0.0 + nli = entail_scores[i] if i < len(entail_scores) else 0.0 + contra = contra_scores[i] if i < len(contra_scores) else 0.0 overlap, best_chunk = overlap_results[i] llm = llm_scores[i] if i < len(llm_scores) else None - trust = compute_trust_score(nli, overlap, llm, weights) - status = classify_support(trust) + trust, status = _trust_and_status( + entailment=nli, + contradiction=contra, + overlap=overlap, + sentence=sentence, + context_text=context_text, + llm=llm, + weights=weights, + ) unit_scores_i = per_sentence_unit_scores[i] if i < len(per_sentence_unit_scores) else [] supporting_spans = [ @@ -453,9 +518,14 @@ async def verify_async( # per-unit + whole-chunk premises and anaphora windowing here too, instead # of the old concatenate-all-chunks premise that silently truncated at the # model's token limit. 
- nli_scores, per_sentence_unit_scores, span_units, span_locations = await asyncio.to_thread( - _ground_sentences, sentences, chunk_texts, nli_model - ) + ( + entail_scores, + contra_scores, + per_sentence_unit_scores, + span_units, + span_locations, + ) = await asyncio.to_thread(_ground_sentences, sentences, chunk_texts, nli_model) + context_text = " ".join(chunk_texts) # --- Lexical overlap scoring --- overlap_results = [best_overlap_score(s, chunk_texts) for s in sentences] @@ -489,12 +559,20 @@ async def verify_async( # --- Build per-sentence results --- sentence_scores: list[SentenceScore] = [] for i, sentence in enumerate(sentences): - nli = nli_scores[i] if i < len(nli_scores) else 0.0 + nli = entail_scores[i] if i < len(entail_scores) else 0.0 + contra = contra_scores[i] if i < len(contra_scores) else 0.0 overlap, best_chunk = overlap_results[i] llm = llm_scores[i] if i < len(llm_scores) else None - trust = compute_trust_score(nli, overlap, llm, weights) - status = classify_support(trust) + trust, status = _trust_and_status( + entailment=nli, + contradiction=contra, + overlap=overlap, + sentence=sentence, + context_text=context_text, + llm=llm, + weights=weights, + ) unit_scores_i = per_sentence_unit_scores[i] if i < len(per_sentence_unit_scores) else [] supporting_spans = [ @@ -663,26 +741,34 @@ def verify_batch( ) continue - nli_scores, per_sentence_unit_scores, span_units, span_locations = _ground_sentences( - sentences, chunk_texts, nli_model + entail_scores, contra_scores, per_sentence_unit_scores, span_units, span_locations = ( + _ground_sentences(sentences, chunk_texts, nli_model) ) + context_text = " ".join(chunk_texts) sentence_scores: list[SentenceScore] = [] llm_scores: list[float | None] = [None] * len(sentences) if use_llm_judge and llm_client is not None: - combined_context = " ".join(chunk_texts) judge_results = batch_judge_sentences( - sentences, combined_context, questions_list[q_idx], llm_client + sentences, context_text, 
questions_list[q_idx], llm_client ) llm_scores = [score for score, _ in judge_results] for i, sentence in enumerate(sentences): - nli = nli_scores[i] if i < len(nli_scores) else 0.0 + nli = entail_scores[i] if i < len(entail_scores) else 0.0 + contra = contra_scores[i] if i < len(contra_scores) else 0.0 overlap, best_chunk = best_overlap_score(sentence, chunk_texts) llm = llm_scores[i] if i < len(llm_scores) else None - trust = compute_trust_score(nli, overlap, llm, weights) - status = classify_support(trust) + trust, status = _trust_and_status( + entailment=nli, + contradiction=contra, + overlap=overlap, + sentence=sentence, + context_text=context_text, + llm=llm, + weights=weights, + ) unit_scores_i = per_sentence_unit_scores[i] if i < len(per_sentence_unit_scores) else [] supporting_spans = [ @@ -814,7 +900,7 @@ def _ground_all() -> list[_GroundResult]: for q_idx in range(len(questions_list)): sents = all_sentences[q_idx] if not sents: - out.append(([], [], [], [])) + out.append(([], [], [], [], [])) continue texts = [c.content for c in all_chunks[q_idx]] out.append(_ground_sentences(sents, texts, nli_model)) @@ -843,24 +929,38 @@ def _ground_all() -> list[_GroundResult]: ) continue - nli_scores, per_sentence_unit_scores, span_units, span_locations = grounding[q_idx] + ( + entail_scores, + contra_scores, + per_sentence_unit_scores, + span_units, + span_locations, + ) = grounding[q_idx] + context_text = " ".join(chunk_texts) sentence_scores: list[SentenceScore] = [] llm_scores: list[float | None] = [None] * len(sentences) if use_llm_judge and llm_client is not None: - combined_context = " ".join(chunk_texts) judge_results = batch_judge_sentences( - sentences, combined_context, questions_list[q_idx], llm_client + sentences, context_text, questions_list[q_idx], llm_client ) llm_scores = [score for score, _ in judge_results] for i, sentence in enumerate(sentences): - nli = nli_scores[i] if i < len(nli_scores) else 0.0 + nli = entail_scores[i] if i < 
len(entail_scores) else 0.0 + contra = contra_scores[i] if i < len(contra_scores) else 0.0 overlap, best_chunk = best_overlap_score(sentence, chunk_texts) llm = llm_scores[i] if i < len(llm_scores) else None - trust = compute_trust_score(nli, overlap, llm, weights) - status = classify_support(trust) + trust, status = _trust_and_status( + entailment=nli, + contradiction=contra, + overlap=overlap, + sentence=sentence, + context_text=context_text, + llm=llm, + weights=weights, + ) unit_scores_i = ( per_sentence_unit_scores[i] if i < len(per_sentence_unit_scores) else [] @@ -1061,15 +1161,23 @@ async def verify_stream( else: ground_input = [sentence] - nli_scores, span_scores, span_units, span_locations = await asyncio.to_thread( - _ground_sentences, ground_input, chunk_texts, nli_model + entail_scores, contra_scores, span_scores, span_units, span_locations = ( + await asyncio.to_thread(_ground_sentences, ground_input, chunk_texts, nli_model) ) - nli = nli_scores[-1] if nli_scores else 0.0 + nli = entail_scores[-1] if entail_scores else 0.0 + contra = contra_scores[-1] if contra_scores else 0.0 unit_scores_i = span_scores[-1] if span_scores else [] overlap, best_chunk = best_overlap_score(sentence, chunk_texts) - trust = compute_trust_score(nli, overlap, None, weights) - status = classify_support(trust) + trust, status = _trust_and_status( + entailment=nli, + contradiction=contra, + overlap=overlap, + sentence=sentence, + context_text=" ".join(chunk_texts), + llm=None, + weights=weights, + ) supporting_spans = [ SupportingSpan( diff --git a/athena_verify/nli.py b/athena_verify/nli.py index cb3d4c5..ef9d033 100644 --- a/athena_verify/nli.py +++ b/athena_verify/nli.py @@ -86,6 +86,36 @@ def entailment_index(model_name: str) -> int | None: return None +@lru_cache(maxsize=32) +def contradiction_index(model_name: str) -> int | None: + """Resolve the contradiction class index from the model's label map. + + Mirrors :func:`entailment_index`. 
Used to tell a real contradiction + ("the cap is $5M" vs context "$2M") apart from a merely neutral / not- + directly-stated paraphrase, so the two can be handled differently. + """ + model = get_nli_model(model_name) + config = getattr(getattr(model, "model", None), "config", None) or getattr( + model, "config", None + ) + id2label = getattr(config, "id2label", None) + if not isinstance(id2label, dict): + return None + for idx, label in id2label.items(): + if "contradict" in str(label).lower(): + return int(idx) + return None + + +def _softmax(logits: Any) -> list[float]: + """Numerically stable softmax over a logit row.""" + row = list(logits) + max_val = max(row) + exp_vals = [math.exp(v - max_val) for v in row] + total = sum(exp_vals) + return [v / total for v in exp_vals] + + def _softmax_entailment(logits: Any, entail_idx: int) -> float: """Convert NLI logits to entailment probability via softmax. @@ -93,12 +123,9 @@ def _softmax_entailment(logits: Any, entail_idx: int) -> float: logits: Per-class logits for one premise/hypothesis pair. entail_idx: Index of the entailment class for this model. """ - row = list(logits) - max_val = max(row) - exp_vals = [math.exp(v - max_val) for v in row] - total = sum(exp_vals) - idx = entail_idx if 0 <= entail_idx < len(row) else 1 - return exp_vals[idx] / total + probs = _softmax(logits) + idx = entail_idx if 0 <= entail_idx < len(probs) else 1 + return probs[idx] def compute_entailment_score( @@ -126,28 +153,31 @@ def compute_entailment_score( return float(row) if not hasattr(row, "__len__") else float(row[0]) -def batch_compute_entailment( +def batch_compute_nli( pairs: list[tuple[str, str]], model_name: str = "cross-encoder/nli-deberta-v3-base", batch_size: int = 32, -) -> list[float]: - """Batch compute entailment scores for multiple premise-hypothesis pairs. +) -> list[tuple[float, float]]: + """Batch compute (entailment, contradiction) probabilities per pair. 
+ + Returns a list of ``(entailment_prob, contradiction_prob)`` tuples. For + single-logit consistency models (e.g. HHEM) the contradiction probability + is taken as ``1 - entailment``. Args: pairs: List of (premise, hypothesis) tuples. model_name: Cross-encoder model to use (or alias like "lightweight"). batch_size: Number of pairs to process at once. - - Returns: - List of entailment probabilities. """ if not pairs: return [] model = get_nli_model(model_name) - entail_idx = entailment_index(model_name) - fallback_idx = entail_idx if entail_idx is not None else 1 - results: list[float] = [] + e_idx = entailment_index(model_name) + c_idx = contradiction_index(model_name) + entail_idx = e_idx if e_idx is not None else 1 + contra_idx = c_idx if c_idx is not None else 0 + results: list[tuple[float, float]] = [] for start in range(0, len(pairs), batch_size): batch = pairs[start : start + batch_size] @@ -155,13 +185,38 @@ def batch_compute_entailment( for score_row in scores: if hasattr(score_row, "__len__") and len(score_row) >= 3: - results.append(_softmax_entailment(score_row, fallback_idx)) + probs = _softmax(score_row) + entail = probs[entail_idx] if 0 <= entail_idx < len(probs) else probs[1] + contra = probs[contra_idx] if 0 <= contra_idx < len(probs) else probs[0] + results.append((entail, contra)) else: - results.append(float(score_row)) + entail = float(score_row) + results.append((entail, 1.0 - entail)) return results +def batch_compute_entailment( + pairs: list[tuple[str, str]], + model_name: str = "cross-encoder/nli-deberta-v3-base", + batch_size: int = 32, +) -> list[float]: + """Batch compute entailment scores for multiple premise-hypothesis pairs. + + Thin wrapper over :func:`batch_compute_nli` that returns only the + entailment probabilities. + + Args: + pairs: List of (premise, hypothesis) tuples. + model_name: Cross-encoder model to use (or alias like "lightweight"). + batch_size: Number of pairs to process at once. 
+ + Returns: + List of entailment probabilities. + """ + return [entail for entail, _ in batch_compute_nli(pairs, model_name, batch_size)] + + async def batch_compute_entailment_async( pairs: list[tuple[str, str]], model_name: str = "cross-encoder/nli-deberta-v3-base", diff --git a/athena_verify/overlap.py b/athena_verify/overlap.py index 3bc4f59..17a2f5c 100644 --- a/athena_verify/overlap.py +++ b/athena_verify/overlap.py @@ -9,6 +9,60 @@ from __future__ import annotations +import re + +_WORD_RE = re.compile(r"[a-z0-9]+") +# A number: digits with optional thousands separators / decimal part. +_NUM_RE = re.compile(r"\d[\d,]*(?:\.\d+)?") + +# Function words carry no grounding signal; excluding them keeps containment +# from being inflated by shared "the/of/is" tokens. +_STOPWORDS = frozenset( + { + "the", "a", "an", "is", "are", "was", "were", "be", "been", "being", + "of", "to", "in", "on", "for", "and", "or", "but", "with", "at", "by", + "as", "that", "this", "these", "those", "it", "its", "from", "into", + "than", "then", "also", "such", "which", "their", "they", "them", + "has", "have", "had", "will", "shall", "may", "can", "any", "all", + "not", "no", "only", "other", "more", "most", "some", "each", "both", + } +) + + +def _normalize_number(token: str) -> str: + """Strip thousands separators so '1,200' and '1200' compare equal.""" + return token.replace(",", "") + + +def containment_score(sentence: str, context_text: str) -> float: + """Fraction of a sentence's content words that appear in the context. + + Unlike symmetric token F1 (which is penalised by long context), this is a + precision-style measure of how much of the *claim* is lexically grounded. + It is the signal used to rescue faithful paraphrases that standalone NLI + scores as neutral. 
+ """ + ctx_tokens = set(_WORD_RE.findall(context_text.lower())) + words = [ + w for w in _WORD_RE.findall(sentence.lower()) if len(w) > 2 and w not in _STOPWORDS + ] + if not words: + return 0.0 + return sum(1 for w in words if w in ctx_tokens) / len(words) + + +def numeric_consistency(sentence: str, context_text: str) -> bool: + """True if every number in the sentence also appears in the context. + + Comma-insensitive. Returns True when the sentence contains no numbers. + This is the guard that keeps number-substitution hallucinations + ("the cap is $5M" against a $2M context) from being rescued by lexical + containment, since the swapped figure will be absent from the context. + """ + ctx_nums = {_normalize_number(n) for n in _NUM_RE.findall(context_text)} + sent_nums = [_normalize_number(n) for n in _NUM_RE.findall(sentence)] + return all(n in ctx_nums for n in sent_nums) + def token_f1(text1: str, text2: str) -> float: """Compute token-level F1 overlap between two texts. diff --git a/benchmarks/RESULTS.md b/benchmarks/RESULTS.md index 8ab9c98..baa80eb 100644 --- a/benchmarks/RESULTS.md +++ b/benchmarks/RESULTS.md @@ -72,20 +72,20 @@ python benchmarks/run_faithbench.py --synthetic \ Six hallucination categories across legal, medical, technical, and general domains. 
-### NLI-Only Mode (nli-deberta-v3-base, ~24ms p50) +### NLI-Only Mode (nli-deberta-v3-base, ~23ms p50) | Category | Precision | Recall | F1 | |----------|-----------|--------|----| | Fabricated claims | 100.0% | 96.0% | **97.9%** | | Out-of-context | 100.0% | 96.7% | **98.3%** | | Number substitutions | 82.1% | 95.8% | **88.5%** | -| Subtle contradictions | 100.0% | 100.0% | **100.0%** | -| Partial support | 87.5% | 95.5% | **91.3%** | -| **Overall** | **90.6%** | **96.7%** | **93.6%** | +| Subtle contradictions | 100.0% | 96.7% | **98.3%** | +| Partial support | 95.2% | 90.9% | **93.0%** | +| **Overall** | **94.5%** | **95.6%** | **95.0%** | -- **False positive rate on faithful sentences**: 11.5% (10/87 sentences incorrectly flagged) -- **Latency p50**: ~23.5ms per verification call -- **Latency p95**: ~36.2ms per verification call +- **False positive rate on faithful sentences**: 4.6% (4/87 sentences incorrectly flagged) +- **Latency p50**: ~22.5ms per verification call +- **Latency p95**: ~34.5ms per verification call - **Cost**: $0 (local model, no API calls) ### NLI-Only Mode (nli-deberta-v3-large, ~53ms p50) @@ -95,23 +95,25 @@ Six hallucination categories across legal, medical, technical, and general domai | Fabricated claims | 100.0% | 98.7% | **99.3%** | | Out-of-context | 100.0% | 93.3% | **96.5%** | | Number substitutions | 82.1% | 95.8% | **88.5%** | -| Subtle contradictions | 100.0% | 100.0% | **100.0%** | -| Partial support | 80.8% | 95.5% | **87.5%** | -| **Overall** | **90.7%** | **97.2%** | **93.8%** | +| Subtle contradictions | 100.0% | 93.3% | **96.5%** | +| Partial support | 90.9% | 90.9% | **90.9%** | +| **Overall** | **94.5%** | **95.6%** | **95.0%** | -- **False positive rate on faithful sentences**: 9.2% (8/87 sentences incorrectly flagged) -- **Latency p50**: ~52.5ms per verification call -- **Latency p95**: ~83.2ms per verification call +- **False positive rate on faithful sentences**: 3.4% (3/87 sentences incorrectly flagged) +- 
**Latency p50**: ~53.4ms per verification call +- **Latency p95**: ~89.2ms per verification call ### How It Works -Context chunks are split into individual sentences before NLI scoring. Each answer sentence is scored against every context sentence **and the full chunk**, and the maximum entailment score is used. This avoids the "neutral trap" where NLI models classify a hypothesis as neutral when the premise contains information beyond the hypothesis, while still catching facts spread across several context sentences. Answer sentences that open with an anaphor ("This cap…", "It also…") are joined with the previous sentence before scoring so the referent is preserved — the single largest source of false positives on faithful text. +Context chunks are split into individual sentences before NLI scoring. Each answer sentence is scored against every context sentence **and the full chunk**, and the maximum entailment score is used. This avoids the "neutral trap" where NLI models classify a hypothesis as neutral when the premise contains information beyond the hypothesis, while still catching facts spread across several context sentences. Answer sentences that open with an anaphor ("This cap…", "It also…") are joined with the previous sentence before scoring so the referent is preserved. + +NLI still scores many faithful paraphrases as neutral (entailment ≈ 0). A **guarded rescue** recovers them without admitting hallucinations: a not-entailed sentence is lifted to *partially supported* only when (a) the most on-topic context unit does **not** contradict it — read from the 3-class NLI distribution, picked by lexical relevance so an unrelated unit can't veto a faithful claim — and (b) every number in the sentence appears in the context, and (c) most of its content words appear in the context. This is what cut the faithful false-positive rate from 17% to 4.6% (base) / 3.4% (large) while holding hallucination recall at ~96%. 
### The Right Tool for the Right Job | Use case | Recommended mode | Why | |----------|-----------------|-----| -| General RAG QA | NLI-only (base) | 93.6% F1 in ~24ms | +| General RAG QA | NLI-only (base) | 95.0% F1 in ~23ms | | High-stakes docs | NLI-only (large) | Lower false-positive rate at ~53ms | | Real-time chat | NLI-only (base) | ~24ms latency is production-ready | | Maximum accuracy | NLI + LLM-judge | LLM catches paraphrases NLI misses | @@ -120,8 +122,8 @@ Context chunks are split into individual sentences before NLI scoring. Each answ | Mode | p50 | p95 | Notes | |------|-----|-----|-------| -| NLI only (base) | ~24ms | ~36ms | Fastest, 93.6% F1, 11.5% FP on faithful | -| NLI only (large) | ~53ms | ~83ms | Lower FP (9.2%), 93.8% F1 | +| NLI only (base) | ~23ms | ~35ms | Fastest, 95.0% F1, 4.6% FP on faithful | +| NLI only (large) | ~53ms | ~89ms | Lower FP (3.4%), 95.0% F1 | | LLM judge (local) | ~7.4s | ~10s | Per sentence, local gemma-4-31b-it | | GPT-4 judge (API) | ~2s | ~5s | Per sentence, network round-trip | diff --git a/tests/test_new_features.py b/tests/test_new_features.py index a250d2e..ef3dd10 100644 --- a/tests/test_new_features.py +++ b/tests/test_new_features.py @@ -46,11 +46,11 @@ def predict(self, pairs): @pytest.fixture(autouse=True) def _mock_nli(): # _ground_sentences scores one pair per (context unit, sentence); return a - # constant entailment for each so multi-sentence answers score uniformly. - def fake_entailment(pairs, *args, **kwargs): - return [0.85] * len(pairs) + # constant (entailment, contradiction) for each so answers score uniformly. 
+ def fake_nli(pairs, *args, **kwargs): + return [(0.85, 0.05)] * len(pairs) - with patch("athena_verify.core.batch_compute_entailment", side_effect=fake_entailment): + with patch("athena_verify.core.batch_compute_nli", side_effect=fake_nli): yield diff --git a/tests/test_rescue.py b/tests/test_rescue.py new file mode 100644 index 0000000..f79b456 --- /dev/null +++ b/tests/test_rescue.py @@ -0,0 +1,83 @@ +"""Tests for the grounding-rescue path: containment, numeric gate, and the +contradiction-vetoed rescue that recovers faithful paraphrases NLI scores low. +""" + +from __future__ import annotations + +from athena_verify.calibration import ( + RESCUE_TRUST, + apply_grounding_rescue, + classify_support, +) +from athena_verify.overlap import containment_score, numeric_consistency + + +class TestContainment: + def test_full_containment(self): + score = containment_score( + "reference counting primary mechanism", + "Python memory management uses reference counting as the primary mechanism.", + ) + assert score == 1.0 + + def test_partial_containment(self): + score = containment_score( + "olive oil drizzled before baking", + "Ingredients: pizza dough, tomato sauce, mozzarella, olive oil.", + ) + assert 0.0 < score < 0.6 + + def test_stopwords_ignored(self): + # Only function words overlap -> no grounding signal. 
+ assert containment_score("the and of is", "the cat and the dog") == 0.0 + + def test_empty_sentence(self): + assert containment_score("the of", "anything here") == 0.0 + + +class TestNumericConsistency: + def test_no_numbers_is_ok(self): + assert numeric_consistency("the cap applies broadly", "context with no figures") + + def test_matching_number(self): + assert numeric_consistency("the cap is 2 million", "indemnification cap of 2 million") + + def test_comma_insensitive(self): + assert numeric_consistency("about 1200 SEK", "approximately SEK 1,200 per tonne") + + def test_substituted_number_fails(self): + assert not numeric_consistency("the cap is 5 million", "the cap is 2 million") + + +class TestGroundingRescue: + def _neutral_paraphrase(self, **over): + kwargs = dict( + entailment=0.10, contradiction=0.05, containment=0.9, numeric_ok=True + ) + kwargs.update(over) + return apply_grounding_rescue(0.2, **kwargs) + + def test_rescues_neutral_grounded_paraphrase(self): + trust = self._neutral_paraphrase() + assert trust >= RESCUE_TRUST + assert classify_support(trust) in ("SUPPORTED", "PARTIAL") + + def test_contradiction_blocks_rescue(self): + # A real contradiction (e.g. subtle reversal) must never be rescued. + trust = self._neutral_paraphrase(contradiction=0.9) + assert trust == 0.2 + + def test_numeric_mismatch_blocks_rescue(self): + # Number substitution: lexically grounded but a figure is wrong. + trust = self._neutral_paraphrase(numeric_ok=False) + assert trust == 0.2 + + def test_low_containment_not_rescued(self): + trust = self._neutral_paraphrase(containment=0.2) + assert trust == 0.2 + + def test_rescue_never_lowers_trust(self): + # Already-high trust is left untouched. 
+ assert apply_grounding_rescue( + 0.9, entailment=0.85, contradiction=0.0, containment=1.0, numeric_ok=True + ) == 0.9 diff --git a/tests/test_supporting_spans.py b/tests/test_supporting_spans.py index 485dd5c..a9a4b41 100644 --- a/tests/test_supporting_spans.py +++ b/tests/test_supporting_spans.py @@ -15,12 +15,14 @@ # 2 context units (one per chunk), 2 sentences in answer → 4 NLI pairs: # (unit0, sent0), (unit1, sent0), (unit0, sent1), (unit1, sent1) # Scores: sent0 supported by unit0 (chunk 0), sent1 supported by unit1 (chunk 1). -_NLI_SCORES = [0.9, 0.1, 0.1, 0.85] +# Each is (entailment, contradiction); contradiction is 0 so the rescue rule +# is irrelevant to span assignment here. +_NLI_SCORES = [(0.9, 0.0), (0.1, 0.0), (0.1, 0.0), (0.85, 0.0)] @pytest.fixture() def _mock_nli(): - with patch("athena_verify.core.batch_compute_entailment", return_value=_NLI_SCORES): + with patch("athena_verify.core.batch_compute_nli", return_value=_NLI_SCORES): yield @@ -89,7 +91,10 @@ def test_span_text_matches_slice(self): def test_no_spans_below_threshold(self): # All NLI scores are 0.1 — below the 0.5 threshold, so no spans. - with patch("athena_verify.core.batch_compute_entailment", return_value=[0.1, 0.1, 0.1, 0.1]): + with patch( + "athena_verify.core.batch_compute_nli", + return_value=[(0.1, 0.0), (0.1, 0.0), (0.1, 0.0), (0.1, 0.0)], + ): result = verify( question="What color is the sky?", answer="The sky appears blue. Photosynthesis happens in plants.", diff --git a/tests/test_verify.py b/tests/test_verify.py index f21844c..63e9071 100644 --- a/tests/test_verify.py +++ b/tests/test_verify.py @@ -13,11 +13,11 @@ @pytest.fixture(autouse=True) def _mock_nli(): # _ground_sentences scores one pair per (context unit, sentence); return a - # constant entailment for each so multi-sentence answers score uniformly. - def fake_entailment(pairs, *args, **kwargs): - return [0.85] * len(pairs) + # constant (entailment, contradiction) for each so answers score uniformly. 
+ def fake_nli(pairs, *args, **kwargs): + return [(0.85, 0.05)] * len(pairs) - with patch("athena_verify.core.batch_compute_entailment", side_effect=fake_entailment): + with patch("athena_verify.core.batch_compute_nli", side_effect=fake_nli): yield From c01d354d3c8136c002578eb02d2c1ac51be63792 Mon Sep 17 00:00:00 2001 From: RahulModugula Date: Sat, 27 Jun 2026 19:44:40 -0400 Subject: [PATCH 5/6] Make sentence-transformers a core dependency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The NLI cross-encoder is the core of the library, but sentence-transformers lived in the optional [nli] extra, so a fresh `pip install athena-verify` followed by verify() raised ImportError — contradicting the README. Ship it by default so the documented one-liner install works cold. The [nli] extra is kept (now empty) for backwards compatibility. Verified: clean build (twine check passes) and a from-scratch venv install + examples/quickstart.py run end-to-end. --- pyproject.toml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 41a6e77..618f33d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,12 +23,15 @@ dependencies = [ "pydantic>=2.9.0", "structlog>=24.0.0", "nltk>=3.8.0", + # The NLI cross-encoder is the core of the library, so ship it by default — + # `pip install athena-verify` should run verify() with no extra steps. + "sentence-transformers>=3.0.0", ] [project.optional-dependencies] -nli = [ - "sentence-transformers>=3.0.0", -] +# Retained for backwards compatibility; sentence-transformers is now a core +# dependency, so `athena-verify[nli]` resolves to the base install. 
+nli = [] llm = [ "openai>=1.0.0", "anthropic>=0.30.0", From b2055b0f7685a48a832f2151d9bcb27f0c456a4d Mon Sep 17 00:00:00 2001 From: RahulModugula Date: Sat, 27 Jun 2026 19:55:49 -0400 Subject: [PATCH 6/6] Add agent circuit-breaker demo and animated GIF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rework examples/agent_circuit_breaker.py into a realistic 4-step financial research agent: a hallucinated "35% net margin" (the filing says 22%) trips the verify_step() circuit breaker at step 3, so the BUY recommendation built on it is never produced. Silence the ML stack's load report / progress bars and warm the model up front so the run is clean. Add assets/circuit_breaker.gif (rendered from the demo) and feature it near the top of the README — the cascade-prevention story is the launch narrative. Also sort imports in the LangChain example. --- .gitignore | 4 +- README.md | 19 ++++++ assets/circuit_breaker.gif | Bin 0 -> 72143 bytes assets/circuit_breaker.tape | 25 +++++++ examples/agent_circuit_breaker.py | 104 +++++++++++++++++++++--------- examples/langchain_example.py | 4 +- 6 files changed, 124 insertions(+), 32 deletions(-) create mode 100644 assets/circuit_breaker.gif create mode 100644 assets/circuit_breaker.tape diff --git a/.gitignore b/.gitignore index 765d4eb..c752342 100644 --- a/.gitignore +++ b/.gitignore @@ -44,4 +44,6 @@ Thumbs.db # Claude Code local settings .claude/ .odin/ -scratchpad/ \ No newline at end of file +scratchpad/ +# Recording build artifact (GIF is committed; cast is regenerated) +assets/*.cast diff --git a/README.md b/README.md index 0db6ca9..29395ae 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,25 @@ No document ingestion. No chunking. No agents. No database. 
Works identically on ![License MIT](https://img.shields.io/badge/license-MIT-green) ![Version](https://img.shields.io/badge/version-0.1.0-orange) +## Stop hallucinations before they cascade + +In a multi-step agent, each step's output feeds the next — a single fabricated +figure propagates straight into the final answer. `verify_step()` is a circuit +breaker that halts the chain the moment a claim stops being grounded in the +evidence: + +![Agent circuit-breaker demo](assets/circuit_breaker.gif) + +```python +from athena_verify import verify_step + +step = verify_step(claim=reasoning_step, evidence=retrieved_chunks, threshold=0.5) +if step.action == "halt": + raise RuntimeError(f"Ungrounded claim blocked (trust={step.trust_score:.2f})") +``` + +Run it yourself: [`examples/agent_circuit_breaker.py`](examples/agent_circuit_breaker.py). + ## How It Works ``` diff --git a/assets/circuit_breaker.gif b/assets/circuit_breaker.gif new file mode 100644 index 0000000000000000000000000000000000000000..5394c8a70d10b14d5b0a30a0df3e7609f6077fd9 GIT binary patch literal 72143 zcmd4YXHZk&zv%mwUT7;wFB*DBP?W9-AXPD-(p5xIKtM!#Gc>7c=p6y+2!hg^p?3&X zx)hNjDk3UPIr;B>=G?h6_uZX6bMhuLYm%9)=f%oe-{5Qx#3_?FF@DFw0;hyZ{2M&P1J{*3Zen|R1pZ?GHo{CGJ-`m@flvb*( zs}+}0yre@?QP+-)dZwtXsek!KMP;QrLI2B_&uM8Hx2^9tG}aFd4V*bgAO}Bv=<507 z$Cv*8UL8H-_4QA?yWgZ`Rb=JVR#rZCc6EIFwlO>V!QR2Or>Fb&hvT-k*2nI?*KgeV z^XG^B*^4_nU!T8-D=sei{CT;ms;a5E>Ep+__=M!V{6hbLpt13h4CqXIFPmZ(skw;LtE_WOQtNV)Db(^vvws{KCh@rR9}RtDo1_H#Wa)ef{=*`^V1D z-Cw`|?Cl>M9{v4y48X8_x)s^2A?Op5Ry`Ft?ct!1eui#kZf7*tS;x7a%2(Yl@S0J4 zdT;Z3G9@V@4Ld`mxK^3zkXHG$Sd7=$(wyu#bbq} zSNfR;!vr$w3mbfjhX6>Ng!tN9g4P77xA&BD0?y&dh!WT16~|C;k#ghpS4nsNuxuD; z>8?{|cW#Pgtruoq9UxU(dH11#MyT2HrfW{)w_RWMRY58<)uYk;X@d8}a(B@+%V?6; z+jx^#s5?WAir3-DGcKJ&HH}6NHwtesoLZ2M{3M!wiXEMehoG=BR<>N%b2@@GB|i)l;RVFICD$roRqG=?H1<-F~!ZNMl11!`ylOpG=}n7lg` zKnbz6QDGO|7eg?z?jY=L|G7U_?ql4!(*l21Yi~aZ-J%9z1NlLSAPP>y3!p^n7NbMZ zWDN$k>vEb{h%iAn;^2qa?2S*U;S2crfZ7|!-Y;VCP#8bKl@Ji@M7?1VR;3igEM-N5 zvhZVG_wQYDII2kH%Rt{Ad 
zXMnX~XK;V?3o0NJxYkCg^R4|Nq%^n955FEohG2pR$V5bM^~U_05II+f>7#NiwYbJ|hZJOElZqlC*A)r~_y{ZXZ! zhd>oN1ho)}pi&XvqdC(Yd5kTNGd>O$qs;|%{ca>dr02GqyFMG_cU!hf^*BK7^Ut(c zZSXquFg{-07Mv^TD=Dq$8;j~sXj%}NOQOE8m&wH*sbD6aC`ul4lN6$tU#=4$Ogo^uHKQa%m;)0=tZOI)F z6Ig}+&taX@^2Y)WE{5o~Jh7`YCVUltTdPNh{R4i_TK@H0Z)1TrE=5Qg_SkUS8<5q+ z6c;0zZ~9}YM^)X;+Gfm;FF)oP&pVGU@nEs%@@Ir3;JHN!Hp+_DqvWRuvI33>LPu(Ri#8NT31`j@Td$r)=X0Qf@mnn2L7K_F7;dvmll_`Py7f&IoHg zBi+MQt~Mg>Gy6qe5$dB7fLKK<-Xa-#3qhbfU0lX*rgVug=N`)t z*N6BJIeIF{k+>9vM20e5jT5<~Fzy%;$1HrRFdq~AA{Ia)jmWHwUtn$uF$Yh_JFq5K zKt%;Exv+{iHrTe{=QL~?7b#*0jz>MMATrw$Am6!I0Y>W}yb?hfAUR*^Z?(&I-w=?2 z64LZ@??F-95Pq#6;OE>{fvVms53y}1NJ`M+B6|3H&zR(p!kYXqrOP_D*>L`CCLy8; zUQ1%&O9YpALe1EXU|iP$Sn>ez27(B%SOLZ% zx>99RSfCm10$NmlxOMiRl1QE?i6fAqA_oPjt&3pV-{l)Jr9IC*J;CR5W1R8D~ zhB3M#-w|7Eh^3&$l;Jf>f?^ z3Ru@Zb1L$dG^iXDi3j`nPB|!-pqu0ka6A;;M|4gHiegR4Z2cMn|1nvIyxb713#cl8 zkOfj|aol*w1c)J<2vC|$ecs@g4~Kz_ENwN+HScYqW*izX^s=7q7Z`Bb3HVLpYagmXL(Z{rQI;Iyqvi5CdmY65%faPTOpVqzGw+hf0*(1-ysfg z0>2PstJ>;5vAd5&b_9Z)MsS@ubSI3(5K@~Pn&vb2^_DF zq%PHf@!woA;%ERO7t^sGqM$3Jj241ZdoWE^S^G{A8ux#U77_^zN!5-gYxlv*GYr-kR?eNqALudX(y zUb%JO5145mIv{DY2O9^-0T=3$Y8meURAw5e)5DddsB-o;Q}4{;1ueMoIWnBSSf|W_ zSWdXYSSaGnVxTx6y*PSYRs=}z6r65eYs#qjG5~S_TPEx3f8I{8VI#rmjgPj`z zL;nmdJ(M}!zA|+y@y+4<&u{I|P6&Qo;C2^uBwq``KqcNff;1YaQcOO#|h2TL)ck*)SM#e8p2B;PaJfknMr3BDpfWvJqXf(d_e%hcqk_o zfRO;TKldJWc@K;PelL^=--ZNo0J}&4RwVp50Z;>BirG*;R|j?1D^WBu3s;cZeIX?% zl$gz~D~A@?JLWIzzUgF&{vG~=LV^iVAu%vGD=4a@Eyda4-=G+J$vbo_Oi-ExuqTD; zyAhkco>aG)r^JcbcZ4eGlC9)*WYIyaDSoXZ4!%DGmCeXW?rtI5?u$D2x7Gr;@1u8g zo~+e|E4T8LFWo&?g=ThD?y!bP$UTsh1LQ7Yx!o{NBO;>q(0pY2tr_N=>yyNt2rf!Q zbPq1ACo*e0GPVfwOgtjAC%h=-M65iwdbM2y36KW?9XXbKT}E*-{WKsdgmbSGJ*xMj zDwU#(m0~Xchz{F!KSR6y%{r!)aBg7);-iHH+S$28n8xbZC-#^IxWcVsV>iv9i>czjk7XUW}s!V~K zQ;rzW#JO7@*j4m1b-_mhK_Qx3tDP@j|tZkDI1DKXQ>DQgue z&$(0MrkNBqSUmK`n1>6Y32KA72N4> z>oJ!V(`$Uv>r&Gj>eHL1(?5{XTevgyE4eYI41HXz-4Ypn=B%AQ=)w978h7StNXDpn z=A=*NRBGl-edgSB=E8pFB6rrZa@HpdFuuV$Y@W4QpS3lewV0Cijr-+}^2=TGm%n{p 
z?xnswsDF7h{qo=bOMoXEs*(-2$VU2Rqtmjn4cUw{*~|yoAWsgfN)DSv4lXu>BQ1xg zA%|}!=j1^S-jS7S2Dc``B7|iSSNomS}EQ0PBX7oFtj`(6>0Pp*W)i_gEj3s)BjRgUN|P z=g$-urInO4l)RoPDL*Kwuqb}TQyTAEq78Xjl~&r&P}($8`Zv3vMv}30v$VtFb(in! zp0w9}4X+1gUJo6-rty@Gs+5gelui1UO{JC1G?dNFlr0>TE%F?fFRPS)vMB%TTfUxF zzS&T|HB&5p{OU5hurec$Y*y*X%jb2Rhj-@zMzw*snK0k^C``c!V~x{n4aq0>#+RB? z*jhK$m`9DES3k!6OMwSNy5DSV;6W|9zc5&}&ex*use#j3$Z_>WQ^p8YmYDRq39$a! zkGicDrrOkD+0^CN)RW%S*Vr^L+cb38MB{B9Rc#)(Y@YOMo=R_? zX>6XGZC*HRUgT|AR&DuY+49-1Wj(!Rv$17sw&mMl%Qo-(9o6@{mhXT2z28fJf6(~; zX!iZT!}kCm6{<#sTTzkzRCERv+eBrYqcR^+LB3X2wN^H(Ru2DGu8dZmrdGbW){{rA zc)m74wKid^Hc|gJ@r*XfrZ(xhHrb;#IllHYYV8VE$L;6++m$lfRhrt>=GreDwG;R{ zG}Jn@tU9#)J9INT^qV>^&vh6cb&&Wvjnz7@TXmZFcba8%S~PW9&2`>7>a^zTvQg`@ zwd%U(-({cC<=E8aG}q;F)aA<8?WWf4Zq@DS-|d~z?c3DtKi3_2)J^8=30CU~wd#55 z-xHD16V=ocGuQL{sE5MW8>iNrVAY%C-n;$v(cLdKrDyTjxd~;MZU{pMFRI+(gdVW;)@2DLA*ctUP zg_~pN1ICmx$5fif)aJ)7{2e3kk87xpYuy~z4j9+X9M^9izdS!~_;;MdKVhssasB3m zNx+0z=7dG_gw_1St-ll2{F657leRY}?*&ZSt5*cki`zaNkxegb(u}|fKmz(62-hK9 z{_oQEf)cN!qD2p5^uQJs{leAFi(a|z*f>0Jeyn-P(8J5$$2X9k-^?uTj8BX{dmej% zpik7i5*_n`p25y25a_Xu-p}am&EWEl8z#5u6^-8H=;iG9?`=z~yWtTrs_NQ?B(s=j z6r*dF*KXXTS2ubpbNBG`3kV7gi`3P>wzc*7(PQt+hNk!JAJS79J@|S11e#meL`23o zySP6Mj|mQm@b!CQYHn?F_W?cBIXJqisq1L#7@d|^qbIvs2?pO8cK46Y|7r_qaD>Fd{y4<6awcd@!klrBaIgnlo2L}2Q5>w{priO+G z=xK0meTCi>M@NTk?VJ_P6X|WTxw(0BbG56hb832mpkY9dYn5-`W@Y7cbauqXB~H&w z&}-;__eFYwdry6zn3O6lt2{K+UszQ1;lp@iQ$ufGxAh(SnVAnIrKObE1afd#eSIB0 zbLJNm<>eKSj4X0;^Wx%@=wX$fHrLlz$U#rb-@Iw>Xz%ImriZ*Y6&3WnS6x$6R8*Xh zm`v}WHMKSL+DC7qugl8l#graC>FtkRQpKguH@7tBzRI6pn3BN!4IRb64U2of&tQL_(P?`!Z5NhQV~++d~SGWB98fDiZ&;^s0)CfN8k4 zs*0R_Ys%$UE>)3_gRX}fzBx0<^QdU@jkSnV}5A{zsTKTRDmE9 zpezDMXIxpxXae>h0^|~~3`tjga$a29noktV0+6m9xNnBA21BN7m)XhF(8qpAb(-5s zHw+h^Fk15H6fdHXc$^1Vqk$NBy&vky?;P*Y8O7&wG%SHF?9_W``lCnL=F%-Iy!PE2jJqg#xIC1AMhvo_;7xA^+H?T2E0qNnw5aWO_NhvV zmMZf2QLq#sEkR?^SJI0S_{3Dz!3LO}L366}R3R9lkwoB>Hx%5+1FlPQX;cnbJh%oI zNN*?38sDM4X=S#2O*jwDZo@;LVQ*Gf%^Vb$qNZ0V%s?Fl0T7+!$SgLJY($>(XQ(HE 
z-I?e&El0T++$}eAg;9Y^02m?(#N461hp-HMtA++iNmpXbZTgA4v-)5cfWAEPF2N#d zgOKLNV*(7KJFZ-Mim5^ZR5o}}zB6!15Kv~(3MOQ8<>}f%*=1-WIFT|66p&^`0+`cU z=R%P;Z<0n-|ES4m7(cevV;dB$`bB(nU70k^chn@b5@|Iwg=z}TH*q3O6`!*-KvwZYCy{Yg!2;(?lMs&HU2P>Po}a8 zpC>l^%!6NG4x2`I;q?WeAj13{_zB>%L1^T#D5wxMWkXegw&`U}L5xVf-*>1Hl#GGP zEO~0w4KMg^vYA5(v)Kv(WexXITX+&9s%WyQvXM zSYoc0_TDQb^QRyOiLUeT9p%WqH~P>sy8Lpu)4V4?dB8TV$eLBh(lV_Dh!F%nMuC{% z3hfhOuKgNn;uMheQ}h#z&9lxVbdHE zSk^F+>~br_bcYCo1|gX8C~2b<&BU?HCt+qsg12T9^u@so!8H7(69q&NrXF(ntl-Bw zGrk21=nN&N!yIPUhl(4*#m8J$-=RN(#>d@3N|6z$`a+V+If^VPGzU z%tqmEae2o~yZk+=ov%Nine`EUZ1_%pNA2D#S8nMk67~w{x~9SF3kah%lt0+6Ypc!- zg$C|w3p#4)S@K9P)>RcKGI8iX_7w#FRTV`G2JQbwlV2HDm&mYg-e^3SsCZue`u>$K z7PAMFbwkzVUV~q59Ugq3GS^guUvavLTAb=J{9PG6xOGqUaGKSqrV0jpb+kO3>CCEm zTRHgE#jkO4#_)G-_myvM4^-xQ&et}47<6gK_M86tytZlWS-jV^^o6~l+7@M-?*WI0 zAAyUy?_q}9!7F}?$g6d2{m-_;RF9S>T$DP*41ZV~G|?Asuk3XBkLP~=6MV;YJvuHM zl=!1h;>X|L_Ffy>NqS~1b+=DPM( zmEqX(-Gbqa4Z{~I{W-F~N?v4anhZB|RhIuM_xt^uJs7R~vI)-~ouP zlB|v1p@<0!&>KeTn>2wu^4W{nf>(?Ns(jb`BN)c;Jo)K|JnTu zljm9q{*h=xjW=0#giXWP|@qW{66B_DmxjcYSxISHzv;JBh=*r zx}0yXPrh&YTFLo)>-3=CE9opu%*TLPosWk#jai+(wW@&H`*4Vs0CRXYFW^azByi=R zTDP^1gNq*qY&|#yxEBE@x8&faub0p;qSh``Tfp~_+ee6?=1X!kPnMKyP&OMYy$Ym* zDD`c0{GtMp0(e`)pSog178T+4Wc0eiuRj4xD4QcO;2(+%)de?d1Hb+Wgc1YzT?3}Y zaGc&x@V{Xd*6>FFMve9aE+2$SA;XA);<}1Y@IKD>#g*>BpwSn5Ct^u+Bz9Tv#YO% z5YWJRvmn?HgmWPzUXDqRf;15Uoz23e&f{880e9EKoH-G`_YvD-;Duu)I0q>>vM%Bo z8bAW*xDUEEQ=X$J-~|``F60S&WstKq%2&~dg!eI_NdYu;90~=GjS8uag8hhu5u*xH zyiRyTriw>GDcA%qTn!rF-bVYbK_8%jw{;QC%RW&lUNPd}BXOU{N+)C}a3~%b>*Wgu zMHh;L{pe?}m14^FqTo1WTrC>D6w@jWri*)hQHrEZMRa<{^sa;DO5PRX&n}1q1g_`K zI?(_*911e}-bbYifb`v~!*yJ&n2!|+Oj+~M%SIrxQO@C}->)Nu<k>8H{iEWYlr+ZN%awfNBvi_gELlD-LAh;Y0$|d;2Ui&GXJSjB^pd z;{jJ=17{RU2!vnS25L0JolO&TQWFg7WBK=E1$?6Y*5V1uM&c5HWN)18MzkIYfvoTr zO*K8+n|NLVJSTy@NI~`V1VQB!pw@}H#R>L$aol=f=Z6!~if3lUajMht7r7Iz*C%UF zC!0qi9-7BjdqA)3Cm1Owo~C&^P=QFwi^r4@XVX|t?vwyd`qdlAGmrB=m+aw_d=kJV zNdShJL}PO>WIDw^GEr&LQyq^)ZexeUz<1{9sd}s|%HBAiv~BS;sGPVo&GQTyd{oGv 
zg~}Y20Fji0r{qhhSYOs@Pod+u_!E`!v^x-;F%vuXjQa z56FU%;Xa{0sB8et*;mdvk^p7|UJjeTL_R`0CsSM~2~}qUeoBCxX*qJ!87&c+Tn*V! zS~jqq?Y$35>%YXrri)5~2OAu43H=a?->1iUvQeyIDd34|&p15DMDxT@LQYyE>XW^w zcV0m&@&Q^tbUPo&2CmeD#iqa=l>%qe0ws+9;Rnye^I#Dr;jcF$$vgQGh*h>`4VZZW z1&YQL3w@987xLB>!j=lEBEhWi0>{1r7J)*WnIdScpQ;8@!Mo^*V?okpesI12iJ&5% zWC(N6vjt-z?Wt$c`R%B8(8|h72jIDMFGO^@kScikOUQW*${`!Zx$v#CN`?M| z3TC}>GmAGZ;+6d>aOYao32O`|IZZvi{On$a>t^+;e2tx7jhagO{JE%M9%;ROJo*ks zf`oCxKnw0*e9}Q1DZutY&9A6XRAtRnXvEq<`Nk#~=2zz?bw_W%SV6oFnqB*xp(d6A z;8i&x(fGE~98Bx4hs)PL`cm!xrPdi$2Zg+b3%vGP%*cu^hTbW!O_zm|>vbOJ1{~IY zQ%EUH2WMv3=2gHlsV0F(`MXiC9S-x|7@*5AK!aREWT+}41oD{!kFa3Vtqu5h><)_G0U>UHTY#x5IakR;uy zwe>Q;)L5^n!IMi5{eNHrK>E|M&f9!lw~pJpje9aNy==H6@F3kLg0cJ7 zO7}I!7q(7a_OH93i>m1oKxQhsjOMx9(Q|ROUO>7BS^=j63zo{>`x#viE`!9yo`kKg zNWN~^M*0P7ppNFbnR3QXtv}1@BHvO|UPw_wYtKtLZ!^md;jMS@?dQ&3qEXZVV0*xq za}Xhr;ful?rnNzXI^s;#&Mt=BbA4O+DWtP~&}VHBD&Kqi?jY1vw5SQ(p7Y!}Lbj!O zJ)*>XR`c$R9|rK)5YuN$xL#)BFwgQ3v~svPI&DMR=ZhM+uH@am^?rtzFf-8e@3j}? z$`HzEB*be39z^p+g~H0bIyEWBbDn4kQ1rOWOMD*Jrv^^Xy%==rh&6x*fGEuhvKFxdC8YW>23(1dN22Z(Uk2D0k{3Kj!A zK*qf76V@`5w$76<;)LeS@xcumJa*hb9e!m#0$B#SJ5LOzqFq6hCy2@Fo`C+CynAyp z+n&Y&B8y3=i{5OE&7ikB7{mYJKL3>GUr$xaq;|!mb3mA{HOg|Y1V1+d026T^#&n() zhdNJ1%zH+C1#`+Koz&T4)u$87rti#8xy|=OU6HlhC{G&JO^%p*WoB8$>*gff*;K_| zY#vIOHzm@a(B~^e8F-EtkvafOWmdp;ZGKT^!C7a)B4>W?ucO&)QKGo#_bUZIWZ1Xn z7fSCz-%&7n1hn@edP~joVDRHVro}stKJH~MVp|p&7Z#cSErKVPST8KG-CE)ZT;j@F z;%QmpTUa`o#hL^{ue$;m8qkx=iI4-bcP(MrkjgFyOb$TGJ>SeljHz%+I#C*AR7;3A@~wnSVbs+YW8XE1x=VMjGKV6q5on8 zE{V><7UOj&D3~h{VB>IB4#1E;uII1U{)X{Wfiv6dKkfmrih1`t^fN@N$pI0DY%VRV zVq`1;z}*eOi*kSUn z%K@>XAU1moD*qh~!fmW!EFg?#2>Qq&*a@(U*M7jMp*1B#@bmL;n?v7n z_>Ck745fVl~>Chkvk2n1Tm%l$*1qAG#iyG)DRgR#%wNK!b%u~ zM_hZNZ$io}R@=8OMLwF&u=<@a8oETt$VdF}W&)&WQgVmmNxYJ&u z$?leQ5+azZ0fws}l?2ptvAv@(5{xuBV|~0foTKxhIsF55p#{mC>>3A2MDV~gssj1_ zDv3}DFX4GOFp^#J{Omd! 
z|5#&$5Mtu=s7r{c%}6IhNdotF*R^Epe25Ykfsz=?MP&*%p^!z-b823@0wlTj^ezAu z>!IPn;z3d8X6Go%Yv%<=w4#o4uUG0sXu-+Y27fNRShP23v^tzN8~h5zY*-DVvB^5j@VG{oAwC=BB7 z8e1}WFQZrBq!^TdusHShyl(hYe}jVXf^+CH3+2Y2qIY9Gbj|PO1fZ?@glS{_y-p=g z%$6v8TMnalPi}BDn?li3$Pjfg_!(Su%KF*wuYW$ez7hFw8h+Zi2N@2^dbfYRF6a)} zNT?`I+r9H^#S3n)jF8g7y&-%8kN0|c;z@CCa=aHGwIfpEBy~AM&cv)8Dxm>}XjvnH zd`_Phdl(>`N5eB57fw|>GBu`tw*=3Wa7rbPINi5%wG@n%{Pyh0+`DAG!AaWJ>G|^ zvsz^oT=Us!IyJ*fR&KHC4INZH<+`$l+)K-CIVGst$jM7(C$k~9X`)ZmtF)jPMyBP$ z)Q7_XUZaLOYMK)=-D>SQr&xPK*|C-{?iR4SHRLDrMMXX+?XHz@607<=Q`k$qVDiZ$ zT4zZ>cZ}J8=zeEs&7AY>=5MD%yN=r(Zme(JpE+s&i^dt&C{h+i@^1a<;{<4-00UPE z7H0NQQ<@ff3?T;z@or^nikcBatX$n78H1UI_aPZ%T1fw+o84#DK`4*b&6>05Z;ZK3 z>E2py%(1Bjlv>S&R2M*cE|DQWTr!;Ez;x9$Lf4TTFVAR=VyWzD=&8Mw=532jn!0@; zSBF!Ui@gyArG}k66L!D&tIL_0n~5B|!cnvcPiGTU?2f19;TnGWm-(3Z^zZ zJm}+9GzV@)7y@v8VZ3+7EQE@aB>_)HSb~P%_QRU2IjYtlx*V0xD%;s|Ht{Vnp($il zhqf!N)@$SkW3(kV<5Wly6-c6z(OEYV<+tfeyFGNkLxn)I1<|4uD&*vuk*He=ue=_J z11EWm=Ei?!?e8+2cW5(;wz>W)|I?I2cU{uvZoJ%UcM%rnHfSZcu&7`W+HX#i%+rGs zZ1|L;59>63(ZZH;PwmC^B-Wa?r4Cjsy9W3i49>DMOuLPs;@zn4raYVrcyL^uRokGL zRVdnITYm6SPasCT7c2Lav>RzfKQXF6qCd1^4QfV)k-E%qT?RB4=!|{8QPf0a97(Nt- z3vfXsWh+!A+_?Tk`DOnzmQN zM6-)$?d<&br(R8~>JNQC%r5-t@@o0j{4ns(?8jrIH}x2C!8Mq3ZV{#9-NxSH`c!dl zi6z{-L*&Avs4sr^W1&((AE3h_51O9yvD9`ybo&vN%tv$Pebe(N{ANh$QKmGiw*O(U zTl|NNCWYgY0q*k0kAG%}D%JYX>e0LQ2Wm^&rxZsOHQaL)157U~_8|FyHS9&L_jXED zSOmE|{L>vQ)5(f|1k?@;Nn{jyPSiZP6Us-vx$)p(Bl$*o3vrH$e6VLWUKmI&;jDRY zckvK5YKVK&cykLIR6jlN+_PFW^y|~_=P$WQgu=3^tQJaS#?K-iC5qDmVabjEPNo5v z%Rwis$I;`NExt^|c`>$E-=^4&5u-OdkV*|w)30`pp}XR?jFH9>Xewa#{I&RA`-h|t zQ$-YRrBA<-n_sC_wjD+vW!2Sm|Lr<)5!gIDxd+2t_;GFr=*NcQht{|~qE#ug$qWK) zcXc=AhkOqnMr<544PCjv!hK<;OVdVx!Bd~%>zAZDUB-sJMBf>vmI$?(UZ!SiDsDeA zn=2|eB|WY0eVcB@`95q%N^}KR^joE9$Wh(bvs4yoiTTB6$fGj{+sxbG3kJp*Mg=s$ z*6O(v{Sp0qN&LCG8+xMjJ^nSOK$9)pOyqgQiHz7+r$)q@-m>0>lBGBDRC0vUv*SgJ zHQsH;oiGx)G)*|Dj}yPl3M|Ho474SGNs>#@I9AWq_z|yW=))Ex)h1z-aAp^$f@MF& z%^n442<&R^OC|l5z{}Wh4eL|bc#_%rn!iSKWaMzLm2fD!i$)k}VFnt*iv)nWoF}7F 
zp0ON}gpMQg4i^WlGw@3Qxx=XylP1rd(8~?J9VW2 z?=mC>9gldO$9dWN)RP zwZBBz(CiGj-pRUks(z>YhE|DSRJVtZ?y-(+d zxgq}6+sNm0J%yiQxqA4HeirCsieuA?>EaKk)%$cy%eOB&KCZ*jpHO+!J(H{d1PZva zin*!i){p8#w)$+Pg)6GW{CNz-u9Z8tQA>{4J>vQkX429l^b;XCED)=uJ5b1|0$Yp@ z>IsToN}UUS-I6OEPEJ^SCk4&xe-8sj4x~r>Ff=~keUvuYKVp^#4L*LbY;wz*etC-n|Ap)WPCvNjx3XI*;+$QHy^p@KSb(sS5-X+1=136Io#}R z3qvJNs#17N9$;vO&Ypz#>u{S_)cI9FA}TVbDtFYb-XRW#7a0zpMIW}E{u_LyeP#$? zQ2nk}@nCo0tCQTXP~F^di9f-oMaLqJ+pfmm1BW(IHJk1QZ;}!X; zjkte}M2?+zcvZ8CHgc*&u-KeO?s94yjc;cu;{-|X2s8r{u^u(yl>Zt6m z(bI@AIrcGmk+C!9#?D?EJ65yvLYg*qBn{m~#G@O7)m(=a|~WnELA2gsOB(bdF!17{9VQZuo2bDq@1f zK4BDqhR97^yL984$i$5+)z>|4K&&Us=BrGv+%T`cq4Da*_2!A26Ys3lZ`?wdRP5Zi z!8B!ICz*4!zLX&O|I3O5ME6>HIF<0nsgn+xUzq6 z%g4AON?p!<(7*IS#`uHB-^n{+AABYvJo8N-SAXzh|KLAi`f&9_VCUo$gxP)esUVrj z;B#iym!?AJC&KK^EIp>epG~ktBu?GPpNit2i0+&sF`@sJ&EqDw0}7NZYO(yKpC+KaBMl!C6&wHn=ajf@QE=NHng8J$PbVH%yJ(OnxI!370J(8(AbL(%0F-E+}h z6i?=y)&J?S+5a+PY+Mfi|3*C15Ssz2JfGngWi8crkb&b zfozO*6-(_AC+@22npLiJQ-ln%F4wBVt(T|O(p=x8yQ16vXiEs6n zdc^JitSgN*UuWwce_0q*wfV9@4KX;9X{!6N(i1Ord#I`Y=Nj#m!AryDhF@Q%-aYs@ z)ZF;z`(k_a@u{mVP5VFB$4YMxw=^IA*$&BT5wjO$*}(o;Uq^`&t9Q}N{p;UxRzNb!QAC>4EI-y=3=Ehx?k37ZTRo{gU8qnPg-X zDI<9K(ws)PapJl~wEm=h4y8edn3rJUxRFo9MQs$w|Lxr<#5o*f6=i?c+O*aZF&ZhB zJoj6y^f~UhWb?`!PlYcAwMivk%A0;~etAPhN*TZWjg#7{>=7;9dOL9b%ho%Zw$#_E zagzsMtEcXjeyy4F{NfxPKeSq0d-2)f*ZOt0KlTl0%Dz}OZgoXl*6G)mrZn%w*~td% zJ+*uIejk}J-2&x&&DM$(d%fMpsJOM=E>w)Hxcc~tQ*0;LsQcZK_e-d27d~A2t;4Ao zbF5uysjqkXq-(c!`sKQ%e-0>oaQZo@wD$VvklLTEpTh){%q~rf(|LD9_v07#KD2!1 z?iimklZUg2s)@%n(^JB~#;v@*{`z1YF7tcJHre_2G|eOC_l#5RS5MPD7xjd>haa5( z%zLku{aNt;b4;%h?|SMb#fx zJP#YSFM*3qr)+m0q5096Wq%=zb1ahEE-yBM59Ki2{AV#azYD{Owx-?^j&KE-uHd&P zU`leaAy{EPwqPttE*64dP|Do#>{eUjruq1zLy5btgiUTZGfH6*CbX`W^qL5q?rnv` zLCCMjPza2SLCXXKqKu27FjtL9MyLi`fveRS*Ck9Q2^eE?6&e(xasb)kT1pRbNQsW0 z$7oV0o|Ve-h!(>04`>ZS<0AEK1tHQA#AID-s4|I+(md0a>>ewK&m%C6ZwEt~1O!#M z2uQ4TIFFe%pwFetAifA=1{N`bq*qLd;hJk-NqD<$7-UzDq6%8$0tmT8Mm_o#zZNdc zJxd4$?M<}W77-)C0JVxs6#q(*moYt1Bn4HfwiLSKhejaru#ivkPcW4*CL~B0ov$FP 
z@adk)FuedMw6S<8!6ALt6eT4V99CWwLQ#(647~#riD-j-mZR{Z7t^Mi3iwg1P}pSv zA#YD$Vl)PR{lYVHxx&DcoxQ(&gkPLd%MnH820?bMnb~?;xkK^C6dpzb0*Pw_%&fsv z{sJrt)@0;c=&<+Dn7}D6GB?hcaS_GH85|=a~ zq^tad*06KCVg!^RDHtpK>`WDvkh`gI#SNb&%b1P2AW2c?AmoDPc2X!CBYEjhfb!I< z9|tK0x_jFcXr(6eD$N+mXoL5~hd>T15ZcnD%Avcj@2b1CGP)9&WB@1|=%yDsj%Q%) z0Wjl~U@0`U5D+f}lE7e|CNdhz1-+EjcWL}sFfU7=E9mGm?de5_bJ;L{H2Pw8FM<)0 zhr);eLs63_kgkaW0GY>?1Otyw2s)_27-sAtCt3&szdGzU^rK?=wB>?e@}k^;9-zdf z#q6MD&CU2@VUNePa~lEK!6oa&EbKDon*D)Vv%nYIZ~t>e z^&I3RLzgOF1ZCiO(0Iscaq}lkHstJE@n{4G0mV29gM{X^e%sA+W--1R==-~Y{kP-| zx>yh*T@&DS>5+T}1alN}91pMYpr!ufir4IkBepl;hwi{o}XMSs200Exu4zXAdj zG9L1cY=7x&gAjdbQFq8;XL)UVVH;$?pGt8PrQ#Vt0$asVs|;^m2_~n4(BKI2$B@Ci zUFyBj0ZR2I88REQq$Lv<{J+?H^LMEK`2Y9$dc9_|zs8bXW8XDG6lw<9mmym!jZ&77 zEfi_&5*aF6+mMiTgb-s;6Q;rskXzXvSnI=>EVeJ^UZi@48sx) zK65Kl(-J#B((ck&!3YyQ6?o;df-;&dU&)n}77+WEh&~WMI|R%0wu@PyS|o}Ox>~Mp zZ%bv}JGVm_F-xI0{`!!nrF@$h@N@UOyvN{VAu2Y?vz^PkRi{kJ5PL-7hkGuYUb&QxN1_h_cnPHC(zhnqitDAw-6hKT$ zY};IuBFm((cDvcUTD$!+e$(eq>D_Y`aV2T8`?X)MO9H*2<0%rsEVS2DFkMGxuk1bh zFB0O;q=q(YQu>FKl#w~%jWXJAE<_a6sH&l3M!#>Lx7)qXHj;d*?GF_7oZRzSQjFvd zxwUUk6H$=mE0ADmpvR8B&ig)DnQCLhpLa{FBzjo3SxVg}NbjeR)(~i`N-wk@r!jBV zleNo*yb@4xNkwJFIA!GO3-{KFnIe`!6X&2~V%k5whqD04W#AH*NlD|DIn?cd%EBkG zLf9-NG91i9bg57P2M88G<}BFik^9y=+tyDey-`JcD3CPQ^?}b($_k{($9<|i`n?!q zY77+~RIh=$QE-klM0OcJaR${}Ohx!XA`_d9!4?G*^uxkok|B}KhKxD5SElG!X*WZy zP4#xNwiLWH3znzi<+r(R&$PH#kWr(Y87QN;KS3_IBy(TMv8IX4h$)2wW?8NtS#g7E z2_;#MSF@6SWU*wjQ_Zr|eX}!v=&y@(dG}geg95g#E|RU55p@RahvJCJI1dJ1oC^uC zc0O8xwpE!(Qt^h7TX#}@qynO0hV@c>pjZlMOh8>ST?7nIO!lGN;G?h8Hj5HK95v^% z8C3m4+auD&<*Rt!k8B0yBn$Oucm<++K|0?QIN7Kw>Z!#zBDsR&w&jQyoM@6cAutDK zHk^Ewjq-ih$o0{0B;;9~$wSCEdj=f956>vXn==s&ATjiVY{HAT<3TZ005jm3O6-L8y zWIGE&#|z-)0k6@Wcg^GY!Uw0TuOPL#Z$h=78hb1Oa7u3k~PZ zC2XX+X#mKMi8o*%G$wun6Zh+-3t!DuU?QJ1=RB_F5HOj?3s8C4*tYc#63xu2lMjkD5S%Ld>BA=840sGgF)1bMQ7?)JI>fW zMz(Na=}2615q>JLkO&o#po>WFg~|B~a5*f>#Ow0$5gPxxXYHkD6`Ri@6K9h(GZ#*~ zYS~9TJzNx`jeX}t$vGg!P6s?hd^{F_v%MS9q0V!oKhMDvrGm?)z;nymD@2=^QEG66g4(q7)p=1u+cl4+>?X 
zo+=bn101EPgZrkQYqMSKGSHkMEBd}rp%u)mJdUgYm`f0F2L(Slzq=Bh`PkfoUXtDpnV#gA#sKPd~gYx@t%$n@*QvhXdNhY0UJ#yKPS` z$}I_hG=)+famPR5wHVefe&*F}y#fPBp!W<(`66Mi7U`{3mGk6D*QAxXXl0SJ1i|q# zztroV{fnM~S4=cOt;um%kb%f@VgEZ&5VJ~T0tfSO4H{rCr)K9bM$HkIqEIwar+U(+ zT6CiHrYU;(dNo{-ovee#U9U<0)-bKsD6HJ@a1*+T*)Vm)C3n*m`lAK}YKrlb z-MXIC+03+1-l7MY%_L>a^DM`HM;ScZ7(PVB-rR6N<;3k!GZ^6WivfS4If;a=e;}IGP5VuVzZF{o&@8+62ZT>>n_PlL?X)PO0 zwA`(^f+=RlyD)dGwNM9j+0e?l)56_xseMO_>DiX3s_TS_>wjXqx0<)@m}xt1*R@v& z7!7w{-yti!N&!Z!8kkpv1aKb&!*TKDnD*#i#vRc_VOu6zzFWGWRJq^9X!`1;8sxED zA~I1*T`rlEzd3BeMCO2a0;Y@Cl{&edoYM!Ga{55{U{IqO+t>?S=Yq>7mSyY2}-j)KS#vM@#ISl=} zGg>+RqLT$G`RDH4;k!~3<64!Rt<^kPk)2SVG(?WvuMpWK)Laa& zAbkRW*6C8#A0k5zIprNfrdK7Ns9K+k3h)Qd9X~A)^-#DOke4~z7|_eqM}ocgQr-;D zzl~?^BAYVtAQHCV62b(Kmjs~3ZJ10?`JxdLIr30anP47?lVHH^3NV#Vn4LvUS^Y@B zm?(7OOxO{aHT5>^qQJYcc<1S>a+f1!3C&uM<$I?u5JD^ZIlqji92TK*VS>zML=km) zLL!?o;Ih*|ml+b2hsL}QjI*qBa3Gh2qh$>z{6-)7pAT`C<#~sbn-phSTxRXhqwR{G zoqzhf@?`1!yK#y`r``=)>7!fq(Wk4|$dBT}Z@)zgHkQKfZQstJw%$!%ickB(^M&SM z@-i-l0!6)l3WMjQ-f1@qh!6zsp^twaB_DnQW7<~&AEtFRY70Z!w6KV!XFb8;bSLs7 zU)MkpV9UmDXT*rj-Bs+E3VCwXUGd&Ki)owO6IQbmNul@7HIV=Oc@7z*9uNXAG@c;M zbBf8RUc!D76EWNM)iGWWo&%cGE z&%w))vq*G51gJH zmVXzI7(P1kOG=8i1P3gsEcR&>*NH+2qYH1ijp*Vwm&ZM*G#7Vf`~2Y^^wP80Gm1~u z{K$O~sAJMQxO_1;dA9Gwl3?=(Y4V4cqS4vO=)$wHHxX}|Pr9`F%PhU@w*~_OU0e}) z*qD1D#(&|dW&GB|x zphIXGCkwg<{|*y--!00KH{Segx%@Ucef?Vtyo*G+=)*bzMMpuMaQ#T+62iDi(SaHb&}_=#NYXZ6#OHw%1Rpf z_)!XlUfyW#pZqg?GimSncSr$o`ZT^^DSr0jpG7gUKV#0tmJ0{qWRAK}!RgLF{{Uqg zPy%=nB^zHF2S^IJv}%z8=WHNB(Ko&|(44J2EEt9hK~q@j+cjLFjqB<5V6%!|Y|DiD zNS==AeU28$?#06tg)j;qx}^ZFT^=rr~RQjdN z9MYOvm7U9lgj;g*SFPsJbfJ@}wp8bI!%HuEk;HqceGXBYRytBD9E+)M`mBx^^0%+Q zqyzn+0}@>>z{g-0h+BD+Ck)m*<-#V8w&(r6hCWsn5GZLjTwEOQNsY+fZYejvGEdq~@@y`!*!l__ z`D-8(Thps5Uq7owd2SZ{t!9FhaEY@HXa9mDR>ZuWt}Td)f!F8KT(7qn^HcdBA_{K$>evwSh93dfTLMQp76dkTOPw#VBf_7D&KgS)&LkVPESqSY-k@EK4 zo1XRzxqBte0HvH*<5l;u=bzJ_gFqI$uY@V@?BCa(Ykk935rBU11$Dy`0@`GB0NwJh zMrgh>PAYJ`DOGN#{-E*++>!-!-#XaRY;8+Ro5|{o@R!ky7Z%iB{7RpMGP*Ua!NY0Y 
z9pi14x$GCPD6oW;jMsWsOT3roL+?Mv4?WG_8hiT$-H}oj*)-VZv-?Gd+$86D|OoM{Lu|N!rNaxo>ah1r3^<5sK?K2NYFQZ z8fzE)X=o3gN_*-g#Gi9J{Bc*Ff8xr&Ny8S!+quVx8Zwn)>tmM>-L-N%`<)7u{VE<=o86nbT~y6SppT~q(20}bu3uODc-NiYv< z9?<9tYq@7@9^N*&zbm}`d8Tu@iQhH)0>S*;yWXKk!02L~+UpT;lTh<|>JxH(}iOS328Wxn0!!>@{idJZpK%z73qqnXolnd` z7yMto!@t>_v_kyr&sw=>x8>=l2SGRg1Wm?oN&dMl<5=?VkGmU^|M|YKMeWaTB6Ibx zG!1@okcGEl{uV8v5l$XV5lQENQ<9+zS{+Q4>f?VhD52Ym${kEo#04yGma!856p^lO zGxy%L#A?mmhz$MofVciK)^ZgQnI?U6ixDN(N=G8HEOC2YC(GEVctm7xwR!RKWQmQ2 zjsfMrqD2ajNBsUzXb}U6{|OZ_Jc#upFw%%&Mhq}wXc41%7+S=@AjTdskcg2+3^`(u z5Myf>E^x4J0>&FL>WE=Rj3{E%4r7lP`ojPth8!_`he1e;>|t~fBaIl@!`LIn8yOyc z7@5TC5*S6qm?Q=pG0cc@Lafm+{p2wQ4>1CnmtTnGEiecf5*mepM~wbq02515U~Pg& zj~`$xk!rLB9#z(N~X@d2wp*o}u__z{cK zVoe7u6@j5e3@~DJ5#xG*GG(dfXl zkdKqF*8&9bxB}R$m`PJswKur z^V+4V9dbR|3+L4O53|qf%?*`Yi2YXTdo~oVJ;muF-z_d*zp}JYky~+jF+3keuQ;6A z@2PE*y+$p#N+9aj_~pDzo!YPqCqv)Fxm%R!IM+qs-#9e9UiH*D-E<-JZ9~WF{{t=h zj_QIc zTGjuPXi>A_%YA3l_g@|`?20pJSu{^P$V5Gfbe|y0@y9zX^`sQsV-B@h1-01q(LNQO z7l+aSMQb+NXdsp5y))2`9x%K5KF*;4WwDiUG%JR(kn%Hy=`}yY(U-^SaMx%!&uI3X zV1U2JGu%@uN(kQgY@3|anF(~hV7H_h-OK3opjA^l?w3`fG%l0glK5!D)}wZ6o9G8B zl}@P=T^iUc+)Rijruvj^z3qVML%hSeNfFu?Y0G2i(ybGf8?6p4mNR~+sn0$6K>YK} z(yqVV0R4mB!eaQMvR4!4zeOB*1DFs3a)_ z_$&QLy`D~sIHq&XL2~uY0Qy}S>uJt0ZP<}g_eLRQuk17Stv>9v1hSz*>ztL^k26m$ z_vQF}8B%#MUSLt^2fr*Xvdf+U!))b{E%^wd?5SITuII#KGN2`$VYQDc;F?o=%+E4MiD_gtf)ugGc#jqaYe zISp8A2MK2YL%pNqhrD{Sj;W)X`k4sL8naBb!fU5VCq&n$V*@ilmKsqHR*Sk*zx_R zlw;lrNbG;HnstHSOIfMZbnJF0JSvVmC8L*ZX%y|6^G}TO`<=noayn#lhc7CUqr+c> z%g}AM_f_2){_2-*W#=-tskDaK+ZQjCB*oRLkL?u@5m-N(@lN-$dejxVV{PGC+x{ey zrM;%ERqsVyjEWKduBP(UhbfYeRFV@GU1M`3QmQUjc^|mEwJel<$+oYS=um6Z{fk}K zc~xbnK56^peK(zmzy|T5i;7O49$t$)P5HCaUhP$AVav5U4Leur=(sLNa1zHgf!=Q2 z99kSmxS|%Ro?~#l(7f4S?J8ko^}0F-@sYBO6Rx`i1l6s^%bg#qUp_y&_t2&8))!LX z!l=FfrS+bTs{y#?{iW7nCim|>^~SY3E`>AWI#7;PV4F&LK)8Ig-;IYS1tR*gqsy(l zvRDy~_|8VJSQ!anEpKJBHm3wriCwAhB)SCXA6zN+#FTTf%Bu$=?2@|3-nOTXvu}sG z+IKi!)k*0wQhuAzRVV(tN3F-saCx_q$&;6HW7muV-XY4u1$z5!IBR-HZc-J`*t1{t 
z6V&X@)l~wrlZq)EF98WF1#*A0%r(i^k#_#VEb?vBex;~pzo=ZcDDwJLqk4Av!yV6Z zagQkfgxePyc?2Fe?o@Tlv`-YWnoSWlQ?}bc!Usj}Ia?#tfj}{i6#@gL*&9{dH3rXw z6uv&)x_y;$V6#h+rPXz@_P{Yx?`NAY&XuOp>Gw<_W9xsC7%tKCHz*I9`ikVGZRLMW zuuryDYPu{r?)Y`TWDLtsj4rt*{(8U>ZL6_!dE;^R*TWhtKjAa_#`}MqpE#w46=1J% zQ{EqVEBt#3(uzC8whY_1Eo=H#+_zYcVpxb%)|0+|5=!EQ(*oy23TY}poDOI;pl}t!qCKXas!RycdZlkz3O!rK(RkRla_5Q`4@aj zaaN4*Te!~mPK_?Eo$5W2}8SZIk$f@D4E z_-31Qea+-WHJlBJV0tTAh7a*4At`sfGavV50bP@j^2ZeDj3c!8)PDmO%`6cuR3|b; zmx}J_IA)Xzq)D-O04P+TKtYdQ0`fo5T}%W+hF*LSO2s2lL}dJk{q58>bE$wI%UIP! zy*ROl_@voP5<(_!XGUTFi@k|IUP?N2Zi=iW4U3#Z(H#*|1n*WV|0M#=ICYEsQ5{U?~m(SCV0RDO;*MR=GF|fsTGXg=g^zewT>S z#rXX3{8bsYd;w05ht~S$!3+qOf`xBA-)Z91sHi0cYwwbEInY{E{INay*C(+~>*T(v zT$m1R;Gv4ss4x|1*5KEyPqkh~@yj@x9iH7%D7MNyCdp5*_epy7Equ-vEgj4U6cTP3 z@ArlH3!L{EU|GnY!wEzxA@kOT*N!im^1odg_EEebH^pznx&cnUal2i4#p zdl?D0veCcQn!{#=LD`v})C2~A&j%I@x@WDUL5?Mv3RCDlkcul$3E!SoaVk^2%BXN7 z`E&@H9a)Mc;rX}24OTtTLzD|j?)Y_)82!Ry=nrWwTgNYhK@p-{6!;Uhr4aTkpY1%1 z{#^h>G9E!s1=tqi-Qfe2Lfk~bR7qY;O^PMc>X1z8Qd%(%LJ&3#&*2en<{@KF!SpG8 znZqO~dim6GN;>JXCVJW)6j4weM^yh;#n5HQ&-lc7d(_VX|4F+*QX6>nqk>#$(|DdL z9gi?}tR6=Z9$PYmLomc^Zb$8Fw~EMR#h4{-U|-5Veil#@H)icRoMk092w=n1M;pOp zsnN7HdBy>?wgEQUsI35LuQ6#ig_y;_r!Y_BB9A20Rwuowt}Z689Y;h_Jc$O$tx{mZ z1k_q%vyB2IC>Pu95jqFIi30K_Q7RV-V`ZzPl;~1DosOW1R3Mpv_)&?3UpNIC>|P0P z7>5b;+T{)oDItDKdpr)RLRh;(WJ=OPU;c$Ntn!^jv5Cl&h z0JM=*{F4yW*Py=8Am4n&M#~FHL2PQ$P5Ce!2}Buq5`Y!?m4KJLgVv~5yhkvFxI-K4 zze~P8a7^jvF;Z{&rtgu2BlW+(yFzs_m!o5712JBeF^d_Mn>v%zd`?-&Hr)B1id#v; zEuEg#5ZI5yrLsvOmBiDLw(5yjv7UoM036FltoYE{Rj!>f6*1vO2X7}h+oylIn@r}w zTiK921?jxowBu)!LN6r6Zr(-WkX8V>1baCshUu^&6|axNZp=!;Tq{F2Q5%%N-_0bu zEVpq8pM!u};-^eXT{(6P^^-3o>!)3^s0-J*v+W_$Xmw00$ z*ai`uC0&BhAC~L%0mPTv?zC|Y*s^i8FHn-On~2* zibzmkt4RDCAxL1Hpu)w|DTvB4VoOba6`#DMdFoGXPixq4M;Mws0$6&3=_BaLbB%Dp z-O}!%gqWd~@;CIhPi|o6rujwUoKK*6l-AO%= z3w|Nq&9g z9?W{gFLvjs-P@};sjtcK5SbDyZ`CV?a;al`E$_U}9kVc(DcU%E;oAXJ4nRh1 zXb%_HpzSHh!NJt+Z?@B{#~}*8Pc&slZ?$>`KfyzKAK>WE5apX0PamNo(|VKpVgios 
zw?I93y)hOSJ4=wRMcewt9In=h*5F55a$yAq){(#~<=`wiz>7toa1e1Y1Q@q^&O>XN zFr5vnQXrFCH!;yCm;(f;aQM@iJx_aJ+T5Cxb8;VYrFKI5xp4@`lE8sf$6*Tv*f=>A zr;pP>2~ZebZ5(%y2!8+hH=A!;k~pjQ%sApDp7m0d^Dgba#6#k(IzUTyA z|L=?Ic3Vk}eBsp(d`A*w@!=&5=B*-F4Z-t&&(8<=zi7VlO2=O(OLQT}AN9#a+q-Y_ zhVR&D!4?eknijBP6E~Jk$EN`!K#Zf@dHQs5i_*NcEBZnoef|EeSJE@}%`Z)oUvAv| zxNPiIQq=t+9W>Blp1iR@C%iztlP&U-4H}Viw}|jK_AWbDGWO=aC`jfVf$2n;dYO9% zrz%SBd4CCZ|BzjBI}tZPrn>R8P?T{6@DXte8hg~l+P(?!*+qBIJwx{o&L;m*%B^oe z$kKO~T|c}Q1<0}Y#C$Ze9=k^+1pAL2_%KcCegEv_6nYVTm$-riEXO2&5~qI@qksCf zI`;9e=rT-Me%$kE%Js#T@D~#6J_bK}&ry0OTuungBmU!$?m!U|3y86zT{v&_iy_U( zUJwaj)FOy`N;mSvQq6rJAIdUC__FoWE|m7_hMj zF1TScbtdK+Ai2ZRRP6U9xd8XUa_w0cOH`TylyZ8aQgQG9qCMw-z|B9D{P3#GAf)^P zavq7Q|Jc%kSoi$6aOpdL;JaACGNJdU`j4OW@4rit@0=C`MHj#S$@wKzexX$agt&}d z+l+>}qO5g4(w~19?Cq&~9^LVAfV-Al()1HC{3q>s^f?47-|`D){A&OBcy4v`@3xQV z{oVh>yM7T~!END18$`nXerPbWOT-&*%oL)OR_XrHqduQH;1OmM(rB$#;S=Z(peHa>1t5W8ZBtxK%QV7sY|0 zqk<96>(LA#ry)(^jC0dvpsjg=O!hczRVbz^K3l?JB9ItC$#@$z6%chHGZ&ddp^s8X zxPN3EY-zcv{uI%4iWDzP1lghjxGmL_>7uxEjG7{Sdb=C+kMP}i{{7570v>H;#npR2 zs?c2&SX~00h_Zyjk%z$^so4(ey=&SlLz?zaiox={r|CDgu2(qjU->hoNZ=-( zKV>xi^y~ZQ17ehXpoRa=Jm&x;1wS1ip2sEhR!>WYT7xbA9RU=JHO42e6il7?*gmj0 z1NWw~`FC}NUm<)WP%D#iyg;Kin{#4447xkt$hon!)SS4RZ+4%`RyhIm@~nDJer{+H zUz*Xj>(Ov5=NPS*&&EO5wCu5|fF!CF(9rupRhCjIKS+&h`o0KD+r6Tx-odYr`DIA;6GQqk&L+=VXq8h4;oSi37N_Lgm zD8uvDBFZDAMxe@lXI>j`=upG5q?LlWY@P$;$I-WNh(dVYzvDa?pM(h=N(| zxLxsiaInwoou3jUgWQ#{3$Ea3EF#Rw8;#o;8AQVJQ(OAZn5|s25Pn4PNYo~En$X>TcWSyhPHWC6|6(0)X z$ln^@8U9E;$`ddRa%@f!Y^S-SHw2B9Ee*b>`aLW z8j)pPz4y4;JVQ9a;?t1sx|)Ey^Ht^#^Gr<7t~W9s$MoYF_v4mKRiKsa5KxVfPQ8>(7XU4D?$N$fL-F)n+lf-CN7;8a zG@e+$gPbSvIon#%|APx(d|s|aOGSMTSM7w}U8VRb>(k?vS|(IMiR&=SJSS5?fkIN* zYQ##71QE6qH2Vl|OSou(g)fKrIqL}-^Y^v7Th;FY{+&y#mDF@y(gPfqF1Dei^uSaK z$%c!JqO0i|WCoqQo(st`b13`J)Uejz!Az_c6~+S6lC;|?=}ZC#H%W_AmuD-qxZ%SuC`0JWZaghL=E7Q9a5n8Ied2zT|>qN48G3wdson>NTDaXxQ&pqm)VHNVBrDj& zmxq^?u~q_(D9)2*U@{I79&^~zOqPHOEp^nxYU`CfU$w3}+lwXxGp0sd^C}!%ACQ+i 
zm1P@SvGr=@`qCxmvA7Mi1-av7S8j@22TKV39G0BSNV{D1*t0^tA;kTy-@RMcTC1PB zIEUzp&)kh`Pg>HCICj=gNjSJ;U~5G5nYxk#29ur}WLT0F%*=cyuXFNFBk|DLT_57& zJN&7Q@psJqKRvGQeD(FN-ANT#n=^UiqyAvrocZow6*hri?;K1%pXmQ*0}k4dP%XNN zdiU=dna)opk*QMMOoHiDuUKYey1GSxpwHAT>Hf$}lkNb~_^I1#2~pWwE%t~PPxWcq zM&)>Q?^)A6b!S~>RPH{Dy>gRN{igj|b9zJrZOaQ}(EP|AzMI(Sw;W z=U!O^ZJL}Oi|>!Y@)JRuzfa#gMu_F$ErYG3pN#+etB0n>>Qr$Ad(dWti z;{Mnw6D&Uw|K!0%!l9b2mZ2WSPaam=9;)>k-E|~q|D&eNw)h=KPy0?jc|3H!%{}7A z0sq7elUtV#aWh2Pc7G0;8nC^#C+B?Fep7Sx;kdZg%jeNxx0xqnjo;eZj=l>!I5X4z z;amG1#kZ09GqYC=zIQ%yc@x_-^YmiG_pVnBO9}U8o}Fp@e&egiQqsqn=O;dVzqtV) zvB(me<);|@=#?5 z7yZvGWj*M>48~^t*CmTpFRztfx&L{qj0pvpJAiov0eeF*l^`SY1a`EH9r|KOA2SFr z@*f%&ZL!&Yqxn|MEZAgWyTQyFvj8y2kI4m?gMirt7?z)(=VK0n0o5FH1TgrIZDC*z z0(NzbVSj9C76bd(+yka3oX2)wcJ9SQ0PO4;Qx!1#;QRMa*wt}ccN8{(@#)h$%nrcd z|JSd}4%AK9kZjS}z)hBxZ@>Jze}NJEpWC(>8d+lJ%Ap%J-x*E7792250J9h{cfirb zCM?nelO3>iiNWzBPh8yQ-)EN6Y%wRb;ZkK4W+mLZ8;^MbH|?FUE9JVY z5eZ4YA3wgu6bVe1;Pvt|r+R#`t+J_Hsup4P?p90(Ln0}FhX$#n4 z_S2`+CT8ZCeDVHU4yHmpUr2l5%9xlK!;Ynw-945*Wn%AtHdirL=jxN{&#f!HYkDOhxNshuTTQMmEvmP)H0vpbt z8<=611m-gE7t^t$Yfe?j62l$ayl84W7!>A?sTpQ^rr5efS^55hG22qIc5dJ0^yula z(u?~&ee5x1WAtHCPk$VTZd=n7e&5M;>z$BJUTmJFjb)jipOJ+D!-~4zoMyEJyXD>+ z?1s%`V0K1(PmIHCq?@7n*yE$l?+;;nADA(LX%yHThP|_m-Tm+dBhmGH?6B#L-Fsc0 zs>AoOfBvI=>aFc>uf=_-U}FOox-<(~J#%$+6%#IAZ&AQjEv|<8J%7Dezd_!?*%}ir z7M-+LiqpSw%isEL#8eB+xxfy*KWtmS*V6dF){P(Y5`K;M4;o9lemng0(ddWkjlXB^ z|Fihw!yvCYc>8N!$Nz*%@c(Eq_y030!Qpo(j4R;@?7AM)|EPNR^RcG~vljWiO{+R3 zmU~#A)rPmklKpvFMGG&BPd^{G{-SKZ)FjMO*_564t2gU{)tf^>{FK7-nViYy=b6DW zs@8&jk?tqV$10hb`fXG0Y{F5QJ@&Ty167^2Oo3;|#9hLRO5R+Fw+6AYDv6R5Tc z4~7;}XdGeYRtYAbu^)torILg<;P&)E$WfW4$}^&|N`^EFOavLZ=q=ib)K$Bew9*Kj z@#-o9yDB+50t>)RL~(CR0;VC02H9I%DZ2o!AW6z>idW@8h5GP#{h4~&#Qj?B>Itw*Jxq75?b2tLW|pdvOsc-{BY!9M zRV@JQ)X0*7Yr!QC$0l0ym`r(1p`az@=3iJLtY1b|*Qo8WKkM0TAwlX24Zqf6TyDY; z+#;%bx-ZhUJ=l4O6ZVm?-zGyaa&cyGjlSa1d@LJI_wHUzROv$k=qa=UvL(=J+Gw*gt)v79iH72M8Uw;z_ zJon5hQss39k>KXEwt}FN_pnX#7(C>kc#Qy-GBlH!5;V#!gt(CtL2o#4RxsSCH|t!3 
zCCG*zUX7tar|m^fZzAVI<}S#Ir%d1fA^}39`YUfJRV6~8~RGr?IHF7X?<#uB*|#Ocfj0l#I4U* zUB^q-O5PbDw%M+$j~!MAf*O}Eww9l@_LsRJ^ZL!%zY;VR*N3b{h4KVAMHFP?*GZ{c zQAE^91kcmyl+=54qe8~nT5I=|KK($UIWHLr?m7j)5`c$rZvY$7228?)1I{q;rw3T1 zSHTXuT#PcWEuN0Oysv`gWXMLK*oQ?@UNtmZUPo54WNqvzt*!6XSn}tBN#wv(kbLA; z`QLXHAh-t93l1-qEK=YbmpKvJ5s1f0UNAbfuJ^V_idI{7poor3xSvI#U?5?QI>ECS zRg{4FApG3Cj`Xkgwz2VG{>akdhoN4L5u$+Ub(yx5whb(vuEC{L`yxW1{E2b8tYF6J zYZ==zR&~~JIz+TCI)~P5SCVWI*me832YQ}B8AcR-uufC15$y$;BCv9VA9lUC->R*~ z%UCM<;fU0K0!)@CE3}e68RzmpKCMHu2=W zF5!~t1=02n`0z4KE$M09W;bAI%hH>3n5*wJPPde>SF12GQ?%0@?sRE6@ZvhVvcYkprEp(R08ST+It3KC|d-Dd-J85?7P0#E&c=DI5yn!ZR{FL zEI40S6(tdD&b3)R@;Y?&>Up3PjuNP%W%~0mDb+$RQf1^aA6ywT8o!iL9e$)Q!{bn9 zxdq$z)*+iGb&x+U=|{A$9&`vR$0~`YwpZ)vWVgT4rvlYDc!k;AvAACw>lg zdq>*~R8($~_j!z5X$wAX$qr3hcyi^?0eZx)Ijf9iJo3{zw(r%4Mc$54utnso)QkPk z%W^Vt+TR=B@6?k@fuT}1{PJ6U#b)`E>uXHR;({JULKYS;UU~<`_8n-(+0{(ET1}1B zcw!cqhfpNqL566|a$0eVg|hGO4wn`oy)AN;*M%}_8~<1=&TmwBAx>n)mC4PY-5mZ3 z4*p5c9D6Hz^H8Jj>Y*WGVAwK6{<6VI3f)^2TsdaEV88bJVWz?7#c_*5Mfq{|+b+j{ zSZC*6jD0vas*hH=$)10wM4Acm|Li}U`0k2vBAsW!HWcDSugl*!hq(Tn5I)Dhp=f+d z;#asDu`p^qcq6FsVdn`aQK9tr<7tITM^bYAKVD!=Txxes+*TlbA4K9oAR8(Ua&FK@ zvp*a@h8`EMaU;!XRsNPf;-*;r==iRJnCaip}PkJ*b%PVWmM?ePae}Xd3oS=SVS?60L+J>?5PF zIs2&NcXQ6=ROxubSvbu|M46xT>VcV?7EXqeA*1DJ`8jpc$6jZT^VjG!@tQOX-y_CmM_u6bK$$ce zV#rZ*JlG2TZo~VNGS^QX_Oc6hIfFa-D%tP5CFz@rPX8WZdvs(q2z8laGJa%iTeO&% zNDcJe>XneOeULaYk#>}$@-WaPH_hdk%r2e6z-N-tiS3a@f?&j(cS%;$~QaiN5VKnJgT%e3PIv}o8 zWSjM@D)maXo)ptG_p=$G<_U=>A-oV;Rop^>+uR(ula{SD0Ch-%&?z%73Q7dl`!sQb z(e6T5^8xoHZUxf0nzYAf-AkE0fO+ycDI!v9ZEQ;(eB+aKb!`Y^vxLl7J`l+m(SLlJ>GLW!@B z-}BDXvJYz1LghNQx=>GKPvvvg9J1VAC~u6el~uxF?;iq6>V5g!Z=V`9D>}21J8qv! zPCa>ef8KZKWW9!n4iD$T!mr_+N_<=tU!CJY1&x5{%Yv(b=i-!-s%&x0_F{NK%dsE! 
zrUi(CkS`tS{8M^Imx<#Bd#VMWf;zI%R_~PiHvVFk`#mWPv7X->Gv{5sJ7|FxgMH;{GVams;bZBi1 zD@}FMMtcO#-I7)88!CJ)fLxR{t*a?P%N5)V5y)8Mn&gXr2fDt-HlgjWn<9x#Z;-E8N~rRz7Vicl{D2 zp`u4qYi<9U&3+QQE6Ec)AdA9&JTFa}f?XL#k`-M-FX=3iH{QMED<@CCRUTn(L8V0Cxudp1D(dp~P1W@)RsM2o;&mj}hmy^?0(%+o5f%v< zKNajKl@Tg+^jCGtrkX9Qo1FH)L8Be3|M$V%BUehw{$nur$(0Mgu5jdPD>l_u`qfq) zsr_$*xpH+^H`TTJ)m=MM*HKn?tWZEd5+^rLDNoh+h-3 z+-OX%@7F;Gb)=NpgpoJ(N2B0xjt%R3Nh3K2r>{4xogm!*1^A&T3E;l{0!4rbV+3k0xadip*I5iVr6%2p8?Zcu=5BI5*E z`9_2cAiMVvZEG&7$Dyb~@z4hV1xPkEUSy z)Fl%-grJBJrt6##e^T2bPcP4xYR08pGXb7by!yqvsO(u=gw9pi(Uy8*O`l+fU^#@a zLHRNqLWLnCRDcG*bA$=HI0Qhr=4+pWI#CpsU=sv+B*Fx&3UHVsxATPi*Rpz~xATI~ z^@<(n4#xHBv)4t*ou_tm7XQ9pZcf$#!2!BlVfr-*2BZUUsw`i*HbAC8f*g3t5#7Rp zO(+nV1JTqsKRj?_s;hhSclX4qIofjIdd-e{48KqO0;-WB)4J$`9XBRtuq)zDIr9cw zN=MFc$4Mu&Ll-*jgbD&km)Tf~v9L(otzp!N-C;@v`8!ZQHXOpd5l%%R3!Wx}N7U(qy9M!wF#@#h9`$^-kHQU8&8=FU5S4~uh} z12oWt)%xZm?@(+Qm~f(FOIVZD&YN57f%9;E6ms z(5P){RG_%5gE7k>Ye$9m6BHLzGJJx_n)sLszUAKgweyaE2reA3C0D`K*6s;K5wKSA zzF0x~4GVO5_TQO}35(H33gct7fX0BNY0X<`!m7Lg;pSp*_10S_1_UYa1}{i|dAsB2 zLsyYU#5E6$M1YAUXq`pPT<%-^L9bcd{oiPN3$Cc%H*9-PGBecB4Ks9ihYk%&cRO^d zbQ&{Ag93t-fHbIpfHbH>N=Y{;Ln#Ib0)og-9{y{+>wP}K^9ALEzOVv%zp1h+&TM6IwdN%ax!@pNkKNh`7u z{D}IJng&pfD`?`%VOwi+A+nqrh!k(5gFyqfS6J3(M$~}>XLO|oIzY0ZmU;PX}-#!0HVa^?tNUd z4jR%Jo*q+OO!`ZLV*A$p>j=O^-pb;t`l7#>8uaNJ^ySi?6FP@-y+M6l@WTX(NS;;$ zC~x(%)3hOpAX>s|qeeFbpRzi=QP%}<;2Mt|k{rq7O46^PL7P+=n-o~6tjXrsH#^RM zZPrF3?4vT^p3N&JbmMm~|za?|ns$F*ZM)fD|E7rNI+aS_xk zG&(&zvRLxr1NFZh-nvbF|Mj1jN)wl*M{lOG_tuf6JJ6mTs<$6#rYH>6&|+`V9)HLC z(M{KWypO*3p1)y20S)=p1$j;LvGr4DgUhSC178C_N?@Ln8VkR-jBisOY{9X^$;B%G z<{N+}O@-kjqydMQXvV*i`TGFvz9d!}2=GKt7rV~h+O?Bxz2oMb zEL4ix`_2X*gJy#I#COiQne?ysop=7k6}4Mj0gx`+af-n2OB9>>YXuFUD~V>hg?_5> zjYjeSf?F?sgtpv!i}<-y@TnDP{&OsP7opiIS-&P-d?)uh2-iZoD^NdoQT#Pb?GQp- zL6d(a>h7bT9dJqhxUz<3#2?a?{(uqRU5YXIuI!1J?FrsT+dlmwb9(~Wb7`a4y9Cv> z?=Q5h?unj%krwZh?ncW}?FYu~|5yC``V@&T<4wZ7cj{6fcU?X%3y%Q%bu{-6vO|}B zELWlC2YLVYdnJ}R?xSZu4&326jh;aB-`*29SrPPH0rtT=CPqQG(fXuMh|E6_%|E?e 
zQ+E|kWvo~A{Trxy{^;Ilo!~lz2A%#YI<=8P&;2`sWBa1G>o<%)Q-3;Yy1iGqMpv_T zERxo0i(Z<3v4F~K6rpYRTRXqjIHk243zk9~m>%d+PcEvXB~ss5T#!MGPfs5I`|(+v z;%oPK&ZBiFo?qJw(^xbL4K%tHqW|5QU3zu8E=Bje9!+x>%$k8zA~ge5SWJ6|fCKty4vjHdA9A`muwIzSpn?xTYEsjtf5hzJp6+p5nQ zW0@6TRX11|Wxdyzq@*_fF$zzLE1-kpaH_5Zk#1VKIo1Z=OL_=0?IFr(|I1_KiNuY; zLipHL+evc9d?e4+>( zw{_9hrvELC+4{v}llZeUmxhBtd`c6k8JGAgWcSK&^qFDxQuy@v+r+}y6p6O|GCa^K zJ2LV(5xQcM+ALf6LKHyAXYfLqvePv81ZUjT78W4@@x&M=g5Ix~CH_PegPzpz$kud5&3CFEYq5~@k!6aO> zCjhuKOewI;pPO`U00hf%N+@N#tkgvhqa2@NPe@kN!HC$8ZltgoD2a}9>>y6Qu$M3z z$E>Us3%A&h6Ef2kcp?au-=4L^ZCbfbf>2z-5M)5y!6n2b(ISf=(qF6A?&CbvYAL%m z^`e8ltGvpeeiV_&$e4kNb&_!sTXMMlQF5VIRQZYRh+i|EJ|LW=4Mv(wK5l|ok;5rP zzErlk#QYpkx^eg9f0VL{+&1z4tH;@E1m9lL130%q{Q~ieapZae&*11|F~){0xBrh) zhO2mF76p{XQvw;WFO;uHwtsqTY7#|wqF}|?XC2x|AJCfL4`27`Ll7>+lF2zq!s_nncZ z`$=cpqlw)AzLdY0cH*zzgQ=3D@|j;_*%Jo9%06nBFvKDQQt0x{m@gCNFuSSe z_hi#}jXQ_lF2!xYd|7+IY45d>Ng`>JJw*jocDgX9Ou=hMWkukOCmivars$F~Cb=LT z$q|us2&kN=s3|tg-0+gBumTo$^fuQs@1pD%5%nN8o%y{xf38>XJP^|UEHE8eagJp* zO&+nTn#iuJEoHv3WN5ADl~bzjCgJ&f)cF=V@g3CbU1+-i$4BqKhHdKz@_w+|fr9?YxB(=MdAfe0OkKF3u)DwzwTP4PkZS?CtYJ{*{;vAdUF?k0~ z2eU3C^@aNfGm;T+QYYC19}D)zy96zr<+8nP>FnsM50%eQ`_j3UBz*l_==!xZVOh&m zDBDfOuGGaOw*`oUdKNAhXqV$Q{(OVZU?_t;^VK!9SiYw; zeo`lqtgGtia*aLb{eMTIQpTfZn*(Jf36-Lr;%+vd7w2A?U@QE2^Qg$<{6FYo8QQd; z9vXroVu?+Ke^X2mR#9SaXf$^T)D~91*`r7Gd;&tXzDs{{NWDo71kF4CP@v=o#%)^f z#)x>zeK9c}=cXh6y(3QjJZZV*_3t(cGv;_nAwU$9E}|svH$t8sEc;Yfc_Fj=Q-5u` zYMYyT|XOlhe<1h+{rbAvt&WUlW#rE{{ z#dJi(sZH*+3&bpe9{D9K*zJy252yIk-W}~P`ti+C?N|J>h`qJ{#=--596UQ2zsBic zsY)LjGGY>6_$e;de`$5Ber^4vA&yj2adFMz*Bsv1^xUsfL`OCrG)XW*gK6=InePG$ z=2ul%MNc3ir!Qas5vx60dH0o)uf|{LWu7`KUIHfkU|0Ik7VkLHR(5}+HW=*dhh?L z3L5QQc%TlT``9sk+;=)xvQ%F#mc_l$3Ro8Jmqr&w(`8)iUBLG;hZ32ap$78w?2K=* zIs%JLZ{rxcS`Iad{tAzxuW&LBn9BEB+)?=ShsZ|~70j-54dR<~cw$T>m>_fb*~`6p zv(%RuBvw(&=DhaE`0Iz2H~6z<UUXrm{&>wRa8{$0*? 
zn=Z{8P0E&xL&W~v2R&2|LfPX&_03aQ6No^f3Z{X9I+=gyBBj|tFp4ePJb!p-C@(+1 z`0LQa+8V+UW`(!pZud~Om7#z+`vRVQ*k?FPx}Z3DICoaY;vAE8E|KjtTzoV?2+dcBQKgqUiOdF&W_Y=jnp5Fyn>E4Fpf6z zk2cAVzSbFSwi<2m7;Oz5ZHpUi&mQfl80~Bx?dl)xo*jL&%2w?`Kf5tZEH!BD8O}1g z!o^3;@KV41!L_%cMuStsLZGp}A9(|-PkUDlM~)f>yQwVr4Na-W-ZOq6`L<6wT^na) z8y$PvBThX(Mm6JMwEMlpg!98PKj#9c;VhJGteazzQ{ZB?_R5F;H01~H@9;mjZu+pL zUbqR-`mSwa_Cxq|yzb7RfUkDo=aN<>2qCVn)V)|%X7to*E_{HfXGb%^rU zW+M&`(=D;_wk%3h%keWt&d=xiDKVD;7Ud4gi4zaS6m8SfEyI5v6F=1_jb>{u`c1xD zO)@%8z>Oz17<)MbCfK2qUshkHv8z$HOwev~oEOSSclZD5eh=H7;A)-(37EQ8Xj*Wn zMR_PNa+u#cY({YnG>I91Jeu6*m@?0v{7-(0J${_Q`idpr^gT;+`Z?n>=#-GNG2au@ z-Bq(QCk0jw3sH#{_NUW4j27(M=8_MmsI4cg3QV~xEiOO<77|S3n^qG$jMFBMXK3c8 zRW&MA=6bk4j5j!1*f&{930Mj>|yqZFJ5s9(&S1d4p^2Dwj=+y7kpLlQpQaDYdm1gEbL6 zZ+~pl@_f!~VD2W@!qwx6V2MfnU7Ja&X;ld;iw_oNFU@qrtZ(RAM#kId>srt1Joo)z z=|A_DY$fwwsO6pi<{#7*h*8htxMV`>7FfT}czC|OkPDl4sa%}!=+cd!SNTse;jZ07 zU5kfmMwgFx*V8C|Kq3of_nN^D<-BP6(Jtb508UE?*pHH%zfJ3{RPwC5u2fWAt?k5fRE=j<%Ch)0} zp)PpfUp{&f@IV8UNe(S@x$Igjey%LlbIC?_M7}=y+8Bf>fuQV5V97K58@66mn|%Ip z?Y{vE{~<(R-g;08@1Ba;H937#czm2rg6<$KNDjUVE&Qv~d}lkhv6{7b;gB#_p) zocMH&OV$aYV@m5o<&NO)my_^86)KpV1(Q zM(qSPbby23-m+`TVMgyl-cA-BmCEFEYILkd>8(ewR1hnXN}9xsN4%|}2B-ZvH{rUu zx6ZEU{ZGOZvL6rTfZv|K;T@UCCc4k2w=eOZ-PL7??5QTNjdC@&hHx{;HZqBj08wKD zfp8C|xV}@mScQA-!O9$7vGTtwyQP36AkeJUhqoB-@2>{Y1iQs80xuX0mc-sZ^j4`U zXo#aQ0R5*JJhOfq5}4q5y2f_)1C$Hr9;EOrgZ`G*{!4r<*k#TPi6-~} z&oLT z6ho6(kda`$ux1mnpv_W=z!5pofVeQl+m~GO>CZ&b1Rmuv!lQ(=b^x>pQBCAlt3GtD z5yI&W(gAVSbTue2~&Vh6XZ@dZ?F(h3~vbuhiMrh7oO?7UMD93 zyW#BV;iXB>(BzK9eHHae;6-Hj@VnUkq_C4FgAhW>#j^l}lGn3wvvS*Qs|0&sK1Uh`Xi52EnuqC>(5NPfv z(b5K;e6~oowS03^;EtFIn^5v5-J3LF4J#OdFA}g}h}q@;vn%~4Bwy@1dr~d_b4ZlG z+yXKyk|5qofY%VqyZ`j&7Jg=Pw?GjUxdSCqeu4B5;@)IgxLZu_x$6)#u;xzEGXZ{9 zBDCfO0`ox7r&4GC<_*enJ~Y7C5Dm(Ez(dj&M-xKVR=%c0(qEmr7&tOj_a!pDi|iYH z1`@}K;7Ag+l;YnIHk)_XUi}9?q$FYBJ(#q#9rRvf+!sf{6p&CELfr!c^9B%q#q)!7 zPOEF@8;$V)Dz}&OBOs9!K*Kh-3tW>m>Gaw9pWDA5KK`%T#DAGcsD@tx(v^{PWE>`e 
zHCYh>4aAXn#KD57q?VciW%siuY_RGE=bOf0mng4*~X>7K>p(y)h8>1ws$^l2c^W_SzU=3&fdt z2=AB1bHxW>AWuZLEvmSaO&QsqZXQ48R11hC%lh_#6g(kz=Gt03RFu)-wGHGXC?KNl z`ks~06;Z)bZc2^AK7!I05`X~UQa5KK?8%7YyZo)yp~S~4Fkkl+3CAX|`L$ycxuQ?8 zN!S!+-eiG-NVi1TNaY5#o}Fd_OJ#Q;U9fO(C2!)?QafL|#@Q)fh9*jdKU0@Isv?0i z^L@_a;}+}k4C70e&J$~gD1jV%zYc+mT<7R0f%{6|PMNbc6aEMk_`c{6e0sCXC zyW(fZ&`xW?jPQ{Tp=Wo`{s@&2QL4hF@$AvUWyzwQ!sQQD&x9*7uc?YuK5>W^smk~3 z6sazXJ`<@aO;HtnURe+=`r`SEPSKZjooAx8jU%dJbuG)$V)Y&0I>lbSIXe?;=s~H8 zHx97Jh&K(3c8R}!ZX!K}Wbeh`DDO)UbQp{YbPo(E17s7ZU6dz5uehvSZ3LyV>655= z_+z!5`HVsVCj{$k&YO7!50HWP!SzgsKlNiPbQr$*UH&9i4P zP#C}bEGSH6tmb8hS0;{&ZFR(2^Md|xK1RwBTpk|_`1No~kNQ*{5P^5tJ5xjbPgc#S zl6=I#{kj}QUnySZ0ons|zXalR{PE}!04z@=;&_dENFb~SApvmN>Ll?P4oT~&ILI@; z$1sEzkr=^uq*-{9)cJrnw>$1EjQJja^zQPN4s0+C=;8ve22;+0*wBb6977JLhRXJ% z6b!&Fj*!+)m{EOW%L6V6oZyBtl#|+)^aEt%tJ>Sn_n&S5 zE#@FGLe~8TuBZXq=Tnoc)K>r0kK$>t0EB=b_1XtvGUK?7Dl!?VIBQGa^Jo#u5|K)I z{E0B03_1itt8n|zg*^jdU6&z77&G9AuMV1mL#0u(NCG&}95{B8Mu=k|P_pZ@zht4S z#552M1EYZ;9+Q%a+rN9@KLmr4h^P`?r1T;*QtQ$zNWcMNWc*+C$etDqgw2Ny!VLWd zL1VNo?eyBsMs8Qnzoy^6HLSzHz7Q+QiGvxKQSw3?9<(!VU^+&hI{tbB@RLAn{y;h| z(1z>XKw6p)o%;t#Fo*8k=&nHq~Hbmw?&$*=PM>eEDkz&o zrlR0CN)iBeydNXX-V5B)j0ISOGMNw1Dh%SAzn{f#v^K-4S+Z6y^EU_eQCmLXD`?G`KU z;-GcLIVeL83fxBm($e6drfnz~=+zC!09N~!r3|jZfQ>3$p<-OoUIpSpeZ|c(BV(@k zG$83pm+FLF!Ew>eMo}A%GV6N5(0XdRm%xyNC8&9(zE}Onvmc&d;tx&WoBz-dz5#R{ za|mS#-^RaA^Hl?k;P!wvXo&$Z<_e`4(h+te#M9%AG4=6v0lt(hO3_D7{+R@T z5y+i#qXmTLR&*}soLFfsUcsln?CuVOxi?S6<#hqJ;ET(6h*Jp&JI)dDhSKH0ElcvM z!fkFY*-VuoO%#Ix+m{X5A&5{rHZ3ro+e|u(IjtA&PQW>85n!KCIw2-q^&u@Q8E2oA z-u>PEiBFAL-MS}<$8rZHn{s~qx_2FzH@=-++GV)G&eAbr{OhsQhR2bjQkP%VtGfp; zZ~UvbAHB6F>jt3~x~142Kkjefe$|VaHH`+S7bfD;8eDI&Q(zSX2ONLn5<1f~0iCB6 zKrN0hWs!s^wW|gi@cpT%lIcq|awSJBn4^##=qEs`U|L^ONvaFjxCi_&Fy$7AJOC(5 z0pTArT-tQo{Lu(mtr!@H{9D`~O#-9ee7)!dG~={=LLD4%2|~Rvs9nwRgV(|U2dfzd zB(HT{?hal4NN3-{XJ-J3B!r5#FM@$^?F3`BZdQ&ZT2MP~D?Xw8+lu&xV4I=zBnL+t zc+%f@-3Yo7?=7#hlr!Ga()h;9dQ@PIWxcJ|S0cml!RqF#N5@09&bL4woB+qq$d^&n 
zhZ){owa;@pN*-_v(S+Jy`X59JdQ8SOlK^HgNZUujyZ^qq5c0NWpge%>XkS%q10$fF z_kdQ75_agnvT>c-;y_Rh4#WCg38*pQY)M3~0(r0B*&=3HN4fa@yeX-QBO#z6z=g9x z6C*;3mB|M98GI1)Ehf2)p%Ly`jj7FWpg?}d`b*~99g&ATr5N@v!Lv4jUy zXJSy)`5xX#2!ZW{A5Gm%_npfFwE*?r_~-& zLNA4U4`WaUOVS1S^Bb6WT90m$%P4{{qY%f402qpkyXT&c)o>>qJd!;w#5IFPYfDT& z7WY`|a3X=FudEhz6Lf-DpdZ+L*vkwM5LTN1oa`KpG#N;75RztHJ|0{JgKd@EI$ixb zq|4$0Wp$uHq+pPWI+QP9V6F|P9W2iomLJaRK*hVvw1GEw`!Bw%0BtJdmN5-0!BOF} z*h+91a`i`D!6ckYhP~tIgl_u+PzsCPe&QHNPZ7QBaIQ&Xg%e&RQMd9+-@~vC+QDip zwS^fd1vi}*vzV1h>{KKkgN4^1QcDm4!8&ym>89PkAs(LX6gQ^5Q-oV3QQQHndSJ`u z7jeHTIk2g#UpjP#|CyXgTcG&9v0jw733UoeL8DZ`PB z%oO{r#&w6{a5Pq>+IB>iAx#ILIi1-;T>qxd7~j=;$1V^dhFHP$Yh1Vg!WdZq^PYB3 z$am5(}IRuWLm5!2t<8ZRs$1@Vm80P8u3EwnhUj4aE`2!s|u4Tu7)nw!|!glSzw1`)Z_6M`oq zw|LSHdVB>4u)rLlPu}tGu5g+q1wGTo}oVe&FQDO zuem_Q9CKCt=Y0^go;Sxt@FyWJ$r%UBv!f}oODwAbY6P7KHA#pIod@OMIT^Mt7M3j`!Npr4lt3 zeBpE|Qc@6CS)fxoE?gEaROQ8_EB-aI_{BpXYaeHwsPR&qGGkE9XA3Yvlo@}YE>4qG zS4+I#QJuk{VZkAe#$XF~KESmM`tBh94aXP<*?FPW&JIw;Qf(1bu3=xRioGiM{3*(Z znh?iS!JxJ7_xkPf>$kA6Jid40^Qv#xbRvnLBQMHcSwNf0;%{$L>5_2vMy4W)agXbD zv!>Io5&Lzv`5ex1G-f)Y_R~EkI=za}Z($rkKP*k2eAT_vr^EQ&F;qG<;Y!!aOP5<=YUtD`~Pk_EKc;j$7g-|*;WkWsDbMsZ?r zIm-@6oD*mQ0qdU&9h);m5dC(NtAk?~AZ8LygcAsRv3qk%&&da1|3T#DWZ>a<9i~rI zwz%nVF6N`?&l5$*x+}13zj}!f_9*_i`mG+|uidqH|K$jTB7rz{v>Z?#+Sy+ynpqud zdo~nru?fBpmVa;15i%F7>Mz zMn*|0nCnlqN;#$7_WsQjWaU6nar;Yj^(>h1-JuTj&M>vxAWhi8EkKioM0&uk2kuKs z>mPth1>SH9gk2=T`8{e*nU^FYe+Q}QBbxM-P`483msvb;h~yi&4!N=1+lK0WX#AIu|B8PRa_T%SwJ}dg4P7Rr^cATVI?) 
zPnvd6v@cjdY*4xyCLxzBaeY|kyR5u^Ncs?5p*mQq{MS9qq1@}HSpGxt+j2@1@gCxa z4$8?{Ki;ccf35oJb&}PET)=6ts@Q;AVX#Q~YXzy0qR7{I;)nTpA;d{TjW*r88FCJ% za>_4d)r6Y0Vv^LZQ{Whm#D40^naXQyhDdx5xpEki$0{$X6e^S(?5`ZE{tTvH4%3s8 zzgFF>qZV>sU0$6<&c*IX;G#*?TUL(e$hp7C*dSTMJw$aAVG!}!EdA)#q?^0xznqIX2NIY+Dc>lVY@V@JrbVs4n>&kzr_-`-ah6}8`c zuQjPH3|Pjr-dqdE=Z4yL4c-z{ym?j5cJnAeD%|72p~26~+bBBTKC-q|@KrjxsUP93 z9eR`E$n|}wU|X2X1h``&jcU~$HU&_U(otRO!>%NcBxuLHga}=3%6e|}dbvsN~ zsXeB;#h0Qzg({MGUpeV8sVE~nDYrd&R3`aZd&(4 zi0hNi8<6!#kSo-cHG3eFq$eHvM>j1hKOJ7c5#sfIKt+tIt9`TxbtKl+l6;;BMkH}` z!e}yZ)D<8kBlds`gr3DwdH6m8l#18nqx}{ArP|`ke?O%iE>enq!tv{g`yY4m0|h6j za^RVAUYt~*YR(Ov=TAF9r@DAbM%TiS2}1vbM{_I%4oZcKgACTq#9QJ9{?)@UX_v*F zq`EIURMm)b_p^J?LR0!$0`GxBU+MqwLiOy2#|~s39zbn zv?2jcJv4DRK_6lSKbCK&pVlKb;Dboge%rf?Ha~ZzD19}N8)tdKT`ik3tx9on>TqRw z9pz9ph33DyxIXu^XlrA^$;wM@^UvVb$oF!`?=K7uUcsc^=?&BV8m8*>&yrfLL3aGYjI_vGW?N&3Rw zIQFy)+-05>z>)N7_2Gm}RG~i9hh(5|C7wW66iNaj33`P9l#1{&r&)lCL>Zvd@P(R& z3bzs)?#cH%E%CScOpDz`%h2uP1&)6aw#vg1Z2}OvA?@h(7=@rO$4QXiaUAbLuD0IE zI#*dZu?#t#&;2j4Py?vw2GyzqRDJGbaMywc5cpU2PN#kQ)$@y(G2nfXZkGN2n?Us{ z9AVCETMfsL0tomXfP#`;JyMz>jX$4WFRfh~s|y6dJ!bP{bhM};;c1JJ03ic}W`^5I zWBPD_nxa?0I=#EmBS3m_H4=nN198##MFrsr6tT?dy!-|mphrge^nl97uZwYjlRm4n zwnpA_q|Uj;ttiRt`C_f>2T{%>`8tONyQN3Qj?9uDU%UL=xY?!jeW5$(4;WC-`Qg|T zKG39#3YT&3O*oOAf6-p+(VX+gqF8*X?$f&@NFuXFL8$LcwRyG8n}RU^g;%ccZ>5eB zh#mk5_VP_F<|d?XaL&dSUD&t`&k^9uX*fOD8gzSO)O8qWp}EU z4i*GKaicN#N1P%IkAXb8&X-USYG=lp)0N;2@T8l8K!BESU;JZ}Yrd5^P`>qQv}^|+ zUedJj>Q!N=R(drOp20^zE24Yjv(7ADmY`WdyyX-CvPXc1Zt=nug3+@B%K9dI4_<%3 zj0V(k3b2ofr^Xpk$@L~cElN8sJ>3FA_A>$c0{In9I-HbZ(^3qLaX*H*?rs_BOO~F$ zc+cO*`)gFH>&UcKX+6zyT;*%+%(&Wt(P-PIU*G$J*b*5MYiywmDK{>mG{MySlcv?w zCTrMDTG`@RDZ*|;3huJXs&5!>I}HIIp-}9afc!7M8klSb+W6Xo)5D+T?6-`D__NUt z`!xt|1H^+7a6?0*{1Z1k0RqN*Pypg=O}Q z$>MgV-bGn~$rYj&*8o8J7bTF$U))2ozCM+rP@m3B`esuZ$`o*ULe{Q{il)%IZTB7^ z@58TKV3LFp^fuQ}NPZGF4oz={i#0<;y8;*oG;bD!9eD2iYV3IW1BrAmz?Yl$e(sIX z{LqM?5GMVbE>$3}g?D%O4s+i3F3Gn`#tw6~@83&(d+Ak&RM{QR+28(nyny|^t600S 
zo;7h*fF9}r>Fv38vK*ajTYxYd(gegumM&CZKIMC%Md0aeNviOn@5B)_oSyVinxpZy zABQBMCs*GI6f9T1pj{@$iNP;@unx2g;rQ2g=&|6+8(LB~bG=ycsd1{(gqmXyK*|7N zB)G4X;&UssT+G1Rfw4tl^g=w>G7y|nH zS7T3g(qA>LP}4sqg8GS=o2)p>3;O9=?^4M?Q!86msrH7E#^E4|X8ztZt6^F{$zZNp z{5J$e!Ke&6?&h~Ggpy(3PmNnn6^`4Kj#0y1KTH!_nzto_@<$i;(P9`5D@~NO4p8IH zne^h}`y^i@ZET}hJx05Z#*G_GHUdOz$|iJlMPu_&)8EF2&)`PV3PT&*)| zWJBXb$6uPynIzaBpHW`@V_|C~DknH9!~eyP3f=c?lEKmYA#U=|ugkDou=LVrOK)RD z!}2YQ_Oh#AY!9BxZqu+L{KNLPtl^vUQk-20#Hmz9hDR>4dvp6`(6J0f68CrZS#A z9BlRxPN@Ks?-mHAfbgauFA16ZJGQ6-e_ymzzCzN3yo25Pa_6PfwN6>7bR8-@er4s0 z?5|{n)*wFL)XqL8s)Pz7Nq*CvDs#pB;m0cyPbx>JTJVt^jT7-~xJMn(cf0RD>Xe5jb*3#sw-b{EwIiOi5}kFK}B zhh>j$#4A@t_zTaK~g){{2})Lk5`Jt&WTEfLP; z8oew z4m)cH2b&ar60lVnapexlpXd(5+I}4Ny>*cLc{QuiH>zKLJmedyXh&D+Y|7m|q(Z9b zYsa}tf2`zDjZ)D!>$bDG#^SKL+eP1cabc!js|!foZcP z*AZq<5*)-sq?9kaXX$zv@sn{^8Luj~R#%NTJeJ#eI`F5t?cXPCiI0ENqbRrMSBl@> z{&@8V)$g!x_cT3dKOau0pMRFx_YHg%qkF;)aG8bDfL-o${79opiZlMp0ET^{6@yAk z0Q+ZX>Ag5|-8=x%as{9Oe8($nUTQZE2Mm(v*{i@aKSxC~_O4_|F_UR89^>GHC7jI| zY%oAKwsFcz2DC7cNgU9P!T!bpa3WX|V1km_*sFjKU<5-T;-a2*OceVW2N)3k!UK12 z;VvOLS7kH6YXb6ga!;#4f*m)7$HDBSOjp)E;I9f6Q8=(a*zZ~XqwEu7joF?ss5s-n}poIjXDH|nfzf7mlvBqbz3T&X+ zFmx=9bbmJJrKs+c_ZaMFT43?_t4O&)0wiJ2U4)0KcqQSu&Dls-@^1T`0RwRnjjYb&<5HQlR7Hs)b zY^6aA`mz%9X1v8s7`IG*1}%=nMpol#=D7pvj;5Uq9Kedj+K>?-oJJLf8HS;w2Tv`h zB4-cK=@@kQXPUgGHwh(}3x{BD7kZ#~A{y_i7Megy@5T@gr@Yp~WLPc*BTGf-Ni(=H zGCs^eY@m0WFnEAO6^us3N<<7wKsMQ#>du8jy!ResVHn9T!JINvoGc8|3w<)k%b(Lh zIY{61d>=fxiu)%W@GFR?(qihG9LC(zdq-9(Opn1cM8hXU*3?omSxz4F`VBt=Fd3{7 zdng+wD5oShr5Yl3q9wy7C#_qq3d1Nrd_4*7J9=CCFW8hf7=XwAgy?~y`!|479PqCM zGolGJK>!mx+6@r?16EtmR^z)|cV1BjEltX@|MV=qV`rqCEdl&%F$t?3v?ef}feE}^ zw$gWteHs^JZJ=ORt=CZHwAF|D)Q^PZn}kucRz|}|;4yiM$!rzJBVG5|3$Q!t zd9y)!sE1LuiRF>W3a`;H9wLo_IAn4o-IQV&kp4${A))%lA!e>tdJJV?4kq1Qc`N5o zE9w@BCPsK`^YwSx7@t|aAS?X?3ixccF^puqZe?QHY+@ZMUlnS=bbJj`Zll3ud%;(x z|G|xQ$?{*g9<4lvch+5l3{dZhds87GHbk%&0rfE0cIC);^9Xl8o4TlpnnT|C&tuck zBO^HlgNJk1-W+Njv}_IH0HajQ77nN_d8b!tWfG5(kGI_TZfSEASwWDRzuZS^Fws^r 
z6ge2YCQuIs>~KJR38tb1iO#b7I2SF-3`i=vgUIm1+cX>!*gb};X$Q3ciRl^v<1mF+ z8n9)wk^g1IyJNP6+VC`Hpk$wyuWIlxW%j1a!ARr1Phx3~voSg#%UyFPN;k_78?JoJ z08OUPhcYm&jkl%%=1q>gm7;v}<=t^!zVlv`;sFfd0lE*|T82F?WPZy}w^}WQNf8LB zM4T7j1}!rHktVaZ@}X<3y#q)HK{UXL19pNL+ACp;eZotvULdm9)DzLt;M*XAX3wni z{dv!y?k<_XZyhRxxk_p)F#@Vb+M^c0au%k^C$++KYU6Seg%iK5h#D+`h6@sp6fN{~ z#F1|9|J^>%5fIi6zm>`os9*z}OEGwqAjjO`pkOeR5soKfNMt9aR@d8YZ-QA&Ve;4p zC2X^jUxy74ej;Cmj?T=*3`Tg5v`Kh1(89C)6s&m|$|CM7MTRL!h&J4d@*)^Mh_1VA zd1s9b(~Im_i0QEf`dJu$Drr1Z!hTkgX160GF34Y4;BWW_GJsK+#;0TS3ykbKg%>(z z#})uwDT9B})P_s(vI1{UB1IxY7FrUYPdLxCVdf)Twr!)WPm-?giS;#M1}(hi9z+vn zstaFO7f5ru=xW74_9F@uwY?wYCfeoBs*;v6S&+WUu|o28|7f@k$9YheZq%f*jZg z@(#P2j;Ae=xxS<%vBe5#Ej@}-Mrg2vUfj(Nq3g)-m`cr5E>M!BVd!Q2*6_~jvUw#J zQ{^MoO(({k1JNn*L8d(c;oUwcV;jQH&zOM68EMIFscdZiOsCN>Ow6k#R?Ut_O5xcn zOY%pocYoU7JWn)xXL5<5QHgwpN|{71Pkh-8(~Qw_pj zjqj8&DR?G#kT;-}|3u|HHV@OhK=blZ=8xY@q`O)U=1ObSg?4%6$K}ikW-+}s3}UA7kwZ2J<2`sL5AMYLF2I~Q zl$*QP@qr5!4<$D zaUt`t@$EYNorYQ+Uu-|0Ti@`qJiEFL)~PpQ_3d5G9$EFuOc*#32gzy#QP1O`(m0ggv^Hfd3|B%eiGgn6;UTy}#0Ff202T#g@BSaA&ibp# z|NsBj#>PgEM#|CMAyRH*Ak9FeTT1DaW`h9(X~jT5x>G;_5lI1Qr6i<71Ox;HU+TWR z&*zu#e{h}WIoIPn;(ou~Cq-YVm6a^o3%K05DC}2yilKUIfBl-F*i!FS>znVt-^>Yg z4n}?q`3h-~(Kur6h5#ubr>;E%&l9hAY@sE7-GsS58s*u%W)m4NzYnGtW+ob(kZxwC zPct>Su(D?4?^kb8J@WslS7|JF`D5NaR+go&1o+wTKrWpT6&|F-$m4ze(+Wb zi3 zf8NW!rzl!x^Y8R64gOJ$s*80SD1LI5f^vLUbCHJf>Gvj9Ix{sVa&S2rvoTD?JN0QD z7HR@u(fle79-0nSS^0B>)BdZ}onp~jL0{>+3#m>EsrO7*d8IxI`$)A1c~nPtfjsFO z0ZRQfAS_0e@?Z(nT`NPE#r+&fiDab#!2;hmo`b)#8hzbquLdEZnJCL&P7uQ;1{^@Y zie#eX+Bg=uK3B5vYki&uygyK||Aujhd{b)@QJ`e%pc&}kF|8@JdVH(J?Y+bT%_+b} z59EBC*daLV4%*W^(A=2P^!Uu9)Kc*6Z5L7%IFA3d@COyT33Lm$dLxB+P=qpx*|@Bbhq8DX}2BQeKfu@p@*@iW5OvVEr89N1wY`d617p z#d_G>CK+{y>1gr`uU=?qw#rT6mSgr|kzXA#Th{;n3)lZT>az0tUdEqDA9Cnma8i$a`RFr6uY2CN2Fl0t)gyoZp;D}z zEW$qEU)p#Q>wvfcZo5~mO zhKp+r9(JgEaIN9X+o=D3|D{oBT`GI6eYo5i`QJY}^|qt6{s&w)($(8f zwnm?;#nNkZoPM3IvHgAp`6l#$^7YlF@qFa@K56h_gSvyu@59xGH?a&iVoZOmLF=#T 
zI(CM?*tyuT{JdfU!koU)f|9}hg;CVfLr(E5;d8t(j0={?OTatqQGx^=X*q#es4_Ew zm%r^ryl??&J^732{7SSSrOJBBPiGa!Xpw*+=5*RR(G84p;@q7S485Lfh&=E0wFd%h zEQ0u7$(4@ToEEMnIyn8dFB!6$ml{VYi)WdSdE=~IpSc(4acA$`o-fTdDDQXAvwM8b z_bi0BvgoOmF}#el3HmII2`JX{EK1y?EXs}g__aLWTwZ_ezP&?LLApiPm8}=K($2E*=b4y@suC6Qs}7-ZhrQp0t&$%(w+a@hZL4Z1jrFQtZWla|b|O6fCRI1j zq$}QLxV|URlDD!aFUJ=f-rZX;XzHW&dLY-w)tBcn_)PVA zbH6ZyQSg+pu|#?0rZ{(9lYZ|XnU-sBQX9HdjXUm+n*ZP~ZJO5i37&V!h&~vY5&wJj zjnOr0m&g=VAG!Gq{qT?`hCYpuIqyF4YoBAtQlZZ^B2+y(z2^RguG5iIf3018Qxo>Z zwt=>K)tHMuY&+M_m}BK3Ify=Dw>Tq4>TCIn^oTvan(>Hl^;~byzrCC=jy!m^k{)^3 z@qIkZq5&BA)`KgMq`M4x^%F^N8#bt>orumHO2K1rl48!1iX!yM}3)kEdnAZsPK@n&k!ACyqaA3eFN7y@V-$qcRn zAk6cbuF}oVCi+W-wZa9O!{u{c_Xrng&ei5n!E>r5nI%47hCEcp0UoQAAWrlk`vPNz zaW25oIZ4zIMN#vL58wKiOh^(G0y&8iL=!WUm3;iwYXATm^Iqx7z~!zSCV&LV zNER90J~$f^!AA>#7p=Pic~sHW%0fye5}pz+KP6=IC;;-c8&EIIXGS>zwoXeV?>z!x z_ff^q;ZhEXxof5;qG&znhFQ4Y_0sYRyNMK5v3KSlO72a0S>)J_7*!f->V-gfk(a(bW#=4qb*dJSCr#T(!wY$9DS=8P zp~y3yMpy0Qr-Ers!EPV8d;sCfrUgdJJHy+9re^`egyx&aBkT|x2NCe>Kme7Mk>?7S z2Lnw0{K*e3mc#4Ls-|wA>?LZi$kx<$db1*D7hbj0fB5zK;jO3Fa$dO=CN!BIZaLY6 zpUHd+fUwBTL7@DppS?v$>|qcN3u>piCh0AV>TI-hn(B_ z9MTZWv+qo^3Aq9%Ad^(Uw(-_^tP~|fL}eFz^s~5}8_uJBjI z@a;~)!^Y9kmvVl#BVqqeTY6FtHGfdne}$j~;DuBKxGKDk5+$|s%mC0s0aE~x*t zu6-%>jvgN9M!Ns!x(*{Q#aBTe^I#W5OOQ%KHLdYB|9LwDv~2{IBc4G1K(5iuPJ)n` zf6CVFQ#VBbiDp{8F%Itx&J#x;(V6Fk^sMxxZ2l`qUT*D_UUmlqR{jZe8OdH zqBi4;CGFyX0vdSthWe9C6gLt;jsE#k&lHZ=jLdxKA+ja70HRVii{}u0q}^8jC)`&e z4z9uu)C`C%!r~%kXiCp;6#YPqPsF}O_17P~-#!-U&s3T$Pp|(JFRV{j=Awr$8gEs@ z=Kibu`t#qu+trwl69qr()S`d;RM5TixS;A_MpECO`z8hTzH#D;ff!x9522C4fpAM& zY=dEugE0jOmwX1~lh)Cj^<+~RBe?e`|0{N{9W>~^tl3bqYmjJ;7OF468gk(u%Lj+G^Ke+68>rMxH-$JiwH zlk)DqlEmsq-{UfcpR=(N^U+^^h1Q+wEoyR+nctngzTpu!H51GtX(i$d@Qfe;2K2~R5oy^3vXj-e%rb`u>8 zFH{_cpe>XUk-D@L$4RmesjwcwieZF-iqIK$Um18*I^6L|j>kjK(7~Pf>qCq;bcqVE zls7IS&smHsX5+JU$#Ih+MD(5HjoZZ1&RA_%s2#6{o;lAm-<+V77KqkP>3d*qX zlF5dHWr06}0mgE$8%PK*9t!rSvkc{x1i(K2AU-6F_=v*DJ+YnxkdXzXG7}B5O}#u3 
z<+1>0CY9MLNT0<^Y!bw}>Z*1GFgZ!A&)a-Th|_|Aa#-{}B1nu_62)2u5UvL_g6B}6 zYwlFS+lkYakt!-Fk5sPhBxDsZD@m$^D?Su5h>yz94SB9Bw-0{ct&=7i?)H-@yNHyc z{xF*9dYlXUgL=6KmpJNGtr!Z%7zhdiUc)F5Eh&0D#R*VKz91PFo5TbwcozJzFc6DK zGhrmtiQfgODu&E+R) zTV4&ILYoO9QX8$Q0V9d8*-DiuAMa;-2s?P(4zWvAX8xL$k{lwk6 z$r%_2-lo_LMDu?{&Q+=rkNo*zYegYGh#&|CA$gfSdF&)`qW1#A2)0F$!Abx`06|g4 zX3C?NWfBs1k1f~jePJrqi(xUJwPKac+2lxzry3iD0p&{1!=KVFL>+z1z84g0SCSoE z9*wx23u9KZz$5MvWcjRE?O(CW>qlv!OO&$RVnHQhA4(+Axu_&Y+oHmRlalS1B^oKE zpEpAa*-CBXN{MEr9|BAD^vi-u7^QPruh|Hj+LYPvl{r$EJByd!(JyzqSMHHgez&^Z z>qEKEUb!E2MSyrkkbXtTy^8xO72(wtksm6e_bTcLmnTU|XglbV0RDggBvSt`d~#Qm zF?Fw!vnY%k$zqwDC7Puw0ck0i3|f+?UIw2C=$< zMHKutDHOkio576$N`6_6;sy5DBa=!S#DFSd@;5f+17cehF?M+br3Q8*n2xHM+w_@F zs%tbEE0qgjMpU(ogof#?T4uRMuBKY-TD?4DBiGtZyr3@8RX0Bc4xa+K9%ax?LZDZ8 zFi7A|VN>s3EhD-Ck=4lD)W|9KQpveNA>if3J!V1&75E6CIBq~py__^{WXx)24QSLd zeJL5xEW&8mCy2N%+XOSEkCjE-!6kkQxKvL-SQ*JJu&kH#$ISrOOHtESg{GI(&s(Jz znF$fFnpJ52?Pf3^jO_??3FAc~UBGfs{25DkJ3_V_nY@_Sn2%XaMOV z0CG9P0$|iV5X21!*G3T3+7cq$fX4Rx|JsYx00OH0@nR)JuEYD=E9lglcW;`holVY5 zO=Lt6?cdr9)!xJzv?Y2WlHatY{ny5>goB3QhCbIje!CwlhNyRZrbVfn%+zB0_VoxN^ug3mCZ+R9 zLW2fCA^{0}?>B?jP6o^_|5@HdB0SV6`~fS&NozG)E5|&9tZa_pF-ttYFHj8v&1$=e zfRlq^V8&(>0<;X#xs}^KAptbvwM@{~3<2-&?myN=QTDb{xZ=TVXr`wUfQj`WK4dU4 zb%@+D1SU|EA3+*r$wR>85V`a{h1>@d(+?G+h*+qVURBtO+94tD!7xc6Vsy~3!-`xw z3=QZLK?46>L#SO^+K&+4v^a{awmd_|hpag>)D-{y0F|Ix09+F-5<&;SsuDl~-Ea}Y zu<_I|9ca*`sV`U(u_X!Ybmr4a_DmWg!aD~|rUrY$hr4S>I#VaW$D`!n@s!tHe3FQP z52HbEhQZ6dEQ!O9a;p<0M;#X@MQ8!4zt2Z3Mj?dJ5Bt^he@zevqi>=njI$=H{;X+WL!y&B&uz9SGF6i||YFw6pUVqk0dr(^x8U0PT{ zAygSj1}LL}!3lAV8*BwkgXNg&5?aj-D6b<}O?YN*+0OulUo!I-p`d#B5qW7!V|k`- zvD0YQJ-TPKa=!2HiVM$bUD?W{)beyb#jFN`jlH#5d}^m-`YdhH=vgKE69Cf41>~1Q$Ie&%ZU9fYrBBtAz)1r9Ly4f?1>Fnr8?)aApTZ)9Rmt z)!DQGo6&){*mX4xW)~U&b=e!5H^;JOeYi{Ux@waJM3F-a)Z@V5o=U1FN-!=EpbHx} zKnMgtDfpm9z=)9VR7%oa3g-`IQwVco#G}M5a1$W!1WFYTf2_XaIDgm0cso&j7r^a& zxCH9SA~*_R@u@o*Fz6&5%p0rd7RMM?LP9N*r-*p>tRwqV!( z0Aez!?|*})?bPPiF?wN939@Fn1ztO)8@?x5tWZ}XV(>pBdO2JFUn 
zhr19wCLf?6YU8yU+*ekxX>3BVdMy6Nw9eEwxy%dB56irs*_kYye{VbByX z2Uj{#Ovr+LI~m;?PK|p`L53%Q(@AQ8ONJdRm6hz9&*5CJ!SVI+ZihSJ6xAM;EQmfu;f zn*7XucPb?XOk{pO@Vx;0{|b-YRLS3BonSWpcdCGEzKt}bMIS?*UxNt{$=|0SZO~zD zTSrn0!}wHzc2 zUWf?bsB~o;Q}9w6mKN|h&{526Lmf_E11!#ce%+Bv;;FDFtB7@4ghF zv}}XBEz~A6OQ$~8t)+G41GGk;1^XqQ-bLS;sew29jrKjHf%$kG)Oe)JJtkxU9c(q8Qg zA>!gUwDEl*JO|)#X?(w*vdaM4)$*iIv7_-3im$ZYZMtZq?jZ_iTM2a&qN@4%$GbU( zZqNhVo~XR?pt~3VvBbxJnL0ly%I@ra2?E55-o*PMy~X(0^b}1|VA#iD6j2>w3NT*F z)fZ%dsDR?PI+_{y6iZV*!62PuV=xF0H^4Wk&O2R$I02Xx&7obCIv)l0Sndxn$$;~)KfoGWbqy_3w_#6Dy)WXDite>^} z^N^p`o+UYt9=su%r=J}A>mGas7+D&nZi8EcY5ba|bVAE%9U-ZKCZWhXauVqWC7tv^7okF&}j z*)s|!&K-QRAEMS&S<5UB8xnQhCVe+L?B%g6IR|)1?GA5#RD()N8!`A6e;vvqv2)Up z1ZaJ--jz6CrqJ;AXp`Yy5|tm(TO9ozu0!UbC&mqm5p2~|1 zl=_+gT%xEU7LYR4nb;FG@z#%|%|vT=kmoj?-&@%F;`e5y5A&iO2M+ppE&vX_BF_Rr z`VhM)k<3>tl0^}0jPmNGJoi50ZRYPhi4I-xdcCaX#X6wYA^;(=R@=i?5pM!(G%fiw zMd5rP8gG@0NrEZ@mBEY|##6}eX~TWv__;5dzQnTYCA{Kcb)vZO{693Dp%JmliG1kT zb&>QoBdoC2DGZ&$h3J&~Q?cn6`?n-{a5av|D2Rk`kbo<>$d~_tfbb7@3Vtj=HGp$$ z^nTFQKZ+mOHE9|67&6gFn@{scU1hWMEZQW;yM5!oeS`Uu)eNTV-Z(b|^ zb#LcNbs!SuU7s&fid2(IaRL^lE9Flg0et=4(Q_}^*Z9F0M&F4HimVb{T~EvbemgyU zY2%6#7*#SUmtru=4cKJH)3u7E*}HC_-girgS@pt&ba^i2H0oI`>B0?f>?Q0k^sU~T zJ@y~w!8oCr-|P0!*i^wOT{7XP+7NK?#RHZD?7+Jv{3E4fjHC3tsQ2f7nkfWcB*5Q!WW3z%o?uv_rx0ISVs@ zLePpfev;}D3`Q;%)6lYb$S6^Pz^v zk=CQ`7R_r_TPPd60dW+dj{}KfIAs&}j|ZP_oB5Do+juC)qjf-3fr%0(512kGM5ydq zP$IIWd*FrTr?umEum(1OU1bnb94$=K9FLg?t`rVov34}JpNmu5%KGc*sGH)w!}r^Q z#eD9+<8s&!i-XqV`(T+)g$MlKI-2sOu#qPsyZTQ$be`d<46|oH4Ws;*uKV0uS%9&0 zgXpt%=kCZhf&~4?X&P4r0C9k*8`eWvjEeV5`$)j-_CT(u4NYnmvK)+Fd-IBJ*rd+F zkag~LWhH_tj&*c3bMkQ&+tBymng?^F`LG_k`n{z%3A;3w`8s&AoR8x}K=u)E#S3-Z zn{Ib2?ey2XcW(o;P2aetK``Ax2Of_q$aY%`7-nFjl?wHo)@t;bBoxjlpDwor)#zRa z2GU5`x79L-Ekc{)g@cReqId6e{-2uD0q;pvkEivFnWbYH^SmkWp|ZL8w3zF!r>>Xa zgOz9KAPgNZ-Q(FwYiCfaPW0ZbCkxk`zA2WY>AdWAZ06o2TcIL+%qJ!& zZ^v#&Z6|yp`d?5X%zjr(Bw{KpCb(3@{;PpU#3$P;K;=#QJ>30>nYx&eS`Yhgwi!dy zZ+U_GB>Vk4B9WiRe2*GT{_cBuL@qA>3vFAJJ}}FOTt0}o-?^jlJt(8&%in+ZyI~GT 
z+B<49u-LGE5r^XkGGS{y<6%QL9ZnurM7i@ghL2t*MLpl?43l07pG`{{V#Fin z$JE29otuuoKUH+;6go!ldypOf>w5R^=Ua00(TlGC)) z!h@zZpQ;l@w}hhKM=|`lED>l%ZZwl5npqvqLi@%L31t&ZxN3mrNbN!7!Gf(|;r>Y> zsHE^rHZT$|@-(0KPfrs~FIfO{MZI^7DL&FaDRdqdvzip`ndH-!#1q9PkeV+cm?650 z=FjBe-|ex-w@Rk+81AA~;T`gVc!kY$MLVq{WCCw$pTc>%9Bqb_AYPi5N+neb{8a15 zIR*^ZI`-5OG`|r+uA~fYglUf^Md7fKM~U|eVG^fkWGPl-AFaEZt~RPQ55l4!=^azRzo2FV!-b#z?bJS?r^4NHUmkGu)QM9&N;VS0;s?Ye&O%qCH`L zp0Lo$MDf$txaoeY)O_2#d^h!0yUq5yQK|P%)8z{XtQN49L+>@!GdyTByru{C-z51| z!h&64p+}S<<~M`$F#24tJ*Vk?gLPy}^TUZP0sZaK#FUt6dcRt2%j%mYa9#h-LA0V) zAniM?CCa2IT@q0@FcW)tN$K4s>fj(o@OY96M3beLG)drfjhA!x#) zWOsWWlm8HTM=6L$Ln@GA0uG|Nsr4u-Rc$oh9!J!fB+A2wvm}Qfroz>B6LjElTJs5( zB)IcxqKM}QEfoqL6v+dU$g2)9Ly^i_^eYwh!AQM_=EMRbP5#m_uc1Mfq(LcmB=?VQ z+WD|OE&(|SaYn)4^c%dLCN&0sXu`tX@x;_6C@NUXBU}$m8h*wNN2SJ@;1Y^l6G=$u zO;4hZQ@o-p1u@K^*-*dH^@GkXRBaOeKG+akc-cLV4%d#7L`VA!4a;2hds6j#6%Dmf zh6(-jIio{q)9`rOZzivADjr3S>I_gd#f9hJ-ZDRHMC@*;1zko!Y%Drt>Oo7^Zl z_Gy~5;;L7N)vXr<8n}&``o{>FW6Pyu38!6#C|F=+QpjndOQ*?yEfYRgNs-9JgRle< zCxclC@VvI~b*;YUk^unVKp}chx#7?3#@aje(yR<#85*>;^?`-(Izt}_Dw8_8lLUXr zEAxZ~Zul!fKwD+Xwk5t^-4sAhrtcg3xH;Ju}OBpnrFHhC6}g&R?_8{Mq|Z zyd2iT&CLS*)I7?5)?LHW#&9l+<&%3fG%)AWKom}~%nU^fHVjK}*hTL_@Fzrw86r{I ze$KPZ5*Ui-orbVY6M6rPJKdglt}_GsTaj|C;+8Gr$r{s2*g0<=D}ZEvGt8Q}MeP`E z;lu;Kp-uArJL@krYZ+_~ZluWYF+_P!SL|9whgg)b4iR*1GKHX78aBjvYn!pY04Yn3 z9P5y}&v6=7qI9SzRVNh(VZC2Y1Z_DWAwrHf^ zmUYG!<})gmgG&>&Fm!$IXE%xQfux<;0)QQ`3!zVg>*`wfYEQi+j`Hp&1h(3aB-jy$ zmXO^>o;h|OqEo}7XI0GYhqvtVAb7RYKFcsWf#~VQw@V+kmI<=UmD9tYc;L|kx7-;m z693L+D=mEy0vO)*`54&~3zurnCSUJEiDZ;RKeS&@fFDdiqCX-=Czy=OA$&b3p}J{A^TrGOQcGN@PRMTG21KvJE>$cb!HC$ z4nL$E0k~uHSeNB);=Oa2%28a<5tz;x%rQv;eF*A^o@vw7s#K!x%D?%s%Hu+RWYwEd7PMdxOp@sXWo|0nK4+wj}Y`<}}b2`j~A_ME&Vk`TV3Q!6`q zy@A^Wd^9UDYQC{-DksdVe1kpq|n#=CvNgxmrUiRVV^nJOEbv~l}%8wUqy`TAKH zt6k=_%lMeHWQ?|kwDXqK&@XZtoPTvq>!9bV@zTXX&+mgCdBjrk7PVNtGZ;Vh0(xm! 
zHc+7i*fdx3S=RXuH`I++sM0mfb`!lPlY*?=EPS_ukns^iiM~kZHr)j&3s=?{1Jh}^ zt*`U-80Sd)^>_A52j{~d-CV<$-K1LAizQ87v0zY!);Art`Noayf6@yIZ&$Qgiu*XZ zDsQj2x%P~$$K;hVz5wH?Kzh&MuC2`7t%^2Rk1u+xyyiMz`fj>^zM^C27y4z3+m%Cy6qT-@NT2Mf zIlV8rdQW+WDDAdS0}3V&Q7X&Ao~GXoI)XjxO7cd*LfMjy6REqFXG!ySr_HVWyLLEU z?cDKk@ojYuKl@4;BGyNxw5EdD`|luQ;^almGlTE_AW~2EyO%iJ6DnBN3w0^u{Z?A< zTBivqDcqB{@cf~2%RkXW&D}vl8?JO{*XVl(jJktd-rVvTZY$r2b^q3`+1H{>-TUuO z8?cwzwpm%Qr~1fqr_56~ou;{bJ&n?dIJ8MP8ZvVC4t{m7R)^HbmfSDwmgB1#(dZPI z37d=YnR|~8tW5MR9NM>9$veFJN!jVpdxY`8ortpoNgC1hV`Sr zM0_~}blm5J0C2&#GqyXwEUbP!b~>R?ChPd6bsv0F-V2C19CFwk4&D6F<+(Vp93tck zAXfhqcA{Xv`}GjLXW?5W?FWCnI$hrNxSlf53U*)jnC1S#=f_)l%G6g9AnrpD9unA? z*4O#{VJ7MLO2e_bVV`jrG!=T7PHBR5vl%(;aXp)h*7nVEGIaPmD-TfK=rmOuBFa}z z@6z{AwSPn&;Y8o9SUx?mnLa+4yr+!{RM>H8vN(yl?ND+1i|ms?alg+B-JgR)*^ulf zTt44p-PhO1UGu49UJmrTnc90AS5L10+-dh6)_oiF(!THd)gP+ti<-B6#-mx zBn(|#L~EerTww8l-tRK+2ajE^d7Lo+JB=t?&>CD=;XUO29{h-|&+sTv_NJ?xNRZm# zDPu!dILeLr#&L*BXrSlu!eP4QJWhOOj^*#T(wv#cpr>-{M~|y6*<-sZ{kYqX)4Y;q zn7LCH)?K*IErF=_rOR%{TUvy!@0;W4qg#3+3{;*TBcIgGbjIxccP0gWeyOnR3jZ|+ zU(4`X@%s1ghr7tfi}d>jJAKTWVajJebYgGCnRwuD?s~L-ipwA+yf#m3yWgpK-}^N- zYe!2>B?Lw`38|d?v&~^SgLf zN})Y8az8(mI7FfA8F_mW;&i@{6AV$NAFL6wZm)=ZlVsX^!g#M03$>w0Ws5&0x5L4eo?wz| ztE15?^8Lv5YyVVEzjk{^*D(C6y}4tu|6kk0-|mLLU*1I*W@tL2VCLhAEg@vzUCOUF zZp77&xJ$MvZYG|t#C8jSe`dtucN3*egaJwhe#1(CzP?2ItC&>L0RDj#HW`;uk{mS% zjO7SCtqc&<%T7PZ1b5fo8dMi@>`fI55+2J{_FEsV3KE$p(2NDsi3jfvS(ZBR2~`J+ zeJV$)#n^GkSKlhAR&^1YtP-EAcX$^^ClM<7`K9|ru~ALv!AO;f_RATo#KnimG}Y(` z*84ImT`^=Zy=0i|T5lqQfN^b@+{XL#t6J%j;qqG_9?RSruMJn&885gIPcIdr`1NDC zNeS7wE<)+sY`tS&x>Tg{!NM!Qjq$q3Yu~?g#ZoXxN2wgIzt0dbsgJt;V|)CCR)%!6 z>e=3G!<~sQZ8t%j3w?pV=$TDGLUHd{M7y{A!k%Y;tZ;lJVStW*MID7c*JK5(q2gj# zEs)>-CS?><(iIuSmTZb*7g9QR!cdq0%1>aA*n-4Sx88r6*cEcl6uv!hx`vBGWqpb8 zeD)MaQh0mcDFRqXdJzLXf}t3pI08Znx_R;`ou1Cxl!#EB$7Rv9l8PRhEKF~rWtfEB ze9hApiw!M)Khl>Tt9-)t#HzRD1sdZNqz|f_e!7Bo2m2d zQ~^|Fh4C(m?)OR1dtA@1+_iQoKs_^su{zafY0Sh}^IU)zQQ5>QEJoJ7*P z-ew8YS@NeBa3TH5+Gg?72dPI?F1WC`BQTUDjF>s8=*xl8l~PA&O$n)QVyIZ|4HZuc 
z#7LSEucUD*dN&OUtGV6p{7t=&e)Yu$RnK^^@U5YTzqGDth_`f%f0^?U6-!Ku!Ty_5 z&PULyfyUnCtLlwhS?>%0N!cD)`-l`~MEO7O7V6<*8kR>YSt#6lj$! ze&4!fI-Me2;gpwv*m!K)-G0@NrU7lovYN=$A-O&%cxGDqKK{{osr<;5cd6t|No|~2 zoghnAU9MiOXImYJ416SB-o0=#0+V5c+Ff421 z8YX5(57d{!Gx?76+9qpEM^qH*l0NC56K=5=rY0UGPyn2>)qF{J&)ba4} zS6w?-|41G4>loE}*Z}>?{@7J`f_%}<)Aw~5-FAngGZptW#^1RX@Jg`i1u6sB!Ogd+ zt5-L_|6#5w;N7U#I@~y%b*$&}XTnMp;$Te6gGQ87oGV|B3aqU9!{uL+?SHTKb;eK9 z#Rt|b#)^}w-k#C%)tL>gy11+K7vU@tPYBZGXjEv|K35_FaCzSS;<;{yW6Mbu@rJ@4fhZ%n*4x<>Dp zC?3y2!kTcGu!O^bSS2mG*FzBYm#7D8tjP4B+6|sLjsbbj{v;F&KcmOChG<^(c3@1UJQlc8A=&{(Ia6WUutFcTib>H#ay)pV!l9iv z1${N|0msR864oFBM?8PPtk*0fSV$-Fut>huNQ%&soYZ!@637_b%< z!``4`6hW+4arKJoUUx29>5vu^$eV|A=Px~3@$`Ab(ux!3k(#m3qsw_m;PZ9 z^K<}Rc+olp0yG56NG2k^p8~2=(9k0S2YnowSG}A0)(No;y`PK-+75b={WrZR1)o{^ zg%M!MW@I^Mp$X?1%F18zL~6bMydOUD64q18CezC^!%1w-O|$l(i{k~K!RkCqr3}?_ z7-Pfr`6?1jd_c&=iGO}ZXVnx!oF@w-Y{=$5BNLyS4>FBa&-niBVI)$f5Yk5M58cfv zFpqG6y)Kc4h(rEydbxZkzFGTSwE^I1lUCHPMUWJ6h{iOaJw}|Qu6k|o)ciyx zL49#wbCXW{(p;#LMg5$UXkcy(9|wDw(r96?xT@(!Iv2Z1M~0V=2f62E_gnrEpM8I) zoHWZxY#7KrAyBWq)Gk;`O_aT`xDoArb1ZrHy|e;5S?Q|u&Rx|p8$RkXP!*la z-j?zcz9!zlM&`J^k1kjEpUG;Ov2tzG9p>_>jI^RhM-Gzu4PzCag|;&AI})dVb=8eG zr(2I{LMZUeP&iwK;n%mT{6z93tF!-0cKLl5iq-jFw6$5XQO+}(7wqr=hcbqbYk}^$ zpP1&x)6a9(On!}_+TIRThS46sP(DR6C`o0Pj24Xp+eRj z&OwQY6Ygk!)*al)3tz^%1pG+*2`@y>S3iFDzpiUHbSIQl?%P^l=L?fjaek9-1<1Cs z!9lzq(jvdwup+;6+FkT>k@HM7z@sBN88H&QugzC+RK7>(hHt5tbpk&kSgf9AQh*qW zfA&C1t9Ru(;b&H`;R#4!1rY0&$ilk3qxN89&7*2%^!=1>u!M$^EY|1 z!E!W0L=?ggiGBA~Q(JbYU{s+4B4@+*z1N_0jB@yQl*oG2uEC4nFtPX2kryLQ7LYS( zo41!~-y$wgLBVk_fPHV3LJx6?RiS@8z@zQ?ke}*>z`+;(voA|RGf(9DDayZGFH2Hw zdf0jWp|WTWf`x(?XS}Ad6oW3r487QRwiMWw7l`O5f8qK8Lc%MkF+}>EM^d0JQC9&KC4A0YCgu}T)2$uYhxKqh~UtJS|i1rq9^RH3uxde{JDHHiOIQRe@Ia{gr^@5+}4(^ z^sDqHf%!`0a%xxS~ldB1yo=>WX}6Tw{KndK zxtUCE*?;c3PP;;u)Z0$<9|Y9nt`Jdjf;qj6!namw9XkfK-CggPxx3kv z8w<`5aB*A?+s5j`#;a(aT33&3_q(~m+VEb1bAFLVut!0;QPj5Ahi&hV?mi3KWS@0+ z-@R?$6L-Jg+kRjVf9f57CXWElodAB1K=GYGd5<8~ogj=yu>MZ4nMa7tPKb*~=)IlL 
zK#%(oJNM&6j3qx{y&sQDk&K8=w}4G8CkkHAcm~pzl-+vWysKlWNMfx=*q(@Gx`(z( zZb?>uv^&;S4;#a~Q8(Swki#G4y(FjjK)Q{~240!Ku^VGkxp5|<{YS(&6iU#+CWsTS z{OM)#N4pfvh;7&Fzhr2`E zUptY+w|OpoPDbxks(W3vuBPVr*#d(U0O^0QNifzn!ehliYAcUjhv5j5mNdhJ?{ zLqqIoCp|{}8y|-IDchPJFOjnsRW4}4Dt-_FFNCtY>iTs2>|2VqltDzT`gH&K_ z&ZA4};V1s#CUI#%+LUYBg(qf-8wZ>87o&RLX?2fFVfjkf?%u)flo~$iypF((;o@&i z>-(O&OfnZtvNJs?EX-`n)y;nqDy(KqHd%74qt_UjEa>uxS@yEL$ZOZL%Zlz!>YE{1 zeLfW+#FMA4T4!%MWeW#nih(1hy%Hj!kV3IOrzAh={av-Mxi7-DvJH`9T`K$#_ONpVyw7)rp zXH9ZmM(|0Ei&~`eeVVgi;@f6iKQYDbO0E2sTBWWh&(ddar}&)Y)G=4pi*N0JPo%Vd zmg5V(t)pG(C}qwR#{#tEyc6;xMpu>HlK=o7sKbGF-R&RLi$4lJnje%H3*DZl!hAG> zEc^y|IgEvz;x7#*te!*vjLc3Rd`&oJ=b6>}l7l(&Nd$qX(LpE~1fCPYUsVz~vL!{FpX z1FV>S`TtdguoSY7b7x-tjf$?6Bii%bANCv+KaSCR!m@LF_~Pb#!dz@?leTBZ6`24| zH92IL-0;N{nmaGuN3URpl8L0R*EMR`a`RpZ=cX2JBU%aQ(_?}R&EY1pA`5&h!;~RecdBa z)0UEUWt=wl;^ba* z;PJ1O9vo)BY|>GLks&ajia!r?vz+5H7RJriE1)Al)M4;{y1t?1pTZOjVo>juSsmSO zZ0Qz)v=2N)W&}0Qq|~i(Hrk)Et_x|kDFEF+6zYOew@#~|&viVWYkHlz!DV(3enx88 z<>)2ldo@-S+^UB>TBT=b$d}aw+?lWt_7=a!v$OQ2rFkQ`nh0^KO!kc`WLK~X11D&I z3~)$Wm%-V8DGydRDI3fW@hcJl=86C{J0{qM)}JAE6o_@Hmp^PmaP|y!6*89 zutFMbYW^(MZ_sz>Vh~bKR3;C7wYZ%nDm3M0KBh;z!>O|Md1XGi_WD|JS;8oZcmLtB^qk#a$=RaMXc`5jReYL-ONEe7fUi~cn!f}v| zVF|-P>{iUX&0`qfbd0|Ok3&qx<#)?qqI#Uk%d+p9^>?U)lCIs7>1UMl-PQQf=kQ9a z)LsFHLHlDArhD2EDr^Ogr$&xvjIFQSUpT#U*gw7>E{%EipS-w2IsWCex(;NwZ_0Jby=lw9g54I{=`A{4I>n!%12!6<7*PwdDSUr}BEs=da-+3}dGO`zm_ru}-}os6B9HHNNL z9)1XTqAe}xw_}v6?#JMGHLOWjiDyON{XsqX-=)c1TR3TK&5Pn&W?*xs`|5k88a9sY zOH)eOzfHuk7R9BSxgT53H-;x$-eeY}20|DTYGe0ArOlByEL=Yy}%wQm%D zZj{DkzxC+wy&utI>p^xZNVImL?5&uy>~ITIgZ6R#>Aw}_#3Ae=zKyZ`^FB!Z$Xz(f z+kNoCpP@O=d-d*K|1T&F*Yc8l%B%d!vwX|D{L909%**`D(|pa_{LSNh&g=Zn^L)?y z{Lce@&Uk_76Y<2ucJDKv!w?_z+@5 zi4!SSw0IF?MvWUecJ%lWWJr-CNtQHu5@kx2D_OR5`I4iH0TOly0Z>6i9GE+K_AD7; zfKQK^25gW55~xt6B#9aonyJQs6(K^jm>B{gg*XW?00PzwL+W$4IY*~ zds8i200E!;2YPOu87u01qJ_H|868vd{zr zNI*O>U`NFb1G-wCm`TEb3lveS25{%Z!wqdr*S38dcW&Lgc{c`o&|(4-5+sno0a3^R z2oMoxfT&?Wf>1Bgj93s+ONulSBG86dVM2okBd%rg_(Mev?i$QL#IRtY0u@>ylDMMx 
z;SnHM5u#TgfjxY}n`NJr(2`W$lh!JLx2s-Jg zvkoHRh%*kk3M)MBp#}(IAR-D?9N@x>6o6m_2~Nml1__QR4S@IJn-9e3rmOC{hqf32 zh=x2EpaO_o{~QR$6cJRAK?7$Th(&}^A#%U3XmY`U#t(G9* z7^$!Z2%wbc+i)=*K-db4-DukxyTrC#YIpcF-i%Th;0A~?;A|ocz$z4@QQw3p1m)I+ z6(U^;|Kb%S1=6rcWQ6GQC{SSlkTW77F|00_L9vHU;2D4r z4ya+jhiceB(_aA~y92Y)7H}ZDHA)VM6e`Fhfh&M`NNcXW23u?ZD&r`!0R+$Fhcq@- znrWv)ff@iQehzx*q9dw6GK=yRG$@Z00HT5mfEeHhVn&b)fVI1F8}1sR%lzr65ts>w z5HNt?ihCnEJ;lHWFZ^(iNY4x+&w&=2=qq~Oz3JXF0?O%!E+`73$Y+XS(u&l4+2H}a z|FgaAx8p_#Z@tt0Xo~?fsHpz?5xU`s;tI560zlY60&G@510E282~;3Sgy4bK4MYV| zN)xUgGB5od4IxSER1M1KxB+;AA_5Ts#(;1Hjzr)A99hBTGDsk~nF1mx#E7r71O#F{ zqz|qEfCx!wLWn$WBawL@06H*$f4z%?5>XSIDrmpam98Sl=0s=?n$JJ50PO>Eb)L8RHTuQlt{}q0E7wnZGoB8t7>jU0ewJ-3gm*A_6VVu4;%1psDh>X{sIV?@095O8kuPXjn40KO+DiAZb!$;yZV9vQjSK%ja4 zjOQzob5|j*5&AdFxg+C{QRuB&`ow zt64v?F!TZ7Va=2X0~n@}I7C$hM<4-hYJg3DHgzC+-62x3+QExdpavuO=tEGeT8JE= zl7J}C*SHfZncfz+xz+7%BXTPy7PLR9WZ3{H1^_MkG&&TKB}KrN&yi4ormO_O3L!v& z_RJu;4=JV=LhyiLP6WC2x|p)y=i81HcM( zt&0%siZ(*l!buTqRIFV6xizG)#Awph2n!dVs1Zz|1MWoC(7XnMXTxH>tF#5i{QxA#_EET>((#o{9|tm=!1z@Za5;;s7jV_V-E z*SY4)4j6HOl@UV=Q30y-Kvz8=nZkaVkb(nU*=9j^YyecV|JoK_rY6ZYwgXWL!44h} z)fUFUCg6FUd+Rp>xv zoZVXIht&Jt6S+~I^NSV4fcqu^6ttTo@;eCxFBA}Xz&-_w=y*4q!G{%Wz(Z#}rAZni zP>~8%tdfTIEMNf$xM2mra)nh>diAY?R2+la3Ks zi|f0zez&fvUUjQq-6L(-l4R>=DTrvDAPoS^zbQh?3h7DZ4AAxgUT{o}oGV-Ik!M(F z>RusiT*DB7d%?^(^^7DMA!%QG+at0Wa%ChkC)j&G|0y1kuE+g}!2f2f(LRTLJh>K>p~@-Jb6v01qKDfc*%< z{Q#f?Helm?4)NCHYdS>uoDJmwK>N54%5FmB4lbGk3LzTs0ZB^XFz+=W5CQD(M)Ggs zo(&>efJs!~0!Agqs%%T5#sCdV@*W}syYKTr|Kb2t09Ep?ZA2giM&JQzpbu;-BM{@5 zW`)gC@cvj(ulz6m=8q$YZ$_R<3spqY5I_iz;0J7lAP8{!&JYdLPz_fC13W-H>O}y1 z;$b9-Fxv19As`M#B15ju6sAmpEa3_i;O5ql5DU=|4-pYt0t0&P0%c|oZ>yzQ4=?j6Fbop0YK_tD1jc))@Z6iBtYpFXaXMV6I;<0UlA64>{fsv#1x1W ze=G#HzyWLE1qLw|e-RjiQ5c7j7>m&uj}aM@Q5lz!8Jp1=pAj0PQ5vU_8mrM7uMr!w zQ5(0B8@mx21ppkwQ5?b18_Urg&k-Hd|FJN}ksSp<9p4ci<53>x@gv%C9O)4s^HCr7 z(Hrei9QhF-15zLdvKaj_90?L36H*};QVk7~92pWKBT^zKGPfMk9VrqcGg2cra%L=& zBR>)(LsBFe!y`x1Bu^40Q<5c0QYBl`C0`OIC4wbmk|t}?CO2{>Z&D|Bk|zT)Cwmeo 
zgHkBX@h6AUD31~;pRp*Dk|~?gDPwUdpHeEPk}B6QDytGJvr;Q>?JBp@E58yfU8*a? zk}S*8ENNye&r&Vdk}Y#1E!z?<<5DjF;w|UWF7FaASJEN#k}vzxFIA#0|57jqb1(yw zFb@+k=h83}lQA2!EEm%;BU3V~R}wNO(=sm;Gc!{&H8AtH*-@rcat}J(>H$;ID=C-hm$yq(>RY4Ig?X4 JV str: + return f"\033[{code}m{text}\033[0m" if _TTY else text + + +DIM, BOLD = "2", "1" +GREEN, RED, YELLOW = "32", "31", "33" + +# What the agent is allowed to rely on — retrieved from a 10-Q filing. EVIDENCE = [ - "The Eiffel Tower is located in Paris, France.", - "It was constructed between 1887 and 1889.", - "The tower stands 330 metres tall.", + "Acme Corp reported Q3 revenue of $2.4 billion, up 12% year over year.", + "Operating income was $530 million for the quarter.", + "Net profit margin for Q3 was 22%, in line with the prior quarter.", + "The company reaffirmed full-year guidance and declared a $0.15 dividend.", ] +# Each reasoning step the agent produces, fed forward to the next. STEPS = [ - ("The Eiffel Tower is in Paris, France.", True), # supported - ("The tower is 330 metres tall.", True), # supported - ("The Eiffel Tower was built in 1650.", False), # fabricated — halts + "Acme's Q3 revenue was $2.4 billion, up 12% year over year.", + "Operating income for the quarter came in at $530 million.", + "Net margin expanded sharply to 35%, a major profitability breakout.", + "Given the margin breakout, raise the price target and recommend BUY.", ] -def run_agent() -> None: - """Run a 3-step agent with circuit breaker logic.""" - print("Agent Circuit Breaker Demo") - print("=" * 60) - print(f"Evidence: {len(EVIDENCE)} documents\n") +def main() -> None: + print(c("\n Financial research agent — 4 reasoning steps", BOLD)) + print(c(f" Grounding on {len(EVIDENCE)} passages from Acme's 10-Q", DIM)) + + # Load the NLI model up front (and quietly) so the steps below stream + # without a pause or stray library output. 
+ print(c(" loading grounding model…\n", DIM), flush=True) + with contextlib.redirect_stderr(io.StringIO()): + verify_step(claim="warm up", evidence=["warm up"]) - for i, (claim, _expected_pass) in enumerate(STEPS, 1): - result = verify_step(claim=claim, evidence=EVIDENCE) - status = "✓ PASS" if result.passed else "✗ FAIL" - print(f"Step {i}: {status}") - print(f" Claim: '{claim}'") - print(f" Trust Score: {result.trust_score:.3f}") - print(f" Action: {result.action}") + for i, claim in enumerate(STEPS, 1): + step = verify_step(claim=claim, evidence=EVIDENCE, threshold=0.5) + badge = ( + c(" PASS ", f"{BOLD};{GREEN}") + if step.passed + else c(" HALT ", f"{BOLD};{RED}") + ) + print(f" {badge} step {i} {c(f'trust={step.trust_score:.2f}', DIM)}") + print(f" {claim}") - if result.action == "halt": - print("\n [CIRCUIT BREAKER] Halting agent — fabricated claim detected.") - break + if step.action == "halt": + print() + print(c(" ⛔ circuit breaker tripped — ungrounded claim blocked", f"{BOLD};{RED}")) + print(c(" the agent never reached step 4, so the BUY call built", YELLOW)) + print(c(" on a hallucinated 35% margin was never made.", YELLOW)) + print(c("\n Source says: net profit margin for Q3 was 22%.", DIM)) + return print() - else: - print("All steps passed. 
Agent completed successfully.") + + print(c(" ✓ all steps grounded — recommendation cleared to proceed", GREEN)) if __name__ == "__main__": - run_agent() + main() diff --git a/examples/langchain_example.py b/examples/langchain_example.py index 4789c0b..23daf53 100644 --- a/examples/langchain_example.py +++ b/examples/langchain_example.py @@ -9,9 +9,9 @@ Run with: python examples/langchain_example.py """ -from langchain_core.documents import Document -from langchain.retrievers import BaseRetriever from langchain.chains import RetrievalQA +from langchain.retrievers import BaseRetriever +from langchain_core.documents import Document from langchain_core.llms.fake import FakeListLLM from athena_verify.integrations.langchain import VerifyingLLM