gitronald · gitronald · May 31, 2026 · May 31, 2026 · May 31, 2026 · May 31, 2026
diff --git a/WebSearcher/classifiers/footer.py b/WebSearcher/classifiers/footer.py
@@ -28,9 +28,9 @@ def classify(cmpt: Node) -> str:
 
         cmpt_type = "unknown"
         for classifier in classifier_list:
+            cmpt_type = classifier(node)
             if cmpt_type != "unknown":
                 break
-            cmpt_type = classifier(node)
 
         # Fall back to main classifier
         if cmpt_type == "unknown":

diff --git a/WebSearcher/classifiers/main.py b/WebSearcher/classifiers/main.py
@@ -3,7 +3,7 @@
 from selectolax.lexbor import LexborNode as Node
 
 from .. import logger
-from .._slx import class_tokens, get_text
+from .._slx import _iter_text_fragments, class_tokens, get_text
 from ..component_types import header_text_to_type
 
 log = logger.Logger().start(__name__)
@@ -186,10 +186,10 @@ def general(cmpt) -> str:
         """Classify general components."""
         node: Node = cmpt
         node_id = node.mem_id
-        cls = class_tokens(node)
         # bs4 distinguished "class" present vs absent via ``"class" in cmpt.attrs``
         # -- preserve that distinction explicitly.
         if "class" in node.attributes:
+            cls = class_tokens(node)
             conditions = {
                 "format-01": cls == ["g"],
                 "format-02": ("g" in cls) and ("Ww4FFb" in cls),
@@ -268,30 +268,14 @@ def knowledge_box(cmpt) -> str:
         condition["locations"] = node.css_first("div.zd2Jbb") is not None
         condition["events"] = node.css_first("g-card.URhAHe") is not None
         condition["jobs"] = node.css_first("g-card.cvoI5e") is not None
-        # bs4 ``next(iter(cmpt.stripped_strings), None)`` -- first non-blank
-        # text fragment in the subtree. Use the _slx walker indirectly via
-        # iter_text_fragments-style filter.
+        # bs4 ``next(iter(cmpt.stripped_strings), None)`` -- first non-blank text
+        # fragment in the subtree; ``_iter_text_fragments`` replicates stripped_strings.
         first_text: str | None = None
-        for s in (get_text(node) or "").splitlines():
-            s2 = s.strip()
-            if s2:
-                first_text = s2
-                break
-        if first_text is None:
-            # fallback: pull first non-whitespace fragment from text walker
-            text = get_text(node) or ""
-            first_text = text.strip().split()[0] if text.strip() else None
-        # Simpler & more faithful: replicate stripped_strings exactly via the
-        # _slx iter_text_fragments walker.
-        from .._slx import _iter_text_fragments
-
         for raw in _iter_text_fragments(node):
             stripped = raw.strip()
             if stripped:
                 first_text = stripped
                 break
-        else:
-            first_text = None
         if first_text is not None:
             condition["covid_alert"] = first_text == "COVID-19 alert"
         for condition_type, conditions in condition.items():

diff --git a/WebSearcher/component_parsers/general.py b/WebSearcher/component_parsers/general.py
@@ -183,6 +183,10 @@ def _next_sibling_with_text(node: Node) -> Node | None:
 
 _ARIA_RATING_RE = re.compile(r"Rated\s+(\d+(?:\.\d+)?)\s+out of\s+(\d+)")
 _ARIA_REVIEWS_RE = re.compile(r"\(([\d,]+)\)\s*user reviews?")
+_RATING_NUMERIC_RE = re.compile(r"^\d*[.]?\d*$")
+_RATING_VOTES_RE = re.compile(r" vote[s]?| review[s]?")
+_RATING_REVIEW_BY_RE = re.compile("Review by")
+_PRODUCT_SPLIT_RE = re.compile("-|·")
 
 
 def parse_rating_aria_label(aria_label: str) -> dict:
@@ -202,26 +206,24 @@ def parse_rating_aria_label(aria_label: str) -> dict:
 
 def parse_ratings(text) -> dict:
     text = [t.strip() for t in text]
-    numeric = re.compile(r"^\d*[.]?\d*$")
     rating = re.split("Rating: ", text[0])[-1]
-    details: dict = {"rating": float(rating)} if numeric.match(rating) else {"rating": rating}
+    details: dict = (
+        {"rating": float(rating)} if _RATING_NUMERIC_RE.match(rating) else {"rating": rating}
+    )
 
     if len(text) > 1:
-        str_match_0 = re.compile(" vote[s]?| review[s]?")
-        str_match_1 = re.compile("Review by")
-        if str_match_0.search(text[1]):
-            reviews = re.split(str_match_0, text[1])[0]
+        if _RATING_VOTES_RE.search(text[1]):
+            reviews = re.split(_RATING_VOTES_RE, text[1])[0]
             reviews = reviews.replace(",", "")[1:]  # [1:] drops unicode char
             details["reviews"] = int(reviews)
-        elif str_match_1.search(text[1]):
+        elif _RATING_REVIEW_BY_RE.search(text[1]):
             details["reviews"] = 1
 
     return details
 
 
 def parse_product(text: str) -> dict:
-    split_match = re.compile("-|·")
-    parts = re.split(split_match, text)
+    parts = re.split(_PRODUCT_SPLIT_RE, text)
     if len(parts) == 1:
         return {"price": parts[0].strip()[1:]}
     return {"price": parts[0].strip()[1:], "stock": parts[1].strip()[1:]}

diff --git a/WebSearcher/components.py b/WebSearcher/components.py
@@ -40,17 +40,13 @@ def __init__(
         self.type = type
         self.cmpt_rank = cmpt_rank
         self.result_list: list[dict] = []
-        self.result_counter = 0
 
     def __str__(self) -> str:
         return str(vars(self))
 
     def to_dict(self) -> dict:
         return self.__dict__
 
-    def get_metadata(self, key_filter=["section", "cmpt_rank"]) -> dict:
-        return {k: v for k, v in self.to_dict().items() if k in key_filter}
-
     def classify_component(self, classify_type_func: Callable | None = None):
         """Classify the component type"""
         if classify_type_func:
@@ -150,7 +146,7 @@ def __iter__(self):
 
     def add_component(self, elem, section="unknown", type="unknown", cmpt_rank=None):
         """Add a component to the list of components"""
-        cmpt_rank = self.cmpt_rank_counter if not cmpt_rank else cmpt_rank
+        cmpt_rank = self.cmpt_rank_counter if cmpt_rank is None else cmpt_rank
         component = Component(elem, section, type, cmpt_rank)
 
         self.components.append(component)
@@ -227,4 +223,4 @@ def export_component_results(self):
         return results
 
     def to_records(self):
-        return [Component.to_dict() for Component in self.components]
+        return [cmpt.to_dict() for cmpt in self.components]