From 3262d25dd5107bf5076b7319da332c9e5e51dec2 Mon Sep 17 00:00:00 2001 From: Haroon Quddus Date: Sun, 26 Apr 2026 23:15:00 -0500 Subject: [PATCH] Add OCR queue pipeline and room-number detection --- detect/__main__.py | 203 ---------------- {detect => ocr}/__init__.py | 0 ocr/__main__.py | 166 +++++++++++++ ocr/__pycache__/__init__.cpython-310.pyc | Bin 0 -> 150 bytes ocr/__pycache__/__init__.cpython-312.pyc | Bin 0 -> 154 bytes ocr/__pycache__/__init__.cpython-313.pyc | Bin 0 -> 154 bytes ocr/__pycache__/__main__.cpython-310.pyc | Bin 0 -> 4222 bytes ocr/__pycache__/__main__.cpython-312.pyc | Bin 0 -> 7807 bytes ocr/__pycache__/__main__.cpython-313.pyc | Bin 0 -> 7883 bytes ocr/__pycache__/ocr_engine.cpython-310.pyc | Bin 0 -> 7531 bytes ocr/__pycache__/ocr_engine.cpython-313.pyc | Bin 0 -> 12038 bytes {detect => ocr}/detector.py | 0 ocr/ocr_engine.py | 261 +++++++++++++++++++++ requirements.txt | 26 +- tests/test_detector.py | 2 +- 15 files changed, 435 insertions(+), 223 deletions(-) delete mode 100644 detect/__main__.py rename {detect => ocr}/__init__.py (100%) create mode 100644 ocr/__main__.py create mode 100644 ocr/__pycache__/__init__.cpython-310.pyc create mode 100644 ocr/__pycache__/__init__.cpython-312.pyc create mode 100644 ocr/__pycache__/__init__.cpython-313.pyc create mode 100644 ocr/__pycache__/__main__.cpython-310.pyc create mode 100644 ocr/__pycache__/__main__.cpython-312.pyc create mode 100644 ocr/__pycache__/__main__.cpython-313.pyc create mode 100644 ocr/__pycache__/ocr_engine.cpython-310.pyc create mode 100644 ocr/__pycache__/ocr_engine.cpython-313.pyc rename {detect => ocr}/detector.py (100%) create mode 100644 ocr/ocr_engine.py diff --git a/detect/__main__.py b/detect/__main__.py deleted file mode 100644 index 1e3fdbb..0000000 --- a/detect/__main__.py +++ /dev/null @@ -1,203 +0,0 @@ -import collections -import math -import operator -import os -import sys -import time - -from http import HTTPStatus - -import requests - -from .detector import Detector - - -QUEUE_NAME = os.environ.get("QUEUE_NAME", "detection") -WAIT_TIMEOUT = os.environ.get("WAIT_TIMEOUT", 30) -VIZAR_SERVER = os.environ.get("VIZAR_SERVER", "localhost:5000") -MIN_RETRY_INTERVAL = 5 - -MODEL_REPO = "yolov8" -MODEL_NAME = "yolov8m-seg-nms" - -MARK_ALL_OBJECTS = True -MARK_LABELS = set(["door", "dining table", "desk", "table"]) - -# Rename some of the labels from the detector before marking them as map features. -LABELS_TO_FEATURE_NAMES = { -# "dining table": "table", -# "desk": "table" -} - - -def try_create_features(location_id, item, info): - # Get current list of features for the location - features_url = "http://{}/locations/{}/features".format(VIZAR_SERVER, location_id) - response = requests.get(features_url) - if not response.ok: - return - - features = response.json() - features_by_label = collections.defaultdict(list) - - # Organize the existing features by name. - # We are only interested in objects of the configured types. - for feature in features: - if feature.get("type") == "object" and feature.get("name") in MARK_LABELS: - features_by_label[feature['name']].append(feature) - - for obj in info.get("annotations", []): - label = obj['label'] - if not MARK_ALL_OBJECTS and label not in MARK_LABELS: - continue - if label in LABELS_TO_FEATURE_NAMES: - label = LABELS_TO_FEATURE_NAMES[label] - - pos = obj.get("position") - if pos is None: - continue - - pos_error = obj.get("position_error", 10.0) - - # Check if there is any already existing feature within a certain - # radius. We use the combined position_error values for the two - # objects, which give a rough estimate of how wide they are. If they - # are too close, avoid creating another feature. - duplicate = False - for other in features_by_label[label]: - sq_dist = sum( (pos[d] - other['position'][d])**2 for d in ["x", "y", "z"] ) - dist = math.sqrt(sq_dist) - - # Other point's radius may not be set, which means we do not - # know the position_error value for that other feature. - # Just use the new object's position_error twice, then. - other_radius = other.get("radius") - if other_radius is None: - other_radius = pos_error - - threshold = pos_error + other_radius - - if dist < threshold: - duplicate = True - break - - if duplicate: - continue - - # Create a new feature on the map. We are abusing the radius field - # here to store the position error / spread. The radius attribute was - # meant to control when the feature should be displayed in AR, only - # when the user is within a certain radius of the feature position. - # However, it is not really used. - new_feature = { - "name": label, - "position": pos, - "style": { - "placement": "point", - "radius": pos_error - }, - "type": "object" - } - response = requests.post(features_url, json=new_feature) - if response.ok: - new_feature = response.json() - features_by_label[label].append(new_feature) - - -def get_queue_names(): - url = "http://{}/photos/queues".format(VIZAR_SERVER) - response = requests.get(url) - if response.ok and response.status_code == HTTPStatus.OK: - items = response.json() - return set(x['name'] for x in items) - else: - return set([QUEUE_NAME, "done"]) - - -def get_next_queue(detection_result, supported_queue_names): - annotations = detection_result.info.get('annotations', []) - has_person = any(x['label'] == "person" for x in annotations) - - if has_person and "identification" in supported_queue_names: - return "identification" - elif len(annotations) > 0 and "detection-3d" in supported_queue_names: - return "detection-3d" - else: - return "done" - - -def main(): - detector = Detector(MODEL_REPO, MODEL_NAME) - detector.initialize_model() - - while True: - sys.stdout.flush() - - # Set of photo queues supported by the server - supported_queue_names = get_queue_names() - - query_url = "http://{}/photos?queue_name={}&wait={}".format(VIZAR_SERVER, QUEUE_NAME, WAIT_TIMEOUT) - start_time = time.time() - - items = [] - - try: - response = requests.get(query_url) - if response.ok and response.status_code == HTTPStatus.OK: - items = response.json() - except requests.exceptions.RequestException as error: - # Most common case is if the API server is restarting, - # then we see a connection error temporarily. - print(error) - - # Check if the empty/error response from the server was sooner than - # expected. If so, add an extra delay to avoid spamming the server. - # We need this in case long-polling is not working as expected. - if len(items) == 0: - elapsed = time.time() - start_time - if elapsed < MIN_RETRY_INTERVAL: - time.sleep(MIN_RETRY_INTERVAL - elapsed) - continue - - for item in items: - # Sort by priority level (descending), then creation time (ascending) - item['priority_tuple'] = (-1 * item.get("priority", 0), item.get("created")) - - items.sort(key=operator.itemgetter("priority_tuple")) - for item in items: - try: - result = detector.run(item) - except Exception as error: - print(error) - result = None - - url = "http://{}/photos/{}".format(VIZAR_SERVER, item['id']) - if result is not None: - # Determine the next queue for this photo, then send update - result.info['status'] = get_next_queue(result, supported_queue_names) - requests.patch(url, json=result.info) - - annotated_png, mask_png = result.apply_masks() - headers = { - "Content-Type": "image/png" - } - annotated_url = "{}/annotated.png".format(url) - req = requests.put(annotated_url, data=annotated_png, headers=headers) - mask_url = "{}/mask.png".format(url) - req = requests.put(mask_url, data=mask_png, headers=headers) - - geom_url = "{}/geometry.png".format(url) - if result.try_localize_objects(geom_url): - requests.patch(url, json=result.info) - - camera_location_id = item.get("camera_location_id") - if camera_location_id is not None: - try_create_features(camera_location_id, item, result.info) - - else: - info = {"status": "error"} - requests.patch(url, json=info) - - -if __name__ == "__main__": - main() diff --git a/detect/__init__.py b/ocr/__init__.py similarity index 100% rename from detect/__init__.py rename to ocr/__init__.py diff --git a/ocr/__main__.py b/ocr/__main__.py new file mode 100644 index 0000000..1d9d722 --- /dev/null +++ b/ocr/__main__.py @@ -0,0 +1,166 @@ +import operator +import os +import sys +import time +from http import HTTPStatus + +import requests + +from .ocr_engine import OCREngine + + +QUEUE_NAME = os.environ.get("QUEUE_NAME", "ocr") +WAIT_TIMEOUT = int(os.environ.get("WAIT_TIMEOUT", 30)) +VIZAR_SERVER = os.environ.get("VIZAR_SERVER", "easyvizar.wings.cs.wisc.edu:5001") +MIN_RETRY_INTERVAL = 5 + +API_TOKEN = os.environ.get("VIZAR_API_TOKEN", "") +API_KEY = os.environ.get("VIZAR_API_KEY", "") + + +def build_headers(extra=None): + headers = {} + if API_TOKEN: + headers["Authorization"] = f"Bearer {API_TOKEN}" + if API_KEY: + headers["X-API-Key"] = API_KEY + if extra: + headers.update(extra) + + return headers + + +def get_queue_names(): + url = f"http://{VIZAR_SERVER}/photos/queues" + try: + response = requests.get(url, headers=build_headers()) + if response.ok and response.status_code == HTTPStatus.OK: + items = response.json() + queues = set(x["name"] for x in items) + print("Available queues:", queues) + return queues + else: + print("Queue request failed with status:", response.status_code) + print("Queue response body:", response.text[:300]) + except requests.exceptions.RequestException as error: + print("Queue fetch error:", error) + + return {QUEUE_NAME, "done"} + + +def get_next_queue(result, supported_queue_names): + return "done" + + +def main(): + engine = OCREngine() + engine.initialize_model() + print("OCR worker started. Waiting for queue items...") + print("Using server:", VIZAR_SERVER) + print("Listening on queue:", QUEUE_NAME) + + while True: + sys.stdout.flush() + + supported_queue_names = get_queue_names() + + query_url = f"http://{VIZAR_SERVER}/photos?queue_name={QUEUE_NAME}&wait={WAIT_TIMEOUT}" + start_time = time.time() + items = [] + + try: + response = requests.get(query_url, headers=build_headers()) + + if response.status_code == HTTPStatus.NO_CONTENT: + print("No items currently in queue", QUEUE_NAME) + items = [] + + elif response.ok and response.status_code == HTTPStatus.OK: + items = response.json() + print("Polled", len(items), "items from queue", QUEUE_NAME) + + else: + print("Poll request failed with status:", response.status_code) + print("Poll response body:", response.text[:300]) + + except requests.exceptions.RequestException as error: + print("Polling error:", error) + + if len(items) == 0: + elapsed = time.time() - start_time + if elapsed < MIN_RETRY_INTERVAL: + time.sleep(MIN_RETRY_INTERVAL - elapsed) + continue + + for item in items: + item["priority_tuple"] = (-1 * item.get("priority", 0), item.get("created")) + + items.sort(key=operator.itemgetter("priority_tuple")) + + for item in items: + print("Got item:", item.get("id")) + + try: + result = engine.run(item) + except Exception as error: + print("Run error:", error) + result = None + + url = f"http://{VIZAR_SERVER}/photos/{item['id']}" + + if result is not None: + result.info["status"] = get_next_queue(result, supported_queue_names) + + try: + print("PATCH payload:", result.info) + + patch_response = requests.patch(url, json=result.info, headers=build_headers()) + print("Patched photo info with status:", patch_response.status_code) + + if patch_response.status_code >= 400: + print("PATCH response body:", patch_response.text) + print("PATCH failed, stopping worker so item does not loop forever.") + return + + except requests.exceptions.RequestException as error: + print("Patch error:", error) + return + + try: + annotated_png, mask_png = result.apply_masks() + content_headers = build_headers({"Content-Type": "image/png"}) + + annotated_url = f"{url}/annotated.png" + annotated_response = requests.put( + annotated_url, + data=annotated_png, + headers=content_headers, + ) + print("Uploaded annotated image with status:", annotated_response.status_code) + + mask_url = f"{url}/mask.png" + mask_response = requests.put( + mask_url, + data=mask_png, + headers=content_headers, + ) + print("Uploaded mask image with status:", mask_response.status_code) + + except requests.exceptions.RequestException as error: + print("Upload error:", error) + except Exception as error: + print("Mask/apply error:", error) + else: + try: + error_response = requests.patch( + url, + json={"status": "error"}, + headers=build_headers(), + ) + print("Marked item as error with status:", error_response.status_code) + except requests.exceptions.RequestException as error: + print("Error status patch failed:", error) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/ocr/__pycache__/__init__.cpython-310.pyc b/ocr/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f33522a8035892b51e780fb7f75d852bd6ea3844 GIT binary patch literal 150 zcmd1j<>g`k0;kyrGePuY5P=LBfgA@QE@lA|DGb33nv8xc8Hzx{2;!HeerR!OQL%nT zVo`p6USVlUN@=maOKNd;Nq&L8YgJfgm41G55gf#M(-km literal 0 HcmV?d00001 diff --git a/ocr/__pycache__/__init__.cpython-312.pyc b/ocr/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..65c53ffabb7f25f4dcf54b80cda3eb9e7cd83030 GIT binary patch literal 154 zcmX@j%ge<81WvOLW`gL)AOanHW&w&!XQ*V*Wb|9fP{ah}eFmxdWvL%poLW?@pOILU zpPyG)nvzmltnZRqoL!P%pzm50mRY5rpIijT@$s2?nI-Y@dIgogIBatBQ%ZAE?TT1| UCNTnWF^KVznURsPh#ANN0B-Lkh5!Hn literal 0 HcmV?d00001 diff --git a/ocr/__pycache__/__init__.cpython-313.pyc b/ocr/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a8c3fb5e43134ca4de7e9c630063c499f25c291f GIT binary patch literal 154 zcmey&%ge<81WvOLW`gL)AOZ#$p^VQgK*m&tbOudEzm*I{OhDdekklzH!lqf}3(cNkGpvetmCdp_yl43wJGD)P!)Nz3AR@Qor?&6}XVIE9 z66HQxTf2EnI&$DYA)tYjy>Vr=;cj+Z9_2o{-MHOoHCL`TY~5>%sIb0rb*;5_^?KvR z?X@T8uplbjxoWShwr(|6?=)5ei#z`CUMFxweXrwg`t`Ptj^D0xHdy-cn{U4LMCsF> zN*1l$yxLm3ajnseG`skWbgl7eTY;JUC^|ZxD|r03(R8#Ck($Uev|nnEX-F7d)+7H#KCJuGsyg$JAw<#F0KsZmM4Hyc04!8{`xYYT)xJKH9g8c zLzPiopo!9hK650mQ9H%&OW{PBE$%Qb{I4`I1wh}vvv}LbaB<5Kp6Bik7#sMDA98)7=cb0{sK?P#aO1Wd=)q zp@sBAZJ+FukuGyA9qRiOs?*oCTN=h0wNg>gH_^|-*R#K)3!0*>YZBhNMQ>>a%+M`# zi^ID_8yQ3kO5o3hM#A+eqGDuxZcMn7(H_~;YK7*vnJp>%ON)y^X|ccMNzY&09q?ppr~&aM|9nxDcLL ziVWttyf!6Fh1MJk_9GqmV%LbMw;kmZ;jOj@=Tu=6(Hqwyqw9Nar2AY(slMpAGBPCG zILh(+ZQfTA;78@v1ZrbK63V@5sYoA)p4u-#8KpXs@A!5a=fcUNRE+MVGN(=`?E*yk z7McQ~q)aMs>I$4XO$v05m`3Gkm6+6gK%a^lnA!^Xn|WXo5)tY?yH!a7I()S6>y+`FauczICHX! zs3R~CjJQ8f}9UC^YKNzI~y7wYImncCYuh;g959t@*%Bi zaewBJY|zkT)n#q>PodF0EzcaBWz|E%W*@3c94jA@yXQtGFwMm)P8|}3EXcqfFNEeH z8D-bC_!Q@c{~4d42=4jh1VD}X_zpU8im_!Du=}Y6l~RmRF3d7*KpqkC7kNI+hS(|0 zb}e>#Sqm;f<{34Lb(!y82*KU8Vym5fsE?G^UI_DvuGO2J+fG5!Wzf%KJUh05rF>^y zWoF#5Kz??j|3_r_t7G^toU}l);+YkKx5Mng#Y62kbW|7>WszNEFLu;TUJ7&YwU-{# z(G)EEVmK8R!V-HK7?nS~5|+mp71yianvS`q!{V5$0O2W0S?S6d>;UA9qNbMxT z-LEI;u;Luf^XFsSum6AUJuR34256L>W^Z)JkxplT zDjaLi6Wb^SaAB(ZqHe7_9f^v?+VDhji(BzcSFhKD;%#5eAi>|`VkwyUxZ_LiDw5|W zyi04Mf?Fb(87HpyC@psTqYf$s|Hur%%4a&o+?_PzA}MooNe>0*Be9 z38^Iq{T@d>sBiOOko~16W86y-?XV!XI&jD00&|kA?R1iTu;I$um5;2xGwgW|TdGlU z9$UsaJUD;TK~h65j59orzTrKW7CVnr0<0zyCRr!Rjlpt4O_J#)n9l3>)ww37Of+jT zkNcJjygkqBD|>R7tsWHOohPUZS3DO+l$Y0reI8^xJI*Fw?7N$&JscN{l``rWtMUP* zpOc_}`F39&3Gk1Yta#~jX9%*3JC47tmIv0+3Pt)p=3>Gz0XBUde2Y%M562iA>@{V@ z3)dYuEQ^g`Ies$t{I;hXs#F*k7*M~e9uj3O)m{=RdD`WU3paOqoq)G?P$cw(%FS_6 z5hr|92}3`EA(=NowXo3}_*-#O7HROIZs_cAaat`oqZ(C{iWgL)5~!VP-e_I9(OhdZ z*Th*xRbL3P@gu#*U2#zjUsTOYXrjvXtIgJGV{P@*)>SN8y|eOhl=6F=_annc8jnn` z&xIpB5#>}wAa6@9B3%sJDEq8XQAs;8lqcD#_|SGfRv&6?f7`e9{y^5McACfEG~x|# z+gZe?7`9N_*tz%yXvGIprJs03HA>}ChWDJl&l!Ycck#NKco_|XMCrCQaZkH=x>&QEAaRsy-wrEu-dAl~)DhDkJq!u`G;HI<$N)A;U zIR0BT5?X;&X#tWHKjc2GrY0QJ0G;G0_KN@Mr-jtXJx3qKk%xaBItvnt^bnkV9=OzxT zirPp=2^{-#;`cvM!c>tb3aW{HTGgR(HF$(WRsXHlF{P{TJ`sOAn9B&IL65(uZdPBp ij;SfVn9zht3$&zH;!n8}beYr5f>|*Q)5S;KcmD(Pt4}uo literal 0 HcmV?d00001 diff --git a/ocr/__pycache__/__main__.cpython-312.pyc b/ocr/__pycache__/__main__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bee3d6cef9126b69ac089e5baf6f1a68e7aff658 GIT binary patch literal 7807 zcmcIJTWlLwc6Z3(`z=x;^|EN$lJz7dKV(IgCCjq>l116F99v3kme!1IQWVL|3~h^` zyxJl~|?EgE38z@nS(F3=PUG^E6Y^n?KnYzowWs_l<7{%Fq~ za!6XMY#Ow^0B6qJdtUe4bI-l!@DDnj8bSLk^|`mK452^b53$K}z_TR+p#?-CitwZH z?3)-Tgm1~XM0iWbrSO*c$tl^mjL721$K{liBK?Xf<+zeSG~p`|Sr^{E(gG|(l#G(! zmt?=jH4;Q?zXz>?Qi`D+tMX5#laXqCOH+&CTEv3uT8ovjfzKG65St!Gs z#CQp1d=rhEDHA*`R0%vw=`zYZ*?Mop8`pxNBcqTl*6kCNgA+`c9v4 zj}8wGxOl|t3SEN(V-vaM0Az%ux!*GefCc15GX#hGg~n*9qOW(*dy*(YNxET-%ee&@ zK~;cZREkHVh%rS8A692miz3k(2oxpGqDImcQ5>c{49(biU0;~H7Gz+CoHrPVs4ld^ zBw7dQ=|(B9D$GZLL6`?G4~HlZM>k4%8GVyuJiOu>?V)IfWwCa4`!}yR&$007yyjto z!N40~iVCyNBQ!h71w+n($gAFn6D*(qx!qG9Z@}$#gr<4j<*?UJxpNE|D=@@6gvHaF zMQOcp_SlySW4f;8!PxR(d`IV;B4$V^?O!VNnVNd|&?J;qj}+Rp(lD>Tt&fG1%Ie3O zl3CX?phpwRK9GuAV1|IT zP>SP)H!}qNLf9$vVTjX;-#)zv72bm5w*jT8&?iEo>=_AX7W*jjO%#5{}un6r$O!W*|q^T-O7G#V;@SP!Vk((uDdv6do&`s$Wx5S-| zp=&`d$U5H$(_xyuM=;n%f3jisKAB;(gFHri0O&cUZpv=Al;2gSVy-%IN|ZL zJ&ruiE+Ano@c4K1^(hMTN`D(G|c$1ysVbI%*)YJEH8(7!OS2W zVeMEP|LnL4R^|?*gu%mo)QommKz?n-fLYYSIzOJ zgP&=8o-2{gx+srpYT{(g*UuG5Ye{LgB{bVo8fQY|e2NIrPpOORYnKL6%BHxo=__rS zi2q_k0mI+0_!zs>xBqZA`Yq|i&+h8}4(ad8wf*hV-?hsCp2(jhfQp+C3$Yh1fcrK| zT+9xD8O#WrR}Ll*`#HXp4TnNOhNG!sXDxPWUXRTUUY&DirV}{ejUy}zSX*Oa@J642 zU%ufeD#-sw*iZ|I%b%2D^qPUq=EE49nm7*8D1Yivgc$2y>XmTfg{~+f6;VmKe5@`+ z6Np&(NbSqV=YkSGLje|{BAxQFTI`!4D6*i(JBX4Mq{5e+%U==U)R#j%&&O&X-uata zo~!gK#5~XyAD2SgDwakiT%|ZJQa(_KbDfbzWxjlL7V+Rz!X6+rpFIZ@y@XTATx3OM zVLl>f0%7d}Zvy|?BGMelocK<;JO7kQklO;3I4Evv;@sejDENemYZTGow8;4vr7qGI zyFN4m1)N26nys}f%&iDl3%E^LF2zqc1$D)z;Fg*ggc6GQmq_KST!94R(6$}IAFU=bm>seq<2;^K!`*CP9uev3Y3 zBBiK10XI8{CkHkOgU_f^#GO%X+Nm~}OBpr_>`Zb-9aY7MAGUD4B9EvlN{IZUD&HZo zzwoBYu_a)a7QC`zYfO<3#IM4RK3w1=LQ&9SD@j}I^UC|V=sBY)l3x|oY_;bL^0ywR zswZCuZ`)DCX&Yc};rj(nMYuJhjS1Lo(_j%;4V7;|oK@j1?&R0Ow`a67y28Au?NqHd zALnQEQ8h%*9q&nI3`G_@THqu?QA1Q4)j!yow}!Duuezx5JFJ1F)Poe$cStcsbuURN zDUz})TJq3^@cy;!0s4SVVTi?oKVq=A)!qO_N6^Rk5-zlHX1L|QS^`9E3DmT!^v z`_0lCTU`+clw0gKg3KgT7a#|~*F}f@yvNHy`NV!D$Yc`}yKs4TI2;k(ITnvV&EPuC zbVbakyevlta91#p#qDBnA{a5}^0Z#z!Y^b>2P5+O8^HP?gD*FcN>?z;*FF(u7&^fD zr|sSx6JCBg=!eWFViY9XuQ0)>Ec=KZkNnvL$(+L|&X1T{EK-k0u(H|Qh=E{*p8OXy zUJ3aP)K$1?UNOPYP%@!-z1ZdE!XZBmwJGT&JsnXU3vz0!xmV%U?_y=C9Y)FvUVy+v-SY!>kkG)csXfUHAh4%NEWr$_6Gy7hFt6D zbcl|qyi=a5v@;aA3aPdy0L0iB4*147G8B_MS?%l2h47R>{XCLgAbn{L5tVbw!%kxI zh&@k%yS75sWidp>OhbU@^n^m-7`X+**-2#kkO%yg5*ESkVY6c|i>(;I>18fu2J_>5 zIBTq~#+?j4)p?^g0AB9#dn2@a3esLb0~abJq{%`G3wiG}u!|wiJ*XV$2l)+^PrU5_j;`35J9bj;L$E#p}GShCHtgD3p;1(v< zj#a}qRtB4#SI~Y>h=ol-V(Cl=9&q9Ty&%Rdx+ky8dxIO+oL6GO0%VxTdYM=+EXKsb zOA3T}W?5=HsuTF+2lRqF5;7}kho^a;4Bu;m~DgfVHRa{Wj}sx_Gp?^ zr^w0#S(zlOGo&i6?n;se0GM;W+nppEgsz$oDw1T&W3@h`)Xb}It5Zr_LTO7XE55Qj z*X`|bs9Tm*zw2MT6mRbNb!655NqzFn#rWw7`r?6)@yU>w6SDPwm}+I z%^7RWLiBEQ>BL7f|L}vP_1N6;#|CT4up?pE@j)cDt2?o)d(F_DF_y;5_Ixy+G#*OZ zD(=c2o5~j||F&|;mfF5Av3=jKD}Gb`tLnAwN7FX@`?|Mv?-*h-Ac z=lAN*jDyeR$W#_D>s)O}b-5BbvS>W$TI}QMo^9?0lrPJhqfW zu|qXR%pFPBH!Sac*mbf2dh_8#{o#*~{PXdDI{wMn`jPYTx(jnh;->9u+S&~% zCfJZ8l^z@QCpS~pri8UA-h5!)+C8UEDh~qJXQ)TjJ9h-iFR|uD?){r@-&`i&xdmmK z4U*JtOPANYU-5Ru(vkJ@#+ZCVjY_NDJ+;`gY+Kp6!u_IYr7B+1GuQvvP?|7o&op$b zT#xsTt~Z>E$rm(tHH*g+#<~qsXR6Ft%HtJH%SRs$-WyC>I=`xG{qWMyF0J&hUir=B zuO^dc|6={@X`f~$Ft1pYWa#RObZ>4(261`)eTgEr030Ru{7$T6y z@b&XHWVEFWwFyIQ%CIM4*n_>HSty#}(wDYrP22PWFL?8d=k>@^{S--b+pw?tmj;r? zrgVArPvtK*WYGEIc>}W9v#yY`9!OXZ0Isk^i;m^Gq^0dkO9jR>RlWeBn=uK~Zdjv5 z=kmUkrG3rP{vu#&w!w7w{$27cO@-J@o~DRG+Pqt;@7$t+I8i3}9X<{tMGcxEig@5RSu=Aj=#9fH{W0NL(!!4kF=%;??jU zjyo6ig~Lxl2d;aWU?6*T;c6Q%h5D{=v}KRLlb9JUiZQs(l|9~YHC8AR3c(cXfeYYn zw@`W%N`={Cxw0D$)UcoKVJ3kBS8&*0!UL>M5MLm}7fAI5Qh$LoU!uxCBE!F-?GOd@ z#`*Ev!YW37Jv`!+RM9EX-Bti6Uye>tgb}q6eEq_ER|LW)ld{zh(`){Ds!GZt) literal 0 HcmV?d00001 diff --git a/ocr/__pycache__/__main__.cpython-313.pyc b/ocr/__pycache__/__main__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..54b652bcafe33fa3f9b59d263075d89a895e9056 GIT binary patch literal 7883 zcmb_BTWlNGm3PSD`z=xuMOo5Fwq)x?$`8pl^|CC}FIkk$5o0@Y%%!EV&6*xEGo)h? z-K-FxfQ&34If+*a8f=6nK;>+KsAzz?K!B#rE;b8nH>AldnTgsONcO{jO6-qryuj|c zGaQmulmsdE0-U*X?|I#G&OP^@!$&5Q9zpvg|DTbX8iYQ_A97O{JCEKb5W0hCL=$6( zlimbJh~ElMA-GC^JUGB?0T1BgS z719@HxL|DgA-Eb^E93Tp+_-V3AHZ8j>t%e-OdB}M55P6jrV_682jH12@NBe$wz$xb z2(F5@x)5ilZSZu^Rq(97P($0j=MV~O5xP*_f_kL|A_`q0t`P0j5AnCZoe=cLCnrzS ziEtvxKg3#qA>G7iz{g&Ru#BKT6Z8drA^*s@Pf*6DIl*{#aMKNK1_C_QX=G-qt*ekqynuiWQ?Pg|@-rk;`-49h5$s!?*oIVkn zoEY=@1tgftLu0=4(?yd2Fha`W@6jT3?w}sT5>t3wY`co@un~ddmB-C!uOdj$ik&b& zRdf=BO3=E>QGiooxL|qS246$~3%jWWk?bxMKxLzEk1@0DNo~ZYH>}pt;I~XBdv>vBVT59P|Wv z`1D*2bFmovd~#+c$$O44{Ix_Z?(wCbiKIMWm(nj3x*m?Op^!U1E0``NBhi^qkswzK z1o0l?55Z#&70lMTV}I6IUz#nncKzb~>R5XFfqBiMC8MSOtTDgz!g^C1d>S&^ZGX}j z3tG#9`IdPxnbS5rFjUR?9{~+CjURHuK>*w7(f@$uAP78V30kpN$)d&zI|N7>vHWfU z{ZDD+Z`oyO$rf^!;VWP@01Lr(bAte?B~cjz z^tw%kq4m2JJ5Y0_rk(O!`5kbT`*Kf%!VL(qWCed+kSud)f?&;(BjjdY(&%O4G8$Ai zq02TGB@Mg zkkE34NrXfRLKua24@6_r;V3`kF2Ug4&*Nj~A?%H`bI&`zmFJiH{;}>I-+%MiRbR5% ziIi>RMmQ1;Uy3r6v>tD&`3znd#W66zC#cIX&&*ISL=smiUW}^VltTvL<1vx*?l4lZvD$YgNgg_=BtO$DM<}?$>5s4S90ZCM!JjCIh+pZFnNiK@{ zNm2@`NP@Y}3u;&wtPH{vmX62Ppm39PY}uq(4sV%e`0<~>1A>gv{@SUA(d*1W@dhnNn+4@5T$F`;M z+vBfJ%=^}>of+qzyS}V*Pquo`{LyuzEnU^RawBJS7wmQO<6q$K-nE#m~ z5n$Kqu6MidcHblK+0z|^pBRT8YmupTS)DdCrOBqhJk}tiBX4NT7#i~iPsZSRgb0vN zYf772R(yGFM_SwQg|SA4f3l&0>92Ww=>7dj|0qJg*LvXjA!;1$SAN)N9PLwn*r$U2 zbm_DKP%yI;7qJ)J0q1R3__-a>=P)5~T{WmY8BGY)d@>%7aS3Lo(p8^wX){(g_;k^k zxr0CfZynJrplySV!<&5?ex-)vDRz)Ye}WBl2Tfsl#D-0U8w=YlfFU+Dd7eU3rBje5 zScUcCRzh$mnx_V zl;X1t0k;oqf3XFqVyEIIo=kRtD(hZbUQbzzShk6mfkR6LalZ5w*fQ?P4raY8Mclsk+$|lKK{6nl zN+t^LsXqB5a6s;r-z|u?%M@wHL4pz`Jw_Z~!LnA^qWW9psgbc|*(tdFL7eF?^N@Q% ztqjZRHtk?1tfdA!7wlM))dzLQh)b;?`>>3A?zT)ns0$pDN6T-z4r>DTZrMX!TSLlp zsLFi~Ub4BhpmTS(;53tU0KMc)ESBg0)n{#G~J& z2QkqSGzQJ|4$y?PLZa57^?NjdIkW)>+xK#?1x-Z`RTUg|2CKfC1MI(u=o#gxzNNK; zdb*D8=poA5khMf(``Rmbb_VU=%kwR1Paac_&*6(w#p)mmY;FPA!J8Z0(jk#!)fIfr z!BP=~LgFo5J)%CQ@Xx``9D3!OIq(~h=>Z*xHc!#1t)v6Ex)LqI|A#GTl{NR*%T87D zl=)#V{eR*IC-qZn+_e?IV~X(@)DDW;dP+x|@!P)zf1!*o=Rtp^bvJYD`WB8KeUoE* zw?E~E;tlmejJpQa2*?reb<$0p4M!4CjG->aI4NrtGI8 ze1c)|P>hvey&O&!Q}$veH!LOuVjeY^Qn$STqz5^C@k!PDV-j6zI>~Vin~2U*ks=X6 zeL5C}%qV3QIa8Op*ma3~io!GBnLyc#5S9582k{W4%yvcc4V zG%BT!U0}IbJdW2TTT;?2H4|faiUoP2u~;0NlL4!_Q$~@qEVXeo#)1tK-IKF%CZ&s9 z4_{$CarO!%<6#z%u`=B7jWcK{K7}ReTZ3`DBoMy@M2VxDH6H7vVd^wuruZsC9hm#Y?lq1&} zt`(!S;Ri04arg!;rPq4@L}+xvKk4&Na`;Zc;k%`1d_fsySgsp?;&{z@;2~6vpYVqQ zzRAG(&k^_1I^xHEcPtZl9$Q##Y#xbysC^?A|_$u zS(8YoG+`Fqp~RKPS92&be4|NK2NKsfxfy`La`O$4Qp}-sQ_ag$b4LrLK2O$X$od@F zuukgIdT);Ght9m`je#7wOB`x?+m$1`9_Y>MTEl|=mOig_X0*U7QC_nysJ4;7rQ+o}h) zx~2N1`W0usxi{0?`@ZY94Zmr~HXkiGsW(lpn|^9pQ~}E3)wh)^zPI%0uAzU{e_|bb ztVXt)bj^W#yYgOt#_P{nCq%-XtLNT*?(TE<_T3vzcOUx1I`UYLjF$Q2%id+%%fst6 zuBFH;kyo#+oXpg?=Z_UEHMzRZtfh0^>R8ghqR-WLuW2&%`*YR5ad zzIPAYJ+OB210vgbIMa6cgCqZX{CCGcI-fmqHr;w|{z%%^oHe$5sYKPT4K>o4u{K}3 znXm1*U)z!H?9bK?%1Kl@O^t=RrZ-)$yH<{5 z>)IF98+uf|?Tu5*L#xiU9czhScC2knR}IaNKCo2Zw=}Qs>RY>!-4$F^FBxtdmXBww ztsA7tR=@73OS?K&kG?y0cP!^P@I`C)&oBP$;@aqMul?p)j{ci0eJRsAz33Og`tM!L z51!2op3OPVJ*aKX*Y3{L?p~v_wFlOn4e7>xYiz#nM5gaV&UsQCxR7%`_o=gSLxY?L zH>zrFeFgWvy!%kbeJJZboN;t6j<2`vUAy?fRC?c;Y+GP);=#_26=roPy~7Jr?YrOI z_Rh9~v$fFBQJ_3uYRHb7w4)77Q|(H-M$)6f{Af5c8qPT`J!olNsa}bsn|c$_(-qMn>wB#*&GnTzj2LqSJ zB{f_JGj`lJc0Aco!q}6?ZOGB^2q{dB*fFCkzMQqAP}lGe>L(j27<}@07jjZy{!DF0 zzP3M8+Yhj!d6wO)tvN@}9~~|XX{&z%OgBRkwmo2>WzTAF-myFD*!^U^mI6_1eWKmE zSCe^1Z`RR^MX~igdF14fb!#3dKde99qDt$` zryEr11{LnNHlA^*GD_o_YE`CMh5HSSfxW6szcFA|-8Y+{f4|-saI5aSRk+{RNO!5S z4kNu&mEEa=zMz%=uW2Xz?X{dAF^uF#BjNc!xb>;V8cm_SUwcPD3${k#zP^oC@B^ZrBJIK zfTJ`SWrnyYVBmTV|7&=F2?*jdWcdu~K12G?kl~N0{&QscFVqZSz-(Q3_SUlt7jIo$ zKDFAKHTBHtKebdX?pgBQ_AU+G9$Im)wPfwRSxeuXv0$++q;91aUcB|cJJ!1>G|YvIQ-IwGqx421~5t4-rZPQMQd4-6(w3qp=2G}k|)inCfVdn z_o%u@0dmqMhiq~YcCq=r>KP762}@zB zt6sgTdiCmkD=3!>3jY50KmXhF-d2?VqQUV$1A~w83;zcVS2*h^cJ^g9lVjCZXz*?Q(U%PIhf)a*AG>@IvN?=#Aur5xb`a&^`OR!hwPTJ&+M5IZi$xxuOBMBvZLCw?%W7I#jA%Z@aFk6uf0(01wO;;_%8BU zK8NoaKF=5MJ$pyti~P(Bg`e41?QxRuhtYY?9xBcAFL7^WwW+1X-LA9grg{0f zC7j*VlHL2gFwNh(@#seD!Hq|M606HAY3cD@`^H-9;qA4@x7Xr^>x6^nUhIhFUC-YP zm)jvaVSCx-{i{E^eEI!0qccW-D!w{?H}MP4q1jXtaINEoQ9J1Nu2+Z_BJ7+ZO8BJt`#0BG_in7-d6=rM|BAhr<_L1< z!Dp$l;dDYb)g#e&)5_28uC{L8U;Xs%t=p?NZ^!!O-~)lS4?;%igd4n>$d~$wy^yS#-WMEVpV1_aTeVZ zl+&ZpyrjH_4pC0jCdz?&s2pk;t)#|M-@vSwBwS&I@nh;OF zw*3#r$Jbb$iOU$Z@jLNH%=;KWDa5Wqeg;0qlmg7)Wpr-9cI(nU=cFxKbHjcoT5Z#{ z#((mP7|3ThgAQ4fmXbg}TP)Ffc`~dmDSVQH-)pJv`5QrMc+$|%a4+n2oI%U+{UCB8 zSedY?r+VmiHU!DE_#rhvqUOltW)S*5!uutKZ@-!HliOEf@&PBx}$8(EHV$>m~ zLrab=jCDEAAI;}@eo>hp;}wqZa=akn8GbP_4@$hqONZ*PB>QDvLBE`o_!MsVHLngU zQJGICd0tyk_SNB(U*t1@RT7;bNs+U82~W!6?~}66lG3o6XxmeXmE@B`Qry&&3go0N zJ%+Yj1$=5lO{##-671QPNIjUA9GH%3$uw?7e4R=3#Ncxq8v1oU55CS|b_TQcq?Xh- z;A)!-t1rpIz9i{<`8)hInXNl}UTAA;cRoq2b;v(=#30o>?nabq@Uf|}>+xtSHMd-E zb1O=Xj)sby;L)JxHfN<=rn=9)ZmRkqB8;5&jv(UG{Dug+ju6fu&9wvnxho<;7qWF3 z(HP;D1D8ZH4HxR;7Vw;@)^+yKc)pz%ZaZ@P&5oO0!jM}@-)I}LD}vBA`dIf7sQ9tm zL~O37TF>i1T`6Q@TPfQm%j=7uU|w9K=8p-}5X2(eTo0ls=!%bN;szSqXa}7@*e3bE z*1#@})sn_`ex#f(Ps>MLa^I=mb;6z0qL#*|RkY~zdYwUwro#_#Oi1LG*aADJR@DNl zsRb2|U#$YNz)EU~S*ngeKv!!REvR#h=I8LU)OdEH)yHuk>P~7 zfVtGjU`Vrx02^N;3$SgIBe=#mM~aEB#mf-(iZrXYWiWWcLXlL`G1971>cGj0qJ5~0 z{g5eKhf`c5Mc~+to*P2!jd#cAvi+a)LfIai87#~O3hzTGvT_7EvjU_y)5>^i@6;K zNRs;nI4%8m@fp1Mi0IR(ZQ{=OkHViLd;W6_D0t_v$3q#$^UM+>Fni9#dAiO4t^?}? zdn`vrq90-=(H|<2nP^dNUlq+nJFt*3G3p(IUy*QFd2xlCQ6APo-D_NW#hT5c_!;e>24SibMyhWIo-c?Osp*9Wv%le&8i%vNGe&zO7 z5QJ_k4Emz&x>ym;p((He)V0D&OlNhda~*0ur^fS>$2_s_#7mM0(hZ~--a`le6E6v` zl}ix3A)p)k-jg0!jo5)o{^kG*NPbs(#Dq$_|M zXJF`vJ8^x&iwSC*4j%uJ&L4^NKVv{C4`N$JvF%WYm6?nTM7|K@L`T%C4$TNjU4pbc zLI5ElxzRjC67!av*W`Q#$tStQ++pF5;oPkqCO#tV#vW#(p2cbCL8wPPU?M;m!-0>H zoa4rKP@ksxdUQ?KdWhez>u6+}ZOYJGSyZz0y2+#$mR$Tk%SUXZ2Vp)P=gO@3E82>l ztggf>6F#=q*Mq%Q6tuOexv_>^14(PQ9x<{%eBoqS?ex z{y`>={Hu%@#7_ZlYGM^1aUTt`b*OXHEQ~aaNEMH0miS_;d!elk;M3vzTYLC8d-8+P zAR7%%QIbx-cHO_kP9eqL6-2-l_|FQf!L#bDpcM$h%-_Uke-FD(s3Gk`5-8R1F&Kqp z8&wgM2V_nuLtK?JY4FrvL6(>jX$NFdDdg3*4XCL$33(&)z(ldZ#A>OE2zfJz1utVA zK>n~UKY|q^l|ycERvuxpIV?i{iZp(9ZBrSR@GHxitzj`KKGBAi;nc7i6_P3F#WdC$ z$rLve6teWeQ)# zKO{AT$5mvJMeJSrPDkjf@-l)~6=#$VrjnUMqVaB2mH7X1gkKrq*MUEk)Fply`1KLK z%CW0lnek*cLST+_reXB{jn{^A2+imHGk}&PR2Tow>wwMzIt6HX1pN;_2k1PYI$uC< zaX60?$Lr4SBP_@GTr$VcC-eI{Mw1 zJ)UjdsU%e3xSY zgT3K#RF~YV@uf9)8)3A|8|!Y{>4$DZS_V1?>eukXhUcSNAYFW!wW zzK)a~D+!_P+-@%#h)&xNzDL&KqP`mG2`XljXR6>eadR=Vpd9)$&WoV zi6&aSFJv{vuA$uG3Q~#*tPZ|MqY&i?`un}3*aIQb4~;8d2DmCtyC&%*e@m$3B;&Uy zE=TnJH~HkInVN`7qkfp?x{wT1|LqErSXqh9%3XFXD-ZIl+9*q4_Uu>_z!Q>y5vny_ z3MC|8ofapW6{c49mPmAq;B9NfazUwSb4Ij@Y(a~Ro}ln!9(~*Nx|C{(Kcfjf^18_8 z36WAB+oCBFBUvaGU(&=3HB_$@12n0LY}NI-EW(Ku0#l~hJd;I|fkeqd=&aO5F@cN@ z?CP=Sl!3dQAHA8&?WxJLW^+Zl`N&4v)5jlJ&Wo?GuRSZpRCHZ_yx?ZnkC=IkTOuY( z-lis@wWX1vl`k{0a1->@8t-9G9X}jbw+oWl@{u#fj~{w&vOa8W1b7iwpjwg@{!T^J z8U*NaW@T3Cu)HiqqfG#Do{JCv$wt?`dgB|8Zo2C6;FC*0hx`%?O%n zz9p#ADJW8vHGlEf)FXpp7o-9Ag5~EQ+UoNUGf6jKgnI*-?bzCbyZ5rpNTWp*C?mi3 zpL-(kW#%a}%`AyDP{H*gneC(a7VP2yWztXLpJ~Hc>b*ma)Qij1BQ210zUgwQbuBC6 kTxUQFFQSn*l+R3$U1t@o!ro$Y2y=076E`nZjN1GE4=c=2kpKVy literal 0 HcmV?d00001 diff --git a/ocr/__pycache__/ocr_engine.cpython-313.pyc b/ocr/__pycache__/ocr_engine.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7d3ee3582dc4d56286c93eab0d8c369f7b71a659 GIT binary patch literal 12038 zcmb67_i(Hh};%;ZhstnA2c_F&jTH4e{30h^sqi?iqNsN%mSX8+ z6q24aqRG1osmN1})a0o_8uHX0)ge72!5WYe>Qu)}N6pB5)PgKWt;oty=jlL$JXHA^ z*itUbC05O9dVvOWGh` z5>#MoC8SjDEmgIbo>7>~iCT&Zekt@R)roBsWu_Rw&QqxBYtV0E%?ka1V`Gx(VJ&?} zfTK0%JxyO{HLR>n#xC;#d0BfMg=$#`4Rtk2 z-R4PJP)ReLpnRSOnB)j67G!FKk=?|UiKXW9l@^^`r zv%}~510i-OaCRt=X1K60cQKX@BmbosKPmVl0u+RZpNppYdUx#DDH=}>1VYFAM-H>1 zisMCkyQs%Chff7X?L;^(aH1xOQk-ZzF+36)I63nC@ZiwMz))JV!{5ClQW5~dTD(!z z@PB*}vUjO5Qi-!bNobV{C^S(r`+0gyBew+P%_dh6$WaGNv#Pynh6>UFV?|S`C}V1& zeCm<3#y`VP`e@O}%_ccsh$VQRR#e3jqW-zLBqt1?6xA{O7Ms3=Vo6RkOmfMK;dqMk zsYFdQoD7S)@zlfwhXhPE!-)I3f&#pCO@&b+!M~b{MpHu9ASYZ%CT6;Z(r07ou0#Y? z{zDu;8RI$s%$#WA_(&qkg@7gmvQD|-FW?<;g<7@R7tXzT?yd7zo?q4IzkBRE#};+V zx{ownMYD6^;QYaYxgn=%cnq-A)A>9CEnm%N%sbd(T8w9XlKIw)R}GTHKg-z5{hjB+ zo#`E&dtVZPrJ@RPqFP9ei`vx842OWXnj|n+z>CiyFXW<|PoU{=JeKA{z@+IAZbyyK zg6AyMLk9CTTNie}xqGo|se38;LHbtu$KNcNcjYv@P#v_dn<5qnB1*fV@G?xY3$*TO z8bn1AxFEF)CRRtiVhGZ1MVREBNT%Y`q^n;ZgUP&f@O<=g_x4?{NiQvF$IIuVq)h62 zUTaC$N`RMl0@MazBLI`$E$e8+4DEsc;{ap_=w^xmI)kvlM?jwgrBExSr;C4(KBJQN z{YC0sEkm8yEa|yPYL9w?zD{R5Vh0cWMm32gnx&qp9v?E9O4lJO#)^T!(5RT z!MJ~tF#_2u-;jmjDTAZ!0FP%Q z;OtLVs7ETTLwDuKXBNt0zh^u6i>ZPQB>rGdbMQ;V;T-e@iC2VxrRxaG6-h3&@N^J_ zqwnVw^(-wPH(?vFiW)1)IzDz}VPVyM9Yp#yBkNWegOU|K1ci$At%4KI8BmCT#!`~W zD6MBr<$5B_Yha%~CPZVCu_Umt4QfPHWU8cS$RZ+4uGkV!BrXUcY28Bc*ahw>i-#cv z7=t2fSKa;7GfU{&D_38+7GI42^o+D--)Y!PKfsheus{YH4)*c1sE%{Iyo6>#O(>E` z@ySX;MCTJUpgn*CAM66K3*=?hRh_TyzI~=p-Cb~XUpZDZ*JK;>=K2+L*H6?-gFiMd z2X5JOnl5QUp^K8_1K}}d%DdEg3foa&L(yPA=~y-FY8osk4V;`>R*Q6#DxaPlQ~^#% z#gh-PER964Ov(u(Bno3c6yBvq9dWsZ8fD_jQU%pzdBEle1BxX`VZ%;JlN4E|+iFo0 z<0lfLHb%}}cQhu<#KUu;FwZBF;Ut_Xf=?rA1TH>-o`tUH0L~6VMs^AVN!pTzP$Tu@-!Ip7VtA||uTaG*@7u;=b=}*hV)O2m=KXh@GMaxiFi&m;J%E#m{seNY zZ?N{+n4YF_Sh{bNqQb!NuR3m@j>Y=!Ps@$^aW)8W9zrMj^-Eaj?h)h;zw zm7oy~GMxcpZOd=cvk4A&c|!)Q>)Y@ftCd@M)fjALDFqyIsr&>?Dg{;zCWEWoCYOLd zT0^B^;b(?m9&HAYOi<72aa~Z0IiieZSO;Ms5Sn!m#&r-9 z4n1qckXRTt;4`g*mhkCWlLEaSw>hW_U}4&jSTk#p&l%nlv;^>Bydkkxc@B!Q1fH=r z-a3Z0HA$>JXk;CouyP7E+sP`{2{4MU64Xc#K{HF^`II4p>Of-&QN?3)e)MNunxK_c zu`XiXsRPXw&86b#_jjpA|Alp9h!g)T>C_4t#wuh(S|OhHK5MT_JzLr zK5WHj!v*t}Rae!u_IKK|gEx;}KbqaY+?cQ5cBd)t>YX20bJbkyxZ3gF=^xQx7xyo9 zXGim{w#>kNn|mSgW+EFY*qT;s9eG>Fa&N)5GoxO$dlz1R^L1#j!y&O|cP-4#&t+ZN z#>{K;nhd?>_N=(K-E(it3=}`^WNtQRRfu!)oO3HeW~|)7c`FM zw;#yY9r&;@Upr8!9?U$yR^_>N`0C;8;ia>Ms_mJfH9HuOTk`fTOV$;;KX3OJy_*)b ztF;Z;(@W~5@mq#Mt?#OC&CAG@uIp!)&Mt>PfVJZz<^wtv=6ReP=-dFM!${lSThan;+jw71~x%vkT+ zoy3%NW?#)3fA0Eg?`?11-ks5`x;JH`1$T4Cux4+_PJxY^GqjPj)~6;iEuH7&WXI|S ztPeQI`S3Ic2e~N}nodMhaa^{9La(O6@luOD6q<-3fowcJ5elJx=!=q&iPfPH?2TkB z5(4WT#l}tAoh;y#W+wl=T{I5OMz|U51i=nDgljYqN1H}haZ#N@aWEcI@puTM z7V!Qh2dZ?8S|d}5gusP_L<#|BPkDE&YZ!-K!rwv$Tf^>Nn4O=^nhW-pjHYOFElkZ% zEvDgA%&6DQb|{$}SG{#N_1E<`E!QnSdGU^>;O)%}uY&HsG=FK~_4(Jcu@!rJ-ripH zFgXWv-(0;IxzV#?-jX+OSsM6Yk zw=GBV?wvWy&igh`uCDvet~+gaO&|7r*!tnCIqz`6b~I-=`s9XHG}(Qa7um4z+?YyU z8Wu~s1^$X3wb7eWefh?!w1A(sSy{Eb6?k=d<9>l(RtbyGC6WS)4<^^(pl(os1GDN0QBm?~M~I7suziokk0g*x#SI}& z7CHg-U_O2W9w-bMT%Uj@PJ*^Qgx@sd-b!>`UF^D>Mv~rZq0SufL9X%m8m3bCtxk}_#Z){ zh|#F*6$M7QGGK5J4MT58Y>bf!9h^%&WmN!Gv!} zpyt(bIBKk{VwF-EyyD(W2Xte%e?uy(VA%NzhJaEkgIC}(04_(lMJ@%fPrM-o_3}RC zjsI`_rV0)!Rv2*J2w^rjNO*I(J!qEkL%0WM!g{fb;w|vE68Qhd@IiA5)@AqPhIaxUS2-vLGb%|oYYsYX zRF8_a_|?3-f@W*bE$e8QwdH9a763#E3sCwCprBEn)l(?&e<1}nGImwP*!G}u-3?<~ zpE`C`#n``J$8BY9K-K@0!hVVre}z0(k2NcEP}YHUk`OrTW!A;lux{RSa(e}wMWzJc zeh90H;i>?x8sNMYa7wVW0pr^NF zM?hP79_79geITbn3-3KyRROb+HVk7082C7Y_p+OVUUm!1u*~Qi_yinXCg)&v(1f3C zqhA-S33@89DV(dQQ>dt@Q%1%6^Q?KDjq~?x6IPyO8wWn)Ftg2m9oyp9@U>&D$~x5s z)j(hCUhv0C=dc+*f@m8_*PN29JNSU13TMqfS@n~pKip*A)33c!!u71i2E?n z1Sc?XvBi*k+jYZ%4@WMW@#&g&@QF^`p)<-Q3ER?b1Bp~TO8P=Q3P+MK-kdaDvPQ%W z^{1;UrcmzP9)M6bBZac1J|!eTzmS623j%f|=ltn5Wu_9ef};~iBDe|Z=`({R%1T%} z@ztq>8gY~O+HN}ou{Q>8J4f30eo0^_@LfRKQ^Bo@^%K$-EYxs;3xWB$K9TN(_;`W` z{~Ur-=nYsQpH4K9uu}*;qJ&(=(Mi!PeUd_gn4-Jv8-=1NL@qQ%7u zS@NxvT4D&pP&ypZObMcX8WswFo@9eyJ^AuOx=j{c(suw+=_3Sk!z@>-^JU(To!aeG)Sz4sJR`|ce`P~aa$nRa#f#Rm!ve1h&`tO=Xc}IGQrrqP=S2^8Dp& zceX$CEqo(#zkbWjz3=UPe_zhvg?sjEC$FAd8Y)zEWQ_OgHsAEU=X-x!#ts4KYZtCw zSgGmA*K{oJE!6a8%&QH|&EfZk-#?Z!)ZT|tZOlDG<8P}OxC^m37hAu7{GpnvY5Y`2 zH8$rAn~GZ+Aq?<4++1w!$Xl8+gIUvRy?;4bsNa=2ejmDQ%O;mzx^w))&fMv-dzSNS zp86F}XWrAf9KG{u!Lv7Wq-giRoyn@BI#+Y(o}>R0hj-0hdt;|3w#?;%xjy@9!ORqEw`GnN>z?+jv=JuSrXE+Tqk>RtiEH^J-zGv76E~dqvv$qt?t;=-5>?^kR!E%rR%ZOX|Lfu;H z)|{mYxaQeZtlzn|xw*J`YjI0^aqI5<*5P8?o_yPIv3*aj?^wS5*h8zn*7m83s&6gU zx2-m{E?p=zb+5MhavcW?ZHHEU+jE`$1>bX@^c*NO9Q@_J<<_O0H%5Q4FW+zwdN$Y| zdfX0EM)O$*<)|$>tBWpAk=YHb#g458JGMq@(;-^G4$5!(Y{%-V&=n|-K=|MyJkceb z&EjkhXZ<)+6rZHoh!*=8H>7cP8E1Gak`0Gm!zG-ZgFm@;cmuw{qP6vpbSiB>{m5X~ z9-|*M>9w04xtv3tWFE zE`e^rVGyF5B~@LEU$LhmDeXPDv=?WDTw0`O$<53TN#7XuOP6*B(0>374S(U!kbSDA zY5KR6QT~~KN7??3^8S``{X5l>r#e71;496e^GC^b#wmdeb8dP4_-B=;=fBqbe1>= 1 and len(text) <= 12 + + +class OCRResult: + def __init__(self, info, image, display_annotations=None): + self.info = info + self.image = image + self.display_annotations = display_annotations or [] + + def apply_masks(self): + if self.image.ndim == 2: + rgb = np.stack([self.image] * 3, axis=-1) + else: + rgb = self.image[:, :, :3] + + pil = Image.fromarray(rgb).convert("RGB") + draw = ImageDraw.Draw(pil) + + h, w = rgb.shape[:2] + + for ann in self.display_annotations: + boundary = ann["boundary"] + + left = int(boundary["left"] * w) + top = int(boundary["top"] * h) + right = int((boundary["left"] + boundary["width"]) * w) + bottom = int((boundary["top"] + boundary["height"]) * h) + + label = ann.get("label", "text") + sublabel = ann.get("sublabel", "") + + if label == "room-number": + color = "red" + text_y = max(0, top - 15) + else: + color = "yellow" + text_y = min(h - 15, bottom + 2) + + display_text = sublabel if sublabel else label + + draw.rectangle([left, top, right, bottom], outline=color, width=3) + draw.text((left, text_y), display_text, fill=color) + + annotated = np.array(pil) + annotated_png = encode_png(annotated) + + mask = np.zeros((h, w, 4), dtype=np.uint8) + mask_png = encode_png(mask) + + return annotated_png, mask_png + + +class OCREngine: + def __init__(self): + self.reader = None + + def initialize_model(self): + if self.reader is None: + self.reader = easyocr.Reader(OCR_LANGS, gpu=OCR_GPU) + + def choose_source(self, item): + path = item.get("imagePath") + url = item.get("imageUrl") + + if path not in [None, ""]: + full_path = os.path.join(DATA_PATH, path) + if os.path.isfile(full_path): + return full_path + + if isinstance(url, str) and url.startswith("http"): + return url + + if isinstance(url, str) and url.startswith("/"): + return "http://" + VIZAR_SERVER + url + + raise Exception(f"Cannot load image path ({path}) or URL ({url})") + + def preprocess(self, image): + if image.ndim == 2: + return image + + rgb = image[:, :, :3] + pil = Image.fromarray(rgb).convert("L") + pil = ImageOps.autocontrast(pil) + return np.array(pil) + + def _bbox_to_boundary(self, bbox, w, h): + xs = [point[0] for point in bbox] + ys = [point[1] for point in bbox] + + min_x = max(0.0, min(xs)) + max_x = min(float(w), max(xs)) + min_y = max(0.0, min(ys)) + max_y = min(float(h), max(ys)) + + if max_x <= min_x or max_y <= min_y: + return None + + return { + "left": float(min_x / w), + "top": float(min_y / h), + "width": float((max_x - min_x) / w), + "height": float((max_y - min_y) / h), + } + + def run(self, item): + self.initialize_model() + + source = self.choose_source(item) + print(f"Processing image from {source}...") + + image = iio.imread(source) + h, w = image.shape[:2] + + preprocess_start = time.time() + processed = self.preprocess(image) + + inference_start = time.time() + raw_results = self.reader.readtext(processed) + postprocess_start = time.time() + + print("Raw OCR results:") + for entry in raw_results: + print(entry) + + confirmed_annotations = [] + fallback_annotations = [] + + for entry in raw_results: + bbox, text, confidence = entry + cleaned_text = normalize_room_text(text) + + print("OCR text:", text, "-> cleaned:", cleaned_text, "confidence:", confidence) + + boundary = self._bbox_to_boundary(bbox, w, h) + if boundary is None: + print("Rejected because bounding box is invalid") + continue + + if confidence < MIN_CONFIDENCE: + print("Rejected because confidence too low") + continue + + if not cleaned_text: + print("Rejected because cleaned text is empty") + continue + + if matches_room_number(cleaned_text): + annotation = { + "boundary": boundary, + "confidence": float(confidence), + "label": "room-number", + "sublabel": cleaned_text, + } + print("Accepted room-number annotation:", annotation) + confirmed_annotations.append(annotation) + continue + + if looks_number_like(cleaned_text): + fallback = { + "boundary": boundary, + "confidence": float(confidence), + "label": "possible-room-text", + "sublabel": cleaned_text, + } + print("Stored fallback annotation:", fallback) + fallback_annotations.append(fallback) + else: + print("Rejected because text does not match room-number pattern") + + postprocess_end = time.time() + + if len(raw_results) == 0: + ocr_status = "no-text-detected" + ocr_message = "Could not extract any text from image." + elif len(confirmed_annotations) > 0: + ocr_status = "room-number-detected" + ocr_message = "Room number text extracted successfully." + elif len(fallback_annotations) > 0: + ocr_status = "possible-number-detected-no-room-match" + ocr_message = "Detected number-like text regions, but could not confidently identify a room number." + else: + ocr_status = "text-detected-no-room-match" + ocr_message = "Text was detected, but no room number could be confidently identified." + + print("OCR found", len(confirmed_annotations), "room-number annotations") + print("OCR summary:", ocr_status, "-", ocr_message) + + if len(confirmed_annotations) > 0: + annotations_for_server = confirmed_annotations + elif len(fallback_annotations) > 0: + annotations_for_server = fallback_annotations + else: + annotations_for_server = [] + + info = { + "status": "done", + "annotations": annotations_for_server, + "detector": { + "model_repo": "ocr", + "model_name": "easyocr", + "engine_name": "easyocr", + "preprocess_duration": inference_start - preprocess_start, + "inference_duration": postprocess_start - inference_start, + "postprocess_duration": postprocess_end - postprocess_start, + }, + "ocr_summary": { + "status": ocr_status, + "message": ocr_message, + "raw_text_count": len(raw_results), + "matched_room_count": len(confirmed_annotations), + "fallback_region_count": len(fallback_annotations), + "raw_text": [entry[1] for entry in raw_results], + }, + } + + return OCRResult( + info=info, + image=image, + display_annotations=annotations_for_server, + ) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index abe4d6b..cf41bea 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,20 +1,8 @@ -certifi==2023.5.7 -charset-normalizer -coloredlogs==15.0.1 -flatbuffers==23.5.26 -humanfriendly==10.0 -idna==3.4 +easyocr imageio -mpmath==1.3.0 -networkx>=2.5.1 -numpy>=1.19.5 -packaging>=21.3 -Pillow>=8.4.0 -protobuf>=4.21.0 -PyWavelets>=1.1.1 -requests>=2.27.1 -scikit-image>=0.17.2 -scipy>=1.5.4 -sympy>=1.9 -tifffile>=2020.9.3 -urllib3>=1.26.19 +numpy +Pillow +requests +opencv-python-headless +torch +torchvision \ No newline at end of file diff --git a/tests/test_detector.py b/tests/test_detector.py index 27f79cc..8d54fdf 100644 --- a/tests/test_detector.py +++ b/tests/test_detector.py @@ -1,6 +1,6 @@ from unittest.mock import MagicMock -from detect.detector import Detector +from ocr.detector import Detector def test_detector_choose_source():