diff --git a/PDF_Compliance_Tool_BATCH_Hocr_+_Redaction.ipynb b/PDF_Compliance_Tool_BATCH_Hocr_+_Redaction.ipynb
new file mode 100644
index 0000000..ad81509
--- /dev/null
+++ b/PDF_Compliance_Tool_BATCH_Hocr_+_Redaction.ipynb
@@ -0,0 +1,318 @@
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "view-in-github",
+        "colab_type": "text"
+      },
+      "source": [
+        "<a href=\"https://colab.research.google.com/github/Legalworkflow/Extract_-_Redact-PDF/blob/master/PDF_Compliance_Tool_BATCH_Hocr_%2B_Redaction.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "OY-Tn6A_z-rR"
+      },
+      "source": [
+        "# 🔒 PDF HOCR + Redact Preserve layout with HOCR and Tesseract5\n",
+        "This notebook can function in a few ways, depending on how you set the run_ocr and redact_pdf variables: It prompts you to mount Google Drive. You can change input/output paths or just create a \"Input\" & \"Output\" folder in MyDrive to re use.\n",
+        "\n",
+        "Once Mounted it installs all pkgs and dependencies per cell. So if you need OCR but not Redaction, set the **redact_pdf** Variable to \"False\" and you don't install the  unnecessary redaction pkgs. Same with Redaction on Text based PDFs, there's no need for OCR: Set to \"False\" and you can simply enter your word list in the \"\", \"\", format and go straight to redaction.\n",
+        "\n",
+        "# 👨‍💻 0\n",
+        "OR DO IT ALL TRUE+TRUE = OCR/Redaction Both."
+      ],
+      "id": "OY-Tn6A_z-rR"
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "jmD9Cf9az-rc",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "24354244-a697-4163-f9e4-68f8a28a7e29"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Collecting pymupdf\n",
+            "  Downloading pymupdf-1.26.3-cp39-abi3-manylinux_2_28_x86_64.whl.metadata (3.4 kB)\n",
+            "Downloading pymupdf-1.26.3-cp39-abi3-manylinux_2_28_x86_64.whl (24.1 MB)\n",
+            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m24.1/24.1 MB\u001b[0m \u001b[31m35.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[?25hInstalling collected packages: pymupdf\n",
+            "Successfully installed pymupdf-1.26.3\n"
+          ]
+        }
+      ],
+      "source": [
+        "# 📦 Install PyMuPDF\n",
+        "!pip install pymupdf"
+      ],
+      "id": "jmD9Cf9az-rc"
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "jPnq0yTSz-rh",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "af438054-fbf3-439d-b34f-c6c8c7a6281e"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Mounted at /content/drive\n"
+          ]
+        }
+      ],
+      "source": [
+        "# 📂 Mount Google Drive\n",
+        "from google.colab import drive\n",
+        "drive.mount('/content/drive')"
+      ],
+      "id": "jPnq0yTSz-rh"
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "nRSZKgpPz-rj"
+      },
+      "outputs": [],
+      "source": [
+        "# 🧾 Word List\n",
+        "words_to_redact = [\"1628\", \"High\", \"Cir.\", \"1273\", \"1500 Fawn Run Crossing\", \"Fawn Run Crossing\", \"Fawn Run\",\n",
+        "    \"Fawn\", \"Run\", \"crossing\", \"Skyridge\", \"Dallas\", \"Cheyenne\", \"Austin\", \"Norman\", \"Tucson\",\n",
+        "    \"Yukon\", \"Cleveland\", \"Tiffaney Norton\", \"Dallas Norton\", \"Christopher\", \"Brianna Jackson\",\n",
+        "    \"Brionna Jackson\", \"Brianna\", \"Bri\", \"Jeffry Jackson\", \"Jeffrey Jackson\", \"Caitlin Jackson\",\n",
+        "    \"Christi Cornett\", \"Christi Comett\", \"Angela\", \"Thagard\", \"Marilyn\", \"Williams\", \"Eufaula\",\n",
+        "    \"Jones\", \"73069\", \"73071\", \"Cory\", \"Lori\", \"Puckett\", \"Virgil\", \"Black\", \"Ortega\", \"Jackson\",\n",
+        "    \"Pierce\", \"Troy\", \"Judy\", \"Sean\", \"Bailey\", \"Ferguson\", \"Norton\", \"Whatley\", \"Nedwick\",\n",
+        "    \"Douglas\", \"Balkman\", \"Tayra\", \"Christy\", \"jilge\", \"Mary\", \"Abbott\", \"Children's\", \"Christi\",\n",
+        "    \"Christy\", \"Jennifer\", \"Shyanne\", \"Tara\", \"Riley\", \"Ryleigh\", \"Caitlyn\", \"Caitlin\", \"B.J.\",\n",
+        "    \"B. J.\", \"BJ\", \"Bo\", \"Bobo\", \"Tay\", \"Tiffany\", \"Cornett\", \"Jacobi\", \"Norman\", \"Nonnan\",\n",
+        "    \"Dallas C. Norton\", \"Ortega\", \"MINOR CHILD B.J.\", \"bri\", \"Eisenhower\", \"Long Fellow\",\n",
+        "    \"Norman North\", \"Angela\",\"Thagard\",\"Tanya\", \"Burcham\", \"Thomas\", \"Keith\", \"john\",\"Hadden\",\n",
+        "    \"127\", \"Crestland\", \"Charles\", \"Peters\", \"200\", \"Eufaula\", \"Jones\", \"MR. ORTEGA\"]"
+      ],
+      "id": "nRSZKgpPz-rj"
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# 📌 Optional: OCR with Tesseract 5 + HOCR (accurate overlay, in-place update)\n",
+        "run_ocr = True  # 🔁 Set to False to disable\n",
+        "\n",
+        "if run_ocr == True:\n",
+        "    # Install system dependencies for pdf2image\n",
+        "    !apt-get update\n",
+        "    !apt-get install -y poppler-utils\n",
+        "\n",
+        "    !pip install pytesseract\n",
+        "    import pytesseract\n",
+        "    !pip install pdf2image\n",
+        "    from pdf2image import convert_from_path\n",
+        "    from PIL import Image\n",
+        "    import os\n",
+        "    !pip install PyPDF2\n",
+        "    from PyPDF2 import PdfMerger\n",
+        "    # !pip install poppler # This is a system dependency, not a pip package"
+      ],
+      "metadata": {
+        "id": "CU6xD7cDJGW7",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "46abb749-98c1-4c15-dfca-a99d51ad722b"
+      },
+      "id": "CU6xD7cDJGW7",
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "\r0% [Working]\r            \rHit:1 http://archive.ubuntu.com/ubuntu jammy InRelease\n",
+            "\r0% [Waiting for headers] [Waiting for headers] [Connected to cloud.r-project.or\r                                                                               \rGet:2 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]\n",
+            "\r                                                                               \rGet:3 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]\n",
+            "Get:4 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,632 B]\n",
+            "Get:5 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease [1,581 B]\n",
+            "Get:6 http://archive.ubuntu.com/ubuntu jammy-backports InRelease [127 kB]\n",
+            "Get:7 https://r2u.stat.illinois.edu/ubuntu jammy InRelease [6,555 B]\n",
+            "Hit:8 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease\n",
+            "Hit:9 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease\n",
+            "Get:10 http://security.ubuntu.com/ubuntu jammy-security/restricted amd64 Packages [4,932 kB]\n",
+            "Hit:11 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease\n",
+            "Get:12 http://security.ubuntu.com/ubuntu jammy-security/main amd64 Packages [3,148 kB]\n",
+            "Get:13 http://security.ubuntu.com/ubuntu jammy-security/universe amd64 Packages [1,267 kB]\n",
+            "Get:14 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 Packages [1,572 kB]\n",
+            "Get:15 http://archive.ubuntu.com/ubuntu jammy-updates/restricted amd64 Packages [5,139 kB]\n",
+            "Get:16 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 Packages [3,461 kB]\n",
+            "Get:17 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  Packages [1,840 kB]\n",
+            "Get:18 https://r2u.stat.illinois.edu/ubuntu jammy/main amd64 Packages [2,762 kB]\n",
+            "Get:19 https://r2u.stat.illinois.edu/ubuntu jammy/main all Packages [9,126 kB]\n",
+            "Fetched 33.6 MB in 7s (5,019 kB/s)\n",
+            "Reading package lists... Done\n",
+            "W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)\n",
+            "Reading package lists... Done\n",
+            "Building dependency tree... Done\n",
+            "Reading state information... Done\n",
+            "The following NEW packages will be installed:\n",
+            "  poppler-utils\n",
+            "0 upgraded, 1 newly installed, 0 to remove and 36 not upgraded.\n",
+            "Need to get 186 kB of archives.\n",
+            "After this operation, 697 kB of additional disk space will be used.\n",
+            "Get:1 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 poppler-utils amd64 22.02.0-2ubuntu0.8 [186 kB]\n",
+            "Fetched 186 kB in 0s (1,180 kB/s)\n",
+            "Selecting previously unselected package poppler-utils.\n",
+            "(Reading database ... 126281 files and directories currently installed.)\n",
+            "Preparing to unpack .../poppler-utils_22.02.0-2ubuntu0.8_amd64.deb ...\n",
+            "Unpacking poppler-utils (22.02.0-2ubuntu0.8) ...\n",
+            "Setting up poppler-utils (22.02.0-2ubuntu0.8) ...\n",
+            "Processing triggers for man-db (2.10.2-1) ...\n",
+            "Collecting pytesseract\n",
+            "  Downloading pytesseract-0.3.13-py3-none-any.whl.metadata (11 kB)\n",
+            "Requirement already satisfied: packaging>=21.3 in /usr/local/lib/python3.11/dist-packages (from pytesseract) (25.0)\n",
+            "Requirement already satisfied: Pillow>=8.0.0 in /usr/local/lib/python3.11/dist-packages (from pytesseract) (11.2.1)\n",
+            "Downloading pytesseract-0.3.13-py3-none-any.whl (14 kB)\n",
+            "Installing collected packages: pytesseract\n",
+            "Successfully installed pytesseract-0.3.13\n",
+            "Collecting pdf2image\n",
+            "  Downloading pdf2image-1.17.0-py3-none-any.whl.metadata (6.2 kB)\n",
+            "Requirement already satisfied: pillow in /usr/local/lib/python3.11/dist-packages (from pdf2image) (11.2.1)\n",
+            "Downloading pdf2image-1.17.0-py3-none-any.whl (11 kB)\n",
+            "Installing collected packages: pdf2image\n",
+            "Successfully installed pdf2image-1.17.0\n",
+            "Collecting PyPDF2\n",
+            "  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)\n",
+            "Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)\n",
+            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m232.6/232.6 kB\u001b[0m \u001b[31m3.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[?25hInstalling collected packages: PyPDF2\n",
+            "Successfully installed PyPDF2-3.0.1\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "input_dir = '/content/drive/MyDrive/Input'\n",
+        "def ocr_pdf_hocr (input_path, output_path):\n",
+        "    import pytesseract\n",
+        "    temp_img_dir = '/content/ocr_images'\n",
+        "    os.makedirs(temp_img_dir, exist_ok=True)\n",
+        "    images = convert_from_path(input_path, dpi=300, fmt='jpeg', output_folder=temp_img_dir)\n",
+        "    page_paths = []\n",
+        "\n",
+        "    for i, img in enumerate(images):\n",
+        "        img_path = f'/content/page_{i}.jpeg'\n",
+        "        img.save(img_path, 'JPEG')\n",
+        "\n",
+        "        # Generate HOCR (layout-aware text layer)\n",
+        "        hocr_output = pytesseract.image_to_pdf_or_hocr(img, extension='pdf', lang='eng', config='hocr')\n",
+        "        pdf_out = f'/content/page_hocr_{i}.pdf'\n",
+        "        with open(pdf_out, 'wb') as f:\n",
+        "            f.write(hocr_output)\n",
+        "        page_paths.append(pdf_out)\n",
+        "\n",
+        "    merger = PdfMerger()\n",
+        "    for p in page_paths:\n",
+        "        merger.append(p)\n",
+        "    merger.write(output_path)\n",
+        "    merger.close()\n",
+        "    print(f\"✅ HOCR overlay OCR complete: {output_path}\")\n",
+        "\n",
+        "# 🔁 Overwrite files in-place\n",
+        "for file in os.listdir(input_dir):\n",
+        "    if file.lower().endswith('.pdf'):\n",
+        "        src = os.path.join(input_dir, file)\n",
+        "        temp_out = os.path.join('/content', f'ocr_{file}')\n",
+        "        try:\n",
+        "            ocr_pdf_hocr(src, temp_out)\n",
+        "            import shutil\n",
+        "            shutil.move(temp_out, src) # 🔁 Replace original with OCR’d version\n",
+        "        except Exception as e:\n",
+        "            print(f\"⚠️ HOCR OCR failed for {file}: {e}\")"
+      ],
+      "metadata": {
+        "id": "CkdAzS0AWV6q"
+      },
+      "id": "CkdAzS0AWV6q",
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "vSfDa6_vz-ro"
+      },
+      "outputs": [],
+      "source": [
+        "# 🧼 Optional Redaction Script\n",
+        "redact_pdf = True 🔁 Set to False to disable\n",
+        "\n",
+        "If redact_pdf == True:\n",
+        "import fitz\n",
+        "import os\n",
+        "import re # Import regular expressions - Keep import just in case, though not used in new redact_pdf\n",
+        "\n",
+        "input_dir = \"/content/drive/MyDrive/Input\" # Corrected directory name\n",
+        "output_dir = \"/content/drive/MyDrive/Output\"\n",
+        "os.makedirs(output_dir, exist_ok=True)\n",
+        "\n",
+        "def redact_pdf(input_path, output_path, words):\n",
+        "    doc = fitz.open(input_path)\n",
+        "    word_set = set(w.lower() for w in words)  # Lowercase for case-insensitive match\n",
+        "\n",
+        "    for page in doc:\n",
+        "        wordlist = page.get_text(\"words\")  # list of (x0, y0, x1, y1, \"word\", block_no, line_no, word_no)\n",
+        "        for w in wordlist:\n",
+        "            if w[4].lower() in word_set:\n",
+        "                rect = fitz.Rect(w[:4])\n",
+        "                page.draw_rect(rect, fill=(0, 0, 0))\n",
+        "    doc.save(output_path, garbage=4, deflate=True)\n",
+        "    doc.close()\n",
+        "\n",
+        "\n",
+        "# 🔄 Process All PDFs\n",
+        "for file in os.listdir(input_dir):\n",
+        "    if file.lower().endswith(\".pdf\"):\n",
+        "        input_path = os.path.join(input_dir, file)\n",
+        "        output_path = os.path.join(output_dir, f\"redacted_{file}\")\n",
+        "        print(f\"Redacting: {file}\")\n",
+        "        redact_pdf(input_path, output_path, words_to_redact)\n",
+        "        print(f\"Saved to: {output_path}\")\n",
+        "\n",
+        "print(\"✅ Redaction complete.\")"
+      ],
+      "id": "vSfDa6_vz-ro"
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    },
+    "language_info": {
+      "name": "python",
+      "version": ""
+    },
+    "colab": {
+      "provenance": [],
+      "include_colab_link": true
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 5
+}
\ No newline at end of file
diff --git a/app/build.gradle b/app/build.gradle
index 3b288dd..d05a6a8 100644
--- a/app/build.gradle
+++ b/app/build.gradle
@@ -9,6 +9,9 @@ android {
         versionCode 11
         versionName "1.10.20201004"
         testInstrumentationRunner 'androidx.test.runner.AndroidJUnitRunner'
+        ndk {
+    abiFilters 'armeabi-v7a', 'arm64-v8a' // Optional: keep app smaller
+        }
     }
     buildTypes {
         release {
@@ -30,7 +33,7 @@ dependencies {
     androidTestImplementation 'androidx.test.espresso:espresso-core:3.3.0'
 
     implementation project(':scanlibrary')
-
+    
     implementation 'androidx.appcompat:appcompat:1.2.0'
     implementation 'com.google.android.material:material:1.2.1'
     implementation 'androidx.legacy:legacy-support-v4:1.0.0'
@@ -40,6 +43,7 @@ dependencies {
     implementation 'androidx.lifecycle:lifecycle-extensions:2.2.0'
     implementation 'androidx.lifecycle:lifecycle-viewmodel:2.2.0'
 
+    implementation 'com.google.code.tesseract.android:tess-two:9.1.0'
     implementation 'com.google.android.gms:play-services-vision:20.1.2'
     implementation 'com.google.firebase:firebase-core:17.5.0'
     implementation 'com.google.firebase:firebase-ml-vision:24.1.0'
@@ -48,4 +52,4 @@ dependencies {
 }
 
 apply plugin: 'com.google.gms.google-services'
-com.google.gms.googleservices.GoogleServicesPlugin.config.disableVersionCheck = true
\ No newline at end of file
+com.google.gms.googleservices.GoogleServicesPlugin.config.disableVersionCheck = true
diff --git a/app/src/assets b/app/src/assets
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/app/src/assets
@@ -0,0 +1 @@
+
diff --git a/app/src/eng.traineddata b/app/src/eng.traineddata
new file mode 100644
index 0000000..f4744c2
Binary files /dev/null and b/app/src/eng.traineddata differ
diff --git a/app/src/main/java/com/babanomania/pdfscanner/OCRActivity.java b/app/src/main/java/com/babanomania/pdfscanner/OCRActivity.java
index 8d1acce..9f4c0d5 100644
--- a/app/src/main/java/com/babanomania/pdfscanner/OCRActivity.java
+++ b/app/src/main/java/com/babanomania/pdfscanner/OCRActivity.java
@@ -1,154 +1,156 @@
 package com.babanomania.pdfscanner;
 
-import android.content.Context;
-import android.content.Intent;
-import android.graphics.Bitmap;
-import android.graphics.pdf.PdfRenderer;
-import android.os.AsyncTask;
-import android.os.Environment;
-import android.os.ParcelFileDescriptor;
-import android.os.Bundle;
-import android.util.Log;
-import android.view.View;
-import android.widget.Button;
-import android.widget.EditText;
-import android.widget.ProgressBar;
-import android.widget.RelativeLayout;
-
-import androidx.appcompat.app.AppCompatActivity;
-
-import com.babanomania.pdfscanner.utils.OCRUtils;
-import com.babanomania.pdfscanner.utils.UIUtil;
-
-import java.io.File;
-import java.util.ArrayList;
+import android.content.Context; import android.content.DialogInterface; import android.content.Intent; import android.graphics.Bitmap; import android.graphics.pdf.PdfRenderer; import android.os.AsyncTask; import android.os.Bundle; import android.os.Environment; import android.os.ParcelFileDescriptor; import android.util.Log; import android.view.View; import android.widget.Button; import android.widget.EditText; import android.widget.ProgressBar; import android.widget.RelativeLayout;
 
-public class OCRActivity extends AppCompatActivity {
-
-    public EditText ocrText;
-    public Button shareButton;
-    private ProgressBar progressBar;
-    public static String FILE_PATH = "file_path";
-
-    @Override
-    protected void onCreate(Bundle savedInstanceState) {
-        super.onCreate(savedInstanceState);
-        setContentView(R.layout.activity_ocr);
-
-        RelativeLayout relativeLayout = findViewById(R.id.rl);
-        UIUtil.setLightNavigationBar( relativeLayout, this );
-
-        this.ocrText = findViewById(R.id.ocrText);
-        this.shareButton = findViewById(R.id.shareBtn);
-        this.progressBar = findViewById(R.id.extractingProgress);
-
-        this.ocrText.setText( getResources().getString(R.string.ocr_waiting_text) );
-        setTitle( getResources().getString(R.string.ocr_title) );
-
-        this.progressBar.setVisibility(View.VISIBLE);
-        this.shareButton.setVisibility(View.GONE);
-        this.shareButton.setOnClickListener(
-                new View.OnClickListener() {
-                    @Override
-                    public void onClick(View v) {
-
-                        String textToShare = ocrText.getText().toString();
-                        Intent sharingIntent = new Intent(android.content.Intent.ACTION_SEND);
-                        sharingIntent.setType("text/plain");
-                        sharingIntent.putExtra(android.content.Intent.EXTRA_TEXT, textToShare);
-                        startActivity(Intent.createChooser(sharingIntent, "Share Using"));
-
-                    }
-                }
-        );
-
-        Bundle bundle = getIntent().getExtras();
-        final String filePath = bundle.getString(FILE_PATH);
-        new OCRExtractTask( this, getApplicationContext(), filePath )
-                .execute();
-    }
-
-    public void setText( String content ){
-        this.ocrText.setText( content );
-    }
-
-    private class OCRExtractTask extends AsyncTask<String, Void, String> {
-
-        private OCRActivity ocrActivity;
-        private Context context;
-        private String filePath;
-
-        public OCRExtractTask( OCRActivity ocrActivity, Context context, String filePath ){
-            this.ocrActivity = ocrActivity;
-            this.context = context;
-            this.filePath = filePath;
-        }
+import androidx.appcompat.app.AlertDialog; import androidx.appcompat.app.AppCompatActivity;
 
-        @Override
-        protected String doInBackground(String... strings) {
-            try {
+import com.babanomania.pdfscanner.utils.OCRUtils; import com.babanomania.pdfscanner.utils.PDFUtils; import com.babanomania.pdfscanner.utils.UIUtil;
 
-                ArrayList<Bitmap> bitmaps = new ArrayList<>();
-                final String baseDirectory = context.getString(R.string.base_storage_path);
-                final File sd = Environment.getExternalStorageDirectory();
+import java.io.File; import java.util.ArrayList; import java.util.List;
 
-                File toOcr = new File(sd, baseDirectory + this.filePath);
+public class OCRActivity extends AppCompatActivity {
 
-                PdfRenderer renderer = new PdfRenderer(ParcelFileDescriptor.open(toOcr, ParcelFileDescriptor.MODE_READ_ONLY));
+public EditText ocrText;
+public Button shareButton;
+private ProgressBar progressBar;
+public static String FILE_PATH = "file_path";
+
+@Override
+protected void onCreate(Bundle savedInstanceState) {
+    super.onCreate(savedInstanceState);
+    setContentView(R.layout.activity_ocr);
+
+    RelativeLayout relativeLayout = findViewById(R.id.rl);
+    UIUtil.setLightNavigationBar(relativeLayout, this);
+
+    this.ocrText = findViewById(R.id.ocrText);
+    this.shareButton = findViewById(R.id.shareBtn);
+    this.progressBar = findViewById(R.id.extractingProgress);
+
+    this.ocrText.setText(getResources().getString(R.string.ocr_waiting_text));
+    setTitle(getResources().getString(R.string.ocr_title));
+
+    this.progressBar.setVisibility(View.VISIBLE);
+    this.shareButton.setVisibility(View.GONE);
+    this.shareButton.setOnClickListener(v -> {
+        String textToShare = ocrText.getText().toString();
+        Intent sharingIntent = new Intent(Intent.ACTION_SEND);
+        sharingIntent.setType("text/plain");
+        sharingIntent.putExtra(Intent.EXTRA_TEXT, textToShare);
+        startActivity(Intent.createChooser(sharingIntent, "Share Using"));
+    });
+
+    Bundle bundle = getIntent().getExtras();
+    final String filePath = bundle.getString(FILE_PATH);
+    showRedactionChoiceDialog(filePath);
+}
 
-                Bitmap bitmap;
-                final int pageCount = renderer.getPageCount();
-                for (int i = 0; i < pageCount; i++) {
-                    PdfRenderer.Page page = renderer.openPage(i);
+private void showRedactionChoiceDialog(final String filePath) {
+    final CharSequence[] options = {"HIPAA Guidelines", "Custom Word List", "No Redaction"};
+
+    new AlertDialog.Builder(this)
+        .setTitle("Select Redaction Mode")
+        .setItems(options, (dialog, which) -> {
+            boolean useHIPAA = false;
+            boolean applyRedaction = true;
+
+            switch (which) {
+                case 0: // HIPAA
+                    useHIPAA = true;
+                    break;
+                case 1: // Custom list
+                    useHIPAA = false;
+                    break;
+                case 2: // No redaction
+                    applyRedaction = false;
+                    break;
+            }
 
-                    int width = context.getResources().getDisplayMetrics().densityDpi / 72 * page.getWidth();
-                    int height = context.getResources().getDisplayMetrics().densityDpi / 72 * page.getHeight();
-                    bitmap = Bitmap.createBitmap(width, height, Bitmap.Config.ARGB_8888);
+            new OCRExtractTask(OCRActivity.this, getApplicationContext(), filePath, applyRedaction, useHIPAA).execute();
+        })
+        .setCancelable(false)
+        .show();
+}
 
-                    page.render(bitmap, null, null, PdfRenderer.Page.RENDER_MODE_FOR_DISPLAY);
+public void setText(String content) {
+    this.ocrText.setText(content);
+}
 
-                    bitmaps.add(bitmap);
+private class OCRExtractTask extends AsyncTask<String, Void, String> {
+    private OCRActivity ocrActivity;
+    private Context context;
+    private String filePath;
+    private boolean applyRedaction;
+    private boolean redactWithHIPAA;
+
+    public OCRExtractTask(OCRActivity ocrActivity, Context context, String filePath, boolean applyRedaction, boolean redactWithHIPAA) {
+        this.ocrActivity = ocrActivity;
+        this.context = context;
+        this.filePath = filePath;
+        this.applyRedaction = applyRedaction;
+        this.redactWithHIPAA = redactWithHIPAA;
+    }
 
-                    // close the page
-                    page.close();
+    @Override
+    protected String doInBackground(String... strings) {
+        try {
+            ArrayList<Bitmap> bitmaps = new ArrayList<>();
+            final String baseDirectory = context.getString(R.string.base_storage_path);
+            final File sd = Environment.getExternalStorageDirectory();
+            File toOcr = new File(sd, baseDirectory + this.filePath);
+
+            PdfRenderer renderer = new PdfRenderer(ParcelFileDescriptor.open(toOcr, ParcelFileDescriptor.MODE_READ_ONLY));
+            final int pageCount = renderer.getPageCount();
+
+            for (int i = 0; i < pageCount; i++) {
+                PdfRenderer.Page page = renderer.openPage(i);
+                int width = context.getResources().getDisplayMetrics().densityDpi / 72 * page.getWidth();
+                int height = context.getResources().getDisplayMetrics().densityDpi / 72 * page.getHeight();
+                Bitmap bitmap = Bitmap.createBitmap(width, height, Bitmap.Config.ARGB_8888);
+                page.render(bitmap, null, null, PdfRenderer.Page.RENDER_MODE_FOR_DISPLAY);
+                bitmaps.add(bitmap);
+                page.close();
+            }
+            renderer.close();
 
+            StringBuilder extractedText = new StringBuilder();
+            for (Bitmap eachPage : bitmaps) {
+                String pageText = OCRUtils.getTextFromBitmap(context, eachPage);
+                if (applyRedaction) {
+                    pageText = applyRedactions(pageText, redactWithHIPAA);
                 }
+                extractedText.append(pageText).append("\n\n");
+            }
 
-                // close the renderer
-                renderer.close();
-
-                StringBuffer extractedText = new StringBuffer();
-                for (Bitmap eachPage : bitmaps ) {
-                    extractedText.append(
-                            OCRUtils.getTextFromBitmap(context, eachPage)
-                    );
-
-                }
+            String finalText = extractedText.toString();
+            PDFUtils.writeSearchablePDF(toOcr.getName(), finalText, context);
+
+            runOnUiThread(() -> {
+                ocrActivity.setText(finalText);
+                shareButton.setVisibility(View.VISIBLE);
+                progressBar.setVisibility(View.GONE);
+            });
+
+        } catch (Exception e) {
+            Log.e("Clean Scan", "Unable to extract text", e);
+            runOnUiThread(() -> {
+                ocrActivity.setText(getResources().getString(R.string.ocr_failed_text));
+                shareButton.setVisibility(View.GONE);
+                progressBar.setVisibility(View.GONE);
+            });
+        }
+        return null;
+    }
 
-                Log.d(  "Clean Scan", "detected text : " + extractedText );
-                this.ocrActivity.setText(extractedText.toString() );
-                runOnUiThread(new Runnable() {
-                    @Override
-                    public void run() {
-                        shareButton.setVisibility(View.VISIBLE);
-                        progressBar.setVisibility(View.GONE);
-                    }
-                });
-
-            }catch (Exception e){
-                Log.e( "Clean Scan", "Unable to extract text", e );
-                this.ocrActivity.setText( getResources().getString(R.string.ocr_failed_text) );
-                runOnUiThread(new Runnable() {
-                    @Override
-                    public void run() {
-                        shareButton.setVisibility(View.GONE);
-                        progressBar.setVisibility(View.GONE);
-                    }
-                });
-
-            } finally {
-                return  null;
-            }
+    private String applyRedactions(String text, boolean useHIPAA) {
+        List<String> redactionList = useHIPAA ? OCRUtils.getHIPAAWordList(context) : OCRUtils.getCustomWordList(context);
+        for (String term : redactionList) {
+            text = text.replaceAll("(?i)\\b" + term + "\\b", "[REDACTED]");
         }
+        return text;
     }
 }
+
+}
+
+