diff --git a/optical character recognition for financial docs/Readme.md b/optical character recognition for financial docs/Readme.md
new file mode 100644
index 00000000..641e44c3
--- /dev/null
+++ b/optical character recognition for financial docs/Readme.md	
@@ -0,0 +1,11 @@
+**Financial Document OCR Tool<br>
+<br>
+Overview** <br>
+The Financial Document OCR Tool is a Python-based utility designed to extract text from financial documents, including balance sheets, income statements, and other financial reports. The tool leverages the Tesseract OCR engine with deep learning-based LSTM (Long Short-Term Memory) models to accurately recognize and extract text from images.
+
+**Features**<br>
+<ul>  Text Extraction: Automatically extracts textual content from financial documents, including scanned images and PDF files.
+</ul><ul>Accuracy: Utilizes advanced OCR techniques to achieve high accuracy in text recognition, even with complex and poorly formatted documents.
+</ul><ul>Customizable Configuration: Allows customization of OCR parameters to optimize performance for different types of financial documents.
+</ul><ul>Ease of Use: Simple and intuitive interface for seamless integration into financial analysis pipelines and workflows.</ul>
+
diff --git a/optical character recognition for financial docs/image.jpg b/optical character recognition for financial docs/image.jpg
new file mode 100644
index 00000000..2e9f3a8d
Binary files /dev/null and b/optical character recognition for financial docs/image.jpg differ
diff --git a/optical character recognition for financial docs/ocr_code.ipynb b/optical character recognition for financial docs/ocr_code.ipynb
new file mode 100644
index 00000000..4061444e
--- /dev/null
+++ b/optical character recognition for financial docs/ocr_code.ipynb	
@@ -0,0 +1,257 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "code",
+      "source": [
+        "!pip install invoice2data"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "irjtibnRtaji",
+        "outputId": "2655643e-d156-4918-c58f-3f0bc58be1b9"
+      },
+      "execution_count": 2,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Collecting invoice2data\n",
+            "  Downloading invoice2data-0.4.5-py3-none-any.whl (149 kB)\n",
+            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m149.0/149.0 kB\u001b[0m \u001b[31m3.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[?25hRequirement already satisfied: pillow in /usr/local/lib/python3.10/dist-packages (from invoice2data) (9.4.0)\n",
+            "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from invoice2data) (6.0.1)\n",
+            "Collecting dateparser (from invoice2data)\n",
+            "  Downloading dateparser-1.2.0-py2.py3-none-any.whl (294 kB)\n",
+            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m295.0/295.0 kB\u001b[0m \u001b[31m7.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[?25hRequirement already satisfied: python-dateutil in /usr/local/lib/python3.10/dist-packages (from dateparser->invoice2data) (2.8.2)\n",
+            "Requirement already satisfied: pytz in /usr/local/lib/python3.10/dist-packages (from dateparser->invoice2data) (2023.4)\n",
+            "Requirement already satisfied: regex!=2019.02.19,!=2021.8.27 in /usr/local/lib/python3.10/dist-packages (from dateparser->invoice2data) (2023.12.25)\n",
+            "Requirement already satisfied: tzlocal in /usr/local/lib/python3.10/dist-packages (from dateparser->invoice2data) (5.2)\n",
+            "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil->dateparser->invoice2data) (1.16.0)\n",
+            "Installing collected packages: dateparser, invoice2data\n",
+            "Successfully installed dateparser-1.2.0 invoice2data-0.4.5\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!sudo apt install tesseract-ocr\n",
+        "!pip install pytesseract Pillow pdf2image pypdf\n"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "MztfBgrFzXxY",
+        "outputId": "b38f7b2e-e52e-4486-e9d8-457077da64ba"
+      },
+      "execution_count": 46,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Reading package lists... Done\n",
+            "Building dependency tree... Done\n",
+            "Reading state information... Done\n",
+            "The following additional packages will be installed:\n",
+            "  tesseract-ocr-eng tesseract-ocr-osd\n",
+            "The following NEW packages will be installed:\n",
+            "  tesseract-ocr tesseract-ocr-eng tesseract-ocr-osd\n",
+            "0 upgraded, 3 newly installed, 0 to remove and 45 not upgraded.\n",
+            "Need to get 4,816 kB of archives.\n",
+            "After this operation, 15.6 MB of additional disk space will be used.\n",
+            "Get:1 http://archive.ubuntu.com/ubuntu jammy/universe amd64 tesseract-ocr-eng all 1:4.00~git30-7274cfa-1.1 [1,591 kB]\n",
+            "Get:2 http://archive.ubuntu.com/ubuntu jammy/universe amd64 tesseract-ocr-osd all 1:4.00~git30-7274cfa-1.1 [2,990 kB]\n",
+            "Get:3 http://archive.ubuntu.com/ubuntu jammy/universe amd64 tesseract-ocr amd64 4.1.1-2.1build1 [236 kB]\n",
+            "Fetched 4,816 kB in 1s (5,602 kB/s)\n",
+            "debconf: unable to initialize frontend: Dialog\n",
+            "debconf: (No usable dialog-like program is installed, so the dialog based frontend cannot be used. at /usr/share/perl5/Debconf/FrontEnd/Dialog.pm line 78, <> line 3.)\n",
+            "debconf: falling back to frontend: Readline\n",
+            "debconf: unable to initialize frontend: Readline\n",
+            "debconf: (This frontend requires a controlling tty.)\n",
+            "debconf: falling back to frontend: Teletype\n",
+            "dpkg-preconfigure: unable to re-open stdin: \n",
+            "Selecting previously unselected package tesseract-ocr-eng.\n",
+            "(Reading database ... 121918 files and directories currently installed.)\n",
+            "Preparing to unpack .../tesseract-ocr-eng_1%3a4.00~git30-7274cfa-1.1_all.deb ...\n",
+            "Unpacking tesseract-ocr-eng (1:4.00~git30-7274cfa-1.1) ...\n",
+            "Selecting previously unselected package tesseract-ocr-osd.\n",
+            "Preparing to unpack .../tesseract-ocr-osd_1%3a4.00~git30-7274cfa-1.1_all.deb ...\n",
+            "Unpacking tesseract-ocr-osd (1:4.00~git30-7274cfa-1.1) ...\n",
+            "Selecting previously unselected package tesseract-ocr.\n",
+            "Preparing to unpack .../tesseract-ocr_4.1.1-2.1build1_amd64.deb ...\n",
+            "Unpacking tesseract-ocr (4.1.1-2.1build1) ...\n",
+            "Setting up tesseract-ocr-eng (1:4.00~git30-7274cfa-1.1) ...\n",
+            "Setting up tesseract-ocr-osd (1:4.00~git30-7274cfa-1.1) ...\n",
+            "Setting up tesseract-ocr (4.1.1-2.1build1) ...\n",
+            "Processing triggers for man-db (2.10.2-1) ...\n",
+            "Collecting pytesseract\n",
+            "  Downloading pytesseract-0.3.10-py3-none-any.whl (14 kB)\n",
+            "Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (9.4.0)\n",
+            "Collecting pdf2image\n",
+            "  Downloading pdf2image-1.17.0-py3-none-any.whl (11 kB)\n",
+            "Requirement already satisfied: pypdf in /usr/local/lib/python3.10/dist-packages (4.2.0)\n",
+            "Requirement already satisfied: packaging>=21.3 in /usr/local/lib/python3.10/dist-packages (from pytesseract) (24.0)\n",
+            "Requirement already satisfied: typing_extensions>=4.0 in /usr/local/lib/python3.10/dist-packages (from pypdf) (4.11.0)\n",
+            "Installing collected packages: pytesseract, pdf2image\n",
+            "Successfully installed pdf2image-1.17.0 pytesseract-0.3.10\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 54,
+      "metadata": {
+        "id": "vJGeEAjjs8-s"
+      },
+      "outputs": [],
+      "source": [
+        "\n",
+        "import pytesseract\n",
+        "from PIL import Image"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "\n",
+        "# Path to the image file\n",
+        "image_path = '/image.jpg'\n",
+        "\n",
+        "# Perform OCR on the image\n",
+        "def ocr_image(image_path):\n",
+        "    # Open the image file\n",
+        "    img = Image.open(image_path)\n",
+        "\n",
+        "    # Use pytesseract to do OCR on the image\n",
+        "    text = pytesseract.image_to_string(img)\n",
+        "\n",
+        "    return text\n",
+        "\n",
+        "# Extract text from the image\n",
+        "text = ocr_image(image_path)\n",
+        "\n",
+        "# Print the extracted text\n",
+        "print(\"Extracted Text:\")\n",
+        "print(text)\n"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "TzeAJCSXvyxl",
+        "outputId": "4efe1c2d-92a2-42cd-86da-4bd6cdbc7fd1"
+      },
+      "execution_count": 55,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Extracted Text:\n",
+            "Example Corporation\n",
+            "‘Balance Sheet\n",
+            "December 31, 2020\n",
+            "\n",
+            " \n",
+            "\n",
+            " \n",
+            "\n",
+            "ASSETS\n",
+            "‘Curent assets\n",
+            "‘Cash and cash equivalents $2200\n",
+            "‘Shorterm investments 40,000\n",
+            "Account receivable -net 39,500\n",
+            "Other receivables 11000\n",
+            "Inventory 3,000\n",
+            "Suppies 3.800\n",
+            "Prepaid expences 41500\n",
+            "‘Total curet assets 39.000\n",
+            "Investments 36.000\n",
+            "Property plat & equipment net\n",
+            "Land 5500\n",
+            "Land improvements 6.500\n",
+            "Buidings 180,000\n",
+            "Equipment 201,000\n",
+            "Less: accumuates depreciation 8.000)\n",
+            "Property. pant & equipment-net_ 337.000,\n",
+            "Imangbe assets\n",
+            "‘Goodwl 105,000\n",
+            "Other ntangbe assets 200,000\n",
+            "“ofl intangible assets 306,000\n",
+            "Other assets 3,000\n",
+            "Total asst $770.00\n",
+            "\n",
+            " \n",
+            "\n",
+            " \n",
+            "\n",
+            " \n",
+            "\n",
+            "LIABILITIES\n",
+            "Curent abies\n",
+            "‘Short-term loans payable $500\n",
+            "Curent porton of ong-tom debt 15,000\n",
+            "‘Accounts payable 20,900\n",
+            "‘Accvved compensation and benefits 8500\n",
+            "Income taxes payable 6.100\n",
+            "Other acoved abies ‘4.000\n",
+            "Deterod revenues 41500\n",
+            "Tal curent taiities 61.000\n",
+            "Long term habits\n",
+            "Notes payable 20,000\n",
+            "Bonds payable 375,000\n",
+            "Deferred income taxes 25,000\n",
+            "Total ong tem habits 220,000\n",
+            "Total abies 481,000\n",
+            "\n",
+            "Commitments and contingencies es)\n",
+            "STOCKHOLDERS’ EQ\n",
+            "\n",
+            " \n",
+            "\n",
+            "commen stock 110,000\n",
+            "Retained earings 220,000\n",
+            "‘Accum other comprehensive income 9,000\n",
+            "Loss: Treasury stock ($0,000)\n",
+            "\n",
+            "Toa stockholders’ equity 289,000\n",
+            "\n",
+            "Total abies & stockholders’ equity $770,000,\n",
+            "\f\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [],
+      "metadata": {
+        "id": "NiPIyCRWxzgV"
+      },
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}
\ No newline at end of file