jignesh123 committed on
Commit
24ec78d
1 Parent(s): edca3d9
Files changed (4) hide show
  1. .gitattributes +35 -35
  2. README.md +15 -14
  3. app.py +60 -0
  4. requirements.txt +4 -0
.gitattributes CHANGED
@@ -1,35 +1,35 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,14 +1,15 @@
1
- ---
2
- title: Ocr Got Proj
3
- emoji: 📊
4
- colorFrom: indigo
5
- colorTo: blue
6
- sdk: streamlit
7
- sdk_version: 1.38.0
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- short_description: 'Optical Character Recognition (OCR) on an uploaded image '
12
- ---
13
-
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
1
+ # OCR with General OCR Theory (GOT) Model
2
+
3
+ This is a web-based Optical Character Recognition (OCR) application using the General OCR Theory (GOT) model to extract text from images.
4
+
5
+ ## How to Run the Project
6
+
7
+ 1. Clone the repository.
8
+ 2. Install dependencies: `pip install -r requirements.txt`.
9
+ 3. Run the Flask app: `python app.py`.
10
+ 4. Open your browser and go to `http://localhost:5000`.
11
+
12
+ ## Features
13
+ - Supports image uploads.
14
+ - Extracts text from images using the GOT model.
15
+ - Displays the extracted text on the web interface.
app.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, render_template, request
2
+ from model.got_model import perform_ocr # Import the OCR function
3
+ import os
4
+ import re
5
+
6
# Flask application object. Uploaded images are written under UPLOAD_FOLDER,
# a path relative to the process's working directory.
app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = 'uploads/'

# Create the uploads folder if it doesn't exist. exist_ok=True does this in a
# single call, with no window between an existence check and the creation.
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)

# Module-level store for the most recent OCR result: upload_image() writes it
# and search_text() reads it. Being a plain global, it is shared by every
# request this process handles.
extracted_text_global = ""
15
+
16
@app.route("/", methods=["GET", "POST"])
def upload_image():
    """Show the upload form (GET) or run OCR on an uploaded image (POST).

    On POST the file from the ``file`` form field is saved under
    ``UPLOAD_FOLDER`` and passed to ``perform_ocr``; the result is stored in
    ``extracted_text_global`` and rendered with ``result.html``. A missing
    file, or a filename with no usable final path component, re-renders
    ``upload.html`` with an error message.
    """
    global extracted_text_global

    if request.method != "POST":
        return render_template("upload.html")

    file = request.files.get("file")  # Get the uploaded file

    # The filename is chosen by the client. Keep only its last path component
    # so a name such as "../../app.py" or "/etc/passwd" cannot make
    # os.path.join() resolve outside the uploads folder.
    filename = ""
    if file and file.filename:
        filename = os.path.basename(file.filename.replace("\\", "/"))

    if filename in ("", ".", ".."):
        # If no usable file was uploaded, display an error message
        return render_template("upload.html", error="Please upload a valid image file.")

    file_path = os.path.join(app.config["UPLOAD_FOLDER"], filename)
    file.save(file_path)  # Save the file to the uploads folder

    # Run the OCR model on the uploaded image
    extracted_text_global = perform_ocr(file_path)

    # NOTE(review): the OCR text is handed to the template unescaped. If
    # result.html renders `extracted_text` without autoescaping, it should be
    # HTML-escaped first — confirm against the template.
    return render_template("result.html", extracted_text=extracted_text_global)
36
+
37
def _highlight_keyword(text, keyword):
    """Return ``(html_text, found)`` with each literal match wrapped in <mark>.

    ``keyword`` is matched literally (regex metacharacters are escaped) and
    case-insensitively. Matched and unmatched pieces of ``text`` are both
    HTML-escaped, so the only markup in the result is the ``<mark>`` tags
    added here. ``keyword`` must be non-empty.
    """
    import html  # local import: only the highlighting code needs it

    pattern = re.compile(re.escape(keyword), re.IGNORECASE)
    pieces = []
    cursor = 0
    for match in pattern.finditer(text):
        pieces.append(html.escape(text[cursor:match.start()]))
        pieces.append(f"<mark>{html.escape(match.group(0))}</mark>")
        cursor = match.end()
    found = bool(pieces)
    pieces.append(html.escape(text[cursor:]))
    return "".join(pieces), found


@app.route("/search", methods=["POST"])
def search_text():
    """Search the last OCR result for a keyword and highlight every match.

    Reads ``keyword`` from the submitted form, looks for it literally and
    case-insensitively in ``extracted_text_global``, and renders
    ``result.html`` with each occurrence wrapped in ``<mark>`` plus a message
    saying whether it was found. A missing or whitespace-only keyword renders
    the text with a prompt to enter one.
    """
    import html  # local import: only this view needs it

    text = extracted_text_global

    # Strip before testing for emptiness: a whitespace-only keyword would
    # otherwise become "" and match at every position in the text.
    keyword = (request.form.get("keyword") or "").strip()

    # NOTE(review): the text is HTML-escaped on the assumption that
    # result.html renders `extracted_text` as raw HTML (otherwise the <mark>
    # tags could never show up as highlights) — confirm against the template.
    if not keyword:
        return render_template("result.html", extracted_text=html.escape(text), search_result="Please enter a keyword.")

    # One escaped pattern both detects and highlights the keyword, so input
    # such as "(" or "a.c" is treated as plain text and cannot raise re.error
    # or match differently between the check and the substitution.
    highlighted_text, found = _highlight_keyword(text, keyword)
    if found:
        result_message = f"The keyword '{keyword}' was found and highlighted in the text."
    else:
        result_message = f"The keyword '{keyword}' was not found in the extracted text."

    # Render the result page with the highlighted text and search result message
    return render_template("result.html", extracted_text=highlighted_text, search_result=result_message)
59
if __name__ == "__main__":
    # Debug mode enables Werkzeug's interactive debugger, which lets anyone
    # who can reach the server execute Python code. It is therefore opt-in via
    # the FLASK_DEBUG environment variable rather than always on.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() not in {"", "0", "false", "no"}
    app.run(debug=debug_enabled)
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ Flask
2
+ torch
3
+ transformers
4
+ Pillow