#!/usr/bin/env python3
"""
CrispScan OCR API - EasyOCR Based
Supports: Bengali, Hindi, English, Tamil, etc.
"""

import os
import base64
import time
import cv2
import numpy as np
from flask import Flask, request, jsonify
import easyocr

# Flask application object; the @app.route decorators in this file register
# their handlers on it, and the __main__ block starts it with app.run().
app = Flask(__name__)

# Cache of EasyOCR readers, filled lazily by get_reader().
# Keys are sorted tuples of unique language codes; values are the readers
# built for that language set.
readers = {}

def get_reader(languages):
    """Get or create the cached EasyOCR reader for the given languages.

    The cache key ignores both the order and any repetition of the codes, so
    ``['en', 'bn']``, ``['bn', 'en']`` and ``['en', 'bn', 'en']`` all share a
    single reader instead of each loading its own copy of the model.

    Args:
        languages: An iterable of language codes, or a single code given as a
            plain string (treated as a one-element list).

    Returns:
        The reader stored in ``readers`` for this language set; it is created
        with ``gpu=False`` on first use.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(languages, str):
        languages = [languages]

    # Drop repeated codes while keeping the caller's first-seen order.
    unique_languages = list(dict.fromkeys(languages))
    key = tuple(sorted(unique_languages))
    if key not in readers:
        print(f"Loading EasyOCR reader for: {languages}")
        readers[key] = easyocr.Reader(unique_languages, gpu=False)
    return readers[key]

def preprocess_image(img):
    """Return a denoised, single-channel version of a BGR image for OCR.

    The input is converted from BGR to grayscale and then passed through
    OpenCV's non-local-means denoiser. Binarising the result with
    cv2.adaptiveThreshold was tried by the original author and left disabled,
    with the note that plain denoised grayscale works better for most cases.
    """
    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Arguments after the source image: no preallocated output buffer, then
    # h=10, templateWindowSize=7, searchWindowSize=21 (names per OpenCV docs).
    cleaned = cv2.fastNlMeansDenoising(grayscale, None, 10, 7, 21)
    return cleaned

def detect_language_hint(text):
    """Guess a language code from the Unicode blocks present in *text*.

    Scripts are checked in a fixed priority order (Bengali, then Devanagari
    reported as Hindi, then Tamil); the first script with at least one
    character anywhere in the text wins. Text containing none of them is
    reported as English.
    """
    # (language code, first code point, last code point), in priority order.
    script_ranges = (
        ('bn', '\u0980', '\u09FF'),
        ('hi', '\u0900', '\u097F'),
        ('ta', '\u0B80', '\u0BFF'),
    )
    for code, low, high in script_ranges:
        for ch in text:
            if low <= ch <= high:
                return code
    return 'en'

@app.route('/health', methods=['GET'])
def health():
    """Report that the service is up, together with its name and OCR engine."""
    status_payload = {
        'status': 'ok',
        'service': 'CrispScan OCR API',
        'engine': 'EasyOCR',
    }
    return jsonify(status_payload)

def _decode_base64_image(image_b64):
    """Decode a base64 string (optionally a ``data:`` URI) into a BGR image.

    Returns the array produced by ``cv2.imdecode``, which is ``None`` when the
    bytes are not an image OpenCV can read. Errors from base64 decoding or
    from OpenCV propagate to the caller.
    """
    # Clients often send "data:image/png;base64,<payload>"; keep the payload only,
    # otherwise the prefix characters would be decoded as part of the image.
    if isinstance(image_b64, str) and image_b64.startswith('data:'):
        image_b64 = image_b64.partition(',')[2]
    raw_bytes = base64.b64decode(image_b64)
    buffer = np.frombuffer(raw_bytes, np.uint8)
    return cv2.imdecode(buffer, cv2.IMREAD_COLOR)

@app.route('/ocr', methods=['POST'])
def ocr():
    """
    OCR Endpoint

    POST JSON:
    {
        "image": "base64_encoded_image",   // raw base64 or a data: URI
        "languages": ["en", "bn"],  // optional, default: ["en", "bn"]
        "preprocess": true          // optional, default: true
    }

    Response (200):
    {
        "success": true,
        "text": "extracted text",
        "lines": ["line1", "line2"],
        "line_count": 2,
        "char_count": 14,
        "language_detected": "bn",
        "processing_time": 1.23
    }

    Errors are returned as {"success": false, "error": "..."} with status
    400 for problems in the request (missing/invalid JSON, missing or
    undecodable image, bad language list) and 500 for unexpected failures.
    """
    try:
        # perf_counter is monotonic, so the reported duration cannot be
        # skewed by wall-clock adjustments.
        start_time = time.perf_counter()

        # silent=True makes a wrong Content-Type or malformed JSON yield None
        # instead of raising, so those cases get a 400 here rather than
        # falling into the generic 500 handler below.
        data = request.get_json(silent=True)
        if not data:
            return jsonify({'success': False, 'error': 'No JSON data'}), 400
        if not isinstance(data, dict):
            return jsonify({'success': False, 'error': 'JSON body must be an object'}), 400

        image_b64 = data.get('image')
        if not image_b64:
            return jsonify({'success': False, 'error': 'No image provided'}), 400

        # An explicit null is treated like an omitted field.
        languages = data.get('languages')
        if languages is None:
            languages = ['en', 'bn']
        elif isinstance(languages, str):
            # Accept a single code given as a plain string.
            languages = [languages]
        if (
            not isinstance(languages, list)
            or not languages
            or not all(isinstance(code, str) for code in languages)
        ):
            return jsonify({
                'success': False,
                'error': 'languages must be a non-empty list of language codes'
            }), 400

        do_preprocess = data.get('preprocess', True)

        # Decode base64 image
        try:
            img = _decode_base64_image(image_b64)
        except Exception as e:
            return jsonify({'success': False, 'error': f'Image decode error: {str(e)}'}), 400
        if img is None:
            return jsonify({'success': False, 'error': 'Invalid image data'}), 400

        # Both branches hand a single-channel image to the reader.
        if do_preprocess:
            processed_img = preprocess_image(img)
        else:
            processed_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # EasyOCR rejects unknown codes and incompatible language combinations
        # with ValueError; that is a client mistake, not a server fault.
        try:
            reader = get_reader(languages)
        except ValueError as e:
            return jsonify({'success': False, 'error': f'Unsupported languages: {e}'}), 400

        # Each result's element [1] is the recognised text, one entry per
        # detected text region.
        results = reader.readtext(processed_img)
        lines = [result[1] for result in results]
        full_text = '\n'.join(lines)

        detected_lang = detect_language_hint(full_text)

        processing_time = round(time.perf_counter() - start_time, 2)

        return jsonify({
            'success': True,
            'text': full_text,
            'lines': lines,
            'line_count': len(lines),
            'char_count': len(full_text),
            'language_detected': detected_lang,
            'processing_time': processing_time
        })

    except Exception as e:
        # Top-level boundary: record the traceback server-side, then report
        # the failure to the client in the usual JSON envelope.
        app.logger.exception('OCR request failed')
        return jsonify({
            'success': False,
            'error': str(e)
        }), 500

@app.route('/languages', methods=['GET'])
def languages():
    """Return the advertised language codes with display names, plus the default pair."""
    # NOTE(review): /ocr forwards client-supplied codes to easyocr.Reader;
    # confirm every code listed here (in particular 'zh', 'gu', 'ml', 'pa')
    # is one EasyOCR actually accepts.
    supported_languages = {
        'en': 'English',
        'bn': 'Bengali',
        'hi': 'Hindi',
        'ta': 'Tamil',
        'te': 'Telugu',
        'mr': 'Marathi',
        'gu': 'Gujarati',
        'kn': 'Kannada',
        'ml': 'Malayalam',
        'pa': 'Punjabi',
        'ur': 'Urdu',
        'ar': 'Arabic',
        'zh': 'Chinese',
        'ja': 'Japanese',
        'ko': 'Korean',
    }
    default_languages = ['en', 'bn']
    return jsonify({
        'supported': supported_languages,
        'default': default_languages,
    })

if __name__ == '__main__':
    # Build the English + Bengali reader before serving, so that reader is
    # already cached when the first request for those languages arrives.
    print("Preloading EasyOCR reader...")
    get_reader(['en', 'bn'])
    print("Ready!")

    # Start the Flask server on the loopback interface, port 5001, with
    # debug mode off.
    app.run(debug=False, port=5001, host='127.0.0.1')
