# 40204-vm/core/image_tools.py

import io
import shutil
import subprocess
import tempfile
from decimal import Decimal
from pathlib import Path

from django.core.files.base import ContentFile
from PIL import Image, UnidentifiedImageError

# EXIF tag id (0x8825, "GPSInfo") of the GPS sub-directory; extract_gps passes
# it to Exif.get_ifd() to reach the GPS tags.
GPS_TAG = 34853
# Target upper bound, in bytes (256 KiB), for the JPEG produced by
# compress_image.
MAX_BYTES = 256 * 1024


def _ratio_to_float(value):
    """Convert a rational-like or plain numeric value to ``float``.

    Objects exposing ``numerator`` and ``denominator`` are divided out;
    anything lacking those attributes is handed directly to ``float()``.
    """
    try:
        top = float(value.numerator)
        bottom = float(value.denominator)
    except AttributeError:
        # Not a rational: let float() interpret the value itself.
        return float(value)
    return top / bottom


def _gps_to_decimal(parts, ref):
    """Convert a degrees/minutes/seconds triple to signed decimal degrees.

    Args:
        parts: Indexable whose first three items are degrees, minutes and
            seconds; each item is converted with ``_ratio_to_float``.
        ref: Hemisphere reference. ``"S"`` and ``"W"`` produce a negative
            result, anything else a positive one. ``bytes`` values,
            lowercase letters and surrounding whitespace/NUL padding are
            normalised first, so a sloppily encoded reference does not
            silently place the coordinate in the wrong hemisphere.

    Returns:
        A ``Decimal`` rounded to six decimal places.
    """
    degrees, minutes, seconds = (_ratio_to_float(parts[i]) for i in range(3))
    magnitude = degrees + minutes / 60 + seconds / 3600

    if isinstance(ref, (bytes, bytearray)):
        ref = bytes(ref).decode("ascii", errors="ignore")
    hemisphere = str(ref).strip(" \t\r\n\x00").upper()
    if hemisphere in ("S", "W"):
        magnitude = -magnitude

    # Build the Decimal from the rounded float's text form so it carries the
    # short decimal representation rather than the binary float expansion.
    return Decimal(str(round(magnitude, 6)))


def extract_gps(image):
    """Read the GPS position stored in an image's EXIF data, if present.

    Returns a ``(latitude, longitude)`` pair built with ``_gps_to_decimal``.
    ``(None, None)`` is returned when the image has no EXIF data, no GPS
    directory, any of the four coordinate tags is missing or empty, or when
    reading or converting the metadata raises.
    """
    missing = (None, None)
    try:
        exif_data = image.getexif()
        if not exif_data:
            return missing
        gps_ifd = exif_data.get_ifd(GPS_TAG)
        if not gps_ifd:
            return missing

        # GPS IFD tag ids: 1/2 = latitude ref/value, 3/4 = longitude ref/value.
        lat_value, lat_hemisphere = gps_ifd.get(2), gps_ifd.get(1)
        lng_value, lng_hemisphere = gps_ifd.get(4), gps_ifd.get(3)
        if not all((lat_value, lat_hemisphere, lng_value, lng_hemisphere)):
            return missing

        latitude = _gps_to_decimal(lat_value, lat_hemisphere)
        longitude = _gps_to_decimal(lng_value, lng_hemisphere)
        return latitude, longitude
    except Exception:
        # Location metadata is optional: any failure means "no position".
        return missing


def _encode_jpeg(image, buffer, quality):
    """Overwrite *buffer* with *image* as a progressive JPEG; return its size in bytes."""
    buffer.seek(0)
    buffer.truncate(0)
    image.save(buffer, format="JPEG", optimize=True, progressive=True, quality=quality)
    return buffer.tell()


def compress_image(uploaded_file, base_name="property"):
    """Re-encode an uploaded image as a small JPEG and pull out its GPS position.

    The image is rotated according to its EXIF orientation, converted to RGB,
    bounded to 1600x1600, then encoded at decreasing JPEG quality (86 down to
    51 in steps of 7) until it fits in ``MAX_BYTES``. If it still does not
    fit, it is repeatedly scaled to 82% and encoded at quality 45 while both
    sides are larger than 640 px. The result may therefore still exceed
    ``MAX_BYTES``; callers can check the returned ``size``.

    Args:
        uploaded_file: Seekable binary file-like object holding the upload.
        base_name: Name whose stem is used for the output filename
            (``"<stem>-small.jpg"``); falls back to ``"property"``.

    Returns:
        ``(result, error)``. On success ``result`` is a dict with ``file``
        (a ``ContentFile``), ``latitude``, ``longitude`` and ``size`` (bytes)
        and ``error`` is ``""``. If the upload cannot be decoded, ``result``
        is ``None`` and ``error`` is a user-facing message.
    """
    # Function-scope import keeps this block self-contained; the module-level
    # PIL import does not bring ImageOps into scope.
    from PIL import ImageOps

    try:
        uploaded_file.seek(0)
        image = Image.open(uploaded_file)
        image.load()
    except (UnidentifiedImageError, Image.DecompressionBombError, OSError, ValueError):
        # DecompressionBombError is not an OSError; without it an oversized
        # (untrusted) upload would escape as an unhandled exception.
        return None, "The uploaded file is not a readable image."

    # Read GPS before any transformation: the metadata lives on the original.
    latitude, longitude = extract_gps(image)

    # The re-encoded JPEG carries no EXIF, so the orientation tag must be
    # baked into the pixels or rotated camera photos come out sideways.
    try:
        upright = ImageOps.exif_transpose(image)
        if upright is not None:
            image = upright
    except Exception:
        # Broken orientation metadata must not reject a readable image;
        # continue with the pixels as stored.
        pass

    image = image.convert("RGB")
    image.thumbnail((1600, 1600))

    output = io.BytesIO()
    quality = 86
    while quality >= 45:
        if _encode_jpeg(image, output, quality) <= MAX_BYTES:
            break
        quality -= 7

    # Only reached with an oversized result when every quality step failed
    # (quality has dropped below 45), so the clamp makes this phase use 45.
    shrink_quality = max(quality, 45)
    while output.tell() > MAX_BYTES and image.width > 640 and image.height > 640:
        image.thumbnail((int(image.width * 0.82), int(image.height * 0.82)))
        _encode_jpeg(image, output, shrink_quality)

    payload = output.getvalue()
    filename = f"{Path(base_name).stem or 'property'}-small.jpg"
    return {
        "file": ContentFile(payload, name=filename),
        "latitude": latitude,
        "longitude": longitude,
        "size": len(payload),
    }, ""


def ocr_text_best_effort(uploaded_file) -> str:
    """Use locally installed system OCR only. Returns blank when unavailable.

    The upload is copied to a temporary file and passed to the ``tesseract``
    executable found on ``PATH`` (page segmentation mode 6, 8 second timeout).

    Args:
        uploaded_file: Seekable binary file-like object. Its ``name``, when
            present, only supplies the temporary file's suffix; ``.jpg`` is
            used otherwise.

    Returns:
        The recognised text with all whitespace runs collapsed to single
        spaces, truncated to 1200 characters. ``""`` when tesseract is not
        installed or any step fails.
    """
    tesseract = shutil.which("tesseract")
    if not tesseract:
        return ""
    try:
        # A missing or None ``name`` (e.g. an in-memory buffer) must not
        # disable OCR; it only means there is no extension to reuse.
        name = getattr(uploaded_file, "name", None)
        suffix = (Path(str(name)).suffix if name else "") or ".jpg"

        uploaded_file.seek(0)
        with tempfile.NamedTemporaryFile(suffix=suffix) as tmp:
            tmp.write(uploaded_file.read())
            tmp.flush()  # make the bytes visible to the child process
            result = subprocess.run(
                [tesseract, tmp.name, "stdout", "--psm", "6"],
                check=False,
                capture_output=True,
                # Decode explicitly as UTF-8 instead of the locale encoding so
                # non-ASCII text is not lost to a decode error.
                encoding="utf-8",
                errors="replace",
                timeout=8,
            )
        return " ".join(result.stdout.split())[:1200]
    except Exception:
        # Best effort by contract: OCR problems never propagate to the caller.
        return ""