import io
import shutil
import subprocess
import tempfile
from decimal import Decimal
from pathlib import Path

from django.core.files.base import ContentFile
from PIL import Image, ImageOps, UnidentifiedImageError

# EXIF tag id of the GPSInfo IFD (0x8825), passed to ``Exif.get_ifd``.
GPS_TAG = 34853
# Target upper bound for the compressed JPEG, in bytes.
MAX_BYTES = 256 * 1024

# Tuning knobs for ``compress_image``.
_MAX_SIDE = 1600        # initial bounding box (pixels) for the thumbnail
_MIN_SIDE = 640         # stop shrinking once either side is at or below this
_START_QUALITY = 86     # first JPEG quality that is tried
_MIN_QUALITY = 45       # lowest JPEG quality that is ever used
_QUALITY_STEP = 7       # quality decrement between attempts
_SHRINK_FACTOR = 0.82   # per-step scale factor once quality is exhausted


def _ratio_to_float(value):
    """Return *value* as a float.

    Values exposing ``numerator``/``denominator`` are divided explicitly;
    anything else is handed to ``float()``.
    """
    try:
        return float(value.numerator) / float(value.denominator)
    except AttributeError:
        return float(value)


def _gps_to_decimal(parts, ref):
    """Convert a (degrees, minutes, seconds) triple to signed decimal degrees.

    ``ref`` is the hemisphere letter; "S" and "W" make the result negative.
    The value is rounded to six decimal places and returned as a ``Decimal``.
    """
    degrees = _ratio_to_float(parts[0])
    minutes = _ratio_to_float(parts[1])
    seconds = _ratio_to_float(parts[2])
    result = degrees + minutes / 60 + seconds / 3600
    if ref in ("S", "W"):
        result = -result
    return Decimal(str(round(result, 6)))


def extract_gps(image):
    """Return ``(latitude, longitude)`` read from the image's EXIF GPS data.

    Both values are ``Decimal`` degrees. ``(None, None)`` is returned when the
    image has no usable GPS block, when the metadata cannot be parsed, or when
    the decoded coordinates fall outside the valid latitude/longitude range.
    This function never raises.
    """
    try:
        exif = image.getexif()
        gps = exif.get_ifd(GPS_TAG) if exif else None
        if not gps:
            return None, None
        # GPS IFD keys: 1/2 = latitude ref/value, 3/4 = longitude ref/value.
        lat, lat_ref = gps.get(2), gps.get(1)
        lng, lng_ref = gps.get(4), gps.get(3)
        if not (lat and lat_ref and lng and lng_ref):
            return None, None
        latitude = _gps_to_decimal(lat, lat_ref)
        longitude = _gps_to_decimal(lng, lng_ref)
        # Metadata comes from an untrusted upload: discard impossible values
        # (a NaN comparison raises and is handled by the except below).
        if abs(latitude) > 90 or abs(longitude) > 180:
            return None, None
        return latitude, longitude
    except Exception:
        # Best effort: malformed EXIF must never break the upload flow.
        return None, None


def _apply_exif_orientation(image):
    """Return *image* rotated/flipped according to its EXIF Orientation tag.

    The compressed JPEG is written without EXIF, so the orientation has to be
    baked into the pixels first. Falls back to the untouched image if the
    metadata cannot be processed.
    """
    try:
        transposed = ImageOps.exif_transpose(image)
    except Exception:
        return image
    return image if transposed is None else transposed


def _encode_jpeg(image, buffer, quality):
    """Overwrite *buffer* with *image* as a JPEG and return the byte count."""
    buffer.seek(0)
    buffer.truncate(0)
    image.save(
        buffer,
        format="JPEG",
        optimize=True,
        progressive=True,
        quality=quality,
    )
    return buffer.tell()


def compress_image(uploaded_file, base_name="property"):
    """Re-encode an uploaded image as a small RGB JPEG.

    The image is fitted into a 1600x1600 box, then encoded at decreasing JPEG
    quality (86 down to 51 in steps of 7) until it fits in ``MAX_BYTES``. If
    it still does not fit, it is repeatedly shrunk to 82% and re-encoded at
    quality 45 until it fits or either side reaches 640 pixels or less. The
    result may therefore still exceed ``MAX_BYTES`` for very detailed images.

    Args:
        uploaded_file: Seekable binary file-like object holding the image.
        base_name: Name whose stem is used for the output file name; an empty
            or missing value falls back to "property".

    Returns:
        A ``(result, error)`` tuple. On success ``result`` is a dict with the
        keys ``file`` (a ``ContentFile``), ``latitude``, ``longitude`` and
        ``size`` (bytes) and ``error`` is ``""``. On failure ``result`` is
        ``None`` and ``error`` is a user-facing message.
    """
    try:
        uploaded_file.seek(0)
        image = Image.open(uploaded_file)
        image.load()
    except (UnidentifiedImageError, OSError, Image.DecompressionBombError):
        # DecompressionBombError is not an OSError; without it an oversized
        # upload would escape as an unhandled exception.
        return None, "The uploaded file is not a readable image."

    # Read GPS before any transformation; the output carries no EXIF.
    latitude, longitude = extract_gps(image)
    image = _apply_exif_orientation(image).convert("RGB")
    image.thumbnail((_MAX_SIDE, _MAX_SIDE))

    output = io.BytesIO()
    size = 0

    # Phase 1: keep the dimensions, lower the quality.
    quality = _START_QUALITY
    while quality >= _MIN_QUALITY:
        size = _encode_jpeg(image, output, quality)
        if size <= MAX_BYTES:
            break
        quality -= _QUALITY_STEP

    # Phase 2: quality is exhausted, so shrink the image instead.
    floor_quality = max(quality, _MIN_QUALITY)
    while size > MAX_BYTES and image.width > _MIN_SIDE and image.height > _MIN_SIDE:
        image.thumbnail(
            (int(image.width * _SHRINK_FACTOR), int(image.height * _SHRINK_FACTOR))
        )
        size = _encode_jpeg(image, output, floor_quality)

    filename = f"{Path(base_name or '').stem or 'property'}-small.jpg"
    payload = output.getvalue()
    return {
        "file": ContentFile(payload, name=filename),
        "latitude": latitude,
        "longitude": longitude,
        "size": len(payload),
    }, ""


def ocr_text_best_effort(uploaded_file):
    """Use locally installed system OCR only. Returns blank when unavailable.

    The upload is copied to a temporary file and passed to the ``tesseract``
    executable (page segmentation mode 6, 8 second timeout). The recognised
    text is whitespace-normalised and truncated to 1200 characters. Any
    failure yields an empty string; this function never raises.
    """
    if not shutil.which("tesseract"):
        return ""
    try:
        # Uploads without a usable name still get OCR'd, with a default suffix.
        name = getattr(uploaded_file, "name", None)
        suffix = (Path(name).suffix if isinstance(name, str) else "") or ".jpg"

        uploaded_file.seek(0)
        with tempfile.NamedTemporaryFile(suffix=suffix) as tmp:
            tmp.write(uploaded_file.read())
            tmp.flush()  # make the bytes visible to the child process
            result = subprocess.run(
                ["tesseract", tmp.name, "stdout", "--psm", "6"],
                check=False,
                capture_output=True,
                text=True,
                # Decode explicitly so a non-UTF-8 locale cannot turn
                # non-ASCII OCR output into a swallowed decode error.
                encoding="utf-8",
                errors="replace",
                timeout=8,
            )
        return " ".join(result.stdout.split())[:1200]
    except Exception:
        # Best effort by design: OCR problems must not fail the request.
        return ""