105 lines
3.1 KiB
Python
105 lines
3.1 KiB
Python
import io
|
|
import shutil
|
|
import subprocess
|
|
import tempfile
|
|
from decimal import Decimal
|
|
from pathlib import Path
|
|
|
|
from django.core.files.base import ContentFile
|
|
from PIL import Image, UnidentifiedImageError
|
|
|
|
# EXIF tag id (GPSInfo, 0x8825 == 34853) used to look up the GPS sub-IFD.
GPS_TAG = 0x8825

# Target ceiling, in bytes, for the re-encoded JPEG (256 KiB).
MAX_BYTES = 1024 * 256
|
|
|
|
|
|
def _ratio_to_float(value):
    """Convert a rational-like or plain numeric value to ``float``.

    Objects exposing ``numerator`` and ``denominator`` are divided
    explicitly; anything else is handed to ``float()`` directly. A zero
    denominator raises ``ZeroDivisionError``, which is left to the caller.
    """
    try:
        top = float(value.numerator)
        bottom = float(value.denominator)
    except AttributeError:
        # Not a rational (e.g. a plain float or numeric string).
        return float(value)
    return top / bottom
|
|
|
|
|
|
def _gps_to_decimal(parts, ref):
    """Turn a (degrees, minutes, seconds) triple into signed decimal degrees.

    ``parts`` is indexed at 0, 1 and 2; each entry is converted with
    ``_ratio_to_float``. A ``ref`` of ``"S"`` or ``"W"`` negates the result.
    The value is rounded to six decimal places and returned as a ``Decimal``.
    """
    deg, mins, secs = (_ratio_to_float(parts[i]) for i in range(3))
    magnitude = deg + mins / 60 + secs / 3600
    # Southern and western hemispheres are expressed as negative coordinates.
    signed = -magnitude if ref in ("S", "W") else magnitude
    return Decimal(str(round(signed, 6)))
|
|
|
|
|
|
def extract_gps(image):
    """Read latitude/longitude from an image's EXIF GPS data, best effort.

    Returns a ``(latitude, longitude)`` pair produced by ``_gps_to_decimal``,
    or ``(None, None)`` when the image has no EXIF, no GPS IFD, an incomplete
    set of GPS fields, or when anything at all goes wrong while reading them.
    """
    missing = (None, None)
    try:
        exif = image.getexif()
        if not exif:
            return missing
        gps = exif.get_ifd(GPS_TAG)
        if not gps:
            return missing

        # GPS IFD keys: 1 = latitude ref, 2 = latitude,
        #               3 = longitude ref, 4 = longitude.
        lat, lat_ref = gps.get(2), gps.get(1)
        lng, lng_ref = gps.get(4), gps.get(3)
        if not lat or not lat_ref or not lng or not lng_ref:
            return missing

        return _gps_to_decimal(lat, lat_ref), _gps_to_decimal(lng, lng_ref)
    except Exception:
        # Location is optional metadata; never let it break the caller.
        return missing
|
|
|
|
|
|
def _write_jpeg(image, buffer, quality):
    """Overwrite ``buffer`` with ``image`` as an optimized progressive JPEG.

    Returns the number of bytes written.
    """
    buffer.seek(0)
    buffer.truncate(0)
    image.save(buffer, format="JPEG", optimize=True, progressive=True, quality=quality)
    return buffer.tell()


def compress_image(uploaded_file, base_name="property"):
    """Re-encode an uploaded image as a small JPEG and pull out its GPS position.

    The image is opened, its EXIF GPS coordinates are read, the EXIF
    orientation is applied to the pixels, and the picture is converted to RGB
    and bounded to 1600x1600. It is then encoded at decreasing JPEG quality
    (86 down in steps of 7, never below 45) until it fits in ``MAX_BYTES``.
    If it still does not fit, it is repeatedly shrunk to 82% of its size and
    re-encoded until it fits or either side is 640 px or less, so the result
    may still exceed ``MAX_BYTES`` for unusually dense images.

    Args:
        uploaded_file: Seekable binary file-like object holding the upload.
        base_name: Name whose stem is used for the output filename; falls back
            to ``"property"`` when the stem is empty.

    Returns:
        ``(result, error)``. On success ``result`` is a dict with keys
        ``"file"`` (a ``ContentFile`` named ``<stem>-small.jpg``),
        ``"latitude"``, ``"longitude"`` and ``"size"`` (bytes), and ``error``
        is ``""``. If the file cannot be opened as an image, ``result`` is
        ``None`` and ``error`` is a user-facing message.
    """
    # Function-scope import keeps this block self-contained; PIL is already a
    # dependency of this module.
    from PIL import ImageOps

    try:
        uploaded_file.seek(0)
        image = Image.open(uploaded_file)
        image.load()
    except (
        UnidentifiedImageError,
        OSError,
        # Image.open() documents ValueError and DecompressionBombError; format
        # plugins may also raise SyntaxError on corrupt data. None of these
        # derive from OSError, so they must be listed to reach the error path.
        ValueError,
        SyntaxError,
        Image.DecompressionBombError,
    ):
        return None, "The uploaded file is not a readable image."

    # Read GPS before any transformation: derived images do not carry EXIF.
    latitude, longitude = extract_gps(image)

    # The JPEG written below has no EXIF, so the orientation tag must be baked
    # into the pixels or rotated photos would be stored sideways.
    try:
        image = ImageOps.exif_transpose(image) or image
    except Exception:
        # Orientation is cosmetic; malformed EXIF must not reject the upload.
        pass

    image = image.convert("RGB")
    image.thumbnail((1600, 1600))

    output = io.BytesIO()

    # Phase 1: lower the quality until the encoded image fits.
    quality = 86
    while quality >= 45:
        if _write_jpeg(image, output, quality) <= MAX_BYTES:
            break
        quality -= 7

    # Phase 2: still too large, so shrink the pixels at the floor quality.
    floor_quality = max(quality, 45)
    while output.tell() > MAX_BYTES and image.width > 640 and image.height > 640:
        image.thumbnail((int(image.width * 0.82), int(image.height * 0.82)))
        _write_jpeg(image, output, floor_quality)

    data = output.getvalue()
    filename = f"{Path(base_name).stem or 'property'}-small.jpg"
    return {
        "file": ContentFile(data, name=filename),
        "latitude": latitude,
        "longitude": longitude,
        # Measured from the payload itself rather than the stream position.
        "size": len(data),
    }, ""
|
|
|
|
|
|
def ocr_text_best_effort(uploaded_file):
    """Use locally installed system OCR only. Returns blank when unavailable.

    The upload is copied to a temporary file and passed to the ``tesseract``
    executable found on ``PATH`` (page segmentation mode 6, 8 second timeout).
    The recognised text is whitespace-normalised and truncated to 1200
    characters. Any failure (no tesseract, unreadable upload, timeout,
    temp-file error) yields ``""`` rather than an exception.

    Args:
        uploaded_file: Seekable binary file-like object with a ``name``
            attribute; its suffix is reused for the temporary file
            (``.jpg`` when there is none). The stream is left rewound to the
            start once its content has been read.

    Returns:
        The extracted text, or ``""`` when OCR is unavailable or fails.
    """
    tesseract = shutil.which("tesseract")
    if not tesseract:
        return ""
    try:
        uploaded_file.seek(0)
        payload = uploaded_file.read()
        # Rewind so later consumers of the same upload do not start at EOF.
        uploaded_file.seek(0)

        suffix = Path(uploaded_file.name).suffix or ".jpg"
        with tempfile.NamedTemporaryFile(suffix=suffix) as tmp:
            tmp.write(payload)
            tmp.flush()
            result = subprocess.run(
                # Reuse the resolved path instead of a second PATH lookup.
                [tesseract, tmp.name, "stdout", "--psm", "6"],
                check=False,
                capture_output=True,
                # Decode explicitly as UTF-8 rather than relying on the
                # process locale; undecodable bytes are replaced, not fatal.
                encoding="utf-8",
                errors="replace",
                timeout=8,
            )
        return " ".join(result.stdout.split())[:1200]
    except Exception:
        # Deliberate best effort: OCR is optional and must never raise.
        return ""
|