Skip to content

Commit f7c6431

Browse files
committed
Ensure TemporaryFiles are always closed after use
1 parent 3c5a644 commit f7c6431

3 files changed

Lines changed: 162 additions & 179 deletions

File tree

isic/ingest/models/accession.py

Lines changed: 145 additions & 160 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,25 @@
1-
from collections.abc import Callable
1+
from collections.abc import Callable, Generator
2+
import contextlib
23
from copy import deepcopy
34
from dataclasses import dataclass
45
import io
56
import logging
67
from mimetypes import guess_type
7-
from pathlib import Path, PurePosixPath
8+
from pathlib import PurePosixPath
89
import tempfile
9-
from typing import Literal, TypeVar
10+
from typing import IO, Literal, TypeVar
1011
from uuid import uuid4
1112

1213
from django.contrib.auth.models import User
1314
from django.contrib.postgres.constraints import ExclusionConstraint
1415
from django.core.exceptions import ValidationError
15-
from django.core.files import File
1616
from django.core.files.storage import storages
1717
from django.core.files.uploadedfile import InMemoryUploadedFile
1818
from django.db import models, transaction
1919
from django.db.models import Deferrable, FileField, FloatField, IntegerField, Transform
2020
from django.db.models.constraints import CheckConstraint, UniqueConstraint
2121
from django.db.models.fields import Field
22+
from django.db.models.fields.files import FieldFile
2223
from django.db.models.functions import Cast, Round
2324
from django.db.models.query_utils import Q
2425
from isic_metadata.fields import ImageTypeEnum
@@ -42,6 +43,12 @@
4243

4344
logger = logging.getLogger(__name__)
4445

46+
# Set a larger max size, to accommodate confocal images
47+
# This uses ~1.1GB of memory
48+
PIL.Image.MAX_IMAGE_PIXELS = 20_000 * 20_000 * 3
49+
50+
gdal.UseExceptions()
51+
4552
# Set the GDAL raster block cache to a maximum of 128MB. This is a value that
4653
# reduces memory usage without noticeably impacting performance for the typical mosaic.
4754
gdal.SetCacheMax(128 * 1024**2)
@@ -205,15 +212,6 @@ class AccessionStatus(models.TextChoices):
205212
SUCCEEDED = "succeeded", "Succeeded"
206213

207214

208-
@dataclass
209-
class AccessionBlob:
210-
blob: File
211-
blob_size: int
212-
height: int
213-
width: int
214-
is_cog: bool
215-
216-
217215
def sponsored_blob_storage():
    """Return the storage registered under the ``"sponsored"`` alias in ``storages``."""
    sponsored = storages["sponsored"]
    return sponsored
219217

@@ -500,131 +498,14 @@ def metadata(self):
500498
ret[field.name] = getattr(self, field.name)
501499
return ret
502500

503-
@staticmethod
504-
def metadata_keys():
505-
return [
506-
field.name for field in Accession._meta.fields if hasattr(AccessionMetadata, field.name)
507-
]
508-
509-
@staticmethod
510-
def is_color(img: PIL.Image.Image) -> bool:
511-
return img.mode in {"RGB", "RGBA"}
512-
513-
@staticmethod
514-
def meets_cog_threshold(img: PIL.Image.Image) -> bool:
515-
return img.height * img.width > IMAGE_COG_THRESHOLD
516-
517501
def get_diagnosis_display(self) -> str:
    """
    Return the deepest diagnosis level that is set, or an empty string.

    Looks up ``diagnosis_1`` through ``diagnosis_5`` in ``self.metadata`` and
    returns the last of those values that is not ``None``. When none of the
    five values is truthy, ``""`` is returned instead.
    """
    # Read the metadata mapping once instead of once per diagnosis level.
    metadata = self.metadata
    diagnoses = [metadata.get(f"diagnosis_{level}") for level in range(1, 6)]
    if not any(diagnoses):
        return ""
    return [diagnosis for diagnosis in diagnoses if diagnosis is not None][-1]
523507

524-
def _generate_blob(self, img: PIL.Image.Image) -> AccessionBlob:
525-
# Explicitly load the image, so any decoding errors can be caught
526-
try:
527-
img.load()
528-
except OSError as e:
529-
if "image file is truncated" in str(e):
530-
raise InvalidBlobError("Blob appears truncated.") from e
531-
532-
# Any other errors are not expected, so re-raise them natively
533-
raise
534-
535-
# rotate the image bytes according to the orientation tag, stripping it in the process
536-
PIL.ImageOps.exif_transpose(img, in_place=True)
537-
538-
# Strip any alpha channel
539-
if self.is_color(img):
540-
img = img.convert("RGB")
541-
542-
with tempfile.SpooledTemporaryFile() as stripped_blob_stream:
543-
output_format = "PNG" if img.format == "PNG" and not self.is_color(img) else "JPEG"
544-
img.save(stripped_blob_stream, format=output_format)
545-
546-
stripped_blob_stream.seek(0, io.SEEK_END)
547-
stripped_blob_size = stripped_blob_stream.tell()
548-
stripped_blob_stream.seek(0)
549-
550-
blob_name = f"{uuid4()}.{'png' if output_format == 'PNG' else 'jpg'}"
551-
accession_blob = AccessionBlob(
552-
blob=InMemoryUploadedFile(
553-
file=stripped_blob_stream,
554-
field_name=None,
555-
name=blob_name,
556-
content_type=f"image/{output_format.lower()}",
557-
size=stripped_blob_size,
558-
charset=None,
559-
),
560-
blob_size=stripped_blob_size,
561-
height=img.height,
562-
width=img.width,
563-
is_cog=False,
564-
)
565-
self.blob = accession_blob.blob
566-
Accession._meta.get_field("blob").pre_save(self, add=False)
567-
568-
return accession_blob
569-
570-
def _generate_blob_as_cog(self, img: PIL.Image.Image) -> AccessionBlob:
571-
with (
572-
field_file_to_local_path(self.original_blob) as original_blob_path,
573-
tempfile.NamedTemporaryFile(delete=False) as cog_temp_file,
574-
):
575-
gdal.UseExceptions()
576-
577-
src_ds = gdal.Open(original_blob_path)
578-
579-
gdal.Translate(
580-
cog_temp_file.name,
581-
src_ds,
582-
options=gdal.TranslateOptions(
583-
format="COG",
584-
# rescale unsigned 16-bit png band to 8-bit
585-
outputType=gdal.GDT_Byte,
586-
scaleParams=[[0, 2**16 - 1, 0, 2**8 - 1]],
587-
creationOptions={
588-
"BLOCKSIZE": 256,
589-
"COMPRESS": "DEFLATE",
590-
"PREDICTOR": "YES",
591-
"LEVEL": "9",
592-
"BIGTIFF": "IF_SAFER",
593-
# Strip EXIF metadata
594-
"COPY_SRC_MDD": "NO",
595-
},
596-
resampleAlg=gdal.GRA_Lanczos,
597-
),
598-
)
599-
600-
# necessary to close the src_ds (https://gis.stackexchange.com/a/80370)
601-
del src_ds
602-
603-
blob_size = Path(cog_temp_file.name).stat().st_size
604-
with Path(cog_temp_file.name).open("rb") as cog_stream:
605-
blob_name = f"{uuid4()}.tif"
606-
accession_blob = AccessionBlob(
607-
blob=InMemoryUploadedFile(
608-
file=cog_stream,
609-
field_name=None,
610-
name=blob_name,
611-
content_type="image/tiff",
612-
size=blob_size,
613-
charset=None,
614-
),
615-
blob_size=blob_size,
616-
height=img.height,
617-
width=img.width,
618-
is_cog=True,
619-
)
620-
self.blob = accession_blob.blob
621-
Accession._meta.get_field("blob").pre_save(self, add=False)
622-
623-
Path(cog_temp_file.name).unlink()
624-
625-
return accession_blob
626-
627-
def generate_blob(self):
508+
def generate_blob(self): # noqa: PLR0915
628509
"""
629510
Generate `blob` and set related attributes.
630511
@@ -634,34 +515,57 @@ def generate_blob(self):
634515
try:
635516
with self.original_blob.open("rb") as original_blob_stream:
636517
blob_mime_type = guess_mime_type(original_blob_stream, self.original_blob_name)
637-
blob_major_mime_type = blob_mime_type.partition("/")[0]
638-
if blob_major_mime_type != "image":
639-
raise InvalidBlobError( # noqa: TRY301
640-
f'Blob has a non-image MIME type: "{blob_mime_type}"'
641-
)
642-
643-
# Set a larger max size, to accommodate confocal images
644-
# This uses ~1.1GB of memory
645-
PIL.Image.MAX_IMAGE_PIXELS = 20_000 * 20_000 * 3
646-
try:
647-
img = PIL.Image.open(original_blob_stream)
648-
except PIL.Image.UnidentifiedImageError as e:
649-
raise InvalidBlobError("Blob cannot be recognized by PIL.") from e
650-
651-
if self.meets_cog_threshold(img):
652-
if self.is_color(img):
653-
raise InvalidBlobError("Blob is too large to be stored.") # noqa: TRY301
518+
if blob_mime_type.partition("/")[0] != "image":
519+
raise InvalidBlobError( # noqa: TRY301
520+
f'Blob has a non-image MIME type: "{blob_mime_type}"'
521+
)
654522

655-
accession_blob = self._generate_blob_as_cog(img)
656-
else:
657-
accession_blob = self._generate_blob(img)
523+
try:
524+
img = PIL.Image.open(original_blob_stream)
525+
except PIL.Image.UnidentifiedImageError as e:
526+
raise InvalidBlobError("Blob cannot be recognized by PIL.") from e
527+
self.height = img.height
528+
self.width = img.width
529+
530+
is_rcm = not self.is_color(img)
531+
532+
if is_rcm:
533+
self.is_cog = self.meets_cog_threshold(img)
534+
if self.is_cog:
535+
converter = self._convert_blog_to_cog
536+
converted_blob_type = "image/tiff"
537+
converted_blob_extension = "tif"
538+
else:
539+
converter = self._convert_blob_to_png
540+
converted_blob_type = "image/png"
541+
converted_blob_extension = "png"
542+
else:
543+
if self.meets_cog_threshold(img):
544+
raise InvalidBlobError("Blob is too large to be stored.") # noqa: TRY301
545+
self.is_cog = False
546+
converter = self._convert_blob_to_rgb
547+
converted_blob_type = "image/jpeg"
548+
converted_blob_extension = "jpg"
549+
550+
with converter(self.original_blob) as converted_blob_stream:
551+
converted_blob_name = f"{uuid4()}.{converted_blob_extension}"
552+
553+
converted_blob_stream.seek(0, io.SEEK_END)
554+
converted_blob_size = converted_blob_stream.tell()
555+
self.blob_size = converted_blob_size
556+
557+
self.blob = InMemoryUploadedFile(
558+
file=converted_blob_stream,
559+
field_name=None,
560+
name=converted_blob_name,
561+
content_type=converted_blob_type,
562+
size=converted_blob_size,
563+
charset=None,
564+
)
658565

659-
self.blob_size = accession_blob.blob_size
660-
self.height = accession_blob.height
661-
self.width = accession_blob.width
662-
self.is_cog = accession_blob.is_cog
566+
Accession._meta.get_field("blob").pre_save(self, add=False)
663567

664-
self.save(update_fields=["blob", "blob_size", "height", "width", "is_cog"])
568+
self.save(update_fields=["blob", "blob_size", "height", "width", "is_cog"])
665569

666570
self.generate_thumbnail()
667571
except InvalidBlobError:
@@ -752,10 +656,6 @@ def from_blob(cls, blob: Blob):
752656
),
753657
)
754658

755-
@staticmethod
756-
def _age_approx(age: int) -> int:
757-
return int(round(age / 5.0) * 5)
758-
759659
@property
760660
def age_approx(self) -> int | None:
761661
return self._age_approx(self.metadata["age"]) if "age" in self.metadata else None
@@ -932,3 +832,88 @@ def full_clean(self, *args, **kwargs):
932832
raise Exception("unstructured_metadata is required")
933833

934834
super().full_clean(*args, **kwargs)
835+
836+
@staticmethod
837+
def _age_approx(age: int) -> int:
838+
return int(round(age / 5.0) * 5)
839+
840+
@staticmethod
def metadata_keys():
    """Return the names of ``Accession`` model fields that are also attributes of ``AccessionMetadata``."""
    keys = []
    for model_field in Accession._meta.fields:
        if hasattr(AccessionMetadata, model_field.name):
            keys.append(model_field.name)
    return keys
845+
846+
@staticmethod
def is_color(img: PIL.Image.Image) -> bool:
    """Report whether the image's mode is ``"RGB"`` or ``"RGBA"``."""
    color_modes = {"RGB", "RGBA"}
    return img.mode in color_modes
849+
850+
@staticmethod
def meets_cog_threshold(img: PIL.Image.Image) -> bool:
    """Report whether the image's pixel count (height x width) exceeds ``IMAGE_COG_THRESHOLD``."""
    pixel_count = img.height * img.width
    return pixel_count > IMAGE_COG_THRESHOLD
853+
854+
@staticmethod
def _ensure_pil_image(img: PIL.Image.Image) -> None:
    """
    Force PIL to decode the image's pixel data now, so decoding errors surface here.

    Raises:
        InvalidBlobError: if loading fails with an ``OSError`` whose message
            contains "image file is truncated".
        OSError: any other load failure is propagated unchanged.
    """
    try:
        img.load()
    except OSError as load_error:
        is_truncated = "image file is truncated" in str(load_error)
        if not is_truncated:
            # Anything else is unexpected, so let it propagate natively
            raise
        raise InvalidBlobError("Blob appears truncated.") from load_error
864+
865+
@staticmethod
@contextlib.contextmanager
def _convert_blob_to_rgb(blob: FieldFile) -> Generator[IO[bytes]]:
    """
    Re-encode ``blob`` as an RGB JPEG and yield the stream holding the result.

    The image is fully decoded, transposed according to its EXIF orientation
    tag, converted to mode ``"RGB"`` and saved as JPEG into a
    ``SpooledTemporaryFile``. The stream is yielded without being rewound and
    is closed when the context exits.

    Raises:
        InvalidBlobError: propagated from ``_ensure_pil_image`` when the blob
            appears truncated.
    """
    img = PIL.Image.open(blob)
    Accession._ensure_pil_image(img)

    # Rotate the image bytes according to the orientation tag, stripping it in the process
    PIL.ImageOps.exif_transpose(img, in_place=True)

    # Strip any alpha channel
    img = img.convert("RGB")

    with tempfile.SpooledTemporaryFile() as converted_blob_stream:
        img.save(converted_blob_stream, format="JPEG")
        # The generator frame stays suspended at the yield for as long as the
        # caller uses the stream; drop the decoded image first so its pixel
        # buffer is not kept alive for that whole time.
        del img
        yield converted_blob_stream
880+
881+
@staticmethod
@contextlib.contextmanager
def _convert_blob_to_png(blob: FieldFile) -> Generator[IO[bytes]]:
    """
    Re-encode ``blob`` as a PNG and yield the stream holding the result.

    The image is fully decoded and saved as PNG into a
    ``SpooledTemporaryFile``. The stream is yielded without being rewound and
    is closed when the context exits.

    Raises:
        InvalidBlobError: propagated from ``_ensure_pil_image`` when the blob
            appears truncated.
    """
    img = PIL.Image.open(blob)
    Accession._ensure_pil_image(img)

    with tempfile.SpooledTemporaryFile() as converted_blob_stream:
        img.save(converted_blob_stream, format="PNG")
        # The generator frame stays suspended at the yield for as long as the
        # caller uses the stream; drop the decoded image first so its pixel
        # buffer is not kept alive for that whole time.
        del img
        yield converted_blob_stream
890+
891+
@staticmethod
@contextlib.contextmanager
def _convert_blog_to_cog(blob: FieldFile) -> Generator[IO[bytes]]:
    """
    Translate ``blob`` into a COG with GDAL and yield the temporary file it was written to.

    GDAL writes to the ``NamedTemporaryFile`` by path; the open file object is
    then yielded and closed when the context exits. The source dataset and the
    local copy of the blob are released before the yield.

    NOTE: the method name is spelled ``blog`` (not ``blob``); ``generate_blob``
    refers to it by this spelling.
    """
    with tempfile.NamedTemporaryFile() as cog_file:
        with (
            field_file_to_local_path(blob) as local_blob_path,
            gdal.Open(local_blob_path) as source_dataset,
        ):
            creation_options = {
                "BLOCKSIZE": 256,
                "COMPRESS": "DEFLATE",
                "PREDICTOR": "YES",
                "LEVEL": "9",
                "BIGTIFF": "IF_SAFER",
                # Strip EXIF metadata
                "COPY_SRC_MDD": "NO",
            }
            cog_options = gdal.TranslateOptions(
                format="COG",
                # rescale unsigned 16-bit png band to 8-bit
                outputType=gdal.GDT_Byte,
                scaleParams=[[0, 2**16 - 1, 0, 2**8 - 1]],
                creationOptions=creation_options,
                resampleAlg=gdal.GRA_Lanczos,
            )
            gdal.Translate(cog_file.name, source_dataset, options=cog_options)
        yield cog_file

0 commit comments

Comments
 (0)