Fix Raw Image Handling and Improve Text File Encoding Compatibility (#233)

* Fix text and RAW image handling

- Fix RAW images not being loaded correctly in the preview panel
- Fix trying to read size data from null images
- Refactor `os.stat` to `<Path object>.stat()`
- Remove unnecessary upper/lower conversions
- Improve encoding compatibility beyond UTF-8 when reading text files
- Code cleanup

* Use chardet for character encoding detection
This commit is contained in:
Travis Abendshien
2024-06-02 20:18:40 -07:00
committed by GitHub
parent 0137ed5be8
commit 0646508c24
5 changed files with 50 additions and 31 deletions

View File

@@ -8,3 +8,4 @@ typing_extensions>=3.10.0.0,<=4.11.0
ujson>=5.8.0,<=5.9.0
rawpy==0.21.0
pillow-heif==0.16.0
chardet==5.2.0

View File

@@ -856,10 +856,7 @@ class Library:
# for type in TYPES:
start_time = time.time()
for f in self.library_dir.glob("**/*"):
# p = Path(os.path.normpath(f))
try:
if f.is_dir():
print(f)
if (
"$RECYCLE.BIN" not in f.parts
and TS_FOLDER_NAME not in f.parts
@@ -878,14 +875,11 @@ class Library:
logging.info(
f"The File/Folder {f} cannot be accessed, because it requires higher permission!"
)
# sys.stdout.write(f'\r[LIBRARY] {self.dir_file_count} files found in "{self.library_dir}"...')
# sys.stdout.flush()
end_time = time.time()
# Yield output every 1/30 of a second
if (end_time - start_time) > 0.034:
yield self.dir_file_count
start_time = time.time()
# print('')
# Sorts the files by date modified, descending.
if len(self.files_not_in_library) <= 100000:
try:
@@ -895,12 +889,12 @@ class Library:
)
except (FileExistsError, FileNotFoundError):
print(
f"[LIBRARY] [ERROR] Couldn't sort files, some were moved during the scanning/sorting process."
"[LIBRARY] [ERROR] Couldn't sort files, some were moved during the scanning/sorting process."
)
pass
else:
print(
f"[LIBRARY][INFO] Not bothering to sort files because there's OVER 100,000! Better sorting methods will be added in the future."
"[LIBRARY][INFO] Not bothering to sort files because there's OVER 100,000! Better sorting methods will be added in the future."
)
def refresh_missing_files(self):

View File

@@ -0,0 +1,27 @@
# Copyright (C) 2024 Travis Abendshien (CyanVoxel).
# Licensed under the GPL-3.0 License.
# Created for TagStudio: https://github.com/CyanVoxel/TagStudio
from chardet.universaldetector import UniversalDetector
from pathlib import Path
def detect_char_encoding(filepath: Path) -> str | None:
    """Attempt to detect the character encoding of a text file.

    The file is opened in binary mode and fed line by line to chardet's
    ``UniversalDetector``. Feeding stops as soon as the detector reports
    that it is done, so the rest of the file is not read.

    Args:
        filepath (Path): The path of the text file to analyze.

    Returns:
        str | None: The detected character encoding, if any.
    """
    detector = UniversalDetector()
    with open(filepath, "rb") as text_file:
        # Iterate the file object lazily rather than calling readlines():
        # readlines() loads the whole file into memory up front, which makes
        # the early exit below pointless for large files.
        for line in text_file:
            detector.feed(line)
            if detector.done:
                break
    # close() finalizes the detection and populates detector.result.
    detector.close()
    return detector.result["encoding"]

View File

@@ -3,7 +3,6 @@
# Created for TagStudio: https://github.com/CyanVoxel/TagStudio
import logging
import os
from pathlib import Path
import time
import typing
@@ -41,7 +40,6 @@ from src.qt.widgets.text import TextWidget
from src.qt.widgets.panel import PanelModal
from src.qt.widgets.text_box_edit import EditTextBox
from src.qt.widgets.text_line_edit import EditTextLine
from src.qt.widgets.item_thumb import ItemThumb
from src.qt.widgets.video_player import VideoPlayer
@@ -49,9 +47,9 @@ from src.qt.widgets.video_player import VideoPlayer
if typing.TYPE_CHECKING:
from src.qt.ts_qt import QtDriver
ERROR = f"[ERROR]"
WARNING = f"[WARNING]"
INFO = f"[INFO]"
ERROR = "[ERROR]"
WARNING = "[WARNING]"
INFO = "[INFO]"
logging.basicConfig(format="%(message)s", level=logging.INFO)
@@ -443,7 +441,7 @@ class PreviewPanel(QWidget):
# 0 Selected Items
if not self.driver.selected:
if self.selected or not self.initialized:
self.file_label.setText(f"No Items Selected")
self.file_label.setText("No Items Selected")
self.file_label.setFilePath("")
self.file_label.setCursor(Qt.CursorShape.ArrowCursor)
@@ -516,7 +514,7 @@ class PreviewPanel(QWidget):
image = Image.open(str(filepath))
elif filepath.suffix.lower() in RAW_IMAGE_TYPES:
try:
with rawpy.imread(filepath) as raw:
with rawpy.imread(str(filepath)) as raw:
rgb = raw.postprocess()
image = Image.new(
"L", (rgb.shape[1], rgb.shape[0]), color="black"
@@ -546,30 +544,28 @@ class PreviewPanel(QWidget):
self.preview_vid.show()
# Stats for specific file types are displayed here.
if filepath.suffix.lower() in (
if image and filepath.suffix.lower() in (
IMAGE_TYPES + VIDEO_TYPES + RAW_IMAGE_TYPES
):
self.dimensions_label.setText(
f"{filepath.suffix.lower().upper()[1:]}{format_size(os.stat(filepath).st_size)}\n{image.width} x {image.height} px"
f"{filepath.suffix.upper()[1:]}{format_size(filepath.stat().st_size)}\n{image.width} x {image.height} px"
)
else:
self.dimensions_label.setText(
f"{filepath.suffix.lower().upper()[1:]}{format_size(os.stat(filepath).st_size)}"
f"{filepath.suffix.upper()[1:]}{format_size(filepath.stat().st_size)}"
)
if not filepath.is_file():
raise FileNotFoundError
except FileNotFoundError as e:
self.dimensions_label.setText(
f"{filepath.suffix.lower().upper()[1:]}"
)
self.dimensions_label.setText(f"{filepath.suffix.upper()[1:]}")
logging.info(
f"[PreviewPanel][ERROR] Couldn't Render thumbnail for {filepath} (because of {e})"
)
except (FileNotFoundError, cv2.error) as e:
self.dimensions_label.setText(f"{extension.upper()}")
self.dimensions_label.setText(f"{filepath.suffix.upper()}")
logging.info(
f"[PreviewPanel][ERROR] Couldn't Render thumbnail for {filepath} (because of {e})"
)
@@ -578,7 +574,7 @@ class PreviewPanel(QWidget):
DecompressionBombError,
) as e:
self.dimensions_label.setText(
f"{filepath.suffix.lower().upper()[1:]}{format_size(os.stat(filepath).st_size)}"
f"{filepath.suffix.upper()[1:]}{format_size(filepath.stat().st_size)}"
)
logging.info(
f"[PreviewPanel][ERROR] Couldn't Render thumbnail for {filepath} (because of {e})"
@@ -773,7 +769,7 @@ class PreviewPanel(QWidget):
self.tags_updated.disconnect()
except RuntimeError:
pass
logging.info(f"[UPDATE CONTAINER] Setting tags updated slot")
logging.info("[UPDATE CONTAINER] Setting tags updated slot")
self.tags_updated.connect(slot)
# def write_container(self, item:Union[Entry, Collation, Tag], index, field):

View File

@@ -5,7 +5,6 @@
import logging
import math
import os
from pathlib import Path
import cv2
@@ -30,6 +29,7 @@ from src.core.constants import (
IMAGE_TYPES,
RAW_IMAGE_TYPES,
)
from src.core.utils.encoding import detect_char_encoding
ImageFile.LOAD_TRUNCATED_IMAGES = True
@@ -134,7 +134,7 @@ class ThumbRenderer(QObject):
image = ImageOps.exif_transpose(image)
except DecompressionBombError as e:
logging.info(
f"[ThumbRenderer]{WARNING} Couldn't Render thumbnail for {_filepath} (because of {e})"
f"[ThumbRenderer]{WARNING} Couldn't Render thumbnail for {_filepath.name} ({type(e).__name__})"
)
elif _filepath.suffix.lower() in RAW_IMAGE_TYPES:
@@ -149,14 +149,14 @@ class ThumbRenderer(QObject):
)
except DecompressionBombError as e:
logging.info(
f"[ThumbRenderer]{WARNING} Couldn't Render thumbnail for {_filepath} (because of {e})"
f"[ThumbRenderer]{WARNING} Couldn't Render thumbnail for {_filepath.name} ({type(e).__name__})"
)
except (
rawpy._rawpy.LibRawIOError,
rawpy._rawpy.LibRawFileUnsupportedError,
):
) as e:
logging.info(
f"[ThumbRenderer]{ERROR} Couldn't Render thumbnail for raw image {_filepath}"
f"[ThumbRenderer]{ERROR} Couldn't Render thumbnail for raw image {_filepath.name} ({type(e).__name__})"
)
# Videos =======================================================
@@ -178,7 +178,8 @@ class ThumbRenderer(QObject):
# Plain Text ===================================================
elif _filepath.suffix.lower() in PLAINTEXT_TYPES:
with open(_filepath, "r", encoding="utf-8") as text_file:
encoding = detect_char_encoding(_filepath)
with open(_filepath, "r", encoding=encoding) as text_file:
text = text_file.read(256)
bg = Image.new("RGB", (256, 256), color="#1e1e1e")
draw = ImageDraw.Draw(bg)
@@ -268,7 +269,7 @@ class ThumbRenderer(QObject):
) as e:
if e is not UnicodeDecodeError:
logging.info(
f"[ThumbRenderer]{ERROR}: Couldn't render thumbnail for {_filepath} ({e})"
f"[ThumbRenderer]{ERROR}: Couldn't render thumbnail for {_filepath.name} ({type(e).__name__})"
)
if update_on_ratio_change:
self.updated_ratio.emit(1)