refactor: clean up the refresh_dir code, update tests (#494)

* feat: take Ignore List into consideration when refreshing directory

* undo the extension check in refresh_dir
This commit is contained in:
yed
2024-09-13 04:54:20 +07:00
committed by GitHub
parent af642a7d29
commit 4942d1633c
8 changed files with 47 additions and 44 deletions

View File

@@ -23,8 +23,9 @@ jobs:
- name: Install dependencies
run: |
pip install -r requirements.txt
pip install mypy==1.11.2
python -m pip install --upgrade uv
uv pip install --system -r requirements.txt
uv pip install --system mypy==1.11.2
mkdir tagstudio/.mypy_cache
- uses: tsuyoshicho/action-mypy@v4

View File

@@ -25,7 +25,6 @@ from sqlalchemy.orm import (
selectinload,
make_transient,
)
from typing import TYPE_CHECKING
from .db import make_tables
from .enums import TagColor, FilterState, FieldTypeEnum
@@ -46,10 +45,6 @@ from ...constants import (
BACKUP_FOLDER_NAME,
)
if TYPE_CHECKING:
from ...utils.dupe_files import DupeRegistry
from ...utils.missing_files import MissingRegistry
LIBRARY_FILENAME: str = "ts_library.sqlite"
logger = structlog.get_logger(__name__)
@@ -100,11 +95,6 @@ class Library:
engine: Engine | None
folder: Folder | None
ignored_extensions: list[str]
missing_tracker: "MissingRegistry"
dupe_tracker: "DupeRegistry"
def close(self):
if self.engine:
self.engine.dispose()
@@ -182,9 +172,6 @@ class Library:
session.commit()
self.folder = folder
# load ignored extensions
self.ignored_extensions = self.prefs(LibraryPrefs.EXTENSION_LIST)
@property
def default_fields(self) -> list[BaseField]:
with Session(self.engine) as session:

View File

@@ -1,16 +1,19 @@
import time
from time import time
from collections.abc import Iterator
from dataclasses import dataclass, field
from pathlib import Path
import structlog
from src.core.constants import TS_FOLDER_NAME
from src.core.library import Library, Entry
logger = structlog.get_logger(__name__)
@dataclass
class RefreshDirTracker:
library: Library
dir_file_count: int = 0
files_not_in_library: list[Path] = field(default_factory=list)
@property
@@ -36,38 +39,40 @@ class RefreshDirTracker:
self.files_not_in_library = []
def refresh_dir(self) -> Iterator[int]:
def refresh_dir(self, lib_path: Path) -> Iterator[int]:
"""Scan a directory for files, and add those relative filenames to internal variables."""
if self.library.folder is None:
raise ValueError("No folder set.")
if self.library.library_dir is None:
raise ValueError("No library directory set.")
start_time_total = time()
start_time_loop = time()
start_time = time.time()
self.files_not_in_library = []
self.dir_file_count = 0
lib_path = self.library.folder.path
dir_file_count = 0
for path in lib_path.glob("**/*"):
str_path = str(path)
if (
path.is_dir()
or "$RECYCLE.BIN" in str_path
or TS_FOLDER_NAME in str_path
or "tagstudio_thumbs" in str_path
):
if path.is_dir():
continue
suffix = path.suffix.lower().lstrip(".")
if suffix in self.library.ignored_extensions:
if "$RECYCLE.BIN" in str_path or TS_FOLDER_NAME in str_path:
continue
self.dir_file_count += 1
dir_file_count += 1
relative_path = path.relative_to(lib_path)
# TODO - load these in batch somehow
if not self.library.has_path_entry(relative_path):
self.files_not_in_library.append(relative_path)
end_time = time.time()
# Yield output every 1/30 of a second
if (end_time - start_time) > 0.034:
yield self.dir_file_count
if (time() - start_time_loop) > 0.034:
yield dir_file_count
start_time_loop = time()
end_time_total = time()
logger.info(
"Directory scan time",
path=lib_path,
duration=(end_time_total - start_time_total),
new_files_count=dir_file_count,
)

View File

@@ -675,7 +675,7 @@ class QtDriver(QObject):
)
pw.show()
iterator = FunctionIterator(tracker.refresh_dir)
iterator = FunctionIterator(lambda: tracker.refresh_dir(self.lib.library_dir))
iterator.value.connect(
lambda x: (
pw.update_progress(x + 1),

View File

@@ -24,7 +24,7 @@ def cwd():
@pytest.fixture
def library(request):
# when no param is passed, use the default
library_path = "/tmp/"
library_path = "/dev/null/"
if hasattr(request, "param"):
if isinstance(request.param, TemporaryDirectory):
library_path = request.param.name

View File

@@ -7,6 +7,7 @@ CWD = pathlib.Path(__file__).parent
def test_refresh_dupe_files(library):
library.library_dir = "/tmp/"
entry = Entry(
folder=library.folder,
path=pathlib.Path("bar/foo.txt"),

View File

@@ -2,18 +2,24 @@ import pathlib
from tempfile import TemporaryDirectory
import pytest
from src.core.constants import LibraryPrefs
from src.core.utils.refresh_dir import RefreshDirTracker
CWD = pathlib.Path(__file__).parent
@pytest.mark.parametrize("exclude_mode", [True, False])
@pytest.mark.parametrize("library", [TemporaryDirectory()], indirect=True)
def test_refresh_new_files(library):
def test_refresh_new_files(library, exclude_mode):
# Given
library.set_prefs(LibraryPrefs.IS_EXCLUDE_LIST, exclude_mode)
library.set_prefs(LibraryPrefs.EXTENSION_LIST, [".md"])
registry = RefreshDirTracker(library=library)
(library.library_dir / "FOO.MD").touch()
# touch new files to simulate new files
(library.library_dir / "foo.md").touch()
# When
assert not list(registry.refresh_dir(library.library_dir))
assert not list(registry.refresh_dir())
assert registry.files_not_in_library == [pathlib.Path("foo.md")]
# Then
assert registry.files_not_in_library == [pathlib.Path("FOO.MD")]

View File

@@ -1,5 +1,7 @@
from pathlib import Path
from tempfile import TemporaryDirectory
import pytest
from src.core.library import Entry
from src.core.library.alchemy.enums import FieldTypeEnum
@@ -18,6 +20,7 @@ def test_update_widgets_not_selected(qt_driver, library):
assert panel.file_label.text() == "No Items Selected"
@pytest.mark.parametrize("library", [TemporaryDirectory()], indirect=True)
def test_update_widgets_single_selected(qt_driver, library):
qt_driver.frame_content = list(library.get_entries())
qt_driver.selected = [0]