From eecb4d3e380dae987b6a0f56b352683c973ab912 Mon Sep 17 00:00:00 2001 From: Travis Abendshien <46939827+CyanVoxel@users.noreply.github.com> Date: Fri, 5 Sep 2025 13:38:02 -0700 Subject: [PATCH] fix: account for leading slash pattern in wcmatch (#1092) --- src/tagstudio/core/library/ignore.py | 13 +++++++ src/tagstudio/core/utils/refresh_dir.py | 45 +++++++++++++------------ 2 files changed, 37 insertions(+), 21 deletions(-) diff --git a/src/tagstudio/core/library/ignore.py b/src/tagstudio/core/library/ignore.py index 66237cb2..af850b4e 100644 --- a/src/tagstudio/core/library/ignore.py +++ b/src/tagstudio/core/library/ignore.py @@ -44,7 +44,9 @@ def ignore_to_glob(ignore_patterns: list[str]) -> list[str]: ignore_patterns (list[str]): The .gitignore-like patterns to convert. """ glob_patterns: list[str] = deepcopy(ignore_patterns) + glob_patterns_remove: list[str] = [] additional_patterns: list[str] = [] + root_patterns: list[str] = [] # Mimic implicit .gitignore syntax behavior for the SQLite GLOB function. for pattern in glob_patterns: @@ -66,6 +68,16 @@ def ignore_to_glob(ignore_patterns: list[str]) -> list[str]: gp = gp.removeprefix("**/").removeprefix("*/") additional_patterns.append(exclusion_char + gp) + elif gp.startswith("/"): + # Matches "/file" case for .gitignore behavior where it should only match + # a file or folder in the root directory, and nowhere else. + glob_patterns_remove.append(gp) + gp = gp.lstrip("/") + root_patterns.append(exclusion_char + gp) + + for gp in glob_patterns_remove: + glob_patterns.remove(gp) + glob_patterns = glob_patterns + additional_patterns # Add "/**" suffix to suffix-less patterns to match implicit .gitignore behavior. 
@@ -75,6 +87,7 @@ def ignore_to_glob(ignore_patterns: list[str]) -> list[str]: glob_patterns.append(pattern.removesuffix("/*").removesuffix("/") + "/**") + glob_patterns = glob_patterns + root_patterns glob_patterns = list(set(glob_patterns)) logger.info("[Ignore]", glob_patterns=glob_patterns) diff --git a/src/tagstudio/core/utils/refresh_dir.py b/src/tagstudio/core/utils/refresh_dir.py index beec03b2..f32e822d 100644 --- a/src/tagstudio/core/utils/refresh_dir.py +++ b/src/tagstudio/core/utils/refresh_dir.py @@ -162,31 +162,34 @@ class RefreshDirTracker: logger.info("[Refresh]: Falling back to wcmatch for scanning") - for f in pathlib.Path(str(library_dir)).glob( - "***/*", flags=PATH_GLOB_FLAGS, exclude=ignore_patterns - ): - end_time_loop = time() - # Yield output every 1/30 of a second - if (end_time_loop - start_time_loop) > 0.034: - yield dir_file_count - start_time_loop = time() + try: + for f in pathlib.Path(str(library_dir)).glob( + "***/*", flags=PATH_GLOB_FLAGS, exclude=ignore_patterns + ): + end_time_loop = time() + # Yield output every 1/30 of a second + if (end_time_loop - start_time_loop) > 0.034: + yield dir_file_count + start_time_loop = time() + + # Skip if the file/path is already mapped in the Library + if f in self.library.included_files: + dir_file_count += 1 + continue + + # Ignore if the file is a directory + if f.is_dir(): + continue - # Skip if the file/path is already mapped in the Library - if f in self.library.included_files: dir_file_count += 1 - continue + self.library.included_files.add(f) - # Ignore if the file is a directory - if f.is_dir(): - continue + relative_path = f.relative_to(library_dir) - dir_file_count += 1 - self.library.included_files.add(f) - - relative_path = f.relative_to(library_dir) - - if not self.library.has_path_entry(relative_path): - self.files_not_in_library.append(relative_path) + if not self.library.has_path_entry(relative_path): + self.files_not_in_library.append(relative_path) + except ValueError: + 
logger.info("[Refresh]: ValueError when refreshing directory with wcmatch!") end_time_total = time() yield dir_file_count