Coverage for lintro / utils / path_filtering.py: 90%
78 statements
« prev ^ index » next coverage.py v7.13.0, created at 2026-04-03 18:53 +0000
« prev ^ index » next coverage.py v7.13.0, created at 2026-04-03 18:53 +0000
1"""Path filtering and file discovery utilities.
3Functions for filtering paths, walking directories, and excluding files based on
4patterns. Uses pathspec library for gitignore-style pattern matching.
5"""
7import fnmatch
8import os
9from functools import lru_cache
10from typing import TYPE_CHECKING
12import pathspec
14if TYPE_CHECKING:
15 pass
@lru_cache(maxsize=32)
def _compile_pathspec(patterns_tuple: tuple[str, ...]) -> pathspec.PathSpec:
    """Build a PathSpec matcher for the given patterns, memoizing the result.

    The patterns are taken as a tuple so the argument is hashable and can be
    used as the ``lru_cache`` key; up to 32 distinct pattern sets are kept.

    Args:
        patterns_tuple: Gitignore-style patterns to compile.

    Returns:
        pathspec.PathSpec: Matcher compiled from ``patterns_tuple``.
    """
    compiled = pathspec.PathSpec.from_lines("gitwildmatch", patterns_tuple)
    return compiled
def should_exclude_path(
    path: str,
    exclude_patterns: list[str],
) -> bool:
    """Check if a path should be excluded based on patterns.

    The path is made absolute, its backslashes are converted to forward
    slashes, and it is then matched against the compiled patterns. Besides the
    full path, every trailing portion of the path (the text after each ``/``)
    is tried as well, so a pattern such as ``build`` also matches
    ``/path/to/build/file.py``.

    The actual matching is delegated to ``_should_exclude_with_spec`` so that
    this function and ``walk_files_with_excludes`` share a single
    implementation of the matching rules.

    Args:
        path: File path to check for exclusion (can be absolute or relative).
        exclude_patterns: Gitignore-style patterns to match against. Blank and
            whitespace-only entries are ignored.

    Returns:
        bool: True if the path should be excluded, False otherwise (including
        when there are no usable patterns).
    """
    if not exclude_patterns:
        return False

    # Normalize to an absolute path for consistent comparison; fall back to
    # the raw input if the platform refuses to resolve it.
    try:
        abs_path = os.path.abspath(path)
    except (ValueError, OSError):
        abs_path = path

    # A tuple is hashable, which lets the compiled spec be cached.
    patterns_tuple = tuple(p.strip() for p in exclude_patterns if p.strip())
    if not patterns_tuple:
        return False

    spec = _compile_pathspec(patterns_tuple)

    # The helper handles separator normalization and the suffix-by-suffix
    # matching.
    return _should_exclude_with_spec(abs_path, spec)
83def walk_files_with_excludes(
84 paths: list[str],
85 file_patterns: list[str],
86 exclude_patterns: list[str],
87 include_venv: bool = False,
88 incremental: bool = False,
89 tool_name: str | None = None,
90) -> list[str]:
91 """Return files under ``paths`` matching patterns and not excluded.
93 Uses pathspec for gitignore-style exclude pattern matching.
95 Args:
96 paths: Files or directories to search.
97 file_patterns: Glob patterns to include (fnmatch-style).
98 exclude_patterns: Gitignore-style patterns to exclude.
99 include_venv: Include virtual environment directories when True.
100 incremental: If True, only return files changed since last run.
101 tool_name: Tool name for incremental cache (required if incremental=True).
103 Returns:
104 Sorted file paths matching include filters and not excluded.
105 """
106 all_files: list[str] = []
108 # Pre-compile exclude patterns for efficiency
109 exclude_tuple = tuple(p.strip() for p in exclude_patterns if p.strip())
110 exclude_spec = _compile_pathspec(exclude_tuple) if exclude_tuple else None
112 for path in paths:
113 if os.path.isfile(path):
114 # Single file - check if the filename matches any file pattern
115 filename = os.path.basename(path)
116 for pattern in file_patterns:
117 if fnmatch.fnmatch(filename, pattern):
118 abs_path = os.path.abspath(path)
119 if not _should_exclude_with_spec(abs_path, exclude_spec):
120 all_files.append(abs_path)
121 break
122 elif os.path.isdir(path):
123 # Directory - walk through it
124 for root, dirs, files in os.walk(path):
125 # Filter out virtual environment directories unless include_venv is True
126 if not include_venv:
127 dirs[:] = [d for d in dirs if not _is_venv_directory(d)]
129 # Check each file against the patterns
130 for file in files:
131 file_path: str = os.path.join(root, file)
132 abs_file_path: str = os.path.abspath(file_path)
134 # Check if file matches any file pattern
135 matches_pattern: bool = False
136 for pattern in file_patterns:
137 if fnmatch.fnmatch(file, pattern):
138 matches_pattern = True
139 break
141 if matches_pattern and not _should_exclude_with_spec(
142 abs_file_path,
143 exclude_spec,
144 ):
145 all_files.append(abs_file_path)
147 # Apply incremental filtering if enabled
148 if incremental and tool_name:
149 from lintro.utils.file_cache import ToolCache
151 cache = ToolCache.load(tool_name)
152 changed_files = cache.get_changed_files(all_files)
154 # Update cache with all discovered files for next run
155 cache.update(all_files)
156 cache.save()
158 return sorted(changed_files)
160 return sorted(all_files)
163def _should_exclude_with_spec(
164 path: str,
165 spec: pathspec.PathSpec | None,
166) -> bool:
167 """Check if a path should be excluded using a pre-compiled PathSpec.
169 Args:
170 path: Absolute file path to check.
171 spec: Pre-compiled PathSpec, or None if no exclusions.
173 Returns:
174 bool: True if the path should be excluded.
175 """
176 if spec is None:
177 return False
179 normalized = path.replace("\\", "/")
181 if spec.match_file(normalized):
182 return True
184 # Check relative parts for directory pattern matching
185 path_parts = normalized.split("/")
186 for i in range(len(path_parts)):
187 relative = "/".join(path_parts[i:])
188 if relative and spec.match_file(relative):
189 return True
191 return False
def _is_venv_directory(dirname: str) -> bool:
    """Tell whether a directory name is listed as a virtual environment.

    Args:
        dirname: Directory name to check.

    Returns:
        bool: True when ``dirname`` is a member of ``VENV_PATTERNS``.
    """
    # Function-scope import, kept as in the original (presumably to avoid an
    # import cycle with tool_utils - confirm before moving it).
    from lintro.utils.tool_utils import VENV_PATTERNS

    if dirname in VENV_PATTERNS:
        return True
    return False