Coverage for lintro / utils / path_filtering.py: 90%

78 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2026-04-03 18:53 +0000

1"""Path filtering and file discovery utilities. 

2 

3Functions for filtering paths, walking directories, and excluding files based on 

4patterns. Uses pathspec library for gitignore-style pattern matching. 

5""" 

6 

7import fnmatch 

8import os 

9from functools import lru_cache 

10from typing import TYPE_CHECKING 

11 

12import pathspec 

13 

14if TYPE_CHECKING: 

15 pass 

16 

17 

@lru_cache(maxsize=32)
def _compile_pathspec(patterns_tuple: tuple[str, ...]) -> pathspec.PathSpec:
    """Build a PathSpec matcher for the given patterns, memoizing the result.

    The function is wrapped in ``lru_cache``, so the patterns must be passed
    as a hashable tuple; repeated calls with the same tuple reuse the
    previously compiled matcher.

    Args:
        patterns_tuple: Tuple of gitignore-style patterns to compile.

    Returns:
        pathspec.PathSpec: Matcher compiled with the "gitwildmatch" syntax.
    """
    compiled: pathspec.PathSpec = pathspec.PathSpec.from_lines(
        "gitwildmatch",
        patterns_tuple,
    )
    return compiled

29 

30 

def should_exclude_path(
    path: str,
    exclude_patterns: list[str],
) -> bool:
    """Check if a path should be excluded based on gitignore-style patterns.

    Blank and whitespace-only patterns are ignored. The path is made absolute
    (falling back to the raw value if that fails) and then handed to
    ``_should_exclude_with_spec``, which tests the full path and each of its
    trailing sub-paths, so a pattern such as ``build`` matches
    ``/path/to/build/file.py``.

    Args:
        path: File path to check for exclusion (can be absolute or relative).
        exclude_patterns: Gitignore-style patterns to match against.

    Returns:
        bool: True if the path should be excluded, False otherwise (including
        when no usable patterns are supplied).
    """
    if not exclude_patterns:
        return False

    # Strip once, then drop empties. A tuple is needed because the compiled
    # spec is cached and the cache key must be hashable.
    stripped = (pattern.strip() for pattern in exclude_patterns)
    patterns_tuple = tuple(pattern for pattern in stripped if pattern)
    if not patterns_tuple:
        return False

    # Normalize to an absolute path for consistent comparison; keep the raw
    # value if the platform cannot resolve it.
    try:
        abs_path = os.path.abspath(path)
    except (ValueError, OSError):
        abs_path = path

    # Separator normalization and suffix matching live in the shared helper
    # so this function and the directory walker cannot drift apart.
    spec = _compile_pathspec(patterns_tuple)
    return _should_exclude_with_spec(abs_path, spec)

81 

82 

83def walk_files_with_excludes( 

84 paths: list[str], 

85 file_patterns: list[str], 

86 exclude_patterns: list[str], 

87 include_venv: bool = False, 

88 incremental: bool = False, 

89 tool_name: str | None = None, 

90) -> list[str]: 

91 """Return files under ``paths`` matching patterns and not excluded. 

92 

93 Uses pathspec for gitignore-style exclude pattern matching. 

94 

95 Args: 

96 paths: Files or directories to search. 

97 file_patterns: Glob patterns to include (fnmatch-style). 

98 exclude_patterns: Gitignore-style patterns to exclude. 

99 include_venv: Include virtual environment directories when True. 

100 incremental: If True, only return files changed since last run. 

101 tool_name: Tool name for incremental cache (required if incremental=True). 

102 

103 Returns: 

104 Sorted file paths matching include filters and not excluded. 

105 """ 

106 all_files: list[str] = [] 

107 

108 # Pre-compile exclude patterns for efficiency 

109 exclude_tuple = tuple(p.strip() for p in exclude_patterns if p.strip()) 

110 exclude_spec = _compile_pathspec(exclude_tuple) if exclude_tuple else None 

111 

112 for path in paths: 

113 if os.path.isfile(path): 

114 # Single file - check if the filename matches any file pattern 

115 filename = os.path.basename(path) 

116 for pattern in file_patterns: 

117 if fnmatch.fnmatch(filename, pattern): 

118 abs_path = os.path.abspath(path) 

119 if not _should_exclude_with_spec(abs_path, exclude_spec): 

120 all_files.append(abs_path) 

121 break 

122 elif os.path.isdir(path): 

123 # Directory - walk through it 

124 for root, dirs, files in os.walk(path): 

125 # Filter out virtual environment directories unless include_venv is True 

126 if not include_venv: 

127 dirs[:] = [d for d in dirs if not _is_venv_directory(d)] 

128 

129 # Check each file against the patterns 

130 for file in files: 

131 file_path: str = os.path.join(root, file) 

132 abs_file_path: str = os.path.abspath(file_path) 

133 

134 # Check if file matches any file pattern 

135 matches_pattern: bool = False 

136 for pattern in file_patterns: 

137 if fnmatch.fnmatch(file, pattern): 

138 matches_pattern = True 

139 break 

140 

141 if matches_pattern and not _should_exclude_with_spec( 

142 abs_file_path, 

143 exclude_spec, 

144 ): 

145 all_files.append(abs_file_path) 

146 

147 # Apply incremental filtering if enabled 

148 if incremental and tool_name: 

149 from lintro.utils.file_cache import ToolCache 

150 

151 cache = ToolCache.load(tool_name) 

152 changed_files = cache.get_changed_files(all_files) 

153 

154 # Update cache with all discovered files for next run 

155 cache.update(all_files) 

156 cache.save() 

157 

158 return sorted(changed_files) 

159 

160 return sorted(all_files) 

161 

162 

def _should_exclude_with_spec(
    path: str,
    spec: "pathspec.PathSpec | None",
) -> bool:
    """Check if a path should be excluded using a pre-compiled PathSpec.

    Backslashes are converted to forward slashes, then the full path and each
    trailing sub-path (obtained by dropping leading components one at a time)
    are tested against ``spec``. This lets a pattern such as ``build`` match
    ``/path/to/build/file.py``.

    Args:
        path: Absolute file path to check.
        spec: Pre-compiled PathSpec, or None if no exclusions.

    Returns:
        bool: True if the path should be excluded.
    """
    if spec is None:
        return False

    normalized = path.replace("\\", "/")

    if spec.match_file(normalized):
        return True

    # Start at index 1: index 0 would rebuild the full path, which was just
    # tested above. Empty suffixes (e.g. from a trailing slash) are skipped.
    path_parts = normalized.split("/")
    suffixes = ("/".join(path_parts[start:]) for start in range(1, len(path_parts)))
    return any(suffix and spec.match_file(suffix) for suffix in suffixes)

192 

193 

def _is_venv_directory(dirname: str) -> bool:
    """Tell whether a directory name is one of the known venv names.

    The check is a plain membership test against ``VENV_PATTERNS`` from
    ``lintro.utils.tool_utils``, which is imported when the function is
    called rather than at module load.

    Args:
        dirname: Directory name to check.

    Returns:
        bool: True if ``dirname`` is contained in ``VENV_PATTERNS``.
    """
    from lintro.utils.tool_utils import VENV_PATTERNS

    is_venv: bool = dirname in VENV_PATTERNS
    return is_venv