Coverage for lintro / parsers / base_parser.py: 99%

69 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2026-04-03 18:53 +0000

1"""Base parser utilities for all linting tool parsers. 

2 

3This module provides common parsing utilities that are shared across multiple 

4tool parsers to reduce code duplication and ensure consistent behavior. 

5 

6The utilities include: 

7- Field extraction with fallback candidates 

8- ANSI code stripping for terminal output 

9- Type validation with logging 

10- Multi-line message collection 

11- Safe item parsing with error handling 

12""" 

13 

14from __future__ import annotations 

15 

16import re 

17from collections.abc import Callable 

18from typing import TYPE_CHECKING, TypeVar 

19 

20from loguru import logger 

21 

22if TYPE_CHECKING: 

23 from lintro.parsers.base_issue import BaseIssue 

24 

25IssueT = TypeVar("IssueT", bound="BaseIssue") 

26 

27# Pre-compiled regex for ANSI SGR (color/style) escape sequences: ESC [ <digits/semicolons> m

28_ANSI_ESCAPE_PATTERN: re.Pattern[str] = re.compile(r"\x1b\[[0-9;]*m") 

29 

30 

def extract_int_field(
    data: dict[str, object],
    candidates: list[str],
    default: int | None = None,
) -> int | None:
    """Return the first genuine integer found under any of the candidate keys.

    Tool output formats drift between versions, so the same piece of
    information may live under different keys (e.g., "row" vs "line").
    The candidates are probed in the order given and the first value that
    is an ``int`` wins.

    Args:
        data: Dictionary to look the keys up in.
        candidates: Key names to try, in priority order.
        default: Value returned when no candidate holds an integer.

    Returns:
        The integer stored under the first matching candidate, or the default.

    Examples:
        >>> data = {"row": 10, "col": 5}
        >>> extract_int_field(data, ["line", "row"])
        10
        >>> extract_int_field(data, ["missing"], default=0)
        0
    """
    # Lazily walk the looked-up values; ``bool`` is rejected explicitly
    # because it is a subclass of ``int`` in Python.
    integer_hits = (
        found
        for found in map(data.get, candidates)
        if isinstance(found, int) and not isinstance(found, bool)
    )
    return next(integer_hits, default)

63 

64 

def extract_str_field(
    data: dict[str, object],
    candidates: list[str],
    default: str = "",
) -> str:
    """Return the first string found under any of the candidate keys.

    Tool output formats drift between versions, so the same piece of
    information may live under different keys (e.g., "filename" vs "file").
    The candidates are probed in the order given and the first value that
    is a ``str`` wins.

    Args:
        data: Dictionary to look the keys up in.
        candidates: Key names to try, in priority order.
        default: Value returned when no candidate holds a string.

    Returns:
        The string stored under the first matching candidate, or the default.

    Examples:
        >>> data = {"filename": "test.py", "path": "/src/test.py"}
        >>> extract_str_field(data, ["file", "filename"])
        'test.py'
        >>> extract_str_field(data, ["missing"], default="unknown")
        'unknown'
    """
    for name in candidates:
        found = data.get(name)
        if not isinstance(found, str):
            # Missing key or wrong type: move on to the next candidate.
            continue
        return found
    return default

96 

97 

def extract_dict_field(
    data: dict[str, object],
    candidates: list[str],
    default: dict[str, object] | None = None,
) -> dict[str, object]:
    """Return the first dictionary found under any of the candidate keys.

    Useful for nested structures such as location objects whose key name
    differs between tool versions. The candidates are probed in the order
    given and the first value that is a ``dict`` wins.

    Args:
        data: Dictionary to look the keys up in.
        candidates: Key names to try, in priority order.
        default: Value returned when no candidate holds a dict. When omitted
            (``None``), a fresh empty dict is used instead.

    Returns:
        The dict stored under the first matching candidate (the same object,
        not a copy), or the default.

    Examples:
        >>> data = {"location": {"line": 1}, "start": {"row": 2}}
        >>> extract_dict_field(data, ["location", "start"])
        {'line': 1}
    """
    # A new dict per call avoids sharing a mutable default between callers.
    fallback: dict[str, object] = {} if default is None else default
    for name in candidates:
        found = data.get(name)
        if not isinstance(found, dict):
            continue
        return found
    return fallback

129 

130 

def is_empty_output(output: str | None) -> bool:
    r"""Tell whether tool output carries no content at all.

    Gives every parser one shared definition of "nothing to parse".

    Args:
        output: Output string to inspect; ``None`` is accepted.

    Returns:
        True when output is None, the empty string, or whitespace only;
        False otherwise.

    Examples:
        >>> is_empty_output(None)
        True
        >>> is_empty_output("")
        True
        >>> is_empty_output("  \n\t  ")
        True
        >>> is_empty_output("content")
        False
    """
    if output is None:
        return True
    # Whatever survives stripping is real content.
    remaining = output.strip()
    return not remaining

153 

154 

def strip_ansi_codes(text: str) -> str:
    r"""Strip ANSI color/style escape sequences from text.

    Removes every match of the module-level ``_ANSI_ESCAPE_PATTERN``, i.e.
    sequences of the form ``ESC [ <digits and semicolons> m`` (the SGR
    codes terminals use for colors and text attributes). This keeps
    parsing stable across environments that do or do not colorize output
    (CI vs local). Other escape sequences (e.g., cursor movement) are not
    matched by that pattern and are left untouched.

    Args:
        text: Text potentially containing ANSI escape sequences.

    Returns:
        The text with all matched escape sequences removed.

    Examples:
        >>> strip_ansi_codes("\x1b[31mError\x1b[0m: message")
        'Error: message'
        >>> strip_ansi_codes("plain text")
        'plain text'
    """
    # NOTE: this docstring is raw, so the example must use a single
    # backslash ("\x1b") for doctest to see a real ESC character.
    return _ANSI_ESCAPE_PATTERN.sub("", text)

174 

175 

def validate_str_field(
    value: object,
    field_name: str,
    default: str = "",
    log_warning: bool = False,
) -> str:
    """Pass a string through unchanged, or substitute a default.

    Args:
        value: The value to check.
        field_name: Field name used in the warning message.
        default: Value returned when ``value`` is not a string.
        log_warning: When True, a warning is logged for a non-string value
            that is not None.

    Returns:
        ``value`` itself if it is a string, otherwise ``default``.

    Examples:
        >>> validate_str_field("test", "filename")
        'test'
        >>> validate_str_field(123, "filename", default="unknown")
        'unknown'
    """
    if not isinstance(value, str):
        # None is never reported, even with log_warning enabled.
        if log_warning and value is not None:
            logger.warning(
                f"Expected string for {field_name}, got {type(value).__name__}",
            )
        return default
    return value

204 

205 

def validate_int_field(
    value: object,
    field_name: str,
    default: int = 0,
    log_warning: bool = False,
) -> int:
    """Pass an integer through unchanged, or substitute a default.

    Booleans are not accepted as integers even though ``bool`` subclasses
    ``int`` in Python.

    Args:
        value: The value to check.
        field_name: Field name used in the warning message.
        default: Value returned when ``value`` is not an integer.
        log_warning: When True, a warning is logged for a non-integer value
            that is not None.

    Returns:
        ``value`` itself if it is a non-bool integer, otherwise ``default``.

    Examples:
        >>> validate_int_field(42, "line_number")
        42
        >>> validate_int_field("not_int", "line_number", default=0)
        0
    """
    is_plain_int = isinstance(value, int) and not isinstance(value, bool)
    if not is_plain_int:
        # None is never reported, even with log_warning enabled.
        if log_warning and value is not None:
            logger.warning(
                f"Expected integer for {field_name}, got {type(value).__name__}",
            )
        return default
    return value

234 

235 

def collect_continuation_lines(
    lines: list[str],
    start_idx: int,
    is_continuation: Callable[[str], bool],
) -> tuple[str, int]:
    """Gather the follow-up lines of a multi-line message into one string.

    Some tools spread a single message over several lines, marking the
    follow-ups with indentation or a special prefix. Starting at
    ``start_idx``, lines are consumed for as long as ``is_continuation``
    accepts them; scanning stops at the first rejected line or at the end
    of the list.

    Args:
        lines: All output lines.
        start_idx: Index of the first line to test as a continuation.
        is_continuation: Predicate returning True when a line continues
            the current message.

    Returns:
        Tuple of (the cleaned, non-empty fragments joined by single spaces,
        index of the first line that was not consumed).

    Examples:
        >>> lines = ["main message", "  continued", "  more", "next item"]
        >>> collect_continuation_lines(lines, 1, lambda l: l.startswith(" "))
        ('continued more', 3)
    """
    fragments: list[str] = []
    cursor = start_idx

    while cursor < len(lines) and is_continuation(lines[cursor]):
        # Trim surrounding whitespace, then any leading colons/spaces that
        # tools use as continuation prefixes.
        fragment = lines[cursor].strip().lstrip(": ")
        if fragment:
            fragments.append(fragment)
        cursor += 1

    return " ".join(fragments), cursor

275 

276 

def safe_parse_items(
    items: list[object],
    parse_func: Callable[[dict[str, object]], IssueT | None],
    tool_name: str = "tool",
) -> list[IssueT]:
    """Parse each item of a list, skipping anything that cannot be parsed.

    Every dictionary in ``items`` is handed to ``parse_func`` and the
    non-None results are collected in order. Items that are not
    dictionaries, and items for which ``parse_func`` raises ``KeyError``,
    ``TypeError`` or ``ValueError``, are logged at debug level and skipped.
    Any other exception propagates to the caller.

    Args:
        items: Items to parse (expected to be dictionaries).
        parse_func: Function turning a single item dict into an issue
            object. Should return None if the item cannot be parsed.
        tool_name: Tool name used in the log messages.

    Returns:
        List of successfully parsed issue objects.

    Examples:
        >>> def parse_item(item: dict) -> MyIssue | None:
        ...     return MyIssue(file=item.get("file", ""))
        >>> items = [{"file": "a.py"}, {"file": "b.py"}, "invalid"]
        >>> safe_parse_items(items, parse_item, "mytool")  # doctest: +SKIP
        [MyIssue(file='a.py'), MyIssue(file='b.py')]
    """
    collected: list[IssueT] = []

    for entry in items:
        if isinstance(entry, dict):
            try:
                issue = parse_func(entry)
            except (KeyError, TypeError, ValueError) as e:
                # One malformed entry must not abort the whole batch.
                logger.debug(f"Failed to parse {tool_name} item: {e}")
            else:
                if issue is not None:
                    collected.append(issue)
        else:
            logger.debug(f"Skipping non-dict item in {tool_name} output")

    return collected