Coverage for lintro / parsers / base_parser.py: 99%
69 statements
« prev ^ index » next coverage.py v7.13.0, created at 2026-04-03 18:53 +0000
« prev ^ index » next coverage.py v7.13.0, created at 2026-04-03 18:53 +0000
1"""Base parser utilities for all linting tool parsers.
3This module provides common parsing utilities that are shared across multiple
4tool parsers to reduce code duplication and ensure consistent behavior.
6The utilities include:
7- Field extraction with fallback candidates
8- ANSI code stripping for terminal output
9- Type validation with logging
10- Multi-line message collection
11- Safe item parsing with error handling
12"""
14from __future__ import annotations
16import re
17from collections.abc import Callable
18from typing import TYPE_CHECKING, TypeVar
20from loguru import logger
22if TYPE_CHECKING:
23 from lintro.parsers.base_issue import BaseIssue
25IssueT = TypeVar("IssueT", bound="BaseIssue")
# Pre-compiled regex for ANSI CSI escape sequences (ECMA-48 control sequences):
# ESC "[" followed by any parameter bytes (0x30-0x3F, i.e. "0"-"?"), any
# intermediate bytes (0x20-0x2F, i.e. " "-"/"), and one final byte (0x40-0x7E,
# i.e. "@"-"~"). This is a superset of the SGR colour form "ESC [ ... m" and
# additionally covers cursor-movement / erase sequences such as "\x1b[2K" or
# "\x1b[?25h" that an "m"-only pattern would leave in the text.
_ANSI_ESCAPE_PATTERN: re.Pattern[str] = re.compile(r"\x1b\[[0-?]*[ -/]*[@-~]")
def extract_int_field(
    data: dict[str, object],
    candidates: list[str],
    default: int | None = None,
) -> int | None:
    """Return the first true integer stored under any of the candidate keys.

    The keys are probed in the order given, which lets callers cope with
    tools that rename a field between versions (e.g., "row" vs "line").

    Args:
        data: Dictionary to look the keys up in.
        candidates: Key names to try, highest priority first.
        default: Value returned when no candidate maps to an integer.

    Returns:
        The integer found under the first matching key, otherwise ``default``.

    Examples:
        >>> data = {"row": 10, "col": 5}
        >>> extract_int_field(data, ["line", "row"])
        10
        >>> extract_int_field(data, ["missing"], default=0)
        0
    """
    # Lazy lookup: keys after the first hit are never queried.
    looked_up = (data.get(name) for name in candidates)
    return next(
        (
            found
            for found in looked_up
            # bool is a subclass of int, so True/False must be rejected explicitly.
            if isinstance(found, int) and not isinstance(found, bool)
        ),
        default,
    )
def extract_str_field(
    data: dict[str, object],
    candidates: list[str],
    default: str = "",
) -> str:
    """Return the first string stored under any of the candidate keys.

    The keys are probed in the order given, which lets callers cope with
    tools that rename a field between versions (e.g., "filename" vs "file").

    Args:
        data: Dictionary to look the keys up in.
        candidates: Key names to try, highest priority first.
        default: Value returned when no candidate maps to a string.

    Returns:
        The string found under the first matching key, otherwise ``default``.

    Examples:
        >>> data = {"filename": "test.py", "path": "/src/test.py"}
        >>> extract_str_field(data, ["file", "filename"])
        'test.py'
        >>> extract_str_field(data, ["missing"], default="unknown")
        'unknown'
    """
    # Lazy lookup: keys after the first hit are never queried.
    looked_up = (data.get(name) for name in candidates)
    return next((found for found in looked_up if isinstance(found, str)), default)
def extract_dict_field(
    data: dict[str, object],
    candidates: list[str],
    default: dict[str, object] | None = None,
) -> dict[str, object]:
    """Return the first dictionary stored under any of the candidate keys.

    Useful for nested structures (such as location objects) whose key name
    differs between tool versions.

    Args:
        data: Dictionary to look the keys up in.
        candidates: Key names to try, highest priority first.
        default: Value returned when no candidate maps to a dict. ``None``
            is replaced by a fresh empty dict.

    Returns:
        The dict found under the first matching key, otherwise the default
        (or a new empty dict when no default was supplied).

    Examples:
        >>> data = {"location": {"line": 1}, "start": {"row": 2}}
        >>> extract_dict_field(data, ["location", "start"])
        {'line': 1}
    """
    # Build the fallback per call so callers never share a mutable default.
    fallback: dict[str, object] = {} if default is None else default
    looked_up = (data.get(name) for name in candidates)
    return next((found for found in looked_up if isinstance(found, dict)), fallback)
def is_empty_output(output: str | None) -> bool:
    r"""Tell whether tool output carries no content at all.

    Gives every parser the same definition of "nothing to parse".

    Args:
        output: Raw output string, or None when nothing was captured.

    Returns:
        True when ``output`` is None, the empty string, or whitespace only;
        False otherwise.

    Examples:
        >>> is_empty_output(None)
        True
        >>> is_empty_output("")
        True
        >>> is_empty_output(" \n\t ")
        True
        >>> is_empty_output("content")
        False
    """
    if output is None:
        return True
    # Anything left after stripping whitespace counts as content.
    return not output.strip()
def strip_ansi_codes(text: str) -> str:
    r"""Strip ANSI escape sequences from text.

    Deletes every match of the module-level ``_ANSI_ESCAPE_PATTERN`` so that
    coloured terminal output parses the same way in CI and locally.

    Args:
        text: Text potentially containing ANSI escape sequences.

    Returns:
        ``text`` with every match of the escape pattern removed.

    Examples:
        >>> strip_ansi_codes("\x1b[31mError\x1b[0m: message")
        'Error: message'
        >>> strip_ansi_codes("plain text")
        'plain text'
    """
    # NOTE: this docstring is raw, so the example must spell the escape as a
    # single-backslash \x1b; a doubled backslash would hand doctest a literal
    # backslash character instead of ESC and nothing would be stripped.
    return _ANSI_ESCAPE_PATTERN.sub("", text)
def validate_str_field(
    value: object,
    field_name: str,
    default: str = "",
    log_warning: bool = False,
) -> str:
    """Pass a string through unchanged, or substitute a default for anything else.

    Args:
        value: The value to check.
        field_name: Field name used in the warning message.
        default: Returned when ``value`` is not a string.
        log_warning: When True, a type mismatch on a non-None value is
            reported via ``logger.warning``.

    Returns:
        ``value`` if it is a string, otherwise ``default``.

    Examples:
        >>> validate_str_field("test", "filename")
        'test'
        >>> validate_str_field(123, "filename", default="unknown")
        'unknown'
    """
    if not isinstance(value, str):
        # None is treated as "field absent" and never warned about.
        if log_warning and value is not None:
            logger.warning(f"Expected string for {field_name}, got {type(value).__name__}")
        return default
    return value
def validate_int_field(
    value: object,
    field_name: str,
    default: int = 0,
    log_warning: bool = False,
) -> int:
    """Pass an integer through unchanged, or substitute a default for anything else.

    Args:
        value: The value to check.
        field_name: Field name used in the warning message.
        default: Returned when ``value`` is not an integer.
        log_warning: When True, a type mismatch on a non-None value is
            reported via ``logger.warning``.

    Returns:
        ``value`` if it is an integer (booleans do not count), otherwise
        ``default``.

    Examples:
        >>> validate_int_field(42, "line_number")
        42
        >>> validate_int_field("not_int", "line_number", default=0)
        0
    """
    # bool is a subclass of int, so it is rejected before the int check.
    if isinstance(value, bool) or not isinstance(value, int):
        # None is treated as "field absent" and never warned about.
        if log_warning and value is not None:
            logger.warning(f"Expected integer for {field_name}, got {type(value).__name__}")
        return default
    return value
def collect_continuation_lines(
    lines: list[str],
    start_idx: int,
    is_continuation: Callable[[str], bool],
) -> tuple[str, int]:
    """Gather the run of continuation lines that starts at ``start_idx``.

    Some tools spread one message over several lines, marked by indentation
    or a special prefix. Consecutive lines accepted by ``is_continuation``
    are cleaned up and merged into a single message string.

    Args:
        lines: All output lines.
        start_idx: Index of the first line to test as a continuation.
        is_continuation: Predicate returning True while a line still belongs
            to the message.

    Returns:
        Tuple of (non-empty cleaned parts joined by a single space, index of
        the first line that was not consumed).

    Examples:
        >>> lines = ["main message", " continued", " more", "next item"]
        >>> collect_continuation_lines(lines, 1, lambda l: l.startswith(" "))
        ('continued more', 3)
    """
    collected: list[str] = []
    cursor = start_idx

    # Stop at the end of the input or at the first non-continuation line.
    while cursor < len(lines) and is_continuation(lines[cursor]):
        trimmed = lines[cursor].strip()
        # Drop leading ':' / space characters commonly used as continuation prefixes.
        text = trimmed.lstrip(": ")
        if text:
            collected.append(text)
        cursor += 1

    return " ".join(collected), cursor
def safe_parse_items(
    items: list[object],
    parse_func: Callable[[dict[str, object]], IssueT | None],
    tool_name: str = "tool",
) -> list[IssueT]:
    """Parse each item defensively and keep only the successful results.

    Every dictionary in ``items`` is handed to ``parse_func``. Non-dict
    items, items for which ``parse_func`` returns None, and items whose
    parsing raises KeyError, TypeError or ValueError are skipped; the
    non-dict and exception cases are reported at debug level.

    Args:
        items: Items to parse (expected to be dictionaries).
        parse_func: Converts one item dict into an issue object, or returns
            None when the item cannot be parsed.
        tool_name: Tool name used in the log messages.

    Returns:
        The successfully parsed issue objects, in input order.

    Examples:
        >>> def parse_item(item: dict) -> MyIssue | None:
        ...     return MyIssue(file=item.get("file", ""))
        >>> items = [{"file": "a.py"}, {"file": "b.py"}, "invalid"]
        >>> safe_parse_items(items, parse_item, "mytool")  # doctest: +SKIP
        [MyIssue(file='a.py'), MyIssue(file='b.py')]
    """
    parsed_issues: list[IssueT] = []

    for raw in items:
        if isinstance(raw, dict):
            try:
                issue = parse_func(raw)
            except (KeyError, TypeError, ValueError) as e:
                # One malformed item must not abort the rest of the batch.
                logger.debug(f"Failed to parse {tool_name} item: {e}")
            else:
                if issue is not None:
                    parsed_issues.append(issue)
        else:
            logger.debug(f"Skipping non-dict item in {tool_name} output")

    return parsed_issues