Coverage for lintro/parsers/base

1"""Base parser utilities for all linting tool parsers.

3This module provides common parsing utilities that are shared across multiple

4tool parsers to reduce code duplication and ensure consistent behavior.

6The utilities include:

7- Field extraction with fallback candidates

8- ANSI code stripping for terminal output

9- Type validation with logging

10- Multi-line message collection

11- Safe item parsing with error handling

12"""

14from __future__ import annotations

16import re

17from collections.abc import Callable

18from typing import TYPE_CHECKING, TypeVar

20from loguru import logger

22if TYPE_CHECKING:

23 from lintro.parsers.base_issue import BaseIssue

# Type variable for issue objects produced by parser callbacks. The bound is
# given as a string because BaseIssue is only imported under TYPE_CHECKING.
IssueT = TypeVar("IssueT", bound="BaseIssue")

# Pre-compiled regex for ANSI CSI escape sequences: ESC "[" followed by
# optional parameter bytes (0x30-0x3F), optional intermediate bytes
# (0x20-0x2F) and one final byte (0x40-0x7E). This covers colour/SGR codes
# ("...m") as well as cursor-movement and erase sequences such as "ESC[2K"
# or "ESC[?25l", which would otherwise leak into parsed output.
_ANSI_ESCAPE_PATTERN: re.Pattern[str] = re.compile(r"\x1b\[[0-?]*[ -/]*[@-~]")

def extract_int_field(
    data: dict[str, object],
    candidates: list[str],
    default: int | None = None,
) -> int | None:
    """Return the first genuine integer found under one of several keys.

    The candidate keys are looked up in the given order and the first value
    that is an ``int`` (but not a ``bool``) wins. This lets a parser cope with
    tool output where the same field is named differently between versions
    (e.g., "row" vs "line").

    Args:
        data: Dictionary to read from.
        candidates: Key names to try, in priority order.
        default: Value returned when no candidate holds an integer.

    Returns:
        The integer stored under the first matching candidate, otherwise
        ``default``.

    Examples:
        >>> data = {"row": 10, "col": 5}
        >>> extract_int_field(data, ["line", "row"])
        10
        >>> extract_int_field(data, ["missing"], default=0)
        0
    """
    looked_up = (data.get(name) for name in candidates)
    # bool is a subclass of int in Python, so it has to be ruled out explicitly.
    integers = (
        value
        for value in looked_up
        if isinstance(value, int) and not isinstance(value, bool)
    )
    return next(integers, default)

def extract_str_field(
    data: dict[str, object],
    candidates: list[str],
    default: str = "",
) -> str:
    """Return the first string found under one of several keys.

    The candidate keys are looked up in the given order and the first value
    that is a ``str`` wins. This lets a parser cope with tool output where
    the same field is named differently between versions
    (e.g., "filename" vs "file").

    Args:
        data: Dictionary to read from.
        candidates: Key names to try, in priority order.
        default: Value returned when no candidate holds a string.

    Returns:
        The string stored under the first matching candidate, otherwise
        ``default``.

    Examples:
        >>> data = {"filename": "test.py", "path": "/src/test.py"}
        >>> extract_str_field(data, ["file", "filename"])
        'test.py'
        >>> extract_str_field(data, ["missing"], default="unknown")
        'unknown'
    """
    for name in candidates:
        if isinstance(found := data.get(name), str):
            return found
    return default

def extract_dict_field(
    data: dict[str, object],
    candidates: list[str],
    default: dict[str, object] | None = None,
) -> dict[str, object]:
    """Return the first dictionary found under one of several keys.

    The candidate keys are looked up in the given order and the first value
    that is a ``dict`` wins. Useful for nested structures such as location
    objects whose key name varies between tool versions.

    Args:
        data: Dictionary to read from.
        candidates: Key names to try, in priority order.
        default: Value returned when no candidate holds a dict. ``None``
            (the default) is replaced by a new empty dict.

    Returns:
        The dictionary stored under the first matching candidate, otherwise
        the default.

    Examples:
        >>> data = {"location": {"line": 1}, "start": {"row": 2}}
        >>> extract_dict_field(data, ["location", "start"])
        {'line': 1}
    """
    # A fresh dict per call avoids sharing one mutable default between callers.
    fallback = {} if default is None else default
    nested = (
        value
        for value in (data.get(name) for name in candidates)
        if isinstance(value, dict)
    )
    return next(nested, fallback)

129

130

def is_empty_output(output: str | None) -> bool:
    r"""Tell whether tool output carries no content at all.

    Gives parsers one shared definition of "nothing to parse".

    Args:
        output: Output string to check, may be None.

    Returns:
        True when ``output`` is None, the empty string, or whitespace only;
        False otherwise.

    Examples:
        >>> is_empty_output(None)
        True
        >>> is_empty_output("")
        True
        >>> is_empty_output(" \n\t ")
        True
        >>> is_empty_output("content")
        False
    """
    if output is None:
        return True
    # Nothing left after stripping means empty or whitespace-only.
    return not output.strip()

153

154

def strip_ansi_codes(text: str) -> str:
    r"""Strip ANSI escape sequences from text.

    Removes every substring matched by the module-level
    ``_ANSI_ESCAPE_PATTERN`` (terminal colour codes and the like) so that
    parsing is stable across different environments (CI vs local).

    Args:
        text: Text potentially containing ANSI escape sequences.

    Returns:
        ``text`` with every match of ``_ANSI_ESCAPE_PATTERN`` removed.

    Examples:
        >>> strip_ansi_codes("\x1b[31mError\x1b[0m: message")
        'Error: message'
        >>> strip_ansi_codes("plain text")
        'plain text'
    """
    # NOTE: this docstring is raw, so the example above must use a single
    # backslash ("\x1b"); a doubled backslash would hand doctest a literal
    # backslash instead of the ESC character and nothing would be stripped.
    return _ANSI_ESCAPE_PATTERN.sub("", text)

174

175

def validate_str_field(
    value: object,
    field_name: str,
    default: str = "",
    log_warning: bool = False,
) -> str:
    """Return ``value`` if it is a string, otherwise a default.

    Args:
        value: The value to validate.
        field_name: Field name used in the warning message.
        default: Value returned when ``value`` is not a string.
        log_warning: When True, log a warning for a non-string value.
            A ``None`` value never triggers the warning.

    Returns:
        ``value`` unchanged when it is a string, otherwise ``default``.

    Examples:
        >>> validate_str_field("test", "filename")
        'test'
        >>> validate_str_field(123, "filename", default="unknown")
        'unknown'
    """
    if not isinstance(value, str):
        # None is treated as "field absent", not as a type mismatch.
        if log_warning and value is not None:
            logger.warning(
                f"Expected string for {field_name}, got {type(value).__name__}",
            )
        return default
    return value

204

205

def validate_int_field(
    value: object,
    field_name: str,
    default: int = 0,
    log_warning: bool = False,
) -> int:
    """Return ``value`` if it is a genuine integer, otherwise a default.

    Booleans are rejected even though ``bool`` is a subclass of ``int``.

    Args:
        value: The value to validate.
        field_name: Field name used in the warning message.
        default: Value returned when ``value`` is not an integer.
        log_warning: When True, log a warning for a non-integer value.
            A ``None`` value never triggers the warning.

    Returns:
        ``value`` unchanged when it is an integer, otherwise ``default``.

    Examples:
        >>> validate_int_field(42, "line_number")
        42
        >>> validate_int_field("not_int", "line_number", default=0)
        0
    """
    if isinstance(value, bool) or not isinstance(value, int):
        # None is treated as "field absent", not as a type mismatch.
        if log_warning and value is not None:
            logger.warning(
                f"Expected integer for {field_name}, got {type(value).__name__}",
            )
        return default
    return value

234

235

def collect_continuation_lines(
    lines: list[str],
    start_idx: int,
    is_continuation: Callable[[str], bool],
) -> tuple[str, int]:
    """Gather the continuation lines of a multi-line message into one string.

    Starting at ``start_idx``, consecutive lines are consumed for as long as
    ``is_continuation`` accepts them. Each consumed line is stripped of
    surrounding whitespace and of leading colons/spaces; lines that end up
    empty are consumed but contribute nothing to the message.

    Args:
        lines: List of all output lines.
        start_idx: Index of the first line to test as a continuation.
        is_continuation: Predicate returning True for lines that continue
            the message.

    Returns:
        Tuple of (collected fragments joined by a single space, index of the
        first line that was not consumed).

    Examples:
        >>> lines = ["main message", " continued", " more", "next item"]
        >>> collect_continuation_lines(lines, 1, lambda l: l.startswith(" "))
        ('continued more', 3)
    """
    fragments: list[str] = []
    cursor = start_idx

    while cursor < len(lines) and is_continuation(lines[cursor]):
        # Drop surrounding whitespace, then any leading ":" / " " prefix.
        text = lines[cursor].strip().lstrip(": ")
        if text:
            fragments.append(text)
        cursor += 1

    return " ".join(fragments), cursor

275

276

def safe_parse_items(
    items: list[object],
    parse_func: Callable[[dict[str, object]], IssueT | None],
    tool_name: str = "tool",
) -> list[IssueT]:
    """Parse a list of raw items, skipping anything that cannot be parsed.

    Every dictionary in ``items`` is handed to ``parse_func``; non-``None``
    results are collected in order. Items that are not dictionaries, and
    items for which ``parse_func`` raises ``KeyError``, ``TypeError`` or
    ``ValueError``, are logged at debug level and skipped. Any other
    exception propagates to the caller.

    Args:
        items: List of items to parse (expected to be dictionaries).
        parse_func: Function turning one item dict into an issue object,
            or returning None when the item cannot be parsed.
        tool_name: Name of the tool, used in log messages.

    Returns:
        List of successfully parsed issue objects.

    Examples:
        >>> def parse_item(item: dict) -> MyIssue | None:
        ...     return MyIssue(file=item.get("file", ""))
        >>> items = [{"file": "a.py"}, {"file": "b.py"}, "invalid"]
        >>> safe_parse_items(items, parse_item, "mytool")  # doctest: +SKIP
        [MyIssue(file='a.py'), MyIssue(file='b.py')]
    """
    parsed_issues: list[IssueT] = []

    for entry in items:
        if isinstance(entry, dict):
            try:
                issue = parse_func(entry)
            except (KeyError, TypeError, ValueError) as e:
                logger.debug(f"Failed to parse {tool_name} item: {e}")
            else:
                if issue is not None:
                    parsed_issues.append(issue)
        else:
            logger.debug(f"Skipping non-dict item in {tool_name} output")

    return parsed_issues

Coverage for lintro / parsers / base_parser.py: 99%

69 statements