Coverage for lintro/ai/fix.py: 81%

182 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2026-04-03 18:53 +0000

1"""AI fix generation service. 

2 

3Generates fix suggestions for issues that native tools cannot auto-fix. 

4Reads file contents, asks the AI for a corrected version, and produces 

5unified diffs. Supports parallel API calls for improved performance. 

6""" 

7 

8from __future__ import annotations 

9 

10import functools 

11import threading 

12from collections import defaultdict 

13from collections.abc import Callable, Sequence 

14from concurrent.futures import ThreadPoolExecutor, as_completed 

15from pathlib import Path 

16from typing import TYPE_CHECKING 

17 

18from loguru import logger 

19 

20from lintro.ai.cache import cache_suggestion 

21from lintro.ai.enums.sanitize_mode import SanitizeMode 

22from lintro.ai.fallback import complete_with_fallback 

23from lintro.ai.fix_context import ( 

24 CONTEXT_LINES, 

25 FULL_FILE_THRESHOLD, 

26 build_fix_context, 

27 check_cache, 

28 read_file_safely, 

29 validate_and_read_file, 

30) 

31from lintro.ai.fix_params import FixGenParams 

32from lintro.ai.fix_parsing import ( 

33 parse_batch_response, 

34 parse_fix_response, 

35) 

36from lintro.ai.models import AIFixSuggestion 

37from lintro.ai.paths import ( 

38 resolve_workspace_file, 

39 resolve_workspace_root, 

40 to_provider_path, 

41) 

42from lintro.ai.prompts import FIX_BATCH_PROMPT_TEMPLATE, FIX_SYSTEM 

43from lintro.ai.retry import ( 

44 DEFAULT_BACKOFF_FACTOR, 

45 DEFAULT_BASE_DELAY, 

46 DEFAULT_MAX_DELAY, 

47 with_retry, 

48) 

49from lintro.ai.sanitize import ( 

50 detect_injection_patterns, 

51 make_boundary_marker, 

52 sanitize_code_content, 

53) 

54from lintro.ai.secrets import redact_secrets 

55from lintro.ai.token_budget import estimate_tokens 

56 

57if TYPE_CHECKING: 

58 from lintro.ai.providers.base import AIResponse, BaseAIProvider 

59 from lintro.parsers.base_issue import BaseIssue 

60 

61 

def _call_provider(
    provider: BaseAIProvider,
    prompt: str,
    system: str,
    max_tokens: int,
    timeout: float = 60.0,
    fallback_models: list[str] | None = None,
) -> AIResponse:
    """Invoke the provider once, trying fallback models on failure.

    Retry logic is deliberately absent here: callers wrap this function
    with ``with_retry`` so the retry policy lives in one place.

    Args:
        provider: AI provider instance.
        prompt: User prompt text.
        system: System prompt text.
        max_tokens: Maximum tokens to request from the provider.
        timeout: Request timeout in seconds.
        fallback_models: Ordered fallback model identifiers to try when
            the primary model fails with a retryable error.

    Returns:
        The provider response for the first model that succeeds.
    """
    response = complete_with_fallback(
        provider,
        prompt,
        fallback_models=fallback_models,
        system=system,
        max_tokens=max_tokens,
        timeout=timeout,
    )
    return response

79 

80 

# Default cap on concurrent provider API calls during fix generation;
# callers may override via the ``max_workers`` argument of ``generate_fixes``.
DEFAULT_MAX_WORKERS = 5

83 

84 

def _call_and_cache_fix(
    prompt: str,
    issue_file: str,
    issue: BaseIssue,
    code: str,
    tool_name: str,
    retrying_call: Callable[..., AIResponse],
    provider: BaseAIProvider,
    max_tokens: int,
    timeout: float,
    workspace_root: Path,
    file_content: str,
    enable_cache: bool,
) -> AIFixSuggestion | None:
    """Call the provider, parse the response, and optionally cache the result.

    Args:
        prompt: Fully built fix prompt text.
        issue_file: Resolved path of the file the issue refers to.
        issue: The issue being fixed (``line`` and ``message`` are used).
        code: Diagnostic code of the issue (may be empty).
        tool_name: Name of the tool that produced the issue.
        retrying_call: Retry wrapper around ``_call_provider``.
        provider: AI provider instance.
        max_tokens: Maximum tokens to request from the provider.
        timeout: Request timeout in seconds.
        workspace_root: Root directory used as the cache namespace.
        file_content: Full content of the file (cache key material).
        enable_cache: Whether to store the suggestion in the dedup cache.

    Returns:
        Parsed AIFixSuggestion, or None if the call or parsing fails.
    """
    try:
        response = retrying_call(provider, prompt, FIX_SYSTEM, max_tokens, timeout)

        suggestion = parse_fix_response(
            response.content,
            issue_file,
            issue.line,
            code,
        )

        if suggestion:
            # Annotate provenance and usage accounting on the suggestion.
            suggestion.tool_name = tool_name
            suggestion.input_tokens = response.input_tokens
            suggestion.output_tokens = response.output_tokens
            suggestion.cost_estimate = response.cost_estimate

            if enable_cache:
                cache_suggestion(
                    workspace_root,
                    file_content,
                    code,
                    issue.line,
                    issue.message,
                    suggestion,
                )

        return suggestion

    except (KeyboardInterrupt, SystemExit):
        # Never swallow user interrupts or explicit exits.
        raise
    except Exception as exc:
        # Fix generation is best-effort: log at debug level and return None
        # so one failed issue does not abort the whole batch.
        logger.debug(
            f"AI fix generation failed for {issue.file}:{issue.line} "
            f"({type(exc).__name__}: {exc})",
            exc_info=True,
        )

        return None

138 

139 

def _generate_single_fix(
    issue: BaseIssue,
    provider: BaseAIProvider,
    tool_name: str,
    file_cache: dict[str, str | None],
    cache_lock: threading.Lock,
    workspace_root: Path,
    max_tokens: int,
    retrying_call: Callable[..., AIResponse],
    timeout: float = 60.0,
    context_lines: int = CONTEXT_LINES,
    max_prompt_tokens: int = 12000,
    enable_cache: bool = False,
    cache_ttl: int = 3600,
    full_file_threshold: int = FULL_FILE_THRESHOLD,
    sanitize_mode: SanitizeMode = SanitizeMode.WARN,
    cache_max_entries: int = 100,
) -> AIFixSuggestion | None:
    """Produce a fix suggestion for one issue, or None on any failure.

    Safe to call from multiple threads: all access to the shared
    ``file_cache`` is guarded by ``cache_lock``.

    Args:
        issue: The issue to fix.
        provider: AI provider instance.
        tool_name: Name of the tool.
        file_cache: Shared file content cache.
        cache_lock: Lock for thread-safe cache access.
        workspace_root: Root directory AI is allowed to edit/read.
        max_tokens: Maximum tokens to request from provider.
        retrying_call: Pre-built retry wrapper around ``_call_provider``.
        timeout: Request timeout in seconds.
        context_lines: Lines of context before/after the issue line.
        max_prompt_tokens: Token budget for the prompt (4 chars ~ 1 token).
        enable_cache: Whether to use the suggestion deduplication cache.
        cache_ttl: Time-to-live in seconds for cached suggestions.
        full_file_threshold: Max lines to attempt full-file context
            (default 500).
        sanitize_mode: How to handle detected prompt injection patterns.
        cache_max_entries: Maximum file cache entries to limit memory.

    Returns:
        AIFixSuggestion, or None if generation fails.
    """
    # Resolve the issue's file within the workspace and read its content.
    file_info = validate_and_read_file(
        issue,
        file_cache,
        cache_lock,
        workspace_root,
        cache_max_entries=cache_max_entries,
    )
    if file_info is None:
        return None
    resolved_file, content = file_info

    issue_code = getattr(issue, "code", "") or ""

    # Serve a previously generated suggestion when deduplication is on.
    if enable_cache:
        hit = check_cache(
            workspace_root,
            content,
            issue_code,
            issue,
            tool_name,
            cache_ttl,
        )
        if hit is not None:
            return hit

    # Build the prompt; None means the context did not fit the budget
    # (or was blocked by sanitization).
    context_prompt = build_fix_context(
        issue,
        resolved_file,
        content,
        tool_name,
        issue_code,
        workspace_root,
        context_lines,
        max_prompt_tokens,
        full_file_threshold,
        sanitize_mode=sanitize_mode,
    )
    if context_prompt is None:
        return None

    return _call_and_cache_fix(
        context_prompt,
        resolved_file,
        issue,
        issue_code,
        tool_name,
        retrying_call,
        provider,
        max_tokens,
        timeout,
        workspace_root,
        content,
        enable_cache,
    )

238 

239 

def _generate_batch_fixes(
    file_path: str,
    file_issues: list[BaseIssue],
    provider: BaseAIProvider,
    tool_name: str,
    file_content: str,
    workspace_root: Path,
    max_tokens: int,
    retrying_call: Callable[..., AIResponse],
    timeout: float,
    max_prompt_tokens: int,
    sanitize_mode: SanitizeMode = SanitizeMode.WARN,
) -> list[AIFixSuggestion] | None:
    """Generate fixes for multiple issues in one file via a batch prompt.

    Returns a list of suggestions on success, or None if the batch prompt
    does not fit within the token budget or the response cannot be parsed
    (signalling the caller to fall back to single-issue mode).

    Args:
        file_path: Resolved absolute file path.
        file_issues: Issues in this file (must have len >= 2).
        provider: AI provider instance.
        tool_name: Name of the tool.
        file_content: Full file content string.
        workspace_root: Root directory for workspace-relative paths.
        max_tokens: Maximum tokens to request from provider.
        retrying_call: Pre-built retry wrapper around ``_call_provider``.
        timeout: Request timeout in seconds.
        max_prompt_tokens: Token budget for the prompt.
        sanitize_mode: How to handle detected prompt injection patterns.

    Returns:
        List of AIFixSuggestions, or None on failure (fall back to single).

    Raises:
        KeyboardInterrupt: Re-raised immediately.
        SystemExit: Re-raised immediately.
    """
    issues_list_parts: list[str] = []
    raw_messages: list[str] = []
    for idx, issue in enumerate(file_issues, 1):
        code = getattr(issue, "code", "") or ""
        raw_messages.append(issue.message)
        msg = redact_secrets(sanitize_code_content(issue.message))
        issues_list_parts.append(
            f"{idx}. Line {issue.line} [{code}]: {msg}",
        )
    issues_list = "\n".join(issues_list_parts)

    sanitized_content = redact_secrets(sanitize_code_content(file_content))
    if sanitize_mode != SanitizeMode.OFF:
        file_injections = detect_injection_patterns(file_content)
        # Scan raw messages before sanitization to catch original injection markers
        msg_injections = detect_injection_patterns("\n".join(raw_messages))
        injections = file_injections + msg_injections
        if injections:
            if sanitize_mode == SanitizeMode.BLOCK:
                logger.warning(
                    f"Blocking batch fix for {file_path}: prompt injection "
                    f"patterns detected in file/diagnostics: "
                    f"{', '.join(injections)}",
                )
                return None
            logger.warning(
                f"Potential prompt injection patterns detected in "
                f"{file_path} (file/diagnostics): {', '.join(injections)}",
            )

    boundary = make_boundary_marker()
    prompt = FIX_BATCH_PROMPT_TEMPLATE.format(
        tool_name=tool_name,
        file=to_provider_path(file_path, workspace_root),
        issues_list=issues_list,
        file_content=sanitized_content,
        boundary=boundary,
    )

    if estimate_tokens(prompt) > max_prompt_tokens:
        logger.debug(
            f"Batch prompt over budget for {file_path} "
            f"({len(file_issues)} issues), falling back to single-issue mode",
        )
        return None

    try:
        response = retrying_call(provider, prompt, FIX_SYSTEM, max_tokens, timeout)
        suggestions = parse_batch_response(response.content, file_path)
        if not suggestions:
            logger.debug(
                f"Batch response parse returned no suggestions for {file_path}, "
                f"falling back to single-issue mode",
            )
            return None

        if len(suggestions) != len(file_issues):
            logger.debug(
                f"Batch response count mismatch for {file_path}: "
                f"got {len(suggestions)} suggestions for {len(file_issues)} issues, "
                f"falling back to single-issue mode",
            )
            return None

        # Attribute usage evenly across suggestions. Use divmod and credit
        # the remainders to the first suggestion so the totals summed over
        # all suggestions equal the actual API usage (plain floor division
        # silently under-counted by up to count-1 tokens per field).
        count = len(suggestions)
        per_input, input_rem = divmod(response.input_tokens, count)
        per_output, output_rem = divmod(response.output_tokens, count)
        per_cost = response.cost_estimate / count
        for suggestion in suggestions:
            suggestion.tool_name = tool_name
            suggestion.input_tokens = per_input
            suggestion.output_tokens = per_output
            suggestion.cost_estimate = per_cost
        suggestions[0].input_tokens += input_rem
        suggestions[0].output_tokens += output_rem

        logger.debug(
            f"Batch fix generated {len(suggestions)} suggestions "
            f"for {file_path} ({len(file_issues)} issues)",
        )
        return suggestions

    except (KeyboardInterrupt, SystemExit):
        raise
    except Exception as exc:
        logger.debug(
            f"Batch AI fix generation failed for {file_path} "
            f"({type(exc).__name__}: {exc}), falling back to single-issue mode",
            exc_info=True,
        )
        return None

368 

369 

def generate_fixes(
    issues: Sequence[BaseIssue],
    provider: BaseAIProvider,
    *,
    tool_name: str,
    max_issues: int = 20,
    max_workers: int = DEFAULT_MAX_WORKERS,
    workspace_root: Path | None = None,
    max_tokens: int = 2048,
    max_retries: int = 2,
    timeout: float = 60.0,
    context_lines: int = CONTEXT_LINES,
    max_prompt_tokens: int = 12000,
    base_delay: float | None = None,
    max_delay: float | None = None,
    backoff_factor: float | None = None,
    enable_cache: bool = False,
    cache_ttl: int = 3600,
    progress_callback: Callable[[int, int], None] | None = None,
    fallback_models: list[str] | None = None,
    sanitize_mode: SanitizeMode = SanitizeMode.WARN,
    cache_max_entries: int = 1000,
) -> list[AIFixSuggestion]:
    """Generate AI fix suggestions for unfixable issues.

    Reads the source file for each issue, sends context to the AI,
    and produces a unified diff. Runs API calls in parallel.

    Args:
        issues: Sequence of issues to fix.
        provider: AI provider instance.
        tool_name: Name of the tool that produced these issues.
        max_issues: Maximum number of issues to process.
        max_workers: Maximum concurrent API calls.
        workspace_root: Optional root directory limiting AI file access.
        max_tokens: Maximum tokens requested per fix generation call.
        max_retries: Maximum retry attempts for transient API failures.
        timeout: Request timeout in seconds per API call.
        context_lines: Lines of context before/after the issue line.
        max_prompt_tokens: Token budget for the prompt before context trimming.
        base_delay: Initial retry delay in seconds (None = use default).
        max_delay: Maximum retry delay in seconds (None = use default).
        backoff_factor: Retry backoff multiplier (None = use default).
        enable_cache: Whether to use the suggestion deduplication cache.
        cache_ttl: Time-to-live in seconds for cached suggestions.
        progress_callback: Optional callback invoked after each fix
            completes with (completed_count, total_count).
        fallback_models: Ordered list of fallback model identifiers
            to try when the primary model fails with a retryable error.
        sanitize_mode: How to handle prompt injection patterns.
        cache_max_entries: Maximum file cache entries to limit memory.

    Returns:
        List of fix suggestions.

    Raises:
        KeyboardInterrupt: Re-raised on user interrupt.
        SystemExit: Re-raised on system exit.
    """
    if not issues:
        return []

    # Limit the number of issues to process
    target_issues = list(issues)[:max_issues]
    logger.debug(
        f"generate_fixes: {tool_name} received {len(issues)} issues, "
        f"processing {len(target_issues)} (max={max_issues})",
    )

    root = workspace_root or resolve_workspace_root()

    # Shared file cache with thread safety (capped to limit memory usage).
    file_cache: dict[str, str | None] = {}
    cache_lock = threading.Lock()

    # Build the retry wrapper once and share across all calls.
    # Bind fallback_models via partial to avoid global mutable state.
    bound_call = functools.partial(
        _call_provider,
        fallback_models=fallback_models or [],
    )
    retrying_call = with_retry(
        max_retries=max_retries,
        base_delay=base_delay if base_delay is not None else DEFAULT_BASE_DELAY,
        max_delay=max_delay if max_delay is not None else DEFAULT_MAX_DELAY,
        backoff_factor=(
            backoff_factor if backoff_factor is not None else DEFAULT_BACKOFF_FACTOR
        ),
    )(bound_call)

    suggestions: list[AIFixSuggestion] = []
    completed_count = 0
    total_count = len(target_issues)

    single_issues: list[BaseIssue] = []

    # --- Multi-issue batching per file ---
    # Group issues by resolved file path; files with 2+ issues are
    # candidates for a single batch prompt.
    file_groups: dict[str, list[BaseIssue]] = defaultdict(list)
    for issue in target_issues:
        if not issue.file or not issue.line:
            continue
        resolved = resolve_workspace_file(issue.file, root)
        if resolved is None:
            # BUGFIX: these issues were previously dropped entirely, so they
            # were never processed AND never counted — completed_count could
            # then never reach total_count and progress_callback stalled
            # short of completion. Route them through the single-issue path,
            # which skips unresolvable files gracefully while still
            # advancing the progress counter.
            single_issues.append(issue)
            continue
        file_groups[str(resolved)].append(issue)

    for resolved_path, group in file_groups.items():
        if len(group) < 2:
            single_issues.extend(group)
            continue

        # Read the file for the batch prompt
        content = read_file_safely(resolved_path)
        if content is None:
            single_issues.extend(group)
            continue

        # Populate file_cache so single-fix fallback doesn't re-read.
        # Respect cache_max_entries to avoid unbounded growth.
        with cache_lock:
            if resolved_path not in file_cache and len(file_cache) >= cache_max_entries:
                oldest_key = next(iter(file_cache))
                del file_cache[oldest_key]
            file_cache[resolved_path] = content

        batch_result = _generate_batch_fixes(
            resolved_path,
            group,
            provider,
            tool_name,
            content,
            root,
            max_tokens,
            retrying_call,
            timeout,
            max_prompt_tokens,
            sanitize_mode=sanitize_mode,
        )
        if batch_result is not None:
            suggestions.extend(batch_result)
            completed_count += len(group)
            if progress_callback is not None:
                progress_callback(completed_count, total_count)
        else:
            # Fall back to single-issue mode for this file
            single_issues.extend(group)

    # Include issues that had no file/line (skipped by grouping) —
    # _generate_single_fix will skip them gracefully.
    for issue in target_issues:
        if not issue.file or not issue.line:
            single_issues.append(issue)

    workers = min(len(single_issues), max_workers) if single_issues else 0

    if workers <= 1:
        # Sequential path: zero or one worker; avoids thread-pool overhead.
        for issue in single_issues:
            result = _generate_single_fix(
                issue,
                provider,
                tool_name,
                file_cache,
                cache_lock,
                root,
                max_tokens,
                retrying_call,
                timeout,
                context_lines,
                max_prompt_tokens=max_prompt_tokens,
                enable_cache=enable_cache,
                cache_ttl=cache_ttl,
                sanitize_mode=sanitize_mode,
                cache_max_entries=cache_max_entries,
            )
            if result:
                suggestions.append(result)
            completed_count += 1
            if progress_callback is not None:
                progress_callback(completed_count, total_count)
    else:
        with ThreadPoolExecutor(max_workers=workers) as executor:
            futures = [
                executor.submit(
                    _generate_single_fix,
                    issue,
                    provider,
                    tool_name,
                    file_cache,
                    cache_lock,
                    root,
                    max_tokens,
                    retrying_call,
                    timeout,
                    context_lines,
                    max_prompt_tokens=max_prompt_tokens,
                    enable_cache=enable_cache,
                    cache_ttl=cache_ttl,
                    sanitize_mode=sanitize_mode,
                    cache_max_entries=cache_max_entries,
                )
                for issue in single_issues
            ]
            for future in as_completed(futures):
                try:
                    result = future.result()
                except (KeyboardInterrupt, SystemExit):
                    raise
                except Exception as exc:
                    # A failed worker still counts toward progress.
                    logger.debug(
                        f"AI fix worker failed ({type(exc).__name__}: {exc})",
                        exc_info=True,
                    )
                    completed_count += 1
                    if progress_callback is not None:
                        progress_callback(completed_count, total_count)
                    continue
                if result:
                    suggestions.append(result)
                completed_count += 1
                if progress_callback is not None:
                    progress_callback(completed_count, total_count)

    # Sort by (file, line) for deterministic ordering regardless of
    # thread completion order from as_completed().
    suggestions.sort(key=lambda s: (s.file, s.line))

    logger.debug(
        f"generate_fixes: {tool_name} produced "
        f"{len(suggestions)}/{len(target_issues)} suggestions",
    )
    return suggestions

603 

604 

def generate_fixes_from_params(
    issues: Sequence[BaseIssue],
    provider: BaseAIProvider,
    params: FixGenParams,
) -> list[AIFixSuggestion]:
    """Generate fixes using a ``FixGenParams`` parameter object.

    Thin wrapper around ``generate_fixes`` that unpacks the params
    object into keyword arguments. The keyword names below mirror the
    ``FixGenParams`` field names one-to-one; keep the two in sync when
    adding a new generation parameter.

    Args:
        issues: Sequence of issues to fix.
        provider: AI provider instance.
        params: Grouped generation parameters.

    Returns:
        List of fix suggestions.
    """
    return generate_fixes(
        issues,
        provider,
        tool_name=params.tool_name,
        max_issues=params.max_issues,
        max_workers=params.max_workers,
        workspace_root=params.workspace_root,
        max_tokens=params.max_tokens,
        max_retries=params.max_retries,
        timeout=params.timeout,
        context_lines=params.context_lines,
        max_prompt_tokens=params.max_prompt_tokens,
        base_delay=params.base_delay,
        max_delay=params.max_delay,
        backoff_factor=params.backoff_factor,
        enable_cache=params.enable_cache,
        cache_ttl=params.cache_ttl,
        progress_callback=params.progress_callback,
        fallback_models=params.fallback_models,
        sanitize_mode=params.sanitize_mode,
        cache_max_entries=params.cache_max_entries,
    )