Coverage for lintro / ai / risk.py: 89%

81 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2026-04-03 18:53 +0000

1"""Risk classification and patch statistics for AI fix suggestions.""" 

2 

3from __future__ import annotations 

4 

5import re 

6from dataclasses import dataclass 

7from pathlib import Path 

8from typing import TYPE_CHECKING 

9 

10from lintro.ai.enums import ConfidenceLevel, RiskLevel 

11 

12if TYPE_CHECKING: 

13 from collections.abc import Sequence 

14 

15 from lintro.ai.models import AIFixSuggestion 

16 

# Short module-level aliases for the two RiskLevel members this module
# classifies suggestions into; the functions below compare against and
# return these names.
SAFE_STYLE_RISK = RiskLevel.SAFE_STYLE
BEHAVIORAL_RISK = RiskLevel.BEHAVIORAL_RISK

19 

20 

@dataclass(frozen=True)
class PatchStats:
    """Immutable size summary of a patch built from fix suggestions.

    All counters default to zero, so ``PatchStats()`` describes an empty
    patch.

    Attributes:
        files: Number of files counted.
        hunks: Number of change hunks counted.
        lines_added: Number of added lines counted.
        lines_removed: Number of removed lines counted.
    """

    files: int = 0
    hunks: int = 0
    lines_added: int = 0
    lines_removed: int = 0

29 

30 

31def _ast_equivalent(original: str, suggested: str) -> bool | None: 

32 """Compare ASTs of original and suggested Python code. 

33 

34 Returns True if both snippets parse to the same AST (style-only change), 

35 False if they differ (behavioral change), or None if either snippet 

36 is not valid Python (fall back to heuristic). 

37 """ 

38 import ast 

39 

40 try: 

41 orig_ast = ast.dump(ast.parse(original)) 

42 sugg_ast = ast.dump(ast.parse(suggested)) 

43 return orig_ast == sugg_ast 

44 except SyntaxError: 

45 return None # Not parseable, fall back to heuristic 

46 

47 

def _diff_is_style_only(suggestion: AIFixSuggestion) -> bool:
    """Check whether a suggestion only changes formatting, not behavior.

    The decision is made in this order:

    1. Compare the ASTs of the two snippets as given. A definite answer
       from ``_ast_equivalent`` is authoritative.
    2. If that is inconclusive and both snippets start at the same
       non-empty indentation, retry on dedented copies. Fragments taken
       from inside a function or class are indented and never parse
       as-is; without this step they would skip the AST check entirely.
    3. Otherwise compare the texts after a conservative normalization
       (trimmed edges, unified line endings, collapsed blank lines,
       trailing commas before closing brackets removed). Quotes and
       internal whitespace are NOT normalized.

    Args:
        suggestion: Fix suggestion to inspect. Only ``original_code`` and
            ``suggested_code`` are read; a falsy value is treated as "".

    Returns:
        True when the change is judged style-only, False otherwise.
    """
    import textwrap

    original = suggestion.original_code or ""
    suggested = suggestion.suggested_code or ""

    def _first_indent(text: str) -> str:
        """Return the leading whitespace of the first non-blank line."""
        for line in text.splitlines():
            if line.strip():
                return line[: len(line) - len(line.lstrip())]
        return ""

    # Try AST comparison first (authoritative for valid Python).
    ast_result = _ast_equivalent(original, suggested)
    if ast_result is None:
        # Only retry when both fragments sit at the same nesting depth:
        # dedenting snippets with different indentation would hide a
        # change of block level.
        indent = _first_indent(original)
        if indent and indent == _first_indent(suggested):
            ast_result = _ast_equivalent(
                textwrap.dedent(original),
                textwrap.dedent(suggested),
            )
    if ast_result is not None:
        return ast_result

    def _normalize(text: str) -> str:
        """Normalize for style-only comparison without altering semantics.

        Only performs safe normalizations: trim edges, normalize line
        endings and consecutive blank lines, and remove trailing commas
        before closing brackets. Does NOT remove internal whitespace or
        rewrite quote characters, which could mask behavioral changes.
        """
        # Trim leading/trailing whitespace
        text = text.strip()
        # Normalize line endings
        text = text.replace("\r\n", "\n").replace("\r", "\n")
        # Collapse consecutive blank lines
        text = re.sub(r"\n{3,}", "\n\n", text)
        # Remove trailing commas before closing brackets.
        # NOTE(review): for Python this also equates ``(x,)`` with ``(x)``;
        # parseable snippets are decided by the AST check above, so this
        # only affects text that could not be parsed.
        text = re.sub(r",(\s*[}\])])", r"\1", text)
        return text

    # Fall back to the whitespace/trailing-comma normalization heuristic.
    return _normalize(original) == _normalize(suggested)

84 

85 

def classify_fix_risk(suggestion: AIFixSuggestion) -> str:
    """Decide whether a suggestion is safe style-only or a behavioral risk.

    A suggestion is rated safe only when all three checks pass:

    * its reported ``risk_level`` equals ``SAFE_STYLE_RISK``,
    * its reported ``confidence`` is high or medium, and
    * ``_diff_is_style_only`` agrees that the change is style-only.

    Both reported values are stripped and lower-cased before comparison,
    and a missing value is treated as an empty string. Anything else,
    including unknown or missing risk levels, is rated behavioral.

    Args:
        suggestion: Fix suggestion to classify.

    Returns:
        ``SAFE_STYLE_RISK`` or ``BEHAVIORAL_RISK``.
    """
    reported_risk = (suggestion.risk_level or "").strip().lower()
    if reported_risk != SAFE_STYLE_RISK:
        # Unknown or explicitly behavioral claims are never upgraded.
        return BEHAVIORAL_RISK

    reported_confidence = (suggestion.confidence or "").strip().lower()
    if reported_confidence not in (ConfidenceLevel.HIGH, ConfidenceLevel.MEDIUM):
        # A low-confidence "safe" claim is not trusted.
        return BEHAVIORAL_RISK

    # Cross-check the claim against the actual code change.
    if _diff_is_style_only(suggestion):
        return SAFE_STYLE_RISK
    return BEHAVIORAL_RISK

116 

117 

def is_safe_style_fix(suggestion: AIFixSuggestion) -> bool:
    """Tell whether ``classify_fix_risk`` rates the suggestion as safe.

    Args:
        suggestion: Fix suggestion to check.

    Returns:
        True when the classification equals ``SAFE_STYLE_RISK``.
    """
    verdict = classify_fix_risk(suggestion)
    return verdict == SAFE_STYLE_RISK

121 

122 

def calculate_patch_stats(suggestions: Sequence[AIFixSuggestion]) -> PatchStats:
    """Calculate patch stats for a group of fix suggestions.

    For each suggestion with a non-blank ``diff``, the unified diff text is
    scanned: ``@@`` lines count as hunks, ``+`` lines as additions and
    ``-`` lines as removals, with ``---`` / ``+++`` file headers skipped.
    When no diff is available, the churn is computed by comparing
    ``original_code`` and ``suggested_code`` line by line.

    Args:
        suggestions: Suggestions to summarize. ``file``, ``diff``,
            ``original_code`` and ``suggested_code`` are read; falsy
            values are treated as absent/empty.

    Returns:
        Aggregated ``PatchStats``; all zeros for an empty input.
    """
    import difflib

    if not suggestions:
        return PatchStats()

    files: set[str] = {str(Path(s.file)) for s in suggestions if s.file}
    hunks = 0
    lines_added = 0
    lines_removed = 0

    for suggestion in suggestions:
        diff = suggestion.diff or ""
        if diff.strip():
            diff_lines = diff.splitlines()
            # Outside a hunk, "---"/"+++" lines are file headers. Inside a
            # hunk they are content: a removed "-- sql comment" or YAML
            # "---" shows up as "--- ..." / "----" and must be counted.
            in_hunk = False
            for index, line in enumerate(diff_lines):
                if line.startswith("@@"):
                    hunks += 1
                    in_hunk = True
                    continue
                if in_hunk and line.startswith("---"):
                    # A "---" line inside a hunk only starts the next
                    # file's header when it is followed by "+++" and then
                    # (if anything follows) a hunk marker.
                    following = diff_lines[index + 1 : index + 3]
                    starts_next_file = (
                        bool(following)
                        and following[0].startswith("+++")
                        and (len(following) < 2 or following[1].startswith("@@"))
                    )
                    if starts_next_file:
                        in_hunk = False
                if not in_hunk and line.startswith(("---", "+++")):
                    continue  # file header, not a changed line
                if line.startswith("+"):
                    lines_added += 1
                elif line.startswith("-"):
                    lines_removed += 1
            continue

        # Fallback estimate when diff is unavailable: compute actual churn.
        # Missing code is treated as empty, matching how diff is handled.
        original_lines = (suggestion.original_code or "").splitlines()
        suggested_lines = (suggestion.suggested_code or "").splitlines()
        matcher = difflib.SequenceMatcher(
            None,
            original_lines,
            suggested_lines,
        )
        for tag, i1, i2, j1, j2 in matcher.get_opcodes():
            if tag == "equal":
                continue
            # replace / delete / insert: one side's range is empty for
            # delete and insert, so both differences can always be added.
            lines_removed += i2 - i1
            lines_added += j2 - j1
            hunks += 1

    return PatchStats(
        files=len(files),
        hunks=hunks,
        lines_added=lines_added,
        lines_removed=lines_removed,
    )