Coverage for tests / unit / ai / test_risk.py: 100%

103 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2026-04-03 18:53 +0000

1"""Tests for AI fix risk classification and patch statistics.""" 

2 

3from __future__ import annotations 

4 

5from dataclasses import FrozenInstanceError 

6 

7import pytest 

8from assertpy import assert_that 

9 

10from lintro.ai.models import AIFixSuggestion 

11from lintro.ai.risk import ( 

12 BEHAVIORAL_RISK, 

13 SAFE_STYLE_RISK, 

14 PatchStats, 

15 calculate_patch_stats, 

16 classify_fix_risk, 

17 is_safe_style_fix, 

18) 

19 

20# -- AI self-classification: risk_level + confidence combos ---------------- 

21 

22 

def test_safe_style_high_confidence_returns_safe_style() -> None:
    """A high-confidence 'safe-style' claim keeps the safe-style label."""
    fix = AIFixSuggestion(confidence="high", risk_level="safe-style")
    verdict = classify_fix_risk(fix)
    assert_that(verdict).is_equal_to(SAFE_STYLE_RISK)

27 

28 

def test_safe_style_medium_confidence_returns_safe_style() -> None:
    """A medium-confidence 'safe-style' claim keeps the safe-style label."""
    fix = AIFixSuggestion(confidence="medium", risk_level="safe-style")
    verdict = classify_fix_risk(fix)
    assert_that(verdict).is_equal_to(SAFE_STYLE_RISK)

33 

34 

def test_safe_style_low_confidence_returns_behavioral() -> None:
    """A low-confidence 'safe-style' claim is classified as behavioral-risk."""
    fix = AIFixSuggestion(confidence="low", risk_level="safe-style")
    verdict = classify_fix_risk(fix)
    assert_that(verdict).is_equal_to(BEHAVIORAL_RISK)

39 

40 

def test_safe_style_empty_confidence_returns_behavioral() -> None:
    """A 'safe-style' claim with an empty confidence is behavioral-risk."""
    fix = AIFixSuggestion(confidence="", risk_level="safe-style")
    verdict = classify_fix_risk(fix)
    assert_that(verdict).is_equal_to(BEHAVIORAL_RISK)

45 

46 

@pytest.mark.parametrize(
    ("risk_level", "confidence", "expected"),
    [
        # Each case carries its own id so the label sits next to its data.
        pytest.param("safe-style", "high", SAFE_STYLE_RISK, id="safe-high"),
        pytest.param("safe-style", "medium", SAFE_STYLE_RISK, id="safe-medium"),
        pytest.param("safe-style", "low", BEHAVIORAL_RISK, id="safe-low"),
        pytest.param("safe-style", "", BEHAVIORAL_RISK, id="safe-empty-conf"),
        pytest.param(
            "behavioral-risk",
            "high",
            BEHAVIORAL_RISK,
            id="behavioral-high",
        ),
        pytest.param(
            "behavioral-risk",
            "medium",
            BEHAVIORAL_RISK,
            id="behavioral-medium",
        ),
        pytest.param(
            "behavioral-risk",
            "low",
            BEHAVIORAL_RISK,
            id="behavioral-low",
        ),
        pytest.param("", "high", BEHAVIORAL_RISK, id="empty-risk-high"),
        pytest.param("", "", BEHAVIORAL_RISK, id="empty-risk-empty-conf"),
        pytest.param("unknown", "high", BEHAVIORAL_RISK, id="unknown-risk-high"),
    ],
)
def test_classify_fix_risk_matrix(
    risk_level: str,
    confidence: str,
    expected: str,
) -> None:
    """Check classify_fix_risk against a table of risk_level/confidence pairs.

    Args:
        risk_level: Value passed as the suggestion's ``risk_level``.
        confidence: Value passed as the suggestion's ``confidence``.
        expected: Classification that classify_fix_risk should return.
    """
    fix = AIFixSuggestion(confidence=confidence, risk_level=risk_level)
    verdict = classify_fix_risk(fix)
    assert_that(verdict).is_equal_to(expected)

82 

83 

84# -- Edge cases for risk_level values -------------------------------------- 

85 

86 

def test_explicit_behavioral_risk_returns_behavioral() -> None:
    """An explicit 'behavioral-risk' level stays behavioral at high confidence."""
    fix = AIFixSuggestion(confidence="high", risk_level="behavioral-risk")
    verdict = classify_fix_risk(fix)
    assert_that(verdict).is_equal_to(BEHAVIORAL_RISK)

91 

92 

def test_empty_risk_level_returns_behavioral() -> None:
    """An empty risk_level is behavioral-risk even with high confidence."""
    fix = AIFixSuggestion(confidence="high", risk_level="")
    verdict = classify_fix_risk(fix)
    assert_that(verdict).is_equal_to(BEHAVIORAL_RISK)

97 

98 

def test_unexpected_risk_level_returns_behavioral() -> None:
    """An unrecognised risk_level string is classified as behavioral-risk."""
    fix = AIFixSuggestion(confidence="high", risk_level="something-else")
    verdict = classify_fix_risk(fix)
    assert_that(verdict).is_equal_to(BEHAVIORAL_RISK)

103 

104 

def test_whitespace_risk_level_returns_behavioral() -> None:
    """A risk_level made only of whitespace is classified as behavioral-risk."""
    fix = AIFixSuggestion(confidence="high", risk_level=" ")
    verdict = classify_fix_risk(fix)
    assert_that(verdict).is_equal_to(BEHAVIORAL_RISK)

109 

110 

def test_case_insensitive_risk_level() -> None:
    """A mixed-case 'Safe-Style' risk_level is still recognised as safe-style."""
    fix = AIFixSuggestion(confidence="high", risk_level="Safe-Style")
    verdict = classify_fix_risk(fix)
    assert_that(verdict).is_equal_to(SAFE_STYLE_RISK)

115 

116 

def test_case_insensitive_confidence() -> None:
    """An upper-case 'HIGH' confidence is accepted like its lower-case form."""
    fix = AIFixSuggestion(confidence="HIGH", risk_level="safe-style")
    verdict = classify_fix_risk(fix)
    assert_that(verdict).is_equal_to(SAFE_STYLE_RISK)

121 

122 

123# -- is_safe_style_fix delegation ------------------------------------------ 

124 

125 

def test_is_safe_style_fix_returns_true_for_safe_style() -> None:
    """is_safe_style_fix is True for a high-confidence safe-style suggestion."""
    fix = AIFixSuggestion(confidence="high", risk_level="safe-style")
    outcome = is_safe_style_fix(fix)
    assert_that(outcome).is_true()

130 

131 

def test_is_safe_style_fix_returns_false_for_behavioral() -> None:
    """is_safe_style_fix is False for a 'behavioral-risk' suggestion."""
    fix = AIFixSuggestion(confidence="high", risk_level="behavioral-risk")
    outcome = is_safe_style_fix(fix)
    assert_that(outcome).is_false()

136 

137 

def test_is_safe_style_fix_returns_false_for_empty_risk() -> None:
    """is_safe_style_fix is False when risk_level is the empty string."""
    fix = AIFixSuggestion(confidence="medium", risk_level="")
    outcome = is_safe_style_fix(fix)
    assert_that(outcome).is_false()

142 

143 

def test_is_safe_style_fix_returns_false_for_low_confidence_safe() -> None:
    """is_safe_style_fix is False for a low-confidence safe-style claim."""
    fix = AIFixSuggestion(confidence="low", risk_level="safe-style")
    outcome = is_safe_style_fix(fix)
    assert_that(outcome).is_false()

148 

149 

150# -- Heuristic cross-check ------------------------------------------------ 

151 

152 

def test_heuristic_downgrades_safe_claim_with_behavioral_diff() -> None:
    """A safe-style claim whose code changes a value becomes behavioral-risk."""
    before, after = "x = 1", "x = 2"
    fix = AIFixSuggestion(
        risk_level="safe-style",
        confidence="high",
        original_code=before,
        suggested_code=after,
    )
    verdict = classify_fix_risk(fix)
    assert_that(verdict).is_equal_to(BEHAVIORAL_RISK)

162 

163 

def test_heuristic_allows_whitespace_only_diff() -> None:
    """A safe-style claim with a whitespace-only change stays safe-style."""
    # NOTE(review): the two snippets are byte-identical as shown here; the
    # intended whitespace difference may have been collapsed when this text
    # was rendered. Confirm against the real test file.
    before, after = "x = 1", "x = 1"
    fix = AIFixSuggestion(
        risk_level="safe-style",
        confidence="high",
        original_code=before,
        suggested_code=after,
    )
    verdict = classify_fix_risk(fix)
    assert_that(verdict).is_equal_to(SAFE_STYLE_RISK)

173 

174 

def test_heuristic_allows_quote_normalization() -> None:
    """Switching single quotes to double quotes keeps the safe-style label."""
    before, after = "x = 'hello'", 'x = "hello"'
    fix = AIFixSuggestion(
        risk_level="safe-style",
        confidence="high",
        original_code=before,
        suggested_code=after,
    )
    verdict = classify_fix_risk(fix)
    assert_that(verdict).is_equal_to(SAFE_STYLE_RISK)

184 

185 

def test_heuristic_allows_trailing_comma_changes() -> None:
    """Adding a trailing comma to a call keeps the safe-style label."""
    before, after = "f(a, b)", "f(a, b,)"
    fix = AIFixSuggestion(
        risk_level="safe-style",
        confidence="high",
        original_code=before,
        suggested_code=after,
    )
    verdict = classify_fix_risk(fix)
    assert_that(verdict).is_equal_to(SAFE_STYLE_RISK)

195 

196 

197# -- PatchStats dataclass -------------------------------------------------- 

198 

199 

def test_patch_stats_defaults() -> None:
    """A PatchStats built with no arguments reports zero for every counter."""
    empty = PatchStats()
    # Same four fields, checked in the same order as before.
    for counter in ("files", "hunks", "lines_added", "lines_removed"):
        assert_that(getattr(empty, counter)).is_equal_to(0)

207 

208 

def test_patch_stats_is_frozen() -> None:
    """Assigning to a PatchStats field raises FrozenInstanceError."""
    frozen = PatchStats(files=1, hunks=2, lines_added=3, lines_removed=4)
    with pytest.raises(FrozenInstanceError):
        # Deliberate write to a frozen dataclass, hence the type-checker ignore.
        frozen.files = 99  # type: ignore[misc]

214 

215 

216# -- calculate_patch_stats ------------------------------------------------- 

217 

218 

def test_calculate_patch_stats_empty_list() -> None:
    """With no suggestions the result equals a default-constructed PatchStats."""
    expected = PatchStats()
    assert_that(calculate_patch_stats([])).is_equal_to(expected)

223 

224 

def test_calculate_patch_stats_from_unified_diff() -> None:
    """Counts come from the unified diff: 1 file, 1 hunk, 2 added, 1 removed."""
    diff_lines = [
        "--- a/src/main.py",
        "+++ b/src/main.py",
        "@@ -1,2 +1,3 @@",
        "-a = 1",
        "+a = 2",
        "+b = 3",
    ]
    # Every line, including the last, is newline-terminated.
    unified_diff = "".join(f"{line}\n" for line in diff_lines)
    fix = AIFixSuggestion(file="src/main.py", diff=unified_diff)

    totals = calculate_patch_stats([fix])

    assert_that(totals.files).is_equal_to(1)
    assert_that(totals.hunks).is_equal_to(1)
    assert_that(totals.lines_added).is_equal_to(2)
    assert_that(totals.lines_removed).is_equal_to(1)

244 

245 

def test_calculate_patch_stats_fallback_without_diff() -> None:
    """Without a diff, stats are derived from the original and suggested code."""
    before = "a = 1\n"
    after = "a = 1\nb = 2\n"
    fix = AIFixSuggestion(
        file="src/main.py",
        original_code=before,
        suggested_code=after,
    )

    totals = calculate_patch_stats([fix])

    assert_that(totals.files).is_equal_to(1)
    assert_that(totals.hunks).is_equal_to(1)
    assert_that(totals.lines_added).is_equal_to(1)
    assert_that(totals.lines_removed).is_equal_to(0)

259 

260 

def test_calculate_patch_stats_multiple_files() -> None:
    """Stats from suggestions for two different files are summed together."""
    # One replaced line in a.py.
    diff_a = "".join(
        [
            "--- a/a.py\n",
            "+++ b/a.py\n",
            "@@ -1 +1 @@\n",
            "-old\n",
            "+new\n",
        ],
    )
    # One added line in b.py.
    diff_b = "".join(
        [
            "--- a/b.py\n",
            "+++ b/b.py\n",
            "@@ -1 +1,2 @@\n",
            "+added\n",
        ],
    )
    fixes = [
        AIFixSuggestion(file="a.py", diff=diff_a),
        AIFixSuggestion(file="b.py", diff=diff_b),
    ]

    totals = calculate_patch_stats(fixes)

    assert_that(totals.files).is_equal_to(2)
    assert_that(totals.hunks).is_equal_to(2)
    assert_that(totals.lines_added).is_equal_to(2)
    assert_that(totals.lines_removed).is_equal_to(1)

279 

280 

def test_calculate_patch_stats_fallback_lines_removed() -> None:
    """Without a diff, a shorter suggestion yields removed lines and none added."""
    before = "line1\nline2\nline3\n"
    after = "line1\n"
    fix = AIFixSuggestion(
        file="c.py",
        original_code=before,
        suggested_code=after,
    )

    totals = calculate_patch_stats([fix])

    assert_that(totals.lines_removed).is_equal_to(2)
    assert_that(totals.lines_added).is_equal_to(0)