Coverage for tests / unit / security / test_json_edge_cases.py: 99%
148 statements
« prev ^ index » next coverage.py v7.13.0, created at 2026-04-03 18:53 +0000
1"""Tests for JSON parsing edge cases and potential security issues.
3These tests verify that JSON parsing handles malformed, large, and
4potentially malicious input safely without causing DoS or crashes.
5"""
7from __future__ import annotations
9import json
10from dataclasses import dataclass
12from assertpy import assert_that
14from lintro.parsers.base_issue import BaseIssue
15from lintro.parsers.streaming import (
16 collect_streaming_results,
17 stream_json_array_fallback,
18 stream_json_lines,
19)
@dataclass
class SimpleIssue(BaseIssue):
    """Simple issue class for testing."""

    # Diagnostic code; fixed default since these tests never vary it.
    code: str = "TEST001"
    # Severity label; fixed default since these tests never vary it.
    severity: str = "error"
def simple_parse_item(item: dict[str, object]) -> SimpleIssue | None:
    """Convert a decoded JSON object into a SimpleIssue for testing.

    Args:
        item: Dictionary to parse.

    Returns:
        SimpleIssue or None.
    """
    file_value = item.get("file", "")
    message_value = item.get("message", "")
    # Reject anything where either field decoded to a non-string value.
    if not (isinstance(file_value, str) and isinstance(message_value, str)):
        return None
    return SimpleIssue(file=file_value, line=1, column=1, message=message_value)
46# =============================================================================
47# Tests for stream_json_lines edge cases
48# =============================================================================
def test_json_lines_empty_input() -> None:
    """An empty string yields zero parsed issues."""
    parsed = list(stream_json_lines("", simple_parse_item))
    assert_that(parsed).is_empty()
def test_json_lines_whitespace_only_input() -> None:
    """Input consisting solely of whitespace yields no issues."""
    parsed = list(stream_json_lines(" \n\n \n", simple_parse_item))
    assert_that(parsed).is_empty()
def test_json_lines_invalid_json_lines_skipped() -> None:
    """A non-JSON line in the middle is skipped, not fatal."""
    payload = '{"file": "a.py"}\nnot json\n{"file": "b.py"}\n'
    parsed = list(stream_json_lines(payload, simple_parse_item))
    assert_that(parsed).is_length(2)
def test_json_lines_truncated_json_skipped() -> None:
    """A truncated JSON line is dropped while its neighbors parse."""
    payload = '{"file": "a.py"}\n{"file": "incomplete\n{"file": "b.py"}\n'
    parsed = list(stream_json_lines(payload, simple_parse_item))
    assert_that(parsed).is_length(2)
def test_json_lines_non_dict_json_skipped() -> None:
    """JSON values that are not objects (array/string/number/null) are ignored."""
    payload = '{"file": "a.py"}\n["array"]\n"string"\n123\nnull\n'
    parsed = list(stream_json_lines(payload, simple_parse_item))
    assert_that(parsed).is_length(1)
def test_json_lines_special_characters() -> None:
    """Spaces and escaped quotes inside JSON strings survive parsing."""
    payload = '{"file": "path/with spaces.py", "message": "quote: \\"test\\""}\n'
    parsed = list(stream_json_lines(payload, simple_parse_item))
    assert_that(parsed).is_length(1)
    assert_that(parsed[0].file).is_equal_to("path/with spaces.py")
def test_json_lines_unicode() -> None:
    """Accented and CJK characters in JSON strings are handled."""
    payload = '{"file": "file_\u00e9\u00e8.py", "message": "\u4e2d\u6587"}\n'
    parsed = list(stream_json_lines(payload, simple_parse_item))
    assert_that(parsed).is_length(1)
def test_json_lines_emoji() -> None:
    """Astral-plane emoji inside a message do not break parsing."""
    payload = '{"file": "test.py", "message": "Error: \U0001f4a5 boom"}\n'
    parsed = list(stream_json_lines(payload, simple_parse_item))
    assert_that(parsed).is_length(1)
def test_json_lines_deeply_nested() -> None:
    """A 50-level-deep object parses without stack overflow."""
    # Build {"file": ..., "meta": {"level": {"level": ...}}} 50 levels deep,
    # typing each node explicitly so no type: ignore is needed.
    innermost: dict[str, object] = {}
    payload: dict[str, object] = {"file": "test.py", "meta": innermost}
    node = innermost
    for _ in range(50):  # 50 levels deep
        child: dict[str, object] = {}
        node["level"] = child
        node = child

    parsed = list(stream_json_lines(json.dumps(payload) + "\n", simple_parse_item))
    assert_that(parsed).is_length(1)
def test_json_lines_large_json_object() -> None:
    """A 1MB string value streams through without memory trouble."""
    megabyte = 1024 * 1024
    filler = "x" * megabyte
    payload = f'{{"file": "test.py", "message": "{filler}"}}\n'

    parsed = list(stream_json_lines(payload, simple_parse_item))
    assert_that(parsed).is_length(1)
    assert_that(len(parsed[0].message)).is_equal_to(megabyte)
def test_json_lines_many_lines() -> None:
    """One thousand JSON Lines are all consumed."""
    payload = "\n".join(
        f'{{"file": "file{i}.py", "message": "msg"}}' for i in range(1000)
    )

    parsed = list(stream_json_lines(payload, simple_parse_item))
    assert_that(parsed).is_length(1000)
def test_json_lines_mixed_valid_invalid() -> None:
    """Interleaved good and bad lines do not stop the stream."""
    lines = (
        '{"file": "1.py"}',
        "invalid",
        '{"file": "2.py"}',
        "{incomplete",
        '{"file": "3.py"}',
        "",
        '{"file": "4.py"}',
    )

    parsed = list(stream_json_lines("\n".join(lines), simple_parse_item))
    assert_that(parsed).is_length(4)
def test_json_lines_parser_exception_handling() -> None:
    """An exception raised by the parse callback is contained per item."""

    def raising_parser(item: dict[str, object]) -> SimpleIssue | None:
        # Blow up only on the poisoned item; delegate otherwise.
        if item.get("fail"):
            raise ValueError("Intentional failure")
        return simple_parse_item(item)

    payload = '{"file": "1.py"}\n{"fail": true}\n{"file": "2.py"}\n'
    parsed = list(stream_json_lines(payload, raising_parser))
    assert_that(parsed).is_length(2)
def test_json_lines_not_starting_with_brace_skipped() -> None:
    """Lines that are not JSON objects (prose, single-quoted, arrays) are skipped."""
    payload = "Info: starting\n{'file': 'test.py'}\n[1,2,3]\n"
    parsed = list(stream_json_lines(payload, simple_parse_item))
    assert_that(parsed).is_empty()
176# =============================================================================
177# Tests for stream_json_array_fallback edge cases
178# =============================================================================
def test_json_array_empty_array() -> None:
    """An empty JSON array produces no issues."""
    parsed = list(stream_json_array_fallback("[]", simple_parse_item))
    assert_that(parsed).is_empty()
def test_json_array_empty_object() -> None:
    """A bare empty object produces no issues."""
    parsed = list(stream_json_array_fallback("{}", simple_parse_item))
    assert_that(parsed).is_empty()
def test_json_array_empty_string() -> None:
    """Empty input to the array fallback produces no issues."""
    parsed = list(stream_json_array_fallback("", simple_parse_item))
    assert_that(parsed).is_empty()
def test_json_array_valid_array() -> None:
    """A well-formed two-element array yields two issues."""
    payload = '[{"file": "a.py"}, {"file": "b.py"}]'
    parsed = list(stream_json_array_fallback(payload, simple_parse_item))
    assert_that(parsed).is_length(2)
def test_json_array_with_trailing_data() -> None:
    """Non-JSON text after the array does not discard the parsed items."""
    payload = '[{"file": "a.py"}]\nSome trailing text'
    parsed = list(stream_json_array_fallback(payload, simple_parse_item))
    assert_that(parsed).is_length(1)
def test_json_array_fallback_to_json_lines() -> None:
    """When the input is not an array, parsing falls back to JSON Lines."""
    payload = '{"file": "a.py"}\n{"file": "b.py"}\n'
    parsed = list(stream_json_array_fallback(payload, simple_parse_item))
    assert_that(parsed).is_length(2)
def test_json_array_non_dict_items_skipped() -> None:
    """Array entries that are not objects are silently dropped."""
    payload = '[{"file": "a.py"}, "string", 123, null]'
    parsed = list(stream_json_array_fallback(payload, simple_parse_item))
    assert_that(parsed).is_length(1)
def test_json_array_large_array() -> None:
    """An array of 500 objects is fully processed."""
    body = ",".join(f'{{"file": "file{i}.py"}}' for i in range(500))

    parsed = list(stream_json_array_fallback("[" + body + "]", simple_parse_item))
    assert_that(parsed).is_length(500)
236# =============================================================================
237# Tests for collect_streaming_results helper
238# =============================================================================
def test_collect_streaming_results_collects_all() -> None:
    """The helper drains the generator into a concrete list."""
    payload = '{"file": "a.py"}\n{"file": "b.py"}\n{"file": "c.py"}\n'

    collected = collect_streaming_results(
        stream_json_lines(payload, simple_parse_item),
    )
    assert_that(collected).is_length(3)
    assert_that(collected).is_instance_of(list)
def test_collect_streaming_results_empty_generator() -> None:
    """Draining an exhausted generator gives an empty list."""
    collected = collect_streaming_results(
        stream_json_lines("", simple_parse_item),
    )
    assert_that(collected).is_empty()
258# =============================================================================
259# Tests for JSON security concerns
260# =============================================================================
def test_json_bomb_protection() -> None:
    """Verify JSON bomb (exponential expansion) doesn't crash.

    Note: Python's json module handles this reasonably well,
    but we test to ensure we don't introduce our own issues.
    This test verifies that moderately nested JSON doesn't cause issues.
    """
    data = {"a": {"b": {"c": {"d": {"e": {"f": "value"}}}}}}
    input_data = json.dumps(data) + "\n"

    # Should process without crashing.
    results = list(stream_json_lines(input_data, simple_parse_item))
    # simple_parse_item defaults missing "file"/"message" keys to "", so the
    # nested object still yields exactly one issue.  The previous assertion
    # (is_not_none) could never fail, since list() never returns None.
    assert_that(results).is_length(1)
def test_json_null_bytes() -> None:
    """An escaped NUL inside a JSON string round-trips intact."""
    payload = '{"file": "test\\u0000.py", "message": "null byte"}\n'
    parsed = list(stream_json_lines(payload, simple_parse_item))
    assert_that(parsed).is_length(1)
    assert_that(parsed[0].file).contains("\x00")
def test_json_control_characters() -> None:
    """Escaped control characters (tab) are decoded correctly."""
    payload = '{"file": "test.py", "message": "tab:\\there"}\n'
    parsed = list(stream_json_lines(payload, simple_parse_item))
    assert_that(parsed).is_length(1)
    assert_that(parsed[0].message).contains("\t")
def test_json_very_long_string_keys() -> None:
    """A 10000-character object key does not break parsing."""
    huge_key = "k" * 10000
    payload = f'{{"{huge_key}": "value", "file": "test.py"}}\n'
    parsed = list(stream_json_lines(payload, simple_parse_item))
    assert_that(parsed).is_length(1)
def test_json_many_keys_in_object() -> None:
    """An object carrying 1000 extra keys still parses."""
    extras = ", ".join(f'"key{i}": "value"' for i in range(1000))
    payload = '{"file": "test.py", ' + extras + "}\n"
    parsed = list(stream_json_lines(payload, simple_parse_item))
    assert_that(parsed).is_length(1)