Coverage for lintro / parsers / gitleaks / gitleaks_parser.py: 95%
60 statements
« prev ^ index » next coverage.py v7.13.0, created at 2026-04-03 18:53 +0000
« prev ^ index » next coverage.py v7.13.0, created at 2026-04-03 18:53 +0000
1"""Gitleaks output parser for secret detection findings."""
3from __future__ import annotations
5import json
7from loguru import logger
9from lintro.parsers.base_parser import validate_int_field, validate_str_field
10from lintro.parsers.gitleaks.gitleaks_issue import GitleaksIssue
def parse_gitleaks_output(output: str | None) -> list[GitleaksIssue]:
    """Parse Gitleaks JSON output into GitleaksIssue objects.

    Gitleaks outputs a JSON array at the root level. Each element represents
    a detected secret with fields like File, StartLine, RuleID, etc.

    Args:
        output: Raw JSON output string from gitleaks, or None.

    Returns:
        List of parsed secret detection findings. Returns empty list if the
        output is empty, is not valid JSON, or is not a JSON array. Array
        elements that are not objects, or that fail to parse, are skipped.
    """
    if not output or not output.strip():
        return []

    text = output.strip()

    # Gitleaks outputs an empty array [] when no secrets found
    if text == "[]":
        return []

    try:
        data = json.loads(text)
    except json.JSONDecodeError as e:
        logger.warning(f"Failed to parse gitleaks JSON output: {e}")
        return []

    # Gitleaks outputs a JSON array at root level
    if not isinstance(data, list):
        # loguru formats messages with str.format-style braces, so a "%s"
        # placeholder would be logged literally; interpolate the type name
        # directly instead.
        logger.warning(
            f"Gitleaks output is not a JSON array, got {type(data).__name__}",
        )
        return []

    issues: list[GitleaksIssue] = []

    for item in data:
        if not isinstance(item, dict):
            logger.debug("Skipping non-dict item in gitleaks output")
            continue

        # Keep the try body limited to the call that can raise so that one
        # malformed finding does not discard the remaining ones.
        try:
            issue = _parse_single_finding(item)
        except (KeyError, TypeError, ValueError) as e:
            logger.debug(f"Failed to parse gitleaks finding: {e}")
            continue

        if issue is not None:
            issues.append(issue)

    return issues
def _parse_single_finding(item: dict[str, object]) -> GitleaksIssue | None:
    """Build a GitleaksIssue from one decoded gitleaks finding.

    Args:
        item: Dictionary representing a single finding from gitleaks JSON.

    Returns:
        The populated GitleaksIssue, or None when the validated ``File``
        value is empty.
    """

    def text_of(key: str) -> str:
        # Run the shared string validator on the value stored under ``key``.
        return validate_str_field(value=item.get(key), field_name=key)

    def number_of(key: str) -> int:
        # Run the shared integer validator on the value stored under ``key``.
        return validate_int_field(value=item.get(key), field_name=key)

    # File is the only field validated with warnings enabled; a finding
    # without it is dropped.
    path = validate_str_field(
        value=item.get("File"),
        field_name="File",
        log_warning=True,
    )
    if not path:
        logger.warning("Skipping gitleaks finding with empty File")
        return None

    # Location of the match within the file.
    first_line = number_of("StartLine")
    first_col = number_of("StartColumn")
    last_line = number_of("EndLine")
    last_col = number_of("EndColumn")

    # Rule and match details.
    rule = text_of("RuleID")
    summary = text_of("Description")
    secret_text = text_of("Secret")
    matched = text_of("Match")
    finger = text_of("Fingerprint")

    # Git-related fields (populated when scanning git history).
    sha = text_of("Commit")
    author_name = text_of("Author")
    author_email = text_of("Email")
    commit_date = text_of("Date")
    message = text_of("Message")

    # Entropy: accept real numbers only; bool is excluded explicitly because
    # it is a subclass of int.
    raw_entropy = item.get("Entropy")
    if isinstance(raw_entropy, bool) or not isinstance(raw_entropy, (int, float)):
        score = 0.0
    else:
        score = float(raw_entropy)

    # Tags: keep only the string members of a list value.
    raw_tags = item.get("Tags")
    labels: list[str] = (
        [tag for tag in raw_tags if isinstance(tag, str)]
        if isinstance(raw_tags, list)
        else []
    )

    return GitleaksIssue(
        file=path,
        line=first_line,
        column=first_col,
        end_line=last_line,
        end_column=last_col,
        rule_id=rule,
        description=summary,
        secret=secret_text,
        entropy=score,
        tags=labels,
        fingerprint=finger,
        match=matched,
        commit=sha,
        author=author_name,
        email=author_email,
        date=commit_date,
        commit_message=message,
    )