Coverage for lintro / parsers / gitleaks / gitleaks_parser.py: 95%

60 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2026-04-03 18:53 +0000

1"""Gitleaks output parser for secret detection findings.""" 

2 

3from __future__ import annotations 

4 

5import json 

6 

7from loguru import logger 

8 

9from lintro.parsers.base_parser import validate_int_field, validate_str_field 

10from lintro.parsers.gitleaks.gitleaks_issue import GitleaksIssue 

11 

12 

def parse_gitleaks_output(output: str | None) -> list[GitleaksIssue]:
    """Parse Gitleaks JSON output into GitleaksIssue objects.

    Gitleaks outputs a JSON array at the root level. Each element represents
    a detected secret with fields like File, StartLine, RuleID, etc.

    Args:
        output: Raw JSON output string from gitleaks, or None.

    Returns:
        List of parsed secret detection findings. Returns empty list if the
        output is empty, is not valid JSON, or is not a JSON array. Array
        elements that are not JSON objects, or that fail to parse, are
        skipped.
    """
    text = output.strip() if output else ""
    if not text:
        return []

    # Gitleaks outputs an empty array [] when no secrets found
    if text == "[]":
        return []

    try:
        data = json.loads(text)
    except json.JSONDecodeError as e:
        logger.warning(f"Failed to parse gitleaks JSON output: {e}")
        return []

    # Gitleaks outputs a JSON array at root level
    if not isinstance(data, list):
        # loguru formats messages with str.format-style braces, so a "%s"
        # placeholder would be logged literally; interpolate the type name
        # directly instead.
        logger.warning(
            f"Gitleaks output is not a JSON array, got {type(data).__name__}",
        )
        return []

    issues: list[GitleaksIssue] = []

    for item in data:
        if not isinstance(item, dict):
            logger.debug("Skipping non-dict item in gitleaks output")
            continue

        try:
            issue = _parse_single_finding(item)
        except (KeyError, TypeError, ValueError) as e:
            # One malformed finding must not discard the rest of the report.
            logger.debug(f"Failed to parse gitleaks finding: {e}")
            continue

        if issue is not None:
            issues.append(issue)

    return issues

65 

66 

def _parse_single_finding(item: dict[str, object]) -> GitleaksIssue | None:
    """Build a GitleaksIssue from one decoded gitleaks finding.

    Args:
        item: Dictionary for a single element of the gitleaks JSON array.

    Returns:
        The populated GitleaksIssue, or None when the finding has no usable
        File value.
    """
    # File is the only field whose absence causes the finding to be dropped.
    source_file = validate_str_field(
        value=item.get("File"),
        field_name="File",
        log_warning=True,
    )
    if not source_file:
        logger.warning("Skipping gitleaks finding with empty File")
        return None

    # Location of the match within the file.
    first_line = validate_int_field(value=item.get("StartLine"), field_name="StartLine")
    first_col = validate_int_field(
        value=item.get("StartColumn"),
        field_name="StartColumn",
    )
    last_line = validate_int_field(value=item.get("EndLine"), field_name="EndLine")
    last_col = validate_int_field(value=item.get("EndColumn"), field_name="EndColumn")

    # Details of the rule that fired and what it matched.
    rule = validate_str_field(value=item.get("RuleID"), field_name="RuleID")
    summary = validate_str_field(
        value=item.get("Description"),
        field_name="Description",
    )
    secret_text = validate_str_field(value=item.get("Secret"), field_name="Secret")
    matched_text = validate_str_field(value=item.get("Match"), field_name="Match")
    finding_id = validate_str_field(
        value=item.get("Fingerprint"),
        field_name="Fingerprint",
    )

    # Git metadata (populated when scanning git history).
    commit_sha = validate_str_field(value=item.get("Commit"), field_name="Commit")
    author_name = validate_str_field(value=item.get("Author"), field_name="Author")
    author_email = validate_str_field(value=item.get("Email"), field_name="Email")
    commit_date = validate_str_field(value=item.get("Date"), field_name="Date")
    message = validate_str_field(value=item.get("Message"), field_name="Message")

    # Entropy: accept real numbers only; bool is an int subclass, so it is
    # excluded explicitly. Anything else falls back to 0.0.
    raw_entropy = item.get("Entropy")
    entropy_value: float = (
        float(raw_entropy)
        if isinstance(raw_entropy, (int, float))
        and not isinstance(raw_entropy, bool)
        else 0.0
    )

    # Tags: keep only the string members of a list; otherwise no tags.
    raw_tags = item.get("Tags")
    tag_names: list[str] = (
        [tag for tag in raw_tags if isinstance(tag, str)]
        if isinstance(raw_tags, list)
        else []
    )

    return GitleaksIssue(
        file=source_file,
        line=first_line,
        column=first_col,
        end_line=last_line,
        end_column=last_col,
        rule_id=rule,
        description=summary,
        secret=secret_text,
        entropy=entropy_value,
        tags=tag_names,
        fingerprint=finding_id,
        match=matched_text,
        commit=commit_sha,
        author=author_name,
        email=author_email,
        date=commit_date,
        commit_message=message,
    )