Coverage for lintro / tools / definitions / gitleaks.py: 92%

102 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2026-04-03 18:53 +0000

1"""Gitleaks tool definition. 

2 

3Gitleaks is a SAST tool for detecting and preventing hardcoded secrets like 

4passwords, API keys, and tokens in git repos. It scans for patterns that match 

5known secret formats and reports findings with detailed location information. 

6""" 

7 

8from __future__ import annotations 

9 

10import json 

11import os 

12import subprocess # nosec B404 - used safely with shell disabled 

13import tempfile 

14from dataclasses import dataclass 

15from pathlib import Path 

16from typing import Any 

17 

18from loguru import logger 

19 

20from lintro._tool_versions import get_min_version 

21from lintro.enums.tool_name import ToolName 

22from lintro.enums.tool_type import ToolType 

23from lintro.models.core.tool_result import ToolResult 

24from lintro.parsers.gitleaks.gitleaks_parser import parse_gitleaks_output 

25from lintro.plugins.base import BaseToolPlugin 

26from lintro.plugins.protocol import ToolDefinition 

27from lintro.plugins.registry import register_tool 

28from lintro.tools.core.option_validators import ( 

29 filter_none_options, 

30 validate_bool, 

31 validate_positive_int, 

32 validate_str, 

33) 

34 

# Defaults shared by the Gitleaks plugin definition and command builder.
# Timeout, in seconds, used for both the "timeout" option and default_timeout.
GITLEAKS_DEFAULT_TIMEOUT: int = 60
# Security tools are given a high priority value.
GITLEAKS_DEFAULT_PRIORITY: int = 90
# A single wildcard: every file is a candidate for scanning.
GITLEAKS_FILE_PATTERNS: list[str] = ["*"]
# Value handed to gitleaks' --report-format flag.
GITLEAKS_OUTPUT_FORMAT: str = "json"

40 

41 

@register_tool
@dataclass
class GitleaksPlugin(BaseToolPlugin):
    """Lintro plugin that runs Gitleaks to find hardcoded secrets.

    The plugin shells out to ``gitleaks detect``, has it write a JSON report
    to a temporary file, and converts that report into Lintro issues.
    Gitleaks is report-only: ``fix`` always raises.
    """

    @property
    def definition(self) -> ToolDefinition:
        """Return the tool definition.

        Returns:
            ToolDefinition containing tool metadata.
        """
        option_defaults = {
            "timeout": GITLEAKS_DEFAULT_TIMEOUT,
            "no_git": True,  # Scan the files on disk, not the git history
            "config": None,
            "baseline_path": None,
            "redact": True,
            "max_target_megabytes": None,
        }
        summary = (
            "SAST tool for detecting hardcoded secrets like passwords, "
            "API keys, and tokens in git repos"
        )
        return ToolDefinition(
            name="gitleaks",
            description=summary,
            can_fix=False,
            tool_type=ToolType.SECURITY,
            file_patterns=GITLEAKS_FILE_PATTERNS,
            priority=GITLEAKS_DEFAULT_PRIORITY,
            conflicts_with=[],
            native_configs=[".gitleaks.toml"],
            version_command=["gitleaks", "version"],
            min_version=get_min_version(ToolName.GITLEAKS),
            default_options=option_defaults,
            default_timeout=GITLEAKS_DEFAULT_TIMEOUT,
        )

    def set_options(
        self,
        no_git: bool | None = None,
        config: str | None = None,
        baseline_path: str | None = None,
        redact: bool | None = None,
        max_target_megabytes: int | None = None,
        **kwargs: Any,
    ) -> None:
        """Set Gitleaks-specific options.

        Each Gitleaks option is validated first; options left as ``None`` are
        dropped before everything is forwarded to the base class.

        Args:
            no_git: Scan without git history (files only).
            config: Path to gitleaks config file.
            baseline_path: Path to baseline file (ignore known secrets).
            redact: Redact secrets in output.
            max_target_megabytes: Skip files larger than this size.
            **kwargs: Other tool options.
        """
        # Validators run in declaration order, so the first invalid option
        # is the one that gets reported.
        checks = (
            (validate_bool, no_git, "no_git"),
            (validate_str, config, "config"),
            (validate_str, baseline_path, "baseline_path"),
            (validate_bool, redact, "redact"),
            (validate_positive_int, max_target_megabytes, "max_target_megabytes"),
        )
        for validator, value, name in checks:
            validator(value=value, name=name)

        explicit = filter_none_options(
            no_git=no_git,
            config=config,
            baseline_path=baseline_path,
            redact=redact,
            max_target_megabytes=max_target_megabytes,
        )
        super().set_options(**explicit, **kwargs)

    def _build_check_command(self, source_path: str, report_path: str) -> list[str]:
        """Build the gitleaks check command.

        Args:
            source_path: Path to the directory or file to scan.
            report_path: Path to write the JSON report to.

        Returns:
            List of command arguments.
        """
        opts = self.options
        argv: list[str] = ["gitleaks", "detect", "--source", source_path]

        # Files-only scanning is on unless explicitly disabled.
        if opts.get("no_git", True):
            argv.append("--no-git")

        # Optional path-valued flags, emitted only when configured.
        for flag, key in (("--config", "config"), ("--baseline-path", "baseline_path")):
            configured = opts.get(key)
            if configured is not None:
                argv += [flag, str(configured)]

        # Redaction is on unless explicitly disabled.
        if opts.get("redact", True):
            argv.append("--redact")

        size_limit = opts.get("max_target_megabytes")
        if size_limit is not None:
            argv += ["--max-target-megabytes", str(size_limit)]

        # Findings are read from the report file, so ask gitleaks to exit
        # with code 0 even when secrets are found.
        argv += [
            "--report-format",
            GITLEAKS_OUTPUT_FORMAT,
            "--report-path",
            report_path,
            "--exit-code",
            "0",
        ]
        return argv

    def check(self, paths: list[str], options: dict[str, object]) -> ToolResult:
        """Check files with Gitleaks for hardcoded secrets.

        Args:
            paths: List of file or directory paths to check.
            options: Runtime options that override defaults.

        Returns:
            ToolResult with check results.
        """
        # Shared preparation (version check, path validation) may decide
        # there is nothing to run.
        ctx = self._prepare_execution(paths=paths, options=options)
        if ctx.should_skip:
            return ctx.early_result  # type: ignore[return-value]

        # Gitleaks takes one --source, which may be a directory or a file.
        base_dir = Path(ctx.cwd) if ctx.cwd else Path.cwd()
        if not paths:
            # Nothing requested explicitly: scan the working directory.
            source_path = str(base_dir)
        elif len(paths) == 1:
            # A single path is handed to gitleaks untouched.
            source_path = paths[0]
        else:
            # Several paths: anchor relative ones at the working directory
            # and scan their common parent.
            anchored = [
                str(Path(p) if Path(p).is_absolute() else base_dir / p) for p in paths
            ]
            try:
                source_path = str(Path(os.path.commonpath(anchored)))
            except ValueError:
                # Paths on different drives or no common path.
                logger.warning(
                    "Cannot determine common path for provided paths; "
                    "falling back to working directory.",
                )
                source_path = str(base_dir)

        # The report goes through a temporary file (gitleaks can't write to
        # /dev/stdout in subprocess environments due to permission issues).
        # Only the name is needed here; the file is removed in ``finally``.
        with tempfile.NamedTemporaryFile(
            mode="w",
            suffix=".json",
            delete=False,
        ) as report_file:
            report_path = report_file.name

        try:
            cmd = self._build_check_command(
                source_path=source_path,
                report_path=report_path,
            )
            logger.debug(
                f"[gitleaks] Running: {' '.join(cmd[:10])}... (cwd={ctx.cwd})",
            )

            try:
                # NOTE(review): the return value of _run_subprocess is
                # discarded. If that helper signals a non-zero exit through
                # its return value rather than by raising, a failed gitleaks
                # run that leaves the report empty would be read as "no
                # findings" - confirm against the base class.
                self._run_subprocess(
                    cmd=cmd,
                    timeout=ctx.timeout,
                    cwd=ctx.cwd,
                )
                report_text = Path(report_path).read_text(encoding="utf-8").strip()
            except subprocess.TimeoutExpired:
                timeout_msg = (
                    f"Gitleaks execution timed out ({ctx.timeout}s limit exceeded)."
                    "\n\nThis may indicate:\n"
                    " - Large codebase taking too long to scan\n"
                    " - Need to increase timeout via --tool-options gitleaks:timeout=N"
                )
                return ToolResult(
                    name=self.definition.name,
                    success=False,
                    output=timeout_msg,
                    issues_count=0,
                )
            except (OSError, ValueError, RuntimeError) as e:
                logger.error(f"Failed to run Gitleaks: {e}")
                return ToolResult(
                    name=self.definition.name,
                    success=False,
                    output=f"Gitleaks failed: {e}",
                    issues_count=0,
                )

            issues = parse_gitleaks_output(output=report_text)
            issues_count = len(issues)

            # No issues from a report that is neither empty nor "[]" is
            # suspicious: make sure the report is at least valid JSON.
            # (report_text is already stripped.)
            if issues_count == 0 and report_text not in ("", "[]"):
                try:
                    json.loads(report_text)
                except json.JSONDecodeError as e:
                    logger.error(f"Failed to parse gitleaks output: {e}")
                    return ToolResult(
                        name=self.definition.name,
                        success=False,
                        output=f"Failed to parse gitleaks output: {e}",
                        issues_count=0,
                    )

            return ToolResult(
                name=self.definition.name,
                success=True,
                output=None,
                issues_count=issues_count,
                issues=issues,
            )
        finally:
            # Runs on every exit path above, including the early returns.
            Path(report_path).unlink(missing_ok=True)

    def fix(self, paths: list[str], options: dict[str, object]) -> ToolResult:
        """Gitleaks cannot fix issues, only report them.

        Args:
            paths: List of file or directory paths to fix.
            options: Tool-specific options.

        Returns:
            ToolResult: Never returns, always raises NotImplementedError.

        Raises:
            NotImplementedError: Gitleaks does not support fixing issues.
        """
        message = (
            "Gitleaks cannot automatically fix security issues. Run 'lintro check' to "
            "see issues and manually remove or rotate the detected secrets."
        )
        raise NotImplementedError(message)