Coverage for lintro/tools/definitions/gitleaks.py: 92%

"""Gitleaks tool definition.

Gitleaks is a SAST tool for detecting and preventing hardcoded secrets like
passwords, API keys, and tokens in git repos. It scans for patterns that match
known secret formats and reports findings with detailed location information.
"""

8from __future__ import annotations

10import json

11import os

12import subprocess # nosec B404 - used safely with shell disabled

13import tempfile

14from dataclasses import dataclass

15from pathlib import Path

16from typing import Any

18from loguru import logger

20from lintro._tool_versions import get_min_version

21from lintro.enums.tool_name import ToolName

22from lintro.enums.tool_type import ToolType

23from lintro.models.core.tool_result import ToolResult

24from lintro.parsers.gitleaks.gitleaks_parser import parse_gitleaks_output

25from lintro.plugins.base import BaseToolPlugin

26from lintro.plugins.protocol import ToolDefinition

27from lintro.plugins.registry import register_tool

28from lintro.tools.core.option_validators import (

29 filter_none_options,

30 validate_bool,

31 validate_positive_int,

32 validate_str,

33)

# Module-level configuration shared by the Gitleaks plugin.

# Report format requested from gitleaks via --report-format.
GITLEAKS_OUTPUT_FORMAT: str = "json"

# Glob patterns for files the tool applies to; "*" means every file.
GITLEAKS_FILE_PATTERNS: list[str] = ["*"]

# Priority assigned to the tool; set high because this is a security tool.
GITLEAKS_DEFAULT_PRIORITY: int = 90

# Default timeout used for both the "timeout" option and default_timeout.
GITLEAKS_DEFAULT_TIMEOUT: int = 60

@register_tool
@dataclass
class GitleaksPlugin(BaseToolPlugin):
    """Lintro plugin that runs Gitleaks to find hardcoded secrets.

    The plugin invokes ``gitleaks detect``, has it write a JSON report to a
    temporary file, and turns the parsed report into a ``ToolResult``.
    Gitleaks only reports findings; it cannot fix them.
    """

    @property
    def definition(self) -> ToolDefinition:
        """Describe the Gitleaks tool to Lintro.

        Returns:
            ToolDefinition: Metadata for the tool (name, type, file patterns,
                priority, version command, default options and timeout).
        """
        description = (
            "SAST tool for detecting hardcoded secrets like passwords, "
            "API keys, and tokens in git repos"
        )
        default_options: dict[str, Any] = {
            "timeout": GITLEAKS_DEFAULT_TIMEOUT,
            # Scan the files on disk rather than the git history by default.
            "no_git": True,
            "config": None,
            "baseline_path": None,
            "redact": True,
            "max_target_megabytes": None,
        }
        return ToolDefinition(
            name="gitleaks",
            description=description,
            can_fix=False,
            tool_type=ToolType.SECURITY,
            file_patterns=GITLEAKS_FILE_PATTERNS,
            priority=GITLEAKS_DEFAULT_PRIORITY,
            conflicts_with=[],
            native_configs=[".gitleaks.toml"],
            version_command=["gitleaks", "version"],
            min_version=get_min_version(ToolName.GITLEAKS),
            default_options=default_options,
            default_timeout=GITLEAKS_DEFAULT_TIMEOUT,
        )

    def set_options(
        self,
        no_git: bool | None = None,
        config: str | None = None,
        baseline_path: str | None = None,
        redact: bool | None = None,
        max_target_megabytes: int | None = None,
        **kwargs: Any,
    ) -> None:
        """Validate and store Gitleaks-specific options.

        Each named option is validated, options left as ``None`` are filtered
        out, and the remainder is forwarded to the base class together with
        any extra keyword arguments.

        Args:
            no_git: Scan without git history (files only).
            config: Path to gitleaks config file.
            baseline_path: Path to baseline file (ignore known secrets).
            redact: Redact secrets in output.
            max_target_megabytes: Skip files larger than this size.
            **kwargs: Other tool options, passed through unchanged.
        """
        # (validator, value, option name) — run in this order so the first
        # invalid option reported matches the declaration order.
        checks = (
            (validate_bool, no_git, "no_git"),
            (validate_str, config, "config"),
            (validate_str, baseline_path, "baseline_path"),
            (validate_bool, redact, "redact"),
            (validate_positive_int, max_target_megabytes, "max_target_megabytes"),
        )
        for validator, value, option_name in checks:
            validator(value=value, name=option_name)

        provided = filter_none_options(
            no_git=no_git,
            config=config,
            baseline_path=baseline_path,
            redact=redact,
            max_target_megabytes=max_target_megabytes,
        )
        super().set_options(**provided, **kwargs)

    def _build_check_command(self, source_path: str, report_path: str) -> list[str]:
        """Assemble the ``gitleaks detect`` command line.

        Args:
            source_path: Path to the directory or file to scan.
            report_path: Path the JSON report is written to.

        Returns:
            list[str]: The command and its arguments, ready for a subprocess
                call without a shell.
        """
        opts = self.options
        command: list[str] = ["gitleaks", "detect", "--source", source_path]

        # Files-only scanning is the default when the option is unset.
        if opts.get("no_git", True):
            command.append("--no-git")

        config_file = opts.get("config")
        if config_file is not None:
            command += ["--config", str(config_file)]

        baseline_file = opts.get("baseline_path")
        if baseline_file is not None:
            command += ["--baseline-path", str(baseline_file)]

        # Redaction is the default when the option is unset.
        if opts.get("redact", True):
            command.append("--redact")

        size_limit = opts.get("max_target_megabytes")
        if size_limit is not None:
            command += ["--max-target-megabytes", str(size_limit)]

        command += [
            "--report-format",
            GITLEAKS_OUTPUT_FORMAT,
            "--report-path",
            report_path,
            # Exit with code 0 even when secrets are found; findings are
            # taken from the parsed report instead of the exit status.
            "--exit-code",
            "0",
        ]
        return command

    @staticmethod
    def _resolve_source_path(paths: list[str], cwd_path: Path) -> str:
        """Pick the single path handed to gitleaks via ``--source``.

        Args:
            paths: Paths requested by the caller (may be empty).
            cwd_path: Working directory used to anchor relative paths and as
                the fallback scan target.

        Returns:
            str: The sole path when exactly one is given, the common parent
                of several paths, or ``cwd_path`` when there are no paths or
                no common parent can be computed.
        """
        if not paths:
            return str(cwd_path)
        if len(paths) == 1:
            # A single path is passed through untouched.
            return paths[0]

        anchored = [
            str(Path(p) if Path(p).is_absolute() else cwd_path / p) for p in paths
        ]
        try:
            return str(Path(os.path.commonpath(anchored)))
        except ValueError:
            # Paths on different drives or no common path - fall back to cwd
            logger.warning(
                "Cannot determine common path for provided paths; "
                "falling back to working directory.",
            )
            return str(cwd_path)

    def _failure_result(self, message: str) -> ToolResult:
        """Build an unsuccessful ``ToolResult`` with no issues.

        Args:
            message: Text stored as the result's output.

        Returns:
            ToolResult: Result with ``success=False`` and ``issues_count=0``.
        """
        return ToolResult(
            name=self.definition.name,
            success=False,
            output=message,
            issues_count=0,
        )

    def check(self, paths: list[str], options: dict[str, object]) -> ToolResult:
        """Scan the given paths with Gitleaks for hardcoded secrets.

        Args:
            paths: List of file or directory paths to check.
            options: Runtime options that override defaults.

        Returns:
            ToolResult: The early result from the shared preparation step if
                it asks to skip; a failed result on timeout, on an
                ``OSError``/``ValueError``/``RuntimeError`` while running the
                tool or reading the report, or when a non-empty report is
                not valid JSON; otherwise a successful result carrying the
                parsed issues.
        """
        # Shared preparation (version check, path validation).
        ctx = self._prepare_execution(paths=paths, options=options)
        if ctx.should_skip:
            return ctx.early_result  # type: ignore[return-value]

        cwd_path = Path(ctx.cwd) if ctx.cwd else Path.cwd()
        source_path = self._resolve_source_path(paths, cwd_path)

        # Use a temporary file for the report (gitleaks can't write to
        # /dev/stdout in subprocess environments due to permission issues).
        # delete=False keeps the file after the handle closes; it is removed
        # in the finally clause below.
        with tempfile.NamedTemporaryFile(
            mode="w",
            suffix=".json",
            delete=False,
        ) as report_file:
            report_path = report_file.name

        try:
            cmd = self._build_check_command(
                source_path=source_path,
                report_path=report_path,
            )
            logger.debug(
                f"[gitleaks] Running: {' '.join(cmd[:10])}... (cwd={ctx.cwd})",
            )

            try:
                # NOTE(review): the value returned by _run_subprocess is
                # discarded; if a failing gitleaks run does not raise and
                # leaves the report empty, it may be reported as a clean
                # scan — confirm this is intended.
                self._run_subprocess(
                    cmd=cmd,
                    timeout=ctx.timeout,
                    cwd=ctx.cwd,
                )
                report_text = Path(report_path).read_text(encoding="utf-8").strip()
            except subprocess.TimeoutExpired:
                timeout_msg = (
                    f"Gitleaks execution timed out ({ctx.timeout}s limit exceeded)."
                    "\n\nThis may indicate:\n"
                    " - Large codebase taking too long to scan\n"
                    " - Need to increase timeout via --tool-options gitleaks:timeout=N"
                )
                return self._failure_result(timeout_msg)
            except (OSError, ValueError, RuntimeError) as e:
                logger.error(f"Failed to run Gitleaks: {e}")
                return self._failure_result(f"Gitleaks failed: {e}")

            issues = parse_gitleaks_output(output=report_text)
            issues_count = len(issues)

            # A report that is neither empty nor "[]" yet produced no issues
            # is re-parsed here to catch invalid JSON in the report file.
            if issues_count == 0 and report_text not in ("", "[]"):
                try:
                    json.loads(report_text)
                except json.JSONDecodeError as e:
                    logger.error(f"Failed to parse gitleaks output: {e}")
                    return self._failure_result(
                        f"Failed to parse gitleaks output: {e}",
                    )

            return ToolResult(
                name=self.definition.name,
                success=True,
                output=None,
                issues_count=issues_count,
                issues=issues,
            )
        finally:
            # Remove the temporary report on every exit path.
            Path(report_path).unlink(missing_ok=True)

    def fix(self, paths: list[str], options: dict[str, object]) -> ToolResult:
        """Reject fix requests; Gitleaks can only report issues.

        Args:
            paths: List of file or directory paths to fix (unused).
            options: Tool-specific options (unused).

        Returns:
            ToolResult: Never returns, always raises NotImplementedError.

        Raises:
            NotImplementedError: Gitleaks does not support fixing issues.
        """
        raise NotImplementedError(
            "Gitleaks cannot automatically fix security issues. Run 'lintro check' to "
            "see issues and manually remove or rotate the detected secrets.",
        )

Coverage for lintro/tools/definitions/gitleaks.py: 92%

102 statements