Coverage for lintro / tools / definitions / gitleaks.py: 92%
102 statements
« prev ^ index » next coverage.py v7.13.0, created at 2026-04-03 18:53 +0000
« prev ^ index » next coverage.py v7.13.0, created at 2026-04-03 18:53 +0000
1"""Gitleaks tool definition.
3Gitleaks is a SAST tool for detecting and preventing hardcoded secrets like
4passwords, API keys, and tokens in git repos. It scans for patterns that match
5known secret formats and reports findings with detailed location information.
6"""
8from __future__ import annotations
10import json
11import os
12import subprocess # nosec B404 - used safely with shell disabled
13import tempfile
14from dataclasses import dataclass
15from pathlib import Path
16from typing import Any
18from loguru import logger
20from lintro._tool_versions import get_min_version
21from lintro.enums.tool_name import ToolName
22from lintro.enums.tool_type import ToolType
23from lintro.models.core.tool_result import ToolResult
24from lintro.parsers.gitleaks.gitleaks_parser import parse_gitleaks_output
25from lintro.plugins.base import BaseToolPlugin
26from lintro.plugins.protocol import ToolDefinition
27from lintro.plugins.registry import register_tool
28from lintro.tools.core.option_validators import (
29 filter_none_options,
30 validate_bool,
31 validate_positive_int,
32 validate_str,
33)
# Module-level defaults shared by the Gitleaks plugin definition and command
# builder.
GITLEAKS_DEFAULT_TIMEOUT: int = 60  # used as both option default and tool timeout
GITLEAKS_DEFAULT_PRIORITY: int = 90  # security tools get a high priority
GITLEAKS_FILE_PATTERNS: list[str] = ["*"]  # every file is a scan candidate
GITLEAKS_OUTPUT_FORMAT: str = "json"  # passed to gitleaks via --report-format
@register_tool
@dataclass
class GitleaksPlugin(BaseToolPlugin):
    """Lintro plugin that runs Gitleaks to find hardcoded secrets.

    The plugin invokes the ``gitleaks`` executable, has it write a JSON report
    to a temporary file, and turns that report into a ``ToolResult``. It is a
    check-only tool: ``fix`` always raises ``NotImplementedError``.
    """

    @property
    def definition(self) -> ToolDefinition:
        """Describe this tool (name, patterns, defaults) to Lintro.

        Returns:
            ToolDefinition containing tool metadata.
        """
        defaults: dict[str, Any] = {
            "timeout": GITLEAKS_DEFAULT_TIMEOUT,
            # Scan the files on disk rather than the git history by default.
            "no_git": True,
            "config": None,
            "baseline_path": None,
            "redact": True,
            "max_target_megabytes": None,
        }
        return ToolDefinition(
            name="gitleaks",
            description=(
                "SAST tool for detecting hardcoded secrets like passwords, "
                "API keys, and tokens in git repos"
            ),
            can_fix=False,
            tool_type=ToolType.SECURITY,
            file_patterns=GITLEAKS_FILE_PATTERNS,
            priority=GITLEAKS_DEFAULT_PRIORITY,
            conflicts_with=[],
            native_configs=[".gitleaks.toml"],
            version_command=["gitleaks", "version"],
            min_version=get_min_version(ToolName.GITLEAKS),
            default_options=defaults,
            default_timeout=GITLEAKS_DEFAULT_TIMEOUT,
        )

    def set_options(
        self,
        no_git: bool | None = None,
        config: str | None = None,
        baseline_path: str | None = None,
        redact: bool | None = None,
        max_target_megabytes: int | None = None,
        **kwargs: Any,
    ) -> None:
        """Validate and store Gitleaks-specific options.

        Each named option is passed through its validator, options left as
        ``None`` are filtered out, and the remainder is forwarded to the base
        class together with ``kwargs``.

        Args:
            no_git: Scan without git history (files only).
            config: Path to gitleaks config file.
            baseline_path: Path to baseline file (ignore known secrets).
            redact: Redact secrets in output.
            max_target_megabytes: Skip files larger than this size.
            **kwargs: Other tool options, passed through unchanged.
        """
        # Validators run in parameter order; presumably each raises on an
        # invalid value (their behaviour is defined outside this module).
        validate_bool(value=no_git, name="no_git")
        validate_str(value=config, name="config")
        validate_str(value=baseline_path, name="baseline_path")
        validate_bool(value=redact, name="redact")
        validate_positive_int(
            value=max_target_megabytes,
            name="max_target_megabytes",
        )

        provided = filter_none_options(
            no_git=no_git,
            config=config,
            baseline_path=baseline_path,
            redact=redact,
            max_target_megabytes=max_target_megabytes,
        )
        super().set_options(**provided, **kwargs)

    def _build_check_command(self, source_path: str, report_path: str) -> list[str]:
        """Assemble the ``gitleaks detect`` argument list.

        Args:
            source_path: Path to the directory or file to scan.
            report_path: Path to write the JSON report to.

        Returns:
            List of command arguments, starting with the executable name.
        """
        opts = self.options
        cmd: list[str] = ["gitleaks", "detect", "--source", source_path]

        # Boolean switches default to enabled when the option is absent.
        if opts.get("no_git", True):
            cmd.append("--no-git")

        # Optional value flags are emitted only when the option is set.
        if (config_opt := opts.get("config")) is not None:
            cmd += ["--config", str(config_opt)]
        if (baseline_opt := opts.get("baseline_path")) is not None:
            cmd += ["--baseline-path", str(baseline_opt)]

        if opts.get("redact", True):
            cmd.append("--redact")

        if (max_mb_opt := opts.get("max_target_megabytes")) is not None:
            cmd += ["--max-target-megabytes", str(max_mb_opt)]

        # Report format/location, then "--exit-code 0" so that findings do not
        # produce a non-zero exit status (findings are read from the report).
        cmd.extend(
            [
                "--report-format",
                GITLEAKS_OUTPUT_FORMAT,
                "--report-path",
                report_path,
                "--exit-code",
                "0",
            ],
        )
        return cmd

    def check(self, paths: list[str], options: dict[str, object]) -> ToolResult:
        """Scan the given paths with Gitleaks and report detected secrets.

        Args:
            paths: List of file or directory paths to check.
            options: Runtime options that override defaults.

        Returns:
            ToolResult with the parsed issues on success, or an unsuccessful
            result describing a timeout, execution error, or unparsable report.
        """
        # Shared preparation may decide the run should be skipped entirely.
        ctx = self._prepare_execution(paths=paths, options=options)
        if ctx.should_skip:
            return ctx.early_result  # type: ignore[return-value]

        # Gitleaks takes a single --source, so collapse the inputs to one path.
        base_dir = Path(ctx.cwd) if ctx.cwd else Path.cwd()
        if not paths:
            # Nothing given: scan the working directory.
            source_path = str(base_dir)
        elif len(paths) == 1:
            # Exactly one path: hand it to gitleaks untouched.
            source_path = paths[0]
        else:
            # Several paths: anchor relative ones at base_dir and scan their
            # deepest common ancestor.
            anchored = [
                str(Path(p) if Path(p).is_absolute() else base_dir / p)
                for p in paths
            ]
            try:
                source_path = str(Path(os.path.commonpath(anchored)))
            except ValueError:
                # e.g. paths on different drives - no common ancestor exists.
                logger.warning(
                    "Cannot determine common path for provided paths; "
                    "falling back to working directory.",
                )
                source_path = str(base_dir)

        # The report goes to a temp file (gitleaks can't write to /dev/stdout
        # in subprocess environments due to permission issues). Only the name
        # is needed; delete=False keeps the file around for gitleaks to fill.
        with tempfile.NamedTemporaryFile(
            mode="w",
            suffix=".json",
            delete=False,
        ) as report_file:
            report_path = report_file.name

        tool_name = self.definition.name
        try:
            cmd = self._build_check_command(
                source_path=source_path,
                report_path=report_path,
            )
            cmd_preview = " ".join(cmd[:10])
            logger.debug(f"[gitleaks] Running: {cmd_preview}... (cwd={ctx.cwd})")

            try:
                # NOTE(review): the return value of _run_subprocess is
                # discarded here; if it can signal failure without raising, an
                # empty report would be treated as "no findings" - confirm.
                self._run_subprocess(
                    cmd=cmd,
                    timeout=ctx.timeout,
                    cwd=ctx.cwd,
                )
                output = Path(report_path).read_text(encoding="utf-8").strip()
            except subprocess.TimeoutExpired:
                timeout_msg = (
                    f"Gitleaks execution timed out ({ctx.timeout}s limit exceeded)."
                    "\n\nThis may indicate:\n"
                    " - Large codebase taking too long to scan\n"
                    " - Need to increase timeout via --tool-options gitleaks:timeout=N"
                )
                return ToolResult(
                    name=tool_name,
                    success=False,
                    output=timeout_msg,
                    issues_count=0,
                )
            except (OSError, ValueError, RuntimeError) as e:
                logger.error(f"Failed to run Gitleaks: {e}")
                return ToolResult(
                    name=tool_name,
                    success=False,
                    output=f"Gitleaks failed: {e}",
                    issues_count=0,
                )

            issues = parse_gitleaks_output(output=output)
            issues_count = len(issues)

            # Zero issues from a report that is neither empty nor "[]" is
            # suspicious: make sure the report is at least valid JSON so a
            # corrupt file is not mistaken for a clean scan. (output was
            # already stripped when it was read.)
            if issues_count == 0 and output not in ("", "[]"):
                try:
                    json.loads(output)
                except json.JSONDecodeError as e:
                    logger.error(f"Failed to parse gitleaks output: {e}")
                    return ToolResult(
                        name=tool_name,
                        success=False,
                        output=f"Failed to parse gitleaks output: {e}",
                        issues_count=0,
                    )

            return ToolResult(
                name=tool_name,
                success=True,
                output=None,
                issues_count=issues_count,
                issues=issues,
            )
        finally:
            # Always remove the temporary report, whichever path returned.
            Path(report_path).unlink(missing_ok=True)

    def fix(self, paths: list[str], options: dict[str, object]) -> ToolResult:
        """Reject fix requests; Gitleaks only reports issues.

        Args:
            paths: List of file or directory paths to fix (unused).
            options: Tool-specific options (unused).

        Returns:
            ToolResult: Never returns, always raises NotImplementedError.

        Raises:
            NotImplementedError: Gitleaks does not support fixing issues.
        """
        raise NotImplementedError(
            "Gitleaks cannot automatically fix security issues. Run 'lintro check' to "
            "see issues and manually remove or rotate the detected secrets.",
        )