Coverage for lintro / ai / sanitize.py: 100%
23 statements
« prev ^ index » next coverage.py v7.13.0, created at 2026-04-03 18:53 +0000
« prev ^ index » next coverage.py v7.13.0, created at 2026-04-03 18:53 +0000
1"""Prompt injection hardening for AI fix generation.
3Sanitizes code content before it is inserted into AI prompts to
4mitigate prompt injection attacks. This is a defense-in-depth
5measure — it neutralizes common injection patterns without altering
6valid source code semantics.
7"""
9from __future__ import annotations
11import re
12import uuid
# Boundary marker used to fence code content in prompts.
# A per-call unique token is appended at runtime so that
# attacker-controlled content cannot predict or replicate it.
_BOUNDARY_PREFIX: str = "CODE_BLOCK"
# Patterns that look like attempts to break out of the code context
# and inject new instructions. Each tuple is (compiled regex, label);
# the label is what detect_injection_patterns() reports for auditing.
_INJECTION_PATTERNS: list[tuple[re.Pattern[str], str]] = [
    # Direct instruction overrides, e.g. "ignore all previous instructions".
    # Anchored to a line start so prose inside string literals mid-line
    # is less likely to trip it.
    (
        re.compile(
            r"(?:^|\n)\s*(?:ignore|disregard|forget)\s+"
            r"(?:all\s+)?(?:previous|prior|above|earlier)\s+"
            r"(?:instructions?|context|prompts?|rules?)",
            re.I,
        ),
        "instruction-override",
    ),
    # Attempts to impersonate system / assistant role boundaries
    # (line-start "system:" / "assistant:" / "user:" prefixes).
    (
        re.compile(r"(?:^|\n)\s*(?:system|assistant|user)\s*:", re.I),
        "role-impersonation",
    ),
    # XML-style tags that could confuse structured prompts
    # (but NOT common HTML tags like <div>, <span>, <p>, etc.)
    (
        re.compile(
            r"</?(?:system|instruction|prompt|command|tool_call"
            r"|function_call|assistant|user)(?:\s[^>]*)?>",
            re.I,
        ),
        "xml-tag-injection",
    ),
    # Markdown heading-style instruction injection,
    # e.g. "# New instructions" or "## System instructions".
    (
        re.compile(
            r"(?:^|\n)#{1,3}\s*(?:new\s+)?(?:system\s+)?instructions?",
            re.I,
        ),
        "heading-injection",
    ),
]
# Characters used to escape role-boundary patterns inside code content.
# We insert a zero-width space (U+200B) after the colon in "system:" etc.
# so that the AI does not interpret them as role markers.
_ZERO_WIDTH_SPACE: str = "\u200b"
63def _neutralize_role_markers(text: str) -> str:
64 """Insert a zero-width space after role-like prefixes.
66 Transforms patterns like ``system:`` into ``system:\u200b`` so the
67 AI provider does not misinterpret them as role boundaries. Only
68 matches at the start of a line (with optional leading whitespace).
70 Args:
71 text: The text to process.
73 Returns:
74 Text with role markers neutralized.
75 """
76 return re.sub(
77 r"(?m)(^[ \t]*(?:system|assistant|user))\s*:",
78 rf"\1:{_ZERO_WIDTH_SPACE}",
79 text,
80 flags=re.IGNORECASE,
81 )
84def _neutralize_xml_tags(text: str) -> str:
85 """Escape XML-like tags that could confuse the model's parsing.
87 Replaces the opening ``<`` with ``<`` only for tags whose names
88 match known prompt-structural elements (system, instruction, etc.).
90 Args:
91 text: The text to process.
93 Returns:
94 Text with dangerous XML tags escaped.
95 """
96 return re.sub(
97 r"<(/?(?:system|instruction|prompt|command|tool_call"
98 r"|function_call|assistant|user)(?:\s[^>]*)?)>",
99 r"<\1>",
100 text,
101 flags=re.IGNORECASE,
102 )
def sanitize_code_content(content: str) -> str:
    """Sanitize code content before inserting it into an AI prompt.

    Performs two lightweight, semantics-preserving rewrites that defuse
    the most common prompt-injection vectors:

    * Role-boundary markers (``system:``, ``assistant:``, ``user:``)
      get a zero-width space inserted so the model cannot read them as
      role switches.
    * XML-like tags whose names collide with prompt-structural elements
      are escaped.

    Everything else passes through untouched, so ordinary code that
    merely mentions words like "system" or "ignore" in identifiers or
    comments is unaffected.

    Args:
        content: Raw code content to sanitize.

    Returns:
        Sanitized content safe for prompt insertion.
    """
    # Empty input: nothing to sanitize, return it unchanged.
    if not content:
        return content
    without_roles = _neutralize_role_markers(content)
    return _neutralize_xml_tags(without_roles)
def detect_injection_patterns(content: str) -> list[str]:
    """Detect potential prompt injection patterns in content.

    Scans ``content`` against the module's known injection signatures
    and reports the label of every one that matches. Intended purely
    for logging/auditing — a non-empty result does NOT block the
    content from being sent.

    Args:
        content: The text to scan.

    Returns:
        List of injection pattern labels found (empty if clean).
    """
    # Labels come back in the same order the patterns are declared.
    return [
        label
        for regex, label in _INJECTION_PATTERNS
        if regex.search(content)
    ]
def make_boundary_marker() -> str:
    """Generate a unique boundary marker for code fencing.

    Produces a delimiter such as ``CODE_BLOCK_a1b2c3d4`` for fencing
    code content inside prompts. Because the eight-hex-digit suffix is
    drawn from a fresh UUID on every call, attacker-controlled content
    cannot predict or reproduce the boundary.

    Returns:
        A unique boundary marker string.
    """
    random_suffix = uuid.uuid4().hex[:8]
    return "_".join((_BOUNDARY_PREFIX, random_suffix))