Coverage for lintro / ai / sanitize.py: 100%

23 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2026-04-03 18:53 +0000

1"""Prompt injection hardening for AI fix generation. 

2 

3Sanitizes code content before it is inserted into AI prompts to 

4mitigate prompt injection attacks. This is a defense-in-depth 

5measure — it neutralizes common injection patterns without altering 

6valid source code semantics. 

7""" 

8 

9from __future__ import annotations 

10 

11import re 

12import uuid 

13 

# Boundary marker used to fence code content in prompts.
# A per-call unique token is appended at runtime (see
# make_boundary_marker) so that attacker-controlled content cannot
# predict or replicate the full delimiter.
_BOUNDARY_PREFIX = "CODE_BLOCK"

# Patterns that look like attempts to break out of the code context
# and inject new instructions. Each tuple is (compiled regex, label);
# the labels are what detect_injection_patterns returns for audit logs.
_INJECTION_PATTERNS: list[tuple[re.Pattern[str], str]] = [
    # Direct instruction overrides, e.g. "ignore all previous instructions".
    (
        re.compile(
            r"(?:^|\n)\s*(?:ignore|disregard|forget)\s+"
            r"(?:all\s+)?(?:previous|prior|above|earlier)\s+"
            r"(?:instructions?|context|prompts?|rules?)",
            re.I,
        ),
        "instruction-override",
    ),
    # Attempts to impersonate system / assistant role boundaries,
    # i.e. "system:", "assistant:", "user:" at the start of a line.
    (
        re.compile(r"(?:^|\n)\s*(?:system|assistant|user)\s*:", re.I),
        "role-impersonation",
    ),
    # XML-style tags that could confuse structured prompts
    # (but NOT common HTML tags like <div>, <span>, <p>, etc.)
    (
        re.compile(
            r"</?(?:system|instruction|prompt|command|tool_call"
            r"|function_call|assistant|user)(?:\s[^>]*)?>",
            re.I,
        ),
        "xml-tag-injection",
    ),
    # Markdown heading-style instruction injection, e.g. "# New instructions".
    (
        re.compile(
            r"(?:^|\n)#{1,3}\s*(?:new\s+)?(?:system\s+)?instructions?",
            re.I,
        ),
        "heading-injection",
    ),
]

# Characters used to escape role-boundary patterns inside code content.
# A zero-width space (U+200B) is inserted after the colon in "system:"
# etc. so that the AI does not interpret them as role markers while the
# visible text is unchanged.
_ZERO_WIDTH_SPACE = "\u200b"

61 

62 

def _neutralize_role_markers(text: str) -> str:
    """Insert a zero-width space after role-like prefixes.

    Transforms patterns like ``system:`` into ``system:\u200b`` so the
    AI provider does not misinterpret them as role boundaries. Only
    matches at the start of a line (with optional leading whitespace).

    The entire matched prefix — including any whitespace between the
    role word and the colon — is captured and re-emitted verbatim with
    only the zero-width space appended, so no characters of the code
    content are ever dropped or reflowed.

    Args:
        text: The text to process.

    Returns:
        Text with role markers neutralized.
    """
    # Use [ \t]* (not \s*) before the colon: \s also matches newlines,
    # which would let a single match span lines (e.g. "system\n:") and
    # merge them in the output. Capturing through the colon also keeps
    # any "system :" spacing intact instead of silently deleting it.
    return re.sub(
        r"(?m)(^[ \t]*(?:system|assistant|user)[ \t]*:)",
        rf"\1{_ZERO_WIDTH_SPACE}",
        text,
        flags=re.IGNORECASE,
    )

82 

83 

84def _neutralize_xml_tags(text: str) -> str: 

85 """Escape XML-like tags that could confuse the model's parsing. 

86 

87 Replaces the opening ``<`` with ``&lt;`` only for tags whose names 

88 match known prompt-structural elements (system, instruction, etc.). 

89 

90 Args: 

91 text: The text to process. 

92 

93 Returns: 

94 Text with dangerous XML tags escaped. 

95 """ 

96 return re.sub( 

97 r"<(/?(?:system|instruction|prompt|command|tool_call" 

98 r"|function_call|assistant|user)(?:\s[^>]*)?)>", 

99 r"&lt;\1>", 

100 text, 

101 flags=re.IGNORECASE, 

102 ) 

103 

104 

def sanitize_code_content(content: str) -> str:
    """Sanitize code content before inserting it into an AI prompt.

    Applies lightweight, semantics-preserving transformations that
    defuse the common prompt-injection vectors:

    * Role-boundary markers (``system:``, ``assistant:``) are broken
      with a zero-width space so the model does not treat them as
      role switches.
    * XML-like tags matching prompt-structural names are escaped.

    Everything else passes through untouched — ordinary code that
    merely contains words like "system" or "ignore" in identifiers or
    comments is left alone.

    Args:
        content: Raw code content to sanitize.

    Returns:
        Sanitized content safe for prompt insertion.
    """
    if not content:
        return content

    without_role_markers = _neutralize_role_markers(content)
    return _neutralize_xml_tags(without_role_markers)

130 

131 

def detect_injection_patterns(content: str) -> list[str]:
    """Detect potential prompt injection patterns in content.

    Produces a label for each known injection pattern that matches.
    This is intended for logging/auditing — it does NOT block the
    content from being sent.

    Args:
        content: The text to scan.

    Returns:
        List of injection pattern labels found (empty if clean).
    """
    return [
        label
        for pattern, label in _INJECTION_PATTERNS
        if pattern.search(content)
    ]

150 

151 

def make_boundary_marker() -> str:
    """Generate a unique boundary marker for code fencing.

    Produces strings such as ``CODE_BLOCK_a1b2c3d4`` to delimit code
    content in prompts. The random 8-hex-digit suffix makes it
    infeasible for attacker-controlled content to reproduce the
    delimiter ahead of time.

    Returns:
        A unique boundary marker string.
    """
    random_suffix = uuid.uuid4().hex[:8]
    return "_".join((_BOUNDARY_PREFIX, random_suffix))