Coverage for lintro / ai / sanitize.py: 100%

23 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2026-04-03 18:53 +0000

1"""Prompt injection hardening for AI fix generation. 

2 

3Sanitizes code content before it is inserted into AI prompts to 

4mitigate prompt injection attacks. This is a defense-in-depth 

5measure — it neutralizes common injection patterns without altering 

6valid source code semantics. 

7""" 

8 

9from __future__ import annotations 

10 

11import re 

12import uuid 

13 

# Boundary marker used to fence code content in prompts.
# A per-call unique token is appended at runtime (see
# make_boundary_marker) so that attacker-controlled content cannot
# predict or replicate the full delimiter.
_BOUNDARY_PREFIX = "CODE_BLOCK"

# Patterns that look like attempts to break out of the code context
# and inject new instructions. Each tuple is (compiled regex, label);
# the labels are what detect_injection_patterns returns for audit logs.
_INJECTION_PATTERNS: list[tuple[re.Pattern[str], str]] = [
    # Direct instruction overrides, e.g. "ignore all previous instructions".
    (
        re.compile(
            r"(?:^|\n)\s*(?:ignore|disregard|forget)\s+"
            r"(?:all\s+)?(?:previous|prior|above|earlier)\s+"
            r"(?:instructions?|context|prompts?|rules?)",
            re.I,
        ),
        "instruction-override",
    ),
    # Attempts to impersonate system / assistant role boundaries,
    # i.e. "system:", "assistant:", "user:" at the start of a line.
    (
        re.compile(r"(?:^|\n)\s*(?:system|assistant|user)\s*:", re.I),
        "role-impersonation",
    ),
    # XML-style tags that could confuse structured prompts
    # (but NOT common HTML tags like <div>, <span>, <p>, etc.)
    (
        re.compile(
            r"</?(?:system|instruction|prompt|command|tool_call"
            r"|function_call|assistant|user)(?:\s[^>]*)?>",
            re.I,
        ),
        "xml-tag-injection",
    ),
    # Markdown heading-style instruction injection, e.g. "# New instructions".
    (
        re.compile(
            r"(?:^|\n)#{1,3}\s*(?:new\s+)?(?:system\s+)?instructions?",
            re.I,
        ),
        "heading-injection",
    ),
]

# Characters used to escape role-boundary patterns inside code content.
# A zero-width space (U+200B) is inserted after the colon in "system:"
# etc. so that the AI does not interpret them as role markers while the
# visible text is unchanged.
_ZERO_WIDTH_SPACE = "\u200b"

61 

62 

def _neutralize_role_markers(text: str) -> str:
    """Insert a zero-width space after role-like prefixes.

    Transforms patterns like ``system:`` into ``system:\u200b`` so the
    AI provider does not misinterpret them as role boundaries. Only
    matches at the start of a line (with optional leading whitespace).

    The entire matched prefix — including any whitespace between the
    role word and the colon — is captured and re-emitted verbatim with
    only the zero-width space appended, so no characters of the code
    content are ever dropped or reflowed.

    Args:
        text: The text to process.

    Returns:
        Text with role markers neutralized.
    """
    # Use [ \t]* (not \s*) before the colon: \s also matches newlines,
    # which would let a single match span lines (e.g. "system\n:") and
    # merge them in the output. Capturing through the colon also keeps
    # any "system :" spacing intact instead of silently deleting it.
    return re.sub(
        r"(?m)(^[ \t]*(?:system|assistant|user)[ \t]*:)",
        rf"\1{_ZERO_WIDTH_SPACE}",
        text,
        flags=re.IGNORECASE,
    )

82 

83 

84def _neutralize_xml_tags(text: str) -> str: 

85 """Escape XML-like tags that could confuse the model's parsing. 

86 

87 Replaces the opening ``<`` with ``&lt;`` only for tags whose names 

88 match known prompt-structural elements (system, instruction, etc.). 

89 

90 Args: 

91 text: The text to process. 

92 

93 Returns: 

94 Text with dangerous XML tags escaped. 

95 """ 

96 return re.sub( 

97 r"<(/?(?:system|instruction|prompt|command|tool_call" 

98 r"|function_call|assistant|user)(?:\s[^>]*)?)>", 

99 r"&lt;\1>", 

100 text, 

101 flags=re.IGNORECASE, 

102 ) 

103 

104 

def sanitize_code_content(content: str) -> str:
    """Sanitize code content before inserting it into an AI prompt.

    Applies lightweight, semantics-preserving transformations that
    defuse the common prompt-injection vectors:

    * Role-boundary markers (``system:``, ``assistant:``) are broken
      with a zero-width space so the model does not treat them as
      role switches.
    * XML-like tags matching prompt-structural names are escaped.

    Everything else passes through untouched — ordinary code that
    merely contains words like "system" or "ignore" in identifiers or
    comments is left alone.

    Args:
        content: Raw code content to sanitize.

    Returns:
        Sanitized content safe for prompt insertion.
    """
    if not content:
        return content

    without_role_markers = _neutralize_role_markers(content)
    return _neutralize_xml_tags(without_role_markers)

130 

131 

def detect_injection_patterns(content: str) -> list[str]:
    """Detect potential prompt injection patterns in content.

    Produces a label for each known injection pattern that matches.
    This is intended for logging/auditing — it does NOT block the
    content from being sent.

    Args:
        content: The text to scan.

    Returns:
        List of injection pattern labels found (empty if clean).
    """
    return [
        label
        for pattern, label in _INJECTION_PATTERNS
        if pattern.search(content)
    ]

150 

151 

def make_boundary_marker() -> str:
    """Generate a unique boundary marker for code fencing.

    Produces strings such as ``CODE_BLOCK_a1b2c3d4`` to delimit code
    content in prompts. The random 8-hex-digit suffix makes it
    infeasible for attacker-controlled content to reproduce the
    delimiter ahead of time.

    Returns:
        A unique boundary marker string.
    """
    random_suffix = uuid.uuid4().hex[:8]
    return "_".join((_BOUNDARY_PREFIX, random_suffix))