Coverage for lintro / utils / jsonc.py: 91%

120 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2026-04-03 18:53 +0000

1"""JSONC (JSON with Comments) parsing utilities. 

2 

3Provides functions for stripping JSONC comments and trailing commas, 

4plus a convenience loader that produces standard Python objects from 

5JSONC text (as used in tsconfig.json, .markdownlint.jsonc, etc.). 

6""" 

7 

8from __future__ import annotations 

9 

10import json 

11from pathlib import Path 

12from typing import Any 

13 

14from loguru import logger 

15 

16 

def strip_jsonc_comments(content: str) -> str:
    """Strip ``//`` and ``/* */`` comments from JSONC text, preserving strings.

    The text is scanned one character at a time while tracking whether the
    cursor is inside a double-quoted string or a block comment. Comment-like
    sequences inside string values are copied through untouched.

    Block comments are removed entirely (nothing is inserted in their place).
    Line comments are removed up to, but not including, the terminating
    newline, so a line that held only a ``//`` comment becomes an empty line.

    Block comments do not nest: the first ``*/`` after an opening ``/*`` ends
    the comment, even if further ``/*`` sequences appear in between.

    Args:
        content: JSONC content as string.

    Returns:
        Content with comments stripped.

    Note:
        If the input ends inside a block comment, a warning is logged and
        everything after the opening ``/*`` is dropped.
    """
    result: list[str] = []
    i = 0
    content_len = len(content)
    in_string = False
    escape_next = False
    in_block_comment = False

    while i < content_len:
        char = content[i]

        if in_block_comment:
            # Inside a block comment only the terminator is significant.
            # Checking for "/*" here would mis-read input such as "/* a /*/":
            # the "/" + "*" pair would be consumed as a new opener and the
            # real "*/" terminator would be missed.
            if char == "*" and i + 1 < content_len and content[i + 1] == "/":
                in_block_comment = False
                i += 2  # Skip both * and /
            else:
                i += 1
            continue

        if escape_next:
            # Character following a backslash inside a string: copy verbatim
            # so that an escaped quote does not terminate the string.
            escape_next = False
            result.append(char)
            i += 1
            continue

        if in_string:
            if char == "\\":
                escape_next = True
            elif char == '"':
                in_string = False
            result.append(char)
            i += 1
            continue

        if char == '"':
            in_string = True
            result.append(char)
            i += 1
            continue

        if char == "/" and i + 1 < content_len:
            following = content[i + 1]

            if following == "*":
                # Block comment start: drop the opener and enter comment mode.
                in_block_comment = True
                i += 2
                continue

            if following == "/":
                # Line comment: skip to end of line, keeping the newline so
                # the line structure of the document is preserved.
                while i < content_len and content[i] != "\n":
                    i += 1
                if i < content_len:
                    result.append("\n")
                    i += 1
                continue

        result.append(char)
        i += 1

    if in_block_comment:
        logger.warning("Unclosed block comment in JSONC content")

    return "".join(result)

111 

112 

def strip_trailing_commas(content: str) -> str:
    """Remove commas that directly precede a closing ``]`` or ``}``.

    Such trailing commas are invalid in strict JSON but common in JSONC
    files (e.g., tsconfig.json). The input is walked character by
    character so that commas inside string literals (single- or
    double-quoted, with backslash escapes honoured) are never touched.

    Args:
        content: JSON content with potential trailing commas.

    Returns:
        Content with trailing commas removed.
    """
    blanks = (" ", "\t", "\n", "\r")
    closers = ("]", "}")
    total = len(content)

    kept: list[str] = []
    open_quote = ""  # The quote character of the current string, or "" outside
    escaped = False  # True when the previous char was a backslash in a string

    for pos, ch in enumerate(content):
        if escaped:
            # Escaped character: always copied, never interpreted.
            escaped = False
        elif open_quote:
            if ch == "\\":
                escaped = True
            elif ch == open_quote:
                open_quote = ""
        elif ch in ('"', "'"):
            open_quote = ch
        elif ch == ",":
            # Peek past whitespace; drop the comma if a closer follows.
            peek = pos + 1
            while peek < total and content[peek] in blanks:
                peek += 1
            if peek < total and content[peek] in closers:
                continue

        kept.append(ch)

    return "".join(kept)

175 

176 

def extract_type_roots(base_content: Any, base_dir: Path) -> list[str] | None:
    """Resolve ``compilerOptions.typeRoots`` from parsed tsconfig content.

    Each string entry is joined onto ``base_dir`` and resolved; the result
    is returned as a string. Non-string entries, and entries whose
    resolution raises ``ValueError`` or ``OSError``, are silently skipped.

    Args:
        base_content: Parsed tsconfig content (expected to be a dict).
        base_dir: Directory of the base tsconfig for resolving relative paths.

    Returns:
        Resolved typeRoots list, empty list if explicitly set to ``[]``,
        or ``None`` if ``base_content`` or its ``compilerOptions`` is not a
        dict, or if ``typeRoots`` is missing or not a list.
    """
    compiler_options = (
        base_content.get("compilerOptions")
        if isinstance(base_content, dict)
        else None
    )
    if not isinstance(compiler_options, dict):
        return None

    # A missing key yields None here, which fails the list check below just
    # like any other non-list value.
    declared_roots = compiler_options.get("typeRoots")
    if not isinstance(declared_roots, list):
        return None

    absolute_roots: list[str] = []
    for entry in declared_roots:
        if isinstance(entry, str):
            try:
                target = (base_dir / entry).resolve()
            except (ValueError, OSError):
                continue
            absolute_roots.append(str(target))
    return absolute_roots

207 

208 

def load_jsonc(text: str) -> Any:
    """Parse JSONC text into a Python object.

    Comments are stripped first, then trailing commas, and the remaining
    text is handed to ``json.loads``. Any error raised by ``json.loads``
    for text that is still not valid JSON propagates to the caller.

    Args:
        text: JSONC content as string.

    Returns:
        Parsed Python object (dict, list, str, etc.).
    """
    without_comments = strip_jsonc_comments(text)
    strict_json = strip_trailing_commas(without_comments)
    return json.loads(strict_json)