Coverage for scripts / utils / merge_pr_comment.py: 94%

51 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2026-04-03 18:53 +0000

1#!/usr/bin/env python3 

2"""Utilities to merge PR comment bodies while preserving history. 

3 

4This module exposes a function to combine a previous PR comment body with 

5new content. Historical runs are displayed in separate collapsed <details> 

6blocks below the current content. 

7 

8The marker line (e.g., "<!-- coverage-report -->") is ensured at the very top 

9of the merged body so future runs can reliably find and update the same 

10comment. 

11 

12Google-style docstrings are used per project standards. 

13""" 

14 

15from __future__ import annotations 

16 

17import re 

18from datetime import UTC, datetime 

19 

# Maximum number of historical runs retained per comment. Older entries are
# dropped so long-running PRs don't accumulate unbounded comment history.
MAX_HISTORY_RUNS: int = 5

23 

24 

25def _normalize_newline(value: str) -> str: 

26 """Normalize newlines to Unix style. 

27 

28 Args: 

29 value: The text to normalize. 

30 

31 Returns: 

32 str: Normalized text with Unix newlines. 

33 """ 

34 return value.replace("\r\n", "\n").replace("\r", "\n") 

35 

36 

37def _extract_details_blocks(content: str) -> tuple[str, list[str]]: 

38 """Extract history <details> blocks and remaining content from a string. 

39 

40 Only extracts <details> blocks that are history entries (identified by 

41 summary text containing "Previous run" or "Run #"). Other <details> 

42 blocks (e.g., user-created collapsibles) are left intact in the remaining 

43 content. 

44 

45 Args: 

46 content: The content to parse. 

47 

48 Returns: 

49 Tuple of (content outside history details blocks, list of history blocks). 

50 """ 

51 # Pattern to match <details>...</details> blocks that are history entries 

52 # History blocks have summaries containing "Previous run" or "Run #" patterns 

53 # Uses non-greedy match to handle multiple blocks; assumes no nesting 

54 history_pattern = re.compile( 

55 r"<details>\s*<summary>[^<]*(Previous run|Run #)[^<]*</summary>.*?</details>", 

56 re.DOTALL, 

57 ) 

58 

59 # findall returns the captured group, not the full match; use finditer instead 

60 details_blocks: list[str] = [m.group(0) for m in history_pattern.finditer(content)] 

61 remaining_content: str = history_pattern.sub("", content).strip() 

62 

63 return remaining_content, details_blocks 

64 

65 

66def _extract_timestamp_from_details(details_block: str) -> str | None: 

67 """Extract timestamp from a details block summary. 

68 

69 Args: 

70 details_block: A <details>...</details> block. 

71 

72 Returns: 

73 The timestamp string if found, None otherwise. 

74 """ 

75 # Match patterns like "Run #N (2026-01-25 19:00:00 UTC)" or 

76 # "Previous run (updated 2026-01-25 19:00:00 UTC)" 

77 timestamp_pattern = re.compile(r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} \w+") 

78 match = timestamp_pattern.search(details_block) 

79 return match.group(0) if match else None 

80 

81 

def _sort_history_by_timestamp(history_blocks: list[str]) -> list[str]:
    """Order history blocks newest-first, undated blocks trailing.

    Blocks lacking a recognizable timestamp (malformed or legacy entries)
    are appended after all dated blocks, keeping their original relative
    order.

    Args:
        history_blocks: List of <details>...</details> blocks to sort.

    Returns:
        List of blocks sorted by timestamp (newest first).
    """
    dated: list[tuple[str, str]] = []
    undated: list[str] = []
    for block in history_blocks:
        stamp = _extract_timestamp_from_details(block)
        if stamp is None:
            undated.append(block)
        else:
            dated.append((stamp, block))

    # "%Y-%m-%d %H:%M:%S" timestamps compare correctly as plain strings,
    # so a descending lexical sort yields newest-first. Python's sort is
    # stable even with reverse=True, so equal timestamps keep input order.
    dated.sort(key=lambda pair: pair[0], reverse=True)

    return [block for _, block in dated] + undated

107 

108 

def merge_comment_bodies(
    *,
    marker: str,
    previous_body: str | None,
    new_body: str,
    place_new_above: bool = True,
) -> str:
    """Merge previous and new PR comment bodies with history in flat sections.

    Ensures the marker appears at the top of the merged body exactly once.

    The current run content is displayed at the top (not collapsed). Historical
    runs are displayed in separate <details> blocks below, each with its own
    timestamp. History is limited to the most recent MAX_HISTORY_RUNS.

    Args:
        marker: Marker string used to identify the comment.
        previous_body: Existing comment body to preserve, if any.
        new_body: Freshly generated body for the current run.
        place_new_above: If True, place new_body above the historical
            sections; otherwise, place it below. Defaults to True.
            Use ``place_new_above=False`` for chronological log-style display
            where the latest entry appears at the bottom (e.g., deployment logs
            or audit trails where older entries should be visible first).

    Returns:
        str: The merged comment body ready to send to the GitHub API.
    """
    marker_line: str = marker.strip()
    normalized_new: str = _normalize_newline(new_body).strip()

    # Ensure the marker only appears once at the very top by removing any
    # occurrences from the newly generated body.
    if marker_line and marker_line in normalized_new:
        normalized_new = normalized_new.replace(marker_line, "").strip()

    if not previous_body:
        return f"{marker_line}\n\n{normalized_new}\n"

    normalized_prev: str = _normalize_newline(previous_body).strip()

    now_utc: str = datetime.now(tz=UTC).strftime("%Y-%m-%d %H:%M:%S %Z")

    # Fix: strip *all* marker occurrences from the previous body (not only a
    # leading one), mirroring the new-body handling above. Otherwise a marker
    # embedded mid-body would survive the merge and the "exactly once"
    # guarantee documented above would be violated, confusing future lookups.
    if marker_line and marker_line in normalized_prev:
        normalized_prev = normalized_prev.replace(marker_line, "").strip()

    # Extract existing details blocks and the current content from previous
    prev_current_content, existing_history = _extract_details_blocks(normalized_prev)

    # Build the list of historical runs (newest first)
    history_blocks: list[str] = []

    # The previous "current" content becomes the newest historical entry,
    # stamped with the merge time.
    if prev_current_content.strip():
        new_history_block: str = (
            f"<details>\n"
            f"<summary>📜 Previous run ({now_utc})</summary>\n\n"
            f"{prev_current_content.strip()}\n"
            f"</details>"
        )
        history_blocks.append(new_history_block)

    # Add existing historical blocks and sort by timestamp to ensure
    # newest-first order, then cap the retained history.
    history_blocks.extend(existing_history)
    history_blocks = _sort_history_by_timestamp(history_blocks)
    history_blocks = history_blocks[:MAX_HISTORY_RUNS]

    history_section: str = "\n\n".join(history_blocks)

    # With no history the placement flag is irrelevant; emit the simple form.
    if not history_section:
        return f"{marker_line}\n\n{normalized_new}\n"

    if place_new_above:
        return f"{marker_line}\n\n{normalized_new}\n\n{history_section}\n"
    return f"{marker_line}\n\n{history_section}\n\n{normalized_new}\n"

194 

195 

if __name__ == "__main__":  # pragma: no cover - simple CLI aid
    import argparse
    import sys

    # Thin CLI wrapper around merge_comment_bodies: reads bodies from files
    # and writes the merged result to stdout.
    cli = argparse.ArgumentParser(description="Merge PR comment bodies")
    cli.add_argument("marker", help="Marker line, e.g., <!-- coverage-report -->")
    cli.add_argument("new_file", help="Path to file with new body content")
    cli.add_argument(
        "--previous-file",
        help="Path to file with previous body content",
        default=None,
    )
    cli.add_argument(
        "--place-new-below",
        help="Place new content below previous details",
        action="store_true",
    )
    opts = cli.parse_args()

    # A missing previous file is treated the same as no previous comment.
    previous_text: str | None = None
    if opts.previous_file:
        try:
            with open(opts.previous_file, encoding="utf-8") as handle:
                previous_text = handle.read()
        except FileNotFoundError:
            previous_text = None

    with open(opts.new_file, encoding="utf-8") as handle:
        current_text = handle.read()

    sys.stdout.write(
        merge_comment_bodies(
            marker=opts.marker,
            previous_body=previous_text,
            new_body=current_text,
            place_new_above=not opts.place_new_below,
        )
    )