Coverage for scripts / utils / merge_pr_comment.py: 94%
51 statements
« prev ^ index » next coverage.py v7.13.0, created at 2026-04-03 18:53 +0000
« prev ^ index » next coverage.py v7.13.0, created at 2026-04-03 18:53 +0000
1#!/usr/bin/env python3
"""Utilities to merge PR comment bodies while preserving history.

This module exposes a function to combine a previous PR comment body with
new content. Historical runs are displayed in separate collapsed <details>
blocks below the current content (or above it when requested).

The marker line (e.g., "<!-- coverage-report -->") is ensured at the very top
of the merged body so future runs can reliably find and update the same
comment.

Google-style docstrings are used per project standards.
"""
15from __future__ import annotations
17import re
18from datetime import UTC, datetime
# Upper bound on how many historical run blocks a merged comment retains.
# Older entries beyond this count are dropped so that comments on
# long-running PRs do not grow without limit.
MAX_HISTORY_RUNS: int = 5
25def _normalize_newline(value: str) -> str:
26 """Normalize newlines to Unix style.
28 Args:
29 value: The text to normalize.
31 Returns:
32 str: Normalized text with Unix newlines.
33 """
34 return value.replace("\r\n", "\n").replace("\r", "\n")
37def _extract_details_blocks(content: str) -> tuple[str, list[str]]:
38 """Extract history <details> blocks and remaining content from a string.
40 Only extracts <details> blocks that are history entries (identified by
41 summary text containing "Previous run" or "Run #"). Other <details>
42 blocks (e.g., user-created collapsibles) are left intact in the remaining
43 content.
45 Args:
46 content: The content to parse.
48 Returns:
49 Tuple of (content outside history details blocks, list of history blocks).
50 """
51 # Pattern to match <details>...</details> blocks that are history entries
52 # History blocks have summaries containing "Previous run" or "Run #" patterns
53 # Uses non-greedy match to handle multiple blocks; assumes no nesting
54 history_pattern = re.compile(
55 r"<details>\s*<summary>[^<]*(Previous run|Run #)[^<]*</summary>.*?</details>",
56 re.DOTALL,
57 )
59 # findall returns the captured group, not the full match; use finditer instead
60 details_blocks: list[str] = [m.group(0) for m in history_pattern.finditer(content)]
61 remaining_content: str = history_pattern.sub("", content).strip()
63 return remaining_content, details_blocks
66def _extract_timestamp_from_details(details_block: str) -> str | None:
67 """Extract timestamp from a details block summary.
69 Args:
70 details_block: A <details>...</details> block.
72 Returns:
73 The timestamp string if found, None otherwise.
74 """
75 # Match patterns like "Run #N (2026-01-25 19:00:00 UTC)" or
76 # "Previous run (updated 2026-01-25 19:00:00 UTC)"
77 timestamp_pattern = re.compile(r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} \w+")
78 match = timestamp_pattern.search(details_block)
79 return match.group(0) if match else None
def _sort_history_by_timestamp(history_blocks: list[str]) -> list[str]:
    """Sort history blocks by timestamp, newest first.

    Blocks without timestamps are placed at the end, in their original
    relative order, so malformed or legacy entries are preserved. Blocks
    with identical timestamps also keep their original relative order.

    Timestamps are compared as plain strings, which orders the fixed-width
    "YYYY-MM-DD HH:MM:SS" prefix chronologically. No per-character
    arithmetic is done, so timestamps containing non-ASCII digits or letters
    cannot raise.

    Args:
        history_blocks: List of <details>...</details> blocks to sort.

    Returns:
        A new list of blocks sorted by timestamp (newest first), followed by
        the blocks that carry no timestamp.
    """
    dated: list[tuple[str, str]] = []
    undated: list[str] = []
    for block in history_blocks:
        timestamp = _extract_timestamp_from_details(block)
        if timestamp:
            dated.append((timestamp, block))
        else:
            undated.append(block)

    # Key on the timestamp only; reverse=True keeps the sort stable, so ties
    # stay in input order.
    dated.sort(key=lambda pair: pair[0], reverse=True)
    return [block for _, block in dated] + undated
def merge_comment_bodies(
    *,
    marker: str,
    previous_body: str | None,
    new_body: str,
    place_new_above: bool = True,
) -> str:
    """Merge previous and new PR comment bodies with history in flat sections.

    Ensures the marker appears at the top of the merged body exactly once:
    every occurrence is removed from both the new body and the previous body
    before the marker line is written back at the top.

    The current run content is displayed uncollapsed. Historical runs are
    displayed in separate <details> blocks, newest first, each with its own
    timestamp. The content that was current in ``previous_body`` becomes a
    new "Previous run" entry stamped with the UTC time of this merge. History
    is limited to the most recent MAX_HISTORY_RUNS.

    Args:
        marker: Marker string used to identify the comment.
        previous_body: Existing comment body to preserve, if any. ``None`` or
            an empty string yields just the marker and the new body.
        new_body: Freshly generated body for the current run.
        place_new_above: If True, place new_body above the historical
            sections; otherwise, place it below them. Defaults to True.

    Returns:
        str: The merged comment body ready to send to the GitHub API,
        terminated by a single newline.
    """
    marker_line: str = marker.strip()

    # The marker is written once at the very top, so strip any copies that
    # the generator embedded in the fresh body.
    normalized_new: str = _normalize_newline(new_body).strip()
    if marker_line:
        normalized_new = normalized_new.replace(marker_line, "").strip()

    if not previous_body:
        return f"{marker_line}\n\n{normalized_new}\n"

    # Remove the marker from the previous body wherever it appears, not only
    # as a leading line; otherwise a non-leading marker would be copied into
    # a history block and show up a second time in the merged comment.
    normalized_prev: str = _normalize_newline(previous_body).strip()
    if marker_line:
        normalized_prev = normalized_prev.replace(marker_line, "").strip()

    now_utc: str = datetime.now(tz=UTC).strftime("%Y-%m-%d %H:%M:%S %Z")

    # Separate what was "current" last time from the already-collapsed history.
    prev_current_content, existing_history = _extract_details_blocks(normalized_prev)

    history_blocks: list[str] = []

    # The previous "current" content becomes the newest historical entry.
    if prev_current_content.strip():
        history_blocks.append(
            f"<details>\n"
            f"<summary>📜 Previous run ({now_utc})</summary>\n\n"
            f"{prev_current_content.strip()}\n"
            f"</details>"
        )

    history_blocks.extend(existing_history)

    # Newest first, then cap the number of retained runs.
    history_blocks = _sort_history_by_timestamp(history_blocks)[:MAX_HISTORY_RUNS]

    if place_new_above:
        sections = [marker_line, normalized_new, *history_blocks]
    else:
        sections = [marker_line, *history_blocks, normalized_new]

    return "\n\n".join(sections) + "\n"
if __name__ == "__main__":  # pragma: no cover - simple CLI aid
    # Small command-line wrapper: read the new (and optionally previous)
    # comment body from files and print the merged body to stdout.
    import argparse
    import sys

    cli = argparse.ArgumentParser(description="Merge PR comment bodies")
    cli.add_argument("marker", help="Marker line, e.g., <!-- coverage-report -->")
    cli.add_argument("new_file", help="Path to file with new body content")
    cli.add_argument(
        "--previous-file",
        default=None,
        help="Path to file with previous body content",
    )
    cli.add_argument(
        "--place-new-below",
        action="store_true",
        help="Place new content below previous details",
    )
    options = cli.parse_args()

    # A missing previous file is treated the same as "no previous comment".
    previous_text: str | None = None
    if options.previous_file:
        try:
            with open(options.previous_file, encoding="utf-8") as previous_handle:
                previous_text = previous_handle.read()
        except FileNotFoundError:
            previous_text = None

    with open(options.new_file, encoding="utf-8") as new_handle:
        fresh_text = new_handle.read()

    sys.stdout.write(
        merge_comment_bodies(
            marker=options.marker,
            previous_body=previous_text,
            new_body=fresh_text,
            place_new_above=not options.place_new_below,
        )
    )