Coverage for lintro/ai/token_budget.py

1"""Token-aware budget estimation and truncation utilities.

Provides a simple character-based token estimator (4 chars ~ 1 token)
and truncation helpers used by summary and fix prompt builders to stay
within model context limits.

6"""

8from __future__ import annotations

def estimate_tokens(text: str) -> int:
    """Approximate how many tokens ``text`` occupies.

    Applies the rough rule that four characters make up one token and
    counts any leftover partial group of characters as a full token.

    Args:
        text: The string to measure.

    Returns:
        0 when ``text`` is empty; otherwise the character count divided by
        four, rounded up, and never less than 1.
    """
    if text:
        full_groups, leftover = divmod(len(text), 4)
        # A trailing partial group of characters still costs one token.
        token_count = full_groups + (1 if leftover else 0)
        return token_count if token_count > 1 else 1
    return 0

def truncate_to_budget(text: str, max_tokens: int) -> tuple[str, bool]:
    """Shorten ``text`` so its estimated size fits within ``max_tokens``.

    Text whose estimate (per ``estimate_tokens``) is already within the
    budget is returned unchanged. Otherwise the text is clipped to
    ``max_tokens * 4`` characters and, when that prefix contains a newline
    after its first character, cut back to the last such newline so the
    result ends on a complete line.

    Args:
        text: The text to fit into the budget.
        max_tokens: The token budget; must be greater than zero.

    Returns:
        A ``(result, was_truncated)`` pair. ``was_truncated`` is ``False``
        only when ``text`` is returned as-is.

    Raises:
        ValueError: If ``max_tokens`` is zero or negative.
    """
    if max_tokens <= 0:
        raise ValueError(f"max_tokens must be positive, got {max_tokens}")

    within_budget = estimate_tokens(text) <= max_tokens
    if within_budget:
        return text, False

    char_limit = 4 * max_tokens
    clipped = text[:char_limit]
    # rpartition yields an empty head both when no newline exists and when
    # the only newline is the very first character; in either case keep the
    # hard character cut rather than returning an empty string.
    whole_lines, _, _ = clipped.rpartition("\n")
    return (whole_lines or clipped), True

Coverage for lintro / ai / token_budget.py: 100%