Coverage for lintro / ai / token_budget.py: 100%

16 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2026-04-03 18:53 +0000

1"""Token-aware budget estimation and truncation utilities. 

2 

3Provides a simple character-based token estimator (4 chars ~ 1 token) 

4and truncation helpers used by summary and fix prompt builders to stay 

5within model context limits. 

6""" 

7 

8from __future__ import annotations 

9 

10 

def estimate_tokens(text: str) -> int:
    """Approximate the number of tokens a piece of text occupies.

    Applies the rough rule of thumb that four characters make up one
    token, rounding any partial group of characters up to a full token.

    Args:
        text: The text to measure.

    Returns:
        0 when the text is empty; otherwise the character count divided
        by four and rounded up, never less than 1.
    """
    if not text:
        return 0
    # Ceiling division by 4: a leftover partial group still costs a token.
    full_groups, leftover = divmod(len(text), 4)
    estimate = (full_groups + 1) if leftover else full_groups
    return max(estimate, 1)

23 

24 

def truncate_to_budget(text: str, max_tokens: int) -> tuple[str, bool]:
    """Shorten text so that its estimated token count fits a budget.

    Text that already fits is returned untouched. Otherwise the text is
    cut to ``max_tokens * 4`` characters and then trimmed back to the last
    newline inside that window, so the output ends on a whole line where
    possible.

    Args:
        text: The text to shorten.
        max_tokens: The token budget; must be greater than zero.

    Returns:
        A ``(result, was_truncated)`` pair. ``was_truncated`` is False when
        the input is returned unchanged and True when it was cut.

    Raises:
        ValueError: If max_tokens is zero or negative.
    """
    if max_tokens <= 0:
        raise ValueError(f"max_tokens must be positive, got {max_tokens}")

    fits_budget = estimate_tokens(text) <= max_tokens
    if fits_budget:
        return text, False

    # Hard character ceiling implied by the 4-chars-per-token estimate.
    char_limit = 4 * max_tokens
    head = text[:char_limit]

    # Prefer ending on a line boundary. A newline at index 0 (or no newline
    # at all, -1) would leave nothing useful, so keep the hard cut instead.
    cut_at = head.rfind("\n")
    shortened = head[:cut_at] if cut_at > 0 else head
    return shortened, True