"""ANSI escape code parser — converts ANSI-styled text to StyledSpan sequences."""
from __future__ import annotations
import logging
import re
from dataclasses import dataclass
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
# Names exported by a star-import of this module.
__all__ = ["StyledSpan", "ANSI_COLORS", "parse_ansi", "strip_ansi"]
# SGR (Select Graphic Rendition): ESC [ <digits and semicolons> m.
# Group 1 captures the raw parameter string, e.g. "1;31".
_SGR_RE = re.compile(r"\x1b\[([0-9;]*)m")
# Any CSI sequence, following the ECMA-48 grammar: ESC [, then parameter bytes
# (0x30-0x3F), then intermediate bytes (0x20-0x2F), then exactly one final byte
# (0x40-0x7E). Restricting each part to its byte range keeps a sequence that
# ends in a non-letter (e.g. ESC[3~) from consuming the following letter, and
# keeps a stray "ESC[" from swallowing ordinary text up to the next letter.
_CSI_RE = re.compile(r"\x1b\[[0-?]*[ -/]*[@-~]")
# Foreground used when no colour has been selected: opaque white, float RGBA.
DEFAULT_FG: tuple[float, float, float, float] = (1.0, 1.0, 1.0, 1.0)
# Public API: exported via __all__.
@dataclass(slots=True)
class StyledSpan:
    """A run of text whose styling is the same for every character.

    Colours are RGBA tuples of floats.
    """

    # The characters covered by this span.
    text: str
    # Foreground colour; defaults to DEFAULT_FG.
    colour: tuple[float, float, float, float] = DEFAULT_FG
    # Background colour, or None when no background is set.
    bg_colour: tuple[float, float, float, float] | None = None
    # Whether the bold attribute is on.
    bold: bool = False
    # Whether the underline attribute is on.
    underline: bool = False
# The 16 standard ANSI colours as float RGBA. Every entry is fully opaque, so
# the table is written as RGB triples and the alpha of 1.0 is appended here.
ANSI_COLORS: list[tuple[float, float, float, float]] = [
    (red, green, blue, 1.0)
    for red, green, blue in (
        # Indices 0-7: standard (dark) colours
        (0.0, 0.0, 0.0),  # 0: Black
        (0.8, 0.0, 0.0),  # 1: Red
        (0.0, 0.8, 0.0),  # 2: Green
        (0.8, 0.8, 0.0),  # 3: Yellow
        (0.0, 0.0, 0.8),  # 4: Blue
        (0.8, 0.0, 0.8),  # 5: Magenta
        (0.0, 0.8, 0.8),  # 6: Cyan
        (0.75, 0.75, 0.75),  # 7: White (light gray)
        # Indices 8-15: bright colours
        (0.5, 0.5, 0.5),  # 8: Bright Black (dark gray)
        (1.0, 0.3, 0.3),  # 9: Bright Red
        (0.3, 1.0, 0.3),  # 10: Bright Green
        (1.0, 1.0, 0.3),  # 11: Bright Yellow
        (0.3, 0.3, 1.0),  # 12: Bright Blue
        (1.0, 0.3, 1.0),  # 13: Bright Magenta
        (0.3, 1.0, 1.0),  # 14: Bright Cyan
        (1.0, 1.0, 1.0),  # 15: Bright White
    )
]
def _colour_256(n: int) -> tuple[float, float, float, float]:
    """Convert a 256-colour palette index to float RGBA.

    The palette has three regions:

    * 0-15: the entries of ``ANSI_COLORS``;
    * 16-231: a 6x6x6 colour cube, each channel stepping by 51 (0..255);
    * 232-255: a 24-step grayscale ramp (8, 18, ..., 238).

    Args:
        n: Palette index. Values outside 0..255 are clamped to the nearest
            valid index.

    Returns:
        An opaque ``(r, g, b, 1.0)`` tuple with channels in 0.0..1.0.
    """
    # Clamp first: an unchecked index above 255 would produce channel values
    # greater than 1.0, and a negative one would wrap around in ANSI_COLORS.
    n = max(0, min(255, n))
    if n < 16:
        return ANSI_COLORS[n]
    if n < 232:
        # Colour cube: the offset is a base-6 number with digits (r, g, b).
        cube = n - 16
        r = (cube // 36) * 51
        g = ((cube % 36) // 6) * 51
        b = (cube % 6) * 51
        return (r / 255, g / 255, b / 255, 1.0)
    # Grayscale ramp: 232-255
    v = (n - 232) * 10 + 8
    return (v / 255, v / 255, v / 255, 1.0)
@dataclass(slots=True)
class _State:
    """Running record of the SGR attributes currently in effect."""

    fg: tuple[float, float, float, float] = DEFAULT_FG
    bg: tuple[float, float, float, float] | None = None
    bold: bool = False
    underline: bool = False

    def reset(self) -> None:
        """Put every attribute back to its initial value."""
        self.fg, self.bg = DEFAULT_FG, None
        self.bold = self.underline = False

    def span(self, text: str) -> StyledSpan:
        """Build a StyledSpan for *text* carrying the current attributes."""
        return StyledSpan(
            text=text,
            colour=self.fg,
            bg_colour=self.bg,
            bold=self.bold,
            underline=self.underline,
        )
def _apply_sgr(params_str: str, state: _State) -> None:
    """Apply one SGR parameter string (e.g. ``'1;31'``) to *state* in place.

    Handled codes: 0 (reset), 1/22 (bold on/off), 4/24 (underline on/off),
    30-37 and 90-97 (foreground), 40-47 and 100-107 (background), 39/49
    (default foreground / no background), and the extended forms ``38;5;N``,
    ``38;2;R;G;B`` and their ``48`` background equivalents. Any other code is
    ignored.

    Args:
        params_str: Semicolon-separated decimal codes. An empty field counts
            as 0, so an empty string behaves like a reset.
        state: The attribute tracker to mutate.

    Raises:
        ValueError: If a non-empty field is not a valid integer.
    """
    codes = [int(field) if field else 0 for field in params_str.split(";")]
    count = len(codes)
    i = 0
    while i < count:
        c = codes[i]
        if c == 0:
            state.reset()
        elif c == 1:
            state.bold = True
        elif c == 4:
            state.underline = True
        elif c == 22:
            state.bold = False
        elif c == 24:
            state.underline = False
        elif 30 <= c <= 37:
            state.fg = ANSI_COLORS[c - 30]
        elif c == 39:
            state.fg = DEFAULT_FG
        elif 40 <= c <= 47:
            state.bg = ANSI_COLORS[c - 40]
        elif c == 49:
            state.bg = None
        elif 90 <= c <= 97:
            state.fg = ANSI_COLORS[c - 90 + 8]
        elif 100 <= c <= 107:
            state.bg = ANSI_COLORS[c - 100 + 8]
        elif c in (38, 48):
            # Extended colour: 38;5;N or 38;2;R;G;B (48 for background)
            mode = codes[i + 1] if i + 1 < count else None
            if mode in (5, 2):
                needed = 1 if mode == 5 else 3
                operands = codes[i + 2:i + 2 + needed]
                if len(operands) < needed:
                    # Truncated sequence: whatever follows is a partial set of
                    # colour operands. Stop here so those numbers are not
                    # misread as SGR codes (e.g. "38;2;1;4" enabling bold and
                    # underline).
                    break
                # Clamp to one byte so channels stay within 0.0..1.0.
                operands = [max(0, min(255, value)) for value in operands]
                if mode == 5:
                    colour = _colour_256(operands[0])
                else:
                    r, g, b = operands
                    colour = (r / 255, g / 255, b / 255, 1.0)
                if c == 38:
                    state.fg = colour
                else:
                    state.bg = colour
                i += 2 + needed
                continue
        i += 1
# Public API: exported via __all__.
def parse_ansi(text: str) -> list[StyledSpan]:
    """Split ANSI-escaped text into StyledSpans.

    The input is scanned for SGR escape sequences. The text between two
    sequences becomes one span styled with the attributes in effect at that
    point (colour, background, bold, underline). Back-to-back escape
    sequences yield no empty spans, and an empty input yields an empty list.
    """
    result: list[StyledSpan] = []
    if not text:
        return result

    attrs = _State()
    cursor = 0
    for escape in _SGR_RE.finditer(text):
        # Text preceding this escape keeps the attributes set so far.
        literal = text[cursor:escape.start()]
        if literal:
            result.append(attrs.span(literal))
        _apply_sgr(escape.group(1), attrs)
        cursor = escape.end()

    # Whatever follows the final escape sequence.
    tail = text[cursor:]
    if tail:
        result.append(attrs.span(tail))
    return result
# Public API: exported via __all__.
def strip_ansi(text: str) -> str:
    """Return *text* with every sequence matched by ``_CSI_RE`` removed."""
    plain = _CSI_RE.sub("", text)
    return plain