Python API¶

Beyond the command line, clat is a small library. Import it to format LaTeX source from your own scripts, editors, or build tools.

from clat import texfmt, load_config

# ./.clat.toml, else ~/.config/clat/config.toml, else the built-in defaults
config = load_config()
# Read through a context manager so the file handle is closed promptly.
with open("main.tex", encoding="utf-8") as f:
    source = f.read()
result = texfmt(source, filename="main.tex", config=config)

print(result.text)                     # the formatted source
for rule, n_hits in result.clangs:     # what was auto-fixed
    print(f"clang: {rule.name} ({n_hits})")
for rule, fname, line, msg in result.clunks:   # reported, not auto-fixed
    print(f"clunk: {fname}:{line}: {msg}")

If you omit config, texfmt uses the built-in defaults (threshold 5, every rule at its default weight). To run with a custom threshold without a file on disk, build the dict yourself:

# Weight overrides are keyed by rule id. 'ellipsis' defaults to weight 4, so
# 9 puts it at or above the custom threshold of 8.
config = {"threshold": 8, "weights": {"ellipsis": 9}}
result = texfmt(source, config=config)

By default, texfmt re-runs the fixable rules until a sweep leaves the text unchanged (a fixed point), up to 5 sweeps. Pass max_iter=1 for single-pass behaviour:

# One sweep only: no "max iterations" clunk is emitted, but result.converged
# is False if that sweep changed the text.
result = texfmt(source, config=config, max_iter=1)

Formatting¶

clat.texfmt ¶

texfmt(text, filename='<input>', config=None, max_iter=DEFAULT_MAX_ITER)

Apply formatting rules according to config.

Returns a ClatResult with the formatted text and categorised issues.

Fixable rules are applied repeatedly until a full sweep makes no text changes, or until max_iter sweeps have run. Detect-only rules are then evaluated once against the final text.

Source code in src/clat/rules.py

def texfmt(text, filename='<input>', config=None, max_iter=DEFAULT_MAX_ITER):
    """Apply formatting rules according to config.

    Returns a ClatResult with the formatted text and categorised issues.

    Fixable rules are applied repeatedly until a full sweep makes no text
    changes, or until ``max_iter`` sweeps have run. Detect-only rules are then
    evaluated once against the final text.

    Parameters
    ----------
    text : str
        Source text to format.
    filename : str
        Name recorded in issue tuples and passed to detect-only rules.
    config : dict or None
        Settings dict. ``None`` means ``_default_config()``. Keys that are
        absent fall back to defaults: ``'threshold'`` to ``DEFAULT_THRESHOLD``,
        ``'protected_environments'`` to ``DEFAULT_PROTECTED_ENVIRONMENTS`` and
        ``'unprotected_rules'`` to empty. Per-rule weights are resolved by
        ``_effective_weight``.
    max_iter : int
        Maximum number of fixable-rule sweeps; must be at least 1.

    Returns
    -------
    ClatResult
        ``text`` holds the final source. A rule whose effective weight is at
        or above the threshold lands in ``clangs`` (fixable) or ``clunks``
        (detect-only); below the threshold it lands in ``splats``. Rules with
        an effective weight of 0 or less are skipped entirely.

    Raises
    ------
    ValueError
        If ``max_iter`` is less than 1.
    """
    if max_iter < 1:
        raise ValueError('max_iter must be >= 1')

    if config is None:
        config = _default_config()

    # A hand-built config may omit keys; default the threshold the same way
    # the protection settings below are defaulted instead of raising KeyError.
    threshold = config.get('threshold', DEFAULT_THRESHOLD)
    result = ClatResult(text=text)

    # Sort rules by order for deterministic application
    sorted_rules = sorted(RULES, key=lambda r: r.order)
    fixable_rules = [r for r in sorted_rules if r.fixable]
    detect_rules = [r for r in sorted_rules if not r.fixable]
    clang_counts = {}        # rule id -> change count summed over all sweeps
    fixable_splats = set()   # ids of below-threshold rules that changed text

    # Environments masked out before a rule runs (e.g. tikzpicture), unless the
    # rule is listed in ``unprotected_rules``.
    protected_envs = set(config.get('protected_environments',
                                    DEFAULT_PROTECTED_ENVIRONMENTS))
    unprotected = set(config.get('unprotected_rules', ()))

    def _respects_protection(rule):
        # With no protected environments there is nothing to mask or filter.
        return bool(protected_envs) and rule.id not in unprotected

    for sweep in range(max_iter):
        sweep_start = result.text
        result.iterations = sweep + 1

        for rule in fixable_rules:
            w = _effective_weight(rule, config)
            if w <= 0:
                continue

            original = result.text
            if _respects_protection(rule):
                # Run the rule on a masked copy, then restore the protected
                # environments from the mapping.
                masked, mapping = _mask_environments(original, protected_envs)
                result.text = _unmask_environments(rule.fn(masked), mapping)
            else:
                result.text = rule.fn(original)
            if result.text == original:
                continue

            if w >= threshold:
                # The change count is only reported for clangs, so compute it
                # only on this branch.
                n_hits = _count_text_changes(original, result.text)
                clang_counts[rule.id] = clang_counts.get(rule.id, 0) + n_hits
            else:
                # Below threshold: still fix, but report as splat.
                fixable_splats.add(rule.id)

        if result.text == sweep_start:
            result.converged = True
            break
    else:
        # Every allowed sweep changed the text.
        result.converged = False

    # Report fixable rules in execution order, once per rule.
    for rule in fixable_rules:
        count = clang_counts.get(rule.id)
        if count:
            result.clangs.append((rule, count))
        if rule.id in fixable_splats:
            # Line 0: the entry refers to the file as a whole.
            result.splats.append((rule, filename, 0, f'{rule.name} (auto-fixed)'))

    protected_lines = _protected_line_set(result.text, protected_envs)
    for rule in detect_rules:
        w = _effective_weight(rule, config)
        if w <= 0:
            continue
        issues = rule.fn(result.text, filename)
        if _respects_protection(rule):
            # Drop issues whose line falls inside a protected environment.
            issues = [iss for iss in issues if iss[1] not in protected_lines]
        for fname, line, msg in issues:
            if w >= threshold:
                result.clunks.append((rule, fname, line, msg))
            else:
                result.splats.append((rule, fname, line, msg))

    # With max_iter == 1 the caller asked for a single pass, so a text change
    # in that pass is not reported as a convergence failure.
    if not result.converged and max_iter > 1:
        result.clunks.append((
            _MAX_ITER_RULE,
            filename,
            0,
            f'max iterations ({max_iter}) reached before convergence; '
            'possible rule interaction cycle',
        ))

    return result

clat.ClatResult `dataclass` ¶

Result of running clat on a file.

Attributes:

Name	Type	Description
`text`	`str — the (possibly modified) source text`
`clangs`	`list[tuple] — (rule, count) for auto-fixed rules above threshold`
`clunks`	`list[tuple] — (rule, filename, line, msg) for unfixable issues above threshold`
`splats`	`list[tuple] — (rule, filename, line, msg) for issues below threshold`
`iterations`	`int — number of fixable-rule sweeps performed`
`converged`	`bool — True if a sweep completed with no text changes`

Source code in src/clat/rules.py

@dataclass
class ClatResult:
    """Outcome of one clat run over a single source text.

    Attributes
    ----------
    text : str            — source text after any fixes have been applied
    clangs : list[tuple]  — (rule, count), one per auto-fixed rule at or above threshold
    clunks : list[tuple]  — (rule, filename, line, msg), reported issues at or above threshold
    splats : list[tuple]  — (rule, filename, line, msg), issues below threshold
    iterations : int      — how many fixable-rule sweeps were run
    converged : bool      — True once a sweep left the text unchanged
    """
    text: str
    # Each instance gets its own fresh list through default_factory.
    clangs: list = field(default_factory=list)
    clunks: list = field(default_factory=list)
    splats: list = field(default_factory=list)
    iterations: int = 0
    converged: bool = True

Configuration¶

clat.load_config ¶

load_config(path=None)

Load config from .clat.toml or fallback locations.

Returns a dict with 'threshold' (int), 'weights' (dict[str, int]), 'protected_environments' (list[str]), and 'unprotected_rules' (list[str]).

Source code in src/clat/rules.py

def load_config(path=None):
    """Read clat settings from a TOML file, or return the defaults.

    With ``path`` given, only that file is considered. Otherwise
    ``.clat.toml`` in the current directory is tried first, then
    ``~/.config/clat/config.toml``. The first candidate that is an existing
    file is parsed; keys it does not define are filled with defaults. If no
    candidate exists, ``_default_config()`` is returned.

    Returns a dict with 'threshold' (int), 'weights' (dict[str, int]),
    'protected_environments' (list[str]), and 'unprotected_rules' (list[str]).
    """
    try:
        import tomllib
    except ModuleNotFoundError:
        # Fall back to the tomli package when tomllib is not available.
        import tomli as tomllib
    from pathlib import Path

    if path:
        candidates = [Path(path)]
    else:
        candidates = [
            Path.cwd() / '.clat.toml',
            Path.home() / '.config' / 'clat' / 'config.toml',
        ]

    # First candidate that exists wins; later ones are never inspected.
    chosen = next((c for c in candidates if c.is_file()), None)
    if chosen is None:
        return _default_config()

    with open(chosen, 'rb') as handle:
        parsed = tomllib.load(handle)

    return {
        'threshold': parsed.get('threshold', DEFAULT_THRESHOLD),
        'weights': parsed.get('weights', {}),
        'protected_environments': parsed.get(
            'protected_environments',
            list(DEFAULT_PROTECTED_ENVIRONMENTS)),
        'unprotected_rules': parsed.get('unprotected_rules', []),
    }

clat.save_config ¶

save_config(config, path)

Write config dict back to a .clat.toml file, preserving all settings.

Source code in src/clat/rules.py

def save_config(config, path):
    """Write config dict back to a .clat.toml file, preserving all settings.

    Parameters
    ----------
    config : dict
        Settings dict as returned by ``load_config``. Absent 'threshold',
        'protected_environments' or 'unprotected_rules' keys are written with
        their defaults.
    path : str or path-like
        Destination file; it is created or overwritten.

    Returns
    -------
    The ``path`` argument, unchanged.
    """
    from pathlib import Path
    # Rebuild from current state, respecting weight and protection overrides.
    lines = _config_lines(
        config.get('threshold', DEFAULT_THRESHOLD),
        config.get('protected_environments', DEFAULT_PROTECTED_ENVIRONMENTS),
        config.get('unprotected_rules', ()),
    )
    lines += _weight_lines(lambda r: _effective_weight(r, config))
    # TOML documents must be UTF-8 and load_config parses the raw bytes, so do
    # not let the platform's locale encoding decide how the file is written.
    Path(path).write_text('\n'.join(lines) + '\n', encoding='utf-8')
    return path

clat.generate_default_config ¶

generate_default_config()

Return a .clat.toml string with all rules and their default weights.

Source code in src/clat/rules.py

def generate_default_config():
    """Build the text of a .clat.toml listing every rule at its default weight.

    The general settings use ``DEFAULT_THRESHOLD``,
    ``DEFAULT_PROTECTED_ENVIRONMENTS`` and no unprotected rules. The result
    is newline-joined and ends with a trailing newline.
    """
    settings = _config_lines(DEFAULT_THRESHOLD, DEFAULT_PROTECTED_ENVIRONMENTS, ())
    # Use each rule's own default weight, not a config override.
    settings += _weight_lines(lambda rule: rule.weight)
    body = '\n'.join(settings)
    return body + '\n'

The rule registry¶

clat.Rule `dataclass` ¶

A single clat rule.

Attributes:

Name	Type	Description
`id`	`str — unique key, used in config overrides (e.g. 'labels_inline')`
`name`	`str — human-readable description`
`fn`	`callable — fix function f(text) -> text (fixable=True), or warn function f(text, filename) -> [(file, line, msg)]`
`weight`	`int — default severity 1–10; 0 disables the rule`
`fixable`	`bool — True if clat can auto-fix this`
`order`	`int — execution order (lower = earlier); fixes run before warns`

Source code in src/clat/rules.py

@dataclass
class Rule:
    """A single clat rule.

    Fields are listed in constructor order; ``num`` is the first positional
    argument.

    Attributes
    ----------
    num : int       — rule number, e.g. 1 for 'labels_inline'
                       (NOTE(review): presumably a stable reference number — confirm)
    id : str        — unique key, used in config overrides (e.g. 'labels_inline')
    name : str      — human-readable description
    fn : callable   — fix function  f(text) -> text           (fixable=True)
                       or warn function  f(text, filename) -> [(file, line, msg)]
    weight : int    — default severity 1–10; 0 disables the rule
    fixable : bool  — True if clat can auto-fix this
    order : int     — execution order (lower = earlier); fixes run before warns
    """
    num: int
    id: str
    name: str
    fn: Callable
    weight: int
    fixable: bool
    order: int

clat.RULES `module-attribute` ¶

# Registry of every clat rule. List position is not execution order: texfmt
# sorts by ``order``. Rules with weight=0 are disabled by default. The digits
# in the rule function names are independent of the ``num`` field.
RULES = [
    Rule(1, 'labels_inline', 'Merge \\label onto the same line as \\section', rule1_labels_inline, weight=8, fixable=True, order=10),
    Rule(2, 'decorative_comments', 'Strip decorative comment separators (%%===, %%--- etc.)', rule7_strip_decorative_comments, weight=6, fixable=True, order=20),
    Rule(3, 'heading_spacing', 'Two blank lines before headings, none after', rule5_heading_spacing, weight=7, fixable=True, order=30),
    Rule(4, 'equation_separators', 'Insert % lines around display-math environments', rule2_equation_separators, weight=7, fixable=True, order=40),
    Rule(5, 'equation_punctuation', 'Add trailing comma or period to display equations', rule4_equation_punctuation, weight=6, fixable=True, order=50),
    Rule(6, 'float_indentation', 'Tab-indent content inside figure/table/list environments', rule6_figure_indentation, weight=5, fixable=True, order=60),
    Rule(7, 'one_sentence_per_line', 'Split sentences onto individual lines', rule3_one_sentence_per_line, weight=8, fixable=True, order=70),
    Rule(8, 'math_delimiters_inline', 'Replace \\(...\\) with $...$', rule8_math_delimiters_inline, weight=5, fixable=True, order=80),
    Rule(9, 'math_delimiters_display', 'Replace \\[...\\] with $$...$$', rule9_math_delimiters_display, weight=0, fixable=True, order=85),
    Rule(10, 'math_delimiters_equation', 'Replace \\[...\\] or $$...$$ with equation environment', rule10_math_delimiters_equation, weight=0, fixable=True, order=35),
    Rule(11, 'tilde_before_refs', 'Ensure non-breaking space before \\ref, \\cite etc.', rule11_tilde_before_refs, weight=7, fixable=True, order=90),
    Rule(12, 'number_unit_spacing', 'Normalise number-unit spacing (100\\,kN)', rule12_number_unit_spacing, weight=6, fixable=True, order=100),
    Rule(13, 'old_font_commands', 'Replace {\\bf text} with \\textbf{text} etc.', rule13_old_font_commands, weight=5, fixable=True, order=110),
    Rule(14, 'ellipsis', 'Replace ... with \\dots', rule14_ellipsis, weight=4, fixable=True, order=120),
    Rule(15, 'ordinal_suffixes', 'Convert superscript ordinals to plain text (1st, 2nd)', rule15_ordinal_suffixes, weight=8, fixable=True, order=130),
    Rule(16, 'table_line_endings', 'Table \\\\ on row line, \\hline/\\toprule on own line', rule16_table_line_endings, weight=7, fixable=True, order=140),
    Rule(17, 'abbreviation_spacing', 'Force interword space after e.g., i.e., et al.', rule17_abbreviation_spacing, weight=7, fixable=True, order=145),
    Rule(22, 'join_wrapped_lines', 'Join hard-wrapped lines so each sentence is one line', rule18_join_wrapped_lines, weight=8, fixable=True, order=65),
    Rule(18, 'long_file', 'Warn if file exceeds 2000 lines', warn_long_file, weight=3, fixable=False, order=200),
    Rule(19, 'hardcoded_refs', 'Detect "Figure 3" instead of \\cref{...}', warn_hardcoded_refs, weight=6, fixable=False, order=210),
    Rule(20, 'manual_sizing', 'Detect \\big, \\Big etc. (prefer \\left/\\right)', warn_manual_sizing, weight=3, fixable=False, order=220),
    Rule(21, 'float_after_heading', 'Detect float placed directly after a heading', warn_float_after_heading, weight=4, fixable=False, order=230),
]

clat.DEFAULT_THRESHOLD `module-attribute` ¶

DEFAULT_THRESHOLD = 5

Multi-file discovery¶

For multi-file documents, clat.cli.discover_tex_files expands a list of root files into the full, ordered, de-duplicated set of .tex files reachable through \input/\include-style commands — the same traversal the -r flag uses. See Multi-file documents.

from clat.cli import discover_tex_files

# Each root is listed first, followed depth-first by the files it pulls in;
# dependencies that do not exist on disk are still listed.
for path in discover_tex_files(["main.tex"]):
    print(path)

clat.cli.discover_tex_files ¶

discover_tex_files(files)

Return files plus recursively discovered LaTeX inputs/includes.

Roots are visited in the order provided. Dependencies are depth-first, de-duplicated, and resolved relative to the file that references them. Missing .tex dependencies are included in the returned list so the normal formatter path reports them as missing.

Source code in src/clat/cli.py

def discover_tex_files(files):
    """Return files plus recursively discovered LaTeX inputs/includes.

    Roots are visited in the order provided. Dependencies are depth-first,
    de-duplicated, and resolved relative to the file that references them.
    Missing .tex dependencies are included in the returned list so the normal
    formatter path reports them as missing. A file that exists but cannot be
    read or decoded is listed as well, but is not scanned for further
    dependencies.

    Parameters
    ----------
    files : iterable of str or path-like
        Root files. A name without a suffix gets ``.tex`` appended; relative
        names are resolved against the current working directory.

    Returns
    -------
    list
        One entry per discovered file, in visiting order, each passed through
        ``_display_path``.
    """
    discovered = []
    seen = set()

    def visit(path):
        path = Path(path)
        if path.suffix == '':
            path = Path(f'{path}.tex')
        if not path.is_absolute():
            path = Path.cwd() / path
        # Resolve so the same file reached by different routes is seen once.
        path = path.resolve()

        if path in seen:
            return
        seen.add(path)
        discovered.append(path)

        try:
            text = path.read_text()
        except (OSError, UnicodeError):
            # Missing, unreadable or undecodable: keep the file in the list
            # but do not look for dependencies inside it. UnicodeDecodeError
            # is not an OSError, so it has to be named separately.
            return

        for child in _iter_tex_inputs(text, path.parent):
            visit(child)

    for file in files:
        visit(file)

    return [_display_path(path) for path in discovered]

Python API¶

Formatting¶

clat.texfmt ¶

clat.ClatResult dataclass ¶

Configuration¶

clat.load_config ¶

clat.save_config ¶

clat.generate_default_config ¶

The rule registry¶

clat.Rule dataclass ¶

clat.RULES module-attribute ¶

clat.DEFAULT_THRESHOLD module-attribute ¶

Multi-file discovery¶

clat.cli.discover_tex_files ¶

clat.ClatResult `dataclass` ¶

clat.Rule `dataclass` ¶

clat.RULES `module-attribute` ¶

clat.DEFAULT_THRESHOLD `module-attribute` ¶