Source code for gh_issue_validator.checks.headings

"""Checks related to expectations about headings and content within them."""

import typing
from itertools import pairwise
from typing import NotRequired, TypedDict

import nltk

from gh_issue_validator.checks._base import ValidationCheck
from gh_issue_validator.markdown import _render_tokens_as_md
from gh_issue_validator.report import ValidationIssue, ValidationReport
from gh_issue_validator.types import SegmentsMap


[docs] class HeadingRequirement(TypedDict): """Specification of expectations for a heading and its contents.""" heading: str min_words: NotRequired[int] max_words: NotRequired[int]
class CheckMissingHeadings(ValidationCheck):
    """Report every required heading that is absent from the segments."""

    def __init__(self, *, requirements: list[HeadingRequirement]) -> None:
        """Remember the heading requirements to verify.

        Args:
            requirements: Specifications whose ``heading`` must be present.
        """
        self._requirements = requirements

    @typing.override
    def check(self, *, segments: SegmentsMap, report: ValidationReport) -> None:
        """Add a ``missing-heading`` issue for each absent required heading.

        Args:
            segments: Parsed content, looked up by heading text.
            report: Collector that receives the issues found.
        """
        # Lazy generator: headings are tested one at a time, in requirement
        # order, exactly as a plain loop would.
        absent = (
            spec["heading"]
            for spec in self._requirements
            if spec["heading"] not in segments
        )
        for name in absent:
            report.add_issue(
                ValidationIssue(
                    code="missing-heading",
                    message=f"Missing required heading: '{name}'",
                    heading=name,
                )
            )
class CheckUnexpectedHeadings(ValidationCheck):
    """Report every heading that is neither required nor freeform."""

    def __init__(
        self,
        *,
        requirements: list[HeadingRequirement],
        freeform_headings: list[str] | None = None,
    ) -> None:
        """Build the list of headings that are allowed to appear.

        Args:
            requirements: Specifications whose ``heading`` values are allowed.
            freeform_headings: Additional allowed headings; ``None`` or an
                empty list adds nothing.
        """
        required = [spec["heading"] for spec in requirements]
        extras = freeform_headings if freeform_headings else []
        self._allowed_headings = required + extras

    @typing.override
    def check(self, *, segments: SegmentsMap, report: ValidationReport) -> None:
        """Add an ``unexpected-heading`` issue for each disallowed heading.

        Args:
            segments: Parsed content; iterating it yields the heading texts.
            report: Collector that receives the issues found.
        """
        for name in segments:
            if name not in self._allowed_headings:
                report.add_issue(
                    ValidationIssue(
                        code="unexpected-heading",
                        message=f"Unexpected heading: '{name}'",
                        heading=name,
                    )
                )
class CheckDisorderedHeadings(ValidationCheck):
    """Report known headings that appear out of their expected order."""

    def __init__(
        self,
        *,
        requirements: list[HeadingRequirement],
        freeform_headings: list[str] | None = None,
    ) -> None:
        """Record the expected order: required headings, then freeform ones.

        Args:
            requirements: Specifications whose ``heading`` values come first,
                in the order given.
            freeform_headings: Headings expected after the required ones;
                ``None`` or an empty list adds nothing.
        """
        required = [spec["heading"] for spec in requirements]
        trailing = freeform_headings if freeform_headings else []
        self._expected_order = required + trailing

    @typing.override
    def check(self, *, segments: SegmentsMap, report: ValidationReport) -> None:
        """Add a ``disordered-header`` issue for each misordered adjacent pair.

        Headings that are not part of the expected order are ignored. Only
        neighbouring known headings are compared with each other.

        Args:
            segments: Parsed content; iterating it yields headings in the
                order they were stored.
            report: Collector that receives the issues found.
        """
        # Position of each heading in the expected order (the last occurrence
        # wins if a heading is listed more than once).
        rank = {name: pos for pos, name in enumerate(self._expected_order)}
        present = [name for name in segments if name in rank]

        for earlier, later in pairwise(present):
            if rank[earlier] > rank[later]:
                report.add_issue(
                    ValidationIssue(
                        code="disordered-header",
                        message=(
                            f"Heading '{later}' should appear before"
                            f" '{earlier}'"
                        ),
                    )
                )
class CheckWordCount(ValidationCheck):
    """Check that each heading has the expected number of words."""

    def __init__(self, *, requirements: list[HeadingRequirement]) -> None:
        """Store the requirements and make sure the tokenizer data exists.

        Args:
            requirements: Specifications carrying the optional ``min_words``
                and ``max_words`` bounds per heading.
        """
        # Only invoke the downloader when the tokenizer data cannot be found
        # locally, so constructing the check does not call it every time.
        # If the lookup fails for any reason we fall back to the download call.
        try:
            nltk.data.find("tokenizers/punkt_tab")
        except LookupError:
            nltk.download("punkt_tab", quiet=True)
        self._requirements = requirements

    @typing.override
    def check(self, *, segments: SegmentsMap, report: ValidationReport) -> None:
        """Add issues for headings whose content is too short or too long.

        Headings that are absent from ``segments`` are skipped. The count is
        the number of tokens ``nltk.word_tokenize`` returns for the stripped,
        lower-cased markdown rendering of the heading's content.

        Args:
            segments: Parsed content, looked up by heading text.
            report: Collector that receives the issues found.
        """
        for req in self._requirements:
            heading = req["heading"]
            if heading not in segments:
                continue

            content = _render_tokens_as_md(segments[heading]).strip()
            # NOTE(review): word_tokenize presumably emits punctuation (and
            # markdown symbols) as separate tokens, which are counted as
            # words here -- confirm this is intended.
            word_count = len(nltk.word_tokenize(content.lower()))

            # A missing lower bound behaves like 0, i.e. it never triggers.
            min_words = req.get("min_words", 0)
            if word_count < min_words:
                report.add_issue(
                    ValidationIssue(
                        code="incomplete-info",
                        message=(
                            f"Heading '{heading}' requires at least {min_words} words,"
                            f" found {word_count} words"
                        ),
                        heading=heading,
                    )
                )

            # A missing upper bound means no maximum is enforced.
            max_words = req.get("max_words")
            if max_words is not None and word_count > max_words:
                report.add_issue(
                    ValidationIssue(
                        code="too-much-info",
                        message=(
                            f"Heading '{heading}' requires at most {max_words} words,"
                            f" found {word_count} words"
                        ),
                        heading=heading,
                    )
                )