add meditation and pre-commit scripts

2026-02-27 00:48:58 -07:00
parent 6a0ebf5f45
commit 625b2d9729
5 changed files with 634 additions and 41 deletions
@@ -0,0 +1,188 @@
+#!/usr/bin/env python3
+"""
+check-tags.py — Tag similarity checker for Hugo content
+
+Compares tags in staged files against all existing tags in the site.
+Warns and blocks commit when a new tag looks similar to an existing one.
+
+Similarity checks (via difflib.SequenceMatcher):
+  - Ratio >= 0.6 (catches typos, reordered chars, partial matches)
+  - One tag is a substring of the other
+
+Skip with: SKIP_TAG_CHECK=1 git commit
+
+Usage: check-tags.py <file1.md> [file2.md ...]
+"""
+
+import os
+import re
+import sys
+from difflib import SequenceMatcher
+from pathlib import Path
+
+# --- Colors ---
+# ANSI escape sequences used to style terminal output; NC resets all styling.
+RED = "\033[0;31m"
+YELLOW = "\033[0;33m"
+GREEN = "\033[0;32m"
+CYAN = "\033[0;36m"
+BOLD = "\033[1m"
+NC = "\033[0m"
+
+# Two tags are reported as similar when their ratio is at or above this value.
+SIMILARITY_THRESHOLD = 0.6  # SequenceMatcher ratio (0-1)
+
+
+def extract_tags(filepath: Path, *, keep_blanks: bool = False) -> list[str]:
+    """Extract tags from YAML front matter of a markdown file.
+
+    If keep_blanks is True, empty strings from blank tag entries like
+    tags: ["", "foo"] are preserved so the caller can detect them.
+    """
+    try:
+        text = filepath.read_text(encoding="utf-8")
+    except (OSError, UnicodeDecodeError):
+        return []
+
+    # Match front matter between --- delimiters
+    fm_match = re.match(r"^---\s*\n(.*?)\n---\s*\n", text, re.DOTALL)
+    if not fm_match:
+        return []
+
+    frontmatter = fm_match.group(1)
+
+    tags: list[str] = []
+
+    # Match inline array: tags: ["foo", "bar"] or tags: ['foo', 'bar'] or tags: [foo, bar]
+    inline = re.search(r"^tags:\s*\[([^\]]*)\]", frontmatter, re.MULTILINE)
+    if inline:
+        raw = inline.group(1).strip()
+        if raw:  # non-empty array contents (skip tags: [] and tags: [ ])
+            tags = [t.strip().strip("\"'").lower() for t in raw.split(",")]
+    else:
+        # Match YAML list format:
+        # tags:
+        #   - foo
+        #   - bar
+        list_match = re.search(
+            r"^tags:\s*\n((?:\s+-\s+.+\n?)+)", frontmatter, re.MULTILINE
+        )
+        if list_match:
+            items = re.findall(r"^\s+-\s+(.*)", list_match.group(1), re.MULTILINE)
+            tags = [t.strip().strip("\"'").lower() for t in items]
+
+    if keep_blanks:
+        return tags
+    return [t for t in tags if t]
+
+
+def find_similar(new_tag: str, existing_tags: set[str]) -> list[tuple[str, str]]:
+    """Find existing tags similar to a new tag.
+
+    Returns list of (existing_tag, reason) tuples.
+    """
+    similar = []
+
+    for existing in sorted(existing_tags):
+        if existing == new_tag:
+            continue
+
+        # Check substring match
+        if existing in new_tag or new_tag in existing:
+            similar.append((existing, "substring match"))
+            continue
+
+        # Check similarity ratio
+        ratio = SequenceMatcher(None, new_tag, existing).ratio()
+        if ratio >= SIMILARITY_THRESHOLD:
+            similar.append((existing, f"similarity: {ratio:.0%}"))
+
+    return similar
+
+
+def main() -> int:
+    if os.environ.get("SKIP_TAG_CHECK") == "1":
+        print(f"{YELLOW}Skipping tag similarity check (SKIP_TAG_CHECK=1){NC}")
+        return 0
+
+    if len(sys.argv) < 2:
+        print("Usage: check-tags.py <file1.md> [file2.md ...]", file=sys.stderr)
+        return 1
+
+    staged_files = sys.argv[1:]
+
+    # Find repo root
+    repo_root = Path.cwd()
+    content_dir = repo_root / "content"
+    if not content_dir.is_dir():
+        print(f"{YELLOW}No content/ directory found, skipping tag check{NC}")
+        return 0
+
+    # Resolve staged files to absolute paths for exclusion
+    staged_abs = {(repo_root / f).resolve() for f in staged_files}
+
+    # Build tag registry from all content files (excluding staged files)
+    all_tags: set[str] = set()
+    for md_file in content_dir.rglob("*.md"):
+        if md_file.resolve() in staged_abs:
+            continue
+        all_tags.update(extract_tags(md_file))
+
+    if not all_tags:
+        print(f"{GREEN}No existing tags found, nothing to compare against.{NC}")
+        return 0
+
+    # Check staged files for similar tags
+    found_issues = False
+
+    for staged_file in staged_files:
+        filepath = repo_root / staged_file
+        if not filepath.is_file():
+            continue
+        # Only check content files
+        if not staged_file.startswith("content/"):
+            continue
+
+        file_tags_raw = extract_tags(filepath, keep_blanks=True)
+
+        # Check for blank/empty tags
+        blank_count = sum(1 for t in file_tags_raw if not t)
+        if blank_count:
+            found_issues = True
+            print()
+            print(
+                f"{YELLOW}{BOLD}Found {blank_count} blank tag(s) "
+                f"in {staged_file}{NC}"
+            )
+            print(f"{YELLOW}  Remove empty strings from the tags array{NC}")
+
+        file_tags = [t for t in file_tags_raw if t]
+
+        for tag in file_tags:
+            # If the tag already exists exactly, it's fine
+            if tag in all_tags:
+                continue
+
+            # New tag — check for similarity
+            similar = find_similar(tag, all_tags)
+
+            if similar:
+                found_issues = True
+                print()
+                print(f"{YELLOW}{BOLD}New tag '{tag}' in {staged_file}{NC}")
+                print(f"{YELLOW}  Similar existing tags:{NC}")
+                for existing, reason in similar:
+                    print(f"    {CYAN}\u2192 {existing} ({reason}){NC}")
+
+    if found_issues:
+        print()
+        print(f"{RED}{BOLD}Tag similarity check failed.{NC}")
+        print(f"{RED}Consider using an existing tag, or skip with:{NC}")
+        print(f"{RED}  SKIP_TAG_CHECK=1 git commit{NC}")
+        print()
+        return 1
+
+    print(f"{GREEN}Tag check passed \u2014 no similar tags found.{NC}")
+    return 0
+
+
+# Run as a script: main()'s return value becomes the process exit status.
+if __name__ == "__main__":
+    sys.exit(main())
@@ -0,0 +1,386 @@
+#!/usr/bin/env bash
+#
+# spellcheck-interactive.sh — Interactive fzf-based spell checker for markdown
+#
+# Checks markdown files with aspell, then presents each misspelled word
+# interactively with fzf, showing context and offering actions:
+#   - Skip (ignore this word)
+#   - Add to dictionary (.aspell.en.pws)
+#   - Replace with a suggestion
+#   - Type a custom replacement
+#
+# If no TTY is available (non-interactive), falls back to batch output.
+#
+# Usage: spellcheck-interactive.sh <file1.md> [file2.md ...]
+
+set -euo pipefail
+
+RED='\033[0;31m'
+YELLOW='\033[0;33m'
+GREEN='\033[0;32m'
+CYAN='\033[0;36m'
+BOLD='\033[1m'
+DIM='\033[2m'
+NC='\033[0m'
+
+if [ $# -eq 0 ]; then
+    echo "Usage: spellcheck-interactive.sh <file1.md> [file2.md ...]"
+    exit 1
+fi
+
+FILES=("$@")
+REPO_ROOT="$(git rev-parse --show-toplevel)"
+DICT_FILE="$REPO_ROOT/.aspell.en.pws"
+
+# Check aspell availability
+if ! command -v aspell &> /dev/null; then
+    echo -e "${YELLOW}aspell not found, skipping spell check${NC}"
+    exit 0
+fi
+
+if ! aspell dump dicts 2>/dev/null | grep -q "en"; then
+    echo -e "${YELLOW}aspell found but no English dictionaries available${NC}"
+    echo "    Make sure ASPELL_CONF is set correctly in your direnv"
+    exit 0
+fi
+
+# Detect interactive mode
+INTERACTIVE=0
+if [ -t 0 ]; then
+    INTERACTIVE=1
+elif ( exec 0</dev/tty ) 2>/dev/null; then
+    # Pre-commit hook: stdin is redirected, but /dev/tty is available
+    INTERACTIVE=1
+fi
+
+if [ "$INTERACTIVE" -eq 1 ] && ! command -v fzf &> /dev/null; then
+    echo -e "${YELLOW}fzf not found, falling back to non-interactive mode${NC}"
+    INTERACTIVE=0
+fi
+
+# --- Strip front matter and code blocks from markdown ---
+# Returns cleaned text suitable for spell checking.
+# We keep line numbers aligned by replacing stripped content with blank lines,
+# so aspell line numbers match the original file.
+strip_for_spellcheck() {
+    local file="$1"
+    local in_frontmatter=0
+    local frontmatter_count=0
+    local in_codeblock=0
+    local linenum=0
+
+    while IFS= read -r line || [ -n "$line" ]; do
+        linenum=$((linenum + 1))
+
+        # Track YAML front matter (between --- delimiters)
+        if [[ "$line" =~ ^---[[:space:]]*$ ]]; then
+            frontmatter_count=$((frontmatter_count + 1))
+            if [ "$frontmatter_count" -eq 1 ]; then
+                in_frontmatter=1
+                echo ""
+                continue
+            elif [ "$frontmatter_count" -eq 2 ]; then
+                in_frontmatter=0
+                echo ""
+                continue
+            fi
+        fi
+
+        if [ "$in_frontmatter" -eq 1 ]; then
+            echo ""
+            continue
+        fi
+
+        # Track fenced code blocks
+        if [[ "$line" =~ ^\`\`\` ]]; then
+            if [ "$in_codeblock" -eq 0 ]; then
+                in_codeblock=1
+            else
+                in_codeblock=0
+            fi
+            echo ""
+            continue
+        fi
+
+        if [ "$in_codeblock" -eq 1 ]; then
+            echo ""
+            continue
+        fi
+
+        # Strip inline code (`...`) to avoid checking code snippets
+        echo "$line" | sed 's/`[^`]*`//g'
+    done < "$file"
+}
+
+# --- Get aspell suggestions for a word ---
+get_suggestions() {
+    local word="$1"
+    # aspell pipe mode: & means misspelled with suggestions, # means no suggestions
+    local result
+    result=$(echo "$word" | aspell pipe --mode=markdown --lang=en --personal="$DICT_FILE" 2>/dev/null | tail -n +2)
+
+    if [[ "$result" =~ ^'&' ]]; then
+        # Format: & word count offset: suggestion1, suggestion2, ...
+        echo "$result" | sed 's/^& [^ ]* [0-9]* [0-9]*: //' | tr ',' '\n' | sed 's/^[[:space:]]*//' | head -8
+    fi
+}
+
+# --- Get context lines around a word occurrence in a file ---
+get_context() {
+    local file="$1"
+    local word="$2"
+    local max_contexts="${3:-3}"
+
+    # Find line numbers containing the word (case-insensitive, word boundary)
+    local lines
+    lines=$(grep -n -i -w "$word" "$file" 2>/dev/null | head -"$max_contexts") || true
+    echo "$lines"
+}
+
+# --- Add a word to the personal dictionary ---
+add_to_dictionary() {
+    local word="$1"
+    echo "$word" >> "$DICT_FILE"
+    # Sort and deduplicate (preserving the header line)
+    local header
+    header=$(head -1 "$DICT_FILE")
+    local body
+    body=$(tail -n +2 "$DICT_FILE" | sort -u)
+    printf '%s\n%s\n' "$header" "$body" > "$DICT_FILE"
+}
+
+# --- Replace a word in a file ---
+replace_word_in_file() {
+    local file="$1"
+    local old_word="$2"
+    local new_word="$3"
+
+    # Use word-boundary-aware sed replacement (case-sensitive, first occurrence per line)
+    # We use perl for proper word boundary support
+    if command -v perl &> /dev/null; then
+        perl -pi -e "s/\\b\Q${old_word}\E\\b/${new_word}/g" "$file"
+    else
+        sed -i "s/\b${old_word}\b/${new_word}/g" "$file" 2>/dev/null || \
+        sed -i '' "s/[[:<:]]${old_word}[[:>:]]/${new_word}/g" "$file"
+    fi
+}
+
+# --- Non-interactive fallback (matches old pre-commit behavior) ---
+# Spell-checks every existing file in FILES and prints, per misspelled word,
+# the first aspell suggestion plus up to two context lines.
+# Returns 1 if any file has misspellings, 0 otherwise.
+run_batch_check() {
+    local overall_fail=0
+
+    for file in "${FILES[@]}"; do
+        # Arguments that are not regular files are skipped.
+        if [ ! -f "$file" ]; then continue; fi
+
+        # Unique misspelled words found in the stripped prose of this file.
+        local errors
+        errors=$(strip_for_spellcheck "$file" | aspell list --mode=markdown --lang=en --personal="$DICT_FILE" 2>/dev/null | sort -u)
+
+        if [ -n "$errors" ]; then
+            echo -e "${YELLOW}Possible misspellings in ${BOLD}$file${NC}${YELLOW}:${NC}"
+            while IFS= read -r word; do
+                if [ -z "$word" ]; then continue; fi
+                # Only the top suggestion is shown; it may be empty.
+                local suggestion
+                suggestion=$(get_suggestions "$word" | head -1)
+                echo -e "  ${RED}'$word'${NC}  ${DIM}→${NC}  ${GREEN}$suggestion${NC}"
+                get_context "$file" "$word" 2 | while IFS= read -r ctx; do
+                    echo -e "      ${DIM}$ctx${NC}"
+                done
+            done <<< "$errors"
+            echo ""
+            overall_fail=1
+        fi
+    done
+
+    if [ "$overall_fail" -eq 1 ]; then
+        echo -e "${RED}${BOLD}Spell check failed.${NC}"
+        return 1
+    fi
+
+    return 0
+}
+
+# --- Interactive spell check with fzf ---
+# For every misspelled word in every existing file in FILES, shows an fzf
+# menu (skip / add to dictionary / use a suggestion / type a replacement /
+# quit) and applies the choice. Modified files and the dictionary are
+# re-staged with `git add`. Prints a summary and ALWAYS returns 0, even when
+# words were skipped or the user quit early.
+run_interactive_check() {
+    # Grab the terminal for input (needed in pre-commit hook context)
+    exec < /dev/tty
+
+    # files_modified is a per-file flag (reset after each re-stage); the
+    # words_* counters accumulate across all files for the summary.
+    local files_modified=0
+    # NOTE(review): overall_skipped is never read or updated below.
+    local overall_skipped=0
+    local words_added=0
+    local words_replaced=0
+    local words_skipped=0
+    local user_quit=0
+
+    for file in "${FILES[@]}"; do
+        if [ ! -f "$file" ]; then continue; fi
+
+        # Get misspelled words from stripped content
+        local errors
+        errors=$(strip_for_spellcheck "$file" | aspell list --mode=markdown --lang=en --personal="$DICT_FILE" 2>/dev/null | sort -u)
+
+        if [ -z "$errors" ]; then
+            continue
+        fi
+
+        local word_count
+        word_count=$(echo "$errors" | wc -l | tr -d ' ')
+        local current=0
+
+        echo -e "\n${BOLD}${CYAN}Spell checking: $file${NC} ${DIM}($word_count issues)${NC}"
+
+        while IFS= read -r word; do
+            if [ -z "$word" ]; then continue; fi
+            # After a quit/cancel, remaining words are only counted as skipped.
+            if [ "$user_quit" -eq 1 ]; then
+                words_skipped=$((words_skipped + 1))
+                continue
+            fi
+
+            current=$((current + 1))
+
+            # Check if this word was already added to dictionary in this session
+            if grep -qx "$word" "$DICT_FILE" 2>/dev/null; then
+                continue
+            fi
+
+            # Get suggestions
+            local suggestions
+            suggestions=$(get_suggestions "$word")
+
+            # Get context
+            local context
+            context=$(get_context "$file" "$word" 3)
+
+            # Build the context header for fzf
+            local header=""
+            header+="━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"$'\n'
+            header+="  Misspelled: '$word'  [$current/$word_count]  in $file"$'\n'
+            header+="━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"$'\n'
+            if [ -n "$context" ]; then
+                header+=$'\n'"  Context:"$'\n'
+                while IFS= read -r ctx_line; do
+                    header+="    $ctx_line"$'\n'
+                done <<< "$context"
+            fi
+            header+=$'\n'"  Choose an action:"
+
+            # Build fzf options; the first word of each line is the action
+            # keyword dispatched on below.
+            local options=""
+            options+="skip  (ignore this word)"$'\n'
+            options+="add   (add '$word' to dictionary)"$'\n'
+
+            if [ -n "$suggestions" ]; then
+                while IFS= read -r sug; do
+                    if [ -n "$sug" ]; then
+                        options+="use   '$sug'"$'\n'
+                    fi
+                done <<< "$suggestions"
+            fi
+
+            options+="type  (enter custom replacement)"$'\n'
+            options+="quit  (skip remaining words)"
+
+            # Run fzf; any non-zero exit is treated as a cancel.
+            local choice
+            choice=$(echo -e "$options" | fzf \
+                --header="$header" \
+                --prompt="Action: " \
+                --no-sort \
+                --no-multi \
+                --height=~30 \
+                --reverse \
+                --no-info \
+                --pointer="▶" \
+                --color="header:cyan,pointer:yellow,prompt:yellow" \
+                2>/dev/tty) || {
+                # fzf returns 130 on Ctrl-C/Esc
+                echo -e "${YELLOW}Spell check cancelled.${NC}"
+                user_quit=1
+                words_skipped=$((words_skipped + 1))
+                continue
+            }
+
+            # Parse the action
+            local action
+            action=$(echo "$choice" | awk '{print $1}')
+
+            case "$action" in
+                skip)
+                    words_skipped=$((words_skipped + 1))
+                    ;;
+                add)
+                    add_to_dictionary "$word"
+                    words_added=$((words_added + 1))
+                    echo -e "  ${GREEN}+ Added '$word' to dictionary${NC}"
+                    ;;
+                use)
+                    # Strip the leading "use   '" and trailing "'" to get
+                    # the suggestion text.
+                    local replacement
+                    replacement=$(echo "$choice" | sed "s/^use[[:space:]]*'//;s/'$//" )
+                    replace_word_in_file "$file" "$word" "$replacement"
+                    files_modified=1
+                    words_replaced=$((words_replaced + 1))
+                    echo -e "  ${GREEN}~ Replaced '$word' → '$replacement'${NC}"
+                    ;;
+                type)
+                    echo -n "  Enter replacement for '$word': "
+                    local custom
+                    read -r custom < /dev/tty
+                    if [ -n "$custom" ]; then
+                        replace_word_in_file "$file" "$word" "$custom"
+                        files_modified=1
+                        words_replaced=$((words_replaced + 1))
+                        echo -e "  ${GREEN}~ Replaced '$word' → '$custom'${NC}"
+                    else
+                        echo -e "  ${DIM}(empty input, skipping)${NC}"
+                        words_skipped=$((words_skipped + 1))
+                    fi
+                    ;;
+                quit)
+                    user_quit=1
+                    words_skipped=$((words_skipped + 1))
+                    ;;
+                *)
+                    # Unrecognised selection: treated as a skip.
+                    words_skipped=$((words_skipped + 1))
+                    ;;
+            esac
+        done <<< "$errors"
+
+        # Re-stage the file if we modified it (failures are ignored)
+        if [ "$files_modified" -eq 1 ]; then
+            git add "$file" 2>/dev/null || true
+            files_modified=0
+        fi
+    done
+
+    # Re-stage dictionary if words were added
+    if [ "$words_added" -gt 0 ]; then
+        git add "$DICT_FILE" 2>/dev/null || true
+    fi
+
+    # Summary
+    echo ""
+    echo -e "${BOLD}Spell check summary:${NC}"
+    if [ "$words_replaced" -gt 0 ]; then echo -e "  ${GREEN}Replaced: $words_replaced${NC}"; fi
+    if [ "$words_added" -gt 0 ]; then echo -e "  ${GREEN}Added to dictionary: $words_added${NC}"; fi
+    if [ "$words_skipped" -gt 0 ]; then echo -e "  ${YELLOW}Skipped: $words_skipped${NC}"; fi
+
+    # Skipped words never fail the check — the user explicitly chose to
+    # skip them. Quitting early only prints a reminder; the function still
+    # returns 0 below.
+    if [ "$user_quit" -eq 1 ] && [ "$words_skipped" -gt 0 ]; then
+        echo -e "\n${YELLOW}Some words were skipped due to early quit.${NC}"
+        echo -e "${YELLOW}Commit will proceed — re-run to address remaining words.${NC}"
+    fi
+
+    echo -e "${GREEN}${BOLD}Spell check complete.${NC}"
+    return 0
+}
+
+# --- Main ---
+# Dispatch to the interactive or batch checker and exit with its status.
+echo "Running spell check..."
+
+if [ "$INTERACTIVE" -eq 1 ]; then
+    run_interactive_check
+else
+    run_batch_check
+fi
+exit $?