add meditation and pre-commit scripts
This commit is contained in:
Executable
+188
@@ -0,0 +1,188 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
check-tags.py — Tag similarity checker for Hugo content
|
||||
|
||||
Compares tags in staged files against all existing tags in the site.
|
||||
Warns and blocks commit when a new tag looks similar to an existing one.
|
||||
|
||||
Similarity checks (via difflib.SequenceMatcher):
|
||||
- Ratio >= 0.6 (catches typos, reordered chars, partial matches)
|
||||
- One tag is a substring of the other
|
||||
|
||||
Skip with: SKIP_TAG_CHECK=1 git commit
|
||||
|
||||
Usage: check-tags.py <file1.md> [file2.md ...]
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from difflib import SequenceMatcher
|
||||
from pathlib import Path
|
||||
|
||||
# --- Colors ---
|
||||
RED = "\033[0;31m"
|
||||
YELLOW = "\033[0;33m"
|
||||
GREEN = "\033[0;32m"
|
||||
CYAN = "\033[0;36m"
|
||||
BOLD = "\033[1m"
|
||||
NC = "\033[0m"
|
||||
|
||||
SIMILARITY_THRESHOLD = 0.6 # SequenceMatcher ratio (0-1)
|
||||
|
||||
|
||||
def extract_tags(filepath: Path, *, keep_blanks: bool = False) -> list[str]:
    """Extract tags from the YAML front matter of a markdown file.

    Two layouts are recognised: an inline array
    (``tags: ["foo", 'bar', baz]``) and a block list (``tags:`` followed by
    ``- item`` lines, indented or flush with the key). Each tag is stripped
    of surrounding whitespace and quotes, then lower-cased.

    Args:
        filepath: Markdown file to read (decoded as UTF-8).
        keep_blanks: If True, empty strings from blank tag entries like
            ``tags: ["", "foo"]`` are preserved so the caller can detect them.

    Returns:
        The tags in file order. An empty list is returned when the file
        cannot be read or decoded, has no front matter, or declares no tags.
    """
    try:
        text = filepath.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        return []

    # Front matter sits between two --- lines. The closing delimiter may be
    # the very last line of the file, so accept end-of-text as well as "\n".
    fm_match = re.match(r"^---\s*\n(.*?)\n---\s*(?:\n|\Z)", text, re.DOTALL)
    if not fm_match:
        return []

    frontmatter = fm_match.group(1)

    tags: list[str] = []

    # Match inline array: tags: ["foo", "bar"] or tags: ['foo', 'bar'] or tags: [foo, bar]
    inline = re.search(r"^tags:\s*\[([^\]]*)\]", frontmatter, re.MULTILINE)
    if inline:
        raw = inline.group(1).strip()
        if raw:  # non-empty array contents (skip tags: [] and tags: [ ])
            tags = [t.strip().strip("\"'").lower() for t in raw.split(",")]
    else:
        # Match YAML list format, with or without indentation before "-":
        #   tags:
        #     - foo
        #     - bar
        list_match = re.search(
            r"^tags:\s*\n((?:\s*-\s+.+\n?)+)", frontmatter, re.MULTILINE
        )
        if list_match:
            items = re.findall(r"^\s*-\s+(.*)", list_match.group(1), re.MULTILINE)
            tags = [t.strip().strip("\"'").lower() for t in items]

    if keep_blanks:
        return tags
    return [t for t in tags if t]
|
||||
|
||||
|
||||
def find_similar(new_tag: str, existing_tags: set[str]) -> list[tuple[str, str]]:
    """Collect the existing tags that resemble ``new_tag``.

    Candidates are visited in sorted order. A candidate equal to
    ``new_tag`` is ignored; one that contains, or is contained in,
    ``new_tag`` is reported as a substring match; any other candidate is
    reported when its SequenceMatcher ratio reaches SIMILARITY_THRESHOLD.

    Returns:
        A list of ``(existing_tag, reason)`` tuples.
    """
    matches: list[tuple[str, str]] = []

    for candidate in sorted(existing_tags):
        if candidate == new_tag:
            continue

        if candidate in new_tag or new_tag in candidate:
            reason = "substring match"
        else:
            score = SequenceMatcher(None, new_tag, candidate).ratio()
            if score < SIMILARITY_THRESHOLD:
                continue
            reason = f"similarity: {score:.0%}"

        matches.append((candidate, reason))

    return matches
|
||||
|
||||
|
||||
def main() -> int:
    """Check the tags of the files named on the command line.

    Builds a registry of tags from every ``*.md`` file under ``content/``
    (excluding the files being checked), then reports blank tags and new
    tags that look similar to a registered one.

    Setting ``SKIP_TAG_CHECK=1`` in the environment skips the check.

    Returns:
        0 when the check passes or is skipped, 1 on a usage error or when a
        blank or similar-looking tag is found.
    """
    if os.environ.get("SKIP_TAG_CHECK") == "1":
        print(f"{YELLOW}Skipping tag similarity check (SKIP_TAG_CHECK=1){NC}")
        return 0

    if len(sys.argv) < 2:
        print("Usage: check-tags.py <file1.md> [file2.md ...]", file=sys.stderr)
        return 1

    staged_files = sys.argv[1:]

    # The current directory is used as the repo root
    # (assumes the script is invoked from there — TODO confirm for all callers).
    repo_root = Path.cwd()
    content_dir = repo_root / "content"
    if not content_dir.is_dir():
        print(f"{YELLOW}No content/ directory found, skipping tag check{NC}")
        return 0

    # Resolve staged files to absolute paths for exclusion
    staged_abs = {(repo_root / f).resolve() for f in staged_files}

    # Build tag registry from all content files (excluding staged files)
    all_tags: set[str] = set()
    for md_file in content_dir.rglob("*.md"):
        if md_file.resolve() in staged_abs:
            continue
        all_tags.update(extract_tags(md_file))

    # No early return on an empty registry: blank tags in the staged files
    # must still be reported even when there is nothing to compare against.
    found_issues = False

    for staged_file in staged_files:
        filepath = repo_root / staged_file
        if not filepath.is_file():
            continue
        # Only check content files
        if not staged_file.startswith("content/"):
            continue

        file_tags_raw = extract_tags(filepath, keep_blanks=True)

        # Check for blank/empty tags
        blank_count = sum(1 for t in file_tags_raw if not t)
        if blank_count:
            found_issues = True
            print()
            print(
                f"{YELLOW}{BOLD}Found {blank_count} blank tag(s) "
                f"in {staged_file}{NC}"
            )
            print(f"{YELLOW} Remove empty strings from the tags array{NC}")

        file_tags = [t for t in file_tags_raw if t]

        for tag in file_tags:
            # If the tag already exists exactly, it's fine
            if tag in all_tags:
                continue

            # New tag — check for similarity
            similar = find_similar(tag, all_tags)

            if similar:
                found_issues = True
                print()
                print(f"{YELLOW}{BOLD}New tag '{tag}' in {staged_file}{NC}")
                print(f"{YELLOW} Similar existing tags:{NC}")
                for existing, reason in similar:
                    print(f" {CYAN}\u2192 {existing} ({reason}){NC}")

    if found_issues:
        print()
        print(f"{RED}{BOLD}Tag similarity check failed.{NC}")
        print(f"{RED}Consider using an existing tag, or skip with:{NC}")
        print(f"{RED} SKIP_TAG_CHECK=1 git commit{NC}")
        print()
        return 1

    if not all_tags:
        print(f"{GREEN}No existing tags found, nothing to compare against.{NC}")
        return 0

    print(f"{GREEN}Tag check passed \u2014 no similar tags found.{NC}")
    return 0
|
||||
|
||||
|
||||
# Script entry point: the process exit status is whatever main() returns.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
Executable
+386
@@ -0,0 +1,386 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
# spellcheck-interactive.sh — Interactive fzf-based spell checker for markdown
|
||||
#
|
||||
# Checks markdown files with aspell, then presents each misspelled word
|
||||
# interactively with fzf, showing context and offering actions:
|
||||
# - Skip (ignore this word)
|
||||
# - Add to dictionary (.aspell.en.pws)
|
||||
# - Replace with a suggestion
|
||||
# - Type a custom replacement
|
||||
#
|
||||
# If no TTY is available (non-interactive), falls back to batch output.
|
||||
#
|
||||
# Usage: spellcheck-interactive.sh <file1.md> [file2.md ...]
|
||||
|
||||
set -euo pipefail

# ANSI colour escapes; they are expanded when printed with `echo -e` / printf %b
RED='\033[0;31m'
YELLOW='\033[0;33m'
GREEN='\033[0;32m'
CYAN='\033[0;36m'
BOLD='\033[1m'
DIM='\033[2m'
NC='\033[0m'

# Usage errors go to stderr so they are not mistaken for regular output
if [ $# -eq 0 ]; then
    echo "Usage: spellcheck-interactive.sh <file1.md> [file2.md ...]" >&2
    exit 1
fi

FILES=("$@")
REPO_ROOT="$(git rev-parse --show-toplevel)"
DICT_FILE="$REPO_ROOT/.aspell.en.pws"

# Check aspell availability
if ! command -v aspell &> /dev/null; then
    echo -e "${YELLOW}aspell not found, skipping spell check${NC}"
    exit 0
fi

# Only dictionary names that START with "en" count as English. grep reads
# its whole input (no -q) so aspell cannot be killed by SIGPIPE, which
# would make the pipeline fail under `pipefail`.
if ! aspell dump dicts 2>/dev/null | grep '^en' > /dev/null; then
    echo -e "${YELLOW}aspell found but no English dictionaries available${NC}"
    echo " Make sure ASPELL_CONF is set correctly in your direnv"
    exit 0
fi

# Detect interactive mode
INTERACTIVE=0
if [ -t 0 ]; then
    INTERACTIVE=1
elif ( exec 0</dev/tty ) 2>/dev/null; then
    # Pre-commit hook: stdin is redirected, but /dev/tty is available
    INTERACTIVE=1
fi

if [ "$INTERACTIVE" -eq 1 ] && ! command -v fzf &> /dev/null; then
    echo -e "${YELLOW}fzf not found, falling back to non-interactive mode${NC}"
    INTERACTIVE=0
fi
|
||||
|
||||
# --- Strip front matter and code blocks from markdown ---
|
||||
# Returns cleaned text suitable for spell checking.
|
||||
# We keep line numbers aligned by replacing stripped content with blank lines,
|
||||
# so aspell line numbers match the original file.
|
||||
# Print the contents of file $1 with YAML front matter, fenced code blocks
# and inline code spans removed. Every stripped line is replaced by an empty
# line, so the output has the same number of lines as the input.
strip_for_spellcheck() {
    local file="$1"
    local in_frontmatter=0
    local in_codeblock=0
    local linenum=0
    local delim_re='^---[[:space:]]*$'
    local fence_re='^```'
    local line

    # `|| [ -n "$line" ]` keeps a final line that lacks a trailing newline
    while IFS= read -r line || [ -n "$line" ]; do
        linenum=$((linenum + 1))

        # Front matter can only open on the first line of the file; a later
        # --- is a horizontal rule unless it closes an open front matter.
        if [[ "$line" =~ $delim_re ]]; then
            if [ "$linenum" -eq 1 ]; then
                in_frontmatter=1
                echo ""
                continue
            elif [ "$in_frontmatter" -eq 1 ]; then
                in_frontmatter=0
                echo ""
                continue
            fi
        fi

        if [ "$in_frontmatter" -eq 1 ]; then
            echo ""
            continue
        fi

        # Track fenced code blocks: a line starting with ``` toggles the state
        if [[ "$line" =~ $fence_re ]]; then
            if [ "$in_codeblock" -eq 0 ]; then
                in_codeblock=1
            else
                in_codeblock=0
            fi
            echo ""
            continue
        fi

        if [ "$in_codeblock" -eq 1 ]; then
            echo ""
            continue
        fi

        # Strip inline code (`...`) to avoid checking code snippets.
        # printf (not echo) so lines such as "-n" are passed through verbatim.
        printf '%s\n' "$line" | sed 's/`[^`]*`//g'
    done < "$file"
}
|
||||
|
||||
# --- Get aspell suggestions for a word ---
|
||||
# Print up to eight aspell suggestions for word $1, one per line.
# Prints nothing when aspell offers no suggestions for the word.
get_suggestions() {
    local word="$1"
    local reply

    # The first line of `aspell pipe` output is dropped by `tail -n +2`;
    # what remains is the verdict for the word.
    reply=$(
        echo "$word" \
            | aspell pipe --mode=markdown --lang=en --personal="$DICT_FILE" 2>/dev/null \
            | tail -n +2
    )

    # aspell pipe mode: & means misspelled with suggestions, # means no suggestions
    [[ "$reply" == '&'* ]] || return 0

    # Format: & word count offset: suggestion1, suggestion2, ...
    echo "$reply" \
        | sed 's/^& [^ ]* [0-9]* [0-9]*: //' \
        | tr ',' '\n' \
        | sed 's/^[[:space:]]*//' \
        | head -8
}
|
||||
|
||||
# --- Get context lines around a word occurrence in a file ---
|
||||
# Print up to $3 (default 3) "lineno:text" lines of file $1 that contain
# word $2 as a whole word, ignoring case. Prints an empty line when nothing
# matches or the file cannot be read.
get_context() {
    local file="$1"
    local word="$2"
    local max_contexts="${3:-3}"

    # -F: the word is literal text, not a regex; -e / --: a word or file name
    # starting with "-" is never parsed as an option.
    # `|| true`: grep exits non-zero on no match (or on SIGPIPE once head has
    # enough lines), which must not abort the script under `set -e`.
    local lines
    lines=$(grep -n -i -w -F -e "$word" -- "$file" 2>/dev/null | head -n "$max_contexts") || true
    echo "$lines"
}
|
||||
|
||||
# --- Add a word to the personal dictionary ---
|
||||
# Add word $1 to the personal dictionary at $DICT_FILE, keeping the header
# line first and the word list sorted and de-duplicated.
add_to_dictionary() {
    local word="$1"
    # Header written when the file is missing, empty, or has no header line
    local header='personal_ws-1.1 en 0'
    local body=""
    local first_line

    if [ -s "$DICT_FILE" ]; then
        first_line=$(head -n 1 "$DICT_FILE")
        if [[ "$first_line" == personal_ws-* ]]; then
            header="$first_line"
            body=$(tail -n +2 "$DICT_FILE")
        else
            # No header present: every existing line is a word
            body=$(cat "$DICT_FILE")
        fi
    fi

    # Command substitution dropped any trailing newline from $body, so the new
    # word always lands on its own line; empty lines are removed before sorting.
    body=$(printf '%s\n%s\n' "$body" "$word" | sed '/^$/d' | sort -u)
    printf '%s\n%s\n' "$header" "$body" > "$DICT_FILE"
}
|
||||
|
||||
# --- Replace a word in a file ---
|
||||
# Replace every whole-word, case-sensitive occurrence of $2 with $3 in file
# $1, editing the file in place. The whole file is rewritten, including any
# front matter and code blocks.
replace_word_in_file() {
    local file="$1"
    local old_word="$2"
    local new_word="$3"

    # We use perl for proper word boundary support
    if command -v perl &> /dev/null; then
        # Both words travel through the environment instead of being pasted
        # into the program text, so "/", "$", "@" or "\" in either one cannot
        # break or alter the substitution. \Q...\E matches the old word
        # literally; the replacement value is inserted as-is.
        OLD_WORD="$old_word" NEW_WORD="$new_word" \
            perl -pi -e 's/\b\Q$ENV{OLD_WORD}\E\b/$ENV{NEW_WORD}/g' "$file"
    else
        # sed fallback: escape regex metacharacters in the pattern, and
        # "\", "/" and "&" in the replacement, before building the command.
        local old_esc new_esc
        old_esc=$(printf '%s' "$old_word" | sed 's|[][\/.*^$]|\\&|g')
        new_esc=$(printf '%s' "$new_word" | sed 's|[\/&]|\\&|g')
        # First form: `sed -i` without a suffix argument; second form:
        # `sed -i ''` with [[:<:]] / [[:>:]] word boundaries.
        sed -i "s/\b${old_esc}\b/${new_esc}/g" "$file" 2>/dev/null || \
            sed -i '' "s/[[:<:]]${old_esc}[[:>:]]/${new_esc}/g" "$file"
    fi
}
|
||||
|
||||
# --- Non-interactive fallback (matches old pre-commit behavior) ---
|
||||
# Non-interactive check: for every existing file in FILES, list the words
# aspell flags, each with its first suggestion and up to two context lines.
# Returns 1 when any file has a flagged word, 0 otherwise.
run_batch_check() {
    local overall_fail=0
    local file errors word suggestion ctx

    for file in "${FILES[@]}"; do
        if [ ! -f "$file" ]; then continue; fi

        errors=$(strip_for_spellcheck "$file" | aspell list --mode=markdown --lang=en --personal="$DICT_FILE" 2>/dev/null | sort -u)

        if [ -n "$errors" ]; then
            # printf: %b expands the colour escapes, %s prints file-derived
            # text verbatim so backslashes in it are not treated as escapes.
            printf '%b%s%b\n' "${YELLOW}Possible misspellings in ${BOLD}" "$file" "${NC}${YELLOW}:${NC}"
            while IFS= read -r word; do
                if [ -z "$word" ]; then continue; fi
                suggestion=$(get_suggestions "$word" | head -1)
                printf " %b'%s'%b %b→%b %b%s%b\n" "$RED" "$word" "$NC" "$DIM" "$NC" "$GREEN" "$suggestion" "$NC"
                get_context "$file" "$word" 2 | while IFS= read -r ctx; do
                    printf ' %b%s%b\n' "$DIM" "$ctx" "$NC"
                done
            done <<< "$errors"
            echo ""
            overall_fail=1
        fi
    done

    if [ "$overall_fail" -eq 1 ]; then
        echo -e "${RED}${BOLD}Spell check failed.${NC}"
        return 1
    fi

    return 0
}
|
||||
|
||||
# --- Interactive spell check with fzf ---
|
||||
# Interactive check: for every flagged word in every existing file in FILES,
# show an fzf menu (skip / add to dictionary / use a suggestion / type a
# replacement / quit) and apply the chosen action. Files changed by a
# replacement and the dictionary (when words were added) are re-staged with
# `git add`. Always returns 0.
run_interactive_check() {
    # Grab the terminal for input (needed in pre-commit hook context)
    exec < /dev/tty

    local files_modified=0
    local words_added=0
    local words_replaced=0
    local words_skipped=0
    local user_quit=0
    local rule="━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
    local file errors word word_count current
    local suggestions context header options choice action
    local ctx_line sug replacement custom

    for file in "${FILES[@]}"; do
        if [ ! -f "$file" ]; then continue; fi

        # Get misspelled words from stripped content
        errors=$(strip_for_spellcheck "$file" | aspell list --mode=markdown --lang=en --personal="$DICT_FILE" 2>/dev/null | sort -u)

        if [ -z "$errors" ]; then
            continue
        fi

        word_count=$(echo "$errors" | wc -l | tr -d ' ')
        current=0

        echo -e "\n${BOLD}${CYAN}Spell checking: $file${NC} ${DIM}($word_count issues)${NC}"

        while IFS= read -r word; do
            if [ -z "$word" ]; then continue; fi
            # After a quit/cancel the remaining words are only counted
            if [ "$user_quit" -eq 1 ]; then
                words_skipped=$((words_skipped + 1))
                continue
            fi

            current=$((current + 1))

            # Skip words that are already in the dictionary (e.g. added
            # earlier in this session). -F: compare literally, not as a regex.
            if grep -qxF -- "$word" "$DICT_FILE" 2>/dev/null; then
                continue
            fi

            # Get suggestions
            suggestions=$(get_suggestions "$word")

            # Get context
            context=$(get_context "$file" "$word" 3)

            # Build the context header for fzf
            header=""
            header+="$rule"$'\n'
            header+=" Misspelled: '$word' [$current/$word_count] in $file"$'\n'
            header+="$rule"$'\n'
            if [ -n "$context" ]; then
                header+=$'\n'" Context:"$'\n'
                while IFS= read -r ctx_line; do
                    header+=" $ctx_line"$'\n'
                done <<< "$context"
            fi
            header+=$'\n'" Choose an action:"

            # Build fzf options; the first word of each line is the action
            options=""
            options+="skip (ignore this word)"$'\n'
            options+="add (add '$word' to dictionary)"$'\n'

            if [ -n "$suggestions" ]; then
                while IFS= read -r sug; do
                    if [ -n "$sug" ]; then
                        options+="use '$sug'"$'\n'
                    fi
                done <<< "$suggestions"
            fi

            options+="type (enter custom replacement)"$'\n'
            options+="quit (skip remaining words)"

            # Run fzf. The options already contain real newlines, so they are
            # printed verbatim rather than through `echo -e`.
            choice=$(printf '%s\n' "$options" | fzf \
                --header="$header" \
                --prompt="Action: " \
                --no-sort \
                --no-multi \
                --height=~30 \
                --reverse \
                --no-info \
                --pointer="▶" \
                --color="header:cyan,pointer:yellow,prompt:yellow" \
                2>/dev/tty) || {
                # fzf returns 130 on Ctrl-C/Esc
                echo -e "${YELLOW}Spell check cancelled.${NC}"
                user_quit=1
                words_skipped=$((words_skipped + 1))
                continue
            }

            # Parse the action
            action=$(echo "$choice" | awk '{print $1}')

            case "$action" in
                skip)
                    words_skipped=$((words_skipped + 1))
                    ;;
                add)
                    add_to_dictionary "$word"
                    words_added=$((words_added + 1))
                    echo -e " ${GREEN}+ Added '$word' to dictionary${NC}"
                    ;;
                use)
                    # Strip the leading "use '" and the trailing "'"
                    replacement=$(echo "$choice" | sed "s/^use[[:space:]]*'//;s/'$//" )
                    replace_word_in_file "$file" "$word" "$replacement"
                    files_modified=1
                    words_replaced=$((words_replaced + 1))
                    printf " %b~ Replaced '%s' → '%s'%b\n" "$GREEN" "$word" "$replacement" "$NC"
                    ;;
                type)
                    echo -n " Enter replacement for '$word': "
                    read -r custom < /dev/tty
                    if [ -n "$custom" ]; then
                        replace_word_in_file "$file" "$word" "$custom"
                        files_modified=1
                        words_replaced=$((words_replaced + 1))
                        # %s: typed text is shown verbatim, backslashes included
                        printf " %b~ Replaced '%s' → '%s'%b\n" "$GREEN" "$word" "$custom" "$NC"
                    else
                        echo -e " ${DIM}(empty input, skipping)${NC}"
                        words_skipped=$((words_skipped + 1))
                    fi
                    ;;
                quit)
                    user_quit=1
                    words_skipped=$((words_skipped + 1))
                    ;;
                *)
                    words_skipped=$((words_skipped + 1))
                    ;;
            esac
        done <<< "$errors"

        # Re-stage the file if we modified it
        if [ "$files_modified" -eq 1 ]; then
            git add "$file" 2>/dev/null || true
            files_modified=0
        fi
    done

    # Re-stage dictionary if words were added
    if [ "$words_added" -gt 0 ]; then
        git add "$DICT_FILE" 2>/dev/null || true
    fi

    # Summary
    echo ""
    echo -e "${BOLD}Spell check summary:${NC}"
    if [ "$words_replaced" -gt 0 ]; then echo -e " ${GREEN}Replaced: $words_replaced${NC}"; fi
    if [ "$words_added" -gt 0 ]; then echo -e " ${GREEN}Added to dictionary: $words_added${NC}"; fi
    if [ "$words_skipped" -gt 0 ]; then echo -e " ${YELLOW}Skipped: $words_skipped${NC}"; fi

    # Skipped words never fail the check — the user chose to skip them.
    # An early quit passes as well; it only adds a reminder to re-run.
    if [ "$user_quit" -eq 1 ] && [ "$words_skipped" -gt 0 ]; then
        echo -e "\n${YELLOW}Some words were skipped due to early quit.${NC}"
        echo -e "${YELLOW}Commit will proceed — re-run to address remaining words.${NC}"
    fi

    echo -e "${GREEN}${BOLD}Spell check complete.${NC}"
    return 0
}
|
||||
|
||||
# --- Main ---
|
||||
echo "Running spell check..."

# Pick the mode decided during start-up; the script's exit status is the
# status of whichever check ran.
case "$INTERACTIVE" in
    1) run_interactive_check ;;
    *) run_batch_check ;;
esac
exit $?
|
||||
Reference in New Issue
Block a user