update images to webp, update pre-commit hooks

2026-03-04 01:37:18 -07:00
parent cfb55e0e51
commit bd73fcd39b
72 changed files with 1007 additions and 80 deletions
@@ -1,13 +1,15 @@
 #!/usr/bin/env python3
 """
-check-tags.py — Tag similarity checker for Hugo content
+check-tags.py — Semantic tag similarity checker for Hugo content

 Compares tags in staged files against all existing tags in the site.
-Warns and blocks commit when a new tag looks similar to an existing one.
+Warns and blocks commit when a new tag is semantically similar to an existing one.

-Similarity checks (via difflib.SequenceMatcher):
-  - Ratio >= 0.6 (catches typos, reordered chars, partial matches)
-  - One tag is a substring of the other
+Uses spaCy word vectors (en_core_web_lg) for cosine similarity — catches
+conceptual matches like "parenting" ≈ "fatherhood" while ignoring unrelated
+words that happen to share letters like "dogs" vs "daily".
+
+Fallback: if spaCy is unavailable, uses conservative edit-distance checks only.

 Skip with: SKIP_TAG_CHECK=1 git commit

@@ -17,6 +19,7 @@ Usage: check-tags.py <file1.md> [file2.md ...]
 import os
 import re
 import sys
+import time
 from difflib import SequenceMatcher
 from pathlib import Path

@@ -28,7 +31,44 @@ CYAN = "\033[0;36m"
 BOLD = "\033[1m"
 NC = "\033[0m"

-SIMILARITY_THRESHOLD = 0.6  # SequenceMatcher ratio (0-1)
+# Cosine similarity threshold for word vectors (0-1).
+# 0.65 catches morphological variants (parenting/parenthood) and synonyms
+# (cannabis/marijuana) while avoiding unrelated words. Tuned for short blog tags.
+SEMANTIC_THRESHOLD = 0.65
+
+# Edit-distance threshold — only used as a typo catcher alongside semantics.
+# 0.85 is very conservative: catches "kubernetse" vs "kubernetes" but not
+# "dogs" vs "daily" (which scores ~0.40).
+TYPO_THRESHOLD = 0.85
+
+# Substring match: shorter tag must be at least this many chars
+# and cover at least this fraction of the longer tag.
+SUBSTRING_MIN_LEN = 5
+SUBSTRING_MIN_RATIO = 0.6
+
+# --- spaCy setup (lazy, with graceful fallback) ---
+_nlp = None
+_spacy_available = None
+
+
+def _load_spacy():
+    """Return the cached ``(nlp, available)`` pair, loading spaCy on first call.
+
+    The first call tries to import spaCy and load ``en_core_web_lg``. The
+    outcome is remembered in the module globals, so later calls return
+    immediately. On ImportError/OSError a warning is printed and
+    ``(None, False)`` is cached instead.
+    """
+    global _nlp, _spacy_available
+
+    if _spacy_available is None:
+        try:
+            import spacy
+
+            _nlp = spacy.load("en_core_web_lg")
+        except (ImportError, OSError) as exc:
+            warning = (
+                f"{YELLOW}spaCy not available ({exc}), "
+                f"falling back to edit-distance only{NC}"
+            )
+            print(warning)
+            _nlp, _spacy_available = None, False
+        else:
+            _spacy_available = True
+
+    return _nlp, _spacy_available


 def extract_tags(filepath: Path, *, keep_blanks: bool = False) -> list[str]:
@@ -74,26 +114,53 @@ def extract_tags(filepath: Path, *, keep_blanks: bool = False) -> list[str]:
    return [t for t in tags if t]


-def find_similar(new_tag: str, existing_tags: set[str]) -> list[tuple[str, str]]:
+def find_similar(
+    new_tag: str,
+    existing_tags: set[str],
+    existing_docs: dict | None = None,
+) -> list[tuple[str, str]]:
     """Find existing tags similar to a new tag.

+    Uses semantic similarity (spaCy vectors) as the primary check,
+    with edit-distance as a typo-catching backup.
+
+    For each existing tag the checks run in order and the first hit wins:
+    restricted substring match, semantic similarity, then typo ratio.
+
+    If existing_docs is provided, it should be a dict mapping tag strings
+    to their pre-computed spaCy Doc objects (avoids redundant nlp() calls).
+    Tags missing from the mapping are parsed on demand.
+
     Returns list of (existing_tag, reason) tuples.
     """
+    nlp, has_spacy = _load_spacy()
     similar = []
+    # The new tag is the same for every comparison, so parse it only once.
+    doc_new = nlp(new_tag) if has_spacy else None

     for existing in sorted(existing_tags):
         if existing == new_tag:
             continue

-        # Check substring match
-        if existing in new_tag or new_tag in existing:
+        # --- Check 1: Substring match (restricted) ---
+        shorter, longer = sorted([new_tag, existing], key=len)
+        if (
+            len(shorter) >= SUBSTRING_MIN_LEN
+            and shorter in longer
+            and len(shorter) / len(longer) >= SUBSTRING_MIN_RATIO
+        ):
             similar.append((existing, "substring match"))
             continue

-        # Check similarity ratio
+        # --- Check 2: Semantic similarity (primary) ---
+        if has_spacy:
+            # Prefer the caller's pre-computed Doc; fall back to parsing so a
+            # partial mapping cannot raise KeyError.
+            doc_ex = existing_docs.get(existing) if existing_docs else None
+            if doc_ex is None:
+                doc_ex = nlp(existing)
+
+            if doc_new.has_vector and doc_ex.has_vector:
+                score = doc_new.similarity(doc_ex)
+                if score >= SEMANTIC_THRESHOLD:
+                    similar.append((existing, f"semantic: {score:.0%}"))
+                    continue
+
+        # --- Check 3: Typo detection via edit distance (conservative) ---
         ratio = SequenceMatcher(None, new_tag, existing).ratio()
-        if ratio >= SIMILARITY_THRESHOLD:
-            similar.append((existing, f"similarity: {ratio:.0%}"))
+        if ratio >= TYPO_THRESHOLD:
+            similar.append((existing, f"typo match: {ratio:.0%}"))

     return similar

@@ -130,8 +197,13 @@ def main() -> int:
        print(f"{GREEN}No existing tags found, nothing to compare against.{NC}")
        return 0

+    # Pre-compute spaCy docs for all existing tags (avoids repeated nlp() calls)
+    nlp, has_spacy = _load_spacy()
+    existing_docs = {tag: nlp(tag) for tag in all_tags} if has_spacy else None
+
    # Check staged files for similar tags
    found_issues = False
+    start = time.monotonic()

    for staged_file in staged_files:
        filepath = repo_root / staged_file
@@ -162,7 +234,7 @@ def main() -> int:
                continue

            # New tag — check for similarity
-            similar = find_similar(tag, all_tags)
+            similar = find_similar(tag, all_tags, existing_docs)

            if similar:
                found_issues = True
@@ -172,15 +244,18 @@ def main() -> int:
                for existing, reason in similar:
                    print(f"    {CYAN}\u2192 {existing} ({reason}){NC}")

+    elapsed = time.monotonic() - start
+
    if found_issues:
        print()
        print(f"{RED}{BOLD}Tag similarity check failed.{NC}")
        print(f"{RED}Consider using an existing tag, or skip with:{NC}")
        print(f"{RED}  SKIP_TAG_CHECK=1 git commit{NC}")
+        print(f"{RED}  ({elapsed:.1f}s){NC}")
        print()
        return 1

-    print(f"{GREEN}Tag check passed \u2014 no similar tags found.{NC}")
+    print(f"{GREEN}Tag check passed \u2014 no similar tags found. ({elapsed:.1f}s){NC}")
    return 0


@@ -0,0 +1,32 @@
+#!/usr/bin/env python3
+"""List all unique tags across Hugo content, sorted alphabetically."""
+
+import re
+from pathlib import Path
+
+# Hugo content lives in the "content" directory next to this script's parent.
+content_dir = Path(__file__).resolve().parent.parent / "content"
+tags: set[str] = set()
+
+for md in content_dir.rglob("*.md"):
+    # Only the leading front matter block delimited by '---' lines is inspected.
+    fm = re.match(
+        r"^---\s*\n(.*?)\n---\s*\n", md.read_text(encoding="utf-8"), re.DOTALL
+    )
+    if fm is None:
+        continue
+    front_matter = fm.group(1)
+
+    # Inline form: tags: [a, "b", 'c']
+    inline = re.search(r"^tags:\s*\[([^\]]*)\]", front_matter, re.MULTILINE)
+    if inline and inline.group(1).strip():
+        raw_tags = inline.group(1).split(",")
+    else:
+        # Block form: "tags:" followed by indented "- item" lines.
+        lm = re.search(
+            r"^tags:\s*\n((?:\s+-\s+.+\n?)+)", front_matter, re.MULTILINE
+        )
+        raw_tags = (
+            re.findall(r"^\s+-\s+(.*)", lm.group(1), re.MULTILINE) if lm else []
+        )
+
+    # Trim whitespace and surrounding quotes, lowercase, and drop empty results.
+    cleaned = (raw.strip().strip("\"'").lower() for raw in raw_tags)
+    tags.update(tag for tag in cleaned if tag)
+
+for name in sorted(tags):
+    print(name)
@@ -0,0 +1,738 @@
+#!/usr/bin/env bash
+# optimize-images.sh — Image auditor, metadata stripper, and WebP optimizer for fosscat.com
+#
+# Usage:
+#   ./scripts/optimize-images.sh              # Interactive mode
+#   ./scripts/optimize-images.sh --dry-run    # Show what would happen without changing anything
+#   ./scripts/optimize-images.sh --yes        # Skip all confirmation prompts
+#   ./scripts/optimize-images.sh --audit-only # Only run the audit phase (no changes)
+
+set -euo pipefail
+
+# ---------------------------------------------------------------------------
+# Configuration
+# ---------------------------------------------------------------------------
+IMAGES_DIR="static/images"
+CONTENT_DIR="content"
+CONFIG_FILE="config.toml"
+MAX_WIDTH=2000
+MAX_HEIGHT=2000
+WEBP_QUALITY=82
+
+# ---------------------------------------------------------------------------
+# CLI flags
+# ---------------------------------------------------------------------------
+DRY_RUN=false
+AUTO_YES=false
+AUDIT_ONLY=false
+
+# Consume the positional arguments one at a time; every flag is a bare switch.
+while (( $# > 0 )); do
+  case "$1" in
+    --dry-run)
+      DRY_RUN=true
+      ;;
+    --yes|-y)
+      AUTO_YES=true
+      ;;
+    --audit-only)
+      AUDIT_ONLY=true
+      ;;
+    --help|-h)
+      echo "Usage: $0 [--dry-run] [--yes] [--audit-only]"
+      echo ""
+      echo "  --dry-run     Show what would happen without making changes"
+      echo "  --yes, -y     Skip confirmation prompts"
+      echo "  --audit-only  Only run the audit (no modifications)"
+      echo "  --help, -h    Show this help"
+      exit 0
+      ;;
+    *)
+      echo "Unknown option: $1"
+      echo "Run $0 --help for usage"
+      exit 1
+      ;;
+  esac
+  shift
+done
+
+# ---------------------------------------------------------------------------
+# Colors and formatting
+# ---------------------------------------------------------------------------
+# ANSI escape sequences. They are stored unexpanded (single quotes) and only
+# turn into real escapes when passed through `echo -e` or a printf format.
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+CYAN='\033[0;36m'
+BOLD='\033[1m'
+DIM='\033[2m'
+NC='\033[0m' # No Color
+
+# Logging helpers: each prints its arguments to stdout behind a colored tag.
+info()    { echo -e "${BLUE}[INFO]${NC} $*"; }
+success() { echo -e "${GREEN}[OK]${NC} $*"; }
+warn()    { echo -e "${YELLOW}[WARN]${NC} $*"; }
+error()   { echo -e "${RED}[ERROR]${NC} $*"; }
+# header prints a section banner with a blank line before and after it.
+header()  { echo -e "\n${BOLD}${CYAN}═══ $* ═══${NC}\n"; }
+
+# ---------------------------------------------------------------------------
+# Dependency checks
+# ---------------------------------------------------------------------------
+# Verify that every external tool this script invokes is on PATH.
+# Prints the missing tools plus install hints and exits 1 if any is absent.
+check_deps() {
+  local missing=()
+  local cmd
+  # The script calls `magick` and `identify` (never `convert`), so those are
+  # the ImageMagick entry points that must be present.
+  for cmd in exiftool magick identify cwebp; do
+    if ! command -v "$cmd" &>/dev/null; then
+      missing+=("$cmd")
+    fi
+  done
+
+  if [[ ${#missing[@]} -gt 0 ]]; then
+    error "Missing required tools: ${missing[*]}"
+    echo "  These are provided by the Nix dev shell. Run:"
+    echo "    nix develop   # or let direnv load the flake"
+    echo ""
+    echo "  Required nix packages:"
+    echo "    perl538Packages.ImageExifTool  (exiftool)"
+    echo "    imagemagick                    (magick, identify)"
+    echo "    libwebp                        (cwebp)"
+    exit 1
+  fi
+}
+
+# ---------------------------------------------------------------------------
+# Ensure we're in the project root
+# ---------------------------------------------------------------------------
+if [[ ! -f "$CONFIG_FILE" ]] || [[ ! -d "$IMAGES_DIR" ]]; then
+  error "Must be run from the project root (where $CONFIG_FILE and $IMAGES_DIR exist)"
+  exit 1
+fi
+
+check_deps
+
+# ---------------------------------------------------------------------------
+# Utility: human-readable file size
+# ---------------------------------------------------------------------------
+# Print a byte count as "<n> B", "<n> KB" (whole kibibytes, truncated) or
+# "<n>.<d> MB" (one truncated decimal digit).
+#   $1 - size in bytes
+human_size() {
+  local n=$1
+  local -r kib=1024 mib=1048576
+
+  if (( n < kib )); then
+    echo "${n} B"
+    return
+  fi
+
+  if (( n < mib )); then
+    echo "$(( n / kib )) KB"
+    return
+  fi
+
+  # Integer arithmetic only: whole part, then tenths of the remainder.
+  echo "$(( n / mib )).$(( (n % mib) * 10 / mib )) MB"
+}
+
+# ---------------------------------------------------------------------------
+# Utility: confirm prompt (respects --yes and --dry-run)
+# ---------------------------------------------------------------------------
+# Ask a yes/no question and return 0 for yes, non-zero for no.
+#   $1 - prompt text
+# --yes answers yes silently; --dry-run prints what would be asked and
+# answers yes. Otherwise only a single "y" or "Y" counts as yes.
+confirm() {
+  local prompt="$1"
+  local answer=""
+  if $AUTO_YES; then
+    return 0
+  fi
+  if $DRY_RUN; then
+    echo -e "  ${DIM}(dry-run: would ask) $prompt${NC}"
+    return 0
+  fi
+  echo -en "  $prompt ${BOLD}[y/N]${NC} "
+  # read fails at EOF (e.g. stdin closed); treat that as "no" instead of
+  # letting `set -e` abort the whole script.
+  read -r answer || true
+  [[ "$answer" =~ ^[Yy]$ ]]
+}
+
+# ---------------------------------------------------------------------------
+# PHASE 1: AUDIT
+# ---------------------------------------------------------------------------
+# Phase 1: read-only audit of $IMAGES_DIR.
+# Prints an inventory table, a metadata/privacy scan, and a cross-reference of
+# image usage in $CONTENT_DIR and $CONFIG_FILE. Results are handed to later
+# phases through /tmp/optimg_*.txt files.
+phase_audit() {
+  header "PHASE 1: IMAGE AUDIT"
+
+  # Collect all image files
+  local -a image_files=()
+  local f
+  while IFS= read -r -d '' f; do
+    image_files+=("$f")
+  done < <(find "$IMAGES_DIR" -maxdepth 1 -type f \( -iname '*.jpg' -o -iname '*.jpeg' -o -iname '*.png' -o -iname '*.webp' -o -iname '*.gif' \) -print0 | sort -z)
+
+  if [[ ${#image_files[@]} -eq 0 ]]; then
+    warn "No images found in $IMAGES_DIR"
+    # Still write (blank) hand-off files so later phases neither fail on a
+    # missing file nor pick up stale data from an earlier aborted run.
+    echo "" > /tmp/optimg_files.txt
+    echo "" > /tmp/optimg_unreferenced.txt
+    echo "" > /tmp/optimg_broken.txt
+    echo 0 > /tmp/optimg_total_size.txt
+    return
+  fi
+
+  # --- Image inventory table ---
+  echo -e "${BOLD}Image Inventory${NC}"
+  printf "  %-40s  %-6s  %-12s  %s\n" "FILENAME" "FORMAT" "DIMENSIONS" "SIZE"
+  printf "  %-40s  %-6s  %-12s  %s\n" "--------" "------" "----------" "----"
+
+  local total_size=0
+  local img
+  for img in "${image_files[@]}"; do
+    local fname
+    fname=$(basename "$img")
+    local ext="${fname##*.}"
+    local fsize
+    # GNU stat first, BSD stat as fallback
+    fsize=$(stat -c%s "$img" 2>/dev/null || stat -f%z "$img" 2>/dev/null)
+    total_size=$((total_size + fsize))
+    local dims
+    dims=$(identify -format "%wx%h" "$img" 2>/dev/null || echo "unknown")
+    printf "  %-40s  %-6s  %-12s  %s\n" "$fname" "$ext" "$dims" "$(human_size "$fsize")"
+  done
+
+  echo ""
+  info "Total: ${#image_files[@]} images, $(human_size $total_size)"
+
+  # --- EXIF / Metadata scan ---
+  echo ""
+  echo -e "${BOLD}Metadata / Privacy Scan${NC}"
+
+  local privacy_issues=0
+  # Sensitive tag names to check (extracted in a single exiftool call per image)
+  local sensitive_tag_args=(
+    -GPSLatitude -GPSLongitude -GPSPosition
+    -SerialNumber -CameraSerialNumber -BodySerialNumber -LensSerialNumber
+    -OwnerName -Artist -Copyright -Creator -Rights
+    -By-line -Contact
+    -Make -Model -LensModel -Software
+    -DateTime -DateTimeOriginal -CreateDate
+    -CreatorTool -ImageDescription -UserComment
+  )
+
+  for img in "${image_files[@]}"; do
+    local fname
+    fname=$(basename "$img")
+    local has_metadata=false
+    local metadata_lines=()
+
+    # Single exiftool call to extract all sensitive tags at once
+    local exif_output
+    exif_output=$(exiftool -s -f "${sensitive_tag_args[@]}" "$img" 2>/dev/null || true)
+
+    local line
+    while IFS= read -r line; do
+      [[ -z "$line" ]] && continue
+      # exiftool -s output format: "TagName                         : value"
+      local tagname value
+      tagname=$(echo "$line" | sed 's/\s*:.*//' | xargs)
+      value=$(echo "$line" | sed 's/^[^:]*:\s*//')
+
+      # Skip tags with no value (exiftool -f shows "-" for missing tags)
+      [[ "$value" == "-" ]] && continue
+      [[ -z "$value" ]] && continue
+
+      has_metadata=true
+      # Highlight GPS data in red
+      if [[ "$tagname" == *GPS* ]] || [[ "$tagname" == *Latitude* ]] || [[ "$tagname" == *Longitude* ]]; then
+        metadata_lines+=("${RED}!!${NC} $tagname: $value")
+      elif [[ "$tagname" == *Serial* ]] || [[ "$tagname" == *Owner* ]] || [[ "$tagname" == *Artist* ]] || [[ "$tagname" == *Creator* ]]; then
+        metadata_lines+=("${YELLOW}!${NC}  $tagname: $value")
+      else
+        metadata_lines+=("${DIM}-${NC}  $tagname: $value")
+      fi
+    done <<< "$exif_output"
+
+    if $has_metadata; then
+      privacy_issues=$((privacy_issues + 1))
+      echo -e "  ${YELLOW}$fname${NC} — metadata found:"
+      for line in "${metadata_lines[@]}"; do
+        echo -e "      $line"
+      done
+    else
+      echo -e "  ${GREEN}$fname${NC} — clean"
+    fi
+  done
+
+  echo ""
+  if [[ $privacy_issues -gt 0 ]]; then
+    warn "$privacy_issues image(s) contain metadata that should be stripped"
+  else
+    success "All images are clean of sensitive metadata"
+  fi
+
+  # --- Cross-reference with content ---
+  echo ""
+  echo -e "${BOLD}Content Reference Check${NC}"
+
+  # Collect all image references from content files
+  local -a referenced_images=()
+  local -a broken_refs=()
+  local -a inconsistent_paths=()
+
+  local mdfile fm_image inline_ref
+  while IFS= read -r -d '' mdfile; do
+    # Front matter image field (handles both `image: "..."` and `  image: "..."` under cover:)
+    while IFS= read -r fm_image; do
+      [[ -z "$fm_image" ]] && continue
+      # Clean up: remove surrounding quotes and whitespace
+      fm_image=$(echo "$fm_image" | sed 's/^[[:space:]]*image:[[:space:]]*//' | sed 's/^["'\'']//' | sed 's/["'\'']\s*$//')
+
+      if [[ -n "$fm_image" ]] && [[ "$fm_image" != '""' ]] && [[ "$fm_image" != http* ]]; then
+        # Normalize: Hugo serves /images/... from static/images/...
+        local fs_path="static/${fm_image#/}"
+
+        # Check if it's a broken reference
+        if [[ ! -f "$fs_path" ]]; then
+          broken_refs+=("$mdfile|$fm_image")
+        else
+          referenced_images+=("$fs_path")
+        fi
+
+        # Check for inconsistent path (missing leading /)
+        if [[ "$fm_image" != /* ]]; then
+          inconsistent_paths+=("$mdfile|$fm_image")
+        fi
+      fi
+    done < <(grep -E '^\s*image:\s' "$mdfile" 2>/dev/null || true)
+
+    # Inline markdown images: ![alt](/images/foo.jpg#center)
+    while IFS= read -r inline_ref; do
+      [[ -z "$inline_ref" ]] && continue
+      # Strip #fragment
+      local clean_ref="${inline_ref%%#*}"
+      # External URLs are not files under static/: they are neither broken
+      # nor local references, so leave them out of both lists.
+      [[ "$clean_ref" == http* ]] && continue
+      local fs_ref="static/${clean_ref#/}"
+
+      if [[ ! -f "$fs_ref" ]]; then
+        broken_refs+=("$mdfile|$inline_ref")
+      else
+        referenced_images+=("$fs_ref")
+      fi
+    done < <(grep -oP '!\[[^\]]*\]\(\K[^)]+' "$mdfile" 2>/dev/null || true)
+
+  done < <(find "$CONTENT_DIR" -name '*.md' -print0)
+
+  # Also check config.toml for avatarUrl
+  local avatar_path
+  avatar_path=$(grep 'avatarUrl' "$CONFIG_FILE" | sed 's/.*=\s*["'\'']\(.*\)["'\'']/\1/' || true)
+  if [[ -n "$avatar_path" ]]; then
+    referenced_images+=("static/${avatar_path#/}")
+  fi
+
+  # Find unreferenced images (compare using static/images/... paths)
+  local -a unreferenced=()
+  local ref
+  for img in "${image_files[@]}"; do
+    local found=false
+    # The ${arr[@]+...} form keeps `set -u` from tripping on an empty array
+    # under bash older than 4.4.
+    for ref in "${referenced_images[@]+"${referenced_images[@]}"}"; do
+      if [[ "$ref" == "$img" ]]; then
+        found=true
+        break
+      fi
+    done
+    if ! $found; then
+      unreferenced+=("$img")
+    fi
+  done
+
+  # Report broken references
+  local entry
+  if [[ ${#broken_refs[@]} -gt 0 ]]; then
+    warn "${#broken_refs[@]} broken image reference(s):"
+    for entry in "${broken_refs[@]}"; do
+      # Entries are stored as "<markdown file>|<reference>"
+      local file="${entry%%|*}"
+      local ref="${entry##*|}"
+      echo -e "    ${RED}$ref${NC}  in  ${DIM}$file${NC}"
+    done
+  else
+    success "No broken image references"
+  fi
+
+  # Report unreferenced images
+  echo ""
+  if [[ ${#unreferenced[@]} -gt 0 ]]; then
+    warn "${#unreferenced[@]} unreferenced image(s) (not used in any content):"
+    for img in "${unreferenced[@]}"; do
+      local fsize
+      fsize=$(stat -c%s "$img" 2>/dev/null || stat -f%z "$img" 2>/dev/null)
+      echo -e "    ${YELLOW}$(basename "$img")${NC}  ($(human_size "$fsize"))"
+    done
+  else
+    success "All images are referenced in content"
+  fi
+
+  # Report inconsistent paths
+  if [[ ${#inconsistent_paths[@]} -gt 0 ]]; then
+    echo ""
+    warn "${#inconsistent_paths[@]} image path(s) missing leading '/':"
+    for entry in "${inconsistent_paths[@]}"; do
+      local file="${entry%%|*}"
+      local ref="${entry##*|}"
+      echo -e "    ${YELLOW}$ref${NC}  in  ${DIM}$file${NC}"
+    done
+  fi
+
+  # Export arrays for later phases (bash 4+ trick: print to temp files)
+  # An empty array is written as one blank line; readers skip blank entries.
+  printf '%s\n' "${image_files[@]}" > /tmp/optimg_files.txt
+  printf '%s\n' "${unreferenced[@]+"${unreferenced[@]}"}" > /tmp/optimg_unreferenced.txt
+  printf '%s\n' "${broken_refs[@]+"${broken_refs[@]}"}" > /tmp/optimg_broken.txt
+  echo "$total_size" > /tmp/optimg_total_size.txt
+}
+
+# ---------------------------------------------------------------------------
+# PHASE 2: METADATA STRIPPING
+# ---------------------------------------------------------------------------
+# Phase 2: strip EXIF/XMP/IPTC metadata from every audited image, in place.
+# Reads the image list written by phase_audit (/tmp/optimg_files.txt).
+# In --dry-run mode it only announces what would happen.
+phase_strip_metadata() {
+  header "PHASE 2: METADATA STRIPPING"
+
+  if $DRY_RUN; then
+    info "(dry-run) Would strip all EXIF/IPTC/XMP metadata from images"
+    echo ""
+    return
+  fi
+
+  # Without the hand-off file the redirect below would abort the script.
+  if [[ ! -f /tmp/optimg_files.txt ]]; then
+    warn "No image list from the audit phase; nothing to strip"
+    return
+  fi
+
+  local -a image_files=()
+  mapfile -t image_files < /tmp/optimg_files.txt
+
+  local stripped=0
+  local failed=0
+  local img fname ext meta_check
+  for img in "${image_files[@]+"${image_files[@]}"}"; do
+    [[ -z "$img" ]] && continue
+    fname=$(basename "$img")
+
+    # Check if image has strippable EXIF/XMP/IPTC metadata (not just file properties)
+    # Use -EXIF:All -XMP:All -IPTC:All to only check real metadata groups
+    meta_check=$(exiftool -s -s -s -EXIF:All -XMP:All -IPTC:All "$img" 2>/dev/null || true)
+
+    if [[ -z "$meta_check" ]]; then
+      echo -e "  ${DIM}$fname — already clean, skipping${NC}"
+      continue
+    fi
+
+    # Auto-orient JPEG/PNG before stripping (applies EXIF rotation to pixels)
+    ext="${fname##*.}"
+    ext=$(echo "$ext" | tr '[:upper:]' '[:lower:]')
+    if [[ "$ext" == "jpg" ]] || [[ "$ext" == "jpeg" ]] || [[ "$ext" == "png" ]]; then
+      # Best effort: a failure here is reported but does not stop stripping.
+      if ! magick "$img" -auto-orient "$img" 2>/dev/null; then
+        warn "Could not auto-orient $fname; stripping metadata anyway"
+      fi
+    fi
+
+    # Strip all metadata. Report a failure instead of letting `set -e` kill
+    # the script with stderr suppressed.
+    if ! exiftool -all= -overwrite_original "$img" 2>/dev/null; then
+      failed=$((failed + 1))
+      warn "exiftool could not strip metadata from $fname"
+      continue
+    fi
+    stripped=$((stripped + 1))
+    echo -e "  ${GREEN}$fname${NC} — metadata stripped"
+  done
+
+  echo ""
+  success "Stripped metadata from $stripped image(s)"
+  if (( failed > 0 )); then
+    warn "$failed image(s) could not be stripped"
+  fi
+}
+
+# ---------------------------------------------------------------------------
+# PHASE 3: CONVERT & COMPRESS
+# ---------------------------------------------------------------------------
+# Phase 3: delete unreferenced images, then convert the rest to WebP.
+# Images larger than MAX_WIDTH x MAX_HEIGHT are downscaled first. Non-WebP
+# originals are removed after a successful conversion. GIFs are left alone.
+# Totals are written to /tmp/optimg_total_*.txt for phase_summary.
+phase_convert() {
+  header "PHASE 3: CONVERT TO WEBP & COMPRESS"
+
+  # Without the hand-off file the redirect below would abort the script.
+  if [[ ! -f /tmp/optimg_files.txt ]]; then
+    warn "No image list from the audit phase; nothing to convert"
+    return
+  fi
+
+  local -a image_files=()
+  mapfile -t image_files < /tmp/optimg_files.txt
+
+  # Delete unreferenced images first
+  local -a unreferenced=()
+  if [[ -f /tmp/optimg_unreferenced.txt ]]; then
+    mapfile -t unreferenced < /tmp/optimg_unreferenced.txt
+  fi
+
+  local img
+  if [[ ${#unreferenced[@]} -gt 0 ]] && [[ -n "${unreferenced[0]}" ]]; then
+    echo -e "${BOLD}Removing unreferenced images${NC}"
+    for img in "${unreferenced[@]}"; do
+      [[ -z "$img" ]] && continue
+      local fsize
+      fsize=$(stat -c%s "$img" 2>/dev/null || stat -f%z "$img" 2>/dev/null)
+      if $DRY_RUN; then
+        echo -e "  ${DIM}(dry-run) Would delete: $(basename "$img") ($(human_size "$fsize"))${NC}"
+      else
+        rm -f "$img"
+        echo -e "  ${RED}Deleted:${NC} $(basename "$img") ($(human_size "$fsize"))"
+      fi
+    done
+    echo ""
+  fi
+
+  echo -e "${BOLD}Converting images to WebP (quality $WEBP_QUALITY, max ${MAX_WIDTH}x${MAX_HEIGHT})${NC}"
+  printf "  %-40s  %-12s  %-12s  %s\n" "FILENAME" "BEFORE" "AFTER" "SAVINGS"
+  printf "  %-40s  %-12s  %-12s  %s\n" "--------" "------" "-----" "-------"
+
+  local total_before=0
+  local total_after=0
+  local converted=0
+
+  for img in "${image_files[@]+"${image_files[@]}"}"; do
+    [[ -z "$img" ]] && continue
+    # Skip if this was an unreferenced file we just deleted
+    [[ ! -f "$img" ]] && continue
+
+    local fname
+    fname=$(basename "$img")
+    local ext="${fname##*.}"
+    local base="${fname%.*}"
+    local ext_lower
+    ext_lower=$(echo "$ext" | tr '[:upper:]' '[:lower:]')
+    local webp_path="$IMAGES_DIR/${base}.webp"
+
+    # cwebp does not accept GIF input, and the reference-update phase never
+    # rewrites .gif links, so GIFs are left exactly as they are.
+    if [[ "$ext_lower" == "gif" ]]; then
+      warn "Skipping $fname: GIF input is not supported by cwebp (left unchanged)"
+      continue
+    fi
+
+    local before_size
+    before_size=$(stat -c%s "$img" 2>/dev/null || stat -f%z "$img" 2>/dev/null)
+    total_before=$((total_before + before_size))
+
+    if $DRY_RUN; then
+      echo -e "  ${DIM}(dry-run) Would convert: $fname -> ${base}.webp${NC}"
+      # Rough estimate of the result: 1/5 of a JPEG, 1/3 of a PNG, 9/10 of an existing WebP
+      local est_after=$before_size
+      case "$ext_lower" in
+        jpg|jpeg) est_after=$((before_size / 5)) ;;
+        png)      est_after=$((before_size / 3)) ;;
+        webp)     est_after=$((before_size * 9 / 10)) ;;
+      esac
+      total_after=$((total_after + est_after))
+      converted=$((converted + 1))
+      continue
+    fi
+
+    # Get current dimensions
+    local cur_width cur_height
+    read -r cur_width cur_height < <(identify -format "%w %h\n" "$img" 2>/dev/null || echo "0 0")
+
+    local needs_resize=false
+    if (( cur_width > MAX_WIDTH )) || (( cur_height > MAX_HEIGHT )); then
+      needs_resize=true
+    fi
+
+    # Determine the input for cwebp
+    local cwebp_input="$img"
+    local tmp_resized=""
+
+    if $needs_resize; then
+      # Resize via ImageMagick, output to temp PNG for cwebp
+      tmp_resized=$(mktemp /tmp/optimg_XXXXXX.png)
+      magick "$img" -resize "${MAX_WIDTH}x${MAX_HEIGHT}>" -quality 100 "$tmp_resized"
+      info "  Resized $fname: ${cur_width}x${cur_height} -> $(magick identify -format '%wx%h' "$tmp_resized")"
+      cwebp_input="$tmp_resized"
+    fi
+
+    # Convert to WebP via cwebp (handles JPEG/PNG/WebP input natively)
+    local convert_ok=true
+    if [[ "$ext_lower" == "webp" ]] && [[ "$img" == "$webp_path" ]]; then
+      # Same input and output: use temp output
+      local tmp_webp
+      tmp_webp=$(mktemp /tmp/optimg_XXXXXX.webp)
+      if cwebp -q "$WEBP_QUALITY" "$cwebp_input" -o "$tmp_webp" 2>/dev/null; then
+        # Copy over the existing file rather than mv: this keeps the image's
+        # own permissions instead of mktemp's owner-only mode.
+        cp "$tmp_webp" "$webp_path"
+      else
+        convert_ok=false
+      fi
+      rm -f "$tmp_webp"
+    elif ! cwebp -q "$WEBP_QUALITY" "$cwebp_input" -o "$webp_path" 2>/dev/null; then
+      convert_ok=false
+    fi
+
+    # Cleanup temp file if we resized
+    if [[ -n "$tmp_resized" ]]; then
+      rm -f "$tmp_resized"
+    fi
+
+    if ! $convert_ok; then
+      # Keep the original and carry on, rather than aborting mid-run.
+      warn "cwebp failed for $fname; original left in place"
+      total_after=$((total_after + before_size))
+      continue
+    fi
+
+    # Step 3: Delete original if it's not already .webp
+    if [[ "$ext_lower" != "webp" ]]; then
+      rm -f "$img"
+    fi
+
+    local after_size
+    after_size=$(stat -c%s "$webp_path" 2>/dev/null || stat -f%z "$webp_path" 2>/dev/null)
+    total_after=$((total_after + after_size))
+
+    local savings=0
+    if (( before_size > 0 )); then
+      savings=$(( (before_size - after_size) * 100 / before_size ))
+    fi
+
+    local savings_color="$GREEN"
+    if (( savings < 10 )); then
+      savings_color="$YELLOW"
+    fi
+
+    printf "  %-40s  %-12s  %-12s  ${savings_color}%s%%${NC}\n" \
+      "${base}.webp" "$(human_size "$before_size")" "$(human_size "$after_size")" "$savings"
+
+    converted=$((converted + 1))
+  done
+
+  echo ""
+  local total_savings=0
+  if (( total_before > 0 )); then
+    total_savings=$(( (total_before - total_after) * 100 / total_before ))
+  fi
+  info "Converted $converted image(s)"
+  info "Total: $(human_size $total_before) -> $(human_size $total_after) (${total_savings}% reduction)"
+
+  # Save totals for summary
+  echo "$total_before" > /tmp/optimg_total_before.txt
+  echo "$total_after" > /tmp/optimg_total_after.txt
+  echo "$converted" > /tmp/optimg_converted.txt
+}
+
+# ---------------------------------------------------------------------------
+# PHASE 4: UPDATE CONTENT REFERENCES
+# ---------------------------------------------------------------------------
+# Phase 4: point content at the converted images.
+#   Step 1 rewrites local /images/*.jpg|jpeg|png references in markdown
+#          (front matter `image:` fields and inline images) to .webp.
+#   Step 2 does the same for a local avatarUrl in $CONFIG_FILE.
+#   Step 3 blanks front matter `image:` values that still do not resolve.
+# In --dry-run mode nothing is written; intended changes are only listed.
+phase_update_refs() {
+  header "PHASE 4: UPDATE CONTENT REFERENCES"
+
+  local updated_files=0
+  local mdfile
+
+  # --- Step 1: Update image extensions in content files ---
+  # This must happen BEFORE broken ref clearing, since .jpg/.png files are now .webp
+  echo -e "${BOLD}Updating image references (.jpg/.jpeg/.png -> .webp)${NC}"
+
+  while IFS= read -r -d '' mdfile; do
+    local changed=false
+
+    # Normalize front matter paths first: change image: "images/... to image: "/images/...
+    if grep -qE '^\s*image:\s*"images/' "$mdfile" 2>/dev/null; then
+      if ! $DRY_RUN; then
+        sed -i -E 's@^(\s*image:\s*)"images/@\1"/images/@' "$mdfile"
+      fi
+      changed=true
+    fi
+
+    # Update front matter image field (only local paths, not http URLs)
+    # Handles both `image: "/images/..."` and `  image: "/images/..."` (indented under cover:)
+    if grep -qE '^\s*image:\s*"/images/.*\.(jpg|jpeg|JPG|JPEG|png|PNG)"' "$mdfile" 2>/dev/null; then
+      if ! $DRY_RUN; then
+        sed -i -E 's@^(\s*image:\s*"/images/[^"]*)\.(jpg|jpeg|JPG|JPEG|png|PNG)"@\1.webp"@' "$mdfile"
+      fi
+      changed=true
+    fi
+
+    # Update inline markdown images: ![alt](/images/foo.jpg#center)
+    # Only match local /images/ paths, not external URLs
+    if grep -qP '!\[[^\]]*\]\(/images/[^)]*\.(jpg|jpeg|JPG|JPEG|png|PNG)(#[^)]*)?\)' "$mdfile" 2>/dev/null; then
+      if ! $DRY_RUN; then
+        # The path part is [^)#]* (not [^.)]*) so names containing dots, such
+        # as foo.v2.jpg, are rewritten too, matching the grep guard above.
+        sed -i -E 's@(!\[[^]]*\]\(/images/[^)#]*)\.(jpg|jpeg|JPG|JPEG|png|PNG)(#[^)]*)?(\))@\1.webp\3\4@g' "$mdfile"
+      fi
+      changed=true
+    fi
+
+    if $changed; then
+      local relpath="${mdfile}"
+      if $DRY_RUN; then
+        echo -e "  ${DIM}(dry-run) Would update refs in: $relpath${NC}"
+      else
+        echo -e "  ${GREEN}Updated${NC} $relpath"
+      fi
+      updated_files=$((updated_files + 1))
+    fi
+  done < <(find "$CONTENT_DIR" -name '*.md' -print0)
+
+  # --- Step 2: Update config.toml avatar ---
+  # Guard and substitution use the same pattern, so "Updated" is only printed
+  # when the file really changes, whatever the avatar file is called.
+  if grep -qE 'avatarUrl\s*=\s*"/images/[^"]*\.(jpg|jpeg|JPG|JPEG|png|PNG)"' "$CONFIG_FILE" 2>/dev/null; then
+    if $DRY_RUN; then
+      echo -e "  ${DIM}(dry-run) Would update avatarUrl in $CONFIG_FILE${NC}"
+    else
+      sed -i -E 's@(avatarUrl\s*=\s*"/images/[^"]*)\.(jpg|jpeg|JPG|JPEG|png|PNG)"@\1.webp"@' "$CONFIG_FILE"
+      echo -e "  ${GREEN}Updated${NC} avatarUrl in $CONFIG_FILE"
+    fi
+    updated_files=$((updated_files + 1))
+  fi
+
+  # --- Step 3: Clear genuinely broken image references ---
+  # Only clear refs that still don't resolve after extension updates
+  # (e.g., placeholder /images/img.jpg that was never a real image)
+  echo ""
+  echo -e "${BOLD}Checking for remaining broken image references${NC}"
+
+  local cleared=0
+  local fm_line
+  while IFS= read -r -d '' mdfile; do
+    # Check front matter image fields
+    while IFS= read -r fm_line; do
+      [[ -z "$fm_line" ]] && continue
+      local fm_image
+      fm_image=$(echo "$fm_line" | sed 's/^[[:space:]]*image:[[:space:]]*//' | sed 's/^["'\'']//' | sed 's/["'\'']\s*$//')
+
+      [[ -z "$fm_image" ]] && continue
+      [[ "$fm_image" == '""' ]] && continue
+      [[ "$fm_image" == http* ]] && continue
+
+      local fs_path="static/${fm_image#/}"
+      if [[ ! -f "$fs_path" ]]; then
+        if $DRY_RUN; then
+          echo -e "  ${DIM}(dry-run) Would clear broken ref in: $mdfile (was: $fm_image)${NC}"
+        else
+          # Escape every ERE metacharacter plus the '@' delimiter so the path
+          # is matched literally by the `sed -E "s@...@"` below.
+          local escaped_image
+          escaped_image=$(printf '%s\n' "$fm_image" | sed 's#[][\.*^$+?(){}|@]#\\&#g')
+          sed -i -E "s@^(\s*image:\s*).*${escaped_image}.*@\1\"\"@" "$mdfile"
+          echo -e "  ${GREEN}Cleared${NC} broken ref ${DIM}$fm_image${NC} in ${DIM}$mdfile${NC}"
+          cleared=$((cleared + 1))
+        fi
+      fi
+    done < <(grep -E '^\s*image:\s' "$mdfile" 2>/dev/null || true)
+  done < <(find "$CONTENT_DIR" -name '*.md' -print0)
+
+  if [[ $cleared -eq 0 ]] && ! $DRY_RUN; then
+    success "No broken image references remaining"
+  fi
+
+  echo ""
+  info "Updated $updated_files file(s)"
+}
+
+# ---------------------------------------------------------------------------
+# PHASE 5: SUMMARY
+# ---------------------------------------------------------------------------
+# Phase 5: print the totals recorded by the earlier phases, list the manual
+# follow-up steps, and delete the /tmp/optimg_*.txt hand-off files.
+phase_summary() {
+  header "PHASE 5: SUMMARY"
+
+  if $DRY_RUN; then
+    echo -e "${BOLD}${YELLOW}DRY RUN — no changes were made${NC}"
+    echo ""
+  fi
+
+  # Each value falls back to 0 when its file is missing. The "before" total
+  # falls back to the audit's total size first.
+  local size_before size_after n_done
+  local pct=0
+  size_before=$(cat /tmp/optimg_total_before.txt 2>/dev/null || cat /tmp/optimg_total_size.txt 2>/dev/null || echo 0)
+  size_after=$(cat /tmp/optimg_total_after.txt 2>/dev/null || echo 0)
+  n_done=$(cat /tmp/optimg_converted.txt 2>/dev/null || echo 0)
+
+  echo -e "  Images processed:  ${BOLD}$n_done${NC}"
+
+  # Size lines are only shown when a non-zero "after" total exists.
+  if (( size_after > 0 )); then
+    if (( size_before > 0 )); then
+      pct=$(( (size_before - size_after) * 100 / size_before ))
+    fi
+    echo -e "  Size before:       ${BOLD}$(human_size "$size_before")${NC}"
+    echo -e "  Size after:        ${BOLD}$(human_size "$size_after")${NC}"
+    echo -e "  Total reduction:   ${BOLD}${GREEN}${pct}%${NC}"
+  fi
+
+  echo ""
+  echo -e "  ${BOLD}Next steps:${NC}"
+  echo -e "    1. Run ${CYAN}hugo server${NC} and verify images look correct"
+  echo -e "    2. Check the browser dev tools Network tab for proper WebP delivery"
+  echo -e "    3. Commit when satisfied: ${CYAN}git add -A && git commit -m \"optimize: convert images to webp, strip metadata\"${NC}"
+
+  # Cleanup temp files
+  rm -f /tmp/optimg_*.txt
+}
+
+# ---------------------------------------------------------------------------
+# MAIN
+# ---------------------------------------------------------------------------
+# Entry point: print the banner, run the audit, then (unless --audit-only or
+# the user declines) run the strip, convert, update-refs and summary phases.
+main() {
+  # Remove the hand-off files on every exit path, including `set -e` aborts,
+  # so a failed run cannot leave stale totals or file lists for the next one.
+  trap 'rm -f /tmp/optimg_*.txt' EXIT
+
+  echo -e "${BOLD}${CYAN}"
+  echo "  ┌─────────────────────────────────────────┐"
+  echo "  │   fosscat.com Image Optimizer            │"
+  echo "  │   Strip metadata · Convert to WebP       │"
+  echo "  │   Resize · Audit references              │"
+  echo "  └─────────────────────────────────────────┘"
+  echo -e "${NC}"
+
+  if $DRY_RUN; then
+    echo -e "  ${YELLOW}Running in DRY RUN mode — no files will be modified${NC}"
+    echo ""
+  fi
+
+  # Phase 1: Audit (always runs)
+  phase_audit
+
+  if $AUDIT_ONLY; then
+    echo ""
+    info "Audit complete. Run without --audit-only to process images."
+    return
+  fi
+
+  # Confirm before proceeding
+  echo ""
+  if ! $AUTO_YES && ! $DRY_RUN; then
+    local answer=""
+    echo -en "  ${BOLD}Proceed with optimization? [y/N]${NC} "
+    # read fails at EOF; treat that as "no" rather than a silent abort.
+    read -r answer || true
+    if [[ ! "$answer" =~ ^[Yy]$ ]]; then
+      info "Aborted."
+      exit 0
+    fi
+  fi
+
+  # Phase 2: Strip metadata
+  phase_strip_metadata
+
+  # Phase 3: Convert & compress
+  phase_convert
+
+  # Phase 4: Update references
+  phase_update_refs
+
+  # Phase 5: Summary
+  phase_summary
+}
+
+main