update images to webp, update pre-commit hooks

This commit is contained in:
2026-03-04 01:37:18 -07:00
parent cfb55e0e51
commit bd73fcd39b
72 changed files with 1007 additions and 80 deletions
+89 -14
View File
@@ -1,13 +1,15 @@
#!/usr/bin/env python3
"""
check-tags.py — Tag similarity checker for Hugo content
check-tags.py — Semantic tag similarity checker for Hugo content
Compares tags in staged files against all existing tags in the site.
Warns and blocks commit when a new tag looks similar to an existing one.
Warns and blocks commit when a new tag is semantically similar to an existing one.
Similarity checks (via difflib.SequenceMatcher):
- Ratio >= 0.6 (catches typos, reordered chars, partial matches)
- One tag is a substring of the other
Uses spaCy word vectors (en_core_web_lg) for cosine similarity — catches
conceptual matches like "parenting" ↔ "fatherhood" while ignoring unrelated
words that happen to share letters like "dogs" vs "daily".
Fallback: if spaCy is unavailable, uses conservative edit-distance checks only.
Skip with: SKIP_TAG_CHECK=1 git commit
@@ -17,6 +19,7 @@ Usage: check-tags.py <file1.md> [file2.md ...]
import os
import re
import sys
import time
from difflib import SequenceMatcher
from pathlib import Path
@@ -28,7 +31,44 @@ CYAN = "\033[0;36m"
BOLD = "\033[1m"
NC = "\033[0m"
SIMILARITY_THRESHOLD = 0.6 # SequenceMatcher ratio (0-1)
# Cosine similarity threshold for word vectors (0-1).
# 0.65 catches morphological variants (parenting/parenthood) and synonyms
# (cannabis/marijuana) while avoiding unrelated words. Tuned for short blog tags.
# Inclusive: find_similar() reports a pair when score >= SEMANTIC_THRESHOLD.
SEMANTIC_THRESHOLD = 0.65
# Edit-distance threshold — only used as a typo catcher alongside semantics.
# The value is compared (inclusively) against difflib.SequenceMatcher.ratio(),
# i.e. it is a similarity ratio: higher means the two strings are more alike.
# 0.85 is very conservative: catches "kubernetse" vs "kubernetes" but not
# "dogs" vs "daily" (which scores ~0.40).
TYPO_THRESHOLD = 0.85
# Substring match: shorter tag must be at least this many chars
# and cover at least this fraction of the longer tag.
SUBSTRING_MIN_LEN = 5
SUBSTRING_MIN_RATIO = 0.6
# --- spaCy setup (lazy, with graceful fallback) ---
# Module-level cache written by _load_spacy() so the model is loaded at most once.
# _nlp: the loaded spaCy pipeline, or None when nothing has been loaded.
_nlp = None
# _spacy_available is tri-state: None = load not attempted yet,
# True = model loaded, False = import/load failed (edit-distance fallback).
_spacy_available = None
def _load_spacy():
    """Return the cached spaCy pipeline and an availability flag.

    The model is loaded on the first call only; later calls return the
    cached result. Yields ``(nlp, True)`` when ``en_core_web_lg`` loaded,
    or ``(None, False)`` when spaCy or the model could not be loaded (a
    warning is printed once in that case).
    """
    global _nlp, _spacy_available

    # None means "never tried"; True/False means the outcome is cached.
    if _spacy_available is None:
        try:
            import spacy

            _nlp = spacy.load("en_core_web_lg")
        except (ImportError, OSError) as e:
            # ImportError: spaCy not installed; OSError: model not installed.
            print(
                f"{YELLOW}spaCy not available ({e}), "
                f"falling back to edit-distance only{NC}"
            )
            _nlp, _spacy_available = None, False
        else:
            _spacy_available = True

    return _nlp, _spacy_available
def extract_tags(filepath: Path, *, keep_blanks: bool = False) -> list[str]:
@@ -74,26 +114,53 @@ def extract_tags(filepath: Path, *, keep_blanks: bool = False) -> list[str]:
return [t for t in tags if t]
def find_similar(
    new_tag: str,
    existing_tags: set[str],
    existing_docs: dict | None = None,
) -> list[tuple[str, str]]:
    """Find existing tags similar to a new tag.

    Each existing tag is tested against ``new_tag`` with three checks, in
    order; the first check that fires decides the reason and the remaining
    checks are skipped for that tag:

    1. Restricted substring match (see SUBSTRING_MIN_LEN / SUBSTRING_MIN_RATIO).
    2. Semantic similarity via spaCy vectors (only when spaCy is available
       and both tags have a vector), using SEMANTIC_THRESHOLD.
    3. Typo detection via difflib.SequenceMatcher, using TYPO_THRESHOLD.

    Args:
        new_tag: The tag being introduced.
        existing_tags: All tags already used on the site. An entry equal to
            ``new_tag`` is ignored.
        existing_docs: Optional mapping of tag string to its pre-computed
            spaCy Doc (avoids redundant nlp() calls). Tags missing from the
            mapping are vectorised on demand.

    Returns:
        A list of ``(existing_tag, reason)`` tuples, ordered by existing tag.
    """
    nlp, has_spacy = _load_spacy()

    # The new tag is the same for every comparison, so vectorise it once
    # instead of once per existing tag.
    doc_new = nlp(new_tag) if has_spacy else None
    use_semantics = doc_new is not None and doc_new.has_vector

    similar = []
    for existing in sorted(existing_tags):
        if existing == new_tag:
            continue

        # --- Check 1: Substring match (restricted) ---
        shorter, longer = sorted([new_tag, existing], key=len)
        if (
            len(shorter) >= SUBSTRING_MIN_LEN
            and shorter in longer
            and len(shorter) / len(longer) >= SUBSTRING_MIN_RATIO
        ):
            similar.append((existing, "substring match"))
            continue

        # --- Check 2: Semantic similarity (primary) ---
        if use_semantics:
            # Prefer the pre-computed Doc; fall back to computing it when the
            # cache is absent or does not contain this tag (no KeyError).
            doc_ex = existing_docs.get(existing) if existing_docs else None
            if doc_ex is None:
                doc_ex = nlp(existing)
            if doc_ex.has_vector:
                score = doc_new.similarity(doc_ex)
                if score >= SEMANTIC_THRESHOLD:
                    similar.append((existing, f"semantic: {score:.0%}"))
                    continue

        # --- Check 3: Typo detection via edit distance (conservative) ---
        ratio = SequenceMatcher(None, new_tag, existing).ratio()
        if ratio >= TYPO_THRESHOLD:
            similar.append((existing, f"typo match: {ratio:.0%}"))

    return similar
@@ -130,8 +197,13 @@ def main() -> int:
print(f"{GREEN}No existing tags found, nothing to compare against.{NC}")
return 0
# Pre-compute spaCy docs for all existing tags (avoids repeated nlp() calls)
nlp, has_spacy = _load_spacy()
existing_docs = {tag: nlp(tag) for tag in all_tags} if has_spacy else None
# Check staged files for similar tags
found_issues = False
start = time.monotonic()
for staged_file in staged_files:
filepath = repo_root / staged_file
@@ -162,7 +234,7 @@ def main() -> int:
continue
# New tag — check for similarity
similar = find_similar(tag, all_tags)
similar = find_similar(tag, all_tags, existing_docs)
if similar:
found_issues = True
@@ -172,15 +244,18 @@ def main() -> int:
for existing, reason in similar:
print(f" {CYAN}\u2192 {existing} ({reason}){NC}")
elapsed = time.monotonic() - start
if found_issues:
print()
print(f"{RED}{BOLD}Tag similarity check failed.{NC}")
print(f"{RED}Consider using an existing tag, or skip with:{NC}")
print(f"{RED} SKIP_TAG_CHECK=1 git commit{NC}")
print(f"{RED} ({elapsed:.1f}s){NC}")
print()
return 1
print(f"{GREEN}Tag check passed \u2014 no similar tags found.{NC}")
print(f"{GREEN}Tag check passed \u2014 no similar tags found. ({elapsed:.1f}s){NC}")
return 0
+32
View File
@@ -0,0 +1,32 @@
#!/usr/bin/env python3
"""List all unique tags across Hugo content, sorted alphabetically."""
import re
from pathlib import Path
# Front matter: an opening '---' line, the YAML body, and a closing '---'
# line. The closing fence may be the very last line of the file, so the
# trailing newline after it is optional.
_FRONT_MATTER_RE = re.compile(r"^---\s*\n(.*?)\n---\s*(?:\n|$)", re.DOTALL)
# Inline form:  tags: [a, "b", 'c']
_INLINE_TAGS_RE = re.compile(r"^tags:\s*\[([^\]]*)\]", re.MULTILINE)
# Block form:
#   tags:
#     - a
#     - b
# NOTE(review): list items must be indented; unindented "- tag" items are
# not recognised. Confirm this matches the parser used by check-tags.py.
_BLOCK_TAGS_RE = re.compile(r"^tags:\s*\n((?:\s+-\s+.+\n?)+)", re.MULTILINE)
_BLOCK_ITEM_RE = re.compile(r"^\s+-\s+(.*)", re.MULTILINE)


def _normalize(raw: str) -> str:
    """Trim whitespace and surrounding quotes from a raw tag and lowercase it."""
    return raw.strip().strip("\"'").lower()


def parse_tags(text: str) -> set[str]:
    """Return the normalized tags declared in the front matter of *text*.

    The inline ``tags: [...]`` form is used when present and non-empty;
    otherwise the indented block-list form is tried. Returns an empty set
    when there is no front matter or no tags.
    """
    fm = _FRONT_MATTER_RE.match(text)
    if not fm:
        return set()
    front_matter = fm.group(1)

    inline = _INLINE_TAGS_RE.search(front_matter)
    if inline and inline.group(1).strip():
        raw_tags = inline.group(1).split(",")
    else:
        block = _BLOCK_TAGS_RE.search(front_matter)
        raw_tags = _BLOCK_ITEM_RE.findall(block.group(1)) if block else []

    return {tag for tag in map(_normalize, raw_tags) if tag}


def collect_tags(content_dir: Path) -> set[str]:
    """Return every unique tag found in ``*.md`` files under *content_dir*."""
    tags: set[str] = set()
    for md in content_dir.rglob("*.md"):
        tags |= parse_tags(md.read_text(encoding="utf-8"))
    return tags


def main() -> None:
    """Print all unique tags of the site's content directory, one per line."""
    content_dir = Path(__file__).resolve().parent.parent / "content"
    for tag in sorted(collect_tags(content_dir)):
        print(tag)


if __name__ == "__main__":
    main()
+738
View File
@@ -0,0 +1,738 @@
#!/usr/bin/env bash
# optimize-images.sh — Image auditor, metadata stripper, and WebP optimizer for fosscat.com
#
# Usage:
# ./scripts/optimize-images.sh # Interactive mode
# ./scripts/optimize-images.sh --dry-run # Show what would happen without changing anything
# ./scripts/optimize-images.sh --yes # Skip all confirmation prompts
# ./scripts/optimize-images.sh --audit-only # Only run the audit phase (no changes)
set -euo pipefail
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
# All paths are relative to the project root (the script refuses to run elsewhere).
IMAGES_DIR="static/images"   # scanned non-recursively (find -maxdepth 1) for images
CONTENT_DIR="content"        # Markdown tree searched for image references
CONFIG_FILE="config.toml"    # project-root marker; also read for avatarUrl
# Images larger than MAX_WIDTH x MAX_HEIGHT are downscaled before conversion.
MAX_WIDTH=2000
MAX_HEIGHT=2000
WEBP_QUALITY=82              # passed to cwebp as -q
# ---------------------------------------------------------------------------
# CLI flags
# ---------------------------------------------------------------------------
# Mode switches; each holds the literal word "true" or "false" so it can be
# executed directly as a command in conditionals (e.g. `if $DRY_RUN; then`).
DRY_RUN=false
AUTO_YES=false
AUDIT_ONLY=false

# Walk the positional parameters without consuming them.
for arg; do
    case "$arg" in
        --dry-run)
            DRY_RUN=true
            ;;
        --yes | -y)
            AUTO_YES=true
            ;;
        --audit-only)
            AUDIT_ONLY=true
            ;;
        --help | -h)
            cat <<EOF
Usage: $0 [--dry-run] [--yes] [--audit-only]

 --dry-run Show what would happen without making changes
 --yes, -y Skip confirmation prompts
 --audit-only Only run the audit (no modifications)
 --help, -h Show this help
EOF
            exit 0
            ;;
        *)
            # Anything unrecognised is fatal.
            printf '%s\n' "Unknown option: $arg" "Run $0 --help for usage"
            exit 1
            ;;
    esac
done
# ---------------------------------------------------------------------------
# Colors and formatting
# ---------------------------------------------------------------------------
# ANSI escape sequences, stored unexpanded; they are interpreted at print time.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
BOLD='\033[1m'
DIM='\033[2m'
NC='\033[0m' # No Color

# Log helpers: print all arguments on one line behind a coloured level tag.
# printf's %b expands backslash escapes the same way `echo -e` does.
info() {
    printf '%b\n' "${BLUE}[INFO]${NC} $*"
}

success() {
    printf '%b\n' "${GREEN}[OK]${NC} $*"
}

warn() {
    printf '%b\n' "${YELLOW}[WARN]${NC} $*"
}

error() {
    printf '%b\n' "${RED}[ERROR]${NC} $*"
}

# Section banner surrounded by blank lines.
header() {
    printf '%b\n' "\n${BOLD}${CYAN}═══ $* ═══${NC}\n"
}
# ---------------------------------------------------------------------------
# Dependency checks
# ---------------------------------------------------------------------------
# Abort with installation hints unless every external tool the script
# invokes is on PATH.
#
# The list mirrors the commands actually called below: exiftool, magick,
# identify and cwebp. (`convert` is not used anywhere in this script, whereas
# `magick` is, so that is the ImageMagick entry point that must be present.)
check_deps() {
    local missing=()
    local cmd
    for cmd in exiftool magick identify cwebp; do
        if ! command -v "$cmd" &>/dev/null; then
            missing+=("$cmd")
        fi
    done
    if [[ ${#missing[@]} -gt 0 ]]; then
        error "Missing required tools: ${missing[*]}"
        echo " These are provided by the Nix dev shell. Run:"
        echo " nix develop # or let direnv load the flake"
        echo ""
        echo " Required nix packages:"
        echo " perl538Packages.ImageExifTool (exiftool)"
        echo " imagemagick (magick, identify)"
        echo " libwebp (cwebp)"
        exit 1
    fi
}
# ---------------------------------------------------------------------------
# Ensure we're in the project root
# ---------------------------------------------------------------------------
# IMAGES_DIR, CONTENT_DIR and CONFIG_FILE are relative paths, so the script
# only works from the directory that contains them.
if [[ ! -f "$CONFIG_FILE" ]] || [[ ! -d "$IMAGES_DIR" ]]; then
    error "Must be run from the project root (where $CONFIG_FILE and $IMAGES_DIR exist)"
    exit 1
fi
# Tool availability is verified only after the location check has passed.
check_deps
# ---------------------------------------------------------------------------
# Utility: human-readable file size
# ---------------------------------------------------------------------------
# Print a byte count ($1) in a compact human-readable form:
# plain bytes below 1 KiB, whole KB below 1 MiB, otherwise MB with one
# (truncated, not rounded) decimal digit. Integer arithmetic only.
human_size() {
    local bytes=$1
    local -r kib=1024 mib=1048576

    if (( bytes < kib )); then
        echo "${bytes} B"
    elif (( bytes < mib )); then
        echo "$(( bytes / kib )) KB"
    else
        printf '%d.%d MB\n' "$(( bytes / mib ))" "$(( (bytes % mib) * 10 / mib ))"
    fi
}
# ---------------------------------------------------------------------------
# Utility: confirm prompt (respects --yes and --dry-run)
# ---------------------------------------------------------------------------
# Ask a yes/no question ($1). Returns 0 for "yes", non-zero otherwise.
#   --yes     : answers yes without printing anything
#   --dry-run : prints what would have been asked and answers yes
#   otherwise : reads one line; only a lone "y" or "Y" counts as yes
confirm() {
    local prompt="$1"

    if $AUTO_YES; then
        return 0
    elif $DRY_RUN; then
        echo -e " ${DIM}(dry-run: would ask) $prompt${NC}"
        return 0
    fi

    echo -en " $prompt ${BOLD}[y/N]${NC} "
    read -r answer
    case "$answer" in
        y | Y) return 0 ;;
        *) return 1 ;;
    esac
}
# ---------------------------------------------------------------------------
# PHASE 1: AUDIT
# ---------------------------------------------------------------------------
# Phase 1: read-only audit of the images directly inside $IMAGES_DIR.
#
# Prints four reports: an inventory table, a metadata/privacy scan, broken
# image references found in $CONTENT_DIR, and images no content refers to.
# Nothing is modified. Results are handed to later phases through fixed-name
# files in /tmp (see the end of the function).
#
# If no images are found the function returns early and writes none of the
# /tmp hand-off files.
phase_audit() {
    header "PHASE 1: IMAGE AUDIT"
    # Collect all image files
    # (NUL-delimited so unusual filenames survive; top level of IMAGES_DIR only)
    local -a image_files=()
    while IFS= read -r -d '' f; do
        image_files+=("$f")
    done < <(find "$IMAGES_DIR" -maxdepth 1 -type f \( -iname '*.jpg' -o -iname '*.jpeg' -o -iname '*.png' -o -iname '*.webp' -o -iname '*.gif' \) -print0 | sort -z)
    if [[ ${#image_files[@]} -eq 0 ]]; then
        warn "No images found in $IMAGES_DIR"
        return
    fi
    # --- Image inventory table ---
    echo -e "${BOLD}Image Inventory${NC}"
    printf " %-40s %-6s %-12s %s\n" "FILENAME" "FORMAT" "DIMENSIONS" "SIZE"
    printf " %-40s %-6s %-12s %s\n" "--------" "------" "----------" "----"
    local total_size=0
    for img in "${image_files[@]}"; do
        local fname
        fname=$(basename "$img")
        local ext="${fname##*.}"
        local fsize
        # Try GNU stat syntax first, then the BSD form.
        fsize=$(stat -c%s "$img" 2>/dev/null || stat -f%z "$img" 2>/dev/null)
        total_size=$((total_size + fsize))
        local dims
        dims=$(identify -format "%wx%h" "$img" 2>/dev/null || echo "unknown")
        printf " %-40s %-6s %-12s %s\n" "$fname" "$ext" "$dims" "$(human_size "$fsize")"
    done
    echo ""
    info "Total: ${#image_files[@]} images, $(human_size $total_size)"
    # --- EXIF / Metadata scan ---
    echo ""
    echo -e "${BOLD}Metadata / Privacy Scan${NC}"
    local privacy_issues=0
    # Sensitive tag names to check (extracted in a single exiftool call per image)
    local sensitive_tag_args=(
        -GPSLatitude -GPSLongitude -GPSPosition
        -SerialNumber -CameraSerialNumber -BodySerialNumber -LensSerialNumber
        -OwnerName -Artist -Copyright -Creator -Rights
        -By-line -Contact
        -Make -Model -LensModel -Software
        -DateTime -DateTimeOriginal -CreateDate
        -CreatorTool -ImageDescription -UserComment
    )
    for img in "${image_files[@]}"; do
        local fname
        fname=$(basename "$img")
        local has_metadata=false
        local metadata_lines=()
        # Single exiftool call to extract all sensitive tags at once
        local exif_output
        exif_output=$(exiftool -s -f "${sensitive_tag_args[@]}" "$img" 2>/dev/null || true)
        while IFS= read -r line; do
            [[ -z "$line" ]] && continue
            # exiftool -s output format: "TagName : value"
            local tagname value
            tagname=$(echo "$line" | sed 's/\s*:.*//' | xargs)
            value=$(echo "$line" | sed 's/^[^:]*:\s*//')
            # Skip tags with no value (exiftool -f shows "-" for missing tags)
            [[ "$value" == "-" ]] && continue
            [[ -z "$value" ]] && continue
            has_metadata=true
            # Highlight GPS data in red
            # (identity-related tags in yellow, everything else dimmed)
            if [[ "$tagname" == *GPS* ]] || [[ "$tagname" == *Latitude* ]] || [[ "$tagname" == *Longitude* ]]; then
                metadata_lines+=("${RED}!!${NC} $tagname: $value")
            elif [[ "$tagname" == *Serial* ]] || [[ "$tagname" == *Owner* ]] || [[ "$tagname" == *Artist* ]] || [[ "$tagname" == *Creator* ]]; then
                metadata_lines+=("${YELLOW}!${NC} $tagname: $value")
            else
                metadata_lines+=("${DIM}-${NC} $tagname: $value")
            fi
        done <<< "$exif_output"
        if $has_metadata; then
            privacy_issues=$((privacy_issues + 1))
            echo -e " ${YELLOW}$fname${NC} — metadata found:"
            for line in "${metadata_lines[@]}"; do
                echo -e " $line"
            done
        else
            echo -e " ${GREEN}$fname${NC} — clean"
        fi
    done
    echo ""
    if [[ $privacy_issues -gt 0 ]]; then
        warn "$privacy_issues image(s) contain metadata that should be stripped"
    else
        success "All images are clean of sensitive metadata"
    fi
    # --- Cross-reference with content ---
    echo ""
    echo -e "${BOLD}Content Reference Check${NC}"
    # Collect all image references from content files
    # broken_refs / inconsistent_paths entries are stored as "mdfile|reference".
    local -a referenced_images=()
    local -a broken_refs=()
    local -a inconsistent_paths=()
    while IFS= read -r -d '' mdfile; do
        # Front matter image field (handles both `image: "..."` and ` image: "..."` under cover:)
        while IFS= read -r fm_image; do
            [[ -z "$fm_image" ]] && continue
            # Clean up: remove surrounding quotes and whitespace
            fm_image=$(echo "$fm_image" | sed 's/^[[:space:]]*image:[[:space:]]*//' | sed 's/^["'\'']//' | sed 's/["'\'']\s*$//')
            if [[ -n "$fm_image" ]] && [[ "$fm_image" != '""' ]] && [[ "$fm_image" != http* ]]; then
                # Normalize: Hugo serves /images/... from static/images/...
                local fs_path="static/${fm_image#/}"
                # Check if it's a broken reference
                if [[ ! -f "$fs_path" ]]; then
                    broken_refs+=("$mdfile|$fm_image")
                else
                    referenced_images+=("$fs_path")
                fi
                # Check for inconsistent path (missing leading /)
                if [[ "$fm_image" != /* ]]; then
                    inconsistent_paths+=("$mdfile|$fm_image")
                fi
            fi
        done < <(grep -E '^\s*image:\s' "$mdfile" 2>/dev/null || true)
        # Inline markdown images: ![alt](/images/foo.jpg#center)
        while IFS= read -r inline_ref; do
            [[ -z "$inline_ref" ]] && continue
            # Strip #fragment
            local clean_ref="${inline_ref%%#*}"
            local fs_ref="static/${clean_ref#/}"
            # Note: http(s) references take the else branch too, so a
            # "static/http..." entry is recorded for them; it never equals a
            # real image path in the comparison further down.
            if [[ ! -f "$fs_ref" ]] && [[ "$clean_ref" != http* ]]; then
                broken_refs+=("$mdfile|$inline_ref")
            else
                referenced_images+=("$fs_ref")
            fi
        done < <(grep -oP '!\[[^\]]*\]\(\K[^)]+' "$mdfile" 2>/dev/null || true)
    done < <(find "$CONTENT_DIR" -name '*.md' -print0)
    # Also check config.toml for avatarUrl
    local avatar_path
    avatar_path=$(grep 'avatarUrl' "$CONFIG_FILE" | sed 's/.*=\s*["'\'']\(.*\)["'\'']/\1/' || true)
    if [[ -n "$avatar_path" ]]; then
        referenced_images+=("static/${avatar_path#/}")
    fi
    # Find unreferenced images (compare using static/images/... paths)
    local -a unreferenced=()
    for img in "${image_files[@]}"; do
        local found=false
        for ref in "${referenced_images[@]}"; do
            if [[ "$ref" == "$img" ]]; then
                found=true
                break
            fi
        done
        if ! $found; then
            unreferenced+=("$img")
        fi
    done
    # Report broken references
    if [[ ${#broken_refs[@]} -gt 0 ]]; then
        warn "${#broken_refs[@]} broken image reference(s):"
        for entry in "${broken_refs[@]}"; do
            local file="${entry%%|*}"
            local ref="${entry##*|}"
            echo -e " ${RED}$ref${NC} in ${DIM}$file${NC}"
        done
    else
        success "No broken image references"
    fi
    # Report unreferenced images
    echo ""
    if [[ ${#unreferenced[@]} -gt 0 ]]; then
        warn "${#unreferenced[@]} unreferenced image(s) (not used in any content):"
        for img in "${unreferenced[@]}"; do
            local fsize
            fsize=$(stat -c%s "$img" 2>/dev/null || stat -f%z "$img" 2>/dev/null)
            echo -e " ${YELLOW}$(basename "$img")${NC} ($(human_size "$fsize"))"
        done
    else
        success "All images are referenced in content"
    fi
    # Report inconsistent paths
    if [[ ${#inconsistent_paths[@]} -gt 0 ]]; then
        echo ""
        warn "${#inconsistent_paths[@]} image path(s) missing leading '/':"
        for entry in "${inconsistent_paths[@]}"; do
            local file="${entry%%|*}"
            local ref="${entry##*|}"
            echo -e " ${YELLOW}$ref${NC} in ${DIM}$file${NC}"
        done
    fi
    # Export arrays for later phases (bash 4+ trick: print to temp files)
    # The ${arr[@]+"${arr[@]}"} form expands to nothing for an empty array
    # instead of tripping `set -u`; printf then still emits one empty line,
    # which readers skip.
    printf '%s\n' "${image_files[@]}" > /tmp/optimg_files.txt
    printf '%s\n' "${unreferenced[@]+"${unreferenced[@]}"}" > /tmp/optimg_unreferenced.txt
    printf '%s\n' "${broken_refs[@]+"${broken_refs[@]}"}" > /tmp/optimg_broken.txt
    echo "$total_size" > /tmp/optimg_total_size.txt
}
# ---------------------------------------------------------------------------
# PHASE 2: METADATA STRIPPING
# ---------------------------------------------------------------------------
# Phase 2: strip EXIF/XMP/IPTC metadata from every image listed by the audit
# (/tmp/optimg_files.txt). In dry-run mode only a notice is printed.
#
# JPEG/PNG files are auto-oriented first so the EXIF rotation is baked into
# the pixels before the Orientation tag disappears. Failures of either tool
# are reported per file and do not abort the whole run.
phase_strip_metadata() {
    header "PHASE 2: METADATA STRIPPING"
    if $DRY_RUN; then
        info "(dry-run) Would strip all EXIF/IPTC/XMP metadata from images"
        echo ""
        return
    fi
    # The audit writes no list when it found no images.
    if [[ ! -f /tmp/optimg_files.txt ]]; then
        warn "No image list from the audit phase; nothing to strip"
        return
    fi
    local -a image_files=()
    mapfile -t image_files < /tmp/optimg_files.txt
    local stripped=0
    local failed=0
    local img fname meta_check ext
    for img in "${image_files[@]}"; do
        [[ -z "$img" ]] && continue
        fname=$(basename "$img")
        # Check if image has strippable EXIF/XMP/IPTC metadata (not just file properties)
        # Use -EXIF:All -XMP:All -IPTC:All to only check real metadata groups
        meta_check=$(exiftool -s -s -s -EXIF:All -XMP:All -IPTC:All "$img" 2>/dev/null || true)
        if [[ -z "$meta_check" ]]; then
            echo -e " ${DIM}$fname — already clean, skipping${NC}"
            continue
        fi
        # Auto-orient JPEG/PNG before stripping (applies EXIF rotation to pixels)
        ext="${fname##*.}"
        ext=$(echo "$ext" | tr '[:upper:]' '[:lower:]')
        if [[ "$ext" == "jpg" ]] || [[ "$ext" == "jpeg" ]] || [[ "$ext" == "png" ]]; then
            # Still best-effort, but no longer silent: once the Orientation
            # tag is stripped a failed auto-orient cannot be recovered.
            if ! magick "$img" -auto-orient "$img" 2>/dev/null; then
                warn "$fname — auto-orient failed; image may display rotated after stripping"
            fi
        fi
        # Strip all metadata. Tested explicitly so a failure is reported
        # instead of killing the script silently under `set -e`.
        if exiftool -all= -overwrite_original "$img" 2>/dev/null; then
            stripped=$((stripped + 1))
            echo -e " ${GREEN}$fname${NC} — metadata stripped"
        else
            failed=$((failed + 1))
            error "$fname — exiftool could not strip metadata (file left as is)"
        fi
    done
    echo ""
    success "Stripped metadata from $stripped image(s)"
    if (( failed > 0 )); then
        warn "$failed image(s) could not be stripped"
    fi
}
# ---------------------------------------------------------------------------
# PHASE 3: CONVERT & COMPRESS
# ---------------------------------------------------------------------------
# Phase 3: delete unreferenced images, then convert every remaining image
# from the audit list to WebP (downscaling anything larger than
# MAX_WIDTH x MAX_HEIGHT) and remove the non-WebP original.
#
# Inputs : /tmp/optimg_files.txt, /tmp/optimg_unreferenced.txt (from phase 1)
# Outputs: /tmp/optimg_total_before.txt, /tmp/optimg_total_after.txt,
#          /tmp/optimg_converted.txt (read by the summary)
#
# An original is only deleted after its conversion succeeded. GIF files are
# skipped because cwebp cannot read them. In dry-run mode nothing is changed
# and the sizes are rough estimates.
phase_convert() {
    header "PHASE 3: CONVERT TO WEBP & COMPRESS"
    # The audit writes no list when it found no images.
    if [[ ! -f /tmp/optimg_files.txt ]]; then
        warn "No image list from the audit phase; nothing to convert"
        return
    fi
    local -a image_files=()
    mapfile -t image_files < /tmp/optimg_files.txt
    # Delete unreferenced images first
    local -a unreferenced=()
    if [[ -f /tmp/optimg_unreferenced.txt ]]; then
        mapfile -t unreferenced < /tmp/optimg_unreferenced.txt
    fi
    local img fsize
    if [[ ${#unreferenced[@]} -gt 0 ]] && [[ -n "${unreferenced[0]}" ]]; then
        echo -e "${BOLD}Removing unreferenced images${NC}"
        for img in "${unreferenced[@]}"; do
            [[ -z "$img" ]] && continue
            # Already gone: nothing to size or delete.
            [[ ! -f "$img" ]] && continue
            fsize=$(stat -c%s "$img" 2>/dev/null || stat -f%z "$img" 2>/dev/null)
            if $DRY_RUN; then
                echo -e " ${DIM}(dry-run) Would delete: $(basename "$img") ($(human_size "$fsize"))${NC}"
            else
                rm -f "$img"
                echo -e " ${RED}Deleted:${NC} $(basename "$img") ($(human_size "$fsize"))"
            fi
        done
        echo ""
    fi
    echo -e "${BOLD}Converting images to WebP (quality $WEBP_QUALITY, max ${MAX_WIDTH}x${MAX_HEIGHT})${NC}"
    printf " %-40s %-12s %-12s %s\n" "FILENAME" "BEFORE" "AFTER" "SAVINGS"
    printf " %-40s %-12s %-12s %s\n" "--------" "------" "-----" "-------"
    local total_before=0
    local total_after=0
    local converted=0
    local skipped=0
    local fname ext base ext_lower webp_path before_size after_size
    local est_after cur_width cur_height needs_resize
    local cwebp_input tmp_resized tmp_webp convert_ok savings savings_color
    for img in "${image_files[@]}"; do
        [[ -z "$img" ]] && continue
        # Skip if this was an unreferenced file we just deleted
        [[ ! -f "$img" ]] && continue
        fname=$(basename "$img")
        ext="${fname##*.}"
        base="${fname%.*}"
        ext_lower=$(echo "$ext" | tr '[:upper:]' '[:lower:]')
        # cwebp reads JPEG/PNG/TIFF/WebP but not GIF; trying it would fail
        # and, under `set -e`, abort the run half-way through.
        if [[ "$ext_lower" == "gif" ]]; then
            warn " Skipping $fname: cwebp cannot read GIF input"
            skipped=$((skipped + 1))
            continue
        fi
        webp_path="$IMAGES_DIR/${base}.webp"
        before_size=$(stat -c%s "$img" 2>/dev/null || stat -f%z "$img" 2>/dev/null)
        total_before=$((total_before + before_size))
        if $DRY_RUN; then
            echo -e " ${DIM}(dry-run) Would convert: $fname -> ${base}.webp${NC}"
            # Estimate: assume 80% reduction for JPEGs, 70% for PNGs, 10% for existing WebP
            est_after=$before_size
            case "$ext_lower" in
                jpg|jpeg) est_after=$((before_size / 5)) ;;
                png) est_after=$((before_size / 3)) ;;
                webp) est_after=$((before_size * 9 / 10)) ;;
            esac
            total_after=$((total_after + est_after))
            converted=$((converted + 1))
            continue
        fi
        # Get current dimensions
        read -r cur_width cur_height < <(identify -format "%w %h\n" "$img" 2>/dev/null || echo "0 0")
        needs_resize=false
        if (( cur_width > MAX_WIDTH )) || (( cur_height > MAX_HEIGHT )); then
            needs_resize=true
        fi
        # Determine the input for cwebp
        cwebp_input="$img"
        tmp_resized=""
        convert_ok=true
        if $needs_resize; then
            # Resize via ImageMagick, output to temp PNG for cwebp
            tmp_resized=$(mktemp /tmp/optimg_XXXXXX.png)
            if magick "$img" -resize "${MAX_WIDTH}x${MAX_HEIGHT}>" -quality 100 "$tmp_resized"; then
                info " Resized $fname: ${cur_width}x${cur_height} -> $(magick identify -format '%wx%h' "$tmp_resized")"
                cwebp_input="$tmp_resized"
            else
                convert_ok=false
            fi
        fi
        # Convert to WebP via cwebp (handles JPEG/PNG/WebP input natively)
        if $convert_ok; then
            if [[ "$ext_lower" == "webp" ]] && [[ "$img" == "$webp_path" ]]; then
                # Same input and output: use temp output
                tmp_webp=$(mktemp /tmp/optimg_XXXXXX.webp)
                if cwebp -q "$WEBP_QUALITY" "$cwebp_input" -o "$tmp_webp" 2>/dev/null; then
                    # Copy the bytes back instead of mv: mktemp creates the
                    # file with mode 0600, and mv would carry that restrictive
                    # mode over to the published image.
                    cat "$tmp_webp" > "$webp_path"
                else
                    convert_ok=false
                fi
                rm -f "$tmp_webp"
            else
                cwebp -q "$WEBP_QUALITY" "$cwebp_input" -o "$webp_path" 2>/dev/null || convert_ok=false
            fi
        fi
        # Cleanup temp file if we resized
        if [[ -n "$tmp_resized" ]]; then
            rm -f "$tmp_resized"
        fi
        if ! $convert_ok; then
            # Keep the original and account for it at its unchanged size.
            warn " Conversion failed for $fname; original kept"
            total_after=$((total_after + before_size))
            skipped=$((skipped + 1))
            continue
        fi
        # Step 3: Delete original if it's not already .webp
        if [[ "$ext_lower" != "webp" ]]; then
            rm -f "$img"
        fi
        after_size=$(stat -c%s "$webp_path" 2>/dev/null || stat -f%z "$webp_path" 2>/dev/null)
        total_after=$((total_after + after_size))
        savings=0
        if (( before_size > 0 )); then
            savings=$(( (before_size - after_size) * 100 / before_size ))
        fi
        savings_color="$GREEN"
        if (( savings < 10 )); then
            savings_color="$YELLOW"
        fi
        printf " %-40s %-12s %-12s ${savings_color}%s%%${NC}\n" \
            "${base}.webp" "$(human_size "$before_size")" "$(human_size "$after_size")" "$savings"
        converted=$((converted + 1))
    done
    echo ""
    local total_savings=0
    if (( total_before > 0 )); then
        total_savings=$(( (total_before - total_after) * 100 / total_before ))
    fi
    info "Converted $converted image(s)"
    if (( skipped > 0 )); then
        warn "Skipped $skipped image(s) that could not be converted"
    fi
    info "Total: $(human_size $total_before) -> $(human_size $total_after) (${total_savings}% reduction)"
    # Save totals for summary
    echo "$total_before" > /tmp/optimg_total_before.txt
    echo "$total_after" > /tmp/optimg_total_after.txt
    echo "$converted" > /tmp/optimg_converted.txt
}
# ---------------------------------------------------------------------------
# PHASE 4: UPDATE CONTENT REFERENCES
# ---------------------------------------------------------------------------
# Phase 4: point content at the converted images.
#
#   Step 1: in every Markdown file under $CONTENT_DIR, add the missing
#           leading "/" to front matter `image: "images/..."` values and
#           rewrite local .jpg/.jpeg/.png references (front matter and inline
#           `![alt](/images/...)`) to .webp.
#   Step 2: rewrite a local .png avatarUrl in $CONFIG_FILE to .webp.
#   Step 3: blank out front matter `image:` values that still do not resolve
#           to a file under static/.
#
# In dry-run mode the same checks run but no file is edited.
#
# NOTE(review): step 1 rewrites extensions without checking that the matching
# .webp exists. A reference to an image that was not converted (for example
# one in a subdirectory of $IMAGES_DIR, which the audit does not scan) would
# be rewritten and then cleared by step 3 — confirm no such references exist
# before running without --dry-run.
phase_update_refs() {
    header "PHASE 4: UPDATE CONTENT REFERENCES"
    local updated_files=0
    # --- Step 1: Update image extensions in content files ---
    # This must happen BEFORE broken ref clearing, since .jpg/.png files are now .webp
    echo -e "${BOLD}Updating image references (.jpg/.jpeg/.png -> .webp)${NC}"
    while IFS= read -r -d '' mdfile; do
        local changed=false
        # Normalize front matter paths first: change image: "images/... to image: "/images/...
        if grep -qE '^\s*image:\s*"images/' "$mdfile" 2>/dev/null; then
            if ! $DRY_RUN; then
                sed -i -E 's@^(\s*image:\s*)"images/@\1"/images/@' "$mdfile"
            fi
            changed=true
        fi
        # Update front matter image field (only local paths, not http URLs)
        # Handles both `image: "/images/..."` and ` image: "/images/..."` (indented under cover:)
        if grep -qE '^\s*image:\s*"/images/.*\.(jpg|jpeg|JPG|JPEG|png|PNG)"' "$mdfile" 2>/dev/null; then
            if ! $DRY_RUN; then
                sed -i -E 's@^(\s*image:\s*"/images/[^"]*)\.(jpg|jpeg|JPG|JPEG|png|PNG)"@\1.webp"@' "$mdfile"
            fi
            changed=true
        fi
        # Update inline markdown images: ![alt](/images/foo.jpg#center)
        # Only match local /images/ paths, not external URLs
        # The path part is "[^)#]*" in BOTH the grep test and the sed edit so
        # they always agree; it allows dots inside the file name
        # (e.g. my.photo.jpg) and stops before any #fragment.
        if grep -qP '!\[[^\]]*\]\(/images/[^)#]*\.(jpg|jpeg|JPG|JPEG|png|PNG)(#[^)]*)?\)' "$mdfile" 2>/dev/null; then
            if ! $DRY_RUN; then
                sed -i -E 's@(!\[[^]]*\]\(/images/[^)#]*)\.(jpg|jpeg|JPG|JPEG|png|PNG)([#][^)]*)?(\))@\1.webp\3\4@g' "$mdfile"
            fi
            changed=true
        fi
        if $changed; then
            local relpath="${mdfile}"
            if $DRY_RUN; then
                echo -e " ${DIM}(dry-run) Would update refs in: $relpath${NC}"
            else
                echo -e " ${GREEN}Updated${NC} $relpath"
            fi
            updated_files=$((updated_files + 1))
        fi
    done < <(find "$CONTENT_DIR" -name '*.md' -print0)
    # --- Step 2: Update config.toml avatar ---
    # Test and edit use the same pattern, so "Updated" is only reported when
    # the substitution can actually apply (any local /images/*.png avatar).
    if grep -qE '^\s*avatarUrl\s*=\s*"/images/[^"]*\.png"' "$CONFIG_FILE" 2>/dev/null; then
        if $DRY_RUN; then
            echo -e " ${DIM}(dry-run) Would update avatarUrl in $CONFIG_FILE${NC}"
        else
            sed -i -E 's@^(\s*avatarUrl\s*=\s*"/images/[^"]*)\.png"@\1.webp"@' "$CONFIG_FILE"
            echo -e " ${GREEN}Updated${NC} avatarUrl in $CONFIG_FILE"
        fi
        updated_files=$((updated_files + 1))
    fi
    # --- Step 3: Clear genuinely broken image references ---
    # Only clear refs that still don't resolve after extension updates
    # (e.g., placeholder /images/img.jpg that was never a real image)
    echo ""
    echo -e "${BOLD}Checking for remaining broken image references${NC}"
    local cleared=0
    while IFS= read -r -d '' mdfile; do
        # Check front matter image fields
        while IFS= read -r fm_line; do
            [[ -z "$fm_line" ]] && continue
            local fm_image
            fm_image=$(echo "$fm_line" | sed 's/^[[:space:]]*image:[[:space:]]*//' | sed 's/^["'\'']//' | sed 's/["'\'']\s*$//')
            [[ -z "$fm_image" ]] && continue
            [[ "$fm_image" == '""' ]] && continue
            [[ "$fm_image" == http* ]] && continue
            local fs_path="static/${fm_image#/}"
            if [[ ! -f "$fs_path" ]]; then
                if $DRY_RUN; then
                    echo -e " ${DIM}(dry-run) Would clear broken ref in: $mdfile (was: $fm_image)${NC}"
                else
                    # Escape every ERE metacharacter plus the "@" delimiter so
                    # the path is matched literally by the `sed -E` below
                    # (names such as "photo (1).jpg" contain ERE operators).
                    local escaped_image
                    escaped_image=$(printf '%s\n' "$fm_image" | sed 's#[][\.*^$+?(){}|@]#\\&#g')
                    sed -i -E "s@^(\s*image:\s*).*${escaped_image}.*@\1\"\"@" "$mdfile"
                    echo -e " ${GREEN}Cleared${NC} broken ref ${DIM}$fm_image${NC} in ${DIM}$mdfile${NC}"
                    cleared=$((cleared + 1))
                fi
            fi
        done < <(grep -E '^\s*image:\s' "$mdfile" 2>/dev/null || true)
    done < <(find "$CONTENT_DIR" -name '*.md' -print0)
    if [[ $cleared -eq 0 ]] && ! $DRY_RUN; then
        success "No broken image references remaining"
    fi
    echo ""
    info "Updated $updated_files file(s)"
}
# ---------------------------------------------------------------------------
# PHASE 5: SUMMARY
# ---------------------------------------------------------------------------
# Phase 5: print the final report from the totals left in /tmp by the
# conversion phase (falling back to the audit total, then to 0), list the
# suggested next steps, and delete the /tmp hand-off files.
phase_summary() {
    header "PHASE 5: SUMMARY"

    if $DRY_RUN; then
        echo -e "${BOLD}${YELLOW}DRY RUN — no changes were made${NC}"
        echo ""
    fi

    # Each value degrades to 0 when its file is missing.
    local total_before total_after converted
    total_before=$(cat /tmp/optimg_total_before.txt 2>/dev/null || cat /tmp/optimg_total_size.txt 2>/dev/null || echo 0)
    total_after=$(cat /tmp/optimg_total_after.txt 2>/dev/null || echo 0)
    converted=$(cat /tmp/optimg_converted.txt 2>/dev/null || echo 0)

    echo -e " Images processed: ${BOLD}$converted${NC}"

    # Size figures are only meaningful once something was written.
    if (( total_after > 0 )); then
        local savings=0
        if (( total_before > 0 )); then
            savings=$(( (total_before - total_after) * 100 / total_before ))
        fi
        echo -e " Size before: ${BOLD}$(human_size "$total_before")${NC}"
        echo -e " Size after: ${BOLD}$(human_size "$total_after")${NC}"
        echo -e " Total reduction: ${BOLD}${GREEN}${savings}%${NC}"
    fi

    echo ""
    echo -e " ${BOLD}Next steps:${NC}"
    echo -e " 1. Run ${CYAN}hugo server${NC} and verify images look correct"
    echo -e " 2. Check the browser dev tools Network tab for proper WebP delivery"
    echo -e " 3. Commit when satisfied: ${CYAN}git add -A && git commit -m \"optimize: convert images to webp, strip metadata\"${NC}"

    # Cleanup temp files
    rm -f /tmp/optimg_*.txt
}
# ---------------------------------------------------------------------------
# MAIN
# ---------------------------------------------------------------------------
# Entry point: print the banner, run the audit, then (unless --audit-only or
# the user declines) run the strip, convert, update-refs and summary phases.
#
# The phases talk to each other through fixed-name /tmp/optimg_*.txt files.
# Leftovers from an earlier, aborted run are removed up front so they can
# never be mistaken for this run's audit results, and an EXIT trap removes
# them again however the script ends (including `set -e` aborts).
main() {
    rm -f /tmp/optimg_*.txt
    trap 'rm -f /tmp/optimg_*.txt' EXIT

    echo -e "${BOLD}${CYAN}"
    echo " ┌─────────────────────────────────────────┐"
    echo " │ fosscat.com Image Optimizer │"
    echo " │ Strip metadata · Convert to WebP │"
    echo " │ Resize · Audit references │"
    echo " └─────────────────────────────────────────┘"
    echo -e "${NC}"
    if $DRY_RUN; then
        echo -e " ${YELLOW}Running in DRY RUN mode — no files will be modified${NC}"
        echo ""
    fi
    # Phase 1: Audit (always runs)
    phase_audit
    if $AUDIT_ONLY; then
        echo ""
        info "Audit complete. Run without --audit-only to process images."
        rm -f /tmp/optimg_*.txt
        return
    fi
    # The audit writes no file list when it found no images; the remaining
    # phases would have nothing to work on.
    if [[ ! -f /tmp/optimg_files.txt ]]; then
        echo ""
        info "Nothing to process."
        return
    fi
    # Confirm before proceeding
    echo ""
    if ! $AUTO_YES && ! $DRY_RUN; then
        echo -en " ${BOLD}Proceed with optimization? [y/N]${NC} "
        read -r answer
        if [[ ! "$answer" =~ ^[Yy]$ ]]; then
            info "Aborted."
            rm -f /tmp/optimg_*.txt
            exit 0
        fi
    fi
    # Phase 2: Strip metadata
    phase_strip_metadata
    # Phase 3: Convert & compress
    phase_convert
    # Phase 4: Update references
    phase_update_refs
    # Phase 5: Summary
    phase_summary
}