update images to webp, update pre-commit hooks
This commit is contained in:
+89
-14
@@ -1,13 +1,15 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
check-tags.py — Tag similarity checker for Hugo content
|
||||
check-tags.py — Semantic tag similarity checker for Hugo content
|
||||
|
||||
Compares tags in staged files against all existing tags in the site.
|
||||
Warns and blocks commit when a new tag looks similar to an existing one.
|
||||
Warns and blocks commit when a new tag is semantically similar to an existing one.
|
||||
|
||||
Similarity checks (via difflib.SequenceMatcher):
|
||||
- Ratio >= 0.6 (catches typos, reordered chars, partial matches)
|
||||
- One tag is a substring of the other
|
||||
Uses spaCy word vectors (en_core_web_lg) for cosine similarity — catches
|
||||
conceptual matches like "parenting" ≈ "fatherhood" while ignoring unrelated
|
||||
words that happen to share letters like "dogs" vs "daily".
|
||||
|
||||
Fallback: if spaCy is unavailable, uses conservative edit-distance checks only.
|
||||
|
||||
Skip with: SKIP_TAG_CHECK=1 git commit
|
||||
|
||||
@@ -17,6 +19,7 @@ Usage: check-tags.py <file1.md> [file2.md ...]
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
from difflib import SequenceMatcher
|
||||
from pathlib import Path
|
||||
|
||||
@@ -28,7 +31,44 @@ CYAN = "\033[0;36m"
|
||||
BOLD = "\033[1m"
|
||||
NC = "\033[0m"
|
||||
|
||||
SIMILARITY_THRESHOLD = 0.6 # SequenceMatcher ratio (0-1)
|
||||
# Cosine similarity threshold for word vectors (0-1).
|
||||
# 0.65 catches morphological variants (parenting/parenthood) and synonyms
|
||||
# (cannabis/marijuana) while avoiding unrelated words. Tuned for short blog tags.
|
||||
SEMANTIC_THRESHOLD = 0.65
|
||||
|
||||
# Edit-distance threshold — only used as a typo catcher alongside semantics.
|
||||
# 0.85 is very conservative: catches "kubernetse" vs "kubernetes" but not
|
||||
# "dogs" vs "daily" (which scores ~0.40).
|
||||
TYPO_THRESHOLD = 0.85
|
||||
|
||||
# Substring match: shorter tag must be at least this many chars
|
||||
# and cover at least this fraction of the longer tag.
|
||||
SUBSTRING_MIN_LEN = 5
|
||||
SUBSTRING_MIN_RATIO = 0.6
|
||||
|
||||
# --- spaCy setup (lazy, with graceful fallback) ---
|
||||
_nlp = None
|
||||
_spacy_available = None
|
||||
|
||||
|
||||
def _load_spacy():
    """Return ``(nlp, available)``, attempting to load the spaCy model only once.

    The outcome of the first attempt is cached in the module-level
    ``_nlp`` / ``_spacy_available`` pair, so later calls return immediately.
    On ImportError or OSError a warning is printed and ``(None, False)`` is
    cached instead.
    """
    global _nlp, _spacy_available

    # None means "not attempted yet"; True/False is a cached outcome.
    if _spacy_available is None:
        try:
            import spacy

            model = spacy.load("en_core_web_lg")
        except (ImportError, OSError) as e:
            print(
                f"{YELLOW}spaCy not available ({e}), "
                f"falling back to edit-distance only{NC}"
            )
            _nlp, _spacy_available = None, False
        else:
            _nlp, _spacy_available = model, True

    return _nlp, _spacy_available
|
||||
|
||||
|
||||
def extract_tags(filepath: Path, *, keep_blanks: bool = False) -> list[str]:
|
||||
@@ -74,26 +114,53 @@ def extract_tags(filepath: Path, *, keep_blanks: bool = False) -> list[str]:
|
||||
return [t for t in tags if t]
|
||||
|
||||
|
||||
def find_similar(
    new_tag: str,
    existing_tags: set[str],
    existing_docs: dict | None = None,
) -> list[tuple[str, str]]:
    """Find existing tags similar to a new tag.

    Three checks run per existing tag, in order; the first two short-circuit:

    1. Restricted substring match: the shorter tag must be at least
       SUBSTRING_MIN_LEN characters and cover SUBSTRING_MIN_RATIO of the
       longer one.
    2. Semantic similarity via spaCy vectors (only when spaCy loaded and both
       docs have a vector), flagged at SEMANTIC_THRESHOLD or above.
    3. Edit-distance typo check via SequenceMatcher, flagged at
       TYPO_THRESHOLD or above.

    Args:
        new_tag: The tag being introduced.
        existing_tags: Tags already present in the site.
        existing_docs: Optional mapping of tag string to a pre-computed spaCy
            Doc, used to avoid redundant nlp() calls. Tags missing from the
            mapping are processed on demand.

    Returns:
        A list of ``(existing_tag, reason)`` tuples, in sorted tag order.
    """
    nlp, has_spacy = _load_spacy()
    # The new tag's Doc does not depend on the loop variable: build it once.
    doc_new = nlp(new_tag) if has_spacy else None
    similar = []

    for existing in sorted(existing_tags):
        if existing == new_tag:
            continue

        # --- Check 1: Substring match (restricted) ---
        shorter, longer = sorted([new_tag, existing], key=len)
        if (
            len(shorter) >= SUBSTRING_MIN_LEN
            and shorter in longer
            and len(shorter) / len(longer) >= SUBSTRING_MIN_RATIO
        ):
            similar.append((existing, "substring match"))
            continue

        # --- Check 2: Semantic similarity (primary) ---
        if has_spacy:
            # Use the cache when it has this tag; otherwise compute on demand
            # instead of raising KeyError on an incomplete cache.
            doc_ex = existing_docs.get(existing) if existing_docs else None
            if doc_ex is None:
                doc_ex = nlp(existing)

            if doc_new.has_vector and doc_ex.has_vector:
                score = doc_new.similarity(doc_ex)
                if score >= SEMANTIC_THRESHOLD:
                    similar.append((existing, f"semantic: {score:.0%}"))
                    continue

        # --- Check 3: Typo detection via edit distance (conservative) ---
        ratio = SequenceMatcher(None, new_tag, existing).ratio()
        if ratio >= TYPO_THRESHOLD:
            similar.append((existing, f"typo match: {ratio:.0%}"))

    return similar
|
||||
|
||||
@@ -130,8 +197,13 @@ def main() -> int:
|
||||
print(f"{GREEN}No existing tags found, nothing to compare against.{NC}")
|
||||
return 0
|
||||
|
||||
# Pre-compute spaCy docs for all existing tags (avoids repeated nlp() calls)
|
||||
nlp, has_spacy = _load_spacy()
|
||||
existing_docs = {tag: nlp(tag) for tag in all_tags} if has_spacy else None
|
||||
|
||||
# Check staged files for similar tags
|
||||
found_issues = False
|
||||
start = time.monotonic()
|
||||
|
||||
for staged_file in staged_files:
|
||||
filepath = repo_root / staged_file
|
||||
@@ -162,7 +234,7 @@ def main() -> int:
|
||||
continue
|
||||
|
||||
# New tag — check for similarity
|
||||
similar = find_similar(tag, all_tags)
|
||||
similar = find_similar(tag, all_tags, existing_docs)
|
||||
|
||||
if similar:
|
||||
found_issues = True
|
||||
@@ -172,15 +244,18 @@ def main() -> int:
|
||||
for existing, reason in similar:
|
||||
print(f" {CYAN}\u2192 {existing} ({reason}){NC}")
|
||||
|
||||
elapsed = time.monotonic() - start
|
||||
|
||||
if found_issues:
|
||||
print()
|
||||
print(f"{RED}{BOLD}Tag similarity check failed.{NC}")
|
||||
print(f"{RED}Consider using an existing tag, or skip with:{NC}")
|
||||
print(f"{RED} SKIP_TAG_CHECK=1 git commit{NC}")
|
||||
print(f"{RED} ({elapsed:.1f}s){NC}")
|
||||
print()
|
||||
return 1
|
||||
|
||||
print(f"{GREEN}Tag check passed \u2014 no similar tags found.{NC}")
|
||||
print(f"{GREEN}Tag check passed \u2014 no similar tags found. ({elapsed:.1f}s){NC}")
|
||||
return 0
|
||||
|
||||
|
||||
|
||||
Executable
+32
@@ -0,0 +1,32 @@
|
||||
#!/usr/bin/env python3
"""List all unique tags across Hugo content, sorted alphabetically."""

import re
from pathlib import Path

# Front matter is everything between a leading '---' fence and the next one.
# The closing fence may be followed by a newline OR by end-of-file, so a file
# that consists only of front matter (no trailing newline) is still parsed.
_FRONT_MATTER_RE = re.compile(r"^---\s*\n(.*?)\n---\s*(?:\n|\Z)", re.DOTALL)
# Inline form:  tags: [a, "b", 'c']
_INLINE_TAGS_RE = re.compile(r"^tags:\s*\[([^\]]*)\]", re.MULTILINE)
# Block-list form:
#   tags:
#     - a
#     - b
_LIST_TAGS_RE = re.compile(r"^tags:\s*\n((?:\s+-\s+.+\n?)+)", re.MULTILINE)
_LIST_ITEM_RE = re.compile(r"^\s+-\s+(.*)", re.MULTILINE)


def _normalize(raw: str) -> str:
    """Strip whitespace and surrounding quotes from a raw tag and lowercase it."""
    return raw.strip().strip("\"'").lower()


def parse_tags(text: str) -> set[str]:
    """Return the normalized tags declared in the front matter of *text*.

    The inline form is preferred; the block-list form is used when no
    non-empty inline list is present. Returns an empty set when the text has
    no front matter or declares no tags.
    """
    fm = _FRONT_MATTER_RE.match(text)
    if not fm:
        return set()
    front = fm.group(1)

    inline = _INLINE_TAGS_RE.search(front)
    if inline and inline.group(1).strip():
        raw_tags = inline.group(1).split(",")
    else:
        block = _LIST_TAGS_RE.search(front)
        raw_tags = _LIST_ITEM_RE.findall(block.group(1)) if block else []

    return {tag for tag in map(_normalize, raw_tags) if tag}


def collect_tags(content_dir: Path) -> set[str]:
    """Return the union of tags from every ``*.md`` file under *content_dir*."""
    tags: set[str] = set()
    for md in content_dir.rglob("*.md"):
        tags |= parse_tags(md.read_text(encoding="utf-8"))
    return tags


def main() -> None:
    """Print every unique tag, one per line, in alphabetical order."""
    content_dir = Path(__file__).resolve().parent.parent / "content"
    for tag in sorted(collect_tags(content_dir)):
        print(tag)


if __name__ == "__main__":
    main()
|
||||
Executable
+738
@@ -0,0 +1,738 @@
|
||||
#!/usr/bin/env bash
|
||||
# optimize-images.sh — Image auditor, metadata stripper, and WebP optimizer for fosscat.com
|
||||
#
|
||||
# Usage:
|
||||
# ./scripts/optimize-images.sh # Interactive mode
|
||||
# ./scripts/optimize-images.sh --dry-run # Show what would happen without changing anything
|
||||
# ./scripts/optimize-images.sh --yes # Skip all confirmation prompts
|
||||
# ./scripts/optimize-images.sh --audit-only # Only run the audit phase (no changes)
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Configuration
|
||||
# ---------------------------------------------------------------------------
|
||||
IMAGES_DIR="static/images"
|
||||
CONTENT_DIR="content"
|
||||
CONFIG_FILE="config.toml"
|
||||
MAX_WIDTH=2000
|
||||
MAX_HEIGHT=2000
|
||||
WEBP_QUALITY=82
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CLI flags
|
||||
# ---------------------------------------------------------------------------
|
||||
DRY_RUN=false
|
||||
AUTO_YES=false
|
||||
AUDIT_ONLY=false
|
||||
|
||||
for arg in "$@"; do
|
||||
case "$arg" in
|
||||
--dry-run) DRY_RUN=true ;;
|
||||
--yes|-y) AUTO_YES=true ;;
|
||||
--audit-only) AUDIT_ONLY=true ;;
|
||||
--help|-h)
|
||||
echo "Usage: $0 [--dry-run] [--yes] [--audit-only]"
|
||||
echo ""
|
||||
echo " --dry-run Show what would happen without making changes"
|
||||
echo " --yes, -y Skip confirmation prompts"
|
||||
echo " --audit-only Only run the audit (no modifications)"
|
||||
echo " --help, -h Show this help"
|
||||
exit 0
|
||||
;;
|
||||
*)
|
||||
echo "Unknown option: $arg"
|
||||
echo "Run $0 --help for usage"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Colors and formatting
|
||||
# ---------------------------------------------------------------------------
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
BLUE='\033[0;34m'
|
||||
CYAN='\033[0;36m'
|
||||
BOLD='\033[1m'
|
||||
DIM='\033[2m'
|
||||
NC='\033[0m' # No Color
|
||||
|
||||
# Shared formatter for the leveled helpers below: prints "<color>[TAG]<reset> message".
_log() {
    local color="$1" tag="$2"
    shift 2
    echo -e "${color}[${tag}]${NC} $*"
}

info()    { _log "$BLUE" "INFO" "$@"; }
success() { _log "$GREEN" "OK" "$@"; }
warn()    { _log "$YELLOW" "WARN" "$@"; }
error()   { _log "$RED" "ERROR" "$@"; }

# Section banner surrounded by blank lines.
header() {
    echo -e "\n${BOLD}${CYAN}═══ $* ═══${NC}\n"
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dependency checks
|
||||
# ---------------------------------------------------------------------------
|
||||
# Verify that every external tool this script invokes is on PATH; exit 1 with
# install hints otherwise.
#
# The list mirrors the commands actually called later in the script:
# exiftool, magick (auto-orient / resize), identify (dimensions) and cwebp.
# `magick` is checked instead of the legacy `convert` name, which the script
# never calls; an ImageMagick-6-only install has `convert` but no `magick`
# and would otherwise fail midway through processing.
check_deps() {
    local missing=()
    local cmd
    for cmd in exiftool magick identify cwebp; do
        if ! command -v "$cmd" &>/dev/null; then
            missing+=("$cmd")
        fi
    done

    if [[ ${#missing[@]} -gt 0 ]]; then
        error "Missing required tools: ${missing[*]}"
        echo " These are provided by the Nix dev shell. Run:"
        echo " nix develop # or let direnv load the flake"
        echo ""
        echo " Required nix packages:"
        echo " perl538Packages.ImageExifTool (exiftool)"
        echo " imagemagick (magick, identify)"
        echo " libwebp (cwebp)"
        exit 1
    fi
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Ensure we're in the project root
|
||||
# ---------------------------------------------------------------------------
|
||||
if [[ ! -f "$CONFIG_FILE" ]] || [[ ! -d "$IMAGES_DIR" ]]; then
|
||||
error "Must be run from the project root (where $CONFIG_FILE and $IMAGES_DIR exist)"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
check_deps
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Utility: human-readable file size
|
||||
# ---------------------------------------------------------------------------
|
||||
# Print a byte count as "N B", "N KB" (whole KiB, truncated) or
# "N.D MB" (one truncated decimal digit).
human_size() {
    local bytes=$1
    local -r kib=1024 mib=1048576

    if (( bytes < kib )); then
        echo "${bytes} B"
        return
    fi

    if (( bytes < mib )); then
        echo "$(( bytes / kib )) KB"
        return
    fi

    # Tenths are derived with integer math only: remainder * 10 / MiB.
    printf '%d.%d MB\n' "$(( bytes / mib ))" "$(( (bytes % mib) * 10 / mib ))"
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Utility: confirm prompt (respects --yes and --dry-run)
|
||||
# ---------------------------------------------------------------------------
|
||||
# Ask a yes/no question; succeed only on a single "y" or "Y".
# --yes answers yes without prompting; --dry-run prints what would have been
# asked and also answers yes.
confirm() {
    local prompt="$1"

    if $AUTO_YES; then
        return 0
    elif $DRY_RUN; then
        echo -e " ${DIM}(dry-run: would ask) $prompt${NC}"
        return 0
    fi

    echo -en " $prompt ${BOLD}[y/N]${NC} "
    read -r answer
    case "$answer" in
        [Yy]) return 0 ;;
        *)    return 1 ;;
    esac
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# PHASE 1: AUDIT
|
||||
# ---------------------------------------------------------------------------
|
||||
# Phase 1: read-only audit of $IMAGES_DIR.
#
# Prints an inventory table, a metadata/privacy scan (one exiftool call per
# image), and a cross-reference of image paths used in $CONTENT_DIR markdown
# and $CONFIG_FILE. On success it writes the hand-off files read by later
# phases:
#   /tmp/optimg_files.txt         one image path per line
#   /tmp/optimg_unreferenced.txt  images no content refers to
#   /tmp/optimg_broken.txt        "mdfile|ref" entries that do not resolve
#   /tmp/optimg_total_size.txt    total size in bytes
# Returns early (writing nothing) when no images are found.
phase_audit() {
    header "PHASE 1: IMAGE AUDIT"

    # Loop variables are declared local so they do not leak into the script's
    # global scope.
    local f img line ref entry mdfile fm_image inline_ref

    # Collect all image files
    local -a image_files=()
    while IFS= read -r -d '' f; do
        image_files+=("$f")
    done < <(find "$IMAGES_DIR" -maxdepth 1 -type f \( -iname '*.jpg' -o -iname '*.jpeg' -o -iname '*.png' -o -iname '*.webp' -o -iname '*.gif' \) -print0 | sort -z)

    if [[ ${#image_files[@]} -eq 0 ]]; then
        warn "No images found in $IMAGES_DIR"
        return
    fi

    # --- Image inventory table ---
    echo -e "${BOLD}Image Inventory${NC}"
    printf " %-40s %-6s %-12s %s\n" "FILENAME" "FORMAT" "DIMENSIONS" "SIZE"
    printf " %-40s %-6s %-12s %s\n" "--------" "------" "----------" "----"

    local total_size=0
    for img in "${image_files[@]}"; do
        local fname
        fname=$(basename "$img")
        local ext="${fname##*.}"
        local fsize
        # GNU stat first, BSD stat as fallback
        fsize=$(stat -c%s "$img" 2>/dev/null || stat -f%z "$img" 2>/dev/null)
        total_size=$((total_size + fsize))
        local dims
        dims=$(identify -format "%wx%h" "$img" 2>/dev/null || echo "unknown")
        printf " %-40s %-6s %-12s %s\n" "$fname" "$ext" "$dims" "$(human_size "$fsize")"
    done

    echo ""
    info "Total: ${#image_files[@]} images, $(human_size $total_size)"

    # --- EXIF / Metadata scan ---
    echo ""
    echo -e "${BOLD}Metadata / Privacy Scan${NC}"

    local privacy_issues=0
    # Sensitive tag names to check (extracted in a single exiftool call per image)
    local sensitive_tag_args=(
        -GPSLatitude -GPSLongitude -GPSPosition
        -SerialNumber -CameraSerialNumber -BodySerialNumber -LensSerialNumber
        -OwnerName -Artist -Copyright -Creator -Rights
        -By-line -Contact
        -Make -Model -LensModel -Software
        -DateTime -DateTimeOriginal -CreateDate
        -CreatorTool -ImageDescription -UserComment
    )

    for img in "${image_files[@]}"; do
        local fname
        fname=$(basename "$img")
        local has_metadata=false
        local metadata_lines=()

        # Single exiftool call to extract all sensitive tags at once
        local exif_output
        exif_output=$(exiftool -s -f "${sensitive_tag_args[@]}" "$img" 2>/dev/null || true)

        while IFS= read -r line; do
            [[ -z "$line" ]] && continue
            # exiftool -s output format: "TagName : value"
            local tagname value
            tagname=$(echo "$line" | sed 's/\s*:.*//' | xargs)
            value=$(echo "$line" | sed 's/^[^:]*:\s*//')

            # Skip tags with no value (exiftool -f shows "-" for missing tags)
            [[ "$value" == "-" ]] && continue
            [[ -z "$value" ]] && continue

            has_metadata=true
            # Highlight GPS data in red
            if [[ "$tagname" == *GPS* ]] || [[ "$tagname" == *Latitude* ]] || [[ "$tagname" == *Longitude* ]]; then
                metadata_lines+=("${RED}!!${NC} $tagname: $value")
            elif [[ "$tagname" == *Serial* ]] || [[ "$tagname" == *Owner* ]] || [[ "$tagname" == *Artist* ]] || [[ "$tagname" == *Creator* ]]; then
                metadata_lines+=("${YELLOW}!${NC} $tagname: $value")
            else
                metadata_lines+=("${DIM}-${NC} $tagname: $value")
            fi
        done <<< "$exif_output"

        if $has_metadata; then
            privacy_issues=$((privacy_issues + 1))
            echo -e " ${YELLOW}$fname${NC} — metadata found:"
            for line in "${metadata_lines[@]}"; do
                echo -e " $line"
            done
        else
            echo -e " ${GREEN}$fname${NC} — clean"
        fi
    done

    echo ""
    if [[ $privacy_issues -gt 0 ]]; then
        warn "$privacy_issues image(s) contain metadata that should be stripped"
    else
        success "All images are clean of sensitive metadata"
    fi

    # --- Cross-reference with content ---
    echo ""
    echo -e "${BOLD}Content Reference Check${NC}"

    # Collect all image references from content files
    local -a referenced_images=()
    local -a broken_refs=()
    local -a inconsistent_paths=()

    while IFS= read -r -d '' mdfile; do
        # Front matter image field (handles both top-level `image:` and an
        # indented `image:` under cover:)
        while IFS= read -r fm_image; do
            [[ -z "$fm_image" ]] && continue
            # Clean up: remove the key, surrounding quotes and whitespace
            fm_image=$(echo "$fm_image" | sed 's/^[[:space:]]*image:[[:space:]]*//' | sed 's/^["'\'']//' | sed 's/["'\'']\s*$//')

            if [[ -n "$fm_image" ]] && [[ "$fm_image" != '""' ]] && [[ "$fm_image" != http* ]]; then
                # Normalize: Hugo serves /images/... from static/images/...
                local fs_path="static/${fm_image#/}"

                # Check if it's a broken reference
                if [[ ! -f "$fs_path" ]]; then
                    broken_refs+=("$mdfile|$fm_image")
                else
                    referenced_images+=("$fs_path")
                fi

                # Check for inconsistent path (missing leading /)
                if [[ "$fm_image" != /* ]]; then
                    inconsistent_paths+=("$mdfile|$fm_image")
                fi
            fi
        done < <(grep -E '^\s*image:\s' "$mdfile" 2>/dev/null || true)

        # Inline markdown images: ![alt](path)
        while IFS= read -r inline_ref; do
            [[ -z "$inline_ref" ]] && continue
            # Strip #fragment
            local clean_ref="${inline_ref%%#*}"

            # External URLs are neither broken nor local files; do not record
            # them as static/ paths.
            [[ "$clean_ref" == http* ]] && continue

            local fs_ref="static/${clean_ref#/}"
            if [[ ! -f "$fs_ref" ]]; then
                broken_refs+=("$mdfile|$inline_ref")
            else
                referenced_images+=("$fs_ref")
            fi
        done < <(grep -oP '!\[[^\]]*\]\(\K[^)]+' "$mdfile" 2>/dev/null || true)

    done < <(find "$CONTENT_DIR" -name '*.md' -print0)

    # Also check config.toml for avatarUrl
    local avatar_path
    avatar_path=$(grep 'avatarUrl' "$CONFIG_FILE" | sed 's/.*=\s*["'\'']\(.*\)["'\'']/\1/' || true)
    if [[ -n "$avatar_path" ]]; then
        referenced_images+=("static/${avatar_path#/}")
    fi

    # Find unreferenced images (compare using static/images/... paths)
    local -a unreferenced=()
    for img in "${image_files[@]}"; do
        local found=false
        # Guarded expansion: with `set -u`, older bash treats an empty array's
        # [@] expansion as an unbound variable.
        for ref in ${referenced_images[@]+"${referenced_images[@]}"}; do
            if [[ "$ref" == "$img" ]]; then
                found=true
                break
            fi
        done
        if ! $found; then
            unreferenced+=("$img")
        fi
    done

    # Report broken references
    if [[ ${#broken_refs[@]} -gt 0 ]]; then
        warn "${#broken_refs[@]} broken image reference(s):"
        for entry in "${broken_refs[@]}"; do
            local file="${entry%%|*}"
            local ref="${entry##*|}"
            echo -e " ${RED}$ref${NC} in ${DIM}$file${NC}"
        done
    else
        success "No broken image references"
    fi

    # Report unreferenced images
    echo ""
    if [[ ${#unreferenced[@]} -gt 0 ]]; then
        warn "${#unreferenced[@]} unreferenced image(s) (not used in any content):"
        for img in "${unreferenced[@]}"; do
            local fsize
            fsize=$(stat -c%s "$img" 2>/dev/null || stat -f%z "$img" 2>/dev/null)
            echo -e " ${YELLOW}$(basename "$img")${NC} ($(human_size "$fsize"))"
        done
    else
        success "All images are referenced in content"
    fi

    # Report inconsistent paths
    if [[ ${#inconsistent_paths[@]} -gt 0 ]]; then
        echo ""
        warn "${#inconsistent_paths[@]} image path(s) missing leading '/':"
        for entry in "${inconsistent_paths[@]}"; do
            local file="${entry%%|*}"
            local ref="${entry##*|}"
            echo -e " ${YELLOW}$ref${NC} in ${DIM}$file${NC}"
        done
    fi

    # Hand the arrays to later phases through temp files
    printf '%s\n' "${image_files[@]}" > /tmp/optimg_files.txt
    printf '%s\n' "${unreferenced[@]+"${unreferenced[@]}"}" > /tmp/optimg_unreferenced.txt
    printf '%s\n' "${broken_refs[@]+"${broken_refs[@]}"}" > /tmp/optimg_broken.txt
    echo "$total_size" > /tmp/optimg_total_size.txt
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# PHASE 2: METADATA STRIPPING
|
||||
# ---------------------------------------------------------------------------
|
||||
# Phase 2: remove EXIF/XMP/IPTC metadata from every image listed in
# /tmp/optimg_files.txt (written by the audit phase).
#
# JPEG/PNG files are auto-oriented first so the rotation stored in EXIF is
# applied to the pixels before the tag disappears. In dry-run mode nothing is
# touched. Returns 1 (after reporting the file) if exiftool fails to strip an
# image.
phase_strip_metadata() {
    header "PHASE 2: METADATA STRIPPING"

    if $DRY_RUN; then
        info "(dry-run) Would strip all EXIF/IPTC/XMP metadata from images"
        echo ""
        return
    fi

    local -a image_files=()
    mapfile -t image_files < /tmp/optimg_files.txt

    local stripped=0
    local img fname ext meta_check
    # Guarded expansion keeps `set -u` happy on older bash when the list is empty.
    for img in ${image_files[@]+"${image_files[@]}"}; do
        [[ -z "$img" ]] && continue
        fname=$(basename "$img")

        # Check if image has strippable EXIF/XMP/IPTC metadata (not just file properties)
        # Use -EXIF:All -XMP:All -IPTC:All to only check real metadata groups
        meta_check=$(exiftool -s -s -s -EXIF:All -XMP:All -IPTC:All "$img" 2>/dev/null || true)

        if [[ -z "$meta_check" ]]; then
            echo -e " ${DIM}$fname — already clean, skipping${NC}"
            continue
        fi

        # Auto-orient JPEG/PNG before stripping (applies EXIF rotation to pixels)
        ext="${fname##*.}"
        ext=$(echo "$ext" | tr '[:upper:]' '[:lower:]')
        if [[ "$ext" == "jpg" ]] || [[ "$ext" == "jpeg" ]] || [[ "$ext" == "png" ]]; then
            # Still best-effort, but no longer silent: once the orientation
            # tag is stripped a failed auto-orient can leave the image rotated.
            if ! magick "$img" -auto-orient "$img" 2>/dev/null; then
                warn "Could not auto-orient $fname; check its rotation after stripping"
            fi
        fi

        # Strip all metadata. Report the failing file instead of letting
        # `set -e` end the run with stderr muted and no explanation.
        if ! exiftool -all= -overwrite_original "$img" 2>/dev/null; then
            error "Failed to strip metadata from $fname"
            return 1
        fi
        stripped=$((stripped + 1))
        echo -e " ${GREEN}$fname${NC} — metadata stripped"
    done

    echo ""
    success "Stripped metadata from $stripped image(s)"
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# PHASE 3: CONVERT & COMPRESS
|
||||
# ---------------------------------------------------------------------------
|
||||
# Phase 3: delete unreferenced images, then convert the rest to WebP.
#
# Reads /tmp/optimg_files.txt and /tmp/optimg_unreferenced.txt (written by the
# audit phase). Images larger than MAX_WIDTH x MAX_HEIGHT are first resized
# through a temporary PNG. Non-WebP originals are removed after a successful
# conversion. GIFs are skipped: cwebp does not accept GIF input. In dry-run
# mode nothing is changed and sizes are estimated. Writes the totals to
# /tmp/optimg_total_before.txt, /tmp/optimg_total_after.txt and
# /tmp/optimg_converted.txt for the summary phase. Returns 1 if cwebp fails.
phase_convert() {
    header "PHASE 3: CONVERT TO WEBP & COMPRESS"

    local -a image_files=()
    mapfile -t image_files < /tmp/optimg_files.txt

    # Delete unreferenced images first
    local -a unreferenced=()
    mapfile -t unreferenced < /tmp/optimg_unreferenced.txt

    local img
    if [[ ${#unreferenced[@]} -gt 0 ]] && [[ -n "${unreferenced[0]}" ]]; then
        echo -e "${BOLD}Removing unreferenced images${NC}"
        for img in "${unreferenced[@]}"; do
            [[ -z "$img" ]] && continue
            local fsize
            fsize=$(stat -c%s "$img" 2>/dev/null || stat -f%z "$img" 2>/dev/null)
            if $DRY_RUN; then
                echo -e " ${DIM}(dry-run) Would delete: $(basename "$img") ($(human_size "$fsize"))${NC}"
            else
                rm -f "$img"
                echo -e " ${RED}Deleted:${NC} $(basename "$img") ($(human_size "$fsize"))"
            fi
        done
        echo ""
    fi

    echo -e "${BOLD}Converting images to WebP (quality $WEBP_QUALITY, max ${MAX_WIDTH}x${MAX_HEIGHT})${NC}"
    printf " %-40s %-12s %-12s %s\n" "FILENAME" "BEFORE" "AFTER" "SAVINGS"
    printf " %-40s %-12s %-12s %s\n" "--------" "------" "-----" "-------"

    local total_before=0
    local total_after=0
    local converted=0
    # Declared local so it does not leak into the script's global scope.
    local ext_lower

    for img in ${image_files[@]+"${image_files[@]}"}; do
        [[ -z "$img" ]] && continue
        # Skip if this was an unreferenced file we just deleted
        [[ ! -f "$img" ]] && continue

        local fname
        fname=$(basename "$img")
        local ext="${fname##*.}"
        local base="${fname%.*}"
        ext_lower=$(echo "$ext" | tr '[:upper:]' '[:lower:]')
        local webp_path="$IMAGES_DIR/${base}.webp"

        # cwebp cannot decode GIF; attempting it would end the whole run via
        # `set -e` with its error output muted. Leave GIFs untouched.
        if [[ "$ext_lower" == "gif" ]]; then
            warn "Skipping $fname: cwebp cannot read GIF input (use gif2webp for GIFs)"
            continue
        fi

        local before_size
        before_size=$(stat -c%s "$img" 2>/dev/null || stat -f%z "$img" 2>/dev/null)
        total_before=$((total_before + before_size))

        if $DRY_RUN; then
            echo -e " ${DIM}(dry-run) Would convert: $fname -> ${base}.webp${NC}"
            # Rough estimate only: JPEG -> 1/5, PNG -> 1/3, existing WebP -> 9/10 of current size
            local est_after=$before_size
            case "$ext_lower" in
                jpg|jpeg) est_after=$((before_size / 5)) ;;
                png) est_after=$((before_size / 3)) ;;
                webp) est_after=$((before_size * 9 / 10)) ;;
            esac
            total_after=$((total_after + est_after))
            converted=$((converted + 1))
            continue
        fi

        # Get current dimensions ("0 0" when identify cannot read the file)
        local cur_width cur_height
        read -r cur_width cur_height < <(identify -format "%w %h\n" "$img" 2>/dev/null || echo "0 0")

        local needs_resize=false
        if (( cur_width > MAX_WIDTH )) || (( cur_height > MAX_HEIGHT )); then
            needs_resize=true
        fi

        # Determine the input for cwebp
        local cwebp_input="$img"
        local tmp_resized=""

        if $needs_resize; then
            # Resize via ImageMagick, output to temp PNG for cwebp
            tmp_resized=$(mktemp /tmp/optimg_XXXXXX.png)
            magick "$img" -resize "${MAX_WIDTH}x${MAX_HEIGHT}>" -quality 100 "$tmp_resized"
            info " Resized $fname: ${cur_width}x${cur_height} -> $(magick identify -format '%wx%h' "$tmp_resized")"
            cwebp_input="$tmp_resized"
        fi

        # When input and output are the same .webp file, encode to a temp
        # file and move it into place afterwards.
        local cwebp_output="$webp_path"
        local tmp_webp=""
        if [[ "$ext_lower" == "webp" ]] && [[ "$img" == "$webp_path" ]]; then
            tmp_webp=$(mktemp /tmp/optimg_XXXXXX.webp)
            cwebp_output="$tmp_webp"
        fi

        # Convert to WebP via cwebp. On failure, remove our temp files and
        # name the failing image rather than dying silently.
        if ! cwebp -q "$WEBP_QUALITY" "$cwebp_input" -o "$cwebp_output" 2>/dev/null; then
            rm -f "$tmp_resized" "$tmp_webp"
            error "cwebp failed to convert $fname"
            return 1
        fi

        if [[ -n "$tmp_webp" ]]; then
            mv "$tmp_webp" "$webp_path"
        fi

        # Cleanup temp file if we resized
        if [[ -n "$tmp_resized" ]]; then
            rm -f "$tmp_resized"
        fi

        # Delete original if it's not already .webp
        if [[ "$ext_lower" != "webp" ]]; then
            rm -f "$img"
        fi

        local after_size
        after_size=$(stat -c%s "$webp_path" 2>/dev/null || stat -f%z "$webp_path" 2>/dev/null)
        total_after=$((total_after + after_size))

        local savings=0
        if (( before_size > 0 )); then
            savings=$(( (before_size - after_size) * 100 / before_size ))
        fi

        local savings_color="$GREEN"
        if (( savings < 10 )); then
            savings_color="$YELLOW"
        fi

        printf " %-40s %-12s %-12s ${savings_color}%s%%${NC}\n" \
            "${base}.webp" "$(human_size "$before_size")" "$(human_size "$after_size")" "$savings"

        converted=$((converted + 1))
    done

    echo ""
    local total_savings=0
    if (( total_before > 0 )); then
        total_savings=$(( (total_before - total_after) * 100 / total_before ))
    fi
    info "Converted $converted image(s)"
    info "Total: $(human_size $total_before) -> $(human_size $total_after) (${total_savings}% reduction)"

    # Save totals for summary
    echo "$total_before" > /tmp/optimg_total_before.txt
    echo "$total_after" > /tmp/optimg_total_after.txt
    echo "$converted" > /tmp/optimg_converted.txt
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# PHASE 4: UPDATE CONTENT REFERENCES
|
||||
# ---------------------------------------------------------------------------
|
||||
# Phase 4: rewrite image references in content to point at the .webp files.
#
# Step 1 edits every markdown file under $CONTENT_DIR: front matter `image:`
# values get a leading "/" and a .webp extension, and inline markdown images
# under /images/ get a .webp extension.
# Step 2 updates avatarUrl in $CONFIG_FILE.
# Step 3 blanks front matter `image:` values that still do not resolve to a
# file under static/.
# In dry-run mode files are only inspected and the intended edits are printed.
phase_update_refs() {
    header "PHASE 4: UPDATE CONTENT REFERENCES"

    local updated_files=0
    local mdfile changed relpath fm_line

    # Optional quote around a front matter value. Unquoted and single-quoted
    # values must be rewritten too: otherwise they keep the old extension,
    # stop resolving once the original is deleted, and step 3 blanks them.
    local q="[\"']"
    local ext_re='(jpg|jpeg|JPG|JPEG|png|PNG)'

    # --- Step 1: Update image extensions in content files ---
    # This must happen BEFORE broken ref clearing, since .jpg/.png files are now .webp
    echo -e "${BOLD}Updating image references (.jpg/.jpeg/.png -> .webp)${NC}"

    while IFS= read -r -d '' mdfile; do
        changed=false

        # Normalize front matter paths first: image: images/... -> image: /images/...
        if grep -qE "^\s*image:\s*${q}?images/" "$mdfile" 2>/dev/null; then
            if ! $DRY_RUN; then
                sed -i -E "s@^(\s*image:\s*${q}?)images/@\1/images/@" "$mdfile"
            fi
            changed=true
        fi

        # Update front matter image field (only local /images/ paths, not http URLs).
        # Matches a top-level or indented `image:` key; the extension must be
        # followed by a closing quote or the end of the line.
        if grep -qE "^\s*image:\s*${q}?/images/[^\"]*\.${ext_re}(${q}|\s*\$)" "$mdfile" 2>/dev/null; then
            if ! $DRY_RUN; then
                sed -i -E "s@^(\s*image:\s*${q}?/images/[^\"]*)\.${ext_re}(${q}|\s*\$)@\1.webp\3@" "$mdfile"
            fi
            changed=true
        fi

        # Update inline markdown images: ![alt](/images/name.ext#fragment)
        # Only match local /images/ paths, not external URLs. grep and sed use
        # the same path class ([^)#]*), so a file name containing dots is
        # rewritten as well as detected, and an extension-like suffix inside
        # the #fragment is left alone.
        if grep -qP '!\[[^\]]*\]\(/images/[^)#]*\.(jpg|jpeg|JPG|JPEG|png|PNG)(#[^)]*)?\)' "$mdfile" 2>/dev/null; then
            if ! $DRY_RUN; then
                sed -i -E 's@(!\[[^]]*\]\(/images/[^)#]*)\.(jpg|jpeg|JPG|JPEG|png|PNG)([#][^)]*)?(\))@\1.webp\3\4@g' "$mdfile"
            fi
            changed=true
        fi

        if $changed; then
            relpath="${mdfile}"
            if $DRY_RUN; then
                echo -e " ${DIM}(dry-run) Would update refs in: $relpath${NC}"
            else
                echo -e " ${GREEN}Updated${NC} $relpath"
            fi
            updated_files=$((updated_files + 1))
        fi
    done < <(find "$CONTENT_DIR" -name '*.md' -print0)

    # --- Step 2: Update config.toml avatar ---
    if grep -q 'avatarUrl.*\.png' "$CONFIG_FILE" 2>/dev/null; then
        if $DRY_RUN; then
            echo -e " ${DIM}(dry-run) Would update avatarUrl in $CONFIG_FILE${NC}"
        else
            sed -i 's@avatarUrl = "/images/fosscat_icon\.png"@avatarUrl = "/images/fosscat_icon.webp"@' "$CONFIG_FILE"
            echo -e " ${GREEN}Updated${NC} avatarUrl in $CONFIG_FILE"
        fi
        updated_files=$((updated_files + 1))
    fi

    # --- Step 3: Clear genuinely broken image references ---
    # Only clear refs that still don't resolve after extension updates
    # (e.g., placeholder /images/img.jpg that was never a real image)
    echo ""
    echo -e "${BOLD}Checking for remaining broken image references${NC}"

    local cleared=0
    while IFS= read -r -d '' mdfile; do
        # Check front matter image fields
        while IFS= read -r fm_line; do
            [[ -z "$fm_line" ]] && continue
            local fm_image
            fm_image=$(echo "$fm_line" | sed 's/^[[:space:]]*image:[[:space:]]*//' | sed 's/^["'\'']//' | sed 's/["'\'']\s*$//')

            [[ -z "$fm_image" ]] && continue
            [[ "$fm_image" == '""' ]] && continue
            [[ "$fm_image" == http* ]] && continue

            local fs_path="static/${fm_image#/}"
            if [[ ! -f "$fs_path" ]]; then
                if $DRY_RUN; then
                    echo -e " ${DIM}(dry-run) Would clear broken ref in: $mdfile (was: $fm_image)${NC}"
                else
                    # Escape regex metacharacters so the path matches literally in sed
                    local escaped_image
                    escaped_image=$(echo "$fm_image" | sed 's/[.[\/*^$]/\\&/g')
                    sed -i -E "s@^(\s*image:\s*).*${escaped_image}.*@\1\"\"@" "$mdfile"
                    echo -e " ${GREEN}Cleared${NC} broken ref ${DIM}$fm_image${NC} in ${DIM}$mdfile${NC}"
                    cleared=$((cleared + 1))
                fi
            fi
        done < <(grep -E '^\s*image:\s' "$mdfile" 2>/dev/null || true)
    done < <(find "$CONTENT_DIR" -name '*.md' -print0)

    if [[ $cleared -eq 0 ]] && ! $DRY_RUN; then
        success "No broken image references remaining"
    fi

    echo ""
    info "Updated $updated_files file(s)"
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# PHASE 5: SUMMARY
|
||||
# ---------------------------------------------------------------------------
|
||||
# Phase 5: print the totals recorded by the earlier phases, list the manual
# follow-up steps, and delete the /tmp/optimg_*.txt hand-off files.
phase_summary() {
    header "PHASE 5: SUMMARY"

    if $DRY_RUN; then
        echo -e "${BOLD}${YELLOW}DRY RUN — no changes were made${NC}"
        echo ""
    fi

    local total_before total_after converted
    local savings=0

    # Prefer the conversion phase's own "before" total; fall back to the
    # audit total, then to 0.
    if ! total_before=$(cat /tmp/optimg_total_before.txt 2>/dev/null); then
        total_before=$(cat /tmp/optimg_total_size.txt 2>/dev/null || echo 0)
    fi
    total_after=$(cat /tmp/optimg_total_after.txt 2>/dev/null || echo 0)
    converted=$(cat /tmp/optimg_converted.txt 2>/dev/null || echo 0)

    if (( total_before > 0 && total_after > 0 )); then
        savings=$(( (total_before - total_after) * 100 / total_before ))
    fi

    echo -e " Images processed: ${BOLD}$converted${NC}"
    # Size lines are shown only when an "after" total was recorded.
    if (( total_after > 0 )); then
        echo -e " Size before: ${BOLD}$(human_size "$total_before")${NC}"
        echo -e " Size after: ${BOLD}$(human_size "$total_after")${NC}"
        echo -e " Total reduction: ${BOLD}${GREEN}${savings}%${NC}"
    fi

    echo ""
    echo -e " ${BOLD}Next steps:${NC}"
    echo -e " 1. Run ${CYAN}hugo server${NC} and verify images look correct"
    echo -e " 2. Check the browser dev tools Network tab for proper WebP delivery"
    echo -e " 3. Commit when satisfied: ${CYAN}git add -A && git commit -m \"optimize: convert images to webp, strip metadata\"${NC}"

    # Cleanup temp files
    rm -f /tmp/optimg_*.txt
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# MAIN
|
||||
# ---------------------------------------------------------------------------
|
||||
# Entry point: print the banner, run the audit, then (unless --audit-only)
# confirm with the user and run the strip / convert / update-refs / summary
# phases in order.
main() {
    # The phases communicate through /tmp/optimg_*.txt. Drop leftovers from an
    # earlier aborted run so they can never be mistaken for this run's audit,
    # and remove our own files however the script ends.
    rm -f /tmp/optimg_*.txt
    trap 'rm -f /tmp/optimg_*.txt' EXIT

    echo -e "${BOLD}${CYAN}"
    echo " ┌─────────────────────────────────────────┐"
    echo " │ fosscat.com Image Optimizer │"
    echo " │ Strip metadata · Convert to WebP │"
    echo " │ Resize · Audit references │"
    echo " └─────────────────────────────────────────┘"
    echo -e "${NC}"

    if $DRY_RUN; then
        echo -e " ${YELLOW}Running in DRY RUN mode — no files will be modified${NC}"
        echo ""
    fi

    # Phase 1: Audit (always runs)
    phase_audit

    if $AUDIT_ONLY; then
        echo ""
        info "Audit complete. Run without --audit-only to process images."
        rm -f /tmp/optimg_*.txt
        return
    fi

    # The audit writes no file list when it finds no images; stop cleanly
    # instead of letting later phases fail on the missing hand-off file.
    if [[ ! -s /tmp/optimg_files.txt ]]; then
        info "No images to process."
        return
    fi

    # Confirm before proceeding
    echo ""
    if ! $AUTO_YES && ! $DRY_RUN; then
        echo -en " ${BOLD}Proceed with optimization? [y/N]${NC} "
        read -r answer
        if [[ ! "$answer" =~ ^[Yy]$ ]]; then
            info "Aborted."
            rm -f /tmp/optimg_*.txt
            exit 0
        fi
    fi

    # Phase 2: Strip metadata
    phase_strip_metadata

    # Phase 3: Convert & compress
    phase_convert

    # Phase 4: Update references
    phase_update_refs

    # Phase 5: Summary
    phase_summary
}
|
||||
|
||||
main
|
||||
Reference in New Issue
Block a user