#!/usr/bin/env bash
#
# Dead-link checker for Markdown files.
#
# Usage:
#   <this-script> [FILE.md ...]
#
# With no arguments, every *.md file below ./content is checked. Inline links
# and images ([text](url), ![alt](url)) and reference definitions
# ([label]: url) are extracted and each external URL is requested with curl.
#
# Exit status:
#   0  every checked link answered with a 2xx/3xx status (or nothing to check)
#   1  at least one dead link was found, or a required tool is missing

set -euo pipefail

# ANSI colour sequences used for status output.
readonly RED=$'\033[0;31m'
readonly GREEN=$'\033[0;32m'
readonly YELLOW=$'\033[1;33m'
readonly NC=$'\033[0m' # No Color

#######################################
# Check whether a single link target is reachable.
# Relative targets, anchors and non-HTTP schemes are accepted without a
# network request. Targets without a scheme are assumed to be https.
# Arguments:
#   $1 - link target as written in the Markdown file
#   $2 - file the link came from (used in the report only)
#   $3 - line number of the link (used in the report only)
# Outputs:
#   A three-line report on stdout when the link is dead.
# Returns:
#   0 if the link was skipped or answered with 2xx/3xx, 1 otherwise.
#######################################
check_url() {
  local url="$1"
  local file="${2:-}"
  local line_num="${3:-}"
  local http_code

  case "$url" in
    # Anchors and relative paths point inside the site, not at the network.
    '#'* | ./* | ../*) return 0 ;;
    # Protocol-relative URL: only the scheme is missing. Must be tested
    # before the root-relative pattern below, which would also match it.
    //*) url="https:${url}" ;;
    # Root-relative path (including a bare "/").
    /*) return 0 ;;
    # Schemes that cannot be probed over HTTP.
    mailto:* | ftp:* | file:* | javascript:* | tel:* | data:*) return 0 ;;
  esac

  # For URLs without protocol, assume https.
  case "$url" in
    http://* | https://*) ;;
    *) url="https://${url}" ;;
  esac

  # --location makes curl actually follow redirects; --max-redirs is ignored
  # without it. curl prints "000" itself when no response was received, so a
  # failing exit status is mapped to a single clean "000" rather than having
  # a second code appended to whatever curl already printed.
  http_code=$(curl --silent --output /dev/null --write-out '%{http_code}' \
    --location \
    --max-time 10 \
    --max-redirs 5 \
    --retry 1 \
    --user-agent "Mozilla/5.0 (compatible; FossCat Link Checker)" \
    "$url" 2>/dev/null) || http_code="000"

  # Consider 2xx and 3xx status codes as success.
  case "$http_code" in
    [23][0-9][0-9]) return 0 ;;
  esac

  # printf '%s' keeps backslashes and percent signs in the URL literal.
  printf '%s\n' "${RED}❌ Dead link in ${file}:${line_num}${NC}"
  printf ' URL: %s\n' "$url"
  printf ' Status: %s\n' "$http_code"
  return 1
}

#######################################
# Print the raw target of every inline link/image in a file, one per line.
# Arguments:
#   $1 - Markdown file to scan
#######################################
extract_inline_urls() {
  local file="$1"

  # grep exits 1 when a file simply has no links; that is not an error.
  grep -o '\]([^)]*)' -- "$file" \
    | sed -e 's/^](//' -e 's/)$//' \
    || true
}

#######################################
# Print the target of every reference definition ("[label]: url"), one per
# line. Footnote definitions ("[^note]: text") are not links and are dropped.
# Arguments:
#   $1 - Markdown file to scan
#######################################
extract_reference_urls() {
  local file="$1"

  grep -o '^\[.*\]: *[^ ]*' -- "$file" \
    | grep -v '^\[\^' \
    | sed 's/^\[.*\]: *//' \
    || true
}

#######################################
# Reduce a raw link target to the bare URL: surrounding whitespace and an
# optional trailing title (url "Title") are removed.
# Arguments:
#   $1 - raw text found between the parentheses / after the label
# Outputs:
#   The bare URL on stdout (possibly an empty line).
#######################################
normalize_link_target() {
  local target="$1"

  # Strip leading whitespace, then everything from the first remaining
  # whitespace character onwards (the link title).
  target="${target#"${target%%[![:space:]]*}"}"
  target="${target%%[[:space:]]*}"
  printf '%s\n' "$target"
}

#######################################
# Print the number of the first line of a file containing a literal string.
# Prints nothing when the string does not occur.
# Arguments:
#   $1 - literal text to look for
#   $2 - file to search
#######################################
find_line_number() {
  local needle="$1"
  local file="$2"
  local hit

  # -F: URLs are full of regex metacharacters and must match literally.
  hit=$(grep -nF -m 1 -- "$needle" "$file") || return 0
  printf '%s\n' "${hit%%:*}"
}

#######################################
# Extract and check every link of one Markdown file.
# Arguments:
#   $1 - Markdown file to check
# Outputs:
#   Progress line plus one report per dead link on stdout.
# Returns:
#   0 if all links are fine, 1 if at least one is dead.
#######################################
check_markdown_links() {
  local file="$1"
  local failed=0
  local raw url line_num

  echo "Checking links in $file..."

  # The list is read on fd 3 so that nothing run inside the loop can
  # accidentally consume it through stdin.
  while IFS= read -r raw <&3; do
    url=$(normalize_link_target "$raw")
    [[ -n "$url" ]] || continue

    line_num=$(find_line_number "$url" "$file")
    if ! check_url "$url" "$file" "$line_num"; then
      failed=1
    fi
  done 3< <(
    extract_inline_urls "$file"
    extract_reference_urls "$file"
  )

  return "$failed"
}

#######################################
# Abort with an error message if a required external tool is missing.
#######################################
require_tools() {
  local tool

  for tool in curl grep sed; do
    if ! command -v "$tool" > /dev/null 2>&1; then
      printf '%s\n' "${RED}❌ ${tool} is required but not installed.${NC}" >&2
      printf '%s\n' "Please install ${tool} to use the link checker." >&2
      exit 1
    fi
  done
}

#######################################
# Entry point: check the given files, or every *.md below ./content.
# Arguments:
#   $@ - optional list of Markdown files
# Returns:
#   0 if no dead link was found, 1 otherwise. As main is the last command
#   of the script, this is also the script's exit status.
#######################################
main() {
  local -a files_to_check=()
  local failed_files=0
  local total_files=0
  local file

  # If arguments provided, check those files, otherwise all markdown files.
  if (($# > 0)); then
    files_to_check=("$@")
  else
    while IFS= read -r -d '' file; do
      files_to_check+=("$file")
    done < <(find content -name '*.md' -type f -print0)
  fi

  if ((${#files_to_check[@]} == 0)); then
    printf '%s\n' "${YELLOW}No markdown files to check.${NC}"
    return 0
  fi

  printf '%s\n' "${GREEN}🔗 Checking links in ${#files_to_check[@]} markdown file(s)...${NC}"
  echo

  for file in "${files_to_check[@]}"; do
    if [[ ! -f "$file" ]]; then
      printf '%s\n' "${YELLOW}⚠️ File not found: ${file}${NC}"
      continue
    fi

    total_files=$((total_files + 1))
    if ! check_markdown_links "$file"; then
      failed_files=$((failed_files + 1))
    fi
  done

  echo
  if ((failed_files == 0)); then
    printf '%s\n' "${GREEN}✅ All links are working! Checked ${total_files} file(s).${NC}"
    return 0
  fi

  printf '%s\n' "${RED}❌ Found broken links in ${failed_files} file(s) out of ${total_files} checked.${NC}"
  return 1
}

require_tools
main "$@"