Configuration Examples
Real-world squirrelscan configurations for common use cases
Complete configuration examples for different website types and use cases.
Blog / Content Site
Optimized for long-form content with comprehensive SEO checks.
[project]
name = "myblog"
domains = []
[crawler]
max_pages = 500
delay_ms = 100
timeout_ms = 30000
concurrency = 5
per_host_concurrency = 2
per_host_delay_ms = 200
# Focus on content, exclude admin
include = []
exclude = [
"/wp-admin/**",
"/wp-content/uploads/**",
"/admin/**",
"*.pdf",
"*.zip"
]
# Preserve pagination
allow_query_params = ["page", "p"]
drop_query_prefixes = ["utm_", "gclid", "fbclid"]
respect_robots = true
breadth_first = true
max_prefix_budget = 0.25
[rules]
enable = ["*"]
disable = ["ai/*"] # AI rules require API key
[external_links]
enabled = true
cache_ttl_days = 7
timeout_ms = 10000
concurrency = 5
[output]
format = "console"
# Rule options for blog content
[rule_options."core/meta-title"]
min_length = 40
max_length = 60
[rule_options."core/meta-description"]
min_length = 120
max_length = 160
[rule_options."content/word-count"]
min_words = 500
warn_threshold = 1000
[rule_options."content/article-toc"]
min_headings = 3
[rule_options."content/article-links"]
min_external_links = 2
[rule_options."links/orphan-pages"]
exclude_patterns = ["/404", "/500"]
Why this works:
- Focuses on content quality (word count, TOC, citations)
- Longer titles and descriptions for blog posts
- Excludes admin areas and media files
- Preserves pagination parameters
- Validates external citations
E-commerce Site
Optimized for product pages and conversion paths.
[project]
name = "mystore"
[crawler]
max_pages = 1000
delay_ms = 100
concurrency = 10
per_host_concurrency = 3
# Focus on products and categories
include = [
"/products/**",
"/categories/**",
"/collections/**"
]
exclude = [
"/cart/**",
"/checkout/**",
"/account/**",
"/admin/**",
"*.pdf"
]
# Preserve filters and sorting
allow_query_params = [
"category",
"sort",
"filter",
"color",
"size",
"price",
"page"
]
breadth_first = true
max_prefix_budget = 0.3 # Allow more per category
[rules]
enable = ["*"]
disable = [
"ai/*",
"content/article-toc", # Products don't need TOC
"content/article-links" # Products don't cite
]
[external_links]
enabled = true
cache_ttl_days = 14 # Products change less
[rule_options."core/meta-title"]
min_length = 30
max_length = 55 # Shorter for mobile
[rule_options."content/word-count"]
min_words = 150
warn_threshold = 300 # Products are shorter
[rule_options."links/dead-end-pages"]
exclude_patterns = [
"/thank-you",
"/order-confirmation",
"/checkout/success"
]
[rule_options."content/article-links"]
min_external_links = 0 # Products link internally
Why this works:
- Focuses on product and category pages
- Excludes cart/checkout (dynamic content)
- Preserves filter and sort parameters
- Allows shorter content for products
- Excludes conversion pages from dead-end check
- Longer cache for external links (products stable)
Documentation Site
Optimized for technical documentation and cross-referencing.
[project]
name = "docs"
[crawler]
max_pages = 800
delay_ms = 50 # Local or fast hosting
concurrency = 10
include = ["/docs/**", "/api/**", "/guides/**"]
exclude = ["*.png", "*.jpg", "*.gif"]
# Preserve version in URL
allow_query_params = ["version", "v"]
breadth_first = true
max_prefix_budget = 0.2 # Ensure wide coverage
[rules]
enable = ["*"]
disable = [
"ai/*",
"content/article-links", # Docs link internally
"eeat/*" # Not needed for docs
]
[external_links]
enabled = true
cache_ttl_days = 3 # Docs change frequently
[rule_options."content/word-count"]
min_words = 200
warn_threshold = 400 # Docs can be concise
[rule_options."content/article-toc"]
min_headings = 2 # Most docs need TOC
[rule_options."links/internal-links"]
min_internal_links = 5 # Heavy cross-referencing
[rule_options."content/keyword-stuffing"]
density_threshold = 0.05 # Technical terms repeat
[rule_options."links/orphan-pages"]
exclude_patterns = ["/api/deprecated/**"]
Why this works:
- Focuses on documentation sections
- Excludes images from crawl
- Shorter content acceptable (reference material)
- Requires TOC for most pages
- Expects heavy internal linking
- Allows technical term repetition
- Short external link cache (docs update often)
Marketing Landing Pages
Optimized for conversion-focused single pages.
[project]
name = "landing-pages"
[crawler]
max_pages = 50
delay_ms = 100
concurrency = 5
include = ["/lp/**", "/landing/**"]
exclude = ["/lp/old/**", "/lp/archive/**"]
# Usually no query params
allow_query_params = []
[rules]
enable = [
"core/*",
"security/*",
"mobile/*",
"performance/*",
"social/*"
]
disable = [
"ai/*",
"content/article-toc",
"content/article-links",
"links/orphan-pages",
"links/dead-end-pages",
"crawl/*"
]
[external_links]
enabled = false # Landing pages often have minimal links
[rule_options."content/word-count"]
min_words = 100
warn_threshold = 200 # Concise for conversion
[rule_options."links/internal-links"]
min_internal_links = 1 # Just CTA
[rule_options."core/meta-title"]
min_length = 30
max_length = 55
Why this works:
- Small page count (focused audits)
- Disables TOC, citations (not relevant)
- Disables orphan/dead-end checks (landing pages isolated)
- Very short content acceptable
- Minimal internal links (CTA focused)
- No external link checking (minimal outbound)
- Focuses on core SEO, mobile, performance
News / Magazine Site
Optimized for frequently updated content.
[project]
name = "news-site"
[crawler]
max_pages = 1000
delay_ms = 150 # Polite
concurrency = 5
per_host_concurrency = 2
include = [
"/news/**",
"/articles/**",
"/opinion/**"
]
exclude = [
"/news/archive/**",
"/breaking/**", # Changes too fast
"*.amp"
]
# Preserve section and pagination
allow_query_params = ["section", "page", "topic"]
breadth_first = true
max_prefix_budget = 0.15 # Balanced coverage
[rules]
enable = ["*"]
disable = ["ai/*"]
[external_links]
enabled = true
cache_ttl_days = 1 # News links change daily
[rule_options."content/word-count"]
min_words = 400
warn_threshold = 800
[rule_options."content/article-links"]
min_external_links = 3 # Journalism requires citations
[rule_options."content/article-toc"]
min_headings = 4
[rule_options."performance/ttfb"]
warn_threshold = 400 # News sites need speed
error_threshold = 800
Why this works:
- Excludes fast-changing breaking news
- Short external link cache (news changes)
- Requires citations (journalism)
- Expects longer content
- Strict performance requirements
- Low prefix budget (balanced coverage)
SaaS Marketing Site
Optimized for product marketing and conversion.
[project]
name = "saas-marketing"
[crawler]
max_pages = 200
delay_ms = 100
concurrency = 8
include = [
"/",
"/features/**",
"/pricing",
"/about",
"/blog/**"
]
exclude = [
"/app/**", # Logged-in application
"/dashboard/**",
"/api/**"
]
allow_query_params = ["plan", "billing"]
[rules]
enable = ["*"]
disable = [
"ai/*",
"content/article-links", # Marketing doesn't cite
"local/*", # Not local business
"video/*" # No video content
]
[external_links]
enabled = true
cache_ttl_days = 14
[rule_options."content/word-count"]
min_words = 200
warn_threshold = 400
[rule_options."links/dead-end-pages"]
exclude_patterns = [
"/pricing",
"/signup",
"/demo"
]
Why this works:
- Excludes logged-in app areas
- Focuses on marketing pages
- Shorter content acceptable (marketing)
- Pricing/signup pages are intentional dead-ends
- Disables irrelevant categories (local, video)
Local Business Site
Optimized for local SEO and service pages.
[project]
name = "local-business"
[crawler]
max_pages = 100
delay_ms = 100
concurrency = 5
exclude = ["/admin/**", "/wp-admin/**"]
[rules]
enable = ["*"]
disable = [
"ai/*",
"content/article-links",
"eeat/citations",
"video/*"
]
[external_links]
enabled = true
cache_ttl_days = 30 # Local info stable
[rule_options."content/word-count"]
min_words = 200
warn_threshold = 300
[rule_options."core/meta-title"]
min_length = 35
max_length = 60 # Include city/state
[rule_options."links/internal-links"]
min_internal_links = 3
Why this works:
- Small site (100 pages typical)
- Focuses on local SEO rules
- Longer title to include location
- Shorter content acceptable
- Long external link cache (info stable)
- Disables citation requirements
CI/CD Pipeline
Fast, focused checks for automated testing.
[project]
name = "ci-checks"
[crawler]
max_pages = 100
delay_ms = 0 # Fast for CI
timeout_ms = 10000
concurrency = 10
respect_robots = false # Testing/staging
[rules]
enable = [
"core/*",
"security/*",
"links/broken-links"
]
disable = ["*"] # Only enable specific critical rules
[external_links]
enabled = false # Speed priority
[output]
format = "json"
path = "reports/audit.json"
[rule_options."core/meta-title"]
min_length = 1 # Just check presence
max_length = 200
[rule_options."core/meta-description"]
min_length = 1
max_length = 500
Why this works:
- Fast crawl (no delays)
- Only critical checks
- No external link validation
- JSON output for parsing
- Lenient thresholds (just presence checks)
- Ignores robots.txt (testing environment)
CI/CD usage:
# .github/workflows/audit.yml
- name: Audit
  run: squirrel audit https://staging.example.com
- name: Check Score
  run: |
    SCORE=$(cat reports/audit.json | jq '.health.score')
    if [ "$SCORE" -lt 80 ]; then
      exit 1
    fi
Multi-Domain Project
Crawl the main site and its subdomains together.
[project]
name = "multi-domain"
domains = ["example.com"] # Includes all subdomains
[crawler]
max_pages = 1000
delay_ms = 100
concurrency = 10
per_host_concurrency = 2 # Per subdomain
# Crawl across subdomains
include = [] # Empty = all subdomains allowed
exclude = [
"/admin/**",
"/api/**"
]
breadth_first = true
max_prefix_budget = 0.2
[rules]
enable = ["*"]
disable = ["ai/*"]
Crawls:
- example.com
- www.example.com
- blog.example.com
- docs.example.com
- shop.example.com
Why this works:
- `domains = ["example.com"]` allows all subdomains
- Higher page limit for multiple domains
- Per-host concurrency prevents overwhelming single subdomain
- Balanced prefix budget for diverse coverage
High-Volume Crawl
Large site with thousands of pages.
[project]
name = "large-site"
[crawler]
max_pages = 5000
delay_ms = 50
timeout_ms = 20000
concurrency = 15
per_host_concurrency = 5
per_host_delay_ms = 100
breadth_first = true
max_prefix_budget = 0.15 # Prevent blog dominance
[rules]
enable = ["*"]
disable = [
"ai/*",
"content/quality", # Slow on large sites
"performance/cls-hints"
]
[external_links]
enabled = true
cache_ttl_days = 14
timeout_ms = 5000 # Fail fast
concurrency = 20 # High parallelism
[output]
format = "json"
path = "reports/audit-large.json"
Why this works:
- High page limit
- Aggressive concurrency
- Low prefix budget (balanced coverage)
- Disables slow rules
- Fast external link timeout
- JSON output for large datasets
Local Development
Fast crawling for localhost testing.
[project]
name = "local-dev"
[crawler]
max_pages = 100
delay_ms = 0 # No delay
timeout_ms = 5000
concurrency = 10
respect_robots = false # Local testing
[rules]
enable = ["core/*", "links/*", "content/*"]
disable = [
"ai/*",
"security/*", # Localhost is HTTP
"crawl/*" # No robots.txt
]
[external_links]
enabled = false # External links fail on localhost
[output]
format = "console"
Usage:
cd myproject
squirrel audit http://localhost:3000
Why this works:
- No delays (localhost is fast)
- Disables security checks (localhost is HTTP)
- No external link checking
- Console output for quick feedback
Accessibility Audit
Focus on WCAG compliance.
[project]
name = "accessibility"
[crawler]
max_pages = 300
[rules]
enable = [
"a11y/*",
"mobile/*",
"core/meta-title",
"core/meta-description",
"images/alt-text"
]
disable = ["*"] # Only accessibility rules
[external_links]
enabled = false
[output]
format = "html"
path = "reports/accessibility-audit.html"
Why this works:
- Only accessibility and mobile rules
- Includes image alt text
- HTML report for visual review
- Moderate page count
Performance Audit
Focus on Core Web Vitals hints.
[project]
name = "performance"
[crawler]
max_pages = 200
delay_ms = 100
[rules]
enable = [
"performance/*",
"images/*",
"core/preconnect",
"core/preload"
]
disable = ["*"]
[external_links]
enabled = false
[rule_options."performance/ttfb"]
warn_threshold = 300
error_threshold = 600
[rule_options."performance/dom-size"]
warn_threshold = 1000
error_threshold = 2000
[output]
format = "html"
Why this works:
- Only performance rules
- Strict TTFB thresholds
- Strict DOM size limits
- HTML report for visualization
Security Audit
Focus on HTTPS, headers, and security.
[project]
name = "security"
[crawler]
max_pages = 500
[rules]
enable = [
"security/*",
"crawl/robots-txt",
"links/https-downgrade"
]
disable = ["*"]
[external_links]
enabled = true # Check HTTPS external links
[output]
format = "json"
path = "reports/security-audit.json"
Why this works:
- Only security rules
- Validates external HTTPS
- JSON for automated security testing
- Moderate page count
Complete Production Site
Comprehensive audit for production deployment.
[project]
name = "production-site"
domains = []
[crawler]
max_pages = 1000
delay_ms = 150
timeout_ms = 30000
concurrency = 5
per_host_concurrency = 2
per_host_delay_ms = 300
exclude = [
"/admin/**",
"/wp-admin/**",
"/api/**",
"*.pdf",
"*.zip"
]
allow_query_params = ["page", "category", "tag"]
drop_query_prefixes = ["utm_", "gclid", "fbclid", "mc_", "_ga"]
respect_robots = true
breadth_first = true
max_prefix_budget = 0.2
follow_redirects = true
[rules]
enable = ["*"]
disable = ["ai/*"] # Requires API key
[external_links]
enabled = true
cache_ttl_days = 7
timeout_ms = 10000
concurrency = 5
[output]
format = "html"
path = "reports/production-audit.html"
# Production-quality content
[rule_options."core/meta-title"]
min_length = 35
max_length = 60
[rule_options."core/meta-description"]
min_length = 120
max_length = 160
[rule_options."content/word-count"]
min_words = 300
warn_threshold = 600
[rule_options."performance/ttfb"]
warn_threshold = 600
error_threshold = 1000
[rule_options."links/orphan-pages"]
exclude_patterns = [
"/404",
"/500",
"/.well-known/**"
]
Why this works:
- Polite crawling (production site)
- All rules enabled (comprehensive)
- External link validation
- Quality content thresholds
- HTML report for sharing
- Respects robots.txt
- Browser-like headers for better compatibility
Related
- Configuration Overview - Understanding config system
- Crawler Settings - Crawl behavior
- Rules Configuration - Rule patterns
- Rule Options - Per-rule config