{
  "policy_version": "1.0.0",
  "last_updated": "2026-05-31",
  "canonical_url": "https://better-robots.com/check/data-policy.json",
  "tool": "Better Robots Crawl Governance Audit",
  "operator": "Pagup",
  "scope": "/check audit tool only",
  "relationship_to_domain_policy": "This is a tool-specific operational data policy. It does not replace the domain-level AI usage policy or privacy policy.",
  "data_inputs": [
    {
      "field": "domain",
      "source": "user input",
      "purpose": "normalisation of the external audit target and outbound fetches against it"
    },
    {
      "field": "profile",
      "source": "user selection or default",
      "purpose": "profile-fit scoring and recommendation generation"
    },
    {
      "field": "urls",
      "source": "optional user input",
      "purpose": "URL-by-bot matrix testing"
    },
    {
      "field": "sitemap_url",
      "source": "optional user input",
      "purpose": "same-host sitemap discovery for llms.txt draft generation only"
    },
    {
      "field": "request metadata",
      "source": "HTTP request",
      "purpose": "rate limiting, abuse prevention and anonymised analytics"
    }
  ],
  "remote_fetches": [
    "/robots.txt",
    "/llms.txt",
    "/ai-manifest.json",
    "/ai-usage-policy or known AI policy candidates",
    "/.well-known/ai-governance.json",
    "/.well-known/llm-policy.json",
    "/.well-known/interpretation-policy.json",
    "/ads.txt and /app-ads.txt when evaluated",
    "one randomised nonexistent sentinel path to detect suspicious catch-all 200 responses",
    "sitemap URLs declared in robots.txt, a safe same-host sitemap_url parameter, and bounded standard sitemap paths for llms.txt draft generation",
    "homepage HTML for fallback metadata and same-domain link extraction when no usable sitemap is available",
    "selected same-host public pages for lightweight metadata extraction when generating llms.txt drafts"
  ],
  "safeguards": [
    "SSRF guard and hostname normalisation before outbound fetches",
    "fetch timeout and maximum robots.txt body size",
    "rate limiting per IP and hostname",
    "short-lived cache for audit results",
    "bounded llms.txt draft generation with no LLM API calls, no JavaScript rendering and no full-site crawl",
    "no raw Authorization header logging",
    "analytics payload minimisation with score buckets rather than raw behavioural trails"
  ],
  "cache_and_retention": {
    "audit_cache": "Short-lived KV cache per domain, controlled by CACHE_TTL_SECONDS in the Worker environment.",
    "previous_score": "The previous score may be retained per hostname for up to 30 days to support short-term score comparison, when enabled by the audit cache implementation.",
    "analytics": "Aggregated Cloudflare Analytics Engine events used for funnel and reliability measurement. IPs are hashed with a daily salt before being written to analytics.",
    "generated_config": "The downloadable JSON is generated on demand from the audit result and profile. It is not a private plugin configuration export.",
    "generated_llms_draft": "The llms.txt draft may be cached briefly by domain, profile, max URL count and sitemap input hash. Full markdown is not written to analytics."
  },
  "not_stored_by_design": [
    "WordPress admin credentials",
    "plugin licence keys",
    "private site configuration state",
    "server log files from audited domains",
    "full URL lists as durable behavioural profiles"
  ],
  "output_boundaries": [
    "Scores are deterministic signals from the public audit surface, not legal certification.",
    "Profile fit measures alignment with a selected intent profile, not universal goodness.",
    "Generated JSON expresses recommendations; the WordPress plugin resolves them locally after admin preview and confirmation.",
    "Generated llms.txt drafts are source-grounded starting points and must be reviewed by the site owner or publisher before publication.",
    "Crawler access decisions describe the public robots.txt decision path. They do not prove crawler obedience."
  ],
  "user_actions": {
    "markdown_export": "Technical report for developers, agencies or code agents.",
    "json_export": "Layer-A recommendation contract for Better Robots.txt PRO import when WordPress is detected.",
    "url_matrix": "Per-URL access test against selected bots using the fetched robots.txt.",
    "llms_draft": "English-only llms.txt draft generated from public signals and sitemap/homepage evidence, within metadata limits."
  }
}