# MXCP - robots.txt
# https://mxcp.dev

User-agent: *
Allow: /

# Block build and system paths
Disallow: /_astro/
Disallow: /pagefind/
Disallow: /node_modules/

# Block search results to prevent infinite crawl space
Disallow: /search?*
Disallow: /*?s=*
Disallow: /*&s=*

# Block query parameters that create duplicate content
Disallow: /*?utm_*
Disallow: /*&utm_*
Disallow: /*?ref=*
Disallow: /*&ref=*
Disallow: /*?fbclid=*
Disallow: /*&fbclid=*

# Crawl delay for respectful crawling
# (kept inside this group: a Crawl-delay outside a user-agent group is ignored)
Crawl-delay: 1

# AI Crawlers - Allow with full access for training
User-agent: GPTBot
Allow: /

User-agent: Google-Extended
Allow: /

User-agent: ChatGPT-User
Allow: /

User-agent: CCBot
Allow: /

User-agent: anthropic-ai
Allow: /

User-agent: Claude-Web
Allow: /

User-agent: Applebot-Extended
Allow: /

User-agent: PerplexityBot
Allow: /

# Block known problematic/aggressive crawlers
User-agent: Bytespider
Disallow: /

User-agent: MJ12bot
Disallow: /

User-agent: DotBot
Disallow: /

User-agent: PetalBot
Disallow: /

# Sitemap locations
Sitemap: https://mxcp.dev/sitemap-index.xml

# LLM-specific content (llms.txt specification)
# See https://llmstxt.org/ for details
# /llms.txt - Summary for AI systems
# /llms-full.txt - Comprehensive documentation for AI systems

# Host directive (non-standard; recognized by Yandex, ignored by most crawlers)
Host: https://mxcp.dev