# robots.txt for heyroger.ai
#
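# Explicit allow for every AI crawler that matters in 2026 plus the
# traditional search engines. Silence is increasingly interpreted as
# "blocked" by AI training pipelines, so we name names. Each crawler has
# its own group so it can be tightened independently: to block one,
# replace that group's rules with `Disallow: /`.
#
# A crawler that matches a named group ignores the `User-agent: *` group
# entirely, so a path rule meant for everyone must appear in every group.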
#
# Last updated: 2026-05.

Sitemap: https://heyroger.ai/sitemap-index.xml

# ---------- AI search and training crawlers ----------

# OpenAI: GPTBot (GPT model training), OAI-SearchBot (ChatGPT search),
# ChatGPT-User (ChatGPT browse, fetches made on behalf of a user).
# A crawler that matches a named group ignores `User-agent: *`, so the
# OAuth kick-off route /audit/start is excluded in each group here.
# Disallow is listed before Allow so parsers that stop at the first
# matching rule honor it too.
User-agent: GPTBot
Disallow: /audit/start
Allow: /

User-agent: OAI-SearchBot
Disallow: /audit/start
Allow: /

User-agent: ChatGPT-User
Disallow: /audit/start
Allow: /

# Anthropic: ClaudeBot (training crawl), Claude-SearchBot (search index),
# Claude-User (fetches made on behalf of a Claude user). Claude-Web and
# anthropic-ai are older tokens, kept in case anything still sends them.
# A crawler that matches a named group ignores `User-agent: *`, so the
# OAuth kick-off route /audit/start is excluded in each group here.
# Disallow is listed before Allow so parsers that stop at the first
# matching rule honor it too.
User-agent: ClaudeBot
Disallow: /audit/start
Allow: /

User-agent: Claude-SearchBot
Disallow: /audit/start
Allow: /

User-agent: Claude-User
Disallow: /audit/start
Allow: /

User-agent: Claude-Web
Disallow: /audit/start
Allow: /

User-agent: anthropic-ai
Disallow: /audit/start
Allow: /

# Google AI products. Google-Extended is a control token rather than a
# separate crawler: it governs use of content for Gemini training and
# grounding. Google Search features, AI Overviews included, follow the
# Googlebot rules instead.
# Named groups ignore `User-agent: *`, so the OAuth kick-off route
# /audit/start is excluded here as well; Disallow precedes Allow for
# parsers that stop at the first matching rule.
User-agent: Google-Extended
Disallow: /audit/start
Allow: /

# Perplexity: PerplexityBot (search index), Perplexity-User (on-demand
# fetches made on behalf of a user).
# A crawler that matches a named group ignores `User-agent: *`, so the
# OAuth kick-off route /audit/start is excluded in each group here.
# Disallow is listed before Allow so parsers that stop at the first
# matching rule honor it too.
User-agent: PerplexityBot
Disallow: /audit/start
Allow: /

User-agent: Perplexity-User
Disallow: /audit/start
Allow: /

# Apple Intelligence. Applebot-Extended is a usage-control token for
# training Apple's generative models; the crawling itself is done by
# Applebot, which has its own group in this file.
# Named groups ignore `User-agent: *`, so the OAuth kick-off route
# /audit/start is excluded here as well; Disallow precedes Allow for
# parsers that stop at the first matching rule.
User-agent: Applebot-Extended
Disallow: /audit/start
Allow: /

# Meta (Llama training, Meta AI).
# A crawler that matches a named group ignores `User-agent: *`, so the
# OAuth kick-off route /audit/start is excluded in each group here.
# Disallow is listed before Allow so parsers that stop at the first
# matching rule honor it too.
User-agent: Meta-ExternalAgent
Disallow: /audit/start
Allow: /

User-agent: Meta-ExternalFetcher
Disallow: /audit/start
Allow: /

User-agent: FacebookBot
Disallow: /audit/start
Allow: /

# Amazon (Alexa+, Amazon Q).
# Named groups ignore `User-agent: *`, so the OAuth kick-off route
# /audit/start is excluded here as well; Disallow precedes Allow for
# parsers that stop at the first matching rule.
User-agent: Amazonbot
Disallow: /audit/start
Allow: /

# ByteDance (Doubao, TikTok AI).
# Named groups ignore `User-agent: *`, so the OAuth kick-off route
# /audit/start is excluded here as well; Disallow precedes Allow for
# parsers that stop at the first matching rule.
User-agent: Bytespider
Disallow: /audit/start
Allow: /

# You.com.
# Named groups ignore `User-agent: *`, so the OAuth kick-off route
# /audit/start is excluded here as well; Disallow precedes Allow for
# parsers that stop at the first matching rule.
User-agent: YouBot
Disallow: /audit/start
Allow: /

# Cohere.
# Named groups ignore `User-agent: *`, so the OAuth kick-off route
# /audit/start is excluded here as well; Disallow precedes Allow for
# parsers that stop at the first matching rule.
User-agent: cohere-ai
Disallow: /audit/start
Allow: /

# Diffbot (powers many AI knowledge graphs).
# Named groups ignore `User-agent: *`, so the OAuth kick-off route
# /audit/start is excluded here as well; Disallow precedes Allow for
# parsers that stop at the first matching rule.
User-agent: Diffbot
Disallow: /audit/start
Allow: /

# Common Crawl (foundational training set for most open-weight models).
# Named groups ignore `User-agent: *`, so the OAuth kick-off route
# /audit/start is excluded here as well; Disallow precedes Allow for
# parsers that stop at the first matching rule.
User-agent: CCBot
Disallow: /audit/start
Allow: /

# ---------- Traditional search engines (defensive explicit allow) ----------

# These are the crawlers that feed SERPs, so this is where keeping
# /audit/start (an OAuth kick-off redirect with no UI) out of the index
# matters most. A crawler that matches a named group ignores
# `User-agent: *`, so the exclusion is stated in every group. Disallow
# is listed before Allow so parsers that stop at the first matching rule
# honor it too.
User-agent: Googlebot
Disallow: /audit/start
Allow: /

User-agent: Bingbot
Disallow: /audit/start
Allow: /

User-agent: Applebot
Disallow: /audit/start
Allow: /

User-agent: DuckDuckBot
Disallow: /audit/start
Allow: /

# ---------- Everyone else: allow by default ----------

# /audit/start is a server-side OAuth kick-off route; no UI, just
# redirects. Indexing would only produce a "Sign in with Google" snippet
# on SERPs.
# The rules follow User-agent with no blank line in between, because
# some parsers end a group at the first blank line and would drop a rule
# placed after it. Disallow comes before Allow so parsers that stop at
# the first matching rule apply it; longest-match parsers are unaffected
# by the order.
User-agent: *
Disallow: /audit/start
Allow: /