# As a condition of accessing this website, you agree to abide by the following
# content signals:

# (a)  If a Content-Signal = yes, you may collect content for the corresponding
#      use.
# (b)  If a Content-Signal = no, you may not collect content for the
#      corresponding use.
# (c)  If the website operator does not include a Content-Signal for a
#      corresponding use, the website operator neither grants nor restricts
#      permission via Content-Signal with respect to the corresponding use.

# The content signals and their meanings are:

# search:   building a search index and providing search results (e.g., returning
#           hyperlinks and short excerpts from your website's contents). Search does not
#           include providing AI-generated search summaries.
# ai-input: inputting content into one or more AI models (e.g., retrieval
#           augmented generation, grounding, or other real-time taking of content for
#           generative AI search answers).
# ai-train: training or fine-tuning AI models.

# ANY RESTRICTIONS EXPRESSED VIA CONTENT SIGNALS ARE EXPRESS RESERVATIONS OF
# RIGHTS UNDER ARTICLE 4 OF THE EUROPEAN UNION DIRECTIVE 2019/790 ON COPYRIGHT
# AND RELATED RIGHTS IN THE DIGITAL SINGLE MARKET.

# BEGIN Cloudflare Managed content

# Default group for all crawlers: every path may be fetched. The content
# signals declare search=yes (building a search index is permitted) and
# ai-train=no (collecting content to train or fine-tune AI models is not).
# ai-input is not declared, so this line neither grants nor restricts it.
User-agent: *
Content-Signal: search=yes,ai-train=no
Allow: /

# Each crawler named below has its own group that disallows the entire site.
User-agent: Amazonbot
Disallow: /

User-agent: Applebot-Extended
Disallow: /

User-agent: Bytespider
Disallow: /

User-agent: CCBot
Disallow: /

User-agent: ClaudeBot
Disallow: /

User-agent: CloudflareBrowserRenderingCrawler
Disallow: /

User-agent: Google-Extended
Disallow: /

User-agent: GPTBot
Disallow: /

User-agent: meta-externalagent
Disallow: /

# END Cloudflare Managed Content

# AI policy: retrieve-with-attribution. We permit AI search engines to
# retrieve and cite this library's content with attribution. We do NOT
# permit the use of this content for training generative models.
#
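# Authoritative signal: Cloudflare-managed Content-Signal response header
#   Content-Signal: search=yes, ai-input=yes, ai-train=no
# NOTE(review): the Content-Signal directive in the Cloudflare-managed
# robots.txt block reads "search=yes,ai-train=no" (ai-input is not declared
# there) — confirm which of the two is intended.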
# Training-only crawlers (GPTBot, ClaudeBot, CCBot, Bytespider, etc.) are
# blocked at the Cloudflare edge regardless of any rule below.
#
# Full policy: /llms.txt (machine-readable), /about (human-readable).
# Note: on najafdesertlibrary.com and www.najafdesertlibrary.com, Cloudflare's
# managed robots.txt block is prepended to this origin file, so crawlers read
# the managed rules first and then the rules below. The rules below stand
# alone only if CF managed-robots is ever disabled.

# Default group for every crawler that has no more specific group in this
# file: the whole site is crawlable except the paths disallowed here.
# The Disallow rules must sit directly inside a group; left at the end of the
# file they attach only to whichever User-agent line happens to precede them.
User-agent: *
Allow: /
Disallow: /library
Disallow: /api/library/
Disallow: /shared/

# Note: admin and auth-gated routes (Filament panel, /invitations, /bookmark,
# /livewire) are auth-protected and emit X-Robots-Tag: noindex, nofollow.
# They are intentionally NOT listed via Disallow here, since Disallow rules
# in robots.txt are publicly readable and would advertise their existence.

# Explicit allow for retrieval/citation crawlers (intentionally excludes
# training-only crawlers like GPTBot, ClaudeBot, CCBot, Bytespider,
# Google-Extended, Applebot-Extended — those are blocked at the CF edge).
# A crawler that matches a named group obeys only that group and ignores the
# "*" group, so the Disallow rules are repeated here to keep them in force.
User-agent: PerplexityBot
User-agent: YouBot
User-agent: ChatGPT-User
User-agent: OAI-SearchBot
User-agent: Claude-User
Allow: /
Disallow: /library
Disallow: /api/library/
Disallow: /shared/
# Note: three feature-flagged-off surfaces — the narrator graph
# (/narrators + /narrator/{slug}), occasions (/events + /event/{slug}), and
# topics (/topics + /topic/{slug}) — return HTTP 404 Not Found. They were
# never deployed to production, so from a crawler's point of view these pages
# never existed; 404 ("no such page") is the honest status, whereas 410 Gone
# would imply the pages once existed. They are intentionally NOT Disallow'd:
# a Disallow is publicly readable and would advertise the routes. They are
# also omitted from the sitemap and have no inbound links.

Sitemap: https://najafdesertlibrary.com/sitemap.xml
Sitemap: https://najafdesertlibrary.com/sitemaps/sitemap-recent.xml