From 30fa4cada40453bcf4c6d61ecabe9809c7fa5b74 Mon Sep 17 00:00:00 2001
From: "Evan G."
Date: Fri, 24 May 2024 17:39:07 -0500
Subject: [PATCH] Add robots.txt

I copied https://codeberg.org/benjaminhollon/robots.txt-deny-llm/src/branch/main/robots.txt, or the robots.txt from benjaminhollon, to deny LLMs
---
 static/robots.txt | 69 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 69 insertions(+)
 create mode 100644 static/robots.txt

diff --git a/static/robots.txt b/static/robots.txt
new file mode 100644
index 0000000..c2d2be4
--- /dev/null
+++ b/static/robots.txt
@@ -0,0 +1,69 @@
+# from https://neil-clarke.com/block-the-bots-that-feed-ai-models-by-scraping-your-website/
+
+User-agent: CCBot
+Disallow: /
+
+User-agent: ChatGPT-User
+Disallow: /
+
+User-agent: GPTBot
+Disallow: /
+
+User-agent: Google-Extended
+Disallow: /
+
+User-agent: anthropic-ai
+Disallow: /
+
+User-agent: Omgilibot
+Disallow: /
+
+User-agent: Omgili
+Disallow: /
+
+User-agent: FacebookBot
+Disallow: /
+
+User-agent: Bytespider
+Disallow: /
+
+# from https://github.com/healsdata/ai-training-opt-out
+
+# may not work, needs more research (see https://github.com/rom1504/img2dataset/issues/48)
+User-agent: img2dataset
+Disallow: /
+
+User-agent: Claude-Web
+Disallow: /
+
+User-agent: magpie-crawler
+Disallow: /
+
+# AhrefsBot crawls for data for an "SEO Dataset"—one of their "products" based on this dataset is "AI Writing Tools"
+User-agent: AhrefsBot
+Disallow: /
+
+# from https://www.cyberciti.biz/web-developer/block-openai-bard-bing-ai-crawler-bots-using-robots-txt-file/
+User-agent: PerplexityBot
+Disallow: /
+
+# from https://netfuture.ch/2023/07/blocking-ai-crawlers-robots-txt-chatgpt/
+User-agent: cohere-ai
+Disallow: /
+
+# from https://claytonerrington.com/blog/robots-and-ai/
+
+User-agent: Amazonbot
+Disallow: /
+
+# from https://darkvisitors.com/
+
+User-agent: Applebot
+Disallow: /
+
+User-agent: YouBot
+Disallow: /
+
+# from https://imho.alex-kunz.com/2024/01/25/an-update-on-friendly-crawler/
+User-agent: FriendlyCrawler
+Disallow: /