diff --git a/api/api/templates/ai.txt b/api/api/templates/ai.txt new file mode 100644 index 00000000000..1472382b3a8 --- /dev/null +++ b/api/api/templates/ai.txt @@ -0,0 +1,60 @@ +# Spawning AI +# Prevent datasets from using the following file types + +User-Agent: * +Disallow: *.txt +Disallow: *.pdf +Disallow: *.doc +Disallow: *.docx +Disallow: *.odt +Disallow: *.rtf +Disallow: *.tex +Disallow: *.wks +Disallow: *.wpd +Disallow: *.wps +Disallow: *.html +Disallow: *.bmp +Disallow: *.gif +Disallow: *.ico +Disallow: *.jpeg +Disallow: *.jpg +Disallow: *.png +Disallow: *.svg +Disallow: *.tif +Disallow: *.tiff +Disallow: *.webp +Disallow: *.aac +Disallow: *.aiff +Disallow: *.amr +Disallow: *.flac +Disallow: *.m4a +Disallow: *.mp3 +Disallow: *.oga +Disallow: *.opus +Disallow: *.wav +Disallow: *.wma +Disallow: *.mp4 +Disallow: *.webm +Disallow: *.ogg +Disallow: *.avi +Disallow: *.mov +Disallow: *.wmv +Disallow: *.flv +Disallow: *.mkv +Disallow: *.py +Disallow: *.js +Disallow: *.java +Disallow: *.c +Disallow: *.cpp +Disallow: *.cs +Disallow: *.h +Disallow: *.css +Disallow: *.php +Disallow: *.swift +Disallow: *.go +Disallow: *.rb +Disallow: *.pl +Disallow: *.sh +Disallow: *.sql +Disallow: / +Disallow: * diff --git a/api/api/templates/robots.txt b/api/api/templates/robots.txt index 811e0134451..65f0e5f876d 100644 --- a/api/api/templates/robots.txt +++ b/api/api/templates/robots.txt @@ -6,3 +6,36 @@ Disallow: /v1/auth/ User-agent: GPTBot Disallow: / + +User-agent: CCBot +Disallow: / + +User-agent: ChatGPT-User +Disallow: / + +User-agent: Google-Extended +Disallow: / + +User-agent: anthropic-ai +Disallow: / + +User-agent: Omgilibot +Disallow: / + +User-agent: Omgili +Disallow: / + +User-agent: FacebookBot +Disallow: / + +User-agent: Diffbot +Disallow: / + +User-agent: Bytespider +Disallow: / + +User-agent: ImagesiftBot +Disallow: / + +User-agent: cohere-ai +Disallow: / diff --git a/api/conf/urls/__init__.py b/api/conf/urls/__init__.py index 8ce45a6ad55..0dd724d6ae4 100644 --- a/api/conf/urls/__init__.py +++ b/api/conf/urls/__init__.py @@ -36,11 +36,13 @@ path("admin/", admin.site.urls), path("healthcheck/", HealthCheck.as_view(), name="health"), path("v1/", include(versioned_paths)), +] + [ path( - "robots.txt/", + f"{file}", TemplateView.as_view( - template_name="robots.txt", + template_name=file, content_type="text/plain", ), - ), + ) + for file in ["robots.txt", "ai.txt"] ] diff --git a/documentation/ai.txt b/documentation/ai.txt new file mode 100644 index 00000000000..1472382b3a8 --- /dev/null +++ b/documentation/ai.txt @@ -0,0 +1,60 @@ +# Spawning AI +# Prevent datasets from using the following file types + +User-Agent: * +Disallow: *.txt +Disallow: *.pdf +Disallow: *.doc +Disallow: *.docx +Disallow: *.odt +Disallow: *.rtf +Disallow: *.tex +Disallow: *.wks +Disallow: *.wpd +Disallow: *.wps +Disallow: *.html +Disallow: *.bmp +Disallow: *.gif +Disallow: *.ico +Disallow: *.jpeg +Disallow: *.jpg +Disallow: *.png +Disallow: *.svg +Disallow: *.tif +Disallow: *.tiff +Disallow: *.webp +Disallow: *.aac +Disallow: *.aiff +Disallow: *.amr +Disallow: *.flac +Disallow: *.m4a +Disallow: *.mp3 +Disallow: *.oga +Disallow: *.opus +Disallow: *.wav +Disallow: *.wma +Disallow: *.mp4 +Disallow: *.webm +Disallow: *.ogg +Disallow: *.avi +Disallow: *.mov +Disallow: *.wmv +Disallow: *.flv +Disallow: *.mkv +Disallow: *.py +Disallow: *.js +Disallow: *.java +Disallow: *.c +Disallow: *.cpp +Disallow: *.cs +Disallow: *.h +Disallow: *.css +Disallow: *.php +Disallow: *.swift +Disallow: *.go +Disallow: *.rb +Disallow: *.pl +Disallow: *.sh +Disallow: *.sql +Disallow: / +Disallow: * diff --git a/documentation/conf.py b/documentation/conf.py index 9c58cfca3e5..0227dc32cf1 100644 --- a/documentation/conf.py +++ b/documentation/conf.py @@ -76,6 +76,7 @@ def add_ext_to_path(): ) html_static_path = ["_static"] +html_extra_path = ["robots.txt", "ai.txt"] html_show_copyright = False diff --git a/documentation/robots.txt b/documentation/robots.txt new file mode 100644 index 00000000000..2739ca4400a --- /dev/null +++ b/documentation/robots.txt @@ -0,0 +1,35 @@ +User-agent: GPTBot +Disallow: / + +User-agent: CCBot +Disallow: / + +User-agent: ChatGPT-User +Disallow: / + +User-agent: Google-Extended +Disallow: / + +User-agent: anthropic-ai +Disallow: / + +User-agent: Omgilibot +Disallow: / + +User-agent: Omgili +Disallow: / + +User-agent: FacebookBot +Disallow: / + +User-agent: Diffbot +Disallow: / + +User-agent: Bytespider +Disallow: / + +User-agent: ImagesiftBot +Disallow: / + +User-agent: cohere-ai +Disallow: / diff --git a/frontend/src/static/ai.txt b/frontend/src/static/ai.txt new file mode 100644 index 00000000000..1472382b3a8 --- /dev/null +++ b/frontend/src/static/ai.txt @@ -0,0 +1,60 @@ +# Spawning AI +# Prevent datasets from using the following file types + +User-Agent: * +Disallow: *.txt +Disallow: *.pdf +Disallow: *.doc +Disallow: *.docx +Disallow: *.odt +Disallow: *.rtf +Disallow: *.tex +Disallow: *.wks +Disallow: *.wpd +Disallow: *.wps +Disallow: *.html +Disallow: *.bmp +Disallow: *.gif +Disallow: *.ico +Disallow: *.jpeg +Disallow: *.jpg +Disallow: *.png +Disallow: *.svg +Disallow: *.tif +Disallow: *.tiff +Disallow: *.webp +Disallow: *.aac +Disallow: *.aiff +Disallow: *.amr +Disallow: *.flac +Disallow: *.m4a +Disallow: *.mp3 +Disallow: *.oga +Disallow: *.opus +Disallow: *.wav +Disallow: *.wma +Disallow: *.mp4 +Disallow: *.webm +Disallow: *.ogg +Disallow: *.avi +Disallow: *.mov +Disallow: *.wmv +Disallow: *.flv +Disallow: *.mkv +Disallow: *.py +Disallow: *.js +Disallow: *.java +Disallow: *.c +Disallow: *.cpp +Disallow: *.cs +Disallow: *.h +Disallow: *.css +Disallow: *.php +Disallow: *.swift +Disallow: *.go +Disallow: *.rb +Disallow: *.pl +Disallow: *.sh +Disallow: *.sql +Disallow: / +Disallow: * diff --git a/frontend/src/static/robots.txt b/frontend/src/static/robots.txt new file mode 100644 index 00000000000..2739ca4400a --- /dev/null +++ b/frontend/src/static/robots.txt @@ -0,0 +1,35 @@ +User-agent: GPTBot +Disallow: / + +User-agent: CCBot +Disallow: / + +User-agent: ChatGPT-User +Disallow: / + +User-agent: Google-Extended +Disallow: / + +User-agent: anthropic-ai +Disallow: / + +User-agent: Omgilibot +Disallow: / + +User-agent: Omgili +Disallow: / + +User-agent: FacebookBot +Disallow: / + +User-agent: Diffbot +Disallow: / + +User-agent: Bytespider +Disallow: / + +User-agent: ImagesiftBot +Disallow: / + +User-agent: cohere-ai +Disallow: /