robots-https.txt 1.31 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44
# $Id$

Sitemap: https://www.videolan.org/sitemap-https.xml

# Rules for every crawler that is not matched by a more specific group below.
# All "User-agent: *" rules live in this single group: some parsers only
# honour the first group that matches a given user agent.
User-agent: *
Disallow: /pub
Disallow: /removed
Disallow: /doc/logs
Disallow: /mirror.php
Disallow: /mirror-geo.php
Disallow: /mirror-geo-redirect.php
Disallow: /vlc/download-skins2-go.php
Disallow: /private
Disallow: /~videolan/
Disallow: /developers/vlc/po
Disallow: /developers/vlc-branch/po
# Do not crawl CVS and .svn directories, at any depth.
# Rule paths are matched from the start of the URL path and must begin with
# "/", so a bare "CVS" or ".svn" would not match; "*" matches any characters.
Disallow: /CVS/
Disallow: /*/CVS/
Disallow: /.svn/
Disallow: /*/.svn/

# "This robot collects content from the Internet for the sole purpose of
# helping educational institutions prevent plagiarism. [...] we compare
# student papers against the content we find on the Internet to see if we
# can find similarities." (http://www.turnitin.com/robot/crawlerinfo.html)
#  --> fuck off.
User-Agent: TurnitinBot
Disallow: /

# "NameProtect engages in crawling activity in search of a wide range of
# brand and other intellectual property violations that may be of interest
# to our clients." (http://www.nameprotect.com/botinfo.html)
#  --> fuck off.
User-Agent: NPBot
Disallow: /

# "iThenticate® is a new service we have developed to combat the piracy
# of intellectual property and ensure the originality of written work for
# publishers, non-profit agencies, corporations, and newspapers."
# (http://www.slysearch.com/)
#  --> fuck off.
User-Agent: SlySearch
Disallow: /