robots.txt 1.26 KB
Newer Older
Jean-Baptiste Kempf's avatar
Jean-Baptiste Kempf committed
1 2 3 4 5 6 7 8 9 10 11
# $Id$

# Default group: applies to any crawler that is not matched by a more
# specific User-agent group in this file.
# Each Disallow value is matched as a prefix of the URL path.
# NOTE(review): because matching is by prefix, "/pub" also covers paths such
# as "/public" or "/publications", and "/private" covers "/private-foo".
# Confirm that is intended; append a trailing "/" to restrict a rule to the
# directory itself.
User-agent: *
Disallow: /pub
Disallow: /removed
Disallow: /doc/logs
Disallow: /mirror.php
Disallow: /mirror-geo.php
Disallow: /mirror-geo-redirect.php
Disallow: /vlc/download-skins2-go.php
Disallow: /private
12
Disallow: /~videolan/
13 14
Disallow: /developers/vlc/po
Disallow: /developers/vlc-branch/po
Jean-Baptiste Kempf's avatar
Jean-Baptiste Kempf committed
15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43

# Do not crawl CVS and .svn directories, at the site root or at any depth.
# A rule path is compared against the URL path from its first character,
# and URL paths always start with "/"; the previous bare "CVS" and ".svn"
# values therefore never matched any URL. The "*" wildcard (zero or more
# characters) is part of RFC 9309; crawlers that do not implement it treat
# those lines as literal paths, which is harmless.
User-agent: *
Disallow: /CVS/
Disallow: /*/CVS/
Disallow: /.svn/
Disallow: /*/.svn/

# "This robot collects content from the Internet for the sole purpose of
# helping educational institutions prevent plagiarism. [...] we compare
# student papers against the content we find on the Internet to see if we
# can find similarities." (http://www.turnitin.com/robot/crawlerinfo.html)
#  --> fuck off.
User-Agent: TurnitinBot
Disallow: /

# "NameProtect engages in crawling activity in search of a wide range of
# brand and other intellectual property violations that may be of interest
# to our clients." (http://www.nameprotect.com/botinfo.html)
#  --> fuck off.
User-Agent: NPBot
Disallow: /

# "iThenticate® is a new service we have developed to combat the piracy
# of intellectual property and ensure the originality of written work for
# publishers, non-profit agencies, corporations, and newspapers."
# (http://www.slysearch.com/)
#  --> fuck off.
User-Agent: SlySearch
Disallow: /