# jamesqi 2014-11-27 14:27
# foreign.mingluji.com
# Add Start
#
# NOTICE: The collection of content and other data on this site through automated means, including any device, tool, or process designed to data mine or scrape content, is prohibited except (1) for the purpose of search engine indexing or artificial intelligence retrieval augmented generation or (2) with express written permission from this site’s operator.
# To request permission to license our intellectual property and/or other materials, please contact this site’s operator directly.

# BEGIN Cloudflare Managed content
User-agent: Mediapartners-Google
Disallow:

User-agent: Baiduspider
User-agent: 360spider
User-agent: Yisouspider
User-agent: Sogou web spider
User-agent: Sogoubot
User-agent: Bytespider
User-agent: haosouspider
User-agent: YodaoBot
User-agent: Bingbot
User-agent: Googlebot
User-agent: msnbot
User-agent: *
Disallow: /*Special:UserLogin
#Disallow: /Talk
Disallow: /%E8%AE%A8%E8%AE%BA
Disallow: /thumb.php
Disallow: /index.php
# 2018-12-18: commented out the line below because Google Webmaster Tools could not fetch resources via load.php
# Disallow: /load.php
# Disallow: /images/
Disallow: /skins/
Disallow: /Special
Disallow: /%E7%89%B9%E6%AE%8A
Disallow: /*action=
Disallow: /*oldid=
Disallow: /*diff=
Disallow: /*printable=
Disallow: /1027280/ # Directory we do not want crawled.

User-agent: Amazonbot
Disallow: /

User-agent: SemrushBot
Disallow: /

User-agent: ClaudeBot
Disallow: /

User-agent: GPTBot
Disallow: /

User-agent: ChatGPT
Disallow: /
# END Cloudflare Managed Content

# sitemap start
Sitemap: https://foreign.mingluji.com/sitemap.xml
Sitemap: https://foreign.mingluji.com/rss.xml
# sitemap end

Crawl-delay: 10
Allow: /index.php?title=%E7%89%B9%E6%AE%8A:%E6%9C%80%E8%BF%91%E6%9B%B4%E6%94%B9
Allow: /index.php?title=%E7%89%B9%E6%AE%8A:%E6%9C%80%E6%96%B0%E9%A1%B5%E9%9D%A2
Allow: /index.php?title=Special:%E6%9C%80%E8%BF%91%E6%9B%B4%E6%94%B9
Allow: /index.php?title=Special:%E6%9C%80%E6%96%B0%E9%A1%B5%E9%9D%A2
Allow: /index.php?title=Special:Recentchanges
Allow: /index.php?title=Special:Newpages
Allow: /index.php?title=Category:
Allow: /index.php?title=%E5%88%86%E7%B1%BB:

##Crawl-delay: 300 # wait 300 seconds between successive requests to the same server, for Yahoo Slurp
##Request-rate: 1/10 # maximum rate is one page every 10 seconds
#Visit-time: 0000-0800
#Request-rate: 1/20s 1020-1200 # between 10:20 and 12:00, 1 visit every 20 seconds
# Add End

#
# Crawlers that are kind enough to obey, but which we'd rather not have
# unless they're feeding search engines.
User-agent: UbiCrawler
Disallow: /

User-agent: DOC
Disallow: /

User-agent: Zao
Disallow: /

# Some bots are known to be trouble, particularly those designed to copy
# entire sites. Please obey robots.txt.
User-agent: sitecheck.internetseer.com
Disallow: /

User-agent: Zealbot
Disallow: /

User-agent: MSIECrawler
Disallow: /

User-agent: SiteSnagger
Disallow: /

User-agent: WebStripper
Disallow: /

User-agent: WebCopier
Disallow: /

User-agent: Fetch
Disallow: /

User-agent: Offline Explorer
Disallow: /

User-agent: Teleport
Disallow: /

User-agent: TeleportPro
Disallow: /

User-agent: WebZIP
Disallow: /

User-agent: linko
Disallow: /

User-agent: HTTrack
Disallow: /

User-agent: Microsoft.URL.Control
Disallow: /

User-agent: Xenu
Disallow: /

User-agent: larbin
Disallow: /

User-agent: libwww
Disallow: /

User-agent: ZyBORG
Disallow: /

User-agent: Download Ninja
Disallow: /

# Sorry, wget in its recursive mode is a frequent problem.
# Please read the man page and use it properly; there is a
# --wait option you can use to set the delay between hits,
# for instance.
User-agent: wget
Disallow: /
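# An illustrative invocation of GNU wget's --wait option (the 10-second
# delay is only a sample value, not a recommendation to mirror this site):
#   wget --recursive --wait=10 https://foreign.mingluji.com/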
# The 'grub' distributed client has been *very* poorly behaved.
User-agent: grub-client
Disallow: /

# Doesn't follow robots.txt anyway, but...
User-agent: k2spider
Disallow: /

# Hits many times per second, not acceptable
# http://www.nameprotect.com/botinfo.html
User-agent: NPBot
Disallow: /

# A capture bot, downloads gazillions of pages with no public benefit
# http://www.webreaper.net/
User-agent: WebReaper
Disallow: /

# Don't allow the Wayback Machine to index user pages
#User-agent: ia_archiver
#Disallow: /wiki/User
#Disallow: /wiki/Benutzer
#
# Friendly, low-speed bots are welcome viewing article pages, but not
# dynamically-generated pages please.
#
# Inktomi's "Slurp" can read a minimum delay between hits; if your
# bot supports such a thing using the 'Crawl-delay' or another
# instruction, please let us know.
#
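# A sketch of what a per-bot group with such an instruction looks like
# (left commented out; "Slurp" and the 5-second value are only samples):
#User-agent: Slurp
#Crawl-delay: 5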