##ACAP version=1.0
# robots.txt generated at http://www.mcanerin.com
#
# Layout: one directive per line, one blank line between records.
# Section 1 is the classic Robots Exclusion Protocol (REP) rule set.
# Section 2 repeats the same policy as ACAP 1.0 directives; the REP line each
# ACAP directive was derived from is kept as a comment directly below it.

# ---------------------------------------------------------------------------
# Section 1: Robots Exclusion Protocol
# ---------------------------------------------------------------------------

# Crawlers that are blocked from the whole site.
User-agent: twiceler
Disallow: /

# Crawlers with unrestricted access. An empty Disallow permits everything, and
# because these crawlers match their own record they do not use the "*" rules.
User-agent: Gigabot
Disallow:

User-agent: Scrubby
Disallow:

User-agent: Robozilla
Disallow:

User-agent: Nutch
Disallow:

# More crawlers that are blocked from the whole site.
User-agent: baiduspider
Disallow: /

User-agent: naverbot
Disallow: /

User-agent: yeti
Disallow: /

# Default policy for every crawler not named above.
User-agent: *
Crawl-delay: 10
Disallow: /cgi-bin/
Disallow: /cache/
Disallow: /cache2/
# NOTE(review): with the trailing slash these two rules only match URLs under
# /WebResource.axd/... and /ScriptResource.axd/...; ASP.NET normally serves
# these handlers as /WebResource.axd?d=... - confirm the slash is intended.
Disallow: /WebResource.axd/
Disallow: /ScriptResource.axd/
Disallow: /NonUserHtmlViewer.aspx
Disallow: /Search.aspx
# Added to remove google crawl errors on 3/26/2008
Disallow: /LandingControls
Disallow: /RegistrationLockdownControls
Disallow: /members/results.asp
Disallow: /hitzone/results.asp
Disallow: /online
Disallow: /topics
Disallow: /archive
Disallow: /DesktopModules/AdvancedSearch
Disallow: /newspapers1/na0013/5425151/19764460_clean.html
Disallow: /ajxSearch.asmx
# Added to remove google crawl errors on 3/26/2008
# added to stop bots from trying to crawl these folders 4/16/08
Disallow: /Scripts
Disallow: /Images
Disallow: /css

# Sitemaps (these lines are independent of any User-agent record).
Sitemap: http://www.newspaperarchive.com/na_web_pages.xml
Sitemap: http://www.newspaperarchive.com/sm_index_1.xml
Sitemap: http://www.newspaperarchive.com/sm_index_2.xml
Sitemap: http://www.newspaperarchive.com/sm_index_3.xml
Sitemap: http://www.newspaperarchive.com/sm_index_4.xml
Sitemap: http://www.newspaperarchive.com/bth_archive.xml
Sitemap: http://www.newspaperarchive.com/dp_archive.xml

# ---------------------------------------------------------------------------
# Section 2: ACAP 1.0 equivalent of the rules above
# ---------------------------------------------------------------------------
# robots.txt generated at http://www.mcanerin.com

ACAP-crawler: twiceler
# User-agent: twiceler
ACAP-disallow-crawl: /
# Disallow: /

# The four crawlers below have an empty "Disallow:" (allow everything) in
# Section 1. Each gets an explicit allow here so that its record is complete
# and does not run into the following baiduspider record and its site-wide
# disallow.
ACAP-crawler: Gigabot
# User-agent: Gigabot
ACAP-allow-crawl: /
# Disallow:

ACAP-crawler: Scrubby
# User-agent: Scrubby
ACAP-allow-crawl: /
# Disallow:

ACAP-crawler: Robozilla
# User-agent: Robozilla
ACAP-allow-crawl: /
# Disallow:

ACAP-crawler: Nutch
# User-agent: Nutch
ACAP-allow-crawl: /
# Disallow:

ACAP-crawler: baiduspider
# User-agent: baiduspider
ACAP-disallow-crawl: /
# Disallow: /

ACAP-crawler: naverbot
# User-agent: naverbot
ACAP-disallow-crawl: /
# Disallow: /

ACAP-crawler: yeti
# User-agent: yeti
ACAP-disallow-crawl: /
# Disallow: /

ACAP-crawler: *
# User-agent: *
ACAP-disallow-crawl: /cgi-bin/
# Disallow: /cgi-bin/
ACAP-disallow-crawl: /cache/
# Disallow: /cache/
ACAP-disallow-crawl: /cache2/
# Disallow: /cache2/
ACAP-disallow-crawl: /WebResource.axd/
# Disallow: /WebResource.axd/
ACAP-disallow-crawl: /ScriptResource.axd/
# Disallow: /ScriptResource.axd/
ACAP-disallow-crawl: /NonUserHtmlViewer.aspx
# Disallow: /NonUserHtmlViewer.aspx
ACAP-disallow-crawl: /Search.aspx
# Disallow: /Search.aspx
# Added to remove google crawl errors on 3/26/2008
ACAP-disallow-crawl: /LandingControls
# Disallow: /LandingControls
ACAP-disallow-crawl: /RegistrationLockdownControls
# Disallow: /RegistrationLockdownControls
ACAP-disallow-crawl: /members/results.asp
# Disallow: /members/results.asp
ACAP-disallow-crawl: /hitzone/results.asp
# Disallow: /hitzone/results.asp
ACAP-disallow-crawl: /online
# Disallow: /online
ACAP-disallow-crawl: /topics
# Disallow: /topics
ACAP-disallow-crawl: /archive
# Disallow: /archive
ACAP-disallow-crawl: /DesktopModules/AdvancedSearch
# Disallow: /DesktopModules/AdvancedSearch
ACAP-disallow-crawl: /newspapers1/na0013/5425151/19764460_clean.html
# Disallow: /newspapers1/na0013/5425151/19764460_clean.html
ACAP-disallow-crawl: /ajxSearch.asmx
# Disallow: /ajxSearch.asmx
# Added to remove google crawl errors on 3/26/2008
# added to stop bots from trying to crawl these folders 4/16/08
ACAP-disallow-crawl: /Scripts
# Disallow: /Scripts
ACAP-disallow-crawl: /Images
# Disallow: /Images
ACAP-disallow-crawl: /css
# Disallow: /css