#
# robots.txt - Crawler access control
# Updated: August 2025
#
# Format reminder: one directive per line. A '#' starts a comment that runs
# to the end of the line, so directives must never share a line with a
# leading '#'.
#

# Default policy for every crawler that has no dedicated group below:
# the whole site is crawlable except the ad-slot and preview paths.
User-agent: *
Allow: /
Disallow: /300x250_1/
Disallow: /300x250_2/
Disallow: /300x250_3/
Disallow: /300x250_4/
Disallow: /layer/
Disallow: /Skyscrapper/
Disallow: /bottom/
Disallow: /footer/
Disallow: /skin/
Disallow: /top/
Disallow: /customer-preview/

# Bots that consume a lot of resources with no SEO benefit
User-agent: HTTrack
Disallow: /

User-agent: WebZIP
Disallow: /

User-agent: WebCopier
Disallow: /

User-agent: Teleport
Disallow: /

User-agent: TeleportPro
Disallow: /

User-agent: Offline Explorer
Disallow: /

User-agent: SiteSnagger
Disallow: /

User-agent: WebStripper
Disallow: /

User-agent: WebReaper
Disallow: /

User-agent: wget
Disallow: /

User-agent: larbin
Disallow: /

User-agent: libwww
Disallow: /

# Crawlers that are explicitly granted full access.
# NOTE(review): a crawler that matches its own group ignores the '*' group,
# so these bots are NOT bound by the ad-path Disallow rules above - confirm
# that this is intended.

# An empty Disallow value means "nothing is disallowed".
User-agent: grapeshot
Disallow:

User-agent: PetalBot
Allow: /

User-agent: facebookexternalhit
Allow: /

# Sitemaps and news feeds (Sitemap lines apply globally, not per group)
Sitemap: https://www.publinews.gt/arc/outboundfeeds/google-news-feed/?outputType=xml
Sitemap: https://www.publinews.gt/arc/outboundfeeds/sitemap-index/?outputType=xml
Sitemap: https://www.publinews.gt/arc/outboundfeeds/sitemap/latest/?outputType=xml
Sitemap: https://www.publinews.gt/arc/outboundfeeds/news/deportes/?outputType=xml
Sitemap: https://www.publinews.gt/arc/outboundfeeds/news/entretenimiento/?outputType=xml
Sitemap: https://www.publinews.gt/arc/outboundfeeds/news/noticias/?outputType=xml