diff --git a/lists/mozilla-top500/list.json b/lists/mozilla-top500/list.json index 39560f2..cf06db4 100644 --- a/lists/mozilla-top500/list.json +++ b/lists/mozilla-top500/list.json @@ -4,6 +4,7 @@ "name": "Top 500 domains and pages from Mozilla", "type": "hostname", "list": [ + "123-reg-expired.co.uk", "163.com", "1688.com", "1and1.com", @@ -15,11 +16,15 @@ "a8.net", "aarp.org", "abc.net.au", + "abcnews.go.com", "about.com", "aboutads.info", "aboutcookies.org", + "account.1und1.de", + "accounts.google.com/ServiceLogin?service=jotspot", "accuweather.com", "acm.org", + "add.my.yahoo.com/content", "addthis.com", "addtoany.com", "admin.ch", @@ -40,20 +45,26 @@ "android.com", "aol.com", "apache.org", + "api.whatsapp.com", "apple.com", + "apple.com/mac", "archive.org", "arstechnica.com", "artisteer.com", "arxiv.org", "athemes.com", + "athemes.com/theme/sydney", "att.com", + "automattic.com", "azurewebsites.net", + "b.hatena.ne.jp/entry", "baidu.com", "bandcamp.com", "barnesandnoble.com", "bbb.org", "bbc.co.uk", "bbc.com", + "bbs.dedecms.com", "behance.net", "beian.gov.cn", "berkeley.edu", @@ -64,6 +75,7 @@ "bit.ly", "bitbucket.org", "bizjournals.com", + "bizvektor.com", "blackberry.com", "blogger.com", "blogspot.co.uk", @@ -76,14 +88,18 @@ "bmj.com", "booking.com", "box.com", + "br.wordpress.org", + "brokercheck.finra.org", "bund.de", "businessinsider.com", "businesswire.com", "buydomains.com", "buzzfeed.com", "ca.gov", + "calendar.google.com/calendar/render", "cam.ac.uk", "canada.ca", + "catchthemes.com", "cbc.ca", "cbslocal.com", "cbsnews.com", @@ -93,31 +109,52 @@ "chicagotribune.com", "cisco.com", "clickbank.net", + "cloud.feedly.com", "cloudfront.net", "cmu.edu", + "cn.wordpress.org", "cnbc.com", "cnet.com", "cnn.com", + "codex.wordpress.org", "colorlib.com", "columbia.edu", "congress.gov", + "connect.mail.ru/share", "constantcontact.com", "cornell.edu", "cpanel.com", "cpanel.net", "creativecommons.org", + "creativecommons.org/licenses/by-nc-sa/3.0", + "creativecommons.org/licenses/by-sa/2.0", + "creativecommons.org/licenses/by-sa/3.0", + "creativecommons.org/licenses/by/2.0", + "creativecommons.org/licenses/by/3.0", + "creativecommons.org/licenses/by/4.0", "cryoutcreations.eu", + "cyberchimps.com/responsive-theme", "dailymail.co.uk", "dailymotion.com", + "de-de.facebook.com/policy.php", + "de.wordpress.org", "debian.org", "dedecms.com", + "del.icio.us/post", "delicious.com", "deloitte.com", + "devblog.plesk.com", + "developers.facebook.com/docs/plugins", + "developers.google.com/analytics/devguides/collection/analyticsjs/cookie-usage", + "developers.google.com/analytics/devguides/collection/analyticsjs/cookie-usage?hl=es&csw=1", "deviantart.com", "dhs.gov", "dictionary.com", "digg.com", + "digg.com/submit", + "discuz.qq.com/service/security", "disqus.com", + "disqus.com/?ref_noscript", "dmca.com", "doi.org", "dol.gov", @@ -135,6 +172,8 @@ "duke.edu", "e-recht24.de", "ebay.com", + "ec.europa.eu/consumers/odr", + "ec.europa.eu/info/departments/justice-and-consumers_en", "economist.com", "ed.gov", "eepurl.com", @@ -146,6 +185,7 @@ "engadget.com", "entrepreneur.com", "epa.gov", + "es.wordpress.org", "etracker.de", "etsy.com", "europa.eu", @@ -155,6 +195,7 @@ "example.com", "exblog.jp", "facebook.com", + "facebook.com/sharer.php", "fao.org", "fastcompany.com", "fb.com", @@ -164,6 +205,9 @@ "fcc.gov", "fda.gov", "feedburner.com", + "feedburner.google.com", + "feedjit.com", + "feedly.com/index.html", "flickr.com", "forbes.com", "fortune.com", @@ -174,15 +218,26 @@ "ftc.gov", "g.co", "gartner.com", + "generatepress.com", "geocities.jp", "gesetze-im-internet.de", + "get.adobe.com/de/reader", + "get.adobe.com/flashplayer", + "get.adobe.com/jp/reader", + "get.adobe.com/reader", + "getbootstrap.com", "getpocket.com", + "getpocket.com/save", "giphy.com", "github.com", "github.io", "globo.com", + "gmail.com", + "gmpg.org/xfn", "gnu.org", "go.com", + "go.cpanel.net/cleardnscache", + "go.microsoft.com/fwlink/?linkid=66138&clcid=0x409", "godaddy.com", "gofundme.com", "goo.gl", @@ -209,22 +264,32 @@ "gotowebinar.com", "gpo.gov", "gravatar.com", + "gravatar.com/site/signup", + "gtranslate.net", "guardian.co.uk", "harvard.edu", "hatena.ne.jp", "hbr.org", + "help.opera.com/Windows/10.00/it/cookies.html", "hhs.gov", "hibu.com", "hilton.com", "histats.com", "hollywoodreporter.com", "home.pl", + "home.pl/kontakt", + "homeads.home.pl/ads/www/delivery/ck.php?n=f90e22f", "homestead.com", + "hootsuite.com", "hostgator.com", + "hostingmanager.secureserver.net", "hostnet.nl", "house.gov", "houzz.com", "hp.com", + "html5up.net", + "httpd.apache.org", + "httpd.apache.org/docs/2.4/mod/mod_userdir.html", "hubspot.com", "huffingtonpost.com", "ibm.com", @@ -244,36 +309,54 @@ "irs.gov", "iso.org", "issuu.com", + "it.wordpress.org", "iubenda.com", + "ja.wordpress.org", + "jalbum.net", + "jalbum.net/en", "japanpost.jp", "java.com", "jiathis.com", + "jigsaw.w3.org/css-validator", + "jigsaw.w3.org/css-validator/check/referer", + "jigsaw.w3.org/css-validator/check/referer?profile=css3", "jimdo.com", + "joomla-extensions.kubik-rubik.de", "joomla.org", + "jquery.com", "jugem.jp", "justgiving.com", "justice.gov", + "kb.plesk.com", "kickstarter.com", "latimes.com", + "lazaworx.com", "libsyn.com", + "lifestream.aol.com", "line.me", "linkedin.com", "list-manage.com", "list-manage1.com", + "listings.homestead.com", "live.com", "livedoor.jp", "livejournal.com", "loc.gov", + "logc204.xiti.com/go.click?xts=453041&s2=14&p=homepage::kundendefault::index::button-mehr-info&clic=N&type=click", "loopia.com", "loopia.se", "macromedia.com", + "mail.google.com/mail", "mail.ru", "mailchimp.com", "mapquest.com", + "maps.google.com", + "maps.google.com/maps?f=d&source=s_d&daddr=&saddr=&hl=en&geocode=&mra=ls&sll=37.0625,-95.677068&sspn=49.176833,114.257812&ie=UTF8&t=h&z=12", "marriott.com", "mashable.com", "medium.com", "meetup.com", + "megagroup.ru", "mhlw.go.jp", "microsoft.com", "miibeian.gov.cn", @@ -282,6 +365,7 @@ "mit.edu", "mlb.com", "mlit.go.jp", + "mobirise.com", "moodle.org", "moz.com", "mozilla.com", @@ -291,6 +375,7 @@ "mynavi.jp", "myshopify.com", "myspace.com", + "myspace.com/Modules/PostTo/Pages", "mysql.com", "namejet.com", "nasa.gov", @@ -299,7 +384,9 @@ "naver.com", "nazwa.pl", "nbcnews.com", + "netcn.console.aliyun.com/core/host/list2", "netflix.com", + "netscape.aol.com", "netscape.com", "networkadvertising.org", "networksolutions.com", @@ -310,6 +397,7 @@ "nifty.com", "nih.gov", "nist.gov", + "nl.wordpress.org", "noaa.gov", "npr.org", "nps.gov", @@ -323,12 +411,16 @@ "one.com", "opencart.com", "opensource.org", + "opensource.org/licenses/gpl-license.php", "opera.com", + "optout.networkadvertising.org", "oracle.com", "oreilly.com", "oup.com", + "outlook.live.com/owa", "ow.ly", "ox.ac.uk", + "panel.dreamhost.com", "parallels.com", "paypal.com", "pbs.org", @@ -337,9 +429,22 @@ "php.net", "phpbb.com", "pinterest.com", + "pinterest.com/pin/create/button", + "pinterest.com/pin/create/button/?description=", + "pinterest.com/pin/create/button/?media=", + "pixabay.com", + "pl.wordpress.org", + "planet.wordpress.org", "playstation.com", "plesk.com", "plos.org", + "plus.google.com", + "plus.google.com/communities/109881979300958500728", + "plus.google.com/share", + "plus.google.com/share?url=", + "plusone.google.com/_/+1/confirm?hl=en", + "presscustomizr.com", + "presscustomizr.com/customizr", "prestashop.com", "prnewswire.com", "psu.edu", @@ -351,10 +456,13 @@ "rambler.ru", "redcross.org", "reddit.com", + "reddit.com/submit", "reference.com", "researchgate.net", "reuters.com", "rs6.net", + "ru.wordpress.org", + "safeharbor.export.gov/companyinfo.aspx?id=16626", "sagepub.com", "sakura.ne.jp", "samsung.com", @@ -373,9 +481,11 @@ "si.edu", "sina.com.cn", "siteorigin.com", + "sites.google.com", "skype.com", "slate.com", "slideshare.net", + "smallbusiness.yahoo.com/webhosting", "snapchat.com", "sogou.com", "sohu.com", @@ -390,34 +500,64 @@ "stanford.edu", "starwoodhotels.com", "statcounter.com", + "statcounter.com/free-hit-counter", + "statcounter.com/free-web-stats", + "statcounter.com/shopify", + "statcounter.com/tumblr", "state.gov", "steampowered.com", "storify.com", "studiopress.com", "stumbleupon.com", "sun.com", + "support.apple.com/it-it/HT201265", + "support.apple.com/kb/PH5042", + "support.apple.com/kb/ph5042", + "support.google.com/analytics/answer/6004245?hl=de", + "support.google.com/answer/23852", + "support.google.com/chrome/answer/95647?hl=es", + "support.google.com/chrome/answer/95647?hl=it", + "support.google.com/chrome/bin/answer.py?hl=es&answer=95647", + "support.microsoft.com/help/17442", + "support.microsoft.com/windows", + "support.mozilla.org/es/kb/habilitar-y-deshabilitar-cookies-que-los-sitios-we", + "support.mozilla.org/it/kb/Attivare%20e%20disattivare%20i%20cookie", + "support.plesk.com", + "support.plesk.com/hc", "surveymonkey.com", "symantec.com", "t.co", "t.me", + "talk.plesk.com", "tandfonline.com", "taobao.com", "teamviewer.com", "techcrunch.com", + "technorati.com/faves", "ted.com", "telegram.me", "telegraph.co.uk", + "templated.co", "theatlantic.com", "theguardian.com", "thehill.com", + "theme-fusion.com", "themeforest.net", "themegrill.com", + "themegrill.com/themes/colormag", + "themegrill.com/themes/spacious", "thenextweb.com", "theverge.com", "ticketmaster.com", "time.com", "tmall.com", "today.com", + "tools.google.com/dlpage/gaoptout", + "tools.google.com/dlpage/gaoptout?hl=de", + "tools.google.com/dlpage/gaoptout?hl=en", + "tools.google.com/dlpage/gaoptout?hl=it", + "top100.rambler.ru/top100", + "translate.google.com", "tripadvisor.co.uk", "tripadvisor.com", "trustpilot.com", @@ -425,6 +565,15 @@ "tumblr.com", "twitch.tv", "twitter.com", + "twitter.com/Plesk", + "twitter.com/account/settings", + "twitter.com/home", + "twitter.com/intent/tweet", + "twitter.com/intent/tweet?text=", + "twitter.com/onecom", + "twitter.com/privacy", + "twitter.com/share", + "twitter.com/share?text=", "typeform.com", "typepad.com", "uchicago.edu", @@ -448,13 +597,21 @@ "ustream.tv", "utexas.edu", "va.gov", + "validator.w3.org", + "validator.w3.org/check", + "validator.w3.org/check/referer", + "validator.w3.org/check?uri=referer", "variety.com", "venturebeat.com", "vice.com", "vimeo.com", + "vinaora.com", "visma.com", "vk.com", + "vk.com/login?act=vkcomredirect&to=c2hhcmUucGhw", + "vk.com/share.php", "vkontakte.ru", + "vkontakte.ru/share.php", "w3.org", "w3schools.com", "warnerbros.com", @@ -473,22 +630,320 @@ "wikimedia.org", "wikipedia.org", "wiley.com", + "windows.microsoft.com/en-us/internet-explorer/products/ie/home", + "windows.microsoft.com/es-es/windows7/how-to-manage-cookies-in-internet-explorer-9", + "windows.microsoft.com/it-it/windows-vista/block-or-allow-cookies", "windowsphone.com", "wired.com", "wisc.edu", "wix.com", "wixsite.com", + "woocommerce.com", "wordpress.com", + "wordpress.com/?ref=footer_blog", + "wordpress.com/?ref=footer_website", + "wordpress.com/themes", "wordpress.org", + "wordpress.org/extend/ideas", + "wordpress.org/extend/plugins", + "wordpress.org/extend/themes", + "wordpress.org/news", + "wordpress.org/plugins", + "wordpress.org/plugins/asesor-cookies-para-la-ley-en-espana", + "wordpress.org/support", + "wordpress.org/support/forum/requests-and-feedback", + "wordpress.org/themes", "worldbank.org", + "wowslider.com", "wp.com", "wp.me", + "wpfr.net", "wsimg.com", "wsj.com", "wufoo.com", "wunderground.com", + "www-redirect.ext.hp.com", + "www.000webhost.com/migrate?static=true", + "www.163.com", + "www.1und1.de", + "www.22.cn", + "www.4.cn/company/contactus", + "www.51.la/?19089091", + "www.aboutads.info/choices", + "www.aboutcookies.org", + "www.addthis.com/bookmark.php", + "www.addthis.com/bookmark.php?v=20", + "www.addthis.com/bookmark.php?v=250", + "www.addtoany.com/share", + "www.addtoany.com/share_save", + "www.adobe.com", + "www.adobe.com/go/getflash", + "www.adobe.com/jp/products/acrobat/readstep2.html", + "www.adobe.com/products/acrobat/readstep.html", + "www.adobe.com/products/acrobat/readstep2.html", + "www.adobe.com/shockwave/download/download.cgi?P1_Prod_Version=ShockwaveFlash&promoid=BIOW", + "www.alipay.com", + "www.allaboutcookies.org", + "www.amazon.com", + "www.andersnoren.se", + "www.aol.com", + "www.apache.org", + "www.apache.org/licenses/LICENSE-2.0", + "www.apple.com", + "www.apple.com/mac", + "www.apple.com/safari", + "www.artisteer.com/?p=joomla_templates", + "www.authorize.net", + "www.axs.com", + "www.baidu.com", + "www.bbc.co.uk", + "www.bing.com", + "www.blogger.com", + "www.bluehost.com", + "www.booking.com", + "www.cdc.gov", + "www.chronoengine.com", + "www.cia.gov/redirects/ciaredirect.html", + "www.cisco.com", + "www.cnn.com", + "www.comsenz.com", + "www.cryoutcreations.eu", + "www.dedecms.com", + "www.discuz.net", + "www.domainname.de", + "www.domainnameshop.com", + "www.domainnameshop.com/whois", + "www.domeneshop.no", + "www.dreamhost.com", + "www.dropbox.com", + "www.drupal.org", + "www.e-recht24.de", + "www.e-recht24.de/artikel/datenschutz/6590-facebook-like-button-datenschutz-disclaimer.html", + "www.e-recht24.de/artikel/datenschutz/6635-datenschutz-rechtliche-risiken-bei-der-nutzung-von-google-analytics-und-googleadsense.html", + "www.e-recht24.de/impressum-generator.html", + "www.e-recht24.de/muster-datenschutzerklaerung.html", + "www.e-recht24.de/muster-disclaimer.htm", + "www.e-recht24.de/muster-disclaimer.html", + "www.ebay.com", + "www.elegantthemes.com", + "www.enable-javascript.com", + "www.ename.com.cn/custompage/custompagestyle", + "www.enom.com/help/Default.aspx", + "www.epa.gov", + "www.example.com", + "www.facebook.com", + "www.facebook.com/Onecom", + "www.facebook.com/Plesk", + "www.facebook.com/about/privacy", + "www.facebook.com/business/dashboard", + "www.facebook.com/facebook", + "www.facebook.com/help/cookies", + "www.facebook.com/home.php", + "www.facebook.com/policy.php", + "www.facebook.com/share.php", + "www.facebook.com/sharer.php", + "www.facebook.com/sharer.php?t=", + "www.facebook.com/sharer/sharer.php", + "www.facebook.com/sharer/sharer.php?src=sdkpreparse", + "www.facebook.com/sharer/sharer.php?u=", + "www.fda.gov", + "www.finra.org", + "www.flickr.com", + "www.format.com/l/your_new_portfolio", + "www.forpsi.com", + "www.freecsstemplates.org", + "www.gimp.org", + "www.gmail.com", + "www.gnu.org", + "www.gnu.org/copyleft/gpl.html", + "www.gnu.org/licenses/gpl-2.0.html", + "www.gnu.org/licenses/gpl.html", + "www.godaddy.com", + "www.godaddy.com/hosting/website-builder.aspx?isc=wscfwst304", + "www.godaddy.com/websites/website-builder", + "www.godaddy.com/websites/website-builder?cvosrc=assets.wsb_badge.wsb_badge", + "www.google.co.jp", + "www.google.co.uk", + "www.google.com", + "www.google.com/a/UniversalLogin?service=jotspot", + "www.google.com/analytics", + "www.google.com/analytics/learn/privacy.html", + "www.google.com/analytics/terms/de.html", + "www.google.com/calendar/render", + "www.google.com/chrome", + "www.google.com/gmail", + "www.google.com/intl/de/+/policy/+1button.html", + "www.google.com/intl/de/analytics/privacyoverview.html", + "www.google.com/intl/de/policies/privacy", + "www.google.com/intl/en/policies/privacy", + "www.google.com/intl/it/policies/privacy", + "www.google.com/policies/privacy", + "www.google.com/policies/privacy/ads", + "www.google.com/policies/technologies/cookies", + "www.google.com/privacy_ads.html", + "www.google.com/search?q=whois", + "www.google.com/support/bin/answer.py?answer=23852", + "www.google.de", + "www.google.de/intl/de/policies/privacy", + "www.google.it/intl/it/policies/privacy", + "www.gosuslugi.ru", + "www.gov.cn", "www.gov.uk", + "www.graphene-theme.com", + "www.haosou.com", + "www.histats.com", + "www.homestead.com", + "www.hostgator.com", + "www.hotmail.com", + "www.hp.com", + "www.huffingtonpost.com", + "www.hupso.com/share", + "www.ibm.com", + "www.ifeng.com", + "www.imdb.com", + "www.instagram.com", + "www.iqiyi.com", + "www.irs.gov", + "www.ispconfig.org", + "www.jd.com", + "www.jiathis.com/share", + "www.joomla.org", + "www.joomlatune.com", + "www.joomshaper.com", + "www.jssor.com", + "www.kickstarter.com", + "www.kriesi.at", + "www.linkedin.com", + "www.linkedin.com/cws/share", + "www.linkedin.com/legal/privacy-policy", + "www.linkedin.com/shareArticle?mini=true", + "www.linkwithin.com", + "www.liveinternet.ru/click", + "www.loopia.se", + "www.luminate.com/webhosting", + "www.lycos.com", + "www.macromedia.com/go/getflashplayer", + "www.mapquest.com", + "www.mapy.cz", + "www.mediawiki.org", + "www.mhthemes.com", + "www.microsoft.com", + "www.microsoft.com/en-us/windows", + "www.miibeian.gov.cn", + "www.miitbeian.gov.cn", + "www.mijndomein.nl", + "www.mijndomein.nl/producten", + "www.mijndomein.nl/producten/websitemaker", + "www.mozilla.org", + "www.mozilla.org/en-US", + "www.mozilla.org/en-US/firefox/new", + "www.mozilla.org/firefox/new", + "www.msn.com", + "www.myspace.com", + "www.myspace.com/Modules/PostTo/Pages", + "www.mysql.com", + "www.nasa.gov", + "www.netvibes.com/subscribe.php", + "www.networkadvertising.org/choices", + "www.networkadvertising.org/managing/opt_out.asp", + "www.networksolutions.com", + "www.nginx.com", "www.nhs.uk", + "www.nytimes.com", + "www.odin.com", + "www.olark.com/?welcome", + "www.one.com/en", + "www.opencart.com", + "www.opera.com", + "www.oracle.com/index.html", + "www.ovh.com", + "www.pagesjaunes.fr", + "www.parallels.com", + "www.parallels.com/intro", + "www.parallels.com/plesk", + "www.parallels.com/products/automation/intro", + "www.parallels.com/products/containers/intro", + "www.parallels.com/products/desktop/intro", + "www.parallels.com/products/desktop/pd4wl/intro", + "www.parallels.com/products/panel/intro", + "www.parallels.com/products/server/intro", + "www.paypal.com", + "www.people.com.cn", + "www.phoca.cz", + "www.phoca.cz/phocadownload", + "www.phoca.cz/phocagallery", + "www.php.net", + "www.phpbb.com", + "www.phpbb.com/ideas", + "www.pinterest.com", + "www.pinterest.com/pin/create/button", + "www.pinterest.com/pin/create/button/?url=&media=&description=", + "www.plesk.com", + "www.plesk.com/blog", + "www.prestashop.com", + "www.python.org", + "www.qq.com", + "www.redcross.org", + "www.reddit.com", + "www.reuters.com", + "www.safenames.net/?ref=lndrdr", + "www.shinystat.com", + "www.shinystat.com/it", + "www.simplemachines.org", + "www.simplemachines.org/about/smf/license.php", + "www.sina.com.cn", + "www.sipc.org", + "www.skype.com", + "www.slideshare.net", + "www.so.com", + "www.sogou.com", + "www.sohu.com", + "www.statcounter.com", + "www.studiopress.com", + "www.stumbleupon.com", + "www.stumbleupon.com/submit", + "www.taobao.com", + "www.toplist.cz", + "www.tradeindia.com", + "www.tripadvisor.co.uk", + "www.tripadvisor.com", + "www.tucows.com", + "www.tucowsdomains.com", + "www.tumblr.com", + "www.tumblr.com/share/link", + "www.twitter.com", + "www.twitter.com/share", + "www.ubuntu.com", + "www.ucoz.ru", + "www.usatoday.com", + "www.ustream.tv", + "www.value-domain.com", + "www.vektor-inc.co.jp", + "www.visma.com", + "www.w3.org", + "www.washingtonpost.com", + "www.webmd.com", + "www.weebly.com", + "www.whitehouse.gov", + "www.who.int/en", + "www.wikipedia.org", + "www.woothemes.com", + "www.wordpress-fr.net", + "www.wordpress.com", + "www.wordpress.org", + "www.wsj.com", + "www.xing.com/app/share?op=data_protection", + "www.xinhuanet.com", + "www.xml-sitemaps.com", + "www.yahoo.co.jp", + "www.yahoo.com", + "www.yelp.com", + "www.yootheme.com", + "www.youku.com", + "www.youronlinechoices.com", + "www.youronlinechoices.com/uk/your-ad-choices", + "www.youtube.com", + "www.zend.com", "xing.com", "xinhuanet.com", "xiti.com", @@ -497,6 +952,7 @@ "yale.edu", "yandex.ru", "yelp.com", + "yootheme.com", "youku.com", "youronlinechoices.com", "youtu.be", @@ -507,6 +963,8 @@ ], "matching_attributes": [ "hostname", - "domain" + "domain", + "uri", + "url" ] } diff --git a/tools/generate_mozilla-top500.py b/tools/generate_mozilla-top500.py index 9feebc7..a572191 100755 --- a/tools/generate_mozilla-top500.py +++ b/tools/generate_mozilla-top500.py @@ -32,7 +32,7 @@ moz_warninglist['version'] = version moz_warninglist['name'] = "Top 500 domains and pages from Mozilla" moz_warninglist['type'] = 'hostname' moz_warninglist['list'] = [] -moz_warninglist['matching_attributes'] = ['hostname', 'domain'] +moz_warninglist['matching_attributes'] = ['hostname', 'domain', 'uri', 'url'] with open(moz_file_domains) as csv_file: csv_reader = csv.reader(csv_file, delimiter=',') @@ -47,6 +47,19 @@ with open(moz_file_domains) as csv_file: moz_warninglist['list'].append(v.rstrip().rstrip('/')) line_count += 1 +with open(moz_file_pages) as csv_file: + csv_reader = csv.reader(csv_file, delimiter=',') + line_count = 0 + for row in csv_reader: + if line_count == 0: + #print(f'Column names are {", ".join(row)}') + line_count += 1 + else: + #print(f'\t{row[0]}. {row[1]}, MozTrust: {row[5]}.') + v = row[1] + moz_warninglist['list'].append(v.rstrip().rstrip('/')) + line_count += 1 + moz_warninglist['list'] = sorted(set(moz_warninglist['list'])) print(json.dumps(moz_warninglist))