new: [list] The Moz Top 500 Domains and Pages (#104)
new: [list] The Moz Top 500 Domains and Pagespull/107/head
commit
284b5fa2c8
|
@ -0,0 +1,10 @@
|
|||
# The Moz Top 500
|
||||
Moz's list of the top 500 domains and pages on the web.
|
||||
|
||||
Contains a list of the top 500 web pages ranked by the number of linking root domains. This data is sourced from the Mozscape web index of 818 Billion domains and 6 Trillion pages.
|
||||
|
||||
## Update list
|
||||
|
||||
```bash
|
||||
../../tools/generate_mozilla-top500.py |jq . > list.json
|
||||
```
|
|
@ -0,0 +1,970 @@
|
|||
{
|
||||
"description": "Event contains one or more entries from the top 500 of the most used domains (Moz).",
|
||||
"version": 20190424,
|
||||
"name": "Top 500 domains and pages from Moz",
|
||||
"type": "hostname",
|
||||
"list": [
|
||||
"123-reg-expired.co.uk",
|
||||
"163.com",
|
||||
"1688.com",
|
||||
"1and1.com",
|
||||
"1and1.fr",
|
||||
"1und1.de",
|
||||
"360.cn",
|
||||
"4.cn",
|
||||
"51.la",
|
||||
"a8.net",
|
||||
"aarp.org",
|
||||
"abc.net.au",
|
||||
"abcnews.go.com",
|
||||
"about.com",
|
||||
"aboutads.info",
|
||||
"aboutcookies.org",
|
||||
"account.1und1.de",
|
||||
"accounts.google.com/ServiceLogin?service=jotspot",
|
||||
"accuweather.com",
|
||||
"acm.org",
|
||||
"add.my.yahoo.com/content",
|
||||
"addthis.com",
|
||||
"addtoany.com",
|
||||
"admin.ch",
|
||||
"adobe.com",
|
||||
"adweek.com",
|
||||
"alexa.com",
|
||||
"alibaba.com",
|
||||
"aliyun.com",
|
||||
"allaboutcookies.org",
|
||||
"amazon.co.jp",
|
||||
"amazon.co.uk",
|
||||
"amazon.com",
|
||||
"amazon.de",
|
||||
"amazon.fr",
|
||||
"amazonaws.com",
|
||||
"ameblo.jp",
|
||||
"amzn.to",
|
||||
"android.com",
|
||||
"aol.com",
|
||||
"apache.org",
|
||||
"api.whatsapp.com",
|
||||
"apple.com",
|
||||
"apple.com/mac",
|
||||
"archive.org",
|
||||
"arstechnica.com",
|
||||
"artisteer.com",
|
||||
"arxiv.org",
|
||||
"athemes.com",
|
||||
"athemes.com/theme/sydney",
|
||||
"att.com",
|
||||
"automattic.com",
|
||||
"azurewebsites.net",
|
||||
"b.hatena.ne.jp/entry",
|
||||
"baidu.com",
|
||||
"bandcamp.com",
|
||||
"barnesandnoble.com",
|
||||
"bbb.org",
|
||||
"bbc.co.uk",
|
||||
"bbc.com",
|
||||
"bbs.dedecms.com",
|
||||
"behance.net",
|
||||
"beian.gov.cn",
|
||||
"berkeley.edu",
|
||||
"bigcartel.com",
|
||||
"bigcommerce.com",
|
||||
"bing.com",
|
||||
"biomedcentral.com",
|
||||
"bit.ly",
|
||||
"bitbucket.org",
|
||||
"bizjournals.com",
|
||||
"bizvektor.com",
|
||||
"blackberry.com",
|
||||
"blogger.com",
|
||||
"blogspot.co.uk",
|
||||
"blogspot.com",
|
||||
"blogspot.com.es",
|
||||
"blogspot.jp",
|
||||
"bloomberg.com",
|
||||
"bls.gov",
|
||||
"bluehost.com",
|
||||
"bmj.com",
|
||||
"booking.com",
|
||||
"box.com",
|
||||
"br.wordpress.org",
|
||||
"brokercheck.finra.org",
|
||||
"bund.de",
|
||||
"businessinsider.com",
|
||||
"businesswire.com",
|
||||
"buydomains.com",
|
||||
"buzzfeed.com",
|
||||
"ca.gov",
|
||||
"calendar.google.com/calendar/render",
|
||||
"cam.ac.uk",
|
||||
"canada.ca",
|
||||
"catchthemes.com",
|
||||
"cbc.ca",
|
||||
"cbslocal.com",
|
||||
"cbsnews.com",
|
||||
"cdc.gov",
|
||||
"census.gov",
|
||||
"change.org",
|
||||
"chicagotribune.com",
|
||||
"cisco.com",
|
||||
"clickbank.net",
|
||||
"cloud.feedly.com",
|
||||
"cloudfront.net",
|
||||
"cmu.edu",
|
||||
"cn.wordpress.org",
|
||||
"cnbc.com",
|
||||
"cnet.com",
|
||||
"cnn.com",
|
||||
"codex.wordpress.org",
|
||||
"colorlib.com",
|
||||
"columbia.edu",
|
||||
"congress.gov",
|
||||
"connect.mail.ru/share",
|
||||
"constantcontact.com",
|
||||
"cornell.edu",
|
||||
"cpanel.com",
|
||||
"cpanel.net",
|
||||
"creativecommons.org",
|
||||
"creativecommons.org/licenses/by-nc-sa/3.0",
|
||||
"creativecommons.org/licenses/by-sa/2.0",
|
||||
"creativecommons.org/licenses/by-sa/3.0",
|
||||
"creativecommons.org/licenses/by/2.0",
|
||||
"creativecommons.org/licenses/by/3.0",
|
||||
"creativecommons.org/licenses/by/4.0",
|
||||
"cryoutcreations.eu",
|
||||
"cyberchimps.com/responsive-theme",
|
||||
"dailymail.co.uk",
|
||||
"dailymotion.com",
|
||||
"de-de.facebook.com/policy.php",
|
||||
"de.wordpress.org",
|
||||
"debian.org",
|
||||
"dedecms.com",
|
||||
"del.icio.us/post",
|
||||
"delicious.com",
|
||||
"deloitte.com",
|
||||
"devblog.plesk.com",
|
||||
"developers.facebook.com/docs/plugins",
|
||||
"developers.google.com/analytics/devguides/collection/analyticsjs/cookie-usage",
|
||||
"developers.google.com/analytics/devguides/collection/analyticsjs/cookie-usage?hl=es&csw=1",
|
||||
"deviantart.com",
|
||||
"dhs.gov",
|
||||
"dictionary.com",
|
||||
"digg.com",
|
||||
"digg.com/submit",
|
||||
"discuz.qq.com/service/security",
|
||||
"disqus.com",
|
||||
"disqus.com/?ref_noscript",
|
||||
"dmca.com",
|
||||
"doi.org",
|
||||
"dol.gov",
|
||||
"domainactive.co",
|
||||
"domainname.de",
|
||||
"domainnameshop.com",
|
||||
"domainretailing.com",
|
||||
"domeneshop.no",
|
||||
"dot.gov",
|
||||
"doubleclick.net",
|
||||
"dreamhost.com",
|
||||
"dribbble.com",
|
||||
"dropbox.com",
|
||||
"drupal.org",
|
||||
"duke.edu",
|
||||
"e-recht24.de",
|
||||
"ebay.com",
|
||||
"ec.europa.eu/consumers/odr",
|
||||
"ec.europa.eu/info/departments/justice-and-consumers_en",
|
||||
"economist.com",
|
||||
"ed.gov",
|
||||
"eepurl.com",
|
||||
"eff.org",
|
||||
"elegantthemes.com",
|
||||
"elsevier.com",
|
||||
"enable-javascript.com",
|
||||
"ename.com.cn",
|
||||
"engadget.com",
|
||||
"entrepreneur.com",
|
||||
"epa.gov",
|
||||
"es.wordpress.org",
|
||||
"etracker.de",
|
||||
"etsy.com",
|
||||
"europa.eu",
|
||||
"eventbrite.co.uk",
|
||||
"eventbrite.com",
|
||||
"ewebdevelopment.com",
|
||||
"example.com",
|
||||
"exblog.jp",
|
||||
"facebook.com",
|
||||
"facebook.com/sharer.php",
|
||||
"fao.org",
|
||||
"fastcompany.com",
|
||||
"fb.com",
|
||||
"fb.me",
|
||||
"fbcdn.net",
|
||||
"fc2.com",
|
||||
"fcc.gov",
|
||||
"fda.gov",
|
||||
"feedburner.com",
|
||||
"feedburner.google.com",
|
||||
"feedjit.com",
|
||||
"feedly.com/index.html",
|
||||
"flickr.com",
|
||||
"forbes.com",
|
||||
"fortune.com",
|
||||
"foursquare.com",
|
||||
"foxnews.com",
|
||||
"free.fr",
|
||||
"ft.com",
|
||||
"ftc.gov",
|
||||
"g.co",
|
||||
"gartner.com",
|
||||
"generatepress.com",
|
||||
"geocities.jp",
|
||||
"gesetze-im-internet.de",
|
||||
"get.adobe.com/de/reader",
|
||||
"get.adobe.com/flashplayer",
|
||||
"get.adobe.com/jp/reader",
|
||||
"get.adobe.com/reader",
|
||||
"getbootstrap.com",
|
||||
"getpocket.com",
|
||||
"getpocket.com/save",
|
||||
"giphy.com",
|
||||
"github.com",
|
||||
"github.io",
|
||||
"globo.com",
|
||||
"gmail.com",
|
||||
"gmpg.org/xfn",
|
||||
"gnu.org",
|
||||
"go.com",
|
||||
"go.cpanel.net/cleardnscache",
|
||||
"go.microsoft.com/fwlink/?linkid=66138&clcid=0x409",
|
||||
"godaddy.com",
|
||||
"gofundme.com",
|
||||
"goo.gl",
|
||||
"goo.ne.jp",
|
||||
"goodreads.com",
|
||||
"google.be",
|
||||
"google.ca",
|
||||
"google.ch",
|
||||
"google.co.in",
|
||||
"google.co.jp",
|
||||
"google.co.uk",
|
||||
"google.com",
|
||||
"google.com.au",
|
||||
"google.com.br",
|
||||
"google.de",
|
||||
"google.es",
|
||||
"google.fr",
|
||||
"google.it",
|
||||
"google.nl",
|
||||
"google.pl",
|
||||
"google.ru",
|
||||
"googleapis.com",
|
||||
"googleusercontent.com",
|
||||
"gotowebinar.com",
|
||||
"gpo.gov",
|
||||
"gravatar.com",
|
||||
"gravatar.com/site/signup",
|
||||
"gtranslate.net",
|
||||
"guardian.co.uk",
|
||||
"harvard.edu",
|
||||
"hatena.ne.jp",
|
||||
"hbr.org",
|
||||
"help.opera.com/Windows/10.00/it/cookies.html",
|
||||
"hhs.gov",
|
||||
"hibu.com",
|
||||
"hilton.com",
|
||||
"histats.com",
|
||||
"hollywoodreporter.com",
|
||||
"home.pl",
|
||||
"home.pl/kontakt",
|
||||
"homeads.home.pl/ads/www/delivery/ck.php?n=f90e22f",
|
||||
"homestead.com",
|
||||
"hootsuite.com",
|
||||
"hostgator.com",
|
||||
"hostingmanager.secureserver.net",
|
||||
"hostnet.nl",
|
||||
"house.gov",
|
||||
"houzz.com",
|
||||
"hp.com",
|
||||
"html5up.net",
|
||||
"httpd.apache.org",
|
||||
"httpd.apache.org/docs/2.4/mod/mod_userdir.html",
|
||||
"hubspot.com",
|
||||
"huffingtonpost.com",
|
||||
"ibm.com",
|
||||
"icann.org",
|
||||
"icio.us",
|
||||
"ieee.org",
|
||||
"ietf.org",
|
||||
"ifeng.com",
|
||||
"illinois.edu",
|
||||
"imdb.com",
|
||||
"imgur.com",
|
||||
"inc.com",
|
||||
"independent.co.uk",
|
||||
"indiatimes.com",
|
||||
"instagram.com",
|
||||
"intel.com",
|
||||
"irs.gov",
|
||||
"iso.org",
|
||||
"issuu.com",
|
||||
"it.wordpress.org",
|
||||
"iubenda.com",
|
||||
"ja.wordpress.org",
|
||||
"jalbum.net",
|
||||
"jalbum.net/en",
|
||||
"japanpost.jp",
|
||||
"java.com",
|
||||
"jiathis.com",
|
||||
"jigsaw.w3.org/css-validator",
|
||||
"jigsaw.w3.org/css-validator/check/referer",
|
||||
"jigsaw.w3.org/css-validator/check/referer?profile=css3",
|
||||
"jimdo.com",
|
||||
"joomla-extensions.kubik-rubik.de",
|
||||
"joomla.org",
|
||||
"jquery.com",
|
||||
"jugem.jp",
|
||||
"justgiving.com",
|
||||
"justice.gov",
|
||||
"kb.plesk.com",
|
||||
"kickstarter.com",
|
||||
"latimes.com",
|
||||
"lazaworx.com",
|
||||
"libsyn.com",
|
||||
"lifestream.aol.com",
|
||||
"line.me",
|
||||
"linkedin.com",
|
||||
"list-manage.com",
|
||||
"list-manage1.com",
|
||||
"listings.homestead.com",
|
||||
"live.com",
|
||||
"livedoor.jp",
|
||||
"livejournal.com",
|
||||
"loc.gov",
|
||||
"logc204.xiti.com/go.click?xts=453041&s2=14&p=homepage::kundendefault::index::button-mehr-info&clic=N&type=click",
|
||||
"loopia.com",
|
||||
"loopia.se",
|
||||
"macromedia.com",
|
||||
"mail.google.com/mail",
|
||||
"mail.ru",
|
||||
"mailchimp.com",
|
||||
"mapquest.com",
|
||||
"maps.google.com",
|
||||
"maps.google.com/maps?f=d&source=s_d&daddr=&saddr=&hl=en&geocode=&mra=ls&sll=37.0625,-95.677068&sspn=49.176833,114.257812&ie=UTF8&t=h&z=12",
|
||||
"marriott.com",
|
||||
"mashable.com",
|
||||
"medium.com",
|
||||
"meetup.com",
|
||||
"megagroup.ru",
|
||||
"mhlw.go.jp",
|
||||
"microsoft.com",
|
||||
"miibeian.gov.cn",
|
||||
"miitbeian.gov.cn",
|
||||
"mijndomein.nl",
|
||||
"mit.edu",
|
||||
"mlb.com",
|
||||
"mlit.go.jp",
|
||||
"mobirise.com",
|
||||
"moodle.org",
|
||||
"moz.com",
|
||||
"mozilla.com",
|
||||
"mozilla.org",
|
||||
"msdn.com",
|
||||
"msn.com",
|
||||
"mynavi.jp",
|
||||
"myshopify.com",
|
||||
"myspace.com",
|
||||
"myspace.com/Modules/PostTo/Pages",
|
||||
"mysql.com",
|
||||
"namejet.com",
|
||||
"nasa.gov",
|
||||
"nationalgeographic.com",
|
||||
"nature.com",
|
||||
"naver.com",
|
||||
"nazwa.pl",
|
||||
"nbcnews.com",
|
||||
"netcn.console.aliyun.com/core/host/list2",
|
||||
"netflix.com",
|
||||
"netscape.aol.com",
|
||||
"netscape.com",
|
||||
"networkadvertising.org",
|
||||
"networksolutions.com",
|
||||
"newyorker.com",
|
||||
"nginx.com",
|
||||
"nginx.org",
|
||||
"nhk.or.jp",
|
||||
"nifty.com",
|
||||
"nih.gov",
|
||||
"nist.gov",
|
||||
"nl.wordpress.org",
|
||||
"noaa.gov",
|
||||
"npr.org",
|
||||
"nps.gov",
|
||||
"ny.gov",
|
||||
"nytimes.com",
|
||||
"nyu.edu",
|
||||
"ocn.ne.jp",
|
||||
"oecd.org",
|
||||
"office.com",
|
||||
"ok.ru",
|
||||
"one.com",
|
||||
"opencart.com",
|
||||
"opensource.org",
|
||||
"opensource.org/licenses/gpl-license.php",
|
||||
"opera.com",
|
||||
"optout.networkadvertising.org",
|
||||
"oracle.com",
|
||||
"oreilly.com",
|
||||
"oup.com",
|
||||
"outlook.live.com/owa",
|
||||
"ow.ly",
|
||||
"ox.ac.uk",
|
||||
"panel.dreamhost.com",
|
||||
"parallels.com",
|
||||
"paypal.com",
|
||||
"pbs.org",
|
||||
"phoca.cz",
|
||||
"photobucket.com",
|
||||
"php.net",
|
||||
"phpbb.com",
|
||||
"pinterest.com",
|
||||
"pinterest.com/pin/create/button",
|
||||
"pinterest.com/pin/create/button/?description=",
|
||||
"pinterest.com/pin/create/button/?media=",
|
||||
"pixabay.com",
|
||||
"pl.wordpress.org",
|
||||
"planet.wordpress.org",
|
||||
"playstation.com",
|
||||
"plesk.com",
|
||||
"plos.org",
|
||||
"plus.google.com",
|
||||
"plus.google.com/communities/109881979300958500728",
|
||||
"plus.google.com/share",
|
||||
"plus.google.com/share?url=",
|
||||
"plusone.google.com/_/+1/confirm?hl=en",
|
||||
"presscustomizr.com",
|
||||
"presscustomizr.com/customizr",
|
||||
"prestashop.com",
|
||||
"prnewswire.com",
|
||||
"psu.edu",
|
||||
"psychologytoday.com",
|
||||
"python.org",
|
||||
"qq.com",
|
||||
"quantcast.com",
|
||||
"rakuten.co.jp",
|
||||
"rambler.ru",
|
||||
"redcross.org",
|
||||
"reddit.com",
|
||||
"reddit.com/submit",
|
||||
"reference.com",
|
||||
"researchgate.net",
|
||||
"reuters.com",
|
||||
"rs6.net",
|
||||
"ru.wordpress.org",
|
||||
"safeharbor.export.gov/companyinfo.aspx?id=16626",
|
||||
"sagepub.com",
|
||||
"sakura.ne.jp",
|
||||
"samsung.com",
|
||||
"sciencedirect.com",
|
||||
"sciencemag.org",
|
||||
"scientificamerican.com",
|
||||
"scribd.com",
|
||||
"sec.gov",
|
||||
"secureserver.net",
|
||||
"sedo.com",
|
||||
"sedoparking.com",
|
||||
"senate.gov",
|
||||
"shinystat.com",
|
||||
"shop-pro.jp",
|
||||
"shopify.com",
|
||||
"si.edu",
|
||||
"sina.com.cn",
|
||||
"siteorigin.com",
|
||||
"sites.google.com",
|
||||
"skype.com",
|
||||
"slate.com",
|
||||
"slideshare.net",
|
||||
"smallbusiness.yahoo.com/webhosting",
|
||||
"snapchat.com",
|
||||
"sogou.com",
|
||||
"sohu.com",
|
||||
"soundcloud.com",
|
||||
"sourceforge.net",
|
||||
"spotify.com",
|
||||
"springer.com",
|
||||
"squarespace.com",
|
||||
"squareup.com",
|
||||
"ssa.gov",
|
||||
"stackoverflow.com",
|
||||
"stanford.edu",
|
||||
"starwoodhotels.com",
|
||||
"statcounter.com",
|
||||
"statcounter.com/free-hit-counter",
|
||||
"statcounter.com/free-web-stats",
|
||||
"statcounter.com/shopify",
|
||||
"statcounter.com/tumblr",
|
||||
"state.gov",
|
||||
"steampowered.com",
|
||||
"storify.com",
|
||||
"studiopress.com",
|
||||
"stumbleupon.com",
|
||||
"sun.com",
|
||||
"support.apple.com/it-it/HT201265",
|
||||
"support.apple.com/kb/PH5042",
|
||||
"support.apple.com/kb/ph5042",
|
||||
"support.google.com/analytics/answer/6004245?hl=de",
|
||||
"support.google.com/answer/23852",
|
||||
"support.google.com/chrome/answer/95647?hl=es",
|
||||
"support.google.com/chrome/answer/95647?hl=it",
|
||||
"support.google.com/chrome/bin/answer.py?hl=es&answer=95647",
|
||||
"support.microsoft.com/help/17442",
|
||||
"support.microsoft.com/windows",
|
||||
"support.mozilla.org/es/kb/habilitar-y-deshabilitar-cookies-que-los-sitios-we",
|
||||
"support.mozilla.org/it/kb/Attivare%20e%20disattivare%20i%20cookie",
|
||||
"support.plesk.com",
|
||||
"support.plesk.com/hc",
|
||||
"surveymonkey.com",
|
||||
"symantec.com",
|
||||
"t.co",
|
||||
"t.me",
|
||||
"talk.plesk.com",
|
||||
"tandfonline.com",
|
||||
"taobao.com",
|
||||
"teamviewer.com",
|
||||
"techcrunch.com",
|
||||
"technorati.com/faves",
|
||||
"ted.com",
|
||||
"telegram.me",
|
||||
"telegraph.co.uk",
|
||||
"templated.co",
|
||||
"theatlantic.com",
|
||||
"theguardian.com",
|
||||
"thehill.com",
|
||||
"theme-fusion.com",
|
||||
"themeforest.net",
|
||||
"themegrill.com",
|
||||
"themegrill.com/themes/colormag",
|
||||
"themegrill.com/themes/spacious",
|
||||
"thenextweb.com",
|
||||
"theverge.com",
|
||||
"ticketmaster.com",
|
||||
"time.com",
|
||||
"tmall.com",
|
||||
"today.com",
|
||||
"tools.google.com/dlpage/gaoptout",
|
||||
"tools.google.com/dlpage/gaoptout?hl=de",
|
||||
"tools.google.com/dlpage/gaoptout?hl=en",
|
||||
"tools.google.com/dlpage/gaoptout?hl=it",
|
||||
"top100.rambler.ru/top100",
|
||||
"translate.google.com",
|
||||
"tripadvisor.co.uk",
|
||||
"tripadvisor.com",
|
||||
"trustpilot.com",
|
||||
"tucowsdomains.com",
|
||||
"tumblr.com",
|
||||
"twitch.tv",
|
||||
"twitter.com",
|
||||
"twitter.com/Plesk",
|
||||
"twitter.com/account/settings",
|
||||
"twitter.com/home",
|
||||
"twitter.com/intent/tweet",
|
||||
"twitter.com/intent/tweet?text=",
|
||||
"twitter.com/onecom",
|
||||
"twitter.com/privacy",
|
||||
"twitter.com/share",
|
||||
"twitter.com/share?text=",
|
||||
"typeform.com",
|
||||
"typepad.com",
|
||||
"uchicago.edu",
|
||||
"ucl.ac.uk",
|
||||
"ucla.edu",
|
||||
"umblr.com",
|
||||
"umich.edu",
|
||||
"umn.edu",
|
||||
"un.org",
|
||||
"unesco.org",
|
||||
"unicef.org",
|
||||
"unsplash.com",
|
||||
"uol.com.br",
|
||||
"upenn.edu",
|
||||
"usa.gov",
|
||||
"usatoday.com",
|
||||
"usc.edu",
|
||||
"usda.gov",
|
||||
"usgs.gov",
|
||||
"usnews.com",
|
||||
"ustream.tv",
|
||||
"utexas.edu",
|
||||
"va.gov",
|
||||
"validator.w3.org",
|
||||
"validator.w3.org/check",
|
||||
"validator.w3.org/check/referer",
|
||||
"validator.w3.org/check?uri=referer",
|
||||
"variety.com",
|
||||
"venturebeat.com",
|
||||
"vice.com",
|
||||
"vimeo.com",
|
||||
"vinaora.com",
|
||||
"visma.com",
|
||||
"vk.com",
|
||||
"vk.com/login?act=vkcomredirect&to=c2hhcmUucGhw",
|
||||
"vk.com/share.php",
|
||||
"vkontakte.ru",
|
||||
"vkontakte.ru/share.php",
|
||||
"w3.org",
|
||||
"w3schools.com",
|
||||
"warnerbros.com",
|
||||
"washington.edu",
|
||||
"washingtonpost.com",
|
||||
"web.de",
|
||||
"webmd.com",
|
||||
"webs.com",
|
||||
"weebly.com",
|
||||
"weibo.com",
|
||||
"whatsapp.com",
|
||||
"whitehouse.gov",
|
||||
"who.int",
|
||||
"wikia.com",
|
||||
"wikihow.com",
|
||||
"wikimedia.org",
|
||||
"wikipedia.org",
|
||||
"wiley.com",
|
||||
"windows.microsoft.com/en-us/internet-explorer/products/ie/home",
|
||||
"windows.microsoft.com/es-es/windows7/how-to-manage-cookies-in-internet-explorer-9",
|
||||
"windows.microsoft.com/it-it/windows-vista/block-or-allow-cookies",
|
||||
"windowsphone.com",
|
||||
"wired.com",
|
||||
"wisc.edu",
|
||||
"wix.com",
|
||||
"wixsite.com",
|
||||
"woocommerce.com",
|
||||
"wordpress.com",
|
||||
"wordpress.com/?ref=footer_blog",
|
||||
"wordpress.com/?ref=footer_website",
|
||||
"wordpress.com/themes",
|
||||
"wordpress.org",
|
||||
"wordpress.org/extend/ideas",
|
||||
"wordpress.org/extend/plugins",
|
||||
"wordpress.org/extend/themes",
|
||||
"wordpress.org/news",
|
||||
"wordpress.org/plugins",
|
||||
"wordpress.org/plugins/asesor-cookies-para-la-ley-en-espana",
|
||||
"wordpress.org/support",
|
||||
"wordpress.org/support/forum/requests-and-feedback",
|
||||
"wordpress.org/themes",
|
||||
"worldbank.org",
|
||||
"wowslider.com",
|
||||
"wp.com",
|
||||
"wp.me",
|
||||
"wpfr.net",
|
||||
"wsimg.com",
|
||||
"wsj.com",
|
||||
"wufoo.com",
|
||||
"wunderground.com",
|
||||
"www-redirect.ext.hp.com",
|
||||
"www.000webhost.com/migrate?static=true",
|
||||
"www.163.com",
|
||||
"www.1und1.de",
|
||||
"www.22.cn",
|
||||
"www.4.cn/company/contactus",
|
||||
"www.51.la/?19089091",
|
||||
"www.aboutads.info/choices",
|
||||
"www.aboutcookies.org",
|
||||
"www.addthis.com/bookmark.php",
|
||||
"www.addthis.com/bookmark.php?v=20",
|
||||
"www.addthis.com/bookmark.php?v=250",
|
||||
"www.addtoany.com/share",
|
||||
"www.addtoany.com/share_save",
|
||||
"www.adobe.com",
|
||||
"www.adobe.com/go/getflash",
|
||||
"www.adobe.com/jp/products/acrobat/readstep2.html",
|
||||
"www.adobe.com/products/acrobat/readstep.html",
|
||||
"www.adobe.com/products/acrobat/readstep2.html",
|
||||
"www.adobe.com/shockwave/download/download.cgi?P1_Prod_Version=ShockwaveFlash&promoid=BIOW",
|
||||
"www.alipay.com",
|
||||
"www.allaboutcookies.org",
|
||||
"www.amazon.com",
|
||||
"www.andersnoren.se",
|
||||
"www.aol.com",
|
||||
"www.apache.org",
|
||||
"www.apache.org/licenses/LICENSE-2.0",
|
||||
"www.apple.com",
|
||||
"www.apple.com/mac",
|
||||
"www.apple.com/safari",
|
||||
"www.artisteer.com/?p=joomla_templates",
|
||||
"www.authorize.net",
|
||||
"www.axs.com",
|
||||
"www.baidu.com",
|
||||
"www.bbc.co.uk",
|
||||
"www.bing.com",
|
||||
"www.blogger.com",
|
||||
"www.bluehost.com",
|
||||
"www.booking.com",
|
||||
"www.cdc.gov",
|
||||
"www.chronoengine.com",
|
||||
"www.cia.gov/redirects/ciaredirect.html",
|
||||
"www.cisco.com",
|
||||
"www.cnn.com",
|
||||
"www.comsenz.com",
|
||||
"www.cryoutcreations.eu",
|
||||
"www.dedecms.com",
|
||||
"www.discuz.net",
|
||||
"www.domainname.de",
|
||||
"www.domainnameshop.com",
|
||||
"www.domainnameshop.com/whois",
|
||||
"www.domeneshop.no",
|
||||
"www.dreamhost.com",
|
||||
"www.dropbox.com",
|
||||
"www.drupal.org",
|
||||
"www.e-recht24.de",
|
||||
"www.e-recht24.de/artikel/datenschutz/6590-facebook-like-button-datenschutz-disclaimer.html",
|
||||
"www.e-recht24.de/artikel/datenschutz/6635-datenschutz-rechtliche-risiken-bei-der-nutzung-von-google-analytics-und-googleadsense.html",
|
||||
"www.e-recht24.de/impressum-generator.html",
|
||||
"www.e-recht24.de/muster-datenschutzerklaerung.html",
|
||||
"www.e-recht24.de/muster-disclaimer.htm",
|
||||
"www.e-recht24.de/muster-disclaimer.html",
|
||||
"www.ebay.com",
|
||||
"www.elegantthemes.com",
|
||||
"www.enable-javascript.com",
|
||||
"www.ename.com.cn/custompage/custompagestyle",
|
||||
"www.enom.com/help/Default.aspx",
|
||||
"www.epa.gov",
|
||||
"www.example.com",
|
||||
"www.facebook.com",
|
||||
"www.facebook.com/Onecom",
|
||||
"www.facebook.com/Plesk",
|
||||
"www.facebook.com/about/privacy",
|
||||
"www.facebook.com/business/dashboard",
|
||||
"www.facebook.com/facebook",
|
||||
"www.facebook.com/help/cookies",
|
||||
"www.facebook.com/home.php",
|
||||
"www.facebook.com/policy.php",
|
||||
"www.facebook.com/share.php",
|
||||
"www.facebook.com/sharer.php",
|
||||
"www.facebook.com/sharer.php?t=",
|
||||
"www.facebook.com/sharer/sharer.php",
|
||||
"www.facebook.com/sharer/sharer.php?src=sdkpreparse",
|
||||
"www.facebook.com/sharer/sharer.php?u=",
|
||||
"www.fda.gov",
|
||||
"www.finra.org",
|
||||
"www.flickr.com",
|
||||
"www.format.com/l/your_new_portfolio",
|
||||
"www.forpsi.com",
|
||||
"www.freecsstemplates.org",
|
||||
"www.gimp.org",
|
||||
"www.gmail.com",
|
||||
"www.gnu.org",
|
||||
"www.gnu.org/copyleft/gpl.html",
|
||||
"www.gnu.org/licenses/gpl-2.0.html",
|
||||
"www.gnu.org/licenses/gpl.html",
|
||||
"www.godaddy.com",
|
||||
"www.godaddy.com/hosting/website-builder.aspx?isc=wscfwst304",
|
||||
"www.godaddy.com/websites/website-builder",
|
||||
"www.godaddy.com/websites/website-builder?cvosrc=assets.wsb_badge.wsb_badge",
|
||||
"www.google.co.jp",
|
||||
"www.google.co.uk",
|
||||
"www.google.com",
|
||||
"www.google.com/a/UniversalLogin?service=jotspot",
|
||||
"www.google.com/analytics",
|
||||
"www.google.com/analytics/learn/privacy.html",
|
||||
"www.google.com/analytics/terms/de.html",
|
||||
"www.google.com/calendar/render",
|
||||
"www.google.com/chrome",
|
||||
"www.google.com/gmail",
|
||||
"www.google.com/intl/de/+/policy/+1button.html",
|
||||
"www.google.com/intl/de/analytics/privacyoverview.html",
|
||||
"www.google.com/intl/de/policies/privacy",
|
||||
"www.google.com/intl/en/policies/privacy",
|
||||
"www.google.com/intl/it/policies/privacy",
|
||||
"www.google.com/policies/privacy",
|
||||
"www.google.com/policies/privacy/ads",
|
||||
"www.google.com/policies/technologies/cookies",
|
||||
"www.google.com/privacy_ads.html",
|
||||
"www.google.com/search?q=whois",
|
||||
"www.google.com/support/bin/answer.py?answer=23852",
|
||||
"www.google.de",
|
||||
"www.google.de/intl/de/policies/privacy",
|
||||
"www.google.it/intl/it/policies/privacy",
|
||||
"www.gosuslugi.ru",
|
||||
"www.gov.cn",
|
||||
"www.gov.uk",
|
||||
"www.graphene-theme.com",
|
||||
"www.haosou.com",
|
||||
"www.histats.com",
|
||||
"www.homestead.com",
|
||||
"www.hostgator.com",
|
||||
"www.hotmail.com",
|
||||
"www.hp.com",
|
||||
"www.huffingtonpost.com",
|
||||
"www.hupso.com/share",
|
||||
"www.ibm.com",
|
||||
"www.ifeng.com",
|
||||
"www.imdb.com",
|
||||
"www.instagram.com",
|
||||
"www.iqiyi.com",
|
||||
"www.irs.gov",
|
||||
"www.ispconfig.org",
|
||||
"www.jd.com",
|
||||
"www.jiathis.com/share",
|
||||
"www.joomla.org",
|
||||
"www.joomlatune.com",
|
||||
"www.joomshaper.com",
|
||||
"www.jssor.com",
|
||||
"www.kickstarter.com",
|
||||
"www.kriesi.at",
|
||||
"www.linkedin.com",
|
||||
"www.linkedin.com/cws/share",
|
||||
"www.linkedin.com/legal/privacy-policy",
|
||||
"www.linkedin.com/shareArticle?mini=true",
|
||||
"www.linkwithin.com",
|
||||
"www.liveinternet.ru/click",
|
||||
"www.loopia.se",
|
||||
"www.luminate.com/webhosting",
|
||||
"www.lycos.com",
|
||||
"www.macromedia.com/go/getflashplayer",
|
||||
"www.mapquest.com",
|
||||
"www.mapy.cz",
|
||||
"www.mediawiki.org",
|
||||
"www.mhthemes.com",
|
||||
"www.microsoft.com",
|
||||
"www.microsoft.com/en-us/windows",
|
||||
"www.miibeian.gov.cn",
|
||||
"www.miitbeian.gov.cn",
|
||||
"www.mijndomein.nl",
|
||||
"www.mijndomein.nl/producten",
|
||||
"www.mijndomein.nl/producten/websitemaker",
|
||||
"www.mozilla.org",
|
||||
"www.mozilla.org/en-US",
|
||||
"www.mozilla.org/en-US/firefox/new",
|
||||
"www.mozilla.org/firefox/new",
|
||||
"www.msn.com",
|
||||
"www.myspace.com",
|
||||
"www.myspace.com/Modules/PostTo/Pages",
|
||||
"www.mysql.com",
|
||||
"www.nasa.gov",
|
||||
"www.netvibes.com/subscribe.php",
|
||||
"www.networkadvertising.org/choices",
|
||||
"www.networkadvertising.org/managing/opt_out.asp",
|
||||
"www.networksolutions.com",
|
||||
"www.nginx.com",
|
||||
"www.nhs.uk",
|
||||
"www.nytimes.com",
|
||||
"www.odin.com",
|
||||
"www.olark.com/?welcome",
|
||||
"www.one.com/en",
|
||||
"www.opencart.com",
|
||||
"www.opera.com",
|
||||
"www.oracle.com/index.html",
|
||||
"www.ovh.com",
|
||||
"www.pagesjaunes.fr",
|
||||
"www.parallels.com",
|
||||
"www.parallels.com/intro",
|
||||
"www.parallels.com/plesk",
|
||||
"www.parallels.com/products/automation/intro",
|
||||
"www.parallels.com/products/containers/intro",
|
||||
"www.parallels.com/products/desktop/intro",
|
||||
"www.parallels.com/products/desktop/pd4wl/intro",
|
||||
"www.parallels.com/products/panel/intro",
|
||||
"www.parallels.com/products/server/intro",
|
||||
"www.paypal.com",
|
||||
"www.people.com.cn",
|
||||
"www.phoca.cz",
|
||||
"www.phoca.cz/phocadownload",
|
||||
"www.phoca.cz/phocagallery",
|
||||
"www.php.net",
|
||||
"www.phpbb.com",
|
||||
"www.phpbb.com/ideas",
|
||||
"www.pinterest.com",
|
||||
"www.pinterest.com/pin/create/button",
|
||||
"www.pinterest.com/pin/create/button/?url=&media=&description=",
|
||||
"www.plesk.com",
|
||||
"www.plesk.com/blog",
|
||||
"www.prestashop.com",
|
||||
"www.python.org",
|
||||
"www.qq.com",
|
||||
"www.redcross.org",
|
||||
"www.reddit.com",
|
||||
"www.reuters.com",
|
||||
"www.safenames.net/?ref=lndrdr",
|
||||
"www.shinystat.com",
|
||||
"www.shinystat.com/it",
|
||||
"www.simplemachines.org",
|
||||
"www.simplemachines.org/about/smf/license.php",
|
||||
"www.sina.com.cn",
|
||||
"www.sipc.org",
|
||||
"www.skype.com",
|
||||
"www.slideshare.net",
|
||||
"www.so.com",
|
||||
"www.sogou.com",
|
||||
"www.sohu.com",
|
||||
"www.statcounter.com",
|
||||
"www.studiopress.com",
|
||||
"www.stumbleupon.com",
|
||||
"www.stumbleupon.com/submit",
|
||||
"www.taobao.com",
|
||||
"www.toplist.cz",
|
||||
"www.tradeindia.com",
|
||||
"www.tripadvisor.co.uk",
|
||||
"www.tripadvisor.com",
|
||||
"www.tucows.com",
|
||||
"www.tucowsdomains.com",
|
||||
"www.tumblr.com",
|
||||
"www.tumblr.com/share/link",
|
||||
"www.twitter.com",
|
||||
"www.twitter.com/share",
|
||||
"www.ubuntu.com",
|
||||
"www.ucoz.ru",
|
||||
"www.usatoday.com",
|
||||
"www.ustream.tv",
|
||||
"www.value-domain.com",
|
||||
"www.vektor-inc.co.jp",
|
||||
"www.visma.com",
|
||||
"www.w3.org",
|
||||
"www.washingtonpost.com",
|
||||
"www.webmd.com",
|
||||
"www.weebly.com",
|
||||
"www.whitehouse.gov",
|
||||
"www.who.int/en",
|
||||
"www.wikipedia.org",
|
||||
"www.woothemes.com",
|
||||
"www.wordpress-fr.net",
|
||||
"www.wordpress.com",
|
||||
"www.wordpress.org",
|
||||
"www.wsj.com",
|
||||
"www.xing.com/app/share?op=data_protection",
|
||||
"www.xinhuanet.com",
|
||||
"www.xml-sitemaps.com",
|
||||
"www.yahoo.co.jp",
|
||||
"www.yahoo.com",
|
||||
"www.yelp.com",
|
||||
"www.yootheme.com",
|
||||
"www.youku.com",
|
||||
"www.youronlinechoices.com",
|
||||
"www.youronlinechoices.com/uk/your-ad-choices",
|
||||
"www.youtube.com",
|
||||
"www.zend.com",
|
||||
"xing.com",
|
||||
"xinhuanet.com",
|
||||
"xiti.com",
|
||||
"yahoo.co.jp",
|
||||
"yahoo.com",
|
||||
"yale.edu",
|
||||
"yandex.ru",
|
||||
"yelp.com",
|
||||
"yootheme.com",
|
||||
"youku.com",
|
||||
"youronlinechoices.com",
|
||||
"youtu.be",
|
||||
"youtube.com",
|
||||
"zdnet.com",
|
||||
"zendesk.com",
|
||||
"zenfolio.com"
|
||||
],
|
||||
"matching_attributes": [
|
||||
"hostname",
|
||||
"domain",
|
||||
"uri",
|
||||
"url"
|
||||
]
|
||||
}
|
|
@ -0,0 +1,70 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import requests
|
||||
import datetime
|
||||
import json
|
||||
import csv
|
||||
import os
|
||||
|
||||
# TODO: Include Top500 pages
|
||||
# TODO: Include MozRank
|
||||
|
||||
# CSV exports published by Moz (moz.com): one for domains, one for pages.
moz_url_domains = "https://moz.com/top500/domains/csv"
moz_url_pages = "https://moz.com/top500/pages/csv"

# Browser-like User-Agent sent with every request.
# NOTE(review): presumably moz.com refuses the default python-requests
# agent -- confirm before removing.
user_agent = {"User-agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:46.0) Gecko/20100101 Firefox/46.0"}

# Seconds to wait for the server before giving up instead of hanging forever.
request_timeout = 30


def fetch_csv(url):
    """Download *url* and return its body decoded as UTF-8 text.

    The body is kept in memory, so no fixed, predictable file under /tmp
    is created (and nothing has to be cleaned up afterwards).

    Raises:
        requests.HTTPError: the server answered with a 4xx/5xx status.
            Failing here keeps an HTML error page from being parsed as CSV.
        requests.RequestException: any other transport problem, including
            the timeout expiring.
    """
    response = requests.get(url, headers=user_agent, timeout=request_timeout)
    response.raise_for_status()
    return response.content.decode("utf-8")


def extract_entries(csv_text):
    """Return the cleaned second column of every data row in *csv_text*.

    The first row is treated as the header and skipped.  Trailing
    whitespace and trailing slashes are stripped from each value.  Rows
    with fewer than two columns (e.g. blank lines) and values that end up
    empty are ignored instead of raising ``IndexError`` or polluting the
    list with empty strings.
    """
    import io  # only needed here; kept local so the block is self-contained

    # newline="" lets the csv module do its own line-ending handling.
    reader = csv.reader(io.StringIO(csv_text, newline=""), delimiter=",")
    next(reader, None)  # skip the header row (no-op on empty input)

    entries = []
    for row in reader:
        if len(row) < 2:
            continue
        value = row[1].rstrip().rstrip("/")
        if value:
            entries.append(value)
    return entries


def build_warninglist(domain_entries, page_entries):
    """Assemble the warninglist dictionary from the two entry lists.

    Entries from both inputs are merged, de-duplicated and sorted.  The
    version is today's date encoded as the integer YYYYMMDD.  Keys are
    inserted in the order they appear in the generated ``list.json``.
    """
    return {
        # The data comes from Moz (moz.com), not Mozilla.
        "description": "Event contains one or more entries from the top 500 of the most used domains (Moz).",
        "version": int(datetime.date.today().strftime("%Y%m%d")),
        "name": "Top 500 domains and pages from Moz",
        "type": "hostname",
        "list": sorted(set(domain_entries) | set(page_entries)),
        "matching_attributes": ["hostname", "domain", "uri", "url"],
    }


def main():
    """Fetch both Moz exports and print the warninglist as JSON on stdout.

    Only the JSON document is written to stdout, so the output can be
    piped straight into ``jq``.
    """
    domains = extract_entries(fetch_csv(moz_url_domains))
    pages = extract_entries(fetch_csv(moz_url_pages))
    print(json.dumps(build_warninglist(domains, pages)))


if __name__ == "__main__":
    main()
|
Loading…
Reference in New Issue