From 34ce028295b52bb52ce160436e4276780d7391f3 Mon Sep 17 00:00:00 2001 From: Alexandre Dulaunoy Date: Sat, 28 May 2016 21:31:40 +0200 Subject: [PATCH] Alexa top 1000 MISP warning list added including generation tool --- lists/alexa/list.json | 1010 +++++++++++++++++++++++++++++++++++++++ tools/generate-alexa.py | 33 ++ 2 files changed, 1043 insertions(+) create mode 100644 lists/alexa/list.json create mode 100644 tools/generate-alexa.py diff --git a/lists/alexa/list.json b/lists/alexa/list.json new file mode 100644 index 0000000..b20fc69 --- /dev/null +++ b/lists/alexa/list.json @@ -0,0 +1,1010 @@ +{ + "list": [ + "google.com", + "youtube.com", + "facebook.com", + "baidu.com", + "yahoo.com", + "wikipedia.org", + "amazon.com", + "twitter.com", + "qq.com", + "live.com", + "google.co.in", + "taobao.com", + "bing.com", + "google.co.jp", + "msn.com", + "yahoo.co.jp", + "linkedin.com", + "sina.com.cn", + "weibo.com", + "instagram.com", + "vk.com", + "google.ru", + "yandex.ru", + "google.de", + "hao123.com", + "ebay.com", + "reddit.com", + "google.co.uk", + "amazon.co.jp", + "t.co", + "mail.ru", + "google.fr", + "pinterest.com", + "google.com.br", + "netflix.com", + "tmall.com", + "microsoft.com", + "360.cn", + "google.it", + "onclickads.net", + "wordpress.com", + "google.es", + "blogspot.com", + "tumblr.com", + "imgur.com", + "sohu.com", + "paypal.com", + "chinadaily.com.cn", + "naver.com", + "stackoverflow.com", + "xvideos.com", + "google.com.mx", + "fc2.com", + "aliexpress.com", + "imdb.com", + "apple.com", + "diply.com", + "google.co.kr", + "gmw.cn", + "github.com", + "google.ca", + "pornhub.com", + "blogger.com", + "kat.cr", + "ok.ru", + "amazon.de", + "google.com.hk", + "office.com", + "google.com.tr", + "rakuten.co.jp", + "whatsapp.com", + "nicovideo.jp", + "craigslist.org", + "alibaba.com", + "pixnet.net", + "google.pl", + "amazon.in", + "ask.com", + "outbrain.com", + "xhamster.com", + "jd.com", + "youth.cn", + "googleusercontent.com", + "google.co.id", + 
"soso.com", + "dropbox.com", + "tianya.cn", + "xinhuanet.com", + "go.com", + "google.com.au", + "coccoc.com", + "amazon.co.uk", + "cntv.cn", + "google.com.tw", + "popads.net", + "microsoftonline.com", + "twitch.tv", + "wikia.com", + "bongacams.com", + "cnn.com", + "360.com", + "adf.ly", + "google.com.eg", + "booking.com", + "bbc.co.uk", + "haosou.com", + "flipkart.com", + "adobe.com", + "google.com.pk", + "chase.com", + "google.com.sa", + "youku.com", + "daum.net", + "163.com", + "google.co.th", + "alipay.com", + "china.com", + "livedoor.jp", + "google.com.ar", + "quora.com", + "google.nl", + "ebay.co.uk", + "espn.go.com", + "nytimes.com", + "sogou.com", + "bilibili.com", + "ebay.de", + "txxx.com", + "bbc.com", + "amazon.cn", + "ettoday.net", + "xnxx.com", + "dailymotion.com", + "indiatimes.com", + "amazonaws.com", + "bankofamerica.com", + "china.com.cn", + "myway.com", + "so.com", + "zillow.com", + "ameblo.jp", + "salesforce.com", + "dailymail.co.uk", + "walmart.com", + "buzzfeed.com", + "vice.com", + "godaddy.com", + "soundcloud.com", + "indeed.com", + "google.co.ve", + "tudou.com", + "mediafire.com", + "globo.com", + "doubleclick.net", + "uol.com.br", + "wellsfargo.com", + "google.com.ua", + "etsy.com", + "zhihu.com", + "adnetworkperformance.com", + "slideshare.net", + "detail.tmall.com", + "yelp.com", + "google.gr", + "aol.com", + "nametests.com", + "avito.ru", + "onlinesbi.com", + "huffingtonpost.com", + "cnzz.com", + "steamcommunity.com", + "weather.com", + "cnet.com", + "google.co.za", + "goo.ne.jp", + "detik.com", + "dmm.co.jp", + "uptodown.com", + "stackexchange.com", + "gfycat.com", + "theguardian.com", + "torrentz.eu", + "naver.jp", + "google.com.co", + "redtube.com", + "force.com", + "9gag.com", + "pixiv.net", + "taringa.net", + "directrev.com", + "putlocker.is", + "bet365.com", + "tripadvisor.com", + "slither.io", + "vimeo.com", + "skype.com", + "amazon.it", + "steampowered.com", + "taboola.com", + "hclips.com", + "kakaku.com", + "feedly.com", + 
"blogspot.in", + "udn.com", + "flickr.com", + "google.ro", + "google.com.pe", + "bp.blogspot.com", + "nih.gov", + "google.com.ng", + "google.be", + "washingtonpost.com", + "tribunnews.com", + "foxnews.com", + "softonic.com", + "deviantart.com", + "varzesh3.com", + "youporn.com", + "amazon.fr", + "homedepot.com", + "comcast.net", + "tistory.com", + "snapdeal.com", + "mega.nz", + "rdsa2012.com", + "web.de", + "youm7.com", + "target.com", + "ikea.com", + "zol.com.cn", + "orange.fr", + "github.io", + "ifeng.com", + "livejournal.com", + "w3schools.com", + "douyu.com", + "wikihow.com", + "offerjuice.me", + "leboncoin.fr", + "upornia.com", + "google.com.sg", + "iqiyi.com", + "google.se", + "gmx.net", + "americanexpress.com", + "fbcdn.net", + "51.la", + "mozilla.org", + "popcash.net", + "forbes.com", + "theladbible.com", + "wittyfeed.com", + "google.com.ph", + "babytree.com", + "spotify.com", + "wix.com", + "allegro.pl", + "google.at", + "xcar.com.cn", + "google.dz", + "onet.pl", + "irctc.co.in", + "wikimedia.org", + "hdfcbank.com", + "tuberel.com", + "webtretho.com", + "files.wordpress.com", + "imzog.com", + "capitalone.com", + "google.cn", + "twimg.com", + "xfinity.com", + "blog.jp", + "roblox.com", + "abs-cbn.com", + "terraclicks.com", + "pandora.com", + "bestbuy.com", + "google.ch", + "hulu.com", + "weebly.com", + "t-online.de", + "akamaihd.net", + "onedio.com", + "shutterstock.com", + "xywy.com", + "google.pt", + "usps.com", + "groupon.com", + "paytm.com", + "doorblog.jp", + "google.cl", + "mercadolivre.com.br", + "wordreference.com", + "google.cz", + "google.ae", + "chaturbate.com", + "1688.com", + "ltn.com.tw", + "ebay-kleinanzeigen.de", + "wp.pl", + "bitauto.com", + "livejasmin.com", + "rediff.com", + "speedtest.net", + "eksisozluk.com", + "2ch.net", + "icicibank.com", + "wordpress.org", + "about.com", + "goodreads.com", + "sourceforge.net", + "ups.com", + "espncricinfo.com", + "tokopedia.com", + "youtube-mp3.org", + "battle.net", + "csdn.net", + "google.co.il", + 
"accuweather.com", + "amazon.es", + "trello.com", + "huanqiu.com", + "pinimg.com", + "kaskus.co.id", + "xuite.net", + "ndtv.com", + "slickdeals.net", + "caijing.com.cn", + "yesky.com", + "zendesk.com", + "exoclick.com", + "seznam.cz", + "ruten.com.tw", + "oracle.com", + "google.hu", + "media.tumblr.com", + "businessinsider.com", + "mama.cn", + "hp.com", + "kompas.com", + "archive.org", + "adplxmd.com", + "blastingnews.com", + "mlb.com", + "icloud.com", + "kinogo.co", + "fedex.com", + "google.ie", + "sberbank.ru", + "1905.com", + "addthis.com", + "51yes.com", + "thesaurus.com", + "slack.com", + "samsung.com", + "webmd.com", + "life.tw", + "cnnic.cn", + "usatoday.com", + "telegraph.co.uk", + "giphy.com", + "liputan6.com", + "hurriyet.com.tr", + "kapanlagi.com", + "milliyet.com.tr", + "badoo.com", + "kinopoisk.ru", + "dell.com", + "att.com", + "ign.com", + "gizmodo.com", + "nyaa.se", + "dmm.com", + "tradeadexchange.com", + "liveadexchanger.com", + "ppomppu.co.kr", + "sabah.com.tr", + "mailchimp.com", + "openload.co", + "reimageplus.com", + "secureserver.net", + "loading-delivery2.com", + "gsmarena.com", + "hotstar.com", + "ouo.io", + "rambler.ru", + "sharepoint.com", + "scribd.com", + "bukalapak.com", + "lowes.com", + "cricbuzz.com", + "messenger.com", + "tube8.com", + "intuit.com", + "serving-sys.com", + "evernote.com", + "airbnb.com", + "cloudfront.net", + "rutracker.org", + "blogfa.com", + "livedoor.biz", + "citi.com", + "lifebuzz.com", + "blackboard.com", + "libero.it", + "gmanetwork.com", + "digikala.com", + "enet.com.cn", + "taleo.net", + "ask.fm", + "bloomberg.com", + "verizonwireless.com", + "friv.com", + "39.net", + "google.no", + "freepik.com", + "themeforest.net", + "sahibinden.com", + "billdesk.com", + "nba.com", + "thepiratebay.se", + "doublepimp.com", + "macys.com", + "likes.com", + "naukri.com", + "alicdn.com", + "medium.com", + "avg.com", + "4shared.com", + "repubblica.it", + "siteadvisor.com", + "wp.com", + "wsj.com", + "google.sk", + 
"extratorrent.cc", + "impress.co.jp", + "blkget.com", + "sh.st", + "livedoor.com", + "kickstarter.com", + "gearbest.com", + "box.com", + "amazon.ca", + "bild.de", + "wetransfer.com", + "ebay.it", + "nownews.com", + "hatena.ne.jp", + "tabelog.com", + "savefrom.net", + "google.dk", + "acfun.tv", + "trackingclick.net", + "ebay.in", + "expedia.com", + "ck101.com", + "instructure.com", + "hm.com", + "ameba.jp", + "dictionary.com", + "jabong.com", + "disqus.com", + "ancestry.com", + "google.az", + "mashable.com", + "hatenablog.com", + "olx.pl", + "subscene.com", + "zippyshare.com", + "merdeka.com", + "hdzog.com", + "kohls.com", + "zoho.com", + "azlyrics.com", + "eastday.com", + "kissanime.to", + "mediab.uy", + "blogimg.jp", + "vk.me", + "spiegel.de", + "mercadolibre.com.ar", + "mi.com", + "ozock.com", + "cnblogs.com", + "rt.com", + "thepiratebay.org", + "thesportbible.com", + "adexc.net", + "upwork.com", + "okezone.com", + "telegram.org", + "marca.com", + "k618.cn", + "surveymonkey.com", + "agar.io", + "engadget.com", + "blogspot.jp", + "realtor.com", + "thefreedictionary.com", + "haber7.com", + "thewatchseries.to", + "inquirer.net", + "daikynguyenvn.com", + "free.fr", + "leagueoflegends.com", + "lifehacker.com", + "4dsply.com", + "elpais.com", + "newegg.com", + "shopify.com", + "yallakora.com", + "umblr.com", + "beeg.com", + "goal.com", + "prjcq.com", + "reuters.com", + "ytimg.com", + "atlassian.net", + "videomega.tv", + "asos.com", + "blog-newstime.com", + "discovercard.com", + "appspot.com", + "stumbleupon.com", + "donga.com", + "17ok.com", + "wikiwiki.jp", + "google.by", + "bhaskar.com", + "meaww.com", + "hespress.com", + "zing.vn", + "baike.com", + "google.kz", + "seesaa.net", + "bookmyshow.com", + "xda-developers.com", + "tutorialspoint.com", + "ebay.com.au", + "ero-advertising.com", + "gismeteo.ru", + "clipconverter.cc", + "rbc.ru", + "gmarket.co.kr", + "infusionsoft.com", + "theverge.com", + "conservativetribune.com", + "google.bg", + "streamcloud.eu", + 
"souq.com", + "nifty.com", + "behance.net", + "mobile.de", + "albawabhnews.com", + "trulia.com", + "gamefaqs.com", + "wunderground.com", + "buzzlie.com", + "google.lk", + "playstation.com", + "clickadu.com", + "researchgate.net", + "meetup.com", + "costco.com", + "java.com", + "gap.com", + "blogspot.com.es", + "nike.com", + "sakura.ne.jp", + "uploaded.net", + "kooora.com", + "prezi.com", + "lenta.ru", + "hotels.com", + "gamersky.com", + "kijiji.ca", + "youdao.com", + "nordstrom.com", + "gamer.com.tw", + "ca.gov", + "aparat.com", + "weblio.jp", + "quizlet.com", + "japanpost.jp", + "usaa.com", + "google.com.kw", + "blog.me", + "makemytrip.com", + "fidelity.com", + "answers.com", + "yandex.ua", + "southwest.com", + "zomato.com", + "nikkei.com", + "urdupoint.com", + "so-net.ne.jp", + "teepr.com", + "sciencedirect.com", + "list.tmall.com", + "udemy.com", + "hootsuite.com", + "sq.cn", + "seasonvar.ru", + "nbcnews.com", + "instructables.com", + "webex.com", + "ebates.com", + "y8.com", + "fiverr.com", + "list-manage.com", + "thatviralfeed.com", + "allrecipes.com", + "moneycontrol.com", + "norton.com", + "patch.com", + "11st.co.kr", + "sabq.org", + "clien.net", + "dropbooks.tv", + "asahi.com", + "eskimi.com", + "independent.co.uk", + "gamepedia.com", + "adp.com", + "nhk.or.jp", + "lapatilla.com", + "retailmenot.com", + "airtel.in", + "alexa.cn", + "e-hentai.org", + "cookpad.com", + "bleacherreport.com", + "kayak.com", + "urbandictionary.com", + "woot.com", + "jimdo.com", + "shaparak.ir", + "filehippo.com", + "elmogaz.com", + "elmundo.es", + "abcnews.go.com", + "drudgereport.com", + "wayfair.com", + "yadi.sk", + "yandex.com.tr", + "npr.org", + "chaoshi.tmall.com", + "aliyun.com", + "change.org", + "123cha.com", + "mit.edu", + "chinaz.com", + "yodobashi.com", + "thewhizmarketing.com", + "justdial.com", + "olx.in", + "korabia.com", + "wattpad.com", + "zone-telechargement.com", + "photobucket.com", + "itmedia.co.jp", + "line.me", + "lenovo.com", + "pantip.com", + 
"momoshop.com.tw", + "huaban.com", + "ibm.com", + "rottentomatoes.com", + "cnmo.com", + "4399.com", + "squarespace.com", + "emol.com", + "time.com", + "xe.com", + "www.gov.uk", + "le.com", + "mercadolibre.com.ve", + "mixi.jp", + "interia.pl", + "yaolan.com", + "corriere.it", + "gyazo.com", + "overstock.com", + "netteller.com", + "adidas.tmall.com", + "ensonhaber.com", + "state.gov", + "cbsnews.com", + "intoday.in", + "zulily.com", + "watsons.tmall.com", + "4pda.ru", + "asus.com", + "biglobe.ne.jp", + "cisco.com", + "google.com.vn", + "eyny.com", + "coursera.org", + "prpops.com", + "panda.tv", + "verizon.com", + "saramin.co.kr", + "xunlei.com", + "myfitnesspal.com", + "liveinternet.ru", + "aa.com", + "subito.it", + "manoramaonline.com", + "glassdoor.com", + "google.com.do", + "rednet.cn", + "torcache.net", + "himado.in", + "timeanddate.com", + "vodlocker.com", + "104.com.tw", + "chip.de", + "houzz.com", + "slimspots.com", + "chinaso.com", + "4chan.org", + "yts.ag", + "thekitchn.com", + "fitbit.com", + "ticketmaster.com", + "quikr.com", + "oeeee.com", + "nikkeibp.co.jp", + "people.com", + "twoo.com", + "indiaresults.com", + "reverso.net", + "namu.wiki", + "delta.com", + "pof.com", + "microsoftstore.com", + "eventbrite.com", + "investing.com", + "voc.com.cn", + "jrj.com.cn", + "indianrail.gov.in", + "shopclues.com", + "hupu.com", + "issuu.com", + "indianexpress.com", + "atwiki.jp", + "rarbg.to", + "latimes.com", + "drom.ru", + "misrjournal.com", + "elfagr.org", + "superuser.com", + "elwatannews.com", + "redirectvoluum.com", + "gigazine.net", + "infoseek.co.jp", + "paytm.in", + "flirchi.com", + "ero-video.net", + "gameforge.com", + "howtogeek.com", + "olx.ua", + "custhelp.com", + "php.net", + "munrvscurlms.com", + "google.rs", + "bs.to", + "google.com.ec", + "usbank.com", + "mercadolibre.com.mx", + "okcupid.com", + "rappler.com", + "pch.com", + "kdnet.net", + "thewhizproducts.com", + "india.com", + "asana.com", + "wiktionary.org", + "google.co.nz", + "olx.com.br", + 
"bola.net", + "buy.tmall.com", + "mirror.co.uk", + "ci123.com", + "academia.edu", + "googleapis.com", + "android.com", + "ontests.me", + "anitube.se", + "mynavi.jp", + "biobiochile.cl", + "syosetu.com", + "adbooth.com", + "heroquizz.com", + "bhphotovideo.com", + "appledaily.com.tw", + "fanpage.gr", + "sears.com", + "tsite.jp", + "as.com", + "abril.com.br", + "getpocket.com", + "battlefield.com", + "zhanqi.tv", + "cnbc.com", + "stockstar.com", + "redfin.com", + "spankbang.com", + "slate.com", + "aastocks.com", + "familydoctor.com.cn", + "bomb01.com", + "constantcontact.com", + "google.com.my", + "livescore.com", + "primewire.ag", + "nexusmods.com", + "united.com", + "gamespot.com", + "tim.it", + "iplt20.com", + "58.com", + "eonline.com", + "geocities.jp", + "auction.co.kr", + "youjizz.com", + "gazetaexpress.com", + "exblog.jp", + "jcpenney.com", + "vid.me", + "rightmove.co.uk", + "cpasbien.cm", + "thehindu.com", + "bitbucket.org", + "hotnewhiphop.com", + "swagbucks.com", + "rapidgator.net", + "ebay.fr", + "alwafd.org", + "images-amazon.com", + "wiley.com", + "xmediaserve.com", + "techcrunch.com", + "cdiscount.com", + "buzzfil.net", + "audible.com", + "ultimate-guitar.com", + "youboy.com", + "bodybuilding.com", + "tmz.com", + "51sole.com", + "commentcamarche.net", + "irs.gov", + "google.hr", + "mackolik.com", + "todayhumor.co.kr", + "staples.com", + "lun.com", + "mega.co.nz", + "politico.com", + "123rf.com", + "thisav.com", + "duckduckgo.com", + "intel.com", + "nypost.com", + "google.lt", + "fanfiction.net", + "bankmellat.ir", + "priceline.com", + "126.com", + "duolingo.com", + "genius.com", + "europa.eu", + "marktplaats.nl", + "pcmag.com", + "wwwpromoter.com", + "pogo.com", + "popsugar.com", + "chron.com", + "mcafee.com", + "weather.gov", + "zappos.com", + "jin115.com", + "paparazzieg.com", + "3dmgame.com", + "investopedia.com", + "bt.com", + "gazeta.pl", + "sankei.com", + "almasryalyoum.com", + "eastmoney.com", + "uptobox.com", + "cbssports.com", + "eroterest.net", 
+ "dcinside.com", + "sfr.fr", + "lazada.co.id", + "idnes.cz", + "foodnetwork.com", + "marriott.com", + "agoda.com", + "ria.ru", + "cocolog-nifty.com", + "mydala.com", + "kotaku.com", + "acunn.com", + "walgreens.com", + "creditkarma.com", + "sozcu.com.tr", + "zara.com", + "inspsearch.com", + "humblebundle.com", + "drive2.ru", + "carview.co.jp", + "ted.com", + "ccm.net", + "nydailynews.com", + "office365.com", + "otto.de", + "hh.ru", + "wargaming.net", + "ew.com", + "yomiuri.co.jp", + "gutefrage.net", + "mediaplex.com", + "farsnews.com", + "xbox.com", + "curapelanatureza.com.br", + "springer.com", + "lemonde.fr", + "sfgate.com", + "fatosdesconhecidos.com.br", + "mgid.com", + "oschina.net", + "tomshardware.com", + "askubuntu.com", + "marketwatch.com", + "newtab-media.com", + "bidvertiser.com", + "deviantart.net", + "neobux.com", + "abplive.in", + "merriam-webster.com", + "vnexpress.net", + "banggood.com", + "cbs.com", + "chosun.com", + "hilton.com", + "mayoclinic.org", + "myfreecams.com", + "sky.com", + "garmin.com", + "mapquest.com", + "hotmovs.com", + "hepsiburada.com", + "milanuncios.com", + "mangafox.me", + "whitepages.com", + "westernjournalism.com", + "thevideo.me", + "hipersushiads.com", + "liveleak.com", + "dafont.com", + "axisbank.co.in", + "excite.co.jp", + "nate.com", + "discuss.com.hk", + "cambridge.org", + "pnc.com", + "givemesport.com", + "yaplakal.com", + "match.com", + "nfl.com", + "wav.tv", + "ampclicks.com", + "vetogate.com", + "yellowpages.com", + "hgtv.com", + "mbc.net", + "6pm.com", + "yjc.ir", + "topix.com", + "google.fi", + "wish.com", + "livestrong.com", + "monster.com", + "uber.com", + "cbc.ca", + "ptt.cc", + "2chblog.jp", + "mundo.com", + "mint.com", + "khanacademy.org", + "basecamp.com", + "labanquepostale.fr", + "apache.org", + "searchincognito.com", + "voyeurhit.com" + ], + "name": "Top 1000 website from Alexa", + "version": "20160528", + "description": "Event contains one or more entries from the top 1000 of the most used website 
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Generate the MISP warning list for the Alexa top 1000 websites.

The script downloads the Alexa "top 1 million" archive, extracts the first
1000 domain names from the CSV it contains and prints the resulting MISP
warning list as a JSON document on standard output.

Usage::

    python3 generate-alexa.py > lists/alexa/list.json
"""

import csv
import datetime
import io
import itertools
import json
import zipfile

# Location of the zipped ranking ("rank,domain" per line).
alexa_url = "http://s3.amazonaws.com/alexa-static/top-1m.csv.zip"
# Local path the archive is saved to before being read.
alexa_file = "top-1m.csv.zip"
# Name of the CSV member inside the archive.
alexa_csv = "top-1m.csv"
# Number of top-ranked sites kept in the warning list.
top_count = 1000
user_agent = {"User-agent":"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:46.0) Gecko/20100101 Firefox/46.0"}


def download_archive(url=alexa_url, destination=alexa_file, timeout=60):
    """Download the Alexa archive and store it on disk.

    Args:
        url: Address of the zipped CSV ranking.
        destination: Path of the local file to (over)write.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``destination`` path, for convenience.

    Raises:
        requests.RequestException: On network failure, timeout or a
            non-success HTTP status.
    """
    # Imported lazily so the parsing helpers stay usable without the
    # third-party dependency being installed.
    import requests

    response = requests.get(url, headers=user_agent, timeout=timeout)
    # Fail loudly instead of saving an HTML error page as a ".zip".
    response.raise_for_status()
    with open(destination, "wb") as archive:
        archive.write(response.content)
    return destination


def read_top_sites(archive=alexa_file, count=top_count, member=alexa_csv):
    """Return the first ``count`` domain names listed in the archive.

    Args:
        archive: Path to the zip archive, or a binary file-like object.
        count: Maximum number of domains to return.
        member: Name of the CSV file inside the archive.

    Returns:
        A list of domain names in ranking order (at most ``count`` long).

    Raises:
        KeyError: If ``member`` is not present in the archive.
        zipfile.BadZipFile: If ``archive`` is not a valid zip file.
    """
    with zipfile.ZipFile(archive, "r") as alexa_lists:
        # Closing the text wrapper also closes the underlying zip member.
        with io.TextIOWrapper(alexa_lists.open(member),
                              encoding="utf-8", newline="") as text:
            # Skip blank/malformed rows so they do not count towards the
            # limit, and stop reading as soon as enough rows were seen
            # rather than loading the whole one-million-line file.
            rows = (row for row in csv.reader(text) if len(row) >= 2)
            return [row[1].strip() for row in itertools.islice(rows, count)]


def build_warninglist(sites, today=None):
    """Build the MISP warning list structure for ``sites``.

    Args:
        sites: Iterable of domain names to put in the list.
        today: Date used for the ``version`` field (``YYYYMMDD``);
            defaults to the current local date.

    Returns:
        A dict ready to be serialised with :func:`json.dumps`.
    """
    if today is None:
        today = datetime.date.today()

    alexa_warninglist = {}
    alexa_warninglist['description'] = "Event contains one or more entries from the top 1000 of the most used website (Alexa)."
    alexa_warninglist['version'] = "{0}{1:02d}{2:02d}".format(today.year, today.month, today.day)
    alexa_warninglist['name'] = "Top 1000 website from Alexa"
    alexa_warninglist['list'] = list(sites)
    alexa_warninglist['matching_attributes'] = ['hostname', 'domain']
    return alexa_warninglist


def main():
    """Download the ranking and print the warning list as JSON."""
    download_archive()
    print(json.dumps(build_warninglist(read_top_sites())))


if __name__ == "__main__":
    main()