From d19dd72f995c3031a5dd5ea9ec7db080540236f3 Mon Sep 17 00:00:00 2001 From: kovacsbalu Date: Mon, 3 Sep 2018 16:01:39 +0200 Subject: [PATCH] Install pystemon and start pystemon-feeder in docker --- Dockerfile | 5 +- bin/LAUNCH.sh | 19 +++- docker_start.sh | 98 +++--------------- pystemon/config.cfg | 219 +++++++++++++++++++++++++++++++++++++++ pystemon/install.sh | 9 ++ pystemon/pystemon.yaml | 230 +++++++++++++++++++++++++++++++++++++++++ 6 files changed, 493 insertions(+), 87 deletions(-) create mode 100644 pystemon/config.cfg create mode 100755 pystemon/install.sh create mode 100644 pystemon/pystemon.yaml diff --git a/Dockerfile b/Dockerfile index cdd26d6c..0bf8c3b6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -23,4 +23,7 @@ WORKDIR /opt/AIL # Default to UTF-8 file.encoding ENV LANG C.UTF-8 -CMD bash docker_start.sh +RUN ./pystemon/install.sh + +COPY docker.sh /docker.sh +ENTRYPOINT ["/bin/bash", "docker.sh"] diff --git a/bin/LAUNCH.sh b/bin/LAUNCH.sh index c3bfd8cf..9996992b 100755 --- a/bin/LAUNCH.sh +++ b/bin/LAUNCH.sh @@ -27,6 +27,7 @@ islogged=`screen -ls | egrep '[0-9]+.Logging_AIL' | cut -d. -f1` isqueued=`screen -ls | egrep '[0-9]+.Queue_AIL' | cut -d. -f1` isscripted=`screen -ls | egrep '[0-9]+.Script_AIL' | cut -d. -f1` isflasked=`screen -ls | egrep '[0-9]+.Flask_AIL' | cut -d. -f1` +isfeeded=`screen -ls | egrep '[0-9]+.Feeder' | cut -d. -f1` function helptext { echo -e $YELLOW" @@ -168,7 +169,7 @@ function launching_scripts { sleep 0.1 screen -S "Script_AIL" -X screen -t "Keys" bash -c 'cd '${AIL_BIN}'; ./Keys.py; read x' sleep 0.1 - screen -S "Script_AIL" -X screen -t "Decoder" bash -c 'cd '${AIL_BIN}'; ./Decoder.py; read x' + screen -S "Script_AIL" -X screen -t "Base64" bash -c 'cd '${AIL_BIN}'; ./Base64.py; read x' sleep 0.1 screen -S "Script_AIL" -X screen -t "Bitcoin" bash -c 'cd '${AIL_BIN}'; ./Bitcoin.py; read x' sleep 0.1 @@ -317,6 +318,19 @@ function launch_flask { fi } +function launch_feeder { + if [[ ! 
$isfeeded ]]; then + screen -dmS "Feeder" + sleep 0.1 + echo -e $GREEN"\t* Launching Pystemon feeder"$DEFAULT + screen -S "Feeder" -X screen -t "Pystemon_feeder" bash -c 'cd '${AIL_BIN}'; ./feeder/pystemon-feeder.py; read x' + sleep 0.1 + screen -S "Feeder" -X screen -t "Pystemon" bash -c 'cd '${AIL_HOME}/../pystemon'; python2 pystemon.py; read x' + else + echo -e $RED"\t* A Feeder screen is already launched"$DEFAULT + fi +} + function killall { if [[ $isredis || $isardb || $islogged || $isqueued || $isscripted || $isflasked ]]; then echo -e $GREEN"Gracefully closing redis servers"$DEFAULT @@ -325,7 +339,7 @@ function killall { echo -e $GREEN"Gracefully closing ardb servers"$DEFAULT shutting_down_ardb; echo -e $GREEN"Killing all"$DEFAULT - kill $isredis $isardb $islogged $isqueued $isscripted $isflasked + kill $isredis $isardb $islogged $isqueued $isscripted $isflasked $isfeeded sleep 0.2 echo -e $ROSE`screen -ls`$DEFAULT echo -e $GREEN"\t* $isredis $isardb $islogged $isqueued $isscripted killed."$DEFAULT @@ -357,6 +371,7 @@ function launch_all { launch_queues; launch_scripts $1; launch_flask; + launch_feeder; } #If no params, display the menu diff --git a/docker_start.sh b/docker_start.sh index 1f80ecae..9b585e01 100755 --- a/docker_start.sh +++ b/docker_start.sh @@ -1,89 +1,19 @@ -echo "Currently unmaintained, continue at your own risk of not having a working AIL at the end :(" -exit 1 +#!/bin/bash +signalListener() { + "$@" & + pid="$!" 
+ trap "echo 'Stopping'; kill -SIGTERM $pid" SIGINT SIGTERM + + while kill -0 $pid > /dev/null 2>&1; do + wait + done +} + source ./AILENV/bin/activate cd bin +./LAUNCH.sh -l -export PATH=$AIL_HOME:$PATH -export PATH=$AIL_REDIS:$PATH -export PATH=$AIL_LEVELDB:$PATH -export PATH=$AIL_ARDB:$PATH -if [ -z $1 ]; then - export AILENV=/opt/AIL - else - export AILENV=$1 -fi +signalListener tail -f /dev/null $@ -conf_dir="${AIL_HOME}/configs/" - -screen -dmS "Redis" -screen -S "Redis" -X screen -t "6379" bash -c 'redis-server '$conf_dir'6379.conf ; read x' -screen -S "Redis" -X screen -t "6380" bash -c 'redis-server '$conf_dir'6380.conf ; read x' -screen -S "Redis" -X screen -t "6381" bash -c 'redis-server '$conf_dir'6381.conf ; read x' - -# For Words and curves -sleep 0.1 -screen -dmS "ARDB_AIL" -screen -S "ARDB_AIL" -X screen -t "6382" bash -c 'ardb-server '$conf_dir'6382.conf ; read x' - -#Want to launch more level_db? -lvdbhost='127.0.0.1' -lvdbdir="${AIL_HOME}/LEVEL_DB_DATA/" -db1_y='2013' -db2_y='2014' -db3_y='2016' -db4_y='2017' - -dbC_y='3016' - -nb_db=13 - -screen -dmS "LevelDB" -#Add lines here with appropriates options. 
-screen -S "LevelDB" -X screen -t "2013" bash -c 'redis-leveldb -H '$lvdbhost' -D '$lvdbdir'2013/ -P '$db1_y' -M '$nb_db'; read x' -screen -S "LevelDB" -X screen -t "2014" bash -c 'redis-leveldb -H '$lvdbhost' -D '$lvdbdir'2014/ -P '$db2_y' -M '$nb_db'; read x' -screen -S "LevelDB" -X screen -t "2016" bash -c 'redis-leveldb -H '$lvdbhost' -D '$lvdbdir'2016/ -P '$db3_y' -M '$nb_db'; read x' -screen -S "LevelDB" -X screen -t "2016" bash -c 'redis-leveldb -H '$lvdbhost' -D '$lvdbdir'2017/ -P '$db4_y' -M '$nb_db'; read x' - -# For Curve -screen -S "LevelDB" -X screen -t "3016" bash -c 'redis-leveldb -H '$lvdbhost' -D '$lvdbdir'3016/ -P '$dbC_y' -M '$nb_db'; read x' - - -screen -dmS "Logging" -screen -S "Logging" -X screen -t "LogQueue" bash -c 'log_subscriber -p 6380 -c Queuing -l ../logs/; read x' -screen -S "Logging" -X screen -t "LogScript" bash -c 'log_subscriber -p 6380 -c Script -l ../logs/; read x' - -screen -dmS "Queue" -screen -S "Queue" -X screen -t "Queues" bash -c './launch_queues.py; read x' - -screen -dmS "Script" -screen -S "Script" -X screen -t "ModuleInformation" bash -c './ModuleInformation.py -k 0 -c 1; read x' -screen -S "Script" -X screen -t "Mixer" bash -c './Mixer.py; read x' -screen -S "Script" -X screen -t "Global" bash -c './Global.py; read x' -screen -S "Script" -X screen -t "Duplicates" bash -c './Duplicates.py; read x' -screen -S "Script" -X screen -t "Attributes" bash -c './Attributes.py; read x' -screen -S "Script" -X screen -t "Lines" bash -c './Lines.py; read x' -screen -S "Script" -X screen -t "DomClassifier" bash -c './DomClassifier.py; read x' -screen -S "Script" -X screen -t "Categ" bash -c './Categ.py; read x' -screen -S "Script" -X screen -t "Tokenize" bash -c './Tokenize.py; read x' -screen -S "Script" -X screen -t "CreditCards" bash -c './CreditCards.py; read x' -screen -S "Script" -X screen -t "Onion" bash -c './Onion.py; read x' -screen -S "Script" -X screen -t "Mail" bash -c './Mail.py; read x' -screen -S "Script" -X screen 
-t "Web" bash -c './Web.py; read x' -screen -S "Script" -X screen -t "Credential" bash -c './Credential.py; read x' -screen -S "Script" -X screen -t "Curve" bash -c './Curve.py; read x' -screen -S "Script" -X screen -t "CurveManageTopSets" bash -c './CurveManageTopSets.py; read x' -screen -S "Script" -X screen -t "Indexer" bash -c './Indexer.py; read x' -screen -S "Script" -X screen -t "Keys" bash -c './Keys.py; read x' -screen -S "Script" -X screen -t "Phone" bash -c './Phone.py; read x' -screen -S "Script" -X screen -t "Release" bash -c './Release.py; read x' -screen -S "Script" -X screen -t "Cve" bash -c './Cve.py; read x' -screen -S "Script" -X screen -t "WebStats" bash -c './WebStats.py; read x' -screen -S "Script" -X screen -t "ModuleStats" bash -c './ModuleStats.py; read x' -screen -S "Script" -X screen -t "SQLInjectionDetection" bash -c './SQLInjectionDetection.py; read x' -screen -S "Script" -X screen -t "BrowseWarningPaste" bash -c './BrowseWarningPaste.py; read x' -screen -S "Script" -X screen -t "SentimentAnalysis" bash -c './SentimentAnalysis.py; read x' - -cd $AILENV -cd var/www/ -python Flask_server.py +./LAUNCH.sh -k diff --git a/pystemon/config.cfg b/pystemon/config.cfg new file mode 100644 index 00000000..90efee74 --- /dev/null +++ b/pystemon/config.cfg @@ -0,0 +1,219 @@ +[Directories] +bloomfilters = Blooms +dicofilters = Dicos +pastes = PASTES +base64 = BASE64 + +wordtrending_csv = var/www/static/csv/wordstrendingdata +wordsfile = files/wordfile + +protocolstrending_csv = var/www/static/csv/protocolstrendingdata +protocolsfile = files/protocolsfile + +tldstrending_csv = var/www/static/csv/tldstrendingdata +tldsfile = faup/src/data/mozilla.tlds + +domainstrending_csv = var/www/static/csv/domainstrendingdata + +pystemonpath = /opt/pystemon/ + +sentiment_lexicon_file = sentiment/vader_lexicon.zip/vader_lexicon/vader_lexicon.txt + +##### Notifications ###### +[Notifications] +ail_domain = http://localhost:7000 +sender = sender@example.com 
+sender_host = smtp.example.com +sender_port = 1337 + +# optional for using with authenticated SMTP over SSL +# sender_pw = securepassword + +##### Flask ##### +[Flask] +#Number of logs to display in the dashboard +max_dashboard_logs = 15 +#Maximum number of characters to display in the tooltip +max_preview_char = 250 +#Maximum number of characters to display in the modal +max_preview_modal = 800 +#Default number of headers to display in trending graphs +default_display = 10 +#Number of minutes displayed for the number of processed pastes. +minute_processed_paste = 10 +#Maximum line length authorized to make a diff between duplicates +DiffMaxLineLength = 10000 + +#### Modules #### +[BankAccount] +max_execution_time = 60 + +[Categ] +#Minimum number of matches between the paste and the category file +matchingThreshold=1 + +[Credential] +#Minimum length that a credential must have to be considered as such +minimumLengthThreshold=3 +#Will be pushed as an alert if the number of credentials is greater than that number +criticalNumberToAlert=8 +#Will be considered a false positive if fewer than X matches from the top password list +minTopPassList=5 + +[Curve] +max_execution_time = 90 + +[Base64] +path = Base64/ +max_execution_time = 60 + +[Modules_Duplicates] +#Number of months to look back +maximum_month_range = 3 +#The value where two pastes are considered duplicates for ssdeep. +threshold_duplicate_ssdeep = 50 +#The value where two pastes are considered duplicates for tlsh. +threshold_duplicate_tlsh = 52 +#Minimum size of the paste considered +min_paste_size = 0.3 + +[Module_ModuleInformation] +#Threshold to deduce if a module is stuck or not, in seconds. +threshold_stucked_module=600 + +[Module_Mixer] +#Define the configuration of the mixer, possible value: 1, 2 or 3 +operation_mode = 3 +#Define the time that a paste will be considered a duplicate,
in seconds (1day = 86400) +ttl_duplicate = 86400 + +##### Redis ##### +[Redis_Cache] +host = localhost +port = 6379 +db = 0 + +[Redis_Log] +host = localhost +port = 6380 +db = 0 + +[Redis_Log_submit] +host = localhost +port = 6380 +db = 1 + +[Redis_Queues] +host = localhost +port = 6381 +db = 0 + +[Redis_Data_Merging] +host = localhost +port = 6379 +db = 1 + +[Redis_Paste_Name] +host = localhost +port = 6379 +db = 2 + +[Redis_Mixer_Cache] +host = localhost +port = 6381 +db = 1 + +##### ARDB ##### +[ARDB_Curve] +host = localhost +port = 6382 +db = 1 + +[ARDB_Sentiment] +host = localhost +port = 6382 +db = 4 + +[ARDB_TermFreq] +host = localhost +port = 6382 +db = 2 + +[ARDB_TermCred] +host = localhost +port = 6382 +db = 5 + +[ARDB_DB] +host = localhost +port = 6382 +db = 0 + +[ARDB_Trending] +host = localhost +port = 6382 +db = 3 + +[ARDB_Hashs] +host = localhost +db = 1 + +[ARDB_Tags] +host = localhost +port = 6382 +db = 6 + +[ARDB_Metadata] +host = localhost +port = 6382 +db = 7 + +[ARDB_Statistics] +host = localhost +port = 6382 +db = 8 + +[Url] +cc_critical = DE + +[DomClassifier] +cc = DE +cc_tld = r'\.de$' +dns = 8.8.8.8 + +[Mail] +dns = 8.8.8.8 + +# Indexer configuration +[Indexer] +type = whoosh +path = indexdir +register = indexdir/all_index.txt +#size in Mb +index_max_size = 2000 + +[ailleakObject] +maxDuplicateToPushToMISP=10 + +############################################################################### + +# For multiple feed, add them with "," without space +# e.g.: tcp://127.0.0.1:5556,tcp://127.0.0.1:5557 +[ZMQ_Global] +#address = tcp://crf.circl.lu:5556 +address = tcp://127.0.0.1:5556,tcp://crf.circl.lu:5556 +channel = 102 +bind = tcp://127.0.0.1:5556 + +[ZMQ_Url] +address = tcp://127.0.0.1:5004 +channel = urls + +[ZMQ_FetchedOnion] +address = tcp://127.0.0.1:5005 +channel = FetchedOnion + +[RedisPubSub] +host = localhost +port = 6381 +db = 0 diff --git a/pystemon/install.sh b/pystemon/install.sh new file mode 100755 index 00000000..6620dbf8 --- 
/dev/null +++ b/pystemon/install.sh @@ -0,0 +1,9 @@ +git clone https://github.com/cvandeplas/pystemon.git /opt/pystemon + +apt-get install -y python-pip python-requests python-yaml python-redis + +pip install beautifulsoup4 + +BASEDIR=$(dirname "$0") +cp $BASEDIR/config.cfg /opt/AIL/bin/packages/ +cp $BASEDIR/pystemon.yaml /opt/pystemon/ diff --git a/pystemon/pystemon.yaml b/pystemon/pystemon.yaml new file mode 100644 index 00000000..819a3cb8 --- /dev/null +++ b/pystemon/pystemon.yaml @@ -0,0 +1,230 @@ +#network: # Network settings +# ip: '1.1.1.1' # Specify source IP address if you want to bind on a specific one + +archive: + save: yes # Keep a copy of pasties that triggered alerts + save-all: yes # Keep a copy of all pasties + dir: "alerts" # Directory where matching pasties should be kept + dir-all: "archive" # Directory where all pasties should be kept (if save-all is set to yes) + compress: yes # Store the pasties compressed + +engine: re # Only re (default) or regex (pip install regex) are supported. 
+strict_regex: no # when compiling regex, hard fail or not on error + +save-thread: no # Use a separate thread to save pasties + +db: + sqlite3: # Store information about the pastie in a database + enable: no # Activate this DB engine # NOT FULLY IMPLEMENTED + file: 'db.sqlite3' # The filename of the database + lookup: no # lookup sqlite for already seen pasties + +mongo: + save: no # Keep a copy of pasties that triggered alerts + save-all: no # Keep a copy of all pasties + save-profile: # configure which data to save + content-on-miss: no # save the content even on miss + timestamp: no # add the timestamp (UTC) + url: no # add the public URL + site: no # add the site + id: no # add the per-site id + matched: no # add the matched status (useful if content-on-miss = yes) + filename: no # add the local filename (to not store content in mongodb) + lookup: no # lookup mongodb for already seen pasties + database: "paste" + collection: "paste" + url: "mongodb://localhost" + user: + password: + +redis: + queue: yes # Keep a copy of pasties that triggered alerts + queue-all: yes # Keep a copy of all pasties + server: "localhost" + port: 6379 + database: 10 + lookup: no # lookup redisdb for already seen pasties (NOT IMPLEMENTED) + +email: + alert: no # Enable/disable email alerts + from: alert@example.com + to: alert@example.com + server: 127.0.0.1 # Address of the server (hostname or IP) + port: 25 # Outgoing SMTP port: 25, 587, ... + tls: no # Enable/disable tls support + username: '' # (optional) Username for authentication. Leave blank for no authentication. + password: '' # (optional) Password for authentication. Leave blank for no authentication. + subject: '[pystemon] - {subject}' + +##### +# Definition of regular expressions to search for in the pasties +# +search: +# - description: '' # (optional) A human readable description used in alerts. +# # If left unspecified the search regular expression +# # will be used as description.
+# search: '' # The regular expression to search for. +# count: '' # (optional) How many hits should it have to be interesting? +# exclude: '' # (optional) Do not alert if this regular expression matches +# regex-flags: '' # (optional) Regular expression flags to give to the find function. +# # Default = re.IGNORECASE +# # Set to 0 to have no flags set +# # See http://docs.python.org/2/library/re.html#re.DEBUG for more info. +# # Warning: when setting this the default is overridden +# # example: 're.MULTILINE + re.DOTALL + re.IGNORECASE' +# to: '' # (optional) Additional recipients for email alert, comma separated list + + - search: '[^a-zA-Z0-9]example\.com' + - search: '[^a-zA-Z0-9]foobar\.com' + - description: 'Download (non-porn)' + search: 'download' + exclude: 'porn|sex|teen' + count: 4 + +##### +# Configuration section for the paste sites +# +threads: 1 # number of download threads per site +site: +# example.com: +# archive-url: # the url where the list of last pasties is present +# # example: 'http://pastebin.com/archive' +# archive-regex: # a regular expression to extract the pastie-id from the page. +# # do not forget the () to extract the pastie-id +# # example: '.+' +# download-url: # url for the raw pastie. 
+# # Should contain {id} on the place where the ID of the pastie needs to be placed +# # example: 'http://pastebin.com/raw.php?i={id}' +# public-url: # optional, defaults to be the same as download-url, so it should meet the same requirements +# # is used for display in logging and e-mail notifications +# update-max: 40 # every X seconds check for new updates to see if new pasties are available +# update-min: 30 # a random number will be chosen between these two numbers +# pastie-classname: # OPTIONAL: The name of a custom Class that inherits from Pastie +# # This is practical for sites that require custom fetchPastie() functions + + pastebin.com: + archive-url: 'https://pastebin.com/archive' + archive-regex: '.+' + download-url: 'https://pastebin.com/raw/{id}' + update-max: 50 + update-min: 40 + + # See https://pastebin.com/api_scraping_faq , you will need a pro account on pastebin + pastebin.com_pro: + archive-url: 'https://scrape.pastebin.com/api_scraping.php?limit=500' + archive-regex: '"key": "(.+)",' + download-url: 'https://scrape.pastebin.com/api_scrape_item.php?i={id}' + public-url: 'https://pastebin.com/raw/{id}' + update-max: 50 + update-min: 40 + + slexy.org: + archive-url: 'https://slexy.org/recent' + archive-regex: 'View paste' + download-url: 'https://slexy.org/view/{id}' + pastie-classname: PastieSlexyOrg + + gist.github.com: + archive-url: 'https://gist.github.com/discover' + archive-regex: '' + download-url: 'https://gist.githubusercontent.com/{id}/raw/' + + codepad.org: + archive-url: 'http://codepad.org/recent' + archive-regex: 'view' + download-url: 'http://codepad.org/{id}/raw.txt' + + kpaste.net: + archive-url: 'http://kpaste.net/' + archive-regex: '" href="/(\w+)">' + download-url: 'http://kpaste.net/{id}?raw' + + ideone.com: + archive-url: 'http://ideone.com/recent' + archive-regex: '#' + download-url: 'http://ideone.com/plain/{id}' + + pastebin.ru: + archive-url: 'http://pastebin.ru/' + archive-regex: '' + download-url: 
'http://pastebin.ru/{id}' + + pastebin.fr: + archive-url: 'http://pastebin.fr' + archive-regex: '' + # download-url: 'http://pastie.org/pastes/{id}/text' + + # pastebin.ca: + # archive-url: 'http://pastebin.ca' + # archive-regex: 'rel="/preview.php\?id=(\d+)' + # download-url: 'http://pastebin.ca/{id}' + + # nopaste.me: + # archive-url: 'http://nopaste.me/recent' + # archive-regex: '' + # download-url: 'http://nopaste.me/download/{id}.txt' + + # cdv.lt: + # pastie-classname: PastieCdvLt + # archive-url: 'http://cdv.lt/snippets' + # archive-regex: '[0-9]' + # download-url: 'http://cdv.lt/api/snippet/{id}' + + # snipt.net: + # pastie-classname: PastieSniptNet + # archive-url: 'https://snipt.net/public/?rss' + # archive-regex: 'https://snipt.net/(.+)/' + # download-url: 'https://snipt.net/{id}/' + + # quickleak.se: + # archive-url: 'http://www.quickleak.se/last-pastes.html' + # archive-regex: '' + # download-url: 'http://www.quickleak.se/{id}' + +# safebin.net: # FIXME not finished +# archive-url: 'http://safebin.net/?archive' +# archive-regex: '' +# download-url: 'http://safebin.net/{id}' +# update-max: 60 +# update-min: 50 + + +# TODO +# http://www.safebin.net/ # more complex site +# http://www.heypasteit.com/ # http://www.heypasteit.com/clip/0IZA => incremental + +# http://hastebin.com/ # no list of last pastes +# http://sebsauvage.net/paste/ # no list of last pastes +# http://tny.cz/ # no list of last pastes +# https://pastee.org/ # no list of last pastes +# http://paste2.org/ # no list of last pastes +# http://0bin.net/ # no list of last pastes +# http://markable.in/ # no list of last pastes + + +##### +# Configuration section to configure proxies +# Currently only HTTP proxies are permitted +# +proxy: + random: no + file: 'proxies.txt' + +##### +# Configuration section for User-Agents +# +user-agent: + random: no + file: 'user-agents.txt'