Compare commits

...

123 Commits
v5.3 ... master

Author SHA1 Message Date
terrtia 86f312cbc3
chg: [crawler] add function to delete schedules 2024-05-15 10:21:08 +02:00
terrtia 4eb1b01370
chg: [crawler stats] add previous month stats by domain type 2024-05-15 10:03:00 +02:00
terrtia 21642fe9d4
chg: [user message] show user messages by chat 2024-05-13 10:51:46 +02:00
terrtia 0dfd92bcd6
chg: [tron] add TRON cryptocurrency detection + correlation. Thanks @pventuzelo for the contribution 2024-05-08 15:14:51 +02:00
terrtia 0c28b38638
fix: [language detector] fix exception if the libretranslate url is not specified 2024-05-06 11:31:37 +02:00
terrtia adbce24128
fix: [ocr] catch cv2 errors 2024-05-02 10:36:20 +02:00
terrtia e21257a3fe
fix: [pgpdump] fix installer, new pgpdump version require to launch autoreconf 2024-04-30 11:57:21 +02:00
terrtia 50ff7529d2
fix: [ocr] catch OSError on MP4 files 2024-04-30 10:17:04 +02:00
terrtia 31b519cc17
chg: [tags] search ocrs and images by tags + fix ocr, filter invalid image 2024-04-26 15:50:58 +02:00
terrtia 2b23d993df
chg: [correlation graph] add date first/last seen separator + fix display of ocr object tags 2024-04-26 11:52:17 +02:00
terrtia 5503d8134a
fix: [ocr] fix ocr supported languages 2024-04-26 10:49:24 +02:00
terrtia 1d1671c00f
fix: [Language] Updated the language detector to return an empty list when no language is detected 2024-04-26 10:39:28 +02:00
terrtia 35502d955f
fix: [ocr] filter ocr supported languages + fix type of object accepted by the tracker 2024-04-26 10:31:31 +02:00
terrtia 26f9e84d97
chg: [doc] add overview 2024-04-25 14:43:26 +02:00
terrtia 42ef6fb2e5
fix: [ocr] fix None copy date 2024-04-25 14:29:30 +02:00
terrtia 20c98de0fa
chg: [ocr] ocr daterange object, get ocrs by daterange + fixes 2024-04-25 14:18:22 +02:00
terrtia 973ced2efe
Merge branch 'master' into ocr 2024-04-24 15:34:22 +02:00
terrtia 7fd8ae4a81
chg: [reprocess tool] add OcrExtractor module + filter image gif 2024-04-24 15:16:18 +02:00
terrtia c25ccb8618
chg: [ocr] add cache + correlation ocr-chats-messages + launch ocr extractor by default 2024-04-24 14:43:11 +02:00
terrtia 0b5a1aa1b8
fix: [correlation] fix objects selector 2024-04-18 15:44:00 +02:00
terrtia 8bd1ae3815
fix: [correlation] fix objects selector 2024-04-18 15:39:31 +02:00
terrtia b552e15a8b
fix: [correlation] fix objects selector 2024-04-18 15:31:34 +02:00
terrtia bc42ff2cd1
Merge branch 'master' into ocr 2024-04-18 15:23:36 +02:00
terrtia 58666f2355
chg: [domains] card domain, fix last check 2024-04-16 22:58:16 +02:00
terrtia 56fae107bf
fix: [ocr] UI correlation, fix language bloc 2024-04-16 22:56:03 +02:00
terrtia 4cb47e8af3
chg: [ocr] detect and translate language + show ocr view + add languages blueprint 2024-04-11 12:15:47 +02:00
terrtia ed13e8bca4
chg: [ocr] get languages model + group extracted content by line + process ocr objects + get all images 2024-04-10 16:43:54 +02:00
terrtia 61701e2fcc
chg: [perf] reduce memory usage 2024-04-09 14:22:11 +02:00
terrtia 6ca4b29329
chg: [ocr] extract text from image + add ocr object 2024-04-08 17:16:07 +02:00
terrtia dbde04caa3
chg: [tracker] add experimental report generator 2024-04-03 17:39:45 +02:00
terrtia a282354fce
fix: [thehive] fix export logger 2024-04-02 09:35:35 +02:00
terrtia 414b5af277
chg: [user-account] add heatmap nb user messages 2024-03-28 14:59:26 +01:00
terrtia f37111fe2b
fix: [UI matches extractor] fix empty matches 2024-03-28 09:47:53 +01:00
terrtia 5fce682541
fix: [UI matches extractor] handle overlapping matches 2024-03-27 16:30:29 +01:00
terrtia 5ec0d7f0cf
chg: [message] show trackers + modules matches 2024-03-27 13:42:15 +01:00
terrtia a3a664b7f1
fix: [languages] fix items language min probability 2024-03-27 11:15:22 +01:00
terrtia ee563a79d3
chg: [trackers] UI: remove object from tracker 2024-03-27 11:03:27 +01:00
terrtia 3ecd3fd023
chg: [user-account] show chats + subchannels 2024-03-26 16:45:42 +01:00
terrtia 9d481bd0b0
merge lang 2024-03-25 16:58:21 +01:00
terrtia de43f350b2
chg: [language] add thread languages stats 2024-03-25 16:55:20 +01:00
terrtia 2db54def46
fix: [chat] fix subchannel-message correlation + fix empty message language detection 2024-03-25 16:36:24 +01:00
terrtia b9c37167ad
chg: [language messages] add nb languages stats by chat/subchannel objects 2024-03-25 14:13:16 +01:00
terrtia 0bacf2b8bd
chg: [requirements] add lexilang 2024-03-22 11:48:44 +01:00
terrtia fa57171937
chg: [trackers match] show objects date 2024-03-22 11:37:35 +01:00
terrtia 59ca8c5d31
chg: [lang] merge master 2024-03-21 11:01:43 +01:00
terrtia 406d72bb52
chg: [requirements] add demoji 2024-03-20 11:39:41 +01:00
terrtia 912eadc563
chg: [user-account] add default card 2024-03-19 12:05:50 +01:00
terrtia 9a0e77dbbd
fix: [chat-subchannel] card empty chat 2024-03-19 11:14:32 +01:00
terrtia d37a56b7d5
chg: [chat-subchannel] add basic + default card 2024-03-19 11:11:56 +01:00
terrtia 599f3ca953
chg: [chat] add chat default + basic card 2024-03-18 15:53:20 +01:00
terrtia e92bf72f64
chg: [image object] add default + basic card 2024-03-18 12:00:41 +01:00
terrtia 8483272ee0
chg: [global module] perf 2024-03-13 14:39:31 +01:00
terrtia 6f2668eff1
chg: [tools] reprocess objects by type 2024-03-13 14:04:23 +01:00
terrtia 13372f8c85
fix: [dashboard] fix objects links 2024-03-13 11:58:40 +01:00
terrtia eb6adc4b98
chg: [modules] improve error output 2024-03-13 10:11:03 +01:00
terrtia 27b2679ba6
fix: [tracker] fix yara content error on empty message 2024-03-13 10:07:11 +01:00
terrtia dc0545dfd0
chg: [RetroHunt] retro hunt on messages 2024-03-12 13:54:56 +01:00
terrtia 9031376b50
fix: [FeederImporter] add all objects in queue 2024-03-12 13:43:19 +01:00
terrtia d526b2fd98
chg: [correlation UI] change Direct Correlations position 2024-03-11 15:36:24 +01:00
terrtia 800098540b
chg: [correlation] description: get message content + don't show etag, cookie_name and hhash by default 2024-03-11 15:33:30 +01:00
terrtia 197ff0222d
chg: [lang] improve language detection + UI: manual translation and detection 2024-03-08 15:26:06 +01:00
terrtia 7acac4dc0c
Merge branch 'master' of github.com:ail-project/ail-framework 2024-03-08 13:54:34 +01:00
terrtia 40a32fec75
fix: [module] fix SQLInjectionDetection object ID 2024-03-08 13:54:14 +01:00
Thirion Aurélien d2c974569e
Merge pull request #215 from NMD03/master
Add [ailbuilder] cleanup
2024-03-07 15:11:59 +01:00
niclas 2b9c1bfda8 Add [ailbuilder] cleanup 2024-03-07 15:06:15 +01:00
terrtia 38d71e97dd
fix: [chats] fix templates, chat subtype 2024-03-05 14:14:04 +01:00
terrtia 87dc619171
chg: [chats] add heatmap nb messages/hour for all messages 2024-03-05 10:56:36 +01:00
terrtia ad039e4720
chg: [api] get chat/subchannel/thread messages 2024-02-29 16:18:22 +01:00
terrtia c22d2982fb
fix: [api] remove debug output 2024-02-29 15:03:39 +01:00
terrtia e1e9609ad9
chg: [api] get object + get investigation 2024-02-29 14:56:45 +01:00
terrtia 35f0d46140
fix: [domains] add missing JS file 2024-02-28 14:21:33 +01:00
terrtia d5e830c591
chg: [domains] add crawler status stats by domain type pie chart 2024-02-28 14:19:47 +01:00
terrtia 142ac83472
chg: [d3 js] add barchart_stack graph function 2024-02-28 13:19:01 +01:00
terrtia 311e6f4bd8
fix: [crawler] fix monthly crawled domains stats svg size 2024-02-27 16:18:06 +01:00
terrtia e6d70e4f7b
fix: [crawler] fix monthly crawled domains stats 2024-02-27 15:24:18 +01:00
terrtia 0d55725e28
chg: [crawler] add monthly crawled domains stats 2024-02-27 14:56:48 +01:00
terrtia afe13185d9
chg: [test] add basic API test 2024-02-27 11:03:11 +01:00
terrtia 775b7fa868
fix: [tests] fix tests + global new file content 2024-02-27 10:15:40 +01:00
terrtia 9917d4212c
chg: [git] update gitignore 2024-02-26 15:41:46 +01:00
terrtia ad63651838
chg: [api] refactor blueprint 2024-02-26 15:35:48 +01:00
terrtia 40b1378b30
chg: [favicon] add v5.4 update + update pylacus 2024-02-21 15:00:27 +01:00
terrtia f980ab5145
fix: [favicon] fix misp object export 2024-02-21 14:56:11 +01:00
terrtia 81c4dde7b0
fix: [favicon] crawler favicon 2024-02-21 14:34:20 +01:00
terrtia c219febd71
chg: [favicon] add favicons objects + correlation 2024-02-21 14:18:09 +01:00
terrtia 9cdfcdfc6b
fix: [vanity] fix title 2024-02-20 09:46:09 +01:00
terrtia 443f4f2fb3
fix: [domain] add screenshot img onerror 2024-02-19 16:23:53 +01:00
terrtia f62ec679cc
chg: [vanity] vanity tree, add links 2024-02-19 16:07:14 +01:00
terrtia 6fa8f6e0bc
fix: [vanity] length > 5 clusters 2024-02-19 15:54:15 +01:00
terrtia 784579baef
fix: [vanity] debug 2024-02-19 15:52:26 +01:00
terrtia cb4345c871
fix: [vanity] debug 2024-02-19 15:50:20 +01:00
terrtia 4c5a0b9906
fix: [vanity] length+1 min vanity = 2 2024-02-19 15:38:51 +01:00
terrtia 273e264659
fix: [vanity] vanity builder 2024-02-19 15:31:55 +01:00
terrtia 88fbe36f70
fix: [vanity] update id domains is up + min vanity = 4 2024-02-19 15:23:16 +01:00
terrtia f07a4b422b
chg: [vanity] add vanity domain explorer + fix blurred screenshot + fix languages search filter 2024-02-19 15:14:37 +01:00
terrtia 495ceea73d
fix: [api] get titles domain 2024-02-16 11:56:48 +01:00
terrtia 782677e8ff
chg: [domain] sort onion vanity name by prefix 2024-02-16 10:31:45 +01:00
terrtia cf62ed49ff
fix: [tracker] fix typo 2024-02-15 16:31:03 +01:00
terrtia 93d1b6fc1f
fix: [tracker] fix message trackers 2024-02-15 16:22:00 +01:00
terrtia 152e7bb51e
chg: [titles] add api to get unsafe titles 2024-02-15 15:45:49 +01:00
terrtia 811ee45993
fix: [titles] add api to get all titles 2024-02-15 15:19:18 +01:00
terrtia 05c8100b8b
fix: [titles] add api to get all titles 2024-02-15 14:45:15 +01:00
terrtia e83323c672
chg: [titles] add api to get all titles 2024-02-15 14:43:58 +01:00
terrtia d31bc7ef58
chg: [tracker] list of last matches, show description 2024-02-15 12:03:15 +01:00
terrtia 1e8f8e9e9c
chg: [tracker] list of last matches, show description 2024-02-15 12:02:01 +01:00
terrtia 1a3ffe70c1
fix: [retro_hunt] api delete paused retro_hunt 2024-02-15 11:45:55 +01:00
terrtia 48ce4a76a4
fix: [retro_hunt] delete paused retro_hunt 2024-02-15 11:42:10 +01:00
terrtia 7d42315419
chg: [retro_hunt] show match + fix None subtype 2024-02-15 11:31:17 +01:00
terrtia c260455d14
chg: [titles] add endpoints to get all titles 2024-02-15 10:13:06 +01:00
terrtia 4cf3d628db
chg: [messages] manual translation by language + add new kvrocks lang namespace 2024-02-14 16:43:29 +01:00
terrtia 50bfd92105
chg: [chat] add endpoints to download chat, subchannel and thread, + fix message translated by default 2024-02-13 16:13:18 +01:00
terrtia a9323e076e
chg: [doc] add screenshot 2024-02-12 14:01:46 +01:00
terrtia c5f40d85a8
fix: [doc] fix pystemon install 2024-02-08 14:14:14 +01:00
terrtia 57842c2ecf
fix: [doc] fix pystemon install 2024-02-08 14:07:11 +01:00
terrtia 304afd00aa
chg: [exif] add debug 2024-02-07 10:32:18 +01:00
terrtia 38a918e485
fix: [flask] fix escape import 2024-02-06 11:56:39 +01:00
terrtia 4168d07118
fix: [chats] fix chats image importer 2024-02-06 11:13:45 +01:00
terrtia 88f30833c2
chg: [doc] add discord/telegram chats JSON fields 2024-02-05 16:34:20 +01:00
terrtia d84bc14b62
chg: [HOWTO] Libretranslate Chat translation 2024-02-05 16:22:39 +01:00
terrtia 4c1d058e6d
fix: [language] catch libretranslate exception 2024-02-05 14:19:42 +01:00
terrtia aa56e71631
fix: [language] crawled items, force gcld3 detection 2024-02-05 14:10:19 +01:00
terrtia 99fedf9855
fix: [LAUNCH] update screen status 2024-02-05 11:32:49 +01:00
terrtia 335d94cf79
chg: [requirement] bump flask requirement 2024-02-05 11:21:59 +01:00
terrtia c1529b217d
fix: [LAUNCH] fix killall 2024-02-05 11:09:41 +01:00
168 changed files with 7270 additions and 1932 deletions

1
.gitignore vendored
View File

@ -17,6 +17,7 @@ Blooms
PASTES
CRAWLED_SCREENSHOT
IMAGES
FAVICONS
BASE64
HASHS
DATA_ARDB

View File

@ -1,17 +1,16 @@
# Feeding, adding new features and contributing
# Feeding, Adding new features and Contributing
## [Documentation AIL Importers](./doc/README.md#ail-importers)
## [AIL Importers](./doc/README.md#ail-importers)
[Documentation AIL Importers](./doc/README.md#ail-importers)
Refer to the [AIL Importers Documentation](./doc/README.md#ail-importers)
## How to feed the AIL framework
## Feeding Data to AIL
AIL is an analysis tool, not a collector!
However, if you want to collect some pastes and feed them to AIL, the procedure is described below. Nevertheless, moderate your queries!
1. [AIL Importers](./doc/README.md#ail-importers)
2. ZMQ: Be a collaborator of CIRCL and ask to access our feed. It will be sent to the static IP you are using for AIL.
## How to create a new module
@ -19,22 +18,18 @@ However, if you want to collect some pastes and feed them to AIL, the procedure
To add a new processing or analysis module to AIL, follow these steps:
1. Add your module name in [./configs/modules.cfg](./configs/modules.cfg) and subscribe to at least one module at minimum (Usually, `Item`).
2. Use [./bin/modules/modules/TemplateModule.py](./bin/modules/modules/TemplateModule.py) as a sample module and create a new file in bin/modules with the module name used in the `modules.cfg` configuration.
## How to contribute a module
## Contributions
Feel free to fork the code, play with it, make some patches or add additional analysis modules.
Contributions are welcome! Fork the repository, experiment with the code, and submit your modules or patches through a pull request.
To contribute your module, feel free to pull your contribution.
## Crawler
AIL supports crawling of websites and Tor hidden services. Ensure your Tor client's proxy configuration is correct, especially the SOCKS5 proxy settings.
## Additional information
### Crawler
In AIL, you can crawl websites and Tor hidden services. Don't forget to review the proxy configuration of your Tor client and especially if you enabled the SOCKS5 proxy
![Crawler](./doc/screenshots/ail-lacus.png?raw=true "AIL framework Crawler")
### Installation
@ -45,38 +40,35 @@ In AIL, you can crawl websites and Tor hidden services. Don't forget to review t
1. Lacus URL:
In the web interface, go to `Crawlers` > `Settings` and click on the Edit button
![Splash Manager Config](./doc/screenshots/lacus_config.png?raw=true "AIL Lacus Config")
![AIL Crawler Config](./doc/screenshots/lacus_config.png?raw=true "AIL Lacus Config")
![Splash Manager Config](./doc/screenshots/lacus_config_edit.png?raw=true "AIL Lacus Config")
![AIL Crawler Config Edit](./doc/screenshots/lacus_config_edit.png?raw=true "AIL Lacus Config")
2. Launch AIL Crawlers:
2. Number of Crawlers:
Choose the number of crawlers you want to launch
![Splash Manager Nb Crawlers Config](./doc/screenshots/crawler_nb_captures.png?raw=true "AIL Lacus Nb Crawlers Config")
![Crawler Manager Nb Crawlers Config](./doc/screenshots/crawler_nb_captures.png?raw=true "AIL Lacus Nb Crawlers Config")
![Splash Manager Nb Crawlers Config](./doc/screenshots/crawler_nb_captures_edit.png?raw=true "AIL Lacus Nb Crawlers Config")
![Crawler Manager Nb Crawlers Config](./doc/screenshots/crawler_nb_captures_edit.png?raw=true "AIL Lacus Nb Crawlers Config")
## Chats Translation with LibreTranslate
### Kvrocks Migration
---------------------
**Important Note:** We are currently working on a [migration script](https://github.com/ail-project/ail-framework/blob/master/update/v5.0/DB_KVROCKS_MIGRATION.py) to facilitate the migration to Kvrocks.
Chats message can be translated using [libretranslate](https://github.com/LibreTranslate/LibreTranslate), an open-source self-hosted machine translation.
Please note that the current version of this migration script only supports migrating the database on the same server.
(If you plan to migrate to another server, we will provide additional instructions in this section once the migration script is completed)
### Installation:
1. Install LibreTranslate by running the following command:
```bash
pip install libretranslate
```
2. Run libretranslate:
```bash
libretranslate
```
### Configuration:
To enable LibreTranslate for chat translation, edit the LibreTranslate URL in the [./configs/core.cfg](./configs/core.cfg) file under the [Translation] section.
```
[Translation]
libretranslate = http://127.0.0.1:5000
```
To migrate your database to Kvrocks:
1. Launch ARDB and Kvrocks
2. Pull from remote
```shell
git checkout master
git pull
```
3. Launch the migration script:
```shell
git checkout master
git pull
cd update/v5.0
./DB_KVROCKS_MIGRATION.py
```

View File

@ -29,6 +29,8 @@ AIL framework - Framework for Analysis of Information Leaks
AIL is a modular framework to analyse potential information leaks from unstructured data sources like pastes from Pastebin or similar services or unstructured data streams. AIL framework is flexible and can be extended to support other functionalities to mine or process sensitive information (e.g. data leak prevention).
![Overview](./doc/screenshots/ail-overview.png?raw=true "AIL framework Overview")
![Dashboard](./doc/screenshots/dashboard0.png?raw=true "AIL framework dashboard")
@ -55,6 +57,8 @@ Allow easy creation and customization by extending an abstract class.
## Features
![Internal](./doc/screenshots/ail-internal.png?raw=true "AIL framework Internal")
- Modular architecture to handle streams of unstructured or structured information
- Default support for external ZMQ feeds, such as provided by CIRCL or other providers
- Multiple Importers and feeds support

View File

@ -275,8 +275,11 @@ function launching_scripts {
screen -S "Script_AIL" -X screen -t "MISP_Thehive_Auto_Push" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./MISP_Thehive_Auto_Push.py; read x"
sleep 0.1
# IMAGES
screen -S "Script_AIL" -X screen -t "Exif" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./Exif.py; read x"
sleep 0.1
screen -S "Script_AIL" -X screen -t "OcrExtractor" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./OcrExtractor.py; read x"
sleep 0.1
##################################
# TRACKERS MODULES #
@ -578,7 +581,7 @@ function update_thirdparty {
function launch_tests() {
tests_dir=${AIL_HOME}/tests
bin_dir=${AIL_BIN}
python3 -m nose2 --start-dir $tests_dir --coverage $bin_dir --with-coverage testApi test_modules
python3 -m nose2 --start-dir $tests_dir --coverage $bin_dir --with-coverage test_api test_modules
}
function reset_password() {
@ -676,13 +679,16 @@ function menu_display {
check_screens;
while [ "$1" != "" ]; do
case $1 in
-l | --launchAuto ) launch_all "automatic";
-l | --launchAuto ) check_screens;
launch_all "automatic";
;;
-lr | --launchRedis ) launch_redis;
-lr | --launchRedis ) check_screens;
launch_redis;
;;
-la | --launchARDB ) launch_ardb;
;;
-lk | --launchKVROCKS ) launch_kvrocks;
-lk | --launchKVROCKS ) check_screens;
launch_kvrocks;
;;
-lrv | --launchRedisVerify ) launch_redis;
wait_until_redis_is_ready;
@ -692,14 +698,16 @@ while [ "$1" != "" ]; do
;;
--set_kvrocks_namespaces ) set_kvrocks_namespaces;
;;
-k | --killAll ) killall;
-k | --killAll ) check_screens;
killall;
;;
-r | --restart ) killall;
sleep 0.1;
check_screens;
launch_all "automatic";
;;
-ks | --killscript ) killscript;
-ks | --killscript ) check_screens;
killscript;
;;
-m | --menu ) menu_display;
;;

View File

@ -11,7 +11,7 @@ import uuid
import subprocess
from flask import escape
from markupsafe import escape
sys.path.append(os.environ['AIL_BIN'])
##################################

View File

@ -20,6 +20,7 @@ from lib.ConfigLoader import ConfigLoader
from lib.objects import CookiesNames
from lib.objects import Etags
from lib.objects.Domains import Domain
from lib.objects import Favicons
from lib.objects.Items import Item
from lib.objects import Screenshots
from lib.objects import Titles
@ -192,11 +193,13 @@ class Crawler(AbstractModule):
# force=force,
# general_timeout_in_sec=120)
# with_favicon = True,
capture_uuid = self.lacus.enqueue(url=url,
depth=task.get_depth(),
user_agent=task.get_user_agent(),
proxy=task.get_proxy(),
cookies=task.get_cookies(),
with_favicon=True,
force=force,
general_timeout_in_sec=90) # TODO increase timeout if onion ????
@ -244,6 +247,7 @@ class Crawler(AbstractModule):
parent_id = task.get_parent()
entries = self.lacus.get_capture(capture.uuid)
print(entries.get('status'))
self.har = task.get_har()
self.screenshot = task.get_screenshot()
@ -261,6 +265,7 @@ class Crawler(AbstractModule):
# Origin + History + tags
if self.root_item:
self.domain.set_last_origin(parent_id)
self.domain.update_vanity_cluster()
# Tags
for tag in task.get_tags():
self.domain.add_tag(tag)
@ -274,7 +279,7 @@ class Crawler(AbstractModule):
for tag in task.get_tags():
self.domain.add_tag(tag)
self.original_domain.add_history(epoch, root_item=self.root_item)
crawlers.update_last_crawled_domain(self.original_domain.get_domain_type(), self.original_domain.id, epoch)
# crawlers.update_last_crawled_domain(self.original_domain.get_domain_type(), self.original_domain.id, epoch)
crawlers.update_last_crawled_domain(self.domain.get_domain_type(), self.domain.id, epoch)
print('capture:', capture.uuid, 'completed')
@ -367,6 +372,12 @@ class Crawler(AbstractModule):
etag.add(self.date.replace('/', ''), self.domain)
crawlers.extract_hhhash(entries['har'], self.domain.id, self.date.replace('/', ''))
# FAVICON
if entries.get('potential_favicons'):
for favicon in entries['potential_favicons']:
fav = Favicons.create(favicon)
fav.add(item.get_date(), item)
# Next Children
entries_children = entries.get('children')
if entries_children:

View File

@ -103,14 +103,19 @@ class FeederImporter(AbstractImporter):
if data_obj:
objs.add(data_obj)
objs_messages = []
for obj in objs:
if obj.type == 'item': # object save on disk as file (Items)
gzip64_content = feeder.get_gzip64_content()
return obj, f'{feeder_name} {gzip64_content}'
relay_message = f'{feeder_name} {gzip64_content}'
objs_messages.append({'obj': obj, 'message': relay_message})
elif obj.type == 'image':
date = feeder.get_date()
objs_messages.append({'obj': obj, 'message': f'{feeder_name} {date}'})
else: # Messages save on DB
if obj.exists() and obj.type != 'chat':
return obj, f'{feeder_name}'
objs_messages.append({'obj': obj, 'message': feeder_name})
return objs_messages
class FeederModuleImporter(AbstractModule):
def __init__(self):
@ -128,10 +133,8 @@ class FeederModuleImporter(AbstractModule):
def compute(self, message):
# TODO HANDLE Invalid JSON
json_data = json.loads(message)
# TODO multiple objs + messages
obj, relay_message = self.importer.importer(json_data)
####
self.add_message_to_queue(obj=obj, message=relay_message)
for obj_message in self.importer.importer(json_data):
self.add_message_to_queue(obj=obj_message['obj'], message=obj_message['message'])
# Launch Importer

View File

@ -41,6 +41,9 @@ class DefaultFeeder:
def get_source(self):
return self.json_data.get('source')
def get_date(self):
return datetime.date.today().strftime("%Y%m%d")
def get_json_data(self):
"""
Return the JSON data,
@ -63,7 +66,8 @@ class DefaultFeeder:
return self.json_data.get('data')
def get_obj_type(self):
return self.json_data.get('type', 'item')
meta = self.get_json_meta()
return meta.get('type', 'item')
## OVERWRITE ME ##
def get_obj(self):

View File

@ -92,6 +92,14 @@ class AbstractChatFeeder(DefaultFeeder, ABC):
def get_reactions(self):
return self.json_data['meta'].get('reactions', [])
def get_date(self):
if self.json_data['meta'].get('date'):
date = datetime.datetime.fromtimestamp( self.json_data['meta']['date']['timestamp'])
date = date.strftime('%Y%m%d')
else:
date = datetime.date.today().strftime("%Y%m%d")
return date
def get_message_timestamp(self):
if not self.json_data['meta'].get('date'):
return None
@ -206,8 +214,7 @@ class AbstractChatFeeder(DefaultFeeder, ABC):
subchannel = ChatSubChannels.ChatSubChannel(f'{self.get_chat_id()}/{meta["id"]}', self.get_chat_instance_uuid())
thread = None
# TODO correlation with obj = message/image
subchannel.add(date)
subchannel.add(date, obj)
if meta.get('date'): # TODO check if already exists
subchannel.set_created_at(int(meta['date']['timestamp']))
@ -358,7 +365,58 @@ class AbstractChatFeeder(DefaultFeeder, ABC):
# CHAT
chat_objs = self.process_chat(new_objs, obj, date, timestamp, reply_id=reply_id)
# Message forward
# # TODO HANDLE OTHERS OBJECT TYPE
# # TODO MAKE IT GENERIC FOR OTHERS CHATS !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# # Message forward + Discussion
# if self.get_json_meta().get('forward'):
# discussion_id = self.get_json_meta().get('discussion')
# forward_from = self.get_message_forward()
#
# if discussion_id: # TODO HANDLE FORWARDED MESSAGES FROM EXTERNAL CHANNELS
# chat_forward_id = forward_from['from']['id']
# message_forward_id = forward_from['from']['channel_post']
#
# # if chat_forward_id == discussion_id:
# # linked_chat = Chat(chat_forward_id, self.get_chat_instance_uuid())
# # if linked_chat.exists():
# # # create thread
# # # add message replies for each childrens
#
# # TODO HANDLE THREAD
# # TODO Change FORWARD META FIELDS
# # meta['forward'] = {}
# # # CHAT ID
# # # SUBCHANNEL ID -> can be None
# # # Message ID
#
# # meta['forward']['origin']
# # # same as 'forward'
#
# if self.get_json_meta().get('forward'):
# forward = self.get_message_forward()
# f_chat = forward['chat']
# f_subchannel = forward.get('subchannel')
# f_id = forward.get('id')
# if not f_subchannel:
# chat_forward = Chat(f_chat, self.get_chat_instance_uuid())
# if chat_forward.exists():
# for chat_obj in chat_objs:
# if chat_obj.type == 'chat':
# chat_forward.add_relationship(chat_obj.get_global_id(), 'forward')
# # TODO LIST FORWARDED MESSAGES
#
#
# # Discord -> serverID + subchannel ID + message ID
# # Telegram -> chat ID + Message ID
# # + ORIGIN IDs
#
#
#
# # TODO create relationships graph
#
#
# # TODO REMOVE ME
# # Message forward # TODO handle subchannel + message ID
# if self.get_json_meta().get('forward'):
# forward_from = self.get_message_forward()
# print('-----------------------------------------------------------')

View File

@ -9,7 +9,6 @@ The ``Domain``
import os
import sys
import time
import redis
import configparser

View File

@ -16,7 +16,7 @@ import time
import uuid
from enum import Enum
from flask import escape
from markupsafe import escape
sys.path.append(os.environ['AIL_BIN'])
##################################
@ -152,25 +152,30 @@ class Investigation(object):
return r_tracking.smembers(f'investigations:misp:{self.uuid}')
# # TODO: DATE FORMAT
def get_metadata(self, r_str=False):
def get_metadata(self, options=set(), r_str=False):
if r_str:
analysis = self.get_analysis_str()
threat_level = self.get_threat_level_str()
else:
analysis = self.get_analysis()
threat_level = self.get_threat_level()
return {'uuid': self.uuid,
'name': self.get_name(),
# 'name': self.get_name(),
meta = {'uuid': self.uuid,
'threat_level': threat_level,
'analysis': analysis,
'tags': self.get_tags(),
'tags': list(self.get_tags()),
'user_creator': self.get_creator_user(),
'date': self.get_date(),
'timestamp': self.get_timestamp(r_str=r_str),
'last_change': self.get_last_change(r_str=r_str),
'info': self.get_info(),
'nb_objects': self.get_nb_objects(),
'misp_events': self.get_misp_events()}
'misp_events': list(self.get_misp_events())
}
if 'objects' in options:
meta['objects'] = self.get_objects()
return meta
def set_name(self, name):
r_tracking.hset(f'investigations:data:{self.uuid}', 'name', name)
@ -368,6 +373,21 @@ def get_investigations_selector():
#### API ####
def api_get_investigation(investigation_uuid): # TODO check if is UUIDv4
investigation = Investigation(investigation_uuid)
if not investigation.exists():
return {'status': 'error', 'reason': 'Investigation Not Found'}, 404
meta = investigation.get_metadata(options={'objects'}, r_str=False)
# objs = []
# for obj in investigation.get_objects():
# obj_meta = ail_objects.get_object_meta(obj["type"], obj["subtype"], obj["id"], flask_context=True)
# comment = investigation.get_objects_comment(f'{obj["type"]}:{obj["subtype"]}:{obj["id"]}')
# if comment:
# obj_meta['comment'] = comment
# objs.append(obj_meta)
return meta, 200
# # TODO: CHECK Mandatory Fields
# # TODO: SANITYZE Fields
# # TODO: Name ?????

View File

@ -7,6 +7,7 @@ import sys
import html2text
import gcld3
from lexilang.detector import detect as lexilang_detect
from libretranslatepy import LibreTranslateAPI
sys.path.append(os.environ['AIL_BIN'])
@ -17,6 +18,7 @@ from lib.ConfigLoader import ConfigLoader
config_loader = ConfigLoader()
r_cache = config_loader.get_redis_conn("Redis_Cache")
r_lang = config_loader.get_db_conn("Kvrocks_Languages")
TRANSLATOR_URL = config_loader.get_config_str('Translation', 'libretranslate')
config_loader = None
@ -256,9 +258,6 @@ def get_iso_from_languages(l_languages, sort=False):
return l_iso
class LanguageDetector:
pass
def get_translator_instance():
return TRANSLATOR_URL
@ -266,7 +265,10 @@ def _get_html2text(content, ignore_links=False):
h = html2text.HTML2Text()
h.ignore_links = ignore_links
h.ignore_images = ignore_links
return h.handle(content)
content = h.handle(content)
if content == '\n\n':
content = ''
return content
def _clean_text_to_translate(content, html=False, keys_blocks=True):
if html:
@ -299,30 +301,150 @@ def _clean_text_to_translate(content, html=False, keys_blocks=True):
content = content.replace(it, '')
return content
#### AIL Objects ####
#### LANGUAGE ENGINE ####
def get_obj_translation(obj_global_id, content, field='', source=None, target='en'):
# first seen
# last seen
# language by date -> iter on object date ????
## Langs
def get_language_obj_types(language):
return r_lang.smembers(f'languages:{language}')
def get_language_objs(language, obj_type, obj_subtype=''):
return r_lang.smembers(f'langs:{obj_type}:{obj_subtype}:{language}')
# def get_languages_objs(languages, obj_type, obj_subtype='')
## Objs
def get_objs_languages(obj_type, obj_subtype=''):
if obj_subtype:
return r_lang.smembers(f'objs:lang:{obj_type}:{obj_subtype}')
else:
return r_lang.smembers(f'objs:langs:{obj_type}')
## Obj
def get_obj_languages(obj_type, obj_subtype, obj_id):
return r_lang.smembers(f'obj:lang:{obj_type}:{obj_subtype}:{obj_id}')
def get_obj_language_stats(obj_type, obj_subtype, obj_id):
return r_lang.zrange(f'obj:langs:stat:{obj_type}:{obj_subtype}:{obj_id}', 0, -1, withscores=True)
def get_obj_main_language(obj_type, obj_subtype, obj_id):
language = r_lang.zrevrange(f'obj:langs:stat:{obj_type}:{obj_subtype}:{obj_id}', 0, 0)
if language:
return language[0]
# TODO ADD language to CHAT GLOBAL SET
def add_obj_language(language, obj_type, obj_subtype, obj_id, objs_containers=set()): # (s)
if not obj_subtype:
obj_subtype = ''
obj_global_id = f'{obj_type}:{obj_subtype}:{obj_id}'
r_lang.sadd(f'objs:langs:{obj_type}', language)
r_lang.sadd(f'objs:lang:{obj_type}:{obj_subtype}', language)
new = r_lang.sadd(f'obj:lang:{obj_global_id}', language)
r_lang.sadd(f'languages:{language}', f'{obj_type}:{obj_subtype}') ################### REMOVE ME ???
r_lang.sadd(f'langs:{obj_type}:{obj_subtype}:{language}', obj_global_id)
if new:
for global_id in objs_containers:
r_lang.zincrby(f'obj:langs:stat:{global_id}', 1, language)
def remove_obj_language(language, obj_type, obj_subtype, obj_id, objs_containers=set()):
if not obj_subtype:
obj_subtype = ''
obj_global_id = f'{obj_type}:{obj_subtype}:{obj_id}'
rem = r_lang.srem(f'obj:lang:{obj_global_id}', language)
delete_obj_translation(obj_global_id, language)
r_lang.srem(f'langs:{obj_type}:{obj_subtype}:{language}', obj_global_id)
if not r_lang.exists(f'langs:{obj_type}:{obj_subtype}:{language}'):
r_lang.srem(f'objs:lang:{obj_type}:{obj_subtype}', language)
r_lang.srem(f'languages:{language}', f'{obj_type}:{obj_subtype}')
if not r_lang.exists(f'objs:lang:{obj_type}:{obj_subtype}'):
if r_lang.scard(f'objs:langs:{obj_type}') <= 1:
r_lang.srem(f'objs:langs:{obj_type}', language)
if rem:
for global_id in objs_containers:
r = r_lang.zincrby(f'obj:langs:stat:{global_id}', -1, language)
if r < 1:
r_lang.zrem(f'obj:langs:stat:{global_id}', language)
# TODO handle fields
def detect_obj_language(obj_type, obj_subtype, obj_id, content, objs_containers=set()):
    """Detect the main language of `content` and record it on the object,
    replacing a previously recorded language if it changed."""
    detected = LanguagesDetector(nb_langs=1).detect(content)
    if not detected:
        return None
    language = detected[0]
    previous = get_obj_languages(obj_type, obj_subtype, obj_id)
    if previous:
        previous_lang = previous.pop()
        if language != previous_lang:
            remove_obj_language(previous_lang, obj_type, obj_subtype, obj_id, objs_containers=objs_containers)
            add_obj_language(language, obj_type, obj_subtype, obj_id, objs_containers=objs_containers)
    else:
        add_obj_language(language, obj_type, obj_subtype, obj_id, objs_containers=objs_containers)
    return language
## Translation
def r_get_obj_translation(obj_global_id, language, field=''):
    """Return the stored translation of an object field, or None."""
    tr_key = f'tr:{obj_global_id}:{field}'
    return r_lang.hget(tr_key, language)
def _get_obj_translation(obj_global_id, language, source=None, content=None, field='', objs_containers=set()):
    """
    Return the content of `obj_global_id` translated into `language`.

    Lookup order: short-lived cache -> stored manual translation -> live
    translation via the translator. When the translator detects the source
    language, it is recorded on the object.
    """
    cache_key = f'translation:{language}:{obj_global_id}:{field}'
    translation = r_cache.get(cache_key)
    # NOTE(review): expire(..., 0) deletes the cache entry right after reading
    # it, effectively disabling the cache - confirm this debug behaviour is intended
    r_cache.expire(cache_key, 0)
    if translation:
        return translation
    # TODO HANDLE FIELDS TRANSLATION
    translation = r_get_obj_translation(obj_global_id, language, field=field)
    if not translation:
        source, translation = LanguageTranslator().translate(content, source=source, target=language)
        if source:
            obj_type, subtype, obj_id = obj_global_id.split(':', 2)
            add_obj_language(source, obj_type, subtype, obj_id, objs_containers=objs_containers)
    if translation:
        r_cache.set(cache_key, translation)
        r_cache.expire(cache_key, 300)
    return translation
## --AIL Objects-- ##
def get_obj_translation(obj_global_id, language, source=None, content=None, field='', objs_containers=set()):
    """Public wrapper: return the translated content of an object field."""
    return _get_obj_translation(obj_global_id, language, source=source, content=content,
                                field=field, objs_containers=objs_containers)
# TODO Force to edit ????
def set_obj_translation(obj_global_id, language, translation, field=''):
    """Store a manual translation and invalidate the cached value."""
    # include the field in the cache key: previously the field suffix was
    # omitted, so field-specific cache entries survived until TTL expiry
    r_cache.delete(f'translation:{language}:{obj_global_id}:{field}')
    return r_lang.hset(f'tr:{obj_global_id}:{field}', language, translation)
def delete_obj_translation(obj_global_id, language, field=''):
    """Delete a stored translation and its cached value."""
    # include the field in the cache key (was missing, leaving stale cache
    # entries for non-empty fields)
    r_cache.delete(f'translation:{language}:{obj_global_id}:{field}')
    r_lang.hdel(f'tr:{obj_global_id}:{field}', language)
## --LANGUAGE ENGINE-- ##
#### AIL Objects ####
class LanguagesDetector:
def __init__(self, nb_langs=3, min_proportion=0.2, min_probability=0.7, min_len=0):
self.lt = LibreTranslateAPI(get_translator_instance())
def __init__(self, nb_langs=3, min_proportion=0.2, min_probability=-1, min_len=0):
lt_url = get_translator_instance()
if not lt_url:
self.lt = None
else:
self.lt = LibreTranslateAPI(get_translator_instance())
try:
self.lt.languages()
except Exception:
@ -339,37 +461,71 @@ class LanguagesDetector:
if self.min_len > 0:
if len(content) < self.min_len:
return languages
# p = self.detector.FindTopNMostFreqLangs(content, num_langs=3)
# for lang in p:
# print(lang.language, lang.probability, lang.proportion, lang.is_reliable)
# print('------------------------------------------------')
for lang in self.detector.FindTopNMostFreqLangs(content, num_langs=self.nb_langs):
if lang.proportion >= self.min_proportion and lang.probability >= self.min_probability and lang.is_reliable:
languages.append(lang.language)
return languages
def detect_lexilang(self, content):
language, prob = lexilang_detect(content)
if prob > 0 and self.min_probability == -1:
return [language]
elif prob > 0.4:
return [language]
else:
return []
def detect_libretranslate(self, content):
languages = []
try:
# [{"confidence": 0.6, "language": "en"}]
resp = self.lt.detect(content)
except: # TODO ERROR MESSAGE
resp = []
except Exception as e: # TODO ERROR MESSAGE
raise Exception(f'libretranslate error: {e}')
# resp = []
if resp:
if isinstance(resp, dict):
raise Exception(f'libretranslate error {resp}')
for language in resp:
if language.confidence >= self.min_probability:
languages.append(language)
return languages
def detect(self, content):
def detect(self, content, force_gcld3=False): # TODO detect length between 20-200 ????
if not content:
return []
content = _clean_text_to_translate(content, html=True)
if not content:
return []
# DEBUG
# print('-------------------------------------------------------')
# print(content)
# print(len(content))
# lexilang
if len(content) < 150:
# print('lexilang')
languages = self.detect_lexilang(content)
# gcld3
if len(content) >= 200 or not self.lt:
language = self.detect_gcld3(content)
# libretranslate
else:
language = self.detect_libretranslate(content)
return language
# if len(content) >= 200 or not self.lt or force_gcld3:
# print('gcld3')
languages = self.detect_gcld3(content)
# libretranslate
# else:
# languages = self.detect_libretranslate(content)
if not languages:
languages = []
return languages
class LanguageTranslator:
def __init__(self):
self.lt = LibreTranslateAPI(get_translator_instance())
self.ld = LanguagesDetector(nb_langs=1)
def languages(self):
languages = []
@ -399,13 +555,13 @@ class LanguageTranslator:
return language[0].get('language')
def detect(self, content):
# gcld3
if len(content) >= 200:
language = self.detect_gcld3(content)
# libretranslate
else:
language = self.detect_libretranslate(content)
return language
# print('++++++++++++++++++++++++++++++++++++++++++++++++++++++')
# print(content)
language = self.ld.detect(content)
if language:
# print(language[0])
# print('##############################################################')
return language[0]
def translate(self, content, source=None, target="en"): # TODO source target
if target not in get_translation_languages():
@ -424,9 +580,9 @@ class LanguageTranslator:
translation = None
# TODO LOG and display error
if translation == content:
print('EQUAL')
# print('EQUAL')
translation = None
return translation
return source, translation
LIST_LANGUAGES = {}

View File

@ -32,6 +32,9 @@ config_loader = None
# # # # UNSAFE TAGS # # # #
# set of unsafe tags
UNSAFE_TAGS = None
def build_unsafe_tags():
tags = set()
# CE content
@ -52,12 +55,12 @@ def is_tags_safe(ltags):
:return: is a tag in the set unsafe
:rtype: boolean
"""
return unsafe_tags.isdisjoint(ltags)
global UNSAFE_TAGS
if UNSAFE_TAGS is None:
UNSAFE_TAGS = build_unsafe_tags()
return UNSAFE_TAGS.isdisjoint(ltags)
# set of unsafe tags
unsafe_tags = build_unsafe_tags()
# - - - UNSAFE TAGS - - - #
# # TODO: verify tags + object_type
@ -80,16 +83,15 @@ def get_obj_by_tag(key_tag):
#### Taxonomies ####
TAXONOMIES = {}
TAXONOMIES = None
def load_taxonomies():
    """Load the MISP taxonomies manifest into the module-level TAXONOMIES."""
    global TAXONOMIES
    manifest_path = os.path.join(os.environ['AIL_HOME'], 'files/misp-taxonomies/MANIFEST.json')
    TAXONOMIES = Taxonomies(manifest_path=manifest_path)
load_taxonomies()
def get_taxonomies():
    """Return all known taxonomy names, lazy-loading them on first use."""
    if TAXONOMIES is None:
        load_taxonomies()
    return TAXONOMIES.keys()
# TODO rename me to get enabled_taxonomies
@ -111,12 +113,18 @@ def disable_taxonomy(taxonomy):
r_tags.srem('taxonomies:enabled', taxonomy)
def exists_taxonomy(taxonomy):
    """Return True if `taxonomy` is a known taxonomy name."""
    if TAXONOMIES is None:
        load_taxonomies()
    return TAXONOMIES.get(taxonomy) is not None
def get_taxonomy_description(taxonomy):
if TAXONOMIES is None:
load_taxonomies()
return TAXONOMIES.get(taxonomy).description
def get_taxonomy_name(taxonomy):
if TAXONOMIES is None:
load_taxonomies()
return TAXONOMIES.get(taxonomy).name
def get_taxonomy_predicates(taxonomy):
@ -133,12 +141,18 @@ def get_taxonomy_predicates(taxonomy):
return meta
def get_taxonomy_refs(taxonomy):
if TAXONOMIES is None:
load_taxonomies()
return TAXONOMIES.get(taxonomy).refs
def get_taxonomy_version(taxonomy):
if TAXONOMIES is None:
load_taxonomies()
return TAXONOMIES.get(taxonomy).version
def get_taxonomy_tags(taxonomy, enabled=False):
if TAXONOMIES is None:
load_taxonomies()
taxonomy_obj = TAXONOMIES.get(taxonomy)
tags = []
for p, content in taxonomy_obj.items():
@ -165,6 +179,8 @@ def get_taxonomy_meta(taxonomy_name, enabled=False, enabled_tags=False, nb_activ
meta = {}
if not exists_taxonomy(taxonomy_name):
return meta
if TAXONOMIES is None:
load_taxonomies()
taxonomy = TAXONOMIES.get(taxonomy_name)
meta['description'] = taxonomy.description
meta['name'] = taxonomy.name
@ -241,6 +257,8 @@ def api_update_taxonomy_tag_enabled(data):
if not exists_taxonomy(taxonomy):
return {'error': f'taxonomy {taxonomy} not found'}, 404
tags = data.get('tags', [])
if TAXONOMIES is None:
load_taxonomies()
taxonomy_tags = set(TAXONOMIES.get(taxonomy).machinetags())
for tag in tags:
if tag not in taxonomy_tags:
@ -249,6 +267,8 @@ def api_update_taxonomy_tag_enabled(data):
def enable_taxonomy_tags(taxonomy):
enable_taxonomy(taxonomy)
if TAXONOMIES is None:
load_taxonomies()
for tag in TAXONOMIES.get(taxonomy).machinetags():
add_taxonomy_tag_enabled(taxonomy, tag)
@ -279,9 +299,8 @@ def api_disable_taxonomy_tags(data):
#
# TODO Synonyms
GALAXIES = {}
CLUSTERS = {}
GALAXIES = None
CLUSTERS = None
def load_galaxies():
global GALAXIES
galaxies = []
@ -298,11 +317,10 @@ def load_galaxies():
clusters.append(json.load(f))
CLUSTERS = Clusters(clusters)
# LOAD GALAXY + CLUSTERS
load_galaxies()
def get_galaxies():
if GALAXIES is None:
# LOAD GALAXY + CLUSTERS
load_galaxies()
return GALAXIES.keys()
# TODO RENAME ME
@ -310,9 +328,15 @@ def get_active_galaxies():
return r_tags.smembers('galaxies:enabled')
def get_galaxy(galaxy_name):
if GALAXIES is None:
# LOAD GALAXY + CLUSTERS
load_galaxies()
return GALAXIES.get(galaxy_name)
def exists_galaxy(galaxy):
if CLUSTERS is None:
# LOAD GALAXY + CLUSTERS
load_galaxies()
return CLUSTERS.get(galaxy) is not None
def is_galaxy_enabled(galaxy):
@ -369,9 +393,15 @@ def get_galaxy_tag_meta(galaxy_type, tag):
def get_clusters():
if CLUSTERS is None:
# LOAD GALAXY + CLUSTERS
load_galaxies()
return CLUSTERS.keys()
def get_cluster(cluster_type):
if CLUSTERS is None:
# LOAD GALAXY + CLUSTERS
load_galaxies()
return CLUSTERS.get(cluster_type)
def get_galaxy_tags(galaxy_type):
@ -1558,14 +1588,14 @@ def get_obj_date(object_type, object_id):
return None
# API QUERY
def api_delete_obj_tags(tags=[], object_id=None, object_type="item"):
def api_delete_obj_tags(tags=[], object_id=None, object_type="item", subtype=''):
if not object_id:
return ({'status': 'error', 'reason': 'object id not found'}, 404)
if not tags:
return ({'status': 'error', 'reason': 'No Tag(s) specified'}, 400)
for tag in tags:
res = delete_object_tag(tag, object_type, object_id, subtype='')
res = delete_object_tag(tag, object_type, object_id, subtype=subtype)
if res:
return res

View File

@ -12,11 +12,10 @@ import yara
import datetime
import base64
from ail_typo_squatting import runAll
import math
from collections import defaultdict
from flask import escape
from markupsafe import escape
from textblob import TextBlob
from nltk.tokenize import RegexpTokenizer
@ -38,24 +37,22 @@ logger = logging.getLogger()
config_loader = ConfigLoader.ConfigLoader()
r_cache = config_loader.get_redis_conn("Redis_Cache")
r_tracker = config_loader.get_db_conn("Kvrocks_Trackers")
items_dir = config_loader.get_config_str("Directories", "pastes")
if items_dir[-1] == '/':
items_dir = items_dir[:-1]
config_loader = None
email_regex = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}'
email_regex = re.compile(email_regex)
special_characters = set('[<>~!?@#$%^&*|()_-+={}":;,.\'\n\r\t]/\\')
special_characters.add('\\s')
# NLTK tokenizer
tokenizer = RegexpTokenizer('[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]+',
TOKENIZER = None
def init_tokenizer():
    """Lazily build the module-level NLTK tokenizer used for word frequency."""
    global TOKENIZER
    TOKENIZER = RegexpTokenizer('[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]+',
                                gaps=True, discard_empty=True)
def get_special_characters():
    """Return the set of characters disallowed in tracked words."""
    specials = set('[<>~!?@#$%^&*|()_-+={}":;,.\'\n\r\t]/\\')
    specials.add('\\s')
    return specials
###############
#### UTILS ####
def is_valid_uuid_v4(curr_uuid):
@ -76,6 +73,8 @@ def is_valid_regex(tracker_regex):
return False
def is_valid_mail(email):
email_regex = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}'
email_regex = re.compile(email_regex)
result = email_regex.match(email)
if result:
return True
@ -385,7 +384,7 @@ class Tracker:
r_tracker.srem(f'obj:tracker:{obj_type}:{subtype}:{obj_id}:{self.uuid}', date)
r_tracker.srem(f'obj:trackers:{obj_type}:{subtype}:{obj_id}', self.uuid)
r_tracker.srem(f'tracker:objs:{self.uuid}', f'{obj_type}:{subtype}:{obj_id}')
r_tracker.srem(f'tracker:objs:{self.uuid}:{obj_type}', f'{subtype}:{obj_id}')
self.update_daterange()
# TODO escape custom tags
@ -400,6 +399,9 @@ class Tracker:
tracker_type = 'yara'
elif tracker_type == 'typosquatting':
from ail_typo_squatting import runAll
domain = to_track.split(" ")[0]
typo_generation = runAll(domain=domain, limit=math.inf, formatoutput="text", pathOutput="-", verbose=False) # TODO REPLACE LIMIT BY -1
for typo in typo_generation:
@ -701,7 +703,7 @@ def get_trackers_dashboard():
for raw in r_tracker.lrange('trackers:dashboard', 0, -1):
tracker_uuid, timestamp, obj_type, subtype, obj_id = raw.split(':', 4)
tracker = Tracker(tracker_uuid)
meta = tracker.get_meta(options={'tags'})
meta = tracker.get_meta(options={'description', 'tags'})
if not meta.get('type'):
meta['type'] = 'Tracker DELETED'
timestamp = datetime.datetime.fromtimestamp(float(timestamp)).strftime('%Y-%m-%d %H:%M:%S')
@ -857,7 +859,7 @@ def api_validate_tracker_to_add(to_track, tracker_type, nb_words=1):
# force lowercase
to_track = to_track.lower()
word_set = set(to_track)
set_inter = word_set.intersection(special_characters)
set_inter = word_set.intersection(get_special_characters())
if set_inter:
return {"status": "error",
"reason": f'special character(s) not allowed: {set_inter}',
@ -929,7 +931,7 @@ def api_add_tracker(dict_input, user_id):
# Filters # TODO MOVE ME
filters = dict_input.get('filters', {})
if filters:
if filters.keys() == {'decoded', 'item', 'pgp', 'title'} and set(filters['pgp'].get('subtypes', [])) == {'mail', 'name'}:
if filters.keys() == set(get_objects_tracked()) and set(filters['pgp'].get('subtypes', [])) == {'mail', 'name'}:
filters = {}
for obj_type in filters:
if obj_type not in get_objects_tracked():
@ -1004,7 +1006,7 @@ def api_edit_tracker(dict_input, user_id):
# Filters # TODO MOVE ME
filters = dict_input.get('filters', {})
if filters:
if filters.keys() == {'decoded', 'item', 'pgp', 'title'} and set(filters['pgp'].get('subtypes', [])) == {'mail', 'name'}:
if filters.keys() == set(get_objects_tracked()) and set(filters['pgp'].get('subtypes', [])) == {'mail', 'name'}:
if not filters['decoded'] and not filters['item']:
filters = {}
for obj_type in filters:
@ -1055,6 +1057,37 @@ def api_delete_tracker(data, user_id):
tracker = Tracker(tracker_uuid)
return tracker.delete(), 200
def api_tracker_add_object(data, user_id):
    """
    Attach an object to a tracker after an ACL check.

    :param data: dict with 'uuid' (tracker), 'gid' (object global id) and
                 optional 'date' (YYYYMMDD string)
    :return: (result, http_status)
    """
    tracker_uuid = data.get('uuid')
    res = api_check_tracker_acl(tracker_uuid, user_id)
    if res:
        return res
    tracker = Tracker(tracker_uuid)
    object_gid = data.get('gid')
    date = data.get('date')
    if date:
        if not Date.validate_str_date(date):
            date = None
    try:
        obj_type, subtype, obj_id = object_gid.split(':', 2)
    # ValueError: a gid with fewer than 3 fields raises ValueError on unpack,
    # which the previous (AttributeError, IndexError) tuple did not catch
    except (AttributeError, IndexError, ValueError):
        return {"status": "error", "reason": "Invalid Object"}, 400
    return tracker.add(obj_type, subtype, obj_id, date=date), 200
def api_tracker_remove_object(data, user_id):
    """
    Detach an object from a tracker after an ACL check.

    :param data: dict with 'uuid' (tracker) and 'gid' (object global id)
    :return: (result, http_status)
    """
    tracker_uuid = data.get('uuid')
    res = api_check_tracker_acl(tracker_uuid, user_id)
    if res:
        return res
    tracker = Tracker(tracker_uuid)
    object_gid = data.get('gid')
    try:
        obj_type, subtype, obj_id = object_gid.split(':', 2)
    # ValueError: a gid with fewer than 3 fields raises ValueError on unpack
    except (AttributeError, IndexError, ValueError):
        return {"status": "error", "reason": "Invalid Object"}, 400
    return tracker.remove(obj_type, subtype, obj_id), 200
## -- CREATE TRACKER -- ##
####################
@ -1082,7 +1115,9 @@ def get_text_word_frequency(content, filtering=True):
words_dict = defaultdict(int)
if filtering:
blob = TextBlob(content, tokenizer=tokenizer)
if TOKENIZER is None:
init_tokenizer()
blob = TextBlob(content, tokenizer=TOKENIZER)
else:
blob = TextBlob(content)
for word in blob.tokens:
@ -1530,7 +1565,7 @@ class RetroHunt:
self._set_state(state)
def delete(self):
if self.is_running() and self.get_state() != 'completed':
if self.is_running() and self.get_state() not in ['completed', 'paused']:
return None
# Delete custom rule
@ -1616,6 +1651,19 @@ def get_retro_hunt_metas():
tasks.append(retro_hunt.get_meta(options={'date', 'progress', 'nb_match', 'tags'}))
return tasks
## Objects ##
def is_obj_retro_hunted(obj_type, subtype, obj_id):
    """Return True if the object was matched by at least one retro hunt."""
    return r_tracker.exists(f'obj:retro_hunts:{obj_type}:{subtype}:{obj_id}')
def get_obj_retro_hunts(obj_type, subtype, obj_id):
    """Return the uuids of the retro hunts that matched this object."""
    return r_tracker.smembers(f'obj:retro_hunts:{obj_type}:{subtype}:{obj_id}')
def delete_obj_retro_hunts(obj_type, subtype, obj_id):
    """Remove this object from every retro hunt that matched it."""
    for retro_uuid in get_obj_retro_hunts(obj_type, subtype, obj_id):
        RetroHunt(retro_uuid).remove(obj_type, subtype, obj_id)
## API ##
def api_check_retro_hunt_task_uuid(task_uuid):
if not is_valid_uuid_v4(task_uuid):
@ -1736,7 +1784,7 @@ def api_delete_retro_hunt_task(task_uuid):
if res:
return res
retro_hunt = RetroHunt(task_uuid)
if retro_hunt.is_running() and retro_hunt.get_state() != 'completed':
if retro_hunt.is_running() and retro_hunt.get_state() not in ['completed', 'paused']:
return {"status": "error", "reason": "You can't delete a running task"}, 400
else:
return retro_hunt.delete(), 200
@ -1756,9 +1804,9 @@ def _fix_db_custom_tags():
#### -- ####
if __name__ == '__main__':
# if __name__ == '__main__':
_fix_db_custom_tags()
# _fix_db_custom_tags()
# fix_all_tracker_uuid_list()
# res = get_all_tracker_uuid()
# print(len(res))

83
bin/lib/ail_api.py Executable file
View File

@ -0,0 +1,83 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import re
import sys
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib.ConfigLoader import ConfigLoader
from lib import Users
config_loader = ConfigLoader()
r_cache = config_loader.get_redis_conn("Redis_Cache")
config_loader = None
def check_token_format(token, search=re.compile(r'[^a-zA-Z0-9_-]').search):
    """Return True if `token` contains only [a-zA-Z0-9_-] characters."""
    return search(token) is None
def is_valid_token(token):
    """Return True if `token` belongs to a registered user."""
    return Users.exists_token(token)
def get_user_from_token(token):
    """Return the user id owning `token`, or None."""
    return Users.get_token_user(token)
def is_user_in_role(role, token):  # verify_user_role
    """Return True if the token's user holds `role`.

    The 'user_no_api' role explicitly forbids any API access."""
    if role == 'user_no_api':
        return False
    user_id = get_user_from_token(token)
    if not user_id:
        return False
    return Users.is_in_role(user_id, role)
#### Brute Force Protection ####
def get_failed_login(ip_address):
    """Return the current failed-login counter for `ip_address` (or None)."""
    return r_cache.get(f'failed_login_ip_api:{ip_address}')
def incr_failed_login(ip_address):
    """Count one failed login for `ip_address`; counter expires after 5 minutes."""
    counter_key = f'failed_login_ip_api:{ip_address}'
    r_cache.incr(counter_key)
    r_cache.expire(counter_key, 300)
def get_brute_force_ttl(ip_address):
    """Return the remaining lockout TTL (seconds) for `ip_address`."""
    return r_cache.ttl(f'failed_login_ip_api:{ip_address}')
def is_brute_force_protected(ip_address):
    """Return True once `ip_address` reaches the failed-login threshold (5)."""
    failed_login = get_failed_login(ip_address)
    if not failed_login:
        return False
    return int(failed_login) >= 5
#### --Brute Force Protection-- ####
def authenticate_user(token, ip_address):
    """
    Validate an API token for `ip_address`.

    Returns (True, 200) on success, or an ({'status': 'error', ...}, code)
    tuple. Failed attempts feed the brute-force counter for the address.
    """
    if is_brute_force_protected(ip_address):
        ip_ttl = get_brute_force_ttl(ip_address)
        return {'status': 'error', 'reason': f'Max Connection Attempts reached, Please wait {ip_ttl}s'}, 401
    try:
        if len(token) != 55:
            return {'status': 'error', 'reason': 'Invalid Token Length, required==55'}, 400
        if not check_token_format(token):
            return {'status': 'error', 'reason': 'Malformed Authentication String'}, 400
        if is_valid_token(token):
            return True, 200
        # unknown token -> count the failed attempt
        incr_failed_login(ip_address)
        return {'status': 'error', 'reason': 'Authentication failed'}, 401
    except Exception as e:
        print(e)  # TODO Logs
        return {'status': 'error', 'reason': 'Malformed Authentication String'}, 400

View File

@ -3,7 +3,7 @@
import os
import sys
from uuid import uuid4
import uuid
sys.path.append(os.environ['AIL_BIN'])
##################################
@ -18,7 +18,14 @@ config_loader = None
AIL_OBJECTS = sorted({'chat', 'chat-subchannel', 'chat-thread', 'cookie-name', 'cve', 'cryptocurrency', 'decoded',
'domain', 'etag', 'favicon', 'file-name', 'hhhash',
'item', 'image', 'message', 'pgp', 'screenshot', 'title', 'user-account', 'username'})
'item', 'image', 'message', 'ocr', 'pgp', 'screenshot', 'title', 'user-account', 'username'})
AIL_OBJECTS_WITH_SUBTYPES = {'chat', 'chat-subchannel', 'cryptocurrency', 'pgp', 'username', 'user-account'}
# TODO by object TYPE ????
AIL_OBJECTS_CORRELATIONS_DEFAULT = sorted({'chat', 'chat-subchannel', 'chat-thread', 'cve', 'cryptocurrency', 'decoded',
'domain', 'favicon', 'file-name',
'item', 'image', 'message', 'ocr', 'pgp', 'screenshot', 'title', 'user-account', 'username'})
def get_ail_uuid():
ail_uuid = r_serv_db.get('ail:uuid')
@ -31,24 +38,37 @@ def _set_ail_uuid():
r_serv_db.set('ail:uuid', ail_uuid)
return ail_uuid
def is_valid_uuid_v4(header_uuid):
    """Return True if `header_uuid` is a valid UUID v4 string (dashes optional)."""
    try:
        header_uuid = header_uuid.replace('-', '')
        uuid_test = uuid.UUID(hex=header_uuid, version=4)
        # UUID(version=4) forces the version/variant bits, so equality
        # holds only when the input already was a well-formed v4
        return uuid_test.hex == header_uuid
    # narrowed from a bare `except`: non-strings raise AttributeError/TypeError,
    # malformed hex raises ValueError
    except (AttributeError, TypeError, ValueError):
        return False
def generate_uuid():
    """Return a fresh random UUID v4 as a dashed string."""
    # merged-diff cleanup: the stale `return str(uuid4())` line (old import
    # style) shadowed this one and is removed
    return str(uuid.uuid4())
#### AIL OBJECTS ####
def get_all_objects():
    """Return the sorted list of all AIL object types."""
    return AIL_OBJECTS
def is_object_type(obj_type):
    """Return True if `obj_type` is a known AIL object type."""
    return obj_type in AIL_OBJECTS
def get_objects_with_subtypes():
    """Return the object types that carry a subtype."""
    # merged-diff cleanup: return the maintained module constant instead of
    # the stale hard-coded list (which was missing 'chat-subchannel')
    return AIL_OBJECTS_WITH_SUBTYPES
def get_object_all_subtypes(obj_type): # TODO Dynamic subtype
if obj_type == 'chat':
return r_object.smembers(f'all_chat:subtypes')
if obj_type == 'chat-subchannel':
return r_object.smembers(f'all_chat-subchannel:subtypes')
if obj_type == 'chat-thread':
return r_object.smembers(f'all_chat-thread:subtypes')
if obj_type == 'cryptocurrency':
return ['bitcoin', 'bitcoin-cash', 'dash', 'ethereum', 'litecoin', 'monero', 'zcash']
return ['bitcoin', 'bitcoin-cash', 'dash', 'ethereum', 'litecoin', 'monero', 'tron', 'zcash']
if obj_type == 'pgp':
return ['key', 'mail', 'name']
if obj_type == 'username':
@ -57,14 +77,17 @@ def get_object_all_subtypes(obj_type): # TODO Dynamic subtype
return r_object.smembers(f'all_chat:subtypes')
return []
def get_default_correlation_objects():
    """Return the object types shown by default in correlation graphs."""
    return AIL_OBJECTS_CORRELATIONS_DEFAULT
def get_obj_queued():
    """Return the object types that transit through the processing queues."""
    # merged-diff cleanup: a stale `return ['item', 'image']` preceded this
    # line, making the updated list unreachable
    return ['item', 'image', 'message', 'ocr']
def get_objects_tracked():
    """Return the object types that trackers can match."""
    # merged-diff cleanup: stale shorter list removed; it shadowed this return
    return ['decoded', 'item', 'pgp', 'message', 'ocr', 'title']
def get_objects_retro_hunted():
    """Return the object types that retro hunts can scan."""
    # merged-diff cleanup: stale `return ['decoded', 'item']` removed
    return ['decoded', 'item', 'message']
def get_all_objects_with_subtypes_tuple():
str_objs = []
@ -82,7 +105,7 @@ def unpack_obj_global_id(global_id, r_type='tuple'):
obj = global_id.split(':', 2)
return {'type': obj[0], 'subtype': obj[1], 'id': obj[2]}
else: # tuple(type, subtype, id)
return global_id.split(':', 2)
return global_id.split(':', 2) # TODO REPLACE get_obj_type_subtype_id_from_global_id(global_id)
def unpack_objs_global_id(objs_global_id, r_type='tuple'):
objs = []

View File

@ -139,6 +139,10 @@ class AILQueue:
def error(self):
r_queues.hdel(f'modules', f'{self.pid}:{self.name}')
def end(self):
self.clear()
r_queues.hdel(f'modules', f'{self.pid}:{self.name}')
def get_queues_modules():
return r_queues.hkeys('queues')

View File

@ -8,7 +8,6 @@ import sys
import requests
sys.path.append(os.environ['AIL_BIN'])
from lib.objects.CryptoCurrencies import CryptoCurrency
logger = logging.getLogger()
@ -53,9 +52,11 @@ def get_bitcoin_info(bitcoin_address, nb_transaction=50):
# filter btc seen in ail
def filter_btc_seen(btc_addr_set):
    """Return the subset of BTC addresses already known to AIL."""
    # local import to avoid a circular import at module load time
    from lib.objects import CryptoCurrencies
    # merged-diff cleanup: a stale `CryptoCurrency(...)` call (old import
    # style) duplicated the assignment below and is removed
    list_seen_btc = []
    for btc_addr in btc_addr_set:
        if CryptoCurrencies.CryptoCurrency(btc_addr, 'bitcoin').exists():
            list_seen_btc.append(btc_addr)
    return list_seen_btc

View File

@ -11,6 +11,7 @@ import sys
import time
import uuid
from datetime import datetime
sys.path.append(os.environ['AIL_BIN'])
##################################
@ -287,6 +288,10 @@ def get_obj_chat(chat_type, chat_subtype, chat_id):
elif chat_type == 'chat-thread':
return ChatThreads.ChatThread(chat_id, chat_subtype)
def get_obj_chat_from_global_id(chat_gid):
    """Instantiate a chat object from its global id 'type:subtype:id'."""
    chat_type, chat_subtype, chat_id = chat_gid.split(':', 2)
    return get_obj_chat(chat_type, chat_subtype, chat_id)
def get_obj_chat_meta(obj_chat, new_options=set()):
options = {}
if obj_chat.type == 'chat':
@ -321,7 +326,192 @@ def get_threads_metas(threads):
def get_username_meta_from_global_id(username_global_id):
_, instance_uuid, username_id = username_global_id.split(':', 2)
username = Usernames.Username(username_id, instance_uuid)
return username.get_meta()
return username.get_meta(options={'icon'})
###############################################################################
# TODO Pagination
# TODO Pagination
def list_messages_to_dict(l_messages_id, translation_target=None):
    """Group message metadata by day ('YYYY/MM/DD') preserving input order."""
    options = {'content', 'files-names', 'images', 'language', 'link', 'parent', 'parent_meta', 'reactions', 'thread', 'translation', 'user-account'}
    meta = {}
    curr_date = None
    for mess_id in l_messages_id:
        # mess_id[1:]: strip the leading prefix character - presumably a
        # sort/flag byte; TODO confirm against the producer of these ids
        message = Messages.Message(mess_id[1:])
        timestamp = message.get_timestamp()
        raw_date = message.get_date()
        date_day = f'{raw_date[0:4]}/{raw_date[4:6]}/{raw_date[6:8]}'
        if date_day != curr_date:
            meta[date_day] = []
            curr_date = date_day
        meta[date_day].append(message.get_meta(options=options, timestamp=timestamp,
                                               translation_target=translation_target))
    return meta
# TODO Filter
## Instance type
## Chats IDS
## SubChats IDS
## Threads IDS
## Daterange
def get_messages_iterator(filters={}):
    """
    Yield every Message of every chat instance: subchannel messages, thread
    messages, then messages directly in the chat. `filters` is unused (TODO).
    """
    for instance_uuid in get_chat_service_instances():
        for chat_id in ChatServiceInstance(instance_uuid).get_chats():
            chat = Chats.Chat(chat_id, instance_uuid)
            # subchannels
            for subchannel_gid in chat.get_subchannels():
                _, _, subchannel_id = subchannel_gid.split(':', 2)
                subchannel = ChatSubChannels.ChatSubChannel(subchannel_id, instance_uuid)
                messages, _ = subchannel._get_messages(nb=-1)
                for mess in messages:
                    _, _, message_id = mess[0].split(':', 2)
                    yield Messages.Message(message_id)
            # threads
            for threads in chat.get_threads():
                thread = ChatThreads.ChatThread(threads['id'], instance_uuid)
                messages, _ = thread._get_messages(nb=-1)
                for mess in messages:
                    # fixed shadowed unpack (`message_id, _, message_id = ...`)
                    _, _, message_id = mess[0].split(':', 2)
                    yield Messages.Message(message_id)
            # messages directly attached to the chat
            messages, _ = chat._get_messages(nb=-1)
            for mess in messages:
                # maxsplit=2 keeps ids containing ':' intact (was unbounded)
                _, _, message_id = mess[0].split(':', 2)
                yield Messages.Message(message_id)
# threads ???
def get_nb_messages_iterator(filters={}):
    """Count all messages across chats, subchannels and threads."""
    nb_messages = 0
    for instance_uuid in get_chat_service_instances():
        for chat_id in ChatServiceInstance(instance_uuid).get_chats():
            chat = Chats.Chat(chat_id, instance_uuid)
            # subchannels
            for subchannel_gid in chat.get_subchannels():
                _, _, subchannel_id = subchannel_gid.split(':', 2)
                nb_messages += ChatSubChannels.ChatSubChannel(subchannel_id, instance_uuid).get_nb_messages()
            # threads
            for threads in chat.get_threads():
                nb_messages += ChatThreads.ChatThread(threads['id'], instance_uuid).get_nb_messages()
            # messages directly attached to the chat
            nb_messages += chat.get_nb_messages()
    return nb_messages
def get_user_account_chats_meta(user_id, chats, subchannels):
    """Build metadata, including message counts, for a user's chats and the
    subchannels (from `subchannels`) the user participated in."""
    meta = []
    for chat_g_id in chats:
        c_subtype, c_id = chat_g_id.split(':', 1)
        chat = Chats.Chat(c_id, c_subtype)
        chat_meta = chat.get_meta(options={'icon', 'info', 'nb_participants', 'tags_safe', 'username'})
        if chat_meta['username']:
            chat_meta['username'] = get_username_meta_from_global_id(chat_meta['username'])
        chat_meta['nb_messages'] = len(chat.get_user_messages(user_id))
        chat_meta['subchannels'] = []
        for subchannel_gid in chat.get_subchannels():
            # [16:] strips the 'chat-subchannel:' prefix before membership test
            if subchannel_gid[16:] in subchannels:
                _, s_subtype, s_id = subchannel_gid.split(':', 2)
                subchannel = ChatSubChannels.ChatSubChannel(s_id, s_subtype)
                subchannel_meta = subchannel.get_meta(options={'created_at'})
                subchannel_meta['nb_messages'] = len(subchannel.get_user_messages(user_id))
                chat_meta['subchannels'].append(subchannel_meta)
        meta.append(chat_meta)
    return meta
def get_user_account_chat_message(user_id, subtype, chat_id):  # TODO subchannel + threads ...
    """Return the user's messages within one chat, grouped by day."""
    meta = {}
    chat = Chats.Chat(chat_id, subtype)
    chat_meta = chat.get_meta(options={'icon', 'info', 'nb_participants', 'tags_safe', 'username'})
    if chat_meta['username']:
        chat_meta['username'] = get_username_meta_from_global_id(chat_meta['username'])
    # NOTE(review): chat_meta is computed but never attached to the result -
    # confirm whether it should be returned alongside 'messages'
    meta['messages'] = list_messages_to_dict(chat.get_user_messages(user_id), translation_target=None)
    return meta
def get_user_account_nb_all_week_messages(user_id, chats, subchannels):
    """Per-weekday/per-hour histogram of a user's messages across `chats`."""
    days = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
    week = {day: {hour: 0 for hour in range(24)} for day in days}
    # accumulate message timestamps per weekday/hour
    for chat_g_id in chats:
        c_subtype, c_id = chat_g_id.split(':', 1)
        chat = Chats.Chat(c_id, c_subtype)
        for message in chat.get_user_messages(user_id):
            raw_ts = message.split('/', 2)[1]
            ts = datetime.utcfromtimestamp(float(raw_ts))
            week[ts.strftime('%a')][ts.hour] += 1
    # flatten to the list-of-dicts shape used by the frontend
    stats = []
    for nb_day, day in enumerate(week):
        for hour in week[day]:
            stats.append({'date': day, 'day': nb_day, 'hour': hour, 'count': week[day][hour]})
    return stats
def _get_chat_card_meta_options():
return {'created_at', 'icon', 'info', 'nb_participants', 'origin_link', 'subchannels', 'tags_safe', 'threads', 'translation', 'username'}
def _get_message_bloc_meta_options():
return {'chat', 'content', 'files-names', 'icon', 'images', 'language', 'link', 'parent', 'parent_meta', 'reactions','thread', 'translation', 'user-account'}
def get_message_report(l_mess):  # TODO Force language + translation
    """
    Build report data for a list of messages: a dict of per-chat metadata
    (with a 't_messages' counter) and the time-ordered list of message metas.
    """
    translation_target = 'en'
    chats = {}
    messages = []
    mess_options = _get_message_bloc_meta_options()
    for m in sorted(l_mess, key=lambda x: x[2]):
        message = Messages.Message(m[2])
        meta = message.get_meta(options=mess_options, translation_target=translation_target)
        chat_id = meta['chat']
        if chat_id not in chats:
            chat = Chats.Chat(chat_id, message.get_chat_instance())
            meta_chat = chat.get_meta(options=_get_chat_card_meta_options(), translation_target=translation_target)
            if meta_chat['username']:
                meta_chat['username'] = get_username_meta_from_global_id(meta_chat['username'])
            meta_chat['t_messages'] = 1
            chats[chat.id] = meta_chat
        else:
            chats[chat_id]['t_messages'] += 1
        messages.append(meta)
    return chats, messages
#### FIX ####
def fix_correlations_subchannel_message():
    """Re-create subchannel<->message correlations for every chat instance."""
    for instance_uuid in get_chat_service_instances():
        for chat_id in ChatServiceInstance(instance_uuid).get_chats():
            chat = Chats.Chat(chat_id, instance_uuid)
            # walk each subchannel and re-link all of its messages
            for subchannel_gid in chat.get_subchannels():
                _, _, subchannel_id = subchannel_gid.split(':', 2)
                subchannel = ChatSubChannels.ChatSubChannel(subchannel_id, instance_uuid)
                messages, _ = subchannel._get_messages(nb=-1)
                for mess in messages:
                    _, _, message_id = mess[0].split(':', )
                    subchannel.add_correlation('message', '', message_id)
#### API ####
@ -331,11 +521,12 @@ def api_get_chat_service_instance(chat_instance_uuid):
return {"status": "error", "reason": "Unknown uuid"}, 404
return chat_instance.get_meta({'chats'}), 200
def api_get_chat(chat_id, chat_instance_uuid, translation_target=None, nb=-1, page=-1):
def api_get_chat(chat_id, chat_instance_uuid, translation_target=None, nb=-1, page=-1, messages=True):
chat = Chats.Chat(chat_id, chat_instance_uuid)
if not chat.exists():
return {"status": "error", "reason": "Unknown chat"}, 404
meta = chat.get_meta({'created_at', 'icon', 'info', 'nb_participants', 'subchannels', 'threads', 'translation', 'username'}, translation_target=translation_target)
# print(chat.get_obj_language_stats())
meta = chat.get_meta({'created_at', 'icon', 'info', 'nb_participants', 'subchannels', 'tags_safe', 'threads', 'translation', 'username'}, translation_target=translation_target)
if meta['username']:
meta['username'] = get_username_meta_from_global_id(meta['username'])
if meta['subchannels']:
@ -343,17 +534,25 @@ def api_get_chat(chat_id, chat_instance_uuid, translation_target=None, nb=-1, pa
else:
if translation_target not in Language.get_translation_languages():
translation_target = None
meta['messages'], meta['pagination'], meta['tags_messages'] = chat.get_messages(translation_target=translation_target, nb=nb, page=page)
if messages:
meta['messages'], meta['pagination'], meta['tags_messages'] = chat.get_messages(translation_target=translation_target, nb=nb, page=page)
return meta, 200
def api_get_nb_message_by_week(chat_id, chat_instance_uuid):
chat = Chats.Chat(chat_id, chat_instance_uuid)
def api_get_nb_message_by_week(chat_type, chat_instance_uuid, chat_id):
chat = get_obj_chat(chat_type, chat_instance_uuid, chat_id)
if not chat.exists():
return {"status": "error", "reason": "Unknown chat"}, 404
week = chat.get_nb_message_this_week()
# week = chat.get_nb_message_by_week('20231109')
return week, 200
def api_get_nb_week_messages(chat_type, chat_instance_uuid, chat_id):
    """Return the weekly message counts for the given chat object."""
    chat_obj = get_obj_chat(chat_type, chat_instance_uuid, chat_id)
    if not chat_obj.exists():
        return {"status": "error", "reason": "Unknown chat"}, 404
    return chat_obj.get_nb_week_messages(), 200
def api_get_chat_participants(chat_type, chat_subtype, chat_id):
if chat_type not in ['chat', 'chat-subchannel', 'chat-thread']:
return {"status": "error", "reason": "Unknown chat type"}, 400
@ -373,6 +572,7 @@ def api_get_subchannel(chat_id, chat_instance_uuid, translation_target=None, nb=
subchannel = ChatSubChannels.ChatSubChannel(chat_id, chat_instance_uuid)
if not subchannel.exists():
return {"status": "error", "reason": "Unknown subchannel"}, 404
# print(subchannel.get_obj_language_stats())
meta = subchannel.get_meta({'chat', 'created_at', 'icon', 'nb_messages', 'nb_participants', 'threads', 'translation'}, translation_target=translation_target)
if meta['chat']:
meta['chat'] = get_chat_meta_from_global_id(meta['chat'])
@ -387,6 +587,7 @@ def api_get_thread(thread_id, thread_instance_uuid, translation_target=None, nb=
thread = ChatThreads.ChatThread(thread_id, thread_instance_uuid)
if not thread.exists():
return {"status": "error", "reason": "Unknown thread"}, 404
# print(thread.get_obj_language_stats())
meta = thread.get_meta({'chat', 'nb_messages', 'nb_participants'})
# if meta['chat']:
# meta['chat'] = get_chat_meta_from_global_id(meta['chat'])
@ -397,14 +598,101 @@ def api_get_message(message_id, translation_target=None):
message = Messages.Message(message_id)
if not message.exists():
return {"status": "error", "reason": "Unknown uuid"}, 404
meta = message.get_meta({'chat', 'content', 'files-names', 'icon', 'images', 'link', 'parent', 'parent_meta', 'reactions', 'thread', 'translation', 'user-account'}, translation_target=translation_target)
meta = message.get_meta({'chat', 'content', 'files-names', 'icon', 'images', 'language', 'link', 'parent', 'parent_meta', 'reactions', 'thread', 'translation', 'user-account'}, translation_target=translation_target)
return meta, 200
def api_message_detect_language(message_id):
    """Detect the language of a message and return it as ``{'language': ...}``."""
    msg = Messages.Message(message_id)
    if msg.exists():
        return {"language": msg.detect_language()}, 200
    return {"status": "error", "reason": "Unknown uuid"}, 404
def api_manually_translate_message(message_id, source, translation_target, translation):
    """Manually set a message's source language and (optionally) its translation.

    :param message_id: message object id
    :param source: language code of the message original content
    :param translation_target: language code of the provided translation
    :param translation: manual translation text; falsy to only fix the
        source language
    :return: ``(payload, HTTP status)`` tuple; payload is None on success
    """
    message = Messages.Message(message_id)
    if not message.exists():
        return {"status": "error", "reason": "Unknown uuid"}, 404
    if translation:
        if len(translation) > 200000:  # TODO REVIEW LIMIT
            return {"status": "error", "reason": "Max Size reached"}, 400
    all_languages = Language.get_translation_languages()
    if source not in all_languages:
        return {"status": "error", "reason": "Unknown source Language"}, 400
    message_language = message.get_language()
    if message_language != source:
        # Re-index the message under the corrected source language
        message.edit_language(message_language, source)
    if translation:
        if translation_target not in all_languages:
            return {"status": "error", "reason": "Unknown target Language"}, 400
        message.set_translation(translation_target, translation)
    # TODO sanitize translation
    return None, 200
def api_get_user_account(user_id, instance_uuid, translation_target=None):
    """Return the meta of a user account, with its chats expanded."""
    account = UsersAccount.UserAccount(user_id, instance_uuid)
    if not account.exists():
        return {"status": "error", "reason": "Unknown user-account"}, 404
    options = {'chats', 'icon', 'info', 'subchannels', 'threads',
               'translation', 'username', 'username_meta'}
    meta = account.get_meta(options, translation_target=translation_target)
    if meta['chats']:
        meta['chats'] = get_user_account_chats_meta(user_id, meta['chats'], meta['subchannels'])
    return meta, 200
def api_get_user_account_chat_messages(user_id, instance_uuid, chat_id, translation_target=None):
    """Return a user's messages in one chat, plus the user-account and chat metas."""
    account = UsersAccount.UserAccount(user_id, instance_uuid)
    if not account.exists():
        return {"status": "error", "reason": "Unknown user-account"}, 404
    meta = get_user_account_chat_message(user_id, instance_uuid, chat_id)
    account_options = {'icon', 'info', 'translation', 'username', 'username_meta'}
    meta['user-account'] = account.get_meta(account_options, translation_target=translation_target)
    chat_meta, status = api_get_chat(chat_id, instance_uuid,
                                     translation_target=translation_target, messages=False)
    if status != 200:
        return chat_meta, status
    meta['chat'] = chat_meta
    return meta, 200
def api_get_user_account_nb_all_week_messages(user_id, instance_uuid):
    """Return weekly message counts across all chats/subchannels of a user."""
    account = UsersAccount.UserAccount(user_id, instance_uuid)
    if not account.exists():
        return {"status": "error", "reason": "Unknown user-account"}, 404
    stats = get_user_account_nb_all_week_messages(account.id,
                                                  account.get_chats(),
                                                  account.get_chat_subchannels())
    return stats, 200
def api_chat_messages(subtype, chat_id):
    """Return a chat meta with all its messages (export endpoint).

    Messages are only fetched when the chat has no subchannels --
    presumably because messages then live in the subchannels (TODO confirm).
    """
    chat = Chats.Chat(chat_id, subtype)
    if not chat.exists():
        return {"status": "error", "reason": "Unknown chat"}, 404
    meta = chat.get_meta({'created_at', 'info', 'nb_participants', 'subchannels', 'threads', 'username'})  # 'icon' 'translation'
    if meta['username']:
        meta['username'] = get_username_meta_from_global_id(meta['username'])
    if meta['subchannels']:
        meta['subchannels'] = get_subchannels_meta_from_global_id(meta['subchannels'])
    else:
        options = {'content', 'files-names', 'images', 'link', 'parent', 'parent_meta', 'reactions', 'thread', 'user-account'}
        meta['messages'], _, _ = chat.get_messages(nb=-1, options=options)
    return meta, 200
def api_subchannel_messages(subtype, subchannel_id):
    """Return a subchannel meta with all its messages (export endpoint)."""
    subchannel = ChatSubChannels.ChatSubChannel(subchannel_id, subtype)
    if not subchannel.exists():
        return {"status": "error", "reason": "Unknown subchannel"}, 404
    meta = subchannel.get_meta({'chat', 'created_at', 'nb_messages', 'nb_participants', 'threads'})
    if meta['chat']:
        meta['chat'] = get_chat_meta_from_global_id(meta['chat'])
    threads = meta.get('threads')
    if threads:
        meta['threads'] = get_threads_metas(threads)
    username = meta.get('username')
    if username:
        meta['username'] = get_username_meta_from_global_id(username)
    message_options = {'content', 'files-names', 'images', 'link', 'parent',
                       'parent_meta', 'reactions', 'thread', 'user-account'}
    meta['messages'], _, _ = subchannel.get_messages(nb=-1, options=message_options)
    return meta, 200
def api_thread_messages(subtype, thread_id):
    """Return a thread meta with all its messages (export endpoint)."""
    thread = ChatThreads.ChatThread(thread_id, subtype)
    if not thread.exists():
        return {"status": "error", "reason": "Unknown thread"}, 404
    meta = thread.get_meta({'chat', 'nb_messages', 'nb_participants'})
    message_options = {'content', 'files-names', 'images', 'link', 'parent',
                       'parent_meta', 'reactions', 'thread', 'user-account'}
    meta['messages'], _, _ = thread.get_messages(nb=-1, options=message_options)
    return meta, 200
# # # # # # # # # # LATER

View File

@ -41,25 +41,26 @@ config_loader = None
##################################
CORRELATION_TYPES_BY_OBJ = {
"chat": ["chat-subchannel", "chat-thread", "image", "user-account"], # message or direct correlation like cve, bitcoin, ... ???
"chat-subchannel": ["chat", "chat-thread", "image", "message", "user-account"],
"chat-thread": ["chat", "chat-subchannel", "image", "message", "user-account"], # TODO user account
"chat": ["chat-subchannel", "chat-thread", "image", "message", "ocr", "user-account"], # message or direct correlation like cve, bitcoin, ... ???
"chat-subchannel": ["chat", "chat-thread", "image", "message", "ocr", "user-account"],
"chat-thread": ["chat", "chat-subchannel", "image", "message", "ocr", "user-account"], # TODO user account
"cookie-name": ["domain"],
"cryptocurrency": ["domain", "item", "message"],
"cve": ["domain", "item", "message"],
"decoded": ["domain", "item", "message"],
"cryptocurrency": ["domain", "item", "message", "ocr"],
"cve": ["domain", "item", "message", "ocr"],
"decoded": ["domain", "item", "message", "ocr"],
"domain": ["cve", "cookie-name", "cryptocurrency", "decoded", "etag", "favicon", "hhhash", "item", "pgp", "title", "screenshot", "username"],
"etag": ["domain"],
"favicon": ["domain", "item"], # TODO Decoded
"file-name": ["chat", "message"],
"hhhash": ["domain"],
"image": ["chat", "message", "user-account"],
"image": ["chat", "chat-subchannel", "chat-thread", "message", "ocr", "user-account"], # TODO subchannel + threads ????
"item": ["cve", "cryptocurrency", "decoded", "domain", "favicon", "pgp", "screenshot", "title", "username"], # chat ???
"message": ["chat", "chat-subchannel", "chat-thread", "cve", "cryptocurrency", "decoded", "file-name", "image", "pgp", "user-account"], # chat ??
"pgp": ["domain", "item", "message"],
"message": ["chat", "chat-subchannel", "chat-thread", "cve", "cryptocurrency", "decoded", "file-name", "image", "ocr", "pgp", "user-account"],
"ocr": ["chat", "chat-subchannel", "chat-thread", "cve", "cryptocurrency", "decoded", "image", "message", "pgp", "user-account"],
"pgp": ["domain", "item", "message", "ocr"],
"screenshot": ["domain", "item"],
"title": ["domain", "item"],
"user-account": ["chat", "chat-subchannel", "chat-thread", "image", "message", "username"],
"user-account": ["chat", "chat-subchannel", "chat-thread", "image", "message", "ocr", "username"],
"username": ["domain", "item", "message", "user-account"],
}

View File

@ -925,6 +925,19 @@ def get_crawlers_stats_by_day(date, domain_type):
'down': r_crawler.scard(f'{domain_type}_down:{date}'),
}
def get_crawlers_stats_by_month(domain_type, date=None):
    """Return the list of daily crawler stats for the month containing *date*."""
    # NOTE: loop variable renamed so it no longer shadows the `date` parameter
    return [get_crawlers_stats_by_day(day, domain_type)
            for day in Date.get_month_dates(date=date)]
def get_crawlers_stats_up_down_by_month(domain_type, date=None):
    """Return total up/down domain counts for the month containing *date*."""
    totals = {'down': 0, 'up': 0}
    for day in Date.get_month_dates(date=date):
        day_stats = get_crawlers_stats_by_day(day, domain_type)
        for key in ('down', 'up'):
            totals[key] += day_stats.get(key, 0)
    return totals
def get_crawlers_stats(domain_type=None):
stats = {}
@ -1273,6 +1286,11 @@ def create_schedule(frequency, user, url, depth=1, har=True, screenshot=True, he
schedule.create(frequency, user, url, depth=depth, har=har, screenshot=screenshot, header=header, cookiejar=cookiejar, proxy=proxy, user_agent=user_agent, tags=tags)
return schedule_uuid
def _delete_schedules():
    """Delete every crawler schedule (destructive cleanup helper)."""
    for uuid in get_schedulers_uuid():
        CrawlerSchedule(uuid).delete()
# TODO sanitize UUID
def api_delete_schedule(data):
schedule_uuid = data.get('uuid')
@ -1660,7 +1678,6 @@ def create_task(url, depth=1, har=True, screenshot=True, header=None, cookiejar=
external=external)
return task_uuid
## -- CRAWLER TASK -- ##
#### CRAWLER TASK API ####

View File

@ -19,3 +19,6 @@ class ModuleQueueError(AILError):
class MISPConnectionError(AILError):
    """Error for failures connecting to a MISP instance."""
    pass

class AILObjectUnknown(AILError):
    """Error for an AIL object that cannot be resolved."""
    pass

View File

@ -40,6 +40,11 @@ r_key = regex_helper.generate_redis_cache_key('extractor')
# TODO UI Link
# Correlation object types to extract/highlight, per container object type.
CORRELATION_TO_EXTRACT = {
    'item': ['cve', 'cryptocurrency', 'title', 'username'],
    'message': ['cve', 'cryptocurrency', 'username']
}
MODULES = {
'infoleak:automatic-detection="credit-card"': CreditCards(queue=False),
'infoleak:automatic-detection="iban"': Iban(queue=False),
@ -57,9 +62,27 @@ tools = Tools(queue=False)
for tool_name in tools.get_tools():
MODULES[f'infoleak:automatic-detection="{tool_name}-tool"'] = tools
def get_correl_match(extract_type, obj_id, content):
def merge_overlap(extracted):
    """Merge overlapping extraction matches into single spans.

    :param extracted: matches sorted by start offset, each a sequence
        ``(start, end, matched_string, obj_ref)``
    :return: list of merged tuples ``(start, end, merged_string, refs)``
        where ``refs`` is a list of ``(obj_ref, matched_string)`` pairs for
        every match folded into the span

    Fix: return [] for an empty input instead of raising IndexError.
    """
    if not extracted:
        return []
    merged = []
    curr_start, curr_end, curr_string_match, curr_obj_ref = extracted[0]
    curr_obj_ref = [(curr_obj_ref, curr_string_match)]
    for start, end, mstring, ref in extracted[1:]:
        # overlap: extend the current span with the non-overlapping tail
        if start <= curr_end:
            curr_string_match += mstring[curr_end - start:]
            curr_end = max(curr_end, end)
            curr_obj_ref.append((ref, mstring))
        else:
            merged.append((curr_start, curr_end, curr_string_match, curr_obj_ref))
            curr_start, curr_end, curr_string_match, curr_obj_ref = start, end, mstring, [(ref, mstring)]
    merged.append((curr_start, curr_end, curr_string_match, curr_obj_ref))
    return merged
def get_correl_match(extract_type, obj, content):
extracted = []
correl = correlations_engine.get_correlation_by_correl_type('item', '', obj_id, extract_type)
correl = correlations_engine.get_correlation_by_correl_type(obj.type, obj.get_subtype(r_str=True), obj.id, extract_type)
to_extract = []
map_subtype = {}
map_value_id = {}
@ -75,18 +98,20 @@ def get_correl_match(extract_type, obj_id, content):
sha256_val = sha256(value.encode()).hexdigest()
map_value_id[sha256_val] = value
if to_extract:
objs = regex_helper.regex_finditer(r_key, '|'.join(to_extract), obj_id, content)
for obj in objs:
if map_subtype.get(obj[2]):
subtype = map_subtype[obj[2]]
objs = regex_helper.regex_finditer(r_key, '|'.join(to_extract), obj.get_global_id(), content)
if extract_type == 'title' and objs:
objs = [objs[0]]
for ob in objs:
if map_subtype.get(ob[2]):
subtype = map_subtype[ob[2]]
else:
subtype = ''
sha256_val = sha256(obj[2].encode()).hexdigest()
sha256_val = sha256(ob[2].encode()).hexdigest()
value_id = map_value_id.get(sha256_val)
if not value_id:
logger.critical(f'Error module extractor: {sha256_val}\n{extract_type}\n{subtype}\n{value_id}\n{map_value_id}\n{objs}')
value_id = 'ERROR'
extracted.append([obj[0], obj[1], obj[2], f'{extract_type}:{subtype}:{value_id}'])
extracted.append([ob[0], ob[1], ob[2], f'{extract_type}:{subtype}:{value_id}'])
return extracted
def _get_yara_match(data):
@ -100,7 +125,7 @@ def _get_yara_match(data):
return yara.CALLBACK_CONTINUE
def _get_word_regex(word):
return '(?:^|(?<=[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]))' + word + '(?:$|(?=[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]))'
return '(?i)(?:^|(?<=[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]))' + word + '(?:$|(?=[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]))'
def convert_byte_offset_to_string(b_content, offset):
byte_chunk = b_content[:offset + 1]
@ -115,17 +140,18 @@ def convert_byte_offset_to_string(b_content, offset):
# TODO RETRO HUNTS
# TODO TRACKER TYPE IN UI
def get_tracker_match(obj_id, content):
def get_tracker_match(obj, content):
extracted = []
extracted_yara = []
trackers = Tracker.get_obj_trackers('item', '', obj_id)
obj_gid = obj.get_global_id()
trackers = Tracker.get_obj_trackers(obj.type, obj.get_subtype(r_str=True), obj.id)
for tracker_uuid in trackers:
tracker = Tracker.Tracker(tracker_uuid)
tracker_type = tracker.get_type()
# print(tracker_type)
tracked = tracker.get_tracked()
if tracker_type == 'regex': # TODO Improve word detection -> word delimiter
regex_match = regex_helper.regex_finditer(r_key, tracked, obj_id, content)
regex_match = regex_helper.regex_finditer(r_key, tracked, obj_gid, content)
for match in regex_match:
extracted.append([int(match[0]), int(match[1]), match[2], f'tracker:{tracker.uuid}'])
elif tracker_type == 'yara':
@ -147,11 +173,25 @@ def get_tracker_match(obj_id, content):
words = [tracked]
for word in words:
regex = _get_word_regex(word)
regex_match = regex_helper.regex_finditer(r_key, regex, obj_id, content)
regex_match = regex_helper.regex_finditer(r_key, regex, obj_gid, content)
# print(regex_match)
for match in regex_match:
extracted.append([int(match[0]), int(match[1]), match[2], f'tracker:{tracker.uuid}'])
# Retro Hunt
retro_hunts = Tracker.get_obj_retro_hunts(obj.type, obj.get_subtype(r_str=True), obj.id)
for retro_uuid in retro_hunts:
retro_hunt = Tracker.RetroHunt(retro_uuid)
rule = retro_hunt.get_rule(r_compile=True)
rule.match(data=content.encode(), callback=_get_yara_match,
which_callbacks=yara.CALLBACK_MATCHES, timeout=30)
yara_match = r_cache.smembers(f'extractor:yara:match:{r_key}')
r_cache.delete(f'extractor:yara:match:{r_key}')
extracted = []
for match in yara_match:
start, end, value = match.split(':', 2)
extracted_yara.append([int(start), int(end), value, f'retro_hunt:{retro_hunt.uuid}'])
# Convert byte offset to string offset
if extracted_yara:
b_content = content.encode()
@ -168,99 +208,101 @@ def get_tracker_match(obj_id, content):
# Type:subtype:id
# tag:iban
# tracker:uuid
def extract(obj_id, content=None):
item = Item(obj_id)
if not item.exists():
# def extract(obj_id, content=None):
def extract(obj_type, subtype, obj_id, content=None):
obj = ail_objects.get_object(obj_type, subtype, obj_id)
if not obj.exists():
return []
obj_gid = obj.get_global_id()
# CHECK CACHE
cached = r_cache.get(f'extractor:cache:{obj_id}')
cached = r_cache.get(f'extractor:cache:{obj_gid}')
# cached = None
if cached:
r_cache.expire(f'extractor:cache:{obj_id}', 300)
r_cache.expire(f'extractor:cache:{obj_gid}', 300)
return json.loads(cached)
if not content:
content = item.get_content()
content = obj.get_content()
extracted = get_tracker_match(obj_id, content)
extracted = get_tracker_match(obj, content)
# print(item.get_tags())
for tag in item.get_tags():
for tag in obj.get_tags():
if MODULES.get(tag):
# print(tag)
module = MODULES.get(tag)
matches = module.extract(obj_id, content, tag)
matches = module.extract(obj, content, tag)
if matches:
extracted = extracted + matches
for obj_t in ['cve', 'cryptocurrency', 'title', 'username']: # Decoded, PGP->extract bloc
matches = get_correl_match(obj_t, obj_id, content)
for obj_t in CORRELATION_TO_EXTRACT[obj.type]:
matches = get_correl_match(obj_t, obj, content)
if matches:
extracted = extracted + matches
# SORT By Start Pos
extracted = sorted(extracted, key=itemgetter(0))
# print(extracted)
if extracted:
extracted = sorted(extracted, key=itemgetter(0))
extracted = merge_overlap(extracted)
# Save In Cache
if extracted:
extracted_dump = json.dumps(extracted)
r_cache.set(f'extractor:cache:{obj_id}', extracted_dump)
r_cache.expire(f'extractor:cache:{obj_id}', 300) # TODO Reduce CACHE ???????????????
r_cache.set(f'extractor:cache:{obj_gid}', extracted_dump)
r_cache.expire(f'extractor:cache:{obj_gid}', 300) # TODO Reduce CACHE ???????????????
return extracted
# TODO ADD LINK UI
def get_extracted_by_match(extracted):
matches = {}
for start, end, value, str_obj in extracted:
for start, end, value, raw_objs in extracted:
if str_obj not in matches:
matches[str_obj] = {}
ob_type, row_id = str_obj.split(':', 1)
if ob_type == 'tag': # TODO put me in object class
matches[str_obj]['subtype'] = 'tag'
matches[str_obj]['id'] = row_id
matches[str_obj]['icon'] = {'style': 'fas', 'icon': '\uf02b', 'color': '#28a745', 'radius': 5}
matches[str_obj]['link'] = ''
elif ob_type == 'tracker': # TODO put me in object class
matches[str_obj]['subtype'] = 'tracker'
matches[str_obj]['id'] = row_id
matches[str_obj]['icon'] = {'style': 'fas', 'icon': '\uf05b', 'color': '#ffc107', 'radius': 5}
matches[str_obj]['link'] = ''
else:
row_id = row_id.split(':', 1)
if len(row_id) == 2:
subtype = row_id[0]
obj_id = row_id[1]
for raw in raw_objs:
str_obj, str_match = raw
if str_obj not in matches:
matches[str_obj] = {}
ob_type, row_id = str_obj.split(':', 1)
if ob_type == 'tag': # TODO put me in object class
matches[str_obj]['subtype'] = 'tag'
matches[str_obj]['id'] = row_id
matches[str_obj]['icon'] = {'style': 'fas', 'icon': '\uf02b', 'color': '#28a745', 'radius': 5}
matches[str_obj]['link'] = ''
elif ob_type == 'tracker': # TODO put me in object class
matches[str_obj]['subtype'] = 'tracker'
matches[str_obj]['id'] = row_id
matches[str_obj]['icon'] = {'style': 'fas', 'icon': '\uf05b', 'color': '#ffc107', 'radius': 5}
matches[str_obj]['link'] = ''
elif ob_type == 'retro_hunt': # TODO put me in object class
matches[str_obj]['subtype'] = 'retro_hunt'
matches[str_obj]['id'] = row_id
matches[str_obj]['icon'] = {'style': 'fas', 'icon': '\uf05b', 'color': '#008107', 'radius': 5}
matches[str_obj]['link'] = ''
else:
subtype = ''
obj_id = row_id[0]
matches[str_obj]['subtype'] = subtype
matches[str_obj]['id'] = obj_id
matches[str_obj]['icon'] = ail_objects.get_object_svg(ob_type, subtype, obj_id)
matches[str_obj]['link'] = ail_objects.get_object_link(ob_type, subtype, obj_id)
row_id = row_id.split(':', 1)
if len(row_id) == 2:
subtype = row_id[0]
obj_id = row_id[1]
else:
subtype = ''
obj_id = row_id[0]
matches[str_obj]['subtype'] = subtype
matches[str_obj]['id'] = obj_id
matches[str_obj]['icon'] = ail_objects.get_object_svg(ob_type, subtype, obj_id)
matches[str_obj]['link'] = ail_objects.get_object_link(ob_type, subtype, obj_id)
matches[str_obj]['matches'] = []
matches[str_obj]['matches'] = []
match = [start, end, value]
matches[str_obj]['matches'].append(match)
match = [start, end, str_match]
matches[str_obj]['matches'].append(match)
return matches
# if __name__ == '__main__':
# t0 = time.time()
# obj_id = 'crawled/2022/09/15/circl.lu179c7903-5b21-452e-9f25-4b61d9934e2b'
# obj_id = 'crawled/2022/09/15/circl.lu1e4f9721-06dc-404f-aabf-3c3bd0b533bd'
# obj_id = 'submitted/2022/09/13/submitted_ba3ee771-c91c-4f50-9d6a-8558cdac7aeb.gz'
# # obj_id = 'tests/2021/01/01/credit_cards.gz'
# # obj_id = 'crawled/2020/07/20/circl.luc9301321-f1b1-4d91-9082-5eb452b946c5'
# obj_id = 'submitted/2019/09/22/97172282-e4c2-4a1e-b82c-c4fb9490a56e.gz'
# obj_id = 'submitted/2019/09/20/4fb7f02d-1241-4ef4-b17e-80ae76038835.gz'
# obj_id = 'crawled/2023/02/21/circl.lu1c300acb-0cbe-480f-917e-9afe3ec958e8'
#
# extract(obj_id)
#
# # get_obj_correl('cve', obj_id, content)

View File

@ -106,7 +106,7 @@ def create(thread_id, chat_instance, chat_id, subchannel_id, message_id, contain
new_thread_id = f'{chat_id}/{subchannel_id}/{thread_id}'
thread = ChatThread(new_thread_id, chat_instance)
if not thread.exists():
if not thread.is_children():
thread.create(container_obj, message_id)
return thread

View File

@ -18,13 +18,10 @@ from lib.ConfigLoader import ConfigLoader
from lib.objects.abstract_chat_object import AbstractChatObject, AbstractChatObjects
from lib.objects.abstract_subtype_object import AbstractSubtypeObject, get_all_id
from lib.data_retention_engine import update_obj_date
from lib.objects import ail_objects
from lib.objects.abstract_subtype_object import get_all_id
# from lib.data_retention_engine import update_obj_date
from lib.timeline_engine import Timeline
from lib.correlations_engine import get_correlation_by_correl_type
config_loader = ConfigLoader()
baseurl = config_loader.get_config_str("Notifications", "ail_domain")
r_object = config_loader.get_db_conn("Kvrocks_Objects")
@ -51,11 +48,18 @@ class Chat(AbstractChatObject):
def get_link(self, flask_context=False):
if flask_context:
url = url_for('correlation.show_correlation', type=self.type, subtype=self.subtype, id=self.id)
url = url_for('chats_explorer.chats_explorer_chat', subtype=self.subtype, id=self.id)
else:
url = f'{baseurl}/correlation/show?type={self.type}&subtype={self.subtype}&id={self.id}'
return url
def get_origin_link(self):
    """Return the public origin URL of this chat, when reconstructable.

    Only implemented for one hard-coded chat instance subtype --
    presumably the Telegram instance UUID, TODO confirm. Returns None
    implicitly for other instances or when no username is set.
    """
    if self.subtype == '00098785-7e70-5d12-a120-c5cdc1252b2b':
        username = self.get_username()
        if username:
            # username is a global id ('type:subtype:id'); keep the id part
            username = username.split(':', 2)[2]
            return f'https://t.me/{username}'
def get_svg_icon(self): # TODO
# if self.subtype == 'telegram':
# style = 'fab'
@ -75,6 +79,7 @@ class Chat(AbstractChatObject):
meta['name'] = self.get_name()
meta['tags'] = self.get_tags(r_list=True)
if 'icon' in options:
meta['svg_icon'] = self.get_svg_icon()
meta['icon'] = self.get_icon()
meta['img'] = meta['icon']
if 'info' in options:
@ -99,6 +104,8 @@ class Chat(AbstractChatObject):
meta['threads'] = self.get_threads()
if 'tags_safe' in options:
meta['tags_safe'] = self.is_tags_safe(meta['tags'])
if 'origin_link' in options:
meta['origin_link'] = self.get_origin_link()
return meta
def get_misp_object(self):

View File

@ -60,7 +60,7 @@ class CryptoCurrency(AbstractSubtypeObject):
pass
def is_valid_address(self):
if self.type == 'bitcoin' or self.type == 'dash' or self.type == 'litecoin':
if self.subtype == 'bitcoin' or self.subtype == 'dash' or self.subtype == 'litecoin' or self.subtype == 'tron':
return check_base58_address(self.id)
else:
return True
@ -80,6 +80,8 @@ class CryptoCurrency(AbstractSubtypeObject):
return 'ZEC'
elif self.subtype == 'dash':
return 'DASH'
elif self.subtype == 'tron':
return 'TRX'
return None
def get_link(self, flask_context=False):
@ -140,7 +142,7 @@ class CryptoCurrency(AbstractSubtypeObject):
def get_all_subtypes():
# return ail_core.get_object_all_subtypes(self.type)
return ['bitcoin', 'bitcoin-cash', 'dash', 'ethereum', 'litecoin', 'monero', 'zcash']
return ['bitcoin', 'bitcoin-cash', 'dash', 'ethereum', 'litecoin', 'monero', 'tron', 'zcash']
# def build_crypto_regex(subtype, search_id):
@ -172,6 +174,8 @@ def get_subtype_by_symbol(symbol):
return 'zcash'
elif symbol == 'DASH':
return 'dash'
elif symbol == 'TRX':
return 'tron'
return None
@ -189,10 +193,6 @@ def get_all_cryptocurrencies_by_subtype(subtype):
def sanitize_cryptocurrency_name_to_search(name_to_search, subtype): # TODO FILTER NAME + Key + mail
if subtype == '':
pass
elif subtype == 'name':
pass
elif subtype == 'mail':
pass
return name_to_search
def search_cryptocurrency_by_name(name_to_search, subtype, r_pos=False):

View File

@ -1,6 +1,7 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import itertools
import json
import os
import re
import sys
@ -208,7 +209,7 @@ class Domain(AbstractObject):
def get_screenshot(self):
last_item = self.get_last_item_root()
if last_item:
screenshot = self._get_external_correlation('item', '', last_item, 'screenshot').get('screenshot')
screenshot = self.get_obj_correlations('item', '', last_item, ['screenshot']).get('screenshot')
if screenshot:
return screenshot.pop()[1:]
@ -391,7 +392,7 @@ class Domain(AbstractObject):
print(har)
_write_in_zip_buffer(zf, os.path.join(hars_dir, har), f'{basename}.json.gz')
# Screenshot
screenshot = self._get_external_correlation('item', '', item_id, 'screenshot')
screenshot = self.get_obj_correlations('item', '', item_id, ['screenshot'])
if screenshot and screenshot['screenshot']:
screenshot = screenshot['screenshot'].pop()[1:]
screenshot = os.path.join(screenshot[0:2], screenshot[2:4], screenshot[4:6], screenshot[6:8],
@ -410,6 +411,10 @@ class Domain(AbstractObject):
r_crawler.sadd(f'language:domains:{self.domain_type}:{language}', self.id)
r_crawler.sadd(f'domain:language:{self.id}', language)
def update_vanity_cluster(self):
if self.get_domain_type() == 'onion':
update_vanity_cluster(self.id)
############################################################################
############################################################################
@ -643,6 +648,82 @@ def api_search_domains_by_name(name_to_search, domain_types, meta=False, page=1)
################################################################################
################################################################################
# if __name__ == '__main__':
# dom = Domain('')
# dom.get_download_zip()
#### Vanity Explorer ####
# TODO ADD ME IN OBJ CLASS
def get_domain_vanity(domain, len_vanity=4):
    """Return the vanity prefix (first *len_vanity* characters) of a domain."""
    prefix = domain[:len_vanity]
    return prefix
def get_vanity_clusters(nb_min=4):
    """Return (vanity, score) pairs for 4-char onion vanities with score >= *nb_min*."""
    return r_crawler.zrange('vanity:onion:4', nb_min, '+inf', byscore=True, withscores=True)
def get_vanity_domains(vanity, len_vanity=4, meta=False):
    """Return the domains sharing the given vanity prefix.

    The default 4-char prefix uses the precomputed redis set; longer
    prefixes are filtered from their 4-char parent cluster. With
    ``meta=True``, domain meta dicts are returned instead of ids.
    """
    if len_vanity == 4:
        domains = r_crawler.smembers(f'vanity:{int(len_vanity)}:{vanity}')
    else:
        parent = vanity[:4]
        domains = [dom for dom in r_crawler.smembers(f'vanity:4:{parent}')
                   if get_domain_vanity(dom, len_vanity=len_vanity) == vanity]
    if not meta:
        return domains
    return [Domain(dom).get_meta(options={'languages', 'screenshot', 'tags_safe'})
            for dom in domains]
def get_vanity_cluster(vanity, len_vanity=4, nb_min=4):
    """Return vanity sub-clusters (prefix -> nb domains) under a 4-char vanity."""
    if len_vanity == 4:
        return get_vanity_clusters(nb_min=nb_min)
    counts = {}
    for domain in get_vanity_domains(vanity[:4], len_vanity=4):
        sub_vanity = get_domain_vanity(domain, len_vanity=len_vanity)
        counts[sub_vanity] = counts.get(sub_vanity, 0) + 1
    # keep only clusters with at least nb_min domains
    return {van: nb for van, nb in counts.items() if nb >= nb_min}
def get_vanity_nb_domains(vanity, len_vanity=4):
    """Return the number of domains in the given vanity cluster."""
    return r_crawler.scard(f'vanity:{int(len_vanity)}:{vanity}')
# TODO BUILD DICTIONARY
def update_vanity_cluster(domain):
    """Register *domain* in its 4-char vanity cluster and bump the cluster score."""
    vanity = get_domain_vanity(domain, len_vanity=4)
    if r_crawler.sadd(f'vanity:4:{vanity}', domain) == 1:
        # new member: increment the cluster score by one
        r_crawler.zadd('vanity:onion:4', {vanity: 1}, incr=True)
def _rebuild_vanity_clusters():
    """Drop and fully rebuild the onion vanity clusters from the up domains.

    Destructive: deletes every per-vanity set and the cluster zset before
    re-indexing all up onion domains.
    """
    for vanity in r_crawler.zrange('vanity:onion:4', 0, -1):
        r_crawler.delete(f'vanity:4:{vanity}')
    r_crawler.delete('vanity:onion:4')
    for domain in get_domains_up_by_type('onion'):
        update_vanity_cluster(domain)
def cluster_onion_domain_vanity(len_vanity=4):
    """Print onion vanity prefix occurrences, most common first (debug helper)."""
    domains = {}
    occurrences = {}
    for domain in get_domains_up_by_type('onion'):
        prefix = domain[:len_vanity]
        domains.setdefault(prefix, []).append(domain)
        occurrences[prefix] = occurrences.get(prefix, 0) + 1
    # print(json.dumps(domains))
    ranked = dict(sorted(occurrences.items(), key=lambda kv: kv[1], reverse=True))
    print(json.dumps(ranked))
if __name__ == '__main__':
_rebuild_vanity_clusters()

View File

@ -1,12 +1,14 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import base64
import mmh3
import os
import sys
from flask import url_for
from io import BytesIO
from flask import url_for
from pymisp import MISPObject
sys.path.append(os.environ['AIL_BIN'])
@ -18,6 +20,7 @@ from lib.objects.abstract_daterange_object import AbstractDaterangeObject, Abstr
config_loader = ConfigLoader()
r_objects = config_loader.get_db_conn("Kvrocks_Objects")
FAVICON_FOLDER = config_loader.get_files_directory('favicons')
baseurl = config_loader.get_config_str("Notifications", "ail_domain")
config_loader = None
@ -40,10 +43,6 @@ class Favicon(AbstractDaterangeObject):
# # TODO:
pass
def get_content(self, r_type='str'):
if r_type == 'str':
return self._get_field('content')
def get_link(self, flask_context=False):
if flask_context:
url = url_for('correlation.show_correlation', type=self.type, id=self.id)
@ -53,7 +52,31 @@ class Favicon(AbstractDaterangeObject):
# TODO # CHANGE COLOR
def get_svg_icon(self):
return {'style': 'fas', 'icon': '\uf20a', 'color': '#1E88E5', 'radius': 5} # f0c8 f45c
return {'style': 'fas', 'icon': '\uf089', 'color': '#E1F5D0', 'radius': 5} # f0c8 f45c f089
def get_rel_path(self):  # TODO: use murmur hash for the path sharding
    """Return the sharded relative path for this favicon (one char per level)."""
    fid = self.id
    parts = [fid[i:i + 1] for i in range(6)] + [fid[6:]]
    return os.path.join(*parts)
def get_filepath(self):
    """Return the absolute path of the favicon file on disk."""
    return os.path.realpath(os.path.join(FAVICON_FOLDER, self.get_rel_path()))
def get_file_content(self, r_type='str'):
filepath = self.get_filepath()
if r_type == 'str':
with open(filepath, 'rb') as f:
file_content = f.read()
b64 = base64.b64encode(file_content)
# b64 = base64.encodebytes(file_content)
return b64.decode()
elif r_type == 'io':
with open(filepath, 'rb') as f:
file_content = BytesIO(f.read())
return file_content
def get_content(self, r_type='str'):
return self.get_file_content()
def get_misp_object(self):
obj_attrs = []
@ -69,7 +92,7 @@ class Favicon(AbstractDaterangeObject):
f'Export error, None seen {self.type}:{self.subtype}:{self.id}, first={first_seen}, last={last_seen}')
obj_attrs.append(obj.add_attribute('favicon-mmh3', value=self.id))
obj_attrs.append(obj.add_attribute('favicon', value=self.get_content(r_type='bytes')))
obj_attrs.append(obj.add_attribute('favicon', value=self.get_content()))
for obj_attr in obj_attrs:
for tag in self.get_tags():
obj_attr.add_tag(tag)
@ -78,29 +101,32 @@ class Favicon(AbstractDaterangeObject):
def get_meta(self, options=set()):
meta = self._get_meta(options=options)
meta['id'] = self.id
meta['img'] = self.id
meta['tags'] = self.get_tags(r_list=True)
if 'content' in options:
meta['content'] = self.get_content()
if 'tags_safe' in options:
meta['tags_safe'] = self.is_tags_safe(meta['tags'])
return meta
# def get_links(self):
# # TODO GET ALL URLS FROM CORRELATED ITEMS
def create(self, content, _first_seen=None, _last_seen=None):
if not isinstance(content, str):
content = content.decode()
self._set_field('content', content)
def create(self, content): # TODO first seen / last seen options
filepath = self.get_filepath()
dirname = os.path.dirname(filepath)
if not os.path.exists(dirname):
os.makedirs(dirname)
with open(filepath, 'wb') as f:
f.write(content)
self._create()
def create_favicon(content, url=None): # TODO URL ????
if isinstance(content, str):
content = content.encode()
favicon_id = mmh3.hash_bytes(content)
def create(b_content, size_limit=5000000, b64=False, force=False):
if isinstance(b_content, str):
b_content = b_content.encode()
b64 = base64.encodebytes(b_content) # newlines inserted after every 76 bytes of output
favicon_id = str(mmh3.hash(b64))
favicon = Favicon(favicon_id)
if not favicon.exists():
favicon.create(content)
favicon.create(b_content)
return favicon
class Favicons(AbstractDaterangeObjects):
"""

View File

@ -2,6 +2,7 @@
# -*-coding:UTF-8 -*
import base64
import magic
import os
import sys
@ -50,7 +51,7 @@ class Image(AbstractDaterangeObject):
if flask_context:
url = url_for('correlation.show_correlation', type=self.type, id=self.id)
else:
url = f'{baseurl}/correlation/show?type={self.type}&id={self.id}'
url = f'/correlation/show?type={self.type}&id={self.id}'
return url
def get_svg_icon(self):
@ -64,6 +65,14 @@ class Image(AbstractDaterangeObject):
filename = os.path.join(IMAGE_FOLDER, self.get_rel_path())
return os.path.realpath(filename)
def is_gif(self, filepath=None):
if not filepath:
filepath = self.get_filepath()
mime = magic.from_file(filepath, mime=True)
if mime == 'image/gif':
return True
return False
def get_file_content(self):
filepath = self.get_filepath()
with open(filepath, 'rb') as f:
@ -71,7 +80,10 @@ class Image(AbstractDaterangeObject):
return file_content
def get_content(self, r_type='str'):
return self.get_file_content()
if r_type == 'str':
return None
else:
return self.get_file_content()
def get_misp_object(self):
obj_attrs = []
@ -106,6 +118,20 @@ class Image(AbstractDaterangeObject):
def get_screenshot_dir():
return IMAGE_FOLDER
def get_all_images():
images = []
for root, dirs, files in os.walk(get_screenshot_dir()):
for file in files:
path = f'{root}{file}'
image_id = path.replace(IMAGE_FOLDER, '').replace('/', '')
images.append(image_id)
return images
def get_all_images_objects(filters={}):
for image_id in get_all_images():
yield Image(image_id)
def create(content, size_limit=5000000, b64=False, force=False):
size = (len(content)*3) / 4
@ -131,5 +157,6 @@ class Images(AbstractDaterangeObjects):
# if __name__ == '__main__':
# print(json.dumps(get_all_images()))
# name_to_search = '29ba'
# print(search_screenshots_by_name(name_to_search))

View File

@ -305,6 +305,8 @@ class Item(AbstractObject):
meta['investigations'] = self.get_investigations()
if 'link' in options:
meta['link'] = self.get_link(flask_context=True)
if 'last_full_date' in options:
meta['last_full_date'] = f"{meta['date'][0:4]}-{meta['date'][5:7]}-{meta['date'][8:10]}"
# meta['encoding'] = None
return meta
@ -339,9 +341,9 @@ class Item(AbstractObject):
return {'nb': nb_line, 'max_length': max_length}
# TODO RENAME ME
def get_languages(self, min_len=600, num_langs=3, min_proportion=0.2, min_probability=0.7):
def get_languages(self, min_len=600, num_langs=3, min_proportion=0.2, min_probability=0.7, force_gcld3=False):
ld = LanguagesDetector(nb_langs=num_langs, min_proportion=min_proportion, min_probability=min_probability, min_len=min_len)
return ld.detect(self.get_content())
return ld.detect(self.get_content(), force_gcld3=force_gcld3)
def get_mimetype(self, content=None):
if not content:

View File

@ -71,6 +71,10 @@ class Message(AbstractObject):
def get_basename(self):
return os.path.basename(self.id)
def get_chat_instance(self):
c_id = self.id.split('/')
return c_id[0]
def get_content(self, r_type='str'): # TODO ADD cache # TODO Compress content ???????
"""
Returns content
@ -85,11 +89,16 @@ class Message(AbstractObject):
if r_type == 'str':
return content
elif r_type == 'bytes':
return content.encode()
if content:
return content.encode()
def get_date(self):
timestamp = self.get_timestamp()
return datetime.fromtimestamp(float(timestamp)).strftime('%Y%m%d')
return datetime.utcfromtimestamp(float(timestamp)).strftime('%Y%m%d')
def get_last_full_date(self):
timestamp = datetime.utcfromtimestamp(float(self.get_timestamp()))
return timestamp.strftime('%Y-%m-%d %H:%M:%S')
def get_timestamp(self):
dirs = self.id.split('/')
@ -102,9 +111,24 @@ class Message(AbstractObject):
return message_id
def get_chat_id(self): # TODO optimize -> use me to tag Chat
chat_id = self.get_basename().rsplit('_', 1)[0]
return chat_id
c_id = self.id.split('/')
return c_id[2]
def get_chat(self):
c_id = self.id.split('/')
return f'chat:{c_id[0]}:{c_id[2]}'
def get_subchannel(self):
subchannel = self.get_correlation('chat-subchannel')
if subchannel.get('chat-subchannel'):
return f'chat-subchannel:{subchannel["chat-subchannel"].pop()}'
def get_current_thread(self):
subchannel = self.get_correlation('chat-thread')
if subchannel.get('chat-thread'):
return f'chat-thread:{subchannel["chat-thread"].pop()}'
# children thread
def get_thread(self):
for child in self.get_childrens():
obj_type, obj_subtype, obj_id = child.split(':', 2)
@ -116,12 +140,15 @@ class Message(AbstractObject):
# TODO get channel ID
# TODO get thread ID
def _get_image_ocr(self, obj_id):
return bool(self.get_correlation('ocr').get('ocr'))
def get_images(self):
images = []
for child in self.get_childrens():
obj_type, _, obj_id = child.split(':', 2)
if obj_type == 'image':
images.append(obj_id)
images.append({'id': obj_id, 'ocr': self._get_image_ocr(obj_id)})
return images
def get_user_account(self, meta=False):
@ -175,30 +202,12 @@ class Message(AbstractObject):
# message media
# flag is deleted -> event or missing from feeder pass ???
def get_translation(self, content=None, source=None, target='fr'):
"""
Returns translated content
"""
# return self._get_field('translated')
global_id = self.get_global_id()
translation = r_cache.get(f'translation:{target}:{global_id}')
r_cache.expire(f'translation:{target}:{global_id}', 0)
if translation:
return translation
if not content:
content = self.get_content()
translation = Language.LanguageTranslator().translate(content, source=source, target=target)
if translation:
r_cache.set(f'translation:{target}:{global_id}', translation)
r_cache.expire(f'translation:{target}:{global_id}', 300)
return translation
def _set_translation(self, translation):
"""
Set translated content
"""
return self._set_field('translated', translation) # translation by hash ??? -> avoid translating multiple time
def get_language(self):
languages = self.get_languages()
if languages:
return languages.pop()
else:
return None
# def get_ail_2_ail_payload(self):
# payload = {'raw': self.get_gzip_content(b64=True)}
@ -236,7 +245,7 @@ class Message(AbstractObject):
# return r_object.hget(f'meta:item::{self.id}', 'url')
# options: set of optional meta fields
def get_meta(self, options=None, timestamp=None, translation_target='en'):
def get_meta(self, options=None, timestamp=None, translation_target=''):
"""
:type options: set
:type timestamp: float
@ -250,10 +259,12 @@ class Message(AbstractObject):
timestamp = self.get_timestamp()
else:
timestamp = float(timestamp)
timestamp = datetime.fromtimestamp(float(timestamp))
meta['date'] = timestamp.strftime('%Y/%m/%d')
timestamp = datetime.utcfromtimestamp(float(timestamp))
meta['date'] = timestamp.strftime('%Y-%m-%d')
meta['hour'] = timestamp.strftime('%H:%M:%S')
meta['full_date'] = timestamp.isoformat(' ')
if 'last_full_date' in options:
meta['last_full_date'] = meta['full_date']
meta['source'] = self.get_source()
# optional meta fields
@ -289,8 +300,16 @@ class Message(AbstractObject):
meta['files-names'] = self.get_files_names()
if 'reactions' in options:
meta['reactions'] = self.get_reactions()
if 'language' in options:
meta['language'] = self.get_language()
if 'translation' in options and translation_target:
meta['translation'] = self.translate(content=meta.get('content'), target=translation_target)
if meta.get('language'):
source = meta['language']
else:
source = None
meta['translation'] = self.translate(content=meta.get('content'), source=source, target=translation_target)
if 'language' in options:
meta['language'] = self.get_language()
# meta['encoding'] = None
return meta
@ -301,14 +320,30 @@ class Message(AbstractObject):
# content = self.get_content()
# translated = argostranslate.translate.translate(content, 'ru', 'en')
# # Save translation
# self._set_translation(translated)
# return translated
def create(self, content, translation=None, tags=[]):
## Language ##
def get_objs_container(self):
objs_containers = set()
# chat
objs_containers.add(self.get_chat())
subchannel = self.get_subchannel()
if subchannel:
objs_containers.add(subchannel)
thread = self.get_current_thread()
if thread:
objs_containers.add(thread)
return objs_containers
#- Language -#
def create(self, content, language=None, translation=None, tags=[]):
self._set_field('content', content)
# r_content.get(f'content:{self.type}:{self.get_subtype(r_str=True)}:{self.id}', content)
if translation:
self._set_translation(translation)
if not language and content:
language = self.detect_language()
if translation and content:
self.set_translation(language, translation)
for tag in tags:
self.add_tag(tag)
@ -339,7 +374,6 @@ def create(obj_id, content, translation=None, tags=[]):
message.create(content, translation=translation, tags=tags)
return message
# TODO Encode translation

336
bin/lib/objects/Ocrs.py Executable file
View File

@ -0,0 +1,336 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
from datetime import datetime
from io import BytesIO
from PIL import Image
from PIL import ImageDraw
from pymisp import MISPObject
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib.objects.abstract_daterange_object import AbstractDaterangeObject, AbstractDaterangeObjects
from lib.ConfigLoader import ConfigLoader
from packages import Date
# from lib import Language
# from lib.data_retention_engine import update_obj_date, get_obj_date_first
from flask import url_for
config_loader = ConfigLoader()
r_cache = config_loader.get_redis_conn("Redis_Cache")
r_object = config_loader.get_db_conn("Kvrocks_Objects")
baseurl = config_loader.get_config_str("Notifications", "ail_domain")
IMAGE_FOLDER = config_loader.get_files_directory('images')
config_loader = None
# Each OCR extraction is stored in a Redis SET (key: ocr:<id>), one member
# per detected text box, encoded as: x1,y1:x2,y2:x3,y3:x4,y4:extracted_text
class Ocr(AbstractDaterangeObject):
    """
    AIL OCR Object: text extracted from an image.
    The object id is the id of the source image (mirrors the image's
    on-disk path layout — see get_image_path()).
    """

    def __init__(self, id):
        super(Ocr, self).__init__('ocr', id)

    def exists(self):
        # The object exists as soon as its Redis set has at least one member
        return r_object.exists(f'ocr:{self.id}')

    def get_content(self, r_type='str'):
        """
        Return the extracted text reassembled in reading order.

        Boxes are grouped into visual lines by rounding their y coordinate
        to the nearest multiple of 20 px, then each line is sorted
        left-to-right by x. The reassembled text is cached in Redis for
        300 seconds under content:<global_id>.

        :param r_type: 'str' for the text, 'bytes' for its encoded form
        """
        global_id = self.get_global_id()
        content = r_cache.get(f'content:{global_id}')
        if not content:
            dict_content = {}
            for extracted in r_object.smembers(f'ocr:{self.id}'):
                extracted = extracted.split(':', 4)
                x, y = extracted[0].split(',', 1)
                # get text line, y +- 20
                rounded_y = round(int(y) / 20) * 20
                if rounded_y not in dict_content:
                    dict_content[rounded_y] = []
                dict_content[rounded_y].append((int(x), int(y), extracted[-1]))
            content = ''
            new_line = True
            l_key = sorted(dict_content.keys())
            for key in l_key:
                # order the boxes of this line left-to-right
                dict_content[key] = sorted(dict_content[key], key=lambda c: c[0])
                for text in dict_content[key]:
                    if new_line:
                        content = f'{content}{text[2]}'
                        new_line = False
                    else:
                        content = f'{content} {text[2]}'
                content = f'{content}\n'
                new_line = True
            # Set Cache
            if content:
                global_id = self.get_global_id()
                r_cache.set(f'content:{global_id}', content)
                r_cache.expire(f'content:{global_id}', 300)
        if r_type == 'str':
            return content
        elif r_type == 'bytes':
            if content:
                return content.encode()

    def get_date(self):  # TODO
        # Placeholder: always today's date, not the real extraction date
        return Date.get_today_date_str()

    def get_source(self):  # TODO
        """
        Returns source/feeder name (placeholder constant for now)
        """
        return 'ocr'
        # l_source = self.id.split('/')[:-2]
        # return os.path.join(*l_source)

    def get_basename(self):  # TODO
        return 'ocr'

    def get_language(self):
        # Return one detected language (arbitrary pick) or None
        languages = self.get_languages()
        if languages:
            return languages.pop()
        else:
            return None

    def get_link(self, flask_context=False):
        if flask_context:
            url = url_for('correlation.show_correlation', type=self.type, id=self.id)
        else:
            url = f'{baseurl}/correlation/show?type={self.type}&id={self.id}'
        return url

    def get_svg_icon(self):
        return {'style': 'fas', 'icon': '\uf065', 'color': 'yellow', 'radius': 5}

    def get_image_path(self):
        # The OCR id is the image id: rebuild the image's nested path
        # (2-char directory levels) under IMAGE_FOLDER
        rel_path = os.path.join(self.id[0:2], self.id[2:4], self.id[4:6], self.id[6:8], self.id[8:10], self.id[10:12], self.id[12:])
        filename = os.path.join(IMAGE_FOLDER, rel_path)
        return os.path.realpath(filename)

    def get_misp_object(self):  # TODO
        # NOTE(review): exports as 'instant-message' with no attributes yet —
        # the attribute list is still commented out below
        obj = MISPObject('instant-message', standalone=True)
        obj_date = self.get_date()
        if obj_date:
            obj.first_seen = obj_date
        else:
            self.logger.warning(
                f'Export error, None seen {self.type}:{self.subtype}:{self.id}, first={obj_date}')
        # obj_attrs = [obj.add_attribute('first-seen', value=obj_date),
        #              obj.add_attribute('raw-data', value=self.id, data=self.get_raw_content()),
        #              obj.add_attribute('sensor', value=get_ail_uuid())]
        obj_attrs = []
        for obj_attr in obj_attrs:
            for tag in self.get_tags():
                obj_attr.add_tag(tag)
        return obj

    # options: set of optional meta fields
    def get_meta(self, options=None, translation_target=''):
        """
        Return a meta dict for this object.

        :type options: set
        :param translation_target: language code; translation is only
            computed when 'translation' is in options and this is non-empty
        """
        if options is None:
            options = set()
        meta = self._get_meta(options=options)
        meta['tags'] = self.get_tags()
        meta['content'] = self.get_content()
        # optional meta fields
        if 'investigations' in options:
            meta['investigations'] = self.get_investigations()
        if 'link' in options:
            meta['link'] = self.get_link(flask_context=True)
        if 'icon' in options:
            meta['svg_icon'] = self.get_svg_icon()
        if 'img' in options:
            meta['img'] = self.draw_bounding_boxs()
        if 'map' in options:
            meta['map'] = self.get_img_map_coords()
        if 'language' in options:
            meta['language'] = self.get_language()
        if 'translation' in options and translation_target:
            # use the detected language as translation source when available
            if meta.get('language'):
                source = meta['language']
            else:
                source = None
            meta['translation'] = self.translate(content=meta.get('content'), source=source, target=translation_target)
        # NOTE(review): duplicate of the 'language' branch above — harmless
        # (recomputes the same field) but looks unintentional; confirm
        if 'language' in options:
            meta['language'] = self.get_language()
        return meta

    def get_objs_container(self):
        # Containers (chat / subchannel / thread) this OCR belongs to,
        # as 'type:subtype:id' global ids
        objs_containers = set()
        # chat
        objs_containers.add(self.get_first_correlation('chat'))
        subchannel = self.get_first_correlation('chat-subchannel')
        if subchannel:
            objs_containers.add(subchannel)
        thread = self.get_first_correlation('chat-thread')
        if thread:
            objs_containers.add(thread)
        return objs_containers

    def create_coord_str(self, bbox):
        # Serialize a 4-corner bounding box to 'x1,y1:x2,y2:x3,y3:x4,y4'
        c1, c2, c3, c4 = bbox
        x1, y1 = c1
        x2, y2 = c2
        x3, y3 = c3
        x4, y4 = c4
        return f'{int(x1)},{int(y1)}:{int(x2)},{int(y2)}:{int(x3)},{int(y3)}:{int(x4)},{int(y4)}'

    def _unpack_coord(self, coord):
        # 'x,y' -> ['x', 'y'] (strings, not ints)
        return coord.split(',', 1)

    def get_coords(self):
        # Return all bounding boxes as lists of four (x, y) int tuples
        coords = []
        for extracted in r_object.smembers(f'ocr:{self.id}'):
            coord = []
            bbox = extracted.split(':', 4)[:-1]
            for c in bbox:
                x, y = self._unpack_coord(c)
                coord.append((int(x), int(y)))
            coords.append(coord)
        return coords

    def get_img_map_coords(self):
        # Return (flat 'x1,y1,...,x4,y4' coord string, text) pairs,
        # suitable for an HTML image map
        coords = []
        for extracted in r_object.smembers(f'ocr:{self.id}'):
            extract = extracted.split(':', 4)
            x1, y1 = self._unpack_coord(extract[0])
            x2, y2 = self._unpack_coord(extract[1])
            x3, y3 = self._unpack_coord(extract[2])
            x4, y4 = self._unpack_coord(extract[3])
            coords.append((f'{x1},{y1},{x2},{y2},{x3},{y3},{x4},{y4}', extract[4]))
        return coords

    def edit_text(self, coordinates, text, new_text, new_coordinates=None):
        # TODO: not implemented yet
        pass

    def add_text(self, coordinates, text):
        # Add one '<coords>:<text>' member to the object's Redis set
        val = f'{coordinates}:{text}'
        return r_object.sadd(f'ocr:{self.id}', val)

    def remove_text(self, val):
        # val is the full '<coords>:<text>' set member
        return r_object.srem(f'ocr:{self.id}', val)

    def update_correlation(self, date=None):
        # Mirror the source image's correlations onto this OCR object
        if date:
            self.add(date, None)
        image_correl = self.get_obj_correlations('image', '', self.id)
        for obj_type in image_correl:
            if obj_type != 'ocr':
                for obj_raw in image_correl[obj_type]:
                    obj_subtype, obj_id = obj_raw.split(':', 1)
                    self.add_correlation(obj_type, obj_subtype, obj_id)

    def create(self, extracted_texts, tags=[]):
        """
        Store extracted text boxes; texts of length <= 1 are discarded.

        :param extracted_texts: iterable of (bbox, text) pairs
        :return: self.id if at least one box was stored, else None
        """
        # r_object.sadd(f'{self.type}:all', self.id)
        created = False
        for extracted in extracted_texts:
            bbox, text = extracted
            if len(text) > 1:
                str_coords = self.create_coord_str(bbox)
                self.add_text(str_coords, text)
                created = True
        if created:
            # Correlations
            self._copy_from('image', self.id)
            self.update_correlation()
            self.add_correlation('image', '', self.id)
            for tag in tags:
                self.add_tag(tag)
            return self.id

    # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\
    def delete(self):
        # Only removes the text-box set; correlations/tags are NOT cleaned up
        r_object.delete(f'ocr:{self.id}')

    def draw_bounding_boxs(self):
        # Render the source image with a yellow outline around each text box;
        # returns the PNG bytes
        img = Image.open(self.get_image_path()).convert("RGBA")
        draw = ImageDraw.Draw(img)
        for bbox in self.get_coords():
            c1, c2, c3, c4 = bbox
            draw.line((tuple(c1), tuple(c2)), fill="yellow", width=2)
            draw.line((tuple(c2), tuple(c3)), fill="yellow", width=2)
            draw.line((tuple(c3), tuple(c4)), fill="yellow", width=2)
            draw.line((tuple(c4), tuple(c1)), fill="yellow", width=2)
        # img.show()
        buff = BytesIO()
        img.save(buff, "PNG")
        return buff.getvalue()
def create(obj_id, detections, tags=[]):
    # Create an Ocr object from easyocr detections.
    # Returns the Ocr object when newly created with at least one kept
    # text box; returns None if it already exists or nothing was stored.
    obj = Ocr(obj_id)
    if not obj.exists():
        # Ocr.create() returns the id only if at least one box was stored
        obj_id = obj.create(detections, tags=tags)
        if obj_id:
            return obj
# TODO preload languages
def extract_text(image_path, languages, threshold=0.2):
    """Run easyocr on an image and return its confident detections.

    :param image_path: path of the image to process
    :param languages: iterable of easyocr language codes
    :param threshold: minimum confidence score to keep a detection
    :return: list of (bbox, text) tuples whose score exceeds *threshold*
    """
    import easyocr
    ocr_reader = easyocr.Reader(languages, verbose=False)
    detections = ocr_reader.readtext(image_path)
    # print(detections)
    kept = []
    for bbox, text, score in detections:
        if score > threshold:
            kept.append((bbox, text))
    return kept
def get_ocr_languages():
    """Return the set of ISO 639-1 language codes accepted for OCR."""
    codes = ('af ar as az be bg bh bs cs cy da de en es et fa fr ga hi hr hu '
             'id is it ja kn ko ku la lt lv mi mn mr ms mt ne nl no oc pi pl '
             'pt ro ru sk sl sq sr sv sw ta te th tl tr ug uk ur uz vi zh')
    return set(codes.split())
def sanityze_ocr_languages(languages, ocr_languages=None):
    """Filter language codes down to those supported for OCR.

    Codes whose easyocr model name differs from the ISO code are remapped:
    'zh' -> 'ch_sim', 'sr' -> 'rs_latin'. Unsupported codes are dropped.

    :param languages: iterable of ISO language codes to sanitize
    :param ocr_languages: optional set of allowed codes; defaults to
        get_ocr_languages()
    :return: set of easyocr-ready language codes
    """
    if not ocr_languages:
        ocr_languages = get_ocr_languages()
    remap = {'zh': 'ch_sim', 'sr': 'rs_latin'}
    return {remap.get(lang, lang) for lang in languages if lang in ocr_languages}
class Ocrs(AbstractDaterangeObjects):
    """
    Collection wrapper over all OCR objects (daterange collection).
    """

    def __init__(self):
        super(Ocrs, self).__init__('ocr', Ocr)

    def sanitize_id_to_search(self, name_to_search):
        # TODO: actually sanitize the search term
        return name_to_search
#### API ####

def api_get_ocr(obj_id, translation_target=None):
    """API endpoint: fetch the full meta of an OCR object.

    :return: (meta_dict, 200) on success, (error_dict, 404) if unknown
    """
    ocr_obj = Ocr(obj_id)
    if not ocr_obj.exists():
        return {"status": "error", "reason": "Unknown ocr"}, 404
    options = {'content', 'icon', 'img', 'language', 'link', 'map', 'translation'}
    return ocr_obj.get_meta(options, translation_target=translation_target), 200

View File

@ -49,9 +49,9 @@ class UserAccount(AbstractSubtypeObject):
def get_link(self, flask_context=False):
if flask_context:
url = url_for('correlation.show_correlation', type=self.type, subtype=self.subtype, id=self.id)
url = url_for('chats_explorer.objects_user_account', type=self.type, subtype=self.subtype, id=self.id)
else:
url = f'{baseurl}/correlation/show?type={self.type}&subtype={self.subtype}&id={self.id}'
url = f'{baseurl}/objects/user-account?&subtype={self.subtype}&id={self.id}'
return url
def get_svg_icon(self): # TODO change icon/color
@ -127,6 +127,13 @@ class UserAccount(AbstractSubtypeObject):
def update_username_timeline(self, username_global_id, timestamp):
self._get_timeline_username().add_timestamp(timestamp, username_global_id)
def get_messages(self):
messages = []
for mess in self.get_correlation('message'):
messages.append(f'message:{mess}')
return messages
def get_messages_by_chat_obj(self, chat_obj):
messages = []
for mess in self.get_correlation_iter_obj(chat_obj, 'message'):
@ -143,13 +150,14 @@ class UserAccount(AbstractSubtypeObject):
if meta['username']:
_, username_account_subtype, username_account_id = meta['username'].split(':', 3)
if 'username_meta' in options:
meta['username'] = Usernames.Username(username_account_id, username_account_subtype).get_meta()
meta['username'] = Usernames.Username(username_account_id, username_account_subtype).get_meta(options={'icon'})
else:
meta['username'] = {'type': 'username', 'subtype': username_account_subtype, 'id': username_account_id}
if 'usernames' in options:
meta['usernames'] = self.get_usernames()
if 'icon' in options:
meta['icon'] = self.get_icon()
meta['svg_icon'] = meta['icon']
if 'info' in options:
meta['info'] = self.get_info()
if 'translation' in options and translation_target:

View File

@ -51,8 +51,6 @@ class AbstractChatObject(AbstractSubtypeObject, ABC):
# get useraccount / username
# get users ?
# timeline name ????
# info
# created
# last imported/updated
# TODO get instance
@ -97,7 +95,7 @@ class AbstractChatObject(AbstractSubtypeObject, ABC):
def get_created_at(self, date=False):
created_at = self._get_field('created_at')
if date and created_at:
created_at = datetime.fromtimestamp(float(created_at))
created_at = datetime.utcfromtimestamp(float(created_at))
created_at = created_at.isoformat(' ')
return created_at
@ -176,7 +174,7 @@ class AbstractChatObject(AbstractSubtypeObject, ABC):
def get_nb_message_by_hours(self, date_day, nb_day):
hours = []
# start=0, end=23
timestamp = time.mktime(datetime.strptime(date_day, "%Y%m%d").timetuple())
timestamp = time.mktime(datetime.strptime(date_day, "%Y%m%d").utctimetuple())
for i in range(24):
timestamp_end = timestamp + 3600
nb_messages = r_object.zcount(f'messages:{self.type}:{self.subtype}:{self.id}', timestamp, timestamp_end)
@ -197,12 +195,42 @@ class AbstractChatObject(AbstractSubtypeObject, ABC):
week_date = Date.get_current_week_day()
return self.get_nb_message_by_week(week_date)
def get_message_meta(self, message, timestamp=None, translation_target='en'): # TODO handle file message
def get_nb_week_messages(self):
week = {}
# Init
for day in ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']:
week[day] = {}
for i in range(24):
week[day][i] = 0
# chat
for mess_t in r_object.zrange(f'messages:{self.type}:{self.subtype}:{self.id}', 0, -1, withscores=True):
timestamp = datetime.utcfromtimestamp(float(mess_t[1]))
date_name = timestamp.strftime('%a')
week[date_name][timestamp.hour] += 1
subchannels = self.get_subchannels()
for gid in subchannels:
for mess_t in r_object.zrange(f'messages:{gid}', 0, -1, withscores=True):
timestamp = datetime.utcfromtimestamp(float(mess_t[1]))
date_name = timestamp.strftime('%a')
week[date_name][timestamp.hour] += 1
stats = []
nb_day = 0
for day in week:
for hour in week[day]:
stats.append({'date': day, 'day': nb_day, 'hour': hour, 'count': week[day][hour]})
nb_day += 1
return stats
def get_message_meta(self, message, timestamp=None, translation_target='', options=None): # TODO handle file message
message = Messages.Message(message[9:])
meta = message.get_meta(options={'content', 'files-names', 'images', 'link', 'parent', 'parent_meta', 'reactions', 'thread', 'translation', 'user-account'}, timestamp=timestamp, translation_target=translation_target)
if not options:
options = {'content', 'files-names', 'images', 'language', 'link', 'parent', 'parent_meta', 'reactions', 'thread', 'translation', 'user-account'}
meta = message.get_meta(options=options, timestamp=timestamp, translation_target=translation_target)
return meta
def get_messages(self, start=0, page=-1, nb=500, unread=False, translation_target='en'): # threads ???? # TODO ADD last/first message timestamp + return page
def get_messages(self, start=0, page=-1, nb=500, unread=False, options=None, translation_target='en'): # threads ???? # TODO ADD last/first message timestamp + return page
# TODO return message meta
tags = {}
messages = {}
@ -220,11 +248,11 @@ class AbstractChatObject(AbstractSubtypeObject, ABC):
mess, pagination = self._get_messages(nb=nb, page=page)
for message in mess:
timestamp = message[1]
date_day = datetime.fromtimestamp(timestamp).strftime('%Y/%m/%d')
date_day = datetime.utcfromtimestamp(timestamp).strftime('%Y/%m/%d')
if date_day != curr_date:
messages[date_day] = []
curr_date = date_day
mess_dict = self.get_message_meta(message[0], timestamp=timestamp, translation_target=translation_target)
mess_dict = self.get_message_meta(message[0], timestamp=timestamp, translation_target=translation_target, options=options)
messages[date_day].append(mess_dict)
if mess_dict.get('tags'):
@ -279,6 +307,9 @@ class AbstractChatObject(AbstractSubtypeObject, ABC):
def get_nb_participants(self):
return self.get_nb_correlation('user-account')
def get_user_messages(self, user_id):
return self.get_correlation_iter('user-account', self.subtype, user_id, 'message')
# TODO move me to abstract subtype
class AbstractChatObjects(ABC):
def __init__(self, type):

View File

@ -71,7 +71,7 @@ class AbstractDaterangeObject(AbstractObject, ABC):
else:
return last_seen
def get_nb_seen(self): # TODO REPLACE ME -> correlation image
def get_nb_seen(self): # TODO REPLACE ME -> correlation image chats
return self.get_nb_correlation('item') + self.get_nb_correlation('message')
def get_nb_seen_by_date(self, date):
@ -88,6 +88,8 @@ class AbstractDaterangeObject(AbstractObject, ABC):
meta_dict['nb_seen'] = self.get_nb_seen()
if 'sparkline' in options:
meta_dict['sparkline'] = self.get_sparkline()
if 'last_full_date'in options:
meta_dict['last_full_date'] = self.get_last_full_date()
return meta_dict
def set_first_seen(self, first_seen):
@ -125,6 +127,20 @@ class AbstractDaterangeObject(AbstractObject, ABC):
def _add_create(self):
r_object.sadd(f'{self.type}:all', self.id)
def _copy_from(self, obj_type, obj_id):
first_seen = r_object.hget(f'meta:{obj_type}:{obj_id}', 'first_seen')
last_seen = r_object.hget(f'meta:{obj_type}:{obj_id}', 'last_seen')
if first_seen and last_seen:
for date in Date.get_daterange(first_seen, last_seen):
nb = r_object.zscore(f'{obj_type}:date:{date}', self.id)
if nb:
r_object.zincrby(f'{self.type}:date:{date}', nb, self.id)
update_obj_date(first_seen, self.type)
update_obj_date(last_seen, self.type)
self._add_create()
self.set_first_seen(first_seen)
self.set_last_seen(last_seen)
def _add(self, date, obj): # TODO OBJ=None
if not self.exists():
self._add_create()
@ -229,6 +245,17 @@ class AbstractDaterangeObjects(ABC):
def sanitize_content_to_search(self, content_to_search):
return content_to_search
def get_contents_ids(self):
titles = {}
for obj_id in self.get_ids():
obj = self.obj_class(obj_id)
content = obj.get_content()
if content not in titles:
titles[content] = []
for domain in obj.get_correlation('domain').get('domain', []):
titles[content].append(domain[1:])
return titles
def search_by_content(self, content_to_search, r_pos=False, case_sensitive=True):
objs = {}
if case_sensitive:

View File

@ -25,7 +25,7 @@ from lib import Duplicate
from lib.correlations_engine import get_nb_correlations, get_correlations, add_obj_correlation, delete_obj_correlation, delete_obj_correlations, exists_obj_correlation, is_obj_correlated, get_nb_correlation_by_correl_type, get_obj_inter_correlation
from lib.Investigations import is_object_investigated, get_obj_investigations, delete_obj_investigations
from lib.relationships_engine import get_obj_nb_relationships, add_obj_relationship
from lib.Language import get_obj_translation
from lib.Language import get_obj_languages, add_obj_language, remove_obj_language, detect_obj_language, get_obj_language_stats, get_obj_translation, set_obj_translation, delete_obj_translation, get_obj_main_language
from lib.Tracker import is_obj_tracked, get_obj_trackers, delete_obj_trackers
logging.config.dictConfig(ail_logger.get_config(name='ail'))
@ -67,12 +67,15 @@ class AbstractObject(ABC):
def get_global_id(self):
return f'{self.get_type()}:{self.get_subtype(r_str=True)}:{self.get_id()}'
def get_last_full_date(self):
return None
def get_default_meta(self, tags=False, link=False):
dict_meta = {'id': self.get_id(),
'type': self.get_type(),
'subtype': self.get_subtype(r_str=True)}
if tags:
dict_meta['tags'] = self.get_tags()
dict_meta['tags'] = self.get_tags(r_list=True)
if link:
dict_meta['link'] = self.get_link()
return dict_meta
@ -222,11 +225,11 @@ class AbstractObject(ABC):
## Correlation ##
def _get_external_correlation(self, req_type, req_subtype, req_id, obj_type):
def get_obj_correlations(self, obj_type, obj_subtype, obj_id, filter_types=[]):
"""
Get object correlation
"""
return get_correlations(req_type, req_subtype, req_id, filter_types=[obj_type])
return get_correlations(obj_type, obj_subtype, obj_id, filter_types=filter_types)
def get_correlation(self, obj_type):
"""
@ -234,6 +237,11 @@ class AbstractObject(ABC):
"""
return get_correlations(self.type, self.subtype, self.id, filter_types=[obj_type])
def get_first_correlation(self, obj_type):
correlation = self.get_correlation(obj_type)
if correlation.get(obj_type):
return f'{obj_type}:{correlation[obj_type].pop()}'
def get_correlations(self, filter_types=[], unpack=False):
"""
Get object correlations
@ -302,15 +310,51 @@ class AbstractObject(ABC):
## -Relationship- ##
## Translation ##
def get_objs_container(self):
return set()
## Language ##
def get_languages(self):
return get_obj_languages(self.type, self.get_subtype(r_str=True), self.id)
def add_language(self, language):
return add_obj_language(language, self.type, self.get_subtype(r_str=True), self.id, objs_containers=self.get_objs_container())
def remove_language(self, language):
return remove_obj_language(language, self.type, self.get_subtype(r_str=True), self.id, objs_containers=self.get_objs_container())
def edit_language(self, old_language, new_language):
if old_language:
self.remove_language(old_language)
self.add_language(new_language)
def detect_language(self, field=''):
return detect_obj_language(self.type, self.get_subtype(r_str=True), self.id, self.get_content(), objs_containers=self.get_objs_container())
def get_obj_language_stats(self):
return get_obj_language_stats(self.type, self.get_subtype(r_str=True), self.id)
def get_main_language(self):
return get_obj_main_language(self.type, self.get_subtype(r_str=True), self.id)
def get_translation(self, language, field=''):
return get_obj_translation(self.get_global_id(), language, field=field, objs_containers=self.get_objs_container())
def set_translation(self, language, translation, field=''):
return set_obj_translation(self.get_global_id(), language, translation, field=field)
def delete_translation(self, language, field=''):
return delete_obj_translation(self.get_global_id(), language, field=field)
def translate(self, content=None, field='', source=None, target='en'):
    """Translate this object's content into ``target``.

    :param content: text to translate; defaults to the object's own content
    :param field: optional sub-field of the object to translate
    :param source: source language, or None for auto-detection
    :param target: target language code (default 'en')

    Fix: the stale pre-refactor call ``get_obj_translation(global_id,
    content, ...)`` returned unconditionally, leaving the updated call
    (which passes the target language second and the containers, matching
    get_translation above) unreachable. Only the updated call is kept.
    """
    global_id = self.get_global_id()
    if not content:
        content = self.get_content()
    return get_obj_translation(global_id, target, source=source, content=content, field=field, objs_containers=self.get_objs_container())
## -Translation- ##
## -Language- ##
## Parent ##

View File

@ -85,6 +85,11 @@ class AbstractSubtypeObject(AbstractObject, ABC):
else:
return int(nb)
def get_last_full_date(self):
    """Return the last-seen date formatted as 'YYYY-MM-DD', or None if the
    object was never seen."""
    raw = self.get_last_seen()
    if not raw:
        return None
    return '-'.join((raw[:4], raw[4:6], raw[6:8]))
def _get_meta(self, options=None):
if options is None:
options = set()

View File

@ -7,13 +7,18 @@ sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib.exceptions import AILObjectUnknown
from lib.ConfigLoader import ConfigLoader
from lib.ail_core import get_all_objects, get_object_all_subtypes
from lib.ail_core import get_all_objects, get_object_all_subtypes, get_objects_with_subtypes, get_default_correlation_objects
from lib import correlations_engine
from lib import relationships_engine
from lib import btc_ail
from lib import Language
from lib import Tag
from lib import chats_viewer
from lib.objects import Chats
from lib.objects import ChatSubChannels
from lib.objects import ChatThreads
@ -23,16 +28,17 @@ from lib.objects.Cves import Cve
from lib.objects.Decodeds import Decoded, get_all_decodeds_objects, get_nb_decodeds_objects
from lib.objects.Domains import Domain
from lib.objects import Etags
from lib.objects.Favicons import Favicon
from lib.objects import Favicons
from lib.objects import FilesNames
from lib.objects import HHHashs
from lib.objects.Items import Item, get_all_items_objects, get_nb_items_objects
from lib.objects import Images
from lib.objects.Messages import Message
from lib.objects import Messages
from lib.objects import Ocrs
from lib.objects import Pgps
from lib.objects.Screenshots import Screenshot
from lib.objects import Titles
from lib.objects.UsersAccount import UserAccount
from lib.objects import UsersAccount
from lib.objects import Usernames
config_loader = ConfigLoader()
@ -43,64 +49,123 @@ config_loader = None
def is_valid_object_type(obj_type):
    """Check whether ``obj_type`` is a known AIL object type."""
    return obj_type in get_all_objects()
def is_object_subtype(obj_type):
    """Check whether ``obj_type`` is a type that uses subtypes
    (e.g. chat, cryptocurrency, pgp, username)."""
    return obj_type in get_objects_with_subtypes()
def is_valid_object_subtype(obj_type, subtype):
    """Check whether ``subtype`` is a valid subtype of ``obj_type``."""
    return subtype in get_object_all_subtypes(obj_type)
def sanitize_objs_types(objs, default=False):
    """Filter ``objs`` down to valid AIL object type names.

    :param objs: iterable of candidate object type names
    :param default: when True and no candidate is valid, fall back to the
        default correlation object types instead of every known type

    Fix: the diff residue left a duplicate ``def`` line (old signature)
    and the stale unconditional ``get_all_objects()`` fallback interleaved
    with the new ``default``-aware fallback; only the current version is
    kept.
    """
    l_types = [obj for obj in objs if is_valid_object_type(obj)]
    if not l_types:
        if default:
            l_types = get_default_correlation_objects()
        else:
            l_types = get_all_objects()
    return l_types
#### OBJECT ####
def get_object(obj_type, subtype, obj_id):
    """Instantiate the AIL object of the given type.

    :param obj_type: object type name (e.g. 'item', 'message', 'chat')
    :param subtype: subtype for subtyped objects (e.g. chat platform),
        falsy for types without subtypes
    :param obj_id: object identifier
    :raises AILObjectUnknown: when ``obj_type`` is not a known type

    Fix: the diff residue interleaved the old flat dispatch with the new
    subtype-split dispatch, duplicating branches and leaving a second
    unreachable ``raise Exception``; only the current version (with the
    'ocr' type and AILObjectUnknown) is kept.
    """
    if not subtype:
        if obj_type == 'item':
            return Item(obj_id)
        elif obj_type == 'domain':
            return Domain(obj_id)
        elif obj_type == 'decoded':
            return Decoded(obj_id)
        elif obj_type == 'cookie-name':
            return CookiesNames.CookieName(obj_id)
        elif obj_type == 'cve':
            return Cve(obj_id)
        elif obj_type == 'etag':
            return Etags.Etag(obj_id)
        elif obj_type == 'favicon':
            return Favicons.Favicon(obj_id)
        elif obj_type == 'file-name':
            return FilesNames.FileName(obj_id)
        elif obj_type == 'hhhash':
            return HHHashs.HHHash(obj_id)
        elif obj_type == 'image':
            return Images.Image(obj_id)
        elif obj_type == 'message':
            return Messages.Message(obj_id)
        elif obj_type == 'ocr':
            return Ocrs.Ocr(obj_id)
        elif obj_type == 'screenshot':
            return Screenshot(obj_id)
        elif obj_type == 'title':
            return Titles.Title(obj_id)
        else:
            raise AILObjectUnknown(f'Unknown AIL object: {obj_type} {subtype} {obj_id}')
    # SUBTYPES
    else:
        if obj_type == 'chat':
            return Chats.Chat(obj_id, subtype)
        elif obj_type == 'chat-subchannel':
            return ChatSubChannels.ChatSubChannel(obj_id, subtype)
        elif obj_type == 'chat-thread':
            return ChatThreads.ChatThread(obj_id, subtype)
        elif obj_type == 'cryptocurrency':
            return CryptoCurrencies.CryptoCurrency(obj_id, subtype)
        elif obj_type == 'pgp':
            return Pgps.Pgp(obj_id, subtype)
        elif obj_type == 'user-account':
            return UsersAccount.UserAccount(obj_id, subtype)
        elif obj_type == 'username':
            return Usernames.Username(obj_id, subtype)
        else:
            raise AILObjectUnknown(f'Unknown AIL object: {obj_type} {subtype} {obj_id}')
def get_objects(objects):
def exists_obj(obj_type, subtype, obj_id):
    """Check whether the given object exists in the datastore."""
    obj = get_object(obj_type, subtype, obj_id)
    return obj.exists() if obj else False
#### API ####
def api_get_object(obj_type, obj_subtype, obj_id):
    """API endpoint: fetch an object's metadata.

    Validates type/subtype/id, then returns a ``(json_dict, http_status)``
    tuple: metadata with 200, or an error payload with 400/404.
    """
    if not obj_id:
        return {'status': 'error', 'reason': 'Invalid object id'}, 400
    if not is_valid_object_type(obj_type):
        return {'status': 'error', 'reason': 'Invalid object type'}, 400
    if obj_subtype and not is_valid_object_subtype(obj_type, obj_subtype):
        return {'status': 'error', 'reason': 'Invalid object subtype'}, 400
    obj = get_object(obj_type, obj_subtype, obj_id)
    if not obj.exists():
        return {'status': 'error', 'reason': 'Object Not Found'}, 404
    # Meta fields exposed through the API
    options = {'chat', 'content', 'created_at', 'files-names', 'icon', 'images', 'info', 'nb_participants', 'parent', 'parent_meta', 'reactions', 'thread', 'user-account', 'username', 'subchannels', 'threads'}
    return obj.get_meta(options=options), 200
def api_get_object_type_id(obj_type, obj_id):
    """API endpoint: fetch an object by type and id.

    For subtyped types the ``obj_id`` is expected in the form
    '<subtype>/<id>' and is split accordingly.

    Fix: the subtype/id split was applied to ``obj_type`` instead of
    ``obj_id`` -- the type name contains no '/', so the split either
    raised ValueError or produced a nonsense subtype. Split ``obj_id``.
    """
    if not is_valid_object_type(obj_type):
        return {'status': 'error', 'reason': 'Invalid object type'}, 400
    if is_object_subtype(obj_type):
        subtype, obj_id = obj_id.split('/', 1)
    else:
        subtype = None
    return api_get_object(obj_type, subtype, obj_id)
def api_get_object_global_id(global_id):
    """API endpoint: fetch an object from its global id
    '<type>:<subtype>:<id>' (the id part may itself contain ':')."""
    parts = global_id.split(':', 2)
    return api_get_object(*parts)
#### --API-- ####
#########################################################################################
#########################################################################################
#########################################################################################
def get_objects(objects): # TODO RENAME ME
objs = set()
for obj in objects:
if isinstance(obj, dict):
@ -108,7 +173,7 @@ def get_objects(objects):
obj_subtype = obj['subtype']
obj_id = obj['id']
if 'lvl' in obj:
correl_objs = get_obj_correlations_objs(obj_type, obj_subtype, obj_id, lvl=obj['lvl'])
correl_objs = get_obj_correlations_objs(obj_type, obj_subtype, obj_id, lvl=int(obj['lvl']))
objs = objs.union(correl_objs)
else:
obj_type, obj_subtype, obj_id = obj
@ -119,14 +184,6 @@ def get_objects(objects):
return ail_objects
def exists_obj(obj_type, subtype, obj_id):
    # NOTE(review): duplicate of the exists_obj definition earlier in this
    # module (diff/merge residue). Being defined later, this copy is the
    # one in effect at import time; consider removing one of the two.
    obj = get_object(obj_type, subtype, obj_id)
    if obj:
        return obj.exists()
    else:
        return False
def get_obj_global_id(obj_type, subtype, obj_id):
    """Return the global id string of the given object."""
    return get_object(obj_type, subtype, obj_id).get_global_id()
@ -199,8 +256,9 @@ def get_objects_meta(objs, options=set(), flask_context=False):
def get_object_card_meta(obj_type, subtype, id, related_btc=False):
obj = get_object(obj_type, subtype, id)
meta = obj.get_meta()
meta['icon'] = obj.get_svg_icon()
meta = obj.get_meta(options={'chat', 'chats', 'created_at', 'icon', 'info', 'map', 'nb_messages', 'nb_participants', 'threads', 'username'})
# meta['icon'] = obj.get_svg_icon()
meta['svg_icon'] = obj.get_svg_icon()
if subtype or obj_type == 'cookie-name' or obj_type == 'cve' or obj_type == 'etag' or obj_type == 'title' or obj_type == 'favicon' or obj_type == 'hhhash':
meta['sparkline'] = obj.get_sparkline()
if obj_type == 'cve':
@ -218,6 +276,34 @@ def get_object_card_meta(obj_type, subtype, id, related_btc=False):
meta["add_tags_modal"] = Tag.get_modal_add_tags(obj.id, obj.get_type(), obj.get_subtype(r_str=True))
return meta
#### OBJ LANGUAGES ####
def api_detect_language(obj_type, subtype, obj_id):
    """API endpoint: run language detection on an object.

    Returns ``({'language': ...}, 200)`` on success or an error payload
    with 404 when the object does not exist.
    """
    obj = get_object(obj_type, subtype, obj_id)
    if not obj.exists():
        return {"status": "error", "reason": "Unknown obj"}, 404
    return {"language": obj.detect_language()}, 200
def api_manually_translate(obj_type, subtype, obj_id, source, translation_target, translation):
    """API endpoint: manually set an object's source language and,
    optionally, a translation.

    :param source: language of the object's content (must be known)
    :param translation_target: language of the provided ``translation``
    :param translation: manual translation text; optional, size-capped
    Returns ``(error_payload, status)`` on failure or ``(None, 200)``.
    """
    obj = get_object(obj_type, subtype, obj_id)
    if not obj.exists():
        return {"status": "error", "reason": "Unknown obj"}, 404
    # Size check done up front, before touching the object's language
    if translation:
        if len(translation) > 200000:  # TODO REVIEW LIMIT
            return {"status": "error", "reason": "Max Size reached"}, 400
    all_languages = Language.get_translation_languages()
    if source not in all_languages:
        return {"status": "error", "reason": "Unknown source Language"}, 400
    # Re-label the object's language if the caller disagrees with it
    # NOTE(review): assumes the object exposes get_language() -- confirm
    # this holds for every type accepted here
    obj_language = obj.get_language()
    if obj_language != source:
        obj.edit_language(obj_language, source)
    if translation:
        if translation_target not in all_languages:
            return {"status": "error", "reason": "Unknown target Language"}, 400
        obj.set_translation(translation_target, translation)
        # TODO SANITIZE translation
    return None, 200
#### OBJ FILTERS ####
@ -239,10 +325,15 @@ def is_filtered(obj, filters):
def obj_iterator(obj_type, filters):
    """Return an iterator over all objects of ``obj_type`` matching
    ``filters``.

    NOTE(review): falls through and returns None (not an empty iterator)
    for unsupported object types -- callers must handle that case.
    """
    if obj_type == 'decoded':
        return get_all_decodeds_objects(filters=filters)
    elif obj_type == 'image':
        return Images.get_all_images_objects(filters=filters)
    elif obj_type == 'item':
        return get_all_items_objects(filters=filters)
    elif obj_type == 'pgp':
        return Pgps.get_all_pgps_objects(filters=filters)
    elif obj_type == 'message':
        return chats_viewer.get_messages_iterator(filters=filters)
def card_objs_iterators(filters):
nb = 0
@ -257,6 +348,8 @@ def card_obj_iterator(obj_type, filters):
return get_nb_items_objects(filters=filters)
elif obj_type == 'pgp':
return Pgps.nb_all_pgps_objects(filters=filters)
elif obj_type == 'message':
return chats_viewer.get_nb_messages_iterator(filters=filters)
def get_ui_obj_tag_table_keys(obj_type): # TODO REMOVE ME
"""
@ -387,7 +480,7 @@ def _get_obj_correlations_objs(objs, obj_type, subtype, obj_id, filter_types, lv
def get_obj_correlations_objs(obj_type, subtype, obj_id, filter_types=[], lvl=0, nb_max=300, objs_hidden=set()):
    """Collect the objects correlated with the given object, recursing up
    to ``lvl`` levels and capped at ``nb_max`` objects.

    ``lvl`` is cast to int because API callers may pass it as a string.
    NOTE(review): mutable default arguments are kept as-is for interface
    compatibility; they appear to be read-only here -- confirm in
    _get_obj_correlations_objs.

    Fix: the diff residue left both the old call (without int()) and the
    new one, so the correlation graph was traversed twice; only the
    current call is kept.
    """
    objs = set()
    _get_obj_correlations_objs(objs, obj_type, subtype, obj_id, filter_types, int(lvl), nb_max, objs_hidden)
    return objs
def obj_correlations_objs_add_tags(obj_type, subtype, obj_id, tags, filter_types=[], lvl=0, nb_max=300, objs_hidden=set()):

View File

@ -22,11 +22,8 @@ REGEX_JOIN_HASH = re.compile(r'[0-9a-zA-z-]+')
## ##
def save_item_correlation(username, item_id, item_date):
Username.save_item_correlation('telegram', username, item_id, item_date)
def save_telegram_invite_hash(invite_hash, obj_global_id):
    """Store a discovered Telegram invite code together with the global id
    of the object it was found in ('<invite_hash>;<obj_global_id>').

    Fix: the diff residue kept the superseded item_id-based definition
    directly above the current one; only the current definition is kept.
    """
    r_obj.sadd('telegram:invite_code', f'{invite_hash};{obj_global_id}')
def get_data_from_telegram_url(base_url, url_path):
dict_url = {}

View File

@ -61,7 +61,7 @@ class ApiKey(AbstractModule):
if google_api_key:
print(f'found google api key: {to_print}')
self.redis_logger.warning(f'{to_print}Checked {len(google_api_key)} found Google API Key;{item.get_id()}')
self.redis_logger.warning(f'{to_print}Checked {len(google_api_key)} found Google API Key;{self.obj.get_global_id()}')
tag = 'infoleak:automatic-detection="google-api-key"'
self.add_message_to_queue(message=tag, queue='Tags')
@ -69,10 +69,10 @@ class ApiKey(AbstractModule):
# # TODO: # FIXME: AWS regex/validate/sanitize KEY + SECRET KEY
if aws_access_key:
print(f'found AWS key: {to_print}')
self.redis_logger.warning(f'{to_print}Checked {len(aws_access_key)} found AWS Key;{item.get_id()}')
self.redis_logger.warning(f'{to_print}Checked {len(aws_access_key)} found AWS Key;{self.obj.get_global_id()}')
if aws_secret_key:
print(f'found AWS secret key')
self.redis_logger.warning(f'{to_print}Checked {len(aws_secret_key)} found AWS secret Key;{item.get_id()}')
self.redis_logger.warning(f'{to_print}Checked {len(aws_secret_key)} found AWS secret Key;{self.obj.get_global_id()}')
tag = 'infoleak:automatic-detection="aws-key"'
self.add_message_to_queue(message=tag, queue='Tags')

View File

@ -89,7 +89,7 @@ class Categ(AbstractModule):
# Search for pattern categories in obj content
for categ, pattern in self.categ_words:
if obj.type == 'message':
if obj.type == 'message' or obj.type == 'ocr':
self.add_message_to_queue(message='0', queue=categ)
else:

View File

@ -103,11 +103,11 @@ class Credential(AbstractModule):
print(message)
to_print = f'Credential;{item.get_source()};{item.get_date()};{item.get_basename()};{message};{item.get_id()}'
to_print = f'Credential;{item.get_source()};{item.get_date()};{item.get_basename()};{message};{self.obj.get_global_id()}'
# num of creds above threshold, publish an alert
if nb_cred > self.criticalNumberToAlert:
print(f"========> Found more than 10 credentials in this file : {item.get_id()}")
print(f"========> Found more than 10 credentials in this file : {self.obj.get_global_id()}")
self.redis_logger.warning(to_print)
tag = 'infoleak:automatic-detection="credential"'

View File

@ -58,9 +58,9 @@ class CreditCards(AbstractModule):
if lib_refine.is_luhn_valid(clean_card):
return clean_card
def extract(self, obj_id, content, tag):
def extract(self, obj, content, tag):
extracted = []
cards = self.regex_finditer(self.regex, obj_id, content)
cards = self.regex_finditer(self.regex, obj.get_global_id(), content)
for card in cards:
start, end, value = card
if self.get_valid_card(value):
@ -86,7 +86,7 @@ class CreditCards(AbstractModule):
# print(creditcard_set)
to_print = f'CreditCard;{item.get_source()};{item.get_date()};{item.get_basename()};'
if creditcard_set:
mess = f'{to_print}Checked {len(creditcard_set)} valid number(s);{item.id}'
mess = f'{to_print}Checked {len(creditcard_set)} valid number(s);{self.obj.get_global_id()}'
print(mess)
self.redis_logger.warning(mess)
@ -96,7 +96,7 @@ class CreditCards(AbstractModule):
if r_result:
return creditcard_set
else:
self.redis_logger.info(f'{to_print}CreditCard related;{item.id}')
self.redis_logger.info(f'{to_print}CreditCard related;{self.obj.get_global_id()}')
if __name__ == '__main__':

View File

@ -92,7 +92,13 @@ CURRENCIES = {
'regex': r'\b(?<![+/=])X[A-Za-z0-9]{33}(?![+/=])\b',
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="dash-address"',
}
},
'tron': {
'name': 'tron', # e.g. TYdds9VLDjUshf9tbsXSfGUZNzJSbbBeat
'regex': r'\b(?<![+/=])T[0-9a-zA-Z]{33}(?![+/=])\b',
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="tron-address"',
},
}
##################################
##################################
@ -149,7 +155,7 @@ class Cryptocurrencies(AbstractModule, ABC):
item.get_date(),
item.get_basename())
self.redis_logger.warning('{}Detected {} {} private key;{}'.format(
to_print, len(private_keys), currency['name'], item_id))
to_print, len(private_keys), currency['name'], self.obj.get_global_id()))
else:
private_keys = []

View File

@ -56,7 +56,7 @@ class CveModule(AbstractModule):
cve = Cves.Cve(cve_id)
cve.add(date, item)
warning = f'{item_id} contains CVEs {cves}'
warning = f'{self.obj.get_global_id()} contains CVEs {cves}'
print(warning)
self.redis_logger.warning(warning)

View File

@ -82,20 +82,20 @@ class DomClassifier(AbstractModule):
localizeddomains = self.dom_classifier.include(expression=self.cc_tld)
if localizeddomains:
print(localizeddomains)
self.redis_logger.warning(f"DomainC;{item_source};{item_date};{item_basename};Checked {localizeddomains} located in {self.cc_tld};{item.get_id()}")
self.redis_logger.warning(f"DomainC;{item_source};{item_date};{item_basename};Checked {localizeddomains} located in {self.cc_tld};{self.obj.get_global_id()}")
if self.cc:
localizeddomains = self.dom_classifier.localizedomain(cc=self.cc)
if localizeddomains:
print(localizeddomains)
self.redis_logger.warning(f"DomainC;{item_source};{item_date};{item_basename};Checked {localizeddomains} located in {self.cc};{item.get_id()}")
self.redis_logger.warning(f"DomainC;{item_source};{item_date};{item_basename};Checked {localizeddomains} located in {self.cc};{self.obj.get_global_id()}")
if r_result:
return self.dom_classifier.vdomain
except IOError as err:
self.redis_logger.error(f"Duplicate;{item_source};{item_date};{item_basename};CRC Checksum Failed")
raise Exception(f"CRC Checksum Failed on: {item.get_id()}")
raise Exception(f"CRC Checksum Failed on: {self.obj.get_global_id()}")
if __name__ == "__main__":

View File

@ -92,10 +92,10 @@ class Duplicates(AbstractModule):
Duplicate.save_object_hash(algo, curr_date_ymonth, self.algos[algo]['hash'], item.get_id())
if nb_duplicates:
self.redis_logger.info(f'Duplicate;{item.get_source()};{item.get_date()};{item.get_basename()};Detected {nb_duplicates};{item.get_id()}')
self.redis_logger.info(f'Duplicate;{item.get_source()};{item.get_date()};{item.get_basename()};Detected {nb_duplicates};{self.obj.get_global_id()}')
y = time.time()
print(f'{item.get_id()} Processed in {y-x} sec')
print(f'{self.obj.get_global_id()} Processed in {y-x} sec')
# self.redis_logger.debug('{}Processed in {} sec'.format(to_print, y-x))

View File

@ -42,13 +42,17 @@ class Exif(AbstractModule):
img_exif = img.getexif()
print(img_exif)
if img_exif:
self.logger.critical(f'Exif: {self.get_obj().id}')
gps = img_exif.get(34853)
print(gps)
self.logger.critical(f'gps: {gps}')
for key, val in img_exif.items():
if key in ExifTags.TAGS:
print(f'{ExifTags.TAGS[key]}:{val}')
self.logger.critical(f'{ExifTags.TAGS[key]}:{val}')
else:
print(f'{key}:{val}')
self.logger.critical(f'{key}:{val}')
sys.exit(0)
# tag = 'infoleak:automatic-detection="cve"'

View File

@ -81,10 +81,9 @@ class Global(AbstractModule):
def compute(self, message, r_result=False): # TODO move OBJ ID sanitization to importer
# Recovering the streamed message infos
gzip64encoded = message
if self.obj.type == 'item':
if gzip64encoded:
if message:
# Creating the full filepath
filename = os.path.join(self.ITEMS_FOLDER, self.obj.id)
@ -97,7 +96,7 @@ class Global(AbstractModule):
else:
# Decode compressed base64
decoded = base64.standard_b64decode(gzip64encoded)
decoded = base64.standard_b64decode(message)
new_file_content = self.gunzip_bytes_obj(filename, decoded)
# TODO REWRITE ME
@ -105,6 +104,11 @@ class Global(AbstractModule):
filename = self.check_filename(filename, new_file_content)
if filename:
new_obj_id = filename.replace(self.ITEMS_FOLDER, '', 1)
new_obj = Item(new_obj_id)
new_obj.sanitize_id()
self.set_obj(new_obj)
# create subdir
dirname = os.path.dirname(filename)
if not os.path.exists(dirname):
@ -124,11 +128,11 @@ class Global(AbstractModule):
else:
self.logger.info(f"Empty Item: {message} not processed")
elif self.obj.type == 'message':
elif self.obj.type == 'message' or self.obj.type == 'ocr':
# TODO send to specific object queue => image, ...
self.add_message_to_queue(obj=self.obj, queue='Item')
elif self.obj.type == 'image':
self.add_message_to_queue(obj=self.obj, queue='Image')
self.add_message_to_queue(obj=self.obj, queue='Image', message=message)
else:
self.logger.critical(f"Empty obj: {self.obj} {message} not processed")

View File

@ -82,8 +82,8 @@ class IPAddress(AbstractModule):
matching_ips.append(address)
if len(matching_ips) > 0:
self.logger.info(f'{item.get_id()} contains {len(matching_ips)} IPs')
self.redis_logger.warning(f'{item.get_id()} contains {item.get_id()} IPs')
self.logger.info(f'{self.obj.get_global_id()} contains {len(matching_ips)} IPs')
self.redis_logger.warning(f'{self.obj.get_global_id()} contains IPs')
# Tag message with IP
tag = 'infoleak:automatic-detection="ip"'

View File

@ -62,9 +62,9 @@ class Iban(AbstractModule):
return True
return False
def extract(self, obj_id, content, tag):
def extract(self, obj, content, tag):
extracted = []
ibans = self.regex_finditer(self.iban_regex, obj_id, content)
ibans = self.regex_finditer(self.iban_regex, obj.get_global_id(), content)
for iban in ibans:
start, end, value = iban
value = ''.join(e for e in value if e.isalnum())
@ -95,7 +95,7 @@ class Iban(AbstractModule):
# Statistics.add_module_tld_stats_by_date('iban', date, iban[0:2], 1)
to_print = f'Iban;{item.get_source()};{item.get_date()};{item.get_basename()};'
self.redis_logger.warning(f'{to_print}Checked found {len(valid_ibans)} IBAN;{item_id}')
self.redis_logger.warning(f'{to_print}Checked found {len(valid_ibans)} IBAN;{self.obj.get_global_id()}')
# Tags
tag = 'infoleak:automatic-detection="iban"'
self.add_message_to_queue(message=tag, queue='Tags')

View File

@ -63,7 +63,7 @@ class Keys(AbstractModule):
get_pgp_content = False
if KeyEnum.PGP_MESSAGE.value in content:
self.redis_logger.warning(f'{item.get_basename()} has a PGP enc message')
self.redis_logger.warning(f'{self.obj.get_global_id()} has a PGP enc message')
tag = 'infoleak:automatic-detection="pgp-message"'
self.add_message_to_queue(message=tag, queue='Tags')
@ -81,21 +81,21 @@ class Keys(AbstractModule):
get_pgp_content = True
if KeyEnum.PGP_PRIVATE_KEY_BLOCK.value in content:
self.redis_logger.warning(f'{item.get_basename()} has a pgp private key block message')
self.redis_logger.warning(f'{self.obj.get_global_id()} has a pgp private key block message')
tag = 'infoleak:automatic-detection="pgp-private-key"'
self.add_message_to_queue(message=tag, queue='Tags')
get_pgp_content = True
if KeyEnum.CERTIFICATE.value in content:
self.redis_logger.warning(f'{item.get_basename()} has a certificate message')
self.redis_logger.warning(f'{self.obj.get_global_id()} has a certificate message')
tag = 'infoleak:automatic-detection="certificate"'
self.add_message_to_queue(message=tag, queue='Tags')
# find = True
if KeyEnum.RSA_PRIVATE_KEY.value in content:
self.redis_logger.warning(f'{item.get_basename()} has a RSA private key message')
self.redis_logger.warning(f'{self.obj.get_global_id()} has a RSA private key message')
print('rsa private key message found')
tag = 'infoleak:automatic-detection="rsa-private-key"'
@ -103,7 +103,7 @@ class Keys(AbstractModule):
# find = True
if KeyEnum.PRIVATE_KEY.value in content:
self.redis_logger.warning(f'{item.get_basename()} has a private key message')
self.redis_logger.warning(f'{self.obj.get_global_id()} has a private key message')
print('private key message found')
tag = 'infoleak:automatic-detection="private-key"'
@ -111,7 +111,7 @@ class Keys(AbstractModule):
# find = True
if KeyEnum.ENCRYPTED_PRIVATE_KEY.value in content:
self.redis_logger.warning(f'{item.get_basename()} has an encrypted private key message')
self.redis_logger.warning(f'{self.obj.get_global_id()} has an encrypted private key message')
print('encrypted private key message found')
tag = 'infoleak:automatic-detection="encrypted-private-key"'
@ -119,7 +119,7 @@ class Keys(AbstractModule):
# find = True
if KeyEnum.OPENSSH_PRIVATE_KEY.value in content:
self.redis_logger.warning(f'{item.get_basename()} has an openssh private key message')
self.redis_logger.warning(f'{self.obj.get_global_id()} has an openssh private key message')
print('openssh private key message found')
tag = 'infoleak:automatic-detection="private-ssh-key"'
@ -127,7 +127,7 @@ class Keys(AbstractModule):
# find = True
if KeyEnum.SSH2_ENCRYPTED_PRIVATE_KEY.value in content:
self.redis_logger.warning(f'{item.get_basename()} has an ssh2 private key message')
self.redis_logger.warning(f'{self.obj.get_global_id()} has an ssh2 private key message')
print('SSH2 private key message found')
tag = 'infoleak:automatic-detection="private-ssh-key"'
@ -135,7 +135,7 @@ class Keys(AbstractModule):
# find = True
if KeyEnum.OPENVPN_STATIC_KEY_V1.value in content:
self.redis_logger.warning(f'{item.get_basename()} has an openssh private key message')
self.redis_logger.warning(f'{self.obj.get_global_id()} has an openssh private key message')
print('OpenVPN Static key message found')
tag = 'infoleak:automatic-detection="vpn-static-key"'
@ -143,21 +143,21 @@ class Keys(AbstractModule):
# find = True
if KeyEnum.DSA_PRIVATE_KEY.value in content:
self.redis_logger.warning(f'{item.get_basename()} has a dsa private key message')
self.redis_logger.warning(f'{self.obj.get_global_id()} has a dsa private key message')
tag = 'infoleak:automatic-detection="dsa-private-key"'
self.add_message_to_queue(message=tag, queue='Tags')
# find = True
if KeyEnum.EC_PRIVATE_KEY.value in content:
self.redis_logger.warning(f'{item.get_basename()} has an ec private key message')
self.redis_logger.warning(f'{self.obj.get_global_id()} has an ec private key message')
tag = 'infoleak:automatic-detection="ec-private-key"'
self.add_message_to_queue(message=tag, queue='Tags')
# find = True
if KeyEnum.PUBLIC_KEY.value in content:
self.redis_logger.warning(f'{item.get_basename()} has a public key message')
self.redis_logger.warning(f'{self.obj.get_global_id()} has a public key message')
tag = 'infoleak:automatic-detection="public-key"'
self.add_message_to_queue(message=tag, queue='Tags')

View File

@ -30,9 +30,13 @@ class Languages(AbstractModule):
if obj.type == 'item':
if obj.is_crawled():
domain = Domain(obj.get_domain())
for lang in obj.get_languages(min_probability=0.8):
for lang in obj.get_languages(min_probability=0.8, force_gcld3=True):
print(lang)
domain.add_language(lang)
# Detect Chat Message Language
# elif obj.type == 'message':
# lang = obj.detect_language()
# print(self.obj.id, lang)
if __name__ == '__main__':

View File

@ -70,7 +70,7 @@ class LibInjection(AbstractModule):
print(f"Detected (libinjection) SQL in URL: {item_id}")
print(unquote(url))
to_print = f'LibInjection;{item.get_source()};{item.get_date()};{item.get_basename()};Detected SQL in URL;{item_id}'
to_print = f'LibInjection;{item.get_source()};{item.get_date()};{item.get_basename()};Detected SQL in URL;{self.obj.get_global_id()}'
self.redis_logger.warning(to_print)
# Add tag

View File

@ -57,7 +57,7 @@ class MISP_Thehive_Auto_Push(AbstractModule):
Tag.set_auto_push_status('misp', 'ConnectionError')
else:
Tag.set_auto_push_status('misp', '')
self.logger.info('MISP Pushed:', tag, '->', item_id)
self.logger.info(f'MISP Pushed: {tag} -> {item_id}')
if 'thehive' in self.tags:
if tag in self.tags['thehive']:
@ -68,7 +68,7 @@ class MISP_Thehive_Auto_Push(AbstractModule):
Tag.set_auto_push_status('thehive', 'Request Entity Too Large')
else:
Tag.set_auto_push_status('thehive', '')
self.logger.info('thehive Pushed:', tag, '->', item_id)
self.logger.info(f'thehive Pushed: {tag} -> {item_id}')
if __name__ == "__main__":

View File

@ -26,7 +26,6 @@ sys.path.append(os.environ['AIL_BIN'])
# Import Project packages #
##################################
from modules.abstract_module import AbstractModule
from lib.objects.Items import Item
from lib.ConfigLoader import ConfigLoader
# from lib import Statistics
@ -118,10 +117,10 @@ class Mail(AbstractModule):
print(e)
return valid_mxdomain
def extract(self, obj_id, content, tag):
def extract(self, obj, content, tag):
extracted = []
mxdomains = {}
mails = self.regex_finditer(self.email_regex, obj_id, content)
mails = self.regex_finditer(self.email_regex, obj.get_global_id(), content)
for mail in mails:
start, end, value = mail
mxdomain = value.rsplit('@', 1)[1].lower()
@ -172,7 +171,7 @@ class Mail(AbstractModule):
# for tld in mx_tlds:
# Statistics.add_module_tld_stats_by_date('mail', item_date, tld, mx_tlds[tld])
msg = f'Mails;{item.get_source()};{item_date};{item.get_basename()};Checked {num_valid_email} e-mail(s);{item.id}'
msg = f'Mails;{item.get_source()};{item_date};{item.get_basename()};Checked {num_valid_email} e-mail(s);{self.obj.get_global_id()}'
if num_valid_email > self.mail_threshold:
print(f'{item.id} Checked {num_valid_email} e-mail(s)')
self.redis_logger.warning(msg)

View File

@ -218,7 +218,7 @@ class Mixer(AbstractModule):
if self.obj.type == 'item':
self.add_message_to_queue(obj=self.obj, message=gzip64encoded)
else:
self.add_message_to_queue(obj=self.obj)
self.add_message_to_queue(obj=self.obj, message=gzip64encoded)
if __name__ == "__main__":

132
bin/modules/OcrExtractor.py Executable file
View File

@ -0,0 +1,132 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
The OcrExtractor Module
======================
"""
##################################
# Import External packages
##################################
import cv2
import os
import sys
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from modules.abstract_module import AbstractModule
from lib.ConfigLoader import ConfigLoader
from lib import chats_viewer
from lib.objects import Messages
from lib.objects import Ocrs
# Default to eng
def get_model_languages(obj, add_en=True):
    """Guess the OCR model languages for an image object.

    Checks the image's correlations in priority order -- message, then
    chat sub-channel, then chat -- and returns as soon as a language is
    found. English is included by default as a fallback.

    :param obj: object exposing ``get_first_correlation``
    :param add_en: when True, always include 'en' in the result
    :return: set of language codes (possibly just {'en'}, or empty)

    Improvement: the chat-subchannel and chat branches were copy-pasted
    duplicates; they are folded into a single loop (behavior unchanged).
    """
    if add_en:
        model_languages = {'en'}
    else:
        model_languages = set()

    # Prefer the language of the correlated message, if one was detected
    ob = obj.get_first_correlation('message')
    if ob:
        lang = Messages.Message(ob.split(':', 2)[-1]).get_language()
        if lang:
            model_languages.add(lang)
            return model_languages

    # Fall back to the main language of the sub-channel, then the chat
    for chat_type in ('chat-subchannel', 'chat'):
        ob = obj.get_first_correlation(chat_type)
        if ob:
            lang = chats_viewer.get_obj_chat_from_global_id(ob).get_main_language()
            if lang:
                model_languages.add(lang)
                return model_languages

    return model_languages
# TODO thread
class OcrExtractor(AbstractModule):
    """
    OcrExtractor module for the AIL framework: runs OCR on Image objects
    and creates the corresponding Ocr objects. Images already known to
    contain no text are skipped via a 24h Redis cache.
    """

    def __init__(self):
        super(OcrExtractor, self).__init__()

        # Waiting time in seconds between two processed messages
        self.pending_seconds = 1

        config_loader = ConfigLoader()
        # Cache of image ids where OCR found no text
        self.r_cache = config_loader.get_redis_conn("Redis_Cache")

        # Languages supported by the OCR engine
        self.ocr_languages = Ocrs.get_ocr_languages()

        # Send module state to logs
        self.logger.info(f'Module {self.module_name} initialized')

    def is_cached(self):
        # True if OCR already ran on this image and found no text
        return self.r_cache.exists(f'ocr:no:{self.obj.id}')

    def add_to_cache(self):
        # Remember for 24h (86400s) that this image yielded no text
        self.r_cache.setex(f'ocr:no:{self.obj.id}', 86400, 0)

    def compute(self, message):
        image = self.get_obj()
        # The queue message carries the date -- TODO confirm format (YYYYMMDD?)
        date = message
        ocr = Ocrs.Ocr(image.id)

        # Skip images already known to contain no text
        if self.is_cached():
            return None

        # GIFs are not OCR-ed
        if self.obj.is_gif():
            self.logger.warning(f'Ignoring GIF: {self.obj.id}')
            return None

        if not ocr.exists():
            path = image.get_filepath()
            # Pick OCR languages from correlated message/chat, restricted
            # to what the OCR engine supports
            languages = get_model_languages(image)
            languages = Ocrs.sanityze_ocr_languages(languages, ocr_languages=self.ocr_languages)
            print(image.id, languages)
            try:
                texts = Ocrs.extract_text(path, languages)
            except (OSError, ValueError, cv2.error) as e:
                # Unreadable/invalid image: tag as false positive, no retry
                self.logger.warning(e)
                self.obj.add_tag('infoleak:confirmed="false-positive"')
                texts = None
            if texts:
                print('create')
                ocr = Ocrs.create(image.id, texts)
                if ocr:
                    # NOTE(review): ocr is passed positionally as the queue
                    # *message*; other modules pass obj=... -- confirm intended
                    self.add_message_to_queue(ocr)
                else:
                    print('no text')
                    self.add_to_cache()
                    # Save in cache
            else:
                print('no text detected')
                self.add_to_cache()
        else:
            # Ocr object already exists: only refresh its date correlation
            # print(image.id)
            # print('update correlation', date)
            ocr.update_correlation(date=date)
if __name__ == '__main__':
    # Instantiate the module and enter its processing loop
    OcrExtractor().run()

View File

@ -55,9 +55,9 @@ class Onion(AbstractModule):
# TEMP var: SAVE I2P Domain (future I2P crawler)
# self.save_i2p = config_loader.get_config_boolean("Onion", "save_i2p")
def extract(self, obj_id, content, tag):
def extract(self, obj, content, tag):
extracted = []
onions = self.regex_finditer(self.onion_regex, obj_id, content)
onions = self.regex_finditer(self.onion_regex, obj.get_global_id(), content)
for onion in onions:
start, end, value = onion
url_unpack = crawlers.unpack_url(value)
@ -98,8 +98,8 @@ class Onion(AbstractModule):
print(f'{domain} added to crawler queue: {task_uuid}')
else:
to_print = f'Onion;{item.get_source()};{item.get_date()};{item.get_basename()};'
print(f'{to_print}Detected {len(domains)} .onion(s);{item.get_id()}')
self.redis_logger.warning(f'{to_print}Detected {len(domains)} .onion(s);{item.get_id()}')
print(f'{to_print}Detected {len(domains)} .onion(s);{self.obj.get_global_id()}')
self.redis_logger.warning(f'{to_print}Detected {len(domains)} .onion(s);{self.obj.get_global_id()}')
# TAG Item
tag = 'infoleak:automatic-detection="onion"'

View File

@ -56,7 +56,7 @@ class Pasties(AbstractModule):
with open(domains_pasties) as f:
for line in f:
url = line.strip()
if url: # TODO validate line
if url: # TODO validate line
self.faup.decode(url)
url_decoded = self.faup.get()
host = url_decoded['host']
@ -135,7 +135,7 @@ class Pasties(AbstractModule):
if path.startswith(url_path):
if url_path != path and url_path != path_end:
print('send to crawler', url_path, url)
self.send_to_crawler(url, self.obj.id))
self.send_to_crawler(url, self.obj.id)
break

View File

@ -41,9 +41,9 @@ class Phone(AbstractModule):
# Waiting time in seconds between to message processed
self.pending_seconds = 1
def extract(self, obj_id, content, tag):
def extract(self, obj, content, tag):
extracted = []
phones = self.regex_phone_iter('ZZ', obj_id, content)
phones = self.regex_phone_iter('ZZ', obj.get_global_id(), content)
for phone in phones:
extracted.append([phone[0], phone[1], phone[2], f'tag:{tag}'])
return extracted
@ -62,7 +62,7 @@ class Phone(AbstractModule):
tag = 'infoleak:automatic-detection="phone-number"'
self.add_message_to_queue(message=tag, queue='Tags')
self.redis_logger.warning(f'{item.get_id()} contains {len(phone)} Phone numbers')
self.redis_logger.warning(f'{self.obj.get_global_id()} contains {len(phone)} Phone numbers')
# # List of the regex results in the Item, may be null
# results = self.REG_PHONE.findall(content)

View File

@ -51,13 +51,13 @@ class SQLInjectionDetection(AbstractModule):
self.faup.decode(url)
url_parsed = self.faup.get()
print(f"Detected SQL in URL: {item_id}")
print(f"Detected SQL in URL: {item.id}")
print(urllib.request.unquote(url))
to_print = f'SQLInjection;{item.get_source()};{item.get_date()};{item.get_basename()};Detected SQL in URL;{item_id}'
to_print = f'SQLInjection;{item.get_source()};{item.get_date()};{item.get_basename()};Detected SQL in URL;{self.obj.get_global_id()}'
self.redis_logger.warning(to_print)
# Tag
tag = f'infoleak:automatic-detection="sql-injection";{item_id}'
tag = f'infoleak:automatic-detection="sql-injection"'
self.add_message_to_queue(message=tag, queue='Tags')
# statistics

View File

@ -41,7 +41,7 @@ class Tags(AbstractModule):
# Create a new tag
item.add_tag(tag)
print(f'{item.get_id()}: Tagged {tag}')
print(f'{self.obj.get_global_id()}: Tagged {tag}')
# Forward message to channel
self.add_message_to_queue(message=tag, queue='Tag_feed')

View File

@ -62,7 +62,7 @@ class Telegram(AbstractModule):
print(f'username: {user_id}')
invite_hash = dict_url.get('invite_hash')
if invite_hash:
telegram.save_telegram_invite_hash(invite_hash, item.id)
telegram.save_telegram_invite_hash(invite_hash, self.obj.get_global_id())
print(f'invite code: {invite_hash}')
invite_code_found = True

View File

@ -78,7 +78,7 @@ class Urls(AbstractModule):
except AttributeError:
url = url_decoded['url']
print(url, item.get_id())
print(url, self.obj.get_global_id())
self.add_message_to_queue(message=str(url), queue='Url')
self.logger.debug(f"url_parsed: {url}")

View File

@ -76,6 +76,14 @@ class AbstractModule(ABC):
def get_obj(self):
return self.obj
def set_obj(self, new_obj):
if self.obj:
old_id = self.obj.id
self.obj = new_obj
self.queue.rename_message_obj(self.obj.id, old_id)
else:
self.obj = new_obj
def get_message(self):
"""
Get message from the Redis Queue (QueueIn)
@ -171,7 +179,10 @@ class AbstractModule(ABC):
trace = traceback.format_tb(err.__traceback__)
trace = ''.join(trace)
self.logger.critical(f"Error in module {self.module_name}: {__name__} : {err}")
self.logger.critical(f"Module {self.module_name} input message: {message}")
if message:
self.logger.critical(f"Module {self.module_name} input message: {message}")
if self.obj:
self.logger.critical(f"{self.module_name} Obj: {self.obj.get_global_id()}")
self.logger.critical(trace)
if isinstance(err, ModuleQueueError):

View File

@ -1,6 +1,8 @@
#!/usr/bin/python3
import datetime
import time
from calendar import monthrange
from dateutil.rrule import rrule, MONTHLY
from dateutil.relativedelta import relativedelta
@ -90,6 +92,18 @@ def get_current_week_day():
start = dt - datetime.timedelta(days=dt.weekday())
return start.strftime("%Y%m%d")
def get_current_utc_full_time():
timestamp = datetime.datetime.fromtimestamp(time.time())
return timestamp.strftime('%Y-%m-%d %H:%M:%S')
def get_month_dates(date=None):
if date:
date = convert_date_str_to_datetime(date)
else:
date = datetime.date.today()
num_days = monthrange(date.year, date.month)[1]
return [datetime.date(date.year, date.month, day).strftime("%Y%m%d") for day in range(1, num_days+1)]
def get_date_week_by_date(date):
dt = datetime.date(int(date[0:4]), int(date[4:6]), int(date[6:8]))
start = dt - datetime.timedelta(days=dt.weekday())
@ -249,3 +263,9 @@ def sanitise_daterange(date_from, date_to, separator='', date_type='str'):
date_from = date_to
date_to = res
return date_from, date_to
def get_previous_month_date():
now = datetime.date.today()
first = now.replace(day=1)
last_month = first - datetime.timedelta(days=1)
return last_month.strftime("%Y%m%d")

View File

@ -88,6 +88,9 @@ class Retro_Hunt_Module(AbstractModule):
for obj in ail_objects.obj_iterator(obj_type, filters):
self.obj = obj
content = obj.get_content(r_type='bytes')
if not content:
continue
rule.match(data=content, callback=self.yara_rules_match,
which_callbacks=yara.CALLBACK_MATCHES, timeout=timeout)
@ -128,7 +131,7 @@ class Retro_Hunt_Module(AbstractModule):
self.redis_logger.info(f'{self.module_name}, Retro hunt {task_uuid} match found: {obj_id}')
print(f'Retro hunt {task_uuid} match found: {self.obj.get_type()} {obj_id}')
self.retro_hunt.add(self.obj.get_type(), self.obj.get_subtype(), obj_id)
self.retro_hunt.add(self.obj.get_type(), self.obj.get_subtype(r_str=True), obj_id)
# TODO FILTER Tags

View File

@ -116,8 +116,8 @@ class Tracker_Regex(AbstractModule):
if ail_objects.is_filtered(obj, filters):
continue
print(f'new tracked regex found: {tracker_name} in {obj_id}')
self.redis_logger.warning(f'new tracked regex found: {tracker_name} in {obj_id}')
print(f'new tracked regex found: {tracker_name} in {self.obj.get_global_id()}')
self.redis_logger.warning(f'new tracked regex found: {tracker_name} in {self.obj.get_global_id()}')
tracker.add(obj.get_type(), obj.get_subtype(r_str=True), obj_id)

View File

@ -93,7 +93,7 @@ class Tracker_Term(AbstractModule):
try:
dict_words_freq = Tracker.get_text_word_frequency(content)
except TimeoutException:
self.redis_logger.warning(f"{obj.get_id()} processing timeout")
self.redis_logger.warning(f"{self.obj.get_global_id()} processing timeout")
else:
signal.alarm(0)
@ -124,8 +124,8 @@ class Tracker_Term(AbstractModule):
if ail_objects.is_filtered(obj, filters):
continue
print(f'new tracked term {tracker_uuid} found: {tracker_name} in {obj_id}')
self.redis_logger.warning(f'new tracked term found: {tracker_name} in {obj_id}')
print(f'new tracked term {tracker_uuid} found: {tracker_name} in {self.obj.get_global_id()}')
self.redis_logger.warning(f'new tracked term found: {tracker_name} in {self.obj.get_global_id()}')
tracker.add(obj.get_type(), obj.get_subtype(), obj_id)

View File

@ -75,8 +75,8 @@ class Tracker_Typo_Squatting(AbstractModule):
if ail_objects.is_filtered(obj, filters):
continue
print(f'new tracked typosquatting found: {tracked} in {obj_id}')
self.redis_logger.warning(f'tracker typosquatting: {tracked} in {obj_id}')
print(f'new tracked typosquatting found: {tracked} in {self.obj.get_global_id()}')
self.redis_logger.warning(f'tracker typosquatting: {tracked} in {self.obj.get_global_id()}')
tracker.add(obj.get_type(), obj.get_subtype(r_str=True), obj_id)

View File

@ -62,13 +62,15 @@ class Tracker_Yara(AbstractModule):
return None
content = self.obj.get_content(r_type='bytes')
if not content:
return None
try:
yara_match = self.rules[obj_type].match(data=content, callback=self.yara_rules_match,
which_callbacks=yara.CALLBACK_MATCHES, timeout=60)
if yara_match:
self.redis_logger.warning(f'tracker yara: new match {self.obj.get_id()}: {yara_match}')
print(f'{self.obj.get_id()}: {yara_match}')
self.redis_logger.warning(f'tracker yara: new match {self.obj.get_global_id()}: {yara_match}')
print(f'{self.obj.get_global_id()}: {yara_match}')
except yara.TimeoutError:
print(f'{self.obj.get_id()}: yara scanning timed out')
self.redis_logger.info(f'{self.obj.get_id()}: yara scanning timed out')

View File

@ -903,6 +903,7 @@ namespace.cor ail_correls
namespace.crawl ail_crawlers
namespace.db ail_datas
namespace.dup ail_dups
namespace.lg ail_langs
namespace.obj ail_objs
namespace.rel ail_rels
namespace.stat ail_stats

View File

@ -7,6 +7,7 @@ crawled = crawled
har = CRAWLED_SCREENSHOT
screenshot = CRAWLED_SCREENSHOT/screenshot
images = IMAGES
favicons = FAVICONS
wordtrending_csv = var/www/static/csv/wordstrendingdata
wordsfile = files/wordfile
@ -191,6 +192,11 @@ host = localhost
port = 6383
password = ail_crawlers
[Kvrocks_Languages]
host = localhost
port = 6383
password = ail_langs
[Kvrocks_Objects]
host = localhost
port = 6383

View File

@ -162,6 +162,9 @@ publish = Tags
subscribe = Image
publish = Tags
[OcrExtractor]
subscribe = Image
publish = Item
######## CORE ########

View File

@ -52,6 +52,8 @@ Available Importers:
```
git clone https://github.com/cvandeplas/pystemon.git
```
Clone it into the same directory as AIL if you wish to launch it via the AIL launcher.
2. Edit configuration file for pystemon ```pystemon/pystemon.yaml```:
- Configure the storage section according to your needs:
@ -80,7 +82,7 @@ Available Importers:
```shell
cd ail-framework/
. ./AILENV/bin/activate
cd pystemon/
cd ../pystemon/
pip install -U -r requirements.txt
```
4. Edit the configuration file ```ail-framework/configs/core.cfg```:
@ -189,9 +191,119 @@ from GHArchive, collect and feed AIL
- [ail-feeder-leak](https://github.com/ail-project/ail-feeder-leak): Automates the process of feeding files to AIL, using data chunking to handle large files.
- [ail-feeder-atom-rss](https://github.com/ail-project/ail-feeder-atom-rss) Atom and RSS feeder for AIL.
- [ail-feeder-jsonlogs](https://github.com/ail-project/ail-feeder-jsonlogs) Aggregate JSON log lines and pushes them to AIL.
### AIL Chats Feeders List:
- [ail-feeder-discord](https://github.com/ail-project/ail-feeder-discord) Discord Feeder.
- [ail-feeder-telegram](https://github.com/ail-project/ail-feeder-telegram) Telegram Channels and User Feeder.
### Chats Message
Overview of the JSON fields used by the Chat feeder.
```
{
"data": "New NFT Scam available,"
"meta": {
"chat": {
"date": {
"datestamp": "2023-01-10 08:19:16",
"timestamp": 1673870217.0,
"timezone": "UTC"
},
"icon": "AAAAAAAA",
"id": 123456,
"info": "",
"name": "NFT legit",
"subchannel": {
"date": {
"datestamp": "2023-08-10 08:19:18",
"timestamp": 1691655558.0,
"timezone": "UTC"
},
"id": 285,
"name": "Market"
}
},
"date": {
"datestamp": "2024-02-01 13:43:46",
"timestamp": 1707139999.0,
"timezone": "UTC"
},
"id": 16,
"reply_to": {
"message_id": 12
},
"sender": {
"first_name": "nftmaster",
"icon": "AAAAAAAA",
"id": 5684,
"info": "best legit NFT vendor",
"username": "nft_best"
},
"type": "message"
},
"source": "ail_feeder_telegram",
"source-uuid": "9cde0855-248b-4439-b964-0495b9b2b8bb"
}
```
#### 1. "data"
- Content of the message.
#### 2. "meta"
- Provides metadata about the message.
##### "type":
- Indicates the type of message. It can be either "message" or "image".
##### "id":
- The unique identifier of the message.
##### "date":
- Represents the timestamp of the message.
- "datestamp": The date in the format "YYYY-MM-DD HH:MM:SS".
- "timestamp": The timestamp representing the date and time.
- "timezone": The timezone in which the date and time are specified (e.g., "UTC").
##### "reply_to":
- The unique identifier of a message to which this message is a reply (optional).
- "message_id": The unique identifier of the replied message.
##### "sender":
- Contains information about the sender of the message.
- "id": The unique identifier for the sender.
- "info": Additional information about the sender (optional).
- "username": The sender's username (optional).
- "firstname": The sender's firstname (optional).
- "lastname": The sender's lastname (optional).
- "phone": The sender's phone (optional).
##### "chat":
- Contains information about the chat where the message was sent.
- "date": The chat creation date.
- "datestamp": The date in the format "YYYY-MM-DD HH:MM:SS".
- "timestamp": The timestamp representing the date and time.
- "timezone": The timezone in which the date and time are specified (e.g., "UTC").
- "icon": The icon associated with the chat (optional).
- "id": The unique identifier of the chat.
- "info": Chat description/info (optional).
- "name": The name of the chat.
- "username": The username of the chat (optional).
- "subchannel": If this message is posted in a subchannel within the chat (optional).
- "date": The subchannel creation date.
- "datestamp": The date in the format "YYYY-MM-DD HH:MM:SS".
- "timestamp": The timestamp representing the date and time.
- "timezone": The timezone in which the date and time are specified (e.g., "UTC").
- "id": The unique identifier of the subchannel.
- "name": The name of the subchannel (optional).
#### 3. "source"
- Indicates the feeder name.
#### 4. "source-uuid"
- The UUID associated with the source.
#### Example: Feeding AIL with Conti leaks
```python

Binary file not shown.

After

Width:  |  Height:  |  Size: 208 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 143 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 144 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 61 KiB

View File

@ -72,6 +72,7 @@ popd
# pgpdump
test ! -d pgpdump && git clone https://github.com/kazu-yamamoto/pgpdump.git
pushd pgpdump/
autoreconf -fiW all
./configure
make
sudo make install

View File

@ -3,6 +3,7 @@ import requests
import subprocess
import re
import os
import shutil
from time import sleep
from typing import List, Optional
from pathlib import Path
@ -70,6 +71,14 @@ class Repo:
except Exception as e:
print(f"Failed to run {cmd} for {self.id}: {e}")
def cleanup(self, num_to_keep: int) -> None:
files = os.listdir(self.outputdir)
repo_images = [f for f in files if f.startswith(self.name)]
if len(repo_images) > num_to_keep:
repo_images.sort(key=lambda x: os.path.getmtime(os.path.join(self.outputdir, x)))
for image in repo_images[:-num_to_keep]:
shutil.rmtree(os.path.join(self.outputdir, image))
class GitHub(Repo):
"""Class for tracking GitHub repositories."""
@ -126,8 +135,10 @@ def main():
while True:
for repo in repos:
repo.build()
repo.cleanup(num_to_keep=3)
for package in aptpkg:
package.build()
repo.cleanup(num_to_keep=3)
sleep(config["check_interval"])
if __name__ == "__main__":

View File

@ -37,13 +37,17 @@ textblob>=0.15.3
html2text>=2020.1.16
beautifulsoup4>4.8.2
#Crawler
# Crawler
scrapy>2.0.0
scrapy-splash>=0.7.2
# Languages
gcld3
libretranslatepy
lexilang
# Demoji
git+https://github.com/ail-project/demoji
#Graph
numpy>1.18.1
@ -71,13 +75,16 @@ pylibinjection>=0.2.4
phonenumbers>8.12.1
# Web
flask==2.3.3
flask>=2.3.3
flask-login
bcrypt>3.1.6
# Ail typo squatting
ail_typo_squatting
# OCR
easyocr
# Tests
nose2>=0.12.0
coverage>=5.5

View File

@ -1,176 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import sys
import time
import unittest
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib import Tag
from packages import Import_helper
sys.path.append(os.environ['AIL_FLASK'])
sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules'))
from Flask_server import app
# def parse_response(obj, ail_response):
# res_json = ail_response.get_json()
# if 'status' in res_json:
# if res_json['status'] == 'error':
# return obj.fail('{}: {}: {}'.format(ail_response.status_code, res_json['status'], res_json['reason']))
# return res_json
#
#
# def get_api_key():
# api_file = os.path.join(os.environ['AIL_HOME'], 'DEFAULT_PASSWORD')
# if os.path.isfile(api_file):
# with open(os.path.join(os.environ['AIL_HOME'], 'DEFAULT_PASSWORD'), 'r') as f:
# content = f.read()
# content = content.splitlines()
# apikey = content[-1]
# apikey = apikey.replace('API_Key=', '', 1)
# # manual tests
# else:
# apikey = sys.argv[1]
# return apikey
#
#
# APIKEY = get_api_key()
#
#
# class TestApiV1(unittest.TestCase):
# import_uuid = None
# item_id = None
#
# def setUp(self):
# self.app = app
# self.app.config['TESTING'] = True
# self.client = self.app.test_client()
# self.apikey = APIKEY
# self.item_content = "text to import"
# self.item_tags = ["infoleak:analyst-detection=\"private-key\""]
# self.expected_tags = ["infoleak:analyst-detection=\"private-key\"", 'infoleak:submission="manual"']
#
# # POST /api/v1/import/item
# def test_0001_api_import_item(self):
# input_json = {"type": "text", "tags": self.item_tags, "text": self.item_content}
# req = self.client.post('/api/v1/import/item', json=input_json, headers={'Authorization': self.apikey})
# req_json = parse_response(self, req)
# import_uuid = req_json['uuid']
# self.__class__.import_uuid = import_uuid
# self.assertTrue(Import_helper.is_valid_uuid_v4(import_uuid))
#
# # POST /api/v1/get/import/item
# def test_0002_api_get_import_item(self):
# input_json = {"uuid": self.__class__.import_uuid}
# item_not_imported = True
# import_timout = 60
# start = time.time()
#
# while item_not_imported:
# req = self.client.post('/api/v1/get/import/item', json=input_json, headers={'Authorization': self.apikey})
# req_json = parse_response(self, req)
# if req_json['status'] == 'imported':
# try:
# item_id = req_json['items'][0]
# item_not_imported = False
# except Exception as e:
# if time.time() - start > import_timout:
# item_not_imported = False
# self.fail("Import error: {}".format(req_json))
# else:
# if time.time() - start > import_timout:
# item_not_imported = False
# self.fail("Import Timeout, import status: {}".format(req_json['status']))
# self.__class__.item_id = item_id
#
# # Process item
# time.sleep(5)
#
# # POST /api/v1/get/item/content
# def test_0003_api_get_item_content(self):
# input_json = {"id": self.__class__.item_id}
# req = self.client.post('/api/v1/get/item/content', json=input_json, headers={'Authorization': self.apikey})
# req_json = parse_response(self, req)
# item_content = req_json['content']
# self.assertEqual(item_content, self.item_content)
#
# # POST /api/v1/get/item/tag
# def test_0004_api_get_item_tag(self):
# input_json = {"id": self.__class__.item_id}
# req = self.client.post('/api/v1/get/item/tag', json=input_json, headers={'Authorization': self.apikey})
# req_json = parse_response(self, req)
# item_tags = req_json['tags']
# self.assertCountEqual(item_tags, self.expected_tags)
#
# # POST /api/v1/get/item/tag
# def test_0005_api_get_item_default(self):
# input_json = {"id": self.__class__.item_id}
# req = self.client.post('/api/v1/get/item/default', json=input_json, headers={'Authorization': self.apikey})
# req_json = parse_response(self, req)
# item_tags = req_json['tags']
# self.assertCountEqual(item_tags, self.expected_tags)
# item_content = req_json['content']
# self.assertEqual(item_content, self.item_content)
#
# # POST /api/v1/get/item/tag
# # # TODO: add more test
# def test_0006_api_get_item(self):
# input_json = {"id": self.__class__.item_id, "content": True}
# req = self.client.post('/api/v1/get/item', json=input_json, headers={'Authorization': self.apikey})
# req_json = parse_response(self, req)
# item_tags = req_json['tags']
# self.assertCountEqual(item_tags, self.expected_tags)
# item_content = req_json['content']
# self.assertEqual(item_content, self.item_content)
#
# # POST api/v1/add/item/tag
# def test_0007_api_add_item_tag(self):
# tags_to_add = ["infoleak:analyst-detection=\"api-key\""]
# current_item_tag = Tag.get_obj_tag(self.__class__.item_id)
# current_item_tag.append(tags_to_add[0])
#
# # galaxy_to_add = ["misp-galaxy:stealer=\"Vidar\""]
# input_json = {"id": self.__class__.item_id, "tags": tags_to_add}
# req = self.client.post('/api/v1/add/item/tag', json=input_json, headers={'Authorization': self.apikey})
# req_json = parse_response(self, req)
# item_tags = req_json['tags']
# self.assertEqual(item_tags, tags_to_add)
#
# new_item_tag = Tag.get_obj_tag(self.__class__.item_id)
# self.assertCountEqual(new_item_tag, current_item_tag)
#
# # DELETE api/v1/delete/item/tag
# def test_0008_api_add_item_tag(self):
# tags_to_delete = ["infoleak:analyst-detection=\"api-key\""]
# input_json = {"id": self.__class__.item_id, "tags": tags_to_delete}
# req = self.client.delete('/api/v1/delete/item/tag', json=input_json, headers={'Authorization': self.apikey})
# req_json = parse_response(self, req)
# item_tags = req_json['tags']
# self.assertCountEqual(item_tags, tags_to_delete)
# current_item_tag = Tag.get_obj_tag(self.__class__.item_id)
# if tags_to_delete[0] in current_item_tag:
# self.fail('Tag no deleted')
#
# # POST api/v1/get/tag/metadata
# def test_0009_api_add_item_tag(self):
# input_json = {"tag": self.item_tags[0]}
# req = self.client.post('/api/v1/get/tag/metadata', json=input_json, headers={'Authorization': self.apikey})
# req_json = parse_response(self, req)
# self.assertEqual(req_json['tag'], self.item_tags[0])
#
# # GET api/v1/get/tag/all
# def test_0010_api_add_item_tag(self):
# input_json = {"tag": self.item_tags[0]}
# req = self.client.get('/api/v1/get/tag/all', json=input_json, headers={'Authorization': self.apikey})
# req_json = parse_response(self, req)
# self.assertTrue(req_json['tags'])
#
#
if __name__ == "__main__":
unittest.main(argv=['first-arg-is-ignored'], exit=False)

40
tests/test_api.py Normal file
View File

@ -0,0 +1,40 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import sys
import unittest
from pyail import PyAIL
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib import Users
sys.path.append(os.environ['AIL_FLASK'])
sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules'))
class TestApiV1(unittest.TestCase):
def setUp(self):
# TODO GET HOST + PORT
self.ail = PyAIL('https://localhost:7000', Users.get_user_token('admin@admin.test'), ssl=False)
# GET /api/v1/ping
def test_0001_api_ping(self):
r = self.ail.ping_ail()
self.assertEqual(r.get('status'), 'pong')
# # GET /api/v1/uuid
# def test_0001_api_uuid(self):
# r = self.ail.get_uuid()
#
# # GET /api/v1/version
# def test_0001_api_version(self):
# r = self.ail.get_version()
if __name__ == "__main__":
unittest.main(exit=False)

View File

@ -10,7 +10,10 @@ from base64 import b64encode
from distutils.dir_util import copy_tree
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib.ConfigLoader import ConfigLoader
# Modules Classes
from modules.ApiKey import ApiKey
from modules.Categ import Categ
@ -22,87 +25,91 @@ from modules.Onion import Onion
from modules.Telegram import Telegram
# project packages
from lib.ConfigLoader import ConfigLoader
import lib.crawlers as crawlers
import lib.objects.Items as Items
#### COPY SAMPLES ####
config_loader = ConfigLoader()
# # TODO:move me in new Item package
ITEMS_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'
ITEMS_FOLDER = os.path.join(os.path.realpath(ITEMS_FOLDER), '')
ITEMS_FOLDER = Items.ITEMS_FOLDER
TESTS_ITEMS_FOLDER = os.path.join(ITEMS_FOLDER, 'tests')
sample_dir = os.path.join(os.environ['AIL_HOME'], 'samples')
copy_tree(sample_dir, TESTS_ITEMS_FOLDER)
#### ---- ####
class Test_Module_ApiKey(unittest.TestCase):
class TestModuleApiKey(unittest.TestCase):
def setUp(self):
self.module_obj = ApiKey()
self.module_obj.debug = True
self.module = ApiKey()
self.module.debug = True
def test_module(self):
item_id = 'tests/2021/01/01/api_keys.gz'
self.module.obj = Items.Item(item_id)
google_api_key = 'AIza00000000000000000000000_example-KEY'
aws_access_key = 'AKIAIOSFODNN7EXAMPLE'
aws_secret_key = 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY'
matches = self.module_obj.compute(f'{item_id} 3', r_result=True)
matches = self.module.compute('3', r_result=True)
self.assertCountEqual(matches[0], {google_api_key})
self.assertCountEqual(matches[1], {aws_access_key})
self.assertCountEqual(matches[2], {aws_secret_key})
class Test_Module_Categ(unittest.TestCase):
class TestModuleCateg(unittest.TestCase):
def setUp(self):
self.module_obj = Categ()
self.module_obj.debug = True
self.module = Categ()
self.module.debug = True
def test_module(self):
item_id = 'tests/2021/01/01/categ.gz'
self.module.obj = Items.Item(item_id)
test_categ = ['CreditCards', 'Mail', 'Onion', 'Urls', 'Credential', 'Cve']
result = self.module_obj.compute(item_id, r_result=True)
print(result)
result = self.module.compute(None, r_result=True)
self.assertCountEqual(result, test_categ)
class Test_Module_CreditCards(unittest.TestCase):
class TestModuleCreditCards(unittest.TestCase):
def setUp(self):
self.module_obj = CreditCards()
self.module_obj.debug = True
self.module = CreditCards()
self.module.debug = True
def test_module(self):
item_id = 'tests/2021/01/01/credit_cards.gz 7'
item_id = 'tests/2021/01/01/credit_cards.gz'
self.module.obj = Items.Item(item_id)
test_cards = ['341039324930797', # American Express
'6011613905509166', # Discover Card
'3547151714018657', # Japan Credit Bureau (JCB)
'5492981206527330', # 16 digits MasterCard
'4024007132849695', # '4532525919781' # 16-digit VISA, with separators
]
]
result = self.module_obj.compute(item_id, r_result=True)
result = self.module.compute('7', r_result=True)
self.assertCountEqual(result, test_cards)
class Test_Module_DomClassifier(unittest.TestCase):
class TestModuleDomClassifier(unittest.TestCase):
def setUp(self):
self.module_obj = DomClassifier()
self.module_obj.debug = True
self.module = DomClassifier()
self.module.debug = True
def test_module(self):
test_host = 'foo.be'
item_id = 'tests/2021/01/01/domain_classifier.gz'
msg = f'{test_host} {item_id}'
result = self.module_obj.compute(msg, r_result=True)
self.module.obj = Items.Item(item_id)
result = self.module.compute(f'{test_host}', r_result=True)
self.assertTrue(len(result))
class Test_Module_Global(unittest.TestCase):
class TestModuleGlobal(unittest.TestCase):
def setUp(self):
self.module_obj = Global()
self.module_obj.debug = True
self.module = Global()
self.module.debug = True
def test_module(self):
# # TODO: delete item
@ -113,24 +120,20 @@ class Test_Module_Global(unittest.TestCase):
item_content = b'Lorem ipsum dolor sit amet, consectetur adipiscing elit'
item_content_1 = b64encode(gzip.compress(item_content)).decode()
item_content_2 = b64encode(gzip.compress(item_content + b' more text ...')).decode()
message = f'{item_id} {item_content_1}'
self.module.obj = Items.Item(item_id)
# Test new item
result = self.module_obj.compute(message, r_result=True)
print(f'test new item: {result}')
result = self.module.compute(item_content_1, r_result=True)
self.assertEqual(result, item_id)
# Test duplicate
result = self.module_obj.compute(message, r_result=True)
print(f'test duplicate {result}')
result = self.module.compute(item_content_1, r_result=True)
self.assertIsNone(result)
# Test same id with != content
item = Items.Item('tests/2021/01/01/global_831875da824fc86ab5cc0e835755b520.gz')
item.delete()
message = f'{item_id} {item_content_2}'
result = self.module_obj.compute(message, r_result=True)
print(f'test same id with != content: {result}')
result = self.module.compute(item_content_2, r_result=True)
self.assertIn(item_id[:-3], result)
self.assertNotEqual(result, item_id)
@ -139,40 +142,46 @@ class Test_Module_Global(unittest.TestCase):
# item.delete()
# # TODO: remove from queue
class Test_Module_Keys(unittest.TestCase):
class TestModuleKeys(unittest.TestCase):
def setUp(self):
self.module_obj = Keys()
self.module_obj.debug = True
self.module = Keys()
self.module.debug = True
def test_module(self):
item_id = 'tests/2021/01/01/keys.gz'
self.module.obj = Items.Item(item_id)
# # TODO: check results
result = self.module_obj.compute(item_id)
self.module.compute(None)
class Test_Module_Onion(unittest.TestCase):
class TestModuleOnion(unittest.TestCase):
def setUp(self):
self.module_obj = Onion()
self.module_obj.debug = True
self.module = Onion()
self.module.debug = True
def test_module(self):
item_id = 'tests/2021/01/01/onion.gz'
domain_1 = 'eswpccgr5xyovsahffkehgleqthrasfpfdblwbs4lstd345dwq5qumqd.onion'
domain_2 = 'www.facebookcorewwwi.onion'
self.module.obj = Items.Item(item_id)
# domain_1 = 'eswpccgr5xyovsahffkehgleqthrasfpfdblwbs4lstd345dwq5qumqd.onion'
# domain_2 = 'www.facebookcorewwwi.onion'
self.module_obj.compute(f'{item_id} 3')
self.module.compute(f'3')
class Test_Module_Telegram(unittest.TestCase):
class TestModuleTelegram(unittest.TestCase):
def setUp(self):
self.module_obj = Telegram()
self.module_obj.debug = True
self.module = Telegram()
self.module.debug = True
def test_module(self):
item_id = 'tests/2021/01/01/keys.gz'
self.module.obj = Items.Item(item_id)
# # TODO: check results
result = self.module_obj.compute(item_id)
self.module.compute(None)
if __name__ == '__main__':

77
tools/reprocess_objects.py Executable file
View File

@ -0,0 +1,77 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Reprocess AIL Objects by Object Type
================
Send ALL objects by type in queues
"""
import argparse
import os
import sys
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib.ail_core import is_object_type
from lib import ail_queues
from lib.objects import ail_objects
# from modules.ApiKey import ApiKey
# from modules.Categ import Categ
# from modules.CreditCards import CreditCards
# from modules.DomClassifier import DomClassifier
# from modules.Global import Global
# from modules.Keys import Keys
# from modules.Onion import Onion
# from modules.Telegram import Telegram
from modules.Languages import Languages
from modules.OcrExtractor import OcrExtractor
MODULES = {
'Languages': Languages,
'OcrExtractor': OcrExtractor
}
def reprocess_message_objects(object_type, module_name=None):
    """Reprocess every stored object of `object_type`.

    With `module_name`, run that module's compute() on each object directly;
    without it, push each object's global id onto the FeederModuleImporter
    queue for the regular pipeline to pick up.
    """
    if module_name is None:
        # Queue-based path: re-inject every object id with a 'reprocess' marker.
        queue = ail_queues.AILQueue('FeederModuleImporter', -1)
        for obj in ail_objects.obj_iterator(object_type, filters={}):
            queue.send_message(obj.get_global_id(), message='reprocess')
        queue.end()
        return

    # Direct path: instantiate the module once and feed it each existing object.
    module = MODULES[module_name]()
    for obj in ail_objects.obj_iterator(object_type, filters={}):
        if obj.exists():
            module.obj = obj
            module.compute(None)
        else:
            print(f'ERROR: object does not exist, {obj.id}')
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Reprocess AIL Objects')
    parser.add_argument('-t', '--type', type=str, help='AIL Object Type', required=True)
    parser.add_argument('-m', '--module', type=str, help='AIL Module Name')
    args = parser.parse_args()
    # NOTE: `required=True` makes argparse reject a missing -t itself, so no
    # extra `if not args.type` check is needed here.
    obj_type = args.type
    if not is_object_type(obj_type):
        raise Exception(f'Invalid Object Type: {obj_type}')
    if obj_type not in ['image', 'item', 'message']:
        raise Exception(f'Currently not supported Object Type: {obj_type}')
    modulename = args.module
    # BUG FIX: -m is optional. The previous check raised whenever -m was omitted
    # (None is never in MODULES), making the queue-only mode unreachable.
    # Only validate the module name when one was actually supplied.
    if modulename is not None and modulename not in MODULES:
        raise Exception(f'Currently not supported Module: {modulename}')
    reprocess_message_objects(obj_type, module_name=modulename)

26
update/v5.4/Update.py Executable file
View File

@ -0,0 +1,26 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
sys.path.append(os.environ['AIL_HOME'])
##################################
# Import Project packages
##################################
from update.bin.ail_updater import AIL_Updater
from lib import ail_updates
from lib import chats_viewer
class Updater(AIL_Updater):
    """Default updater: no version-specific logic beyond the base class."""

    def __init__(self, version):
        super().__init__(version)
if __name__ == '__main__':
    # Repair subchannel/message correlations before bumping the version marker.
    chats_viewer.fix_correlations_subchannel_message()
    Updater('v5.4').run_update()

43
update/v5.4/Update.sh Executable file
View File

@ -0,0 +1,43 @@
#!/bin/bash

# AIL v5.4 update script: stop AIL, update submodules and Python deps,
# then run the v5.4 Python updater.

# BUG FIX: the $AIL_BIN check previously printed "AIL_ARDB" in its error message.
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;

export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH

GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"

echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait

# SUBMODULES #
git submodule update

echo ""
echo -e $GREEN"Updating python packages ..."$DEFAULT
echo ""
pip install -U pyail
pip install -U pylacus
pip install -U git+https://github.com/ail-project/demoji
pip install -U lexilang

bash ${AIL_BIN}/LAUNCH.sh -lrv
bash ${AIL_BIN}/LAUNCH.sh -lkv

echo ""
echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
echo ""
python ${AIL_HOME}/update/v5.4/Update.py
wait

echo ""
echo ""
exit 0

26
update/v5.5/Update.py Executable file
View File

@ -0,0 +1,26 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
sys.path.append(os.environ['AIL_HOME'])
##################################
# Import Project packages
##################################
from update.bin.ail_updater import AIL_Updater
from lib import ail_updates
from lib import chats_viewer
class Updater(AIL_Updater):
    """Default updater: no version-specific logic beyond the base class."""

    def __init__(self, version):
        super().__init__(version)
if __name__ == '__main__':
    # Repair subchannel/message correlations before bumping the version marker.
    chats_viewer.fix_correlations_subchannel_message()
    Updater('v5.5').run_update()

40
update/v5.5/Update.sh Executable file
View File

@ -0,0 +1,40 @@
#!/bin/bash

# AIL v5.5 update script: stop AIL, update submodules and easyocr,
# then run the v5.5 Python updater.

# BUG FIX: the $AIL_BIN check previously printed "AIL_ARDB" in its error message.
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;

export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH

GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"

echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait

# SUBMODULES #
git submodule update

echo ""
echo -e $GREEN"Updating python packages ..."$DEFAULT
echo ""
pip install -U easyocr

bash ${AIL_BIN}/LAUNCH.sh -lrv
bash ${AIL_BIN}/LAUNCH.sh -lkv

echo ""
echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
echo ""
python ${AIL_HOME}/update/v5.5/Update.py
wait

echo ""
echo ""
exit 0

View File

@ -35,6 +35,7 @@ import Flask_config
from blueprints.root import root
from blueprints.crawler_splash import crawler_splash
from blueprints.correlation import correlation
from blueprints.languages_ui import languages_ui
from blueprints.tags_ui import tags_ui
from blueprints.import_export import import_export
from blueprints.investigations_b import investigations_b
@ -52,6 +53,10 @@ from blueprints.objects_etag import objects_etag
from blueprints.objects_hhhash import objects_hhhash
from blueprints.chats_explorer import chats_explorer
from blueprints.objects_image import objects_image
from blueprints.objects_ocr import objects_ocr
from blueprints.objects_favicon import objects_favicon
from blueprints.api_rest import api_rest
Flask_dir = os.environ['AIL_FLASK']
@ -94,6 +99,7 @@ app.config['MAX_CONTENT_LENGTH'] = 900 * 1024 * 1024
app.register_blueprint(root, url_prefix=baseUrl)
app.register_blueprint(crawler_splash, url_prefix=baseUrl)
app.register_blueprint(correlation, url_prefix=baseUrl)
app.register_blueprint(languages_ui, url_prefix=baseUrl)
app.register_blueprint(tags_ui, url_prefix=baseUrl)
app.register_blueprint(import_export, url_prefix=baseUrl)
app.register_blueprint(investigations_b, url_prefix=baseUrl)
@ -111,6 +117,9 @@ app.register_blueprint(objects_etag, url_prefix=baseUrl)
app.register_blueprint(objects_hhhash, url_prefix=baseUrl)
app.register_blueprint(chats_explorer, url_prefix=baseUrl)
app.register_blueprint(objects_image, url_prefix=baseUrl)
app.register_blueprint(objects_ocr, url_prefix=baseUrl)
app.register_blueprint(objects_favicon, url_prefix=baseUrl)
app.register_blueprint(api_rest, url_prefix=baseUrl)
# ========= =========#
@ -123,8 +132,6 @@ login_manager = LoginManager()
login_manager.login_view = 'root.login'
login_manager.init_app(app)
print()
# ========= LOGIN MANAGER ========
@login_manager.user_loader
@ -231,18 +238,25 @@ def _handle_client_error(e):
anchor_id = anchor_id.replace('/', '_')
api_doc_url = 'https://github.com/ail-project/ail-framework/tree/master/doc#{}'.format(anchor_id)
res_dict['documentation'] = api_doc_url
return Response(json.dumps(res_dict, indent=2, sort_keys=True), mimetype='application/json'), 405
return Response(json.dumps(res_dict) + '\n', mimetype='application/json'), 405
else:
return e
@app.errorhandler(404)
def error_page_not_found(e):
if request.path.startswith('/api/'): ## # TODO: add baseUrl
return Response(json.dumps({"status": "error", "reason": "404 Not Found"}, indent=2, sort_keys=True), mimetype='application/json'), 404
return Response(json.dumps({"status": "error", "reason": "404 Not Found"}) + '\n', mimetype='application/json'), 404
else:
# avoid endpoint enumeration
return page_not_found(e)
@app.errorhandler(500)
def _handle_server_error(e):
    """Return a JSON 500 body for API paths; let Flask's default page handle the rest.

    Renamed from `_handle_client_error`, which collided with (and shadowed the
    module-level binding of) the 405 handler of the same name defined earlier.
    Registration is by function reference, so behavior is unchanged.
    """
    if request.path.startswith('/api/'):
        return Response(json.dumps({"status": "error", "reason": "Server Error"}) + '\n', mimetype='application/json'), 500
    else:
        return e
@login_required
def page_not_found(e):
# avoid endpoint enumeration
@ -255,6 +269,10 @@ default_taxonomies = ["infoleak", "gdpr", "fpf", "dark-web"]
for taxonomy in default_taxonomies:
Tag.enable_taxonomy_tags(taxonomy)
# rrrr = [str(p) for p in app.url_map.iter_rules()]
# for p in rrrr:
# print(p)
# ============ MAIN ============
if __name__ == "__main__":

View File

@ -0,0 +1,251 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
Blueprint Flask: crawler splash endpoints: dashboard, onion crawler ...
"""
import os
import sys
import json
from functools import wraps
from flask import request, Blueprint, Response
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib import ail_api
from lib import ail_core
from lib import ail_updates
from lib import crawlers
from lib import chats_viewer
from lib import Investigations
from lib import Tag
from lib.objects import ail_objects
from lib.objects import Domains
from lib.objects import Titles
from importer.FeederImporter import api_add_json_feeder_to_queue
# ============ BLUEPRINT ============
api_rest = Blueprint('api_rest', __name__, template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates'))
# ============ AUTH FUNCTIONS ============
def get_auth_from_header():
    """Return the request's Authorization header with every space removed."""
    raw = request.headers.get('Authorization')
    return raw.replace(' ', '')  # remove space
def token_required(user_role):
    """Decorator factory for API routes.

    Verifies the Authorization header, authenticates the token via
    `ail_api.authenticate_user`, and checks that the token's user holds
    `user_role` before invoking the wrapped view. Returns JSON error
    responses (401/403/400) otherwise.
    """
    def actual_decorator(funct):
        @wraps(funct)
        def api_token(*args, **kwargs):
            # Check AUTH Header
            if not request.headers.get('Authorization'):
                return create_json_response({'status': 'error', 'reason': 'Authentication needed'}, 401)
            # Check Role
            if not user_role:
                return create_json_response({'status': 'error', 'reason': 'Invalid Role'}, 401)
            token = get_auth_from_header()
            # Client IP is passed along for auth-side logging/rate limiting —
            # presumably; confirm in ail_api.authenticate_user.
            ip_source = request.remote_addr
            data, status_code = ail_api.authenticate_user(token, ip_address=ip_source)
            if status_code != 200:
                return create_json_response(data, status_code)
            elif data:
                # check user role
                if not ail_api.is_user_in_role(user_role, token):
                    return create_json_response({'status': 'error', 'reason': 'Access Forbidden'}, 403)
                else:
                    # User Authenticated + In Role
                    return funct(*args, **kwargs)
            else:
                return create_json_response({'status': 'error', 'reason': 'Internal'}, 400)
        return api_token
    return actual_decorator
# ============ FUNCTIONS ============
def create_json_response(data, status_code):
    """Build a (Response, status) pair with a newline-terminated JSON body."""
    payload = json.dumps(data) + "\n"
    return Response(payload, mimetype='application/json'), status_code
# ============= ROUTES ==============
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # # # # # # # # # # # # # # CORE # # # # # # # # # # # # # # # # # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
@api_rest.route("api/v1/ping", methods=['GET'])
@token_required('read_only')
def v1_ping():
    """Liveness check: always answers {'status': 'pong'}."""
    return create_json_response({'status': 'pong'}, 200)
@api_rest.route("api/v1/uuid", methods=['GET'])
@token_required('read_only')
def v1_uuid():
    """Return this AIL instance's UUID."""
    ail_uid = ail_core.get_ail_uuid()
    return create_json_response({'uuid': ail_uid}, 200)
@api_rest.route("api/v1/version", methods=['GET'])
@token_required('read_only')
def v1_version():
    """Return the running AIL framework version."""
    version = ail_updates.get_ail_version()
    return create_json_response({'version': version}, 200)
@api_rest.route("api/v1/pyail/version", methods=['GET'])
@token_required('read_only')
def v1_pyail_version():
    """Return the minimum supported pyail client version (hard-coded)."""
    ail_version = 'v1.0.0'
    return create_json_response({'version': ail_version}, 200)
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # # # # # # # # # # # # # # CRAWLERS # # # # # # # # # # # # # # # # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # TODO: ADD RESULT JSON Response
@api_rest.route("api/v1/add/crawler/task", methods=['POST'])  # TODO V2 Migration
@token_required('analyst')
def add_crawler_task():
    """Create a crawler task from the POSTed JSON body.

    `api_add_crawler_task` returns a truthy (error_dict, status) pair on
    failure and a falsy value on success; on success echo back the URL.
    """
    data = request.get_json()
    user_token = get_auth_from_header()
    user_id = ail_api.get_user_from_token(user_token)
    res = crawlers.api_add_crawler_task(data, user_id=user_id)
    if res:
        return create_json_response(res[0], res[1])
    dict_res = {'url': data['url']}
    return create_json_response(dict_res, 200)
@api_rest.route("api/v1/add/crawler/capture", methods=['POST'])  # TODO V2 Migration
@token_required('analyst')
def add_crawler_capture():
    """Register a crawler capture from the POSTed JSON body; echo the URL on success."""
    data = request.get_json()
    user_id = ail_api.get_user_from_token(get_auth_from_header())
    error = crawlers.api_add_crawler_capture(data, user_id)
    if error:
        # (error_dict, status) pair from the crawlers API
        return create_json_response(error[0], error[1])
    return create_json_response({'url': data['url']}, 200)
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # # # # # # # # # # # # # IMPORTERS # # # # # # # # # # # # # # # # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
@api_rest.route("api/v1/import/json/item", methods=['POST'])  # TODO V2 Migration
@token_required('user')
def import_json_item():
    """Queue a JSON item for the feeder importer.

    Consistency fix: use the shared `create_json_response` helper (newline-
    terminated JSON body) like every other route in this blueprint, instead
    of hand-building a raw Response.
    """
    data_json = request.get_json()
    res = api_add_json_feeder_to_queue(data_json)
    return create_json_response(res[0], res[1])
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # # # # # # # # # # # # # # OBJECTS # # # # # # # # # # # # # # # # # # # TODO LIST OBJ TYPES + SUBTYPES
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
@api_rest.route("api/v1/object", methods=['GET'])  # TODO options
@token_required('read_only')
def v1_object():
    """Fetch an object either by global id (`gid`) or by (`type`, `subtype`, `id`)."""
    obj_gid = request.args.get('gid')
    if obj_gid:
        r = ail_objects.api_get_object_global_id(obj_gid)
    else:
        obj_type = request.args.get('type')
        obj_subtype = request.args.get('subtype')
        obj_id = request.args.get('id')
        r = ail_objects.api_get_object(obj_type, obj_subtype, obj_id)
    return create_json_response(r[0], r[1])
@api_rest.route("api/v1/obj/gid/<path:object_global_id>", methods=['GET'])  # TODO REMOVE ME ????
@token_required('read_only')
def v1_object_global_id(object_global_id):
    """Fetch an object by its global id taken from the URL path."""
    resp, status = ail_objects.api_get_object_global_id(object_global_id)
    return create_json_response(resp, status)
# @api_rest.route("api/v1/object/<object_type>/<object_subtype>/<path:object_id>", methods=['GET'])
# @api_rest.route("api/v1/object/<object_type>/<object_subtype>/<path:object_id>", methods=['GET'])
@api_rest.route("api/v1/obj/<object_type>/<path:object_id>", methods=['GET'])  # TODO REMOVE ME ????
@token_required('read_only')
def v1_object_type_id(object_type, object_id):
    """Fetch an object by type and id (no subtype) from the URL path."""
    r = ail_objects.api_get_object_type_id(object_type, object_id)
    return create_json_response(r[0], r[1])
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # # # # # # # # # # # # # # CHATS # # # # # # # # # # # # # # # # # # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
@api_rest.route("api/v1/chat/messages", methods=['GET'])
@token_required('analyst')
def objects_chat_messages():
    """Return the messages of a chat (query params: subtype, id)."""
    obj_subtype = request.args.get('subtype')
    obj_id = request.args.get('id')
    r = chats_viewer.api_chat_messages(obj_subtype, obj_id)
    return create_json_response(r[0], r[1])
@api_rest.route("api/v1/chat-subchannel/messages", methods=['GET'])
@token_required('analyst')
def objects_chat_subchannel_messages():
    """Return the messages of a chat subchannel (query params: subtype, id)."""
    resp, status = chats_viewer.api_subchannel_messages(
        request.args.get('subtype'), request.args.get('id'))
    return create_json_response(resp, status)
@api_rest.route("api/v1/chat-thread/messages", methods=['GET'])
@token_required('analyst')
def objects_chat_thread_messages():
    """Return the messages of a chat thread (query params: subtype, id)."""
    obj_subtype = request.args.get('subtype')
    obj_id = request.args.get('id')
    r = chats_viewer.api_thread_messages(obj_subtype, obj_id)
    return create_json_response(r[0], r[1])
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # # # # # # # # # # # # # # TITLES # # # # # # # # # # # # # # # # # # # TODO TO REVIEW
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
@api_rest.route("api/v1/titles/download", methods=['GET'])  # TODO RENAME ->api/v1/titles/domains
@token_required('analyst')
def objects_titles_download():
    """Dump every title content with its object ids."""
    return create_json_response(Titles.Titles().get_contents_ids(), 200)
# TODO
@api_rest.route("api/v1/titles/download/unsafe", methods=['GET']) # TODO RENAME ->api/v1/titles/domains/unsafe
@token_required('analyst')
def objects_titles_download_unsafe():
all_titles = {}
unsafe_tags = Tag.unsafe_tags
for tag in unsafe_tags:
domains = Tag.get_tag_objects(tag, 'domain')
for domain_id in domains:
domain = Domains.Domain(domain_id)
domain_titles = domain.get_correlation('title').get('title', [])
for dt in domain_titles:
title = Titles.Title(dt[1:])
title_content = title.get_content()
if title_content and title_content != 'None':
if title_content not in all_titles:
all_titles[title_content] = []
all_titles[title_content].append(domain.get_id())
return Response(json.dumps(all_titles), mimetype='application/json'), 200
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # # # # # # # # # # # # # # INVESTIGATIONS # # # # # # # # # # # # # # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
@api_rest.route("api/v1/investigation/<investigation_uuid>", methods=['GET'])  # TODO options
@token_required('read_only')
def v1_investigation(investigation_uuid):
    """Return the investigation identified by `investigation_uuid`."""
    r = Investigations.api_get_investigation(investigation_uuid)
    return create_json_response(r[0], r[1])
# TODO CATCH REDIRECT

View File

@ -23,6 +23,7 @@ from lib import ail_core
from lib import chats_viewer
from lib import Language
from lib import Tag
from lib import module_extractor
# ============ BLUEPRINT ============
chats_explorer = Blueprint('chats_explorer', __name__, template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates/chats_explorer'))
@ -58,7 +59,7 @@ def chats_explorer_networks():
networks = chats_viewer.get_chat_service_instances_by_protocol(protocol)
if len(networks) == 1:
instance_uuid = list(networks.values())[0]
return redirect(url_for('chats_explorer.chats_explorer_instance', uuid=instance_uuid))
return redirect(url_for('chats_explorer.chats_explorer_instance', subtype=instance_uuid))
else:
return render_template('chats_networks.html', protocol=protocol, networks=networks)
@ -67,7 +68,7 @@ def chats_explorer_networks():
@login_required
@login_read_only
def chats_explorer_instance():
intance_uuid = request.args.get('uuid')
intance_uuid = request.args.get('subtype')
chat_instance = chats_viewer.api_get_chat_service_instance(intance_uuid)
if chat_instance[1] != 200:
return create_json_response(chat_instance[0], chat_instance[1])
@ -80,7 +81,7 @@ def chats_explorer_instance():
@login_read_only
def chats_explorer_chat():
chat_id = request.args.get('id')
instance_uuid = request.args.get('uuid')
instance_uuid = request.args.get('subtype')
target = request.args.get('target')
if target == "Don't Translate":
target = None
@ -92,15 +93,31 @@ def chats_explorer_chat():
else:
chat = chat[0]
languages = Language.get_translation_languages()
return render_template('chat_viewer.html', chat=chat, bootstrap_label=bootstrap_label, translation_languages=languages, translation_target=target)
return render_template('chat_viewer.html', chat=chat, bootstrap_label=bootstrap_label,
ail_tags=Tag.get_modal_add_tags(chat['id'], chat['type'], chat['subtype']),
translation_languages=languages, translation_target=target)
@chats_explorer.route("chats/explorer/messages/stats/week", methods=['GET'])
@login_required
@login_read_only
def chats_explorer_messages_stats_week():
chat_type = request.args.get('type')
instance_uuid = request.args.get('subtype')
chat_id = request.args.get('id')
instance_uuid = request.args.get('uuid')
week = chats_viewer.api_get_nb_message_by_week(chat_id, instance_uuid)
week = chats_viewer.api_get_nb_message_by_week(chat_type, instance_uuid, chat_id)
if week[1] != 200:
return create_json_response(week[0], week[1])
else:
return jsonify(week[0])
@chats_explorer.route("chats/explorer/messages/stats/week/all", methods=['GET'])
@login_required
@login_read_only
def chats_explorer_messages_stats_week_all():
chat_type = request.args.get('type')
instance_uuid = request.args.get('subtype')
chat_id = request.args.get('id')
week = chats_viewer.api_get_nb_week_messages(chat_type, instance_uuid, chat_id) # TODO SELECT DATE
if week[1] != 200:
return create_json_response(week[0], week[1])
else:
@ -111,7 +128,7 @@ def chats_explorer_messages_stats_week():
@login_read_only
def objects_subchannel_messages():
subchannel_id = request.args.get('id')
instance_uuid = request.args.get('uuid')
instance_uuid = request.args.get('subtype')
target = request.args.get('target')
if target == "Don't Translate":
target = None
@ -123,14 +140,16 @@ def objects_subchannel_messages():
else:
subchannel = subchannel[0]
languages = Language.get_translation_languages()
return render_template('SubChannelMessages.html', subchannel=subchannel, bootstrap_label=bootstrap_label, translation_languages=languages, translation_target=target)
return render_template('SubChannelMessages.html', subchannel=subchannel,
ail_tags=Tag.get_modal_add_tags(subchannel['id'], subchannel['type'], subchannel['subtype']),
bootstrap_label=bootstrap_label, translation_languages=languages, translation_target=target)
@chats_explorer.route("/chats/explorer/thread", methods=['GET'])
@login_required
@login_read_only
def objects_thread_messages():
thread_id = request.args.get('id')
instance_uuid = request.args.get('uuid')
instance_uuid = request.args.get('subtype')
target = request.args.get('target')
if target == "Don't Translate":
target = None
@ -158,6 +177,51 @@ def chats_explorer_chat_participants():
meta = meta[0]
return render_template('chat_participants.html', meta=meta, bootstrap_label=bootstrap_label)
@chats_explorer.route("/chats/explorer/chat/download", methods=['GET'])
@login_required
@login_read_only
def chats_explorer_chat_download():
    """Download a chat's messages as JSON (query params: id, subtype).

    A 404 from the chats API becomes an HTML abort (avoids leaking chat
    existence via JSON error bodies — presumably; confirm intent).
    """
    chat_id = request.args.get('id')
    chat_subtype = request.args.get('subtype')
    chat = chats_viewer.api_chat_messages(chat_subtype, chat_id)
    if chat[1] != 200:
        if chat[1] == 404:
            abort(404)
        else:
            return create_json_response(chat[0], chat[1])
    else:
        return jsonify(chat[0])
@chats_explorer.route("/chats/explorer/subchannel/download", methods=['GET'])
@login_required
@login_read_only
def objects_subchannel_messages_download():
    """Download a subchannel's messages as JSON (query params: id, subtype)."""
    subchannel_id = request.args.get('id')
    instance_uuid = request.args.get('subtype')
    resp, status = chats_viewer.api_subchannel_messages(instance_uuid, subchannel_id)
    if status == 200:
        return jsonify(resp)
    return create_json_response(resp, status)
@chats_explorer.route("/chats/explorer/thread/download", methods=['GET'])
@login_required
@login_read_only
def objects_thread_messages_download():
    """Download a thread's messages as JSON (query params: id, subtype)."""
    thread_id = request.args.get('id')
    instance_uuid = request.args.get('subtype')
    thread = chats_viewer.api_thread_messages(instance_uuid, thread_id)
    if thread[1] != 200:
        return create_json_response(thread[0], thread[1])
    else:
        return jsonify(thread[0])
#### ####
@chats_explorer.route("/objects/message", methods=['GET'])
@login_required
@login_read_only
@ -172,10 +236,48 @@ def objects_message():
else:
message = message[0]
languages = Language.get_translation_languages()
extracted = module_extractor.extract('message', '', message['id'], content=message['content'])
extracted_matches = module_extractor.get_extracted_by_match(extracted)
message['extracted'] = extracted
message['extracted_matches'] = extracted_matches
return render_template('ChatMessage.html', meta=message, bootstrap_label=bootstrap_label,
translation_languages=languages, translation_target=target,
modal_add_tags=Tag.get_modal_add_tags(message['id'], object_type='message'))
@chats_explorer.route("/objects/message/translate", methods=['POST'])
@login_required
@login_read_only
def objects_message_translate():
    """Store a manually-entered translation for a message, then redirect back."""
    message_id = request.form.get('id')
    # NOTE(review): the form field is named 'language_target' but is passed as
    # the source language — confirm against the template.
    source = request.form.get('language_target')
    target = request.form.get('target')
    translation = request.form.get('translation')
    if target == "Don't Translate":
        target = None
    resp = chats_viewer.api_manually_translate_message(message_id, source, target, translation)
    if resp[1] != 200:
        return create_json_response(resp[0], resp[1])
    else:
        # Bounce back to wherever the form was submitted from.
        if request.referrer:
            return redirect(request.referrer)
        else:
            return redirect(url_for('chats_explorer.objects_message', id=message_id, target=target))
@chats_explorer.route("/objects/message/detect/language", methods=['GET'])
@login_required
@login_read_only
def objects_message_detect_language():
    """Trigger language detection for a message, then redirect back."""
    message_id = request.args.get('id')
    target = request.args.get('target')
    resp = chats_viewer.api_message_detect_language(message_id)
    if resp[1] != 200:
        return create_json_response(resp[0], resp[1])
    else:
        if request.referrer:
            return redirect(request.referrer)
        else:
            return redirect(url_for('chats_explorer.objects_message', id=message_id, target=target))
@chats_explorer.route("/objects/user-account", methods=['GET'])
@login_required
@login_read_only
@ -192,4 +294,40 @@ def objects_user_account():
user_account = user_account[0]
languages = Language.get_translation_languages()
return render_template('user_account.html', meta=user_account, bootstrap_label=bootstrap_label,
ail_tags=Tag.get_modal_add_tags(user_account['id'], user_account['type'], user_account['subtype']),
translation_languages=languages, translation_target=target)
@chats_explorer.route("/objects/user-account/chat", methods=['GET'])
@login_required
@login_read_only
def objects_user_account_chat():
    """Render one user's messages within a given chat, optionally translated."""
    instance_uuid = request.args.get('subtype')
    user_id = request.args.get('id')
    chat_id = request.args.get('chat_id')
    target = request.args.get('target')
    # Sentinel value from the translation dropdown means "no translation".
    if target == "Don't Translate":
        target = None
    meta = chats_viewer.api_get_user_account_chat_messages(user_id, instance_uuid, chat_id, translation_target=target)
    if meta[1] != 200:
        return create_json_response(meta[0], meta[1])
    else:
        meta = meta[0]
        languages = Language.get_translation_languages()
        return render_template('chats_explorer/user_chat_messages.html', meta=meta, bootstrap_label=bootstrap_label,
                               ail_tags=Tag.get_modal_add_tags(meta['user-account']['id'], meta['user-account']['type'], meta['user-account']['subtype']),
                               translation_languages=languages, translation_target=target)
@chats_explorer.route("objects/user-account/messages/stats/week/all", methods=['GET'])
@login_required
@login_read_only
def user_account_messages_stats_week_all():
    """Weekly message counts for a user account, as JSON."""
    instance_uuid = request.args.get('subtype')
    user_id = request.args.get('id')
    resp, status = chats_viewer.api_get_user_account_nb_all_week_messages(user_id, instance_uuid)
    if status == 200:
        return jsonify(resp)
    return create_json_response(resp, status)

View File

@ -87,58 +87,10 @@ def show_correlation():
## get all selected correlations
filter_types = []
correl_option = request.form.get('CookieNameCheck')
if correl_option:
filter_types.append('cookie-name')
correl_option = request.form.get('EtagCheck')
if correl_option:
filter_types.append('etag')
correl_option = request.form.get('CveCheck')
if correl_option:
filter_types.append('cve')
correl_option = request.form.get('CryptocurrencyCheck')
if correl_option:
filter_types.append('cryptocurrency')
correl_option = request.form.get('HHHashCheck')
if correl_option:
filter_types.append('hhhash')
correl_option = request.form.get('PgpCheck')
if correl_option:
filter_types.append('pgp')
correl_option = request.form.get('UsernameCheck')
if correl_option:
filter_types.append('username')
correl_option = request.form.get('DecodedCheck')
if correl_option:
filter_types.append('decoded')
correl_option = request.form.get('ScreenshotCheck')
if correl_option:
filter_types.append('screenshot')
# correlation_objects
correl_option = request.form.get('DomainCheck')
if correl_option:
filter_types.append('domain')
correl_option = request.form.get('ItemCheck')
if correl_option:
filter_types.append('item')
correl_option = request.form.get('chatCheck')
if correl_option:
filter_types.append('chat')
correl_option = request.form.get('subchannelCheck')
if correl_option:
filter_types.append('chat-subchannel')
correl_option = request.form.get('threadCheck')
if correl_option:
filter_types.append('chat-thread')
correl_option = request.form.get('messageCheck')
if correl_option:
filter_types.append('message')
correl_option = request.form.get('imageCheck')
if correl_option:
filter_types.append('image')
correl_option = request.form.get('user_accountCheck')
if correl_option:
filter_types.append('user-account')
for ob_type in ail_objects.get_all_objects():
correl_option = request.form.get(f'{ob_type}_Check')
if correl_option:
filter_types.append(ob_type)
# list as params
filter_types = ",".join(filter_types)
@ -162,7 +114,7 @@ def show_correlation():
related_btc = bool(request.args.get('related_btc', False))
filter_types = ail_objects.sanitize_objs_types(request.args.get('filter', '').split(','))
filter_types = ail_objects.sanitize_objs_types(request.args.get('filter', '').split(','), default=True)
# check if obj_id exist
if not ail_objects.exists_obj(obj_type, subtype, obj_id):
@ -187,8 +139,11 @@ def show_correlation():
else:
dict_object["subtype"] = ''
dict_object["metadata_card"] = ail_objects.get_object_card_meta(obj_type, subtype, obj_id, related_btc=related_btc)
dict_object["metadata_card"]['tags_safe'] = True
return render_template("show_correlation.html", dict_object=dict_object, bootstrap_label=bootstrap_label,
tags_selector_data=Tag.get_tags_selector_data())
tags_selector_data=Tag.get_tags_selector_data(),
meta=dict_object["metadata_card"],
ail_tags=dict_object["metadata_card"]["add_tags_modal"])
@correlation.route('/correlation/get/description')
@login_required
@ -203,7 +158,10 @@ def get_description():
return Response(json.dumps({"status": "error", "reason": "404 Not Found"}, indent=2, sort_keys=True), mimetype='application/json'), 404
# object exist
else:
res = ail_objects.get_object_meta(obj_type, subtype, obj_id, options={'icon', 'tags', 'tags_safe'},
options = {'icon', 'tags', 'tags_safe'}
if obj_type == 'message':
options.add('content')
res = ail_objects.get_object_meta(obj_type, subtype, obj_id, options=options,
flask_context=True)
if 'tags' in res:
res['tags'] = list(res['tags'])
@ -350,6 +308,10 @@ def show_relationship():
dict_object["metadata"]['type_id'] = subtype
else:
dict_object["subtype"] = ''
dict_object["metadata_card"] = ail_objects.get_object_card_meta(obj_type, subtype, obj_id)
dict_object["metadata_card"] = ail_objects.get_object_card_meta(obj_type, subtype, obj_id)
dict_object["metadata_card"]['tags_safe'] = True
return render_template("show_relationship.html", dict_object=dict_object, bootstrap_label=bootstrap_label,
tags_selector_data=Tag.get_tags_selector_data())
tags_selector_data=Tag.get_tags_selector_data(),
meta=dict_object["metadata_card"],
ail_tags=dict_object["metadata_card"]["add_tags_modal"])

View File

@ -306,6 +306,40 @@ def crawlers_last_domains_json():
stats.append(crawlers.get_crawlers_stats_by_day(date, domain_type))
return jsonify(stats)
@crawler_splash.route('/crawlers/last/domains/month/json')
@login_required
@login_read_only
def crawlers_last_domains_month_json():
    """Current-month crawler stats for a domain type, as JSON."""
    domain_type = request.args.get('type')
    if domain_type not in crawlers.get_crawler_all_types():
        return jsonify({'error': 'Invalid domain type'}), 400
    stats = crawlers.get_crawlers_stats_by_month(domain_type)
    return jsonify(stats)
@crawler_splash.route('/crawlers/last/domains/month/previous/json')
@login_required
@login_read_only
def crawlers_last_domains_previous_month_json():
    """Previous-month crawler stats for a domain type, as JSON."""
    domain_type = request.args.get('type')
    if domain_type not in crawlers.get_crawler_all_types():
        return jsonify({'error': 'Invalid domain type'}), 400
    date = Date.get_previous_month_date()
    stats = crawlers.get_crawlers_stats_by_month(domain_type, date=date)
    return jsonify(stats)
@crawler_splash.route('/crawlers/last/domains/status/month/json')
@login_required
@login_read_only
def crawlers_last_domains_status_month_json():
    """Monthly up/down counts for a domain type as [{'name': ..., 'value': ...}]."""
    domain_type = request.args.get('type')
    if domain_type not in crawlers.get_crawler_all_types():
        return jsonify({'error': 'Invalid domain type'}), 400
    stats = crawlers.get_crawlers_stats_up_down_by_month(domain_type)
    # Reshape the stats mapping into the chart's expected list-of-dicts form.
    return jsonify([{'name': name, 'value': value} for name, value in stats.items()])
#### Domains ####
@ -576,6 +610,37 @@ def domains_search_date_post():
type=domain_type, down=down, up=up))
@crawler_splash.route('/domains/explorer/vanity', methods=['GET'])
@login_required
@login_analyst
def domains_explorer_vanity_clusters():
    """Render clusters of onion domains sharing a vanity prefix.

    Fix: `min` arrives from the query string as a str; the old code compared
    int(nb_min) but passed the raw string down to get_vanity_clusters, and
    int() could raise ValueError on garbage input. Parse it once, with the
    same fallback of 4 used for negative values.
    """
    try:
        nb_min = int(request.args.get('min', 4))
    except (TypeError, ValueError):
        nb_min = 4
    if nb_min < 0:
        nb_min = 4
    vanity_clusters = Domains.get_vanity_clusters(nb_min=nb_min)
    return render_template("explorer_vanity_clusters.html", vanity_clusters=vanity_clusters,
                           length=4)
@crawler_splash.route('/domains/explorer/vanity/explore', methods=['GET'])
@login_required
@login_analyst
def domains_explorer_vanity_explore():
    """Render the domains sharing a vanity prefix plus next-level sub-clusters.

    Query args:
        vanity: the vanity prefix to explore (required).
        min:    minimum sub-cluster size (int, default 2); invalid or
                negative values fall back to 4, as in the original code.
    """
    vanity = request.args.get('vanity')
    if not vanity:
        # Original crashed with TypeError on len(None) when the arg was missing.
        return jsonify({'error': 'Invalid vanity'}), 404
    # Original called int() unguarded: non-numeric `?min=` raised HTTP 500.
    try:
        nb_min = int(request.args.get('min', 2))  # TODO SHOW DOMAINS OPTIONS + HARD CODED DOMAINS LIMIT FOR RENDER
    except (TypeError, ValueError):
        nb_min = 4
    if nb_min < 0:
        nb_min = 4
    length = len(vanity)
    vanity_clusters = Domains.get_vanity_cluster(vanity, len_vanity=length + 1, nb_min=nb_min)
    vanity_domains = Domains.get_vanity_domains(vanity, len_vanity=length, meta=True)
    # Breadcrumb of shorter prefixes, ending with the full vanity itself.
    vanities_tree = [vanity[:i] for i in range(4, length)]
    # Original guarded this append with `length == len(vanity)`, which is
    # always true since length was just set from len(vanity).
    vanities_tree.append(vanity)
    return render_template("explorer_vanity_domains.html", vanity_clusters=vanity_clusters,
                           bootstrap_label=bootstrap_label, vanity=vanity, vanities_tree=vanities_tree,
                           vanity_domains=vanity_domains, length=length)
##-- --##

View File

@ -9,7 +9,7 @@ import os
import sys
import json
from flask import render_template, jsonify, request, Blueprint, redirect, url_for, Response, escape, abort
from flask import render_template, jsonify, request, Blueprint, redirect, url_for, Response, abort
from flask_login import login_required, current_user, login_user, logout_user
sys.path.append('modules')
@ -24,6 +24,7 @@ sys.path.append(os.environ['AIL_BIN'])
##################################
from lib import ail_core
from lib.objects import ail_objects
from lib import chats_viewer
from lib import item_basic
from lib import Tracker
from lib import Tag
@ -174,7 +175,7 @@ def show_tracker():
if date_from:
date_from, date_to = Date.sanitise_daterange(date_from, date_to)
objs = tracker.get_objs_by_daterange(date_from, date_to)
meta['objs'] = ail_objects.get_objects_meta(objs, flask_context=True)
meta['objs'] = ail_objects.get_objects_meta(objs, options={'last_full_date'}, flask_context=True)
else:
date_from = ''
date_to = ''
@ -372,6 +373,78 @@ def get_json_tracker_graph():
res = Tracker.get_trackers_graph_by_day([tracker_uuid])
return jsonify(res)
@hunters.route('/tracker/object/add', methods=['GET'])
@login_required
@login_admin
def tracker_object_add():
    """Attach an object to a tracker, then bounce back to the referring page.

    Query args:
        uuid: tracker uuid.
        gid:  object global id (required).
        date: object date; ignored for messages, which carry their own date.
    """
    user_id = current_user.get_id()
    tracker_uuid = request.args.get('uuid')
    object_global_id = request.args.get('gid')
    if not object_global_id:
        # Original crashed with AttributeError on None.startswith(...).
        return create_json_response({'error': 'Invalid gid'}, 400)
    if object_global_id.startswith('messages::'):
        # Messages know their own date; take it from the object itself.
        obj = ail_objects.get_obj_from_global_id(object_global_id)
        date = obj.get_date()
    else:
        date = request.args.get('date')  # TODO check daterange
    res = Tracker.api_tracker_add_object({'uuid': tracker_uuid, 'gid': object_global_id, 'date': date}, user_id)
    if res[1] != 200:
        return create_json_response(res[0], res[1])
    if request.referrer:
        return redirect(request.referrer)
    return redirect(url_for('hunters.show_tracker', uuid=tracker_uuid))
@hunters.route('/tracker/object/remove', methods=['GET'])
@login_required
@login_analyst
def tracker_object_remove():
    """Detach an object from a tracker, then return to the referring page."""
    user_id = current_user.get_id()
    tracker_uuid = request.args.get('uuid')
    obj_gid = request.args.get('gid')
    res = Tracker.api_tracker_remove_object({'uuid': tracker_uuid, 'gid': obj_gid}, user_id)
    if res[1] != 200:
        return create_json_response(res[0], res[1])
    # Prefer bouncing back to wherever the user came from.
    if request.referrer:
        return redirect(request.referrer)
    return redirect(url_for('hunters.show_tracker', uuid=tracker_uuid))
@hunters.route('/tracker/objects', methods=['GET'])
@login_required
@login_admin
def tracker_objects():
    """Build the message-report page for a tracker's matched objects."""
    user_id = current_user.get_id()
    tracker_uuid = request.args.get('uuid', None)
    access = Tracker.api_is_allowed_to_edit_tracker(tracker_uuid, user_id)
    if access[1] != 200:  # invalid access
        return Response(json.dumps(access[0], indent=2, sort_keys=True), mimetype='application/json'), access[1]
    tracker = Tracker.Tracker(tracker_uuid)
    meta = tracker.get_meta(options={'description', 'sparkline', 'tags', 'nb_objs'})
    # Only yara trackers have rule content to display alongside the report.
    yara_rule_content = None
    if meta['type'] == 'yara':
        yara_rule_content = Tracker.get_yara_rule_content(meta['tracked'])
    chats, messages = chats_viewer.get_message_report(tracker.get_objs())
    meta['date'] = Date.get_current_utc_full_time()
    return render_template("messages_report.html", meta=meta, yara_rule_content=yara_rule_content,
                           chats=chats, messages=messages, bootstrap_label=bootstrap_label)
# TODO
# Manual - Title
# - Summary
# Messages table
# Timeline messages by chats - line
# pie charts NB messages all chats
# Barchart NB messages by days
####################
# RETRO HUNT #

View File

@ -209,6 +209,14 @@ def unregister_investigation():
def get_investigations_selector_json():
return jsonify(Investigations.get_investigations_selector())
@investigations_b.route("/object/gid")
@login_required
@login_read_only
def get_object_gid():
    """Resolve an object's global id to its UI page and redirect there."""
    obj_global_id = request.args.get('gid')
    obj = ail_objects.get_obj_from_global_id(obj_global_id)
    return redirect(obj.get_link(flask_context=True))
#
# @investigations_b.route("/object/item") #completely shows the paste in a new tab

View File

@ -0,0 +1,83 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
Blueprint Flask: crawler splash endpoints: dashboard, onion crawler ...
"""
import os
import sys
import json
from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for, Response, abort
from flask_login import login_required, current_user
# Import Role_Manager
from Role_Manager import login_admin, login_analyst, login_read_only
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib import ail_core
from lib import Language
from lib import Tag
from lib.objects import ail_objects
# ============ BLUEPRINT ============
languages_ui = Blueprint('languages_ui', __name__, template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates/chats_explorer'))
# ============ VARIABLES ============
# bootstrap_label = ['primary', 'success', 'danger', 'warning', 'info']
def create_json_response(data, status_code):
    """Serialize *data* as pretty-printed JSON with the given HTTP status."""
    body = json.dumps(data, indent=2, sort_keys=True)
    return Response(body, mimetype='application/json'), status_code
# ============ FUNCTIONS ============
# ============= ROUTES ==============
@languages_ui.route("/languages/object/translate", methods=['POST'])
@login_required
@login_read_only
def translate_object():
    """Save a manual translation for an object, then redirect back.

    Form fields: type, subtype, id, language_target, target, translation.
    A target of "Don't Translate" clears the target language.
    """
    obj_type = request.form.get('type')
    subtype = request.form.get('subtype')
    obj_id = request.form.get('id')
    source = request.form.get('language_target')  # NOTE(review): reads 'language_target' into source — confirm the form field naming against the template
    target = request.form.get('target')
    translation = request.form.get('translation')
    if target == "Don't Translate":
        target = None
    resp = ail_objects.api_manually_translate(obj_type, subtype, obj_id, source, target, translation)
    if resp[1] != 200:
        return create_json_response(resp[0], resp[1])
    if request.referrer:
        return redirect(request.referrer)
    if obj_type == 'ocr':
        return redirect(url_for('objects_ocr.object_ocr', id=obj_id, target=target))  # TODO change to support all objects
    # Original fell through and returned None (HTTP 500) when there was no
    # referrer and the object was not an ocr; return the API result instead.
    return create_json_response(resp[0], resp[1])
@languages_ui.route("/languages/object/detect/language", methods=['GET'])
@login_required
@login_read_only
def detect_object_language():
    """Trigger language detection on an object, then redirect back."""
    obj_type = request.args.get('type')
    subtype = request.args.get('subtype')
    obj_id = request.args.get('id')
    target = request.args.get('target')
    resp = ail_objects.api_detect_language(obj_type, subtype, obj_id)
    if resp[1] != 200:
        return create_json_response(resp[0], resp[1])
    if request.referrer:
        return redirect(request.referrer)
    if obj_type == 'ocr':
        return redirect(url_for('objects_ocr.object_ocr', id=obj_id, target=target))  # TODO change to support all objects
    # Original fell through and returned None (HTTP 500) when there was no
    # referrer and the object was not an ocr; return the API result instead.
    return create_json_response(resp[0], resp[1])

View File

@ -0,0 +1,90 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
'''
Blueprint Flask: crawler splash endpoints: dashboard, onion crawler ...
'''
import os
import sys
from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for, Response, abort, send_file, send_from_directory
from flask_login import login_required, current_user
# Import Role_Manager
from Role_Manager import login_admin, login_analyst, login_read_only, no_cache
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib.objects import Favicons
from packages import Date
# ============ BLUEPRINT ============
objects_favicon = Blueprint('objects_favicon', __name__, template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates/objects/favicon'))
# ============ VARIABLES ============
bootstrap_label = ['primary', 'success', 'danger', 'warning', 'info']
# ============ FUNCTIONS ============
@objects_favicon.route('/favicon/<path:filename>')
@login_required
@login_read_only
@no_cache
def favicon(filename):
    """Serve a favicon image file by its AIL favicon id."""
    # Favicon ids are 9-11 characters; anything else is not a valid id.
    if not filename or not (9 <= len(filename) <= 11):
        abort(404)
    safe_name = filename.replace('/', '')  # strip slashes to block path traversal
    fav = Favicons.Favicon(safe_name)
    return send_from_directory(Favicons.FAVICON_FOLDER, fav.get_rel_path(),
                               as_attachment=False, mimetype='image')
@objects_favicon.route("/objects/favicons", methods=['GET'])
@login_required
@login_read_only
def objects_favicons():
    """Render favicons seen in a date range.

    Object metadata is only fetched when 'show_objects' is set, keeping the
    bare daterange view cheap.
    """
    date_from = request.args.get('date_from')
    date_to = request.args.get('date_to')
    show_objects = request.args.get('show_objects')
    date = Date.sanitise_date_range(date_from, date_to)
    date_from = date['date_from']
    date_to = date['date_to']
    if show_objects:
        dict_objects = Favicons.Favicons().api_get_meta_by_daterange(date_from, date_to)
    else:
        dict_objects = {}
    # Removed stray debug print(dict_objects) that spammed the server log.
    return render_template("FaviconDaterange.html", date_from=date_from, date_to=date_to,
                           dict_objects=dict_objects, show_objects=show_objects)
@objects_favicon.route("/objects/favicons/post", methods=['POST'])
@login_required
@login_read_only
def objects_favicons_post():
    """POST shim: forward the submitted form fields to the GET daterange view."""
    return redirect(url_for('objects_favicon.objects_favicons',
                            date_from=request.form.get('date_from'),
                            date_to=request.form.get('date_to'),
                            show_objects=request.form.get('show_objects')))
@objects_favicon.route("/objects/favicons/range/json", methods=['GET'])
@login_required
@login_read_only
def objects_favicons_range_json():
    """JSON chart data: favicon counts per day over a sanitised daterange."""
    dates = Date.sanitise_date_range(request.args.get('date_from'),
                                     request.args.get('date_to'))
    chart = Favicons.Favicons().api_get_chart_nb_by_daterange(dates['date_from'], dates['date_to'])
    return jsonify(chart)
# ============= ROUTES ==============

Some files were not shown because too many files have changed in this diff Show More