mirror of https://github.com/CIRCL/AIL-framework
Compare commits
123 Commits
Author | SHA1 | Date |
---|---|---|
terrtia | 86f312cbc3 | |
terrtia | 4eb1b01370 | |
terrtia | 21642fe9d4 | |
terrtia | 0dfd92bcd6 | |
terrtia | 0c28b38638 | |
terrtia | adbce24128 | |
terrtia | e21257a3fe | |
terrtia | 50ff7529d2 | |
terrtia | 31b519cc17 | |
terrtia | 2b23d993df | |
terrtia | 5503d8134a | |
terrtia | 1d1671c00f | |
terrtia | 35502d955f | |
terrtia | 26f9e84d97 | |
terrtia | 42ef6fb2e5 | |
terrtia | 20c98de0fa | |
terrtia | 973ced2efe | |
terrtia | 7fd8ae4a81 | |
terrtia | c25ccb8618 | |
terrtia | 0b5a1aa1b8 | |
terrtia | 8bd1ae3815 | |
terrtia | b552e15a8b | |
terrtia | bc42ff2cd1 | |
terrtia | 58666f2355 | |
terrtia | 56fae107bf | |
terrtia | 4cb47e8af3 | |
terrtia | ed13e8bca4 | |
terrtia | 61701e2fcc | |
terrtia | 6ca4b29329 | |
terrtia | dbde04caa3 | |
terrtia | a282354fce | |
terrtia | 414b5af277 | |
terrtia | f37111fe2b | |
terrtia | 5fce682541 | |
terrtia | 5ec0d7f0cf | |
terrtia | a3a664b7f1 | |
terrtia | ee563a79d3 | |
terrtia | 3ecd3fd023 | |
terrtia | 9d481bd0b0 | |
terrtia | de43f350b2 | |
terrtia | 2db54def46 | |
terrtia | b9c37167ad | |
terrtia | 0bacf2b8bd | |
terrtia | fa57171937 | |
terrtia | 59ca8c5d31 | |
terrtia | 406d72bb52 | |
terrtia | 912eadc563 | |
terrtia | 9a0e77dbbd | |
terrtia | d37a56b7d5 | |
terrtia | 599f3ca953 | |
terrtia | e92bf72f64 | |
terrtia | 8483272ee0 | |
terrtia | 6f2668eff1 | |
terrtia | 13372f8c85 | |
terrtia | eb6adc4b98 | |
terrtia | 27b2679ba6 | |
terrtia | dc0545dfd0 | |
terrtia | 9031376b50 | |
terrtia | d526b2fd98 | |
terrtia | 800098540b | |
terrtia | 197ff0222d | |
terrtia | 7acac4dc0c | |
terrtia | 40a32fec75 | |
Thirion Aurélien | d2c974569e | |
niclas | 2b9c1bfda8 | |
terrtia | 38d71e97dd | |
terrtia | 87dc619171 | |
terrtia | ad039e4720 | |
terrtia | c22d2982fb | |
terrtia | e1e9609ad9 | |
terrtia | 35f0d46140 | |
terrtia | d5e830c591 | |
terrtia | 142ac83472 | |
terrtia | 311e6f4bd8 | |
terrtia | e6d70e4f7b | |
terrtia | 0d55725e28 | |
terrtia | afe13185d9 | |
terrtia | 775b7fa868 | |
terrtia | 9917d4212c | |
terrtia | ad63651838 | |
terrtia | 40b1378b30 | |
terrtia | f980ab5145 | |
terrtia | 81c4dde7b0 | |
terrtia | c219febd71 | |
terrtia | 9cdfcdfc6b | |
terrtia | 443f4f2fb3 | |
terrtia | f62ec679cc | |
terrtia | 6fa8f6e0bc | |
terrtia | 784579baef | |
terrtia | cb4345c871 | |
terrtia | 4c5a0b9906 | |
terrtia | 273e264659 | |
terrtia | 88fbe36f70 | |
terrtia | f07a4b422b | |
terrtia | 495ceea73d | |
terrtia | 782677e8ff | |
terrtia | cf62ed49ff | |
terrtia | 93d1b6fc1f | |
terrtia | 152e7bb51e | |
terrtia | 811ee45993 | |
terrtia | 05c8100b8b | |
terrtia | e83323c672 | |
terrtia | d31bc7ef58 | |
terrtia | 1e8f8e9e9c | |
terrtia | 1a3ffe70c1 | |
terrtia | 48ce4a76a4 | |
terrtia | 7d42315419 | |
terrtia | c260455d14 | |
terrtia | 4cf3d628db | |
terrtia | 50bfd92105 | |
terrtia | a9323e076e | |
terrtia | c5f40d85a8 | |
terrtia | 57842c2ecf | |
terrtia | 304afd00aa | |
terrtia | 38a918e485 | |
terrtia | 4168d07118 | |
terrtia | 88f30833c2 | |
terrtia | d84bc14b62 | |
terrtia | 4c1d058e6d | |
terrtia | aa56e71631 | |
terrtia | 99fedf9855 | |
terrtia | 335d94cf79 | |
terrtia | c1529b217d |
|
@ -17,6 +17,7 @@ Blooms
|
|||
PASTES
|
||||
CRAWLED_SCREENSHOT
|
||||
IMAGES
|
||||
FAVICONS
|
||||
BASE64
|
||||
HASHS
|
||||
DATA_ARDB
|
||||
|
|
72
HOWTO.md
72
HOWTO.md
|
@ -1,17 +1,16 @@
|
|||
|
||||
# Feeding, adding new features and contributing
|
||||
# Feeding, Adding new features and Contributing
|
||||
|
||||
## [Documentation AIL Importers](./doc/README.md#ail-importers)
|
||||
## [AIL Importers](./doc/README.md#ail-importers)
|
||||
|
||||
[Documentation AIL Importers](./doc/README.md#ail-importers)
|
||||
Refer to the [AIL Importers Documentation](./doc/README.md#ail-importers)
|
||||
|
||||
## How to feed the AIL framework
|
||||
## Feeding Data to AIL
|
||||
|
||||
AIL is an analysis tool, not a collector!
|
||||
However, if you want to collect some pastes and feed them to AIL, the procedure is described below. Nevertheless, moderate your queries!
|
||||
|
||||
1. [AIL Importers](./doc/README.md#ail-importers)
|
||||
|
||||
2. ZMQ: Be a collaborator of CIRCL and ask to access our feed. It will be sent to the static IP you are using for AIL.
|
||||
|
||||
## How to create a new module
|
||||
|
@ -19,22 +18,18 @@ However, if you want to collect some pastes and feed them to AIL, the procedure
|
|||
To add a new processing or analysis module to AIL, follow these steps:
|
||||
|
||||
1. Add your module name in [./configs/modules.cfg](./configs/modules.cfg) and subscribe to at least one module at minimum (Usually, `Item`).
|
||||
|
||||
2. Use [./bin/modules/modules/TemplateModule.py](./bin/modules/modules/TemplateModule.py) as a sample module and create a new file in bin/modules with the module name used in the `modules.cfg` configuration.
|
||||
|
||||
|
||||
## How to contribute a module
|
||||
## Contributions
|
||||
|
||||
Feel free to fork the code, play with it, make some patches or add additional analysis modules.
|
||||
Contributions are welcome! Fork the repository, experiment with the code, and submit your modules or patches through a pull request.
|
||||
|
||||
To contribute your module, feel free to pull your contribution.
|
||||
## Crawler
|
||||
|
||||
AIL supports crawling of websites and Tor hidden services. Ensure your Tor client's proxy configuration is correct, especially the SOCKS5 proxy settings.
|
||||
|
||||
## Additional information
|
||||
|
||||
### Crawler
|
||||
|
||||
In AIL, you can crawl websites and Tor hidden services. Don't forget to review the proxy configuration of your Tor client and especially if you enabled the SOCKS5 proxy
|
||||
![Crawler](./doc/screenshots/ail-lacus.png?raw=true "AIL framework Crawler")
|
||||
|
||||
### Installation
|
||||
|
||||
|
@ -45,38 +40,35 @@ In AIL, you can crawl websites and Tor hidden services. Don't forget to review t
|
|||
1. Lacus URL:
|
||||
In the web interface, go to `Crawlers` > `Settings` and click on the Edit button
|
||||
|
||||
![Splash Manager Config](./doc/screenshots/lacus_config.png?raw=true "AIL Lacus Config")
|
||||
![AIL Crawler Config](./doc/screenshots/lacus_config.png?raw=true "AIL Lacus Config")
|
||||
|
||||
![Splash Manager Config](./doc/screenshots/lacus_config_edit.png?raw=true "AIL Lacus Config")
|
||||
![AIL Crawler Config Edis](./doc/screenshots/lacus_config_edit.png?raw=true "AIL Lacus Config")
|
||||
|
||||
2. Launch AIL Crawlers:
|
||||
2. Number of Crawlers:
|
||||
Choose the number of crawlers you want to launch
|
||||
|
||||
![Splash Manager Nb Crawlers Config](./doc/screenshots/crawler_nb_captures.png?raw=true "AIL Lacus Nb Crawlers Config")
|
||||
![Crawler Manager Nb Crawlers Config](./doc/screenshots/crawler_nb_captures.png?raw=true "AIL Lacus Nb Crawlers Config")
|
||||
|
||||
![Splash Manager Nb Crawlers Config](./doc/screenshots/crawler_nb_captures_edit.png?raw=true "AIL Lacus Nb Crawlers Config")
|
||||
![Crawler Manager Nb Crawlers Config](./doc/screenshots/crawler_nb_captures_edit.png?raw=true "AIL Lacus Nb Crawlers Config")
|
||||
|
||||
## Chats Translation with LibreTranslate
|
||||
|
||||
### Kvrocks Migration
|
||||
---------------------
|
||||
**Important Note:
|
||||
We are currently working on a [migration script](https://github.com/ail-project/ail-framework/blob/master/update/v5.0/DB_KVROCKS_MIGRATION.py) to facilitate the migration to Kvrocks.
|
||||
**
|
||||
Chats message can be translated using [libretranslate](https://github.com/LibreTranslate/LibreTranslate), an open-source self-hosted machine translation.
|
||||
|
||||
Please note that the current version of this migration script only supports migrating the database on the same server.
|
||||
(If you plan to migrate to another server, we will provide additional instructions in this section once the migration script is completed)
|
||||
### Installation:
|
||||
1. Install LibreTranslate by running the following command:
|
||||
```bash
|
||||
pip install libretranslate
|
||||
```
|
||||
2. Run libretranslate:
|
||||
```bash
|
||||
libretranslate
|
||||
```
|
||||
|
||||
### Configuration:
|
||||
To enable LibreTranslate for chat translation, edit the LibreTranslate URL in the [./configs/core.cfg](./configs/core.cfg) file under the [Translation] section.
|
||||
```
|
||||
[Translation]
|
||||
libretranslate = http://127.0.0.1:5000
|
||||
```
|
||||
|
||||
To migrate your database to Kvrocks:
|
||||
1. Launch ARDB and Kvrocks
|
||||
2. Pull from remote
|
||||
```shell
|
||||
git checkout master
|
||||
git pull
|
||||
```
|
||||
3. Launch the migration script:
|
||||
```shell
|
||||
git checkout master
|
||||
git pull
|
||||
cd update/v5.0
|
||||
./DB_KVROCKS_MIGRATION.py
|
||||
```
|
||||
|
|
|
@ -29,6 +29,8 @@ AIL framework - Framework for Analysis of Information Leaks
|
|||
|
||||
AIL is a modular framework to analyse potential information leaks from unstructured data sources like pastes from Pastebin or similar services or unstructured data streams. AIL framework is flexible and can be extended to support other functionalities to mine or process sensitive information (e.g. data leak prevention).
|
||||
|
||||
![Overview](./doc/screenshots/ail-overview.png?raw=true "AIL framework Overview")
|
||||
|
||||
![Dashboard](./doc/screenshots/dashboard0.png?raw=true "AIL framework dashboard")
|
||||
|
||||
|
||||
|
@ -55,6 +57,8 @@ Allow easy creation and customization by extending an abstract class.
|
|||
|
||||
## Features
|
||||
|
||||
![Internal](./doc/screenshots/ail-internal.png?raw=true "AIL framework Internal")
|
||||
|
||||
- Modular architecture to handle streams of unstructured or structured information
|
||||
- Default support for external ZMQ feeds, such as provided by CIRCL or other providers
|
||||
- Multiple Importers and feeds support
|
||||
|
|
|
@ -275,8 +275,11 @@ function launching_scripts {
|
|||
screen -S "Script_AIL" -X screen -t "MISP_Thehive_Auto_Push" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./MISP_Thehive_Auto_Push.py; read x"
|
||||
sleep 0.1
|
||||
|
||||
# IMAGES
|
||||
screen -S "Script_AIL" -X screen -t "Exif" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./Exif.py; read x"
|
||||
sleep 0.1
|
||||
screen -S "Script_AIL" -X screen -t "OcrExtractor" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./OcrExtractor.py; read x"
|
||||
sleep 0.1
|
||||
|
||||
##################################
|
||||
# TRACKERS MODULES #
|
||||
|
@ -578,7 +581,7 @@ function update_thirdparty {
|
|||
function launch_tests() {
|
||||
tests_dir=${AIL_HOME}/tests
|
||||
bin_dir=${AIL_BIN}
|
||||
python3 -m nose2 --start-dir $tests_dir --coverage $bin_dir --with-coverage testApi test_modules
|
||||
python3 -m nose2 --start-dir $tests_dir --coverage $bin_dir --with-coverage test_api test_modules
|
||||
}
|
||||
|
||||
function reset_password() {
|
||||
|
@ -676,13 +679,16 @@ function menu_display {
|
|||
check_screens;
|
||||
while [ "$1" != "" ]; do
|
||||
case $1 in
|
||||
-l | --launchAuto ) launch_all "automatic";
|
||||
-l | --launchAuto ) check_screens;
|
||||
launch_all "automatic";
|
||||
;;
|
||||
-lr | --launchRedis ) launch_redis;
|
||||
-lr | --launchRedis ) check_screens;
|
||||
launch_redis;
|
||||
;;
|
||||
-la | --launchARDB ) launch_ardb;
|
||||
;;
|
||||
-lk | --launchKVROCKS ) launch_kvrocks;
|
||||
-lk | --launchKVROCKS ) check_screens;
|
||||
launch_kvrocks;
|
||||
;;
|
||||
-lrv | --launchRedisVerify ) launch_redis;
|
||||
wait_until_redis_is_ready;
|
||||
|
@ -692,14 +698,16 @@ while [ "$1" != "" ]; do
|
|||
;;
|
||||
--set_kvrocks_namespaces ) set_kvrocks_namespaces;
|
||||
;;
|
||||
-k | --killAll ) killall;
|
||||
-k | --killAll ) check_screens;
|
||||
killall;
|
||||
;;
|
||||
-r | --restart ) killall;
|
||||
sleep 0.1;
|
||||
check_screens;
|
||||
launch_all "automatic";
|
||||
;;
|
||||
-ks | --killscript ) killscript;
|
||||
-ks | --killscript ) check_screens;
|
||||
killscript;
|
||||
;;
|
||||
-m | --menu ) menu_display;
|
||||
;;
|
||||
|
|
|
@ -11,7 +11,7 @@ import uuid
|
|||
|
||||
import subprocess
|
||||
|
||||
from flask import escape
|
||||
from markupsafe import escape
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
##################################
|
||||
|
|
|
@ -20,6 +20,7 @@ from lib.ConfigLoader import ConfigLoader
|
|||
from lib.objects import CookiesNames
|
||||
from lib.objects import Etags
|
||||
from lib.objects.Domains import Domain
|
||||
from lib.objects import Favicons
|
||||
from lib.objects.Items import Item
|
||||
from lib.objects import Screenshots
|
||||
from lib.objects import Titles
|
||||
|
@ -192,11 +193,13 @@ class Crawler(AbstractModule):
|
|||
# force=force,
|
||||
# general_timeout_in_sec=120)
|
||||
|
||||
# with_favicon = True,
|
||||
capture_uuid = self.lacus.enqueue(url=url,
|
||||
depth=task.get_depth(),
|
||||
user_agent=task.get_user_agent(),
|
||||
proxy=task.get_proxy(),
|
||||
cookies=task.get_cookies(),
|
||||
with_favicon=True,
|
||||
force=force,
|
||||
general_timeout_in_sec=90) # TODO increase timeout if onion ????
|
||||
|
||||
|
@ -244,6 +247,7 @@ class Crawler(AbstractModule):
|
|||
parent_id = task.get_parent()
|
||||
|
||||
entries = self.lacus.get_capture(capture.uuid)
|
||||
|
||||
print(entries.get('status'))
|
||||
self.har = task.get_har()
|
||||
self.screenshot = task.get_screenshot()
|
||||
|
@ -261,6 +265,7 @@ class Crawler(AbstractModule):
|
|||
# Origin + History + tags
|
||||
if self.root_item:
|
||||
self.domain.set_last_origin(parent_id)
|
||||
self.domain.update_vanity_cluster()
|
||||
# Tags
|
||||
for tag in task.get_tags():
|
||||
self.domain.add_tag(tag)
|
||||
|
@ -274,7 +279,7 @@ class Crawler(AbstractModule):
|
|||
for tag in task.get_tags():
|
||||
self.domain.add_tag(tag)
|
||||
self.original_domain.add_history(epoch, root_item=self.root_item)
|
||||
crawlers.update_last_crawled_domain(self.original_domain.get_domain_type(), self.original_domain.id, epoch)
|
||||
# crawlers.update_last_crawled_domain(self.original_domain.get_domain_type(), self.original_domain.id, epoch)
|
||||
|
||||
crawlers.update_last_crawled_domain(self.domain.get_domain_type(), self.domain.id, epoch)
|
||||
print('capture:', capture.uuid, 'completed')
|
||||
|
@ -367,6 +372,12 @@ class Crawler(AbstractModule):
|
|||
etag.add(self.date.replace('/', ''), self.domain)
|
||||
crawlers.extract_hhhash(entries['har'], self.domain.id, self.date.replace('/', ''))
|
||||
|
||||
# FAVICON
|
||||
if entries.get('potential_favicons'):
|
||||
for favicon in entries['potential_favicons']:
|
||||
fav = Favicons.create(favicon)
|
||||
fav.add(item.get_date(), item)
|
||||
|
||||
# Next Children
|
||||
entries_children = entries.get('children')
|
||||
if entries_children:
|
||||
|
|
|
@ -103,14 +103,19 @@ class FeederImporter(AbstractImporter):
|
|||
if data_obj:
|
||||
objs.add(data_obj)
|
||||
|
||||
objs_messages = []
|
||||
for obj in objs:
|
||||
if obj.type == 'item': # object save on disk as file (Items)
|
||||
gzip64_content = feeder.get_gzip64_content()
|
||||
return obj, f'{feeder_name} {gzip64_content}'
|
||||
relay_message = f'{feeder_name} {gzip64_content}'
|
||||
objs_messages.append({'obj': obj, 'message': relay_message})
|
||||
elif obj.type == 'image':
|
||||
date = feeder.get_date()
|
||||
objs_messages.append({'obj': obj, 'message': f'{feeder_name} {date}'})
|
||||
else: # Messages save on DB
|
||||
if obj.exists() and obj.type != 'chat':
|
||||
return obj, f'{feeder_name}'
|
||||
|
||||
objs_messages.append({'obj': obj, 'message': feeder_name})
|
||||
return objs_messages
|
||||
|
||||
class FeederModuleImporter(AbstractModule):
|
||||
def __init__(self):
|
||||
|
@ -128,10 +133,8 @@ class FeederModuleImporter(AbstractModule):
|
|||
def compute(self, message):
|
||||
# TODO HANDLE Invalid JSON
|
||||
json_data = json.loads(message)
|
||||
# TODO multiple objs + messages
|
||||
obj, relay_message = self.importer.importer(json_data)
|
||||
####
|
||||
self.add_message_to_queue(obj=obj, message=relay_message)
|
||||
for obj_message in self.importer.importer(json_data):
|
||||
self.add_message_to_queue(obj=obj_message['obj'], message=obj_message['message'])
|
||||
|
||||
|
||||
# Launch Importer
|
||||
|
|
|
@ -41,6 +41,9 @@ class DefaultFeeder:
|
|||
def get_source(self):
|
||||
return self.json_data.get('source')
|
||||
|
||||
def get_date(self):
|
||||
return datetime.date.today().strftime("%Y%m%d")
|
||||
|
||||
def get_json_data(self):
|
||||
"""
|
||||
Return the JSON data,
|
||||
|
@ -63,7 +66,8 @@ class DefaultFeeder:
|
|||
return self.json_data.get('data')
|
||||
|
||||
def get_obj_type(self):
|
||||
return self.json_data.get('type', 'item')
|
||||
meta = self.get_json_meta()
|
||||
return meta.get('type', 'item')
|
||||
|
||||
## OVERWRITE ME ##
|
||||
def get_obj(self):
|
||||
|
|
|
@ -92,6 +92,14 @@ class AbstractChatFeeder(DefaultFeeder, ABC):
|
|||
def get_reactions(self):
|
||||
return self.json_data['meta'].get('reactions', [])
|
||||
|
||||
def get_date(self):
|
||||
if self.json_data['meta'].get('date'):
|
||||
date = datetime.datetime.fromtimestamp( self.json_data['meta']['date']['timestamp'])
|
||||
date = date.strftime('%Y%m%d')
|
||||
else:
|
||||
date = datetime.date.today().strftime("%Y%m%d")
|
||||
return date
|
||||
|
||||
def get_message_timestamp(self):
|
||||
if not self.json_data['meta'].get('date'):
|
||||
return None
|
||||
|
@ -206,8 +214,7 @@ class AbstractChatFeeder(DefaultFeeder, ABC):
|
|||
subchannel = ChatSubChannels.ChatSubChannel(f'{self.get_chat_id()}/{meta["id"]}', self.get_chat_instance_uuid())
|
||||
thread = None
|
||||
|
||||
# TODO correlation with obj = message/image
|
||||
subchannel.add(date)
|
||||
subchannel.add(date, obj)
|
||||
|
||||
if meta.get('date'): # TODO check if already exists
|
||||
subchannel.set_created_at(int(meta['date']['timestamp']))
|
||||
|
@ -358,7 +365,58 @@ class AbstractChatFeeder(DefaultFeeder, ABC):
|
|||
# CHAT
|
||||
chat_objs = self.process_chat(new_objs, obj, date, timestamp, reply_id=reply_id)
|
||||
|
||||
# Message forward
|
||||
# # TODO HANDLE OTHERS OBJECT TYPE
|
||||
# # TODO MAKE IT GENERIC FOR OTHERS CHATS !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
# # Message forward + Discussion
|
||||
# if self.get_json_meta().get('forward'):
|
||||
# discussion_id = self.get_json_meta().get('discussion')
|
||||
# forward_from = self.get_message_forward()
|
||||
#
|
||||
# if discussion_id: # TODO HANDLE FORWARDED MESSAGES FROM EXTERNAL CHANNELS
|
||||
# chat_forward_id = forward_from['from']['id']
|
||||
# message_forward_id = forward_from['from']['channel_post']
|
||||
#
|
||||
# # if chat_forward_id == discussion_id:
|
||||
# # linked_chat = Chat(chat_forward_id, self.get_chat_instance_uuid())
|
||||
# # if linked_chat.exists():
|
||||
# # # create thread
|
||||
# # # add message replies for each childrens
|
||||
#
|
||||
# # TODO HANDLE THREAD
|
||||
# # TODO Change FORWARD META FIELDS
|
||||
# # meta['forward'] = {}
|
||||
# # # CHAT ID
|
||||
# # # SUBCHANNEL ID -> can be None
|
||||
# # # Message ID
|
||||
#
|
||||
# # meta['forward']['origin']
|
||||
# # # same as 'forward'
|
||||
#
|
||||
# if self.get_json_meta().get('forward'):
|
||||
# forward = self.get_message_forward()
|
||||
# f_chat = forward['chat']
|
||||
# f_subchannel = forward.get('subchannel')
|
||||
# f_id = forward.get('id')
|
||||
# if not f_subchannel:
|
||||
# chat_forward = Chat(f_chat, self.get_chat_instance_uuid())
|
||||
# if chat_forward.exists():
|
||||
# for chat_obj in chat_objs:
|
||||
# if chat_obj.type == 'chat':
|
||||
# chat_forward.add_relationship(chat_obj.get_global_id(), 'forward')
|
||||
# # TODO LIST FORWARDED MESSAGES
|
||||
#
|
||||
#
|
||||
# # Discord -> serverID + subchannel ID + message ID
|
||||
# # Telegram -> chat ID + Message ID
|
||||
# # + ORIGIN IDs
|
||||
#
|
||||
#
|
||||
#
|
||||
# # TODO create relationships graph
|
||||
#
|
||||
#
|
||||
# # TODO REMOVE ME
|
||||
# # Message forward # TODO handle subchannel + message ID
|
||||
# if self.get_json_meta().get('forward'):
|
||||
# forward_from = self.get_message_forward()
|
||||
# print('-----------------------------------------------------------')
|
||||
|
|
|
@ -9,7 +9,6 @@ The ``Domain``
|
|||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import redis
|
||||
import configparser
|
||||
|
||||
|
|
|
@ -16,7 +16,7 @@ import time
|
|||
import uuid
|
||||
|
||||
from enum import Enum
|
||||
from flask import escape
|
||||
from markupsafe import escape
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
##################################
|
||||
|
@ -152,25 +152,30 @@ class Investigation(object):
|
|||
return r_tracking.smembers(f'investigations:misp:{self.uuid}')
|
||||
|
||||
# # TODO: DATE FORMAT
|
||||
def get_metadata(self, r_str=False):
|
||||
def get_metadata(self, options=set(), r_str=False):
|
||||
if r_str:
|
||||
analysis = self.get_analysis_str()
|
||||
threat_level = self.get_threat_level_str()
|
||||
else:
|
||||
analysis = self.get_analysis()
|
||||
threat_level = self.get_threat_level()
|
||||
return {'uuid': self.uuid,
|
||||
'name': self.get_name(),
|
||||
|
||||
# 'name': self.get_name(),
|
||||
meta = {'uuid': self.uuid,
|
||||
'threat_level': threat_level,
|
||||
'analysis': analysis,
|
||||
'tags': self.get_tags(),
|
||||
'tags': list(self.get_tags()),
|
||||
'user_creator': self.get_creator_user(),
|
||||
'date': self.get_date(),
|
||||
'timestamp': self.get_timestamp(r_str=r_str),
|
||||
'last_change': self.get_last_change(r_str=r_str),
|
||||
'info': self.get_info(),
|
||||
'nb_objects': self.get_nb_objects(),
|
||||
'misp_events': self.get_misp_events()}
|
||||
'misp_events': list(self.get_misp_events())
|
||||
}
|
||||
if 'objects' in options:
|
||||
meta['objects'] = self.get_objects()
|
||||
return meta
|
||||
|
||||
def set_name(self, name):
|
||||
r_tracking.hset(f'investigations:data:{self.uuid}', 'name', name)
|
||||
|
@ -368,6 +373,21 @@ def get_investigations_selector():
|
|||
|
||||
#### API ####
|
||||
|
||||
def api_get_investigation(investigation_uuid): # TODO check if is UUIDv4
|
||||
investigation = Investigation(investigation_uuid)
|
||||
if not investigation.exists():
|
||||
return {'status': 'error', 'reason': 'Investigation Not Found'}, 404
|
||||
|
||||
meta = investigation.get_metadata(options={'objects'}, r_str=False)
|
||||
# objs = []
|
||||
# for obj in investigation.get_objects():
|
||||
# obj_meta = ail_objects.get_object_meta(obj["type"], obj["subtype"], obj["id"], flask_context=True)
|
||||
# comment = investigation.get_objects_comment(f'{obj["type"]}:{obj["subtype"]}:{obj["id"]}')
|
||||
# if comment:
|
||||
# obj_meta['comment'] = comment
|
||||
# objs.append(obj_meta)
|
||||
return meta, 200
|
||||
|
||||
# # TODO: CHECK Mandatory Fields
|
||||
# # TODO: SANITYZE Fields
|
||||
# # TODO: Name ?????
|
||||
|
|
|
@ -7,6 +7,7 @@ import sys
|
|||
import html2text
|
||||
|
||||
import gcld3
|
||||
from lexilang.detector import detect as lexilang_detect
|
||||
from libretranslatepy import LibreTranslateAPI
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
|
@ -17,6 +18,7 @@ from lib.ConfigLoader import ConfigLoader
|
|||
|
||||
config_loader = ConfigLoader()
|
||||
r_cache = config_loader.get_redis_conn("Redis_Cache")
|
||||
r_lang = config_loader.get_db_conn("Kvrocks_Languages")
|
||||
TRANSLATOR_URL = config_loader.get_config_str('Translation', 'libretranslate')
|
||||
config_loader = None
|
||||
|
||||
|
@ -256,9 +258,6 @@ def get_iso_from_languages(l_languages, sort=False):
|
|||
return l_iso
|
||||
|
||||
|
||||
class LanguageDetector:
|
||||
pass
|
||||
|
||||
def get_translator_instance():
|
||||
return TRANSLATOR_URL
|
||||
|
||||
|
@ -266,7 +265,10 @@ def _get_html2text(content, ignore_links=False):
|
|||
h = html2text.HTML2Text()
|
||||
h.ignore_links = ignore_links
|
||||
h.ignore_images = ignore_links
|
||||
return h.handle(content)
|
||||
content = h.handle(content)
|
||||
if content == '\n\n':
|
||||
content = ''
|
||||
return content
|
||||
|
||||
def _clean_text_to_translate(content, html=False, keys_blocks=True):
|
||||
if html:
|
||||
|
@ -299,30 +301,150 @@ def _clean_text_to_translate(content, html=False, keys_blocks=True):
|
|||
content = content.replace(it, '')
|
||||
return content
|
||||
|
||||
#### AIL Objects ####
|
||||
#### LANGUAGE ENGINE ####
|
||||
|
||||
def get_obj_translation(obj_global_id, content, field='', source=None, target='en'):
|
||||
# first seen
|
||||
# last seen
|
||||
# language by date -> iter on object date ????
|
||||
|
||||
## Langs
|
||||
def get_language_obj_types(language):
|
||||
return r_lang.smembers(f'languages:{language}')
|
||||
|
||||
def get_language_objs(language, obj_type, obj_subtype=''):
|
||||
return r_lang.smembers(f'langs:{obj_type}:{obj_subtype}:{language}')
|
||||
|
||||
# def get_languages_objs(languages, obj_type, obj_subtype='')
|
||||
|
||||
## Objs
|
||||
def get_objs_languages(obj_type, obj_subtype=''):
|
||||
if obj_subtype:
|
||||
return r_lang.smembers(f'objs:lang:{obj_type}:{obj_subtype}')
|
||||
else:
|
||||
return r_lang.smembers(f'objs:langs:{obj_type}')
|
||||
|
||||
## Obj
|
||||
def get_obj_languages(obj_type, obj_subtype, obj_id):
|
||||
return r_lang.smembers(f'obj:lang:{obj_type}:{obj_subtype}:{obj_id}')
|
||||
|
||||
def get_obj_language_stats(obj_type, obj_subtype, obj_id):
|
||||
return r_lang.zrange(f'obj:langs:stat:{obj_type}:{obj_subtype}:{obj_id}', 0, -1, withscores=True)
|
||||
|
||||
def get_obj_main_language(obj_type, obj_subtype, obj_id):
|
||||
language = r_lang.zrevrange(f'obj:langs:stat:{obj_type}:{obj_subtype}:{obj_id}', 0, 0)
|
||||
if language:
|
||||
return language[0]
|
||||
|
||||
# TODO ADD language to CHAT GLOBAL SET
|
||||
def add_obj_language(language, obj_type, obj_subtype, obj_id, objs_containers=set()): # (s)
|
||||
if not obj_subtype:
|
||||
obj_subtype = ''
|
||||
obj_global_id = f'{obj_type}:{obj_subtype}:{obj_id}'
|
||||
|
||||
r_lang.sadd(f'objs:langs:{obj_type}', language)
|
||||
r_lang.sadd(f'objs:lang:{obj_type}:{obj_subtype}', language)
|
||||
new = r_lang.sadd(f'obj:lang:{obj_global_id}', language)
|
||||
|
||||
r_lang.sadd(f'languages:{language}', f'{obj_type}:{obj_subtype}') ################### REMOVE ME ???
|
||||
r_lang.sadd(f'langs:{obj_type}:{obj_subtype}:{language}', obj_global_id)
|
||||
|
||||
if new:
|
||||
for global_id in objs_containers:
|
||||
r_lang.zincrby(f'obj:langs:stat:{global_id}', 1, language)
|
||||
|
||||
|
||||
def remove_obj_language(language, obj_type, obj_subtype, obj_id, objs_containers=set()):
|
||||
if not obj_subtype:
|
||||
obj_subtype = ''
|
||||
obj_global_id = f'{obj_type}:{obj_subtype}:{obj_id}'
|
||||
rem = r_lang.srem(f'obj:lang:{obj_global_id}', language)
|
||||
|
||||
delete_obj_translation(obj_global_id, language)
|
||||
|
||||
r_lang.srem(f'langs:{obj_type}:{obj_subtype}:{language}', obj_global_id)
|
||||
if not r_lang.exists(f'langs:{obj_type}:{obj_subtype}:{language}'):
|
||||
r_lang.srem(f'objs:lang:{obj_type}:{obj_subtype}', language)
|
||||
r_lang.srem(f'languages:{language}', f'{obj_type}:{obj_subtype}')
|
||||
if not r_lang.exists(f'objs:lang:{obj_type}:{obj_subtype}'):
|
||||
if r_lang.scard(f'objs:langs:{obj_type}') <= 1:
|
||||
r_lang.srem(f'objs:langs:{obj_type}', language)
|
||||
|
||||
if rem:
|
||||
for global_id in objs_containers:
|
||||
r = r_lang.zincrby(f'obj:langs:stat:{global_id}', -1, language)
|
||||
if r < 1:
|
||||
r_lang.zrem(f'obj:langs:stat:{global_id}', language)
|
||||
|
||||
# TODO handle fields
|
||||
def detect_obj_language(obj_type, obj_subtype, obj_id, content, objs_containers=set()):
|
||||
detector = LanguagesDetector(nb_langs=1)
|
||||
language = detector.detect(content)
|
||||
if language:
|
||||
language = language[0]
|
||||
previous_lang = get_obj_languages(obj_type, obj_subtype, obj_id)
|
||||
if previous_lang:
|
||||
previous_lang = previous_lang.pop()
|
||||
if language != previous_lang:
|
||||
remove_obj_language(previous_lang, obj_type, obj_subtype, obj_id, objs_containers=objs_containers)
|
||||
add_obj_language(language, obj_type, obj_subtype, obj_id, objs_containers=objs_containers)
|
||||
else:
|
||||
add_obj_language(language, obj_type, obj_subtype, obj_id, objs_containers=objs_containers)
|
||||
return language
|
||||
|
||||
## Translation
|
||||
def r_get_obj_translation(obj_global_id, language, field=''):
|
||||
return r_lang.hget(f'tr:{obj_global_id}:{field}', language)
|
||||
|
||||
def _get_obj_translation(obj_global_id, language, source=None, content=None, field='', objs_containers=set()):
|
||||
"""
|
||||
Returns translated content
|
||||
Returns translated content
|
||||
"""
|
||||
translation = r_cache.get(f'translation:{target}:{obj_global_id}:{field}')
|
||||
translation = r_cache.get(f'translation:{language}:{obj_global_id}:{field}')
|
||||
r_cache.expire(f'translation:{language}:{obj_global_id}:{field}', 0)
|
||||
if translation:
|
||||
# DEBUG
|
||||
# print('cache')
|
||||
# r_cache.expire(f'translation:{target}:{obj_global_id}:{field}', 0)
|
||||
# r_cache.expire(f'translation:{language}:{obj_global_id}:{field}', 0)
|
||||
return translation
|
||||
translation = LanguageTranslator().translate(content, source=source, target=target)
|
||||
# TODO HANDLE FIELDS TRANSLATION
|
||||
translation = r_get_obj_translation(obj_global_id, language, field=field)
|
||||
if not translation:
|
||||
source, translation = LanguageTranslator().translate(content, source=source, target=language)
|
||||
if source:
|
||||
obj_type, subtype, obj_id = obj_global_id.split(':', 2)
|
||||
add_obj_language(source, obj_type, subtype, obj_id, objs_containers=objs_containers)
|
||||
if translation:
|
||||
r_cache.set(f'translation:{target}:{obj_global_id}:{field}', translation)
|
||||
r_cache.expire(f'translation:{target}:{obj_global_id}:{field}', 300)
|
||||
r_cache.set(f'translation:{language}:{obj_global_id}:{field}', translation)
|
||||
r_cache.expire(f'translation:{language}:{obj_global_id}:{field}', 300)
|
||||
return translation
|
||||
|
||||
## --AIL Objects-- ##
|
||||
def get_obj_translation(obj_global_id, language, source=None, content=None, field='', objs_containers=set()):
|
||||
return _get_obj_translation(obj_global_id, language, source=source, content=content, field=field, objs_containers=objs_containers)
|
||||
|
||||
|
||||
# TODO Force to edit ????
|
||||
|
||||
def set_obj_translation(obj_global_id, language, translation, field=''):
|
||||
r_cache.delete(f'translation:{language}:{obj_global_id}:')
|
||||
return r_lang.hset(f'tr:{obj_global_id}:{field}', language, translation)
|
||||
|
||||
def delete_obj_translation(obj_global_id, language, field=''):
|
||||
r_cache.delete(f'translation:{language}:{obj_global_id}:')
|
||||
r_lang.hdel(f'tr:{obj_global_id}:{field}', language)
|
||||
|
||||
## --LANGUAGE ENGINE-- ##
|
||||
|
||||
|
||||
#### AIL Objects ####
|
||||
|
||||
class LanguagesDetector:
|
||||
|
||||
def __init__(self, nb_langs=3, min_proportion=0.2, min_probability=0.7, min_len=0):
|
||||
self.lt = LibreTranslateAPI(get_translator_instance())
|
||||
def __init__(self, nb_langs=3, min_proportion=0.2, min_probability=-1, min_len=0):
|
||||
lt_url = get_translator_instance()
|
||||
if not lt_url:
|
||||
self.lt = None
|
||||
else:
|
||||
self.lt = LibreTranslateAPI(get_translator_instance())
|
||||
try:
|
||||
self.lt.languages()
|
||||
except Exception:
|
||||
|
@ -339,37 +461,71 @@ class LanguagesDetector:
|
|||
if self.min_len > 0:
|
||||
if len(content) < self.min_len:
|
||||
return languages
|
||||
# p = self.detector.FindTopNMostFreqLangs(content, num_langs=3)
|
||||
# for lang in p:
|
||||
# print(lang.language, lang.probability, lang.proportion, lang.is_reliable)
|
||||
# print('------------------------------------------------')
|
||||
for lang in self.detector.FindTopNMostFreqLangs(content, num_langs=self.nb_langs):
|
||||
if lang.proportion >= self.min_proportion and lang.probability >= self.min_probability and lang.is_reliable:
|
||||
languages.append(lang.language)
|
||||
return languages
|
||||
|
||||
def detect_lexilang(self, content):
|
||||
language, prob = lexilang_detect(content)
|
||||
if prob > 0 and self.min_probability == -1:
|
||||
return [language]
|
||||
elif prob > 0.4:
|
||||
return [language]
|
||||
else:
|
||||
return []
|
||||
|
||||
def detect_libretranslate(self, content):
|
||||
languages = []
|
||||
try:
|
||||
# [{"confidence": 0.6, "language": "en"}]
|
||||
resp = self.lt.detect(content)
|
||||
except: # TODO ERROR MESSAGE
|
||||
resp = []
|
||||
except Exception as e: # TODO ERROR MESSAGE
|
||||
raise Exception(f'libretranslate error: {e}')
|
||||
# resp = []
|
||||
if resp:
|
||||
if isinstance(resp, dict):
|
||||
raise Exception(f'libretranslate error {resp}')
|
||||
for language in resp:
|
||||
if language.confidence >= self.min_probability:
|
||||
languages.append(language)
|
||||
return languages
|
||||
|
||||
def detect(self, content):
|
||||
def detect(self, content, force_gcld3=False): # TODO detect length between 20-200 ????
|
||||
if not content:
|
||||
return []
|
||||
content = _clean_text_to_translate(content, html=True)
|
||||
if not content:
|
||||
return []
|
||||
# DEBUG
|
||||
# print('-------------------------------------------------------')
|
||||
# print(content)
|
||||
# print(len(content))
|
||||
# lexilang
|
||||
if len(content) < 150:
|
||||
# print('lexilang')
|
||||
languages = self.detect_lexilang(content)
|
||||
# gcld3
|
||||
if len(content) >= 200 or not self.lt:
|
||||
language = self.detect_gcld3(content)
|
||||
# libretranslate
|
||||
else:
|
||||
language = self.detect_libretranslate(content)
|
||||
return language
|
||||
# if len(content) >= 200 or not self.lt or force_gcld3:
|
||||
# print('gcld3')
|
||||
languages = self.detect_gcld3(content)
|
||||
# libretranslate
|
||||
# else:
|
||||
# languages = self.detect_libretranslate(content)
|
||||
if not languages:
|
||||
languages = []
|
||||
return languages
|
||||
|
||||
class LanguageTranslator:
|
||||
|
||||
def __init__(self):
|
||||
self.lt = LibreTranslateAPI(get_translator_instance())
|
||||
self.ld = LanguagesDetector(nb_langs=1)
|
||||
|
||||
def languages(self):
|
||||
languages = []
|
||||
|
@ -399,13 +555,13 @@ class LanguageTranslator:
|
|||
return language[0].get('language')
|
||||
|
||||
def detect(self, content):
|
||||
# gcld3
|
||||
if len(content) >= 200:
|
||||
language = self.detect_gcld3(content)
|
||||
# libretranslate
|
||||
else:
|
||||
language = self.detect_libretranslate(content)
|
||||
return language
|
||||
# print('++++++++++++++++++++++++++++++++++++++++++++++++++++++')
|
||||
# print(content)
|
||||
language = self.ld.detect(content)
|
||||
if language:
|
||||
# print(language[0])
|
||||
# print('##############################################################')
|
||||
return language[0]
|
||||
|
||||
def translate(self, content, source=None, target="en"): # TODO source target
|
||||
if target not in get_translation_languages():
|
||||
|
@ -424,9 +580,9 @@ class LanguageTranslator:
|
|||
translation = None
|
||||
# TODO LOG and display error
|
||||
if translation == content:
|
||||
print('EQUAL')
|
||||
# print('EQUAL')
|
||||
translation = None
|
||||
return translation
|
||||
return source, translation
|
||||
|
||||
|
||||
LIST_LANGUAGES = {}
|
||||
|
|
|
@ -32,6 +32,9 @@ config_loader = None
|
|||
|
||||
# # # # UNSAFE TAGS # # # #
|
||||
|
||||
# set of unsafe tags
|
||||
UNSAFE_TAGS = None
|
||||
|
||||
def build_unsafe_tags():
|
||||
tags = set()
|
||||
# CE content
|
||||
|
@ -52,12 +55,12 @@ def is_tags_safe(ltags):
|
|||
:return: is a tag in the set unsafe
|
||||
:rtype: boolean
|
||||
"""
|
||||
return unsafe_tags.isdisjoint(ltags)
|
||||
global UNSAFE_TAGS
|
||||
if UNSAFE_TAGS is None:
|
||||
UNSAFE_TAGS = build_unsafe_tags()
|
||||
return UNSAFE_TAGS.isdisjoint(ltags)
|
||||
|
||||
|
||||
# set of unsafe tags
|
||||
unsafe_tags = build_unsafe_tags()
|
||||
|
||||
# - - - UNSAFE TAGS - - - #
|
||||
|
||||
# # TODO: verify tags + object_type
|
||||
|
@ -80,16 +83,15 @@ def get_obj_by_tag(key_tag):
|
|||
|
||||
#### Taxonomies ####
|
||||
|
||||
TAXONOMIES = {}
|
||||
TAXONOMIES = None
|
||||
def load_taxonomies():
|
||||
global TAXONOMIES
|
||||
manifest = os.path.join(os.environ['AIL_HOME'], 'files/misp-taxonomies/MANIFEST.json')
|
||||
TAXONOMIES = Taxonomies(manifest_path=manifest)
|
||||
|
||||
|
||||
load_taxonomies()
|
||||
|
||||
def get_taxonomies():
|
||||
if TAXONOMIES is None:
|
||||
load_taxonomies()
|
||||
return TAXONOMIES.keys()
|
||||
|
||||
# TODO rename me to get enabled_taxonomies
|
||||
|
@ -111,12 +113,18 @@ def disable_taxonomy(taxonomy):
|
|||
r_tags.srem('taxonomies:enabled', taxonomy)
|
||||
|
||||
def exists_taxonomy(taxonomy):
|
||||
if TAXONOMIES is None:
|
||||
load_taxonomies()
|
||||
return TAXONOMIES.get(taxonomy) is not None
|
||||
|
||||
def get_taxonomy_description(taxonomy):
|
||||
if TAXONOMIES is None:
|
||||
load_taxonomies()
|
||||
return TAXONOMIES.get(taxonomy).description
|
||||
|
||||
def get_taxonomy_name(taxonomy):
|
||||
if TAXONOMIES is None:
|
||||
load_taxonomies()
|
||||
return TAXONOMIES.get(taxonomy).name
|
||||
|
||||
def get_taxonomy_predicates(taxonomy):
|
||||
|
@ -133,12 +141,18 @@ def get_taxonomy_predicates(taxonomy):
|
|||
return meta
|
||||
|
||||
def get_taxonomy_refs(taxonomy):
|
||||
if TAXONOMIES is None:
|
||||
load_taxonomies()
|
||||
return TAXONOMIES.get(taxonomy).refs
|
||||
|
||||
def get_taxonomy_version(taxonomy):
|
||||
if TAXONOMIES is None:
|
||||
load_taxonomies()
|
||||
return TAXONOMIES.get(taxonomy).version
|
||||
|
||||
def get_taxonomy_tags(taxonomy, enabled=False):
|
||||
if TAXONOMIES is None:
|
||||
load_taxonomies()
|
||||
taxonomy_obj = TAXONOMIES.get(taxonomy)
|
||||
tags = []
|
||||
for p, content in taxonomy_obj.items():
|
||||
|
@ -165,6 +179,8 @@ def get_taxonomy_meta(taxonomy_name, enabled=False, enabled_tags=False, nb_activ
|
|||
meta = {}
|
||||
if not exists_taxonomy(taxonomy_name):
|
||||
return meta
|
||||
if TAXONOMIES is None:
|
||||
load_taxonomies()
|
||||
taxonomy = TAXONOMIES.get(taxonomy_name)
|
||||
meta['description'] = taxonomy.description
|
||||
meta['name'] = taxonomy.name
|
||||
|
@ -241,6 +257,8 @@ def api_update_taxonomy_tag_enabled(data):
|
|||
if not exists_taxonomy(taxonomy):
|
||||
return {'error': f'taxonomy {taxonomy} not found'}, 404
|
||||
tags = data.get('tags', [])
|
||||
if TAXONOMIES is None:
|
||||
load_taxonomies()
|
||||
taxonomy_tags = set(TAXONOMIES.get(taxonomy).machinetags())
|
||||
for tag in tags:
|
||||
if tag not in taxonomy_tags:
|
||||
|
@ -249,6 +267,8 @@ def api_update_taxonomy_tag_enabled(data):
|
|||
|
||||
def enable_taxonomy_tags(taxonomy):
|
||||
enable_taxonomy(taxonomy)
|
||||
if TAXONOMIES is None:
|
||||
load_taxonomies()
|
||||
for tag in TAXONOMIES.get(taxonomy).machinetags():
|
||||
add_taxonomy_tag_enabled(taxonomy, tag)
|
||||
|
||||
|
@ -279,9 +299,8 @@ def api_disable_taxonomy_tags(data):
|
|||
#
|
||||
|
||||
# TODO Synonyms
|
||||
|
||||
GALAXIES = {}
|
||||
CLUSTERS = {}
|
||||
GALAXIES = None
|
||||
CLUSTERS = None
|
||||
def load_galaxies():
|
||||
global GALAXIES
|
||||
galaxies = []
|
||||
|
@ -298,11 +317,10 @@ def load_galaxies():
|
|||
clusters.append(json.load(f))
|
||||
CLUSTERS = Clusters(clusters)
|
||||
|
||||
|
||||
# LOAD GALAXY + CLUSTERS
|
||||
load_galaxies()
|
||||
|
||||
def get_galaxies():
|
||||
if GALAXIES is None:
|
||||
# LOAD GALAXY + CLUSTERS
|
||||
load_galaxies()
|
||||
return GALAXIES.keys()
|
||||
|
||||
# TODO RENAME ME
|
||||
|
@ -310,9 +328,15 @@ def get_active_galaxies():
|
|||
return r_tags.smembers('galaxies:enabled')
|
||||
|
||||
def get_galaxy(galaxy_name):
|
||||
if GALAXIES is None:
|
||||
# LOAD GALAXY + CLUSTERS
|
||||
load_galaxies()
|
||||
return GALAXIES.get(galaxy_name)
|
||||
|
||||
def exists_galaxy(galaxy):
|
||||
if CLUSTERS is None:
|
||||
# LOAD GALAXY + CLUSTERS
|
||||
load_galaxies()
|
||||
return CLUSTERS.get(galaxy) is not None
|
||||
|
||||
def is_galaxy_enabled(galaxy):
|
||||
|
@ -369,9 +393,15 @@ def get_galaxy_tag_meta(galaxy_type, tag):
|
|||
|
||||
|
||||
def get_clusters():
|
||||
if CLUSTERS is None:
|
||||
# LOAD GALAXY + CLUSTERS
|
||||
load_galaxies()
|
||||
return CLUSTERS.keys()
|
||||
|
||||
def get_cluster(cluster_type):
|
||||
if CLUSTERS is None:
|
||||
# LOAD GALAXY + CLUSTERS
|
||||
load_galaxies()
|
||||
return CLUSTERS.get(cluster_type)
|
||||
|
||||
def get_galaxy_tags(galaxy_type):
|
||||
|
@ -1558,14 +1588,14 @@ def get_obj_date(object_type, object_id):
|
|||
return None
|
||||
|
||||
# API QUERY
|
||||
def api_delete_obj_tags(tags=[], object_id=None, object_type="item"):
|
||||
def api_delete_obj_tags(tags=[], object_id=None, object_type="item", subtype=''):
|
||||
if not object_id:
|
||||
return ({'status': 'error', 'reason': 'object id not found'}, 404)
|
||||
if not tags:
|
||||
return ({'status': 'error', 'reason': 'No Tag(s) specified'}, 400)
|
||||
|
||||
for tag in tags:
|
||||
res = delete_object_tag(tag, object_type, object_id, subtype='')
|
||||
res = delete_object_tag(tag, object_type, object_id, subtype=subtype)
|
||||
if res:
|
||||
return res
|
||||
|
||||
|
|
|
@ -12,11 +12,10 @@ import yara
|
|||
import datetime
|
||||
import base64
|
||||
|
||||
from ail_typo_squatting import runAll
|
||||
import math
|
||||
|
||||
from collections import defaultdict
|
||||
from flask import escape
|
||||
from markupsafe import escape
|
||||
from textblob import TextBlob
|
||||
from nltk.tokenize import RegexpTokenizer
|
||||
|
||||
|
@ -38,24 +37,22 @@ logger = logging.getLogger()
|
|||
|
||||
config_loader = ConfigLoader.ConfigLoader()
|
||||
r_cache = config_loader.get_redis_conn("Redis_Cache")
|
||||
|
||||
r_tracker = config_loader.get_db_conn("Kvrocks_Trackers")
|
||||
|
||||
items_dir = config_loader.get_config_str("Directories", "pastes")
|
||||
if items_dir[-1] == '/':
|
||||
items_dir = items_dir[:-1]
|
||||
config_loader = None
|
||||
|
||||
email_regex = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}'
|
||||
email_regex = re.compile(email_regex)
|
||||
|
||||
special_characters = set('[<>~!?@#$%^&*|()_-+={}":;,.\'\n\r\t]/\\')
|
||||
special_characters.add('\\s')
|
||||
|
||||
# NLTK tokenizer
|
||||
tokenizer = RegexpTokenizer('[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]+',
|
||||
TOKENIZER = None
|
||||
|
||||
def init_tokenizer():
|
||||
global TOKENIZER
|
||||
TOKENIZER = RegexpTokenizer('[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]+',
|
||||
gaps=True, discard_empty=True)
|
||||
|
||||
def get_special_characters():
|
||||
special_characters = set('[<>~!?@#$%^&*|()_-+={}":;,.\'\n\r\t]/\\')
|
||||
special_characters.add('\\s')
|
||||
return special_characters
|
||||
|
||||
###############
|
||||
#### UTILS ####
|
||||
def is_valid_uuid_v4(curr_uuid):
|
||||
|
@ -76,6 +73,8 @@ def is_valid_regex(tracker_regex):
|
|||
return False
|
||||
|
||||
def is_valid_mail(email):
|
||||
email_regex = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}'
|
||||
email_regex = re.compile(email_regex)
|
||||
result = email_regex.match(email)
|
||||
if result:
|
||||
return True
|
||||
|
@ -385,7 +384,7 @@ class Tracker:
|
|||
r_tracker.srem(f'obj:tracker:{obj_type}:{subtype}:{obj_id}:{self.uuid}', date)
|
||||
|
||||
r_tracker.srem(f'obj:trackers:{obj_type}:{subtype}:{obj_id}', self.uuid)
|
||||
r_tracker.srem(f'tracker:objs:{self.uuid}', f'{obj_type}:{subtype}:{obj_id}')
|
||||
r_tracker.srem(f'tracker:objs:{self.uuid}:{obj_type}', f'{subtype}:{obj_id}')
|
||||
self.update_daterange()
|
||||
|
||||
# TODO escape custom tags
|
||||
|
@ -400,6 +399,9 @@ class Tracker:
|
|||
tracker_type = 'yara'
|
||||
|
||||
elif tracker_type == 'typosquatting':
|
||||
|
||||
from ail_typo_squatting import runAll
|
||||
|
||||
domain = to_track.split(" ")[0]
|
||||
typo_generation = runAll(domain=domain, limit=math.inf, formatoutput="text", pathOutput="-", verbose=False) # TODO REPLACE LIMIT BY -1
|
||||
for typo in typo_generation:
|
||||
|
@ -701,7 +703,7 @@ def get_trackers_dashboard():
|
|||
for raw in r_tracker.lrange('trackers:dashboard', 0, -1):
|
||||
tracker_uuid, timestamp, obj_type, subtype, obj_id = raw.split(':', 4)
|
||||
tracker = Tracker(tracker_uuid)
|
||||
meta = tracker.get_meta(options={'tags'})
|
||||
meta = tracker.get_meta(options={'description', 'tags'})
|
||||
if not meta.get('type'):
|
||||
meta['type'] = 'Tracker DELETED'
|
||||
timestamp = datetime.datetime.fromtimestamp(float(timestamp)).strftime('%Y-%m-%d %H:%M:%S')
|
||||
|
@ -857,7 +859,7 @@ def api_validate_tracker_to_add(to_track, tracker_type, nb_words=1):
|
|||
# force lowercase
|
||||
to_track = to_track.lower()
|
||||
word_set = set(to_track)
|
||||
set_inter = word_set.intersection(special_characters)
|
||||
set_inter = word_set.intersection(get_special_characters())
|
||||
if set_inter:
|
||||
return {"status": "error",
|
||||
"reason": f'special character(s) not allowed: {set_inter}',
|
||||
|
@ -929,7 +931,7 @@ def api_add_tracker(dict_input, user_id):
|
|||
# Filters # TODO MOVE ME
|
||||
filters = dict_input.get('filters', {})
|
||||
if filters:
|
||||
if filters.keys() == {'decoded', 'item', 'pgp', 'title'} and set(filters['pgp'].get('subtypes', [])) == {'mail', 'name'}:
|
||||
if filters.keys() == set(get_objects_tracked()) and set(filters['pgp'].get('subtypes', [])) == {'mail', 'name'}:
|
||||
filters = {}
|
||||
for obj_type in filters:
|
||||
if obj_type not in get_objects_tracked():
|
||||
|
@ -1004,7 +1006,7 @@ def api_edit_tracker(dict_input, user_id):
|
|||
# Filters # TODO MOVE ME
|
||||
filters = dict_input.get('filters', {})
|
||||
if filters:
|
||||
if filters.keys() == {'decoded', 'item', 'pgp', 'title'} and set(filters['pgp'].get('subtypes', [])) == {'mail', 'name'}:
|
||||
if filters.keys() == set(get_objects_tracked()) and set(filters['pgp'].get('subtypes', [])) == {'mail', 'name'}:
|
||||
if not filters['decoded'] and not filters['item']:
|
||||
filters = {}
|
||||
for obj_type in filters:
|
||||
|
@ -1055,6 +1057,37 @@ def api_delete_tracker(data, user_id):
|
|||
tracker = Tracker(tracker_uuid)
|
||||
return tracker.delete(), 200
|
||||
|
||||
def api_tracker_add_object(data, user_id):
|
||||
tracker_uuid = data.get('uuid')
|
||||
res = api_check_tracker_acl(tracker_uuid, user_id)
|
||||
if res:
|
||||
return res
|
||||
tracker = Tracker(tracker_uuid)
|
||||
object_gid = data.get('gid')
|
||||
date = data.get('date')
|
||||
if date:
|
||||
if not Date.validate_str_date(date):
|
||||
date = None
|
||||
try:
|
||||
obj_type, subtype, obj_id = object_gid.split(':', 2)
|
||||
except (AttributeError, IndexError):
|
||||
return {"status": "error", "reason": "Invalid Object"}, 400
|
||||
return tracker.add(obj_type, subtype, obj_id, date=date), 200
|
||||
|
||||
def api_tracker_remove_object(data, user_id):
|
||||
tracker_uuid = data.get('uuid')
|
||||
res = api_check_tracker_acl(tracker_uuid, user_id)
|
||||
if res:
|
||||
return res
|
||||
|
||||
tracker = Tracker(tracker_uuid)
|
||||
object_gid = data.get('gid')
|
||||
try:
|
||||
obj_type, subtype, obj_id = object_gid.split(':', 2)
|
||||
except (AttributeError, IndexError):
|
||||
return {"status": "error", "reason": "Invalid Object"}, 400
|
||||
return tracker.remove(obj_type, subtype, obj_id), 200
|
||||
|
||||
## -- CREATE TRACKER -- ##
|
||||
|
||||
####################
|
||||
|
@ -1082,7 +1115,9 @@ def get_text_word_frequency(content, filtering=True):
|
|||
words_dict = defaultdict(int)
|
||||
|
||||
if filtering:
|
||||
blob = TextBlob(content, tokenizer=tokenizer)
|
||||
if TOKENIZER is None:
|
||||
init_tokenizer()
|
||||
blob = TextBlob(content, tokenizer=TOKENIZER)
|
||||
else:
|
||||
blob = TextBlob(content)
|
||||
for word in blob.tokens:
|
||||
|
@ -1530,7 +1565,7 @@ class RetroHunt:
|
|||
self._set_state(state)
|
||||
|
||||
def delete(self):
|
||||
if self.is_running() and self.get_state() != 'completed':
|
||||
if self.is_running() and self.get_state() not in ['completed', 'paused']:
|
||||
return None
|
||||
|
||||
# Delete custom rule
|
||||
|
@ -1616,6 +1651,19 @@ def get_retro_hunt_metas():
|
|||
tasks.append(retro_hunt.get_meta(options={'date', 'progress', 'nb_match', 'tags'}))
|
||||
return tasks
|
||||
|
||||
## Objects ##
|
||||
|
||||
def is_obj_retro_hunted(obj_type, subtype, obj_id):
|
||||
return r_tracker.exists(f'obj:retro_hunts:{obj_type}:{subtype}:{obj_id}')
|
||||
|
||||
def get_obj_retro_hunts(obj_type, subtype, obj_id):
|
||||
return r_tracker.smembers(f'obj:retro_hunts:{obj_type}:{subtype}:{obj_id}')
|
||||
|
||||
def delete_obj_retro_hunts(obj_type, subtype, obj_id):
|
||||
for retro_uuid in get_obj_retro_hunts(obj_type, subtype, obj_id):
|
||||
retro_hunt = RetroHunt(retro_uuid)
|
||||
retro_hunt.remove(obj_type, subtype, obj_id)
|
||||
|
||||
## API ##
|
||||
def api_check_retro_hunt_task_uuid(task_uuid):
|
||||
if not is_valid_uuid_v4(task_uuid):
|
||||
|
@ -1736,7 +1784,7 @@ def api_delete_retro_hunt_task(task_uuid):
|
|||
if res:
|
||||
return res
|
||||
retro_hunt = RetroHunt(task_uuid)
|
||||
if retro_hunt.is_running() and retro_hunt.get_state() != 'completed':
|
||||
if retro_hunt.is_running() and retro_hunt.get_state() not in ['completed', 'paused']:
|
||||
return {"status": "error", "reason": "You can't delete a running task"}, 400
|
||||
else:
|
||||
return retro_hunt.delete(), 200
|
||||
|
@ -1756,9 +1804,9 @@ def _fix_db_custom_tags():
|
|||
#### -- ####
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
# if __name__ == '__main__':
|
||||
|
||||
_fix_db_custom_tags()
|
||||
# _fix_db_custom_tags()
|
||||
# fix_all_tracker_uuid_list()
|
||||
# res = get_all_tracker_uuid()
|
||||
# print(len(res))
|
||||
|
|
|
@ -0,0 +1,83 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
##################################
|
||||
# Import Project packages
|
||||
##################################
|
||||
from lib.ConfigLoader import ConfigLoader
|
||||
from lib import Users
|
||||
|
||||
config_loader = ConfigLoader()
|
||||
r_cache = config_loader.get_redis_conn("Redis_Cache")
|
||||
config_loader = None
|
||||
|
||||
|
||||
def check_token_format(token, search=re.compile(r'[^a-zA-Z0-9_-]').search): ####################################################
|
||||
return not bool(search(token))
|
||||
|
||||
def is_valid_token(token):
|
||||
return Users.exists_token(token)
|
||||
|
||||
def get_user_from_token(token):
|
||||
return Users.get_token_user(token)
|
||||
|
||||
def is_user_in_role(role, token): # verify_user_role
|
||||
# User without API
|
||||
if role == 'user_no_api':
|
||||
return False
|
||||
|
||||
user_id = get_user_from_token(token)
|
||||
if user_id:
|
||||
return Users.is_in_role(user_id, role)
|
||||
else:
|
||||
return False
|
||||
|
||||
#### Brute Force Protection ####
|
||||
|
||||
def get_failed_login(ip_address):
|
||||
return r_cache.get(f'failed_login_ip_api:{ip_address}')
|
||||
|
||||
def incr_failed_login(ip_address):
|
||||
r_cache.incr(f'failed_login_ip_api:{ip_address}')
|
||||
r_cache.expire(f'failed_login_ip_api:{ip_address}', 300)
|
||||
|
||||
def get_brute_force_ttl(ip_address):
|
||||
return r_cache.ttl(f'failed_login_ip_api:{ip_address}')
|
||||
|
||||
def is_brute_force_protected(ip_address):
|
||||
failed_login = get_failed_login(ip_address)
|
||||
if failed_login:
|
||||
failed_login = int(failed_login)
|
||||
if failed_login >= 5:
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
return False
|
||||
|
||||
#### --Brute Force Protection-- ####
|
||||
|
||||
def authenticate_user(token, ip_address):
|
||||
if is_brute_force_protected(ip_address):
|
||||
ip_ttl = get_brute_force_ttl(ip_address)
|
||||
return {'status': 'error', 'reason': f'Max Connection Attempts reached, Please wait {ip_ttl}s'}, 401
|
||||
|
||||
try:
|
||||
if len(token) != 55:
|
||||
return {'status': 'error', 'reason': 'Invalid Token Length, required==55'}, 400
|
||||
if not check_token_format(token):
|
||||
return {'status': 'error', 'reason': 'Malformed Authentication String'}, 400
|
||||
|
||||
if is_valid_token(token):
|
||||
return True, 200
|
||||
# Failed Login
|
||||
else:
|
||||
incr_failed_login(ip_address)
|
||||
return {'status': 'error', 'reason': 'Authentication failed'}, 401
|
||||
except Exception as e:
|
||||
print(e) # TODO Logs
|
||||
return {'status': 'error', 'reason': 'Malformed Authentication String'}, 400
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
import os
|
||||
import sys
|
||||
from uuid import uuid4
|
||||
import uuid
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
##################################
|
||||
|
@ -18,7 +18,14 @@ config_loader = None
|
|||
|
||||
AIL_OBJECTS = sorted({'chat', 'chat-subchannel', 'chat-thread', 'cookie-name', 'cve', 'cryptocurrency', 'decoded',
|
||||
'domain', 'etag', 'favicon', 'file-name', 'hhhash',
|
||||
'item', 'image', 'message', 'pgp', 'screenshot', 'title', 'user-account', 'username'})
|
||||
'item', 'image', 'message', 'ocr', 'pgp', 'screenshot', 'title', 'user-account', 'username'})
|
||||
|
||||
AIL_OBJECTS_WITH_SUBTYPES = {'chat', 'chat-subchannel', 'cryptocurrency', 'pgp', 'username', 'user-account'}
|
||||
|
||||
# TODO by object TYPE ????
|
||||
AIL_OBJECTS_CORRELATIONS_DEFAULT = sorted({'chat', 'chat-subchannel', 'chat-thread', 'cve', 'cryptocurrency', 'decoded',
|
||||
'domain', 'favicon', 'file-name',
|
||||
'item', 'image', 'message', 'ocr', 'pgp', 'screenshot', 'title', 'user-account', 'username'})
|
||||
|
||||
def get_ail_uuid():
|
||||
ail_uuid = r_serv_db.get('ail:uuid')
|
||||
|
@ -31,24 +38,37 @@ def _set_ail_uuid():
|
|||
r_serv_db.set('ail:uuid', ail_uuid)
|
||||
return ail_uuid
|
||||
|
||||
def is_valid_uuid_v4(header_uuid):
|
||||
try:
|
||||
header_uuid = header_uuid.replace('-', '')
|
||||
uuid_test = uuid.UUID(hex=header_uuid, version=4)
|
||||
return uuid_test.hex == header_uuid
|
||||
except:
|
||||
return False
|
||||
|
||||
def generate_uuid():
|
||||
return str(uuid4())
|
||||
return str(uuid.uuid4())
|
||||
|
||||
#### AIL OBJECTS ####
|
||||
|
||||
def get_all_objects():
|
||||
return AIL_OBJECTS
|
||||
|
||||
def is_object_type(obj_type):
|
||||
return obj_type in AIL_OBJECTS
|
||||
|
||||
def get_objects_with_subtypes():
|
||||
return ['chat', 'cryptocurrency', 'pgp', 'username', 'user-account']
|
||||
return AIL_OBJECTS_WITH_SUBTYPES
|
||||
|
||||
def get_object_all_subtypes(obj_type): # TODO Dynamic subtype
|
||||
if obj_type == 'chat':
|
||||
return r_object.smembers(f'all_chat:subtypes')
|
||||
if obj_type == 'chat-subchannel':
|
||||
return r_object.smembers(f'all_chat-subchannel:subtypes')
|
||||
if obj_type == 'chat-thread':
|
||||
return r_object.smembers(f'all_chat-thread:subtypes')
|
||||
if obj_type == 'cryptocurrency':
|
||||
return ['bitcoin', 'bitcoin-cash', 'dash', 'ethereum', 'litecoin', 'monero', 'zcash']
|
||||
return ['bitcoin', 'bitcoin-cash', 'dash', 'ethereum', 'litecoin', 'monero', 'tron', 'zcash']
|
||||
if obj_type == 'pgp':
|
||||
return ['key', 'mail', 'name']
|
||||
if obj_type == 'username':
|
||||
|
@ -57,14 +77,17 @@ def get_object_all_subtypes(obj_type): # TODO Dynamic subtype
|
|||
return r_object.smembers(f'all_chat:subtypes')
|
||||
return []
|
||||
|
||||
def get_default_correlation_objects():
|
||||
return AIL_OBJECTS_CORRELATIONS_DEFAULT
|
||||
|
||||
def get_obj_queued():
|
||||
return ['item', 'image']
|
||||
return ['item', 'image', 'message', 'ocr']
|
||||
|
||||
def get_objects_tracked():
|
||||
return ['decoded', 'item', 'pgp', 'title']
|
||||
return ['decoded', 'item', 'pgp', 'message', 'ocr', 'title']
|
||||
|
||||
def get_objects_retro_hunted():
|
||||
return ['decoded', 'item']
|
||||
return ['decoded', 'item', 'message']
|
||||
|
||||
def get_all_objects_with_subtypes_tuple():
|
||||
str_objs = []
|
||||
|
@ -82,7 +105,7 @@ def unpack_obj_global_id(global_id, r_type='tuple'):
|
|||
obj = global_id.split(':', 2)
|
||||
return {'type': obj[0], 'subtype': obj[1], 'id': obj[2]}
|
||||
else: # tuple(type, subtype, id)
|
||||
return global_id.split(':', 2)
|
||||
return global_id.split(':', 2) # TODO REPLACE get_obj_type_subtype_id_from_global_id(global_id)
|
||||
|
||||
def unpack_objs_global_id(objs_global_id, r_type='tuple'):
|
||||
objs = []
|
||||
|
|
|
@ -139,6 +139,10 @@ class AILQueue:
|
|||
def error(self):
|
||||
r_queues.hdel(f'modules', f'{self.pid}:{self.name}')
|
||||
|
||||
def end(self):
|
||||
self.clear()
|
||||
r_queues.hdel(f'modules', f'{self.pid}:{self.name}')
|
||||
|
||||
|
||||
def get_queues_modules():
|
||||
return r_queues.hkeys('queues')
|
||||
|
|
|
@ -8,7 +8,6 @@ import sys
|
|||
import requests
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
from lib.objects.CryptoCurrencies import CryptoCurrency
|
||||
|
||||
logger = logging.getLogger()
|
||||
|
||||
|
@ -53,9 +52,11 @@ def get_bitcoin_info(bitcoin_address, nb_transaction=50):
|
|||
|
||||
# filter btc seen in ail
|
||||
def filter_btc_seen(btc_addr_set):
|
||||
from lib.objects import CryptoCurrencies
|
||||
|
||||
list_seen_btc = []
|
||||
for btc_addr in btc_addr_set:
|
||||
cryptocurrency = CryptoCurrency(btc_addr, 'bitcoin')
|
||||
cryptocurrency = CryptoCurrencies.CryptoCurrency(btc_addr, 'bitcoin')
|
||||
if cryptocurrency.exists():
|
||||
list_seen_btc.append(btc_addr)
|
||||
return list_seen_btc
|
||||
|
|
|
@ -11,6 +11,7 @@ import sys
|
|||
import time
|
||||
import uuid
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
##################################
|
||||
|
@ -287,6 +288,10 @@ def get_obj_chat(chat_type, chat_subtype, chat_id):
|
|||
elif chat_type == 'chat-thread':
|
||||
return ChatThreads.ChatThread(chat_id, chat_subtype)
|
||||
|
||||
def get_obj_chat_from_global_id(chat_gid):
|
||||
chat_type, chat_subtype, chat_id = chat_gid.split(':', 2)
|
||||
return get_obj_chat(chat_type, chat_subtype, chat_id)
|
||||
|
||||
def get_obj_chat_meta(obj_chat, new_options=set()):
|
||||
options = {}
|
||||
if obj_chat.type == 'chat':
|
||||
|
@ -321,7 +326,192 @@ def get_threads_metas(threads):
|
|||
def get_username_meta_from_global_id(username_global_id):
|
||||
_, instance_uuid, username_id = username_global_id.split(':', 2)
|
||||
username = Usernames.Username(username_id, instance_uuid)
|
||||
return username.get_meta()
|
||||
return username.get_meta(options={'icon'})
|
||||
|
||||
###############################################################################
|
||||
# TODO Pagination
|
||||
def list_messages_to_dict(l_messages_id, translation_target=None):
|
||||
options = {'content', 'files-names', 'images', 'language', 'link', 'parent', 'parent_meta', 'reactions', 'thread', 'translation', 'user-account'}
|
||||
meta = {}
|
||||
curr_date = None
|
||||
for mess_id in l_messages_id:
|
||||
message = Messages.Message(mess_id[1:])
|
||||
timestamp = message.get_timestamp()
|
||||
date_day = message.get_date()
|
||||
date_day = f'{date_day[0:4]}/{date_day[4:6]}/{date_day[6:8]}'
|
||||
if date_day != curr_date:
|
||||
meta[date_day] = []
|
||||
curr_date = date_day
|
||||
meta_mess = message.get_meta(options=options, timestamp=timestamp, translation_target=translation_target)
|
||||
meta[date_day].append(meta_mess)
|
||||
|
||||
# if mess_dict.get('tags'):
|
||||
# for tag in mess_dict['tags']:
|
||||
# if tag not in tags:
|
||||
# tags[tag] = 0
|
||||
# tags[tag] += 1
|
||||
# return messages, pagination, tags
|
||||
return meta
|
||||
|
||||
# TODO Filter
|
||||
## Instance type
|
||||
## Chats IDS
|
||||
## SubChats IDS
|
||||
## Threads IDS
|
||||
## Daterange
|
||||
def get_messages_iterator(filters={}):
|
||||
|
||||
for instance_uuid in get_chat_service_instances():
|
||||
|
||||
for chat_id in ChatServiceInstance(instance_uuid).get_chats():
|
||||
chat = Chats.Chat(chat_id, instance_uuid)
|
||||
|
||||
# subchannels
|
||||
for subchannel_gid in chat.get_subchannels():
|
||||
_, _, subchannel_id = subchannel_gid.split(':', 2)
|
||||
subchannel = ChatSubChannels.ChatSubChannel(subchannel_id, instance_uuid)
|
||||
messages, _ = subchannel._get_messages(nb=-1)
|
||||
for mess in messages:
|
||||
_, _, message_id = mess[0].split(':', )
|
||||
yield Messages.Message(message_id)
|
||||
# threads
|
||||
|
||||
# threads
|
||||
for threads in chat.get_threads():
|
||||
thread = ChatThreads.ChatThread(threads['id'], instance_uuid)
|
||||
messages, _ = thread._get_messages(nb=-1)
|
||||
for mess in messages:
|
||||
message_id, _, message_id = mess[0].split(':', )
|
||||
yield Messages.Message(message_id)
|
||||
|
||||
# messages
|
||||
messages, _ = chat._get_messages(nb=-1)
|
||||
for mess in messages:
|
||||
_, _, message_id = mess[0].split(':', )
|
||||
yield Messages.Message(message_id)
|
||||
# threads ???
|
||||
|
||||
def get_nb_messages_iterator(filters={}):
|
||||
nb_messages = 0
|
||||
for instance_uuid in get_chat_service_instances():
|
||||
for chat_id in ChatServiceInstance(instance_uuid).get_chats():
|
||||
chat = Chats.Chat(chat_id, instance_uuid)
|
||||
# subchannels
|
||||
for subchannel_gid in chat.get_subchannels():
|
||||
_, _, subchannel_id = subchannel_gid.split(':', 2)
|
||||
subchannel = ChatSubChannels.ChatSubChannel(subchannel_id, instance_uuid)
|
||||
nb_messages += subchannel.get_nb_messages()
|
||||
# threads
|
||||
for threads in chat.get_threads():
|
||||
thread = ChatThreads.ChatThread(threads['id'], instance_uuid)
|
||||
nb_messages += thread.get_nb_messages()
|
||||
# messages
|
||||
nb_messages += chat.get_nb_messages()
|
||||
return nb_messages
|
||||
|
||||
def get_user_account_chats_meta(user_id, chats, subchannels):
|
||||
meta = []
|
||||
for chat_g_id in chats:
|
||||
c_subtype, c_id = chat_g_id.split(':', 1)
|
||||
chat = Chats.Chat(c_id, c_subtype)
|
||||
chat_meta = chat.get_meta(options={'icon', 'info', 'nb_participants', 'tags_safe', 'username'})
|
||||
if chat_meta['username']:
|
||||
chat_meta['username'] = get_username_meta_from_global_id(chat_meta['username'])
|
||||
chat_meta['nb_messages'] = len(chat.get_user_messages(user_id))
|
||||
chat_meta['subchannels'] = []
|
||||
for subchannel_gid in chat.get_subchannels():
|
||||
if subchannel_gid[16:] in subchannels:
|
||||
_, s_subtype, s_id = subchannel_gid.split(':', 2)
|
||||
subchannel = ChatSubChannels.ChatSubChannel(s_id, s_subtype)
|
||||
subchannel_meta = subchannel.get_meta(options={'created_at'})
|
||||
subchannel_meta['nb_messages'] = len(subchannel.get_user_messages(user_id))
|
||||
chat_meta['subchannels'].append(subchannel_meta)
|
||||
meta.append(chat_meta)
|
||||
return meta
|
||||
|
||||
def get_user_account_chat_message(user_id, subtype, chat_id): # TODO subchannel + threads ...
|
||||
meta = {}
|
||||
chat = Chats.Chat(chat_id, subtype)
|
||||
chat_meta = chat.get_meta(options={'icon', 'info', 'nb_participants', 'tags_safe', 'username'})
|
||||
if chat_meta['username']:
|
||||
chat_meta['username'] = get_username_meta_from_global_id(chat_meta['username'])
|
||||
|
||||
meta['messages'] = list_messages_to_dict(chat.get_user_messages(user_id), translation_target=None)
|
||||
return meta
|
||||
|
||||
def get_user_account_nb_all_week_messages(user_id, chats, subchannels):
|
||||
week = {}
|
||||
# Init
|
||||
for day in ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']:
|
||||
week[day] = {}
|
||||
for i in range(24):
|
||||
week[day][i] = 0
|
||||
|
||||
# chats
|
||||
for chat_g_id in chats:
|
||||
c_subtype, c_id = chat_g_id.split(':', 1)
|
||||
chat = Chats.Chat(c_id, c_subtype)
|
||||
for message in chat.get_user_messages(user_id):
|
||||
timestamp = message.split('/', 2)[1]
|
||||
timestamp = datetime.utcfromtimestamp(float(timestamp))
|
||||
date_name = timestamp.strftime('%a')
|
||||
week[date_name][timestamp.hour] += 1
|
||||
|
||||
stats = []
|
||||
nb_day = 0
|
||||
for day in week:
|
||||
for hour in week[day]:
|
||||
stats.append({'date': day, 'day': nb_day, 'hour': hour, 'count': week[day][hour]})
|
||||
nb_day += 1
|
||||
return stats
|
||||
|
||||
def _get_chat_card_meta_options():
|
||||
return {'created_at', 'icon', 'info', 'nb_participants', 'origin_link', 'subchannels', 'tags_safe', 'threads', 'translation', 'username'}
|
||||
|
||||
def _get_message_bloc_meta_options():
|
||||
return {'chat', 'content', 'files-names', 'icon', 'images', 'language', 'link', 'parent', 'parent_meta', 'reactions','thread', 'translation', 'user-account'}
|
||||
|
||||
def get_message_report(l_mess): # TODO Force language + translation
|
||||
translation_target = 'en'
|
||||
chats = {}
|
||||
messages = []
|
||||
mess_options = _get_message_bloc_meta_options()
|
||||
|
||||
l_mess = sorted(l_mess, key=lambda x: x[2])
|
||||
|
||||
for m in l_mess:
|
||||
message = Messages.Message(m[2])
|
||||
meta = message.get_meta(options=mess_options, translation_target=translation_target)
|
||||
if meta['chat'] not in chats:
|
||||
chat = Chats.Chat(meta['chat'], message.get_chat_instance())
|
||||
meta_chat = chat.get_meta(options=_get_chat_card_meta_options(), translation_target=translation_target)
|
||||
if meta_chat['username']:
|
||||
meta_chat['username'] = get_username_meta_from_global_id(meta_chat['username'])
|
||||
chats[chat.id] = meta_chat
|
||||
|
||||
# stats
|
||||
chats[chat.id]['t_messages'] = 1
|
||||
else:
|
||||
chats[meta['chat']]['t_messages'] += 1
|
||||
|
||||
messages.append(meta)
|
||||
|
||||
return chats, messages
|
||||
|
||||
#### FIX ####
|
||||
|
||||
def fix_correlations_subchannel_message():
|
||||
for instance_uuid in get_chat_service_instances():
|
||||
for chat_id in ChatServiceInstance(instance_uuid).get_chats():
|
||||
chat = Chats.Chat(chat_id, instance_uuid)
|
||||
# subchannels
|
||||
for subchannel_gid in chat.get_subchannels():
|
||||
_, _, subchannel_id = subchannel_gid.split(':', 2)
|
||||
subchannel = ChatSubChannels.ChatSubChannel(subchannel_id, instance_uuid)
|
||||
messages, _ = subchannel._get_messages(nb=-1)
|
||||
for mess in messages:
|
||||
_, _, message_id = mess[0].split(':', )
|
||||
subchannel.add_correlation('message', '', message_id)
|
||||
|
||||
#### API ####
|
||||
|
||||
|
@ -331,11 +521,12 @@ def api_get_chat_service_instance(chat_instance_uuid):
|
|||
return {"status": "error", "reason": "Unknown uuid"}, 404
|
||||
return chat_instance.get_meta({'chats'}), 200
|
||||
|
||||
def api_get_chat(chat_id, chat_instance_uuid, translation_target=None, nb=-1, page=-1):
|
||||
def api_get_chat(chat_id, chat_instance_uuid, translation_target=None, nb=-1, page=-1, messages=True):
|
||||
chat = Chats.Chat(chat_id, chat_instance_uuid)
|
||||
if not chat.exists():
|
||||
return {"status": "error", "reason": "Unknown chat"}, 404
|
||||
meta = chat.get_meta({'created_at', 'icon', 'info', 'nb_participants', 'subchannels', 'threads', 'translation', 'username'}, translation_target=translation_target)
|
||||
# print(chat.get_obj_language_stats())
|
||||
meta = chat.get_meta({'created_at', 'icon', 'info', 'nb_participants', 'subchannels', 'tags_safe', 'threads', 'translation', 'username'}, translation_target=translation_target)
|
||||
if meta['username']:
|
||||
meta['username'] = get_username_meta_from_global_id(meta['username'])
|
||||
if meta['subchannels']:
|
||||
|
@ -343,17 +534,25 @@ def api_get_chat(chat_id, chat_instance_uuid, translation_target=None, nb=-1, pa
|
|||
else:
|
||||
if translation_target not in Language.get_translation_languages():
|
||||
translation_target = None
|
||||
meta['messages'], meta['pagination'], meta['tags_messages'] = chat.get_messages(translation_target=translation_target, nb=nb, page=page)
|
||||
if messages:
|
||||
meta['messages'], meta['pagination'], meta['tags_messages'] = chat.get_messages(translation_target=translation_target, nb=nb, page=page)
|
||||
return meta, 200
|
||||
|
||||
def api_get_nb_message_by_week(chat_id, chat_instance_uuid):
|
||||
chat = Chats.Chat(chat_id, chat_instance_uuid)
|
||||
def api_get_nb_message_by_week(chat_type, chat_instance_uuid, chat_id):
|
||||
chat = get_obj_chat(chat_type, chat_instance_uuid, chat_id)
|
||||
if not chat.exists():
|
||||
return {"status": "error", "reason": "Unknown chat"}, 404
|
||||
week = chat.get_nb_message_this_week()
|
||||
# week = chat.get_nb_message_by_week('20231109')
|
||||
return week, 200
|
||||
|
||||
def api_get_nb_week_messages(chat_type, chat_instance_uuid, chat_id):
|
||||
chat = get_obj_chat(chat_type, chat_instance_uuid, chat_id)
|
||||
if not chat.exists():
|
||||
return {"status": "error", "reason": "Unknown chat"}, 404
|
||||
week = chat.get_nb_week_messages()
|
||||
return week, 200
|
||||
|
||||
def api_get_chat_participants(chat_type, chat_subtype, chat_id):
|
||||
if chat_type not in ['chat', 'chat-subchannel', 'chat-thread']:
|
||||
return {"status": "error", "reason": "Unknown chat type"}, 400
|
||||
|
@ -373,6 +572,7 @@ def api_get_subchannel(chat_id, chat_instance_uuid, translation_target=None, nb=
|
|||
subchannel = ChatSubChannels.ChatSubChannel(chat_id, chat_instance_uuid)
|
||||
if not subchannel.exists():
|
||||
return {"status": "error", "reason": "Unknown subchannel"}, 404
|
||||
# print(subchannel.get_obj_language_stats())
|
||||
meta = subchannel.get_meta({'chat', 'created_at', 'icon', 'nb_messages', 'nb_participants', 'threads', 'translation'}, translation_target=translation_target)
|
||||
if meta['chat']:
|
||||
meta['chat'] = get_chat_meta_from_global_id(meta['chat'])
|
||||
|
@ -387,6 +587,7 @@ def api_get_thread(thread_id, thread_instance_uuid, translation_target=None, nb=
|
|||
thread = ChatThreads.ChatThread(thread_id, thread_instance_uuid)
|
||||
if not thread.exists():
|
||||
return {"status": "error", "reason": "Unknown thread"}, 404
|
||||
# print(thread.get_obj_language_stats())
|
||||
meta = thread.get_meta({'chat', 'nb_messages', 'nb_participants'})
|
||||
# if meta['chat']:
|
||||
# meta['chat'] = get_chat_meta_from_global_id(meta['chat'])
|
||||
|
@ -397,14 +598,101 @@ def api_get_message(message_id, translation_target=None):
|
|||
message = Messages.Message(message_id)
|
||||
if not message.exists():
|
||||
return {"status": "error", "reason": "Unknown uuid"}, 404
|
||||
meta = message.get_meta({'chat', 'content', 'files-names', 'icon', 'images', 'link', 'parent', 'parent_meta', 'reactions', 'thread', 'translation', 'user-account'}, translation_target=translation_target)
|
||||
meta = message.get_meta({'chat', 'content', 'files-names', 'icon', 'images', 'language', 'link', 'parent', 'parent_meta', 'reactions', 'thread', 'translation', 'user-account'}, translation_target=translation_target)
|
||||
return meta, 200
|
||||
|
||||
def api_message_detect_language(message_id):
|
||||
message = Messages.Message(message_id)
|
||||
if not message.exists():
|
||||
return {"status": "error", "reason": "Unknown uuid"}, 404
|
||||
lang = message.detect_language()
|
||||
return {"language": lang}, 200
|
||||
|
||||
def api_manually_translate_message(message_id, source, translation_target, translation):
|
||||
message = Messages.Message(message_id)
|
||||
if not message.exists():
|
||||
return {"status": "error", "reason": "Unknown uuid"}, 404
|
||||
if translation:
|
||||
if len(translation) > 200000: # TODO REVIEW LIMIT
|
||||
return {"status": "error", "reason": "Max Size reached"}, 400
|
||||
all_languages = Language.get_translation_languages()
|
||||
if source not in all_languages:
|
||||
return {"status": "error", "reason": "Unknown source Language"}, 400
|
||||
message_language = message.get_language()
|
||||
if message_language != source:
|
||||
message.edit_language(message_language, source)
|
||||
if translation:
|
||||
if translation_target not in all_languages:
|
||||
return {"status": "error", "reason": "Unknown target Language"}, 400
|
||||
message.set_translation(translation_target, translation)
|
||||
# TODO SANITYZE translation
|
||||
return None, 200
|
||||
|
||||
def api_get_user_account(user_id, instance_uuid, translation_target=None):
|
||||
user_account = UsersAccount.UserAccount(user_id, instance_uuid)
|
||||
if not user_account.exists():
|
||||
return {"status": "error", "reason": "Unknown user-account"}, 404
|
||||
meta = user_account.get_meta({'chats', 'icon', 'info', 'subchannels', 'threads', 'translation', 'username', 'username_meta'}, translation_target=translation_target)
|
||||
if meta['chats']:
|
||||
meta['chats'] = get_user_account_chats_meta(user_id, meta['chats'], meta['subchannels'])
|
||||
return meta, 200
|
||||
|
||||
def api_get_user_account_chat_messages(user_id, instance_uuid, chat_id, translation_target=None):
|
||||
user_account = UsersAccount.UserAccount(user_id, instance_uuid)
|
||||
if not user_account.exists():
|
||||
return {"status": "error", "reason": "Unknown user-account"}, 404
|
||||
meta = get_user_account_chat_message(user_id, instance_uuid, chat_id)
|
||||
meta['user-account'] = user_account.get_meta({'icon', 'info', 'translation', 'username', 'username_meta'}, translation_target=translation_target)
|
||||
resp = api_get_chat(chat_id, instance_uuid, translation_target=translation_target, messages=False)
|
||||
if resp[1] != 200:
|
||||
return resp
|
||||
meta['chat'] = resp[0]
|
||||
return meta, 200
|
||||
|
||||
def api_get_user_account_nb_all_week_messages(user_id, instance_uuid):
|
||||
user_account = UsersAccount.UserAccount(user_id, instance_uuid)
|
||||
if not user_account.exists():
|
||||
return {"status": "error", "reason": "Unknown user-account"}, 404
|
||||
week = get_user_account_nb_all_week_messages(user_account.id, user_account.get_chats(), user_account.get_chat_subchannels())
|
||||
return week, 200
|
||||
|
||||
def api_chat_messages(subtype, chat_id):
|
||||
chat = Chats.Chat(chat_id, subtype)
|
||||
if not chat.exists():
|
||||
return {"status": "error", "reason": "Unknown chat"}, 404
|
||||
meta = chat.get_meta({'created_at', 'info', 'nb_participants', 'subchannels', 'threads', 'username'}) # 'icon' 'translation'
|
||||
if meta['username']:
|
||||
meta['username'] = get_username_meta_from_global_id(meta['username'])
|
||||
if meta['subchannels']:
|
||||
meta['subchannels'] = get_subchannels_meta_from_global_id(meta['subchannels'])
|
||||
else:
|
||||
options = {'content', 'files-names', 'images', 'link', 'parent', 'parent_meta', 'reactions', 'thread', 'user-account'}
|
||||
meta['messages'], _, _ = chat.get_messages(nb=-1, options=options)
|
||||
return meta, 200
|
||||
|
||||
def api_subchannel_messages(subtype, subchannel_id):
|
||||
subchannel = ChatSubChannels.ChatSubChannel(subchannel_id, subtype)
|
||||
if not subchannel.exists():
|
||||
return {"status": "error", "reason": "Unknown subchannel"}, 404
|
||||
meta = subchannel.get_meta(
|
||||
{'chat', 'created_at', 'nb_messages', 'nb_participants', 'threads'})
|
||||
if meta['chat']:
|
||||
meta['chat'] = get_chat_meta_from_global_id(meta['chat'])
|
||||
if meta.get('threads'):
|
||||
meta['threads'] = get_threads_metas(meta['threads'])
|
||||
if meta.get('username'):
|
||||
meta['username'] = get_username_meta_from_global_id(meta['username'])
|
||||
options = {'content', 'files-names', 'images', 'link', 'parent', 'parent_meta', 'reactions', 'thread', 'user-account'}
|
||||
meta['messages'], _, _ = subchannel.get_messages(nb=-1, options=options)
|
||||
return meta, 200
|
||||
|
||||
def api_thread_messages(subtype, thread_id):
|
||||
thread = ChatThreads.ChatThread(thread_id, subtype)
|
||||
if not thread.exists():
|
||||
return {"status": "error", "reason": "Unknown thread"}, 404
|
||||
meta = thread.get_meta({'chat', 'nb_messages', 'nb_participants'})
|
||||
options = {'content', 'files-names', 'images', 'link', 'parent', 'parent_meta', 'reactions', 'thread', 'user-account'}
|
||||
meta['messages'], _, _ = thread.get_messages(nb=-1, options=options)
|
||||
return meta, 200
|
||||
|
||||
# # # # # # # # # # LATER
|
||||
|
|
|
@ -41,25 +41,26 @@ config_loader = None
|
|||
##################################
|
||||
|
||||
CORRELATION_TYPES_BY_OBJ = {
|
||||
"chat": ["chat-subchannel", "chat-thread", "image", "user-account"], # message or direct correlation like cve, bitcoin, ... ???
|
||||
"chat-subchannel": ["chat", "chat-thread", "image", "message", "user-account"],
|
||||
"chat-thread": ["chat", "chat-subchannel", "image", "message", "user-account"], # TODO user account
|
||||
"chat": ["chat-subchannel", "chat-thread", "image", "message", "ocr", "user-account"], # message or direct correlation like cve, bitcoin, ... ???
|
||||
"chat-subchannel": ["chat", "chat-thread", "image", "message", "ocr", "user-account"],
|
||||
"chat-thread": ["chat", "chat-subchannel", "image", "message", "ocr", "user-account"], # TODO user account
|
||||
"cookie-name": ["domain"],
|
||||
"cryptocurrency": ["domain", "item", "message"],
|
||||
"cve": ["domain", "item", "message"],
|
||||
"decoded": ["domain", "item", "message"],
|
||||
"cryptocurrency": ["domain", "item", "message", "ocr"],
|
||||
"cve": ["domain", "item", "message", "ocr"],
|
||||
"decoded": ["domain", "item", "message", "ocr"],
|
||||
"domain": ["cve", "cookie-name", "cryptocurrency", "decoded", "etag", "favicon", "hhhash", "item", "pgp", "title", "screenshot", "username"],
|
||||
"etag": ["domain"],
|
||||
"favicon": ["domain", "item"], # TODO Decoded
|
||||
"file-name": ["chat", "message"],
|
||||
"hhhash": ["domain"],
|
||||
"image": ["chat", "message", "user-account"],
|
||||
"image": ["chat", "chat-subchannel", "chat-thread", "message", "ocr", "user-account"], # TODO subchannel + threads ????
|
||||
"item": ["cve", "cryptocurrency", "decoded", "domain", "favicon", "pgp", "screenshot", "title", "username"], # chat ???
|
||||
"message": ["chat", "chat-subchannel", "chat-thread", "cve", "cryptocurrency", "decoded", "file-name", "image", "pgp", "user-account"], # chat ??
|
||||
"pgp": ["domain", "item", "message"],
|
||||
"message": ["chat", "chat-subchannel", "chat-thread", "cve", "cryptocurrency", "decoded", "file-name", "image", "ocr", "pgp", "user-account"],
|
||||
"ocr": ["chat", "chat-subchannel", "chat-thread", "cve", "cryptocurrency", "decoded", "image", "message", "pgp", "user-account"],
|
||||
"pgp": ["domain", "item", "message", "ocr"],
|
||||
"screenshot": ["domain", "item"],
|
||||
"title": ["domain", "item"],
|
||||
"user-account": ["chat", "chat-subchannel", "chat-thread", "image", "message", "username"],
|
||||
"user-account": ["chat", "chat-subchannel", "chat-thread", "image", "message", "ocr", "username"],
|
||||
"username": ["domain", "item", "message", "user-account"],
|
||||
}
|
||||
|
||||
|
|
|
@ -925,6 +925,19 @@ def get_crawlers_stats_by_day(date, domain_type):
|
|||
'down': r_crawler.scard(f'{domain_type}_down:{date}'),
|
||||
}
|
||||
|
||||
def get_crawlers_stats_by_month(domain_type, date=None):
|
||||
stats = []
|
||||
for date in Date.get_month_dates(date=date):
|
||||
stats.append(get_crawlers_stats_by_day(date, domain_type))
|
||||
return stats
|
||||
|
||||
def get_crawlers_stats_up_down_by_month(domain_type, date=None):
|
||||
stats = {'down': 0, 'up': 0}
|
||||
for date in Date.get_month_dates(date=date):
|
||||
day = get_crawlers_stats_by_day(date, domain_type)
|
||||
stats['down'] += day.get('down', 0)
|
||||
stats['up'] += day.get('up', 0)
|
||||
return stats
|
||||
|
||||
def get_crawlers_stats(domain_type=None):
|
||||
stats = {}
|
||||
|
@ -1273,6 +1286,11 @@ def create_schedule(frequency, user, url, depth=1, har=True, screenshot=True, he
|
|||
schedule.create(frequency, user, url, depth=depth, har=har, screenshot=screenshot, header=header, cookiejar=cookiejar, proxy=proxy, user_agent=user_agent, tags=tags)
|
||||
return schedule_uuid
|
||||
|
||||
def _delete_schedules():
|
||||
for schedule_uuid in get_schedulers_uuid():
|
||||
schedule = CrawlerSchedule(schedule_uuid)
|
||||
schedule.delete()
|
||||
|
||||
# TODO sanityze UUID
|
||||
def api_delete_schedule(data):
|
||||
schedule_uuid = data.get('uuid')
|
||||
|
@ -1660,7 +1678,6 @@ def create_task(url, depth=1, har=True, screenshot=True, header=None, cookiejar=
|
|||
external=external)
|
||||
return task_uuid
|
||||
|
||||
|
||||
## -- CRAWLER TASK -- ##
|
||||
|
||||
#### CRAWLER TASK API ####
|
||||
|
|
|
@ -19,3 +19,6 @@ class ModuleQueueError(AILError):
|
|||
|
||||
class MISPConnectionError(AILError):
|
||||
pass
|
||||
|
||||
class AILObjectUnknown(AILError):
|
||||
pass
|
||||
|
|
|
@ -40,6 +40,11 @@ r_key = regex_helper.generate_redis_cache_key('extractor')
|
|||
|
||||
# TODO UI Link
|
||||
|
||||
CORRELATION_TO_EXTRACT = {
|
||||
'item': ['cve', 'cryptocurrency', 'title', 'username'],
|
||||
'message': ['cve', 'cryptocurrency', 'username']
|
||||
}
|
||||
|
||||
MODULES = {
|
||||
'infoleak:automatic-detection="credit-card"': CreditCards(queue=False),
|
||||
'infoleak:automatic-detection="iban"': Iban(queue=False),
|
||||
|
@ -57,9 +62,27 @@ tools = Tools(queue=False)
|
|||
for tool_name in tools.get_tools():
|
||||
MODULES[f'infoleak:automatic-detection="{tool_name}-tool"'] = tools
|
||||
|
||||
def get_correl_match(extract_type, obj_id, content):
|
||||
def merge_overlap(extracted):
|
||||
merged = []
|
||||
curr_start, curr_end, curr_string_match, curr_obj_ref = extracted[0]
|
||||
curr_obj_ref = [(curr_obj_ref, curr_string_match)]
|
||||
|
||||
for start, end, mstring, ref in extracted[1:]:
|
||||
# overlap
|
||||
if start <= curr_end:
|
||||
curr_string_match += mstring[curr_end - start:]
|
||||
curr_end = max(curr_end, end)
|
||||
curr_obj_ref.append((ref, mstring))
|
||||
else:
|
||||
merged.append((curr_start, curr_end, curr_string_match, curr_obj_ref))
|
||||
curr_start, curr_end, curr_string_match, curr_obj_ref = start, end, mstring, [(ref, mstring)]
|
||||
|
||||
merged.append((curr_start, curr_end, curr_string_match, curr_obj_ref))
|
||||
return merged
|
||||
|
||||
def get_correl_match(extract_type, obj, content):
|
||||
extracted = []
|
||||
correl = correlations_engine.get_correlation_by_correl_type('item', '', obj_id, extract_type)
|
||||
correl = correlations_engine.get_correlation_by_correl_type(obj.type, obj.get_subtype(r_str=True), obj.id, extract_type)
|
||||
to_extract = []
|
||||
map_subtype = {}
|
||||
map_value_id = {}
|
||||
|
@ -75,18 +98,20 @@ def get_correl_match(extract_type, obj_id, content):
|
|||
sha256_val = sha256(value.encode()).hexdigest()
|
||||
map_value_id[sha256_val] = value
|
||||
if to_extract:
|
||||
objs = regex_helper.regex_finditer(r_key, '|'.join(to_extract), obj_id, content)
|
||||
for obj in objs:
|
||||
if map_subtype.get(obj[2]):
|
||||
subtype = map_subtype[obj[2]]
|
||||
objs = regex_helper.regex_finditer(r_key, '|'.join(to_extract), obj.get_global_id(), content)
|
||||
if extract_type == 'title' and objs:
|
||||
objs = [objs[0]]
|
||||
for ob in objs:
|
||||
if map_subtype.get(ob[2]):
|
||||
subtype = map_subtype[ob[2]]
|
||||
else:
|
||||
subtype = ''
|
||||
sha256_val = sha256(obj[2].encode()).hexdigest()
|
||||
sha256_val = sha256(ob[2].encode()).hexdigest()
|
||||
value_id = map_value_id.get(sha256_val)
|
||||
if not value_id:
|
||||
logger.critical(f'Error module extractor: {sha256_val}\n{extract_type}\n{subtype}\n{value_id}\n{map_value_id}\n{objs}')
|
||||
value_id = 'ERROR'
|
||||
extracted.append([obj[0], obj[1], obj[2], f'{extract_type}:{subtype}:{value_id}'])
|
||||
extracted.append([ob[0], ob[1], ob[2], f'{extract_type}:{subtype}:{value_id}'])
|
||||
return extracted
|
||||
|
||||
def _get_yara_match(data):
|
||||
|
@ -100,7 +125,7 @@ def _get_yara_match(data):
|
|||
return yara.CALLBACK_CONTINUE
|
||||
|
||||
def _get_word_regex(word):
|
||||
return '(?:^|(?<=[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]))' + word + '(?:$|(?=[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]))'
|
||||
return '(?i)(?:^|(?<=[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]))' + word + '(?:$|(?=[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]))'
|
||||
|
||||
def convert_byte_offset_to_string(b_content, offset):
|
||||
byte_chunk = b_content[:offset + 1]
|
||||
|
@ -115,17 +140,18 @@ def convert_byte_offset_to_string(b_content, offset):
|
|||
|
||||
# TODO RETRO HUNTS
|
||||
# TODO TRACKER TYPE IN UI
|
||||
def get_tracker_match(obj_id, content):
|
||||
def get_tracker_match(obj, content):
|
||||
extracted = []
|
||||
extracted_yara = []
|
||||
trackers = Tracker.get_obj_trackers('item', '', obj_id)
|
||||
obj_gid = obj.get_global_id()
|
||||
trackers = Tracker.get_obj_trackers(obj.type, obj.get_subtype(r_str=True), obj.id)
|
||||
for tracker_uuid in trackers:
|
||||
tracker = Tracker.Tracker(tracker_uuid)
|
||||
tracker_type = tracker.get_type()
|
||||
# print(tracker_type)
|
||||
tracked = tracker.get_tracked()
|
||||
if tracker_type == 'regex': # TODO Improve word detection -> word delimiter
|
||||
regex_match = regex_helper.regex_finditer(r_key, tracked, obj_id, content)
|
||||
regex_match = regex_helper.regex_finditer(r_key, tracked, obj_gid, content)
|
||||
for match in regex_match:
|
||||
extracted.append([int(match[0]), int(match[1]), match[2], f'tracker:{tracker.uuid}'])
|
||||
elif tracker_type == 'yara':
|
||||
|
@ -147,11 +173,25 @@ def get_tracker_match(obj_id, content):
|
|||
words = [tracked]
|
||||
for word in words:
|
||||
regex = _get_word_regex(word)
|
||||
regex_match = regex_helper.regex_finditer(r_key, regex, obj_id, content)
|
||||
regex_match = regex_helper.regex_finditer(r_key, regex, obj_gid, content)
|
||||
# print(regex_match)
|
||||
for match in regex_match:
|
||||
extracted.append([int(match[0]), int(match[1]), match[2], f'tracker:{tracker.uuid}'])
|
||||
|
||||
# Retro Hunt
|
||||
retro_hunts = Tracker.get_obj_retro_hunts(obj.type, obj.get_subtype(r_str=True), obj.id)
|
||||
for retro_uuid in retro_hunts:
|
||||
retro_hunt = Tracker.RetroHunt(retro_uuid)
|
||||
rule = retro_hunt.get_rule(r_compile=True)
|
||||
rule.match(data=content.encode(), callback=_get_yara_match,
|
||||
which_callbacks=yara.CALLBACK_MATCHES, timeout=30)
|
||||
yara_match = r_cache.smembers(f'extractor:yara:match:{r_key}')
|
||||
r_cache.delete(f'extractor:yara:match:{r_key}')
|
||||
extracted = []
|
||||
for match in yara_match:
|
||||
start, end, value = match.split(':', 2)
|
||||
extracted_yara.append([int(start), int(end), value, f'retro_hunt:{retro_hunt.uuid}'])
|
||||
|
||||
# Convert byte offset to string offset
|
||||
if extracted_yara:
|
||||
b_content = content.encode()
|
||||
|
@ -168,99 +208,101 @@ def get_tracker_match(obj_id, content):
|
|||
# Type:subtype:id
|
||||
# tag:iban
|
||||
# tracker:uuid
|
||||
|
||||
def extract(obj_id, content=None):
|
||||
item = Item(obj_id)
|
||||
if not item.exists():
|
||||
# def extract(obj_id, content=None):
|
||||
def extract(obj_type, subtype, obj_id, content=None):
|
||||
obj = ail_objects.get_object(obj_type, subtype, obj_id)
|
||||
if not obj.exists():
|
||||
return []
|
||||
obj_gid = obj.get_global_id()
|
||||
|
||||
# CHECK CACHE
|
||||
cached = r_cache.get(f'extractor:cache:{obj_id}')
|
||||
cached = r_cache.get(f'extractor:cache:{obj_gid}')
|
||||
# cached = None
|
||||
if cached:
|
||||
r_cache.expire(f'extractor:cache:{obj_id}', 300)
|
||||
r_cache.expire(f'extractor:cache:{obj_gid}', 300)
|
||||
return json.loads(cached)
|
||||
|
||||
if not content:
|
||||
content = item.get_content()
|
||||
content = obj.get_content()
|
||||
|
||||
extracted = get_tracker_match(obj_id, content)
|
||||
extracted = get_tracker_match(obj, content)
|
||||
|
||||
# print(item.get_tags())
|
||||
for tag in item.get_tags():
|
||||
for tag in obj.get_tags():
|
||||
if MODULES.get(tag):
|
||||
# print(tag)
|
||||
module = MODULES.get(tag)
|
||||
matches = module.extract(obj_id, content, tag)
|
||||
matches = module.extract(obj, content, tag)
|
||||
if matches:
|
||||
extracted = extracted + matches
|
||||
|
||||
for obj_t in ['cve', 'cryptocurrency', 'title', 'username']: # Decoded, PGP->extract bloc
|
||||
matches = get_correl_match(obj_t, obj_id, content)
|
||||
for obj_t in CORRELATION_TO_EXTRACT[obj.type]:
|
||||
matches = get_correl_match(obj_t, obj, content)
|
||||
if matches:
|
||||
extracted = extracted + matches
|
||||
|
||||
# SORT By Start Pos
|
||||
extracted = sorted(extracted, key=itemgetter(0))
|
||||
# print(extracted)
|
||||
if extracted:
|
||||
extracted = sorted(extracted, key=itemgetter(0))
|
||||
extracted = merge_overlap(extracted)
|
||||
|
||||
# Save In Cache
|
||||
if extracted:
|
||||
extracted_dump = json.dumps(extracted)
|
||||
r_cache.set(f'extractor:cache:{obj_id}', extracted_dump)
|
||||
r_cache.expire(f'extractor:cache:{obj_id}', 300) # TODO Reduce CACHE ???????????????
|
||||
r_cache.set(f'extractor:cache:{obj_gid}', extracted_dump)
|
||||
r_cache.expire(f'extractor:cache:{obj_gid}', 300) # TODO Reduce CACHE ???????????????
|
||||
|
||||
return extracted
|
||||
|
||||
# TODO ADD LINK UI
|
||||
def get_extracted_by_match(extracted):
|
||||
matches = {}
|
||||
for start, end, value, str_obj in extracted:
|
||||
for start, end, value, raw_objs in extracted:
|
||||
|
||||
if str_obj not in matches:
|
||||
matches[str_obj] = {}
|
||||
ob_type, row_id = str_obj.split(':', 1)
|
||||
if ob_type == 'tag': # TODO put me in object class
|
||||
matches[str_obj]['subtype'] = 'tag'
|
||||
matches[str_obj]['id'] = row_id
|
||||
matches[str_obj]['icon'] = {'style': 'fas', 'icon': '\uf02b', 'color': '#28a745', 'radius': 5}
|
||||
matches[str_obj]['link'] = ''
|
||||
elif ob_type == 'tracker': # TODO put me in object class
|
||||
matches[str_obj]['subtype'] = 'tracker'
|
||||
matches[str_obj]['id'] = row_id
|
||||
matches[str_obj]['icon'] = {'style': 'fas', 'icon': '\uf05b', 'color': '#ffc107', 'radius': 5}
|
||||
matches[str_obj]['link'] = ''
|
||||
else:
|
||||
row_id = row_id.split(':', 1)
|
||||
if len(row_id) == 2:
|
||||
subtype = row_id[0]
|
||||
obj_id = row_id[1]
|
||||
for raw in raw_objs:
|
||||
str_obj, str_match = raw
|
||||
|
||||
if str_obj not in matches:
|
||||
matches[str_obj] = {}
|
||||
ob_type, row_id = str_obj.split(':', 1)
|
||||
if ob_type == 'tag': # TODO put me in object class
|
||||
matches[str_obj]['subtype'] = 'tag'
|
||||
matches[str_obj]['id'] = row_id
|
||||
matches[str_obj]['icon'] = {'style': 'fas', 'icon': '\uf02b', 'color': '#28a745', 'radius': 5}
|
||||
matches[str_obj]['link'] = ''
|
||||
elif ob_type == 'tracker': # TODO put me in object class
|
||||
matches[str_obj]['subtype'] = 'tracker'
|
||||
matches[str_obj]['id'] = row_id
|
||||
matches[str_obj]['icon'] = {'style': 'fas', 'icon': '\uf05b', 'color': '#ffc107', 'radius': 5}
|
||||
matches[str_obj]['link'] = ''
|
||||
elif ob_type == 'retro_hunt': # TODO put me in object class
|
||||
matches[str_obj]['subtype'] = 'retro_hunt'
|
||||
matches[str_obj]['id'] = row_id
|
||||
matches[str_obj]['icon'] = {'style': 'fas', 'icon': '\uf05b', 'color': '#008107', 'radius': 5}
|
||||
matches[str_obj]['link'] = ''
|
||||
else:
|
||||
subtype = ''
|
||||
obj_id = row_id[0]
|
||||
matches[str_obj]['subtype'] = subtype
|
||||
matches[str_obj]['id'] = obj_id
|
||||
matches[str_obj]['icon'] = ail_objects.get_object_svg(ob_type, subtype, obj_id)
|
||||
matches[str_obj]['link'] = ail_objects.get_object_link(ob_type, subtype, obj_id)
|
||||
row_id = row_id.split(':', 1)
|
||||
if len(row_id) == 2:
|
||||
subtype = row_id[0]
|
||||
obj_id = row_id[1]
|
||||
else:
|
||||
subtype = ''
|
||||
obj_id = row_id[0]
|
||||
matches[str_obj]['subtype'] = subtype
|
||||
matches[str_obj]['id'] = obj_id
|
||||
matches[str_obj]['icon'] = ail_objects.get_object_svg(ob_type, subtype, obj_id)
|
||||
matches[str_obj]['link'] = ail_objects.get_object_link(ob_type, subtype, obj_id)
|
||||
|
||||
matches[str_obj]['matches'] = []
|
||||
matches[str_obj]['matches'] = []
|
||||
|
||||
match = [start, end, value]
|
||||
matches[str_obj]['matches'].append(match)
|
||||
match = [start, end, str_match]
|
||||
matches[str_obj]['matches'].append(match)
|
||||
return matches
|
||||
|
||||
|
||||
# if __name__ == '__main__':
|
||||
# t0 = time.time()
|
||||
# obj_id = 'crawled/2022/09/15/circl.lu179c7903-5b21-452e-9f25-4b61d9934e2b'
|
||||
# obj_id = 'crawled/2022/09/15/circl.lu1e4f9721-06dc-404f-aabf-3c3bd0b533bd'
|
||||
# obj_id = 'submitted/2022/09/13/submitted_ba3ee771-c91c-4f50-9d6a-8558cdac7aeb.gz'
|
||||
# # obj_id = 'tests/2021/01/01/credit_cards.gz'
|
||||
# # obj_id = 'crawled/2020/07/20/circl.luc9301321-f1b1-4d91-9082-5eb452b946c5'
|
||||
# obj_id = 'submitted/2019/09/22/97172282-e4c2-4a1e-b82c-c4fb9490a56e.gz'
|
||||
# obj_id = 'submitted/2019/09/20/4fb7f02d-1241-4ef4-b17e-80ae76038835.gz'
|
||||
# obj_id = 'crawled/2023/02/21/circl.lu1c300acb-0cbe-480f-917e-9afe3ec958e8'
|
||||
#
|
||||
# extract(obj_id)
|
||||
#
|
||||
# # get_obj_correl('cve', obj_id, content)
|
||||
|
|
|
@ -106,7 +106,7 @@ def create(thread_id, chat_instance, chat_id, subchannel_id, message_id, contain
|
|||
new_thread_id = f'{chat_id}/{subchannel_id}/{thread_id}'
|
||||
|
||||
thread = ChatThread(new_thread_id, chat_instance)
|
||||
if not thread.exists():
|
||||
if not thread.is_children():
|
||||
thread.create(container_obj, message_id)
|
||||
return thread
|
||||
|
||||
|
|
|
@ -18,13 +18,10 @@ from lib.ConfigLoader import ConfigLoader
|
|||
from lib.objects.abstract_chat_object import AbstractChatObject, AbstractChatObjects
|
||||
|
||||
|
||||
from lib.objects.abstract_subtype_object import AbstractSubtypeObject, get_all_id
|
||||
from lib.data_retention_engine import update_obj_date
|
||||
from lib.objects import ail_objects
|
||||
from lib.objects.abstract_subtype_object import get_all_id
|
||||
# from lib.data_retention_engine import update_obj_date
|
||||
from lib.timeline_engine import Timeline
|
||||
|
||||
from lib.correlations_engine import get_correlation_by_correl_type
|
||||
|
||||
config_loader = ConfigLoader()
|
||||
baseurl = config_loader.get_config_str("Notifications", "ail_domain")
|
||||
r_object = config_loader.get_db_conn("Kvrocks_Objects")
|
||||
|
@ -51,11 +48,18 @@ class Chat(AbstractChatObject):
|
|||
|
||||
def get_link(self, flask_context=False):
|
||||
if flask_context:
|
||||
url = url_for('correlation.show_correlation', type=self.type, subtype=self.subtype, id=self.id)
|
||||
url = url_for('chats_explorer.chats_explorer_chat', subtype=self.subtype, id=self.id)
|
||||
else:
|
||||
url = f'{baseurl}/correlation/show?type={self.type}&subtype={self.subtype}&id={self.id}'
|
||||
return url
|
||||
|
||||
def get_origin_link(self):
|
||||
if self.subtype == '00098785-7e70-5d12-a120-c5cdc1252b2b':
|
||||
username = self.get_username()
|
||||
if username:
|
||||
username = username.split(':', 2)[2]
|
||||
return f'https://t.me/{username}'
|
||||
|
||||
def get_svg_icon(self): # TODO
|
||||
# if self.subtype == 'telegram':
|
||||
# style = 'fab'
|
||||
|
@ -75,6 +79,7 @@ class Chat(AbstractChatObject):
|
|||
meta['name'] = self.get_name()
|
||||
meta['tags'] = self.get_tags(r_list=True)
|
||||
if 'icon' in options:
|
||||
meta['svg_icon'] = self.get_svg_icon()
|
||||
meta['icon'] = self.get_icon()
|
||||
meta['img'] = meta['icon']
|
||||
if 'info' in options:
|
||||
|
@ -99,6 +104,8 @@ class Chat(AbstractChatObject):
|
|||
meta['threads'] = self.get_threads()
|
||||
if 'tags_safe' in options:
|
||||
meta['tags_safe'] = self.is_tags_safe(meta['tags'])
|
||||
if 'origin_link' in options:
|
||||
meta['origin_link'] = self.get_origin_link()
|
||||
return meta
|
||||
|
||||
def get_misp_object(self):
|
||||
|
|
|
@ -60,7 +60,7 @@ class CryptoCurrency(AbstractSubtypeObject):
|
|||
pass
|
||||
|
||||
def is_valid_address(self):
|
||||
if self.type == 'bitcoin' or self.type == 'dash' or self.type == 'litecoin':
|
||||
if self.subtype == 'bitcoin' or self.subtype == 'dash' or self.subtype == 'litecoin' or self.subtype == 'tron':
|
||||
return check_base58_address(self.id)
|
||||
else:
|
||||
return True
|
||||
|
@ -80,6 +80,8 @@ class CryptoCurrency(AbstractSubtypeObject):
|
|||
return 'ZEC'
|
||||
elif self.subtype == 'dash':
|
||||
return 'DASH'
|
||||
elif self.subtype == 'tron':
|
||||
return 'TRX'
|
||||
return None
|
||||
|
||||
def get_link(self, flask_context=False):
|
||||
|
@ -140,7 +142,7 @@ class CryptoCurrency(AbstractSubtypeObject):
|
|||
|
||||
def get_all_subtypes():
|
||||
# return ail_core.get_object_all_subtypes(self.type)
|
||||
return ['bitcoin', 'bitcoin-cash', 'dash', 'ethereum', 'litecoin', 'monero', 'zcash']
|
||||
return ['bitcoin', 'bitcoin-cash', 'dash', 'ethereum', 'litecoin', 'monero', 'tron', 'zcash']
|
||||
|
||||
|
||||
# def build_crypto_regex(subtype, search_id):
|
||||
|
@ -172,6 +174,8 @@ def get_subtype_by_symbol(symbol):
|
|||
return 'zcash'
|
||||
elif symbol == 'DASH':
|
||||
return 'dash'
|
||||
elif symbol == 'TRX':
|
||||
return 'tron'
|
||||
return None
|
||||
|
||||
|
||||
|
@ -189,10 +193,6 @@ def get_all_cryptocurrencies_by_subtype(subtype):
|
|||
def sanitize_cryptocurrency_name_to_search(name_to_search, subtype): # TODO FILTER NAME + Key + mail
|
||||
if subtype == '':
|
||||
pass
|
||||
elif subtype == 'name':
|
||||
pass
|
||||
elif subtype == 'mail':
|
||||
pass
|
||||
return name_to_search
|
||||
|
||||
def search_cryptocurrency_by_name(name_to_search, subtype, r_pos=False):
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*-coding:UTF-8 -*
|
||||
import itertools
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
|
@ -208,7 +209,7 @@ class Domain(AbstractObject):
|
|||
def get_screenshot(self):
|
||||
last_item = self.get_last_item_root()
|
||||
if last_item:
|
||||
screenshot = self._get_external_correlation('item', '', last_item, 'screenshot').get('screenshot')
|
||||
screenshot = self.get_obj_correlations('item', '', last_item, ['screenshot']).get('screenshot')
|
||||
if screenshot:
|
||||
return screenshot.pop()[1:]
|
||||
|
||||
|
@ -391,7 +392,7 @@ class Domain(AbstractObject):
|
|||
print(har)
|
||||
_write_in_zip_buffer(zf, os.path.join(hars_dir, har), f'{basename}.json.gz')
|
||||
# Screenshot
|
||||
screenshot = self._get_external_correlation('item', '', item_id, 'screenshot')
|
||||
screenshot = self.get_obj_correlations('item', '', item_id, ['screenshot'])
|
||||
if screenshot and screenshot['screenshot']:
|
||||
screenshot = screenshot['screenshot'].pop()[1:]
|
||||
screenshot = os.path.join(screenshot[0:2], screenshot[2:4], screenshot[4:6], screenshot[6:8],
|
||||
|
@ -410,6 +411,10 @@ class Domain(AbstractObject):
|
|||
r_crawler.sadd(f'language:domains:{self.domain_type}:{language}', self.id)
|
||||
r_crawler.sadd(f'domain:language:{self.id}', language)
|
||||
|
||||
def update_vanity_cluster(self):
|
||||
if self.get_domain_type() == 'onion':
|
||||
update_vanity_cluster(self.id)
|
||||
|
||||
############################################################################
|
||||
############################################################################
|
||||
|
||||
|
@ -643,6 +648,82 @@ def api_search_domains_by_name(name_to_search, domain_types, meta=False, page=1)
|
|||
################################################################################
|
||||
################################################################################
|
||||
|
||||
# if __name__ == '__main__':
|
||||
# dom = Domain('')
|
||||
# dom.get_download_zip()
|
||||
#### Vanity Explorer ####
|
||||
|
||||
# TODO ADD ME IN OBJ CLASS
|
||||
def get_domain_vanity(domain, len_vanity=4):
|
||||
return domain[:len_vanity]
|
||||
|
||||
def get_vanity_clusters(nb_min=4):
|
||||
return r_crawler.zrange('vanity:onion:4', nb_min, '+inf', byscore=True, withscores=True)
|
||||
|
||||
def get_vanity_domains(vanity, len_vanity=4, meta=False):
|
||||
if len_vanity == 4:
|
||||
domains = r_crawler.smembers(f'vanity:{int(len_vanity)}:{vanity}')
|
||||
else:
|
||||
domains = []
|
||||
for domain in r_crawler.smembers(f'vanity:4:{vanity[:4]}'):
|
||||
dom_vanity = get_domain_vanity(domain, len_vanity=len_vanity)
|
||||
if vanity == dom_vanity:
|
||||
domains.append(domain)
|
||||
if meta:
|
||||
metas = []
|
||||
for domain in domains:
|
||||
metas.append(Domain(domain).get_meta(options={'languages', 'screenshot', 'tags_safe'}))
|
||||
return metas
|
||||
else:
|
||||
return domains
|
||||
|
||||
def get_vanity_cluster(vanity, len_vanity=4, nb_min=4):
|
||||
if len_vanity == 4:
|
||||
return get_vanity_clusters(nb_min=nb_min)
|
||||
else:
|
||||
clusters = {}
|
||||
for domain in get_vanity_domains(vanity[:4], len_vanity=4):
|
||||
new_vanity = get_domain_vanity(domain, len_vanity=len_vanity)
|
||||
if new_vanity not in clusters:
|
||||
clusters[new_vanity] = 0
|
||||
clusters[new_vanity] += 1
|
||||
to_remove = []
|
||||
for new_vanity in clusters:
|
||||
if clusters[new_vanity] < nb_min:
|
||||
to_remove.append(new_vanity)
|
||||
for new_vanity in to_remove:
|
||||
del clusters[new_vanity]
|
||||
return clusters
|
||||
|
||||
def get_vanity_nb_domains(vanity, len_vanity=4):
|
||||
return r_crawler.scard(f'vanity:{int(len_vanity)}:{vanity}')
|
||||
|
||||
# TODO BUILD DICTIONARY
|
||||
def update_vanity_cluster(domain):
|
||||
vanity = get_domain_vanity(domain, len_vanity=4)
|
||||
add = r_crawler.sadd(f'vanity:4:{vanity}', domain)
|
||||
if add == 1:
|
||||
r_crawler.zadd('vanity:onion:4', {vanity: 1}, incr=True)
|
||||
|
||||
def _rebuild_vanity_clusters():
|
||||
for vanity in r_crawler.zrange('vanity:onion:4', 0, -1):
|
||||
r_crawler.delete(f'vanity:4:{vanity}')
|
||||
r_crawler.delete('vanity:onion:4')
|
||||
for domain in get_domains_up_by_type('onion'):
|
||||
update_vanity_cluster(domain)
|
||||
|
||||
def cluster_onion_domain_vanity(len_vanity=4):
|
||||
domains = {}
|
||||
occurrences = {}
|
||||
for domain in get_domains_up_by_type('onion'):
|
||||
start = domain[:len_vanity]
|
||||
if start not in domains:
|
||||
domains[start] = []
|
||||
occurrences[start] = 0
|
||||
domains[start].append(domain)
|
||||
occurrences[start] += 1
|
||||
|
||||
# print(json.dumps(domains))
|
||||
res = dict(sorted(occurrences.items(), key=lambda item: item[1], reverse=True))
|
||||
print(json.dumps(res))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
_rebuild_vanity_clusters()
|
||||
|
|
|
@ -1,12 +1,14 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*-coding:UTF-8 -*
|
||||
import base64
|
||||
|
||||
import mmh3
|
||||
import os
|
||||
import sys
|
||||
|
||||
from flask import url_for
|
||||
from io import BytesIO
|
||||
|
||||
from flask import url_for
|
||||
from pymisp import MISPObject
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
|
@ -18,6 +20,7 @@ from lib.objects.abstract_daterange_object import AbstractDaterangeObject, Abstr
|
|||
|
||||
config_loader = ConfigLoader()
|
||||
r_objects = config_loader.get_db_conn("Kvrocks_Objects")
|
||||
FAVICON_FOLDER = config_loader.get_files_directory('favicons')
|
||||
baseurl = config_loader.get_config_str("Notifications", "ail_domain")
|
||||
config_loader = None
|
||||
|
||||
|
@ -40,10 +43,6 @@ class Favicon(AbstractDaterangeObject):
|
|||
# # TODO:
|
||||
pass
|
||||
|
||||
def get_content(self, r_type='str'):
|
||||
if r_type == 'str':
|
||||
return self._get_field('content')
|
||||
|
||||
def get_link(self, flask_context=False):
|
||||
if flask_context:
|
||||
url = url_for('correlation.show_correlation', type=self.type, id=self.id)
|
||||
|
@ -53,7 +52,31 @@ class Favicon(AbstractDaterangeObject):
|
|||
|
||||
# TODO # CHANGE COLOR
|
||||
def get_svg_icon(self):
|
||||
return {'style': 'fas', 'icon': '\uf20a', 'color': '#1E88E5', 'radius': 5} # f0c8 f45c
|
||||
return {'style': 'fas', 'icon': '\uf089', 'color': '#E1F5D0', 'radius': 5} # f0c8 f45c f089
|
||||
|
||||
def get_rel_path(self): # TODO USE MUMUR HASH
|
||||
rel_path = os.path.join(self.id[0:1], self.id[1:2], self.id[2:3], self.id[3:4], self.id[4:5], self.id[5:6], self.id[6:])
|
||||
return rel_path
|
||||
|
||||
def get_filepath(self):
|
||||
filename = os.path.join(FAVICON_FOLDER, self.get_rel_path())
|
||||
return os.path.realpath(filename)
|
||||
|
||||
def get_file_content(self, r_type='str'):
|
||||
filepath = self.get_filepath()
|
||||
if r_type == 'str':
|
||||
with open(filepath, 'rb') as f:
|
||||
file_content = f.read()
|
||||
b64 = base64.b64encode(file_content)
|
||||
# b64 = base64.encodebytes(file_content)
|
||||
return b64.decode()
|
||||
elif r_type == 'io':
|
||||
with open(filepath, 'rb') as f:
|
||||
file_content = BytesIO(f.read())
|
||||
return file_content
|
||||
|
||||
def get_content(self, r_type='str'):
|
||||
return self.get_file_content()
|
||||
|
||||
def get_misp_object(self):
|
||||
obj_attrs = []
|
||||
|
@ -69,7 +92,7 @@ class Favicon(AbstractDaterangeObject):
|
|||
f'Export error, None seen {self.type}:{self.subtype}:{self.id}, first={first_seen}, last={last_seen}')
|
||||
|
||||
obj_attrs.append(obj.add_attribute('favicon-mmh3', value=self.id))
|
||||
obj_attrs.append(obj.add_attribute('favicon', value=self.get_content(r_type='bytes')))
|
||||
obj_attrs.append(obj.add_attribute('favicon', value=self.get_content()))
|
||||
for obj_attr in obj_attrs:
|
||||
for tag in self.get_tags():
|
||||
obj_attr.add_tag(tag)
|
||||
|
@ -78,29 +101,32 @@ class Favicon(AbstractDaterangeObject):
|
|||
def get_meta(self, options=set()):
|
||||
meta = self._get_meta(options=options)
|
||||
meta['id'] = self.id
|
||||
meta['img'] = self.id
|
||||
meta['tags'] = self.get_tags(r_list=True)
|
||||
if 'content' in options:
|
||||
meta['content'] = self.get_content()
|
||||
if 'tags_safe' in options:
|
||||
meta['tags_safe'] = self.is_tags_safe(meta['tags'])
|
||||
return meta
|
||||
|
||||
# def get_links(self):
|
||||
# # TODO GET ALL URLS FROM CORRELATED ITEMS
|
||||
|
||||
def create(self, content, _first_seen=None, _last_seen=None):
|
||||
if not isinstance(content, str):
|
||||
content = content.decode()
|
||||
self._set_field('content', content)
|
||||
def create(self, content): # TODO first seen / last seen options
|
||||
filepath = self.get_filepath()
|
||||
dirname = os.path.dirname(filepath)
|
||||
if not os.path.exists(dirname):
|
||||
os.makedirs(dirname)
|
||||
with open(filepath, 'wb') as f:
|
||||
f.write(content)
|
||||
self._create()
|
||||
|
||||
|
||||
def create_favicon(content, url=None): # TODO URL ????
|
||||
if isinstance(content, str):
|
||||
content = content.encode()
|
||||
favicon_id = mmh3.hash_bytes(content)
|
||||
def create(b_content, size_limit=5000000, b64=False, force=False):
|
||||
if isinstance(b_content, str):
|
||||
b_content = b_content.encode()
|
||||
b64 = base64.encodebytes(b_content) # newlines inserted after every 76 bytes of output
|
||||
favicon_id = str(mmh3.hash(b64))
|
||||
favicon = Favicon(favicon_id)
|
||||
if not favicon.exists():
|
||||
favicon.create(content)
|
||||
|
||||
favicon.create(b_content)
|
||||
return favicon
|
||||
|
||||
class Favicons(AbstractDaterangeObjects):
|
||||
"""
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
# -*-coding:UTF-8 -*
|
||||
|
||||
import base64
|
||||
import magic
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
@ -50,7 +51,7 @@ class Image(AbstractDaterangeObject):
|
|||
if flask_context:
|
||||
url = url_for('correlation.show_correlation', type=self.type, id=self.id)
|
||||
else:
|
||||
url = f'{baseurl}/correlation/show?type={self.type}&id={self.id}'
|
||||
url = f'/correlation/show?type={self.type}&id={self.id}'
|
||||
return url
|
||||
|
||||
def get_svg_icon(self):
|
||||
|
@ -64,6 +65,14 @@ class Image(AbstractDaterangeObject):
|
|||
filename = os.path.join(IMAGE_FOLDER, self.get_rel_path())
|
||||
return os.path.realpath(filename)
|
||||
|
||||
def is_gif(self, filepath=None):
|
||||
if not filepath:
|
||||
filepath = self.get_filepath()
|
||||
mime = magic.from_file(filepath, mime=True)
|
||||
if mime == 'image/gif':
|
||||
return True
|
||||
return False
|
||||
|
||||
def get_file_content(self):
|
||||
filepath = self.get_filepath()
|
||||
with open(filepath, 'rb') as f:
|
||||
|
@ -71,7 +80,10 @@ class Image(AbstractDaterangeObject):
|
|||
return file_content
|
||||
|
||||
def get_content(self, r_type='str'):
|
||||
return self.get_file_content()
|
||||
if r_type == 'str':
|
||||
return None
|
||||
else:
|
||||
return self.get_file_content()
|
||||
|
||||
def get_misp_object(self):
|
||||
obj_attrs = []
|
||||
|
@ -106,6 +118,20 @@ class Image(AbstractDaterangeObject):
|
|||
def get_screenshot_dir():
|
||||
return IMAGE_FOLDER
|
||||
|
||||
def get_all_images():
|
||||
images = []
|
||||
for root, dirs, files in os.walk(get_screenshot_dir()):
|
||||
for file in files:
|
||||
path = f'{root}{file}'
|
||||
image_id = path.replace(IMAGE_FOLDER, '').replace('/', '')
|
||||
images.append(image_id)
|
||||
return images
|
||||
|
||||
|
||||
def get_all_images_objects(filters={}):
|
||||
for image_id in get_all_images():
|
||||
yield Image(image_id)
|
||||
|
||||
|
||||
def create(content, size_limit=5000000, b64=False, force=False):
|
||||
size = (len(content)*3) / 4
|
||||
|
@ -131,5 +157,6 @@ class Images(AbstractDaterangeObjects):
|
|||
|
||||
|
||||
# if __name__ == '__main__':
|
||||
# print(json.dumps(get_all_images()))
|
||||
# name_to_search = '29ba'
|
||||
# print(search_screenshots_by_name(name_to_search))
|
||||
|
|
|
@ -305,6 +305,8 @@ class Item(AbstractObject):
|
|||
meta['investigations'] = self.get_investigations()
|
||||
if 'link' in options:
|
||||
meta['link'] = self.get_link(flask_context=True)
|
||||
if 'last_full_date' in options:
|
||||
meta['last_full_date'] = f"{meta['date'][0:4]}-{meta['date'][5:7]}-{meta['date'][8:10]}"
|
||||
|
||||
# meta['encoding'] = None
|
||||
return meta
|
||||
|
@ -339,9 +341,9 @@ class Item(AbstractObject):
|
|||
return {'nb': nb_line, 'max_length': max_length}
|
||||
|
||||
# TODO RENAME ME
|
||||
def get_languages(self, min_len=600, num_langs=3, min_proportion=0.2, min_probability=0.7):
|
||||
def get_languages(self, min_len=600, num_langs=3, min_proportion=0.2, min_probability=0.7, force_gcld3=False):
|
||||
ld = LanguagesDetector(nb_langs=num_langs, min_proportion=min_proportion, min_probability=min_probability, min_len=min_len)
|
||||
return ld.detect(self.get_content())
|
||||
return ld.detect(self.get_content(), force_gcld3=force_gcld3)
|
||||
|
||||
def get_mimetype(self, content=None):
|
||||
if not content:
|
||||
|
|
|
@ -71,6 +71,10 @@ class Message(AbstractObject):
|
|||
def get_basename(self):
|
||||
return os.path.basename(self.id)
|
||||
|
||||
def get_chat_instance(self):
|
||||
c_id = self.id.split('/')
|
||||
return c_id[0]
|
||||
|
||||
def get_content(self, r_type='str'): # TODO ADD cache # TODO Compress content ???????
|
||||
"""
|
||||
Returns content
|
||||
|
@ -85,11 +89,16 @@ class Message(AbstractObject):
|
|||
if r_type == 'str':
|
||||
return content
|
||||
elif r_type == 'bytes':
|
||||
return content.encode()
|
||||
if content:
|
||||
return content.encode()
|
||||
|
||||
def get_date(self):
|
||||
timestamp = self.get_timestamp()
|
||||
return datetime.fromtimestamp(float(timestamp)).strftime('%Y%m%d')
|
||||
return datetime.utcfromtimestamp(float(timestamp)).strftime('%Y%m%d')
|
||||
|
||||
def get_last_full_date(self):
|
||||
timestamp = datetime.utcfromtimestamp(float(self.get_timestamp()))
|
||||
return timestamp.strftime('%Y-%m-%d %H:%M:%S')
|
||||
|
||||
def get_timestamp(self):
|
||||
dirs = self.id.split('/')
|
||||
|
@ -102,9 +111,24 @@ class Message(AbstractObject):
|
|||
return message_id
|
||||
|
||||
def get_chat_id(self): # TODO optimize -> use me to tag Chat
|
||||
chat_id = self.get_basename().rsplit('_', 1)[0]
|
||||
return chat_id
|
||||
c_id = self.id.split('/')
|
||||
return c_id[2]
|
||||
|
||||
def get_chat(self):
|
||||
c_id = self.id.split('/')
|
||||
return f'chat:{c_id[0]}:{c_id[2]}'
|
||||
|
||||
def get_subchannel(self):
|
||||
subchannel = self.get_correlation('chat-subchannel')
|
||||
if subchannel.get('chat-subchannel'):
|
||||
return f'chat-subchannel:{subchannel["chat-subchannel"].pop()}'
|
||||
|
||||
def get_current_thread(self):
|
||||
subchannel = self.get_correlation('chat-thread')
|
||||
if subchannel.get('chat-thread'):
|
||||
return f'chat-thread:{subchannel["chat-thread"].pop()}'
|
||||
|
||||
# children thread
|
||||
def get_thread(self):
|
||||
for child in self.get_childrens():
|
||||
obj_type, obj_subtype, obj_id = child.split(':', 2)
|
||||
|
@ -116,12 +140,15 @@ class Message(AbstractObject):
|
|||
# TODO get channel ID
|
||||
# TODO get thread ID
|
||||
|
||||
def _get_image_ocr(self, obj_id):
|
||||
return bool(self.get_correlation('ocr').get('ocr'))
|
||||
|
||||
def get_images(self):
|
||||
images = []
|
||||
for child in self.get_childrens():
|
||||
obj_type, _, obj_id = child.split(':', 2)
|
||||
if obj_type == 'image':
|
||||
images.append(obj_id)
|
||||
images.append({'id': obj_id, 'ocr': self._get_image_ocr(obj_id)})
|
||||
return images
|
||||
|
||||
def get_user_account(self, meta=False):
|
||||
|
@ -175,30 +202,12 @@ class Message(AbstractObject):
|
|||
# message media
|
||||
# flag is deleted -> event or missing from feeder pass ???
|
||||
|
||||
def get_translation(self, content=None, source=None, target='fr'):
|
||||
"""
|
||||
Returns translated content
|
||||
"""
|
||||
|
||||
# return self._get_field('translated')
|
||||
global_id = self.get_global_id()
|
||||
translation = r_cache.get(f'translation:{target}:{global_id}')
|
||||
r_cache.expire(f'translation:{target}:{global_id}', 0)
|
||||
if translation:
|
||||
return translation
|
||||
if not content:
|
||||
content = self.get_content()
|
||||
translation = Language.LanguageTranslator().translate(content, source=source, target=target)
|
||||
if translation:
|
||||
r_cache.set(f'translation:{target}:{global_id}', translation)
|
||||
r_cache.expire(f'translation:{target}:{global_id}', 300)
|
||||
return translation
|
||||
|
||||
def _set_translation(self, translation):
|
||||
"""
|
||||
Set translated content
|
||||
"""
|
||||
return self._set_field('translated', translation) # translation by hash ??? -> avoid translating multiple time
|
||||
def get_language(self):
|
||||
languages = self.get_languages()
|
||||
if languages:
|
||||
return languages.pop()
|
||||
else:
|
||||
return None
|
||||
|
||||
# def get_ail_2_ail_payload(self):
|
||||
# payload = {'raw': self.get_gzip_content(b64=True)}
|
||||
|
@ -236,7 +245,7 @@ class Message(AbstractObject):
|
|||
# return r_object.hget(f'meta:item::{self.id}', 'url')
|
||||
|
||||
# options: set of optional meta fields
|
||||
def get_meta(self, options=None, timestamp=None, translation_target='en'):
|
||||
def get_meta(self, options=None, timestamp=None, translation_target=''):
|
||||
"""
|
||||
:type options: set
|
||||
:type timestamp: float
|
||||
|
@ -250,10 +259,12 @@ class Message(AbstractObject):
|
|||
timestamp = self.get_timestamp()
|
||||
else:
|
||||
timestamp = float(timestamp)
|
||||
timestamp = datetime.fromtimestamp(float(timestamp))
|
||||
meta['date'] = timestamp.strftime('%Y/%m/%d')
|
||||
timestamp = datetime.utcfromtimestamp(float(timestamp))
|
||||
meta['date'] = timestamp.strftime('%Y-%m-%d')
|
||||
meta['hour'] = timestamp.strftime('%H:%M:%S')
|
||||
meta['full_date'] = timestamp.isoformat(' ')
|
||||
if 'last_full_date' in options:
|
||||
meta['last_full_date'] = meta['full_date']
|
||||
|
||||
meta['source'] = self.get_source()
|
||||
# optional meta fields
|
||||
|
@ -289,8 +300,16 @@ class Message(AbstractObject):
|
|||
meta['files-names'] = self.get_files_names()
|
||||
if 'reactions' in options:
|
||||
meta['reactions'] = self.get_reactions()
|
||||
if 'language' in options:
|
||||
meta['language'] = self.get_language()
|
||||
if 'translation' in options and translation_target:
|
||||
meta['translation'] = self.translate(content=meta.get('content'), target=translation_target)
|
||||
if meta.get('language'):
|
||||
source = meta['language']
|
||||
else:
|
||||
source = None
|
||||
meta['translation'] = self.translate(content=meta.get('content'), source=source, target=translation_target)
|
||||
if 'language' in options:
|
||||
meta['language'] = self.get_language()
|
||||
|
||||
# meta['encoding'] = None
|
||||
return meta
|
||||
|
@ -301,14 +320,30 @@ class Message(AbstractObject):
|
|||
# content = self.get_content()
|
||||
# translated = argostranslate.translate.translate(content, 'ru', 'en')
|
||||
# # Save translation
|
||||
# self._set_translation(translated)
|
||||
# return translated
|
||||
|
||||
def create(self, content, translation=None, tags=[]):
|
||||
## Language ##
|
||||
|
||||
def get_objs_container(self):
|
||||
objs_containers = set()
|
||||
# chat
|
||||
objs_containers.add(self.get_chat())
|
||||
subchannel = self.get_subchannel()
|
||||
if subchannel:
|
||||
objs_containers.add(subchannel)
|
||||
thread = self.get_current_thread()
|
||||
if thread:
|
||||
objs_containers.add(thread)
|
||||
return objs_containers
|
||||
|
||||
#- Language -#
|
||||
|
||||
def create(self, content, language=None, translation=None, tags=[]):
|
||||
self._set_field('content', content)
|
||||
# r_content.get(f'content:{self.type}:{self.get_subtype(r_str=True)}:{self.id}', content)
|
||||
if translation:
|
||||
self._set_translation(translation)
|
||||
if not language and content:
|
||||
language = self.detect_language()
|
||||
if translation and content:
|
||||
self.set_translation(language, translation)
|
||||
for tag in tags:
|
||||
self.add_tag(tag)
|
||||
|
||||
|
@ -339,7 +374,6 @@ def create(obj_id, content, translation=None, tags=[]):
|
|||
message.create(content, translation=translation, tags=tags)
|
||||
return message
|
||||
|
||||
|
||||
# TODO Encode translation
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,336 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
from datetime import datetime
|
||||
from io import BytesIO
|
||||
from PIL import Image
|
||||
from PIL import ImageDraw
|
||||
|
||||
from pymisp import MISPObject
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
##################################
|
||||
# Import Project packages
|
||||
##################################
|
||||
from lib.objects.abstract_daterange_object import AbstractDaterangeObject, AbstractDaterangeObjects
|
||||
from lib.ConfigLoader import ConfigLoader
|
||||
from packages import Date
|
||||
# from lib import Language
|
||||
# from lib.data_retention_engine import update_obj_date, get_obj_date_first
|
||||
|
||||
from flask import url_for
|
||||
|
||||
config_loader = ConfigLoader()
|
||||
r_cache = config_loader.get_redis_conn("Redis_Cache")
|
||||
r_object = config_loader.get_db_conn("Kvrocks_Objects")
|
||||
baseurl = config_loader.get_config_str("Notifications", "ail_domain")
|
||||
IMAGE_FOLDER = config_loader.get_files_directory('images')
|
||||
config_loader = None
|
||||
|
||||
# SET x1,y1:x2,y2:x3,y3:x4,y4:extracted_text
|
||||
|
||||
class Ocr(AbstractDaterangeObject):
|
||||
"""
|
||||
AIL Message Object. (strings)
|
||||
"""
|
||||
|
||||
def __init__(self, id):
|
||||
super(Ocr, self).__init__('ocr', id)
|
||||
|
||||
def exists(self):
|
||||
return r_object.exists(f'ocr:{self.id}')
|
||||
|
||||
def get_content(self, r_type='str'):
|
||||
"""
|
||||
Returns content
|
||||
"""
|
||||
global_id = self.get_global_id()
|
||||
content = r_cache.get(f'content:{global_id}')
|
||||
if not content:
|
||||
dict_content = {}
|
||||
for extracted in r_object.smembers(f'ocr:{self.id}'):
|
||||
extracted = extracted.split(':', 4)
|
||||
x, y = extracted[0].split(',', 1)
|
||||
# get text line, y +- 20
|
||||
rounded_y = round(int(y) / 20) * 20
|
||||
if rounded_y not in dict_content:
|
||||
dict_content[rounded_y] = []
|
||||
dict_content[rounded_y].append((int(x), int(y), extracted[-1]))
|
||||
|
||||
content = ''
|
||||
new_line = True
|
||||
l_key = sorted(dict_content.keys())
|
||||
for key in l_key:
|
||||
dict_content[key] = sorted(dict_content[key], key=lambda c: c[0])
|
||||
for text in dict_content[key]:
|
||||
if new_line:
|
||||
content = f'{content}{text[2]}'
|
||||
new_line = False
|
||||
else:
|
||||
content = f'{content} {text[2]}'
|
||||
content = f'{content}\n'
|
||||
new_line = True
|
||||
|
||||
# Set Cache
|
||||
if content:
|
||||
global_id = self.get_global_id()
|
||||
r_cache.set(f'content:{global_id}', content)
|
||||
r_cache.expire(f'content:{global_id}', 300)
|
||||
|
||||
if r_type == 'str':
|
||||
return content
|
||||
elif r_type == 'bytes':
|
||||
if content:
|
||||
return content.encode()
|
||||
|
||||
def get_date(self): # TODO
|
||||
return Date.get_today_date_str()
|
||||
|
||||
def get_source(self): # TODO
|
||||
"""
|
||||
Returns source/feeder name
|
||||
"""
|
||||
return 'ocr'
|
||||
# l_source = self.id.split('/')[:-2]
|
||||
# return os.path.join(*l_source)
|
||||
|
||||
def get_basename(self): # TODO
|
||||
return 'ocr'
|
||||
|
||||
def get_language(self):
|
||||
languages = self.get_languages()
|
||||
if languages:
|
||||
return languages.pop()
|
||||
else:
|
||||
return None
|
||||
|
||||
def get_link(self, flask_context=False):
|
||||
if flask_context:
|
||||
url = url_for('correlation.show_correlation', type=self.type, id=self.id)
|
||||
else:
|
||||
url = f'{baseurl}/correlation/show?type={self.type}&id={self.id}'
|
||||
return url
|
||||
|
||||
def get_svg_icon(self):
|
||||
return {'style': 'fas', 'icon': '\uf065', 'color': 'yellow', 'radius': 5}
|
||||
|
||||
def get_image_path(self):
|
||||
rel_path = os.path.join(self.id[0:2], self.id[2:4], self.id[4:6], self.id[6:8], self.id[8:10], self.id[10:12], self.id[12:])
|
||||
filename = os.path.join(IMAGE_FOLDER, rel_path)
|
||||
return os.path.realpath(filename)
|
||||
|
||||
def get_misp_object(self): # TODO
|
||||
obj = MISPObject('instant-message', standalone=True)
|
||||
obj_date = self.get_date()
|
||||
if obj_date:
|
||||
obj.first_seen = obj_date
|
||||
else:
|
||||
self.logger.warning(
|
||||
f'Export error, None seen {self.type}:{self.subtype}:{self.id}, first={obj_date}')
|
||||
|
||||
# obj_attrs = [obj.add_attribute('first-seen', value=obj_date),
|
||||
# obj.add_attribute('raw-data', value=self.id, data=self.get_raw_content()),
|
||||
# obj.add_attribute('sensor', value=get_ail_uuid())]
|
||||
obj_attrs = []
|
||||
for obj_attr in obj_attrs:
|
||||
for tag in self.get_tags():
|
||||
obj_attr.add_tag(tag)
|
||||
return obj
|
||||
|
||||
# options: set of optional meta fields
|
||||
def get_meta(self, options=None, translation_target=''):
|
||||
"""
|
||||
:type options: set
|
||||
"""
|
||||
if options is None:
|
||||
options = set()
|
||||
meta = self._get_meta(options=options)
|
||||
meta['tags'] = self.get_tags()
|
||||
meta['content'] = self.get_content()
|
||||
|
||||
# optional meta fields
|
||||
if 'investigations' in options:
|
||||
meta['investigations'] = self.get_investigations()
|
||||
if 'link' in options:
|
||||
meta['link'] = self.get_link(flask_context=True)
|
||||
if 'icon' in options:
|
||||
meta['svg_icon'] = self.get_svg_icon()
|
||||
if 'img' in options:
|
||||
meta['img'] = self.draw_bounding_boxs()
|
||||
if 'map' in options:
|
||||
meta['map'] = self.get_img_map_coords()
|
||||
if 'language' in options:
|
||||
meta['language'] = self.get_language()
|
||||
if 'translation' in options and translation_target:
|
||||
if meta.get('language'):
|
||||
source = meta['language']
|
||||
else:
|
||||
source = None
|
||||
meta['translation'] = self.translate(content=meta.get('content'), source=source, target=translation_target)
|
||||
if 'language' in options:
|
||||
meta['language'] = self.get_language()
|
||||
return meta
|
||||
|
||||
def get_objs_container(self):
|
||||
objs_containers = set()
|
||||
# chat
|
||||
objs_containers.add(self.get_first_correlation('chat'))
|
||||
subchannel = self.get_first_correlation('chat-subchannel')
|
||||
if subchannel:
|
||||
objs_containers.add(subchannel)
|
||||
thread = self.get_first_correlation('chat-thread')
|
||||
if thread:
|
||||
objs_containers.add(thread)
|
||||
return objs_containers
|
||||
|
||||
def create_coord_str(self, bbox):
|
||||
c1, c2, c3, c4 = bbox
|
||||
x1, y1 = c1
|
||||
x2, y2 = c2
|
||||
x3, y3 = c3
|
||||
x4, y4 = c4
|
||||
return f'{int(x1)},{int(y1)}:{int(x2)},{int(y2)}:{int(x3)},{int(y3)}:{int(x4)},{int(y4)}'
|
||||
|
||||
def _unpack_coord(self, coord):
|
||||
return coord.split(',', 1)
|
||||
|
||||
def get_coords(self):
|
||||
coords = []
|
||||
for extracted in r_object.smembers(f'ocr:{self.id}'):
|
||||
coord = []
|
||||
bbox = extracted.split(':', 4)[:-1]
|
||||
for c in bbox:
|
||||
x, y = self._unpack_coord(c)
|
||||
coord.append((int(x), int(y)))
|
||||
coords.append(coord)
|
||||
return coords
|
||||
|
||||
def get_img_map_coords(self):
|
||||
coords = []
|
||||
for extracted in r_object.smembers(f'ocr:{self.id}'):
|
||||
extract = extracted.split(':', 4)
|
||||
x1, y1 = self._unpack_coord(extract[0])
|
||||
x2, y2 = self._unpack_coord(extract[1])
|
||||
x3, y3 = self._unpack_coord(extract[2])
|
||||
x4, y4 = self._unpack_coord(extract[3])
|
||||
coords.append((f'{x1},{y1},{x2},{y2},{x3},{y3},{x4},{y4}', extract[4]))
|
||||
return coords
|
||||
|
||||
def edit_text(self, coordinates, text, new_text, new_coordinates=None):
|
||||
pass
|
||||
|
||||
def add_text(self, coordinates, text):
|
||||
val = f'{coordinates}:{text}'
|
||||
return r_object.sadd(f'ocr:{self.id}', val)
|
||||
|
||||
def remove_text(self, val):
|
||||
return r_object.srem(f'ocr:{self.id}', val)
|
||||
|
||||
def update_correlation(self, date=None):
|
||||
if date:
|
||||
self.add(date, None)
|
||||
image_correl = self.get_obj_correlations('image', '', self.id)
|
||||
for obj_type in image_correl:
|
||||
if obj_type != 'ocr':
|
||||
for obj_raw in image_correl[obj_type]:
|
||||
obj_subtype, obj_id = obj_raw.split(':', 1)
|
||||
self.add_correlation(obj_type, obj_subtype, obj_id)
|
||||
|
||||
def create(self, extracted_texts, tags=[]):
|
||||
# r_object.sadd(f'{self.type}:all', self.id)
|
||||
created = False
|
||||
for extracted in extracted_texts:
|
||||
bbox, text = extracted
|
||||
if len(text) > 1:
|
||||
str_coords = self.create_coord_str(bbox)
|
||||
self.add_text(str_coords, text)
|
||||
created = True
|
||||
|
||||
if created:
|
||||
# Correlations
|
||||
self._copy_from('image', self.id)
|
||||
self.update_correlation()
|
||||
self.add_correlation('image', '', self.id)
|
||||
|
||||
for tag in tags:
|
||||
self.add_tag(tag)
|
||||
return self.id
|
||||
|
||||
# # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\
|
||||
def delete(self):
|
||||
r_object.delete(f'ocr:{self.id}')
|
||||
|
||||
def draw_bounding_boxs(self):
|
||||
img = Image.open(self.get_image_path()).convert("RGBA")
|
||||
draw = ImageDraw.Draw(img)
|
||||
for bbox in self.get_coords():
|
||||
c1, c2, c3, c4 = bbox
|
||||
draw.line((tuple(c1), tuple(c2)), fill="yellow", width=2)
|
||||
draw.line((tuple(c2), tuple(c3)), fill="yellow", width=2)
|
||||
draw.line((tuple(c3), tuple(c4)), fill="yellow", width=2)
|
||||
draw.line((tuple(c4), tuple(c1)), fill="yellow", width=2)
|
||||
# img.show()
|
||||
buff = BytesIO()
|
||||
img.save(buff, "PNG")
|
||||
return buff.getvalue()
|
||||
|
||||
|
||||
def create(obj_id, detections, tags=[]):
|
||||
obj = Ocr(obj_id)
|
||||
if not obj.exists():
|
||||
obj_id = obj.create(detections, tags=tags)
|
||||
if obj_id:
|
||||
return obj
|
||||
|
||||
# TODO preload languages
|
||||
def extract_text(image_path, languages, threshold=0.2):
|
||||
import easyocr
|
||||
reader = easyocr.Reader(languages, verbose=False)
|
||||
texts = reader.readtext(image_path)
|
||||
# print(texts)
|
||||
extracted = []
|
||||
for bbox, text, score in texts:
|
||||
if score > threshold:
|
||||
extracted.append((bbox, text))
|
||||
return extracted
|
||||
|
||||
|
||||
def get_ocr_languages():
|
||||
return {'af', 'ar', 'as', 'az', 'be', 'bg', 'bh', 'bs', 'cs', 'cy', 'da', 'de', 'en', 'es', 'et', 'fa', 'fr', 'ga', 'hi', 'hr', 'hu', 'id', 'is', 'it', 'ja', 'kn', 'ko', 'ku', 'la', 'lt', 'lv', 'mi', 'mn', 'mr', 'ms', 'mt', 'ne', 'nl', 'no', 'oc', 'pi', 'pl', 'pt', 'ro', 'ru', 'sk', 'sl', 'sq', 'sr', 'sv', 'sw', 'ta', 'te', 'th', 'tl', 'tr', 'ug', 'uk', 'ur', 'uz', 'vi', 'zh'}
|
||||
|
||||
|
||||
def sanityze_ocr_languages(languages, ocr_languages=None):
|
||||
langs = set()
|
||||
if not ocr_languages:
|
||||
ocr_languages = get_ocr_languages()
|
||||
for lang in languages:
|
||||
if lang in ocr_languages:
|
||||
if lang == 'zh':
|
||||
langs.add('ch_sim')
|
||||
elif lang == 'sr':
|
||||
langs.add('rs_latin')
|
||||
else:
|
||||
langs.add(lang)
|
||||
return langs
|
||||
|
||||
class Ocrs(AbstractDaterangeObjects):
|
||||
"""
|
||||
OCR Objects
|
||||
"""
|
||||
def __init__(self):
|
||||
super().__init__('ocr', Ocr)
|
||||
|
||||
def sanitize_id_to_search(self, name_to_search):
|
||||
return name_to_search # TODO
|
||||
|
||||
|
||||
#### API ####
|
||||
def api_get_ocr(obj_id, translation_target=None):
|
||||
ocr = Ocr(obj_id)
|
||||
if not ocr.exists():
|
||||
return {"status": "error", "reason": "Unknown ocr"}, 404
|
||||
meta = ocr.get_meta({'content', 'icon', 'img', 'language', 'link', 'map', 'translation'}, translation_target=translation_target)
|
||||
return meta, 200
|
|
@ -49,9 +49,9 @@ class UserAccount(AbstractSubtypeObject):
|
|||
|
||||
def get_link(self, flask_context=False):
|
||||
if flask_context:
|
||||
url = url_for('correlation.show_correlation', type=self.type, subtype=self.subtype, id=self.id)
|
||||
url = url_for('chats_explorer.objects_user_account', type=self.type, subtype=self.subtype, id=self.id)
|
||||
else:
|
||||
url = f'{baseurl}/correlation/show?type={self.type}&subtype={self.subtype}&id={self.id}'
|
||||
url = f'{baseurl}/objects/user-account?&subtype={self.subtype}&id={self.id}'
|
||||
return url
|
||||
|
||||
def get_svg_icon(self): # TODO change icon/color
|
||||
|
@ -127,6 +127,13 @@ class UserAccount(AbstractSubtypeObject):
|
|||
def update_username_timeline(self, username_global_id, timestamp):
|
||||
self._get_timeline_username().add_timestamp(timestamp, username_global_id)
|
||||
|
||||
def get_messages(self):
|
||||
messages = []
|
||||
for mess in self.get_correlation('message'):
|
||||
messages.append(f'message:{mess}')
|
||||
return messages
|
||||
|
||||
|
||||
def get_messages_by_chat_obj(self, chat_obj):
|
||||
messages = []
|
||||
for mess in self.get_correlation_iter_obj(chat_obj, 'message'):
|
||||
|
@ -143,13 +150,14 @@ class UserAccount(AbstractSubtypeObject):
|
|||
if meta['username']:
|
||||
_, username_account_subtype, username_account_id = meta['username'].split(':', 3)
|
||||
if 'username_meta' in options:
|
||||
meta['username'] = Usernames.Username(username_account_id, username_account_subtype).get_meta()
|
||||
meta['username'] = Usernames.Username(username_account_id, username_account_subtype).get_meta(options={'icon'})
|
||||
else:
|
||||
meta['username'] = {'type': 'username', 'subtype': username_account_subtype, 'id': username_account_id}
|
||||
if 'usernames' in options:
|
||||
meta['usernames'] = self.get_usernames()
|
||||
if 'icon' in options:
|
||||
meta['icon'] = self.get_icon()
|
||||
meta['svg_icon'] = meta['icon']
|
||||
if 'info' in options:
|
||||
meta['info'] = self.get_info()
|
||||
if 'translation' in options and translation_target:
|
||||
|
|
|
@ -51,8 +51,6 @@ class AbstractChatObject(AbstractSubtypeObject, ABC):
|
|||
# get useraccount / username
|
||||
# get users ?
|
||||
# timeline name ????
|
||||
# info
|
||||
# created
|
||||
# last imported/updated
|
||||
|
||||
# TODO get instance
|
||||
|
@ -97,7 +95,7 @@ class AbstractChatObject(AbstractSubtypeObject, ABC):
|
|||
def get_created_at(self, date=False):
|
||||
created_at = self._get_field('created_at')
|
||||
if date and created_at:
|
||||
created_at = datetime.fromtimestamp(float(created_at))
|
||||
created_at = datetime.utcfromtimestamp(float(created_at))
|
||||
created_at = created_at.isoformat(' ')
|
||||
return created_at
|
||||
|
||||
|
@ -176,7 +174,7 @@ class AbstractChatObject(AbstractSubtypeObject, ABC):
|
|||
def get_nb_message_by_hours(self, date_day, nb_day):
|
||||
hours = []
|
||||
# start=0, end=23
|
||||
timestamp = time.mktime(datetime.strptime(date_day, "%Y%m%d").timetuple())
|
||||
timestamp = time.mktime(datetime.strptime(date_day, "%Y%m%d").utctimetuple())
|
||||
for i in range(24):
|
||||
timestamp_end = timestamp + 3600
|
||||
nb_messages = r_object.zcount(f'messages:{self.type}:{self.subtype}:{self.id}', timestamp, timestamp_end)
|
||||
|
@ -197,12 +195,42 @@ class AbstractChatObject(AbstractSubtypeObject, ABC):
|
|||
week_date = Date.get_current_week_day()
|
||||
return self.get_nb_message_by_week(week_date)
|
||||
|
||||
def get_message_meta(self, message, timestamp=None, translation_target='en'): # TODO handle file message
|
||||
def get_nb_week_messages(self):
|
||||
week = {}
|
||||
# Init
|
||||
for day in ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']:
|
||||
week[day] = {}
|
||||
for i in range(24):
|
||||
week[day][i] = 0
|
||||
|
||||
# chat
|
||||
for mess_t in r_object.zrange(f'messages:{self.type}:{self.subtype}:{self.id}', 0, -1, withscores=True):
|
||||
timestamp = datetime.utcfromtimestamp(float(mess_t[1]))
|
||||
date_name = timestamp.strftime('%a')
|
||||
week[date_name][timestamp.hour] += 1
|
||||
|
||||
subchannels = self.get_subchannels()
|
||||
for gid in subchannels:
|
||||
for mess_t in r_object.zrange(f'messages:{gid}', 0, -1, withscores=True):
|
||||
timestamp = datetime.utcfromtimestamp(float(mess_t[1]))
|
||||
date_name = timestamp.strftime('%a')
|
||||
week[date_name][timestamp.hour] += 1
|
||||
stats = []
|
||||
nb_day = 0
|
||||
for day in week:
|
||||
for hour in week[day]:
|
||||
stats.append({'date': day, 'day': nb_day, 'hour': hour, 'count': week[day][hour]})
|
||||
nb_day += 1
|
||||
return stats
|
||||
|
||||
def get_message_meta(self, message, timestamp=None, translation_target='', options=None): # TODO handle file message
|
||||
message = Messages.Message(message[9:])
|
||||
meta = message.get_meta(options={'content', 'files-names', 'images', 'link', 'parent', 'parent_meta', 'reactions', 'thread', 'translation', 'user-account'}, timestamp=timestamp, translation_target=translation_target)
|
||||
if not options:
|
||||
options = {'content', 'files-names', 'images', 'language', 'link', 'parent', 'parent_meta', 'reactions', 'thread', 'translation', 'user-account'}
|
||||
meta = message.get_meta(options=options, timestamp=timestamp, translation_target=translation_target)
|
||||
return meta
|
||||
|
||||
def get_messages(self, start=0, page=-1, nb=500, unread=False, translation_target='en'): # threads ???? # TODO ADD last/first message timestamp + return page
|
||||
def get_messages(self, start=0, page=-1, nb=500, unread=False, options=None, translation_target='en'): # threads ???? # TODO ADD last/first message timestamp + return page
|
||||
# TODO return message meta
|
||||
tags = {}
|
||||
messages = {}
|
||||
|
@ -220,11 +248,11 @@ class AbstractChatObject(AbstractSubtypeObject, ABC):
|
|||
mess, pagination = self._get_messages(nb=nb, page=page)
|
||||
for message in mess:
|
||||
timestamp = message[1]
|
||||
date_day = datetime.fromtimestamp(timestamp).strftime('%Y/%m/%d')
|
||||
date_day = datetime.utcfromtimestamp(timestamp).strftime('%Y/%m/%d')
|
||||
if date_day != curr_date:
|
||||
messages[date_day] = []
|
||||
curr_date = date_day
|
||||
mess_dict = self.get_message_meta(message[0], timestamp=timestamp, translation_target=translation_target)
|
||||
mess_dict = self.get_message_meta(message[0], timestamp=timestamp, translation_target=translation_target, options=options)
|
||||
messages[date_day].append(mess_dict)
|
||||
|
||||
if mess_dict.get('tags'):
|
||||
|
@ -279,6 +307,9 @@ class AbstractChatObject(AbstractSubtypeObject, ABC):
|
|||
def get_nb_participants(self):
|
||||
return self.get_nb_correlation('user-account')
|
||||
|
||||
def get_user_messages(self, user_id):
|
||||
return self.get_correlation_iter('user-account', self.subtype, user_id, 'message')
|
||||
|
||||
# TODO move me to abstract subtype
|
||||
class AbstractChatObjects(ABC):
|
||||
def __init__(self, type):
|
||||
|
|
|
@ -71,7 +71,7 @@ class AbstractDaterangeObject(AbstractObject, ABC):
|
|||
else:
|
||||
return last_seen
|
||||
|
||||
def get_nb_seen(self): # TODO REPLACE ME -> correlation image
|
||||
def get_nb_seen(self): # TODO REPLACE ME -> correlation image chats
|
||||
return self.get_nb_correlation('item') + self.get_nb_correlation('message')
|
||||
|
||||
def get_nb_seen_by_date(self, date):
|
||||
|
@ -88,6 +88,8 @@ class AbstractDaterangeObject(AbstractObject, ABC):
|
|||
meta_dict['nb_seen'] = self.get_nb_seen()
|
||||
if 'sparkline' in options:
|
||||
meta_dict['sparkline'] = self.get_sparkline()
|
||||
if 'last_full_date'in options:
|
||||
meta_dict['last_full_date'] = self.get_last_full_date()
|
||||
return meta_dict
|
||||
|
||||
def set_first_seen(self, first_seen):
|
||||
|
@ -125,6 +127,20 @@ class AbstractDaterangeObject(AbstractObject, ABC):
|
|||
def _add_create(self):
|
||||
r_object.sadd(f'{self.type}:all', self.id)
|
||||
|
||||
def _copy_from(self, obj_type, obj_id):
|
||||
first_seen = r_object.hget(f'meta:{obj_type}:{obj_id}', 'first_seen')
|
||||
last_seen = r_object.hget(f'meta:{obj_type}:{obj_id}', 'last_seen')
|
||||
if first_seen and last_seen:
|
||||
for date in Date.get_daterange(first_seen, last_seen):
|
||||
nb = r_object.zscore(f'{obj_type}:date:{date}', self.id)
|
||||
if nb:
|
||||
r_object.zincrby(f'{self.type}:date:{date}', nb, self.id)
|
||||
update_obj_date(first_seen, self.type)
|
||||
update_obj_date(last_seen, self.type)
|
||||
self._add_create()
|
||||
self.set_first_seen(first_seen)
|
||||
self.set_last_seen(last_seen)
|
||||
|
||||
def _add(self, date, obj): # TODO OBJ=None
|
||||
if not self.exists():
|
||||
self._add_create()
|
||||
|
@ -229,6 +245,17 @@ class AbstractDaterangeObjects(ABC):
|
|||
def sanitize_content_to_search(self, content_to_search):
|
||||
return content_to_search
|
||||
|
||||
def get_contents_ids(self):
|
||||
titles = {}
|
||||
for obj_id in self.get_ids():
|
||||
obj = self.obj_class(obj_id)
|
||||
content = obj.get_content()
|
||||
if content not in titles:
|
||||
titles[content] = []
|
||||
for domain in obj.get_correlation('domain').get('domain', []):
|
||||
titles[content].append(domain[1:])
|
||||
return titles
|
||||
|
||||
def search_by_content(self, content_to_search, r_pos=False, case_sensitive=True):
|
||||
objs = {}
|
||||
if case_sensitive:
|
||||
|
|
|
@ -25,7 +25,7 @@ from lib import Duplicate
|
|||
from lib.correlations_engine import get_nb_correlations, get_correlations, add_obj_correlation, delete_obj_correlation, delete_obj_correlations, exists_obj_correlation, is_obj_correlated, get_nb_correlation_by_correl_type, get_obj_inter_correlation
|
||||
from lib.Investigations import is_object_investigated, get_obj_investigations, delete_obj_investigations
|
||||
from lib.relationships_engine import get_obj_nb_relationships, add_obj_relationship
|
||||
from lib.Language import get_obj_translation
|
||||
from lib.Language import get_obj_languages, add_obj_language, remove_obj_language, detect_obj_language, get_obj_language_stats, get_obj_translation, set_obj_translation, delete_obj_translation, get_obj_main_language
|
||||
from lib.Tracker import is_obj_tracked, get_obj_trackers, delete_obj_trackers
|
||||
|
||||
logging.config.dictConfig(ail_logger.get_config(name='ail'))
|
||||
|
@ -67,12 +67,15 @@ class AbstractObject(ABC):
|
|||
def get_global_id(self):
|
||||
return f'{self.get_type()}:{self.get_subtype(r_str=True)}:{self.get_id()}'
|
||||
|
||||
def get_last_full_date(self):
|
||||
return None
|
||||
|
||||
def get_default_meta(self, tags=False, link=False):
|
||||
dict_meta = {'id': self.get_id(),
|
||||
'type': self.get_type(),
|
||||
'subtype': self.get_subtype(r_str=True)}
|
||||
if tags:
|
||||
dict_meta['tags'] = self.get_tags()
|
||||
dict_meta['tags'] = self.get_tags(r_list=True)
|
||||
if link:
|
||||
dict_meta['link'] = self.get_link()
|
||||
return dict_meta
|
||||
|
@ -222,11 +225,11 @@ class AbstractObject(ABC):
|
|||
|
||||
## Correlation ##
|
||||
|
||||
def _get_external_correlation(self, req_type, req_subtype, req_id, obj_type):
|
||||
def get_obj_correlations(self, obj_type, obj_subtype, obj_id, filter_types=[]):
|
||||
"""
|
||||
Get object correlation
|
||||
"""
|
||||
return get_correlations(req_type, req_subtype, req_id, filter_types=[obj_type])
|
||||
return get_correlations(obj_type, obj_subtype, obj_id, filter_types=filter_types)
|
||||
|
||||
def get_correlation(self, obj_type):
|
||||
"""
|
||||
|
@ -234,6 +237,11 @@ class AbstractObject(ABC):
|
|||
"""
|
||||
return get_correlations(self.type, self.subtype, self.id, filter_types=[obj_type])
|
||||
|
||||
def get_first_correlation(self, obj_type):
|
||||
correlation = self.get_correlation(obj_type)
|
||||
if correlation.get(obj_type):
|
||||
return f'{obj_type}:{correlation[obj_type].pop()}'
|
||||
|
||||
def get_correlations(self, filter_types=[], unpack=False):
|
||||
"""
|
||||
Get object correlations
|
||||
|
@ -302,15 +310,51 @@ class AbstractObject(ABC):
|
|||
|
||||
## -Relationship- ##
|
||||
|
||||
## Translation ##
|
||||
def get_objs_container(self):
|
||||
return set()
|
||||
|
||||
## Language ##
|
||||
|
||||
def get_languages(self):
|
||||
return get_obj_languages(self.type, self.get_subtype(r_str=True), self.id)
|
||||
|
||||
def add_language(self, language):
|
||||
return add_obj_language(language, self.type, self.get_subtype(r_str=True), self.id, objs_containers=self.get_objs_container())
|
||||
|
||||
def remove_language(self, language):
|
||||
return remove_obj_language(language, self.type, self.get_subtype(r_str=True), self.id, objs_containers=self.get_objs_container())
|
||||
|
||||
def edit_language(self, old_language, new_language):
|
||||
if old_language:
|
||||
self.remove_language(old_language)
|
||||
self.add_language(new_language)
|
||||
|
||||
def detect_language(self, field=''):
|
||||
return detect_obj_language(self.type, self.get_subtype(r_str=True), self.id, self.get_content(), objs_containers=self.get_objs_container())
|
||||
|
||||
def get_obj_language_stats(self):
|
||||
return get_obj_language_stats(self.type, self.get_subtype(r_str=True), self.id)
|
||||
|
||||
def get_main_language(self):
|
||||
return get_obj_main_language(self.type, self.get_subtype(r_str=True), self.id)
|
||||
|
||||
def get_translation(self, language, field=''):
|
||||
return get_obj_translation(self.get_global_id(), language, field=field, objs_containers=self.get_objs_container())
|
||||
|
||||
def set_translation(self, language, translation, field=''):
|
||||
return set_obj_translation(self.get_global_id(), language, translation, field=field)
|
||||
|
||||
def delete_translation(self, language, field=''):
|
||||
return delete_obj_translation(self.get_global_id(), language, field=field)
|
||||
|
||||
def translate(self, content=None, field='', source=None, target='en'):
|
||||
global_id = self.get_global_id()
|
||||
if not content:
|
||||
content = self.get_content()
|
||||
return get_obj_translation(global_id, content, field=field, source=source, target=target)
|
||||
translation = get_obj_translation(global_id, target, source=source, content=content, field=field, objs_containers=self.get_objs_container())
|
||||
return translation
|
||||
|
||||
## -Translation- ##
|
||||
## -Language- ##
|
||||
|
||||
## Parent ##
|
||||
|
||||
|
|
|
@ -85,6 +85,11 @@ class AbstractSubtypeObject(AbstractObject, ABC):
|
|||
else:
|
||||
return int(nb)
|
||||
|
||||
def get_last_full_date(self):
|
||||
last_seen = self.get_last_seen()
|
||||
if last_seen:
|
||||
return f'{last_seen[0:4]}-{last_seen[4:6]}-{last_seen[6:8]}'
|
||||
|
||||
def _get_meta(self, options=None):
|
||||
if options is None:
|
||||
options = set()
|
||||
|
|
|
@ -7,13 +7,18 @@ sys.path.append(os.environ['AIL_BIN'])
|
|||
##################################
|
||||
# Import Project packages
|
||||
##################################
|
||||
from lib.exceptions import AILObjectUnknown
|
||||
|
||||
from lib.ConfigLoader import ConfigLoader
|
||||
from lib.ail_core import get_all_objects, get_object_all_subtypes
|
||||
from lib.ail_core import get_all_objects, get_object_all_subtypes, get_objects_with_subtypes, get_default_correlation_objects
|
||||
from lib import correlations_engine
|
||||
from lib import relationships_engine
|
||||
from lib import btc_ail
|
||||
from lib import Language
|
||||
from lib import Tag
|
||||
|
||||
from lib import chats_viewer
|
||||
|
||||
from lib.objects import Chats
|
||||
from lib.objects import ChatSubChannels
|
||||
from lib.objects import ChatThreads
|
||||
|
@ -23,16 +28,17 @@ from lib.objects.Cves import Cve
|
|||
from lib.objects.Decodeds import Decoded, get_all_decodeds_objects, get_nb_decodeds_objects
|
||||
from lib.objects.Domains import Domain
|
||||
from lib.objects import Etags
|
||||
from lib.objects.Favicons import Favicon
|
||||
from lib.objects import Favicons
|
||||
from lib.objects import FilesNames
|
||||
from lib.objects import HHHashs
|
||||
from lib.objects.Items import Item, get_all_items_objects, get_nb_items_objects
|
||||
from lib.objects import Images
|
||||
from lib.objects.Messages import Message
|
||||
from lib.objects import Messages
|
||||
from lib.objects import Ocrs
|
||||
from lib.objects import Pgps
|
||||
from lib.objects.Screenshots import Screenshot
|
||||
from lib.objects import Titles
|
||||
from lib.objects.UsersAccount import UserAccount
|
||||
from lib.objects import UsersAccount
|
||||
from lib.objects import Usernames
|
||||
|
||||
config_loader = ConfigLoader()
|
||||
|
@ -43,64 +49,123 @@ config_loader = None
|
|||
def is_valid_object_type(obj_type):
|
||||
return obj_type in get_all_objects()
|
||||
|
||||
def is_object_subtype(obj_type):
|
||||
return obj_type in get_objects_with_subtypes()
|
||||
|
||||
def is_valid_object_subtype(obj_type, subtype):
|
||||
return subtype in get_object_all_subtypes(obj_type)
|
||||
|
||||
def sanitize_objs_types(objs):
|
||||
def sanitize_objs_types(objs, default=False):
|
||||
l_types = []
|
||||
for obj in objs:
|
||||
if is_valid_object_type(obj):
|
||||
l_types.append(obj)
|
||||
if not l_types:
|
||||
l_types = get_all_objects()
|
||||
if default:
|
||||
l_types = get_default_correlation_objects()
|
||||
else:
|
||||
l_types = get_all_objects()
|
||||
return l_types
|
||||
|
||||
#### OBJECT ####
|
||||
|
||||
def get_object(obj_type, subtype, obj_id):
|
||||
if obj_type == 'item':
|
||||
return Item(obj_id)
|
||||
elif obj_type == 'domain':
|
||||
return Domain(obj_id)
|
||||
elif obj_type == 'decoded':
|
||||
return Decoded(obj_id)
|
||||
elif obj_type == 'chat':
|
||||
return Chats.Chat(obj_id, subtype)
|
||||
elif obj_type == 'chat-subchannel':
|
||||
return ChatSubChannels.ChatSubChannel(obj_id, subtype)
|
||||
elif obj_type == 'chat-thread':
|
||||
return ChatThreads.ChatThread(obj_id, subtype)
|
||||
elif obj_type == 'cookie-name':
|
||||
return CookiesNames.CookieName(obj_id)
|
||||
elif obj_type == 'cve':
|
||||
return Cve(obj_id)
|
||||
elif obj_type == 'etag':
|
||||
return Etags.Etag(obj_id)
|
||||
elif obj_type == 'favicon':
|
||||
return Favicon(obj_id)
|
||||
elif obj_type == 'file-name':
|
||||
return FilesNames.FileName(obj_id)
|
||||
elif obj_type == 'hhhash':
|
||||
return HHHashs.HHHash(obj_id)
|
||||
elif obj_type == 'image':
|
||||
return Images.Image(obj_id)
|
||||
elif obj_type == 'message':
|
||||
return Message(obj_id)
|
||||
elif obj_type == 'screenshot':
|
||||
return Screenshot(obj_id)
|
||||
elif obj_type == 'cryptocurrency':
|
||||
return CryptoCurrencies.CryptoCurrency(obj_id, subtype)
|
||||
elif obj_type == 'pgp':
|
||||
return Pgps.Pgp(obj_id, subtype)
|
||||
elif obj_type == 'title':
|
||||
return Titles.Title(obj_id)
|
||||
elif obj_type == 'user-account':
|
||||
return UserAccount(obj_id, subtype)
|
||||
elif obj_type == 'username':
|
||||
return Usernames.Username(obj_id, subtype)
|
||||
if not subtype:
|
||||
if obj_type == 'item':
|
||||
return Item(obj_id)
|
||||
elif obj_type == 'domain':
|
||||
return Domain(obj_id)
|
||||
elif obj_type == 'decoded':
|
||||
return Decoded(obj_id)
|
||||
elif obj_type == 'cookie-name':
|
||||
return CookiesNames.CookieName(obj_id)
|
||||
elif obj_type == 'cve':
|
||||
return Cve(obj_id)
|
||||
elif obj_type == 'etag':
|
||||
return Etags.Etag(obj_id)
|
||||
elif obj_type == 'favicon':
|
||||
return Favicons.Favicon(obj_id)
|
||||
elif obj_type == 'file-name':
|
||||
return FilesNames.FileName(obj_id)
|
||||
elif obj_type == 'hhhash':
|
||||
return HHHashs.HHHash(obj_id)
|
||||
elif obj_type == 'image':
|
||||
return Images.Image(obj_id)
|
||||
elif obj_type == 'message':
|
||||
return Messages.Message(obj_id)
|
||||
elif obj_type == 'ocr':
|
||||
return Ocrs.Ocr(obj_id)
|
||||
elif obj_type == 'screenshot':
|
||||
return Screenshot(obj_id)
|
||||
elif obj_type == 'title':
|
||||
return Titles.Title(obj_id)
|
||||
else:
|
||||
raise AILObjectUnknown(f'Unknown AIL object: {obj_type} {subtype} {obj_id}')
|
||||
# SUBTYPES
|
||||
else:
|
||||
raise Exception(f'Unknown AIL object: {obj_type} {subtype} {obj_id}')
|
||||
if obj_type == 'chat':
|
||||
return Chats.Chat(obj_id, subtype)
|
||||
elif obj_type == 'chat-subchannel':
|
||||
return ChatSubChannels.ChatSubChannel(obj_id, subtype)
|
||||
elif obj_type == 'chat-thread':
|
||||
return ChatThreads.ChatThread(obj_id, subtype)
|
||||
elif obj_type == 'cryptocurrency':
|
||||
return CryptoCurrencies.CryptoCurrency(obj_id, subtype)
|
||||
elif obj_type == 'pgp':
|
||||
return Pgps.Pgp(obj_id, subtype)
|
||||
elif obj_type == 'user-account':
|
||||
return UsersAccount.UserAccount(obj_id, subtype)
|
||||
elif obj_type == 'username':
|
||||
return Usernames.Username(obj_id, subtype)
|
||||
else:
|
||||
raise AILObjectUnknown(f'Unknown AIL object: {obj_type} {subtype} {obj_id}')
|
||||
|
||||
def get_objects(objects):
|
||||
def exists_obj(obj_type, subtype, obj_id):
|
||||
obj = get_object(obj_type, subtype, obj_id)
|
||||
if obj:
|
||||
return obj.exists()
|
||||
else:
|
||||
return False
|
||||
|
||||
#### API ####
|
||||
|
||||
def api_get_object(obj_type, obj_subtype, obj_id):
|
||||
if not obj_id:
|
||||
return {'status': 'error', 'reason': 'Invalid object id'}, 400
|
||||
if not is_valid_object_type(obj_type):
|
||||
return {'status': 'error', 'reason': 'Invalid object type'}, 400
|
||||
if obj_subtype:
|
||||
if not is_valid_object_subtype(obj_type, obj_subtype):
|
||||
return {'status': 'error', 'reason': 'Invalid object subtype'}, 400
|
||||
obj = get_object(obj_type, obj_subtype, obj_id)
|
||||
if not obj.exists():
|
||||
return {'status': 'error', 'reason': 'Object Not Found'}, 404
|
||||
options = {'chat', 'content', 'created_at', 'files-names', 'icon', 'images', 'info', 'nb_participants', 'parent', 'parent_meta', 'reactions', 'thread', 'user-account', 'username', 'subchannels', 'threads'}
|
||||
return obj.get_meta(options=options), 200
|
||||
|
||||
|
||||
def api_get_object_type_id(obj_type, obj_id):
|
||||
if not is_valid_object_type(obj_type):
|
||||
return {'status': 'error', 'reason': 'Invalid object type'}, 400
|
||||
if is_object_subtype(obj_type):
|
||||
subtype, obj_id = obj_type.split('/', 1)
|
||||
else:
|
||||
subtype = None
|
||||
return api_get_object(obj_type, subtype, obj_id)
|
||||
|
||||
|
||||
def api_get_object_global_id(global_id):
|
||||
obj_type, subtype, obj_id = global_id.split(':', 2)
|
||||
return api_get_object(obj_type, subtype, obj_id)
|
||||
|
||||
#### --API-- ####
|
||||
|
||||
#########################################################################################
|
||||
#########################################################################################
|
||||
#########################################################################################
|
||||
|
||||
|
||||
def get_objects(objects): # TODO RENAME ME
|
||||
objs = set()
|
||||
for obj in objects:
|
||||
if isinstance(obj, dict):
|
||||
|
@ -108,7 +173,7 @@ def get_objects(objects):
|
|||
obj_subtype = obj['subtype']
|
||||
obj_id = obj['id']
|
||||
if 'lvl' in obj:
|
||||
correl_objs = get_obj_correlations_objs(obj_type, obj_subtype, obj_id, lvl=obj['lvl'])
|
||||
correl_objs = get_obj_correlations_objs(obj_type, obj_subtype, obj_id, lvl=int(obj['lvl']))
|
||||
objs = objs.union(correl_objs)
|
||||
else:
|
||||
obj_type, obj_subtype, obj_id = obj
|
||||
|
@ -119,14 +184,6 @@ def get_objects(objects):
|
|||
return ail_objects
|
||||
|
||||
|
||||
def exists_obj(obj_type, subtype, obj_id):
|
||||
obj = get_object(obj_type, subtype, obj_id)
|
||||
if obj:
|
||||
return obj.exists()
|
||||
else:
|
||||
return False
|
||||
|
||||
|
||||
def get_obj_global_id(obj_type, subtype, obj_id):
|
||||
obj = get_object(obj_type, subtype, obj_id)
|
||||
return obj.get_global_id()
|
||||
|
@ -199,8 +256,9 @@ def get_objects_meta(objs, options=set(), flask_context=False):
|
|||
|
||||
def get_object_card_meta(obj_type, subtype, id, related_btc=False):
|
||||
obj = get_object(obj_type, subtype, id)
|
||||
meta = obj.get_meta()
|
||||
meta['icon'] = obj.get_svg_icon()
|
||||
meta = obj.get_meta(options={'chat', 'chats', 'created_at', 'icon', 'info', 'map', 'nb_messages', 'nb_participants', 'threads', 'username'})
|
||||
# meta['icon'] = obj.get_svg_icon()
|
||||
meta['svg_icon'] = obj.get_svg_icon()
|
||||
if subtype or obj_type == 'cookie-name' or obj_type == 'cve' or obj_type == 'etag' or obj_type == 'title' or obj_type == 'favicon' or obj_type == 'hhhash':
|
||||
meta['sparkline'] = obj.get_sparkline()
|
||||
if obj_type == 'cve':
|
||||
|
@ -218,6 +276,34 @@ def get_object_card_meta(obj_type, subtype, id, related_btc=False):
|
|||
meta["add_tags_modal"] = Tag.get_modal_add_tags(obj.id, obj.get_type(), obj.get_subtype(r_str=True))
|
||||
return meta
|
||||
|
||||
#### OBJ LANGUAGES ####
|
||||
|
||||
def api_detect_language(obj_type, subtype, obj_id):
|
||||
obj = get_object(obj_type, subtype, obj_id)
|
||||
if not obj.exists():
|
||||
return {"status": "error", "reason": "Unknown obj"}, 404
|
||||
lang = obj.detect_language()
|
||||
return {"language": lang}, 200
|
||||
|
||||
def api_manually_translate(obj_type, subtype, obj_id, source, translation_target, translation):
|
||||
obj = get_object(obj_type, subtype, obj_id)
|
||||
if not obj.exists():
|
||||
return {"status": "error", "reason": "Unknown obj"}, 404
|
||||
if translation:
|
||||
if len(translation) > 200000: # TODO REVIEW LIMIT
|
||||
return {"status": "error", "reason": "Max Size reached"}, 400
|
||||
all_languages = Language.get_translation_languages()
|
||||
if source not in all_languages:
|
||||
return {"status": "error", "reason": "Unknown source Language"}, 400
|
||||
obj_language = obj.get_language()
|
||||
if obj_language != source:
|
||||
obj.edit_language(obj_language, source)
|
||||
if translation:
|
||||
if translation_target not in all_languages:
|
||||
return {"status": "error", "reason": "Unknown target Language"}, 400
|
||||
obj.set_translation(translation_target, translation)
|
||||
# TODO SANITYZE translation
|
||||
return None, 200
|
||||
|
||||
#### OBJ FILTERS ####
|
||||
|
||||
|
@ -239,10 +325,15 @@ def is_filtered(obj, filters):
|
|||
def obj_iterator(obj_type, filters):
|
||||
if obj_type == 'decoded':
|
||||
return get_all_decodeds_objects(filters=filters)
|
||||
elif obj_type == 'image':
|
||||
return Images.get_all_images_objects(filters=filters)
|
||||
elif obj_type == 'item':
|
||||
return get_all_items_objects(filters=filters)
|
||||
elif obj_type == 'pgp':
|
||||
return Pgps.get_all_pgps_objects(filters=filters)
|
||||
elif obj_type == 'message':
|
||||
return chats_viewer.get_messages_iterator(filters=filters)
|
||||
|
||||
|
||||
def card_objs_iterators(filters):
|
||||
nb = 0
|
||||
|
@ -257,6 +348,8 @@ def card_obj_iterator(obj_type, filters):
|
|||
return get_nb_items_objects(filters=filters)
|
||||
elif obj_type == 'pgp':
|
||||
return Pgps.nb_all_pgps_objects(filters=filters)
|
||||
elif obj_type == 'message':
|
||||
return chats_viewer.get_nb_messages_iterator(filters=filters)
|
||||
|
||||
def get_ui_obj_tag_table_keys(obj_type): # TODO REMOVE ME
|
||||
"""
|
||||
|
@ -387,7 +480,7 @@ def _get_obj_correlations_objs(objs, obj_type, subtype, obj_id, filter_types, lv
|
|||
|
||||
def get_obj_correlations_objs(obj_type, subtype, obj_id, filter_types=[], lvl=0, nb_max=300, objs_hidden=set()):
|
||||
objs = set()
|
||||
_get_obj_correlations_objs(objs, obj_type, subtype, obj_id, filter_types, lvl, nb_max, objs_hidden)
|
||||
_get_obj_correlations_objs(objs, obj_type, subtype, obj_id, filter_types, int(lvl), nb_max, objs_hidden)
|
||||
return objs
|
||||
|
||||
def obj_correlations_objs_add_tags(obj_type, subtype, obj_id, tags, filter_types=[], lvl=0, nb_max=300, objs_hidden=set()):
|
||||
|
|
|
@ -22,11 +22,8 @@ REGEX_JOIN_HASH = re.compile(r'[0-9a-zA-z-]+')
|
|||
|
||||
## ##
|
||||
|
||||
def save_item_correlation(username, item_id, item_date):
|
||||
Username.save_item_correlation('telegram', username, item_id, item_date)
|
||||
|
||||
def save_telegram_invite_hash(invite_hash, item_id):
|
||||
r_obj.sadd('telegram:invite_code', f'{invite_hash};{item_id}')
|
||||
def save_telegram_invite_hash(invite_hash, obj_global_id):
|
||||
r_obj.sadd('telegram:invite_code', f'{invite_hash};{obj_global_id}')
|
||||
|
||||
def get_data_from_telegram_url(base_url, url_path):
|
||||
dict_url = {}
|
||||
|
|
|
@ -61,7 +61,7 @@ class ApiKey(AbstractModule):
|
|||
|
||||
if google_api_key:
|
||||
print(f'found google api key: {to_print}')
|
||||
self.redis_logger.warning(f'{to_print}Checked {len(google_api_key)} found Google API Key;{item.get_id()}')
|
||||
self.redis_logger.warning(f'{to_print}Checked {len(google_api_key)} found Google API Key;{self.obj.get_global_id()}')
|
||||
|
||||
tag = 'infoleak:automatic-detection="google-api-key"'
|
||||
self.add_message_to_queue(message=tag, queue='Tags')
|
||||
|
@ -69,10 +69,10 @@ class ApiKey(AbstractModule):
|
|||
# # TODO: # FIXME: AWS regex/validate/sanitize KEY + SECRET KEY
|
||||
if aws_access_key:
|
||||
print(f'found AWS key: {to_print}')
|
||||
self.redis_logger.warning(f'{to_print}Checked {len(aws_access_key)} found AWS Key;{item.get_id()}')
|
||||
self.redis_logger.warning(f'{to_print}Checked {len(aws_access_key)} found AWS Key;{self.obj.get_global_id()}')
|
||||
if aws_secret_key:
|
||||
print(f'found AWS secret key')
|
||||
self.redis_logger.warning(f'{to_print}Checked {len(aws_secret_key)} found AWS secret Key;{item.get_id()}')
|
||||
self.redis_logger.warning(f'{to_print}Checked {len(aws_secret_key)} found AWS secret Key;{self.obj.get_global_id()}')
|
||||
|
||||
tag = 'infoleak:automatic-detection="aws-key"'
|
||||
self.add_message_to_queue(message=tag, queue='Tags')
|
||||
|
|
|
@ -89,7 +89,7 @@ class Categ(AbstractModule):
|
|||
# Search for pattern categories in obj content
|
||||
for categ, pattern in self.categ_words:
|
||||
|
||||
if obj.type == 'message':
|
||||
if obj.type == 'message' or obj.type == 'ocr':
|
||||
self.add_message_to_queue(message='0', queue=categ)
|
||||
else:
|
||||
|
||||
|
|
|
@ -103,11 +103,11 @@ class Credential(AbstractModule):
|
|||
|
||||
print(message)
|
||||
|
||||
to_print = f'Credential;{item.get_source()};{item.get_date()};{item.get_basename()};{message};{item.get_id()}'
|
||||
to_print = f'Credential;{item.get_source()};{item.get_date()};{item.get_basename()};{message};{self.obj.get_global_id()}'
|
||||
|
||||
# num of creds above threshold, publish an alert
|
||||
if nb_cred > self.criticalNumberToAlert:
|
||||
print(f"========> Found more than 10 credentials in this file : {item.get_id()}")
|
||||
print(f"========> Found more than 10 credentials in this file : {self.obj.get_global_id()}")
|
||||
self.redis_logger.warning(to_print)
|
||||
|
||||
tag = 'infoleak:automatic-detection="credential"'
|
||||
|
|
|
@ -58,9 +58,9 @@ class CreditCards(AbstractModule):
|
|||
if lib_refine.is_luhn_valid(clean_card):
|
||||
return clean_card
|
||||
|
||||
def extract(self, obj_id, content, tag):
|
||||
def extract(self, obj, content, tag):
|
||||
extracted = []
|
||||
cards = self.regex_finditer(self.regex, obj_id, content)
|
||||
cards = self.regex_finditer(self.regex, obj.get_global_id(), content)
|
||||
for card in cards:
|
||||
start, end, value = card
|
||||
if self.get_valid_card(value):
|
||||
|
@ -86,7 +86,7 @@ class CreditCards(AbstractModule):
|
|||
# print(creditcard_set)
|
||||
to_print = f'CreditCard;{item.get_source()};{item.get_date()};{item.get_basename()};'
|
||||
if creditcard_set:
|
||||
mess = f'{to_print}Checked {len(creditcard_set)} valid number(s);{item.id}'
|
||||
mess = f'{to_print}Checked {len(creditcard_set)} valid number(s);{self.obj.get_global_id()}'
|
||||
print(mess)
|
||||
self.redis_logger.warning(mess)
|
||||
|
||||
|
@ -96,7 +96,7 @@ class CreditCards(AbstractModule):
|
|||
if r_result:
|
||||
return creditcard_set
|
||||
else:
|
||||
self.redis_logger.info(f'{to_print}CreditCard related;{item.id}')
|
||||
self.redis_logger.info(f'{to_print}CreditCard related;{self.obj.get_global_id()}')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
|
@ -92,7 +92,13 @@ CURRENCIES = {
|
|||
'regex': r'\b(?<![+/=])X[A-Za-z0-9]{33}(?![+/=])\b',
|
||||
'max_execution_time': default_max_execution_time,
|
||||
'tag': 'infoleak:automatic-detection="dash-address"',
|
||||
}
|
||||
},
|
||||
'tron': {
|
||||
'name': 'tron', # e.g. TYdds9VLDjUshf9tbsXSfGUZNzJSbbBeat
|
||||
'regex': r'\b(?<![+/=])T[0-9a-zA-Z]{33}(?![+/=])\b',
|
||||
'max_execution_time': default_max_execution_time,
|
||||
'tag': 'infoleak:automatic-detection="tron-address"',
|
||||
},
|
||||
}
|
||||
##################################
|
||||
##################################
|
||||
|
@ -149,7 +155,7 @@ class Cryptocurrencies(AbstractModule, ABC):
|
|||
item.get_date(),
|
||||
item.get_basename())
|
||||
self.redis_logger.warning('{}Detected {} {} private key;{}'.format(
|
||||
to_print, len(private_keys), currency['name'], item_id))
|
||||
to_print, len(private_keys), currency['name'], self.obj.get_global_id()))
|
||||
else:
|
||||
private_keys = []
|
||||
|
||||
|
|
|
@ -56,7 +56,7 @@ class CveModule(AbstractModule):
|
|||
cve = Cves.Cve(cve_id)
|
||||
cve.add(date, item)
|
||||
|
||||
warning = f'{item_id} contains CVEs {cves}'
|
||||
warning = f'{self.obj.get_global_id()} contains CVEs {cves}'
|
||||
print(warning)
|
||||
self.redis_logger.warning(warning)
|
||||
|
||||
|
|
|
@ -82,20 +82,20 @@ class DomClassifier(AbstractModule):
|
|||
localizeddomains = self.dom_classifier.include(expression=self.cc_tld)
|
||||
if localizeddomains:
|
||||
print(localizeddomains)
|
||||
self.redis_logger.warning(f"DomainC;{item_source};{item_date};{item_basename};Checked {localizeddomains} located in {self.cc_tld};{item.get_id()}")
|
||||
self.redis_logger.warning(f"DomainC;{item_source};{item_date};{item_basename};Checked {localizeddomains} located in {self.cc_tld};{self.obj.get_global_id()}")
|
||||
|
||||
if self.cc:
|
||||
localizeddomains = self.dom_classifier.localizedomain(cc=self.cc)
|
||||
if localizeddomains:
|
||||
print(localizeddomains)
|
||||
self.redis_logger.warning(f"DomainC;{item_source};{item_date};{item_basename};Checked {localizeddomains} located in {self.cc};{item.get_id()}")
|
||||
self.redis_logger.warning(f"DomainC;{item_source};{item_date};{item_basename};Checked {localizeddomains} located in {self.cc};{self.obj.get_global_id()}")
|
||||
|
||||
if r_result:
|
||||
return self.dom_classifier.vdomain
|
||||
|
||||
except IOError as err:
|
||||
self.redis_logger.error(f"Duplicate;{item_source};{item_date};{item_basename};CRC Checksum Failed")
|
||||
raise Exception(f"CRC Checksum Failed on: {item.get_id()}")
|
||||
raise Exception(f"CRC Checksum Failed on: {self.obj.get_global_id()}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
@ -92,10 +92,10 @@ class Duplicates(AbstractModule):
|
|||
Duplicate.save_object_hash(algo, curr_date_ymonth, self.algos[algo]['hash'], item.get_id())
|
||||
|
||||
if nb_duplicates:
|
||||
self.redis_logger.info(f'Duplicate;{item.get_source()};{item.get_date()};{item.get_basename()};Detected {nb_duplicates};{item.get_id()}')
|
||||
self.redis_logger.info(f'Duplicate;{item.get_source()};{item.get_date()};{item.get_basename()};Detected {nb_duplicates};{self.obj.get_global_id()}')
|
||||
|
||||
y = time.time()
|
||||
print(f'{item.get_id()} Processed in {y-x} sec')
|
||||
print(f'{self.obj.get_global_id()} Processed in {y-x} sec')
|
||||
# self.redis_logger.debug('{}Processed in {} sec'.format(to_print, y-x))
|
||||
|
||||
|
||||
|
|
|
@ -42,13 +42,17 @@ class Exif(AbstractModule):
|
|||
img_exif = img.getexif()
|
||||
print(img_exif)
|
||||
if img_exif:
|
||||
self.logger.critical(f'Exif: {self.get_obj().id}')
|
||||
gps = img_exif.get(34853)
|
||||
print(gps)
|
||||
self.logger.critical(f'gps: {gps}')
|
||||
for key, val in img_exif.items():
|
||||
if key in ExifTags.TAGS:
|
||||
print(f'{ExifTags.TAGS[key]}:{val}')
|
||||
self.logger.critical(f'{ExifTags.TAGS[key]}:{val}')
|
||||
else:
|
||||
print(f'{key}:{val}')
|
||||
self.logger.critical(f'{key}:{val}')
|
||||
sys.exit(0)
|
||||
|
||||
# tag = 'infoleak:automatic-detection="cve"'
|
||||
|
|
|
@ -81,10 +81,9 @@ class Global(AbstractModule):
|
|||
|
||||
def compute(self, message, r_result=False): # TODO move OBJ ID sanitization to importer
|
||||
# Recovering the streamed message infos
|
||||
gzip64encoded = message
|
||||
|
||||
if self.obj.type == 'item':
|
||||
if gzip64encoded:
|
||||
if message:
|
||||
|
||||
# Creating the full filepath
|
||||
filename = os.path.join(self.ITEMS_FOLDER, self.obj.id)
|
||||
|
@ -97,7 +96,7 @@ class Global(AbstractModule):
|
|||
|
||||
else:
|
||||
# Decode compressed base64
|
||||
decoded = base64.standard_b64decode(gzip64encoded)
|
||||
decoded = base64.standard_b64decode(message)
|
||||
new_file_content = self.gunzip_bytes_obj(filename, decoded)
|
||||
|
||||
# TODO REWRITE ME
|
||||
|
@ -105,6 +104,11 @@ class Global(AbstractModule):
|
|||
filename = self.check_filename(filename, new_file_content)
|
||||
|
||||
if filename:
|
||||
new_obj_id = filename.replace(self.ITEMS_FOLDER, '', 1)
|
||||
new_obj = Item(new_obj_id)
|
||||
new_obj.sanitize_id()
|
||||
self.set_obj(new_obj)
|
||||
|
||||
# create subdir
|
||||
dirname = os.path.dirname(filename)
|
||||
if not os.path.exists(dirname):
|
||||
|
@ -124,11 +128,11 @@ class Global(AbstractModule):
|
|||
|
||||
else:
|
||||
self.logger.info(f"Empty Item: {message} not processed")
|
||||
elif self.obj.type == 'message':
|
||||
elif self.obj.type == 'message' or self.obj.type == 'ocr':
|
||||
# TODO send to specific object queue => image, ...
|
||||
self.add_message_to_queue(obj=self.obj, queue='Item')
|
||||
elif self.obj.type == 'image':
|
||||
self.add_message_to_queue(obj=self.obj, queue='Image')
|
||||
self.add_message_to_queue(obj=self.obj, queue='Image', message=message)
|
||||
else:
|
||||
self.logger.critical(f"Empty obj: {self.obj} {message} not processed")
|
||||
|
||||
|
|
|
@ -82,8 +82,8 @@ class IPAddress(AbstractModule):
|
|||
matching_ips.append(address)
|
||||
|
||||
if len(matching_ips) > 0:
|
||||
self.logger.info(f'{item.get_id()} contains {len(matching_ips)} IPs')
|
||||
self.redis_logger.warning(f'{item.get_id()} contains {item.get_id()} IPs')
|
||||
self.logger.info(f'{self.obj.get_global_id()} contains {len(matching_ips)} IPs')
|
||||
self.redis_logger.warning(f'{self.obj.get_global_id()} contains IPs')
|
||||
|
||||
# Tag message with IP
|
||||
tag = 'infoleak:automatic-detection="ip"'
|
||||
|
|
|
@ -62,9 +62,9 @@ class Iban(AbstractModule):
|
|||
return True
|
||||
return False
|
||||
|
||||
def extract(self, obj_id, content, tag):
|
||||
def extract(self, obj, content, tag):
|
||||
extracted = []
|
||||
ibans = self.regex_finditer(self.iban_regex, obj_id, content)
|
||||
ibans = self.regex_finditer(self.iban_regex, obj.get_global_id(), content)
|
||||
for iban in ibans:
|
||||
start, end, value = iban
|
||||
value = ''.join(e for e in value if e.isalnum())
|
||||
|
@ -95,7 +95,7 @@ class Iban(AbstractModule):
|
|||
# Statistics.add_module_tld_stats_by_date('iban', date, iban[0:2], 1)
|
||||
|
||||
to_print = f'Iban;{item.get_source()};{item.get_date()};{item.get_basename()};'
|
||||
self.redis_logger.warning(f'{to_print}Checked found {len(valid_ibans)} IBAN;{item_id}')
|
||||
self.redis_logger.warning(f'{to_print}Checked found {len(valid_ibans)} IBAN;{self.obj.get_global_id()}')
|
||||
# Tags
|
||||
tag = 'infoleak:automatic-detection="iban"'
|
||||
self.add_message_to_queue(message=tag, queue='Tags')
|
||||
|
|
|
@ -63,7 +63,7 @@ class Keys(AbstractModule):
|
|||
get_pgp_content = False
|
||||
|
||||
if KeyEnum.PGP_MESSAGE.value in content:
|
||||
self.redis_logger.warning(f'{item.get_basename()} has a PGP enc message')
|
||||
self.redis_logger.warning(f'{self.obj.get_global_id()} has a PGP enc message')
|
||||
|
||||
tag = 'infoleak:automatic-detection="pgp-message"'
|
||||
self.add_message_to_queue(message=tag, queue='Tags')
|
||||
|
@ -81,21 +81,21 @@ class Keys(AbstractModule):
|
|||
get_pgp_content = True
|
||||
|
||||
if KeyEnum.PGP_PRIVATE_KEY_BLOCK.value in content:
|
||||
self.redis_logger.warning(f'{item.get_basename()} has a pgp private key block message')
|
||||
self.redis_logger.warning(f'{self.obj.get_global_id()} has a pgp private key block message')
|
||||
|
||||
tag = 'infoleak:automatic-detection="pgp-private-key"'
|
||||
self.add_message_to_queue(message=tag, queue='Tags')
|
||||
get_pgp_content = True
|
||||
|
||||
if KeyEnum.CERTIFICATE.value in content:
|
||||
self.redis_logger.warning(f'{item.get_basename()} has a certificate message')
|
||||
self.redis_logger.warning(f'{self.obj.get_global_id()} has a certificate message')
|
||||
|
||||
tag = 'infoleak:automatic-detection="certificate"'
|
||||
self.add_message_to_queue(message=tag, queue='Tags')
|
||||
# find = True
|
||||
|
||||
if KeyEnum.RSA_PRIVATE_KEY.value in content:
|
||||
self.redis_logger.warning(f'{item.get_basename()} has a RSA private key message')
|
||||
self.redis_logger.warning(f'{self.obj.get_global_id()} has a RSA private key message')
|
||||
print('rsa private key message found')
|
||||
|
||||
tag = 'infoleak:automatic-detection="rsa-private-key"'
|
||||
|
@ -103,7 +103,7 @@ class Keys(AbstractModule):
|
|||
# find = True
|
||||
|
||||
if KeyEnum.PRIVATE_KEY.value in content:
|
||||
self.redis_logger.warning(f'{item.get_basename()} has a private key message')
|
||||
self.redis_logger.warning(f'{self.obj.get_global_id()} has a private key message')
|
||||
print('private key message found')
|
||||
|
||||
tag = 'infoleak:automatic-detection="private-key"'
|
||||
|
@ -111,7 +111,7 @@ class Keys(AbstractModule):
|
|||
# find = True
|
||||
|
||||
if KeyEnum.ENCRYPTED_PRIVATE_KEY.value in content:
|
||||
self.redis_logger.warning(f'{item.get_basename()} has an encrypted private key message')
|
||||
self.redis_logger.warning(f'{self.obj.get_global_id()} has an encrypted private key message')
|
||||
print('encrypted private key message found')
|
||||
|
||||
tag = 'infoleak:automatic-detection="encrypted-private-key"'
|
||||
|
@ -119,7 +119,7 @@ class Keys(AbstractModule):
|
|||
# find = True
|
||||
|
||||
if KeyEnum.OPENSSH_PRIVATE_KEY.value in content:
|
||||
self.redis_logger.warning(f'{item.get_basename()} has an openssh private key message')
|
||||
self.redis_logger.warning(f'{self.obj.get_global_id()} has an openssh private key message')
|
||||
print('openssh private key message found')
|
||||
|
||||
tag = 'infoleak:automatic-detection="private-ssh-key"'
|
||||
|
@ -127,7 +127,7 @@ class Keys(AbstractModule):
|
|||
# find = True
|
||||
|
||||
if KeyEnum.SSH2_ENCRYPTED_PRIVATE_KEY.value in content:
|
||||
self.redis_logger.warning(f'{item.get_basename()} has an ssh2 private key message')
|
||||
self.redis_logger.warning(f'{self.obj.get_global_id()} has an ssh2 private key message')
|
||||
print('SSH2 private key message found')
|
||||
|
||||
tag = 'infoleak:automatic-detection="private-ssh-key"'
|
||||
|
@ -135,7 +135,7 @@ class Keys(AbstractModule):
|
|||
# find = True
|
||||
|
||||
if KeyEnum.OPENVPN_STATIC_KEY_V1.value in content:
|
||||
self.redis_logger.warning(f'{item.get_basename()} has an openssh private key message')
|
||||
self.redis_logger.warning(f'{self.obj.get_global_id()} has an openssh private key message')
|
||||
print('OpenVPN Static key message found')
|
||||
|
||||
tag = 'infoleak:automatic-detection="vpn-static-key"'
|
||||
|
@ -143,21 +143,21 @@ class Keys(AbstractModule):
|
|||
# find = True
|
||||
|
||||
if KeyEnum.DSA_PRIVATE_KEY.value in content:
|
||||
self.redis_logger.warning(f'{item.get_basename()} has a dsa private key message')
|
||||
self.redis_logger.warning(f'{self.obj.get_global_id()} has a dsa private key message')
|
||||
|
||||
tag = 'infoleak:automatic-detection="dsa-private-key"'
|
||||
self.add_message_to_queue(message=tag, queue='Tags')
|
||||
# find = True
|
||||
|
||||
if KeyEnum.EC_PRIVATE_KEY.value in content:
|
||||
self.redis_logger.warning(f'{item.get_basename()} has an ec private key message')
|
||||
self.redis_logger.warning(f'{self.obj.get_global_id()} has an ec private key message')
|
||||
|
||||
tag = 'infoleak:automatic-detection="ec-private-key"'
|
||||
self.add_message_to_queue(message=tag, queue='Tags')
|
||||
# find = True
|
||||
|
||||
if KeyEnum.PUBLIC_KEY.value in content:
|
||||
self.redis_logger.warning(f'{item.get_basename()} has a public key message')
|
||||
self.redis_logger.warning(f'{self.obj.get_global_id()} has a public key message')
|
||||
|
||||
tag = 'infoleak:automatic-detection="public-key"'
|
||||
self.add_message_to_queue(message=tag, queue='Tags')
|
||||
|
|
|
@ -30,9 +30,13 @@ class Languages(AbstractModule):
|
|||
if obj.type == 'item':
|
||||
if obj.is_crawled():
|
||||
domain = Domain(obj.get_domain())
|
||||
for lang in obj.get_languages(min_probability=0.8):
|
||||
for lang in obj.get_languages(min_probability=0.8, force_gcld3=True):
|
||||
print(lang)
|
||||
domain.add_language(lang)
|
||||
# Detect Chat Message Language
|
||||
# elif obj.type == 'message':
|
||||
# lang = obj.detect_language()
|
||||
# print(self.obj.id, lang)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
|
@ -70,7 +70,7 @@ class LibInjection(AbstractModule):
|
|||
print(f"Detected (libinjection) SQL in URL: {item_id}")
|
||||
print(unquote(url))
|
||||
|
||||
to_print = f'LibInjection;{item.get_source()};{item.get_date()};{item.get_basename()};Detected SQL in URL;{item_id}'
|
||||
to_print = f'LibInjection;{item.get_source()};{item.get_date()};{item.get_basename()};Detected SQL in URL;{self.obj.get_global_id()}'
|
||||
self.redis_logger.warning(to_print)
|
||||
|
||||
# Add tag
|
||||
|
|
|
@ -57,7 +57,7 @@ class MISP_Thehive_Auto_Push(AbstractModule):
|
|||
Tag.set_auto_push_status('misp', 'ConnectionError')
|
||||
else:
|
||||
Tag.set_auto_push_status('misp', '')
|
||||
self.logger.info('MISP Pushed:', tag, '->', item_id)
|
||||
self.logger.info(f'MISP Pushed: {tag} -> {item_id}')
|
||||
|
||||
if 'thehive' in self.tags:
|
||||
if tag in self.tags['thehive']:
|
||||
|
@ -68,7 +68,7 @@ class MISP_Thehive_Auto_Push(AbstractModule):
|
|||
Tag.set_auto_push_status('thehive', 'Request Entity Too Large')
|
||||
else:
|
||||
Tag.set_auto_push_status('thehive', '')
|
||||
self.logger.info('thehive Pushed:', tag, '->', item_id)
|
||||
self.logger.info(f'thehive Pushed: {tag} -> {item_id}')
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
@ -26,7 +26,6 @@ sys.path.append(os.environ['AIL_BIN'])
|
|||
# Import Project packages #
|
||||
##################################
|
||||
from modules.abstract_module import AbstractModule
|
||||
from lib.objects.Items import Item
|
||||
from lib.ConfigLoader import ConfigLoader
|
||||
# from lib import Statistics
|
||||
|
||||
|
@ -118,10 +117,10 @@ class Mail(AbstractModule):
|
|||
print(e)
|
||||
return valid_mxdomain
|
||||
|
||||
def extract(self, obj_id, content, tag):
|
||||
def extract(self, obj, content, tag):
|
||||
extracted = []
|
||||
mxdomains = {}
|
||||
mails = self.regex_finditer(self.email_regex, obj_id, content)
|
||||
mails = self.regex_finditer(self.email_regex, obj.get_global_id(), content)
|
||||
for mail in mails:
|
||||
start, end, value = mail
|
||||
mxdomain = value.rsplit('@', 1)[1].lower()
|
||||
|
@ -172,7 +171,7 @@ class Mail(AbstractModule):
|
|||
# for tld in mx_tlds:
|
||||
# Statistics.add_module_tld_stats_by_date('mail', item_date, tld, mx_tlds[tld])
|
||||
|
||||
msg = f'Mails;{item.get_source()};{item_date};{item.get_basename()};Checked {num_valid_email} e-mail(s);{item.id}'
|
||||
msg = f'Mails;{item.get_source()};{item_date};{item.get_basename()};Checked {num_valid_email} e-mail(s);{self.obj.get_global_id()}'
|
||||
if num_valid_email > self.mail_threshold:
|
||||
print(f'{item.id} Checked {num_valid_email} e-mail(s)')
|
||||
self.redis_logger.warning(msg)
|
||||
|
|
|
@ -218,7 +218,7 @@ class Mixer(AbstractModule):
|
|||
if self.obj.type == 'item':
|
||||
self.add_message_to_queue(obj=self.obj, message=gzip64encoded)
|
||||
else:
|
||||
self.add_message_to_queue(obj=self.obj)
|
||||
self.add_message_to_queue(obj=self.obj, message=gzip64encoded)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
@ -0,0 +1,132 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*-coding:UTF-8 -*
|
||||
"""
|
||||
The OcrExtractor Module
|
||||
======================
|
||||
|
||||
"""
|
||||
|
||||
##################################
|
||||
# Import External packages
|
||||
##################################
|
||||
import cv2
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
##################################
|
||||
# Import Project packages
|
||||
##################################
|
||||
from modules.abstract_module import AbstractModule
|
||||
from lib.ConfigLoader import ConfigLoader
|
||||
from lib import chats_viewer
|
||||
from lib.objects import Messages
|
||||
from lib.objects import Ocrs
|
||||
|
||||
|
||||
# Default to eng
|
||||
def get_model_languages(obj, add_en=True):
|
||||
if add_en:
|
||||
model_languages = {'en'}
|
||||
else:
|
||||
model_languages = set()
|
||||
|
||||
ob = obj.get_first_correlation('message')
|
||||
if ob:
|
||||
message = Messages.Message(ob.split(':', 2)[-1])
|
||||
lang = message.get_language()
|
||||
if lang:
|
||||
model_languages.add(lang)
|
||||
return model_languages
|
||||
|
||||
ob = obj.get_first_correlation('chat-subchannel')
|
||||
if ob:
|
||||
ob = chats_viewer.get_obj_chat_from_global_id(ob)
|
||||
lang = ob.get_main_language()
|
||||
if lang:
|
||||
model_languages.add(lang)
|
||||
return model_languages
|
||||
|
||||
ob = obj.get_first_correlation('chat')
|
||||
if ob:
|
||||
ob = chats_viewer.get_obj_chat_from_global_id(ob)
|
||||
lang = ob.get_main_language()
|
||||
if lang:
|
||||
model_languages.add(lang)
|
||||
return model_languages
|
||||
|
||||
return model_languages
|
||||
|
||||
# TODO thread
|
||||
|
||||
|
||||
class OcrExtractor(AbstractModule):
|
||||
"""
|
||||
OcrExtractor for AIL framework
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
super(OcrExtractor, self).__init__()
|
||||
|
||||
# Waiting time in seconds between to message processed
|
||||
self.pending_seconds = 1
|
||||
|
||||
config_loader = ConfigLoader()
|
||||
self.r_cache = config_loader.get_redis_conn("Redis_Cache")
|
||||
|
||||
self.ocr_languages = Ocrs.get_ocr_languages()
|
||||
|
||||
# Send module state to logs
|
||||
self.logger.info(f'Module {self.module_name} initialized')
|
||||
|
||||
def is_cached(self):
|
||||
return self.r_cache.exists(f'ocr:no:{self.obj.id}')
|
||||
|
||||
def add_to_cache(self):
|
||||
self.r_cache.setex(f'ocr:no:{self.obj.id}', 86400, 0)
|
||||
|
||||
def compute(self, message):
|
||||
image = self.get_obj()
|
||||
date = message
|
||||
|
||||
ocr = Ocrs.Ocr(image.id)
|
||||
if self.is_cached():
|
||||
return None
|
||||
|
||||
if self.obj.is_gif():
|
||||
self.logger.warning(f'Ignoring GIF: {self.obj.id}')
|
||||
return None
|
||||
|
||||
if not ocr.exists():
|
||||
path = image.get_filepath()
|
||||
languages = get_model_languages(image)
|
||||
languages = Ocrs.sanityze_ocr_languages(languages, ocr_languages=self.ocr_languages)
|
||||
print(image.id, languages)
|
||||
try:
|
||||
texts = Ocrs.extract_text(path, languages)
|
||||
except (OSError, ValueError, cv2.error) as e:
|
||||
self.logger.warning(e)
|
||||
self.obj.add_tag('infoleak:confirmed="false-positive"')
|
||||
texts = None
|
||||
if texts:
|
||||
print('create')
|
||||
ocr = Ocrs.create(image.id, texts)
|
||||
if ocr:
|
||||
self.add_message_to_queue(ocr)
|
||||
else:
|
||||
print('no text')
|
||||
self.add_to_cache()
|
||||
# Save in cache
|
||||
else:
|
||||
print('no text detected')
|
||||
self.add_to_cache()
|
||||
else:
|
||||
# print(image.id)
|
||||
# print('update correlation', date)
|
||||
ocr.update_correlation(date=date)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
module = OcrExtractor()
|
||||
module.run()
|
|
@ -55,9 +55,9 @@ class Onion(AbstractModule):
|
|||
# TEMP var: SAVE I2P Domain (future I2P crawler)
|
||||
# self.save_i2p = config_loader.get_config_boolean("Onion", "save_i2p")
|
||||
|
||||
def extract(self, obj_id, content, tag):
|
||||
def extract(self, obj, content, tag):
|
||||
extracted = []
|
||||
onions = self.regex_finditer(self.onion_regex, obj_id, content)
|
||||
onions = self.regex_finditer(self.onion_regex, obj.get_global_id(), content)
|
||||
for onion in onions:
|
||||
start, end, value = onion
|
||||
url_unpack = crawlers.unpack_url(value)
|
||||
|
@ -98,8 +98,8 @@ class Onion(AbstractModule):
|
|||
print(f'{domain} added to crawler queue: {task_uuid}')
|
||||
else:
|
||||
to_print = f'Onion;{item.get_source()};{item.get_date()};{item.get_basename()};'
|
||||
print(f'{to_print}Detected {len(domains)} .onion(s);{item.get_id()}')
|
||||
self.redis_logger.warning(f'{to_print}Detected {len(domains)} .onion(s);{item.get_id()}')
|
||||
print(f'{to_print}Detected {len(domains)} .onion(s);{self.obj.get_global_id()}')
|
||||
self.redis_logger.warning(f'{to_print}Detected {len(domains)} .onion(s);{self.obj.get_global_id()}')
|
||||
|
||||
# TAG Item
|
||||
tag = 'infoleak:automatic-detection="onion"'
|
||||
|
|
|
@ -56,7 +56,7 @@ class Pasties(AbstractModule):
|
|||
with open(domains_pasties) as f:
|
||||
for line in f:
|
||||
url = line.strip()
|
||||
if url: # TODO validate line
|
||||
if url: # TODO validate line
|
||||
self.faup.decode(url)
|
||||
url_decoded = self.faup.get()
|
||||
host = url_decoded['host']
|
||||
|
@ -135,7 +135,7 @@ class Pasties(AbstractModule):
|
|||
if path.startswith(url_path):
|
||||
if url_path != path and url_path != path_end:
|
||||
print('send to crawler', url_path, url)
|
||||
self.send_to_crawler(url, self.obj.id))
|
||||
self.send_to_crawler(url, self.obj.id)
|
||||
break
|
||||
|
||||
|
||||
|
|
|
@ -41,9 +41,9 @@ class Phone(AbstractModule):
|
|||
# Waiting time in seconds between to message processed
|
||||
self.pending_seconds = 1
|
||||
|
||||
def extract(self, obj_id, content, tag):
|
||||
def extract(self, obj, content, tag):
|
||||
extracted = []
|
||||
phones = self.regex_phone_iter('ZZ', obj_id, content)
|
||||
phones = self.regex_phone_iter('ZZ', obj.get_global_id(), content)
|
||||
for phone in phones:
|
||||
extracted.append([phone[0], phone[1], phone[2], f'tag:{tag}'])
|
||||
return extracted
|
||||
|
@ -62,7 +62,7 @@ class Phone(AbstractModule):
|
|||
tag = 'infoleak:automatic-detection="phone-number"'
|
||||
self.add_message_to_queue(message=tag, queue='Tags')
|
||||
|
||||
self.redis_logger.warning(f'{item.get_id()} contains {len(phone)} Phone numbers')
|
||||
self.redis_logger.warning(f'{self.obj.get_global_id()} contains {len(phone)} Phone numbers')
|
||||
|
||||
# # List of the regex results in the Item, may be null
|
||||
# results = self.REG_PHONE.findall(content)
|
||||
|
|
|
@ -51,13 +51,13 @@ class SQLInjectionDetection(AbstractModule):
|
|||
self.faup.decode(url)
|
||||
url_parsed = self.faup.get()
|
||||
|
||||
print(f"Detected SQL in URL: {item_id}")
|
||||
print(f"Detected SQL in URL: {item.id}")
|
||||
print(urllib.request.unquote(url))
|
||||
to_print = f'SQLInjection;{item.get_source()};{item.get_date()};{item.get_basename()};Detected SQL in URL;{item_id}'
|
||||
to_print = f'SQLInjection;{item.get_source()};{item.get_date()};{item.get_basename()};Detected SQL in URL;{self.obj.get_global_id()}'
|
||||
self.redis_logger.warning(to_print)
|
||||
|
||||
# Tag
|
||||
tag = f'infoleak:automatic-detection="sql-injection";{item_id}'
|
||||
tag = f'infoleak:automatic-detection="sql-injection"'
|
||||
self.add_message_to_queue(message=tag, queue='Tags')
|
||||
|
||||
# statistics
|
||||
|
|
|
@ -41,7 +41,7 @@ class Tags(AbstractModule):
|
|||
|
||||
# Create a new tag
|
||||
item.add_tag(tag)
|
||||
print(f'{item.get_id()}: Tagged {tag}')
|
||||
print(f'{self.obj.get_global_id()}: Tagged {tag}')
|
||||
|
||||
# Forward message to channel
|
||||
self.add_message_to_queue(message=tag, queue='Tag_feed')
|
||||
|
|
|
@ -62,7 +62,7 @@ class Telegram(AbstractModule):
|
|||
print(f'username: {user_id}')
|
||||
invite_hash = dict_url.get('invite_hash')
|
||||
if invite_hash:
|
||||
telegram.save_telegram_invite_hash(invite_hash, item.id)
|
||||
telegram.save_telegram_invite_hash(invite_hash, self.obj.get_global_id())
|
||||
print(f'invite code: {invite_hash}')
|
||||
invite_code_found = True
|
||||
|
||||
|
|
|
@ -78,7 +78,7 @@ class Urls(AbstractModule):
|
|||
except AttributeError:
|
||||
url = url_decoded['url']
|
||||
|
||||
print(url, item.get_id())
|
||||
print(url, self.obj.get_global_id())
|
||||
self.add_message_to_queue(message=str(url), queue='Url')
|
||||
self.logger.debug(f"url_parsed: {url}")
|
||||
|
||||
|
|
|
@ -76,6 +76,14 @@ class AbstractModule(ABC):
|
|||
def get_obj(self):
|
||||
return self.obj
|
||||
|
||||
def set_obj(self, new_obj):
|
||||
if self.obj:
|
||||
old_id = self.obj.id
|
||||
self.obj = new_obj
|
||||
self.queue.rename_message_obj(self.obj.id, old_id)
|
||||
else:
|
||||
self.obj = new_obj
|
||||
|
||||
def get_message(self):
|
||||
"""
|
||||
Get message from the Redis Queue (QueueIn)
|
||||
|
@ -171,7 +179,10 @@ class AbstractModule(ABC):
|
|||
trace = traceback.format_tb(err.__traceback__)
|
||||
trace = ''.join(trace)
|
||||
self.logger.critical(f"Error in module {self.module_name}: {__name__} : {err}")
|
||||
self.logger.critical(f"Module {self.module_name} input message: {message}")
|
||||
if message:
|
||||
self.logger.critical(f"Module {self.module_name} input message: {message}")
|
||||
if self.obj:
|
||||
self.logger.critical(f"{self.module_name} Obj: {self.obj.get_global_id()}")
|
||||
self.logger.critical(trace)
|
||||
|
||||
if isinstance(err, ModuleQueueError):
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
#!/usr/bin/python3
|
||||
|
||||
import datetime
|
||||
import time
|
||||
from calendar import monthrange
|
||||
|
||||
from dateutil.rrule import rrule, MONTHLY
|
||||
from dateutil.relativedelta import relativedelta
|
||||
|
@ -90,6 +92,18 @@ def get_current_week_day():
|
|||
start = dt - datetime.timedelta(days=dt.weekday())
|
||||
return start.strftime("%Y%m%d")
|
||||
|
||||
def get_current_utc_full_time():
|
||||
timestamp = datetime.datetime.fromtimestamp(time.time())
|
||||
return timestamp.strftime('%Y-%m-%d %H:%M:%S')
|
||||
|
||||
def get_month_dates(date=None):
|
||||
if date:
|
||||
date = convert_date_str_to_datetime(date)
|
||||
else:
|
||||
date = datetime.date.today()
|
||||
num_days = monthrange(date.year, date.month)[1]
|
||||
return [datetime.date(date.year, date.month, day).strftime("%Y%m%d") for day in range(1, num_days+1)]
|
||||
|
||||
def get_date_week_by_date(date):
|
||||
dt = datetime.date(int(date[0:4]), int(date[4:6]), int(date[6:8]))
|
||||
start = dt - datetime.timedelta(days=dt.weekday())
|
||||
|
@ -249,3 +263,9 @@ def sanitise_daterange(date_from, date_to, separator='', date_type='str'):
|
|||
date_from = date_to
|
||||
date_to = res
|
||||
return date_from, date_to
|
||||
|
||||
def get_previous_month_date():
|
||||
now = datetime.date.today()
|
||||
first = now.replace(day=1)
|
||||
last_month = first - datetime.timedelta(days=1)
|
||||
return last_month.strftime("%Y%m%d")
|
||||
|
|
|
@ -88,6 +88,9 @@ class Retro_Hunt_Module(AbstractModule):
|
|||
for obj in ail_objects.obj_iterator(obj_type, filters):
|
||||
self.obj = obj
|
||||
content = obj.get_content(r_type='bytes')
|
||||
if not content:
|
||||
continue
|
||||
|
||||
rule.match(data=content, callback=self.yara_rules_match,
|
||||
which_callbacks=yara.CALLBACK_MATCHES, timeout=timeout)
|
||||
|
||||
|
@ -128,7 +131,7 @@ class Retro_Hunt_Module(AbstractModule):
|
|||
self.redis_logger.info(f'{self.module_name}, Retro hunt {task_uuid} match found: {obj_id}')
|
||||
print(f'Retro hunt {task_uuid} match found: {self.obj.get_type()} {obj_id}')
|
||||
|
||||
self.retro_hunt.add(self.obj.get_type(), self.obj.get_subtype(), obj_id)
|
||||
self.retro_hunt.add(self.obj.get_type(), self.obj.get_subtype(r_str=True), obj_id)
|
||||
|
||||
# TODO FILTER Tags
|
||||
|
||||
|
|
|
@ -116,8 +116,8 @@ class Tracker_Regex(AbstractModule):
|
|||
if ail_objects.is_filtered(obj, filters):
|
||||
continue
|
||||
|
||||
print(f'new tracked regex found: {tracker_name} in {obj_id}')
|
||||
self.redis_logger.warning(f'new tracked regex found: {tracker_name} in {obj_id}')
|
||||
print(f'new tracked regex found: {tracker_name} in {self.obj.get_global_id()}')
|
||||
self.redis_logger.warning(f'new tracked regex found: {tracker_name} in {self.obj.get_global_id()}')
|
||||
|
||||
tracker.add(obj.get_type(), obj.get_subtype(r_str=True), obj_id)
|
||||
|
||||
|
|
|
@ -93,7 +93,7 @@ class Tracker_Term(AbstractModule):
|
|||
try:
|
||||
dict_words_freq = Tracker.get_text_word_frequency(content)
|
||||
except TimeoutException:
|
||||
self.redis_logger.warning(f"{obj.get_id()} processing timeout")
|
||||
self.redis_logger.warning(f"{self.obj.get_global_id()} processing timeout")
|
||||
else:
|
||||
signal.alarm(0)
|
||||
|
||||
|
@ -124,8 +124,8 @@ class Tracker_Term(AbstractModule):
|
|||
if ail_objects.is_filtered(obj, filters):
|
||||
continue
|
||||
|
||||
print(f'new tracked term {tracker_uuid} found: {tracker_name} in {obj_id}')
|
||||
self.redis_logger.warning(f'new tracked term found: {tracker_name} in {obj_id}')
|
||||
print(f'new tracked term {tracker_uuid} found: {tracker_name} in {self.obj.get_global_id()}')
|
||||
self.redis_logger.warning(f'new tracked term found: {tracker_name} in {self.obj.get_global_id()}')
|
||||
|
||||
tracker.add(obj.get_type(), obj.get_subtype(), obj_id)
|
||||
|
||||
|
|
|
@ -75,8 +75,8 @@ class Tracker_Typo_Squatting(AbstractModule):
|
|||
if ail_objects.is_filtered(obj, filters):
|
||||
continue
|
||||
|
||||
print(f'new tracked typosquatting found: {tracked} in {obj_id}')
|
||||
self.redis_logger.warning(f'tracker typosquatting: {tracked} in {obj_id}')
|
||||
print(f'new tracked typosquatting found: {tracked} in {self.obj.get_global_id()}')
|
||||
self.redis_logger.warning(f'tracker typosquatting: {tracked} in {self.obj.get_global_id()}')
|
||||
|
||||
tracker.add(obj.get_type(), obj.get_subtype(r_str=True), obj_id)
|
||||
|
||||
|
|
|
@ -62,13 +62,15 @@ class Tracker_Yara(AbstractModule):
|
|||
return None
|
||||
|
||||
content = self.obj.get_content(r_type='bytes')
|
||||
if not content:
|
||||
return None
|
||||
|
||||
try:
|
||||
yara_match = self.rules[obj_type].match(data=content, callback=self.yara_rules_match,
|
||||
which_callbacks=yara.CALLBACK_MATCHES, timeout=60)
|
||||
if yara_match:
|
||||
self.redis_logger.warning(f'tracker yara: new match {self.obj.get_id()}: {yara_match}')
|
||||
print(f'{self.obj.get_id()}: {yara_match}')
|
||||
self.redis_logger.warning(f'tracker yara: new match {self.obj.get_global_id()}: {yara_match}')
|
||||
print(f'{self.obj.get_global_id()}: {yara_match}')
|
||||
except yara.TimeoutError:
|
||||
print(f'{self.obj.get_id()}: yara scanning timed out')
|
||||
self.redis_logger.info(f'{self.obj.get_id()}: yara scanning timed out')
|
||||
|
|
|
@ -903,6 +903,7 @@ namespace.cor ail_correls
|
|||
namespace.crawl ail_crawlers
|
||||
namespace.db ail_datas
|
||||
namespace.dup ail_dups
|
||||
namespace.lg ail_langs
|
||||
namespace.obj ail_objs
|
||||
namespace.rel ail_rels
|
||||
namespace.stat ail_stats
|
||||
|
|
|
@ -7,6 +7,7 @@ crawled = crawled
|
|||
har = CRAWLED_SCREENSHOT
|
||||
screenshot = CRAWLED_SCREENSHOT/screenshot
|
||||
images = IMAGES
|
||||
favicons = FAVICONS
|
||||
|
||||
wordtrending_csv = var/www/static/csv/wordstrendingdata
|
||||
wordsfile = files/wordfile
|
||||
|
@ -191,6 +192,11 @@ host = localhost
|
|||
port = 6383
|
||||
password = ail_crawlers
|
||||
|
||||
[Kvrocks_Languages]
|
||||
host = localhost
|
||||
port = 6383
|
||||
password = ail_langs
|
||||
|
||||
[Kvrocks_Objects]
|
||||
host = localhost
|
||||
port = 6383
|
||||
|
|
|
@ -162,6 +162,9 @@ publish = Tags
|
|||
subscribe = Image
|
||||
publish = Tags
|
||||
|
||||
[OcrExtractor]
|
||||
subscribe = Image
|
||||
publish = Item
|
||||
|
||||
######## CORE ########
|
||||
|
||||
|
|
114
doc/README.md
114
doc/README.md
|
@ -52,6 +52,8 @@ Available Importers:
|
|||
```
|
||||
git clone https://github.com/cvandeplas/pystemon.git
|
||||
```
|
||||
Clone it into the same directory as AIL if you wish to launch it via the AIL launcher.
|
||||
|
||||
|
||||
2. Edit configuration file for pystemon ```pystemon/pystemon.yaml```:
|
||||
- Configure the storage section according to your needs:
|
||||
|
@ -80,7 +82,7 @@ Available Importers:
|
|||
```shell
|
||||
cd ail-framework/
|
||||
. ./AILENV/bin/activate
|
||||
cd pystemon/
|
||||
cd ../pystemon/
|
||||
pip install -U -r requirements.txt
|
||||
```
|
||||
4. Edit the configuration file ```ail-framework/configs/core.cfg```:
|
||||
|
@ -189,9 +191,119 @@ from GHArchive, collect and feed AIL
|
|||
- [ail-feeder-leak](https://github.com/ail-project/ail-feeder-leak): Automates the process of feeding files to AIL, using data chunking to handle large files.
|
||||
- [ail-feeder-atom-rss](https://github.com/ail-project/ail-feeder-atom-rss) Atom and RSS feeder for AIL.
|
||||
- [ail-feeder-jsonlogs](https://github.com/ail-project/ail-feeder-jsonlogs) Aggregates JSON log lines and pushes them to AIL.
|
||||
|
||||
### AIL Chats Feeders List:
|
||||
- [ail-feeder-discord](https://github.com/ail-project/ail-feeder-discord) Discord Feeder.
|
||||
- [ail-feeder-telegram](https://github.com/ail-project/ail-feeder-telegram) Telegram Channels and User Feeder.
|
||||
|
||||
### Chats Message
|
||||
|
||||
Overview of the JSON fields used by the Chat feeder.
|
||||
|
||||
```
|
||||
{
|
||||
"data": "New NFT Scam available,"
|
||||
"meta": {
|
||||
"chat": {
|
||||
"date": {
|
||||
"datestamp": "2023-01-10 08:19:16",
|
||||
"timestamp": 1673870217.0,
|
||||
"timezone": "UTC"
|
||||
},
|
||||
"icon": "AAAAAAAA",
|
||||
"id": 123456,
|
||||
"info": "",
|
||||
"name": "NFT legit",
|
||||
"subchannel": {
|
||||
"date": {
|
||||
"datestamp": "2023-08-10 08:19:18",
|
||||
"timestamp": 1691655558.0,
|
||||
"timezone": "UTC"
|
||||
},
|
||||
"id": 285,
|
||||
"name": "Market"
|
||||
}
|
||||
},
|
||||
"date": {
|
||||
"datestamp": "2024-02-01 13:43:46",
|
||||
"timestamp": 1707139999.0,
|
||||
"timezone": "UTC"
|
||||
},
|
||||
"id": 16,
|
||||
"reply_to": {
|
||||
"message_id": 12
|
||||
},
|
||||
"sender": {
|
||||
"first_name": "nftmaster",
|
||||
"icon": "AAAAAAAA",
|
||||
"id": 5684,
|
||||
"info": "best legit NFT vendor",
|
||||
"username": "nft_best"
|
||||
},
|
||||
"type": "message"
|
||||
},
|
||||
"source": "ail_feeder_telegram",
|
||||
"source-uuid": "9cde0855-248b-4439-b964-0495b9b2b8bb"
|
||||
}
|
||||
```
|
||||
|
||||
#### 1. "data"
|
||||
- Content of the message.
|
||||
|
||||
#### 2. "meta"
|
||||
- Provides metadata about the message.
|
||||
|
||||
##### "type":
|
||||
- Indicates the type of message. It can be either "message" or "image".
|
||||
|
||||
##### "id":
|
||||
- The unique identifier of the message.
|
||||
|
||||
##### "date":
|
||||
- Represents the timestamp of the message.
|
||||
- "datestamp": The date in the format "YYYY-MM-DD HH:MM:SS".
|
||||
- "timestamp": The timestamp representing the date and time.
|
||||
- "timezone": The timezone in which the date and time are specified (e.g., "UTC").
|
||||
|
||||
##### "reply_to":
|
||||
- The unique identifier of a message to which this message is a reply (optional).
|
||||
- "message_id": The unique identifier of the replied message.
|
||||
|
||||
##### "sender":
|
||||
- Contains information about the sender of the message.
|
||||
- "id": The unique identifier for the sender.
|
||||
- "info": Additional information about the sender (optional).
|
||||
- "username": The sender's username (optional).
|
||||
- "firstname": The sender's firstname (optional).
|
||||
- "lastname": The sender's lastname (optional).
|
||||
- "phone": The sender's phone (optional).
|
||||
|
||||
##### "chat":
|
||||
- Contains information about the chat where the message was sent.
|
||||
- "date": The chat creation date.
|
||||
- "datestamp": The date in the format "YYYY-MM-DD HH:MM:SS".
|
||||
- "timestamp": The timestamp representing the date and time.
|
||||
- "timezone": The timezone in which the date and time are specified (e.g., "UTC").
|
||||
- "icon": The icon associated with the chat (optional).
|
||||
- "id": The unique identifier of the chat.
|
||||
- "info": Chat description/info (optional).
|
||||
- "name": The name of the chat.
|
||||
- "username": The username of the chat (optional).
|
||||
- "subchannel": If this message is posted in a subchannel within the chat (optional).
|
||||
- "date": The subchannel creation date.
|
||||
- "datestamp": The date in the format "YYYY-MM-DD HH:MM:SS".
|
||||
- "timestamp": The timestamp representing the date and time.
|
||||
- "timezone": The timezone in which the date and time are specified (e.g., "UTC").
|
||||
- "id": The unique identifier of the subchannel.
|
||||
- "name": The name of the subchannel (optional).
|
||||
|
||||
#### 3. "source"
|
||||
- Indicates the feeder name.
|
||||
|
||||
#### 4. "source-uuid"
|
||||
- The UUID associated with the source.
|
||||
|
||||
|
||||
#### Example: Feeding AIL with Conti leaks
|
||||
|
||||
```python
|
||||
|
|
Binary file not shown.
After Width: | Height: | Size: 208 KiB |
Binary file not shown.
After Width: | Height: | Size: 143 KiB |
Binary file not shown.
After Width: | Height: | Size: 144 KiB |
Binary file not shown.
After Width: | Height: | Size: 61 KiB |
|
@ -72,6 +72,7 @@ popd
|
|||
# pgpdump
|
||||
test ! -d pgpdump && git clone https://github.com/kazu-yamamoto/pgpdump.git
|
||||
pushd pgpdump/
|
||||
autoreconf -fiW all
|
||||
./configure
|
||||
make
|
||||
sudo make install
|
||||
|
|
|
@ -3,6 +3,7 @@ import requests
|
|||
import subprocess
|
||||
import re
|
||||
import os
|
||||
import shutil
|
||||
from time import sleep
|
||||
from typing import List, Optional
|
||||
from pathlib import Path
|
||||
|
@ -70,6 +71,14 @@ class Repo:
|
|||
except Exception as e:
|
||||
print(f"Failed to run {cmd} for {self.id}: {e}")
|
||||
|
||||
def cleanup(self, num_to_keep: int) -> None:
|
||||
files = os.listdir(self.outputdir)
|
||||
repo_images = [f for f in files if f.startswith(self.name)]
|
||||
if len(repo_images) > num_to_keep:
|
||||
repo_images.sort(key=lambda x: os.path.getmtime(os.path.join(self.outputdir, x)))
|
||||
for image in repo_images[:-num_to_keep]:
|
||||
shutil.rmtree(os.path.join(self.outputdir, image))
|
||||
|
||||
class GitHub(Repo):
|
||||
"""Class for tracking GitHub repositories."""
|
||||
|
||||
|
@ -126,8 +135,10 @@ def main():
|
|||
while True:
|
||||
for repo in repos:
|
||||
repo.build()
|
||||
repo.cleanup(num_to_keep=3)
|
||||
for package in aptpkg:
|
||||
package.build()
|
||||
repo.cleanup(num_to_keep=3)
|
||||
sleep(config["check_interval"])
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
@ -37,13 +37,17 @@ textblob>=0.15.3
|
|||
html2text>=2020.1.16
|
||||
beautifulsoup4>4.8.2
|
||||
|
||||
#Crawler
|
||||
# Crawler
|
||||
scrapy>2.0.0
|
||||
scrapy-splash>=0.7.2
|
||||
|
||||
# Languages
|
||||
gcld3
|
||||
libretranslatepy
|
||||
lexilang
|
||||
|
||||
# Demoji
|
||||
git+https://github.com/ail-project/demoji
|
||||
|
||||
#Graph
|
||||
numpy>1.18.1
|
||||
|
@ -71,13 +75,16 @@ pylibinjection>=0.2.4
|
|||
phonenumbers>8.12.1
|
||||
|
||||
# Web
|
||||
flask==2.3.3
|
||||
flask>=2.3.3
|
||||
flask-login
|
||||
bcrypt>3.1.6
|
||||
|
||||
# Ail typo squatting
|
||||
ail_typo_squatting
|
||||
|
||||
# OCR
|
||||
easyocr
|
||||
|
||||
# Tests
|
||||
nose2>=0.12.0
|
||||
coverage>=5.5
|
||||
|
|
176
tests/testApi.py
176
tests/testApi.py
|
@ -1,176 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import unittest
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
##################################
|
||||
# Import Project packages
|
||||
##################################
|
||||
from lib import Tag
|
||||
from packages import Import_helper
|
||||
|
||||
sys.path.append(os.environ['AIL_FLASK'])
|
||||
sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules'))
|
||||
from Flask_server import app
|
||||
|
||||
|
||||
# def parse_response(obj, ail_response):
|
||||
# res_json = ail_response.get_json()
|
||||
# if 'status' in res_json:
|
||||
# if res_json['status'] == 'error':
|
||||
# return obj.fail('{}: {}: {}'.format(ail_response.status_code, res_json['status'], res_json['reason']))
|
||||
# return res_json
|
||||
#
|
||||
#
|
||||
# def get_api_key():
|
||||
# api_file = os.path.join(os.environ['AIL_HOME'], 'DEFAULT_PASSWORD')
|
||||
# if os.path.isfile(api_file):
|
||||
# with open(os.path.join(os.environ['AIL_HOME'], 'DEFAULT_PASSWORD'), 'r') as f:
|
||||
# content = f.read()
|
||||
# content = content.splitlines()
|
||||
# apikey = content[-1]
|
||||
# apikey = apikey.replace('API_Key=', '', 1)
|
||||
# # manual tests
|
||||
# else:
|
||||
# apikey = sys.argv[1]
|
||||
# return apikey
|
||||
#
|
||||
#
|
||||
# APIKEY = get_api_key()
|
||||
#
|
||||
#
|
||||
# class TestApiV1(unittest.TestCase):
|
||||
# import_uuid = None
|
||||
# item_id = None
|
||||
#
|
||||
# def setUp(self):
|
||||
# self.app = app
|
||||
# self.app.config['TESTING'] = True
|
||||
# self.client = self.app.test_client()
|
||||
# self.apikey = APIKEY
|
||||
# self.item_content = "text to import"
|
||||
# self.item_tags = ["infoleak:analyst-detection=\"private-key\""]
|
||||
# self.expected_tags = ["infoleak:analyst-detection=\"private-key\"", 'infoleak:submission="manual"']
|
||||
#
|
||||
# # POST /api/v1/import/item
|
||||
# def test_0001_api_import_item(self):
|
||||
# input_json = {"type": "text", "tags": self.item_tags, "text": self.item_content}
|
||||
# req = self.client.post('/api/v1/import/item', json=input_json, headers={'Authorization': self.apikey})
|
||||
# req_json = parse_response(self, req)
|
||||
# import_uuid = req_json['uuid']
|
||||
# self.__class__.import_uuid = import_uuid
|
||||
# self.assertTrue(Import_helper.is_valid_uuid_v4(import_uuid))
|
||||
#
|
||||
# # POST /api/v1/get/import/item
|
||||
# def test_0002_api_get_import_item(self):
|
||||
# input_json = {"uuid": self.__class__.import_uuid}
|
||||
# item_not_imported = True
|
||||
# import_timout = 60
|
||||
# start = time.time()
|
||||
#
|
||||
# while item_not_imported:
|
||||
# req = self.client.post('/api/v1/get/import/item', json=input_json, headers={'Authorization': self.apikey})
|
||||
# req_json = parse_response(self, req)
|
||||
# if req_json['status'] == 'imported':
|
||||
# try:
|
||||
# item_id = req_json['items'][0]
|
||||
# item_not_imported = False
|
||||
# except Exception as e:
|
||||
# if time.time() - start > import_timout:
|
||||
# item_not_imported = False
|
||||
# self.fail("Import error: {}".format(req_json))
|
||||
# else:
|
||||
# if time.time() - start > import_timout:
|
||||
# item_not_imported = False
|
||||
# self.fail("Import Timeout, import status: {}".format(req_json['status']))
|
||||
# self.__class__.item_id = item_id
|
||||
#
|
||||
# # Process item
|
||||
# time.sleep(5)
|
||||
#
|
||||
# # POST /api/v1/get/item/content
|
||||
# def test_0003_api_get_item_content(self):
|
||||
# input_json = {"id": self.__class__.item_id}
|
||||
# req = self.client.post('/api/v1/get/item/content', json=input_json, headers={'Authorization': self.apikey})
|
||||
# req_json = parse_response(self, req)
|
||||
# item_content = req_json['content']
|
||||
# self.assertEqual(item_content, self.item_content)
|
||||
#
|
||||
# # POST /api/v1/get/item/tag
|
||||
# def test_0004_api_get_item_tag(self):
|
||||
# input_json = {"id": self.__class__.item_id}
|
||||
# req = self.client.post('/api/v1/get/item/tag', json=input_json, headers={'Authorization': self.apikey})
|
||||
# req_json = parse_response(self, req)
|
||||
# item_tags = req_json['tags']
|
||||
# self.assertCountEqual(item_tags, self.expected_tags)
|
||||
#
|
||||
# # POST /api/v1/get/item/tag
|
||||
# def test_0005_api_get_item_default(self):
|
||||
# input_json = {"id": self.__class__.item_id}
|
||||
# req = self.client.post('/api/v1/get/item/default', json=input_json, headers={'Authorization': self.apikey})
|
||||
# req_json = parse_response(self, req)
|
||||
# item_tags = req_json['tags']
|
||||
# self.assertCountEqual(item_tags, self.expected_tags)
|
||||
# item_content = req_json['content']
|
||||
# self.assertEqual(item_content, self.item_content)
|
||||
#
|
||||
# # POST /api/v1/get/item/tag
|
||||
# # # TODO: add more test
|
||||
# def test_0006_api_get_item(self):
|
||||
# input_json = {"id": self.__class__.item_id, "content": True}
|
||||
# req = self.client.post('/api/v1/get/item', json=input_json, headers={'Authorization': self.apikey})
|
||||
# req_json = parse_response(self, req)
|
||||
# item_tags = req_json['tags']
|
||||
# self.assertCountEqual(item_tags, self.expected_tags)
|
||||
# item_content = req_json['content']
|
||||
# self.assertEqual(item_content, self.item_content)
|
||||
#
|
||||
# # POST api/v1/add/item/tag
|
||||
# def test_0007_api_add_item_tag(self):
|
||||
# tags_to_add = ["infoleak:analyst-detection=\"api-key\""]
|
||||
# current_item_tag = Tag.get_obj_tag(self.__class__.item_id)
|
||||
# current_item_tag.append(tags_to_add[0])
|
||||
#
|
||||
# # galaxy_to_add = ["misp-galaxy:stealer=\"Vidar\""]
|
||||
# input_json = {"id": self.__class__.item_id, "tags": tags_to_add}
|
||||
# req = self.client.post('/api/v1/add/item/tag', json=input_json, headers={'Authorization': self.apikey})
|
||||
# req_json = parse_response(self, req)
|
||||
# item_tags = req_json['tags']
|
||||
# self.assertEqual(item_tags, tags_to_add)
|
||||
#
|
||||
# new_item_tag = Tag.get_obj_tag(self.__class__.item_id)
|
||||
# self.assertCountEqual(new_item_tag, current_item_tag)
|
||||
#
|
||||
# # DELETE api/v1/delete/item/tag
|
||||
# def test_0008_api_add_item_tag(self):
|
||||
# tags_to_delete = ["infoleak:analyst-detection=\"api-key\""]
|
||||
# input_json = {"id": self.__class__.item_id, "tags": tags_to_delete}
|
||||
# req = self.client.delete('/api/v1/delete/item/tag', json=input_json, headers={'Authorization': self.apikey})
|
||||
# req_json = parse_response(self, req)
|
||||
# item_tags = req_json['tags']
|
||||
# self.assertCountEqual(item_tags, tags_to_delete)
|
||||
# current_item_tag = Tag.get_obj_tag(self.__class__.item_id)
|
||||
# if tags_to_delete[0] in current_item_tag:
|
||||
# self.fail('Tag no deleted')
|
||||
#
|
||||
# # POST api/v1/get/tag/metadata
|
||||
# def test_0009_api_add_item_tag(self):
|
||||
# input_json = {"tag": self.item_tags[0]}
|
||||
# req = self.client.post('/api/v1/get/tag/metadata', json=input_json, headers={'Authorization': self.apikey})
|
||||
# req_json = parse_response(self, req)
|
||||
# self.assertEqual(req_json['tag'], self.item_tags[0])
|
||||
#
|
||||
# # GET api/v1/get/tag/all
|
||||
# def test_0010_api_add_item_tag(self):
|
||||
# input_json = {"tag": self.item_tags[0]}
|
||||
# req = self.client.get('/api/v1/get/tag/all', json=input_json, headers={'Authorization': self.apikey})
|
||||
# req_json = parse_response(self, req)
|
||||
# self.assertTrue(req_json['tags'])
|
||||
#
|
||||
#
|
||||
if __name__ == "__main__":
|
||||
unittest.main(argv=['first-arg-is-ignored'], exit=False)
|
|
@ -0,0 +1,40 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
|
||||
from pyail import PyAIL
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
##################################
|
||||
# Import Project packages
|
||||
##################################
|
||||
from lib import Users
|
||||
|
||||
sys.path.append(os.environ['AIL_FLASK'])
|
||||
sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules'))
|
||||
|
||||
class TestApiV1(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
# TODO GET HOST + PORT
|
||||
self.ail = PyAIL('https://localhost:7000', Users.get_user_token('admin@admin.test'), ssl=False)
|
||||
|
||||
# GET /api/v1/ping
|
||||
def test_0001_api_ping(self):
|
||||
r = self.ail.ping_ail()
|
||||
self.assertEqual(r.get('status'), 'pong')
|
||||
|
||||
# # GET /api/v1/uuid
|
||||
# def test_0001_api_uuid(self):
|
||||
# r = self.ail.get_uuid()
|
||||
#
|
||||
# # GET /api/v1/version
|
||||
# def test_0001_api_version(self):
|
||||
# r = self.ail.get_version()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main(exit=False)
|
|
@ -10,7 +10,10 @@ from base64 import b64encode
|
|||
from distutils.dir_util import copy_tree
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
|
||||
##################################
|
||||
# Import Project packages
|
||||
##################################
|
||||
from lib.ConfigLoader import ConfigLoader
|
||||
# Modules Classes
|
||||
from modules.ApiKey import ApiKey
|
||||
from modules.Categ import Categ
|
||||
|
@ -22,87 +25,91 @@ from modules.Onion import Onion
|
|||
from modules.Telegram import Telegram
|
||||
|
||||
# project packages
|
||||
from lib.ConfigLoader import ConfigLoader
|
||||
import lib.crawlers as crawlers
|
||||
import lib.objects.Items as Items
|
||||
|
||||
#### COPY SAMPLES ####
|
||||
config_loader = ConfigLoader()
|
||||
# # TODO:move me in new Item package
|
||||
ITEMS_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'
|
||||
ITEMS_FOLDER = os.path.join(os.path.realpath(ITEMS_FOLDER), '')
|
||||
ITEMS_FOLDER = Items.ITEMS_FOLDER
|
||||
TESTS_ITEMS_FOLDER = os.path.join(ITEMS_FOLDER, 'tests')
|
||||
sample_dir = os.path.join(os.environ['AIL_HOME'], 'samples')
|
||||
copy_tree(sample_dir, TESTS_ITEMS_FOLDER)
|
||||
|
||||
|
||||
#### ---- ####
|
||||
|
||||
class Test_Module_ApiKey(unittest.TestCase):
|
||||
class TestModuleApiKey(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
self.module_obj = ApiKey()
|
||||
self.module_obj.debug = True
|
||||
self.module = ApiKey()
|
||||
self.module.debug = True
|
||||
|
||||
def test_module(self):
|
||||
item_id = 'tests/2021/01/01/api_keys.gz'
|
||||
self.module.obj = Items.Item(item_id)
|
||||
google_api_key = 'AIza00000000000000000000000_example-KEY'
|
||||
aws_access_key = 'AKIAIOSFODNN7EXAMPLE'
|
||||
aws_secret_key = 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY'
|
||||
|
||||
matches = self.module_obj.compute(f'{item_id} 3', r_result=True)
|
||||
matches = self.module.compute('3', r_result=True)
|
||||
self.assertCountEqual(matches[0], {google_api_key})
|
||||
self.assertCountEqual(matches[1], {aws_access_key})
|
||||
self.assertCountEqual(matches[2], {aws_secret_key})
|
||||
|
||||
class Test_Module_Categ(unittest.TestCase):
|
||||
|
||||
class TestModuleCateg(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
self.module_obj = Categ()
|
||||
self.module_obj.debug = True
|
||||
self.module = Categ()
|
||||
self.module.debug = True
|
||||
|
||||
def test_module(self):
|
||||
item_id = 'tests/2021/01/01/categ.gz'
|
||||
self.module.obj = Items.Item(item_id)
|
||||
test_categ = ['CreditCards', 'Mail', 'Onion', 'Urls', 'Credential', 'Cve']
|
||||
|
||||
result = self.module_obj.compute(item_id, r_result=True)
|
||||
print(result)
|
||||
result = self.module.compute(None, r_result=True)
|
||||
self.assertCountEqual(result, test_categ)
|
||||
|
||||
class Test_Module_CreditCards(unittest.TestCase):
|
||||
|
||||
class TestModuleCreditCards(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
self.module_obj = CreditCards()
|
||||
self.module_obj.debug = True
|
||||
self.module = CreditCards()
|
||||
self.module.debug = True
|
||||
|
||||
def test_module(self):
|
||||
item_id = 'tests/2021/01/01/credit_cards.gz 7'
|
||||
item_id = 'tests/2021/01/01/credit_cards.gz'
|
||||
self.module.obj = Items.Item(item_id)
|
||||
test_cards = ['341039324930797', # American Express
|
||||
'6011613905509166', # Discover Card
|
||||
'3547151714018657', # Japan Credit Bureau (JCB)
|
||||
'5492981206527330', # 16 digits MasterCard
|
||||
'4024007132849695', # '4532525919781' # 16-digit VISA, with separators
|
||||
]
|
||||
]
|
||||
|
||||
result = self.module_obj.compute(item_id, r_result=True)
|
||||
result = self.module.compute('7', r_result=True)
|
||||
self.assertCountEqual(result, test_cards)
|
||||
|
||||
class Test_Module_DomClassifier(unittest.TestCase):
|
||||
|
||||
class TestModuleDomClassifier(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
self.module_obj = DomClassifier()
|
||||
self.module_obj.debug = True
|
||||
self.module = DomClassifier()
|
||||
self.module.debug = True
|
||||
|
||||
def test_module(self):
|
||||
test_host = 'foo.be'
|
||||
item_id = 'tests/2021/01/01/domain_classifier.gz'
|
||||
msg = f'{test_host} {item_id}'
|
||||
result = self.module_obj.compute(msg, r_result=True)
|
||||
self.module.obj = Items.Item(item_id)
|
||||
result = self.module.compute(f'{test_host}', r_result=True)
|
||||
self.assertTrue(len(result))
|
||||
|
||||
class Test_Module_Global(unittest.TestCase):
|
||||
|
||||
class TestModuleGlobal(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
self.module_obj = Global()
|
||||
self.module_obj.debug = True
|
||||
self.module = Global()
|
||||
self.module.debug = True
|
||||
|
||||
def test_module(self):
|
||||
# # TODO: delete item
|
||||
|
@ -113,24 +120,20 @@ class Test_Module_Global(unittest.TestCase):
|
|||
item_content = b'Lorem ipsum dolor sit amet, consectetur adipiscing elit'
|
||||
item_content_1 = b64encode(gzip.compress(item_content)).decode()
|
||||
item_content_2 = b64encode(gzip.compress(item_content + b' more text ...')).decode()
|
||||
message = f'{item_id} {item_content_1}'
|
||||
|
||||
self.module.obj = Items.Item(item_id)
|
||||
# Test new item
|
||||
result = self.module_obj.compute(message, r_result=True)
|
||||
print(f'test new item: {result}')
|
||||
result = self.module.compute(item_content_1, r_result=True)
|
||||
self.assertEqual(result, item_id)
|
||||
|
||||
# Test duplicate
|
||||
result = self.module_obj.compute(message, r_result=True)
|
||||
print(f'test duplicate {result}')
|
||||
result = self.module.compute(item_content_1, r_result=True)
|
||||
self.assertIsNone(result)
|
||||
|
||||
# Test same id with != content
|
||||
item = Items.Item('tests/2021/01/01/global_831875da824fc86ab5cc0e835755b520.gz')
|
||||
item.delete()
|
||||
message = f'{item_id} {item_content_2}'
|
||||
result = self.module_obj.compute(message, r_result=True)
|
||||
print(f'test same id with != content: {result}')
|
||||
result = self.module.compute(item_content_2, r_result=True)
|
||||
self.assertIn(item_id[:-3], result)
|
||||
self.assertNotEqual(result, item_id)
|
||||
|
||||
|
@ -139,40 +142,46 @@ class Test_Module_Global(unittest.TestCase):
|
|||
# item.delete()
|
||||
# # TODO: remove from queue
|
||||
|
||||
class Test_Module_Keys(unittest.TestCase):
|
||||
|
||||
class TestModuleKeys(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
self.module_obj = Keys()
|
||||
self.module_obj.debug = True
|
||||
self.module = Keys()
|
||||
self.module.debug = True
|
||||
|
||||
def test_module(self):
|
||||
item_id = 'tests/2021/01/01/keys.gz'
|
||||
self.module.obj = Items.Item(item_id)
|
||||
# # TODO: check results
|
||||
result = self.module_obj.compute(item_id)
|
||||
self.module.compute(None)
|
||||
|
||||
class Test_Module_Onion(unittest.TestCase):
|
||||
|
||||
class TestModuleOnion(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
self.module_obj = Onion()
|
||||
self.module_obj.debug = True
|
||||
self.module = Onion()
|
||||
self.module.debug = True
|
||||
|
||||
def test_module(self):
|
||||
item_id = 'tests/2021/01/01/onion.gz'
|
||||
domain_1 = 'eswpccgr5xyovsahffkehgleqthrasfpfdblwbs4lstd345dwq5qumqd.onion'
|
||||
domain_2 = 'www.facebookcorewwwi.onion'
|
||||
self.module.obj = Items.Item(item_id)
|
||||
# domain_1 = 'eswpccgr5xyovsahffkehgleqthrasfpfdblwbs4lstd345dwq5qumqd.onion'
|
||||
# domain_2 = 'www.facebookcorewwwi.onion'
|
||||
|
||||
self.module_obj.compute(f'{item_id} 3')
|
||||
self.module.compute(f'3')
|
||||
|
||||
class Test_Module_Telegram(unittest.TestCase):
|
||||
|
||||
class TestModuleTelegram(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
self.module_obj = Telegram()
|
||||
self.module_obj.debug = True
|
||||
self.module = Telegram()
|
||||
self.module.debug = True
|
||||
|
||||
def test_module(self):
|
||||
item_id = 'tests/2021/01/01/keys.gz'
|
||||
self.module.obj = Items.Item(item_id)
|
||||
# # TODO: check results
|
||||
result = self.module_obj.compute(item_id)
|
||||
self.module.compute(None)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
|
@ -0,0 +1,77 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Reprocess AIL Objects by Object Type
|
||||
================
|
||||
|
||||
Send ALL objects by type in queues
|
||||
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
##################################
|
||||
# Import Project packages
|
||||
##################################
|
||||
from lib.ail_core import is_object_type
|
||||
from lib import ail_queues
|
||||
from lib.objects import ail_objects
|
||||
|
||||
# from modules.ApiKey import ApiKey
|
||||
# from modules.Categ import Categ
|
||||
# from modules.CreditCards import CreditCards
|
||||
# from modules.DomClassifier import DomClassifier
|
||||
# from modules.Global import Global
|
||||
# from modules.Keys import Keys
|
||||
# from modules.Onion import Onion
|
||||
# from modules.Telegram import Telegram
|
||||
|
||||
from modules.Languages import Languages
|
||||
from modules.OcrExtractor import OcrExtractor
|
||||
|
||||
MODULES = {
|
||||
'Languages': Languages,
|
||||
'OcrExtractor': OcrExtractor
|
||||
|
||||
}
|
||||
|
||||
def reprocess_message_objects(object_type, module_name=None):
|
||||
if module_name:
|
||||
module = MODULES[module_name]()
|
||||
for obj in ail_objects.obj_iterator(object_type, filters={}):
|
||||
if not obj.exists():
|
||||
print(f'ERROR: object does not exist, {obj.id}')
|
||||
continue
|
||||
module.obj = obj
|
||||
module.compute(None)
|
||||
else:
|
||||
queue = ail_queues.AILQueue('FeederModuleImporter', -1)
|
||||
for obj in ail_objects.obj_iterator(object_type, filters={}):
|
||||
queue.send_message(obj.get_global_id(), message='reprocess')
|
||||
queue.end()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
parser = argparse.ArgumentParser(description='Reprocess AIL Objects')
|
||||
parser.add_argument('-t', '--type', type=str, help='AIL Object Type', required=True)
|
||||
parser.add_argument('-m', '--module', type=str, help='AIL Module Name')
|
||||
|
||||
args = parser.parse_args()
|
||||
if not args.type:
|
||||
parser.print_help()
|
||||
sys.exit(0)
|
||||
|
||||
obj_type = args.type
|
||||
if not is_object_type(obj_type):
|
||||
raise Exception(f'Invalid Object Type: {obj_type}')
|
||||
if obj_type not in ['image', 'item', 'message']:
|
||||
raise Exception(f'Currently not supported Object Type: {obj_type}')
|
||||
|
||||
modulename = args.module
|
||||
if modulename not in MODULES:
|
||||
raise Exception(f'Currently not supported Module: {modulename}')
|
||||
reprocess_message_objects(obj_type, module_name=modulename)
|
|
@ -0,0 +1,26 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.environ['AIL_HOME'])
|
||||
##################################
|
||||
# Import Project packages
|
||||
##################################
|
||||
from update.bin.ail_updater import AIL_Updater
|
||||
from lib import ail_updates
|
||||
from lib import chats_viewer
|
||||
|
||||
class Updater(AIL_Updater):
|
||||
"""default Updater."""
|
||||
|
||||
def __init__(self, version):
|
||||
super(Updater, self).__init__(version)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
chats_viewer.fix_correlations_subchannel_message()
|
||||
updater = Updater('v5.4')
|
||||
updater.run_update()
|
||||
|
|
@ -0,0 +1,43 @@
|
|||
#!/bin/bash
|
||||
|
||||
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
|
||||
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
|
||||
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
|
||||
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
|
||||
|
||||
export PATH=$AIL_HOME:$PATH
|
||||
export PATH=$AIL_REDIS:$PATH
|
||||
export PATH=$AIL_BIN:$PATH
|
||||
export PATH=$AIL_FLASK:$PATH
|
||||
|
||||
GREEN="\\033[1;32m"
|
||||
DEFAULT="\\033[0;39m"
|
||||
|
||||
echo -e $GREEN"Shutting down AIL ..."$DEFAULT
|
||||
bash ${AIL_BIN}/LAUNCH.sh -ks
|
||||
wait
|
||||
|
||||
# SUBMODULES #
|
||||
git submodule update
|
||||
|
||||
echo ""
|
||||
echo -e $GREEN"Updating python packages ..."$DEFAULT
|
||||
echo ""
|
||||
pip install -U pyail
|
||||
pip install -U pylacus
|
||||
pip install -U git+https://github.com/ail-project/demoji
|
||||
pip install -U lexilang
|
||||
|
||||
|
||||
bash ${AIL_BIN}/LAUNCH.sh -lrv
|
||||
bash ${AIL_BIN}/LAUNCH.sh -lkv
|
||||
|
||||
echo ""
|
||||
echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
|
||||
echo ""
|
||||
python ${AIL_HOME}/update/v5.4/Update.py
|
||||
wait
|
||||
echo ""
|
||||
echo ""
|
||||
|
||||
exit 0
|
|
@ -0,0 +1,26 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.environ['AIL_HOME'])
|
||||
##################################
|
||||
# Import Project packages
|
||||
##################################
|
||||
from update.bin.ail_updater import AIL_Updater
|
||||
from lib import ail_updates
|
||||
from lib import chats_viewer
|
||||
|
||||
class Updater(AIL_Updater):
|
||||
"""default Updater."""
|
||||
|
||||
def __init__(self, version):
|
||||
super(Updater, self).__init__(version)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
chats_viewer.fix_correlations_subchannel_message()
|
||||
updater = Updater('v5.5')
|
||||
updater.run_update()
|
||||
|
|
@ -0,0 +1,40 @@
|
|||
#!/bin/bash

# AIL v5.5 update script: stops AIL, updates git submodules and python
# dependencies, then runs the v5.5 migration and version bump.

[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
# BUGFIX: the message used to reference AIL_ARDB while the check is on AIL_BIN
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;

export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH

GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"

echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait

# SUBMODULES #
git submodule update

echo ""
echo -e $GREEN"Updating python packages ..."$DEFAULT
echo ""
pip install -U easyocr

bash ${AIL_BIN}/LAUNCH.sh -lrv
bash ${AIL_BIN}/LAUNCH.sh -lkv

echo ""
echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
echo ""
python ${AIL_HOME}/update/v5.5/Update.py
wait
echo ""
echo ""

exit 0
|
|
@ -35,6 +35,7 @@ import Flask_config
|
|||
from blueprints.root import root
|
||||
from blueprints.crawler_splash import crawler_splash
|
||||
from blueprints.correlation import correlation
|
||||
from blueprints.languages_ui import languages_ui
|
||||
from blueprints.tags_ui import tags_ui
|
||||
from blueprints.import_export import import_export
|
||||
from blueprints.investigations_b import investigations_b
|
||||
|
@ -52,6 +53,10 @@ from blueprints.objects_etag import objects_etag
|
|||
from blueprints.objects_hhhash import objects_hhhash
|
||||
from blueprints.chats_explorer import chats_explorer
|
||||
from blueprints.objects_image import objects_image
|
||||
from blueprints.objects_ocr import objects_ocr
|
||||
from blueprints.objects_favicon import objects_favicon
|
||||
from blueprints.api_rest import api_rest
|
||||
|
||||
|
||||
Flask_dir = os.environ['AIL_FLASK']
|
||||
|
||||
|
@ -94,6 +99,7 @@ app.config['MAX_CONTENT_LENGTH'] = 900 * 1024 * 1024
|
|||
app.register_blueprint(root, url_prefix=baseUrl)
|
||||
app.register_blueprint(crawler_splash, url_prefix=baseUrl)
|
||||
app.register_blueprint(correlation, url_prefix=baseUrl)
|
||||
app.register_blueprint(languages_ui, url_prefix=baseUrl)
|
||||
app.register_blueprint(tags_ui, url_prefix=baseUrl)
|
||||
app.register_blueprint(import_export, url_prefix=baseUrl)
|
||||
app.register_blueprint(investigations_b, url_prefix=baseUrl)
|
||||
|
@ -111,6 +117,9 @@ app.register_blueprint(objects_etag, url_prefix=baseUrl)
|
|||
app.register_blueprint(objects_hhhash, url_prefix=baseUrl)
|
||||
app.register_blueprint(chats_explorer, url_prefix=baseUrl)
|
||||
app.register_blueprint(objects_image, url_prefix=baseUrl)
|
||||
app.register_blueprint(objects_ocr, url_prefix=baseUrl)
|
||||
app.register_blueprint(objects_favicon, url_prefix=baseUrl)
|
||||
app.register_blueprint(api_rest, url_prefix=baseUrl)
|
||||
|
||||
# ========= =========#
|
||||
|
||||
|
@ -123,8 +132,6 @@ login_manager = LoginManager()
|
|||
login_manager.login_view = 'root.login'
|
||||
login_manager.init_app(app)
|
||||
|
||||
print()
|
||||
|
||||
# ========= LOGIN MANAGER ========
|
||||
|
||||
@login_manager.user_loader
|
||||
|
@ -231,18 +238,25 @@ def _handle_client_error(e):
|
|||
anchor_id = anchor_id.replace('/', '_')
|
||||
api_doc_url = 'https://github.com/ail-project/ail-framework/tree/master/doc#{}'.format(anchor_id)
|
||||
res_dict['documentation'] = api_doc_url
|
||||
return Response(json.dumps(res_dict, indent=2, sort_keys=True), mimetype='application/json'), 405
|
||||
return Response(json.dumps(res_dict) + '\n', mimetype='application/json'), 405
|
||||
else:
|
||||
return e
|
||||
|
||||
@app.errorhandler(404)
|
||||
def error_page_not_found(e):
|
||||
if request.path.startswith('/api/'): ## # TODO: add baseUrl
|
||||
return Response(json.dumps({"status": "error", "reason": "404 Not Found"}, indent=2, sort_keys=True), mimetype='application/json'), 404
|
||||
return Response(json.dumps({"status": "error", "reason": "404 Not Found"}) + '\n', mimetype='application/json'), 404
|
||||
else:
|
||||
# avoid endpoint enumeration
|
||||
return page_not_found(e)
|
||||
|
||||
@app.errorhandler(500)
|
||||
def _handle_client_error(e):
|
||||
if request.path.startswith('/api/'):
|
||||
return Response(json.dumps({"status": "error", "reason": "Server Error"}) + '\n', mimetype='application/json'), 500
|
||||
else:
|
||||
return e
|
||||
|
||||
@login_required
|
||||
def page_not_found(e):
|
||||
# avoid endpoint enumeration
|
||||
|
@ -255,6 +269,10 @@ default_taxonomies = ["infoleak", "gdpr", "fpf", "dark-web"]
|
|||
for taxonomy in default_taxonomies:
|
||||
Tag.enable_taxonomy_tags(taxonomy)
|
||||
|
||||
# rrrr = [str(p) for p in app.url_map.iter_rules()]
|
||||
# for p in rrrr:
|
||||
# print(p)
|
||||
|
||||
# ============ MAIN ============
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
@ -0,0 +1,251 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
"""
|
||||
Blueprint Flask: crawler splash endpoints: dashboard, onion crawler ...
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
|
||||
from functools import wraps
|
||||
from flask import request, Blueprint, Response
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
##################################
|
||||
# Import Project packages
|
||||
##################################
|
||||
from lib import ail_api
|
||||
from lib import ail_core
|
||||
from lib import ail_updates
|
||||
from lib import crawlers
|
||||
from lib import chats_viewer
|
||||
|
||||
from lib import Investigations
|
||||
from lib import Tag
|
||||
|
||||
from lib.objects import ail_objects
|
||||
from lib.objects import Domains
|
||||
from lib.objects import Titles
|
||||
|
||||
from importer.FeederImporter import api_add_json_feeder_to_queue
|
||||
|
||||
|
||||
# ============ BLUEPRINT ============
|
||||
api_rest = Blueprint('api_rest', __name__, template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates'))
|
||||
|
||||
|
||||
# ============ AUTH FUNCTIONS ============
|
||||
|
||||
def get_auth_from_header():
    """Return the API token from the ``Authorization`` header.

    All spaces are stripped, so ``"Token <token>"`` style values reduce to
    the bare token. Returns an empty string when the header is missing,
    instead of raising ``AttributeError`` on ``None``.
    """
    token = request.headers.get('Authorization', '')
    return token.replace(' ', '')  # remove spaces
|
||||
|
||||
|
||||
def token_required(user_role):
    """Decorator factory: authenticate the request token and enforce *user_role*.

    The wrapped view only runs for an authenticated user holding the role;
    otherwise a JSON error response is returned (401/403/400).
    """
    def actual_decorator(funct):
        @wraps(funct)
        def api_token(*args, **kwargs):
            # Guard: request must carry an AUTH header
            if not request.headers.get('Authorization'):
                return create_json_response({'status': 'error', 'reason': 'Authentication needed'}, 401)

            # Guard: the decorator must be configured with a role
            if not user_role:
                return create_json_response({'status': 'error', 'reason': 'Invalid Role'}, 401)

            token = get_auth_from_header()
            data, status_code = ail_api.authenticate_user(token, ip_address=request.remote_addr)
            if status_code != 200:
                return create_json_response(data, status_code)
            if not data:
                # Authentication returned 200 but no user data
                return create_json_response({'status': 'error', 'reason': 'Internal'}, 400)

            # Authenticated: enforce the required role
            if not ail_api.is_user_in_role(user_role, token):
                return create_json_response({'status': 'error', 'reason': 'Access Forbidden'}, 403)

            # User Authenticated + In Role
            return funct(*args, **kwargs)

        return api_token
    return actual_decorator
|
||||
|
||||
|
||||
# ============ FUNCTIONS ============
|
||||
|
||||
def create_json_response(data, status_code):
    """Serialize *data* as newline-terminated JSON with the given HTTP status."""
    body = json.dumps(data) + "\n"
    return Response(body, mimetype='application/json'), status_code
|
||||
|
||||
# ============= ROUTES ==============
|
||||
|
||||
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
||||
# # # # # # # # # # # # # # # CORE # # # # # # # # # # # # # # # # # #
|
||||
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
||||
|
||||
@api_rest.route("api/v1/ping", methods=['GET'])
@token_required('read_only')
def v1_ping():
    """Liveness check."""
    return create_json_response({'status': 'pong'}, 200)


@api_rest.route("api/v1/uuid", methods=['GET'])
@token_required('read_only')
def v1_uuid():
    """Return this AIL instance UUID."""
    return create_json_response({'uuid': ail_core.get_ail_uuid()}, 200)


@api_rest.route("api/v1/version", methods=['GET'])
@token_required('read_only')
def v1_version():
    """Return the running AIL version."""
    return create_json_response({'version': ail_updates.get_ail_version()}, 200)


@api_rest.route("api/v1/pyail/version", methods=['GET'])
@token_required('read_only')
def v1_pyail_version():
    """Return the pyail version string."""
    return create_json_response({'version': 'v1.0.0'}, 200)
|
||||
|
||||
|
||||
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
||||
# # # # # # # # # # # # # # # CRAWLERS # # # # # # # # # # # # # # # # #
|
||||
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
||||
# # TODO: ADD RESULT JSON Response
@api_rest.route("api/v1/add/crawler/task", methods=['POST'])  # TODO V2 Migration
@token_required('analyst')
def add_crawler_task():
    """Queue a new crawler task for the authenticated user."""
    data = request.get_json()
    user_token = get_auth_from_header()
    user_id = ail_api.get_user_from_token(user_token)
    res = crawlers.api_add_crawler_task(data, user_id=user_id)
    if res:
        return create_json_response(res[0], res[1])

    # 'url' may be absent from the payload: don't raise a KeyError on success
    dict_res = {'url': data.get('url')}
    return create_json_response(dict_res, 200)


@api_rest.route("api/v1/add/crawler/capture", methods=['POST'])  # TODO V2 Migration
@token_required('analyst')
def add_crawler_capture():
    """Queue a new crawler capture for the authenticated user."""
    data = request.get_json()
    user_token = get_auth_from_header()
    user_id = ail_api.get_user_from_token(user_token)
    res = crawlers.api_add_crawler_capture(data, user_id)
    if res:
        return create_json_response(res[0], res[1])

    # 'url' may be absent from the payload: don't raise a KeyError on success
    dict_res = {'url': data.get('url')}
    return create_json_response(dict_res, 200)
|
||||
|
||||
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
||||
# # # # # # # # # # # # # # IMPORTERS # # # # # # # # # # # # # # # # #
|
||||
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
||||
@api_rest.route("api/v1/import/json/item", methods=['POST'])  # TODO V2 Migration
@token_required('user')
def import_json_item():
    """Push a JSON item into the feeder import queue."""
    data_json = request.get_json()
    res = api_add_json_feeder_to_queue(data_json)
    # Use the shared helper for a consistent JSON response shape
    # (other routes in this blueprint all go through create_json_response)
    return create_json_response(res[0], res[1])
|
||||
|
||||
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
||||
# # # # # # # # # # # # # # # OBJECTS # # # # # # # # # # # # # # # # # # # TODO LIST OBJ TYPES + SUBTYPES
|
||||
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
||||
@api_rest.route("api/v1/object", methods=['GET'])  # TODO options
@token_required('read_only')
def v1_object():
    """Fetch an object either by global id (``gid``) or by (type, subtype, id)."""
    obj_gid = request.args.get('gid')
    if obj_gid:
        resp = ail_objects.api_get_object_global_id(obj_gid)
    else:
        resp = ail_objects.api_get_object(request.args.get('type'),
                                          request.args.get('subtype'),
                                          request.args.get('id'))
    return create_json_response(resp[0], resp[1])


@api_rest.route("api/v1/obj/gid/<path:object_global_id>", methods=['GET'])  # TODO REMOVE ME ????
@token_required('read_only')
def v1_object_global_id(object_global_id):
    """Fetch an object by its global id."""
    resp = ail_objects.api_get_object_global_id(object_global_id)
    return create_json_response(resp[0], resp[1])


# @api_rest.route("api/v1/object/<object_type>/<object_subtype>/<path:object_id>", methods=['GET'])
@api_rest.route("api/v1/obj/<object_type>/<path:object_id>", methods=['GET'])  # TODO REMOVE ME ????
@token_required('read_only')
def v1_object_type_id(object_type, object_id):
    """Fetch an object by type and id (objects without a subtype)."""
    resp = ail_objects.api_get_object_type_id(object_type, object_id)
    return create_json_response(resp[0], resp[1])
|
||||
|
||||
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
||||
# # # # # # # # # # # # # # # CHATS # # # # # # # # # # # # # # # # # # #
|
||||
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
||||
|
||||
@api_rest.route("api/v1/chat/messages", methods=['GET'])
@token_required('analyst')
def objects_chat_messages():
    """List the messages of a chat identified by (subtype, id) query args."""
    resp = chats_viewer.api_chat_messages(request.args.get('subtype'),
                                          request.args.get('id'))
    return create_json_response(resp[0], resp[1])


@api_rest.route("api/v1/chat-subchannel/messages", methods=['GET'])
@token_required('analyst')
def objects_chat_subchannel_messages():
    """List the messages of a chat sub-channel identified by (subtype, id)."""
    resp = chats_viewer.api_subchannel_messages(request.args.get('subtype'),
                                                request.args.get('id'))
    return create_json_response(resp[0], resp[1])


@api_rest.route("api/v1/chat-thread/messages", methods=['GET'])
@token_required('analyst')
def objects_chat_thread_messages():
    """List the messages of a chat thread identified by (subtype, id)."""
    resp = chats_viewer.api_thread_messages(request.args.get('subtype'),
                                            request.args.get('id'))
    return create_json_response(resp[0], resp[1])
|
||||
|
||||
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
||||
# # # # # # # # # # # # # # # TITLES # # # # # # # # # # # # # # # # # # # TODO TO REVIEW
|
||||
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
||||
|
||||
@api_rest.route("api/v1/titles/download", methods=['GET'])  # TODO RENAME ->api/v1/titles/domains
@token_required('analyst')
def objects_titles_download():
    """Download every title content with its associated ids."""
    return create_json_response(Titles.Titles().get_contents_ids(), 200)


# TODO
@api_rest.route("api/v1/titles/download/unsafe", methods=['GET'])  # TODO RENAME ->api/v1/titles/domains/unsafe
@token_required('analyst')
def objects_titles_download_unsafe():
    """Map each title content of unsafe-tagged domains to the domains using it."""
    all_titles = {}
    for tag in Tag.unsafe_tags:
        for domain_id in Tag.get_tag_objects(tag, 'domain'):
            domain = Domains.Domain(domain_id)
            domain_titles = domain.get_correlation('title').get('title', [])
            for dt in domain_titles:
                # dt[1:] drops the 1-char prefix of the correlation id
                content = Titles.Title(dt[1:]).get_content()
                # skip empty contents and the literal string 'None'
                if content and content != 'None':
                    all_titles.setdefault(content, []).append(domain.get_id())
    return Response(json.dumps(all_titles), mimetype='application/json'), 200
|
||||
|
||||
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
||||
# # # # # # # # # # # # # # # INVESTIGATIONS # # # # # # # # # # # # # # #
|
||||
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
||||
|
||||
@api_rest.route("api/v1/investigation/<investigation_uuid>", methods=['GET'])  # TODO options
@token_required('read_only')
def v1_investigation(investigation_uuid):
    """Fetch one investigation by UUID."""
    resp = Investigations.api_get_investigation(investigation_uuid)
    return create_json_response(resp[0], resp[1])
|
||||
|
||||
# TODO CATCH REDIRECT
|
|
@ -23,6 +23,7 @@ from lib import ail_core
|
|||
from lib import chats_viewer
|
||||
from lib import Language
|
||||
from lib import Tag
|
||||
from lib import module_extractor
|
||||
|
||||
# ============ BLUEPRINT ============
|
||||
chats_explorer = Blueprint('chats_explorer', __name__, template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates/chats_explorer'))
|
||||
|
@ -58,7 +59,7 @@ def chats_explorer_networks():
|
|||
networks = chats_viewer.get_chat_service_instances_by_protocol(protocol)
|
||||
if len(networks) == 1:
|
||||
instance_uuid = list(networks.values())[0]
|
||||
return redirect(url_for('chats_explorer.chats_explorer_instance', uuid=instance_uuid))
|
||||
return redirect(url_for('chats_explorer.chats_explorer_instance', subtype=instance_uuid))
|
||||
else:
|
||||
return render_template('chats_networks.html', protocol=protocol, networks=networks)
|
||||
|
||||
|
@ -67,7 +68,7 @@ def chats_explorer_networks():
|
|||
@login_required
|
||||
@login_read_only
|
||||
def chats_explorer_instance():
|
||||
intance_uuid = request.args.get('uuid')
|
||||
intance_uuid = request.args.get('subtype')
|
||||
chat_instance = chats_viewer.api_get_chat_service_instance(intance_uuid)
|
||||
if chat_instance[1] != 200:
|
||||
return create_json_response(chat_instance[0], chat_instance[1])
|
||||
|
@ -80,7 +81,7 @@ def chats_explorer_instance():
|
|||
@login_read_only
|
||||
def chats_explorer_chat():
|
||||
chat_id = request.args.get('id')
|
||||
instance_uuid = request.args.get('uuid')
|
||||
instance_uuid = request.args.get('subtype')
|
||||
target = request.args.get('target')
|
||||
if target == "Don't Translate":
|
||||
target = None
|
||||
|
@ -92,15 +93,31 @@ def chats_explorer_chat():
|
|||
else:
|
||||
chat = chat[0]
|
||||
languages = Language.get_translation_languages()
|
||||
return render_template('chat_viewer.html', chat=chat, bootstrap_label=bootstrap_label, translation_languages=languages, translation_target=target)
|
||||
return render_template('chat_viewer.html', chat=chat, bootstrap_label=bootstrap_label,
|
||||
ail_tags=Tag.get_modal_add_tags(chat['id'], chat['type'], chat['subtype']),
|
||||
translation_languages=languages, translation_target=target)
|
||||
|
||||
@chats_explorer.route("chats/explorer/messages/stats/week", methods=['GET'])
|
||||
@login_required
|
||||
@login_read_only
|
||||
def chats_explorer_messages_stats_week():
|
||||
chat_type = request.args.get('type')
|
||||
instance_uuid = request.args.get('subtype')
|
||||
chat_id = request.args.get('id')
|
||||
instance_uuid = request.args.get('uuid')
|
||||
week = chats_viewer.api_get_nb_message_by_week(chat_id, instance_uuid)
|
||||
week = chats_viewer.api_get_nb_message_by_week(chat_type, instance_uuid, chat_id)
|
||||
if week[1] != 200:
|
||||
return create_json_response(week[0], week[1])
|
||||
else:
|
||||
return jsonify(week[0])
|
||||
|
||||
@chats_explorer.route("chats/explorer/messages/stats/week/all", methods=['GET'])
|
||||
@login_required
|
||||
@login_read_only
|
||||
def chats_explorer_messages_stats_week_all():
|
||||
chat_type = request.args.get('type')
|
||||
instance_uuid = request.args.get('subtype')
|
||||
chat_id = request.args.get('id')
|
||||
week = chats_viewer.api_get_nb_week_messages(chat_type, instance_uuid, chat_id) # TODO SELECT DATE
|
||||
if week[1] != 200:
|
||||
return create_json_response(week[0], week[1])
|
||||
else:
|
||||
|
@ -111,7 +128,7 @@ def chats_explorer_messages_stats_week():
|
|||
@login_read_only
|
||||
def objects_subchannel_messages():
|
||||
subchannel_id = request.args.get('id')
|
||||
instance_uuid = request.args.get('uuid')
|
||||
instance_uuid = request.args.get('subtype')
|
||||
target = request.args.get('target')
|
||||
if target == "Don't Translate":
|
||||
target = None
|
||||
|
@ -123,14 +140,16 @@ def objects_subchannel_messages():
|
|||
else:
|
||||
subchannel = subchannel[0]
|
||||
languages = Language.get_translation_languages()
|
||||
return render_template('SubChannelMessages.html', subchannel=subchannel, bootstrap_label=bootstrap_label, translation_languages=languages, translation_target=target)
|
||||
return render_template('SubChannelMessages.html', subchannel=subchannel,
|
||||
ail_tags=Tag.get_modal_add_tags(subchannel['id'], subchannel['type'], subchannel['subtype']),
|
||||
bootstrap_label=bootstrap_label, translation_languages=languages, translation_target=target)
|
||||
|
||||
@chats_explorer.route("/chats/explorer/thread", methods=['GET'])
|
||||
@login_required
|
||||
@login_read_only
|
||||
def objects_thread_messages():
|
||||
thread_id = request.args.get('id')
|
||||
instance_uuid = request.args.get('uuid')
|
||||
instance_uuid = request.args.get('subtype')
|
||||
target = request.args.get('target')
|
||||
if target == "Don't Translate":
|
||||
target = None
|
||||
|
@ -158,6 +177,51 @@ def chats_explorer_chat_participants():
|
|||
meta = meta[0]
|
||||
return render_template('chat_participants.html', meta=meta, bootstrap_label=bootstrap_label)
|
||||
|
||||
|
||||
@chats_explorer.route("/chats/explorer/chat/download", methods=['GET'])
@login_required
@login_read_only
def chats_explorer_chat_download():
    """Download all messages of a chat as JSON (404 when the chat is unknown)."""
    chat_id = request.args.get('id')
    chat_subtype = request.args.get('subtype')
    resp = chats_viewer.api_chat_messages(chat_subtype, chat_id)
    if resp[1] == 200:
        return jsonify(resp[0])
    if resp[1] == 404:
        abort(404)
    return create_json_response(resp[0], resp[1])


@chats_explorer.route("/chats/explorer/subchannel/download", methods=['GET'])
@login_required
@login_read_only
def objects_subchannel_messages_download():
    """Download all messages of a chat sub-channel as JSON."""
    subchannel_id = request.args.get('id')
    instance_uuid = request.args.get('subtype')
    resp = chats_viewer.api_subchannel_messages(instance_uuid, subchannel_id)
    if resp[1] == 200:
        return jsonify(resp[0])
    return create_json_response(resp[0], resp[1])


@chats_explorer.route("/chats/explorer/thread/download", methods=['GET'])
@login_required
@login_read_only
def objects_thread_messages_download():
    """Download all messages of a chat thread as JSON."""
    thread_id = request.args.get('id')
    instance_uuid = request.args.get('subtype')
    resp = chats_viewer.api_thread_messages(instance_uuid, thread_id)
    if resp[1] == 200:
        return jsonify(resp[0])
    return create_json_response(resp[0], resp[1])
|
||||
|
||||
|
||||
#### ####
|
||||
|
||||
|
||||
@chats_explorer.route("/objects/message", methods=['GET'])
|
||||
@login_required
|
||||
@login_read_only
|
||||
|
@ -172,10 +236,48 @@ def objects_message():
|
|||
else:
|
||||
message = message[0]
|
||||
languages = Language.get_translation_languages()
|
||||
extracted = module_extractor.extract('message', '', message['id'], content=message['content'])
|
||||
extracted_matches = module_extractor.get_extracted_by_match(extracted)
|
||||
message['extracted'] = extracted
|
||||
message['extracted_matches'] = extracted_matches
|
||||
return render_template('ChatMessage.html', meta=message, bootstrap_label=bootstrap_label,
|
||||
translation_languages=languages, translation_target=target,
|
||||
modal_add_tags=Tag.get_modal_add_tags(message['id'], object_type='message'))
|
||||
|
||||
@chats_explorer.route("/objects/message/translate", methods=['POST'])
@login_required
@login_read_only
def objects_message_translate():
    """Save a manual translation for a message, then return to the caller page."""
    message_id = request.form.get('id')
    source = request.form.get('language_target')
    target = request.form.get('target')
    translation = request.form.get('translation')
    if target == "Don't Translate":
        target = None
    resp = chats_viewer.api_manually_translate_message(message_id, source, target, translation)
    if resp[1] != 200:
        return create_json_response(resp[0], resp[1])
    # Prefer bouncing back to the page the form was submitted from
    if request.referrer:
        return redirect(request.referrer)
    return redirect(url_for('chats_explorer.objects_message', id=message_id, target=target))


@chats_explorer.route("/objects/message/detect/language", methods=['GET'])
@login_required
@login_read_only
def objects_message_detect_language():
    """Run language detection on a message, then return to the caller page."""
    message_id = request.args.get('id')
    target = request.args.get('target')
    resp = chats_viewer.api_message_detect_language(message_id)
    if resp[1] != 200:
        return create_json_response(resp[0], resp[1])
    if request.referrer:
        return redirect(request.referrer)
    return redirect(url_for('chats_explorer.objects_message', id=message_id, target=target))
|
||||
|
||||
@chats_explorer.route("/objects/user-account", methods=['GET'])
|
||||
@login_required
|
||||
@login_read_only
|
||||
|
@ -192,4 +294,40 @@ def objects_user_account():
|
|||
user_account = user_account[0]
|
||||
languages = Language.get_translation_languages()
|
||||
return render_template('user_account.html', meta=user_account, bootstrap_label=bootstrap_label,
|
||||
ail_tags=Tag.get_modal_add_tags(user_account['id'], user_account['type'], user_account['subtype']),
|
||||
translation_languages=languages, translation_target=target)
|
||||
|
||||
@chats_explorer.route("/objects/user-account/chat", methods=['GET'])
@login_required
@login_read_only
def objects_user_account_chat():
    """Render the messages a user account posted in one chat."""
    instance_uuid = request.args.get('subtype')
    user_id = request.args.get('id')
    chat_id = request.args.get('chat_id')
    target = request.args.get('target')
    if target == "Don't Translate":
        target = None
    resp = chats_viewer.api_get_user_account_chat_messages(user_id, instance_uuid, chat_id, translation_target=target)
    if resp[1] != 200:
        return create_json_response(resp[0], resp[1])
    meta = resp[0]
    languages = Language.get_translation_languages()
    return render_template('chats_explorer/user_chat_messages.html', meta=meta, bootstrap_label=bootstrap_label,
                           ail_tags=Tag.get_modal_add_tags(meta['user-account']['id'], meta['user-account']['type'], meta['user-account']['subtype']),
                           translation_languages=languages, translation_target=target)


@chats_explorer.route("objects/user-account/messages/stats/week/all", methods=['GET'])
@login_required
@login_read_only
def user_account_messages_stats_week_all():
    """Weekly message-count stats for a user account, as JSON."""
    instance_uuid = request.args.get('subtype')
    user_id = request.args.get('id')
    resp = chats_viewer.api_get_user_account_nb_all_week_messages(user_id, instance_uuid)
    if resp[1] != 200:
        return create_json_response(resp[0], resp[1])
    return jsonify(resp[0])
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -87,58 +87,10 @@ def show_correlation():
|
|||
|
||||
## get all selected correlations
|
||||
filter_types = []
|
||||
correl_option = request.form.get('CookieNameCheck')
|
||||
if correl_option:
|
||||
filter_types.append('cookie-name')
|
||||
correl_option = request.form.get('EtagCheck')
|
||||
if correl_option:
|
||||
filter_types.append('etag')
|
||||
correl_option = request.form.get('CveCheck')
|
||||
if correl_option:
|
||||
filter_types.append('cve')
|
||||
correl_option = request.form.get('CryptocurrencyCheck')
|
||||
if correl_option:
|
||||
filter_types.append('cryptocurrency')
|
||||
correl_option = request.form.get('HHHashCheck')
|
||||
if correl_option:
|
||||
filter_types.append('hhhash')
|
||||
correl_option = request.form.get('PgpCheck')
|
||||
if correl_option:
|
||||
filter_types.append('pgp')
|
||||
correl_option = request.form.get('UsernameCheck')
|
||||
if correl_option:
|
||||
filter_types.append('username')
|
||||
correl_option = request.form.get('DecodedCheck')
|
||||
if correl_option:
|
||||
filter_types.append('decoded')
|
||||
correl_option = request.form.get('ScreenshotCheck')
|
||||
if correl_option:
|
||||
filter_types.append('screenshot')
|
||||
# correlation_objects
|
||||
correl_option = request.form.get('DomainCheck')
|
||||
if correl_option:
|
||||
filter_types.append('domain')
|
||||
correl_option = request.form.get('ItemCheck')
|
||||
if correl_option:
|
||||
filter_types.append('item')
|
||||
correl_option = request.form.get('chatCheck')
|
||||
if correl_option:
|
||||
filter_types.append('chat')
|
||||
correl_option = request.form.get('subchannelCheck')
|
||||
if correl_option:
|
||||
filter_types.append('chat-subchannel')
|
||||
correl_option = request.form.get('threadCheck')
|
||||
if correl_option:
|
||||
filter_types.append('chat-thread')
|
||||
correl_option = request.form.get('messageCheck')
|
||||
if correl_option:
|
||||
filter_types.append('message')
|
||||
correl_option = request.form.get('imageCheck')
|
||||
if correl_option:
|
||||
filter_types.append('image')
|
||||
correl_option = request.form.get('user_accountCheck')
|
||||
if correl_option:
|
||||
filter_types.append('user-account')
|
||||
for ob_type in ail_objects.get_all_objects():
|
||||
correl_option = request.form.get(f'{ob_type}_Check')
|
||||
if correl_option:
|
||||
filter_types.append(ob_type)
|
||||
|
||||
# list as params
|
||||
filter_types = ",".join(filter_types)
|
||||
|
@ -162,7 +114,7 @@ def show_correlation():
|
|||
|
||||
related_btc = bool(request.args.get('related_btc', False))
|
||||
|
||||
filter_types = ail_objects.sanitize_objs_types(request.args.get('filter', '').split(','))
|
||||
filter_types = ail_objects.sanitize_objs_types(request.args.get('filter', '').split(','), default=True)
|
||||
|
||||
# check if obj_id exist
|
||||
if not ail_objects.exists_obj(obj_type, subtype, obj_id):
|
||||
|
@ -187,8 +139,11 @@ def show_correlation():
|
|||
else:
|
||||
dict_object["subtype"] = ''
|
||||
dict_object["metadata_card"] = ail_objects.get_object_card_meta(obj_type, subtype, obj_id, related_btc=related_btc)
|
||||
dict_object["metadata_card"]['tags_safe'] = True
|
||||
return render_template("show_correlation.html", dict_object=dict_object, bootstrap_label=bootstrap_label,
|
||||
tags_selector_data=Tag.get_tags_selector_data())
|
||||
tags_selector_data=Tag.get_tags_selector_data(),
|
||||
meta=dict_object["metadata_card"],
|
||||
ail_tags=dict_object["metadata_card"]["add_tags_modal"])
|
||||
|
||||
@correlation.route('/correlation/get/description')
|
||||
@login_required
|
||||
|
@ -203,7 +158,10 @@ def get_description():
|
|||
return Response(json.dumps({"status": "error", "reason": "404 Not Found"}, indent=2, sort_keys=True), mimetype='application/json'), 404
|
||||
# object exist
|
||||
else:
|
||||
res = ail_objects.get_object_meta(obj_type, subtype, obj_id, options={'icon', 'tags', 'tags_safe'},
|
||||
options = {'icon', 'tags', 'tags_safe'}
|
||||
if obj_type == 'message':
|
||||
options.add('content')
|
||||
res = ail_objects.get_object_meta(obj_type, subtype, obj_id, options=options,
|
||||
flask_context=True)
|
||||
if 'tags' in res:
|
||||
res['tags'] = list(res['tags'])
|
||||
|
@ -350,6 +308,10 @@ def show_relationship():
|
|||
dict_object["metadata"]['type_id'] = subtype
|
||||
else:
|
||||
dict_object["subtype"] = ''
|
||||
dict_object["metadata_card"] = ail_objects.get_object_card_meta(obj_type, subtype, obj_id)
|
||||
dict_object["metadata_card"] = ail_objects.get_object_card_meta(obj_type, subtype, obj_id)
|
||||
dict_object["metadata_card"]['tags_safe'] = True
|
||||
return render_template("show_relationship.html", dict_object=dict_object, bootstrap_label=bootstrap_label,
|
||||
tags_selector_data=Tag.get_tags_selector_data())
|
||||
tags_selector_data=Tag.get_tags_selector_data(),
|
||||
meta=dict_object["metadata_card"],
|
||||
ail_tags=dict_object["metadata_card"]["add_tags_modal"])
|
||||
|
||||
|
|
|
@ -306,6 +306,40 @@ def crawlers_last_domains_json():
|
|||
stats.append(crawlers.get_crawlers_stats_by_day(date, domain_type))
|
||||
return jsonify(stats)
|
||||
|
||||
@crawler_splash.route('/crawlers/last/domains/month/json')
@login_required
@login_read_only
def crawlers_last_domains_month_json():
    """Current-month crawler stats for one domain type, as JSON."""
    domain_type = request.args.get('type')
    if domain_type not in crawlers.get_crawler_all_types():
        return jsonify({'error': 'Invalid domain type'}), 400
    return jsonify(crawlers.get_crawlers_stats_by_month(domain_type))


@crawler_splash.route('/crawlers/last/domains/month/previous/json')
@login_required
@login_read_only
def crawlers_last_domains_previous_month_json():
    """Previous-month crawler stats for one domain type, as JSON."""
    domain_type = request.args.get('type')
    if domain_type not in crawlers.get_crawler_all_types():
        return jsonify({'error': 'Invalid domain type'}), 400
    previous_month = Date.get_previous_month_date()
    return jsonify(crawlers.get_crawlers_stats_by_month(domain_type, date=previous_month))


@crawler_splash.route('/crawlers/last/domains/status/month/json')
@login_required
@login_read_only
def crawlers_last_domains_status_month_json():
    """Up/down domain counts for one domain type over the current month, as JSON."""
    domain_type = request.args.get('type')
    if domain_type not in crawlers.get_crawler_all_types():
        return jsonify({'error': 'Invalid domain type'}), 400
    stats = crawlers.get_crawlers_stats_up_down_by_month(domain_type)
    # reshape {name: value} into the [{'name': ..., 'value': ...}] list the chart expects
    data = [{'name': key, 'value': stats[key]} for key in stats]
    return jsonify(data)
|
||||
|
||||
|
||||
#### Domains ####
|
||||
|
||||
|
@ -576,6 +610,37 @@ def domains_search_date_post():
|
|||
type=domain_type, down=down, up=up))
|
||||
|
||||
|
||||
@crawler_splash.route('/domains/explorer/vanity', methods=['GET'])
|
||||
@login_required
|
||||
@login_analyst
|
||||
def domains_explorer_vanity_clusters():
|
||||
nb_min = request.args.get('min', 4)
|
||||
if int(nb_min) < 0:
|
||||
nb_min = 4
|
||||
vanity_clusters = Domains.get_vanity_clusters(nb_min=nb_min)
|
||||
return render_template("explorer_vanity_clusters.html", vanity_clusters=vanity_clusters,
|
||||
length=4)
|
||||
|
||||
@crawler_splash.route('/domains/explorer/vanity/explore', methods=['GET'])
|
||||
@login_required
|
||||
@login_analyst
|
||||
def domains_explorer_vanity_explore():
|
||||
vanity = request.args.get('vanity')
|
||||
nb_min = request.args.get('min', 2) # TODO SHOW DOMAINS OPTIONS + HARD CODED DOMAINS LIMIT FOR RENDER
|
||||
length = len(vanity)
|
||||
if int(nb_min) < 0:
|
||||
nb_min = 4
|
||||
vanity_clusters = Domains.get_vanity_cluster(vanity, len_vanity=length+1, nb_min=nb_min)
|
||||
vanity_domains = Domains.get_vanity_domains(vanity, len_vanity=length, meta=True)
|
||||
vanities_tree = []
|
||||
for i in range(4, length):
|
||||
vanities_tree.append(vanity[:i])
|
||||
if length == len(vanity):
|
||||
vanities_tree.append(vanity)
|
||||
return render_template("explorer_vanity_domains.html", vanity_clusters=vanity_clusters,
|
||||
bootstrap_label=bootstrap_label, vanity=vanity, vanities_tree=vanities_tree,
|
||||
vanity_domains=vanity_domains, length=length)
|
||||
|
||||
##-- --##
|
||||
|
||||
|
||||
|
|
|
@ -9,7 +9,7 @@ import os
|
|||
import sys
|
||||
import json
|
||||
|
||||
from flask import render_template, jsonify, request, Blueprint, redirect, url_for, Response, escape, abort
|
||||
from flask import render_template, jsonify, request, Blueprint, redirect, url_for, Response, abort
|
||||
from flask_login import login_required, current_user, login_user, logout_user
|
||||
|
||||
sys.path.append('modules')
|
||||
|
@ -24,6 +24,7 @@ sys.path.append(os.environ['AIL_BIN'])
|
|||
##################################
|
||||
from lib import ail_core
|
||||
from lib.objects import ail_objects
|
||||
from lib import chats_viewer
|
||||
from lib import item_basic
|
||||
from lib import Tracker
|
||||
from lib import Tag
|
||||
|
@ -174,7 +175,7 @@ def show_tracker():
|
|||
if date_from:
|
||||
date_from, date_to = Date.sanitise_daterange(date_from, date_to)
|
||||
objs = tracker.get_objs_by_daterange(date_from, date_to)
|
||||
meta['objs'] = ail_objects.get_objects_meta(objs, flask_context=True)
|
||||
meta['objs'] = ail_objects.get_objects_meta(objs, options={'last_full_date'}, flask_context=True)
|
||||
else:
|
||||
date_from = ''
|
||||
date_to = ''
|
||||
|
@ -372,6 +373,78 @@ def get_json_tracker_graph():
|
|||
res = Tracker.get_trackers_graph_by_day([tracker_uuid])
|
||||
return jsonify(res)
|
||||
|
||||
@hunters.route('/tracker/object/add', methods=['GET'])
|
||||
@login_required
|
||||
@login_admin
|
||||
def tracker_object_add():
|
||||
user_id = current_user.get_id()
|
||||
tracker_uuid = request.args.get('uuid')
|
||||
object_global_id = request.args.get('gid')
|
||||
if object_global_id.startswith('messages::'):
|
||||
obj = ail_objects.get_obj_from_global_id(object_global_id)
|
||||
date = obj.get_date()
|
||||
else:
|
||||
date = request.args.get('date') # TODO check daterange
|
||||
res = Tracker.api_tracker_add_object({'uuid': tracker_uuid, 'gid': object_global_id, 'date': date}, user_id)
|
||||
if res[1] != 200:
|
||||
return create_json_response(res[0], res[1])
|
||||
else:
|
||||
if request.referrer:
|
||||
return redirect(request.referrer)
|
||||
else:
|
||||
return redirect(url_for('hunters.show_tracker', uuid=tracker_uuid))
|
||||
|
||||
@hunters.route('/tracker/object/remove', methods=['GET'])
|
||||
@login_required
|
||||
@login_analyst
|
||||
def tracker_object_remove():
|
||||
user_id = current_user.get_id()
|
||||
tracker_uuid = request.args.get('uuid')
|
||||
object_global_id = request.args.get('gid')
|
||||
res = Tracker.api_tracker_remove_object({'uuid': tracker_uuid, 'gid': object_global_id}, user_id)
|
||||
if res[1] != 200:
|
||||
return create_json_response(res[0], res[1])
|
||||
else:
|
||||
if request.referrer:
|
||||
return redirect(request.referrer)
|
||||
else:
|
||||
return redirect(url_for('hunters.show_tracker', uuid=tracker_uuid))
|
||||
|
||||
|
||||
@hunters.route('/tracker/objects', methods=['GET'])
|
||||
@login_required
|
||||
@login_admin
|
||||
def tracker_objects():
|
||||
user_id = current_user.get_id()
|
||||
tracker_uuid = request.args.get('uuid', None)
|
||||
res = Tracker.api_is_allowed_to_edit_tracker(tracker_uuid, user_id)
|
||||
if res[1] != 200: # invalid access
|
||||
return Response(json.dumps(res[0], indent=2, sort_keys=True), mimetype='application/json'), res[1]
|
||||
|
||||
tracker = Tracker.Tracker(tracker_uuid)
|
||||
meta = tracker.get_meta(options={'description', 'sparkline', 'tags', 'nb_objs'})
|
||||
if meta['type'] == 'yara':
|
||||
yara_rule_content = Tracker.get_yara_rule_content(meta['tracked'])
|
||||
else:
|
||||
yara_rule_content = None
|
||||
|
||||
chats, messages = chats_viewer.get_message_report(tracker.get_objs())
|
||||
|
||||
meta['date'] = Date.get_current_utc_full_time()
|
||||
|
||||
return render_template("messages_report.html", meta=meta, yara_rule_content=yara_rule_content,
|
||||
chats=chats, messages=messages, bootstrap_label=bootstrap_label)
|
||||
|
||||
# TODO
|
||||
|
||||
# Manual - Title
|
||||
# - Summary
|
||||
|
||||
# Messages table
|
||||
|
||||
# Timeline messages by chats - line
|
||||
# pie charts NB messages all chats
|
||||
# Barchart NB messages by days
|
||||
|
||||
####################
|
||||
# RETRO HUNT #
|
||||
|
|
|
@ -209,6 +209,14 @@ def unregister_investigation():
|
|||
def get_investigations_selector_json():
|
||||
return jsonify(Investigations.get_investigations_selector())
|
||||
|
||||
@investigations_b.route("/object/gid")
|
||||
@login_required
|
||||
@login_read_only
|
||||
def get_object_gid():
|
||||
obj_global_id = request.args.get('gid')
|
||||
ail_obj = ail_objects.get_obj_from_global_id(obj_global_id)
|
||||
url = ail_obj.get_link(flask_context=True)
|
||||
return redirect(url)
|
||||
|
||||
#
|
||||
# @investigations_b.route("/object/item") #completely shows the paste in a new tab
|
||||
|
|
|
@ -0,0 +1,83 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
"""
|
||||
Blueprint Flask: crawler splash endpoints: dashboard, onion crawler ...
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
|
||||
from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for, Response, abort
|
||||
from flask_login import login_required, current_user
|
||||
|
||||
# Import Role_Manager
|
||||
from Role_Manager import login_admin, login_analyst, login_read_only
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
##################################
|
||||
# Import Project packages
|
||||
##################################
|
||||
from lib import ail_core
|
||||
from lib import Language
|
||||
from lib import Tag
|
||||
from lib.objects import ail_objects
|
||||
|
||||
# ============ BLUEPRINT ============
|
||||
languages_ui = Blueprint('languages_ui', __name__, template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates/chats_explorer'))
|
||||
|
||||
# ============ VARIABLES ============
|
||||
# bootstrap_label = ['primary', 'success', 'danger', 'warning', 'info']
|
||||
|
||||
def create_json_response(data, status_code):
|
||||
return Response(json.dumps(data, indent=2, sort_keys=True), mimetype='application/json'), status_code
|
||||
|
||||
# ============ FUNCTIONS ============
|
||||
|
||||
# ============= ROUTES ==============
|
||||
@languages_ui.route("/languages/object/translate", methods=['POST'])
|
||||
@login_required
|
||||
@login_read_only
|
||||
def translate_object():
|
||||
obj_type = request.form.get('type')
|
||||
subtype = request.form.get('subtype')
|
||||
obj_id = request.form.get('id')
|
||||
source = request.form.get('language_target')
|
||||
target = request.form.get('target')
|
||||
translation = request.form.get('translation')
|
||||
if target == "Don't Translate":
|
||||
target = None
|
||||
|
||||
resp = ail_objects.api_manually_translate(obj_type, subtype, obj_id, source, target, translation)
|
||||
if resp[1] != 200:
|
||||
return create_json_response(resp[0], resp[1])
|
||||
else:
|
||||
if request.referrer:
|
||||
return redirect(request.referrer)
|
||||
else:
|
||||
if obj_type == 'ocr':
|
||||
return redirect(url_for('objects_ocr.object_ocr', id=obj_id, target=target)) # TODO change to support all objects
|
||||
|
||||
@languages_ui.route("/languages/object/detect/language", methods=['GET'])
|
||||
@login_required
|
||||
@login_read_only
|
||||
def detect_object_language():
|
||||
obj_type = request.args.get('type')
|
||||
subtype = request.args.get('subtype')
|
||||
obj_id = request.args.get('id')
|
||||
target = request.args.get('target')
|
||||
|
||||
resp = ail_objects.api_detect_language(obj_type, subtype, obj_id)
|
||||
if resp[1] != 200:
|
||||
return create_json_response(resp[0], resp[1])
|
||||
else:
|
||||
if request.referrer:
|
||||
return redirect(request.referrer)
|
||||
else:
|
||||
if obj_type == 'ocr':
|
||||
return redirect(url_for('objects_ocr.object_ocr', id=obj_id, target=target)) # TODO change to support all objects
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,90 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
'''
|
||||
Blueprint Flask: crawler splash endpoints: dashboard, onion crawler ...
|
||||
'''
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for, Response, abort, send_file, send_from_directory
|
||||
from flask_login import login_required, current_user
|
||||
|
||||
# Import Role_Manager
|
||||
from Role_Manager import login_admin, login_analyst, login_read_only, no_cache
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
##################################
|
||||
# Import Project packages
|
||||
##################################
|
||||
from lib.objects import Favicons
|
||||
from packages import Date
|
||||
|
||||
# ============ BLUEPRINT ============
|
||||
objects_favicon = Blueprint('objects_favicon', __name__, template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates/objects/favicon'))
|
||||
|
||||
# ============ VARIABLES ============
|
||||
bootstrap_label = ['primary', 'success', 'danger', 'warning', 'info']
|
||||
|
||||
|
||||
# ============ FUNCTIONS ============
|
||||
@objects_favicon.route('/favicon/<path:filename>')
|
||||
@login_required
|
||||
@login_read_only
|
||||
@no_cache
|
||||
def favicon(filename):
|
||||
if not filename:
|
||||
abort(404)
|
||||
if not 9 <= len(filename) <= 11:
|
||||
abort(404)
|
||||
filename = filename.replace('/', '')
|
||||
fav = Favicons.Favicon(filename)
|
||||
return send_from_directory(Favicons.FAVICON_FOLDER, fav.get_rel_path(), as_attachment=False, mimetype='image')
|
||||
|
||||
|
||||
@objects_favicon.route("/objects/favicons", methods=['GET'])
|
||||
@login_required
|
||||
@login_read_only
|
||||
def objects_favicons():
|
||||
date_from = request.args.get('date_from')
|
||||
date_to = request.args.get('date_to')
|
||||
show_objects = request.args.get('show_objects')
|
||||
date = Date.sanitise_date_range(date_from, date_to)
|
||||
date_from = date['date_from']
|
||||
date_to = date['date_to']
|
||||
|
||||
if show_objects:
|
||||
dict_objects = Favicons.Favicons().api_get_meta_by_daterange(date_from, date_to)
|
||||
else:
|
||||
dict_objects = {}
|
||||
|
||||
print(dict_objects)
|
||||
|
||||
return render_template("FaviconDaterange.html", date_from=date_from, date_to=date_to,
|
||||
dict_objects=dict_objects, show_objects=show_objects)
|
||||
|
||||
|
||||
@objects_favicon.route("/objects/favicons/post", methods=['POST'])
|
||||
@login_required
|
||||
@login_read_only
|
||||
def objects_favicons_post():
|
||||
date_from = request.form.get('date_from')
|
||||
date_to = request.form.get('date_to')
|
||||
show_objects = request.form.get('show_objects')
|
||||
return redirect(url_for('objects_favicon.objects_favicons', date_from=date_from, date_to=date_to, show_objects=show_objects))
|
||||
|
||||
|
||||
@objects_favicon.route("/objects/favicons/range/json", methods=['GET'])
|
||||
@login_required
|
||||
@login_read_only
|
||||
def objects_favicons_range_json():
|
||||
date_from = request.args.get('date_from')
|
||||
date_to = request.args.get('date_to')
|
||||
date = Date.sanitise_date_range(date_from, date_to)
|
||||
date_from = date['date_from']
|
||||
date_to = date['date_to']
|
||||
return jsonify(Favicons.Favicons().api_get_chart_nb_by_daterange(date_from, date_to))
|
||||
|
||||
# ============= ROUTES ==============
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue