mirror of https://github.com/CIRCL/AIL-framework
200 lines
6.7 KiB
Python
200 lines
6.7 KiB
Python
#!/usr/bin/env python3.5
|
|
# -*-coding:UTF-8 -*
|
|
|
|
'''
|
|
Flask functions and routes for the trending modules page
|
|
'''
|
|
import redis
|
|
import json
|
|
import os
|
|
import datetime
|
|
import flask
|
|
from flask import Flask, render_template, jsonify, request, Blueprint
|
|
|
|
import Paste
|
|
from whoosh import index
|
|
from whoosh.fields import Schema, TEXT, ID
|
|
from whoosh.qparser import QueryParser
|
|
|
|
# ============ VARIABLES ============
|
|
import Flask_config
|
|
|
|
app = Flask_config.app
|
|
cfg = Flask_config.cfg
|
|
r_serv_pasteName = Flask_config.r_serv_pasteName
|
|
max_preview_char = Flask_config.max_preview_char
|
|
max_preview_modal = Flask_config.max_preview_modal
|
|
|
|
|
|
baseindexpath = os.path.join(os.environ['AIL_HOME'], cfg.get("Indexer", "path"))
|
|
indexRegister_path = os.path.join(os.environ['AIL_HOME'],
|
|
cfg.get("Indexer", "register"))
|
|
|
|
searches = Blueprint('searches', __name__, template_folder='templates')
|
|
|
|
# ============ FUNCTIONS ============
|
|
def get_current_index():
|
|
with open(indexRegister_path, "r") as f:
|
|
allIndex = f.read()
|
|
allIndex = allIndex.split() # format [time1\ntime2]
|
|
allIndex.sort()
|
|
try:
|
|
indexname = allIndex[-1].strip('\n\r')
|
|
except IndexError as e:
|
|
indexname = "no-index"
|
|
indexpath = os.path.join(baseindexpath, indexname)
|
|
return indexpath
|
|
|
|
def get_index_list(selected_index=""):
|
|
temp = []
|
|
index_list = []
|
|
for dirs in os.listdir(baseindexpath):
|
|
if os.path.isdir(os.path.join(baseindexpath, dirs)):
|
|
value = dirs
|
|
name = to_iso_date(dirs) + " - " + \
|
|
str(get_dir_size(dirs) / (1000*1000)) + " Mb " + \
|
|
"(" + str(get_item_count(dirs)) + " Items" + ")"
|
|
flag = dirs==selected_index.split('/')[-1]
|
|
if dirs == "old_index":
|
|
temp = [value, name, flag]
|
|
else:
|
|
index_list.append([value, name, flag])
|
|
|
|
index_list.sort(reverse=True, key=lambda x: x[0])
|
|
if len(temp) != 0:
|
|
index_list.append(temp)
|
|
return index_list
|
|
|
|
def get_dir_size(directory):
|
|
cur_sum = 0
|
|
for directory, subdirs, files in os.walk(os.path.join(baseindexpath,directory)):
|
|
try:
|
|
cur_sum += sum(os.path.getsize(os.path.join(directory, name)) for name in files)
|
|
except OSError as e: #File disappeared
|
|
pass
|
|
return cur_sum
|
|
|
|
def get_item_count(dirs):
|
|
ix = index.open_dir(os.path.join(baseindexpath, dirs))
|
|
return ix.doc_count_all()
|
|
|
|
def to_iso_date(timestamp):
|
|
if timestamp == "old_index":
|
|
return "old_index"
|
|
return str(datetime.datetime.fromtimestamp(int(timestamp))).split()[0]
|
|
|
|
|
|
# ============ ROUTES ============
|
|
|
|
@searches.route("/search", methods=['POST'])
|
|
def search():
|
|
query = request.form['query']
|
|
q = []
|
|
q.append(query)
|
|
r = [] #complete path
|
|
c = [] #preview of the paste content
|
|
paste_date = []
|
|
paste_size = []
|
|
index_name = request.form['index_name']
|
|
num_elem_to_get = 50
|
|
|
|
# select correct index
|
|
if index_name is None or index_name == "0":
|
|
selected_index = get_current_index()
|
|
else:
|
|
selected_index = os.path.join(baseindexpath, index_name)
|
|
|
|
# Search filename
|
|
for path in r_serv_pasteName.smembers(q[0]):
|
|
r.append(path)
|
|
paste = Paste.Paste(path)
|
|
content = paste.get_p_content()
|
|
content_range = max_preview_char if len(content)>max_preview_char else len(content)-1
|
|
c.append(content[0:content_range])
|
|
curr_date = str(paste._get_p_date())
|
|
curr_date = curr_date[0:4]+'/'+curr_date[4:6]+'/'+curr_date[6:]
|
|
paste_date.append(curr_date)
|
|
paste_size.append(paste._get_p_size())
|
|
|
|
# Search full line
|
|
schema = Schema(title=TEXT(stored=True), path=ID(stored=True), content=TEXT)
|
|
|
|
ix = index.open_dir(selected_index)
|
|
with ix.searcher() as searcher:
|
|
query = QueryParser("content", ix.schema).parse(" ".join(q))
|
|
results = searcher.search_page(query, 1, pagelen=num_elem_to_get)
|
|
for x in results:
|
|
r.append(x.items()[0][1])
|
|
paste = Paste.Paste(x.items()[0][1])
|
|
content = paste.get_p_content()
|
|
content_range = max_preview_char if len(content)>max_preview_char else len(content)-1
|
|
c.append(content[0:content_range])
|
|
curr_date = str(paste._get_p_date())
|
|
curr_date = curr_date[0:4]+'/'+curr_date[4:6]+'/'+curr_date[6:]
|
|
paste_date.append(curr_date)
|
|
paste_size.append(paste._get_p_size())
|
|
results = searcher.search(query)
|
|
num_res = len(results)
|
|
|
|
index_min = 1
|
|
index_max = len(get_index_list())
|
|
return render_template("search.html", r=r, c=c,
|
|
query=request.form['query'], paste_date=paste_date,
|
|
paste_size=paste_size, char_to_display=max_preview_modal,
|
|
num_res=num_res, index_min=index_min, index_max=index_max,
|
|
index_list=get_index_list(selected_index)
|
|
)
|
|
|
|
|
|
@searches.route("/get_more_search_result", methods=['POST'])
|
|
def get_more_search_result():
|
|
query = request.form['query']
|
|
q = []
|
|
q.append(query)
|
|
page_offset = int(request.form['page_offset'])
|
|
index_name = request.form['index_name']
|
|
num_elem_to_get = 50
|
|
|
|
# select correct index
|
|
if index_name is None or index_name == "0":
|
|
selected_index = get_current_index()
|
|
else:
|
|
selected_index = os.path.join(baseindexpath, index_name)
|
|
|
|
path_array = []
|
|
preview_array = []
|
|
date_array = []
|
|
size_array = []
|
|
|
|
schema = Schema(title=TEXT(stored=True), path=ID(stored=True), content=TEXT)
|
|
|
|
ix = index.open_dir(selected_index)
|
|
with ix.searcher() as searcher:
|
|
query = QueryParser("content", ix.schema).parse(" ".join(q))
|
|
results = searcher.search_page(query, page_offset, num_elem_to_get)
|
|
for x in results:
|
|
path_array.append(x.items()[0][1])
|
|
paste = Paste.Paste(x.items()[0][1])
|
|
content = paste.get_p_content()
|
|
content_range = max_preview_char if len(content)>max_preview_char else len(content)-1
|
|
preview_array.append(content[0:content_range])
|
|
curr_date = str(paste._get_p_date())
|
|
curr_date = curr_date[0:4]+'/'+curr_date[4:6]+'/'+curr_date[6:]
|
|
date_array.append(curr_date)
|
|
size_array.append(paste._get_p_size())
|
|
to_return = {}
|
|
to_return["path_array"] = path_array
|
|
to_return["preview_array"] = preview_array
|
|
to_return["date_array"] = date_array
|
|
to_return["size_array"] = size_array
|
|
if len(path_array) < num_elem_to_get: #pagelength
|
|
to_return["moreData"] = False
|
|
else:
|
|
to_return["moreData"] = True
|
|
|
|
return jsonify(to_return)
|
|
|
|
|
|
# ========= REGISTRATION =========
|
|
app.register_blueprint(searches)
|