add: [api/backend] new full-text indexer
First version using Python Whoosh (maybe not optimal in the long run). The indexer runs by enumerating the item(s) from the CyCAT backend.
main
parent
5e0df4b667
commit
44d2176a23
|
@ -14,7 +14,16 @@ cycat_type = {"1": "Publisher", "2": "Project", "3": "Item"}
|
||||||
|
|
||||||
r = redis.Redis(host='127.0.0.1', port='3033', decode_responses=True)
|
r = redis.Redis(host='127.0.0.1', port='3033', decode_responses=True)
|
||||||
|
|
||||||
# genericc lib - TODO: move to cycat Python library
|
# full-text part (/search API)
|
||||||
|
|
||||||
|
from whoosh import index, qparser
|
||||||
|
from whoosh.fields import Schema, TEXT, ID
|
||||||
|
from whoosh.qparser import QueryParser
|
||||||
|
indexpath = "../index"
|
||||||
|
ix = index.open_dir(indexpath)
|
||||||
|
|
||||||
|
|
||||||
|
# generic lib - TODO: move to cycat Python library
|
||||||
|
|
||||||
def _validate_uuid(value=None):
|
def _validate_uuid(value=None):
|
||||||
if uuid is None:
|
if uuid is None:
|
||||||
|
@ -182,5 +191,18 @@ class propose(Resource):
|
||||||
r.rpush("proposal", json.dumps(x))
|
r.rpush("proposal", json.dumps(x))
|
||||||
return {'message': 'Proposal submitted'}, 200
|
return {'message': 'Proposal submitted'}, 200
|
||||||
|
|
||||||
|
@api.route('/search/<string:searchquery>')
@api.doc(description="Full-text search in CyCAT and return matching UUID.")
class search(Resource):
    """REST resource running a full-text query against the Whoosh index."""

    def get(self, searchquery=None):
        """Return the stored ``path`` value of every document matching the query.

        The query string is parsed against the ``content`` field of the
        index schema. ``None`` is returned when no query is supplied.
        """
        if searchquery is None:
            return None
        with ix.searcher() as searcher:
            parsed = QueryParser("content", ix.schema).parse(searchquery)
            # limit=None asks Whoosh for every hit rather than the top N.
            hits = searcher.search(parsed, limit=None)
            # Materialise the list while the searcher is still open.
            return [hit['path'] for hit in hits]
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
app.run()
|
app.run()
|
||||||
|
|
|
@ -1,2 +1,3 @@
|
||||||
./kvrocks/src/kvrocks -c ./etc/kvrocks.conf
|
./kvrocks/src/kvrocks -c ./etc/kvrocks.conf
|
||||||
python3.8 ./bin/server.py
|
cd bin
|
||||||
|
python3.8 server.py
|
||||||
|
|
|
@ -0,0 +1,61 @@
|
||||||
|
import redis
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
cycat_type = {"1": "Publisher", "2": "Project", "3": "Item"}
|
||||||
|
|
||||||
|
rdb = redis.Redis(host='127.0.0.1', port='3033', decode_responses=True)
|
||||||
|
|
||||||
|
from whoosh.fields import Schema, TEXT, KEYWORD, ID, STORED
|
||||||
|
from whoosh.analysis import StemmingAnalyzer
|
||||||
|
from whoosh.index import create_in, exists_in, open_dir
|
||||||
|
|
||||||
|
# Index layout: ``title`` is analysed and stored, ``path`` is the unique
# document key (stored so that searches can return it), and ``content`` is
# analysed for searching but not stored.
schema = Schema(
    title=TEXT(stored=True), path=ID(stored=True, unique=True), content=TEXT
)
indexpath = "../index"

# makedirs(exist_ok=True) replaces the exists()/mkdir() pair, which could
# fail if the directory appeared between the check and the creation.
os.makedirs(indexpath, exist_ok=True)

# Re-use an existing index when there is one, otherwise create it.
if exists_in(indexpath):
    ix = open_dir(indexpath)
else:
    ix = create_in(indexpath, schema)

try:
    writer = ix.writer()
except Exception:
    # Catch Exception rather than using a bare ``except`` so that
    # KeyboardInterrupt and SystemExit still propagate normally.
    print("Index is locked.")
    sys.exit(1)
|
||||||
|
|
||||||
|
def getUUID(oid=None, oidtype=1):
    """Fetch the hash stored in the backend for one object.

    The backend key is ``<oidtype>:<oid>``. Returns ``None`` when ``oid``
    is ``None``; otherwise returns the result of ``HGETALL`` on that key.
    """
    if oid is not None:
        key = '{}:{}'.format(oidtype, oid)
        return rdb.hgetall(key)
    return None
|
||||||
|
|
||||||
|
# Number of sorted-set members fetched from the backend per round trip.
BATCH_SIZE = 100

# Hash fields whose text is appended, in this order, to both the stored
# title and the searchable content of a document.
DESCRIPTION_FIELDS = (
    'description',
    'mitre-cti:description',
    'github:description',
)

# Walk every object of every CyCAT type and (re)index it. The sorted set
# ``t:<type>`` enumerates the object identifiers of that type.
for ctype in cycat_type:
    zkey = "t:{}".format(ctype)
    card = rdb.zcard(zkey)
    for start in range(0, card, BATCH_SIZE):
        # ZRANGE bounds are inclusive on both ends, so the stop index is
        # start + BATCH_SIZE - 1; using start + BATCH_SIZE would fetch the
        # first member of the next batch and index it twice.
        stop = start + BATCH_SIZE - 1
        for item in rdb.zrange(zkey, start, stop):
            toindex = getUUID(oid=item, oidtype=ctype)
            print(toindex)
            title_parts = []
            content_parts = []
            if 'title' in toindex:
                title_parts.append(toindex['title'])
                content_parts.append(toindex['title'])
            if 'raw' in toindex:
                # Append rather than overwrite, so the title text collected
                # above stays part of the searchable content.
                content_parts.append(toindex['raw'])
            for field in DESCRIPTION_FIELDS:
                if field in toindex:
                    title_parts.append(toindex[field])
                    content_parts.append(toindex[field])
            # Join with a space so the last word of one field and the first
            # word of the next are not fused into a single token.
            writer.update_document(
                title=" ".join(title_parts),
                path=item,
                content=" ".join(content_parts),
            )

# A single commit after all types have been processed.
writer.commit()
|
|
@ -0,0 +1,20 @@
|
||||||
|
import argparse
|
||||||
|
from whoosh import index, qparser
|
||||||
|
from whoosh.fields import Schema, TEXT, ID
|
||||||
|
from whoosh.qparser import QueryParser
|
||||||
|
indexpath = "../index"
|
||||||
|
# Command-line full-text search: prints the stored ``path`` of every
# document matching the given query terms.
argParser = argparse.ArgumentParser(description="Full text search for cycat")
# -q is required: without it argparse leaves args.q as None and the script
# would crash with a TypeError instead of printing a usage message.
argParser.add_argument(
    "-q",
    action="append",
    required=True,
    help="query to lookup (one or more)",
)
args = argParser.parse_args()
ix = index.open_dir(indexpath)

with ix.searcher() as searcher:
    # Several -q values are joined into one query string whose terms must
    # all match. AndGroup is also QueryParser's default grouping, so a
    # single -q value is parsed exactly as before.
    parser = QueryParser("content", schema=ix.schema, group=qparser.AndGroup)
    query = parser.parse(" ".join(args.q))

    results = searcher.search(query, limit=None)
    for result in results:
        print(result['path'])
    # Summary line; printed while the searcher is still open.
    print(results)
|
Loading…
Reference in New Issue