add: [api/backend] new full-text indexer
First version using Python Whoosh (maybe not optimal in the long run). The indexer runs by enumerating the item(s) from the CyCAT backend.
main
parent
5e0df4b667
commit
44d2176a23
|
@ -14,7 +14,16 @@ cycat_type = {"1": "Publisher", "2": "Project", "3": "Item"}
|
|||
|
||||
r = redis.Redis(host='127.0.0.1', port='3033', decode_responses=True)
|
||||
|
||||
# genericc lib - TODO: move to cycat Python library
|
||||
# full-text part (/search API)
|
||||
|
||||
from whoosh import index, qparser
|
||||
from whoosh.fields import Schema, TEXT, ID
|
||||
from whoosh.qparser import QueryParser
|
||||
indexpath = "../index"
|
||||
ix = index.open_dir(indexpath)
|
||||
|
||||
|
||||
# generic lib - TODO: move to cycat Python library
|
||||
|
||||
def _validate_uuid(value=None):
|
||||
if uuid is None:
|
||||
|
@ -182,5 +191,18 @@ class propose(Resource):
|
|||
r.rpush("proposal", json.dumps(x))
|
||||
return {'message': 'Proposal submitted'}, 200
|
||||
|
||||
# Full-text search endpoint: GET /search/<searchquery>.
# NOTE: documentation is kept in `#` comments on purpose so the generated API
# documentation for this resource is not altered by new docstrings.
@api.route('/search/<string:searchquery>')
@api.doc(description="Full-text search in CyCAT and return matching UUID.")
class search(Resource):
    # Parse `searchquery` against the "content" field of the module-level
    # Whoosh index `ix` and return the stored `path` of every hit as a list.
    # Returns None when no query is supplied.
    def get(self, searchquery=None):
        if searchquery is None:
            return None
        parser = QueryParser("content", ix.schema)
        # Hits have to be read while the searcher is still open, so the
        # result list is built inside the `with` block.
        with ix.searcher() as searcher:
            hits = searcher.search(parser.parse(searchquery), limit=None)
            return [hit['path'] for hit in hits]
|
||||
if __name__ == '__main__':
|
||||
app.run()
|
||||
|
|
|
@ -1,2 +1,3 @@
|
|||
./kvrocks/src/kvrocks -c ./etc/kvrocks.conf
|
||||
python3.8 ./bin/server.py
|
||||
cd bin
|
||||
python3.8 server.py
|
||||
|
|
|
@ -0,0 +1,61 @@
|
|||
import redis
|
||||
import os
|
||||
import sys
|
||||
|
||||
cycat_type = {"1": "Publisher", "2": "Project", "3": "Item"}
|
||||
|
||||
rdb = redis.Redis(host='127.0.0.1', port='3033', decode_responses=True)
|
||||
|
||||
from whoosh.fields import Schema, TEXT, KEYWORD, ID, STORED
|
||||
from whoosh.analysis import StemmingAnalyzer
|
||||
from whoosh.index import create_in, exists_in, open_dir
|
||||
|
||||
# Whoosh schema for one indexed object: `title` and `path` are declared with
# stored=True, `path` is additionally declared unique, and `content` is a
# plain TEXT field.
schema = Schema(
    title=TEXT(stored=True), path=ID(stored=True, unique=True), content=TEXT
)
indexpath = "../index"
# exist_ok=True removes the check-then-create race of
# os.path.exists() followed by os.mkdir().
os.makedirs(indexpath, exist_ok=True)

# Reuse the index found in `indexpath` when there is one, otherwise create it.
if exists_in(indexpath):
    ix = open_dir(indexpath)
else:
    ix = create_in(indexpath, schema)

try:
    writer = ix.writer()
except Exception as err:
    # `except Exception` instead of a bare `except:` so that
    # KeyboardInterrupt / SystemExit are not swallowed here; the underlying
    # error is also reported so a non-lock failure is not misdiagnosed.
    print("Index is locked.")
    print(err, file=sys.stderr)
    sys.exit(1)
|
||||
|
||||
def getUUID(oid=None, oidtype=1):
    """Fetch the hash stored under the key ``<oidtype>:<oid>``.

    Returns None when `oid` is None; otherwise returns whatever
    ``rdb.hgetall`` yields for that key.
    """
    if oid is not None:
        return rdb.hgetall(f'{oidtype}:{oid}')
    return None
|
||||
|
||||
def _build_document(fields):
    """Return the ``(title, content)`` strings to index for one object hash.

    `title` is the object's title followed by its description fields;
    `content` additionally includes the `raw` field. Parts are joined with a
    single space so that the last word of one field and the first word of the
    next are not fused into one token.
    """
    title_parts = []
    content_parts = []
    if 'title' in fields:
        title_parts.append(fields['title'])
        content_parts.append(fields['title'])
    if 'raw' in fields:
        # Appended rather than assigned, so the title added above is kept.
        content_parts.append(fields['raw'])
    for key in ('description', 'mitre-cti:description', 'github:description'):
        if key in fields:
            title_parts.append(fields[key])
            content_parts.append(fields[key])
    return " ".join(title_parts), " ".join(content_parts)


# Walk every sorted set "t:<type>" in pages and (re)index each member.
batch_size = 100
for ctype in cycat_type:
    type_key = "t:{}".format(ctype)
    card = rdb.zcard(type_key)
    for start in range(0, card, batch_size):
        # ZRANGE bounds are inclusive: stop at start + batch_size - 1 so the
        # first member of the next page is not fetched and indexed twice.
        for item in rdb.zrange(type_key, start, start + batch_size - 1):
            toindex = getUUID(oid=item, oidtype=ctype)
            print(toindex)
            title, content = _build_document(toindex)
            writer.update_document(title=title, path=item, content=content)
writer.commit()
|
|
@ -0,0 +1,20 @@
|
|||
import argparse
|
||||
from whoosh import index, qparser
|
||||
from whoosh.fields import Schema, TEXT, ID
|
||||
from whoosh.qparser import QueryParser
|
||||
indexpath = "../index"
|
||||
# Command-line full-text search: every -q value is joined into one query and
# the stored `path` of each hit is printed, followed by the results object.
argParser = argparse.ArgumentParser(description="Full text search for cycat")
# required=True: without any -q, args.q would be None and the search below
# would fail with a TypeError instead of a usage message.
argParser.add_argument(
    "-q",
    action="append",
    required=True,
    help="query to lookup (one or more)",
)
args = argParser.parse_args()
ix = index.open_dir(indexpath)

with ix.searcher() as searcher:
    # AndGroup is passed explicitly so that all terms, from all -q values,
    # must match.
    parser = QueryParser("content", schema=ix.schema, group=qparser.AndGroup)
    query = parser.parse(" ".join(args.q))
    results = searcher.search(query, limit=None)
    for result in results:
        print(result['path'])
    print(results)
|
Loading…
Reference in New Issue