diff --git a/backend/bin/server.py b/backend/bin/server.py
index d0e1d18..7776d21 100644
--- a/backend/bin/server.py
+++ b/backend/bin/server.py
@@ -14,7 +14,18 @@
 
 cycat_type = {"1": "Publisher", "2": "Project", "3": "Item"}
 r = redis.Redis(host='127.0.0.1', port='3033', decode_responses=True)
 
-# genericc lib - TODO: move to cycat Python library
+# full-text part (/search API)
+
+from whoosh import index, qparser
+from whoosh.fields import Schema, TEXT, ID
+from whoosh.qparser import QueryParser
+# Relative to the working directory: the server must be started from
+# backend/bin (run.sh does the cd) so that this resolves to backend/index.
+indexpath = "../index"
+ix = index.open_dir(indexpath)
+
+
+# generic lib - TODO: move to cycat Python library
 def _validate_uuid(value=None):
     if uuid is None:
@@ -182,5 +193,25 @@ class propose(Resource):
         r.rpush("proposal", json.dumps(x))
         return {'message': 'Proposal submitted'}, 200
 
+# The <string:searchquery> URL segment is what fills get()'s argument;
+# without it no query is ever passed and the None branch always runs.
+@api.route('/search/<string:searchquery>')
+@api.doc(description="Full-text search in CyCAT and return matching UUID.")
+class search(Resource):
+    # GET handler: returns the stored 'path' value of every document in the
+    # Whoosh index whose 'content' field matches the query.
+    def get(self, searchquery=None):
+        # Only reachable when the method is called without the URL segment;
+        # answer with an explicit client error rather than a null body.
+        if searchquery is None:
+            return {'message': 'Search query is missing'}, 400
+        with ix.searcher() as searcher:
+            query = QueryParser("content", ix.schema).parse(searchquery)
+            # limit=None: return every hit instead of only the top results.
+            results = searcher.search(query, limit=None)
+            # Collect the stored fields while the searcher is still open.
+            return [result['path'] for result in results]
+
+
 if __name__ == '__main__':
     app.run()
diff --git a/backend/run.sh b/backend/run.sh
index 51f7216..af9ea70 100644
--- a/backend/run.sh
+++ b/backend/run.sh
@@ -1,2 +1,4 @@
 ./kvrocks/src/kvrocks -c ./etc/kvrocks.conf
-python3.8 ./bin/server.py
+# server.py opens its Whoosh index through the relative path ../index.
+cd bin || exit 1
+python3.8 server.py
diff --git a/backend/sbin/indexer.py b/backend/sbin/indexer.py
new file mode 100644
index 0000000..f022147
--- /dev/null
+++ b/backend/sbin/indexer.py
@@ -0,0 +1,83 @@
+import redis
+import os
+import sys
+
+cycat_type = {"1": "Publisher", "2": "Project", "3": "Item"}
+
+rdb = redis.Redis(host='127.0.0.1', port='3033', decode_responses=True)
+
+from whoosh.fields import Schema, TEXT, KEYWORD, ID, STORED
+from whoosh.analysis import StemmingAnalyzer
+from whoosh.index import create_in, exists_in, open_dir
+
+# title and path are declared stored=True so search hits can return them.
+schema = Schema(
+    title=TEXT(stored=True), path=ID(stored=True, unique=True), content=TEXT
+)
+indexpath = "../index"
+if not os.path.exists(indexpath):
+    os.mkdir(indexpath)
+
+if not exists_in(indexpath):
+    ix = create_in(indexpath, schema)
+else:
+    ix = open_dir(indexpath)
+
+try:
+    writer = ix.writer()
+except Exception:
+    # Not a bare except, so KeyboardInterrupt/SystemExit are not swallowed.
+    print("Index is locked.")
+    sys.exit(1)
+
+# Number of sorted-set members fetched per ZRANGE call.
+BATCH_SIZE = 100
+# Hash fields appended to both the stored title and the indexed content.
+DESCRIPTION_FIELDS = (
+    'description',
+    'mitre-cti:description',
+    'github:description',
+)
+
+
+def getUUID(oid=None, oidtype=1):
+    """Return the hash stored at '<oidtype>:<oid>' (None when oid is None)."""
+    if oid is None:
+        return None
+    return rdb.hgetall('{}:{}'.format(oidtype, oid))
+
+
+try:
+    for ctype in cycat_type:
+        key = "t:{}".format(ctype)
+        card = rdb.zcard(key)
+        for start in range(0, card, BATCH_SIZE):
+            # ZRANGE bounds are both inclusive: stopping at start + BATCH_SIZE
+            # would fetch the first member of the next batch a second time.
+            for item in rdb.zrange(key, start, start + BATCH_SIZE - 1):
+                toindex = getUUID(oid=item, oidtype=ctype)
+                print(toindex)
+                title_parts = []
+                content_parts = []
+                if 'title' in toindex:
+                    title_parts.append(toindex['title'])
+                    content_parts.append(toindex['title'])
+                if 'raw' in toindex:
+                    # Appended, not assigned, so the title stays searchable.
+                    content_parts.append(toindex['raw'])
+                for field in DESCRIPTION_FIELDS:
+                    if field in toindex:
+                        title_parts.append(toindex[field])
+                        content_parts.append(toindex[field])
+                # Joined with a space so the last word of one field is not
+                # fused with the first word of the next one.
+                writer.update_document(
+                    title=" ".join(title_parts),
+                    path=item,
+                    content=" ".join(content_parts),
+                )
+except BaseException:
+    # Discard the pending changes and unlock the index, then re-raise.
+    writer.cancel()
+    raise
+writer.commit()
diff --git a/backend/sbin/search.py b/backend/sbin/search.py
new file mode 100644
index 0000000..b85a00b
--- /dev/null
+++ b/backend/sbin/search.py
@@ -0,0 +1,26 @@
+import argparse
+from whoosh import index, qparser
+from whoosh.fields import Schema, TEXT, ID
+from whoosh.qparser import QueryParser
+indexpath = "../index"
+argParser = argparse.ArgumentParser(description="Full text search for cycat")
+# required=True: without -q argparse leaves args.q as None and the
+# len(args.q) test below would raise a TypeError.
+argParser.add_argument(
+    "-q", action="append", required=True, help="query to lookup (one or more)"
+)
+args = argParser.parse_args()
+ix = index.open_dir(indexpath)
+
+with ix.searcher() as searcher:
+    # All -q values are merged into one query string; with several values
+    # the parser is told explicitly to AND the terms together.
+    if len(args.q) == 1:
+        query = QueryParser("content", ix.schema).parse(" ".join(args.q))
+    else:
+        query = QueryParser("content", schema=ix.schema, group=qparser.AndGroup).parse(" ".join(args.q))
+
+    results = searcher.search(query, limit=None)
+    for result in results:
+        print(result['path'])
+    print(results)