-f option added: dump full document for each match

pull/12/head
Alexandre Dulaunoy 2014-08-12 13:26:56 +02:00
parent 6d196e5022
commit fd6e1a8436
1 changed file with 14 additions and 2 deletions

View File

@ -13,6 +13,13 @@
import ConfigParser import ConfigParser
import argparse import argparse
import sys import sys
import gzip
def readdoc(path=None):
    """Return the full decompressed contents of the gzip file at *path*.

    path -- filesystem path of a gzip-compressed document.

    Returns the data produced by reading the whole file, or False when
    no path is given (kept so existing callers see the same result).
    Errors raised by gzip.open()/read() for a missing or corrupt file
    propagate to the caller unchanged.
    """
    if path is None:
        return False
    # Read inside a context manager so the handle is closed even if read()
    # raises; this function is called once per search hit, so an unclosed
    # handle would leak one file descriptor per matching document.
    with gzip.open(path, 'rb') as f:
        return f.read()
configfile = '../packages/config.cfg' configfile = '../packages/config.cfg'
cfg = ConfigParser.ConfigParser() cfg = ConfigParser.ConfigParser()
@ -27,6 +34,8 @@ argParser.add_argument('-q', action='append', help='query to lookup (one or more
argParser.add_argument('-n', action='store_true', default=False, help='return numbers of indexed documents') argParser.add_argument('-n', action='store_true', default=False, help='return numbers of indexed documents')
argParser.add_argument('-t', action='store_true', default=False, help='dump top 500 terms') argParser.add_argument('-t', action='store_true', default=False, help='dump top 500 terms')
argParser.add_argument('-l', action='store_true', default=False, help='dump all terms encountered in indexed documents') argParser.add_argument('-l', action='store_true', default=False, help='dump all terms encountered in indexed documents')
argParser.add_argument('-f', action='store_true', default=False, help='dump each matching document')
args = argParser.parse_args() args = argParser.parse_args()
from whoosh import index from whoosh import index
@ -61,5 +70,8 @@ with ix.searcher() as searcher:
query = QueryParser("content", ix.schema).parse(" ".join(args.q)) query = QueryParser("content", ix.schema).parse(" ".join(args.q))
results = searcher.search(query, limit=None) results = searcher.search(query, limit=None)
for x in results: for x in results:
print (x) if args.f:
print (readdoc(path=x.items()[0][1]))
else:
print (x.items()[0][1])
print