From f3d1ca052e2eb1a004a46fcfdf28b6e2abc7dd23 Mon Sep 17 00:00:00 2001
From: Alexandre Dulaunoy
Date: Mon, 11 Aug 2014 14:50:35 +0200
Subject: [PATCH 1/3] Return the number of indexed documents
---
bin/tests/indexer_lookup.py | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/bin/tests/indexer_lookup.py b/bin/tests/indexer_lookup.py
index 3b0a1e7a..09ae24c4 100644
--- a/bin/tests/indexer_lookup.py
+++ b/bin/tests/indexer_lookup.py
@@ -24,6 +24,7 @@ indexertype = cfg.get("Indexer", "type")
argParser = argparse.ArgumentParser(description='Fulltext search for AIL')
argParser.add_argument('-q', action='append', help='query to lookup (one or more)')
+argParser.add_argument('-n', action='store_true', default=False, help='Return numbers of document indexed')
args = argParser.parse_args()
from whoosh import index
@@ -33,6 +34,11 @@ schema = Schema(title=TEXT(stored=True), path=ID(stored=True), content=TEXT)
ix = index.open_dir(indexpath)
from whoosh.qparser import QueryParser
+
+if args.n:
+ print ix.doc_count_all()
+ exit(0)
+
if args.q is None:
argParser.print_help()
exit(1)
From f65a94d47b7af677576c2033296ee49646c0cc55 Mon Sep 17 00:00:00 2001
From: Alexandre Dulaunoy
Date: Mon, 11 Aug 2014 14:56:15 +0200
Subject: [PATCH 2/3] Add -l option to dump all indexed terms
---
bin/tests/indexer_lookup.py | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/bin/tests/indexer_lookup.py b/bin/tests/indexer_lookup.py
index 09ae24c4..93bbf00d 100644
--- a/bin/tests/indexer_lookup.py
+++ b/bin/tests/indexer_lookup.py
@@ -24,7 +24,8 @@ indexertype = cfg.get("Indexer", "type")
argParser = argparse.ArgumentParser(description='Fulltext search for AIL')
argParser.add_argument('-q', action='append', help='query to lookup (one or more)')
-argParser.add_argument('-n', action='store_true', default=False, help='Return numbers of document indexed')
+argParser.add_argument('-n', action='store_true', default=False, help='return numbers of indexed documents')
+argParser.add_argument('-l', action='store_true', default=False, help='dump all terms encountered in indexed documents')
args = argParser.parse_args()
from whoosh import index
@@ -39,6 +40,12 @@ if args.n:
print ix.doc_count_all()
exit(0)
+if args.l:
+ xr = ix.searcher().reader()
+ for x in xr.lexicon("content"):
+ print (x)
+ exit(0)
+
if args.q is None:
argParser.print_help()
exit(1)
From 0a6664ffbab58919f6766e2f3ac9d1e3ee49cd66 Mon Sep 17 00:00:00 2001
From: Alexandre Dulaunoy
Date: Mon, 11 Aug 2014 15:07:12 +0200
Subject: [PATCH 3/3] Indexer: add some index statistics

usage: indexer_lookup.py [-h] [-q Q] [-n] [-t] [-l]
Fulltext search for AIL
optional arguments:
-h, --help show this help message and exit
-q Q query to lookup (one or more)
-n return number of indexed documents
-t dump top 500 terms
-l dump all terms encountered in indexed documents
---
bin/tests/indexer_lookup.py | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/bin/tests/indexer_lookup.py b/bin/tests/indexer_lookup.py
index 93bbf00d..305ae236 100644
--- a/bin/tests/indexer_lookup.py
+++ b/bin/tests/indexer_lookup.py
@@ -25,6 +25,7 @@ indexertype = cfg.get("Indexer", "type")
argParser = argparse.ArgumentParser(description='Fulltext search for AIL')
argParser.add_argument('-q', action='append', help='query to lookup (one or more)')
argParser.add_argument('-n', action='store_true', default=False, help='return numbers of indexed documents')
+argParser.add_argument('-t', action='store_true', default=False, help='dump top 500 terms')
argParser.add_argument('-l', action='store_true', default=False, help='dump all terms encountered in indexed documents')
args = argParser.parse_args()
@@ -46,6 +47,12 @@ if args.l:
print (x)
exit(0)
+if args.t:
+ xr = ix.searcher().reader()
+ for x in xr.most_frequent_terms("content", number=500, prefix=''):
+ print (x)
+ exit(0)
+
if args.q is None:
argParser.print_help()
exit(1)
@@ -54,5 +61,5 @@ with ix.searcher() as searcher:
query = QueryParser("content", ix.schema).parse(" ".join(args.q))
results = searcher.search(query, limit=None)
for x in results:
- print x
+ print (x)