Add config file for DomainClassifier, proper reporting

pull/38/head
Raphaël Vinot 2014-09-17 17:19:03 +02:00
parent f017680365
commit 65b9a01644
2 changed files with 16 additions and 4 deletions

View File

@ -28,6 +28,10 @@ def main():
publisher.info("""ZMQ DomainClassifier is Running""") publisher.info("""ZMQ DomainClassifier is Running""")
c = DomainClassifier.domainclassifier.Extract(rawtext="") c = DomainClassifier.domainclassifier.Extract(rawtext="")
cc = p.config.get("DomClassifier", "cc")
cc_tld = p.config.get("DomClassifier", "cc_tld")
while True: while True:
try: try:
message = p.get_from_set() message = p.get_from_set()
@ -44,12 +48,16 @@ def main():
c.text(rawtext=paste) c.text(rawtext=paste)
c.potentialdomain() c.potentialdomain()
c.validdomain(rtype=['A'], extended=True) c.validdomain(rtype=['A'], extended=True)
localizeddomains = c.include(expression=r'\.lu$') localizeddomains = c.include(expression=cc_tld)
if localizeddomains: if localizeddomains:
print(localizeddomains) print(localizeddomains)
localizeddomains = c.localizedomain(cc='LU') publisher.warning('DomainC;{};{};{};Checked {} located in {}'.format(
PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc_tld))
localizeddomains = c.localizedomain(cc=cc)
if localizeddomains: if localizeddomains:
print(localizeddomains) print(localizeddomains)
publisher.warning('DomainC;{};{};{};Checked {} located in {}'.format(
PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc))
except IOError: except IOError:
print "CRC Checksum Failed on :", PST.p_path print "CRC Checksum Failed on :", PST.p_path
publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format( publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(

View File

@ -34,6 +34,10 @@ db = 1
[Url] [Url]
cc_critical = DE cc_critical = DE
# DomainClassifier configuration
[DomClassifier]
# Country code handed to localizedomain(cc=...) by the DomainClassifier module.
cc = DE
# Regular expression handed to include(expression=...) by the same module.
# The value is read verbatim as a string, so write the bare pattern here:
# a Python r'...' prefix or quotes would become part of the expression.
cc_tld = \.de$
# Indexer configuration # Indexer configuration
[Indexer] [Indexer]
type = whoosh type = whoosh