diff --git a/generate_all.sh b/generate_all.sh
index cb33900..20c4364 100755
--- a/generate_all.sh
+++ b/generate_all.sh
@@ -36,6 +36,7 @@ python3 generate-tenable.py
 python3 generate-microsoft-azure-appid.py
 python3 generate-chrome-crux-1m.py
 python3 generate-digitalside.py
+python3 generate-gptbot.py
 popd
 
 ./jq_all_the_things.sh
diff --git a/lists/openai-gptbot/list.json b/lists/openai-gptbot/list.json
new file mode 100644
index 0000000..7d39f78
--- /dev/null
+++ b/lists/openai-gptbot/list.json
@@ -0,0 +1,20 @@
+{
+  "description": "OpenAI gptbot crawler (https://openai.com/gptbot-ranges.txt)",
+  "list": [
+    "20.15.240.176/28",
+    "20.15.240.64/27",
+    "20.15.240.96/28",
+    "20.15.241.0/28",
+    "20.15.242.128/27",
+    "20.15.242.192/28",
+    "40.83.2.64/28"
+  ],
+  "matching_attributes": [
+    "ip-src",
+    "ip-dst",
+    "domain|ip"
+  ],
+  "name": "List of known IP address ranges for OpenAI GPT crawler bot",
+  "type": "cidr",
+  "version": 20230808
+}
diff --git a/tools/generate-gptbot.py b/tools/generate-gptbot.py
new file mode 100755
index 0000000..cb760cc
--- /dev/null
+++ b/tools/generate-gptbot.py
@@ -0,0 +1,41 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import json
+
+from generator import download_to_file, get_version, write_to_file, get_abspath_source_file, consolidate_networks
+
+
+def process(file, dst):
+    """Build the OpenAI GPTBot warninglist from a downloaded range file.
+
+    `file` is the name of the downloaded source file; it is resolved with
+    get_abspath_source_file() and read as text, one CIDR range per line.
+    `dst` is handed to write_to_file() together with the warninglist.
+    """
+    with open(get_abspath_source_file(file), 'r') as freetext_file:
+        # Strip every line and drop the empty ones, so that a blank or
+        # trailing line in the download is never handed to
+        # consolidate_networks() as an empty string.
+        cidrs = [line.strip() for line in freetext_file if line.strip()]
+
+    warninglist = {
+        'name': 'List of known IP address ranges for OpenAI GPT crawler bot',
+        'version': get_version(),
+        'description': 'OpenAI gptbot crawler (https://openai.com/gptbot-ranges.txt)',
+        'type': 'cidr',
+        'list': consolidate_networks(cidrs),
+        'matching_attributes': ["ip-src", "ip-dst", "domain|ip"]
+    }
+
+    write_to_file(warninglist, dst)
+
+
+if __name__ == '__main__':
+    gptbot_url = "https://openai.com/gptbot-ranges.txt"
+    # NOTE: the download is a plain-text list, despite the ".json" suffix.
+    gptbot_file = "openai-gptbot-ranges.json"
+    gptbot_dst = "openai-gptbot"
+
+    download_to_file(gptbot_url, gptbot_file)
+    process(gptbot_file, gptbot_dst)