diff --git a/import_sixxs_prefixes.py b/import_sixxs_prefixes.py index 7336e4a..8126ac3 100644 --- a/import_sixxs_prefixes.py +++ b/import_sixxs_prefixes.py @@ -15,34 +15,69 @@ class MyHTMLParser(HTMLParser): self.results = [] + self.th = False + self.prefix = False + self.name = False + self.org = False + self.website = False + def handle_starttag(self, tag, attrs): # print(f"Encountered a start tag: '{tag}'") - if tag == "tbody": - self.tbody_count += 1 - print(f"tbody: {self.tbody_count} {attrs}") + if tag == "th": + self.th = True - if self.tbody_count == 2: - self.in_table = True - print("in real table") - - elif self.in_table and tag == "tr": + if self.in_table and tag == "tr": self.in_row = True - print("in real table") + self.col_index = 0 elif self.in_row and tag == "td": - print("td data") + self.col_index += 1 + if self.col_index == 2: + self.name = True + elif self.col_index == 3: + self.org = True + + elif self.col_index == 1 and tag == "a": + self.prefix = True + elif self.col_index == 4 and tag == "a": + self.website = True def handle_endtag(self, tag): - pass - #print("Encountered an end tag :", tag) + if tag == "th": + self.th = False + elif tag == "tr": + self.in_row = False + def handle_data(self, data): - #print("Encountered some data :", data) - pass + if self.th and data == "Prefix": + print("Found table start") + self.in_table = True + if self.prefix: + self.record = {} + self.record['prefix'] = data + + self.results.append(self.record) + + self.prefix = False + + elif self.name: + self.record['name'] = data + self.name = False + elif self.org: + self.record['organization'] = data + self.org = False + elif self.website: + self.record['website'] = data + self.website = False + + def report(self): + for record in self.results: + print(record) if __name__ == '__main__': url = "https://www.sixxs.net/tools/grh/ula/list/" @@ -51,3 +86,5 @@ if __name__ == '__main__': html = "\n".join([ line.decode('utf-8') for line in response.readlines() ]) parser.feed(html) + + parser.report()