add first version of import script

This commit is contained in:
Nico Schottelius 2020-12-05 12:35:01 +01:00
parent ccc7053eb4
commit 403b53e406

53
import_sixxs_prefixes.py Normal file
View file

@ -0,0 +1,53 @@
from html.parser import HTMLParser
import urllib.request
# https://www.sixxs.net/tools/grh/ula/list/
class MyHTMLParser(HTMLParser):
    """Collect the cell text of every row in the second ``<tbody>`` fed in.

    After ``feed()`` (and ``close()``), ``results`` holds one list per
    ``<tr>`` that contained at least one ``<td>``; each list holds the
    whitespace-stripped text of that row's cells, in document order.
    Rows made up only of other cells (e.g. ``<th>`` headers) are skipped.

    NOTE(review): targeting the *second* ``<tbody>`` presumably matches the
    layout of the SixXS ULA list page -- confirm against the live markup.
    """

    def __init__(self):
        super().__init__()
        self.tbody_count = 0    # number of <tbody> start tags seen so far
        self.in_table = False   # True while inside the second <tbody>
        self.in_row = False     # True while inside a <tr> of that tbody
        self.in_cell = False    # True while inside a <td> of that row
        self.col_index = 0      # index of the next cell in the current row
        self.results = []       # finished rows: list of lists of str
        self._row = []          # cells of the row being collected
        self._cell = []         # text fragments of the cell being collected

    def handle_starttag(self, tag, attrs):
        """Track entry into the target tbody, its rows and their cells."""
        if tag == "tbody":
            self.tbody_count += 1
            print(f"tbody: {self.tbody_count} {attrs}")
            if self.tbody_count == 2:
                self.in_table = True
                print("in real table")
        elif self.in_table and tag == "tr":
            # A new row implicitly ends a previous one whose </tr> was omitted.
            self._finish_row()
            self.in_row = True
            self.col_index = 0
            print("in real table")
        elif self.in_row and tag == "td":
            # Likewise a new cell implicitly ends an unclosed previous cell.
            self._finish_cell()
            self.in_cell = True
            print("td data")

    def handle_endtag(self, tag):
        """Leave the cell/row/table state when the matching tag closes."""
        if tag == "td" and self.in_cell:
            self._finish_cell()
        elif tag == "tr" and self.in_row:
            self._finish_row()
        elif tag == "tbody" and self.in_table:
            self._finish_row()
            # Without this reset, rows of every later table would be
            # collected as well.
            self.in_table = False

    def handle_data(self, data):
        """Accumulate text that appears inside a cell of the target table."""
        if self.in_cell:
            # Text may arrive in several chunks (e.g. around nested tags).
            self._cell.append(data)

    def _finish_cell(self):
        """Store the text of the current cell, if one is open."""
        if not self.in_cell:
            return
        self._row.append("".join(self._cell).strip())
        self._cell = []
        self.in_cell = False
        self.col_index += 1

    def _finish_row(self):
        """Store the current row in ``results``, if one is open."""
        self._finish_cell()
        if not self.in_row:
            return
        if self._row:
            self.results.append(self._row)
        self._row = []
        self.in_row = False
if __name__ == '__main__':
    # Fetch the ULA registry listing and run it through the table parser.
    url = "https://www.sixxs.net/tools/grh/ula/list/"
    parser = MyHTMLParser()
    # The context manager closes the HTTP connection even if reading fails.
    with urllib.request.urlopen(url) as response:
        # Decode the body in one go: readlines() keeps each line's trailing
        # newline, so re-joining the lines with "\n" doubled every line break.
        html = response.read().decode('utf-8')
    parser.feed(html)
    # Flush any text the parser is still buffering at end of input.
    parser.close()