add first version of import script
This commit is contained in:
parent
ccc7053eb4
commit
403b53e406
1 changed files with 53 additions and 0 deletions
53
import_sixxs_prefixes.py
Normal file
53
import_sixxs_prefixes.py
Normal file
|
@@ -0,0 +1,53 @@
|
|||
from html.parser import HTMLParser
|
||||
import urllib.request
|
||||
|
||||
# https://www.sixxs.net/tools/grh/ula/list/
|
||||
|
||||
class MyHTMLParser(HTMLParser):
    """Collect the cell text of every row in the second ``<tbody>`` of a page.

    The parser counts ``<tbody>`` start tags and treats the second one as the
    table of interest. Every ``<tr>`` inside that table that contains at
    least one ``<td>`` is stored in ``results`` as a list of stripped cell
    strings, in document order. Rows without ``<td>`` cells (for example
    header rows made of ``<th>``) are skipped.

    Progress messages are printed to stdout while parsing.
    """

    def __init__(self):
        super().__init__()

        self.tbody_count = 0   # number of <tbody> start tags seen so far
        self.in_table = False  # True only while inside the second <tbody>
        self.in_row = False    # True while inside a <tr> of that table
        self.in_cell = False   # True while inside a <td> of that row

        self.col_index = 0     # number of cells completed in the current row

        self.results = []        # one list of cell strings per finished row
        self._current_row = []   # cells of the row being parsed
        self._current_cell = []  # text fragments of the cell being parsed

    def handle_starttag(self, tag, attrs):
        """Track entry into the target table, its rows and its cells."""
        if tag == "tbody":
            self.tbody_count += 1
            print(f"tbody: {self.tbody_count} {attrs}")

            if self.tbody_count == 2:
                self.in_table = True
                print("in real table")

        elif self.in_table and tag == "tr":
            # HTML allows </tr> to be omitted, so a new row implicitly
            # closes any row that is still open.
            self._finish_row()
            self.in_row = True
            self.col_index = 0
            # NOTE(review): this message duplicates the one printed when the
            # table starts; it looks like it was meant to announce a row.
            print("in real table")

        elif self.in_row and tag == "td":
            # Likewise a new cell implicitly closes an unterminated one.
            self._finish_cell()
            self.in_cell = True
            print("td data")

    def handle_endtag(self, tag):
        """Close the current cell, row or table when its end tag is seen."""
        if tag == "td":
            self._finish_cell()
        elif tag == "tr":
            self._finish_row()
        elif tag == "tbody" and self.in_table:
            # Flush a trailing row whose </tr> was omitted, then stop
            # collecting so later tables on the page are ignored.
            self._finish_row()
            self.in_table = False

    def handle_data(self, data):
        """Accumulate text that appears inside a cell of the target table."""
        if self.in_cell:
            self._current_cell.append(data)

    def _finish_cell(self):
        """Store the open cell's stripped text in the current row, if any."""
        if not self.in_cell:
            return
        self._current_row.append("".join(self._current_cell).strip())
        self._current_cell = []
        self.col_index += 1
        self.in_cell = False

    def _finish_row(self):
        """Append the open row to ``results`` (if it has cells) and reset."""
        self._finish_cell()
        if not self.in_row:
            return
        if self._current_row:
            self.results.append(self._current_row)
        self._current_row = []
        self.in_row = False
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Page to import from; presumably the SixXS listing of ULA prefixes,
    # judging by the URL path (not verified here).
    url = "https://www.sixxs.net/tools/grh/ula/list/"
    parser = MyHTMLParser()

    # The context manager closes the HTTP response even if reading or
    # decoding fails. The body is decoded in one piece so the original line
    # terminators are kept as-is instead of being doubled by a join.
    with urllib.request.urlopen(url) as response:
        html = response.read().decode('utf-8')

    parser.feed(html)
    # Flush any text the parser is still buffering at end of input.
    parser.close()
|
Loading…
Reference in a new issue