Fix issues in convert.py

- Replace all query with objects
- Reformat code

This commit is contained in:
parent e21450730d
commit 8685c4a7a2

1 changed file with 29 additions and 19 deletions
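The heart of the change is swapping SQLAlchemy-style queries (Model.query.filter_by(...)) for Django-style manager calls (Model.objects.filter(...)). A minimal sketch of the two idioms side by side, using the Person model from app.models (the lookup value is made up):

    from app.models import Person

    # Old idiom (SQLAlchemy): models carry a .query property,
    # and filter_by() takes keyword equality filters.
    person = Person.query.filter_by(last_name='Humboldt').first()

    # New idiom (Django ORM): models carry an .objects manager,
    # and filter() takes the same keyword equality filters.
    person = Person.objects.filter(last_name='Humboldt').first()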
@@ -1,4 +1,6 @@
-import csv, json, re
+import csv
+import json
+import re
 from os.path import isfile
 from app.models import *
 from .formats import *
@@ -12,6 +14,7 @@ def fix_bracketed_lists(data):
         data = data.replace(fix, fix.replace(',', ' /'))
     return data
 
+
 # Check for valid link
 def fix_url(link):
     if len(link) > 3 and not link.startswith('http'):
@@ -19,31 +22,37 @@ def fix_url(link):
     # link = link.split(';')[0]
     return link
 
+
 # Create linked objects
 def add_linked(person, field, obj, data):
     # TODO: fuzzy matching instead of lower()
     items = fix_bracketed_lists(data).lower()
-    items = items.replace(';',',').split(',')
+    items = items.replace(';', ',').split(',')
     for i in items:
         n = i.strip()
-        if len(n)<3: continue
-        tgt = obj.query.filter_by(name=n).first()
+        if len(n) < 3:
+            continue
+        tgt = obj.objects.filter(name=n).first()
         if not tgt:
             tgt = obj()
             tgt.name = n
             tgt.save()
         field.append(tgt)
 
+
 # Fetch an object by source_id (numeric identifier used in source DB)
 def get_by_id(rowid, obj, first=True):
     if type(rowid) is str and rowid.isdigit():
         rowid = int(rowid)
     if type(rowid) is int:
-        l = obj.query.filter_by(source_id=rowid)
-        if first: return l.first(), rowid
-        else: return l, rowid
+        l = obj.objects.filter(source_id=rowid)
+        if first:
+            return l.first(), rowid
+        else:
+            return l, rowid
     return None, None
 
+
 # Quick check of the number of lines
 def get_total_rows_csv(filename):
     with open(filename) as f:
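The rewritten add_linked() is a hand-rolled filter-or-create: look the item up by name, create and save it when missing, then append it to the related field. With a Django-style manager the first two steps can also be expressed with the built-in get_or_create(); a sketch assuming the models expose a plain name field, with a made-up tag value:

    from app.models import Method

    # get_or_create() returns an (object, created) pair, collapsing
    # the filter(...).first() check and the explicit save() above.
    tgt, created = Method.objects.get_or_create(name='remote sensing')

Note that get_or_create() matches the name exactly, so the lower()/strip() normalisation done in add_linked() is still doing real work.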
@@ -51,12 +60,14 @@ def get_total_rows_csv(filename):
         pass
     return i + 1
 
+
 # Search index routine
 def reindex_data():
-    for i, p in enumerate(Person.query.all()):
+    for i, p in enumerate(Person.objects.all()):
         p.index()
         p.save()
 
+
 # Data update routine
 def refresh_data(filename, fmt=None):
     count = 0
@@ -64,17 +75,16 @@ def refresh_data(filename, fmt=None):
     if not isfile(filename):
         msg = "Missing data: %s - refresh aborted." % fmt['filename']
         print(msg)
-        yield(msg, "error")
+        yield msg, "error"
         return None
     if fmt['extension'] == 'csv':
         totalrows = get_total_rows_csv(filename)
         with open(filename, 'rt', encoding='utf-8', errors='ignore') as csvfile:
-
             datareader = csv.DictReader(csvfile)
             for row in datareader:
                 rowcount += 1
                 if row is None: continue
-                yield rowcount, rowcount/totalrows
+                yield rowcount, rowcount / totalrows
 
                 # # Ensure any new data is flushed from time to time
                 # if count % 25 == 0:
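Both yield statements make refresh_data() a generator: it hands the caller either a (message, "error") tuple or a (rowcount, fraction-done) tuple, which is what a progress display can consume. A minimal consumer sketch; the filename is a placeholder and fmt is assumed to be one of the format dicts from .formats:

    for value, status in refresh_data('people.csv', fmt):
        if status == "error":
            print("Refresh aborted:", value)
            break
        print("row %d, %.0f%% done" % (value, status * 100))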
@@ -84,13 +94,13 @@ def refresh_data(filename, fmt=None):
             if not r in row:
                 msg = "Missing attribute in %s (%s)" % (r, fmt['filename'])
                 print(msg)
-                yield(msg, "error")
+                yield msg, "error"
                 return None
 
             if fmt['dataformat'] is DataFormat.PERSON_DETAIL:
                 person, source_id = get_by_id(row['ID'], Person)
                 if not person:
-                    person = Person.query.filter_by(first_name=row['First name'], last_name=row['Last name']).first()
+                    person = Person.objects.filter(first_name=row['First name'], last_name=row['Last name']).first()
                 if not person:
                     person = Person(first_name=row['First name'], last_name=row['Last name'], source_id=row['ID'])
 
@@ -106,9 +116,9 @@ def refresh_data(filename, fmt=None):
 
                 with transaction.atomic():
                     add_linked(person, person.research_methods, Method, row['Methods'])
-                add_linked(person, person.research_scales, Scale, row['Scale'])
-                add_linked(person, person.research_taxa, Taxon, row['Taxa'])
-                add_linked(person, person.research_fields, Field, row['Field of expertise'])
+                    add_linked(person, person.research_scales, Scale, row['Scale'])
+                    add_linked(person, person.research_taxa, Taxon, row['Taxa'])
+                    add_linked(person, person.research_fields, Field, row['Field of expertise'])
 
                 person.index()
                 person.save()
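transaction.atomic() is Django's transaction context manager: every save() issued inside the with block commits or rolls back as a unit, so a half-linked person is never left behind. It relies on the standard import:

    from django.db import transaction

    with transaction.atomic():
        # An exception anywhere in here rolls back all of the
        # linked-record saves performed by add_linked().
        add_linked(person, person.research_methods, Method, row['Methods'])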
@@ -119,7 +129,7 @@ def refresh_data(filename, fmt=None):
                 if not res: res = Resource(source_id=source_id)
                 res.title = row['Title']
                 res.citation = row['Citation']
-                res.url = fix_url(row['URL'].strip('#')) # remove weird #formatting#
+                res.url = fix_url(row['URL'].strip('#'))  # remove weird #formatting#
                 res.abstract = row['Abstract']
                 res.save()
                 count = count + 1
@@ -160,11 +170,11 @@ def refresh_data(filename, fmt=None):
         if fmt['dataformat'] is DataFormat.RANGE_SHAPES:
             totalrows = len(jsondata['features'])
             for f in jsondata['features']:
-                yield count, count/totalrows
+                yield count, count / totalrows
                 count = count + 1
 
                 p = f['properties']
-                rge = Range.query.filter_by(gmba_id=p['GMBA_ID']).first()
+                rge = Range.objects.filter(gmba_id=p['GMBA_ID']).first()
                 if not rge:
                     ranges_missing.append(p['GMBA_ID'])
                     continue
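A detail worth noting in the new idiom: filter(...).first() returns None on an empty result, which is what lets the if not rge: guard collect missing IDs instead of raising. The stricter Django alternative is get(), which throws; a sketch with a made-up GMBA_ID value:

    from app.models import Range

    try:
        rge = Range.objects.get(gmba_id='GMBA_V2_0123')
    except Range.DoesNotExist:
        rge = None  # equivalent to filter(...).first() finding nothing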