Fix db errors in convert.py
- Use print instead of the logger for the time being.
- Replace db.session calls with obj.save() methods.
- Use a transaction for the no_flush case.
TODO: change query / filter / filter_by usages.
This commit is contained in:
parent
0b93703886
commit
b628f6621d
|
@ -1,9 +1,11 @@
|
||||||
import csv, json, re
|
import csv, json, re
|
||||||
from app import app
|
|
||||||
from os.path import isfile
|
from os.path import isfile
|
||||||
from .models import *
|
from app.models import *
|
||||||
from .formats import *
|
from .formats import *
|
||||||
|
|
||||||
|
from django.db import transaction
|
||||||
|
|
||||||
|
|
||||||
# Correct commas inside of a linked field
|
# Correct commas inside of a linked field
|
||||||
def fix_bracketed_lists(data):
|
def fix_bracketed_lists(data):
|
||||||
for fix in re.findall(r'\([^\s]*,[ ]*[^\s]*\)', data):
|
for fix in re.findall(r'\([^\s]*,[ ]*[^\s]*\)', data):
|
||||||
|
@ -29,7 +31,7 @@ def add_linked(person, field, obj, data):
|
||||||
if not tgt:
|
if not tgt:
|
||||||
tgt = obj()
|
tgt = obj()
|
||||||
tgt.name = n
|
tgt.name = n
|
||||||
db.session.add(tgt)
|
tgt.save()
|
||||||
field.append(tgt)
|
field.append(tgt)
|
||||||
|
|
||||||
# Fetch an object by source_id (numeric identifier used in source DB)
|
# Fetch an object by source_id (numeric identifier used in source DB)
|
||||||
|
@ -53,9 +55,7 @@ def get_total_rows_csv(filename):
|
||||||
def reindex_data():
|
def reindex_data():
|
||||||
for i, p in enumerate(Person.query.all()):
|
for i, p in enumerate(Person.query.all()):
|
||||||
p.index()
|
p.index()
|
||||||
db.session.add(p)
|
p.save()
|
||||||
if i % 10 == 0: db.session.commit()
|
|
||||||
db.session.commit()
|
|
||||||
|
|
||||||
# Data update routine
|
# Data update routine
|
||||||
def refresh_data(filename, fmt=None):
|
def refresh_data(filename, fmt=None):
|
||||||
|
@ -63,7 +63,7 @@ def refresh_data(filename, fmt=None):
|
||||||
rowcount = 0
|
rowcount = 0
|
||||||
if not isfile(filename):
|
if not isfile(filename):
|
||||||
msg = "Missing data: %s - refresh aborted." % fmt['filename']
|
msg = "Missing data: %s - refresh aborted." % fmt['filename']
|
||||||
app.logger.warn(msg)
|
print(msg)
|
||||||
yield(msg, "error")
|
yield(msg, "error")
|
||||||
return None
|
return None
|
||||||
if fmt['extension'] == 'csv':
|
if fmt['extension'] == 'csv':
|
||||||
|
@ -76,14 +76,14 @@ def refresh_data(filename, fmt=None):
|
||||||
if row is None: continue
|
if row is None: continue
|
||||||
yield rowcount, rowcount/totalrows
|
yield rowcount, rowcount/totalrows
|
||||||
|
|
||||||
# Ensure any new data is flushed from time to time
|
# # Ensure any new data is flushed from time to time
|
||||||
if count % 25 == 0:
|
# if count % 25 == 0:
|
||||||
db.session.commit()
|
# db.session.commit()
|
||||||
|
|
||||||
for r in fmt['required']:
|
for r in fmt['required']:
|
||||||
if not r in row:
|
if not r in row:
|
||||||
msg = "Missing attribute in %s (%s)" % (r, fmt['filename'])
|
msg = "Missing attribute in %s (%s)" % (r, fmt['filename'])
|
||||||
app.logger.warn(msg)
|
print(msg)
|
||||||
yield(msg, "error")
|
yield(msg, "error")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
@ -104,14 +104,14 @@ def refresh_data(filename, fmt=None):
|
||||||
person.contact_email = row['e-mail 1']
|
person.contact_email = row['e-mail 1']
|
||||||
person.personal_url = fix_url(row['URL'])
|
person.personal_url = fix_url(row['URL'])
|
||||||
|
|
||||||
with db.session.no_autoflush:
|
with transaction.atomic():
|
||||||
add_linked(person, person.research_methods, Method, row['Methods'])
|
add_linked(person, person.research_methods, Method, row['Methods'])
|
||||||
add_linked(person, person.research_scales, Scale, row['Scale'])
|
add_linked(person, person.research_scales, Scale, row['Scale'])
|
||||||
add_linked(person, person.research_taxa, Taxon, row['Taxa'])
|
add_linked(person, person.research_taxa, Taxon, row['Taxa'])
|
||||||
add_linked(person, person.research_fields, Field, row['Field of expertise'])
|
add_linked(person, person.research_fields, Field, row['Field of expertise'])
|
||||||
|
|
||||||
person.index()
|
person.index()
|
||||||
db.session.add(person)
|
person.save()
|
||||||
count = count + 1
|
count = count + 1
|
||||||
|
|
||||||
elif fmt['dataformat'] is DataFormat.RESOURCE_DETAIL:
|
elif fmt['dataformat'] is DataFormat.RESOURCE_DETAIL:
|
||||||
|
@ -121,7 +121,7 @@ def refresh_data(filename, fmt=None):
|
||||||
res.citation = row['Citation']
|
res.citation = row['Citation']
|
||||||
res.url = fix_url(row['URL'].strip('#')) # remove weird #formatting#
|
res.url = fix_url(row['URL'].strip('#')) # remove weird #formatting#
|
||||||
res.abstract = row['Abstract']
|
res.abstract = row['Abstract']
|
||||||
db.session.add(res)
|
res.save()
|
||||||
count = count + 1
|
count = count + 1
|
||||||
|
|
||||||
elif fmt['dataformat'] is DataFormat.RANGE_DETAIL:
|
elif fmt['dataformat'] is DataFormat.RANGE_DETAIL:
|
||||||
|
@ -130,7 +130,7 @@ def refresh_data(filename, fmt=None):
|
||||||
rng.gmba_id = row['GMBA_ID']
|
rng.gmba_id = row['GMBA_ID']
|
||||||
rng.name = row['RangeName']
|
rng.name = row['RangeName']
|
||||||
rng.countries = row['Countries']
|
rng.countries = row['Countries']
|
||||||
db.session.add(rng)
|
rng.save()
|
||||||
count = count + 1
|
count = count + 1
|
||||||
|
|
||||||
elif fmt['dataformat'] is DataFormat.PERSON_RESOURCE:
|
elif fmt['dataformat'] is DataFormat.PERSON_RESOURCE:
|
||||||
|
@ -140,7 +140,7 @@ def refresh_data(filename, fmt=None):
|
||||||
if not ppl or not ppl.first(): continue
|
if not ppl or not ppl.first(): continue
|
||||||
for person in ppl:
|
for person in ppl:
|
||||||
for r in rzs: person.resources.append(r)
|
for r in rzs: person.resources.append(r)
|
||||||
db.session.add(person)
|
person.save()
|
||||||
count = count + 1
|
count = count + 1
|
||||||
|
|
||||||
elif fmt['dataformat'] is DataFormat.PERSON_RANGE:
|
elif fmt['dataformat'] is DataFormat.PERSON_RANGE:
|
||||||
|
@ -150,7 +150,7 @@ def refresh_data(filename, fmt=None):
|
||||||
if not ppl or not ppl.first(): continue
|
if not ppl or not ppl.first(): continue
|
||||||
for person in ppl:
|
for person in ppl:
|
||||||
for r in rzs: person.ranges.append(r)
|
for r in rzs: person.ranges.append(r)
|
||||||
db.session.add(person)
|
person.save()
|
||||||
count = count + 1
|
count = count + 1
|
||||||
|
|
||||||
elif fmt['extension'] == 'geojson':
|
elif fmt['extension'] == 'geojson':
|
||||||
|
@ -171,10 +171,8 @@ def refresh_data(filename, fmt=None):
|
||||||
rge.name = p['Name']
|
rge.name = p['Name']
|
||||||
for c in ['Country_1', 'Country_2_']:
|
for c in ['Country_1', 'Country_2_']:
|
||||||
if c in p: rge.countries = p[c]
|
if c in p: rge.countries = p[c]
|
||||||
db.session.add(rge)
|
rge.save()
|
||||||
app.logger.info("Warning: %d ranges not found" % len(ranges_missing))
|
print("Warning: %d ranges not found" % len(ranges_missing))
|
||||||
app.logger.debug("[%s]" % ', '.join(ranges_missing))
|
print("[%s]" % ', '.join(ranges_missing))
|
||||||
|
|
||||||
db.session.commit()
|
|
||||||
yield None, None
|
yield None, None
|
||||||
return count
|
return count
|
||||||
|
|
Loading…
Reference in New Issue