Fix db errors in convert.py

- use print instead of logger for the time being
- replace db.session with obj.save methods
- use transaction for no_flush case

TODO: change query / filter / filter_by
This commit is contained in:
PCoder 2021-07-27 10:12:37 +05:30
parent 0b93703886
commit b628f6621d
1 changed file with 20 additions and 22 deletions

View File

@ -1,9 +1,11 @@
import csv, json, re import csv, json, re
from app import app
from os.path import isfile from os.path import isfile
from .models import * from app.models import *
from .formats import * from .formats import *
from django.db import transaction
# Correct commas inside of a linked field # Correct commas inside of a linked field
def fix_bracketed_lists(data): def fix_bracketed_lists(data):
for fix in re.findall(r'\([^\s]*,[ ]*[^\s]*\)', data): for fix in re.findall(r'\([^\s]*,[ ]*[^\s]*\)', data):
@ -29,7 +31,7 @@ def add_linked(person, field, obj, data):
if not tgt: if not tgt:
tgt = obj() tgt = obj()
tgt.name = n tgt.name = n
db.session.add(tgt) tgt.save()
field.append(tgt) field.append(tgt)
# Fetch an object by source_id (numeric identifier used in source DB) # Fetch an object by source_id (numeric identifier used in source DB)
@ -53,9 +55,7 @@ def get_total_rows_csv(filename):
def reindex_data(): def reindex_data():
for i, p in enumerate(Person.query.all()): for i, p in enumerate(Person.query.all()):
p.index() p.index()
db.session.add(p) p.save()
if i % 10 == 0: db.session.commit()
db.session.commit()
# Data update routine # Data update routine
def refresh_data(filename, fmt=None): def refresh_data(filename, fmt=None):
@ -63,7 +63,7 @@ def refresh_data(filename, fmt=None):
rowcount = 0 rowcount = 0
if not isfile(filename): if not isfile(filename):
msg = "Missing data: %s - refresh aborted." % fmt['filename'] msg = "Missing data: %s - refresh aborted." % fmt['filename']
app.logger.warn(msg) print(msg)
yield(msg, "error") yield(msg, "error")
return None return None
if fmt['extension'] == 'csv': if fmt['extension'] == 'csv':
@ -76,14 +76,14 @@ def refresh_data(filename, fmt=None):
if row is None: continue if row is None: continue
yield rowcount, rowcount/totalrows yield rowcount, rowcount/totalrows
# Ensure any new data is flushed from time to time # # Ensure any new data is flushed from time to time
if count % 25 == 0: # if count % 25 == 0:
db.session.commit() # db.session.commit()
for r in fmt['required']: for r in fmt['required']:
if not r in row: if not r in row:
msg = "Missing attribute in %s (%s)" % (r, fmt['filename']) msg = "Missing attribute in %s (%s)" % (r, fmt['filename'])
app.logger.warn(msg) print(msg)
yield(msg, "error") yield(msg, "error")
return None return None
@ -104,14 +104,14 @@ def refresh_data(filename, fmt=None):
person.contact_email = row['e-mail 1'] person.contact_email = row['e-mail 1']
person.personal_url = fix_url(row['URL']) person.personal_url = fix_url(row['URL'])
with db.session.no_autoflush: with transaction.atomic():
add_linked(person, person.research_methods, Method, row['Methods']) add_linked(person, person.research_methods, Method, row['Methods'])
add_linked(person, person.research_scales, Scale, row['Scale']) add_linked(person, person.research_scales, Scale, row['Scale'])
add_linked(person, person.research_taxa, Taxon, row['Taxa']) add_linked(person, person.research_taxa, Taxon, row['Taxa'])
add_linked(person, person.research_fields, Field, row['Field of expertise']) add_linked(person, person.research_fields, Field, row['Field of expertise'])
person.index() person.index()
db.session.add(person) person.save()
count = count + 1 count = count + 1
elif fmt['dataformat'] is DataFormat.RESOURCE_DETAIL: elif fmt['dataformat'] is DataFormat.RESOURCE_DETAIL:
@ -121,7 +121,7 @@ def refresh_data(filename, fmt=None):
res.citation = row['Citation'] res.citation = row['Citation']
res.url = fix_url(row['URL'].strip('#')) # remove weird #formatting# res.url = fix_url(row['URL'].strip('#')) # remove weird #formatting#
res.abstract = row['Abstract'] res.abstract = row['Abstract']
db.session.add(res) res.save()
count = count + 1 count = count + 1
elif fmt['dataformat'] is DataFormat.RANGE_DETAIL: elif fmt['dataformat'] is DataFormat.RANGE_DETAIL:
@ -130,7 +130,7 @@ def refresh_data(filename, fmt=None):
rng.gmba_id = row['GMBA_ID'] rng.gmba_id = row['GMBA_ID']
rng.name = row['RangeName'] rng.name = row['RangeName']
rng.countries = row['Countries'] rng.countries = row['Countries']
db.session.add(rng) rng.save()
count = count + 1 count = count + 1
elif fmt['dataformat'] is DataFormat.PERSON_RESOURCE: elif fmt['dataformat'] is DataFormat.PERSON_RESOURCE:
@ -140,7 +140,7 @@ def refresh_data(filename, fmt=None):
if not ppl or not ppl.first(): continue if not ppl or not ppl.first(): continue
for person in ppl: for person in ppl:
for r in rzs: person.resources.append(r) for r in rzs: person.resources.append(r)
db.session.add(person) person.save()
count = count + 1 count = count + 1
elif fmt['dataformat'] is DataFormat.PERSON_RANGE: elif fmt['dataformat'] is DataFormat.PERSON_RANGE:
@ -150,7 +150,7 @@ def refresh_data(filename, fmt=None):
if not ppl or not ppl.first(): continue if not ppl or not ppl.first(): continue
for person in ppl: for person in ppl:
for r in rzs: person.ranges.append(r) for r in rzs: person.ranges.append(r)
db.session.add(person) person.save()
count = count + 1 count = count + 1
elif fmt['extension'] == 'geojson': elif fmt['extension'] == 'geojson':
@ -171,10 +171,8 @@ def refresh_data(filename, fmt=None):
rge.name = p['Name'] rge.name = p['Name']
for c in ['Country_1', 'Country_2_']: for c in ['Country_1', 'Country_2_']:
if c in p: rge.countries = p[c] if c in p: rge.countries = p[c]
db.session.add(rge) rge.save()
app.logger.info("Warning: %d ranges not found" % len(ranges_missing)) print("Warning: %d ranges not found" % len(ranges_missing))
app.logger.debug("[%s]" % ', '.join(ranges_missing)) print("[%s]" % ', '.join(ranges_missing))
db.session.commit()
yield None, None yield None, None
return count return count