From b628f6621d5d2df99108a34a1043f33f9530eae0 Mon Sep 17 00:00:00 2001 From: PCoder Date: Tue, 27 Jul 2021 10:12:37 +0530 Subject: [PATCH] Fix db errors in convert.py - use print instead of app.logger for the time being - replace db.session.add/commit with obj.save() calls - use transaction.atomic() in place of db.session.no_autoflush TODO: change query / filter / filter_by --- gmba_django/convert.py | 42 ++++++++++++++++++++---------------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/gmba_django/convert.py b/gmba_django/convert.py index 89be94c..e9d7d35 100644 --- a/gmba_django/convert.py +++ b/gmba_django/convert.py @@ -1,9 +1,11 @@ import csv, json, re -from app import app from os.path import isfile -from .models import * +from app.models import * from .formats import * +from django.db import transaction + + # Correct commas inside of a linked field def fix_bracketed_lists(data): for fix in re.findall(r'\([^\s]*,[ ]*[^\s]*\)', data): @@ -29,7 +31,7 @@ def add_linked(person, field, obj, data): if not tgt: tgt = obj() tgt.name = n - db.session.add(tgt) + tgt.save() field.append(tgt) # Fetch an object by source_id (numeric identifier used in source DB) @@ -53,9 +55,7 @@ def get_total_rows_csv(filename): def reindex_data(): for i, p in enumerate(Person.query.all()): p.index() - db.session.add(p) - if i % 10 == 0: db.session.commit() - db.session.commit() + p.save() # Data update routine def refresh_data(filename, fmt=None): @@ -63,7 +63,7 @@ def refresh_data(filename, fmt=None): rowcount = 0 if not isfile(filename): msg = "Missing data: %s - refresh aborted." 
% fmt['filename'] - app.logger.warn(msg) + print(msg) yield(msg, "error") return None if fmt['extension'] == 'csv': @@ -76,14 +76,14 @@ def refresh_data(filename, fmt=None): if row is None: continue yield rowcount, rowcount/totalrows - # Ensure any new data is flushed from time to time - if count % 25 == 0: - db.session.commit() + # # Ensure any new data is flushed from time to time + # if count % 25 == 0: + # db.session.commit() for r in fmt['required']: if not r in row: msg = "Missing attribute in %s (%s)" % (r, fmt['filename']) - app.logger.warn(msg) + print(msg) yield(msg, "error") return None @@ -104,14 +104,14 @@ def refresh_data(filename, fmt=None): person.contact_email = row['e-mail 1'] person.personal_url = fix_url(row['URL']) - with db.session.no_autoflush: + with transaction.atomic(): add_linked(person, person.research_methods, Method, row['Methods']) add_linked(person, person.research_scales, Scale, row['Scale']) add_linked(person, person.research_taxa, Taxon, row['Taxa']) add_linked(person, person.research_fields, Field, row['Field of expertise']) person.index() - db.session.add(person) + person.save() count = count + 1 elif fmt['dataformat'] is DataFormat.RESOURCE_DETAIL: @@ -121,7 +121,7 @@ def refresh_data(filename, fmt=None): res.citation = row['Citation'] res.url = fix_url(row['URL'].strip('#')) # remove weird #formatting# res.abstract = row['Abstract'] - db.session.add(res) + res.save() count = count + 1 elif fmt['dataformat'] is DataFormat.RANGE_DETAIL: @@ -130,7 +130,7 @@ def refresh_data(filename, fmt=None): rng.gmba_id = row['GMBA_ID'] rng.name = row['RangeName'] rng.countries = row['Countries'] - db.session.add(rng) + rng.save() count = count + 1 elif fmt['dataformat'] is DataFormat.PERSON_RESOURCE: @@ -140,7 +140,7 @@ def refresh_data(filename, fmt=None): if not ppl or not ppl.first(): continue for person in ppl: for r in rzs: person.resources.append(r) - db.session.add(person) + person.save() count = count + 1 elif fmt['dataformat'] is 
DataFormat.PERSON_RANGE: @@ -150,7 +150,7 @@ def refresh_data(filename, fmt=None): if not ppl or not ppl.first(): continue for person in ppl: for r in rzs: person.ranges.append(r) - db.session.add(person) + person.save() count = count + 1 elif fmt['extension'] == 'geojson': @@ -171,10 +171,8 @@ def refresh_data(filename, fmt=None): rge.name = p['Name'] for c in ['Country_1', 'Country_2_']: if c in p: rge.countries = p[c] - db.session.add(rge) - app.logger.info("Warning: %d ranges not found" % len(ranges_missing)) - app.logger.debug("[%s]" % ', '.join(ranges_missing)) - - db.session.commit() + rge.save() + print("Warning: %d ranges not found" % len(ranges_missing)) + print("[%s]" % ', '.join(ranges_missing)) yield None, None return count