Fix db errors in convert.py

- use print in place of the logger for the time being
- replace db.session calls with obj.save() methods
- use transaction.atomic() for the no_autoflush case

TODO: change query / filter / filter_by
2021-07-27 10:12:37 +05:30 · 2021-07-27 10:12:37 +05:30 · b628f6621d
commit b628f6621d
parent 0b93703886
1 changed files with 20 additions and 22 deletions
--- a/gmba_django/convert.py
+++ b/gmba_django/convert.py
@ -1,9 +1,11 @@
 import csv, json, re
-from app import app
 from os.path import isfile
-from .models import *
+from app.models import *
 from .formats import *

+from django.db import transaction
+
+
 # Correct commas inside of a linked field
 def fix_bracketed_lists(data):
    for fix in re.findall(r'\([^\s]*,[ ]*[^\s]*\)', data):
@ -29,7 +31,7 @@ def add_linked(person, field, obj, data):
        if not tgt:
            tgt = obj()
            tgt.name = n
-        db.session.add(tgt)
+        tgt.save()
        field.append(tgt)

 # Fetch an object by source_id (numeric identifier used in source DB)
@ -53,9 +55,7 @@ def get_total_rows_csv(filename):
 def reindex_data():
    for i, p in enumerate(Person.query.all()):
        p.index()
-        db.session.add(p)
-        if i % 10 == 0: db.session.commit()
-    db.session.commit()
+        p.save()

 # Data update routine
 def refresh_data(filename, fmt=None):
@ -63,7 +63,7 @@ def refresh_data(filename, fmt=None):
    rowcount = 0
    if not isfile(filename):
        msg = "Missing data: %s  - refresh aborted." % fmt['filename']
-        app.logger.warn(msg)
+        print(msg)
        yield(msg, "error")
        return None
    if fmt['extension'] == 'csv':
@ -76,14 +76,14 @@ def refresh_data(filename, fmt=None):
                if row is None: continue
                yield rowcount, rowcount/totalrows

-                # Ensure any new data is flushed from time to time
-                if count % 25 == 0:
-                    db.session.commit()
+                # # Ensure any new data is flushed from time to time
+                # if count % 25 == 0:
+                #     db.session.commit()

                for r in fmt['required']:
                    if not r in row:
                        msg = "Missing attribute in %s (%s)" % (r, fmt['filename'])
-                        app.logger.warn(msg)
+                        print(msg)
                        yield(msg, "error")
                        return None

@ -104,14 +104,14 @@ def refresh_data(filename, fmt=None):
                    person.contact_email = row['e-mail 1']
                    person.personal_url = fix_url(row['URL'])

-                    with db.session.no_autoflush:
+                    with transaction.atomic():
                        add_linked(person, person.research_methods, Method, row['Methods'])
                        add_linked(person, person.research_scales,  Scale,  row['Scale'])
                        add_linked(person, person.research_taxa,    Taxon,  row['Taxa'])
                        add_linked(person, person.research_fields,  Field,  row['Field of expertise'])

                    person.index()
-                    db.session.add(person)
+                    person.save()
                    count = count + 1

                elif fmt['dataformat'] is DataFormat.RESOURCE_DETAIL:
@ -121,7 +121,7 @@ def refresh_data(filename, fmt=None):
                    res.citation = row['Citation']
                    res.url = fix_url(row['URL'].strip('#')) # remove weird #formatting#
                    res.abstract = row['Abstract']
-                    db.session.add(res)
+                    res.save()
                    count = count + 1

                elif fmt['dataformat'] is DataFormat.RANGE_DETAIL:
@ -130,7 +130,7 @@ def refresh_data(filename, fmt=None):
                    rng.gmba_id = row['GMBA_ID']
                    rng.name = row['RangeName']
                    rng.countries = row['Countries']
-                    db.session.add(rng)
+                    rng.save()
                    count = count + 1

                elif fmt['dataformat'] is DataFormat.PERSON_RESOURCE:
@ -140,7 +140,7 @@ def refresh_data(filename, fmt=None):
                    if not ppl or not ppl.first(): continue
                    for person in ppl:
                        for r in rzs: person.resources.append(r)
-                        db.session.add(person)
+                        person.save()
                        count = count + 1

                elif fmt['dataformat'] is DataFormat.PERSON_RANGE:
@ -150,7 +150,7 @@ def refresh_data(filename, fmt=None):
                    if not ppl or not ppl.first(): continue
                    for person in ppl:
                        for r in rzs: person.ranges.append(r)
-                        db.session.add(person)
+                        person.save()
                        count = count + 1

    elif fmt['extension'] == 'geojson':
@ -171,10 +171,8 @@ def refresh_data(filename, fmt=None):
                    rge.name = p['Name']
                    for c in ['Country_1', 'Country_2_']:
                        if c in p: rge.countries = p[c]
-                    db.session.add(rge)
-                app.logger.info("Warning: %d ranges not found" % len(ranges_missing))
-                app.logger.debug("[%s]" % ', '.join(ranges_missing))
-
-    db.session.commit()
+                    rge.save()
+                print("Warning: %d ranges not found" % len(ranges_missing))
+                print("[%s]" % ', '.join(ranges_missing))
    yield None, None
    return count