Fix issues in convert.py

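- Replace all `Model.query` lookups (`.query.filter_by(...)`, `.query.all()`) with `Model.objects` manager calls (`.objects.filter(...)`, `.objects.all()`) - Reformat code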
2021-07-27 10:28:07 +05:30 · 2021-07-27 10:28:07 +05:30 · 8685c4a7a2
commit 8685c4a7a2
parent e21450730d
1 changed file with 29 additions and 19 deletions
--- a/gmba_django/convert.py
+++ b/gmba_django/convert.py
@@ -1,4 +1,6 @@
-import csv, json, re
+import csv
+import json
+import re
 from os.path import isfile
 from app.models import *
 from .formats import *
@@ -12,6 +14,7 @@ def fix_bracketed_lists(data):
        data = data.replace(fix, fix.replace(',', ' /'))
    return data

+
 # Check for valid link
 def fix_url(link):
    if len(link) > 3 and not link.startswith('http'):
@@ -19,31 +22,37 @@ def fix_url(link):
    # link = link.split(';')[0]
    return link

+
 # Create linked objects
 def add_linked(person, field, obj, data):
    # TODO: fuzzy matching instead of lower()
    items = fix_bracketed_lists(data).lower()
-    items = items.replace(';',',').split(',')
+    items = items.replace(';', ',').split(',')
    for i in items:
        n = i.strip()
-        if len(n)<3: continue
-        tgt = obj.query.filter_by(name=n).first()
+        if len(n) < 3:
+            continue
+        tgt = obj.objects.filter(name=n).first()
        if not tgt:
            tgt = obj()
            tgt.name = n
        tgt.save()
        field.append(tgt)

+
 # Fetch an object by source_id (numeric identifier used in source DB)
 def get_by_id(rowid, obj, first=True):
    if type(rowid) is str and rowid.isdigit():
        rowid = int(rowid)
    if type(rowid) is int:
-        l = obj.query.filter_by(source_id=rowid)
-        if first: return l.first(), rowid
-        else: return l, rowid
+        l = obj.objects.filter(source_id=rowid)
+        if first:
+            return l.first(), rowid
+        else:
+            return l, rowid
    return None, None

+
 # Quick check of the number of lines
 def get_total_rows_csv(filename):
    with open(filename) as f:
@@ -51,12 +60,14 @@ def get_total_rows_csv(filename):
            pass
    return i + 1

+
 # Search index routine
 def reindex_data():
-    for i, p in enumerate(Person.query.all()):
+    for i, p in enumerate(Person.objects.all()):
        p.index()
        p.save()

+
 # Data update routine
 def refresh_data(filename, fmt=None):
    count = 0
@@ -64,17 +75,16 @@ def refresh_data(filename, fmt=None):
    if not isfile(filename):
        msg = "Missing data: %s  - refresh aborted." % fmt['filename']
        print(msg)
-        yield(msg, "error")
+        yield msg, "error"
        return None
    if fmt['extension'] == 'csv':
        totalrows = get_total_rows_csv(filename)
        with open(filename, 'rt', encoding='utf-8', errors='ignore') as csvfile:
-
            datareader = csv.DictReader(csvfile)
            for row in datareader:
                rowcount += 1
                if row is None: continue
-                yield rowcount, rowcount/totalrows
+                yield rowcount, rowcount / totalrows

                # # Ensure any new data is flushed from time to time
                # if count % 25 == 0:
@@ -84,13 +94,13 @@ def refresh_data(filename, fmt=None):
                    if not r in row:
                        msg = "Missing attribute in %s (%s)" % (r, fmt['filename'])
                        print(msg)
-                        yield(msg, "error")
+                        yield msg, "error"
                        return None

                if fmt['dataformat'] is DataFormat.PERSON_DETAIL:
                    person, source_id = get_by_id(row['ID'], Person)
                    if not person:
-                        person = Person.query.filter_by(first_name=row['First name'], last_name=row['Last name']).first()
+                        person = Person.objects.filter(first_name=row['First name'], last_name=row['Last name']).first()
                    if not person:
                        person = Person(first_name=row['First name'], last_name=row['Last name'], source_id=row['ID'])

@@ -106,9 +116,9 @@ def refresh_data(filename, fmt=None):

                    with transaction.atomic():
                        add_linked(person, person.research_methods, Method, row['Methods'])
-                        add_linked(person, person.research_scales,  Scale,  row['Scale'])
-                        add_linked(person, person.research_taxa,    Taxon,  row['Taxa'])
-                        add_linked(person, person.research_fields,  Field,  row['Field of expertise'])
+                        add_linked(person, person.research_scales, Scale, row['Scale'])
+                        add_linked(person, person.research_taxa, Taxon, row['Taxa'])
+                        add_linked(person, person.research_fields, Field, row['Field of expertise'])

                    person.index()
                    person.save()
@ -119,7 +129,7 @@ def refresh_data(filename, fmt=None):
                    if not res: res = Resource(source_id=source_id)
                    res.title = row['Title']
                    res.citation = row['Citation']
-                    res.url = fix_url(row['URL'].strip('#')) # remove weird #formatting#
+                    res.url = fix_url(row['URL'].strip('#'))  # remove weird #formatting#
                    res.abstract = row['Abstract']
                    res.save()
                    count = count + 1
@@ -160,11 +170,11 @@ def refresh_data(filename, fmt=None):
            if fmt['dataformat'] is DataFormat.RANGE_SHAPES:
                totalrows = len(jsondata['features'])
                for f in jsondata['features']:
-                    yield count, count/totalrows
+                    yield count, count / totalrows
                    count = count + 1

                    p = f['properties']
-                    rge = Range.query.filter_by(gmba_id=p['GMBA_ID']).first()
+                    rge = Range.objects.filter(gmba_id=p['GMBA_ID']).first()
                    if not rge:
                        ranges_missing.append(p['GMBA_ID'])
                        continue