From 41a2129e9af16f692f2de2b5d1a370a0f70e2884 Mon Sep 17 00:00:00 2001 From: PCoder Date: Sat, 5 Mar 2022 00:43:44 +0530 Subject: [PATCH] Improve refresh_data --- app/convert.py | 74 ++++++++++++++++++++++++++++++++++---------------- 1 file changed, 50 insertions(+), 24 deletions(-) diff --git a/app/convert.py b/app/convert.py index 22ea790..bf61193 100644 --- a/app/convert.py +++ b/app/convert.py @@ -39,7 +39,7 @@ def add_linked(person, obj, data): tgt.name = n tgt.save() field.append(tgt) - return field + return field # Fetch an object by source_id (numeric identifier used in source DB) @@ -71,7 +71,8 @@ def reindex_data(): # Data update routine -def refresh_data(filename, fmt=None): +def refresh_data(filename, fmt=None, update_existing=False): + print("refresh_data") count = 0 rowcount = 0 if not isfile(filename): @@ -101,6 +102,7 @@ def refresh_data(filename, fmt=None): return None if fmt['dataformat'] is DataFormat.PERSON_DETAIL: + print(row) person, source_id = get_by_id(row['ID'], Person) if not person: person = Person.objects.filter(first_name=row['First name'], last_name=row['Last name']).first() @@ -108,28 +110,37 @@ def refresh_data(filename, fmt=None): person = Person(first_name=row['First name'], last_name=row['Last name'], source_id=row['ID']) # Update data fields - person.source_id = source_id - person.title = row['Title'] - person.organisation = row['Organisation English'] - print("Country = %s" % row['country']) - if row['country'] is None or row['country'].strip() == '': - row['country'] = 0 - c = Country.objects.get(id=row['country']) - person.country = c - person.position = row['Position'] - person.biography = row['Biography'] - person.contact_email = row['e-mail 1'] - person.personal_url = fix_url(row['URL']) - + if update_existing: + person.source_id = source_id + person.title = row['Title'] + print(row) + person.organisation = row['Organisation English'] + print("Country = %s" % row['country'] if 'country' in row else '') + if 'country' not in row or row['country'] is None or row['country'].strip() == '': + row['country'] = 0 + c = Country.objects.get(id=row['country']) + person.country = c + person.position = row['Position'] + person.biography = row['Biography'] + person.contact_email = row['e-mail 1'] + person.personal_url = fix_url(row['URL']) + person.save() with transaction.atomic(): research_methods = add_linked(person, Method, row['Methods']) - methods_people = [MethodsPeople.objects.get_or_create(method_id=m.id, person_id=person.id) for m in research_methods] + methods_people = [MethodsPeople.objects.get_or_create(method_id=m.id, person_id=person.id)[0] for m in research_methods] + person.methodspeople_set.set(methods_people) research_scales = add_linked(person, Scale, row['Scale']) - scales_people = [ScalesPeople.objects.get_or_create(scale_id=s.id, person_id=person.id) for s in research_scales] + scales_people = [ScalesPeople.objects.get_or_create(scale_id=s.id, person_id=person.id)[0] for s in research_scales] + person.scalespeople_set.set(scales_people) research_taxa = add_linked(person, Taxon, row['Taxa']) - taxa_people = [TaxaPeople.objects.get_or_create(taxon_id=t.id, person_id=person.id) for t in research_taxa] + taxa_people = [TaxaPeople.objects.get_or_create(taxon_id=t.id, person_id=person.id)[0] for t in research_taxa] + person.taxapeople_set.set(taxa_people) research_fields = add_linked(person, Field, row['Field of expertise']) - fields_people = [FieldsPeople.objects.get_or_create(field_id=f.id, person_id=person.id) for f in research_fields] + fields_people = [FieldsPeople.objects.get_or_create(field_id=f.id, person_id=person.id)[0] for f in research_fields] + person.fieldspeople_set.set(fields_people) + # research_ranges = add_linked(person, MountainRange, row['MountainRange']) + # ranges_people = [RangesPeople.objects.get_or_create(range_id=r.id, person_id=person.id)[0] for r in research_ranges] + # person.rangespeople_set.set(ranges_people) person.index() person.save() count = count + 1 @@ -167,15 +178,30 @@ def refresh_data(filename, fmt=None): elif fmt['dataformat'] is DataFormat.PERSON_RANGE: rzs, source_id = get_by_id(row['MountainRange'], MountainRange, first=False) + print(" range=%s, source_id=%s" % (rzs, source_id)) if not rzs or not rzs.first(): + print(" --- No rzs, continue") continue ppl, source_id = get_by_id(row['Person'], Person, first=False) - if not ppl or not ppl.first(): continue - for person in ppl: - person.ranges = [] - for r in rzs: - person.ranges.append(r) + print(" +++ ppl=%s, source_id=%s" % (ppl, source_id)) + if not ppl or not ppl.first(): + print(" --- No ppl, continue") + continue + with transaction.atomic(): + person = ppl.first() + research_ranges = add_linked(person, MountainRange, row['MountainRange']) + ranges_people = [PeopleRange.objects.get_or_create(range_id=r.id, person_id=person.id)[0] for r in research_ranges] + person.peoplerange_set.set(ranges_people) + # for person in ppl: + # research_ranges = add_linked(person, MountainRange, row['MountainRange']) + # ranges_people = [RangesPeople.objects.get_or_create(range_id=r.id, person_id=person.id)[0] for r in research_ranges] + # person.rangespeople_set.set(ranges_people) + # ranges_people = RangesPeople + # for r in rzs: + # ranges_people.append(r) + # person.rangespeople_set.set(ranges_people) person.save() + #print(" *** Saved %s => %s (%s)" % (person, ranges_people, len(ranges_people))) count = count + 1 elif fmt['extension'] == 'geojson':