Improve refresh_data

This commit is contained in:
PCoder 2022-03-05 00:43:44 +05:30
parent edb860fa16
commit 41a2129e9a

View file

@ -71,7 +71,8 @@ def reindex_data():
# Data update routine # Data update routine
def refresh_data(filename, fmt=None): def refresh_data(filename, fmt=None, update_existing=False):
print("refresh_data")
count = 0 count = 0
rowcount = 0 rowcount = 0
if not isfile(filename): if not isfile(filename):
@ -101,6 +102,7 @@ def refresh_data(filename, fmt=None):
return None return None
if fmt['dataformat'] is DataFormat.PERSON_DETAIL: if fmt['dataformat'] is DataFormat.PERSON_DETAIL:
print(row)
person, source_id = get_by_id(row['ID'], Person) person, source_id = get_by_id(row['ID'], Person)
if not person: if not person:
person = Person.objects.filter(first_name=row['First name'], last_name=row['Last name']).first() person = Person.objects.filter(first_name=row['First name'], last_name=row['Last name']).first()
@ -108,11 +110,13 @@ def refresh_data(filename, fmt=None):
person = Person(first_name=row['First name'], last_name=row['Last name'], source_id=row['ID']) person = Person(first_name=row['First name'], last_name=row['Last name'], source_id=row['ID'])
# Update data fields # Update data fields
if update_existing:
person.source_id = source_id person.source_id = source_id
person.title = row['Title'] person.title = row['Title']
print(row)
person.organisation = row['Organisation English'] person.organisation = row['Organisation English']
print("Country = %s" % row['country']) print("Country = %s" % row['country'] if 'country' in row else '')
if row['country'] is None or row['country'].strip() == '': if 'country' not in row or row['country'] is None or row['country'].strip() == '':
row['country'] = 0 row['country'] = 0
c = Country.objects.get(id=row['country']) c = Country.objects.get(id=row['country'])
person.country = c person.country = c
@ -120,16 +124,23 @@ def refresh_data(filename, fmt=None):
person.biography = row['Biography'] person.biography = row['Biography']
person.contact_email = row['e-mail 1'] person.contact_email = row['e-mail 1']
person.personal_url = fix_url(row['URL']) person.personal_url = fix_url(row['URL'])
person.save()
with transaction.atomic(): with transaction.atomic():
research_methods = add_linked(person, Method, row['Methods']) research_methods = add_linked(person, Method, row['Methods'])
methods_people = [MethodsPeople.objects.get_or_create(method_id=m.id, person_id=person.id) for m in research_methods] methods_people = [MethodsPeople.objects.get_or_create(method_id=m.id, person_id=person.id)[0] for m in research_methods]
person.methodspeople_set.set(methods_people)
research_scales = add_linked(person, Scale, row['Scale']) research_scales = add_linked(person, Scale, row['Scale'])
scales_people = [ScalesPeople.objects.get_or_create(scale_id=s.id, person_id=person.id) for s in research_scales] scales_people = [ScalesPeople.objects.get_or_create(scale_id=s.id, person_id=person.id)[0] for s in research_scales]
person.scalespeople_set.set(scales_people)
research_taxa = add_linked(person, Taxon, row['Taxa']) research_taxa = add_linked(person, Taxon, row['Taxa'])
taxa_people = [TaxaPeople.objects.get_or_create(taxon_id=t.id, person_id=person.id) for t in research_taxa] taxa_people = [TaxaPeople.objects.get_or_create(taxon_id=t.id, person_id=person.id)[0] for t in research_taxa]
person.taxapeople_set.set(taxa_people)
research_fields = add_linked(person, Field, row['Field of expertise']) research_fields = add_linked(person, Field, row['Field of expertise'])
fields_people = [FieldsPeople.objects.get_or_create(field_id=f.id, person_id=person.id) for f in research_fields] fields_people = [FieldsPeople.objects.get_or_create(field_id=f.id, person_id=person.id)[0] for f in research_fields]
person.fieldspeople_set.set(fields_people)
# research_ranges = add_linked(person, MountainRange, row['MountainRange'])
# ranges_people = [RangesPeople.objects.get_or_create(range_id=r.id, person_id=person.id)[0] for r in research_ranges]
# person.rangespeople_set.set(ranges_people)
person.index() person.index()
person.save() person.save()
count = count + 1 count = count + 1
@ -167,15 +178,30 @@ def refresh_data(filename, fmt=None):
elif fmt['dataformat'] is DataFormat.PERSON_RANGE: elif fmt['dataformat'] is DataFormat.PERSON_RANGE:
rzs, source_id = get_by_id(row['MountainRange'], MountainRange, first=False) rzs, source_id = get_by_id(row['MountainRange'], MountainRange, first=False)
print(" range=%s, source_id=%s" % (rzs, source_id))
if not rzs or not rzs.first(): if not rzs or not rzs.first():
print(" --- No rzs, continue")
continue continue
ppl, source_id = get_by_id(row['Person'], Person, first=False) ppl, source_id = get_by_id(row['Person'], Person, first=False)
if not ppl or not ppl.first(): continue print(" +++ ppl=%s, source_id=%s" % (ppl, source_id))
for person in ppl: if not ppl or not ppl.first():
person.ranges = [] print(" --- No ppl, continue")
for r in rzs: continue
person.ranges.append(r) with transaction.atomic():
person = ppl.first()
research_ranges = add_linked(person, MountainRange, row['MountainRange'])
ranges_people = [PeopleRange.objects.get_or_create(range_id=r.id, person_id=person.id)[0] for r in research_ranges]
person.peoplerange_set.set(ranges_people)
# for person in ppl:
# research_ranges = add_linked(person, MountainRange, row['MountainRange'])
# ranges_people = [RangesPeople.objects.get_or_create(range_id=r.id, person_id=person.id)[0] for r in research_ranges]
# person.rangespeople_set.set(ranges_people)
# ranges_people = RangesPeople
# for r in rzs:
# ranges_people.append(r)
# person.rangespeople_set.set(ranges_people)
person.save() person.save()
#print(" *** Saved %s => %s (%s)" % (person, ranges_people, len(ranges_people)))
count = count + 1 count = count + 1
elif fmt['extension'] == 'geojson': elif fmt['extension'] == 'geojson':