diff --git a/app/convert.py b/app/convert.py index 22ea790..95a070d 100644 --- a/app/convert.py +++ b/app/convert.py @@ -39,7 +39,7 @@ def add_linked(person, obj, data): tgt.name = n tgt.save() field.append(tgt) - return field + return field # Fetch an object by source_id (numeric identifier used in source DB) @@ -71,7 +71,8 @@ def reindex_data(): # Data update routine -def refresh_data(filename, fmt=None): +def refresh_data(filename, fmt=None, update_existing=False): + print("refresh_data") count = 0 rowcount = 0 if not isfile(filename): @@ -101,42 +102,72 @@ def refresh_data(filename, fmt=None): return None if fmt['dataformat'] is DataFormat.PERSON_DETAIL: + print(row) person, source_id = get_by_id(row['ID'], Person) if not person: person = Person.objects.filter(first_name=row['First name'], last_name=row['Last name']).first() + if person: + print("Fetched from DB") + else: + print("Does not exist in DB") if not person: person = Person(first_name=row['First name'], last_name=row['Last name'], source_id=row['ID']) + person.source_id = source_id + person.title = row['Title'] + print(row) + person.organisation = row['Organisation English'] + print("Country = %s" % row['country'] if 'country' in row else '') + if 'country' not in row or row['country'] is None or row['country'].strip() == '': + row['country'] = 0 + c = Country.objects.get(id=row['country']) + person.country = c + person.position = row['Position'] + person.biography = row['Biography'] + person.contact_email = row['e-mail 1'] + person.personal_url = fix_url(row['URL']) + person.save() + print("Created") # Update data fields - person.source_id = source_id - person.title = row['Title'] - person.organisation = row['Organisation English'] - print("Country = %s" % row['country']) - if row['country'] is None or row['country'].strip() == '': - row['country'] = 0 - c = Country.objects.get(id=row['country']) - person.country = c - person.position = row['Position'] - person.biography = row['Biography'] - person.contact_email = row['e-mail 1'] - person.personal_url = fix_url(row['URL']) - + if update_existing: + person.source_id = source_id + person.title = row['Title'] + print(row) + person.organisation = row['Organisation English'] + print("Country = %s" % row['country'] if 'country' in row else '') + if 'country' not in row or row['country'] is None or row['country'].strip() == '': + row['country'] = 0 + c = Country.objects.get(id=row['country']) + person.country = c + person.position = row['Position'] + person.biography = row['Biography'] + person.contact_email = row['e-mail 1'] + person.personal_url = fix_url(row['URL']) + person.save() with transaction.atomic(): research_methods = add_linked(person, Method, row['Methods']) - methods_people = [MethodsPeople.objects.get_or_create(method_id=m.id, person_id=person.id) for m in research_methods] + methods_people = [MethodsPeople.objects.get_or_create(method_id=m.id, person_id=person.id)[0] for m in research_methods] + person.methodspeople_set.set(methods_people) research_scales = add_linked(person, Scale, row['Scale']) - scales_people = [ScalesPeople.objects.get_or_create(scale_id=s.id, person_id=person.id) for s in research_scales] + scales_people = [ScalesPeople.objects.get_or_create(scale_id=s.id, person_id=person.id)[0] for s in research_scales] + person.scalespeople_set.set(scales_people) research_taxa = add_linked(person, Taxon, row['Taxa']) - taxa_people = [TaxaPeople.objects.get_or_create(taxon_id=t.id, person_id=person.id) for t in research_taxa] + taxa_people = [TaxaPeople.objects.get_or_create(taxon_id=t.id, person_id=person.id)[0] for t in research_taxa] + person.taxapeople_set.set(taxa_people) research_fields = add_linked(person, Field, row['Field of expertise']) - fields_people = [FieldsPeople.objects.get_or_create(field_id=f.id, person_id=person.id) for f in research_fields] + fields_people = [FieldsPeople.objects.get_or_create(field_id=f.id, person_id=person.id)[0] for f in research_fields] + person.fieldspeople_set.set(fields_people) + # research_ranges = add_linked(person, MountainRange, row['MountainRange']) + # ranges_people = [RangesPeople.objects.get_or_create(range_id=r.id, person_id=person.id)[0] for r in research_ranges] + # person.rangespeople_set.set(ranges_people) person.index() person.save() count = count + 1 elif fmt['dataformat'] is DataFormat.RESOURCE_DETAIL: res, source_id = get_by_id(row['ID'], Resource) - if not res: res = Resource(source_id=source_id) + if not res: + res = Resource(source_id=source_id) res.title = row['Title'] res.citation = row['Citation'] res.url = fix_url(row['URL'].strip('#')) # remove weird #formatting# @@ -155,9 +186,11 @@ def refresh_data(filename, fmt=None): elif fmt['dataformat'] is DataFormat.PERSON_RESOURCE: rzs, source_id = get_by_id(row['Resource'], Resource, first=False) - if not rzs or not rzs.first(): continue + if not rzs or not rzs.first(): + continue ppl, source_id = get_by_id(row['Person'], Person, first=False) - if not ppl or not ppl.first(): continue + if not ppl or not ppl.first(): + continue for person in ppl: person.resources = [] for r in rzs: @@ -167,15 +200,30 @@ def refresh_data(filename, fmt=None): elif fmt['dataformat'] is DataFormat.PERSON_RANGE: rzs, source_id = get_by_id(row['MountainRange'], MountainRange, first=False) + print(" range=%s, source_id=%s" % (rzs, source_id)) if not rzs or not rzs.first(): + print(" --- No rzs, continue") continue ppl, source_id = get_by_id(row['Person'], Person, first=False) - if not ppl or not ppl.first(): continue - for person in ppl: - person.ranges = [] - for r in rzs: - person.ranges.append(r) + print(" +++ ppl=%s, source_id=%s" % (ppl, source_id)) + if not ppl or not ppl.first(): + print(" --- No ppl, continue") + continue + with transaction.atomic(): + person = ppl.first() + research_ranges = add_linked(person, MountainRange, row['MountainRange']) + ranges_people = [PeopleRange.objects.get_or_create(range_id=r.id, person_id=person.id)[0] for r in research_ranges] + person.peoplerange_set.set(ranges_people) + # for person in ppl: + # research_ranges = add_linked(person, MountainRange, row['MountainRange']) + # ranges_people = [RangesPeople.objects.get_or_create(range_id=r.id, person_id=person.id)[0] for r in research_ranges] + # person.rangespeople_set.set(ranges_people) + # ranges_people = RangesPeople + # for r in rzs: + # ranges_people.append(r) + # person.rangespeople_set.set(ranges_people) person.save() + #print(" *** Saved %s => %s (%s)" % (person, ranges_people, len(ranges_people))) count = count + 1 elif fmt['extension'] == 'geojson': diff --git a/app/models.py b/app/models.py index 665e953..bed7292 100644 --- a/app/models.py +++ b/app/models.py @@ -666,10 +666,18 @@ Involved scientist''') return False def index(self): - self.field_indexer = " ".join([ - self.first_name, self.last_name, self.organisation, self.position, self.biography - ]) - return True + if self: + self.field_indexer = " ".join([ + self.first_name if self and self.first_name else "", + self.last_name if self and self.last_name else "", + self.organisation if self and self.organisation else "", + self.position if self and self.position else "", + self.biography if self and self.biography else "" + ] if self else "") + self.save() + return True + else: + return False def fullname(self): return " ".join([self.title if self.title else '',