From 41a2129e9af16f692f2de2b5d1a370a0f70e2884 Mon Sep 17 00:00:00 2001 From: PCoder Date: Sat, 5 Mar 2022 00:43:44 +0530 Subject: [PATCH 1/9] Improve refresh_data --- app/convert.py | 74 ++++++++++++++++++++++++++++++++++---------------- 1 file changed, 50 insertions(+), 24 deletions(-) diff --git a/app/convert.py b/app/convert.py index 22ea790..bf61193 100644 --- a/app/convert.py +++ b/app/convert.py @@ -39,7 +39,7 @@ def add_linked(person, obj, data): tgt.name = n tgt.save() field.append(tgt) - return field + return field # Fetch an object by source_id (numeric identifier used in source DB) @@ -71,7 +71,8 @@ def reindex_data(): # Data update routine -def refresh_data(filename, fmt=None): +def refresh_data(filename, fmt=None, update_existing=False): + print("refresh_data") count = 0 rowcount = 0 if not isfile(filename): @@ -101,6 +102,7 @@ def refresh_data(filename, fmt=None): return None if fmt['dataformat'] is DataFormat.PERSON_DETAIL: + print(row) person, source_id = get_by_id(row['ID'], Person) if not person: person = Person.objects.filter(first_name=row['First name'], last_name=row['Last name']).first() @@ -108,28 +110,37 @@ def refresh_data(filename, fmt=None): person = Person(first_name=row['First name'], last_name=row['Last name'], source_id=row['ID']) # Update data fields - person.source_id = source_id - person.title = row['Title'] - person.organisation = row['Organisation English'] - print("Country = %s" % row['country']) - if row['country'] is None or row['country'].strip() == '': - row['country'] = 0 - c = Country.objects.get(id=row['country']) - person.country = c - person.position = row['Position'] - person.biography = row['Biography'] - person.contact_email = row['e-mail 1'] - person.personal_url = fix_url(row['URL']) - + if update_existing: + person.source_id = source_id + person.title = row['Title'] + print(row) + person.organisation = row['Organisation English'] + print("Country = %s" % row['country'] if 'country' in row else '') + if 'country' not in row or row['country'] is None or row['country'].strip() == '': + row['country'] = 0 + c = Country.objects.get(id=row['country']) + person.country = c + person.position = row['Position'] + person.biography = row['Biography'] + person.contact_email = row['e-mail 1'] + person.personal_url = fix_url(row['URL']) + person.save() with transaction.atomic(): research_methods = add_linked(person, Method, row['Methods']) - methods_people = [MethodsPeople.objects.get_or_create(method_id=m.id, person_id=person.id) for m in research_methods] + methods_people = [MethodsPeople.objects.get_or_create(method_id=m.id, person_id=person.id)[0] for m in research_methods] + person.methodspeople_set.set(methods_people) research_scales = add_linked(person, Scale, row['Scale']) - scales_people = [ScalesPeople.objects.get_or_create(scale_id=s.id, person_id=person.id) for s in research_scales] + scales_people = [ScalesPeople.objects.get_or_create(scale_id=s.id, person_id=person.id)[0] for s in research_scales] + person.scalespeople_set.set(scales_people) research_taxa = add_linked(person, Taxon, row['Taxa']) - taxa_people = [TaxaPeople.objects.get_or_create(taxon_id=t.id, person_id=person.id) for t in research_taxa] + taxa_people = [TaxaPeople.objects.get_or_create(taxon_id=t.id, person_id=person.id)[0] for t in research_taxa] + person.taxapeople_set.set(taxa_people) research_fields = add_linked(person, Field, row['Field of expertise']) - fields_people = [FieldsPeople.objects.get_or_create(field_id=f.id, person_id=person.id) for f in research_fields] + fields_people = [FieldsPeople.objects.get_or_create(field_id=f.id, person_id=person.id)[0] for f in research_fields] + person.fieldspeople_set.set(fields_people) + # research_ranges = add_linked(person, MountainRange, row['MountainRange']) + # ranges_people = [RangesPeople.objects.get_or_create(range_id=r.id, person_id=person.id)[0] for r in research_ranges] + # person.rangespeople_set.set(ranges_people) person.index() person.save() count = count + 1 @@ -167,15 +178,30 @@ def refresh_data(filename, fmt=None): elif fmt['dataformat'] is DataFormat.PERSON_RANGE: rzs, source_id = get_by_id(row['MountainRange'], MountainRange, first=False) + print(" range=%s, source_id=%s" % (rzs, source_id)) if not rzs or not rzs.first(): + print(" --- No rzs, continue") continue ppl, source_id = get_by_id(row['Person'], Person, first=False) - if not ppl or not ppl.first(): continue - for person in ppl: - person.ranges = [] - for r in rzs: - person.ranges.append(r) + print(" +++ ppl=%s, source_id=%s" % (ppl, source_id)) + if not ppl or not ppl.first(): + print(" --- No ppl, continue") + continue + with transaction.atomic(): + person = ppl.first() + research_ranges = add_linked(person, MountainRange, row['MountainRange']) + ranges_people = [PeopleRange.objects.get_or_create(range_id=r.id, person_id=person.id)[0] for r in research_ranges] + person.peoplerange_set.set(ranges_people) + # for person in ppl: + # research_ranges = add_linked(person, MountainRange, row['MountainRange']) + # ranges_people = [RangesPeople.objects.get_or_create(range_id=r.id, person_id=person.id)[0] for r in research_ranges] + # person.rangespeople_set.set(ranges_people) + # ranges_people = RangesPeople + # for r in rzs: + # ranges_people.append(r) + # person.rangespeople_set.set(ranges_people) person.save() + #print(" *** Saved %s => %s (%s)" % (person, ranges_people, len(ranges_people))) count = count + 1 elif fmt['extension'] == 'geojson': From a0c3dd57dd923f7126669173c1eadb66aec17331 Mon Sep 17 00:00:00 2001 From: PCoder Date: Sat, 5 Mar 2022 00:44:07 +0530 Subject: [PATCH 2/9] Format + improve indexer --- app/convert.py | 9 ++++++--- app/models.py | 15 +++++++++++---- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/app/convert.py b/app/convert.py index bf61193..faba6e9 100644 --- a/app/convert.py +++ b/app/convert.py @@ -147,7 +147,8 @@ def refresh_data(filename, fmt=None, update_existing=False): elif fmt['dataformat'] is DataFormat.RESOURCE_DETAIL: res, source_id = get_by_id(row['ID'], Resource) - if not res: res = Resource(source_id=source_id) + if not res: + res = Resource(source_id=source_id) res.title = row['Title'] res.citation = row['Citation'] res.url = fix_url(row['URL'].strip('#')) # remove weird #formatting# @@ -166,9 +167,11 @@ def refresh_data(filename, fmt=None, update_existing=False): elif fmt['dataformat'] is DataFormat.PERSON_RESOURCE: rzs, source_id = get_by_id(row['Resource'], Resource, first=False) - if not rzs or not rzs.first(): continue + if not rzs or not rzs.first(): + continue ppl, source_id = get_by_id(row['Person'], Person, first=False) - if not ppl or not ppl.first(): continue + if not ppl or not ppl.first(): + continue for person in ppl: person.resources = [] for r in rzs: diff --git a/app/models.py b/app/models.py index 665e953..bc71105 100644 --- a/app/models.py +++ b/app/models.py @@ -666,10 +666,17 @@ Involved scientist''') return False def index(self): - self.field_indexer = " ".join([ - self.first_name, self.last_name, self.organisation, self.position, self.biography - ]) - return True + if self: + self.field_indexer = " ".join([ + self.first_name if self and self.first_name else "", + self.last_name if self and self.last_name else "", + self.organisation if self and self.organisation else "", + self.position if self and self.position else "", + self.biography if self and self.biography else "" + ] if self else []) + return True + else: + return False def fullname(self): return " ".join([self.title if self.title else '', From 27ecafa88727ce6d88adcc998fa628c801269ae4 Mon Sep 17 00:00:00 2001 From: PCoder Date: Sat, 5 Mar 2022 01:01:28 +0530 Subject: [PATCH 3/9] Debug --- app/convert.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/app/convert.py b/app/convert.py index faba6e9..5f0b48d 100644 --- a/app/convert.py +++ b/app/convert.py @@ -106,8 +106,10 @@ def refresh_data(filename, fmt=None, update_existing=False): person, source_id = get_by_id(row['ID'], Person) if not person: person = Person.objects.filter(first_name=row['First name'], last_name=row['Last name']).first() + print("Fetched from DB") if not person: person = Person(first_name=row['First name'], last_name=row['Last name'], source_id=row['ID']) + print("Created") # Update data fields if update_existing: From 401b554a1c758824318d659552a8e5e620ad0f34 Mon Sep 17 00:00:00 2001 From: PCoder Date: Sat, 5 Mar 2022 01:05:23 +0530 Subject: [PATCH 4/9] More debug + create person and update fields if it does not exist in DB --- app/convert.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/app/convert.py b/app/convert.py index 5f0b48d..95a070d 100644 --- a/app/convert.py +++ b/app/convert.py @@ -106,9 +106,26 @@ def refresh_data(filename, fmt=None, update_existing=False): person, source_id = get_by_id(row['ID'], Person) if not person: person = Person.objects.filter(first_name=row['First name'], last_name=row['Last name']).first() - print("Fetched from DB") + if person: + print("Fetched from DB") + else: + print("Does not exist in DB") if not person: person = Person(first_name=row['First name'], last_name=row['Last name'], source_id=row['ID']) + person.source_id = source_id + person.title = row['Title'] + print(row) + person.organisation = row['Organisation English'] + print("Country = %s" % row['country'] if 'country' in row else '') + if 'country' not in row or row['country'] is None or row['country'].strip() == '': + row['country'] = 0 + c = Country.objects.get(id=row['country']) + person.country = c + person.position = row['Position'] + person.biography = row['Biography'] + person.contact_email = row['e-mail 1'] + person.personal_url = fix_url(row['URL']) + person.save() print("Created") # Update data fields From 2dda215ffe58e1936881e276413fed54f1886bab Mon Sep 17 00:00:00 2001 From: PCoder Date: Sat, 5 Mar 2022 01:23:08 +0530 Subject: [PATCH 5/9] Append range name only if not none --- app/views.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/app/views.py b/app/views.py index f896b0d..37ec12c 100644 --- a/app/views.py +++ b/app/views.py @@ -176,7 +176,10 @@ def get_paginated(query_set, page, per_page): for p in paginator.object_list: filters['country'].append(p.country.short_name) for r in p.peoplerange_set.all(): - filters['range'].append(r.range.range_name) + if r.range and r.range.range_name: + filters['range'].append(r.range.range_name) + else: + print("r.range.range_name is None %s %s" % (r.range_id, r.person_id)) for r in p.fieldspeople_set.all(): filters['field'].append(r.field.name) for r in p.taxapeople_set.all(): From da8fba3497249c39eb3401b78491b6858449e65a Mon Sep 17 00:00:00 2001 From: PCoder Date: Sat, 5 Mar 2022 01:28:29 +0530 Subject: [PATCH 6/9] Fix taxon --- app/views.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/app/views.py b/app/views.py index 37ec12c..b1877b1 100644 --- a/app/views.py +++ b/app/views.py @@ -183,7 +183,10 @@ def get_paginated(query_set, page, per_page): for r in p.fieldspeople_set.all(): filters['field'].append(r.field.name) for r in p.taxapeople_set.all(): - filters['taxon'].append(r.taxon.name) + if r.taxon and r.taxon.name: + filters['taxon'].append(r.taxon.name) + else: + print("r.taxon.name is None %s %s" % (r.range_id, r.taxon)) filters = { 'country': sorted(set(filters['country'])), 'range': sorted(set(filters['range'])), From 5b7e4eed3e6d05209dc2dce522695b204ef4967c Mon Sep 17 00:00:00 2001 From: PCoder Date: Sat, 5 Mar 2022 01:32:47 +0530 Subject: [PATCH 7/9] Fix possible none --- app/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/views.py b/app/views.py index b1877b1..6373f20 100644 --- a/app/views.py +++ b/app/views.py @@ -183,7 +183,7 @@ def get_paginated(query_set, page, per_page): for r in p.fieldspeople_set.all(): filters['field'].append(r.field.name) for r in p.taxapeople_set.all(): - if r.taxon and r.taxon.name: + if r and r.taxon and r.taxon.name: filters['taxon'].append(r.taxon.name) else: print("r.taxon.name is None %s %s" % (r.range_id, r.taxon)) From f272481bf63790382aff539716ce1d1d8ec5c1e3 Mon Sep 17 00:00:00 2001 From: PCoder Date: Sat, 5 Mar 2022 02:09:38 +0530 Subject: [PATCH 8/9] Fix finding range from name --- app/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/views.py b/app/views.py index 6373f20..5819063 100644 --- a/app/views.py +++ b/app/views.py @@ -225,7 +225,7 @@ class SearchView(View): if len(q_country) > 2: query_set = query_set.filter(country__short_name__icontains=q_country.strip().lower()) if len(q_range) > 2: - ranges_people = PeopleRange.objects.filter(range__name__icontains=q_range.strip().lower()) + ranges_people = PeopleRange.objects.filter(range__range_name__icontains=q_range.strip().lower()) r_people_ids = [rp.person_id for rp in ranges_people] query_set = query_set.filter(id__in=r_people_ids) if len(q_field) > 2: From 2c83c3e322ad8e1e36f1f8ec91685f8f8c503cd4 Mon Sep 17 00:00:00 2001 From: PCoder Date: Fri, 11 Mar 2022 16:53:15 +0530 Subject: [PATCH 9/9] Add save method to index --- app/models.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/app/models.py b/app/models.py index bc71105..bed7292 100644 --- a/app/models.py +++ b/app/models.py @@ -673,7 +673,8 @@ Involved scientist''') self.organisation if self and self.organisation else "", self.position if self and self.position else "", self.biography if self and self.biography else "" - ] if self else []) + ] if self else "") + self.save() return True else: return False