diff --git a/app/management/commands/import.py b/app/management/commands/import.py index b2cc12e..5b89c47 100644 --- a/app/management/commands/import.py +++ b/app/management/commands/import.py @@ -1,54 +1,63 @@ from django.core.management.base import BaseCommand, CommandError from django.apps import apps -from app.models import Country +from django.db.utils import IntegrityError + import csv +import json + class Command(BaseCommand): help = 'Imports csv to DB' - csv_files = [ - "v2-LU_GMBA_SpeciesGroups.csv", - "v2-LU_Countries.csv", - "v2-LU_Languages.csv", - "v2-LU_Sources.csv", - "v2-LU_RedListCategories.csv", - "v2-LU_RangeTypes.csv", - "v2-LU_PeopleStatus.csv", - "v2-LU_TrendsQuantity.csv", - "v2-LU_TrendsQuality.csv", - "v2-LU_TaxonUnit.csv", - "v2-LU_TaxonStatus.csv", - "v2-AddElevations.csv", - "v2-GMBA_Function.csv", - "v2-Gmba_V2_centroid.csv", - "v2-ImportGeom210915.csv", - "v2-LanguageLink.csv", - "v2-Keywords.csv", - "v2-NamesImport.csv", - "v2-Organisations.csv", - "v2-Peaks.csv", - "v2-PeopleRanges.csv", - "v2-PeopleFunction.csv", - "v2-PeopleResources.csv", - "v2-RangeCountries.csv", - "v2-RangeNameTranslations.csv", - "v2-RangeOnlineInfo.csv", - "v2-Ranges.csv", - "v2-ResourceRanges.csv", - "v2-ResourceKeywords.csv", - "v2-Repositories.csv", - "v2-Resources.csv", - "v2-Species.csv", - "v2-Searches.csv", - "v2-TaxonRange.csv", - "v2-SpeciesRange.csv", - "v2-People.csv" - ] + csv_files_models_dict = { + "v2-LU_GMBA_SpeciesGroups.csv": "GMBA_SpeciesGroup", + "v2-LU_Countries.csv": "Country", + "v2-LU_Languages.csv": "Language", + "v2-LU_Sources.csv": "Source", + "v2-LU_RedListCategories.csv": "RedListCategory", + "v2-LU_RangeTypes.csv": "RangeType", + "v2-LU_PeopleStatus.csv": "PeopleStatus", + "v2-LU_TrendsQuantity.csv": "TrendsQuantity", + "v2-LU_TrendsQuality.csv": "TrendsQuality", + "v2-LU_TaxonUnit.csv": "TaxonUnit", + "v2-LU_TaxonStatus.csv": "TaxonStatus", + + "v2-Ranges-cleaned.csv": "Range", + "v2-AddElevations.csv": "AddElevation", + + "v2-GMBA_Function.csv": "GMBA_function", + "v2-Gmba_V2_centroid.csv": "GMBA_V2_Centroid", + "v2-ImportGeom210915.csv": "ImportGeom210915", + "v2-LanguageLink.csv": "LanguageLink", + "v2-Keywords.csv": "Keyword", + "v2-NamesImport.csv": "NamesImport", + "v2-Organisations-cleaned.csv": "Organization", + "v2-Peaks.csv": "Peak", + + "v2-People.csv": "Person", + "v2-PeopleRanges.csv": "PeopleRange", + "v2-PeopleFunction.csv": "PeopleFunction", + "v2-Resources.csv": "Resource", + + "v2-PeopleResources.csv": "PeopleResource", + "v2-RangeCountries.csv": "RangeCountry", + "v2-RangeNameTranslations.csv": "RangeNameTranslation", + "v2-RangeOnlineInfo.csv": "RangeOnlineInfo", + "v2-ResourceRanges.csv": "ResourceRange", + "v2-ResourceKeywords.csv": "ResourceKeyword", + "v2-Repositories.csv": "Repository", + + "v2-Species.csv": "Species", + "v2-Searches.csv": "Search", + "v2-TaxonRange.csv": "TaxonRange", + "v2-SpeciesRange.csv": "SpeciesRange" + + } cols_to_django_fields = { "ID": 'id', "Source": 'source', - "RangeName": 'range_name', - "LanguageTranslation": 'language_translation', + "RangeName": 'range_name_id', + "LanguageTranslation": 'language_translation_id', "RangeNameTranslation": 'range_name_translation', "GMBA_ID_v2": 'gmba_v2_id', "Elev_Min": 'elev_min', @@ -57,7 +66,7 @@ class Command(BaseCommand): "TaxonStatus": 'taxon_status', "InfoSource": 'info_source', "URL": 'url', - "GMBA function": 'GMBA_function', + "GMBA function": 'gmba_function', "TaxonUnit": 'taxon_unit', "Range_ID": 'id', "RangeNameMap": 'range_name_map', @@ -109,8 +118,8 @@ class Command(BaseCommand): "Trend": 'trend', "RepositoryName": 'repository_name', "RepositoryURL": 'repository_url', - "Resource": 'resource', - "Keyword": 'keyword', + "Resource": 'resource_id', + "Keyword": 'keyword_id', "Keyword_ID": 'keyword_id', "Mother": 'mother', "CN": 'cn', @@ -120,9 +129,9 @@ class Command(BaseCommand): "PT": 'pt', "RU": 'ru', "TR": 'tr', - "ResourceTitle": 'resource_title', + "ResourceTitle": 'resource_title_id', "LanguageLetterCode": 'language_letter_code', - "LanguageNumberCode": 'language_number_code', + "LanguageNumberCode": 'language_number_code_id', "OrgNum1": 'org_num1', "Organisation Search": 'organisation_search', "OrgAlphaSearch": 'org_alpha_search', @@ -137,11 +146,11 @@ class Command(BaseCommand): "City": 'city', "Region": 'region', "SearchURL": 'search_url', - "LatLon": 'lat_lon', + "LatLon": 'lat_long', "URL Org": 'url', "Tel Org": 'tel', "Email Org": 'email', - "Country": 'country', + "Country": 'country_id', "Tags": 'tags', "Description": 'description', "Northing": 'northing', @@ -170,7 +179,8 @@ class Command(BaseCommand): "DOI": 'doi', "ShortName": 'short_name', "FormalName": 'formal_name', - "Membership within the UN System": '', + "Membership within the UN System": 'membership_within_un_system', + "Membership within theĀ UN System": 'membership_within_un_system', "Continent": 'continent', "EU_MS": 'eu_ms', "EEA_MS": 'eea_ms', @@ -179,7 +189,7 @@ class Command(BaseCommand): "Point_Name": 'point_name', "Elevation": 'elevation', "Link": 'link', - "Repository": 'repository', + "Repository": 'repository_id', "SearchString": 'search_string', "SearchDate": 'search_date', "Result": 'result', @@ -191,7 +201,7 @@ class Command(BaseCommand): "Last name": 'last_name', "Full name": 'full_name', "SearchName": 'search_name', - "e-mail 1": 'email_1', + "e-mail 1": 'contact_email', "e-mail 2": 'email_2', "Skype": 'skype', "Professional phone": 'professional_phone', @@ -203,7 +213,7 @@ class Command(BaseCommand): "Entry date": 'entry_date', "Newsletter": 'news_letter', "CountryLookup": 'country_lookup', - "Organisation": 'organization', + "Organisation": 'organization_id', "Birds": 'birds', "Mammals": 'mammals', 'Reptiles': 'reptiles', @@ -237,7 +247,7 @@ class Command(BaseCommand): 'Landscape': 'landscape', 'Regional': 'regional', 'National': 'national', - 'Global': 'global', + 'Global': '_global', 'Geographic area of expertise': 'geographic_area_of_expertise', 'ProfileOnWeb': 'profile_on_web', 'Updated': 'updated', @@ -245,24 +255,30 @@ class Command(BaseCommand): 'WebOfScience': 'web_of_science', 'Twitter': 'twitter', 'Instagram': 'instagram', - 'ScientificName': 'scientific_name', - 'Class': 'class', + 'ScientificName': 'scientific_name_id', + 'Class': '_class', 'EnglishName': 'english_name', 'Language': 'language', - 'Person': 'person', - 'Function': 'function', - 'Range': 'range', + 'Person': 'person_id', + 'Field': 'field_id', + 'Method': 'method_id', + 'Scale': 'scale_id', + 'Function': 'function_id', + 'Range': 'range_id', 'Endemic': 'endemic', 'SourceURL': 'source_url', 'MountainRange': 'mountain_range', 'TaxonRangeID': 'id', 'SubRangeOrRegion': 'subrange_or_region', - 'Taxon': 'taxon', + 'Taxon': 'taxon_id', 'Distribution': 'distribution', - 'RedList': 'red_list', + 'RedList': 'redlist', 'CountUnit': 'count_unit', 'NumberUnits': 'number_of_units', 'Remarks': 'remarks', + + + 'RangeType': 'range_type', 'Role': 'role', 'RedListCategory': 'red_list_category' @@ -270,22 +286,110 @@ class Command(BaseCommand): def add_arguments(self, parser): parser.add_argument('--path', type=str, help="file path") - parser.add_argument('--model_name', type=str, help="model name", required=True) - parser.add_argument('--app_name', type=str, help="django app name that the model is connected to", default='app', required=True) + parser.add_argument('--csv_folder_path', type=str, help="Path where the csvs are located") + parser.add_argument('--model_name', type=str, help="model name") + parser.add_argument('--app_name', type=str, help="django app name that the model is connected to", default='app') + parser.add_argument('--all', action='store_true', help="'Imports all csvs") + # ./manage.py import --path /home/pcoder/Downloads/gmbadb/csvs/v2-LU_RedListCategories.csv --model_name RedListCategory --app_name app def handle(self, *args, **options): - file_path = options['path'] - _model = apps.get_model(options['app_name'], options['model_name']) - with open(file_path, 'r') as csv_file: - reader = csv.reader(csv_file, delimiter=',', quotechar='|') - first = True - for row in reader: - if first: - # Assume the first row to be the header - header = row - header = [h.strip('"') for h in header] - first = False + csv.register_dialect( + 'mydialect', + delimiter=',', + quotechar='"', + doublequote=True, + skipinitialspace=True, + lineterminator='\n', + quoting=csv.QUOTE_MINIMAL) + csv_folder_path = '/home/pcoder/Downloads/gmbadb/csvs' + if options['csv_folder_path']: + csv_folder_path = options['csv_folder_path'] + if options.get('all'): + print("Doing an import of all csvs") + for csv_file_name, model_name in self.csv_files_models_dict.items(): + print("Importing %s -- %s" % (csv_file_name, model_name)) + if model_name in ['Range', 'NamesImport', 'ImportGeom210915', 'Organization', 'AddElevation', + 'GMBA_V2_Centroid', 'Person', 'PeopleRange', 'PeopleFunction', "PeopleResource", + "RangeCountry", "RangeNameTranslation", "RangeOnlineInfo", "ResourceRange", + "ResourceKeyword", "Repository"]: + # we have already imported and do not want to spend more time redoing stuff continue - _object_dict = {self.cols_to_django_fields.get(key): value.lstrip('"').rstrip('"') for key, value in zip(header, row)} - m = _model(**_object_dict) - m.save() \ No newline at end of file + if csv_folder_path.endswith('/'): + file_path = '%s%s' % (csv_folder_path, csv_file_name) + else: + file_path = '%s/%s' % (csv_folder_path, csv_file_name) + _model = apps.get_model(options.get('app_name', 'app'), model_name) + with open(file_path, 'r') as csv_file: + reader = csv.reader(csv_file, dialect='mydialect') + first = True + for row in reader: + if first: + # Assume the first row to be the header + header = row + header = [h.strip('"') for h in header] + first = False + continue + _object_dict = {str(self.cols_to_django_fields.get(key)): str(value.lstrip('"').rstrip('"')) for key, value in zip(header, row)} + if model_name == 'Range': + # Reinstate range_name key + _object_dict['range_name'] = _object_dict['range_name_id'] + _object_dict.pop('range_name_id') + if model_name == 'Keyword': + _object_dict['keyword'] = _object_dict['keyword_id'] + _object_dict.pop('keyword_id') + if model_name == 'Organization' and 'country_id' in _object_dict: + _object_dict['country'] = _object_dict['country_id'] + _object_dict.pop('country_id') + if model_name == 'PeopleRange' and 'mountain_range' in _object_dict: + _object_dict['range_id'] = _object_dict['mountain_range'] + _object_dict.pop('mountain_range') + if model_name == 'Species' and 'scientific_name_id' in _object_dict: + _object_dict['scientific_name'] = _object_dict['scientific_name_id'] + _object_dict.pop('scientific_name_id') + if model_name == 'TaxonRange' and 'taxon_id' in _object_dict: + _object_dict['taxon'] = _object_dict['taxon_id'] + _object_dict.pop('taxon_id') + if model_name == 'Person' and 'organization_id' in _object_dict: + print("organization_id=%s" % _object_dict['organization_id']) + if _object_dict['organization_id'] == '' or _object_dict['organization_id'] is None: + _object_dict['organization_id'] = '-1' + else: + _object_dict['organization_id'] = int(float(_object_dict['organization_id'])) + print(_object_dict) + if _object_dict is None: + print("Object None for %s" % model_name) + m = _model(**_object_dict) + try: + m.save() + except IntegrityError as ie: + print(str(ie)) + if "UNIQUE constraint failed: range.gmba_v2_id" in str(ie): + print("======") + print("Could not save %s" % json.dumps(_object_dict)) + print("======") + print("Done importing %s" % model_name) + else: + _model = apps.get_model(options.get('app_name', 'app'), options['model_name']) + file_path = options.get('path') + csv.register_dialect( + 'mydialect', + delimiter=',', + quotechar='"', + doublequote=True, + skipinitialspace=True, + lineterminator='\n', + quoting=csv.QUOTE_MINIMAL) + with open(file_path, 'r', newline='') as csv_file: + reader = csv.reader(csv_file, dialect='mydialect') + first = True + for row in reader: + if first: + # Assume the first row to be the header + header = row + header = [h.strip('"') for h in header] + first = False + continue + _object_dict = {self.cols_to_django_fields.get(key): value.lstrip('"').rstrip('"') for key, value in zip(header, row)} + m = _model(**_object_dict) + m.save() + print("Done importing %s" % str(_model)) diff --git a/app/migrations/0005_auto_20211110_1218.py b/app/migrations/0005_auto_20211110_1218.py new file mode 100644 index 0000000..e361851 --- /dev/null +++ b/app/migrations/0005_auto_20211110_1218.py @@ -0,0 +1,33 @@ +# Generated by Django 3.2.5 on 2021-11-10 12:18 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('app', '0004_auto_20211108_0106'), + ] + + operations = [ + migrations.AlterField( + model_name='addelevation', + name='gmba_v2_id', + field=models.TextField(blank=True, null=True), + ), + migrations.AlterField( + model_name='importgeom210915', + name='gmba_v2_id', + field=models.TextField(blank=True, null=True), + ), + migrations.AlterField( + model_name='namesimport', + name='gmba_v2_id', + field=models.TextField(blank=True, null=True), + ), + migrations.AlterField( + model_name='range', + name='gmba_v2_id', + field=models.TextField(blank=True, null=True), + ), + ] diff --git a/app/migrations/0006_rename_scale_peoplefunction_function.py b/app/migrations/0006_rename_scale_peoplefunction_function.py new file mode 100644 index 0000000..06da0a0 --- /dev/null +++ b/app/migrations/0006_rename_scale_peoplefunction_function.py @@ -0,0 +1,18 @@ +# Generated by Django 3.2.5 on 2021-11-10 12:34 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('app', '0005_auto_20211110_1218'), + ] + + operations = [ + migrations.RenameField( + model_name='peoplefunction', + old_name='scale', + new_name='function', + ), + ] diff --git a/app/migrations/0007_auto_20211110_1352.py b/app/migrations/0007_auto_20211110_1352.py new file mode 100644 index 0000000..0291550 --- /dev/null +++ b/app/migrations/0007_auto_20211110_1352.py @@ -0,0 +1,22 @@ +# Generated by Django 3.2.5 on 2021-11-10 13:52 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('app', '0006_rename_scale_peoplefunction_function'), + ] + + operations = [ + migrations.RemoveField( + model_name='resource', + name='WikiDataID', + ), + migrations.AddField( + model_name='resource', + name='wiki_data_id', + field=models.TextField(blank=True, null=True), + ), + ] diff --git a/app/migrations/0008_alter_taxonrange_taxon.py b/app/migrations/0008_alter_taxonrange_taxon.py new file mode 100644 index 0000000..97a3fb3 --- /dev/null +++ b/app/migrations/0008_alter_taxonrange_taxon.py @@ -0,0 +1,18 @@ +# Generated by Django 3.2.5 on 2021-11-10 14:15 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('app', '0007_auto_20211110_1352'), + ] + + operations = [ + migrations.AlterField( + model_name='taxonrange', + name='taxon', + field=models.TextField(blank=True, null=True), + ), + ] diff --git a/app/models.py b/app/models.py index cfeb244..0cb5658 100644 --- a/app/models.py +++ b/app/models.py @@ -93,7 +93,7 @@ class Range(models.Model): source = models.TextField(blank=True, null=True) range_alternate_id = models.TextField(blank=True, null=True) geologic_region = models.TextField(blank=True, null=True) - gmba_v2_id = models.TextField(blank=True, null=True, unique=True) + gmba_v2_id = models.TextField(blank=True, null=True) gmba_v2_id_str = models.TextField(blank=True, null=True) wiki_data_id = models.TextField(blank=True, null=True) wiki_data_url = models.TextField(blank=True, null=True) @@ -155,7 +155,7 @@ class Resource(models.Model): team = models.TextField(blank=True, null=True) inventory = models.TextField(blank=True, null=True) doi = models.TextField(blank=True, null=True) - WikiDataID = models.TextField(blank=True, null=True) + wiki_data_id = models.TextField(blank=True, null=True) class Meta: db_table = 'resource' @@ -318,7 +318,7 @@ class RangeNameTranslation(models.Model): class AddElevation(models.Model): id = models.AutoField(primary_key=True) - gmba_v2_id = models.ForeignKey(Range, models.DO_NOTHING, blank=True, null=True, to_field='gmba_v2_id') + gmba_v2_id = models.TextField(blank=True, null=True) elev_min = models.TextField(blank=True, null=True) elev_max = models.TextField(blank=True, null=True) elev_range = models.TextField(blank=True, null=True) @@ -334,7 +334,7 @@ class GMBA_V2_Centroid(models.Model): class ImportGeom210915(models.Model): id = models.AutoField(primary_key=True) - gmba_v2_id = models.ForeignKey(Range, models.DO_NOTHING, blank=True, null=True, to_field='gmba_v2_id') + gmba_v2_id = models.TextField(blank=True, null=True) area = models.TextField(blank=True, null=True) perimeter = models.TextField(blank=True, null=True) color_all = models.TextField(blank=True, null=True) @@ -355,7 +355,7 @@ class ResourceKeyword(models.Model): class NamesImport(models.Model): id = models.AutoField(primary_key=True) - gmba_v2_id = models.ForeignKey(Range, models.DO_NOTHING, blank=True, null=True, to_field='gmba_v2_id') + gmba_v2_id = models.TextField(blank=True, null=True) cn = models.TextField(blank=True, null=True) de = models.TextField(blank=True, null=True) es = models.TextField(blank=True, null=True) @@ -544,7 +544,7 @@ class Species(models.Model): class PeopleFunction(models.Model): id = models.AutoField(primary_key=True) person = models.ForeignKey(Person, models.DO_NOTHING, blank=True, null=True) - scale = models.ForeignKey(GMBA_function, models.DO_NOTHING, blank=True, null=True) + function = models.ForeignKey(GMBA_function, models.DO_NOTHING, blank=True, null=True) class SpeciesRange(models.Model): @@ -564,7 +564,7 @@ class PeopleRange(models.Model): class TaxonRange(models.Model): id = models.AutoField(primary_key=True) range = models.ForeignKey(Range, models.DO_NOTHING, blank=True, null=True) - taxon = models.ForeignKey(Taxon, models.DO_NOTHING, blank=True, null=True) + taxon = models.TextField(blank=True, null=True) subrange_or_region = models.TextField(blank=True, null=True) taxon_status = models.TextField(blank=True, null=True) distribution = models.TextField(blank=True, null=True)