import django.db.utils from django.core.management.base import BaseCommand from django.apps import apps from django.db.utils import IntegrityError from app.models import * import csv import json class Command(BaseCommand): help = 'Imports csv to DB' csv_files_models_dict = { "v2-LU_GMBA_SpeciesGroups.csv": "GMBA_SpeciesGroup", "v2-LU_Countries.csv": "Country", "v2-LU_Languages.csv": "Language", "v2-LU_Sources.csv": "Source", "v2-LU_RedListCategories.csv": "RedListCategory", "v2-LU_RangeTypes.csv": "RangeType", "v2-LU_PeopleStatus.csv": "PeopleStatus", "v2-LU_TrendsQuantity.csv": "TrendsQuantity", "v2-LU_TrendsQuality.csv": "TrendsQuality", "v2-LU_TaxonUnit.csv": "TaxonUnit", "v2-LU_TaxonStatus.csv": "TaxonStatus", "v2-Ranges-cleaned.csv": "MountainRange", "v2-AddElevations.csv": "AddElevation", "v2-GMBA_Function.csv": "GMBA_function", "v2-Gmba_V2_centroid.csv": "GMBA_V2_Centroid", "v2-ImportGeom210915.csv": "ImportGeom210915", "v2-LanguageLink.csv": "LanguageLink", "v2-Keywords.csv": "Keyword", "v2-NamesImport.csv": "NamesImport", "v2-Organisations-cleaned.csv": "Organisation", "v2-Peaks.csv": "Peak", "v2-People.csv": "Person", "v2-PeopleRanges.csv": "PeopleRange", "v2-PeopleFunction.csv": "PeopleFunction", "v2-Resources.csv": "Resource", "v2-PeopleResources.csv": "PeopleResource", "v2-RangeCountries.csv": "RangeCountry", "v2-RangeNameTranslations.csv": "RangeNameTranslation", "v2-RangeOnlineInfo.csv": "RangeOnlineInfo", "v2-ResourceRanges.csv": "ResourceRange", "v2-ResourceKeywords.csv": "ResourceKeyword", "v2-Repositories.csv": "Repository", "v2-Species.csv": "Species", "v2-Searches.csv": "Search", "v2-TaxonRange.csv": "TaxonRange", "v2-SpeciesRange.csv": "SpeciesRange" } cols_to_django_fields = { "ID": 'id', "Source": 'source', "RangeName": 'range_name_id', "LanguageTranslation": 'language_translation_id', "RangeNameTranslation": 'range_name_translation', "GMBA_ID_v2": 'gmba_v2_id', "Elev_Min": 'elev_min', "Elev_Max": 'elev_max', "Elev_Range": 'elev_range', "TaxonStatus": 'taxon_status', "InfoSource": 'info_source', "URL": 'url', "GMBA function": 'gmba_function', "TaxonUnit": 'taxon_unit', "Range_ID": 'id', "RangeNameMap": 'range_name_map', "RangeNameAscii": 'range_name_ascii', "RangeNameLanguage": 'range_name_language', "MotherRange": 'mother_range', "Feature": 'feature', "MapUnit": 'map_unit', "Level": 'level', "LevelText": 'level_text', "Level_1": 'level_1', "Level_2": 'level_2', "Level_3": 'level_3', "Latitude": 'latitude', "Longitude": 'longitude', "Orogeny": 'orogeny', "Area": 'area', "GMBA_V1_ID": 'GMBA_v1_id', "Countries": 'countries', "Peak_Elevation": 'peak_elevation', "Peak_Name": 'peak_name', "Peak_Latitude": 'peak_latitude', "Peak_Longitude": 'peak_longitude', "Comments": 'comments', "Checked": 'checked', "Range_AlternateID": 'range_alternate_id', "GeologicRegion": 'geologic_region', "GMBA_V2_ID": 'gmba_v2_id', "GMBA_V2_ID_str": 'gmba_v2_id_str', "WikiDataID": 'wiki_data_id', "WikiDataURL": 'wiki_data_url', "Select_300": 'select_300', "Gmba_Narrow": 'gmba_narrow', "Name_FR": 'name_fr', "Name_DE": 'name_de', "Name_ES": 'name_es', "Name_PT": 'name_pt', "Name_CN": 'name_cn', "Name_RU": 'name_ru', "Name_TR": 'name_tr', "Perimeter": 'perimeter', "ColorAll": 'color_all', "ColorBasic": 'color_basic', "Color300": 'color_300', "Elev_Low": 'elev_low', "Elev_High": 'elev_high', "Elev_Avg": 'elev_avg', "gridcode": 'gridcode', "Trend": 'trend', "RepositoryName": 'repository_name', "RepositoryURL": 'repository_url', "Resource": 'resource_id', "Keyword": 'keyword', "Keyword_ID": 'keyword_id', "Mother": 'mother', "CN": 'cn', "DE": 'de', "ES": 'es', "FR": 'fr', "PT": 'pt', "RU": 'ru', "TR": 'tr', "ResourceTitle": 'resource_title_id', "LanguageLetterCode": 'language_letter_code', "LanguageNumberCode": 'language_number_code_id', "OrgNum1": 'org_num1', "Organisation Search": 'organisation_search', "OrgAlphaSearch": 'org_alpha_search', "Organisation English": 'organisation_english', "Organisation 2": 'organisation_2', "Organisation 3": 'organisation_3', "Organisation Original": 'organisation_original', "Acronym": 'acronym', "Street": 'street', "PO Box": 'po_box', "Postcode": 'postcode', "City": 'city', "Region": 'region', "SearchURL": 'search_url', "LatLon": 'lat_long', "URL Org": 'url', "Tel Org": 'tel', "Email Org": 'email', "Country": 'country_id', "Tags": 'tags', "Description": 'description', "Northing": 'northing', "Easting": 'easting', "Category": 'category', "Subject": 'subject', "Title": 'title', "Citation": 'citation', "Type": 'type', "Abstract": 'abstract', "AuthorKeywords": 'author_keywords', "Lat": 'lat', "Lon": 'lon', "Stars": 'stars', "PEGASuS_Check_map_with_author": 'PEGASuS_Check_map_with_author', "PEGASuS_polygon_ID": 'PEGASuS_polygon_ID', "PEGASuS_Polygon_comments": 'PEGASuS_Polygon_comments', "PEGASuS_Assessment_ID": 'PEGASuS_Assessment_ID', "GLORIA": 'gloria', "GNOMO": 'gnomo', "LTER": 'lter', "LTSER": 'ltser', "MIREN": 'miren', "TEAM": 'team', "Inventory": 'inventory', "DOI": 'doi', "ShortName": 'short_name', "FormalName": 'formal_name', "Membership within the UN System": 'membership_within_un_system', "Membership within theĀ UN System": 'membership_within_un_system', "Continent": 'continent', "EU_MS": 'eu_ms', "EEA_MS": 'eea_ms', "ISO3": 'iso3', "ISO2": 'iso2', "Point_Name": 'point_name', "Elevation": 'elevation', "Link": 'link', "Repository": 'repository_id', "SearchString": 'search_string', "SearchDate": 'search_date', "Result": 'result', "NumberOfRecords": 'number_of_records', "Stored": 'stored', "SpeciesGroup": 'species_group', "MrMrs": 'mr_mrs', "First name": 'first_name', "Last name": 'last_name', "Full name": 'full_name', "SearchName": 'search_name', "e-mail 1": 'contact_email', "e-mail 2": 'email_2', "Skype": 'skype', "Professional phone": 'professional_phone', "Mobile number": 'mobile_number', "Field of expertise": 'field_of_expertise', "Biography": 'biography', "Position": 'position', "Status": 'status', "Entry date": 'entry_date', "Newsletter": 'news_letter', "CountryLookup": 'country_lookup', "Organisation": 'organization_id', "Birds": 'birds', "Mammals": 'mammals', 'Reptiles': 'reptiles', 'Amphibians': 'amphibians', 'Fish': 'fish', 'Insects': 'insects', 'Molluscs': 'molluscs', 'Crustaceans': 'crustaceans', 'Arachnids': 'arachnids', 'Angiosperms': 'angiosperms', 'Gymnosperms': 'gymnosperms', 'Fungi': 'fungi', 'Algae': 'algae', 'Microbes': 'microbes', 'Biological field sampling': 'biological_field_sampling', 'Data mining': 'data_mining', 'Remote sensing': 'remote_sensing', 'GIS': 'gis', 'Spatial analysis': 'spatial_analysis', 'Statistical analysis': 'statistical_analysis', 'Modelling': 'modelling', 'Assessment': 'assessment', 'Meta-analysis': 'meta_analysis', 'Synthesis': 'synthesis', 'Qualitative social science methods (interviews, surveys)': 'qualitative_ssm', 'Genetic analyses': 'genetic_analyses', 'Field site': 'field_site', 'Transect': 'transect', 'Mountain top': 'mountain_top', 'Mountain range': 'mountain_range', 'Landscape': 'landscape', 'Regional': 'regional', 'National': 'national', 'Global': '_global', 'Geographic area of expertise': 'geographic_area_of_expertise', 'ProfileOnWeb': 'profile_on_web', 'Updated': 'updated', 'ORCID': 'orcid', 'WebOfScience': 'web_of_science', 'Twitter': 'twitter', 'Instagram': 'instagram', 'ScientificName': 'scientific_name_id', 'Class': '_class', 'EnglishName': 'english_name', 'Language': 'language', 'Person': 'person_id', 'Field': 'field_id', 'Method': 'method_id', 'Scale': 'scale_id', 'Function': 'function_id', 'Range': 'range_id', 'Endemic': 'endemic', 'SourceURL': 'source_url', 'MountainRange': 'mountain_range', 'TaxonRangeID': 'id', 'SubRangeOrRegion': 'subrange_or_region', 'Taxon': 'taxon_id', 'Distribution': 'distribution', 'RedList': 'redlist', 'CountUnit': 'count_unit', 'NumberUnits': 'number_of_units', 'Remarks': 'remarks', 'RangeType': 'range_type', 'Role': 'role', 'RedListCategory': 'red_list_category' } def add_arguments(self, parser): parser.add_argument('--path', type=str, help="file path") parser.add_argument('--csv_folder_path', type=str, help="Path where the csvs are located") parser.add_argument('--model_name', type=str, help="model name") parser.add_argument('--app_name', type=str, help="django app name that the model is connected to", default='app') parser.add_argument('--mother_range_reload', action='store_true', help="Whether we are trying to reload mother " "range relationship") parser.add_argument('--debug', action='store_true', help="Whether we want to debug") parser.add_argument('--all', action='store_true', help="Imports all csvs") # ./manage.py import --path /home/pcoder/Downloads/gmbadb/csvs/v2-LU_RedListCategories.csv --model_name RedListCategory --app_name app def handle(self, *args, **options): csv.register_dialect( 'mydialect', delimiter=',', quotechar='"', doublequote=True, skipinitialspace=True, lineterminator='\n', quoting=csv.QUOTE_MINIMAL) csv_folder_path = '/home/pcoder/Downloads/gmbadb/csvs' if options['csv_folder_path']: csv_folder_path = options['csv_folder_path'] debug = options.get('debug', None) if options.get('all'): print("Doing an import of all csvs") for csv_file_name, model_name in self.csv_files_models_dict.items(): print("Importing %s -- %s" % (csv_file_name, model_name)) models_to_ignore = ['Range', 'NamesImport', 'ImportGeom210915', 'Organization', 'AddElevation', 'GMBA_V2_Centroid', 'Person', 'PeopleRange', 'PeopleFunction', "PeopleResource", "RangeCountry", "RangeNameTranslation", "RangeOnlineInfo", "ResourceRange", "ResourceKeyword", "Repository"] models_to_ignore = [] if model_name in models_to_ignore: # we have already imported and do not want to spend more time redoing stuff continue if csv_folder_path.endswith('/'): file_path = '%s%s' % (csv_folder_path, csv_file_name) else: file_path = '%s/%s' % (csv_folder_path, csv_file_name) _model = apps.get_model(options.get('app_name', 'app'), model_name) with open(file_path, 'r') as csv_file: reader = csv.reader(csv_file, dialect='mydialect') first = True for row in reader: if first: # Assume the first row to be the header header = row header = [h.strip('"') for h in header] first = False continue _object_dict = {str(self.cols_to_django_fields.get(key)): str(value.lstrip('"').rstrip('"')) for key, value in zip(header, row)} _object_dict = handle_object_dict(_object_dict, model_name) m = _model(**_object_dict) try: m.save() except IntegrityError as ie: print(str(ie)) if "UNIQUE constraint failed: range.gmba_v2_id" in str(ie): print("======") print("Could not save %s" % json.dumps(_object_dict)) print("======") print("Done importing %s" % model_name) else: _model = apps.get_model(options.get('app_name', 'app'), options['model_name']) model_name = options['model_name'] mother_range_reload = options.get('mother_range_reload') k = '' csv_file_name = '' for k, v in self.csv_files_models_dict.items(): if v.strip().lower() == model_name.strip().lower(): csv_file_name = k if k == '': raise Exception('Could not find a csv file name for model %s' % model_name) if csv_folder_path.endswith('/'): file_path = '%s%s' % (csv_folder_path, csv_file_name) else: file_path = '%s/%s' % (csv_folder_path, csv_file_name) csv.register_dialect( 'mydialect', delimiter=',', quotechar='"', doublequote=True, skipinitialspace=True, lineterminator='\n', quoting=csv.QUOTE_MINIMAL) with open(file_path, 'r', newline='') as csv_file: reader = csv.reader(csv_file, dialect='mydialect') first = True error_row_count = 0 total = 0 for row in reader: total += 1 if first: # Assume the first row to be the header header = row header = [h.strip('"') for h in header] first = False continue if mother_range_reload: # We have loaded the range model already and we are attempting to construct the mother_range # relationship try: this_range = MountainRange.objects.get(id=int(row[0])) r = MountainRange.objects.get(id=int(row[5])) # default Range which exists already this_range.mother_range = r this_range.save() except MountainRange.DoesNotExist as dne: print("this range = %s, mother range --> %s" % (row[0], row[5])) print(str(dne)) error_row_count += 1 except ValueError as ve: print('0 = %s and 5 = %s' % (row[0], row[5])) print(str(ve)) error_row_count += 1 continue _object_dict = {self.cols_to_django_fields.get(key): value.lstrip('"').rstrip('"') for key, value in zip(header, row)} _object_dict = handle_object_dict(_object_dict, model_name, debug=debug) try: m = _model(**_object_dict) m.save() except django.db.utils.IntegrityError as ex1: error_row_count += 1 print(str(ex1)) print("**********************") print(str(_object_dict)) print("**********************") except Exception as ex: error_row_count += 1 print('-----') print(str(ex)) print('----------------') continue print("Done importing %s" % str(_model)) print("Total rows = %s, error rows = %s" % (total, error_row_count)) def handle_object_dict(object_dict, model_name, debug=False): if model_name == 'Resource': object_dict['url'] = object_dict['url'].strip("#") for i in ['PEGASuS_Check_map_with_author', 'gloria', 'gnomo', 'lter', 'ltser', 'miren', 'team', 'inventory']: if i in object_dict: object_dict[i] = True if object_dict[i].lower().strip() == 'true' else False if model_name == 'MountainRange': # Reinstate range_name key object_dict['range_name'] = object_dict['range_name_id'] object_dict.pop('range_name_id') connected_fields = [{'range_name_language': Language}, {'feature': RangeType}] for f in connected_fields: for k, v in f.items(): if k in object_dict: if debug: print('Getting %s of %s' % (k, object_dict[k])) if object_dict[k] == '': object_dict[k] = 0 object_dict[k] = v.objects.get(id=int(object_dict[k])) if 'mother_range' in object_dict: if debug: print('Getting mother_range of %s' % object_dict['mother_range']) if object_dict['mother_range'] == '': object_dict['mother_range'] = 0 try: object_dict['mother_range'] = MountainRange.objects.get(id=0) # default Range which exists already except MountainRange.DoesNotExist as dne: print(str(dne)) for i in ['checked']: if i in object_dict: object_dict[i] = True if object_dict[i].lower().strip() == 'true' else False for i in ['select_300']: if i in object_dict: object_dict[i] = True if object_dict[i].lower().strip() == 'x' else False for i in ['gmba_narrow']: if i in object_dict: object_dict[i] = True if object_dict[i].lower().strip() == 'x' else False # area field can't be empty if 'area' in object_dict: if object_dict['area'] == '': object_dict['area'] = -1 if model_name == 'Keyword': if object_dict['mother'] == '': object_dict['mother'] = 0 try: object_dict['mother'] = Keyword.objects.get(keyword_id=object_dict['mother']) # default Keyword which exists already except Keyword.DoesNotExist as dne: object_dict['mother'] = Keyword.objects.get(keyword_id=0) print(str(dne)) if model_name == 'Organization' and 'country_id' in object_dict: object_dict['country'] = object_dict['country_id'] object_dict.pop('country_id') if model_name == 'PeopleRange' and 'mountain_range' in object_dict: object_dict['range_id'] = object_dict['mountain_range'] object_dict.pop('mountain_range') if model_name == 'Species' and 'scientific_name_id' in object_dict: object_dict['scientific_name'] = object_dict['scientific_name_id'] object_dict.pop('scientific_name_id') if model_name == 'TaxonRange' and 'taxon_id' in object_dict: object_dict['taxon'] = object_dict['taxon_id'] object_dict.pop('taxon_id') if model_name == 'Person' and 'organization_id' in object_dict: if debug: print("organization_id=%s" % object_dict['organization_id']) if object_dict['organization_id'] == '' or object_dict['organization_id'] is None: object_dict['organization_id'] = '-1' else: object_dict['organization_id'] = int(float(object_dict['organization_id'])) if 'status' in object_dict: if debug: print('Getting status of %s' % object_dict['status']) if object_dict['status'] == '': object_dict['status'] = 0 object_dict['status'] = PeopleStatus.objects.get(id=int(object_dict['status'])) if 'country_lookup' in object_dict: if object_dict['country_lookup'].strip() == '' or object_dict['country_lookup'] is None: object_dict['country_lookup'] = 0 if debug: print('Getting country of %s' % object_dict['country_lookup']) object_dict['country'] = Country.objects.get(id=int(object_dict['country_lookup'])) object_dict.pop('country_lookup') for i in ['news_letter', 'birds', 'mammals', 'reptiles', 'amphibians', 'fish', 'insects', 'molluscs', 'crustaceans', 'arachnids', 'angiosperms', 'gymnosperms', 'fungi', 'algae', 'microbes', 'biological_field_sampling', 'data_mining', 'remote_sensing', 'gis', 'spatial_analysis', 'statistical_analysis', 'modelling', 'assessment', 'meta_analysis', 'synthesis', 'qualitative_ssm', 'genetic_analyses', 'field_site', 'transect', 'mountain_top', 'mountain_range', 'landscape', 'regional', 'national', '_global', 'profile_on_web', 'updated']: if i in object_dict: object_dict[i] = True if object_dict[i].lower().strip() == 'true' else False if object_dict is None: if debug: print("Object None for %s" % model_name) # else: # print(object_dict) if str(object_dict) is not None else "Str object dict is None" return object_dict