gmba_django/app/management/commands/import.py

434 lines
18 KiB
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from django.core.management.base import BaseCommand, CommandError
from django.apps import apps
from django.db.utils import IntegrityError
from app.models import *
import csv
import json
class Command(BaseCommand):
    """Import the CSV files listed in ``csv_files_models_dict`` into Django models.

    Two modes are supported:

    * ``--all`` imports every CSV in ``csv_files_models_dict`` from the CSV
      folder; rows rejected with an ``IntegrityError`` are reported and skipped.
    * ``--model_name NAME`` imports the single CSV belonging to that model;
      any error aborts the command.

    Example::

        ./manage.py import --path /home/pcoder/Downloads/gmbadb/csvs/v2-LU_RedListCategories.csv --model_name RedListCategory --app_name app
    """

    help = 'Imports csv to DB'

    # Folder searched for the csv files when --csv_folder_path is not given.
    default_csv_folder_path = '/home/pcoder/Downloads/gmbadb/csvs'

    # Name under which the csv dialect used by the importer is registered.
    csv_dialect_name = 'mydialect'

    # Models skipped by --all (e.g. because they were already imported in an
    # earlier run). Empty means "import everything".
    models_to_ignore = ()

    # csv file name -> model name (looked up with apps.get_model). With --all
    # the files are imported in this order.
    csv_files_models_dict = {
        "v2-LU_GMBA_SpeciesGroups.csv": "GMBA_SpeciesGroup",
        "v2-LU_Countries.csv": "Country",
        "v2-LU_Languages.csv": "Language",
        "v2-LU_Sources.csv": "Source",
        "v2-LU_RedListCategories.csv": "RedListCategory",
        "v2-LU_RangeTypes.csv": "RangeType",
        "v2-LU_PeopleStatus.csv": "PeopleStatus",
        "v2-LU_TrendsQuantity.csv": "TrendsQuantity",
        "v2-LU_TrendsQuality.csv": "TrendsQuality",
        "v2-LU_TaxonUnit.csv": "TaxonUnit",
        "v2-LU_TaxonStatus.csv": "TaxonStatus",
        "v2-Ranges-cleaned.csv": "Range",
        "v2-AddElevations.csv": "AddElevation",
        "v2-GMBA_Function.csv": "GMBA_function",
        "v2-Gmba_V2_centroid.csv": "GMBA_V2_Centroid",
        "v2-ImportGeom210915.csv": "ImportGeom210915",
        "v2-LanguageLink.csv": "LanguageLink",
        "v2-Keywords.csv": "Keyword",
        "v2-NamesImport.csv": "NamesImport",
        "v2-Organisations-cleaned.csv": "Organization",
        "v2-Peaks.csv": "Peak",
        "v2-People.csv": "Person",
        "v2-PeopleRanges.csv": "PeopleRange",
        "v2-PeopleFunction.csv": "PeopleFunction",
        "v2-Resources.csv": "Resource",
        "v2-PeopleResources.csv": "PeopleResource",
        "v2-RangeCountries.csv": "RangeCountry",
        "v2-RangeNameTranslations.csv": "RangeNameTranslation",
        "v2-RangeOnlineInfo.csv": "RangeOnlineInfo",
        "v2-ResourceRanges.csv": "ResourceRange",
        "v2-ResourceKeywords.csv": "ResourceKeyword",
        "v2-Repositories.csv": "Repository",
        "v2-Species.csv": "Species",
        "v2-Searches.csv": "Search",
        "v2-TaxonRange.csv": "TaxonRange",
        "v2-SpeciesRange.csv": "SpeciesRange"
    }

    # csv column header -> keyword passed to the model constructor. The table
    # is shared by all csv files; several headers map to the same keyword.
    cols_to_django_fields = {
        "ID": 'id',
        "Source": 'source',
        "RangeName": 'range_name_id',
        "LanguageTranslation": 'language_translation_id',
        "RangeNameTranslation": 'range_name_translation',
        "GMBA_ID_v2": 'gmba_v2_id',
        "Elev_Min": 'elev_min',
        "Elev_Max": 'elev_max',
        "Elev_Range": 'elev_range',
        "TaxonStatus": 'taxon_status',
        "InfoSource": 'info_source',
        "URL": 'url',
        "GMBA function": 'gmba_function',
        "TaxonUnit": 'taxon_unit',
        "Range_ID": 'id',
        "RangeNameMap": 'range_name_map',
        "RangeNameAscii": 'range_name_ascii',
        "RangeNameLanguage": 'range_name_language',
        "MotherRange": 'mother_range',
        "Feature": 'feature',
        "MapUnit": 'map_unit',
        "Level": 'level',
        "LevelText": 'level_text',
        "Level_1": 'level_1',
        "Level_2": 'level_2',
        "Level_3": 'level_3',
        "Latitude": 'latitude',
        "Longitude": 'longitude',
        "Orogeny": 'orogeny',
        "Area": 'area',
        "GMBA_V1_ID": 'GMBA_v1_id',
        "Countries": 'countries',
        "Peak_Elevation": 'peak_elevation',
        "Peak_Name": 'peak_name',
        "Peak_Latitude": 'peak_latitude',
        "Peak_Longitude": 'peak_longitude',
        "Comments": 'comments',
        "Checked": 'checked',
        "Range_AlternateID": 'range_alternate_id',
        "GeologicRegion": 'geologic_region',
        "GMBA_V2_ID": 'gmba_v2_id',
        "GMBA_V2_ID_str": 'gmba_v2_id_str',
        "WikiDataID": 'wiki_data_id',
        "WikiDataURL": 'wiki_data_url',
        "Select_300": 'select_300',
        "Gmba_Narrow": 'gmba_narrow',
        "Name_FR": 'name_fr',
        "Name_DE": 'name_de',
        "Name_ES": 'name_es',
        "Name_PT": 'name_pt',
        "Name_CN": 'name_cn',
        "Name_RU": 'name_ru',
        "Name_TR": 'name_tr',
        "Perimeter": 'perimeter',
        "ColorAll": 'color_all',
        "ColorBasic": 'color_basic',
        "Color300": 'color_300',
        "Elev_Low": 'elev_low',
        "Elev_High": 'elev_high',
        "Elev_Avg": 'elev_avg',
        "gridcode": 'gridcode',
        "Trend": 'trend',
        "RepositoryName": 'repository_name',
        "RepositoryURL": 'repository_url',
        "Resource": 'resource_id',
        "Keyword": 'keyword_id',
        "Keyword_ID": 'keyword_id',
        "Mother": 'mother',
        "CN": 'cn',
        "DE": 'de',
        "ES": 'es',
        "FR": 'fr',
        "PT": 'pt',
        "RU": 'ru',
        "TR": 'tr',
        "ResourceTitle": 'resource_title_id',
        "LanguageLetterCode": 'language_letter_code',
        "LanguageNumberCode": 'language_number_code_id',
        "OrgNum1": 'org_num1',
        "Organisation Search": 'organisation_search',
        "OrgAlphaSearch": 'org_alpha_search',
        "Organisation English": 'organisation_english',
        "Organisation 2": 'organisation_2',
        "Organisation 3": 'organisation_3',
        "Organisation Original": 'organisation_original',
        "Acronym": 'acronym',
        "Street": 'street',
        "PO Box": 'po_box',
        "Postcode": 'postcode',
        "City": 'city',
        "Region": 'region',
        "SearchURL": 'search_url',
        "LatLon": 'lat_long',
        "URL Org": 'url',
        "Tel Org": 'tel',
        "Email Org": 'email',
        "Country": 'country_id',
        "Tags": 'tags',
        "Description": 'description',
        "Northing": 'northing',
        "Easting": 'easting',
        "Category": 'category',
        "Subject": 'subject',
        "Title": 'title',
        "Citation": 'citation',
        "Type": 'type',
        "Abstract": 'abstract',
        "AuthorKeywords": 'author_keywords',
        "Lat": 'lat',
        "Lon": 'lon',
        "Stars": 'stars',
        "PEGASuS_Check_map_with_author": 'PEGASuS_Check_map_with_author',
        "PEGASuS_polygon_ID": 'PEGASuS_polygon_ID',
        "PEGASuS_Polygon_comments": 'PEGASuS_Polygon_comments',
        "PEGASuS_Assessment_ID": 'PEGASuS_Assessment_ID',
        "GLORIA": 'gloria',
        "GNOMO": 'gnomo',
        "LTER": 'lter',
        "LTSER": 'ltser',
        "MIREN": 'miren',
        "TEAM": 'team',
        "Inventory": 'inventory',
        "DOI": 'doi',
        "ShortName": 'short_name',
        "FormalName": 'formal_name',
        # NOTE(review): the next two keys look identical as displayed; the
        # hosting page warned that the file contains invisible Unicode
        # characters, so they may differ in bytes (e.g. a different kind of
        # space). Confirm against the raw file before removing either one.
        "Membership within the UN System": 'membership_within_un_system',
        "Membership within the UN System": 'membership_within_un_system',
        "Continent": 'continent',
        "EU_MS": 'eu_ms',
        "EEA_MS": 'eea_ms',
        "ISO3": 'iso3',
        "ISO2": 'iso2',
        "Point_Name": 'point_name',
        "Elevation": 'elevation',
        "Link": 'link',
        "Repository": 'repository_id',
        "SearchString": 'search_string',
        "SearchDate": 'search_date',
        "Result": 'result',
        "NumberOfRecords": 'number_of_records',
        "Stored": 'stored',
        "SpeciesGroup": 'species_group',
        "MrMrs": 'mr_mrs',
        "First name": 'first_name',
        "Last name": 'last_name',
        "Full name": 'full_name',
        "SearchName": 'search_name',
        "e-mail 1": 'contact_email',
        "e-mail 2": 'email_2',
        "Skype": 'skype',
        "Professional phone": 'professional_phone',
        "Mobile number": 'mobile_number',
        "Field of expertise": 'field_of_expertise',
        "Biography": 'biography',
        "Position": 'position',
        "Status": 'status',
        "Entry date": 'entry_date',
        "Newsletter": 'news_letter',
        "CountryLookup": 'country_lookup',
        "Organisation": 'organization_id',
        "Birds": 'birds',
        "Mammals": 'mammals',
        'Reptiles': 'reptiles',
        'Amphibians': 'amphibians',
        'Fish': 'fish',
        'Insects': 'insects',
        'Molluscs': 'molluscs',
        'Crustaceans': 'crustaceans',
        'Arachnids': 'arachnids',
        'Angiosperms': 'angiosperms',
        'Gymnosperms': 'gymnosperms',
        'Fungi': 'fungi',
        'Algae': 'algae',
        'Microbes': 'microbes',
        'Biological field sampling': 'biological_field_sampling',
        'Data mining': 'data_mining',
        'Remote sensing': 'remote_sensing',
        'GIS': 'gis',
        'Spatial analysis': 'spatial_analysis',
        'Statistical analysis': 'statistical_analysis',
        'Modelling': 'modelling',
        'Assessment': 'assessment',
        'Meta-analysis': 'meta_analysis',
        'Synthesis': 'synthesis',
        'Qualitative social science methods (interviews, surveys)': 'qualitative_ssm',
        'Genetic analyses': 'genetic_analyses',
        'Field site': 'field_site',
        'Transect': 'transect',
        'Mountain top': 'mountain_top',
        'Mountain range': 'mountain_range',
        'Landscape': 'landscape',
        'Regional': 'regional',
        'National': 'national',
        'Global': '_global',
        'Geographic area of expertise': 'geographic_area_of_expertise',
        'ProfileOnWeb': 'profile_on_web',
        'Updated': 'updated',
        'ORCID': 'orcid',
        'WebOfScience': 'web_of_science',
        'Twitter': 'twitter',
        'Instagram': 'instagram',
        'ScientificName': 'scientific_name_id',
        'Class': '_class',
        'EnglishName': 'english_name',
        'Language': 'language',
        'Person': 'person_id',
        'Field': 'field_id',
        'Method': 'method_id',
        'Scale': 'scale_id',
        'Function': 'function_id',
        'Range': 'range_id',
        'Endemic': 'endemic',
        'SourceURL': 'source_url',
        'MountainRange': 'mountain_range',
        'TaxonRangeID': 'id',
        'SubRangeOrRegion': 'subrange_or_region',
        'Taxon': 'taxon_id',
        'Distribution': 'distribution',
        'RedList': 'redlist',
        'CountUnit': 'count_unit',
        'NumberUnits': 'number_of_units',
        'Remarks': 'remarks',
        'RangeType': 'range_type',
        'Role': 'role',
        'RedListCategory': 'red_list_category'
    }

    def add_arguments(self, parser):
        """Declare the command line options of the importer."""
        parser.add_argument(
            '--path', type=str,
            help="Path of a single csv file to import (used with --model_name)")
        parser.add_argument('--csv_folder_path', type=str, help="Path where the csvs are located")
        parser.add_argument('--model_name', type=str, help="model name")
        parser.add_argument('--app_name', type=str, help="django app name that the model is connected to", default='app')
        parser.add_argument('--all', action='store_true', help="Imports all csvs")

    def handle(self, *args, **options):
        """Run the import in ``--all`` or single-model mode.

        Raises:
            CommandError: if neither ``--all`` nor ``--model_name`` is given,
                if no csv file is known for the requested model and no
                ``--path`` was supplied, or if a csv contains a column that
                has no entry in ``cols_to_django_fields``.
        """
        # Registering once is enough; both modes read with the same dialect.
        csv.register_dialect(
            self.csv_dialect_name,
            delimiter=',',
            quotechar='"',
            doublequote=True,
            skipinitialspace=True,
            lineterminator='\n',
            quoting=csv.QUOTE_MINIMAL)

        csv_folder_path = options.get('csv_folder_path') or self.default_csv_folder_path
        app_name = options.get('app_name') or 'app'

        if options.get('all'):
            print("Doing an import of all csvs")
            for csv_file_name, model_name in self.csv_files_models_dict.items():
                print("Importing %s -- %s" % (csv_file_name, model_name))
                if model_name in self.models_to_ignore:
                    # Already imported; do not spend more time redoing it.
                    continue
                file_path = self._join_path(csv_folder_path, csv_file_name)
                model = apps.get_model(app_name, model_name)
                self._import_csv(file_path, model, model_name, skip_integrity_errors=True)
                print("Done importing %s" % model_name)
            return

        requested_name = options.get('model_name')
        if not requested_name:
            raise CommandError('Either --all or --model_name must be given')
        model = apps.get_model(app_name, requested_name)

        # Use the spelling from the table from here on: handle_object_dict
        # compares model names case-sensitively, while the lookup does not.
        csv_file_name, model_name = self._lookup_csv_file(requested_name)

        file_path = options.get('path')
        if not file_path:
            if csv_file_name is None:
                raise CommandError('Could not find a csv file name for model %s' % requested_name)
            file_path = self._join_path(csv_folder_path, csv_file_name)

        self._import_csv(file_path, model, model_name)
        print("Done importing %s" % str(model))

    @staticmethod
    def _join_path(folder, file_name):
        """Return ``folder`` and ``file_name`` joined by exactly one added '/'."""
        if folder.endswith('/'):
            return '%s%s' % (folder, file_name)
        return '%s/%s' % (folder, file_name)

    def _lookup_csv_file(self, model_name):
        """Return ``(csv_file_name, canonical_model_name)`` for ``model_name``.

        The comparison ignores case and surrounding whitespace. When the model
        is not in ``csv_files_models_dict`` the result is
        ``(None, model_name)``.
        """
        wanted = model_name.strip().lower()
        for csv_file_name, known_model in self.csv_files_models_dict.items():
            if known_model.strip().lower() == wanted:
                return csv_file_name, known_model
        return None, model_name

    def _fields_for_header(self, header, file_path):
        """Translate a csv header row into model constructor keywords.

        Raises:
            CommandError: if a column has no entry in
                ``cols_to_django_fields``. Such a column could only produce an
                invalid keyword for the model constructor, so fail early and
                name it.
        """
        columns = [name.strip('"') for name in header]
        unmapped = [name for name in columns if name not in self.cols_to_django_fields]
        if unmapped:
            raise CommandError(
                'No field mapping for column(s) %s in %s'
                % (', '.join(repr(name) for name in unmapped), file_path))
        return [self.cols_to_django_fields[name] for name in columns]

    def _import_csv(self, file_path, model, model_name, skip_integrity_errors=False):
        """Create and save one ``model`` instance per data row of ``file_path``.

        The first row is taken to be the header. With
        ``skip_integrity_errors`` set, a row rejected by the database with an
        ``IntegrityError`` is reported on stdout and the import continues;
        otherwise the error propagates.
        """
        # newline='' lets the csv module do its own newline handling, as its
        # documentation asks for.
        with open(file_path, 'r', newline='') as csv_file:
            reader = csv.reader(csv_file, dialect=self.csv_dialect_name)
            header = next(reader, None)
            if header is None:
                # Empty file: nothing to import.
                return
            fields = self._fields_for_header(header, file_path)
            for row in reader:
                # Columns sharing a keyword overwrite each other; the last wins.
                object_dict = {field: value.strip('"') for field, value in zip(fields, row)}
                object_dict = handle_object_dict(object_dict, model_name)
                instance = model(**object_dict)
                if not skip_integrity_errors:
                    instance.save()
                    continue
                try:
                    instance.save()
                except IntegrityError as ie:
                    print(str(ie))
                    if "UNIQUE constraint failed: range.gmba_v2_id" in str(ie):
                        print("======")
                        print("Could not save %s" % json.dumps(object_dict))
                        print("======")
# model name -> (key produced by the column mapping, key the model expects).
_MODEL_KEY_RENAMES = {
    'Range': ('range_name_id', 'range_name'),
    'Keyword': ('keyword_id', 'keyword'),
    'Organization': ('country_id', 'country'),
    'PeopleRange': ('mountain_range', 'range_id'),
    'Species': ('scientific_name_id', 'scientific_name'),
    'TaxonRange': ('taxon_id', 'taxon'),
}

# Person keys whose csv text is turned into a bool ('true' in any case -> True).
_PERSON_BOOLEAN_KEYS = (
    'news_letter', 'birds', 'mammals', 'reptiles', 'amphibians', 'fish', 'insects',
    'molluscs', 'crustaceans', 'arachnids', 'angiosperms', 'gymnosperms', 'fungi',
    'algae', 'microbes', 'biological_field_sampling', 'data_mining', 'remote_sensing',
    'gis', 'spatial_analysis', 'statistical_analysis', 'modelling', 'assessment',
    'meta_analysis', 'synthesis', 'qualitative_ssm', 'genetic_analyses', 'field_site',
    'transect', 'mountain_top', 'mountain_range', 'landscape', 'regional', 'national',
    '_global', 'profile_on_web', 'updated',
)


def _is_blank(value):
    """Return True for None and for strings that are empty or only whitespace."""
    return value is None or str(value).strip() == ''


def handle_object_dict(object_dict, model_name):
    """Adapt one csv row to the keyword arguments expected by ``model_name``.

    ``object_dict`` is modified in place and also returned.

    * For the models in ``_MODEL_KEY_RENAMES`` one key is renamed; a row that
      lacks the key is left untouched.
    * For ``Person`` rows that carry ``organization_id``: the organisation id
      becomes an int (``'-1'`` when blank), ``status`` is replaced by the
      ``PeopleStatus`` with that id (0 when blank), ``country_lookup`` is
      replaced by a ``country`` entry holding the ``Country`` with that id
      (0 when blank), and the keys in ``_PERSON_BOOLEAN_KEYS`` become bools.

    NOTE(review): the file this was recovered from had lost its indentation;
    nesting the status / country / boolean handling and the final row print
    under the Person branch is an assumption - confirm against the original.

    Args:
        object_dict: mapping of model keyword -> csv text, or None.
        model_name: model name, compared case-sensitively.

    Returns:
        The same ``object_dict`` object (None if None was passed in).
    """
    if object_dict is None:
        # Nothing to adapt; report it before anything tries to index None.
        print("Object None for %s" % model_name)
        return object_dict

    rename = _MODEL_KEY_RENAMES.get(model_name)
    if rename is not None:
        old_key, new_key = rename
        if old_key in object_dict:
            object_dict[new_key] = object_dict.pop(old_key)

    if model_name == 'Person' and 'organization_id' in object_dict:
        organization_id = object_dict['organization_id']
        print("organization_id=%s" % organization_id)
        if _is_blank(organization_id):
            object_dict['organization_id'] = '-1'
        else:
            # Accepts float-looking ids such as '12.0'.
            object_dict['organization_id'] = int(float(organization_id))

        if 'status' in object_dict:
            status = object_dict['status']
            print('Getting status of %s' % status)
            if _is_blank(status):
                status = 0
            object_dict['status'] = PeopleStatus.objects.get(id=int(status))

        if 'country_lookup' in object_dict:
            country_lookup = object_dict.pop('country_lookup')
            print('Getting country of %s' % country_lookup)
            if _is_blank(country_lookup):
                country_lookup = 0
            object_dict['country'] = Country.objects.get(id=int(country_lookup))

        for key in _PERSON_BOOLEAN_KEYS:
            if key in object_dict:
                object_dict[key] = object_dict[key].lower().strip() == 'true'
        print(object_dict)

    return object_dict