gmba_django/app/management/commands/import.py

396 lines
16 KiB
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import csv
import json
import os

from django.apps import apps
from django.core.management.base import BaseCommand, CommandError
from django.db.utils import IntegrityError
class Command(BaseCommand):
    """Import CSV files into Django models.

    Two modes are supported:

    * ``--all`` walks ``csv_files_models_dict`` in order and loads every
      listed file from ``--csv_folder_path`` into the model named next to it.
      Models listed in ``_skipped_on_all`` are skipped unless ``--no_skip``
      is given.  A row that fails with ``IntegrityError`` is reported and the
      import continues with the next row.
    * ``--path`` + ``--model_name`` loads a single file into a single model.
      An ``IntegrityError`` propagates and aborts the command.

    In both modes the first CSV row is treated as the header.  Each header
    cell is translated to a model keyword argument via
    ``cols_to_django_fields``; a header cell without a mapping raises
    ``CommandError``.  Per-row dictionaries are echoed only when the command
    runs with ``--verbosity 2`` or higher.

    Example::

        ./manage.py import --path /home/pcoder/Downloads/gmbadb/csvs/v2-LU_RedListCategories.csv --model_name RedListCategory --app_name app
    """

    help = 'Imports csv to DB'

    # Folder used by --all when --csv_folder_path is not supplied.
    _default_csv_folder_path = '/home/pcoder/Downloads/gmbadb/csvs'

    # Name under which the CSV format is registered with the csv module.
    _csv_dialect = 'mydialect'

    # Models that --all leaves out by default; the original author noted they
    # were already imported and not worth re-importing.  Use --no_skip to
    # import them as well.
    _skipped_on_all = (
        'Range', 'NamesImport', 'ImportGeom210915', 'Organization',
        'AddElevation', 'GMBA_V2_Centroid', 'Person', 'PeopleRange',
        'PeopleFunction', 'PeopleResource', 'RangeCountry',
        'RangeNameTranslation', 'RangeOnlineInfo', 'ResourceRange',
        'ResourceKeyword', 'Repository',
    )

    # CSV file name -> model name.  --all processes the entries in this order.
    # NOTE(review): presumably ordered so that lookup tables are loaded before
    # the tables that reference them -- confirm before reordering.
    csv_files_models_dict = {
        "v2-LU_GMBA_SpeciesGroups.csv": "GMBA_SpeciesGroup",
        "v2-LU_Countries.csv": "Country",
        "v2-LU_Languages.csv": "Language",
        "v2-LU_Sources.csv": "Source",
        "v2-LU_RedListCategories.csv": "RedListCategory",
        "v2-LU_RangeTypes.csv": "RangeType",
        "v2-LU_PeopleStatus.csv": "PeopleStatus",
        "v2-LU_TrendsQuantity.csv": "TrendsQuantity",
        "v2-LU_TrendsQuality.csv": "TrendsQuality",
        "v2-LU_TaxonUnit.csv": "TaxonUnit",
        "v2-LU_TaxonStatus.csv": "TaxonStatus",
        "v2-Ranges-cleaned.csv": "Range",
        "v2-AddElevations.csv": "AddElevation",
        "v2-GMBA_Function.csv": "GMBA_function",
        "v2-Gmba_V2_centroid.csv": "GMBA_V2_Centroid",
        "v2-ImportGeom210915.csv": "ImportGeom210915",
        "v2-LanguageLink.csv": "LanguageLink",
        "v2-Keywords.csv": "Keyword",
        "v2-NamesImport.csv": "NamesImport",
        "v2-Organisations-cleaned.csv": "Organization",
        "v2-Peaks.csv": "Peak",
        "v2-People.csv": "Person",
        "v2-PeopleRanges.csv": "PeopleRange",
        "v2-PeopleFunction.csv": "PeopleFunction",
        "v2-Resources.csv": "Resource",
        "v2-PeopleResources.csv": "PeopleResource",
        "v2-RangeCountries.csv": "RangeCountry",
        "v2-RangeNameTranslations.csv": "RangeNameTranslation",
        "v2-RangeOnlineInfo.csv": "RangeOnlineInfo",
        "v2-ResourceRanges.csv": "ResourceRange",
        "v2-ResourceKeywords.csv": "ResourceKeyword",
        "v2-Repositories.csv": "Repository",
        "v2-Species.csv": "Species",
        "v2-Searches.csv": "Search",
        "v2-TaxonRange.csv": "TaxonRange",
        "v2-SpeciesRange.csv": "SpeciesRange",
    }

    # CSV header cell -> keyword argument passed to the model constructor.
    cols_to_django_fields = {
        "ID": 'id',
        "Source": 'source',
        "RangeName": 'range_name_id',
        "LanguageTranslation": 'language_translation_id',
        "RangeNameTranslation": 'range_name_translation',
        "GMBA_ID_v2": 'gmba_v2_id',
        "Elev_Min": 'elev_min',
        "Elev_Max": 'elev_max',
        "Elev_Range": 'elev_range',
        "TaxonStatus": 'taxon_status',
        "InfoSource": 'info_source',
        "URL": 'url',
        "GMBA function": 'gmba_function',
        "TaxonUnit": 'taxon_unit',
        "Range_ID": 'id',
        "RangeNameMap": 'range_name_map',
        "RangeNameAscii": 'range_name_ascii',
        "RangeNameLanguage": 'range_name_language',
        "MotherRange": 'mother_range',
        "Feature": 'feature',
        "MapUnit": 'map_unit',
        "Level": 'level',
        "LevelText": 'level_text',
        "Level_1": 'level_1',
        "Level_2": 'level_2',
        "Level_3": 'level_3',
        "Latitude": 'latitude',
        "Longitude": 'longitude',
        "Orogeny": 'orogeny',
        "Area": 'area',
        "GMBA_V1_ID": 'GMBA_v1_id',
        "Countries": 'countries',
        "Peak_Elevation": 'peak_elevation',
        "Peak_Name": 'peak_name',
        "Peak_Latitude": 'peak_latitude',
        "Peak_Longitude": 'peak_longitude',
        "Comments": 'comments',
        "Checked": 'checked',
        "Range_AlternateID": 'range_alternate_id',
        "GeologicRegion": 'geologic_region',
        "GMBA_V2_ID": 'gmba_v2_id',
        "GMBA_V2_ID_str": 'gmba_v2_id_str',
        "WikiDataID": 'wiki_data_id',
        "WikiDataURL": 'wiki_data_url',
        "Select_300": 'select_300',
        "Gmba_Narrow": 'gmba_narrow',
        "Name_FR": 'name_fr',
        "Name_DE": 'name_de',
        "Name_ES": 'name_es',
        "Name_PT": 'name_pt',
        "Name_CN": 'name_cn',
        "Name_RU": 'name_ru',
        "Name_TR": 'name_tr',
        "Perimeter": 'perimeter',
        "ColorAll": 'color_all',
        "ColorBasic": 'color_basic',
        "Color300": 'color_300',
        "Elev_Low": 'elev_low',
        "Elev_High": 'elev_high',
        "Elev_Avg": 'elev_avg',
        "gridcode": 'gridcode',
        "Trend": 'trend',
        "RepositoryName": 'repository_name',
        "RepositoryURL": 'repository_url',
        "Resource": 'resource_id',
        "Keyword": 'keyword_id',
        "Keyword_ID": 'keyword_id',
        "Mother": 'mother',
        "CN": 'cn',
        "DE": 'de',
        "ES": 'es',
        "FR": 'fr',
        "PT": 'pt',
        "RU": 'ru',
        "TR": 'tr',
        "ResourceTitle": 'resource_title_id',
        "LanguageLetterCode": 'language_letter_code',
        "LanguageNumberCode": 'language_number_code_id',
        "OrgNum1": 'org_num1',
        "Organisation Search": 'organisation_search',
        "OrgAlphaSearch": 'org_alpha_search',
        "Organisation English": 'organisation_english',
        "Organisation 2": 'organisation_2',
        "Organisation 3": 'organisation_3',
        "Organisation Original": 'organisation_original',
        "Acronym": 'acronym',
        "Street": 'street',
        "PO Box": 'po_box',
        "Postcode": 'postcode',
        "City": 'city',
        "Region": 'region',
        "SearchURL": 'search_url',
        "LatLon": 'lat_long',
        "URL Org": 'url',
        "Tel Org": 'tel',
        "Email Org": 'email',
        "Country": 'country_id',
        "Tags": 'tags',
        "Description": 'description',
        "Northing": 'northing',
        "Easting": 'easting',
        "Category": 'category',
        "Subject": 'subject',
        "Title": 'title',
        "Citation": 'citation',
        "Type": 'type',
        "Abstract": 'abstract',
        "AuthorKeywords": 'author_keywords',
        "Lat": 'lat',
        "Lon": 'lon',
        "Stars": 'stars',
        "PEGASuS_Check_map_with_author": 'PEGASuS_Check_map_with_author',
        "PEGASuS_polygon_ID": 'PEGASuS_polygon_ID',
        "PEGASuS_Polygon_comments": 'PEGASuS_Polygon_comments',
        "PEGASuS_Assessment_ID": 'PEGASuS_Assessment_ID',
        "GLORIA": 'gloria',
        "GNOMO": 'gnomo',
        "LTER": 'lter',
        "LTSER": 'ltser',
        "MIREN": 'miren',
        "TEAM": 'team',
        "Inventory": 'inventory',
        "DOI": 'doi',
        "ShortName": 'short_name',
        "FormalName": 'formal_name',
        # NOTE(review): this key was listed twice in a row.  The two copies
        # may have differed only by invisible Unicode characters; header
        # lookup therefore falls back to a cleaned-up spelling (see
        # _field_for_column) -- confirm against the real CSV header.
        "Membership within the UN System": 'membership_within_un_system',
        "Continent": 'continent',
        "EU_MS": 'eu_ms',
        "EEA_MS": 'eea_ms',
        "ISO3": 'iso3',
        "ISO2": 'iso2',
        "Point_Name": 'point_name',
        "Elevation": 'elevation',
        "Link": 'link',
        "Repository": 'repository_id',
        "SearchString": 'search_string',
        "SearchDate": 'search_date',
        "Result": 'result',
        "NumberOfRecords": 'number_of_records',
        "Stored": 'stored',
        "SpeciesGroup": 'species_group',
        "MrMrs": 'mr_mrs',
        "First name": 'first_name',
        "Last name": 'last_name',
        "Full name": 'full_name',
        "SearchName": 'search_name',
        "e-mail 1": 'contact_email',
        "e-mail 2": 'email_2',
        "Skype": 'skype',
        "Professional phone": 'professional_phone',
        "Mobile number": 'mobile_number',
        "Field of expertise": 'field_of_expertise',
        "Biography": 'biography',
        "Position": 'position',
        "Status": 'status',
        "Entry date": 'entry_date',
        "Newsletter": 'news_letter',
        "CountryLookup": 'country_lookup',
        "Organisation": 'organization_id',
        "Birds": 'birds',
        "Mammals": 'mammals',
        'Reptiles': 'reptiles',
        'Amphibians': 'amphibians',
        'Fish': 'fish',
        'Insects': 'insects',
        'Molluscs': 'molluscs',
        'Crustaceans': 'crustaceans',
        'Arachnids': 'arachnids',
        'Angiosperms': 'angiosperms',
        'Gymnosperms': 'gymnosperms',
        'Fungi': 'fungi',
        'Algae': 'algae',
        'Microbes': 'microbes',
        'Biological field sampling': 'biological_field_sampling',
        'Data mining': 'data_mining',
        'Remote sensing': 'remote_sensing',
        'GIS': 'gis',
        'Spatial analysis': 'spatial_analysis',
        'Statistical analysis': 'statistical_analysis',
        'Modelling': 'modelling',
        'Assessment': 'assessment',
        'Meta-analysis': 'meta_analysis',
        'Synthesis': 'synthesis',
        'Qualitative social science methods (interviews, surveys)': 'qualitative_ssm',
        'Genetic analyses': 'genetic_analyses',
        'Field site': 'field_site',
        'Transect': 'transect',
        'Mountain top': 'mountain_top',
        'Mountain range': 'mountain_range',
        'Landscape': 'landscape',
        'Regional': 'regional',
        'National': 'national',
        'Global': '_global',
        'Geographic area of expertise': 'geographic_area_of_expertise',
        'ProfileOnWeb': 'profile_on_web',
        'Updated': 'updated',
        'ORCID': 'orcid',
        'WebOfScience': 'web_of_science',
        'Twitter': 'twitter',
        'Instagram': 'instagram',
        'ScientificName': 'scientific_name_id',
        'Class': '_class',
        'EnglishName': 'english_name',
        'Language': 'language',
        'Person': 'person_id',
        'Field': 'field_id',
        'Method': 'method_id',
        'Scale': 'scale_id',
        'Function': 'function_id',
        'Range': 'range_id',
        'Endemic': 'endemic',
        'SourceURL': 'source_url',
        'MountainRange': 'mountain_range',
        'TaxonRangeID': 'id',
        'SubRangeOrRegion': 'subrange_or_region',
        'Taxon': 'taxon_id',
        'Distribution': 'distribution',
        'RedList': 'redlist',
        'CountUnit': 'count_unit',
        'NumberUnits': 'number_of_units',
        'Remarks': 'remarks',
        'RangeType': 'range_type',
        'Role': 'role',
        'RedListCategory': 'red_list_category',
    }

    # Per-model corrections applied after the generic column mapping:
    # model name -> {generic keyword: keyword that model expects}.
    _field_renames = {
        'Range': {'range_name_id': 'range_name'},
        'Keyword': {'keyword_id': 'keyword'},
        'Organization': {'country_id': 'country'},
        'PeopleRange': {'mountain_range': 'range_id'},
        'Species': {'scientific_name_id': 'scientific_name'},
        'TaxonRange': {'taxon_id': 'taxon'},
    }

    # str.translate table used as a fallback when a header cell has no exact
    # mapping: no-break spaces become plain spaces, BOM / zero-width
    # characters are dropped.
    _invisible_chars = {
        0x00A0: ' ',
        0x2007: ' ',
        0x202F: ' ',
        0xFEFF: None,
        0x200B: None,
        0x200C: None,
        0x200D: None,
        0x2060: None,
    }

    def add_arguments(self, parser):
        """Register the command line options of the import command."""
        parser.add_argument('--path', type=str, help="file path")
        parser.add_argument('--csv_folder_path', type=str, help="Path where the csvs are located")
        parser.add_argument('--model_name', type=str, help="model name")
        parser.add_argument('--app_name', type=str, help="django app name that the model is connected to", default='app')
        parser.add_argument('--all', action='store_true', help="Imports all csvs")
        parser.add_argument('--no_skip', action='store_true',
                            help="With --all, also import the models that are skipped by default")
        parser.add_argument('--encoding', type=str, default=None,
                            help="Text encoding of the csv files (default: platform default)")

    def handle(self, *args, **options):
        """Run the import in ``--all`` mode or in single-file mode.

        Raises:
            CommandError: if neither ``--all`` nor both ``--path`` and
                ``--model_name`` are given, if a model or file cannot be
                found, or if a CSV header contains an unmapped column.
        """
        csv.register_dialect(
            self._csv_dialect,
            delimiter=',',
            quotechar='"',
            doublequote=True,
            skipinitialspace=True,
            lineterminator='\n',
            quoting=csv.QUOTE_MINIMAL)

        app_name = options.get('app_name') or 'app'
        encoding = options.get('encoding')
        verbosity = options.get('verbosity', 1)

        if options.get('all'):
            csv_folder_path = options.get('csv_folder_path') or self._default_csv_folder_path
            skipped = () if options.get('no_skip') else self._skipped_on_all
            self.stdout.write("Doing an import of all csvs")
            for csv_file_name, model_name in self.csv_files_models_dict.items():
                self.stdout.write("Importing %s -- %s" % (csv_file_name, model_name))
                if model_name in skipped:
                    continue
                self._import_file(
                    os.path.join(csv_folder_path, csv_file_name),
                    app_name,
                    model_name,
                    encoding=encoding,
                    skip_integrity_errors=True,
                    verbosity=verbosity)
                self.stdout.write("Done importing %s" % model_name)
            return

        model_name = options.get('model_name')
        file_path = options.get('path')
        if not model_name or not file_path:
            raise CommandError("Pass --all, or both --path and --model_name")
        _model = self._import_file(
            file_path,
            app_name,
            model_name,
            encoding=encoding,
            skip_integrity_errors=False,
            verbosity=verbosity)
        self.stdout.write("Done importing %s" % str(_model))

    def _field_for_column(self, column):
        """Return the model keyword for a CSV header cell, or None if unmapped."""
        mapping = self.cols_to_django_fields
        if column in mapping:
            return mapping[column]
        # Retry with invisible characters removed, so that a BOM or a
        # no-break space in the header does not defeat the lookup.
        return mapping.get(column.translate(self._invisible_chars).strip())

    def _map_header(self, header, file_path):
        """Translate a CSV header row into model keywords, in column order.

        Raises:
            CommandError: listing every header cell without a mapping.
        """
        field_names = []
        unmapped = []
        for cell in header:
            column = cell.strip('"')
            field_name = self._field_for_column(column)
            if field_name is None:
                unmapped.append(column)
            field_names.append(field_name)
        if unmapped:
            raise CommandError("%s: no Django field mapping for column(s): %s" % (
                file_path, ', '.join(repr(column) for column in unmapped)))
        return field_names

    def _build_kwargs(self, model_name, field_names, row):
        """Build the constructor keyword arguments for one CSV data row."""
        # zip() stops at the shorter sequence, so short rows simply leave the
        # trailing fields unset.
        kwargs = {field_name: value.strip('"') for field_name, value in zip(field_names, row)}
        for generic_name, model_field in self._field_renames.get(model_name, {}).items():
            if generic_name in kwargs:
                kwargs[model_field] = kwargs.pop(generic_name)
        if model_name == 'Person' and 'organization_id' in kwargs:
            raw_id = kwargs['organization_id']
            # An empty cell is stored as -1; other values may be written as
            # floats (hence int(float(...))).
            kwargs['organization_id'] = int(float(raw_id)) if raw_id else -1
        return kwargs

    def _import_file(self, file_path, app_name, model_name, *, encoding=None,
                     skip_integrity_errors=False, verbosity=1):
        """Load one CSV file into ``app_name.model_name`` and return the model class.

        Args:
            file_path: path of the CSV file; its first row is the header.
            app_name: Django app label used to look up the model.
            model_name: name of the model to instantiate per row.
            encoding: text encoding passed to ``open`` (None = platform default).
            skip_integrity_errors: when true, an ``IntegrityError`` raised by
                ``save()`` is reported and the next row is processed; when
                false it propagates.
            verbosity: row dictionaries are echoed when this is 2 or higher.

        Raises:
            CommandError: unknown model, unreadable file or unmapped column.
        """
        try:
            _model = apps.get_model(app_name, model_name)
        except LookupError as err:
            raise CommandError("Unknown model %s.%s" % (app_name, model_name)) from err

        try:
            # newline='' lets the csv module handle line endings itself.
            csv_file = open(file_path, 'r', newline='', encoding=encoding)
        except OSError as err:
            raise CommandError("Cannot open %s: %s" % (file_path, err)) from err

        with csv_file:
            reader = csv.reader(csv_file, dialect=self._csv_dialect)
            # Assume the first row to be the header
            header = next(reader, None)
            if header is None:
                return _model
            field_names = self._map_header(header, file_path)
            for row in reader:
                if not row:
                    # Blank line: nothing to import.
                    continue
                _object_dict = self._build_kwargs(model_name, field_names, row)
                if verbosity >= 2:
                    self.stdout.write(str(_object_dict))
                m = _model(**_object_dict)
                if not skip_integrity_errors:
                    m.save()
                    continue
                try:
                    m.save()
                except IntegrityError as ie:
                    self.stdout.write(str(ie))
                    if "UNIQUE constraint failed: range.gmba_v2_id" in str(ie):
                        self.stdout.write("======")
                        self.stdout.write("Could not save %s" % json.dumps(_object_dict))
                        self.stdout.write("======")
        return _model