First working version of the import management command

./manage.py import --all
This commit is contained in:
PCoder 2021-11-10 19:50:16 +05:30
parent ec5f4111ac
commit b9dcab334b
6 changed files with 278 additions and 83 deletions

View file

@ -1,54 +1,63 @@
from django.core.management.base import BaseCommand, CommandError from django.core.management.base import BaseCommand, CommandError
from django.apps import apps from django.apps import apps
from app.models import Country from django.db.utils import IntegrityError
import csv import csv
import json
class Command(BaseCommand): class Command(BaseCommand):
help = 'Imports csv to DB' help = 'Imports csv to DB'
csv_files = [ csv_files_models_dict = {
"v2-LU_GMBA_SpeciesGroups.csv", "v2-LU_GMBA_SpeciesGroups.csv": "GMBA_SpeciesGroup",
"v2-LU_Countries.csv", "v2-LU_Countries.csv": "Country",
"v2-LU_Languages.csv", "v2-LU_Languages.csv": "Language",
"v2-LU_Sources.csv", "v2-LU_Sources.csv": "Source",
"v2-LU_RedListCategories.csv", "v2-LU_RedListCategories.csv": "RedListCategory",
"v2-LU_RangeTypes.csv", "v2-LU_RangeTypes.csv": "RangeType",
"v2-LU_PeopleStatus.csv", "v2-LU_PeopleStatus.csv": "PeopleStatus",
"v2-LU_TrendsQuantity.csv", "v2-LU_TrendsQuantity.csv": "TrendsQuantity",
"v2-LU_TrendsQuality.csv", "v2-LU_TrendsQuality.csv": "TrendsQuality",
"v2-LU_TaxonUnit.csv", "v2-LU_TaxonUnit.csv": "TaxonUnit",
"v2-LU_TaxonStatus.csv", "v2-LU_TaxonStatus.csv": "TaxonStatus",
"v2-AddElevations.csv",
"v2-GMBA_Function.csv", "v2-Ranges-cleaned.csv": "Range",
"v2-Gmba_V2_centroid.csv", "v2-AddElevations.csv": "AddElevation",
"v2-ImportGeom210915.csv",
"v2-LanguageLink.csv", "v2-GMBA_Function.csv": "GMBA_function",
"v2-Keywords.csv", "v2-Gmba_V2_centroid.csv": "GMBA_V2_Centroid",
"v2-NamesImport.csv", "v2-ImportGeom210915.csv": "ImportGeom210915",
"v2-Organisations.csv", "v2-LanguageLink.csv": "LanguageLink",
"v2-Peaks.csv", "v2-Keywords.csv": "Keyword",
"v2-PeopleRanges.csv", "v2-NamesImport.csv": "NamesImport",
"v2-PeopleFunction.csv", "v2-Organisations-cleaned.csv": "Organization",
"v2-PeopleResources.csv", "v2-Peaks.csv": "Peak",
"v2-RangeCountries.csv",
"v2-RangeNameTranslations.csv", "v2-People.csv": "Person",
"v2-RangeOnlineInfo.csv", "v2-PeopleRanges.csv": "PeopleRange",
"v2-Ranges.csv", "v2-PeopleFunction.csv": "PeopleFunction",
"v2-ResourceRanges.csv", "v2-Resources.csv": "Resource",
"v2-ResourceKeywords.csv",
"v2-Repositories.csv", "v2-PeopleResources.csv": "PeopleResource",
"v2-Resources.csv", "v2-RangeCountries.csv": "RangeCountry",
"v2-Species.csv", "v2-RangeNameTranslations.csv": "RangeNameTranslation",
"v2-Searches.csv", "v2-RangeOnlineInfo.csv": "RangeOnlineInfo",
"v2-TaxonRange.csv", "v2-ResourceRanges.csv": "ResourceRange",
"v2-SpeciesRange.csv", "v2-ResourceKeywords.csv": "ResourceKeyword",
"v2-People.csv" "v2-Repositories.csv": "Repository",
]
"v2-Species.csv": "Species",
"v2-Searches.csv": "Search",
"v2-TaxonRange.csv": "TaxonRange",
"v2-SpeciesRange.csv": "SpeciesRange"
}
cols_to_django_fields = { cols_to_django_fields = {
"ID": 'id', "ID": 'id',
"Source": 'source', "Source": 'source',
"RangeName": 'range_name', "RangeName": 'range_name_id',
"LanguageTranslation": 'language_translation', "LanguageTranslation": 'language_translation_id',
"RangeNameTranslation": 'range_name_translation', "RangeNameTranslation": 'range_name_translation',
"GMBA_ID_v2": 'gmba_v2_id', "GMBA_ID_v2": 'gmba_v2_id',
"Elev_Min": 'elev_min', "Elev_Min": 'elev_min',
@ -57,7 +66,7 @@ class Command(BaseCommand):
"TaxonStatus": 'taxon_status', "TaxonStatus": 'taxon_status',
"InfoSource": 'info_source', "InfoSource": 'info_source',
"URL": 'url', "URL": 'url',
"GMBA function": 'GMBA_function', "GMBA function": 'gmba_function',
"TaxonUnit": 'taxon_unit', "TaxonUnit": 'taxon_unit',
"Range_ID": 'id', "Range_ID": 'id',
"RangeNameMap": 'range_name_map', "RangeNameMap": 'range_name_map',
@ -109,8 +118,8 @@ class Command(BaseCommand):
"Trend": 'trend', "Trend": 'trend',
"RepositoryName": 'repository_name', "RepositoryName": 'repository_name',
"RepositoryURL": 'repository_url', "RepositoryURL": 'repository_url',
"Resource": 'resource', "Resource": 'resource_id',
"Keyword": 'keyword', "Keyword": 'keyword_id',
"Keyword_ID": 'keyword_id', "Keyword_ID": 'keyword_id',
"Mother": 'mother', "Mother": 'mother',
"CN": 'cn', "CN": 'cn',
@ -120,9 +129,9 @@ class Command(BaseCommand):
"PT": 'pt', "PT": 'pt',
"RU": 'ru', "RU": 'ru',
"TR": 'tr', "TR": 'tr',
"ResourceTitle": 'resource_title', "ResourceTitle": 'resource_title_id',
"LanguageLetterCode": 'language_letter_code', "LanguageLetterCode": 'language_letter_code',
"LanguageNumberCode": 'language_number_code', "LanguageNumberCode": 'language_number_code_id',
"OrgNum1": 'org_num1', "OrgNum1": 'org_num1',
"Organisation Search": 'organisation_search', "Organisation Search": 'organisation_search',
"OrgAlphaSearch": 'org_alpha_search', "OrgAlphaSearch": 'org_alpha_search',
@ -137,11 +146,11 @@ class Command(BaseCommand):
"City": 'city', "City": 'city',
"Region": 'region', "Region": 'region',
"SearchURL": 'search_url', "SearchURL": 'search_url',
"LatLon": 'lat_lon', "LatLon": 'lat_long',
"URL Org": 'url', "URL Org": 'url',
"Tel Org": 'tel', "Tel Org": 'tel',
"Email Org": 'email', "Email Org": 'email',
"Country": 'country', "Country": 'country_id',
"Tags": 'tags', "Tags": 'tags',
"Description": 'description', "Description": 'description',
"Northing": 'northing', "Northing": 'northing',
@ -170,7 +179,8 @@ class Command(BaseCommand):
"DOI": 'doi', "DOI": 'doi',
"ShortName": 'short_name', "ShortName": 'short_name',
"FormalName": 'formal_name', "FormalName": 'formal_name',
"Membership within the UN System": '', "Membership within the UN System": 'membership_within_un_system',
"Membership within the UN System": 'membership_within_un_system',
"Continent": 'continent', "Continent": 'continent',
"EU_MS": 'eu_ms', "EU_MS": 'eu_ms',
"EEA_MS": 'eea_ms', "EEA_MS": 'eea_ms',
@ -179,7 +189,7 @@ class Command(BaseCommand):
"Point_Name": 'point_name', "Point_Name": 'point_name',
"Elevation": 'elevation', "Elevation": 'elevation',
"Link": 'link', "Link": 'link',
"Repository": 'repository', "Repository": 'repository_id',
"SearchString": 'search_string', "SearchString": 'search_string',
"SearchDate": 'search_date', "SearchDate": 'search_date',
"Result": 'result', "Result": 'result',
@ -191,7 +201,7 @@ class Command(BaseCommand):
"Last name": 'last_name', "Last name": 'last_name',
"Full name": 'full_name', "Full name": 'full_name',
"SearchName": 'search_name', "SearchName": 'search_name',
"e-mail 1": 'email_1', "e-mail 1": 'contact_email',
"e-mail 2": 'email_2', "e-mail 2": 'email_2',
"Skype": 'skype', "Skype": 'skype',
"Professional phone": 'professional_phone', "Professional phone": 'professional_phone',
@ -203,7 +213,7 @@ class Command(BaseCommand):
"Entry date": 'entry_date', "Entry date": 'entry_date',
"Newsletter": 'news_letter', "Newsletter": 'news_letter',
"CountryLookup": 'country_lookup', "CountryLookup": 'country_lookup',
"Organisation": 'organization', "Organisation": 'organization_id',
"Birds": 'birds', "Birds": 'birds',
"Mammals": 'mammals', "Mammals": 'mammals',
'Reptiles': 'reptiles', 'Reptiles': 'reptiles',
@ -237,7 +247,7 @@ class Command(BaseCommand):
'Landscape': 'landscape', 'Landscape': 'landscape',
'Regional': 'regional', 'Regional': 'regional',
'National': 'national', 'National': 'national',
'Global': 'global', 'Global': '_global',
'Geographic area of expertise': 'geographic_area_of_expertise', 'Geographic area of expertise': 'geographic_area_of_expertise',
'ProfileOnWeb': 'profile_on_web', 'ProfileOnWeb': 'profile_on_web',
'Updated': 'updated', 'Updated': 'updated',
@ -245,24 +255,30 @@ class Command(BaseCommand):
'WebOfScience': 'web_of_science', 'WebOfScience': 'web_of_science',
'Twitter': 'twitter', 'Twitter': 'twitter',
'Instagram': 'instagram', 'Instagram': 'instagram',
'ScientificName': 'scientific_name', 'ScientificName': 'scientific_name_id',
'Class': 'class', 'Class': '_class',
'EnglishName': 'english_name', 'EnglishName': 'english_name',
'Language': 'language', 'Language': 'language',
'Person': 'person', 'Person': 'person_id',
'Function': 'function', 'Field': 'field_id',
'Range': 'range', 'Method': 'method_id',
'Scale': 'scale_id',
'Function': 'function_id',
'Range': 'range_id',
'Endemic': 'endemic', 'Endemic': 'endemic',
'SourceURL': 'source_url', 'SourceURL': 'source_url',
'MountainRange': 'mountain_range', 'MountainRange': 'mountain_range',
'TaxonRangeID': 'id', 'TaxonRangeID': 'id',
'SubRangeOrRegion': 'subrange_or_region', 'SubRangeOrRegion': 'subrange_or_region',
'Taxon': 'taxon', 'Taxon': 'taxon_id',
'Distribution': 'distribution', 'Distribution': 'distribution',
'RedList': 'red_list', 'RedList': 'redlist',
'CountUnit': 'count_unit', 'CountUnit': 'count_unit',
'NumberUnits': 'number_of_units', 'NumberUnits': 'number_of_units',
'Remarks': 'remarks', 'Remarks': 'remarks',
'RangeType': 'range_type', 'RangeType': 'range_type',
'Role': 'role', 'Role': 'role',
'RedListCategory': 'red_list_category' 'RedListCategory': 'red_list_category'
@ -270,22 +286,110 @@ class Command(BaseCommand):
def add_arguments(self, parser): def add_arguments(self, parser):
parser.add_argument('--path', type=str, help="file path") parser.add_argument('--path', type=str, help="file path")
parser.add_argument('--model_name', type=str, help="model name", required=True) parser.add_argument('--csv_folder_path', type=str, help="Path where the csvs are located")
parser.add_argument('--app_name', type=str, help="django app name that the model is connected to", default='app', required=True) parser.add_argument('--model_name', type=str, help="model name")
parser.add_argument('--app_name', type=str, help="django app name that the model is connected to", default='app')
parser.add_argument('--all', action='store_true', help="'Imports all csvs")
# ./manage.py import --path /home/pcoder/Downloads/gmbadb/csvs/v2-LU_RedListCategories.csv --model_name RedListCategory --app_name app
def handle(self, *args, **options): def handle(self, *args, **options):
file_path = options['path'] csv.register_dialect(
_model = apps.get_model(options['app_name'], options['model_name']) 'mydialect',
with open(file_path, 'r') as csv_file: delimiter=',',
reader = csv.reader(csv_file, delimiter=',', quotechar='|') quotechar='"',
first = True doublequote=True,
for row in reader: skipinitialspace=True,
if first: lineterminator='\n',
# Assume the first row to be the header quoting=csv.QUOTE_MINIMAL)
header = row csv_folder_path = '/home/pcoder/Downloads/gmbadb/csvs'
header = [h.strip('"') for h in header] if options['csv_folder_path']:
first = False csv_folder_path = options['csv_folder_path']
if options.get('all'):
print("Doing an import of all csvs")
for csv_file_name, model_name in self.csv_files_models_dict.items():
print("Importing %s -- %s" % (csv_file_name, model_name))
if model_name in ['Range', 'NamesImport', 'ImportGeom210915', 'Organization', 'AddElevation',
'GMBA_V2_Centroid', 'Person', 'PeopleRange', 'PeopleFunction', "PeopleResource",
"RangeCountry", "RangeNameTranslation", "RangeOnlineInfo", "ResourceRange",
"ResourceKeyword", "Repository"]:
# we have already imported and do not want to spend more time redoing stuff
continue continue
_object_dict = {self.cols_to_django_fields.get(key): value.lstrip('"').rstrip('"') for key, value in zip(header, row)} if csv_folder_path.endswith('/'):
m = _model(**_object_dict) file_path = '%s%s' % (csv_folder_path, csv_file_name)
m.save() else:
file_path = '%s/%s' % (csv_folder_path, csv_file_name)
_model = apps.get_model(options.get('app_name', 'app'), model_name)
with open(file_path, 'r') as csv_file:
reader = csv.reader(csv_file, dialect='mydialect')
first = True
for row in reader:
if first:
# Assume the first row to be the header
header = row
header = [h.strip('"') for h in header]
first = False
continue
_object_dict = {str(self.cols_to_django_fields.get(key)): str(value.lstrip('"').rstrip('"')) for key, value in zip(header, row)}
if model_name == 'Range':
# Reinstate range_name key
_object_dict['range_name'] = _object_dict['range_name_id']
_object_dict.pop('range_name_id')
if model_name == 'Keyword':
_object_dict['keyword'] = _object_dict['keyword_id']
_object_dict.pop('keyword_id')
if model_name == 'Organization' and 'country_id' in _object_dict:
_object_dict['country'] = _object_dict['country_id']
_object_dict.pop('country_id')
if model_name == 'PeopleRange' and 'mountain_range' in _object_dict:
_object_dict['range_id'] = _object_dict['mountain_range']
_object_dict.pop('mountain_range')
if model_name == 'Species' and 'scientific_name_id' in _object_dict:
_object_dict['scientific_name'] = _object_dict['scientific_name_id']
_object_dict.pop('scientific_name_id')
if model_name == 'TaxonRange' and 'taxon_id' in _object_dict:
_object_dict['taxon'] = _object_dict['taxon_id']
_object_dict.pop('taxon_id')
if model_name == 'Person' and 'organization_id' in _object_dict:
print("organization_id=%s" % _object_dict['organization_id'])
if _object_dict['organization_id'] == '' or _object_dict['organization_id'] is None:
_object_dict['organization_id'] = '-1'
else:
_object_dict['organization_id'] = int(float(_object_dict['organization_id']))
print(_object_dict)
if _object_dict is None:
print("Object None for %s" % model_name)
m = _model(**_object_dict)
try:
m.save()
except IntegrityError as ie:
print(str(ie))
if "UNIQUE constraint failed: range.gmba_v2_id" in str(ie):
print("======")
print("Could not save %s" % json.dumps(_object_dict))
print("======")
print("Done importing %s" % model_name)
else:
_model = apps.get_model(options.get('app_name', 'app'), options['model_name'])
file_path = options.get('path')
csv.register_dialect(
'mydialect',
delimiter=',',
quotechar='"',
doublequote=True,
skipinitialspace=True,
lineterminator='\n',
quoting=csv.QUOTE_MINIMAL)
with open(file_path, 'r', newline='') as csv_file:
reader = csv.reader(csv_file, dialect='mydialect')
first = True
for row in reader:
if first:
# Assume the first row to be the header
header = row
header = [h.strip('"') for h in header]
first = False
continue
_object_dict = {self.cols_to_django_fields.get(key): value.lstrip('"').rstrip('"') for key, value in zip(header, row)}
m = _model(**_object_dict)
m.save()
print("Done importing %s" % str(_model))

View file

@ -0,0 +1,33 @@
# Generated by Django 3.2.5 on 2021-11-10 12:18
from django.db import migrations, models
class Migration(migrations.Migration):
    """Make ``gmba_v2_id`` a plain optional text field on four models.

    Every operation alters ``gmba_v2_id`` to
    ``TextField(blank=True, null=True)``; the affected models are
    ``addelevation``, ``importgeom210915``, ``namesimport`` and ``range``.
    """

    dependencies = [('app', '0004_auto_20211108_0106')]

    # The same AlterField is emitted once per model, in the listed order.
    operations = [
        migrations.AlterField(
            model_name=target_model,
            name='gmba_v2_id',
            field=models.TextField(blank=True, null=True),
        )
        for target_model in (
            'addelevation',
            'importgeom210915',
            'namesimport',
            'range',
        )
    ]

View file

@ -0,0 +1,18 @@
# Generated by Django 3.2.5 on 2021-11-10 12:34
from django.db import migrations
class Migration(migrations.Migration):
    """Rename the ``scale`` field of ``peoplefunction`` to ``function``."""

    dependencies = [('app', '0005_auto_20211110_1218')]

    operations = [
        # Positional form of RenameField(model_name, old_name, new_name).
        migrations.RenameField('peoplefunction', 'scale', 'function'),
    ]

View file

@ -0,0 +1,22 @@
# Generated by Django 3.2.5 on 2021-11-10 13:52
from django.db import migrations, models
class Migration(migrations.Migration):
    """Rename ``resource.WikiDataID`` to ``resource.wiki_data_id``.

    A single RenameField is used instead of RemoveField + AddField: dropping
    the old column and adding a new one would discard every stored value,
    whereas a rename keeps the data and ends with the same schema (the field
    definition itself, ``TextField(blank=True, null=True)``, is unchanged).
    """

    dependencies = [
        ('app', '0006_rename_scale_peoplefunction_function'),
    ]

    operations = [
        migrations.RenameField(
            model_name='resource',
            old_name='WikiDataID',
            new_name='wiki_data_id',
        ),
    ]

View file

@ -0,0 +1,18 @@
# Generated by Django 3.2.5 on 2021-11-10 14:15
from django.db import migrations, models
class Migration(migrations.Migration):
    """Alter ``taxonrange.taxon`` to ``TextField(blank=True, null=True)``."""

    dependencies = [('app', '0007_auto_20211110_1352')]

    operations = [
        # Positional form of AlterField(model_name, name, field).
        migrations.AlterField(
            'taxonrange',
            'taxon',
            models.TextField(null=True, blank=True),
        ),
    ]

View file

@ -93,7 +93,7 @@ class Range(models.Model):
source = models.TextField(blank=True, null=True) source = models.TextField(blank=True, null=True)
range_alternate_id = models.TextField(blank=True, null=True) range_alternate_id = models.TextField(blank=True, null=True)
geologic_region = models.TextField(blank=True, null=True) geologic_region = models.TextField(blank=True, null=True)
gmba_v2_id = models.TextField(blank=True, null=True, unique=True) gmba_v2_id = models.TextField(blank=True, null=True)
gmba_v2_id_str = models.TextField(blank=True, null=True) gmba_v2_id_str = models.TextField(blank=True, null=True)
wiki_data_id = models.TextField(blank=True, null=True) wiki_data_id = models.TextField(blank=True, null=True)
wiki_data_url = models.TextField(blank=True, null=True) wiki_data_url = models.TextField(blank=True, null=True)
@ -155,7 +155,7 @@ class Resource(models.Model):
team = models.TextField(blank=True, null=True) team = models.TextField(blank=True, null=True)
inventory = models.TextField(blank=True, null=True) inventory = models.TextField(blank=True, null=True)
doi = models.TextField(blank=True, null=True) doi = models.TextField(blank=True, null=True)
WikiDataID = models.TextField(blank=True, null=True) wiki_data_id = models.TextField(blank=True, null=True)
class Meta: class Meta:
db_table = 'resource' db_table = 'resource'
@ -318,7 +318,7 @@ class RangeNameTranslation(models.Model):
class AddElevation(models.Model): class AddElevation(models.Model):
id = models.AutoField(primary_key=True) id = models.AutoField(primary_key=True)
gmba_v2_id = models.ForeignKey(Range, models.DO_NOTHING, blank=True, null=True, to_field='gmba_v2_id') gmba_v2_id = models.TextField(blank=True, null=True)
elev_min = models.TextField(blank=True, null=True) elev_min = models.TextField(blank=True, null=True)
elev_max = models.TextField(blank=True, null=True) elev_max = models.TextField(blank=True, null=True)
elev_range = models.TextField(blank=True, null=True) elev_range = models.TextField(blank=True, null=True)
@ -334,7 +334,7 @@ class GMBA_V2_Centroid(models.Model):
class ImportGeom210915(models.Model): class ImportGeom210915(models.Model):
id = models.AutoField(primary_key=True) id = models.AutoField(primary_key=True)
gmba_v2_id = models.ForeignKey(Range, models.DO_NOTHING, blank=True, null=True, to_field='gmba_v2_id') gmba_v2_id = models.TextField(blank=True, null=True)
area = models.TextField(blank=True, null=True) area = models.TextField(blank=True, null=True)
perimeter = models.TextField(blank=True, null=True) perimeter = models.TextField(blank=True, null=True)
color_all = models.TextField(blank=True, null=True) color_all = models.TextField(blank=True, null=True)
@ -355,7 +355,7 @@ class ResourceKeyword(models.Model):
class NamesImport(models.Model): class NamesImport(models.Model):
id = models.AutoField(primary_key=True) id = models.AutoField(primary_key=True)
gmba_v2_id = models.ForeignKey(Range, models.DO_NOTHING, blank=True, null=True, to_field='gmba_v2_id') gmba_v2_id = models.TextField(blank=True, null=True)
cn = models.TextField(blank=True, null=True) cn = models.TextField(blank=True, null=True)
de = models.TextField(blank=True, null=True) de = models.TextField(blank=True, null=True)
es = models.TextField(blank=True, null=True) es = models.TextField(blank=True, null=True)
@ -544,7 +544,7 @@ class Species(models.Model):
class PeopleFunction(models.Model): class PeopleFunction(models.Model):
id = models.AutoField(primary_key=True) id = models.AutoField(primary_key=True)
person = models.ForeignKey(Person, models.DO_NOTHING, blank=True, null=True) person = models.ForeignKey(Person, models.DO_NOTHING, blank=True, null=True)
scale = models.ForeignKey(GMBA_function, models.DO_NOTHING, blank=True, null=True) function = models.ForeignKey(GMBA_function, models.DO_NOTHING, blank=True, null=True)
class SpeciesRange(models.Model): class SpeciesRange(models.Model):
@ -564,7 +564,7 @@ class PeopleRange(models.Model):
class TaxonRange(models.Model): class TaxonRange(models.Model):
id = models.AutoField(primary_key=True) id = models.AutoField(primary_key=True)
range = models.ForeignKey(Range, models.DO_NOTHING, blank=True, null=True) range = models.ForeignKey(Range, models.DO_NOTHING, blank=True, null=True)
taxon = models.ForeignKey(Taxon, models.DO_NOTHING, blank=True, null=True) taxon = models.TextField(blank=True, null=True)
subrange_or_region = models.TextField(blank=True, null=True) subrange_or_region = models.TextField(blank=True, null=True)
taxon_status = models.TextField(blank=True, null=True) taxon_status = models.TextField(blank=True, null=True)
distribution = models.TextField(blank=True, null=True) distribution = models.TextField(blank=True, null=True)