First working version of the import management command

./manage.py import --all
This commit is contained in:
PCoder 2021-11-10 19:50:16 +05:30
parent ec5f4111ac
commit b9dcab334b
6 changed files with 278 additions and 83 deletions

View file

@ -1,54 +1,63 @@
from django.core.management.base import BaseCommand, CommandError
from django.apps import apps
from app.models import Country
from django.db.utils import IntegrityError
import csv
import json
class Command(BaseCommand):
help = 'Imports csv to DB'
# File names of the CSV exports known to this command.
# NOTE(review): the handle() code visible in this change iterates
# csv_files_models_dict, not this list, and this list names
# "v2-Ranges.csv" / "v2-Organisations.csv" while the mapping uses the
# "-cleaned" variants of those two files -- confirm whether this list is
# still needed or should be derived from the mapping's keys.
csv_files = [
"v2-LU_GMBA_SpeciesGroups.csv",
"v2-LU_Countries.csv",
"v2-LU_Languages.csv",
"v2-LU_Sources.csv",
"v2-LU_RedListCategories.csv",
"v2-LU_RangeTypes.csv",
"v2-LU_PeopleStatus.csv",
"v2-LU_TrendsQuantity.csv",
"v2-LU_TrendsQuality.csv",
"v2-LU_TaxonUnit.csv",
"v2-LU_TaxonStatus.csv",
"v2-AddElevations.csv",
"v2-GMBA_Function.csv",
"v2-Gmba_V2_centroid.csv",
"v2-ImportGeom210915.csv",
"v2-LanguageLink.csv",
"v2-Keywords.csv",
"v2-NamesImport.csv",
"v2-Organisations.csv",
"v2-Peaks.csv",
"v2-PeopleRanges.csv",
"v2-PeopleFunction.csv",
"v2-PeopleResources.csv",
"v2-RangeCountries.csv",
"v2-RangeNameTranslations.csv",
"v2-RangeOnlineInfo.csv",
"v2-Ranges.csv",
"v2-ResourceRanges.csv",
"v2-ResourceKeywords.csv",
"v2-Repositories.csv",
"v2-Resources.csv",
"v2-Species.csv",
"v2-Searches.csv",
"v2-TaxonRange.csv",
"v2-SpeciesRange.csv",
"v2-People.csv"
]
# Maps each CSV file name to the name of the Django model its rows are
# loaded into. The ``--all`` import walks this mapping in insertion order,
# so the order of the entries is the import order.
# NOTE(review): the lookup ("LU_") tables come first, presumably so that
# referenced rows exist before the rows that point at them -- confirm
# before reordering.
csv_files_models_dict = {
"v2-LU_GMBA_SpeciesGroups.csv": "GMBA_SpeciesGroup",
"v2-LU_Countries.csv": "Country",
"v2-LU_Languages.csv": "Language",
"v2-LU_Sources.csv": "Source",
"v2-LU_RedListCategories.csv": "RedListCategory",
"v2-LU_RangeTypes.csv": "RangeType",
"v2-LU_PeopleStatus.csv": "PeopleStatus",
"v2-LU_TrendsQuantity.csv": "TrendsQuantity",
"v2-LU_TrendsQuality.csv": "TrendsQuality",
"v2-LU_TaxonUnit.csv": "TaxonUnit",
"v2-LU_TaxonStatus.csv": "TaxonStatus",
"v2-Ranges-cleaned.csv": "Range",
"v2-AddElevations.csv": "AddElevation",
"v2-GMBA_Function.csv": "GMBA_function",
"v2-Gmba_V2_centroid.csv": "GMBA_V2_Centroid",
"v2-ImportGeom210915.csv": "ImportGeom210915",
"v2-LanguageLink.csv": "LanguageLink",
"v2-Keywords.csv": "Keyword",
"v2-NamesImport.csv": "NamesImport",
"v2-Organisations-cleaned.csv": "Organization",
"v2-Peaks.csv": "Peak",
"v2-People.csv": "Person",
"v2-PeopleRanges.csv": "PeopleRange",
"v2-PeopleFunction.csv": "PeopleFunction",
"v2-Resources.csv": "Resource",
"v2-PeopleResources.csv": "PeopleResource",
"v2-RangeCountries.csv": "RangeCountry",
"v2-RangeNameTranslations.csv": "RangeNameTranslation",
"v2-RangeOnlineInfo.csv": "RangeOnlineInfo",
"v2-ResourceRanges.csv": "ResourceRange",
"v2-ResourceKeywords.csv": "ResourceKeyword",
"v2-Repositories.csv": "Repository",
"v2-Species.csv": "Species",
"v2-Searches.csv": "Search",
"v2-TaxonRange.csv": "TaxonRange",
"v2-SpeciesRange.csv": "SpeciesRange"
}
cols_to_django_fields = {
"ID": 'id',
"Source": 'source',
"RangeName": 'range_name',
"LanguageTranslation": 'language_translation',
"RangeName": 'range_name_id',
"LanguageTranslation": 'language_translation_id',
"RangeNameTranslation": 'range_name_translation',
"GMBA_ID_v2": 'gmba_v2_id',
"Elev_Min": 'elev_min',
@ -57,7 +66,7 @@ class Command(BaseCommand):
"TaxonStatus": 'taxon_status',
"InfoSource": 'info_source',
"URL": 'url',
"GMBA function": 'GMBA_function',
"GMBA function": 'gmba_function',
"TaxonUnit": 'taxon_unit',
"Range_ID": 'id',
"RangeNameMap": 'range_name_map',
@ -109,8 +118,8 @@ class Command(BaseCommand):
"Trend": 'trend',
"RepositoryName": 'repository_name',
"RepositoryURL": 'repository_url',
"Resource": 'resource',
"Keyword": 'keyword',
"Resource": 'resource_id',
"Keyword": 'keyword_id',
"Keyword_ID": 'keyword_id',
"Mother": 'mother',
"CN": 'cn',
@ -120,9 +129,9 @@ class Command(BaseCommand):
"PT": 'pt',
"RU": 'ru',
"TR": 'tr',
"ResourceTitle": 'resource_title',
"ResourceTitle": 'resource_title_id',
"LanguageLetterCode": 'language_letter_code',
"LanguageNumberCode": 'language_number_code',
"LanguageNumberCode": 'language_number_code_id',
"OrgNum1": 'org_num1',
"Organisation Search": 'organisation_search',
"OrgAlphaSearch": 'org_alpha_search',
@ -137,11 +146,11 @@ class Command(BaseCommand):
"City": 'city',
"Region": 'region',
"SearchURL": 'search_url',
"LatLon": 'lat_lon',
"LatLon": 'lat_long',
"URL Org": 'url',
"Tel Org": 'tel',
"Email Org": 'email',
"Country": 'country',
"Country": 'country_id',
"Tags": 'tags',
"Description": 'description',
"Northing": 'northing',
@ -170,7 +179,8 @@ class Command(BaseCommand):
"DOI": 'doi',
"ShortName": 'short_name',
"FormalName": 'formal_name',
"Membership within the UN System": '',
"Membership within the UN System": 'membership_within_un_system',
"Membership within the UN System": 'membership_within_un_system',
"Continent": 'continent',
"EU_MS": 'eu_ms',
"EEA_MS": 'eea_ms',
@ -179,7 +189,7 @@ class Command(BaseCommand):
"Point_Name": 'point_name',
"Elevation": 'elevation',
"Link": 'link',
"Repository": 'repository',
"Repository": 'repository_id',
"SearchString": 'search_string',
"SearchDate": 'search_date',
"Result": 'result',
@ -191,7 +201,7 @@ class Command(BaseCommand):
"Last name": 'last_name',
"Full name": 'full_name',
"SearchName": 'search_name',
"e-mail 1": 'email_1',
"e-mail 1": 'contact_email',
"e-mail 2": 'email_2',
"Skype": 'skype',
"Professional phone": 'professional_phone',
@ -203,7 +213,7 @@ class Command(BaseCommand):
"Entry date": 'entry_date',
"Newsletter": 'news_letter',
"CountryLookup": 'country_lookup',
"Organisation": 'organization',
"Organisation": 'organization_id',
"Birds": 'birds',
"Mammals": 'mammals',
'Reptiles': 'reptiles',
@ -237,7 +247,7 @@ class Command(BaseCommand):
'Landscape': 'landscape',
'Regional': 'regional',
'National': 'national',
'Global': 'global',
'Global': '_global',
'Geographic area of expertise': 'geographic_area_of_expertise',
'ProfileOnWeb': 'profile_on_web',
'Updated': 'updated',
@ -245,24 +255,30 @@ class Command(BaseCommand):
'WebOfScience': 'web_of_science',
'Twitter': 'twitter',
'Instagram': 'instagram',
'ScientificName': 'scientific_name',
'Class': 'class',
'ScientificName': 'scientific_name_id',
'Class': '_class',
'EnglishName': 'english_name',
'Language': 'language',
'Person': 'person',
'Function': 'function',
'Range': 'range',
'Person': 'person_id',
'Field': 'field_id',
'Method': 'method_id',
'Scale': 'scale_id',
'Function': 'function_id',
'Range': 'range_id',
'Endemic': 'endemic',
'SourceURL': 'source_url',
'MountainRange': 'mountain_range',
'TaxonRangeID': 'id',
'SubRangeOrRegion': 'subrange_or_region',
'Taxon': 'taxon',
'Taxon': 'taxon_id',
'Distribution': 'distribution',
'RedList': 'red_list',
'RedList': 'redlist',
'CountUnit': 'count_unit',
'NumberUnits': 'number_of_units',
'Remarks': 'remarks',
'RangeType': 'range_type',
'Role': 'role',
'RedListCategory': 'red_list_category'
@ -270,22 +286,110 @@ class Command(BaseCommand):
def add_arguments(self, parser):
    """Register the command-line options of the import command.

    Two modes are supported: ``--all`` (optionally with
    ``--csv_folder_path``) imports every known CSV file, while ``--path``
    together with ``--model_name`` imports a single file. Neither
    ``--model_name`` nor ``--app_name`` is required, so ``--all`` can be
    used on its own.

    Args:
        parser: the argparse parser supplied by Django.
    """
    # Each option must be registered exactly once: argparse raises a
    # "conflicting option string" error on a duplicate registration.
    parser.add_argument('--path', type=str, help="file path")
    parser.add_argument('--csv_folder_path', type=str, help="Path where the csvs are located")
    parser.add_argument('--model_name', type=str, help="model name")
    parser.add_argument('--app_name', type=str, help="django app name that the model is connected to", default='app')
    parser.add_argument('--all', action='store_true', help="Imports all csvs")
# ./manage.py import --path /home/pcoder/Downloads/gmbadb/csvs/v2-LU_RedListCategories.csv --model_name RedListCategory --app_name app
def handle(self, *args, **options):
    """Import CSV rows into Django models.

    With ``--all`` every file listed in ``csv_files_models_dict`` is read
    from ``--csv_folder_path`` (in mapping order); otherwise the single file
    given by ``--path`` is loaded into the model named by ``--model_name``.

    Raises:
        CommandError: if neither mode is fully specified, the model cannot
            be resolved, or a CSV header contains a column that has no
            entry in ``cols_to_django_fields``.
    """
    # Registered once; both import modes read with the same dialect.
    csv.register_dialect(
        'mydialect',
        delimiter=',',
        quotechar='"',
        doublequote=True,
        skipinitialspace=True,
        lineterminator='\n',
        quoting=csv.QUOTE_MINIMAL)
    app_name = options.get('app_name') or 'app'
    # Per-row dumps are only useful when debugging; show them at -v 2 and up.
    verbose = (options.get('verbosity') or 1) >= 2

    if not options.get('all'):
        file_path = options.get('path')
        model_name = options.get('model_name')
        if not file_path or not model_name:
            raise CommandError(
                "Pass either --all, or both --path and --model_name")
        _model = self._resolve_model(app_name, model_name)
        self._import_csv(file_path, _model, model_name, verbose)
        self.stdout.write("Done importing %s" % str(_model))
        return

    # NOTE(review): developer-machine default kept for backward
    # compatibility; callers should pass --csv_folder_path explicitly.
    csv_folder_path = options.get('csv_folder_path') or '/home/pcoder/Downloads/gmbadb/csvs'
    # NOTE(review): these models are skipped by --all because they were
    # already imported when this command was written. Confirm whether the
    # skip list should be emptied for a fresh database.
    already_imported = {
        'Range', 'NamesImport', 'ImportGeom210915', 'Organization', 'AddElevation',
        'GMBA_V2_Centroid', 'Person', 'PeopleRange', 'PeopleFunction', "PeopleResource",
        "RangeCountry", "RangeNameTranslation", "RangeOnlineInfo", "ResourceRange",
        "ResourceKeyword", "Repository",
    }
    self.stdout.write("Doing an import of all csvs")
    for csv_file_name, model_name in self.csv_files_models_dict.items():
        self.stdout.write("Importing %s -- %s" % (csv_file_name, model_name))
        if model_name in already_imported:
            continue
        # Accept the folder with or without a trailing slash.
        file_path = '%s/%s' % (csv_folder_path.rstrip('/'), csv_file_name)
        _model = self._resolve_model(app_name, model_name)
        self._import_csv(file_path, _model, model_name, verbose)
        self.stdout.write("Done importing %s" % model_name)


def _resolve_model(self, app_name, model_name):
    """Return the model class for ``app_name.model_name`` or raise CommandError."""
    try:
        return apps.get_model(app_name, model_name)
    except LookupError as err:
        raise CommandError(
            "Unknown model %s.%s" % (app_name, model_name)) from err


def _import_csv(self, file_path, _model, model_name, verbose=False):
    """Save one ``_model`` instance per data row of the CSV at ``file_path``.

    The first row is treated as the header. Rows rejected by the database
    with an IntegrityError are reported on stderr and skipped so that one
    bad row does not abort the whole import.

    Returns:
        A ``(saved, failed)`` tuple of row counts.
    """
    saved = failed = 0
    # newline='' lets the csv module handle line endings inside quoted fields.
    with open(file_path, 'r', newline='') as csv_file:
        reader = csv.reader(csv_file, dialect='mydialect')
        header = next(reader, None)
        if header is None:
            self.stderr.write("No header row in %s; nothing imported" % file_path)
            return saved, failed
        columns = [h.strip('"') for h in header]
        # Fail early with the offending column names instead of passing a
        # None/empty keyword to the model constructor.
        unmapped = [c for c in columns if not self.cols_to_django_fields.get(c)]
        if unmapped:
            raise CommandError(
                "No field mapping for column(s) %s in %s"
                % (", ".join(repr(c) for c in unmapped), file_path))
        field_names = [self.cols_to_django_fields[c] for c in columns]
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines; nothing to import.
                continue
            _object_dict = self._row_to_kwargs(model_name, field_names, row)
            if verbose:
                self.stdout.write(str(_object_dict))
            m = _model(**_object_dict)
            try:
                m.save()
            except IntegrityError as ie:
                failed += 1
                self.stderr.write(str(ie))
                self.stderr.write("Could not save %s" % json.dumps(_object_dict))
            else:
                saved += 1
    return saved, failed


def _row_to_kwargs(self, model_name, field_names, row):
    """Build the model constructor kwargs for one CSV row.

    ``cols_to_django_fields`` is shared by all models, so a few models need
    the generic ``*_id`` key renamed to the plain field name they declare.
    """
    renames = {
        'Range': ('range_name_id', 'range_name'),
        'Keyword': ('keyword_id', 'keyword'),
        'Organization': ('country_id', 'country'),
        'PeopleRange': ('mountain_range', 'range_id'),
        'Species': ('scientific_name_id', 'scientific_name'),
        'TaxonRange': ('taxon_id', 'taxon'),
    }
    _object_dict = {
        field: value.strip('"') for field, value in zip(field_names, row)
    }
    rename = renames.get(model_name)
    if rename is not None:
        old_key, new_key = rename
        if old_key in _object_dict:
            _object_dict[new_key] = _object_dict.pop(old_key)
    if model_name == 'Person' and 'organization_id' in _object_dict:
        raw = _object_dict['organization_id']
        if not raw:
            # NOTE(review): presumably a placeholder Organization row with
            # pk -1 exists for people without an organisation -- confirm.
            _object_dict['organization_id'] = '-1'
        else:
            # The value goes through float() first, so "12.0" becomes 12.
            _object_dict['organization_id'] = int(float(raw))
    return _object_dict

View file

@ -0,0 +1,33 @@
# Generated by Django 3.2.5 on 2021-11-10 12:18
from django.db import migrations, models
class Migration(migrations.Migration):
    """Make ``gmba_v2_id`` a nullable, blank-able text field on four models."""

    dependencies = [("app", "0004_auto_20211108_0106")]

    # One AlterField per model, in the same order as originally generated.
    operations = [
        migrations.AlterField(
            model_name=altered_model,
            name="gmba_v2_id",
            field=models.TextField(null=True, blank=True),
        )
        for altered_model in (
            "addelevation",
            "importgeom210915",
            "namesimport",
            "range",
        )
    ]

View file

@ -0,0 +1,18 @@
# Generated by Django 3.2.5 on 2021-11-10 12:34
from django.db import migrations
class Migration(migrations.Migration):
    """Rename ``PeopleFunction.scale`` to ``PeopleFunction.function``."""

    dependencies = [("app", "0005_auto_20211110_1218")]

    operations = [
        migrations.RenameField(
            model_name="peoplefunction",
            old_name="scale",
            new_name="function",
        ),
    ]

View file

@ -0,0 +1,22 @@
# Generated by Django 3.2.5 on 2021-11-10 13:52
from django.db import migrations, models
class Migration(migrations.Migration):
    """Rename ``Resource.WikiDataID`` to ``Resource.wiki_data_id``.

    The field definition is unchanged by the rename, so the final schema is
    the same as removing ``WikiDataID`` and adding ``wiki_data_id``.
    """

    dependencies = [
        ('app', '0006_rename_scale_peoplefunction_function'),
    ]

    # A RemoveField + AddField pair would drop the column and every value
    # stored in it; RenameField renames the column and keeps existing data.
    operations = [
        migrations.RenameField(
            model_name='resource',
            old_name='WikiDataID',
            new_name='wiki_data_id',
        ),
    ]

View file

@ -0,0 +1,18 @@
# Generated by Django 3.2.5 on 2021-11-10 14:15
from django.db import migrations, models
class Migration(migrations.Migration):
    """Store ``TaxonRange.taxon`` as a nullable, blank-able text field."""

    dependencies = [("app", "0007_auto_20211110_1352")]

    operations = [
        migrations.AlterField(
            model_name="taxonrange",
            name="taxon",
            field=models.TextField(null=True, blank=True),
        ),
    ]

View file

@ -93,7 +93,7 @@ class Range(models.Model):
source = models.TextField(blank=True, null=True)
range_alternate_id = models.TextField(blank=True, null=True)
geologic_region = models.TextField(blank=True, null=True)
gmba_v2_id = models.TextField(blank=True, null=True, unique=True)
gmba_v2_id = models.TextField(blank=True, null=True)
gmba_v2_id_str = models.TextField(blank=True, null=True)
wiki_data_id = models.TextField(blank=True, null=True)
wiki_data_url = models.TextField(blank=True, null=True)
@ -155,7 +155,7 @@ class Resource(models.Model):
team = models.TextField(blank=True, null=True)
inventory = models.TextField(blank=True, null=True)
doi = models.TextField(blank=True, null=True)
WikiDataID = models.TextField(blank=True, null=True)
wiki_data_id = models.TextField(blank=True, null=True)
class Meta:
db_table = 'resource'
@ -318,7 +318,7 @@ class RangeNameTranslation(models.Model):
class AddElevation(models.Model):
id = models.AutoField(primary_key=True)
gmba_v2_id = models.ForeignKey(Range, models.DO_NOTHING, blank=True, null=True, to_field='gmba_v2_id')
gmba_v2_id = models.TextField(blank=True, null=True)
elev_min = models.TextField(blank=True, null=True)
elev_max = models.TextField(blank=True, null=True)
elev_range = models.TextField(blank=True, null=True)
@ -334,7 +334,7 @@ class GMBA_V2_Centroid(models.Model):
class ImportGeom210915(models.Model):
id = models.AutoField(primary_key=True)
gmba_v2_id = models.ForeignKey(Range, models.DO_NOTHING, blank=True, null=True, to_field='gmba_v2_id')
gmba_v2_id = models.TextField(blank=True, null=True)
area = models.TextField(blank=True, null=True)
perimeter = models.TextField(blank=True, null=True)
color_all = models.TextField(blank=True, null=True)
@ -355,7 +355,7 @@ class ResourceKeyword(models.Model):
class NamesImport(models.Model):
id = models.AutoField(primary_key=True)
gmba_v2_id = models.ForeignKey(Range, models.DO_NOTHING, blank=True, null=True, to_field='gmba_v2_id')
gmba_v2_id = models.TextField(blank=True, null=True)
cn = models.TextField(blank=True, null=True)
de = models.TextField(blank=True, null=True)
es = models.TextField(blank=True, null=True)
@ -544,7 +544,7 @@ class Species(models.Model):
class PeopleFunction(models.Model):
id = models.AutoField(primary_key=True)
person = models.ForeignKey(Person, models.DO_NOTHING, blank=True, null=True)
scale = models.ForeignKey(GMBA_function, models.DO_NOTHING, blank=True, null=True)
function = models.ForeignKey(GMBA_function, models.DO_NOTHING, blank=True, null=True)
class SpeciesRange(models.Model):
@ -564,7 +564,7 @@ class PeopleRange(models.Model):
class TaxonRange(models.Model):
id = models.AutoField(primary_key=True)
range = models.ForeignKey(Range, models.DO_NOTHING, blank=True, null=True)
taxon = models.ForeignKey(Taxon, models.DO_NOTHING, blank=True, null=True)
taxon = models.TextField(blank=True, null=True)
subrange_or_region = models.TextField(blank=True, null=True)
taxon_status = models.TextField(blank=True, null=True)
distribution = models.TextField(blank=True, null=True)