Skip to content
Snippets Groups Projects
Commit 73f518a5 authored by jvfpw18's avatar jvfpw18
Browse files

Create a new class for table definitions

parent 9266139f
No related branches found
No related tags found
2 merge requests!13v1.1.0,!11Refactor definitions
...@@ -73,6 +73,7 @@ def insert(file_name, table, year, offset=2, delimiters=[';', '\\n', '"'], null= ...@@ -73,6 +73,7 @@ def insert(file_name, table, year, offset=2, delimiters=[';', '\\n', '"'], null=
def create(table, ignore_definitions=False): def create(table, ignore_definitions=False):
'''Creates table from mapping_protocol metadata''' '''Creates table from mapping_protocol metadata'''
table = gen_data_table(table, META) table = gen_data_table(table, META)
table.gen_definitions()
with ENGINE.connect() as connection: with ENGINE.connect() as connection:
trans = connection.begin() trans = connection.begin()
...@@ -90,6 +91,7 @@ def drop(table): ...@@ -90,6 +91,7 @@ def drop(table):
def remap(table, auto_confirmation=True, verify_definitions=False): def remap(table, auto_confirmation=True, verify_definitions=False):
'''Applies change made in mapping protocols to database''' '''Applies change made in mapping protocols to database'''
table = gen_data_table(table, META) table = gen_data_table(table, META)
table.gen_definitions()
table.map_from_database() table.map_from_database()
table.remap(auto_confirmation, verify_definitions) table.remap(auto_confirmation, verify_definitions)
......
...@@ -41,6 +41,11 @@ class MissingProtocolError(DatabaseError): ...@@ -41,6 +41,11 @@ class MissingProtocolError(DatabaseError):
requires a protocol while there is none loaded''' requires a protocol while there is none loaded'''
pass pass
class MissingDefinitionsError(DatabaseError):
    '''Raised when a method that requires the table Definitions is used
    while no Definitions object is loaded.'''
class MissingForeignKeyError(DatabaseError): class MissingForeignKeyError(DatabaseError):
'''This exception should be raised if an expected foreign key is not found.''' '''This exception should be raised if an expected foreign key is not found.'''
def __init__(self, referred_table=None): def __init__(self, referred_table=None):
......
...@@ -30,11 +30,12 @@ from sqlalchemy import Table, Column, inspect, Integer, String, Boolean,\ ...@@ -30,11 +30,12 @@ from sqlalchemy import Table, Column, inspect, Integer, String, Boolean,\
from sqlalchemy.sql import select, insert, update, delete, func from sqlalchemy.sql import select, insert, update, delete, func
import pandas as pd import pandas as pd
from database.base import DatabaseColumnError, MissingProtocolError, DatabaseMappingError,\ from database.base import DatabaseColumnError, MissingProtocolError, DatabaseMappingError, \
InvalidTargetError, MissingForeignKeyError, MissingTableError,\ InvalidTargetError, MissingForeignKeyError, MissingTableError, \
CircularReferenceError CircularReferenceError, MissingDefinitionsError
from database.protocol import Protocol from database.protocol import Protocol
from database.types import get_type from database.types import get_type
from database.definitions import Definitions
import settings import settings
# Disable no-member warnings to silence false positives from Table instances dinamically generated # Disable no-member warnings to silence false positives from Table instances dinamically generated
...@@ -145,6 +146,8 @@ class DatabaseTable(Table): ...@@ -145,6 +146,8 @@ class DatabaseTable(Table):
self._mapping_table = gen_mapping_table(self) self._mapping_table = gen_mapping_table(self)
if not hasattr(self, '_protocol'): if not hasattr(self, '_protocol'):
self._protocol = None self._protocol = None
if not hasattr(self, '_definitions'):
self._definitions = None
if 'protocol' in kwargs.keys(): if 'protocol' in kwargs.keys():
self.load_protocol(kwargs['protocol']) self.load_protocol(kwargs['protocol'])
...@@ -295,47 +298,21 @@ class DatabaseTable(Table): ...@@ -295,47 +298,21 @@ class DatabaseTable(Table):
logger.debug("Table %s not present in database.", self.name) logger.debug("Table %s not present in database.", self.name)
raise MissingTableError(self.name) raise MissingTableError(self.name)
def get_definitions(self): def get_columns_dict(self, ignore_diff=False):
''' '''
Returns a dictionary with definitions from a table definitions file Get a dictionary of columns, comparing the columns of the associated protocol with the columns in definitions
'''
definitions = self.name + '.json'
logger.debug("Acquiring definitions from %s", definitions)
definitions = os.path.join(settings.TABLE_DEFINITIONS_FOLDER, definitions)
definitions = json.loads(open(definitions).read())
logger.debug("Definitions loaded")
return definitions
def update_defintions(self, definitions):
'''
Update the table definitions with an altered definitions object
'''
logger.debug("Updating table definitions from %s", definitions)
definitions_json = self.name + '.json'
definitions_json = os.path.join(settings.TABLE_DEFINITIONS_FOLDER, definitions_json)
new_definitions_json = jsbeautifier.beautify(json.dumps(definitions, ensure_ascii=False))
with open(definitions_json, "w") as def_json:
def_json.write(new_definitions_json)
logger.debug("Definitions Updated")
def get_columns_dict(self, tdef_columns, ignore_diff=False):
'''
Get a dictionary of columns, comparing the columns of the associated protocol with those the columns in table
definition.
:param tdef_columns: column dict from table definitions
:param ignore_diff: when set True will ignore differences in table_definition and get the data only from :param ignore_diff: when set True will ignore differences in table_definition and get the data only from
mapping_protocol when both exists mapping_protocol when both exists
:return: {"column_name": ["column_type(str)", "target"]} :return: {"column_name": ["column_type(str)", "target"]}
''' '''
self.check_definitions()
if self._protocol is None: if self._protocol is None:
if not tdef_columns: if self._definitions.columns is None:
raise MissingProtocolError("You must first load a protocol or add columns to the table definition") raise MissingProtocolError("You must first load a protocol or add columns to the table definition")
else: else:
logger.warning("Table creation will be entirely based on the table definition") logger.warning("Table creation will be entirely based on the table definition")
return tdef_columns return self._definitions.columns
else: else:
column_dict = {} column_dict = {}
for column in self._protocol.get_targets(): for column in self._protocol.get_targets():
...@@ -347,11 +324,11 @@ class DatabaseTable(Table): ...@@ -347,11 +324,11 @@ class DatabaseTable(Table):
column[0] = column[0].strip() column[0] = column[0].strip()
column_dict[column[0]] = [column[1], self._protocol.target_from_dbcolumn(column[0])] column_dict[column[0]] = [column[1], self._protocol.target_from_dbcolumn(column[0])]
if not ignore_diff and tdef_columns: if not ignore_diff and self._definitions.columns:
for c_name, c_type in tdef_columns.items(): for c_name, c_type in self._definitions.columns.items():
if c_name not in column_dict.keys(): if c_name not in column_dict.keys():
prompt = input("The column {} is not present on the mapping protocol but is on the table definition," prompt = input("The column {} is not present on the mapping protocol but is on the "
" should it exist ? (Y/n): ".format(c_name)) "table definition, should it exist ? (Y/n): ".format(c_name))
if prompt.upper() in ['', 'Y']: if prompt.upper() in ['', 'Y']:
print("Column {} will be created, please update the protocol later".format(c_name)) print("Column {} will be created, please update the protocol later".format(c_name))
column_dict[c_name] = c_type column_dict[c_name] = c_type
...@@ -360,29 +337,6 @@ class DatabaseTable(Table): ...@@ -360,29 +337,6 @@ class DatabaseTable(Table):
return column_dict return column_dict
def get_targets_from_definitions(self):
'''
Get a list containing all the targets from table definition
'''
targets = []
definitions = self.get_definitions()
for column_name, parameter_list in definitions['columns'].items():
targets.append(parameter_list[1])
return targets
def get_dbcolumn_from_target_definition(self, target):
'''
Gets database column from a target column name. Ouput is a list
with the column name and type contents.
:return: ['column_name','column_type']
'''
definitions = self.get_definitions()
for column_name, parameter_list in definitions['columns'].items():
if parameter_list[1] == target:
return [column_name, parameter_list[0]]
def load_protocol(self, protocol): def load_protocol(self, protocol):
''' '''
Takes a Protocol instance and loads it for further use Takes a Protocol instance and loads it for further use
...@@ -403,6 +357,7 @@ class DatabaseTable(Table): ...@@ -403,6 +357,7 @@ class DatabaseTable(Table):
''' '''
Creates the mapping table in the database Creates the mapping table in the database
''' '''
self.check_definitions()
if bind is None: if bind is None:
bind = self.metadata.bind bind = self.metadata.bind
...@@ -413,10 +368,9 @@ class DatabaseTable(Table): ...@@ -413,10 +368,9 @@ class DatabaseTable(Table):
with bind.connect() as connection: with bind.connect() as connection:
logger.info("Populating mapping table") logger.info("Populating mapping table")
columns = [c[1] for c in self.columns.items()] columns = [c[1] for c in self.columns.items()]
definitions = self.get_definitions()
for c in columns: for c in columns:
column = {} column = {}
column['target_name'] = definitions['columns'][c.name][1] column['target_name'] = self._definitions.columns[c.name][1]
if not column['target_name']: if not column['target_name']:
continue continue
column['name'] = c.name column['name'] = c.name
...@@ -430,9 +384,9 @@ class DatabaseTable(Table): ...@@ -430,9 +384,9 @@ class DatabaseTable(Table):
''' '''
Inserts or updates table entry in the sources table Inserts or updates table entry in the sources table
''' '''
self.check_definitions()
if bind is None: if bind is None:
bind = self.metadata.bind bind = self.metadata.bind
definitions = self.get_definitions()
source_table = gen_source_table(self.metadata) source_table = gen_source_table(self.metadata)
# Create source table if doesnt exist # Create source table if doesnt exist
...@@ -441,8 +395,6 @@ class DatabaseTable(Table): ...@@ -441,8 +395,6 @@ class DatabaseTable(Table):
source_table.create(bind=bind) source_table.create(bind=bind)
logger.debug("Source table creation: no exceptions.") logger.debug("Source table creation: no exceptions.")
source = definitions['data_source']
logger.debug("Checking for '%s' in source table", self.name) logger.debug("Checking for '%s' in source table", self.name)
base_select = select([source_table.c.id]).where(source_table.c.table_name == self.name) base_select = select([source_table.c.id]).where(source_table.c.table_name == self.name)
table_id = bind.execute(base_select).fetchone() table_id = bind.execute(base_select).fetchone()
...@@ -455,7 +407,7 @@ class DatabaseTable(Table): ...@@ -455,7 +407,7 @@ class DatabaseTable(Table):
logger.debug("Table not found. Running insert query") logger.debug("Table not found. Running insert query")
base_query = insert(source_table) base_query = insert(source_table)
base_query = base_query.values(table_name=self.name, source=source) base_query = base_query.values(table_name=self.name, source=self._definitions.source)
bind.execute(base_query) bind.execute(base_query)
...@@ -466,27 +418,26 @@ class DatabaseTable(Table): ...@@ -466,27 +418,26 @@ class DatabaseTable(Table):
Table definitions must also be defined to allow primary key and foreign keys addition. Table definitions must also be defined to allow primary key and foreign keys addition.
Useful for table creation. Useful for table creation.
''' '''
self.check_definitions()
if self.columns.keys(): if self.columns.keys():
logger.warning("Table mapping already has columns. Nothing done.") logger.warning("Table mapping already has columns. Nothing done.")
return return
if bind is None: if bind is None:
bind = self.metadata.bind bind = self.metadata.bind
definitions = self.get_definitions() column_dict = self.get_columns_dict(ignore_defintions)
column_dict = self.get_columns_dict(definitions.get('columns'), ignore_defintions)
for c_name, c_type in column_dict.items(): for c_name, c_type in column_dict.items():
column = Column(c_name, get_type(c_type[0])) column = Column(c_name, get_type(c_type[0]))
self.append_column(column) self.append_column(column)
definitions['columns'] = column_dict self._definitions.update_columns(column_dict)
self.update_defintions(definitions)
primary_key = [self.columns.get(c) for c in definitions['pk']] primary_key = [self.columns.get(c) for c in self._definitions.pkcolumns]
if primary_key: if primary_key:
self.constraints.add(PrimaryKeyConstraint(*primary_key)) self.constraints.add(PrimaryKeyConstraint(*primary_key))
for foreign_key in definitions["foreign_keys"]: for foreign_key in self._definitions.fkcolumns:
keys = [self.columns.get(c) for c in foreign_key["keys"]] keys = [self.columns.get(c) for c in foreign_key["keys"]]
ref_table = DatabaseTable(foreign_key["reference_table"], self.metadata) ref_table = DatabaseTable(foreign_key["reference_table"], self.metadata)
...@@ -568,7 +519,6 @@ class DatabaseTable(Table): ...@@ -568,7 +519,6 @@ class DatabaseTable(Table):
field_type = get_type(field_type) field_type = get_type(field_type)
if target is not None and self._mapping_table.exists(): if target is not None and self._mapping_table.exists():
entry = { entry = {
'target_name': target, 'target_name': target,
...@@ -681,7 +631,8 @@ class DatabaseTable(Table): ...@@ -681,7 +631,8 @@ class DatabaseTable(Table):
The method uses target_names as the criteria to decide if columns are the same or not. The method uses target_names as the criteria to decide if columns are the same or not.
''' '''
target_list = self.get_targets_from_definitions() self.check_definitions()
target_list = self._definitions.get_targets()
query = self._mapping_table.select() query = self._mapping_table.select()
results = self.metadata.bind.execute(query).fetchall() results = self.metadata.bind.execute(query).fetchall()
...@@ -699,7 +650,7 @@ class DatabaseTable(Table): ...@@ -699,7 +650,7 @@ class DatabaseTable(Table):
continue continue
name, field_type = result name, field_type = result
try: try:
new_name, new_type = self.get_dbcolumn_from_target_definition(target) new_name, new_type = self._definitions.get_dbcolumn_from_target(target)
except InvalidTargetError: except InvalidTargetError:
to_drop_columns.append(target) to_drop_columns.append(target)
continue continue
...@@ -722,6 +673,7 @@ class DatabaseTable(Table): ...@@ -722,6 +673,7 @@ class DatabaseTable(Table):
mapping table. mapping table.
If verify_definitions is set it will ask any difference between mapping_protocol and table_definition If verify_definitions is set it will ask any difference between mapping_protocol and table_definition
''' '''
self.check_definitions()
if not self.exists(): if not self.exists():
print("Table {} doesn't exist".format(self.name)) print("Table {} doesn't exist".format(self.name))
return return
...@@ -729,9 +681,8 @@ class DatabaseTable(Table): ...@@ -729,9 +681,8 @@ class DatabaseTable(Table):
mtable = self._mapping_table mtable = self._mapping_table
# Update table definitions # Update table definitions
definitions = self.get_definitions() column_dict = self.get_columns_dict(ignore_diff=not verify_definitions)
definitions['columns'] = self.get_columns_dict(definitions.get('columns'), ignore_diff=not verify_definitions) self._definitions.update_columns(column_dict)
self.update_defintions(definitions)
if not mtable.exists(): if not mtable.exists():
print("Mapping table for {} not found.".format(self.name)) print("Mapping table for {} not found.".format(self.name))
...@@ -763,13 +714,13 @@ class DatabaseTable(Table): ...@@ -763,13 +714,13 @@ class DatabaseTable(Table):
with self.metadata.bind.connect() as connection: with self.metadata.bind.connect() as connection:
# Create new columns # Create new columns
if accept_new_columns: if accept_new_columns:
for column in new_columns: for target in new_columns:
try: try:
dbcolumn = self._protocol.dbcolumn_from_target(column) dbcolumn = self._definitions.get_dbcolumn_from_target(target)
except InvalidTargetError: except InvalidTargetError:
continue continue
self.add_column(dbcolumn[0], dbcolumn[1], column, bind=connection) self.add_column(dbcolumn[0], dbcolumn[1], target, bind=connection)
# Drop columns # Drop columns
if accept_drop_columns: if accept_drop_columns:
...@@ -1088,6 +1039,19 @@ class DatabaseTable(Table): ...@@ -1088,6 +1039,19 @@ class DatabaseTable(Table):
ttable.schema = temp_schema ttable.schema = temp_schema
def check_definitions(self):
    '''
    Guard used by methods that need the table definitions.

    Returns None when a Definitions object is attached to the table and
    raises MissingDefinitionsError when self._definitions is None.
    '''
    if self._definitions is not None:
        return
    raise MissingDefinitionsError('You must first load the table Definitions')
def gen_definitions(self, keys=None):
    '''
    Attach a Definitions object to the table, unless one is already set.

    :param keys: forwarded unchanged to the Definitions constructor
    '''
    logger.debug('Generating Definitions.')
    if self._definitions:
        # Keep whatever is already attached; only report it.
        logger.debug('Table definitions already loaded, nothing done.')
        return
    self._definitions = Definitions(self.name, keys)
def gen_data_table(table, meta): def gen_data_table(table, meta):
'''Returns a DatabaseTable instance with associated mapping protocol''' '''Returns a DatabaseTable instance with associated mapping protocol'''
table = DatabaseTable(table, meta) table = DatabaseTable(table, meta)
......
'''
Copyright (C) 2016 Centro de Computacao Cientifica e Software Livre
Departamento de Informatica - Universidade Federal do Parana - C3SL/UFPR
This file is part of HOTMapper.
HOTMapper is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
HOTMapper is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with HOTMapper. If not, see <https://www.gnu.org/licenses/>.
'''
import logging
import os
import json
import jsbeautifier
import settings
from database.base import InvalidTargetError
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Default mapping from Definitions attribute names (left) to the key names
# used inside the table definition json files (right).
standard_keys = dict(
    source='data_source',
    description='pairing_description',
    pkcolumns='pk',
    fkcolumns='foreign_keys',
    columns='columns',
)
class Definitions(object):
    '''
    Class created from the Table definitions, contains primary key, foreign key, descriptions, source
    and columns.

    The data is read from (and written back to) the file
    "<table name>.json" inside settings.TABLE_DEFINITIONS_FOLDER.
    '''

    def __init__(self, t_name, keys=None):
        '''
        Creates the object and immediately loads the table definition json.

        :param t_name: table name, used to build the json file name
        :param keys: optional dict mapping the attribute names ('source', 'description',
                     'pkcolumns', 'fkcolumns', 'columns') to the key names used in the json.
                     When empty or None, standard_keys is used.
        '''
        self.source = None
        self.description = None
        self.columns = None
        self.pkcolumns = None
        self.fkcolumns = None
        self._name = t_name
        # Key mapping used by the last successful load; reused when writing the
        # file back so that a file loaded with custom keys keeps those keys.
        self._keys = None
        self.load_json(keys)

    def _json_path(self):
        ''' Returns the path of the table definition json for this table '''
        return os.path.join(settings.TABLE_DEFINITIONS_FOLDER, self._name + '.json')

    def load_json(self, keys=None):
        '''
        Read the table definition json into the correct Definitions variables

        :param keys: optional key mapping, see __init__
        '''
        logger.debug("Acquiring definitions from %s", self._name + '.json')
        # The context manager guarantees the file handle is closed even if
        # the json content is invalid.
        with open(self._json_path()) as def_json:
            definitions = json.load(def_json)
        self.load_from_dict(definitions, keys)

    def update_columns(self, columns):
        '''
        Update Table definition json with a new columns dict

        The in-memory columns are replaced first, then the whole definition is
        serialized with to_dict() and written over the json file.

        :param columns: new columns dict
        '''
        logger.debug("Updating table definitions from %s", self._name + '.json')
        self.columns = columns
        new_definitions = jsbeautifier.beautify(json.dumps(self.to_dict(), ensure_ascii=False))
        with open(self._json_path(), "w") as def_json:
            def_json.write(new_definitions)
        logger.debug("Definitions Updated")

    def load_from_dict(self, definitions, keys=None):
        '''
        Takes a definitions dictionary and load the object Definitions variables

        :param definitions: dict with the content of a table definition
        :param keys: optional key mapping, see __init__
        :raises KeyError: if one of the mapped keys is missing from definitions
        '''
        if not keys:
            keys = standard_keys

        self.source = definitions[keys['source']]
        self.description = definitions[keys['description']]
        self.pkcolumns = definitions[keys['pkcolumns']]
        self.fkcolumns = definitions[keys['fkcolumns']]
        self.columns = definitions[keys['columns']]
        # Only remember the mapping once every field was read with it.
        self._keys = keys
        logger.debug("Definitions loaded")

    def to_dict(self, keys=None):
        '''
        Transforms a Definition object into a dictionary for writing in a json file

        Only the five fields held by this object are emitted.

        :param keys: optional key mapping; when empty or None the mapping used to load
                     the object is reused, falling back to standard_keys
        :return: dict ready to be serialized as a table definition
        '''
        keys = keys or self._keys or standard_keys

        return {
            keys['description']: self.description,
            keys['source']: self.source,
            keys['pkcolumns']: self.pkcolumns,
            keys['fkcolumns']: self.fkcolumns,
            keys['columns']: self.columns
        }

    def get_targets(self):
        '''
        Returns a list containing all columns targets

        The target is the second item of each column parameter list. An unset
        (None) columns attribute yields an empty list.
        '''
        return [parameter_list[1] for parameter_list in (self.columns or {}).values()]

    def get_dbcolumn_from_target(self, target):
        '''
        Gets a database column from a target column name. Output is a list
        with the column name and type contents.

        :param target: target column name to look for
        :return: ['column_name','column_type']
        :raises InvalidTargetError: if no column has the given target
        '''
        for column_name, parameter_list in (self.columns or {}).items():
            if parameter_list[1] == target:
                return [column_name, parameter_list[0]]
        raise InvalidTargetError(target)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment