diff --git a/database/actions.py b/database/actions.py index 749b26c8d67008c827cfa66557fd92e27479f44b..4f2b9a55d5bb3357984dd6cc074032c0c175a30c 100644 --- a/database/actions.py +++ b/database/actions.py @@ -73,6 +73,7 @@ def insert(file_name, table, year, offset=2, delimiters=[';', '\\n', '"'], null= def create(table, ignore_definitions=False): '''Creates table from mapping_protocol metadata''' table = gen_data_table(table, META) + table.gen_definitions() with ENGINE.connect() as connection: trans = connection.begin() @@ -90,6 +91,7 @@ def drop(table): def remap(table, auto_confirmation=True, verify_definitions=False): '''Applies change made in mapping protocols to database''' table = gen_data_table(table, META) + table.gen_definitions() table.map_from_database() table.remap(auto_confirmation, verify_definitions) diff --git a/database/base.py b/database/base.py index 4312529b178941128993a302a88f717dab1caefc..d49badb781e183ab5636815eb1a5e516caeaf098 100644 --- a/database/base.py +++ b/database/base.py @@ -41,6 +41,11 @@ class MissingProtocolError(DatabaseError): requires a protocol while there is none loaded''' pass +class MissingDefinitionsError(DatabaseError): + '''This exception should be raised if the program tries to use methods that + requires the table Definitions while there is none loaded''' + pass + class MissingForeignKeyError(DatabaseError): '''This exception should be raised if an expected foreign key is not found.''' def __init__(self, referred_table=None): diff --git a/database/database_table.py b/database/database_table.py index d67e9442f0f1d19061cb946b06a3eede1fdce427..7a54ae7f3aee9f4c1a2afe8a1a0e02d4fd87144f 100644 --- a/database/database_table.py +++ b/database/database_table.py @@ -30,11 +30,12 @@ from sqlalchemy import Table, Column, inspect, Integer, String, Boolean,\ from sqlalchemy.sql import select, insert, update, delete, func import pandas as pd -from database.base import DatabaseColumnError, MissingProtocolError, DatabaseMappingError,\ - 
InvalidTargetError, MissingForeignKeyError, MissingTableError,\ - CircularReferenceError +from database.base import DatabaseColumnError, MissingProtocolError, DatabaseMappingError, \ + InvalidTargetError, MissingForeignKeyError, MissingTableError, \ + CircularReferenceError, MissingDefinitionsError from database.protocol import Protocol from database.types import get_type +from database.definitions import Definitions import settings # Disable no-member warnings to silence false positives from Table instances dinamically generated @@ -145,6 +146,8 @@ class DatabaseTable(Table): self._mapping_table = gen_mapping_table(self) if not hasattr(self, '_protocol'): self._protocol = None + if not hasattr(self, '_definitions'): + self._definitions = None if 'protocol' in kwargs.keys(): self.load_protocol(kwargs['protocol']) @@ -295,47 +298,21 @@ class DatabaseTable(Table): logger.debug("Table %s not present in database.", self.name) raise MissingTableError(self.name) - def get_definitions(self): + def get_columns_dict(self, ignore_diff=False): ''' - Returns a dictionary with definitions from a table definitions file - ''' - definitions = self.name + '.json' - logger.debug("Acquiring definitions from %s", definitions) - definitions = os.path.join(settings.TABLE_DEFINITIONS_FOLDER, definitions) - definitions = json.loads(open(definitions).read()) - logger.debug("Definitions loaded") - - return definitions - - def update_defintions(self, definitions): - ''' - Update the table definitions with an altered definitions object - ''' - logger.debug("Updating table definitions from %s", definitions) - definitions_json = self.name + '.json' - definitions_json = os.path.join(settings.TABLE_DEFINITIONS_FOLDER, definitions_json) - - new_definitions_json = jsbeautifier.beautify(json.dumps(definitions, ensure_ascii=False)) - with open(definitions_json, "w") as def_json: - def_json.write(new_definitions_json) - - logger.debug("Definitions Updated") - - def get_columns_dict(self, tdef_columns, 
ignore_diff=False): - ''' - Get a dictionary of columns, comparing the columns of the associated protocol with those the columns in table - definition. - :param tdef_columns: column dict from table definitions + Get a dictionary of columns, comparing the columns of the associated protocol with the columns in definitions :param ignore_diff: when set True will ignore differences in table_definition and get the data only from mapping_protocol when both exists :return: {"column_name": ["column_type(str)", "target"]} ''' + self.check_definitions() + if self._protocol is None: - if not tdef_columns: + if self._definitions.columns is None: raise MissingProtocolError("You must first load a protocol or add columns to the table definition") else: logger.warning("Table creation will be entirely based on the table definition") - return tdef_columns + return self._definitions.columns else: column_dict = {} for column in self._protocol.get_targets(): @@ -347,11 +324,11 @@ class DatabaseTable(Table): column[0] = column[0].strip() column_dict[column[0]] = [column[1], self._protocol.target_from_dbcolumn(column[0])] - if not ignore_diff and tdef_columns: - for c_name, c_type in tdef_columns.items(): + if not ignore_diff and self._definitions.columns: + for c_name, c_type in self._definitions.columns.items(): if c_name not in column_dict.keys(): - prompt = input("The column {} is not present on the mapping protocol but is on the table definition," - " should it exist ? (Y/n): ".format(c_name)) + prompt = input("The column {} is not present on the mapping protocol but is on the " + "table definition, should it exist ? 
(Y/n): ".format(c_name)) if prompt.upper() in ['', 'Y']: print("Column {} will be created, please update the protocol later".format(c_name)) column_dict[c_name] = c_type @@ -360,29 +337,6 @@ class DatabaseTable(Table): return column_dict - def get_targets_from_definitions(self): - ''' - Get a list containing all the targets from table definition - ''' - targets = [] - definitions = self.get_definitions() - for column_name, parameter_list in definitions['columns'].items(): - targets.append(parameter_list[1]) - - return targets - - def get_dbcolumn_from_target_definition(self, target): - ''' - Gets database column from a target column name. Ouput is a list - with the column name and type contents. - :return: ['column_name','column_type'] - ''' - definitions = self.get_definitions() - for column_name, parameter_list in definitions['columns'].items(): - if parameter_list[1] == target: - return [column_name, parameter_list[0]] - - def load_protocol(self, protocol): ''' Takes a Protocol instance and loads it for further use @@ -403,6 +357,7 @@ class DatabaseTable(Table): ''' Creates the mapping table in the database ''' + self.check_definitions() if bind is None: bind = self.metadata.bind @@ -413,10 +368,9 @@ class DatabaseTable(Table): with bind.connect() as connection: logger.info("Populating mapping table") columns = [c[1] for c in self.columns.items()] - definitions = self.get_definitions() for c in columns: column = {} - column['target_name'] = definitions['columns'][c.name][1] + column['target_name'] = self._definitions.columns[c.name][1] if not column['target_name']: continue column['name'] = c.name @@ -430,9 +384,9 @@ class DatabaseTable(Table): ''' Inserts or updates table entry in the sources table ''' + self.check_definitions() if bind is None: bind = self.metadata.bind - definitions = self.get_definitions() source_table = gen_source_table(self.metadata) # Create source table if doesnt exist @@ -441,8 +395,6 @@ class DatabaseTable(Table): 
source_table.create(bind=bind) logger.debug("Source table creation: no exceptions.") - source = definitions['data_source'] - logger.debug("Checking for '%s' in source table", self.name) base_select = select([source_table.c.id]).where(source_table.c.table_name == self.name) table_id = bind.execute(base_select).fetchone() @@ -455,7 +407,7 @@ class DatabaseTable(Table): logger.debug("Table not found. Running insert query") base_query = insert(source_table) - base_query = base_query.values(table_name=self.name, source=source) + base_query = base_query.values(table_name=self.name, source=self._definitions.source) bind.execute(base_query) @@ -466,27 +418,26 @@ class DatabaseTable(Table): Table definitions must also be defined to allow primary key and foreign keys addition. Useful for table creation. ''' + self.check_definitions() if self.columns.keys(): logger.warning("Table mapping already has columns. Nothing done.") return if bind is None: bind = self.metadata.bind - definitions = self.get_definitions() - column_dict = self.get_columns_dict(definitions.get('columns'), ignore_defintions) + column_dict = self.get_columns_dict(ignore_defintions) for c_name, c_type in column_dict.items(): column = Column(c_name, get_type(c_type[0])) self.append_column(column) - definitions['columns'] = column_dict - self.update_defintions(definitions) + self._definitions.update_columns(column_dict) - primary_key = [self.columns.get(c) for c in definitions['pk']] + primary_key = [self.columns.get(c) for c in self._definitions.pkcolumns] if primary_key: self.constraints.add(PrimaryKeyConstraint(*primary_key)) - for foreign_key in definitions["foreign_keys"]: + for foreign_key in self._definitions.fkcolumns: keys = [self.columns.get(c) for c in foreign_key["keys"]] ref_table = DatabaseTable(foreign_key["reference_table"], self.metadata) @@ -568,7 +519,6 @@ class DatabaseTable(Table): field_type = get_type(field_type) - if target is not None and self._mapping_table.exists(): entry = { 
'target_name': target, @@ -681,7 +631,8 @@ class DatabaseTable(Table): The method uses target_names as the criteria to decide if columns are the same or not. ''' - target_list = self.get_targets_from_definitions() + self.check_definitions() + target_list = self._definitions.get_targets() query = self._mapping_table.select() results = self.metadata.bind.execute(query).fetchall() @@ -699,7 +650,7 @@ class DatabaseTable(Table): continue name, field_type = result try: - new_name, new_type = self.get_dbcolumn_from_target_definition(target) + new_name, new_type = self._definitions.get_dbcolumn_from_target(target) except InvalidTargetError: to_drop_columns.append(target) continue @@ -722,6 +673,7 @@ class DatabaseTable(Table): mapping table. If verify_definitions is set it will ask any difference between mapping_protocol and table_definition ''' + self.check_definitions() if not self.exists(): print("Table {} doesn't exist".format(self.name)) return @@ -729,9 +681,8 @@ class DatabaseTable(Table): mtable = self._mapping_table # Update table definitions - definitions = self.get_definitions() - definitions['columns'] = self.get_columns_dict(definitions.get('columns'), ignore_diff=not verify_definitions) - self.update_defintions(definitions) + column_dict = self.get_columns_dict(ignore_diff=not verify_definitions) + self._definitions.update_columns(column_dict) if not mtable.exists(): print("Mapping table for {} not found.".format(self.name)) @@ -763,13 +714,13 @@ class DatabaseTable(Table): with self.metadata.bind.connect() as connection: # Create new columns if accept_new_columns: - for column in new_columns: + for target in new_columns: try: - dbcolumn = self._protocol.dbcolumn_from_target(column) + dbcolumn = self._definitions.get_dbcolumn_from_target(target) except InvalidTargetError: continue - self.add_column(dbcolumn[0], dbcolumn[1], column, bind=connection) + self.add_column(dbcolumn[0], dbcolumn[1], target, bind=connection) # Drop columns if accept_drop_columns: @@ 
-1088,6 +1039,19 @@ class DatabaseTable(Table): ttable.schema = temp_schema + def check_definitions(self): + ''' Raises MissingDefinitionsError if the definitions is not loaded.''' + if self._definitions is None: + raise MissingDefinitionsError('You must first load the table Definitions') + + def gen_definitions(self, keys=None): + ''' Associates a Definitions object to the table ''' + logger.debug('Generating Definitions.') + if not self._definitions: + self._definitions = Definitions(self.name, keys) + else: + logger.debug('Table definitions already loaded, nothing done.') + def gen_data_table(table, meta): '''Returns a DatabaseTable instance with associated mapping protocol''' table = DatabaseTable(table, meta) diff --git a/database/definitions.py b/database/definitions.py new file mode 100644 index 0000000000000000000000000000000000000000..6770a76e6570a62f4b3db9181e1a28dbe4e28c08 --- /dev/null +++ b/database/definitions.py @@ -0,0 +1,119 @@ +''' +Copyright (C) 2016 Centro de Computacao Cientifica e Software Livre +Departamento de Informatica - Universidade Federal do Parana - C3SL/UFPR + +This file is part of HOTMapper. + +HOTMapper is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +HOTMapper is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with HOTMapper. If not, see <https://www.gnu.org/licenses/>. 
# The names json, logging, os, jsbeautifier, settings and InvalidTargetError
# (from database.base) are provided by this module's import statements.
logger = logging.getLogger(__name__)

# Maps each Definitions attribute name to the key under which its value is
# stored in the table definition JSON file.
standard_keys = {
    'source': 'data_source',
    'description': 'pairing_description',
    'pkcolumns': 'pk',
    'fkcolumns': 'foreign_keys',
    'columns': 'columns'
}


class Definitions(object):
    '''
    In-memory representation of a table definition JSON file: data source,
    description, primary key columns, foreign keys and columns.

    The file is looked up as "<t_name>.json" inside
    settings.TABLE_DEFINITIONS_FOLDER.
    '''
    def __init__(self, t_name, keys=None):
        '''
        Loads the definitions of table `t_name` from its JSON file.
        :param t_name: table name, also the JSON file base name
        :param keys: optional dict overriding entries of standard_keys
        '''
        self.source = None
        self.description = None
        self.columns = None
        self.pkcolumns = None
        self.fkcolumns = None
        self._name = t_name
        # Key mapping used by the last successful load; reused when writing
        # so the file keeps the key names it was read with.
        self._keys = None
        self.load_json(keys)

    @staticmethod
    def _resolve_keys(keys=None):
        ''' Returns a copy of standard_keys overridden by the given keys '''
        resolved = dict(standard_keys)
        if keys:
            resolved.update(keys)
        return resolved

    def _json_path(self):
        ''' Returns the path of the table definition json file '''
        return os.path.join(settings.TABLE_DEFINITIONS_FOLDER, self._name + '.json')

    def load_json(self, keys=None):
        ''' Read the table definition json into the correct Definitions variables '''
        definitions = self._name + '.json'
        logger.debug("Acquiring definitions from %s", definitions)
        # The context manager closes the handle; UTF-8 matches what
        # update_columns writes (non-ascii characters are kept verbatim).
        with open(self._json_path(), encoding="utf-8") as def_json:
            definitions = json.load(def_json)
        self.load_from_dict(definitions, keys)

    def update_columns(self, columns):
        '''
        Update Table definition json with a new columns dict
        :param columns: {"column_name": ["column_type(str)", "target"]}
        '''
        definitions_json = self._name + '.json'
        logger.debug("Updating table definitions from %s", definitions_json)

        self.columns = columns
        new_definitions = json.dumps(self.to_dict(), ensure_ascii=False)
        new_definitions = jsbeautifier.beautify(new_definitions)
        # ensure_ascii=False may emit non-ascii characters, so the encoding
        # must not depend on the locale.
        with open(self._json_path(), "w", encoding="utf-8") as def_json:
            def_json.write(new_definitions)

        logger.debug("Definitions Updated")

    def load_from_dict(self, definitions, keys=None):
        '''
        Takes a definitions dictionary and load the object Definitions variables
        :param definitions: dict read from a table definition file
        :param keys: optional dict overriding entries of standard_keys; entries
        that are not given fall back to the standard ones
        :raises KeyError: if the source, primary key or foreign keys entry is missing
        '''
        keys = self._resolve_keys(keys)

        self.source = definitions[keys['source']]
        # Description and columns are optional: a missing entry is stored as
        # None instead of aborting the load.
        self.description = definitions.get(keys['description'])
        self.pkcolumns = definitions[keys['pkcolumns']]
        self.fkcolumns = definitions[keys['fkcolumns']]
        self.columns = definitions.get(keys['columns'])
        self._keys = keys
        logger.debug("Definitions loaded")

    def to_dict(self, keys=None):
        '''
        Transforms a Definition object into a dictionary for writing in a json file
        :param keys: optional dict overriding entries of standard_keys; when
        omitted, the mapping used by the last load is reused
        :return: dict with description, source, pk, foreign keys and columns
        '''
        if keys:
            keys = self._resolve_keys(keys)
        else:
            # getattr: instances built without __init__ have no _keys yet.
            keys = getattr(self, '_keys', None) or self._resolve_keys()

        return {
            keys['description']: self.description,
            keys['source']: self.source,
            keys['pkcolumns']: self.pkcolumns,
            keys['fkcolumns']: self.fkcolumns,
            keys['columns']: self.columns
        }

    def get_targets(self):
        ''' Returns a list containing all columns targets (empty if there are no columns) '''
        return [parameters[1] for parameters in (self.columns or {}).values()]

    def get_dbcolumn_from_target(self, target):
        '''
        Gets a database column from a target column name.
        :return: ['column_name','column_type']
        :raises InvalidTargetError: if no column is mapped to `target`
        '''
        for column_name, parameters in (self.columns or {}).items():
            if parameters[1] == target:
                return [column_name, parameters[0]]

        raise InvalidTargetError(target)