From a5e46d3b2f3dc066da79f7a8b65031ff3d61c34a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Victor=20Frans=20Pondaco=20Winandy?= <jvfpw18@inf.ufpr.br> Date: Thu, 4 Jul 2019 07:59:09 -0300 Subject: [PATCH 1/3] Echo columns from mapping protocol to table_definition --- database/database_table.py | 23 +++++++++++++++++++++++ requirements.txt | 1 + 2 files changed, 24 insertions(+) diff --git a/database/database_table.py b/database/database_table.py index 73cd16a..a48234b 100644 --- a/database/database_table.py +++ b/database/database_table.py @@ -24,6 +24,7 @@ import time import json import re import logging +import jsbeautifier from sqlalchemy import Table, Column, inspect, Integer, String, Boolean,\ PrimaryKeyConstraint, ForeignKeyConstraint, text from sqlalchemy.sql import select, insert, update, delete, func @@ -306,6 +307,20 @@ class DatabaseTable(Table): return definitions + def update_defintions(self, definitions): + ''' + Update the table definitions with an altered definitions object + ''' + logger.debug("Updating table definitions from %s", definitions) + definitions_json = self.name + '.json' + definitions_json = os.path.join(settings.TABLE_DEFINITIONS_FOLDER, definitions_json) + + new_definitions_json = jsbeautifier.beautify(json.dumps(definitions, ensure_ascii=False)) + with open(definitions_json, "w") as def_json: + def_json.write(new_definitions_json) + + logger.debug("Definitions Updated") + def load_protocol(self, protocol): ''' Takes a Protocol instance and loads it for further use @@ -396,18 +411,26 @@ class DatabaseTable(Table): bind = self.metadata.bind definitions = self.get_definitions() + column_dict = definitions.get('columns') + if not column_dict: + column_dict = {} for column in self._protocol.get_targets(): try: column = self._protocol.dbcolumn_from_target(column) except InvalidTargetError: continue + if column[0] not in column_dict.keys(): + column_dict[column[0]] = column[1] if column[0]: column[0] = column[0].strip() column = Column(column[0], get_type(column[1])) self.append_column(column) + definitions['columns'] = column_dict + self.update_defintions(definitions) + primary_key = [self.columns.get(c) for c in definitions['pk']] if primary_key: self.constraints.add(PrimaryKeyConstraint(*primary_key)) diff --git a/requirements.txt b/requirements.txt index 7c18f5d..c3861e1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,6 +5,7 @@ ipython==7.5.0 ipython-genutils==0.2.0 isort==4.3.20 jedi==0.13.3 +jsbeautifier==1.10.0 lazy-object-proxy==1.4.1 manage.py==0.2.10 mccabe==0.6.1 -- GitLab From 18b75e70827226d9e620b3cf1416946fb27d4243 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Victor=20Frans=20Pondaco=20Winandy?= <jvfpw18@inf.ufpr.br> Date: Tue, 9 Jul 2019 08:55:40 -0300 Subject: [PATCH 2/3] Table can be created using only table definition --- database/database_table.py | 51 +++++++++++++++++++++++++------------- 1 file changed, 34 insertions(+), 17 deletions(-) diff --git a/database/database_table.py b/database/database_table.py index a48234b..99fdb2d 100644 --- a/database/database_table.py +++ b/database/database_table.py @@ -321,6 +321,35 @@ class DatabaseTable(Table): logger.debug("Definitions Updated") + def get_columns_to_create(self, tdef_columns): + if self._protocol is None: + if not tdef_columns: + raise MissingProtocolError("You must first load a protocol or add columns to the table definition") + else: + logger.warning("Table creation will be entirely based on the table definition") + return tdef_columns + else: + column_dict = {} + for column in self._protocol.get_targets(): + try: + column = self._protocol.dbcolumn_from_target(column) + except InvalidTargetError: + continue + if column[0]: + column[0] = column[0].strip() + column_dict[column[0]] = [column[1], self._protocol.target_from_dbcolumn(column[0])] + + for c_name, c_type in tdef_columns.items(): + if c_name not in column_dict.keys(): + prompt = input("The column {} is not present on the mapping protocol but is on the table definition," + "should it still be created ? (Y/n): ".format(c_name)) + if prompt.upper() in ['', 'Y']: + print("Column {} will be created, please update the protocol later".format(c_name)) + column_dict[c_name] = c_type + else: + print("Column {} will be removed from the table_definitions.".format(c_name)) + return column_dict + def load_protocol(self, protocol): ''' Takes a Protocol instance and loads it for further use @@ -341,7 +370,6 @@ class DatabaseTable(Table): ''' Creates the mapping table in the database ''' - self.check_protocol() if bind is None: bind = self.metadata.bind @@ -352,9 +380,10 @@ class DatabaseTable(Table): with bind.connect() as connection: logger.info("Populating mapping table") columns = [c[1] for c in self.columns.items()] + definitions = self.get_definitions() for c in columns: column = {} - column['target_name'] = self._protocol.target_from_dbcolumn(c.name) + column['target_name'] = definitions['columns'][c.name][1] if not column['target_name']: continue column['name'] = c.name @@ -403,7 +432,6 @@ class DatabaseTable(Table): be defined to allow primary key and foreign keys addition. Useful for table creation. ''' - self.check_protocol() if self.columns.keys(): logger.warning("Table mapping already has columns. Nothing done.") return @@ -411,21 +439,10 @@ class DatabaseTable(Table): bind = self.metadata.bind definitions = self.get_definitions() - column_dict = definitions.get('columns') - if not column_dict: - column_dict = {} - - for column in self._protocol.get_targets(): - try: - column = self._protocol.dbcolumn_from_target(column) - except InvalidTargetError: - continue - if column[0] not in column_dict.keys(): - column_dict[column[0]] = column[1] - if column[0]: - column[0] = column[0].strip() - column = Column(column[0], get_type(column[1])) + column_dict = self.get_columns_to_create(definitions.get('columns')) + for c_name, c_type in column_dict.items(): + column = Column(c_name, get_type(c_type[0])) self.append_column(column) definitions['columns'] = column_dict -- GitLab From a2298f14359eed3fbc34353a45e86e690c2b7689 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Victor=20Frans=20Pondaco=20Winandy?= <jvfpw18@inf.ufpr.br> Date: Fri, 12 Jul 2019 08:36:50 -0300 Subject: [PATCH 3/3] Remap can now be done only from table_definition --- database/actions.py | 8 ++-- database/database_table.py | 92 ++++++++++++++++++++++++++------------ manage.py | 16 ++++--- tests/database_test.py | 2 +- 4 files changed, 79 insertions(+), 39 deletions(-) diff --git a/database/actions.py b/database/actions.py index 83ee1ed..749b26c 100644 --- a/database/actions.py +++ b/database/actions.py @@ -70,13 +70,13 @@ def insert(file_name, table, year, offset=2, delimiters=[';', '\\n', '"'], null= trans.commit() -def create(table): +def create(table, ignore_definitions=False): '''Creates table from mapping_protocol metadata''' table = gen_data_table(table, META) with ENGINE.connect() as connection: trans = connection.begin() - table.create(bind=connection) + table.create(bind=connection, ignore_definitions=ignore_definitions) table.set_source(bind=connection) table.create_mapping_table(bind=connection) trans.commit() @@ -87,12 +87,12 @@ def drop(table): table.drop() -def remap(table, auto_confirmation=True): +def remap(table, auto_confirmation=True, verify_definitions=False): '''Applies change made in mapping protocols to database''' table = gen_data_table(table, META) table.map_from_database() - table.remap(auto_confirmation) + table.remap(auto_confirmation, verify_definitions) def csv_from_tabbed(table_name, input_file, output_file, year, sep=';'): table = gen_data_table(table_name, META) diff --git a/database/database_table.py b/database/database_table.py index 99fdb2d..d3c0dce 100644 --- a/database/database_table.py +++ b/database/database_table.py @@ -321,7 +321,15 @@ class DatabaseTable(Table): logger.debug("Definitions Updated") - def get_columns_to_create(self, tdef_columns): + def get_columns_dict(self, tdef_columns, ignore_diff=False): + ''' + Get a dictionary of columns, comparing the columns of the associated protocol with those the columns in table + definition. + :param tdef_columns: column dict from table definitions + :param ignore_diff: when set True will ignore differences in table_definition and get the data only from + mapping_protocol when both exists + :return: {"column_name": ["column_type(str)", "target"]} + ''' if self._protocol is None: if not tdef_columns: raise MissingProtocolError("You must first load a protocol or add columns to the table definition") @@ -339,17 +347,42 @@ class DatabaseTable(Table): column[0] = column[0].strip() column_dict[column[0]] = [column[1], self._protocol.target_from_dbcolumn(column[0])] - for c_name, c_type in tdef_columns.items(): - if c_name not in column_dict.keys(): - prompt = input("The column {} is not present on the mapping protocol but is on the table definition," - "should it still be created ? (Y/n): ".format(c_name)) - if prompt.upper() in ['', 'Y']: - print("Column {} will be created, please update the protocol later".format(c_name)) - column_dict[c_name] = c_type - else: - print("Column {} will be removed from the table_definitions.".format(c_name)) + if not ignore_diff: + for c_name, c_type in tdef_columns.items(): + if c_name not in column_dict.keys(): + prompt = input("The column {} is not present on the mapping protocol but is on the table definition," + " should it exist ? (Y/n): ".format(c_name)) + if prompt.upper() in ['', 'Y']: + print("Column {} will be created, please update the protocol later".format(c_name)) + column_dict[c_name] = c_type + else: + print("Column {} will be removed from the table_definitions.".format(c_name)) + return column_dict + def get_targets_from_definitions(self): + ''' + Get a list containing all the targets from table definition + ''' + targets = [] + definitions = self.get_definitions() + for column_name, parameter_list in definitions['columns'].items(): + targets.append(parameter_list[1]) + + return targets + + def get_dbcolumn_from_target_definition(self, target): + ''' + Gets database column from a target column name. Ouput is a list + with the column name and type contents. + :return: ['column_name','column_type'] + ''' + definitions = self.get_definitions() + for column_name, parameter_list in definitions['columns'].items(): + if parameter_list[1] == target: + return [column_name, parameter_list[0]] + + def load_protocol(self, protocol): ''' Takes a Protocol instance and loads it for further use @@ -426,10 +459,11 @@ class DatabaseTable(Table): bind.execute(base_query) - def map_from_protocol(self, create=False, bind=None): + def map_from_protocol(self, create=False, bind=None, ignore_defintions=False): ''' - Uses information from a protocol to generate self columns. Table definitions must also - be defined to allow primary key and foreign keys addition. + Uses information from a protocol or, if protocol is not present, from table definitions + to generate self columns. + Table definitions must also be defined to allow primary key and foreign keys addition. Useful for table creation. ''' if self.columns.keys(): @@ -439,7 +473,7 @@ class DatabaseTable(Table): bind = self.metadata.bind definitions = self.get_definitions() - column_dict = self.get_columns_to_create(definitions.get('columns')) + column_dict = self.get_columns_dict(definitions.get('columns'), ignore_defintions) for c_name, c_type in column_dict.items(): column = Column(c_name, get_type(c_type[0])) @@ -475,7 +509,7 @@ class DatabaseTable(Table): self.constraints.add(ForeignKeyConstraint(keys, fkeys)) - def create(self, bind=None, checkfirst=False): + def create(self, bind=None, checkfirst=False, ignore_definitions=False): ''' Overrides sqlalchemy's create method to use map_from_protocol before creating. ''' @@ -485,7 +519,7 @@ class DatabaseTable(Table): logger.error("Table %s already exists", self.name) return - self.map_from_protocol(create=True, bind=bind) + self.map_from_protocol(create=True, bind=bind, ignore_defintions=ignore_definitions) super().create(bind=bind, checkfirst=checkfirst) @@ -636,7 +670,7 @@ class DatabaseTable(Table): def compare_mapping(self): ''' - Compares contents of mapping table to protocol and returns tuple with differences in + Compares contents of mapping table to table definitions and returns tuple with differences in the following format: new_columns, to_drop_columns, update_columns @@ -647,19 +681,17 @@ class DatabaseTable(Table): The method uses target_names as the criteria to decide if columns are the same or not. ''' - self.check_protocol() - - protocol_target_list = self._protocol.get_targets() + target_list = self.get_targets_from_definitions() query = self._mapping_table.select() results = self.metadata.bind.execute(query).fetchall() db_target_list = [t[1] for t in results] - new_columns = [c for c in protocol_target_list if c not in db_target_list and c != ''] - to_drop_columns = [c for c in db_target_list if c not in protocol_target_list] + new_columns = [c for c in target_list if c not in db_target_list and c != ''] + to_drop_columns = [c for c in db_target_list if c not in target_list] update_columns = [] - for target in protocol_target_list: + for target in target_list: query = select([self._mapping_table.c.name, self._mapping_table.c.type])\ .where(self._mapping_table.c.target_name == target) result = self.metadata.bind.execute(query).fetchone() @@ -667,7 +699,7 @@ class DatabaseTable(Table): continue name, field_type = result try: - new_name, new_type = self._protocol.dbcolumn_from_target(target) + new_name, new_type = self.get_dbcolumn_from_target_definition(target) except InvalidTargetError: to_drop_columns.append(target) continue @@ -683,20 +715,24 @@ class DatabaseTable(Table): return new_columns, to_drop_columns, update_columns - def remap(self, auto_confirmation=True): + def remap(self, auto_confirmation=True, verify_definitions=False): ''' Checks mapping protocol for differences in table structure - then attempts to apply differences according to what is recorded in the - mapping table + mapping table. + If verify_definitions is set it will ask any difference between mapping_protocol and table_definition ''' if not self.exists(): print("Table {} doesn't exist".format(self.name)) return - self.check_protocol() - mtable = self._mapping_table + # Update table definitions + definitions = self.get_definitions() + definitions['columns'] = self.get_columns_dict(definitions.get('columns'), ignore_diff=not verify_definitions) + self.update_defintions(definitions) + if not mtable.exists(): print("Mapping table for {} not found.".format(self.name)) print("Creating mapping table...") diff --git a/manage.py b/manage.py index cb87c7c..efe2f9e 100755 --- a/manage.py +++ b/manage.py @@ -35,9 +35,11 @@ def insert(csv_file, table, year, sep=';', null='',notifybackup=None): if notifybackup: database.actions.generate_backup() @manager.command -def create(table): - '''Creates table using mapping protocols''' - database.actions.create(table) +def create(table, ignore_definitions=False): + '''Creates table using mapping protocols + If ignore_definitions is set, it will ignore the columns from table definition if both, table_definitions and + mapping_protocol, exists (though it will still get primary_key, foreign_key and source information)''' + database.actions.create(table, ignore_definitions) @manager.command def drop(table): @@ -45,9 +47,11 @@ def drop(table): database.actions.drop(table) @manager.command -def remap(table, auto_confirmation=False): - '''Restructures a table to match the mapping protocol.''' - database.actions.remap(table, auto_confirmation) +def remap(table, auto_confirmation=False, verify_definitions=False): + '''Restructures a table to match the mapping protocol. + If auto_confirmation is set it will not ask before doing any operation + If verify_definitions is set it will ask any difference between mapping_protocol and table_definition''' + database.actions.remap(table, auto_confirmation, verify_definitions) @manager.command def update_from_file(csv_file, table, year, columns=None, target_list=None, offset=2, sep=';', diff --git a/tests/database_test.py b/tests/database_test.py index b0a3497..ca4ddd8 100755 --- a/tests/database_test.py +++ b/tests/database_test.py @@ -91,7 +91,7 @@ def modify_mapping_protocol(): def test_creation(): if not ENGINE.dialect.has_table(ENGINE, 'test_reference'): database.actions.execute_sql_script('test_reference.sql') - database.actions.create(table_test) + database.actions.create(table_test, ignore_definitions=True) print("Executing fetchall query:") with ENGINE.connect() as connection: table = Table(table_test, META, autoload=True, autoload_with=ENGINE) -- GitLab