diff --git a/database/actions.py b/database/actions.py index 83ee1edf6265c825c8e4bb41026b7644e7497733..749b26c8d67008c827cfa66557fd92e27479f44b 100644 --- a/database/actions.py +++ b/database/actions.py @@ -70,13 +70,13 @@ def insert(file_name, table, year, offset=2, delimiters=[';', '\\n', '"'], null= trans.commit() -def create(table): +def create(table, ignore_definitions=False): '''Creates table from mapping_protocol metadata''' table = gen_data_table(table, META) with ENGINE.connect() as connection: trans = connection.begin() - table.create(bind=connection) + table.create(bind=connection, ignore_definitions=ignore_definitions) table.set_source(bind=connection) table.create_mapping_table(bind=connection) trans.commit() @@ -87,12 +87,12 @@ def drop(table): table.drop() -def remap(table, auto_confirmation=True): +def remap(table, auto_confirmation=True, verify_definitions=False): '''Applies change made in mapping protocols to database''' table = gen_data_table(table, META) table.map_from_database() - table.remap(auto_confirmation) + table.remap(auto_confirmation, verify_definitions) def csv_from_tabbed(table_name, input_file, output_file, year, sep=';'): table = gen_data_table(table_name, META) diff --git a/database/database_table.py b/database/database_table.py index 73cd16a54d9c5781cd8c0e601ede100b7f860c9f..d3c0dce175e39374474f1c78672621e38ba95e73 100644 --- a/database/database_table.py +++ b/database/database_table.py @@ -24,6 +24,7 @@ import time import json import re import logging +import jsbeautifier from sqlalchemy import Table, Column, inspect, Integer, String, Boolean,\ PrimaryKeyConstraint, ForeignKeyConstraint, text from sqlalchemy.sql import select, insert, update, delete, func @@ -306,6 +307,82 @@ class DatabaseTable(Table): return definitions + def update_defintions(self, definitions): + ''' + Update the table definitions with an altered definitions object + ''' + logger.debug("Updating table definitions from %s", definitions) + definitions_json = 
self.name + '.json' + definitions_json = os.path.join(settings.TABLE_DEFINITIONS_FOLDER, definitions_json) + + new_definitions_json = jsbeautifier.beautify(json.dumps(definitions, ensure_ascii=False)) + with open(definitions_json, "w") as def_json: + def_json.write(new_definitions_json) + + logger.debug("Definitions Updated") + + def get_columns_dict(self, tdef_columns, ignore_diff=False): + ''' + Get a dictionary of columns, comparing the columns of the associated protocol with those in the table + definition. + :param tdef_columns: column dict from table definitions + :param ignore_diff: when set True will ignore differences in table_definition and get the data only from + mapping_protocol when both exist + :return: {"column_name": ["column_type(str)", "target"]} + ''' + if self._protocol is None: + if not tdef_columns: + raise MissingProtocolError("You must first load a protocol or add columns to the table definition") + else: + logger.warning("Table creation will be entirely based on the table definition") + return tdef_columns + else: + column_dict = {} + for column in self._protocol.get_targets(): + try: + column = self._protocol.dbcolumn_from_target(column) + except InvalidTargetError: + continue + if column[0]: + column[0] = column[0].strip() + column_dict[column[0]] = [column[1], self._protocol.target_from_dbcolumn(column[0])] + + if not ignore_diff: + for c_name, c_type in tdef_columns.items(): + if c_name not in column_dict.keys(): + prompt = input("The column {} is not present on the mapping protocol but is on the table definition," + " should it exist ? 
(Y/n): ".format(c_name)) + if prompt.upper() in ['', 'Y']: + print("Column {} will be created, please update the protocol later".format(c_name)) + column_dict[c_name] = c_type + else: + print("Column {} will be removed from the table_definitions.".format(c_name)) + + return column_dict + + def get_targets_from_definitions(self): + ''' + Get a list containing all the targets from table definition + ''' + targets = [] + definitions = self.get_definitions() + for column_name, parameter_list in definitions['columns'].items(): + targets.append(parameter_list[1]) + + return targets + + def get_dbcolumn_from_target_definition(self, target): + ''' + Gets database column from a target column name. Output is a list + with the column name and type contents. + :return: ['column_name','column_type'] + ''' + definitions = self.get_definitions() + for column_name, parameter_list in definitions['columns'].items(): + if parameter_list[1] == target: + return [column_name, parameter_list[0]] + + def load_protocol(self, protocol): ''' Takes a Protocol instance and loads it for further use @@ -326,7 +403,6 @@ class DatabaseTable(Table): ''' Creates the mapping table in the database ''' - self.check_protocol() if bind is None: bind = self.metadata.bind @@ -337,9 +413,10 @@ class DatabaseTable(Table): with bind.connect() as connection: logger.info("Populating mapping table") columns = [c[1] for c in self.columns.items()] + definitions = self.get_definitions() for c in columns: column = {} - column['target_name'] = self._protocol.target_from_dbcolumn(c.name) + column['target_name'] = definitions['columns'][c.name][1] if not column['target_name']: continue column['name'] = c.name @@ -382,13 +459,13 @@ class DatabaseTable(Table): bind.execute(base_query) - def map_from_protocol(self, create=False, bind=None): + def map_from_protocol(self, create=False, bind=None, ignore_defintions=False): ''' - Uses information from a protocol to generate self columns. 
Table definitions must also - be defined to allow primary key and foreign keys addition. + Uses information from a protocol or, if protocol is not present, from table definitions + to generate self columns. + Table definitions must also be defined to allow primary key and foreign keys addition. Useful for table creation. ''' - self.check_protocol() if self.columns.keys(): logger.warning("Table mapping already has columns. Nothing done.") return @@ -396,18 +473,15 @@ class DatabaseTable(Table): bind = self.metadata.bind definitions = self.get_definitions() + column_dict = self.get_columns_dict(definitions.get('columns'), ignore_defintions) - for column in self._protocol.get_targets(): - try: - column = self._protocol.dbcolumn_from_target(column) - except InvalidTargetError: - continue - if column[0]: - column[0] = column[0].strip() - column = Column(column[0], get_type(column[1])) - + for c_name, c_type in column_dict.items(): + column = Column(c_name, get_type(c_type[0])) self.append_column(column) + definitions['columns'] = column_dict + self.update_defintions(definitions) + primary_key = [self.columns.get(c) for c in definitions['pk']] if primary_key: self.constraints.add(PrimaryKeyConstraint(*primary_key)) @@ -435,7 +509,7 @@ class DatabaseTable(Table): self.constraints.add(ForeignKeyConstraint(keys, fkeys)) - def create(self, bind=None, checkfirst=False): + def create(self, bind=None, checkfirst=False, ignore_definitions=False): ''' Overrides sqlalchemy's create method to use map_from_protocol before creating. 
''' @@ -445,7 +519,7 @@ class DatabaseTable(Table): logger.error("Table %s already exists", self.name) return - self.map_from_protocol(create=True, bind=bind) + self.map_from_protocol(create=True, bind=bind, ignore_defintions=ignore_definitions) super().create(bind=bind, checkfirst=checkfirst) @@ -596,7 +670,7 @@ class DatabaseTable(Table): def compare_mapping(self): ''' - Compares contents of mapping table to protocol and returns tuple with differences in + Compares contents of mapping table to table definitions and returns tuple with differences in the following format: new_columns, to_drop_columns, update_columns @@ -607,19 +681,17 @@ class DatabaseTable(Table): The method uses target_names as the criteria to decide if columns are the same or not. ''' - self.check_protocol() - - protocol_target_list = self._protocol.get_targets() + target_list = self.get_targets_from_definitions() query = self._mapping_table.select() results = self.metadata.bind.execute(query).fetchall() db_target_list = [t[1] for t in results] - new_columns = [c for c in protocol_target_list if c not in db_target_list and c != ''] - to_drop_columns = [c for c in db_target_list if c not in protocol_target_list] + new_columns = [c for c in target_list if c not in db_target_list and c != ''] + to_drop_columns = [c for c in db_target_list if c not in target_list] update_columns = [] - for target in protocol_target_list: + for target in target_list: query = select([self._mapping_table.c.name, self._mapping_table.c.type])\ .where(self._mapping_table.c.target_name == target) result = self.metadata.bind.execute(query).fetchone() @@ -627,7 +699,7 @@ class DatabaseTable(Table): continue name, field_type = result try: - new_name, new_type = self._protocol.dbcolumn_from_target(target) + new_name, new_type = self.get_dbcolumn_from_target_definition(target) except InvalidTargetError: to_drop_columns.append(target) continue @@ -643,20 +715,24 @@ class DatabaseTable(Table): return new_columns, to_drop_columns, 
update_columns - def remap(self, auto_confirmation=True): + def remap(self, auto_confirmation=True, verify_definitions=False): ''' Checks mapping protocol for differences in table structure - then attempts to apply differences according to what is recorded in the - mapping table + mapping table. + If verify_definitions is set it will ask about any difference between mapping_protocol and table_definition ''' if not self.exists(): print("Table {} doesn't exist".format(self.name)) return - self.check_protocol() - mtable = self._mapping_table + # Update table definitions + definitions = self.get_definitions() + definitions['columns'] = self.get_columns_dict(definitions.get('columns'), ignore_diff=not verify_definitions) + self.update_defintions(definitions) + if not mtable.exists(): print("Mapping table for {} not found.".format(self.name)) print("Creating mapping table...") diff --git a/manage.py b/manage.py index cb87c7c46a4cb38fb9247110ae4f8cdc646b9917..efe2f9e5cbfed0d02d83ca0afc6c2f6ad7f8630f 100755 --- a/manage.py +++ b/manage.py @@ -35,9 +35,11 @@ def insert(csv_file, table, year, sep=';', null='',notifybackup=None): if notifybackup: database.actions.generate_backup() @manager.command -def create(table): - '''Creates table using mapping protocols''' - database.actions.create(table) +def create(table, ignore_definitions=False): + '''Creates table using mapping protocols + If ignore_definitions is set, it will ignore the columns from table definition if both, table_definitions and + mapping_protocol, exists (though it will still get primary_key, foreign_key and source information)''' + database.actions.create(table, ignore_definitions) @manager.command def drop(table): @@ -45,9 +47,11 @@ def drop(table): database.actions.drop(table) @manager.command -def remap(table, auto_confirmation=False): - '''Restructures a table to match the mapping protocol.''' - database.actions.remap(table, auto_confirmation) +def remap(table, auto_confirmation=False, verify_definitions=False): 
+ '''Restructures a table to match the mapping protocol. + If auto_confirmation is set it will not ask before doing any operation + If verify_definitions is set it will ask any difference between mapping_protocol and table_definition''' + database.actions.remap(table, auto_confirmation, verify_definitions) @manager.command def update_from_file(csv_file, table, year, columns=None, target_list=None, offset=2, sep=';', diff --git a/requirements.txt b/requirements.txt index 7c18f5d52c20e33363dde66045438e92243161d6..c3861e1dd18e4772c0cf8f992e02fb85278aced6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,6 +5,7 @@ ipython==7.5.0 ipython-genutils==0.2.0 isort==4.3.20 jedi==0.13.3 +jsbeautifier==1.10.0 lazy-object-proxy==1.4.1 manage.py==0.2.10 mccabe==0.6.1 diff --git a/tests/database_test.py b/tests/database_test.py index b0a34973ccde3515e81c4e1c1df4c10393729ef3..ca4ddd896e30785c9197ffb456c79555c1540702 100755 --- a/tests/database_test.py +++ b/tests/database_test.py @@ -91,7 +91,7 @@ def modify_mapping_protocol(): def test_creation(): if not ENGINE.dialect.has_table(ENGINE, 'test_reference'): database.actions.execute_sql_script('test_reference.sql') - database.actions.create(table_test) + database.actions.create(table_test, ignore_definitions=True) print("Executing fetchall query:") with ENGINE.connect() as connection: table = Table(table_test, META, autoload=True, autoload_with=ENGINE)