diff --git a/database/actions.py b/database/actions.py
index ec5302f7d724d88701d1d71db429b48cb4bee23d..d755a1ce2f2078b375c90f023f5b4c2d6d986919 100644
--- a/database/actions.py
+++ b/database/actions.py
@@ -69,7 +69,6 @@ def insert(file_name, table, year, offset=2, delimiters=[';', '\\n', '"'], null=
 
         ttable = temporary_data(connection, file_name, table, year, offset, delimiters, null)
         table.insert_from_temporary(ttable, bind=connection)
-        ttable.drop()
 
         trans.commit()
 
diff --git a/database/database_table.py b/database/database_table.py
index 8d0c6389e770ce8b167bede6f0f83c77f526f109..e4327c307aa95822f278f26c2235cf3c69db23b1 100644
--- a/database/database_table.py
+++ b/database/database_table.py
@@ -912,6 +912,9 @@ class DatabaseTable(Table):
         for original in columns:
             self._resolv_derivative(original, year)
 
+        if not self._derivatives:
+            return
+
         max_level = max([self._derivatives[d]['level'] for d in self._derivatives])
 
         for i in range(1, max_level + 1):
@@ -1081,9 +1084,101 @@ class DatabaseTable(Table):
         else:
             logger.debug('Table definitions already loaded, nothing done.')
 
+
+class DatabaseTablePsql(DatabaseTable):
+    '''
+    DatabaseTable variant for PostgreSQL: temporary tables are declared with the TEMPORARY
+    prefix and bulk loads are issued through the COPY command.
+    '''
+
+    def get_temporary(self, header_columns=None, year=None):
+        '''
+        Returns a temporary table with identical structure to self. If a header_columns list
+        is passed, each column is looked up in the protocol; columns that are not mapped to a
+        database column are added with their original name and type VARCHAR(255).
+
+        If a header_columns list is provided, a year must be passed to allow mapping to
+        originals; otherwise ValueError is raised.
+        '''
+        header_columns = list(header_columns or [])
+        if header_columns and not year:
+            raise ValueError('year is required when header_columns is provided')
+
+        additional = header_columns.copy()
+        for column in header_columns:
+            target = self._protocol.target_from_original(column, year)
+            try:
+                if target and self._protocol.dbcolumn_from_target(target):
+                    additional.remove(column)
+            except InvalidTargetError:
+                # Lookup failed: the column stays in additional and is created as VARCHAR(255).
+                pass
+
+        timestamp = time.strftime('%Y%m%d%H%M%S')
+        name = '_' + timestamp + '_' + self.name
+
+        logger.info("Acquiring temporary table with name '%s'", name)
+        logger.debug("Temporary table '%s' with list of extra columns %s", name, additional)
+        ttable = Table(name, self.metadata, prefixes=['TEMPORARY'])
+
+        for target in self._protocol.get_targets():
+            try:
+                column_name, column_type = self._protocol.dbcolumn_from_target(target)
+                ttable.append_column(Column(column_name, get_type(column_type)))
+            except InvalidTargetError:
+                pass
+
+        for column in additional:
+            ttable.append_column(Column(column, String(255)))
+
+        return ttable
+
+    def populate_temporary(self, ttable, in_file, header, year, delimiters=(';', '\\n', '"'),
+                           null='', offset=2, bind=None):
+        '''
+        Visits a temporary table ttable and bulk inserts data from in_file in it. The header
+        list of the original file must be supplied to ensure columns are correctly mapped.
+
+        The first item of delimiters is forwarded to COPY as field delimiter, the third (when
+        present) as quote character and null as the NULL marker. offset is accepted but not
+        used: the HEADER option always skips exactly the first line of in_file.
+
+        Returns the executed statement.
+        '''
+        if bind is None:
+            bind = self.metadata.bind
+
+        columns = header.copy()
+        for i, column in enumerate(columns):
+            try:
+                target = self._protocol.target_from_original(column, year)
+                columns[i] = self._protocol.dbcolumn_from_target(target)[0] or column
+            except InvalidTargetError:
+                pass
+
+        def literal(value):
+            # Inline as a single-quoted SQL literal, doubling embedded quotes.
+            return "'{}'".format(str(value).replace("'", "''"))
+
+        # Missing quote entry falls back to the double quote, which is COPY's CSV default.
+        quote = delimiters[2] if len(delimiters) > 2 else '"'
+        query_columns = ', '.join('"{}"'.format(c.replace('"', '""')) for c in columns)
+        options = 'FORMAT csv, HEADER, DELIMITER {}, QUOTE {}, NULL {}'.format(
+            literal(delimiters[0]), literal(quote), literal(null))
+        query = text('COPY {}({}) FROM {} WITH ({})'.format(
+            ttable.name, query_columns, literal(in_file), options))
+
+        bind.execute(query)
+
+        return query
+
+
 def gen_data_table(table, meta):
     '''Returns a DatabaseTable instance with associated mapping protocol'''
-    table = DatabaseTable(table, meta)
+    if settings.DATABASE_DIALECT == 'postgresql':
+        table = DatabaseTablePsql(table, meta)
+    else:
+        table = DatabaseTable(table, meta)
 
     protocol_path = os.path.join(settings.MAPPING_PROTOCOLS_FOLDER, table.name + '.csv')
     if os.path.isfile(protocol_path) and table._protocol is None:
diff --git a/settings.py b/settings.py
index 36dd10f504d092e046af72592e81115e2fe7195c..e9461ddbddbf7de17fe10081b30a020f5a2ae696 100644
--- a/settings.py
+++ b/settings.py
@@ -22,18 +22,18 @@ along with HOTMapper. If not, see <https://www.gnu.org/licenses/>.
 import logging
 
 # SQL dialect used by sqlalchemy.
-DATABASE_DIALECT = 'monetdb'
+DATABASE_DIALECT = 'postgresql'
 
 # Login credentials in database
-DATABASE_USER = 'monetdb'
-DATABASE_USER_PASSWORD = 'monetdb'
+DATABASE_USER = 'diogohal'
+DATABASE_USER_PASSWORD = 'diogohal'
 
 # Host to connect to. Bulk inserts won't work remotely unless you can specify an
 # absolute path in the server
 DATABASE_HOST = 'localhost'
 
 # Database to connect to
-DATABASE = 'hotmapper_demo'
+DATABASE = 'diogohal'
 
 # Column used to run aggregations and denormalizations
 YEAR_COLUMN = 'ano_censo'