From 41de8cc5f16c9a73e830221e488300f5c633b0aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Victor=20Frans=20Pondaco=20Winandy?= <jvfpw18@inf.ufpr.br> Date: Thu, 14 May 2020 10:45:42 -0300 Subject: [PATCH 1/4] WIP rewrite of apply_derivatives --- .idea/encodings.xml | 4 + .idea/hotmapper.iml | 26 ++ .idea/inspectionProfiles/Project_Default.xml | 13 + .idea/misc.xml | 7 + .idea/modules.xml | 8 + .idea/vcs.xml | 10 + .idea/workspace.xml | 245 +++++++++++++++++++ database/database_table.py | 181 +++++++++----- mapping-protocols | 2 +- settings.py | 2 +- sql-scripts | 2 +- 11 files changed, 429 insertions(+), 71 deletions(-) create mode 100644 .idea/encodings.xml create mode 100644 .idea/hotmapper.iml create mode 100644 .idea/inspectionProfiles/Project_Default.xml create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/vcs.xml create mode 100644 .idea/workspace.xml diff --git a/.idea/encodings.xml b/.idea/encodings.xml new file mode 100644 index 0000000..15a15b2 --- /dev/null +++ b/.idea/encodings.xml @@ -0,0 +1,4 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project version="4"> + <component name="Encoding" addBOMForNewFiles="with NO BOM" /> +</project> \ No newline at end of file diff --git a/.idea/hotmapper.iml b/.idea/hotmapper.iml new file mode 100644 index 0000000..b3f5709 --- /dev/null +++ b/.idea/hotmapper.iml @@ -0,0 +1,26 @@ +<?xml version="1.0" encoding="UTF-8"?> +<module type="PYTHON_MODULE" version="4"> + <component name="FacetManager"> + <facet type="django" name="Django"> + <configuration> + <option name="rootFolder" value="$MODULE_DIR$" /> + <option name="settingsModule" value="settings.py" /> + <option name="manageScript" value="$MODULE_DIR$/manage.py" /> + <option name="environment" value="<map/>" /> + <option name="doNotUseTestRunner" value="false" /> + <option name="trackFilePattern" value="migrations" /> + </configuration> + </facet> + </component> + <component name="NewModuleRootManager"> + <content url="file://$MODULE_DIR$" /> + <orderEntry type="jdk" jdkName="Python 3.7 (hotmapper)" jdkType="Python SDK" /> + <orderEntry type="sourceFolder" forTests="false" /> + </component> + <component name="TemplatesService"> + <option name="TEMPLATE_CONFIGURATION" value="Django" /> + </component> + <component name="TestRunnerService"> + <option name="PROJECT_TEST_RUNNER" value="Unittests" /> + </component> +</module> \ No newline at end of file diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 0000000..3a6c6c9 --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,13 @@ +<component name="InspectionProjectProfileManager"> + <profile version="1.0"> + <option name="myName" value="Project Default" /> + <inspection_tool class="PyPep8Inspection" enabled="true" level="WEAK WARNING" enabled_by_default="true"> + <option name="ignoredErrors"> + <list> + <option value="E302" /> + </list> + </option> + </inspection_tool> + <inspection_tool class="PySingleQuotedDocstringInspection" enabled="false" level="WEAK WARNING" enabled_by_default="false" /> + </profile> +</component> \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..861c93d --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,7 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project version="4"> + <component name="JavaScriptSettings"> + <option name="languageLevel" value="ES6" /> + </component> + <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7 (hotmapper)" project-jdk-type="Python SDK" /> +</project> \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..dfe34df --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project version="4"> + <component name="ProjectModuleManager"> + <modules> + <module fileurl="file://$PROJECT_DIR$/.idea/hotmapper.iml" filepath="$PROJECT_DIR$/.idea/hotmapper.iml" /> + </modules> + </component> +</project> \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..d87d897 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,10 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project version="4"> + <component name="VcsDirectoryMappings"> + <mapping directory="$PROJECT_DIR$" vcs="Git" /> + <mapping directory="$PROJECT_DIR$/env/src/sqlalchemy-monetdb" vcs="Git" /> + <mapping directory="$PROJECT_DIR$/mapping-protocols" vcs="Git" /> + <mapping directory="$PROJECT_DIR$/sql-scripts" vcs="Git" /> + <mapping directory="$PROJECT_DIR$/table-definitions" vcs="Git" /> + </component> +</project> \ No newline at end of file diff --git a/.idea/workspace.xml b/.idea/workspace.xml new file mode 100644 index 0000000..29dd00e --- /dev/null +++ b/.idea/workspace.xml @@ -0,0 +1,245 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project version="4"> + <component name="ChangeListManager"> + <list default="true" id="aa9b8e4e-02cb-4085-b90c-6022543ea627" name="Default Changelist" comment=""> + <change beforePath="$PROJECT_DIR$/database/database_table.py" beforeDir="false" afterPath="$PROJECT_DIR$/database/database_table.py" afterDir="false" /> + <change beforePath="$PROJECT_DIR$/mapping-protocols" beforeDir="false" afterPath="$PROJECT_DIR$/mapping-protocols" afterDir="false" /> + <change beforePath="$PROJECT_DIR$/mapping-protocols/escola.csv" beforeDir="false" afterPath="$PROJECT_DIR$/mapping-protocols/escola.csv" afterDir="false" /> + <change beforePath="$PROJECT_DIR$/mapping-protocols/turma.csv" beforeDir="false" afterPath="$PROJECT_DIR$/mapping-protocols/turma.csv" afterDir="false" /> + <change beforePath="$PROJECT_DIR$/settings.py" beforeDir="false" afterPath="$PROJECT_DIR$/settings.py" afterDir="false" /> + <change beforePath="$PROJECT_DIR$/table-definitions" beforeDir="false" afterPath="$PROJECT_DIR$/table-definitions" afterDir="false" /> + <change beforePath="$PROJECT_DIR$/table-definitions/docente.json" beforeDir="false" afterPath="$PROJECT_DIR$/table-definitions/docente.json" afterDir="false" /> + <change beforePath="$PROJECT_DIR$/table-definitions/escola.json" beforeDir="false" afterPath="$PROJECT_DIR$/table-definitions/escola.json" afterDir="false" /> + <change beforePath="$PROJECT_DIR$/table-definitions/matricula.json" beforeDir="false" afterPath="$PROJECT_DIR$/table-definitions/matricula.json" afterDir="false" /> + <change beforePath="$PROJECT_DIR$/table-definitions/pnad.json" beforeDir="false" afterPath="$PROJECT_DIR$/table-definitions/pnad.json" afterDir="false" /> + <change beforePath="$PROJECT_DIR$/table-definitions/turma.json" beforeDir="false" afterPath="$PROJECT_DIR$/table-definitions/turma.json" afterDir="false" /> + </list> + <option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" /> + <option name="SHOW_DIALOG" value="false" /> + <option name="HIGHLIGHT_CONFLICTS" value="true" /> + <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" /> + <option name="LAST_RESOLUTION" value="IGNORE" /> + </component> + <component name="DjangoConsoleOptions" custom-start-script="import sys; print('Python %s on %s' % (sys.version, sys.platform)) import django; print('Django %s' % django.get_version()) sys.path.extend([WORKING_DIR_AND_PYTHON_PATHS]) if 'setup' in dir(django): django.setup() import django_manage_shell; django_manage_shell.run(PROJECT_ROOT)"> + <option name="myCustomStartScript" value="import sys; print('Python %s on %s' % (sys.version, sys.platform)) import django; print('Django %s' % django.get_version()) sys.path.extend([WORKING_DIR_AND_PYTHON_PATHS]) if 'setup' in dir(django): django.setup() import django_manage_shell; django_manage_shell.run(PROJECT_ROOT)" /> + </component> + <component name="FileEditorManager"> + <splitter split-orientation="horizontal" split-proportion="0.38232163"> + <split-first> + <leaf SIDE_TABS_SIZE_LIMIT_KEY="300"> + <file pinned="false" current-in-tab="false"> + <entry file="file://$PROJECT_DIR$/manage.py"> + <provider selected="true" editor-type-id="text-editor"> + <state relative-caret-position="1426"> + <caret line="65" selection-start-line="65" selection-end-line="65" /> + </state> + </provider> + </entry> + </file> + <file pinned="false" current-in-tab="false"> + <entry file="file://$PROJECT_DIR$/database/actions.py"> + <provider selected="true" editor-type-id="text-editor"> + <state relative-caret-position="2967"> + <caret line="137" column="21" selection-start-line="137" selection-start-column="21" selection-end-line="137" selection-end-column="21" /> + </state> + </provider> + </entry> + </file> + <file pinned="false" current-in-tab="true"> + <entry file="file://$PROJECT_DIR$/database/database_table.py"> + <provider selected="true" editor-type-id="text-editor"> + <state relative-caret-position="366"> + <caret line="923" column="49" selection-start-line="923" selection-start-column="49" selection-end-line="923" selection-end-column="49" /> + </state> + </provider> + </entry> + </file> + <file pinned="false" current-in-tab="false"> + <entry file="file://$PROJECT_DIR$/settings.py"> + <provider selected="true" editor-type-id="text-editor"> + <state relative-caret-position="1426"> + <caret line="62" selection-start-line="62" selection-end-line="62" /> + </state> + </provider> + </entry> + </file> + <file pinned="false" current-in-tab="false"> + <entry file="file://$PROJECT_DIR$/auto.sh"> + <provider selected="true" editor-type-id="text-editor"> + <state relative-caret-position="3243"> + <caret line="141" column="56" selection-start-line="141" selection-start-column="56" selection-end-line="141" selection-end-column="56" /> + </state> + </provider> + </entry> + </file> + </leaf> + </split-first> + <split-second> + <leaf SIDE_TABS_SIZE_LIMIT_KEY="300"> + <file pinned="false" current-in-tab="true"> + <entry file="file://$PROJECT_DIR$/database/database_table.py"> + <provider selected="true" editor-type-id="text-editor"> + <state relative-caret-position="296"> + <caret line="888" column="16" lean-forward="true" selection-start-line="888" selection-start-column="16" selection-end-line="888" selection-end-column="16" /> + </state> + </provider> + </entry> + </file> + <file pinned="false" current-in-tab="false"> + <entry file="file://$PROJECT_DIR$/database/protocol.py"> + <provider selected="true" editor-type-id="text-editor"> + <state relative-caret-position="2323"> + <caret line="104" column="35" selection-start-line="104" selection-start-column="35" selection-end-line="104" selection-end-column="35" /> + </state> + </provider> + </entry> + </file> + </leaf> + </split-second> + </splitter> + </component> + <component name="FindInProjectRecents"> + <findStrings> + <find>_get_de</find> + <find>_derivative_recursion</find> + <find>_reso</find> + </findStrings> + </component> + <component name="Git.Settings"> + <option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" /> + </component> + <component name="IdeDocumentHistory"> + <option name="CHANGED_PATHS"> + <list> + <option value="$PROJECT_DIR$/settings.py" /> + <option value="$PROJECT_DIR$/database/database_table.py" /> + </list> + </option> + </component> + <component name="ProjectFrameBounds" extendedState="6"> + <option name="x" value="1600" /> + <option name="width" value="1920" /> + <option name="height" value="1042" /> + </component> + <component name="ProjectLevelVcsManager" settingsEditedManually="true" /> + <component name="ProjectView"> + <navigator proportions="" version="1"> + <foldersAlwaysOnTop value="true" /> + </navigator> + <panes /> + </component> + <component name="PropertiesComponent"> + <property name="WebServerToolWindowFactoryState" value="false" /> + <property name="last_opened_file_path" value="$PROJECT_DIR$" /> + <property name="nodejs_interpreter_path.stuck_in_default_project" value="undefined stuck path" /> + <property name="nodejs_npm_path_reset_for_default_project" value="true" /> + <property name="settings.editor.selected.configurable" value="preferences.pluginManager" /> + </component> + <component name="RunDashboard"> + <option name="ruleStates"> + <list> + <RuleState> + <option name="name" value="ConfigurationTypeDashboardGroupingRule" /> + </RuleState> + <RuleState> + <option name="name" value="StatusDashboardGroupingRule" /> + </RuleState> + </list> + </option> + </component> + <component name="SvnConfiguration"> + <configuration /> + </component> + <component name="TaskManager"> + <task active="true" id="Default" summary="Default task"> + <changelist id="aa9b8e4e-02cb-4085-b90c-6022543ea627" name="Default Changelist" comment="" /> + <created>1585053766024</created> + <option name="number" value="Default" /> + <option name="presentableId" value="Default" /> + <updated>1585053766024</updated> + <workItem from="1585053767960" duration="1312000" /> + <workItem from="1585139723643" duration="122000" /> + <workItem from="1585139870979" duration="8128000" /> + <workItem from="1585224059022" duration="5809000" /> + <workItem from="1585230046456" duration="4743000" /> + <workItem from="1585310779353" duration="127000" /> + </task> + <servers /> + </component> + <component name="TimeTrackingManager"> + <option name="totallyTimeSpent" value="20241000" /> + </component> + <component name="ToolWindowManager"> + <frame x="1600" y="0" width="1920" height="1042" extended-state="6" /> + <layout> + <window_info content_ui="combo" id="Project" order="0" weight="0.11235357" /> + <window_info id="Structure" order="1" side_tool="true" weight="0.25" /> + <window_info id="Favorites" order="2" side_tool="true" /> + <window_info anchor="bottom" id="Message" order="0" /> + <window_info anchor="bottom" id="Find" order="1" /> + <window_info anchor="bottom" id="Run" order="2" /> + <window_info anchor="bottom" id="Debug" order="3" weight="0.4" /> + <window_info anchor="bottom" id="Cvs" order="4" weight="0.25" /> + <window_info anchor="bottom" id="Inspection" order="5" weight="0.4" /> + <window_info anchor="bottom" id="TODO" order="6" /> + <window_info anchor="bottom" id="Docker" order="7" show_stripe_button="false" /> + <window_info anchor="bottom" id="Version Control" order="8" /> + <window_info anchor="bottom" id="Database Changes" order="9" /> + <window_info anchor="bottom" id="Event Log" order="10" side_tool="true" /> + <window_info active="true" anchor="bottom" id="Terminal" order="11" visible="true" weight="0.17218544" /> + <window_info anchor="bottom" id="Python Console" order="12" /> + <window_info anchor="right" id="Commander" internal_type="SLIDING" order="0" type="SLIDING" weight="0.4" /> + <window_info anchor="right" id="Ant Build" order="1" weight="0.25" /> + <window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" /> + <window_info anchor="right" id="SciView" order="3" /> + <window_info anchor="right" id="Database" order="4" /> + </layout> + </component> + <component name="TypeScriptGeneratedFilesManager"> + <option name="version" value="1" /> + </component> + <component name="editorHistoryManager"> + <entry file="file://$PROJECT_DIR$/manage.py"> + <provider selected="true" editor-type-id="text-editor"> + <state relative-caret-position="1426"> + <caret line="65" selection-start-line="65" selection-end-line="65" /> + </state> + </provider> + </entry> + <entry file="file://$PROJECT_DIR$/database/actions.py"> + <provider selected="true" editor-type-id="text-editor"> + <state relative-caret-position="2967"> + <caret line="137" column="21" selection-start-line="137" selection-start-column="21" selection-end-line="137" selection-end-column="21" /> + </state> + </provider> + </entry> + <entry file="file://$PROJECT_DIR$/settings.py"> + <provider selected="true" editor-type-id="text-editor"> + <state relative-caret-position="1426"> + <caret line="62" selection-start-line="62" selection-end-line="62" /> + </state> + </provider> + </entry> + <entry file="file://$PROJECT_DIR$/auto.sh"> + <provider selected="true" editor-type-id="text-editor"> + <state relative-caret-position="3243"> + <caret line="141" column="56" selection-start-line="141" selection-start-column="56" selection-end-line="141" selection-end-column="56" /> + </state> + </provider> + </entry> + <entry file="file://$PROJECT_DIR$/database/database_table.py"> + <provider selected="true" editor-type-id="text-editor"> + <state relative-caret-position="296"> + <caret line="888" column="16" lean-forward="true" selection-start-line="888" selection-start-column="16" selection-end-line="888" selection-end-column="16" /> + </state> + </provider> + </entry> + <entry file="file://$PROJECT_DIR$/database/protocol.py"> + <provider selected="true" editor-type-id="text-editor"> + <state relative-caret-position="2323"> + <caret line="104" column="35" selection-start-line="104" selection-start-column="35" selection-end-line="104" selection-end-column="35" /> + </state> + </provider> + </entry> + </component> +</project> \ No newline at end of file diff --git a/database/database_table.py b/database/database_table.py index db3555f..e367277 100644 --- a/database/database_table.py +++ b/database/database_table.py @@ -149,6 +149,8 @@ class DatabaseTable(Table): self._protocol = None if not hasattr(self, '_definitions'): self._definitions = None + if not hasattr(self, '_derivatives'): + self._derivatives = {} if 'protocol' in kwargs.keys(): self.load_protocol(kwargs['protocol']) @@ -779,10 +781,9 @@ class DatabaseTable(Table): have been resolved and updated, to ensure dependencies will not be ignored. ''' if self._protocol is None: - return {'original': original, 'dbcolumn': original, 'new': original, 'level': 0} + return {'query': original, 'dbcolumn': original, 'level': 0} target = self._get_variable_target(original, year) - if target in self._derivatives: # This variable has been evaluated already, just return return self._derivatives[target] @@ -792,56 +793,46 @@ class DatabaseTable(Table): print(target) raise CircularReferenceError(target) - original = self._protocol.original_from_target(target, year) or original - try: - dbcolumn = self._protocol.dbcolumn_from_target(target) - except InvalidTargetError: - dbcolumn = None + # Query for the column, header of csv or var name if empty + case = self._protocol.original_from_target(target, year) or original - if is_aggregation(original): + if not case.startswith('~'): + # Possible header, not a query to be evaluated here + return {'query': case, 'dbcolumn': original, 'level': 0} + + # original column array [var_name, type] + dbcolumn = self._protocol.dbcolumn_from_target(target) + + if is_aggregation(case): # Aggregation not integrated - derivative = {'original': original, 'dbcolumn': dbcolumn, 'new': original, 'level': -1} + derivative = {'query': case, 'dbcolumn': dbcolumn, 'level': -1} self._derivatives[target] = derivative return derivative - denorm_match = re.match(r'~?([a-zA-Z0-9_]+)\.([a-zA-Z0-9_]+)', original) - if denorm_match is not None: - table, column = denorm_match.groups() - table = gen_data_table(table, self.metadata) - - if table is self: - return self._derivative_recursion(column, year, recursion_list) - derivative = table._resolv_derivative(column, year) - - self._derivatives[target] = {'original': original, 'dbcolumn': dbcolumn, 'level': 0, 'dbmapped': True, - 'new': '.'.join([table.name, derivative['dbcolumn'][0]])} - return self._derivatives[target] - - if not original.startswith('~'): - # Possibly keyword, definitely not a variable. Shouldn't change the level. - return {'original': original, 'processed': original, 'dbcolumn': dbcolumn, 'level': 0} - - # Well, looks like we actually got a derivative here - original = original.strip('~ ') - str_list = re.findall(r'("[\w]+"|[\w]+)', original) - level = 0 - substitutions = [] + # Well, looks like we actually got a derivative or denormalization here recursion_list.append(str(self) + '.' + target) - for substring in str_list: - derivative = self._derivative_recursion(substring.strip('"'), year, - recursion_list=recursion_list) - if derivative['dbcolumn']: - substitutions.append({'original': substring, 'new': derivative['dbcolumn'][0]}) - if derivative['level'] >= level: - level = derivative['level'] + 1 - - processed = original - dbmapped = False # column neded to execute the derivative is present on table or need a file. - for substitution in substitutions: - processed = re.sub(substitution['original'], substitution['new'], processed) - dbmapped = True - self._derivatives[target] = {'original': original, 'dbcolumn': dbcolumn, 'level': level, - 'processed': processed, 'dbmapped': dbmapped} + level = 1 # level of dependency of this var + referred_tables = [] + + case = case.strip("~") # doesn't need "~" anymore + for substr in case.split(): + if '.' in substr: # We have a var from another table + table = substr.split('.')[0] + table = gen_data_table(table, self.metadata) + var_name = substr.split('.')[1] + else: + table = self + var_name = substr + + # If it is a var, will need to be evaluated as it's a dependency. + if table._protocol.target_from_dbcolumn(var_name) is not None: + # Prevents lowering level if var is an aggregation. + print("VAR", var_name) + level = max(level + table._derivative_recursion(var_name, year, recursion_list)['level'], level) + if table is not self and table not in referred_tables: + referred_tables.append(table) + + self._derivatives[target] = {'query': case, 'dbcolumn': dbcolumn, 'level': level, 'tables': referred_tables} return self._derivatives[target] def _resolv_derivative(self, original, year): @@ -853,7 +844,7 @@ class DatabaseTable(Table): self._derivatives = {} return self._derivative_recursion(original, year) - def _get_denormalizations(self, ttable, originals, year): + def _get_denormalizations(self, ttable, originals, year, bind): ''' Searches protocol for denormalizations and yields the necessary update queries. ''' @@ -869,15 +860,48 @@ class DatabaseTable(Table): for table in external: query = update(ttable) + + referred_table = gen_data_table(table, self.metadata) + referred_table.map_from_database(bind) + try: + fk_tuples = [(fk_column, fkey) for fk_column, fkey in self.get_relations(referred_table)] + except MissingForeignKeyError: + logger.warning("Using relations from " + table + " instead of " + str(self) + " to apply derivative.") + fk_tuples = [(fk_column, fkey) for fk_column, fkey in referred_table.get_relations(self)] + for dst, src in external[table]: query = query.values(**{dst[0]: src}) - for fk_column, fkey in self.get_relations(table): - fk_column = ttable.columns.get(fk_column.name) + for fk_column, fkey in fk_tuples: + # fk_column = ttable.columns.get(fk_column.name) query = query.where(fk_column == fkey) if year: query = query.where(ttable.columns.get(settings.YEAR_COLUMN) == year) yield query + def _apply_denormalization(self, ttable, year, column, denorm_query, referred_tables, bind): + query = update(ttable) + + query = query.values(**{column: text(denorm_query)}) + + for ref_table in referred_tables: + ref_table = gen_data_table(ref_table, self.metadata) + ref_table.map_from_database(bind) + try: + fk_tuples = [(ttable.columns.get(fk_column.name), fkey) + for fk_column, fkey in self.get_relations(ref_table)] + except MissingForeignKeyError: + logger.warning("Using relations from " + str(ref_table) + + " instead of " + str(self) + " to apply derivative.") + fk_tuples = [(ttable.columns.get(fkey.name),fk_column) + for fk_column, fkey in ref_table.get_relations(self)] + + for fk_column, fkey in fk_tuples: + query = query.where(fk_column == fkey) + + if year: + query = query.where(ttable.columns.get(settings.YEAR_COLUMN) == year) + bind.execute(query) + def apply_derivatives(self, ttable, columns, year, bind=None, dbonly=False): ''' Given a list of columns, searches for derivatives and denormalizations and applies them @@ -887,34 +911,55 @@ class DatabaseTable(Table): if bind is None: bind = self.metadata.bind - self._derivatives = {} for original in columns: self._resolv_derivative(original, year) - originals = [(self._derivatives[d]['dbcolumn'], self._derivatives[d]['original'])\ - for d in self._derivatives if self._derivatives[d]['level'] == 0] + max_level = max([self._derivatives[d]['level'] for d in self._derivatives]) - t_schema = ttable.schema - ttable.schema = None - for query in self._get_denormalizations(ttable, originals, year): - bind.execute(query) + for i in range(1, max_level + 1): + query = {} - ttable.schema = t_schema - if len(self._derivatives) > 0: - max_level = max([self._derivatives[d]['level'] for d in self._derivatives]) - for i in range(max_level): - i = i+1 - query = {} - level = [self._derivatives[d] for d in self._derivatives if\ - self._derivatives[d]['level'] == i] - for derivative in level: - if not dbonly or derivative['dbmapped']: - query[derivative['dbcolumn'][0]] = text(derivative['processed']) + level = [self._derivatives[d] for d in self._derivatives if self._derivatives[d]['level'] == i] + for derivative in level: + if len(derivative['tables']) == 0: + query[derivative['dbcolumn'][0]] = text(derivative['query']) + else: + self._apply_denormalization(ttable, year, derivative['dbcolumn'][0], derivative['query'], + derivative['tables'], bind) + if query: query = update(ttable).values(**query) - bind.execute(query) + + + + print(self._derivatives) + + # originals = [(self._derivatives[d]['dbcolumn'], self._derivatives[d]['original'])\ + # for d in self._derivatives if self._derivatives[d]['level'] == 0] + # + # t_schema = ttable.schema + # ttable.schema = None + # for query in self._get_denormalizations(ttable, originals, year, bind): + # bind.execute(query) + # + # ttable.schema = t_schema + # if len(self._derivatives) > 0: + # max_level = max([self._derivatives[d]['level'] for d in self._derivatives]) + # for i in range(max_level): + # i = i+1 + # query = {} + # level = [self._derivatives[d] for d in self._derivatives if\ + # self._derivatives[d]['level'] == i] + # for derivative in level: + # if not dbonly or derivative['dbmapped']: + # query[derivative['dbcolumn'][0]] = text(derivative['processed']) + # + # query = update(ttable).values(**query) + # + # bind.execute(query) + return self._derivatives def _get_aggregations(self, year): diff --git a/mapping-protocols b/mapping-protocols index 822be1e..81a2f75 160000 --- a/mapping-protocols +++ b/mapping-protocols @@ -1 +1 @@ -Subproject commit 822be1ecbd1fc3ccabadb1642f6c4e9fb73772f1 +Subproject commit 81a2f750573394c7653fe810e1ec32f3174d556f diff --git a/settings.py b/settings.py index 36dd10f..b8c1601 100644 --- a/settings.py +++ b/settings.py @@ -33,7 +33,7 @@ DATABASE_USER_PASSWORD = 'monetdb' DATABASE_HOST = 'localhost' # Database to connect to -DATABASE = 'hotmapper_demo' +DATABASE = 'test' # Column used to run aggregations and denormalizations YEAR_COLUMN = 'ano_censo' diff --git a/sql-scripts b/sql-scripts index 9829db1..79e1ad9 160000 --- a/sql-scripts +++ b/sql-scripts @@ -1 +1 @@ -Subproject commit 9829db1c65b0c79f7cf13ff0939fa00257e02577 +Subproject commit 79e1ad907e0340eb6d47f6195e5072d729ab5d41 -- GitLab From 58d9986aea2e76777a0a14649ff6d3cc74c92c21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Victor=20Frans=20Pondaco=20Winandy?= <jvfpw18@inf.ufpr.br> Date: Wed, 27 May 2020 11:47:07 -0300 Subject: [PATCH 2/4] Add .idea to gitignore --- .gitignore | 1 + .idea/encodings.xml | 4 - .idea/hotmapper.iml | 26 -- .idea/inspectionProfiles/Project_Default.xml | 13 - .idea/misc.xml | 7 - .idea/modules.xml | 8 - .idea/vcs.xml | 10 - .idea/workspace.xml | 245 ------------------- 8 files changed, 1 insertion(+), 313 deletions(-) delete mode 100644 .idea/encodings.xml delete mode 100644 .idea/hotmapper.iml delete mode 100644 .idea/inspectionProfiles/Project_Default.xml delete mode 100644 .idea/misc.xml delete mode 100644 .idea/modules.xml delete mode 100644 .idea/vcs.xml delete mode 100644 .idea/workspace.xml diff --git a/.gitignore b/.gitignore index d348b13..c5c0cdd 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ pairing/ .cache .coverage .vscode +.idea/ diff --git a/.idea/encodings.xml b/.idea/encodings.xml deleted file mode 100644 index 15a15b2..0000000 --- a/.idea/encodings.xml +++ /dev/null @@ -1,4 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<project version="4"> - <component name="Encoding" addBOMForNewFiles="with NO BOM" /> -</project> \ No newline at end of file diff --git a/.idea/hotmapper.iml b/.idea/hotmapper.iml deleted file mode 100644 index b3f5709..0000000 --- a/.idea/hotmapper.iml +++ /dev/null @@ -1,26 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<module type="PYTHON_MODULE" version="4"> - <component name="FacetManager"> - <facet type="django" name="Django"> - <configuration> - <option name="rootFolder" value="$MODULE_DIR$" /> - <option name="settingsModule" value="settings.py" /> - <option name="manageScript" value="$MODULE_DIR$/manage.py" /> - <option name="environment" value="<map/>" /> - <option name="doNotUseTestRunner" value="false" /> - <option name="trackFilePattern" value="migrations" /> - </configuration> - </facet> - </component> - <component name="NewModuleRootManager"> - <content url="file://$MODULE_DIR$" /> - <orderEntry type="jdk" jdkName="Python 3.7 (hotmapper)" jdkType="Python SDK" /> - <orderEntry type="sourceFolder" forTests="false" /> - </component> - <component name="TemplatesService"> - <option name="TEMPLATE_CONFIGURATION" value="Django" /> - </component> - <component name="TestRunnerService"> - <option name="PROJECT_TEST_RUNNER" value="Unittests" /> - </component> -</module> \ No newline at end of file diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml deleted file mode 100644 index 3a6c6c9..0000000 --- a/.idea/inspectionProfiles/Project_Default.xml +++ /dev/null @@ -1,13 +0,0 @@ -<component name="InspectionProjectProfileManager"> - <profile version="1.0"> - <option name="myName" value="Project Default" /> - <inspection_tool class="PyPep8Inspection" enabled="true" level="WEAK WARNING" enabled_by_default="true"> - <option name="ignoredErrors"> - <list> - <option value="E302" /> - </list> - </option> - </inspection_tool> - <inspection_tool class="PySingleQuotedDocstringInspection" enabled="false" level="WEAK WARNING" enabled_by_default="false" /> - </profile> -</component> \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml deleted file mode 100644 index 861c93d..0000000 --- a/.idea/misc.xml +++ /dev/null @@ -1,7 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<project version="4"> - <component name="JavaScriptSettings"> - <option name="languageLevel" value="ES6" /> - </component> - <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7 (hotmapper)" project-jdk-type="Python SDK" /> -</project> \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml deleted file mode 100644 index dfe34df..0000000 --- a/.idea/modules.xml +++ /dev/null @@ -1,8 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<project version="4"> - <component name="ProjectModuleManager"> - <modules> - <module fileurl="file://$PROJECT_DIR$/.idea/hotmapper.iml" filepath="$PROJECT_DIR$/.idea/hotmapper.iml" /> - </modules> - </component> -</project> \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml deleted file mode 100644 index d87d897..0000000 --- a/.idea/vcs.xml +++ /dev/null @@ -1,10 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<project version="4"> - <component name="VcsDirectoryMappings"> - <mapping directory="$PROJECT_DIR$" vcs="Git" /> - <mapping directory="$PROJECT_DIR$/env/src/sqlalchemy-monetdb" vcs="Git" /> - <mapping directory="$PROJECT_DIR$/mapping-protocols" vcs="Git" /> - <mapping directory="$PROJECT_DIR$/sql-scripts" vcs="Git" /> - <mapping directory="$PROJECT_DIR$/table-definitions" vcs="Git" /> - </component> -</project> \ No newline at end of file diff --git a/.idea/workspace.xml b/.idea/workspace.xml deleted file mode 100644 index 29dd00e..0000000 --- a/.idea/workspace.xml +++ /dev/null @@ -1,245 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<project version="4"> - <component name="ChangeListManager"> - <list default="true" id="aa9b8e4e-02cb-4085-b90c-6022543ea627" name="Default Changelist" comment=""> - <change beforePath="$PROJECT_DIR$/database/database_table.py" beforeDir="false" afterPath="$PROJECT_DIR$/database/database_table.py" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/mapping-protocols" beforeDir="false" afterPath="$PROJECT_DIR$/mapping-protocols" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/mapping-protocols/escola.csv" beforeDir="false" afterPath="$PROJECT_DIR$/mapping-protocols/escola.csv" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/mapping-protocols/turma.csv" beforeDir="false" afterPath="$PROJECT_DIR$/mapping-protocols/turma.csv" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/settings.py" beforeDir="false" afterPath="$PROJECT_DIR$/settings.py" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/table-definitions" beforeDir="false" afterPath="$PROJECT_DIR$/table-definitions" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/table-definitions/docente.json" beforeDir="false" afterPath="$PROJECT_DIR$/table-definitions/docente.json" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/table-definitions/escola.json" beforeDir="false" afterPath="$PROJECT_DIR$/table-definitions/escola.json" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/table-definitions/matricula.json" beforeDir="false" afterPath="$PROJECT_DIR$/table-definitions/matricula.json" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/table-definitions/pnad.json" beforeDir="false" afterPath="$PROJECT_DIR$/table-definitions/pnad.json" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/table-definitions/turma.json" beforeDir="false" afterPath="$PROJECT_DIR$/table-definitions/turma.json" afterDir="false" /> - </list> - <option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" /> - <option name="SHOW_DIALOG" value="false" /> - <option name="HIGHLIGHT_CONFLICTS" value="true" /> - <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" /> - <option name="LAST_RESOLUTION" value="IGNORE" /> - </component> - <component name="DjangoConsoleOptions" custom-start-script="import sys; print('Python %s on %s' % (sys.version, sys.platform)) import django; print('Django %s' % django.get_version()) sys.path.extend([WORKING_DIR_AND_PYTHON_PATHS]) if 'setup' in dir(django): django.setup() import django_manage_shell; django_manage_shell.run(PROJECT_ROOT)"> - <option name="myCustomStartScript" value="import sys; print('Python %s on %s' % (sys.version, sys.platform)) import django; print('Django %s' % django.get_version()) sys.path.extend([WORKING_DIR_AND_PYTHON_PATHS]) if 'setup' in dir(django): django.setup() import django_manage_shell; django_manage_shell.run(PROJECT_ROOT)" /> - </component> - <component name="FileEditorManager"> - <splitter split-orientation="horizontal" split-proportion="0.38232163"> - <split-first> - <leaf SIDE_TABS_SIZE_LIMIT_KEY="300"> - <file pinned="false" current-in-tab="false"> - <entry file="file://$PROJECT_DIR$/manage.py"> - <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="1426"> - <caret line="65" selection-start-line="65" selection-end-line="65" /> - </state> - </provider> - </entry> - </file> - <file pinned="false" current-in-tab="false"> - <entry file="file://$PROJECT_DIR$/database/actions.py"> - <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="2967"> - <caret line="137" column="21" selection-start-line="137" selection-start-column="21" selection-end-line="137" selection-end-column="21" /> - </state> - </provider> - </entry> - </file> - <file pinned="false" current-in-tab="true"> - <entry file="file://$PROJECT_DIR$/database/database_table.py"> - <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="366"> - <caret line="923" column="49" selection-start-line="923" selection-start-column="49" selection-end-line="923" selection-end-column="49" /> - </state> - </provider> - </entry> - </file> - <file pinned="false" current-in-tab="false"> - <entry file="file://$PROJECT_DIR$/settings.py"> - <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="1426"> - <caret line="62" selection-start-line="62" selection-end-line="62" /> - </state> - </provider> - </entry> - </file> - <file pinned="false" current-in-tab="false"> - <entry file="file://$PROJECT_DIR$/auto.sh"> - <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="3243"> - <caret line="141" column="56" selection-start-line="141" selection-start-column="56" selection-end-line="141" selection-end-column="56" /> - </state> - </provider> - </entry> - </file> - </leaf> - </split-first> - <split-second> - <leaf SIDE_TABS_SIZE_LIMIT_KEY="300"> - <file pinned="false" current-in-tab="true"> - <entry file="file://$PROJECT_DIR$/database/database_table.py"> - <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="296"> - <caret line="888" column="16" lean-forward="true" selection-start-line="888" selection-start-column="16" selection-end-line="888" selection-end-column="16" /> - </state> - </provider> - </entry> - </file> - <file pinned="false" current-in-tab="false"> - <entry file="file://$PROJECT_DIR$/database/protocol.py"> - <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="2323"> - <caret line="104" column="35" selection-start-line="104" selection-start-column="35" selection-end-line="104" selection-end-column="35" /> - </state> - </provider> - </entry> - </file> - </leaf> - </split-second> - </splitter> - </component> - <component name="FindInProjectRecents"> - <findStrings> - <find>_get_de</find> - <find>_derivative_recursion</find> - <find>_reso</find> - </findStrings> - </component> - <component name="Git.Settings"> - <option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" /> - </component> - <component name="IdeDocumentHistory"> - <option name="CHANGED_PATHS"> - <list> - <option value="$PROJECT_DIR$/settings.py" /> - <option value="$PROJECT_DIR$/database/database_table.py" /> - </list> - </option> - </component> - <component name="ProjectFrameBounds" extendedState="6"> - <option name="x" value="1600" /> - <option name="width" value="1920" /> - <option name="height" value="1042" /> - </component> - <component name="ProjectLevelVcsManager" settingsEditedManually="true" /> - <component name="ProjectView"> - <navigator proportions="" version="1"> - <foldersAlwaysOnTop value="true" /> - </navigator> - <panes /> - </component> - <component name="PropertiesComponent"> - <property name="WebServerToolWindowFactoryState" value="false" /> - <property name="last_opened_file_path" value="$PROJECT_DIR$" /> - <property name="nodejs_interpreter_path.stuck_in_default_project" value="undefined stuck path" /> - <property name="nodejs_npm_path_reset_for_default_project" value="true" /> - <property name="settings.editor.selected.configurable" value="preferences.pluginManager" /> - </component> - <component name="RunDashboard"> - <option name="ruleStates"> - <list> - <RuleState> - <option name="name" value="ConfigurationTypeDashboardGroupingRule" /> - </RuleState> - <RuleState> - <option name="name" value="StatusDashboardGroupingRule" /> - </RuleState> - </list> - </option> - </component> - <component name="SvnConfiguration"> - <configuration /> - </component> - <component name="TaskManager"> - <task active="true" id="Default" summary="Default task"> - <changelist id="aa9b8e4e-02cb-4085-b90c-6022543ea627" name="Default Changelist" comment="" /> - <created>1585053766024</created> - <option name="number" value="Default" /> - <option name="presentableId" value="Default" /> - <updated>1585053766024</updated> - <workItem from="1585053767960" duration="1312000" /> - <workItem from="1585139723643" duration="122000" /> - <workItem from="1585139870979" duration="8128000" /> - <workItem from="1585224059022" duration="5809000" /> - <workItem from="1585230046456" duration="4743000" /> - <workItem from="1585310779353" duration="127000" /> - </task> - <servers /> - </component> - <component name="TimeTrackingManager"> - <option name="totallyTimeSpent" value="20241000" /> - </component> - <component name="ToolWindowManager"> - <frame x="1600" y="0" width="1920" height="1042" extended-state="6" /> - <layout> - <window_info content_ui="combo" id="Project" order="0" weight="0.11235357" /> - <window_info id="Structure" order="1" side_tool="true" weight="0.25" /> - <window_info id="Favorites" order="2" side_tool="true" /> - <window_info anchor="bottom" id="Message" order="0" /> - <window_info anchor="bottom" id="Find" order="1" /> - <window_info anchor="bottom" id="Run" order="2" /> - <window_info anchor="bottom" id="Debug" order="3" weight="0.4" /> - <window_info anchor="bottom" id="Cvs" order="4" weight="0.25" /> - <window_info anchor="bottom" id="Inspection" order="5" weight="0.4" /> - <window_info anchor="bottom" id="TODO" order="6" /> - <window_info anchor="bottom" id="Docker" order="7" show_stripe_button="false" /> - <window_info anchor="bottom" id="Version Control" order="8" /> - <window_info anchor="bottom" id="Database Changes" order="9" /> - <window_info anchor="bottom" id="Event Log" order="10" side_tool="true" /> - <window_info active="true" anchor="bottom" id="Terminal" order="11" visible="true" weight="0.17218544" /> - <window_info anchor="bottom" id="Python Console" order="12" /> - <window_info anchor="right" id="Commander" internal_type="SLIDING" order="0" type="SLIDING" weight="0.4" /> - <window_info anchor="right" id="Ant Build" order="1" weight="0.25" /> - <window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" /> - <window_info anchor="right" id="SciView" order="3" /> - <window_info anchor="right" id="Database" order="4" /> - </layout> - </component> - <component name="TypeScriptGeneratedFilesManager"> - <option name="version" value="1" /> - </component> - <component name="editorHistoryManager"> - <entry file="file://$PROJECT_DIR$/manage.py"> - <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="1426"> - <caret line="65" selection-start-line="65" selection-end-line="65" /> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/database/actions.py"> - <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="2967"> - <caret line="137" column="21" selection-start-line="137" selection-start-column="21" selection-end-line="137" selection-end-column="21" /> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/settings.py"> - <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="1426"> - <caret line="62" selection-start-line="62" selection-end-line="62" /> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/auto.sh"> - <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="3243"> - <caret line="141" column="56" selection-start-line="141" selection-start-column="56" selection-end-line="141" selection-end-column="56" /> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/database/database_table.py"> - <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="296"> - <caret line="888" column="16" lean-forward="true" selection-start-line="888" selection-start-column="16" selection-end-line="888" selection-end-column="16" /> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/database/protocol.py"> - <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="2323"> - <caret line="104" column="35" selection-start-line="104" selection-start-column="35" selection-end-line="104" selection-end-column="35" /> - </state> - </provider> - </entry> - </component> -</project> \ No newline at end of file -- GitLab From 524a87a592b08ede0c8615c070b3728958e8d520 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Victor=20Frans=20Pondaco=20Winandy?= <jvfpw18@inf.ufpr.br> Date: Tue, 18 Aug 2020 17:23:40 -0300 Subject: [PATCH 3/4] Fix derivatives not being executed --- database/database_table.py | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/database/database_table.py b/database/database_table.py index e367277..8483a85 100644 --- a/database/database_table.py +++ b/database/database_table.py @@ -203,11 +203,13 @@ class DatabaseTable(Table): bind = self.metadata.bind pks = get_primary_keys(self) - primary_keys = (ttable.columns.get(pk.name) for pk in pks) - new_pk = PrimaryKeyConstraint(*primary_keys) - query = AddConstraint(new_pk) - bind.execute(query) + if len(pks) > 0: + primary_keys = (ttable.columns.get(pk.name) for pk in pks) + + new_pk = PrimaryKeyConstraint(*primary_keys) + query = AddConstraint(new_pk) + bind.execute(query) def populate_temporary(self, ttable, in_file, header, year, delimiters=[';', '\\n', '"'], null='', offset=2, bind=None): @@ -790,7 +792,7 @@ class DatabaseTable(Table): if target is not None and str(self) + '.' + target in recursion_list: # This is a circular reference. Don't be like that. - print(target) + print(target, self) raise CircularReferenceError(target) # Query for the column, header of csv or var name if empty @@ -815,7 +817,10 @@ class DatabaseTable(Table): referred_tables = [] case = case.strip("~") # doesn't need "~" anymore - for substr in case.split(): + str_list = re.findall(r'("[\w]"|[\w.]+)', case) + # str_list = case.split() + for substr in str_list: + # print('ss', substr) if '.' in substr: # We have a var from another table table = substr.split('.')[0] table = gen_data_table(table, self.metadata) @@ -825,12 +830,17 @@ class DatabaseTable(Table): var_name = substr # If it is a var, will need to be evaluated as it's a dependency. - if table._protocol.target_from_dbcolumn(var_name) is not None: + if table._protocol and table._protocol.target_from_dbcolumn(var_name) is not None: # Prevents lowering level if var is an aggregation. - print("VAR", var_name) level = max(level + table._derivative_recursion(var_name, year, recursion_list)['level'], level) - if table is not self and table not in referred_tables: - referred_tables.append(table) + else: + var_target = self._get_variable_target(var_name.strip('"'), year) + if var_target is not None: + var_db = self._protocol.dbcolumn_from_target(var_target)[0] + case = case.replace(var_name, var_db) + + if table is not self and table not in referred_tables: + referred_tables.append(table) self._derivatives[target] = {'query': case, 'dbcolumn': dbcolumn, 'level': level, 'tables': referred_tables} return self._derivatives[target] @@ -880,11 +890,12 @@ class DatabaseTable(Table): def _apply_denormalization(self, ttable, year, column, denorm_query, referred_tables, bind): query = update(ttable) + print(ttable.name) query = query.values(**{column: text(denorm_query)}) + print('qc', query, column) for ref_table in referred_tables: - ref_table = gen_data_table(ref_table, self.metadata) ref_table.map_from_database(bind) try: fk_tuples = [(ttable.columns.get(fk_column.name), fkey) -- GitLab From bc0d5a698fc3e49301d22cbf5371df015c6a4d1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Victor=20Frans=20Pondaco=20Winandy?= <jvfpw18@inf.ufpr.br> Date: Wed, 19 Aug 2020 11:16:04 -0300 Subject: [PATCH 4/4] Make constants execute before denormalizations, fix level 3 error --- database/database_table.py | 104 ++++++++++--------------------------- settings.py | 2 +- 2 files changed, 27 insertions(+), 79 deletions(-) diff --git a/database/database_table.py b/database/database_table.py index 8483a85..b785db3 100644 --- a/database/database_table.py +++ b/database/database_table.py @@ -818,13 +818,12 @@ class DatabaseTable(Table): case = case.strip("~") # doesn't need "~" anymore str_list = re.findall(r'("[\w]"|[\w.]+)', case) - # str_list = case.split() for substr in str_list: - # print('ss', substr) if '.' in substr: # We have a var from another table table = substr.split('.')[0] table = gen_data_table(table, self.metadata) var_name = substr.split('.')[1] + level += 1 else: table = self var_name = substr @@ -833,11 +832,11 @@ class DatabaseTable(Table): if table._protocol and table._protocol.target_from_dbcolumn(var_name) is not None: # Prevents lowering level if var is an aggregation. level = max(level + table._derivative_recursion(var_name, year, recursion_list)['level'], level) - else: - var_target = self._get_variable_target(var_name.strip('"'), year) - if var_target is not None: - var_db = self._protocol.dbcolumn_from_target(var_target)[0] - case = case.replace(var_name, var_db) + + var_target = self._get_variable_target(var_name.strip('"'), year) + if var_target is not None: + var_db = self._protocol.dbcolumn_from_target(var_target)[0] + case = case.replace(var_name, var_db) if table is not self and table not in referred_tables: referred_tables.append(table) @@ -854,65 +853,41 @@ class DatabaseTable(Table): self._derivatives = {} return self._derivative_recursion(original, year) - def _get_denormalizations(self, ttable, originals, year, bind): - ''' - Searches protocol for denormalizations and yields the necessary update queries. - ''' - exp = r'([a-zA-Z0-9_]+)\.([a-zA-Z0-9_]+)' - external = {} - for dst, original in originals: - original = original.strip(' ~\n\t') - for match in re.finditer(exp, original): - table, column = match.groups() - if table not in external: - external[table] = [] - external[table].append([dst, text(original)]) - - for table in external: - query = update(ttable) - - referred_table = gen_data_table(table, self.metadata) - referred_table.map_from_database(bind) - try: - fk_tuples = [(fk_column, fkey) for fk_column, fkey in self.get_relations(referred_table)] - except MissingForeignKeyError: - logger.warning("Using relations from " + table + " instead of " + str(self) + " to apply derivative.") - fk_tuples = [(fk_column, fkey) for fk_column, fkey in referred_table.get_relations(self)] + def _apply_denormalization(self, ttable, year, column, denorm_query, referred_tables, bind): - for dst, src in external[table]: - query = query.values(**{dst[0]: src}) - for fk_column, fkey in fk_tuples: - # fk_column = ttable.columns.get(fk_column.name) - query = query.where(fk_column == fkey) - if year: - query = query.where(ttable.columns.get(settings.YEAR_COLUMN) == year) - yield query + # Hack to make pymonetdb be able to work with the columns from 2 tables when one is temporary + t_schema = ttable.schema + ttable.schema = None - def _apply_denormalization(self, ttable, year, column, denorm_query, referred_tables, bind): query = update(ttable) - print(ttable.name) query = query.values(**{column: text(denorm_query)}) - print('qc', query, column) for ref_table in referred_tables: ref_table.map_from_database(bind) - try: - fk_tuples = [(ttable.columns.get(fk_column.name), fkey) - for fk_column, fkey in self.get_relations(ref_table)] - except MissingForeignKeyError: - logger.warning("Using relations from " + str(ref_table) + + fk_tuples = [(ttable.columns.get(fk_column.name), fkey) + for fk_column, fkey in self.get_relations(ref_table)] + if not fk_tuples: + logger.warning("Trying to use relations from " + str(ref_table) + " instead of " + str(self) + " to apply derivative.") fk_tuples = [(ttable.columns.get(fkey.name),fk_column) for fk_column, fkey in ref_table.get_relations(self)] + if not fk_tuples: + logger.error("COULDN'T ESTABLISH " + ref_table.name + " RELATION WITH " + self.name + + " IGNORING COLUMN: " + column) + ttable.schema = t_schema + return for fk_column, fkey in fk_tuples: query = query.where(fk_column == fkey) if year: query = query.where(ttable.columns.get(settings.YEAR_COLUMN) == year) + bind.execute(query) + ttable.schema = t_schema + def apply_derivatives(self, ttable, columns, year, bind=None, dbonly=False): ''' Given a list of columns, searches for derivatives and denormalizations and applies them @@ -933,7 +908,8 @@ class DatabaseTable(Table): level = [self._derivatives[d] for d in self._derivatives if self._derivatives[d]['level'] == i] for derivative in level: if len(derivative['tables']) == 0: - query[derivative['dbcolumn'][0]] = text(derivative['query']) + if not dbonly: + query[derivative['dbcolumn'][0]] = text(derivative['query']) else: self._apply_denormalization(ttable, year, derivative['dbcolumn'][0], derivative['query'], derivative['tables'], bind) @@ -942,35 +918,6 @@ class DatabaseTable(Table): query = update(ttable).values(**query) bind.execute(query) - - - - print(self._derivatives) - - # originals = [(self._derivatives[d]['dbcolumn'], self._derivatives[d]['original'])\ - # for d in self._derivatives if self._derivatives[d]['level'] == 0] - # - # t_schema = ttable.schema - # ttable.schema = None - # for query in self._get_denormalizations(ttable, originals, year, bind): - # bind.execute(query) - # - # ttable.schema = t_schema - # if len(self._derivatives) > 0: - # max_level = max([self._derivatives[d]['level'] for d in self._derivatives]) - # for i in range(max_level): - # i = i+1 - # query = {} - # level = [self._derivatives[d] for d in self._derivatives if\ - # self._derivatives[d]['level'] == i] - # for derivative in level: - # if not dbonly or derivative['dbmapped']: - # query[derivative['dbcolumn'][0]] = text(derivative['processed']) - # - # query = update(ttable).values(**query) - # - # bind.execute(query) - return self._derivatives def _get_aggregations(self, year): @@ -1049,7 +996,8 @@ class DatabaseTable(Table): foreign_key = fk break if not foreign_key: - raise MissingForeignKeyError(table) + logger.warning("Couldn't find foreign key relation between " + self.name + " and " + table.name) + return None for _, fk_column in foreign_key.columns.items(): fkey = list(fk_column.foreign_keys)[0] fkey = fkey.column.name diff --git a/settings.py b/settings.py index b8c1601..36dd10f 100644 --- a/settings.py +++ b/settings.py @@ -33,7 +33,7 @@ DATABASE_USER_PASSWORD = 'monetdb' DATABASE_HOST = 'localhost' # Database to connect to -DATABASE = 'test' +DATABASE = 'hotmapper_demo' # Column used to run aggregations and denormalizations YEAR_COLUMN = 'ano_censo' -- GitLab