import numpy as np
import pandas as pd


class DistCalc:
    """Score, per year, how well a pairwise statistic matches columns.

    Every input row pairs a column of the base year (``coluna1``) with a
    column of the following year (``coluna2``) and carries a distance-like
    statistic in ``stat_column``.  A proposed pair is counted as correct when
    both column names are equal.

    For every year (``ano_coluna1``) four accuracies are stored as
    ``[year, value]`` pairs:

    * ``stat_<key>_matches`` - correct proposed pairs / base columns paired
    * ``stat_<key>_new``     - overlap of detected vs. real new columns
    * ``stat_<key>_empty``   - overlap of detected vs. real emptied columns
    * ``stat_<key>``         - all correct decisions / all distinct columns

    ``<key>`` is one of ``f``, ``ks``, ``t`` or ``cohend``; results of
    :meth:`calcTop3` go to the same names with a ``_top3`` suffix.
    """

    # Supported statistic columns and the key used in the attribute names.
    _STAT_KEYS = {
        'estatistica_f': 'f',
        'estatistica_t': 't',
        'estatistica_ks': 'ks',
        'estatistica_cohend': 'cohend',
    }

    # Attribute-name infixes, in the order produced by ``_score_year``.
    _SCORE_PARTS = ('', '_matches', '_new', '_empty')

    def __init__(self):
        # Results of the greedy one-to-one matching (``calc``).
        self.stat_f = []
        self.stat_ks = []
        self.stat_t = []
        self.stat_cohend = []
        self.stat_f_matches = []
        self.stat_ks_matches = []
        self.stat_t_matches = []
        self.stat_cohend_matches = []
        self.stat_f_new = []
        self.stat_ks_new = []
        self.stat_t_new = []
        self.stat_cohend_new = []
        self.stat_f_empty = []
        self.stat_ks_empty = []
        self.stat_t_empty = []
        self.stat_cohend_empty = []

        # Results of the top-3 candidate matching (``calcTop3``).
        self.stat_f_top3 = []
        self.stat_ks_top3 = []
        self.stat_t_top3 = []
        self.stat_cohend_top3 = []
        self.stat_f_matches_top3 = []
        self.stat_ks_matches_top3 = []
        self.stat_t_matches_top3 = []
        self.stat_cohend_matches_top3 = []
        self.stat_f_new_top3 = []
        self.stat_ks_new_top3 = []
        self.stat_t_new_top3 = []
        self.stat_cohend_new_top3 = []
        self.stat_f_empty_top3 = []
        self.stat_ks_empty_top3 = []
        self.stat_t_empty_top3 = []
        self.stat_cohend_empty_top3 = []

        # Sorted union of every year seen so far.
        self.years = []

    @property
    def get_stat_f(self):
        return self.stat_f

    @property
    def get_stat_ks(self):
        return self.stat_ks

    @property
    def get_stat_t(self):
        return self.stat_t

    @property
    def get_stat_cohend(self):
        return self.stat_cohend

    # The ``*_top3`` accessors used to return the non-top3 lists.
    @property
    def get_stat_f_top3(self):
        return self.stat_f_top3

    @property
    def get_stat_ks_top3(self):
        return self.stat_ks_top3

    @property
    def get_stat_t_top3(self):
        return self.stat_t_top3

    @property
    def get_stat_cohend_top3(self):
        return self.stat_cohend_top3

    @property
    def get_years(self):
        return self.years

    @staticmethod
    def _overlap_ratio(found, expected):
        """Return ``(hits, hits / |found U expected|)``; ratio is 1.0 if both are empty."""
        union = found | expected
        if not union:
            return 0, 1.0
        hits = len(found & expected)
        return hits, hits / len(union)

    @classmethod
    def _score_year(cls, base_columns, new_columns, matched_base,
                    matched_next, match_hits):
        """Turn one year's matching outcome into the four accuracies.

        Returns ``(total, matches, new, empty)``.
        """
        base = set(base_columns)
        new = set(new_columns)

        if matched_base:
            acc_matches = match_hits / len(matched_base)
        else:
            # Nothing was paired (this used to raise ZeroDivisionError):
            # perfect only when no column name is shared between the years.
            acc_matches = 0.0 if base & new else 1.0

        # New columns: next-year columns left unpaired vs. really absent from the base.
        new_hits, acc_new = cls._overlap_ratio(new - matched_next, new - base)
        # Emptied columns: base columns left unpaired vs. really absent next year.
        empty_hits, acc_empty = cls._overlap_ratio(base - matched_base, base - new)

        acc_total = (match_hits + new_hits + empty_hits) / len(base | new)
        return acc_total, acc_matches, acc_new, acc_empty

    def _store(self, stat_column, suffix, ano, scores):
        """Append ``[ano, score]`` to the four result lists of ``stat_column``.

        Unsupported statistic names are ignored, as the original if/elif
        chain did.
        """
        key = self._STAT_KEYS.get(stat_column)
        if key is None:
            return
        for part, value in zip(self._SCORE_PARTS, scores):
            getattr(self, f'stat_{key}{part}{suffix}').append([ano, value])

    def calc(self, df, stat_column, threshold):
        """Greedy one-to-one matching, scored per year.

        Rows are visited in frame order and each row pairs its two columns
        unless one of them is already paired.  The scan of a year stops at
        the first unpaired row whose statistic is above ``threshold``, so
        ``df`` is assumed to be sorted ascending by ``stat_column`` within
        each year.

        Args:
            df: frame with ``ano_coluna1``, ``coluna1``, ``coluna2`` and
                ``stat_column`` columns.
            stat_column: name of the statistic column; it also selects the
                result lists.
            threshold: largest statistic value still accepted as a match.
        """
        anos = df.ano_coluna1.unique()
        self.years = np.union1d(self.years, anos)

        for ano in anos:
            ano_df = df[df.ano_coluna1 == ano]

            matched_base = set()
            matched_next = set()
            match_hits = 0
            rows = zip(ano_df['coluna1'], ano_df['coluna2'], ano_df[stat_column])
            for base_col, next_col, stat in rows:
                # Skip columns that already belong to a pair.
                if base_col in matched_base or next_col in matched_next:
                    continue
                if stat > threshold:
                    break
                matched_base.add(base_col)
                matched_next.add(next_col)
                if base_col == next_col:
                    match_hits += 1

            scores = self._score_year(
                ano_df.coluna1.unique(), ano_df.coluna2.unique(),
                matched_base, matched_next, match_hits,
            )
            self._store(stat_column, '', ano, scores)

    def calcTop3(self, df, stat_column, threshold):
        """Top-3 candidate matching, scored per year.

        For every base column the first three rows (in frame order) whose
        statistic is strictly below ``threshold`` are its candidates; the
        column is matched correctly when its own name is among them.

        Args:
            df: frame with ``ano_coluna1``, ``coluna1``, ``coluna2`` and
                ``stat_column`` columns.
            stat_column: name of the statistic column; it also selects the
                ``*_top3`` result lists.
            threshold: exclusive upper bound for candidate rows.
        """
        anos = df.ano_coluna1.unique()
        # Keep ``years`` populated even when only this method is used.
        self.years = np.union1d(self.years, anos)

        for ano in anos:
            ano_df = df[df.ano_coluna1 == ano]
            base_columns = ano_df.coluna1.unique()
            below = ano_df[ano_df[stat_column] < threshold]

            matched_base = set()
            matched_next = set()
            match_hits = 0
            for col in base_columns:
                # Columns are read by name, as in ``calc``, not by position.
                candidates = below.loc[below.coluna1 == col, 'coluna2'].iloc[:3].tolist()
                if not candidates:
                    continue
                matched_base.add(col)
                matched_next.update(candidates)
                if col in candidates:
                    match_hits += 1

            scores = self._score_year(
                base_columns, ano_df.coluna2.unique(),
                matched_base, matched_next, match_hits,
            )
            self._store(stat_column, '_top3', ano, scores)
# ---------------------------------------------------------------------------
# Import the results for each statistical method
# ---------------------------------------------------------------------------

def load_statistic(path, stat_column, offset=0):
    """Load one pairwise-statistic CSV and prepare it for ``DistCalc``.

    The statistic is replaced by ``abs(value - offset)``, rows are sorted by
    year and then by that distance, and every pair involving an ``ANO_CENSO``
    column is dropped.
    """
    frame = pd.read_csv(path, sep=',')
    frame[stat_column] = (frame[stat_column] - offset).abs()
    frame = frame.sort_values(by=['ano_coluna1', stat_column])
    is_year_column = (
        frame['coluna1'].str.contains('ANO_CENSO')
        | frame['coluna2'].str.contains('ANO_CENSO')
    )
    return frame[~is_year_column]


# The F statistic is used as its distance from 1; the others as absolute values.
df_f = load_statistic('Testes_R/Result_F/F_subsequente.csv', 'estatistica_f', offset=1)
df_t = load_statistic('Testes_R/Result_T/T_subsequente.csv', 'estatistica_t')
df_c = load_statistic('Testes_R/Result_COHEND/COHEND_subsequente.csv', 'estatistica_cohend')
df_ks = load_statistic('Testes_R/Result_KS/KS_subsequente.csv', 'estatistica_ks')

# ---------------------------------------------------------------------------
# Calculate the column matches
# ---------------------------------------------------------------------------

dist = DistCalc()
dist.calc(df_f, 'estatistica_f', 0.7)
dist.calc(df_t, 'estatistica_t', 40)
dist.calc(df_c, 'estatistica_cohend', 0.15)
dist.calc(df_ks, 'estatistica_ks', 0.10)

dist.calcTop3(df_f, 'estatistica_f', 0.7)
dist.calcTop3(df_t, 'estatistica_t', 40)
dist.calcTop3(df_c, 'estatistica_cohend', 0.15)
dist.calcTop3(df_ks, 'estatistica_ks', 0.10)

# ---------------------------------------------------------------------------
# Create the result dataframes
# ---------------------------------------------------------------------------

RESULT_COLUMNS = ['ano_base', 'match', 'new', 'empty', 'total']


def build_result_frame(calc, key, suffix=''):
    """Build the per-year accuracy table of one statistic.

    Rows follow ``calc.get_years``; the last two rows hold the column-wise
    mean and standard deviation of the per-year rows.  Both are computed
    before either is appended, so the mean row is not counted as a data
    point in the standard deviation (it previously was).
    """
    matches = getattr(calc, f'stat_{key}_matches{suffix}')
    new = getattr(calc, f'stat_{key}_new{suffix}')
    empty = getattr(calc, f'stat_{key}_empty{suffix}')
    total = getattr(calc, f'stat_{key}{suffix}')

    rows = [
        [ano, matches[i][1], new[i][1], empty[i][1], total[i][1]]
        for i, ano in enumerate(calc.get_years)
    ]
    frame = pd.DataFrame(rows, columns=RESULT_COLUMNS, dtype=float)

    mean_row = frame.mean()
    std_row = frame.std()
    frame.loc[len(frame)] = mean_row
    frame.loc[len(frame)] = std_row
    return frame.round(3)


result_ks = build_result_frame(dist, 'ks')
resultTop3_ks = build_result_frame(dist, 'ks', '_top3')

result_f = build_result_frame(dist, 'f')
resultTop3_f = build_result_frame(dist, 'f', '_top3')

result_cohend = build_result_frame(dist, 'cohend')
resultTop3_cohend = build_result_frame(dist, 'cohend', '_top3')

result_t = build_result_frame(dist, 't')
resultTop3_t = build_result_frame(dist, 't', '_top3')

# ---------------------------------------------------------------------------
# Write the result tables
# ---------------------------------------------------------------------------

result_ks.to_csv('./result_ks.csv', index=False)
resultTop3_ks.to_csv('./resultTop3_ks.csv', index=False)

result_f.to_csv('./result_f.csv', index=False)
resultTop3_f.to_csv('./resultTop3_f.csv', index=False)

result_t.to_csv('./result_t.csv', index=False)
resultTop3_t.to_csv('./resultTop3_t.csv', index=False)

result_cohend.to_csv('./result_cohend.csv', index=False)
resultTop3_cohend.to_csv('./resultTop3_cohend.csv', index=False)
0000000000000000000000000000000000000000..1228e553169493f073bb7341bcbf2fcef422ad88 --- /dev/null +++ b/.ipynb_checkpoints/resultTop3_f-checkpoint.csv @@ -0,0 +1,17 @@ +ano_base,match,new,empty,total +2007.0,1.0,1.0,1.0,1.0 +2008.0,1.0,1.0,1.0,1.0 +2009.0,1.0,1.0,1.0,1.0 +2010.0,1.0,1.0,1.0,1.0 +2011.0,1.0,1.0,1.0,1.0 +2012.0,1.0,1.0,1.0,1.0 +2013.0,1.0,0.0,0.0,0.765 +2014.0,1.0,0.0,0.0,0.765 +2015.0,1.0,0.0,0.0,0.765 +2016.0,1.0,0.0,0.0,0.765 +2017.0,1.0,1.0,1.0,1.0 +2018.0,0.5,0.944,0.5,0.905 +2019.0,0.933,0.364,0.0,0.692 +2020.0,0.864,0.0,0.0,0.731 +2013.5,0.95,0.593,0.536,0.885 +4.031,0.13,0.47,0.48,0.123 diff --git a/.ipynb_checkpoints/resultTop3_ks-checkpoint.csv b/.ipynb_checkpoints/resultTop3_ks-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..b91eee0a0dce556d063db7f7773a0782d47517de --- /dev/null +++ b/.ipynb_checkpoints/resultTop3_ks-checkpoint.csv @@ -0,0 +1,17 @@ +ano_base,match,new,empty,total +2007.0,1.0,1.0,1.0,1.0 +2008.0,1.0,1.0,1.0,1.0 +2009.0,1.0,1.0,1.0,1.0 +2010.0,1.0,1.0,1.0,1.0 +2011.0,1.0,1.0,1.0,1.0 +2012.0,1.0,0.818,1.0,0.882 +2013.0,0.941,1.0,1.0,0.941 +2014.0,0.941,1.0,1.0,0.941 +2015.0,0.941,0.0,1.0,0.941 +2016.0,0.941,1.0,1.0,0.941 +2017.0,1.0,1.0,1.0,1.0 +2018.0,0.5,0.944,0.5,0.905 +2019.0,0.947,0.571,1.0,0.846 +2020.0,0.923,1.0,1.0,0.923 +2013.5,0.938,0.881,0.964,0.952 +4.031,0.125,0.27,0.129,0.049 diff --git a/.ipynb_checkpoints/result_cohend-checkpoint.csv b/.ipynb_checkpoints/result_cohend-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..87a9872d2bd3e1cfbf3dbdcd288b226abf74741a --- /dev/null +++ b/.ipynb_checkpoints/result_cohend-checkpoint.csv @@ -0,0 +1,17 @@ +ano_base,match,new,empty,total +2007.0,1.0,1.0,1.0,1.0 +2008.0,0.6,0.0,0.0,0.5 +2009.0,0.4,0.0,0.0,0.333 +2010.0,0.4,0.0,0.0,0.333 +2011.0,0.667,1.0,1.0,0.667 +2012.0,0.667,1.0,1.0,0.882 +2013.0,0.455,0.0,0.0,0.294 +2014.0,0.882,1.0,1.0,0.882 +2015.0,1.0,1.0,1.0,1.0 +2016.0,0.636,0.0,0.0,0.412 
+2017.0,1.0,1.0,1.0,1.0 +2018.0,0.0,0.944,0.5,0.857 +2019.0,0.6,0.385,0.0,0.538 +2020.0,0.571,0.0,0.0,0.462 +2013.5,0.634,0.524,0.464,0.654 +4.031,0.269,0.478,0.48,0.264 diff --git a/.ipynb_checkpoints/result_f-checkpoint.csv b/.ipynb_checkpoints/result_f-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..289934c551d139e6ac90447080cc941970b6ab90 --- /dev/null +++ b/.ipynb_checkpoints/result_f-checkpoint.csv @@ -0,0 +1,17 @@ +ano_base,match,new,empty,total +2007.0,1.0,1.0,1.0,1.0 +2008.0,1.0,1.0,1.0,1.0 +2009.0,1.0,1.0,1.0,1.0 +2010.0,1.0,1.0,1.0,1.0 +2011.0,1.0,1.0,1.0,1.0 +2012.0,0.667,1.0,1.0,0.882 +2013.0,0.824,1.0,1.0,0.824 +2014.0,1.0,1.0,1.0,1.0 +2015.0,1.0,1.0,1.0,1.0 +2016.0,0.917,0.0,0.0,0.647 +2017.0,1.0,1.0,1.0,1.0 +2018.0,0.0,0.944,0.5,0.857 +2019.0,0.533,0.5,0.0,0.538 +2020.0,0.692,1.0,1.0,0.692 +2013.5,0.831,0.889,0.821,0.889 +4.031,0.275,0.278,0.359,0.152 diff --git a/.ipynb_checkpoints/result_ks-checkpoint.csv b/.ipynb_checkpoints/result_ks-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..102a80ce16a50eee482f195dc51a343affbe9c70 --- /dev/null +++ b/.ipynb_checkpoints/result_ks-checkpoint.csv @@ -0,0 +1,17 @@ +ano_base,match,new,empty,total +2007.0,1.0,1.0,1.0,1.0 +2008.0,0.667,1.0,1.0,0.667 +2009.0,0.667,1.0,1.0,0.667 +2010.0,0.667,1.0,1.0,0.667 +2011.0,0.667,1.0,1.0,0.667 +2012.0,1.0,1.0,1.0,1.0 +2013.0,0.412,1.0,1.0,0.412 +2014.0,0.706,1.0,1.0,0.706 +2015.0,0.882,1.0,1.0,0.882 +2016.0,0.647,1.0,1.0,0.647 +2017.0,1.0,1.0,1.0,1.0 +2018.0,0.0,0.944,0.5,0.857 +2019.0,0.684,1.0,1.0,0.769 +2020.0,0.846,1.0,1.0,0.846 +2013.5,0.703,0.996,0.964,0.77 +4.031,0.254,0.014,0.129,0.164 diff --git a/result.csv b/algorithm results/result.csv similarity index 100% rename from result.csv rename to algorithm results/result.csv diff --git a/resultTop3.csv b/algorithm results/resultTop3.csv similarity index 100% rename from resultTop3.csv rename to algorithm results/resultTop3.csv diff --git 
a/algorithm results/resultTop3_cohend.csv b/algorithm results/resultTop3_cohend.csv new file mode 100644 index 0000000000000000000000000000000000000000..07b04721122be5a1b3af82f6adac45fdaf1ed1f1 --- /dev/null +++ b/algorithm results/resultTop3_cohend.csv @@ -0,0 +1,17 @@ +ano_base,match,new,empty,total +2007.0,1.0,1.0,1.0,1.0 +2008.0,1.0,0.0,0.0,0.714 +2009.0,1.0,1.0,1.0,1.0 +2010.0,1.0,1.0,1.0,1.0 +2011.0,1.0,1.0,1.0,1.0 +2012.0,0.833,0.875,1.0,0.857 +2013.0,1.0,0.0,0.0,0.857 +2014.0,0.818,0.333,0.0,0.625 +2015.0,1.0,0.0,0.0,0.867 +2016.0,1.0,0.0,0.0,0.733 +2017.0,1.0,1.0,1.0,1.0 +2018.0,0.5,0.909,0.5,0.8 +2019.0,1.0,1.0,1.0,1.0 +2020.0,0.857,0.0,0.0,0.632 +2013.5,0.929,0.58,0.536,0.863 +4.031,0.137,0.463,0.48,0.138 diff --git a/algorithm results/resultTop3_f.csv b/algorithm results/resultTop3_f.csv new file mode 100644 index 0000000000000000000000000000000000000000..dabc1945ef4311199c485ed64e310e2f9d376f8e --- /dev/null +++ b/algorithm results/resultTop3_f.csv @@ -0,0 +1,17 @@ +ano_base,match,new,empty,total +2007.0,1.0,1.0,1.0,1.0 +2008.0,1.0,1.0,1.0,1.0 +2009.0,1.0,1.0,1.0,1.0 +2010.0,1.0,1.0,1.0,1.0 +2011.0,1.0,1.0,1.0,1.0 +2012.0,0.833,1.0,1.0,0.923 +2013.0,1.0,1.0,1.0,1.0 +2014.0,0.917,0.333,0.0,0.75 +2015.0,1.0,1.0,1.0,1.0 +2016.0,1.0,1.0,0.0,0.929 +2017.0,1.0,1.0,1.0,1.0 +2018.0,0.5,0.909,0.5,0.8 +2019.0,1.0,0.25,1.0,0.812 +2020.0,0.875,1.0,1.0,0.875 +2013.5,0.938,0.892,0.821,0.935 +4.031,0.132,0.247,0.359,0.087 diff --git a/algorithm results/resultTop3_ks.csv b/algorithm results/resultTop3_ks.csv new file mode 100644 index 0000000000000000000000000000000000000000..c8e233c96926fb34e8fa8d0d20578e05aa125997 --- /dev/null +++ b/algorithm results/resultTop3_ks.csv @@ -0,0 +1,17 @@ +ano_base,match,new,empty,total +2007.0,1.0,1.0,1.0,1.0 +2008.0,1.0,1.0,1.0,1.0 +2009.0,1.0,1.0,1.0,1.0 +2010.0,1.0,1.0,1.0,1.0 +2011.0,1.0,1.0,1.0,1.0 +2012.0,1.0,0.875,0.0,0.857 +2013.0,1.0,1.0,1.0,1.0 +2014.0,0.917,0.333,0.0,0.75 +2015.0,1.0,1.0,1.0,1.0 +2016.0,1.0,1.0,0.0,0.929 
+2017.0,1.0,1.0,1.0,1.0 +2018.0,0.5,0.909,0.5,0.8 +2019.0,1.0,0.75,1.0,0.938 +2020.0,0.867,0.0,0.0,0.722 +2013.5,0.949,0.848,0.679,0.928 +4.031,0.13,0.293,0.447,0.099 diff --git a/resultTop3_t.csv b/algorithm results/resultTop3_t.csv similarity index 100% rename from resultTop3_t.csv rename to algorithm results/resultTop3_t.csv diff --git a/algorithm results/result_cohend.csv b/algorithm results/result_cohend.csv new file mode 100644 index 0000000000000000000000000000000000000000..5d0468e6b96abe654a04be91cf562fe55ce19a4f --- /dev/null +++ b/algorithm results/result_cohend.csv @@ -0,0 +1,17 @@ +ano_base,match,new,empty,total +2007.0,1.0,1.0,1.0,1.0 +2008.0,0.6,0.0,0.0,0.429 +2009.0,0.4,0.0,0.0,0.286 +2010.0,0.4,0.0,0.0,0.286 +2011.0,0.667,1.0,1.0,0.667 +2012.0,1.0,0.875,0.0,0.857 +2013.0,0.583,0.0,0.0,0.5 +2014.0,0.222,0.2,0.0,0.167 +2015.0,0.692,0.0,0.0,0.6 +2016.0,0.727,0.0,0.0,0.471 +2017.0,1.0,1.0,1.0,1.0 +2018.0,0.0,1.0,1.0,0.867 +2019.0,0.833,1.0,1.0,0.875 +2020.0,0.667,0.0,0.0,0.4 +2013.5,0.628,0.434,0.357,0.6 +4.031,0.285,0.476,0.479,0.27 diff --git a/algorithm results/result_f.csv b/algorithm results/result_f.csv new file mode 100644 index 0000000000000000000000000000000000000000..43a1ab88e5702531bd1324e89873c9a270a3297a --- /dev/null +++ b/algorithm results/result_f.csv @@ -0,0 +1,17 @@ +ano_base,match,new,empty,total +2007.0,1.0,1.0,1.0,1.0 +2008.0,1.0,1.0,1.0,1.0 +2009.0,1.0,1.0,1.0,1.0 +2010.0,1.0,1.0,1.0,1.0 +2011.0,1.0,1.0,1.0,1.0 +2012.0,0.6,0.875,0.0,0.714 +2013.0,0.615,1.0,1.0,0.615 +2014.0,0.417,0.5,0.0,0.4 +2015.0,0.857,1.0,1.0,0.857 +2016.0,1.0,0.0,0.0,0.867 +2017.0,1.0,1.0,1.0,1.0 +2018.0,0.0,1.0,1.0,0.867 +2019.0,0.833,1.0,1.0,0.875 +2020.0,0.786,0.0,0.0,0.611 +2013.5,0.793,0.812,0.714,0.843 +4.031,0.286,0.356,0.452,0.183 diff --git a/algorithm results/result_ks.csv b/algorithm results/result_ks.csv new file mode 100644 index 0000000000000000000000000000000000000000..c0984fb625f82137bb16f2fb7c885a1a9ce91ec9 --- /dev/null +++ b/algorithm 
results/result_ks.csv @@ -0,0 +1,17 @@ +ano_base,match,new,empty,total +2007.0,1.0,1.0,1.0,1.0 +2008.0,0.667,1.0,1.0,0.667 +2009.0,0.667,1.0,1.0,0.667 +2010.0,0.667,1.0,1.0,0.667 +2011.0,0.667,1.0,1.0,0.667 +2012.0,1.0,0.875,0.0,0.857 +2013.0,0.615,1.0,1.0,0.615 +2014.0,0.273,0.333,0.0,0.25 +2015.0,0.857,1.0,1.0,0.857 +2016.0,0.846,0.0,0.0,0.733 +2017.0,1.0,1.0,1.0,1.0 +2018.0,0.0,1.0,1.0,0.867 +2019.0,1.0,1.0,1.0,1.0 +2020.0,1.0,0.0,0.0,0.684 +2013.5,0.733,0.801,0.714,0.752 +4.031,0.288,0.369,0.452,0.194 diff --git a/result_t.csv b/algorithm results/result_t.csv similarity index 100% rename from result_t.csv rename to algorithm results/result_t.csv diff --git a/distCalc.ipynb b/distCalc.ipynb index 6066c1061263539ee650e82f2979e08d25ff1f27..c804b967856eb7345a11f9ee9e0a97290d4b2123 100644 --- a/distCalc.ipynb +++ b/distCalc.ipynb @@ -15,7 +15,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 2, "id": "af419e44-d6ef-41f7-970c-78c316aeb712", "metadata": { "tags": [] @@ -341,7 +341,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 4, "id": "26287a6f-5537-4509-a09d-52dd59b3a76d", "metadata": { "tags": [] @@ -355,12 +355,12 @@ "df_f = df_f.sort_values(by=['ano_coluna1', stat_column])\n", "df_f = df_f[~df_f['coluna1'].str.contains('ANO_CENSO') & ~df_f['coluna2'].str.contains('ANO_CENSO')]\n", "\n", - "# Import T results\n", - "df_t = pd.read_csv('Testes_R/Result_T/T_subsequente.csv', sep=',')\n", - "stat_column = 'estatistica_t'\n", - "df_t[stat_column] = df_t[stat_column].abs()\n", - "df_t = df_t.sort_values(by=['ano_coluna1', stat_column])\n", - "df_t = df_t[~df_t['coluna1'].str.contains('ANO_CENSO') & ~df_t['coluna2'].str.contains('ANO_CENSO')]\n", + "# # Import T results\n", + "# df_t = pd.read_csv('Testes_R/Result_T/T_subsequente.csv', sep=',')\n", + "# stat_column = 'estatistica_t'\n", + "# df_t[stat_column] = df_t[stat_column].abs()\n", + "# df_t = df_t.sort_values(by=['ano_coluna1', stat_column])\n", + "# df_t 
= df_t[~df_t['coluna1'].str.contains('ANO_CENSO') & ~df_t['coluna2'].str.contains('ANO_CENSO')]\n", "\n", "# Import COHEND results\n", "df_c = pd.read_csv('Testes_R/Result_COHEND/COHEND_subsequente.csv', sep=',')\n", @@ -387,7 +387,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 6, "id": "f9541a11-c1bf-4318-847a-100917e13204", "metadata": { "tags": [] @@ -396,12 +396,12 @@ "source": [ "dist = DistCalc()\n", "dist.calc(df_f, 'estatistica_f', 0.7)\n", - "dist.calc(df_t, 'estatistica_t', 40)\n", + "# dist.calc(df_t, 'estatistica_t', 40)\n", "dist.calc(df_c, 'estatistica_cohend', 0.15)\n", "dist.calc(df_ks, 'estatistica_ks', 0.10)\n", "\n", "dist.calcTop3(df_f, 'estatistica_f', 0.7)\n", - "dist.calcTop3(df_t, 'estatistica_t', 40)\n", + "# dist.calcTop3(df_t, 'estatistica_t', 40)\n", "dist.calcTop3(df_c, 'estatistica_cohend', 0.15)\n", "dist.calcTop3(df_ks, 'estatistica_ks', 0.10)" ] @@ -416,7 +416,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 7, "id": "527ff27d-f321-4749-a94d-dd7d824ef682", "metadata": { "tags": [] @@ -471,26 +471,26 @@ "result_cohend = result_cohend.round(3)\n", "resultTop3_cohend = resultTop3_cohend.round(3)\n", "\n", - "# ================= T =================\n", - "result_t = pd.DataFrame(columns=['ano_base', 'match', 'new', 'empty', 'total'])\n", - "resultTop3_t = pd.DataFrame(columns=['ano_base', 'match', 'new', 'empty', 'total'])\n", - "for i, ano in enumerate(dist.get_years):\n", - " new_row = [ano, dist.stat_t_matches[i][1], dist.stat_t_new[i][1], dist.stat_t_empty[i][1], dist.stat_t[i][1]]\n", - " result_t.loc[len(result_t)] = new_row\n", - " new_row = [ano, dist.stat_t_matches_top3[i][1], dist.stat_t_new_top3[i][1], dist.stat_t_empty_top3[i][1], dist.stat_t_top3[i][1]]\n", - " resultTop3_t.loc[len(resultTop3_t)] = new_row\n", + "# # ================= T =================\n", + "# result_t = pd.DataFrame(columns=['ano_base', 'match', 'new', 'empty', 'total'])\n", + "# resultTop3_t = 
pd.DataFrame(columns=['ano_base', 'match', 'new', 'empty', 'total'])\n", + "# for i, ano in enumerate(dist.get_years):\n", + "# new_row = [ano, dist.stat_t_matches[i][1], dist.stat_t_new[i][1], dist.stat_t_empty[i][1], dist.stat_t[i][1]]\n", + "# result_t.loc[len(result_t)] = new_row\n", + "# new_row = [ano, dist.stat_t_matches_top3[i][1], dist.stat_t_new_top3[i][1], dist.stat_t_empty_top3[i][1], dist.stat_t_top3[i][1]]\n", + "# resultTop3_t.loc[len(resultTop3_t)] = new_row\n", " \n", - "result_t.loc[len(result_t)] = result_t.mean()\n", - "result_t.loc[len(result_t)] = result_t.std()\n", - "resultTop3_t.loc[len(resultTop3_t)] = resultTop3_t.mean()\n", - "resultTop3_t.loc[len(resultTop3_t)] = resultTop3_t.std()\n", - "result_t = result_t.round(3)\n", - "resultTop3_t = resultTop3_t.round(3)" + "# result_t.loc[len(result_t)] = result_t.mean()\n", + "# result_t.loc[len(result_t)] = result_t.std()\n", + "# resultTop3_t.loc[len(resultTop3_t)] = resultTop3_t.mean()\n", + "# resultTop3_t.loc[len(resultTop3_t)] = resultTop3_t.std()\n", + "# result_t = result_t.round(3)\n", + "# resultTop3_t = resultTop3_t.round(3)" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 9, "id": "4cb4afc8-6149-40a7-8f77-af06183d4d23", "metadata": { "tags": [] @@ -503,8 +503,8 @@ "result_f.to_csv(f'./result_f.csv', index=False)\n", "resultTop3_f.to_csv(f'./resultTop3_f.csv', index=False)\n", "\n", - "result_t.to_csv(f'./result_t.csv', index=False)\n", - "resultTop3_t.to_csv(f'./resultTop3_t.csv', index=False)\n", + "# result_t.to_csv(f'./result_t.csv', index=False)\n", + "# resultTop3_t.to_csv(f'./resultTop3_t.csv', index=False)\n", "\n", "result_cohend.to_csv(f'./result_cohend.csv', index=False)\n", "resultTop3_cohend.to_csv(f'./resultTop3_cohend.csv', index=False)" @@ -535,7 +535,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.9" + "version": "3.11.4" } }, "nbformat": 4, diff --git a/resultTop3_cohend.csv 
b/resultTop3_cohend.csv index 07b04721122be5a1b3af82f6adac45fdaf1ed1f1..45d492311e08370d3ef3c1be7a4763eac377eff8 100644 --- a/resultTop3_cohend.csv +++ b/resultTop3_cohend.csv @@ -1,17 +1,17 @@ ano_base,match,new,empty,total 2007.0,1.0,1.0,1.0,1.0 -2008.0,1.0,0.0,0.0,0.714 +2008.0,1.0,0.0,0.0,0.833 2009.0,1.0,1.0,1.0,1.0 2010.0,1.0,1.0,1.0,1.0 2011.0,1.0,1.0,1.0,1.0 -2012.0,0.833,0.875,1.0,0.857 -2013.0,1.0,0.0,0.0,0.857 -2014.0,0.818,0.333,0.0,0.625 -2015.0,1.0,0.0,0.0,0.867 -2016.0,1.0,0.0,0.0,0.733 +2012.0,1.0,1.0,1.0,1.0 +2013.0,1.0,0.0,0.0,0.706 +2014.0,1.0,0.0,0.0,0.765 +2015.0,1.0,0.0,0.0,0.765 +2016.0,1.0,0.0,0.0,0.706 2017.0,1.0,1.0,1.0,1.0 -2018.0,0.5,0.909,0.5,0.8 -2019.0,1.0,1.0,1.0,1.0 -2020.0,0.857,0.0,0.0,0.632 -2013.5,0.929,0.58,0.536,0.863 -4.031,0.137,0.463,0.48,0.138 +2018.0,0.5,0.944,0.5,0.905 +2019.0,0.867,0.364,0.0,0.654 +2020.0,0.909,0.0,0.0,0.769 +2013.5,0.948,0.522,0.464,0.864 +4.031,0.131,0.479,0.48,0.13 diff --git a/resultTop3_f.csv b/resultTop3_f.csv index dabc1945ef4311199c485ed64e310e2f9d376f8e..1228e553169493f073bb7341bcbf2fcef422ad88 100644 --- a/resultTop3_f.csv +++ b/resultTop3_f.csv @@ -4,14 +4,14 @@ ano_base,match,new,empty,total 2009.0,1.0,1.0,1.0,1.0 2010.0,1.0,1.0,1.0,1.0 2011.0,1.0,1.0,1.0,1.0 -2012.0,0.833,1.0,1.0,0.923 -2013.0,1.0,1.0,1.0,1.0 -2014.0,0.917,0.333,0.0,0.75 -2015.0,1.0,1.0,1.0,1.0 -2016.0,1.0,1.0,0.0,0.929 +2012.0,1.0,1.0,1.0,1.0 +2013.0,1.0,0.0,0.0,0.765 +2014.0,1.0,0.0,0.0,0.765 +2015.0,1.0,0.0,0.0,0.765 +2016.0,1.0,0.0,0.0,0.765 2017.0,1.0,1.0,1.0,1.0 -2018.0,0.5,0.909,0.5,0.8 -2019.0,1.0,0.25,1.0,0.812 -2020.0,0.875,1.0,1.0,0.875 -2013.5,0.938,0.892,0.821,0.935 -4.031,0.132,0.247,0.359,0.087 +2018.0,0.5,0.944,0.5,0.905 +2019.0,0.933,0.364,0.0,0.692 +2020.0,0.864,0.0,0.0,0.731 +2013.5,0.95,0.593,0.536,0.885 +4.031,0.13,0.47,0.48,0.123 diff --git a/resultTop3_ks.csv b/resultTop3_ks.csv index c8e233c96926fb34e8fa8d0d20578e05aa125997..b91eee0a0dce556d063db7f7773a0782d47517de 100644 --- a/resultTop3_ks.csv +++ 
b/resultTop3_ks.csv @@ -4,14 +4,14 @@ ano_base,match,new,empty,total 2009.0,1.0,1.0,1.0,1.0 2010.0,1.0,1.0,1.0,1.0 2011.0,1.0,1.0,1.0,1.0 -2012.0,1.0,0.875,0.0,0.857 -2013.0,1.0,1.0,1.0,1.0 -2014.0,0.917,0.333,0.0,0.75 -2015.0,1.0,1.0,1.0,1.0 -2016.0,1.0,1.0,0.0,0.929 +2012.0,1.0,0.818,1.0,0.882 +2013.0,0.941,1.0,1.0,0.941 +2014.0,0.941,1.0,1.0,0.941 +2015.0,0.941,0.0,1.0,0.941 +2016.0,0.941,1.0,1.0,0.941 2017.0,1.0,1.0,1.0,1.0 -2018.0,0.5,0.909,0.5,0.8 -2019.0,1.0,0.75,1.0,0.938 -2020.0,0.867,0.0,0.0,0.722 -2013.5,0.949,0.848,0.679,0.928 -4.031,0.13,0.293,0.447,0.099 +2018.0,0.5,0.944,0.5,0.905 +2019.0,0.947,0.571,1.0,0.846 +2020.0,0.923,1.0,1.0,0.923 +2013.5,0.938,0.881,0.964,0.952 +4.031,0.125,0.27,0.129,0.049 diff --git a/result_cohend.csv b/result_cohend.csv index 5d0468e6b96abe654a04be91cf562fe55ce19a4f..87a9872d2bd3e1cfbf3dbdcd288b226abf74741a 100644 --- a/result_cohend.csv +++ b/result_cohend.csv @@ -1,17 +1,17 @@ ano_base,match,new,empty,total 2007.0,1.0,1.0,1.0,1.0 -2008.0,0.6,0.0,0.0,0.429 -2009.0,0.4,0.0,0.0,0.286 -2010.0,0.4,0.0,0.0,0.286 +2008.0,0.6,0.0,0.0,0.5 +2009.0,0.4,0.0,0.0,0.333 +2010.0,0.4,0.0,0.0,0.333 2011.0,0.667,1.0,1.0,0.667 -2012.0,1.0,0.875,0.0,0.857 -2013.0,0.583,0.0,0.0,0.5 -2014.0,0.222,0.2,0.0,0.167 -2015.0,0.692,0.0,0.0,0.6 -2016.0,0.727,0.0,0.0,0.471 +2012.0,0.667,1.0,1.0,0.882 +2013.0,0.455,0.0,0.0,0.294 +2014.0,0.882,1.0,1.0,0.882 +2015.0,1.0,1.0,1.0,1.0 +2016.0,0.636,0.0,0.0,0.412 2017.0,1.0,1.0,1.0,1.0 -2018.0,0.0,1.0,1.0,0.867 -2019.0,0.833,1.0,1.0,0.875 -2020.0,0.667,0.0,0.0,0.4 -2013.5,0.628,0.434,0.357,0.6 -4.031,0.285,0.476,0.479,0.27 +2018.0,0.0,0.944,0.5,0.857 +2019.0,0.6,0.385,0.0,0.538 +2020.0,0.571,0.0,0.0,0.462 +2013.5,0.634,0.524,0.464,0.654 +4.031,0.269,0.478,0.48,0.264 diff --git a/result_f.csv b/result_f.csv index 43a1ab88e5702531bd1324e89873c9a270a3297a..289934c551d139e6ac90447080cc941970b6ab90 100644 --- a/result_f.csv +++ b/result_f.csv @@ -4,14 +4,14 @@ ano_base,match,new,empty,total 2009.0,1.0,1.0,1.0,1.0 
2010.0,1.0,1.0,1.0,1.0 2011.0,1.0,1.0,1.0,1.0 -2012.0,0.6,0.875,0.0,0.714 -2013.0,0.615,1.0,1.0,0.615 -2014.0,0.417,0.5,0.0,0.4 -2015.0,0.857,1.0,1.0,0.857 -2016.0,1.0,0.0,0.0,0.867 +2012.0,0.667,1.0,1.0,0.882 +2013.0,0.824,1.0,1.0,0.824 +2014.0,1.0,1.0,1.0,1.0 +2015.0,1.0,1.0,1.0,1.0 +2016.0,0.917,0.0,0.0,0.647 2017.0,1.0,1.0,1.0,1.0 -2018.0,0.0,1.0,1.0,0.867 -2019.0,0.833,1.0,1.0,0.875 -2020.0,0.786,0.0,0.0,0.611 -2013.5,0.793,0.812,0.714,0.843 -4.031,0.286,0.356,0.452,0.183 +2018.0,0.0,0.944,0.5,0.857 +2019.0,0.533,0.5,0.0,0.538 +2020.0,0.692,1.0,1.0,0.692 +2013.5,0.831,0.889,0.821,0.889 +4.031,0.275,0.278,0.359,0.152 diff --git a/result_ks.csv b/result_ks.csv index c0984fb625f82137bb16f2fb7c885a1a9ce91ec9..102a80ce16a50eee482f195dc51a343affbe9c70 100644 --- a/result_ks.csv +++ b/result_ks.csv @@ -4,14 +4,14 @@ ano_base,match,new,empty,total 2009.0,0.667,1.0,1.0,0.667 2010.0,0.667,1.0,1.0,0.667 2011.0,0.667,1.0,1.0,0.667 -2012.0,1.0,0.875,0.0,0.857 -2013.0,0.615,1.0,1.0,0.615 -2014.0,0.273,0.333,0.0,0.25 -2015.0,0.857,1.0,1.0,0.857 -2016.0,0.846,0.0,0.0,0.733 +2012.0,1.0,1.0,1.0,1.0 +2013.0,0.412,1.0,1.0,0.412 +2014.0,0.706,1.0,1.0,0.706 +2015.0,0.882,1.0,1.0,0.882 +2016.0,0.647,1.0,1.0,0.647 2017.0,1.0,1.0,1.0,1.0 -2018.0,0.0,1.0,1.0,0.867 -2019.0,1.0,1.0,1.0,1.0 -2020.0,1.0,0.0,0.0,0.684 -2013.5,0.733,0.801,0.714,0.752 -4.031,0.288,0.369,0.452,0.194 +2018.0,0.0,0.944,0.5,0.857 +2019.0,0.684,1.0,1.0,0.769 +2020.0,0.846,1.0,1.0,0.846 +2013.5,0.703,0.996,0.964,0.77 +4.031,0.254,0.014,0.129,0.164