diff --git a/database/create.sh b/database/create.sh index 81236920eaf2a4b04ddec465d0b4ec1a02ae11af..870699cfed5cb9db084ae70668e832e781d268e6 100755 --- a/database/create.sh +++ b/database/create.sh @@ -1,6 +1,25 @@ #!/bin/bash +# Copyright (C) 2007-2013 Centro de Computacao Cientifica e Software Livre +# Departamento de Informatica - Universidade Federal do Parana - C3SL/UFPR +# +# This file is part of database +# +# database is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, +# USA. -DB_NAME=$1 +export DB_NAME=$1 DB_LINK_LOCATION=/usr/share/postgresql/8.4/contrib/dblink.sql # checking parameters @@ -9,6 +28,10 @@ if [[ -z $DB_NAME ]]; then exit 1 fi + +# importing db_link functions +psql $DB_NAME -f $DB_LINK_LOCATION + # creating database tables and structure for file in $(ls -B create/); do psql $DB_NAME -f create/$file @@ -29,5 +52,5 @@ for file in $(ls -B query/); do psql $DB_NAME -f query/$file done -# importing db_link functions -psql $DB_NAMR -f $DB_LINK_LOCATION +# installing crontab +./cron.sh diff --git a/database/create/002-dimensions.sql b/database/create/002-dimensions.sql index a62f05cdebd8a4a1e5b2fc7e8420273a6dfa5241..59762687694883c519ba216155418b79c521175b 100644 --- a/database/create/002-dimensions.sql +++ b/database/create/002-dimensions.sql @@ -37,21 +37,22 @@ CREATE TABLE dim_inventory ( macaddr MACADDR NOT NULL, load_date DATE NOT NULL DEFAULT CURRENT_DATE REFERENCES dim_date, project project_enum DEFAULT 'proinfo' NOT NULL, - + -- inventory info - disk1_model TEXT, - disk1_size INTEGER, - disk1_used INTEGER, + disk1_model TEXT NOT NULL, + disk1_size INTEGER NOT NULL, + disk1_used INTEGER NOT NULL, disk2_model TEXT, disk2_size INTEGER, disk2_used INTEGER, - memory INTEGER, - processor TEXT, - os_type TEXT, - os_distro TEXT, - os_kernel TEXT, + memory INTEGER NOT NULL, + processor TEXT NOT NULL, + os_type TEXT NOT NULL, + os_distro TEXT NOT NULL, + os_kernel TEXT NOT NULL, + is_first BIT NOT NULL DEFAULT '0', is_current BIT NOT NULL DEFAULT '1', CHECK (macaddr <> '00:00:00:00:00:00') diff --git a/database/create/003-facts.sql b/database/create/003-facts.sql index b3f4b72eec5e46b225da24bd7ba40cf955a2a671..79098b3e6fb16daba528938bb1a7669b40a39d2f 100644 --- a/database/create/003-facts.sql +++ b/database/create/003-facts.sql @@ -5,6 +5,7 @@ CREATE TABLE fact_contact ( cit_id INTEGER NOT NULL REFERENCES dim_city, dat_id DATE NOT NULL REFERENCES dim_date, macaddr MACADDR NOT NULL, + project project_enum DEFAULT 'proinfo' NOT NULL, UNIQUE (sch_id, cit_id, dat_id, macaddr) ); diff --git a/database/create/004-aggregates.sql b/database/create/004-aggregates.sql new file mode 100644 index 0000000000000000000000000000000000000000..7217908da6f13f23031efe7c9521873b521c4e71 --- /dev/null +++ b/database/create/004-aggregates.sql @@ -0,0 +1,13 @@ +CREATE TABLE aggr_availability ( + base_date DATE NOT NULL, + macaddr MACADDR NOT NULL, + project project_enum DEFAULT 'proinfo' NOT NULL, + sch_inep CHARACTER VARYING(12) NOT NULL, + sch_name CHARACTER VARYING(150) NOT NULL, + city CHARACTER VARYING(100) NOT NULL, + state CHARACTER VARYING(2) NOT NULL, + region CHARACTER VARYING(12) NOT NULL, + is_green SMALLINT NOT NULL DEFAULT 0, + is_yellow SMALLINT NOT NULL DEFAULT 0, + is_red SMALLINT NOT NULL DEFAULT 0 +); diff --git a/database/create/004-control.sql b/database/create/005-control.sql similarity index 100% rename from database/create/004-control.sql rename to database/create/005-control.sql diff --git a/database/create/005-indexes.sql b/database/create/006-indexes.sql similarity index 100% rename from database/create/005-indexes.sql rename to database/create/006-indexes.sql diff --git a/database/create/006-language.sql b/database/create/007-language.sql similarity index 100% rename from database/create/006-language.sql rename to database/create/007-language.sql diff --git a/database/cron.sh b/database/cron.sh index bc3a073ccc7dc66039dd9f5baa540f1edf57e7fd..87e6eb00c70aee05661b3e4fa340515f94444c1f 100644 --- a/database/cron.sh +++ b/database/cron.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (C) 2004-2010 Centro de Computacao Cientifica e Software Livre +# Copyright (C) 2007-2013 Centro de Computacao Cientifica e Software Livre # Departamento de Informatica - Universidade Federal do Parana - C3SL/UFPR # # This file is part of database @@ -19,7 +19,6 @@ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, # USA. -LOADFILE='/var/lib/postgresql/database/load/load.sh' -LOGFILE="/var/lib/postgresql/database/log/$(date +%d_%m_%y).log" +COMMAND="psql -d $DB_NAME -c 'SELECT load_dw();'" -echo "59 23 * * * $LOADFILE > $LOGFILE" | crontab - +echo "59 23 * * * $COMMAND" | crontab - diff --git a/database/load/000-sa.sql b/database/load/000-sa.sql index 18bf856aa97c4e2ab134f149869fabc3c68f9750..fd6bf0b121c628c31461a7a8a1fb50667de9ad29 100644 --- a/database/load/000-sa.sql +++ b/database/load/000-sa.sql @@ -49,7 +49,7 @@ BEGIN i.disk2_size, i.disk2_used, i.extra_hds, - '1'::boolean as valid + '0'::boolean as valid FROM proinfo_inventory i LEFT JOIN @@ -61,6 +61,9 @@ BEGIN -- how many rows were inserted? GET DIAGNOSTICS total_rows = ROW_COUNT; + -- creating primary key (and btree index) + ALTER TABLE sa_inventory ADD PRIMARY KEY ("id"); + -- logging INSERT INTO control (name, start_time, end_time, total) VALUES ('create sa inventory', start_ts, CLOCK_TIMESTAMP(), total_rows); @@ -125,7 +128,7 @@ BEGIN -- truncating OLTP table - --TRUNCATE TABLE proinfo_net_usage; + TRUNCATE TABLE proinfo_net_usage; END; $$ language plpgsql; @@ -147,15 +150,27 @@ CREATE OR REPLACE FUNCTION sa_sanitize_inventory() returns void as $$ DECLARE start_ts TIMESTAMP; total_rows INTEGER; + new_inv record; BEGIN -- getting initial timestamp start_ts = CLOCK_TIMESTAMP(); - -- updating the 'valid' column on invalid records + + -- we create this table marking as rows as invalid. + -- when a computer sends more than one inventory per day, we get + -- only the last one. Validating the newers: + FOR new_inv IN + SELECT max(id) AS id FROM sa_inventory GROUP BY sch_id, macaddr, contact_date + LOOP + UPDATE sa_inventory set valid = '1' WHERE id = new_inv.id; + END LOOP; + + -- Checking whether valid records have the required information UPDATE sa_inventory SET valid = '0' WHERE - sch_id is NULL OR + valid = '1' AND + (sch_id is NULL OR cit_id is NULL OR contact_date is NULL OR os_type is NULL OR @@ -168,7 +183,8 @@ BEGIN disk1_used is NULL OR NOT valid_macaddress(macaddr) OR project < 0 OR project > 3 OR - inep is NULL; + inep is NULL); + -- log the invalid information into rejected table INSERT INTO rejected_inventory diff --git a/database/load/002-dim_inventory.sql b/database/load/002-dim_inventory.sql index a6b62d4bb4584f32d7659c70d8ee5e022d3648dd..8395882cb28aba58bb31faddb91c838fe5a6e187 100644 --- a/database/load/002-dim_inventory.sql +++ b/database/load/002-dim_inventory.sql @@ -9,118 +9,151 @@ SELECT CASE WHEN $1 = 0 THEN 'proinfo'::project_enum END as project; $$ LANGUAGE SQL; + CREATE OR REPLACE FUNCTION load_dim_inventory() returns void as $$ DECLARE inv_row record; + date_row record; + start_ts TIMESTAMP; + total_rows INTEGER; + is_new BOOLEAN; BEGIN - -- I think the better way to compare the current inventory and the new - -- one is to iterate over each record. Using one batch insert will - -- certainly perform way faster, but here we have more flexibility. - -- Also, we are inside one trasaction and it will be commited just once - -- at the end of the function. - FOR inv_row IN SELECT * FROM sa_inventory s LEFT JOIN (SELECT * FROM dim_inventory - WHERE is_current = '1') d ON s.sch_id = d.sch_id AND - s.macaddr::macaddr = d.macaddr AND NOT - (s.memory >= d.memory * 0.9 AND - s.memory <= d.memory * 1.1 AND - s.processor = d.processor AND - s.os_type = d.os_type AND - s.os_distro = d.os_distro AND - s.os_kernel = d.os_kernel AND - s.disk1_model = d.disk1_model AND - s.disk1_size >= d.disk1_size * 0.9 AND - s.disk1_size <= d.disk1_size * 1.1 AND - s.disk1_used = d.disk1_used AND - s.disk2_model = d.disk2_model AND - s.disk2_size >= d.disk2_size * 0.9 AND - s.disk2_size <= d.disk2_size * 1.1 AND - s.disk2_used = d.disk2_used) - WHERE valid = '1' LOOP - - - -- IF .... THEN - -- CONTINUE - -- END IF; - - -- If we get here, we decided to update the record. First insert - -- the new inventory - INSERT INTO dim_inventory (sch_id, macaddr, load_date, project, - disk1_model, disk1_size, disk1_used, disk2_model, - disk2_size, disk2_used, memory, processor, os_type, - os_distro, os_kernel, is_current) VALUES - (inv_row.sch_id, inv_row.macaddr::macaddr, - inv_row.contact_date, to_project_enum(inv_row.project), - inv_row.disk1_model, inv_row.disk1_size, - inv_row.disk1_used, inv_row.disk2_model, - inv_row.disk2_size, inv_row.disk2_used, - inv_row.memory, inv_row.processor, - inv_row.os_type, inv_row.os_distro, - inv_row.os_kernel, '1'); - - -- Then update the 'is_current' flag of the last one. - UPDATE dim_inventory SET is_current = '0' WHERE - id = inv_row.id; - + -- getting initial timestamp + start_ts = CLOCK_TIMESTAMP(); + total_rows := 0; + + -- since we removed multiple inventories on the same date on sa_sanitize, + -- looping one day at a time will ensure that we won't have more than one + -- inventory from the same computer. + FOR date_row IN SELECT DISTINCT contact_date FROM sa_inventory WHERE valid = '1' ORDER BY contact_date + LOOP + + -- I think the better way to compare the current inventory and the new + -- one is to iterate over each record. Using one batch insert will + -- certainly perform way faster, but here we have more flexibility. + FOR inv_row IN + SELECT + s.sch_id as new_sch_id, + s.macaddr as new_macaddr, + s.contact_date as new_contact_date, + s.project as new_project, + s.disk1_model as new_disk1_model, + s.disk1_size as new_disk1_size, + s.disk1_used as new_disk1_used, + s.disk2_model as new_disk2_model, + s.disk2_size as new_disk2_size, + s.disk2_used as new_disk2_used, + s.memory as new_memory, + s.processor as new_processor, + s.os_type as new_os_type, + s.os_distro as new_os_distro, + s.os_kernel as new_os_kernel, + + d.id as cur_id, + d.sch_id as cur_sch_id, + d.load_date as cur_load_date, + d.disk1_model as cur_disk1_model, + d.disk1_size as cur_disk1_size, + d.disk1_used as cur_disk1_used, + d.disk2_model as cur_disk2_model, + d.disk2_size as cur_disk2_size, + d.disk2_used as cur_disk2_used, + d.memory as cur_memory, + d.processor as cur_processor, + d.os_type as cur_os_type, + d.os_distro as cur_os_distro, + d.os_kernel as cur_os_kernel + FROM sa_inventory s LEFT JOIN + (SELECT * FROM dim_inventory WHERE is_current = '1') as d + ON s.sch_id = d.sch_id AND s.macaddr::macaddr = d.macaddr + WHERE s.contact_date = date_row.contact_date AND s.valid = '1' + LOOP + + + -- in which cases we shouldn't update the table + -- Note that since disk2_* can be null, the logic is a little + -- more complicated... The other columns are NOT NULL. + IF (inv_row.cur_id IS NOT NULL AND + inv_row.new_memory >= inv_row.cur_memory * 0.9 AND + inv_row.new_memory <= inv_row.cur_memory * 1.1 AND + inv_row.new_processor = inv_row.cur_processor AND + inv_row.new_os_type = inv_row.cur_os_type AND + inv_row.new_os_distro = inv_row.cur_os_distro AND + inv_row.new_os_kernel = inv_row.cur_os_kernel AND + inv_row.new_disk1_model = inv_row.cur_disk1_model AND + inv_row.new_disk1_size >= inv_row.cur_disk1_size * 0.9 AND + inv_row.new_disk1_size <= inv_row.cur_disk1_size * 1.1 AND + inv_row.new_disk1_used >= inv_row.cur_disk1_used * 0.9 AND + inv_row.new_disk1_used <= inv_row.cur_disk1_used * 1.1 AND + ( + (inv_row.new_disk2_model IS NULL AND inv_row.cur_disk2_model IS NULL) + OR + inv_row.new_disk2_model = inv_row.cur_disk2_model + ) AND + ( + (inv_row.new_disk2_size IS NULL AND inv_row.cur_disk2_size IS NULL) + OR + (inv_row.new_disk2_size >= inv_row.cur_disk2_size * 0.9 AND + inv_row.new_disk2_size <= inv_row.cur_disk2_size * 1.1) + ) AND + ( + (inv_row.new_disk2_used IS NULL AND inv_row.cur_disk2_used IS NULL) + OR + (inv_row.new_disk2_used >= inv_row.cur_disk2_used * 0.9 AND + inv_row.new_disk2_used <= inv_row.cur_disk2_used * 1.1) + )) + THEN + CONTINUE; + END IF; + + -- if this inventory is older than the current, ignore it. Probably + -- the nicest way here would be to insert the historical data, but + -- that would complicate the loading function too much and slow all + -- things down. + IF (inv_row.new_contact_date <= inv_row.cur_load_date) THEN + CONTINUE; + END IF; + + -- if cur_id is NULL then it's a new record. Marking it accordingly.. + IF (inv_row.cur_id IS NULL) THEN + is_new := '1'; + ELSE + is_new := '0'; + END IF; + + -- If we get here, we decided to update the record. First insert + -- the new inventory + INSERT INTO dim_inventory (sch_id, macaddr, load_date, project, + disk1_model, disk1_size, disk1_used, disk2_model, + disk2_size, disk2_used, memory, processor, os_type, + os_distro, os_kernel, is_first, is_current) VALUES + (inv_row.new_sch_id, inv_row.new_macaddr::macaddr, + inv_row.new_contact_date, to_project_enum(inv_row.new_project), + inv_row.new_disk1_model, inv_row.new_disk1_size, + inv_row.new_disk1_used, inv_row.new_disk2_model, + inv_row.new_disk2_size, inv_row.new_disk2_used, + inv_row.new_memory, inv_row.new_processor, + inv_row.new_os_type, inv_row.new_os_distro, + inv_row.new_os_kernel, (SELECT CASE WHEN is_new THEN B'1' ELSE B'0' END), '1'); + + -- only for logging purposes + total_rows := total_rows + 1; + + -- Since we only insert newer inventories, we must update the + -- 'is_current' flag of the last inventory. Note that maybe we + -- don't have a previous inventory to update + IF (inv_row.cur_id IS NOT NULL) THEN + UPDATE dim_inventory SET is_current = '0' WHERE + id = inv_row.cur_id; + END IF; + + END LOOP; END LOOP; + -- logging + INSERT INTO control (name, start_time, end_time, total) + VALUES ('loading dim_inventory', start_ts, CLOCK_TIMESTAMP(), total_rows); + END; $$ language plpgsql; - - ---CREATE OR REPLACE FUNCTION load_dim_inventory() returns void as $$ ---BEGIN - -- creating a temporary table containing the inventories - -- we will need to insert/update - i.e. new inventories - -- EXCEPT current inventories --- CREATE TEMP TABLE new_inventories ON COMMIT DROP AS - --- SELECT sch_id, macaddr FROM --- ( --- -- new computer information --- (SELECT --- sch_id, macaddr, project, memory, --- disk1_model, disk1_size, --- disk2_model, disk2_size, --- os_type, os_distro, os_kernel --- FROM sa_inventory --- ) - --- EXCEPT - - -- list of current inventory for each computer --- (SELECT --- sch_id, macaddr, project, memory, --- disk1_model, disk1_size, --- disk2_model, disk2_size, --- os_type, os_distro, os_kernel --- FROM dim_inventory --- WHERE is_current = '1' --- ) --- ) as A; - - -- setting is_current to '0' on old records --- UPDATE dim_inventory SET is_current = '0' --- WHERE --- is_current = '1' and (sch_id, macaddr) IN (SELECT * FROM new_inventories); - - -- inserting new records --- INSERT INTO dim_inventory --- (sch_id, macaddr, load_date, project, --- disk1_model, disk1_size, disk1_used, --- disk2_model, disk2_size, disk2_used, --- memory, processor, --- os_type, os_distro, os_kernel, is_current) --- SELECT --- s.sch_id, s.macaddr, s.contact_date, s.project, --- s.disk1_model, s.disk1_size, s.disk1_used, --- s.disk2_model, s.disk2_size, s.disk2_used, --- s.memory, s.processor, --- s.os_type, s.os_distro, s.os_kernel, '1' --- FROM new_inventories n INNER JOIN sa_inventory s --- ON n.sch_id = s.sch_id and n.macaddr = s.macaddr; - - -- when the transaction commits, the temporary table - -- 'new_inventories' will be removed ---END; ---$$ language plpgsql; diff --git a/database/load/003-fact_contact.sql b/database/load/003-fact_contact.sql index 8fc92365f6ce2048a75e8db679ac1754c7655ec4..8e771ffffe01e1ad4fec98af152938527717c8dd 100644 --- a/database/load/003-fact_contact.sql +++ b/database/load/003-fact_contact.sql @@ -15,10 +15,10 @@ BEGIN INTO result from sa_inventory; -- - INSERT INTO fact_contact (sch_id, cit_id, dat_id, macaddr) - (SELECT sch_id, cit_id, contact_date, macaddr::macaddr FROM sa_inventory WHERE valid = '1') + INSERT INTO fact_contact (sch_id, cit_id, dat_id, macaddr, project) + (SELECT sch_id, cit_id, contact_date, macaddr::macaddr, to_project_enum(project) FROM sa_inventory WHERE valid = '1') EXCEPT - (SELECT sch_id, cit_id, dat_id, macaddr FROM fact_contact WHERE dat_id >= result.min_date AND dat_id <= result.max_date); + (SELECT sch_id, cit_id, dat_id, macaddr, project FROM fact_contact WHERE dat_id >= result.min_date AND dat_id <= result.max_date); -- how many rows were inserted? GET DIAGNOSTICS total_rows = ROW_COUNT; diff --git a/database/load/004-fact_net_usage.sql b/database/load/004-fact_net_usage.sql index 428fc342ff0b0422fa74be68fbd30e3c131ad1b4..8fc8b97b1db5db06a4b84dc4017bad165d6b7ec0 100644 --- a/database/load/004-fact_net_usage.sql +++ b/database/load/004-fact_net_usage.sql @@ -18,7 +18,8 @@ BEGIN INSERT INTO fact_net_usage (sch_id, cit_id, dat_id, macaddr, collect_time, down_kbits, down_packages, up_kbits, up_packages) (SELECT sch_id, cit_id, contact_date, macaddr, collect_time, down_kbits, - down_packages, up_kbits, up_packages FROM sa_net_usage) + down_packages, up_kbits, up_packages FROM sa_net_usage + WHERE valid = '1') EXCEPT (SELECT sch_id, cit_id, dat_id, macaddr, collect_time, down_kbits, down_packages, up_kbits, up_packages FROM fact_net_usage WHERE diff --git a/database/load/005-aggregates.sql b/database/load/005-aggregates.sql new file mode 100644 index 0000000000000000000000000000000000000000..5ac6275f750159295f6d1696f7a00070d9866052 --- /dev/null +++ b/database/load/005-aggregates.sql @@ -0,0 +1,72 @@ +CREATE OR REPLACE FUNCTION load_aggr_availability() returns void as $$ +DECLARE + date_it date; + i INTEGER; + start_ts TIMESTAMP; + total_rows INTEGER; + cur_rows INTEGER; +BEGIN + + -- getting initial timestamp + start_ts = CLOCK_TIMESTAMP(); + total_rows := 0; + + -- remove the index - later we create it again + DROP INDEX IF EXISTS aggr_availability_base_date_idx; + + -- remove old data + TRUNCATE TABLE aggr_availability; + + -- iterate over past months + FOR i IN 0..5 LOOP + date_it := CURRENT_DATE - (i * interval '1 month'); + + INSERT INTO aggr_availability + SELECT + date_it, + f.macaddr, + f.project, + s.inep, + s.name, + c.name, + c.state, + c.region, + CASE WHEN date_it - max_dat_id <= 10 THEN 1 ELSE 0 END as is_green, + CASE WHEN date_it - max_dat_id > 10 AND date_it - max_dat_id <= 30 THEN 1 ELSE 0 END as is_yellow, + CASE WHEN date_it - max_dat_id > 30 THEN 1 ELSE 0 END as is_red + FROM + (SELECT + max(dat_id) as max_dat_id, + f.sch_id, + f.cit_id, + f.macaddr, + f.project + FROM + fact_contact f + WHERE + dat_id <= date_it + GROUP BY + f.cit_id, f.sch_id, f.macaddr, f.project) as f + INNER JOIN + dim_school s + ON + f.sch_id = s.id + INNER JOIN + dim_city c + ON + f.cit_id = c.id; + + -- how many rows were inserted? + GET DIAGNOSTICS cur_rows := ROW_COUNT; + total_rows := total_rows + cur_rows; + + END LOOP; + + -- creating index to speed-up per-date queries, and it speeds up a lot.. + CREATE INDEX aggr_availability_base_date_idx ON aggr_availability(base_date); + + -- logging + INSERT INTO control (name, start_time, end_time, total) + VALUES ('loading aggr_availability', start_ts, CLOCK_TIMESTAMP(), total_rows); +END; +$$ language plpgsql; diff --git a/database/load/999-load.sql b/database/load/999-load.sql index 7ccf170ef29905ed7fafc96bcaaf360615986055..34df3b01ba0ee8e337c69ca3fb3ef35b3f6750b3 100644 --- a/database/load/999-load.sql +++ b/database/load/999-load.sql @@ -2,7 +2,7 @@ -- only function that should be called directly. Run it once a day. CREATE OR REPLACE FUNCTION load_dw() returns void as $$ BEGIN - + -- populating dim_date with next 10 years PERFORM populate_dim_date(CAST(extract(year FROM CURRENT_DATE) AS INTEGER) + 10); @@ -17,7 +17,8 @@ BEGIN -- ET'L' -> Load PERFORM load_dim_inventory(); PERFORM load_fact_contact(); - --SELECT load_fact_net_usage(); + PERFORM load_fact_net_usage(); + PERFORM load_aggr_availability(); -- dropping staging area tables PERFORM sa_drop(); diff --git a/database/query/alert.sql b/database/query/alert.sql new file mode 100644 index 0000000000000000000000000000000000000000..181e6293420cebfe42667479c3ee014c0b2b6aaf --- /dev/null +++ b/database/query/alert.sql @@ -0,0 +1,208 @@ +CREATE OR REPLACE FUNCTION alert_hd_and_mem_brazil(proj project_enum) RETURNS + TABLE ("hd" bigint, "memory" bigint) AS $$ +BEGIN + RETURN QUERY SELECT + SUM(CASE WHEN i1.memory != i2.memory THEN 1 ELSE 0 END), + SUM( + CASE WHEN + i1.disk1_model != i2.disk1_model + OR + i1.disk1_size != i2.disk1_size + OR + (i1.disk2_model != i2.disk2_model AND (i1.disk2_model IS NOT NULL OR i2.disk2_model IS NOT NULL)) + OR + (i1.disk2_size != i2.disk2_size AND (i1.disk2_size IS NOT NULL OR i2.disk2_size IS NOT NULL)) + THEN 1 + ELSE 0 END + ) + FROM + dim_inventory i1 + INNER JOIN + dim_inventory i2 + ON + i1.sch_id = i2.sch_id AND i1.macaddr = i2.macaddr AND + i1.is_first = '1' AND i2.is_current = '1' + WHERE + i1.project = proj; +END; +$$ language plpgsql; + + + +CREATE OR REPLACE FUNCTION alert_hd_and_mem_region(proj project_enum) RETURNS + TABLE ("region" varchar, "hd" bigint, "memory" bigint) AS $$ +BEGIN + RETURN QUERY SELECT + c.region, + SUM(CASE WHEN i1.memory != i2.memory THEN 1 ELSE 0 END), + SUM( + CASE WHEN + i1.disk1_model != i2.disk1_model + OR + i1.disk1_size != i2.disk1_size + OR + (i1.disk2_model != i2.disk2_model AND (i1.disk2_model IS NOT NULL OR i2.disk2_model IS NOT NULL)) + OR + (i1.disk2_size != i2.disk2_size AND (i1.disk2_size IS NOT NULL OR i2.disk2_size IS NOT NULL)) + THEN 1 + ELSE 0 END + ) + FROM + dim_inventory i1 + INNER JOIN + dim_inventory i2 + ON + i1.sch_id = i2.sch_id AND i1.macaddr = i2.macaddr AND + i1.is_first = '1' AND i2.is_current = '1' + INNER JOIN + dim_school s + ON + i1.sch_id = s.id + INNER JOIN + dim_city c + ON + s.cit_id = c.id + WHERE + i1.project = proj + GROUP BY + c.region; + +END; +$$ language plpgsql; + + +CREATE OR REPLACE FUNCTION alert_hd_and_mem_state(proj project_enum, region_in text) RETURNS + TABLE ("state" varchar, "hd" bigint, "memory" bigint) AS $$ +BEGIN + RETURN QUERY SELECT + c.state, + SUM(CASE WHEN i1.memory != i2.memory THEN 1 ELSE 0 END), + SUM( + CASE WHEN + i1.disk1_model != i2.disk1_model + OR + i1.disk1_size != i2.disk1_size + OR + (i1.disk2_model != i2.disk2_model AND (i1.disk2_model IS NOT NULL OR i2.disk2_model IS NOT NULL)) + OR + (i1.disk2_size != i2.disk2_size AND (i1.disk2_size IS NOT NULL OR i2.disk2_size IS NOT NULL)) + THEN 1 + ELSE 0 END + ) + FROM + dim_inventory i1 + INNER JOIN + dim_inventory i2 + ON + i1.sch_id = i2.sch_id AND i1.macaddr = i2.macaddr AND + i1.is_first = '1' AND i2.is_current = '1' + INNER JOIN + dim_school s + ON + i1.sch_id = s.id + INNER JOIN + dim_city c + ON + s.cit_id = c.id + WHERE + i1.project = proj AND c.region = region_in + GROUP BY + c.region, c.state; + +END; +$$ language plpgsql; + + +CREATE OR REPLACE FUNCTION alert_hd_and_mem_city(proj project_enum, state_in text) RETURNS + TABLE ("city" varchar, "hd" bigint, "memory" bigint) AS $$ +BEGIN + RETURN QUERY SELECT + c.name, + SUM(CASE WHEN i1.memory != i2.memory THEN 1 ELSE 0 END), + SUM( + CASE WHEN + i1.disk1_model != i2.disk1_model + OR + i1.disk1_size != i2.disk1_size + OR + (i1.disk2_model != i2.disk2_model AND (i1.disk2_model IS NOT NULL OR i2.disk2_model IS NOT NULL)) + OR + (i1.disk2_size != i2.disk2_size AND (i1.disk2_size IS NOT NULL OR i2.disk2_size IS NOT NULL)) + THEN 1 + ELSE 0 END + ) + FROM + dim_inventory i1 + INNER JOIN + dim_inventory i2 + ON + i1.sch_id = i2.sch_id AND i1.macaddr = i2.macaddr AND + i1.is_first = '1' AND i2.is_current = '1' + INNER JOIN + dim_school s + ON + i1.sch_id = s.id + INNER JOIN + dim_city c + ON + s.cit_id = c.id + WHERE + i1.project = proj AND c.state = state_in + GROUP BY + c.name; + +END; +$$ language plpgsql; + + + +CREATE OR REPLACE FUNCTION alert_hd_and_mem_report(proj project_enum, region_in text, state_in text, city_in text) RETURNS + TABLE ("load_date" text, "region" text, "state" varchar, "city" text, "school" text, "machine" macaddr, "old_date" date, "new_date" date, "os_distro" text, "processor" text, "old_memory_size" integer, "new_memory_size" integer, "old_hd_size" integer, "new_hd_size" integer, "memory_alert" integer, "hd_alert" integer) AS $$ +BEGIN + RETURN QUERY SELECT + to_char(current_date, 'DD/MM/YYYY'), + initcap(c.region), + c.state, + initcap(c.name), + initcap(s.name), + i1.macaddr, + i1.load_date, i2.load_date, + i1.os_distro, i1.processor, + i1.memory, i2.memory, + i1.disk1_size, i2.disk1_size, + CASE WHEN i1.memory != i2.memory THEN 1 ELSE 0 END, + CASE WHEN + i1.disk1_model != i2.disk1_model + OR + i1.disk1_size != i2.disk1_size + OR + (i1.disk2_model != i2.disk2_model AND (i1.disk2_model IS NOT NULL OR i2.disk2_model IS NOT NULL)) + OR + (i1.disk2_size != i2.disk2_size AND (i1.disk2_size IS NOT NULL OR i2.disk2_size IS NOT NULL)) + THEN 1 ELSE 0 END + FROM + dim_inventory i1 + INNER JOIN + dim_inventory i2 + ON + i1.sch_id = i2.sch_id AND i1.macaddr = i2.macaddr AND + i1.is_first = '1' AND i2.is_current = '1' + INNER JOIN + dim_school s + ON + i1.sch_id = s.id + INNER JOIN + dim_city c + ON + s.cit_id = c.id + WHERE + i1.project = proj AND c.region = region_in AND c.state = state_in AND c.name = city_in + AND + ((i1.memory != i2.memory) OR (i1.disk1_model != i2.disk1_model OR i1.disk1_size != i2.disk1_size OR + (i1.disk2_model != i2.disk2_model AND (i1.disk2_model IS NOT NULL OR i2.disk2_model IS NOT NULL)) + OR + (i1.disk2_size != i2.disk2_size AND (i1.disk2_size IS NOT NULL OR i2.disk2_size IS NOT NULL)))) + ORDER BY + s.name, i1.macaddr, i1.load_date; +END; +$$ language plpgsql; diff --git a/database/query/audit.sql b/database/query/audit.sql new file mode 100644 index 0000000000000000000000000000000000000000..d69881b5cc25c7cfdcee681642dbe3ce049a36d3 --- /dev/null +++ b/database/query/audit.sql @@ -0,0 +1,65 @@ +CREATE OR REPLACE FUNCTION audit_city(proj project_enum, region_in text, state_in text, city_in text, base_date DATE DEFAULT CURRENT_DATE) RETURNS + TABLE ("data_atual" text, "regiao" text, "aud_estado" varchar, "cidade" text, "escola" text, "instaladas" bigint, "nao_instaladas" integer) AS $$ +BEGIN + + RETURN QUERY SELECT + to_char(base_date, 'DD/MM/YYYY'), + initcap(region), + state, + initcap(city), + initcap(sch_name), + COUNT(*), + 0 + FROM + (SELECT + DISTINCT + f.sch_id as sch_id, + s.name as sch_name, + f.macaddr as macaddr, + c.region as region, + c.state as state, + c.name as city + FROM + fact_contact f + INNER JOIN + dim_city c + ON + f.cit_id = c.id + INNER JOIN + dim_school s + ON + f.sch_id = s.id + WHERE + project = proj AND c.region = upper(region_in) AND c.state = upper(state_in) AND c.name = upper(city_in) AND f.dat_id <= base_date + ) AS a + GROUP BY + region, state, city, sch_name; +END; +$$ language plpgsql; + + + +CREATE OR REPLACE FUNCTION audit_no_agent_city(proj project_enum, region_in text, state_in text, city_in text, base_date DATE DEFAULT CURRENT_DATE) RETURNS + TABLE ("data_atual" text, "regiao" text, "aud_estado" varchar, "cidade" text, "escola" text) AS $$ +BEGIN + + RETURN QUERY SELECT + to_char(base_date, 'DD/MM/YYYY'), + initcap(c.region), + c.state, + initcap(c.name), + initcap(s.name) + FROM + dim_school s + INNER JOIN + dim_city c + ON + s.cit_id = c.id + LEFT JOIN + (SELECT * FROM fact_contact WHERE dat_id <= base_date AND project = proj) AS f + ON + c.id = f.cit_id + WHERE + c.region = upper(region_in) AND c.state = upper(state_in) AND c.name = upper(city_in) AND f.cit_id is NULL; +END; +$$ language plpgsql; \ No newline at end of file diff --git a/database/query/availability.sql b/database/query/availability.sql index c3a9f6f10c2058cf2337f59cce2d657965fb627f..9df7ba815efec2b9758cce385aeb9bf722004963 100644 --- a/database/query/availability.sql +++ b/database/query/availability.sql @@ -1,213 +1,257 @@ -/*Brasil*/ -create or replace function is_within(dat date, low integer, high integer) returns integer as $$ -select (case when $1 > current_date - $2 and $1 < current_date - $3 then 1 else 0 END); -$$ language sql; - -create or replace function is_green(date) returns integer as $$ -select is_within($1, 10, 0); -$$ language sql; - -create or replace function is_yellow(date) returns integer as $$ -select is_within($1, 20, 10); -$$ language sql; - -create or replace function is_red(date) returns integer as $$ -select is_within($1, 30, 20); -$$ language sql; - -create or replace function is_black(date) returns integer as $$ -select (case when $1 < current_date - 30 then 1 else 0 END) as black -$$ language sql; - -create or replace function in_date_range(date) returns boolean as $$ -select (case when $1 > current_date - interval '6 months' then true else false END); -$$ language sql; - -create or replace function get_color(date) returns text as $$ -select (case when is_green($1) = 1 then 'green' else - case when is_yellow($1) = 1 then 'yellow' else - case when is_red($1) = 1 then 'red' else 'black' END END END); -$$ language sql; - -create or replace function days_lastContact(date) returns int as $$ -select (current_date - $1); -$$ language sql; - -create or replace function num_contactsMonth(macaddr, integer) returns integer as $$ -select count(*)::integer as contacted from fact_contact where macaddr = $1 and sch_id = $2 and (extract(month from current_date) = extract(month from dat_id)) and (extract(year from current_date) = extract(year from dat_id)); -$$ language sql; - -/* trocado 'dat_id' por 'load_date' ( aparentemente deu certo ); substituir em todas as outras funçoes */ -create or replace function availability_brazil(proj project_enum) returns table -("Verde" bigint, "Amarelo" bigint, "Vermelho" bigint, "Preto" bigint) as $$ -select sum(green) as "Verde", sum(yellow) as "Amarelo", sum(red) as "Red", sum(black) as "Preto" from ( - select is_green(load_date) as green, - is_yellow(load_date) as yellow, - is_red(load_date) as red, - is_black(load_date) as black - from dim_inventory i - where project = $1 and load_date > current_date - 40 -) as temp; -$$ language sql; - -create or replace function availability_brazil_lines(proj project_enum) returns table -("Verde" bigint, "Amarelo" bigint, "Vermelho" bigint, "Preto" bigint, "Data" date) as $$ -select sum(green), sum(yellow), sum(red), sum(black), dat_id -from ( - select is_green(dat_id) as green, - is_yellow(dat_id) as yellow, - is_red(dat_id) as red, - is_black(dat_id) as black, - dat_id - from dim_inventory i join fact_contact c on i.macaddr = c.macaddr - where project = $1 and in_date_range(dat_id) -) as temp -group by dat_id -order by dat_id; -$$ language sql; - -create or replace view school_and_location as select s.id as id, inep, s.name as school, address, cep, c.name as city, state, region -from dim_school s join dim_city c on c.id = s.cit_id; - -/*Regiao*/ -create or replace function availability_region(proj project_enum) returns table -("Verde" bigint, "Amarelo" bigint, "Vermelho" bigint, "Preto" bigint, "Regiao" text) as $$ -select sum(green), sum(yellow), sum(red), sum(black), region from ( - select is_green(dat_id) as green, - is_yellow(dat_id) as yellow, - is_red(dat_id) as red, - is_black(dat_id) as black, sl.region - from dim_inventory i join fact_contact c on i.macaddr = c.macaddr - join school_and_location sl on i.sch_id = sl.id - where project = $1 and dat_id > current_date - 40 -) as temp -group by region; -$$ language sql; - - -create or replace function availability_region_lines(proj project_enum) returns table -("Verde" bigint, "Amarelo" bigint, "Vermelho" bigint, "Preto" bigint, "Data" date) as $$ -select sum(green), sum(yellow), sum(red), sum(black), dat_id -from ( - select is_green(dat_id) as green, - is_yellow(dat_id) as yellow, - is_red(dat_id) as red, - is_black(dat_id) as black, - dat_id - from dim_inventory i join fact_contact c on i.macaddr = c.macaddr - where project = $1 and in_date_range(dat_id) -) as temp -group by dat_id -order by dat_id; -$$ language sql; - -/*Estado*/ -create or replace function availability_state(proj project_enum, region text) returns table -("Verde" bigint, "Amarelo" bigint, "Vermelho" bigint, "Preto" bigint, "Estado" text) as $$ -select sum(green), sum(yellow), sum(red), sum(black), state from ( - select is_green(dat_id) as green, - is_yellow(dat_id) as yellow, - is_red(dat_id) as red, - is_black(dat_id) as black, sl.state - from dim_inventory i join fact_contact c on i.macaddr = c.macaddr - join school_and_location sl on i.sch_id = sl.id - where project = $1 and dat_id > current_date - 40 - and region = $2 -) as temp -group by state; -$$ language sql; - -create or replace function availability_state_lines(proj project_enum, region text) returns table -("Verde" bigint, "Amarelo" bigint, "Vermelho" bigint, "Preto" bigint, "Data" date) as $$ -select sum(green), sum(yellow), sum(red), sum(black), dat_id -from ( - select is_green(dat_id) as green, - is_yellow(dat_id) as yellow, - is_red(dat_id) as red, - is_black(dat_id) as black, - dat_id - from dim_inventory i join fact_contact c on i.macaddr = c.macaddr -join school_and_location sl on i.sch_id = sl.id - where project = $1 and region = $2 and in_date_range(dat_id) -) as temp -group by dat_id -order by dat_id; -$$ language sql; - -/*Cidade*/ -create or replace function availability_city(proj project_enum, region text, state text) returns table -("Verde" bigint, "Amarelo" bigint, "Vermelho" bigint, "Preto" bigint, "Cidade" text) as $$ -select sum(green), sum(yellow), sum(red), sum(black), city from ( - select is_green(dat_id) as green, - is_yellow(dat_id) as yellow, - is_red(dat_id) as red, - is_black(dat_id) as black, sl.city - from dim_inventory i join fact_contact c on i.macaddr = c.macaddr - join school_and_location sl on i.sch_id = sl.id - where project = $1 and dat_id > current_date - 40 - and region = $2 - and state = $3 -) as temp -group by city; -$$ language sql; - -create or replace function availability_city_lines(proj project_enum, region text, state text) returns table -("Verde" bigint, "Amarelo" bigint, "Vermelho" bigint, "Preto" bigint, "Data" date) as $$ -select sum(green), sum(yellow), sum(red), sum(black), dat_id -from ( - select is_green(dat_id) as green, - is_yellow(dat_id) as yellow, - is_red(dat_id) as red, - is_black(dat_id) as black, - dat_id - from dim_inventory i join fact_contact c on i.macaddr = c.macaddr -join school_and_location sl on i.sch_id = sl.id - where project = $1 and region = $2 and state = $3 and in_date_range(dat_id) -) as temp -group by dat_id -order by dat_id; -$$ language sql; - -/*Relatório*/ - -create or replace function availability_report(proj project_enum, region text, state text, city text) returns table -("Date" date, "Maquina" macaddr, "Regiao" text, "Estado" text, "Cidade" text, "UltimoContato" date, "DiasAposUltimoContato" int, "ContatosNoMes" int, "Escola" text, "Cor" text) as $$ -select data, macad, region, state, city, ult_contact, days_last_contact, num_contacts_in_month, school, color -from ( - select (select max(end_time)::date from control) as data, - i.macaddr as macad, - region, - state, - city, - c.dat_id as ult_contact, - days_lastContact(max(c.dat_id)) as days_last_contact, - num_contactsMonth(i.macaddr, i.sch_id) as num_contacts_in_month, - school, - get_color(max(dat_id)) as color - from dim_inventory i join fact_contact c on i.sch_id = c.sch_id and i.macaddr = c.macaddr -join school_and_location sl on c.sch_id = sl.id - where project = $1 and region = $2 and state = $3 and city = $4 - group by c.dat_id, i.macaddr, i.sch_id, region, state, city, school - order by i.sch_id -) as temp; -$$ language sql; - - - -/*select to_char((select max(end_time) from control), 'DD/MM/YYYY') - as load_date, machine, initcap(region) as region, state, - initcap(city) as city, last_contact, days_last_contact, - month_contacts, initcap(school) as school, color = 'green' as - green, color = 'yellow' as yellow, color = 'red' as red*/ - - - - - - - - - - - - +/** + * The functions on this file return the number of computers of a given + * 'project' that contacted the server in the last 10 days (green), 10 + * to 30 days (yellow), or more than 30 days (red). You can also provide + * 'base_date' to refer to the situation in a particular date. + */ + + +/** + * Returns the total amount of computers of the given 'project' and their + * last contact date status. + * + * @param in project_enum proj The type of computer/machine + * @return TABLE(bigint, bigint, bigint) + */ +CREATE OR REPLACE FUNCTION availability_brazil(proj project_enum) RETURNS + TABLE ("Verde" bigint, "Amarelo" bigint, "Vermelho" bigint) AS $$ +BEGIN + RETURN QUERY SELECT + SUM(is_green), + SUM(is_yellow), + SUM(is_red) + FROM + aggr_availability + WHERE + project = proj AND base_date = (SELECT max(base_date) FROM aggr_availability); +END; +$$ language plpgsql; + + +/** + * Returns the total amount of computers of the given 'project' in the + * current date and the last six months. + * + * @param in project_enum proj The type of computer/machine + * @return TABLE(date, bigint, bigint, bigint) + */ +CREATE OR REPLACE FUNCTION availability_brazil_monthly_history(proj project_enum) RETURNS + TABLE ("Data" text, "Verde" bigint, "Amarelo" bigint, "Vermelho" bigint) AS $$ +BEGIN + RETURN QUERY SELECT + get_month_name(extract(month from base_date)::integer)::text, + SUM(is_green), + SUM(is_yellow), + SUM(is_red) + FROM + aggr_availability + WHERE + project = proj + GROUP BY + base_date + ORDER BY + base_date; +END; +$$ language plpgsql; + + +/** + * Returns the total amount of computers of the given 'project' and their + * last contact date status grouped by region. + * + * @param in project_enum proj The type of computer/machine + * @return TABLE(varchar, bigint, bigint, bigint) + */ +CREATE OR REPLACE FUNCTION availability_region(proj project_enum) RETURNS + TABLE ("Região" varchar, "Verde" bigint, "Amarelo" bigint, "Vermelho" bigint) AS $$ +BEGIN + RETURN QUERY SELECT + region, + SUM(is_green), + SUM(is_yellow), + SUM(is_red) + FROM + aggr_availability + WHERE + project = proj AND base_date = (SELECT max(base_date) FROM aggr_availability) + GROUP BY + region; +END; +$$ language plpgsql; + + +/** + * Returns the total amount of computers of the given 'project' in the + * current date and the last six months of a given region. + * + * @param in project_enum proj The type of computer/machine + * @param in region_in text The region + * @return TABLE(date, bigint, bigint, bigint) + */ +CREATE OR REPLACE FUNCTION availability_region_monthly_history(proj project_enum, region_in text) RETURNS + TABLE ("Data" text, "Verde" bigint, "Amarelo" bigint, "Vermelho" bigint) AS $$ +BEGIN + RETURN QUERY SELECT + get_month_name(extract(month from base_date)::integer)::text, + SUM(is_green), + SUM(is_yellow), + SUM(is_red) + FROM + aggr_availability + WHERE + project = proj AND region = region_in + GROUP BY + base_date + ORDER BY + base_date; +END; +$$ language plpgsql; + + +/** + * Returns the total amount of computers of the given 'project' and their + * last contact date status grouped by state in a given 'region_in' + * + * @param in project_enum proj The type of computer/machine + * @param in region_in text The region + * @return TABLE(varchar, bigint, bigint, bigint) + */ +CREATE OR REPLACE FUNCTION availability_state(proj project_enum, region_in text) RETURNS + TABLE ("Estado" varchar, "Verde" bigint, "Amarelo" bigint, "Vermelho" bigint) AS $$ +BEGIN + + RETURN QUERY SELECT + state, + SUM(is_green), + SUM(is_yellow), + SUM(is_red) + FROM + aggr_availability + WHERE + project = proj AND region = region_in AND base_date = (SELECT max(base_date) FROM aggr_availability) + GROUP BY + state; +END; +$$ language plpgsql; + + +/** + * Returns the total amount of computers of the given 'project' in the + * current date and the last six months in the given 'states_in'. + * + * @param in project_enum proj The type of computer/machine + * @param in state_in text The state + * @return TABLE(date, bigint, bigint, bigint) + */ +CREATE OR REPLACE FUNCTION availability_state_monthly_history(proj project_enum, state_in text) RETURNS + TABLE ("Data" text, "Verde" bigint, "Amarelo" bigint, "Vermelho" bigint) AS $$ +BEGIN + RETURN QUERY SELECT + get_month_name(extract(month from base_date)::integer)::text, + SUM(is_green), + SUM(is_yellow), + SUM(is_red) + FROM + aggr_availability + WHERE + project = proj AND state = state_in + GROUP BY + base_date + ORDER BY + base_date; +END; +$$ language plpgsql; + + +/** + * Returns the total amount of computers of the given 'project' and their + * last contact date status grouped by city in a given 'state_in' + * + * @param in project_enum proj The type of computer/machine + * @param in state_in text The state + * @return TABLE(varchar, bigint, bigint, bigint) + */ +CREATE OR REPLACE FUNCTION availability_city(proj project_enum, state_in text) RETURNS + TABLE ("Cidade" varchar, "Verde" bigint, "Amarelo" bigint, "Vermelho" bigint) AS $$ +BEGIN + + RETURN QUERY SELECT + city, + SUM(is_green), + SUM(is_yellow), + SUM(is_red) + FROM + aggr_availability + WHERE + project = proj AND state = state_in AND base_date = (SELECT max(base_date) FROM aggr_availability) + GROUP BY + city + ORDER BY + city; +END; +$$ language plpgsql; + + +/** + * Returns the total amount of computers of the given 'project' + * region, state and city grouped by school. + * + * @param in project_enum proj The type of computer/machine + * @param in region text The region + * @param in state text The state + * @param in city text The city + * @return TABLE(TEXT, macaddr, TEXT, TEXT, TEXT, DATE, INT, BIGINT, + * TEXT, BOOLEAN, BOOLEAN, BOOLEAN) + */ +CREATE OR REPLACE FUNCTION availability_report(proj project_enum, region TEXT, state TEXT, city TEXT) RETURNS TABLE +("load_date" TEXT, "machine" macaddr, "region" TEXT, "state" TEXT, "city" TEXT, "last_contact" DATE, +"days_last_contact" INT, "month_contacts" BIGINT, "school" TEXT, "green" BOOLEAN, "yellow" BOOLEAN, "red" BOOLEAN) AS $$ + SELECT + to_char((SELECT max(base_date) FROM aggr_availability), 'DD/MM/YYYY'), + c.macaddr AS macad, + region, + state, + cy.name, + c.last_contact, + days_last_contact, + (SELECT + COUNT(*) + FROM + fact_contact AS fc + WHERE + fc.macaddr = c.macaddr AND + fc.sch_id = c.sch_id AND + EXTRACT(MONTH FROM fc.dat_id) = EXTRACT(MONTH FROM CURRENT_DATE) AND + EXTRACT(YEAR FROM fc.dat_id) = EXTRACT(YEAR FROM CURRENT_DATE)) AS num_contacts_in_month, + s.name, + days_last_contact <= 10 AS g, + days_last_contact > 10 AND days_last_contact <= 30 AS y, + days_last_contact > 30 AS r + FROM + (SELECT + MAX(dat_id) AS last_contact, + sch_id, + cit_id, + macaddr, + project, + CURRENT_DATE - MAX(dat_id) AS days_last_contact + FROM + fact_contact + GROUP BY + sch_id, + macaddr, + cit_id, + project) c + INNER JOIN + dim_city cy + ON + c.cit_id = cy.id + INNER JOIN + dim_school s + ON + c.sch_id = s.id + WHERE + project = $1 AND region = $2 AND state = $3 AND cy.name = $4 + ORDER BY + c.sch_id; +$$ LANGUAGE SQL; diff --git a/database/query/get_month_name.sql b/database/query/get_month_name.sql new file mode 100644 index 0000000000000000000000000000000000000000..84117a5859d5e4498a2aa2564773276b2b68cb6a --- /dev/null +++ b/database/query/get_month_name.sql @@ -0,0 +1,17 @@ +/* Function to get the name of the month +* +* Receives as input the number of month. +* +* Returns the name of the month. +*/ +CREATE OR REPLACE FUNCTION get_month_name(integer) RETURNS varchar(9) as $$ +DECLARE + array_months VARCHAR[] := ARRAY['Janeiro', 'Fevereiro', 'Março','Abril', +'Maio', 'Junho', 'Julho', 'Agosto', 'Setembro', 'Outubro', 'Novembro', +'Dezembro']; +BEGIN + + RETURN array_months[$1]; + +END; +$$ LANGUAGE 'plpgsql'; \ No newline at end of file diff --git a/database/query/inventory.sql b/database/query/inventory.sql new file mode 100644 index 0000000000000000000000000000000000000000..54dc5a5c6ebaa1d2e9d7626d337f013ff1837902 --- /dev/null +++ b/database/query/inventory.sql @@ -0,0 +1,217 @@ +/** + * The functions on this file return the number of computers of a given + * 'project' that contacted the server in the last 10 days (green), 10 + * to 30 days (yellow), or more than 30 days (red). You can also provide + * 'base_date' to refer to the situation in a particular date. + */ + + +/** + * Returns the total amount of computers of the given 'project' and their + * last contact date status. + * + * @param in project_enum proj The type of computer/machine + * @param in DATE base_date The particular date until which we're + * interested. Default value is today. + * @return TABLE(bigint, bigint, bigint) + */ +DROP FUNCTION inventory_brazil(project_enum, DATE); +CREATE OR REPLACE FUNCTION inventory_brazil(proj project_enum, b_date DATE DEFAULT CURRENT_DATE) RETURNS + TABLE ("Total de máquinas" bigint) AS $$ +BEGIN + RETURN QUERY SELECT + COUNT(*) + FROM + (SELECT + DISTINCT macaddr, sch_inep + FROM + aggr_availability + WHERE + project = proj + AND + base_date <= b_date + ) AS a; +END; +$$ language plpgsql; + + +/** + * Returns the total amount of computers of the given 'project' in the + * current date and the last three months. + * + * TODO: generalize the number of months and create the 'dates' temp table + * in a more elegant manner. + * + * @param in project_enum proj The type of computer/machine + * @return TABLE(date, bigint, bigint, bigint) + */ +DROP FUNCTION inventory_brazil_monthly_history(project_enum); +CREATE OR REPLACE FUNCTION inventory_brazil_monthly_history(proj project_enum) RETURNS + TABLE ("Data" text, "Total de máquinas" bigint) AS $$ +BEGIN + RETURN QUERY SELECT + get_month_name(extract(month from base_date)::integer)::text, + count(*) + FROM + aggr_availability + WHERE + project = proj + GROUP BY + base_date + ORDER BY base_date; +END; +$$ language plpgsql; + + +/** + * Returns the total amount of computers of the given 'project' and their + * last contact date status grouped by region. + * + * @param in project_enum proj The type of computer/machine + * @param in DATE base_date The particular date until which we're + * interested. Default value is today. + * @return TABLE(varchar, bigint, bigint, bigint) + */ +DROP FUNCTION inventory_region(project_enum, DATE); +CREATE OR REPLACE FUNCTION inventory_region(proj project_enum, b_date DATE DEFAULT CURRENT_DATE) RETURNS + TABLE ("Região" varchar, "Total de máquinas" bigint) AS $$ +BEGIN + RETURN QUERY SELECT + region, + COUNT(*) + FROM + (SELECT macaddr, sch_inep, region + FROM aggr_availability + WHERE base_date <= b_date AND + project = proj + GROUP BY macaddr, sch_inep, region) AS a + GROUP BY region; +END; +$$ language plpgsql; + +DROP FUNCTION inventory_region_monthly_history(project_enum, text); +CREATE OR REPLACE FUNCTION inventory_region_monthly_history(proj project_enum, region_in text) RETURNS + TABLE ("Data" text, "Total de máquinas" bigint) AS $$ +BEGIN + RETURN QUERY SELECT + get_month_name(extract(month from base_date)::integer)::text, + COUNT(*) + FROM + (SELECT + macaddr, sch_inep, base_date + FROM + aggr_availability + WHERE + project = proj AND region = region_in + GROUP BY + macaddr, sch_inep, base_date) AS a + GROUP BY base_date + ORDER BY base_date; +END; +$$ language plpgsql; + +DROP FUNCTION inventory_state(project_enum, text, date); +CREATE OR REPLACE FUNCTION inventory_state(proj project_enum, region_in text, b_date DATE DEFAULT CURRENT_DATE) RETURNS + TABLE ("Estado" varchar, "Total de máquinas" bigint) AS $$ +BEGIN + RETURN QUERY SELECT + state, + COUNT(*) + FROM + (SELECT + state, macaddr, sch_inep + FROM + aggr_availability + WHERE + project = proj AND region = region_in AND base_date <= b_date + GROUP BY + sch_inep, macaddr, state) AS a + GROUP BY state + ORDER BY state; +END; +$$ language plpgsql; + +drop function inventory_state_monthly_history(proj project_enum, state_in text); +CREATE OR REPLACE FUNCTION inventory_state_monthly_history(proj project_enum, state_in text) RETURNS + TABLE ("Data" text, "Total de máquinas" bigint) AS $$ +BEGIN + RETURN QUERY SELECT + get_month_name(extract(month from base_date)::integer)::text, + COUNT(*) + FROM + (SELECT + macaddr, + sch_inep, + base_date + FROM + aggr_availability + WHERE + state = state_in AND + project = proj + GROUP BY + macaddr, sch_inep, base_date) AS a + GROUP BY base_date ORDER BY base_date; +END; +$$ language plpgsql; + +DROP FUNCTION inventory_city(project_enum, text, DATE); +CREATE OR REPLACE FUNCTION inventory_city(proj project_enum, state_in text, b_date DATE DEFAULT CURRENT_DATE) RETURNS + TABLE ("Cidade" varchar, "Total de máquinas" bigint) AS $$ +BEGIN + RETURN QUERY SELECT + city, + count(*) + FROM + (SELECT city + FROM + aggr_availability + WHERE + state = state_in AND + base_date <= b_date + GROUP BY city, macaddr, sch_inep) as a + GROUP BY city + ORDER BY city; +END; +$$ language plpgsql; + +DROP FUNCTION inventory_report(proj project_enum, region_in text, state_in text, city_in text); +CREATE OR REPLACE FUNCTION inventory_report(proj project_enum, region_in text, state_in text, city_in text) RETURNS + TABLE (data_atual text, regiao text, inv_estado character varying, cidade text, + escola text, maquina macaddr, data date, SO text, processor text, + memoria text, HD text) AS $$ +BEGIN + RETURN QUERY SELECT to_char((SELECT max(base_date) FROM aggr_availability), 'DD/MM/YYYY'), + initcap(c.region), + c.state, + initcap(c.name), + initcap(s.name), + d.macaddr, + d.load_date, + d.os_distro, + d.processor, + simplify_memory(d.memory), + simplify_hd(d.disk1_size) + FROM + (SELECT + macaddr, + sch_id, + max(load_date) AS load_date + FROM dim_inventory + WHERE project = proj + GROUP BY macaddr, sch_id) AS i + JOIN dim_school s + ON i.sch_id = s.id + JOIN dim_city c + ON + s.cit_id = c.id AND + region = UPPER(region_in) AND + state = UPPER(state_in) AND + c.name = UPPER(city_in) + JOIN dim_inventory d + ON + d.sch_id = i.sch_id AND + d.macaddr = i.macaddr AND + d.load_date = i.load_date + ORDER BY s.name, macaddr; +END; +$$ LANGUAGE plpgsql; \ No newline at end of file diff --git a/database/query/simplfy_memory.sql b/database/query/simplfy_memory.sql new file mode 100644 index 0000000000000000000000000000000000000000..8600c22620771948c7a6386ff66a7e26cae74af1 --- /dev/null +++ b/database/query/simplfy_memory.sql @@ -0,0 +1,14 @@ +CREATE OR REPLACE FUNCTION simplify_memory(integer) RETURNS text AS $$ +select case + when $1 < 1024 then $1 || ' KiB' + when $1 < 1048576 then round($1 / 1024, 2) || ' MiB' + when $1 < 1073741824 then round($1 / 1048576, 2) || ' GiB' + else round($1 / 1073741824, 2) || ' TiB' +end; +$$ LANGUAGE SQL; + +CREATE OR REPLACE FUNCTION simplify_hd(integer) RETURNS text AS $$ +select case + when $1 < 1000 then $1 || ' GB' + else round($1 / 1000, 2) || ' TB' end; +$$ LANGUAGE SQL; \ No newline at end of file diff --git a/database/test/proinfo_inventory.py b/database/test/proinfo_inventory.py index 40bf370871fba761f82ccea7ad317e7aa82f8b13..c85a2af9335a0f525fe9e0f02e75eba1306f0055 100755 --- a/database/test/proinfo_inventory.py +++ b/database/test/proinfo_inventory.py @@ -8,9 +8,7 @@ import unittest # TODO: Unit tests MACHINE_PEAK = 10000 -DAYS = 4 -NUM_REMOVE_MACHINE = 5 -FIRST_MAC_REMOVE = 2 +DAYS = 3 MACHINE_DOUBLE_COMM_RATIO = 0.12 MACHINE_MULTI_COMM_RATIO = 0.02 MACHINE_DECREASE_RATIO = 1.1 @@ -37,7 +35,7 @@ WHERE i.macaddr = %s AND s.id = i.sch_id AND s.inep = %s''', (mac, INEP)) def insert_data(cur): for day in range(DAYS): - machines = machine_increase(day + 1) + machines = machine_increase(day) refresh_machines(machines, day, cur) return max_machine(machines, DAYS - 1) @@ -54,31 +52,18 @@ def gen_macaddress(integer): p4 = (integer & 2147483647) >> 24 return "00:00:" + toHex(p4) + ":" + toHex(p3) + ":" + toHex(p2) + ":" + toHex(p1) -def insert_machine(machine, day, cur, contact): - macaddr = gen_macaddress(machine) -# print "Mac {0}".format(macaddr) - if machine < MULTI_HD_TOTAL: - cur.execute(TEMPLATE, (contact, macaddr, HD_MODEL, HD_SIZE, HD_USED, HD_EXTRA)) - else: - cur.execute(TEMPLATE, (contact, macaddr, None, None, None, 0)) - - -def fail_machines(machine, day): - if machine >= FIRST_MAC_REMOVE + day and machine < FIRST_MAC_REMOVE + NUM_REMOVE_MACHINE + day: - return 0 - else: - return 1 - def refresh_machines(machines, day, cur): contact = INITIAL_DATE + timedelta(days = day) for i in updated_machines(machines, day): - if day % 2 == 0: - insert_machine(i, day, cur, contact) - elif fail_machines(i, day): - insert_machine(i, day, cur, contact) + macaddr = gen_macaddress(i) +# print "Mac {0}".format(macaddr) + if i < MULTI_HD_TOTAL: + cur.execute(TEMPLATE, (contact, macaddr, HD_MODEL, HD_SIZE, HD_USED, HD_EXTRA)) + else: + cur.execute(TEMPLATE, (contact, macaddr, None, None, None, 0)) def max_machine(machines, day): - return intround(machines) + return intround(machines / (MACHINE_DECREASE_RATIO * (day + 1))) def intround(f): return int(round(f)) @@ -105,7 +90,7 @@ def get_connection(dbname, user): if __name__ == '__main__': if len(sys.argv) != 3: - sys.exit ("How to use: test_migration <database> <user>") + sys.exit("Usage: {0} <database> <user>".format(sys.argv[0])) connection = get_connection(sys.argv[1], sys.argv[2]) cur = connection.cursor() count = insert_data(cur)