diff --git a/database/create/004-aggregates.sql b/database/create/004-aggregates.sql index 96ef46ca1c019a56582fd8fc90026fc7db823072..3dd0f1c6629218b7f406052dcc7d494018938161 100644 --- a/database/create/004-aggregates.sql +++ b/database/create/004-aggregates.sql @@ -1,11 +1,15 @@ CREATE TABLE aggr_availability ( - sch_id INTEGER NOT NULL REFERENCES dim_school, - cit_id INTEGER NOT NULL REFERENCES dim_city, - dat_id DATE NOT NULL REFERENCES dim_date, - mac_count BIGINT NOT NULL DEFAULT 0, - mac_count_red BIGINT NOT NULL DEFAULT 0, - mac_count_yellow BIGINT NOT NULL DEFAULT 0, - mac_count_green BIGINT NOT NULL DEFAULT 0, - project project_enum DEFAULT 'proinfo' NOT NULL, - UNIQUE (sch_id, cit_id, dat_id, project) + base_date DATE NOT NULL, + macaddr MACADDR NOT NULL, + project project_enum DEFAULT 'proinfo' NOT NULL, + sch_inep VARCHAR (12) NOT NULL, + sch_name VARCHAR (150) NOT NULL, + city VARCHAR (100) NOT NULL, + state VARCHAR (2) NOT NULL, + region VARCHAR (12) NOT NULL, + is_green SMALLINT DEFAULT 0 NOT NULL, + is_yellow SMALLINT DEFAULT 0 NOT NULL, + is_red SMALLINT DEFAULT 0 NOT NULL ); + +CREATE INDEX aggr_availability_base_date_idx ON aggr_availability USING btree (base_date); diff --git a/database/load/005-aggregates.sql b/database/load/005-aggregates.sql index 10a0b62ba02cba1dacc0a62ecbb87b822a8d2c91..248d7931b848a0be4ccf8162c261b3cf0698919b 100644 --- a/database/load/005-aggregates.sql +++ b/database/load/005-aggregates.sql @@ -10,21 +10,23 @@ BEGIN start_ts = CLOCK_TIMESTAMP(); total_rows := 0; + -- remove the index - later we create it again + DROP INDEX IF EXISTS aggr_availability_base_date_idx; + -- remove old data TRUNCATE TABLE aggr_availability; -- iterate over past months - FOR date_it IN SELECT (date_trunc('month', generate_series( - current_date - interval '4 months', - current_date, - interval '1 month')) - interval '1 day')::date UNION select current_date LOOP + FOR i IN 0..5 LOOP + date_it := CURRENT_DATE - (i * interval '1 month'); INSERT INTO aggr_availability SELECT date_it, f.macaddr, - t.id_point, - t.name, + f.project, + s.inep, + s.name, c.name, c.state, c.region, @@ -34,27 +36,24 @@ BEGIN FROM (SELECT max(dat_id) as max_dat_id, - f.id_tc, - f.id_city, + f.sch_id, + f.cit_id, f.macaddr, + f.project FROM - fact_communicate f + fact_contact f WHERE dat_id <= date_it GROUP BY - f.id_city, f.id_tc, f.macaddr) as f - INNER JOIN - telecenter t - ON - f.id_tc = t.id + f.cit_id, f.sch_id, f.macaddr, f.project) as f INNER JOIN - point_pt + dim_school s ON - t.id_point = pt.id + f.sch_id = s.id INNER JOIN - city c + dim_city c ON - pt.id_city = c.id; + f.cit_id = c.id; -- how many rows were inserted? GET DIAGNOSTICS cur_rows := ROW_COUNT; @@ -62,6 +61,9 @@ BEGIN END LOOP; + -- creating index to speed-up per-date queries, and it speeds up a lot.. + CREATE INDEX aggr_availability_base_date_idx ON aggr_availability(base_date); + -- logging INSERT INTO control (name, start_time, end_time, total) VALUES ('loading aggr_availability', start_ts, CLOCK_TIMESTAMP(), total_rows);