Select Git revision
postgres.ts
-
Lucas Fernandes de Oliveira authored
Signed-off-by:
Lucas Fernandes de Oliveira <lfo14@inf.ufpr.br>
Lucas Fernandes de Oliveira authoredSigned-off-by:
Lucas Fernandes de Oliveira <lfo14@inf.ufpr.br>
postgres.ts 30.82 KiB
/*
* Copyright (C) 2016 Centro de Computacao Cientifica e Software Livre
* Departamento de Informatica - Universidade Federal do Parana
*
* This file is part of blend.
*
* blend is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* blend is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with blend. If not, see <http://www.gnu.org/licenses/>.
*/
import { Adapter } from "../core/adapter";
import { Metric } from "../core/metric";
import { Dimension } from "../core/dimension";
import { Clause } from "../core/clause";
import { Filter, FilterOperator } from "../core/filter";
import { AggregationType, RelationType } from "../common/types";
import { View } from "../core/view";
import { Pool, PoolConfig } from "pg";
interface ExpandedView {
dimensions: Dimension[];
metrics: Metric[];
dimMetrics: Metric[];
keys: Dimension[];
clauses: Clause[];
from: string;
id: string;
origin: boolean;
}
interface DimInfo {
dim: Dimension;
views: ExpandedView[];
}
interface DimTranslation {
aliased: string;
noalias: string;
}
export class PostgresAdapter extends Adapter {
private pool: Pool;
constructor (config: PoolConfig) {
super();
this.pool = new Pool(config);
}
public getDataFromView(view: View, cb: (error: Error, result?: any[]) => void): void {
const query = this.getQueryFromView(view);
this.pool.connect((err, client, done) => {
if (err) {
cb (err);
return;
}
client.query(query, [], (error, result) => {
// call 'done()' to release client back to pool
done();
cb(error, (result) ? result.rows : null);
});
});
}
public materializeView(view: View): boolean {
return false;
}
public getQueryFromView(view: View): string {
/*
Find the base (materialized) views that has this data and exapand
than (make a parse to the format used in the adapter)
*/
const materialized = this.searchMaterializedViews(view).sort((a, b) => {
return (a.id < b.id) ? -1 : 1;
}).map((item) => {
return {
id: item.id,
from: "view_" + item.id,
dimMetrics: [],
metrics: item.metrics.filter((i) => {
return view.metrics.some((j) => i.name === j.name);
}),
dimensions: item.dimensions,
keys: item.keys,
clauses: item.clauses,
origin: item.origin
};
});
// Remove repeated views from the result
let partialJoin = [materialized[0]];
for (let i = 1; i < materialized.length; ++i) {
if (materialized[i - 1].id !== materialized[i].id) {
partialJoin.push(materialized[i]);
}
}
/*
If there is more than one source of data (tables/views)
a join is needed.
Partial Join represents how many sources still exists,
every join reduces this number.
*/
while (partialJoin.length > 1) {
/*
Variable map finds what dimenensions are still needed to
complete this query, they are required for 2 reasons.
1 - To make joins
2 - Because they are in the query
For each view that has this dimension we add one score to
this dimension, if they are in the query the same.
Automatically if the dimension is in the query there will be
at least one view with this atribute (or the query could not be
completed) so dimensions in the query always have score of
at least 2.
To make a join the dimension must be in 2 different views,
creating a score of 2 either.
If the score is less than 2 so this dimension is not required
anymore and can be removed.
*/
let map: { [key: string]: number } = {};
let partialsChange = false;
for (let i = 0; i < partialJoin.length; ++i) {
const dims = partialJoin[i].dimensions;
for (let k = 0; k < dims.length; ++k) {
if (!map[dims[k].name]) {
map[dims[k].name] = 1;
}
else {
++map[dims[k].name];
}
}
}
for (let i = 0; i < view.dimensions.length; ++i) {
let dim = view.dimensions[i];
while (dim !== null) {
if (map[dim.name]) {
++map[dim.name];
}
dim = dim.parent;
}
}
for (let i = 0; i < partialJoin.length; ++i) {
const dims = partialJoin[i].dimensions.filter((item) => {
return map[item.name] > 1;
});
const keys = partialJoin[i].keys.filter((item) => {
return map[item.name] > 1;
});
/*
At this point the dimensions with less than score 2
are removed, if this happens the view is agreggated
again, with less dimensions, removing this dimension
from the view.
*/
if (dims.length < partialJoin[i].dimensions.length) {
const partial = new View({
metrics: partialJoin[i].metrics,
dimensions: dims,
keys: keys,
origin: false,
clauses: partialJoin[i].clauses,
materialized: false
});
const from = "(" +
this.buildQuery(partial, [partialJoin[i]]) +
") AS view_" + partial.id + "\n";
partialJoin[i].id = partial.id;
partialJoin[i].dimensions = partial.dimensions;
partialJoin[i].keys = partial.keys;
partialJoin[i].origin = partial.origin;
partialJoin[i].from = from;
partialsChange = true;
}
}
/*
If at least one of the views changed (have the number of
dimensions reduced) returns to the begining of the loop
again.
Othewise we need to make a join.
*/
if (!partialsChange) {
/*
Sorting the views by keys.
If the keys are identical, then they
will be in sequence, and views with identical
keys can be joined.
Sort an array of keys is the same as sort a
array of strings.
*/
const sorted = partialJoin.sort((a, b) => {
return this.compareKeys(a.keys, b.keys);
});
/*
First of all, the remaining views are splited in segments.
A segment contains views with the same keys that are great
to make joins. Joins like this do not create "dimensional
metrics".
In joins like this one row of each view will be connected
with at most one row of each other table.
*/
const segment = [[sorted[0]]];
let segmentId = 0;
for (let i = 1; i < sorted.length; ++i) {
if (this.compareKeys(sorted[i - 1].keys, sorted[i].keys) === 0) {
segment[segmentId].push(sorted[i]);
}
else {
++segmentId;
segment.push([sorted[i]]);
}
}
partialJoin = [];
let ableToJoin = false;
for (let i = 0; i < segment.length; ++i) {
/*
If a segment has more than one view, a join can be made
*/
if (segment[i].length > 1) {
let mets: Metric[] = [];
let clauses: Clause[] = [];
let dims: Dimension[] = [];
let dimMetrics: Metric[] = [];
for (let j = 0; j < segment[i].length; ++j) {
mets = mets.concat(segment[i][j].metrics);
clauses = clauses.concat(segment[i][j].clauses);
dims = dims.concat(segment[i][j].dimensions);
dimMetrics = dimMetrics.concat(segment[i][j].dimMetrics);
}
dims = this.removeDuplicatedDimensions(dims);
/*
Its atributes are just concatenated and the
duplicates removed.
*/
const partial = new View({
metrics: mets,
dimensions: dims,
keys: segment[i][0].keys,
origin: false,
clauses: clauses,
materialized: false
});
const viewFrom = "(" +
this.buildQuery(partial, segment[i]) +
") AS view_" + partial.id + "\n";
partialJoin.push({
id: partial.id,
from: viewFrom,
dimMetrics: dimMetrics,
metrics: partial.metrics,
dimensions: partial.dimensions,
keys: partial.keys,
clauses: partial.clauses,
origin: partial.origin
});
ableToJoin = true;
}
else {
/*
If the segment has just one view, anything can be
done at this point, so just reinsert this view in
set of views.
*/
partialJoin.push(segment[i][0]);
}
}
/*
If at least one join was made in the last part (a segment
with more than one view) than return to the begining of the
loop.
This permits after a join remove the dimensions that were
only choosen to this join, and are no longer required
Ideally the joins should be restrict the join method used
above, but in some cases this can not be done.
So if all the segments have only one view inside, move
to the next method.
*/
if (!ableToJoin) {
/*
At this point 2 views will be joined, first the
similarity with each pair of views is calculated,
the pair with the biggedt similarity will be joined.
Similarity is calculated with the number of common
dimensions in the keys.
*/
let similarity = 0;
let idx0 = 0;
let idx1 = 1;
for (let i = 0; i < partialJoin.length; ++i) {
for (let j = i + 1 ; j < partialJoin.length; ++j) {
const pi = partialJoin[i].keys;
const pj = partialJoin[j].keys;
let score = this.similarDimensions (pi, pj);
if (similarity < score) {
similarity = score;
idx0 = i;
idx1 = j;
}
}
}
const partial0 = partialJoin[idx0];
const partial1 = partialJoin[idx1];
partialJoin.splice(idx1, 1);
partialJoin.splice(idx0, 1);
/*
Once the views are select they are joined with the
same method, concatenedted its atributes and
removing duplicates, however the nasty effect of
this join is the creation of "dimensional metrics".
"Dimensional metrics" are metrics that can no longer
be aggregated, and at this point to the end
of a query they will act as dimensions.
This change happens to avoid inconsistency generated
by a join where one row of one table can be connected
to more than one of other table.
Take this example.
View0 : metrics [met0], dimensions [dim0]
values: [{met0: 10, dim0: 1}]
View1 : metrics [met1], dimensions [dim2]
values: [{met1: 10, dim2: 1}. {met1: 5, dim2: 2}]
View2 : metrics [], dimensions [dim0, dim1, dim2]
values: [
{dim0: 1, dim1: 1, dim2: 1},
{dim0: 1, dim1: 1, dim2: 2}
]
The query is metrics [met0, met1] and dimensions [dim1]
First a join of View0 and View1 is made, the result
is: [
{dim0: 1, dim1: 1, dim2: 1, met0: 10},
{dim0: 1, dim1: 1, dim2: 2, met0: 10}
]
Note that the value of met0 is duplicated.
Now dim0 is removed, than joined with view2 resulting
in: [
{met1: 10, dim1: 1, dim2: 1, met0: 10},
{met1: 5 , dim1: 1, dim2: 2, met0: 10}
]
Lets assume that the agregation is SUM
If we remove dim2 and re-agregate the result is: [
{met1: 15, dim1: 1, met0: 20}
]
This result is wrong. The replication of the value
met0 affects the result.
See if met1 was not required, first the dimemnsion would
be reduced, left dim0 and dim1, than joined that reduced
again resulting in the value [
{dim1:1, met0: 10}
]
Is this case there is no duplication and the aggregation
does not include more rows than should.
To solve this problem the met0 must become a dimension,
in other words, not aggregated again. If the met0 was
not agregated in the query met0, met1, dim1 the result
is: [
{met1: 15, dim1: 1, met0: 10}
]
what is compatible.
After this extreme long explanation what must be
known is: Joining views with diferent keys
generate "dimensional metrics".
Views with "dimensional metrics" can not used for future
queries because can not be re-agregated, so this must be
avoided and is one-query only views.
*/
let dimMetrics: Metric[];
let mets: Metric[];
let dims = partial0.dimensions.concat(partial1.dimensions);
dims = this.removeDuplicatedDimensions(dims);
let keys = partial0.keys.concat(partial1.keys);
keys = this.removeDuplicatedDimensions(keys);
if (partial0.keys.length === similarity) {
/*
Here the metrics become dimensions, but the effect
can be reduced. If the keys of partial0
is a sub set of the keys ou partial1
than the number of rows of partial 1 is not
affected, in other words the metrics of partial1
can be aggregated and does not need to become
dimensions.
*/
partial0.dimMetrics = partial0.dimMetrics.concat(partial0.metrics);
partial0.metrics = [];
mets = partial1.metrics;
}
else if (partial1.keys.length === similarity) {
/*
The same occurs if the keys of partia1 is a subset
of partial0.
*/
partial1.dimMetrics = partial1.dimMetrics.concat(partial1.metrics);
partial1.metrics = [];
mets = partial0.metrics;
}
else {
/*
But if there is no sub set, than both sides have
the metrics turned in dimensions.
*/
partial0.dimMetrics = partial0.dimMetrics.concat(partial0.metrics);
partial0.metrics = [];
partial1.dimMetrics = partial1.dimMetrics.concat(partial1.metrics);
partial1.metrics = [];
mets = [];
}
dimMetrics = partial0.dimMetrics.concat(partial1.dimMetrics);
const partial = new View({
metrics: mets,
dimensions: dims,
keys: keys,
origin: false,
clauses: partial0.clauses.concat(partial1.clauses),
materialized: false
});
const id = new View({
metrics: mets.concat(dimMetrics),
dimensions: dims,
keys: keys,
origin: false,
clauses: partial0.clauses.concat(partial1.clauses),
materialized: false
}).id;
const viewFrom = "(" +
this.buildQuery(partial, [partial0, partial1]) +
") AS view_" + id + "\n";
partialJoin.push({
id: id,
from: viewFrom,
dimMetrics: dimMetrics,
metrics: mets,
dimensions: dims,
keys: keys,
clauses: partial.clauses,
origin: false
});
}
}
}
/*
When only one view remain, the query is made and a ;
is added at the end.
TODO: Probrably this last line adds one more
layer to the query, that is in fact unnecessary.
Think a way to remove-it.
*/
return this.buildQuery(view, partialJoin) + ";";
}
private searchMaterializedViews(view: View): View[] {
let r: View[] = [];
if (view.materialized) {
return [view];
}
else {
let children = view.childViews;
for (let i = 0; i < children.length; ++i) {
r = r.concat(this.searchMaterializedViews(children[i]));
}
}
return r;
}
private buildQuery(target: View, views: ExpandedView[]) {
const metrics = target.metrics;
const dimensions = target.dimensions;
const clauses = target.clauses;
let dimMap: {[key: string]: DimInfo} = {};
let nameMap: {[key: string]: ExpandedView} = {};
for (let i = 0; i < views.length; ++i) {
const mets = views[i].metrics;
const dims = views[i].dimensions;
for (let j = 0; j < mets.length; ++j) {
if (!nameMap[mets[j].name]) {
nameMap[mets[j].name] = views[i];
}
}
for (let j = 0; j < dims.length; ++j) {
if (!dimMap[dims[j].name]) {
dimMap[dims[j].name] = {
dim: dims[j],
views: [views[i]]
};
nameMap[dims[j].name] = views[i];
}
else {
dimMap[dims[j].name].views.push(views[i]);
}
}
}
// Projection
const strMetrics = metrics.map((metric) => {
const view = nameMap[metric.name];
if (view) {
return this.translateMetric(metric, view);
}
return "";
}).filter((item) => item !== "");
const parsedDimensions = dimensions.map((dimension) => {
let dim = dimension;
while (!nameMap[dim.name]) {
dim = dim.parent;
}
const view = nameMap[dim.name];
return this.translateDimension(dimension, dim, view);
});
let parsedDimMetrics: DimTranslation[] = [];
for (let i = 0; i < views.length; ++i) {
const dimMets = views[i].dimMetrics.map((item) => {
return this.translateDimMetric(item, views[i]);
});
parsedDimMetrics = parsedDimMetrics.concat(dimMets);
}
const totalDimensions = parsedDimensions.concat(parsedDimMetrics);
const strDimensions = totalDimensions.map ((item) => item.aliased);
const grouped = totalDimensions.map((item) => item.noalias);
const elements = strMetrics.concat(strDimensions);
// Joins
let conds: string[] = [];
for (let i in dimMap) {
let remainViews = dimMap[i].views.slice();
let dim = dimMap[i].dim;
let leftSide = this.buildColumn(dim, remainViews.shift().id);
if (remainViews.length > 0) {
while (remainViews.length > 0) {
const id = remainViews.shift().id;
const rightSide = this.buildColumn(dim, id);
conds.push(leftSide + " = " + rightSide);
}
}
}
// Selection
let covered: Clause[] = [];
for (let i = 0; i < views.length; ++i) {
// Get the clauses that children already cover
covered = covered.concat(views[i].clauses);
}
const toCover = clauses.filter((item) => !covered.some ((clause) => {
return clause.id === item.id;
}));
toCover.forEach((item) => {
const clause = "(" + this.translateClause(item, nameMap) + ")";
if (clause !== "()") {
conds.push(clause);
}
});
// Assembly
const projection = "SELECT " + elements.join(",");
const source = " FROM " + views.map((view) => view.from).join(",");
const selection = (conds.length > 0) ? " WHERE " + conds.join(" AND ") : "";
let grouping = "";
if (grouped.length > 0) {
grouping = " GROUP BY " + grouped.join(",");
}
return projection + source + selection + grouping;
}
private getAggregateFunction(aggrType: AggregationType, origin: boolean): string {
switch (aggrType) {
case AggregationType.SUM:
return "SUM";
case AggregationType.AVG:
return "AVG";
case AggregationType.COUNT:
return (origin) ? "COUNT" : "SUM";
case AggregationType.MAX:
return "MAX";
case AggregationType.MIN:
return "MIN";
default:
return "";
}
}
private translateRelation(relation: RelationType, arg: string): string {
switch (relation) {
case RelationType.DAY:
return this.applyRelation("EXTRACT", ["DAY FROM "], [arg]);
case RelationType.MONTH:
return this.applyRelation("EXTRACT", ["MONTH FROM "], [arg]);
case RelationType.YEAR:
return this.applyRelation("EXTRACT", ["YEAR FROM "], [arg]);
case RelationType.DAYOFWEEK:
return this.applyRelation("EXTRACT", ["DOW FROM "], [arg]);
default:
return "";
}
}
private applyRelation(name: string, args: string[], values: string[]): string {
/*
This adapter uses the concept of functions in Postgres to
implement BLENDB sub-dimention relations, this functions
applys the transformation to build the call of a Postgres
funtion. Note that this function can be native from postgres,
like EXTRACT, or even implemented on the database.
This function is short and only used in the translateRelation
method however is a bit complex and is possible to be used
several times, because of that is puted appart to make easyer update
and avoid problems
Example
applyRelation ("EXTRACT", "["DAY FROM"]", ["view_0.date"])
output: EXTRACT(DAY FROM view_0.date)
*/
return name + "(" + args.map((item, idx) => item + values[idx]).join(",") + ")";
}
private buildColumn (item: Metric|Dimension, id: string): string {
const quotedName = "\"" + item.name + "\"";
return "view_" + id + "." + quotedName;
}
private translateClause(clause: Clause, map: {[key: string]: ExpandedView}): string {
const r = clause.filters.map((item) => {
return this.translateFilter(item, map);
}).filter((item) => {
return item !== "";
});
return r.join(" OR ");
}
private translateFilter(filter: Filter, map: {[key: string]: ExpandedView}): string {
if (!map[filter.target.name]) {
return "";
}
const viewId = map[filter.target.name].id;
const leftSide = this.buildColumn(filter.target, viewId);
const op = this.translateOperator(filter.operator);
const dataType = this.translateDataType(filter.target.dataType);
const quotedValue = "'" + filter.value + "'";
return leftSide + op + quotedValue + dataType;
}
private translateMetric(metric: Metric, view: ExpandedView): string {
const func = this.getAggregateFunction(metric.aggregation, view.origin);
const quotedName = "\"" + metric.name + "\"";
const extMetric = func + "(" + this.buildColumn(metric, view.id) + ")";
return extMetric + " AS " + quotedName;
}
private translateDimMetric(metric: Metric, view: ExpandedView): DimTranslation {
const quotedName = "\"" + metric.name + "\"";
const extMetric = this.buildColumn(metric, view.id);
return { aliased: extMetric + " AS " + quotedName, noalias: extMetric };
}
private translateDimension(dimension: Dimension,
ancestor: Dimension,
view: ExpandedView): DimTranslation {
const quotedName = "\"" + dimension.name + "\"";
let extDimension = this.buildColumn(ancestor, view.id);
let aux = dimension;
while (aux.name !== ancestor.name) {
extDimension = this.translateRelation(aux.relation, extDimension);
aux = aux.parent;
}
return { aliased: extDimension + " AS " + quotedName, noalias: extDimension };
}
private translateOperator(op: FilterOperator): string {
switch (op) {
case FilterOperator.EQUAL:
return " = ";
case FilterOperator.NOTEQUAL:
return " != ";
case FilterOperator.GREATER:
return " > ";
case FilterOperator.LOWER:
return " < ";
case FilterOperator.GREATEREQ:
return " >= ";
case FilterOperator.LOWEREQ:
return " <= ";
default:
return "";
}
}
private translateDataType(dt: string ): string {
switch (dt) {
case "date":
return "::DATE";
case "integer":
return "::INTEGER";
case "boolean":
return "::BOOLEAN";
default:
return "";
}
}
private compareKeys(a: Dimension[], b: Dimension[]): number {
let length = 0;
let res = a.length - b.length;
if (a.length < b.length) {
length = a.length;
}
else {
length = b.length;
}
for (let i = 0; i < length; ++i) {
if (a[i].name < b[i].name) {
return -1;
}
else if (a[i].name > b[i].name) {
return 1;
}
}
return res;
}
private similarDimensions(a: Dimension[], b: Dimension[]): number {
let count = 0;
for (let i = 0; i < a.length; ++i) {
if (b.some((itemB) => a[i].name === itemB.name)) {
count++;
}
}
return count;
}
private removeDuplicatedDimensions(candidateDims: Dimension[]): Dimension[] {
let filterDims: { [key: string]: boolean } = {};
const dims = [];
for (let i = 0; i < candidateDims.length; ++i) {
if (!filterDims[candidateDims[i].name]) {
dims.push(candidateDims[i]);
filterDims[candidateDims[i].name] = true;
}
}
return dims;
}
}