create table division ( id serial primary key, name varchar not null ); -- nested sets table create table location ( id serial primary key, name varchar not null, division_id integer not null references division(id), tree_id integer not null, parent_id integer references location(id), lft integer not null, rgt integer not null, level integer not null ); create table report ( id serial primary key, somevar_1 integer not null, somevar_2 integer not null, tyme timestamptz not null, -- other stuff location_id integer not null references location(id) ); -- division hierarchy is Division0 > Division1 > Division2 > Division3 -- reports are generated at Division3, and aggregated at -- either Division2 or Division1 (usually the latter) -- this means that to get my data for Division1, i have to -- do two self-joins on the location table -- plus one for the report table
given the above schema, i'm experimenting with different ways to speed up querying for the sum of each
somevar aggregated at either Division2 or Division1. right now, what i do is
select sum(somevar_1) as somevar_1, sum(somevar_2) as somevar_2, div1.name, div1.id from report join location as div3 on report.location_id = div3.id join location as div2 on div3.parent_id = div2.id join location as div1 on div2.parent_id = div1.id where -- for example, all reports this year date_trunc('year', tyme) = '2019-01-01' group by div1.id, div1.name
the above query runs in about 4.8s locally with a row count a bit shy of 55k reports for 2019. this is just the base data, without any further processing. the processing step also does something similar, and it's in a web application, so the user is waiting for a not-insignificant amount of time.
for my question: can i use recursive CTEs to speed this up? i haven't yet grokked recursive CTEs, so writing one is still a bit beyond me at this point.