Postgresql – Get distinct values for all columns for all tables from specific schema

jsonpostgresqlpostgresql-9.5

I would like to get all distinct values for all columns in each table of a specific schema.

First, I summarised all my tables/columns in a new_table (myschema.dico)

  CREATE TABLE myschema.dico AS (
    WITH tables AS (
        SELECT table_name
        FROM information_schema.tables
        WHERE table_schema = 'myschema' and table_type ='BASE TABLE' and table_name LIKE 'XXX' --criterias if needed
        ORDER BY table_name
                    )
    SELECT tables.table_name, 
    column_name, 
    concat(tables.table_name,'.',column_name) AS address, 
    concat ('myschema.',tables.table_name) AS home
    FROM information_schema.columns
    RIGHT JOIN tables ON tables.table_name = columns.table_name
    ORDER BY 1,2
    ;

Then, added a new column to update my distinct values

ALTER TABLE myschema.dico ADD COLUMN dico TEXT;

myschema.dico table looks like this:

table_namecolumn_nameaddresshomedico
table1onetable1.onemyschema.table1
table1twotable1.twomyschema.table1
table1threetable1.threemyschema.table1
table2aaatable2.aaamyschema.table2
table2bbbtable2.bbbmyschema.table2

Then I tried to create a function to update all my 'dico' column's values with all distinct values :

CREATE OR REPLACE FUNCTION dico_columns() RETURNS void AS $$
DECLARE

  cur_table CURSOR FOR 
    SELECT *
    FROM myschema.dico
    FOR UPDATE;
 cur_table_row RECORD ;
 
  cur_dico2 CURSOR FOR 
    SELECT *
    FROM myschema.dico
    FOR UPDATE;
 cur_dico2_row RECORD ;
 
BEGIN

    OPEN cur_table;
    FETCH cur_table INTO cur_table_row;
    LOOP 
        EXIT WHEN NOT FOUND;
        OPEN cur_dico2;
        FETCH cur_dico2 INTO cur_dico2_row;
        LOOP
            EXIT WHEN NOT FOUND;
            
            IF cur_table_row.home IS NOT NULL 
            THEN  UPDATE myschema.dico 
                  SET dico= ( 
                              SELECT json_build_object(colname, array_agg(DISTINCT val))
                              FROM cur_table_row.home --refers to myschema.table 
                              CROSS JOIN LATERAL json_each_text(row_to_json(cur_table_row.table_name)) AS j(colname,val) --table_name = table
                              WHERE colname=cur_table_row.column_name --refers to table.column
                              GROUP BY colname)
                   WHERE CURRENT OF cur_dico2;

            END IF;
            FETCH cur_dico2 INTO cur_dico2_row;
        END LOOP;
        CLOSE cur_dico2;
        FETCH cur_table INTO cur_table_row;
    END LOOP;
    CLOSE cur_table;
    
END ;
$$ LANGUAGE plpgsql;
    
SELECT dico_columns();

Unfortunately it seems not possible to use 'FROM cursor_row.value'.
I tried many things but I'm not familiar with functions.

Do you have a better idea ?

Currently on postgres 9.5.

Best Answer

It needs some refinement, but to show you how to get it started

but basucally make a update a simple update with public.Selecttext and you achieve the same, without loop

CREATE TABLE dico
    ("table_name" varchar(6), "column_name" varchar(5), "address" varchar(12), "home" varchar(15), "dico" TEXT)
;
    
INSERT INTO dico
    ("table_name", "column_name", "address", "home", "dico")
VALUES
    ('table1', 'one', 'table1.one', 'myschema.table1', ''),
    ('table1', 'two', 'table1.two', 'myschema.table1', ''),
    ('table1', 'three', 'table1.three', 'myschema.table1', ''),
    ('table2', 'aaa', 'table2.aaa', 'myschema.table2', ''),
    ('table2', 'bbb', 'table2.bbb', 'myschema.table2', '')
;
CREATE TABLE table1
    ("one" varchar(6), "two" varchar(5), "three" varchar(12))
;
    
INSERT INTO table1
    ("one", "two", "three")
VALUES
    ('a', 'd', 'f'),
    ('b', 'd', 'h'),
    ('a', 'd', 'g'),
    ('c', 'e', 'f'),
    ('a', 'e', 'g')
;

CREATE TABLE table2
    ("aaa" varchar(6), "bbb" varchar(5))
;
    
INSERT INTO table2
    ("aaa", "bbb")
VALUES
    ('aa', 'dd'),
    ('bbb', 'd'),
    ('aaa', 'd'),
    ('ab', 'ee'),
    ('aba', 'ee')
;
CREATE OR REPLACE FUNCTION public.Selecttext(tablen text, coln text)
RETURNS SETOF RECORD
LANGUAGE 'plpgsql'
AS $BODY$
BEGIN 
    RETURN QUERY EXECUTE 'SELECT textcol::TEXT FROM (SELECT json_build_object(colname, array_agg(DISTINCT val)) AS textcol FROM '
    ||$1||E'  CROSS JOIN LATERAL json_each_text(row_to_json( '
    ||$1||E' )) AS j(colname,val) WHERE colname =  \''
    ||$2||E'\' GROUP BY colname) t2';
END 
$BODY$;
CREATE OR REPLACE FUNCTION dico_columns() RETURNS void AS $$
DECLARE

  cur_table CURSOR FOR 
    SELECT "table_name", "column_name", "address", "home", "dico"
    FROM dico
    ;
   _tabl text;
   _coln text;
   _address text;
   _home text;
   _dico text;
 

BEGIN

    OPEN cur_table;
    LOOP
    FETCH cur_table INTO _tabl,_coln,_address,_home,_dico;
     
        EXIT WHEN NOT FOUND;

  IF _home IS NOT NULL  THEN
               UPDATE dico 
                SET dico= (sELECT textcol FROM  Selecttext(_tabl,_coln) AS t(textcol TEXT)) 
                WHERE "table_name" = _tabl AND "column_name"  = _coln
                AND "home"  = _home;
                   
  END IF;

    END LOOP;
    CLOSE cur_table;
    
END ;

$$ LANGUAGE plpgsql;
    
;
SELECT dico_columns()
| dico_columns |
| :----------- |
|              |
SELECT json_build_object(colname, array_agg(DISTINCT val)) AS textcol
FROM table1
CROSS JOIN LATERAL json_each_text(row_to_json(table1)) AS j(colname,val) 
WHERE colname='one' 
GROUP BY colname
| textcol                 |
| :---------------------- |
| {"one" : ["a","b","c"]} |
sELECT textcol FROM  Selecttext('table1','one') AS t(textcol TEXT)
| textcol                 |
| :---------------------- |
| {"one" : ["a","b","c"]} |
sELECT textcol FROM  Selecttext('table1','two') AS t(textcol TEXT)
| textcol             |
| :------------------ |
| {"two" : ["d","e"]} |
SELECT * frOM dico
table_name | column_name | address      | home            | dico                                   
:--------- | :---------- | :----------- | :-------------- | :--------------------------------------
table1     | one         | table1.one   | myschema.table1 | {"one" : ["a","b","c"]}                
table1     | two         | table1.two   | myschema.table1 | {"two" : ["d","e"]}                    
table1     | three       | table1.three | myschema.table1 | {"three" : ["f","g","h"]}              
table2     | aaa         | table2.aaa   | myschema.table2 | {"aaa" : ["aa","aaa","ab","aba","bbb"]}
table2     | bbb         | table2.bbb   | myschema.table2 | {"bbb" : ["d","dd","ee"]}              

db<>fiddle here