Windowing data with Postgresql

Cyril Canovas
Cyril Canovas
Windowing data with Postgresql
Table of Contents
Table of Contents

Data analysis, dasboarding, optimization sometimes requires setting up a data windowing system.The principle of data windowing is quite simple, it consists of aggregating an ordered set of elements into a single structure.
Let's see how we can do this without any lines of code by simply using the sql language of postgresql database.

"Goat" a data

We will use for this example the OHLC crypto market data by hour captured on Binance platform with API.Values are organized by hour ​​and by currency.

The transformation consists, in our case, of creating a grouping structure based on the “open_time” field and of associating a set of ordered elements with this group.

"Goat" to go

First, we are generating and numbering the groups. The groups are picked randomly in our example by using order by random().

DO $$
declare max_elements integer = 100; -- max time buckets or groups
BEGIN
	drop table if exists temp_ids;
	create local temporary table temp_ids 
	(
		row_number bigint NOT NULL,
		open_time bigint NOT NULL,
		PRIMARY KEY (row_number)
		
	);
	insert into temp_ids
		select  ROW_NUMBER() OVER (ORDER BY "open_time")-1,"open_time"
	from
	(
		select "open_time"
		from onehour_binanceklines
		group by open_time
		order by random()
		limit max_elements
	) dummy;
	create index open_time_index on temp_ids (open_time);
END $$;
select * from temp_ids;

The groups are generated in temporary table temp_ids.

Then we will create the first sub-group references in a new temporary table named temp_results.

DO $$
declare max_elements integer = 100; -- max time buckets or groups
BEGIN
	drop table if exists temp_ids;
	create local temporary table temp_ids 
	(
		group_number bigint NOT NULL,
		open_time bigint NOT NULL,
		PRIMARY KEY (group_number)
	);
	insert into temp_ids
		select  ROW_NUMBER() OVER (ORDER BY "open_time")-1,"open_time"
	from
	(
		select "open_time"
		from onehour_binanceklines
		group by open_time
		order by random()
		limit max_elements
	) dummy;
	create index open_time_index on temp_ids (open_time);
	drop table if exists temp_results;
	create local temporary table temp_results
	(
		group_number bigint NOT NULL,
		sub_group int NULL,
		open_time bigint NOT NULL
	);
	insert into temp_results
	select group_number,0,open_time from temp_ids;
END $$;
select * from temp_results;

Now, we have this :

The next step is to generate the missing sub-group references by using left outer join LATERAL clause.

DO $$
declare max_elements integer = 100; -- max time buckets or groups
declare window_size integer = 1; -- window_size -1 
BEGIN
	drop table if exists temp_ids;
	create local temporary table temp_ids 
	(
		group_number bigint NOT NULL,
		open_time bigint NOT NULL,
		PRIMARY KEY (group_number)
	);
	insert into temp_ids
		select  ROW_NUMBER() OVER (ORDER BY "open_time")-1,"open_time"
	from
	(
		select "open_time"
		from onehour_binanceklines
		group by open_time
		order by random()
		limit max_elements
	) dummy;
	create index open_time_index on temp_ids (open_time);
	
	drop table if exists temp_results;
	create local temporary table temp_results
	(
		group_number bigint NOT NULL,
		sub_group int NULL,
		open_time bigint NOT NULL
	);
	
	insert into temp_results
	select group_number,0,open_time from temp_ids;
	if (window_size)>0 then
		insert into temp_results
		select tt.group_number,0,prev_row.open_time from temp_ids tt
		left outer join LATERAL
		(
			select distinct oo.open_time 
			from onehour_binanceklines oo
			where
				(oo.open_time < tt.open_time)
			order by oo.open_time desc
			limit window_size
		) prev_row on true;
	end if;
END $$;
select * from temp_results;

To achieve the transformation, the subgroups are renumbered using a PARTITION BY clause.

DO $$
declare max_elements integer = 100; -- max time buckets or groups
declare window_size integer = 1; -- window_size -1 
BEGIN
	drop table if exists temp_ids;
	create local temporary table temp_ids 
	(
		group_number bigint NOT NULL,
		open_time bigint NOT NULL,
		PRIMARY KEY (group_number)
	);
	insert into temp_ids
		select  ROW_NUMBER() OVER (ORDER BY "open_time")-1,"open_time"
	from
	(
		select "open_time"
		from onehour_binanceklines
		group by open_time
		order by random()
		limit max_elements
	) dummy;
	create index open_time_index on temp_ids (open_time);
	
	drop table if exists temp_results;
	create local temporary table temp_results
	(
		group_number bigint NOT NULL,
		sub_group int NULL,
		open_time bigint NOT NULL
	);
	
	insert into temp_results
	select group_number,0,open_time from temp_ids;
	if (window_size)>0 then
		insert into temp_results
		select tt.group_number,0,prev_row.open_time from temp_ids tt
		left outer join LATERAL
		(
			select distinct oo.open_time 
			from onehour_binanceklines oo
			where
				(oo.open_time < tt.open_time)
			order by oo.open_time desc
			limit window_size
		) prev_row on true;
	end if;
	create index group_number_open_time_index on temp_results (group_number,"open_time"); 
	update temp_results
	set sub_group=tt.sub_group
	from
	(
		select "group_number","open_time",ROW_NUMBER() OVER (PARTITION BY "group_number" ORDER BY "group_number","open_time" desc )-1 sub_group from temp_results
	) tt
	where temp_results."group_number"=tt."group_number"
	and temp_results."open_time"=tt."open_time";
END $$;
select * from temp_results;

Finally, the data are extracted with no code.

DO $$
declare max_elements integer = 100; -- max time buckets or groups
declare window_size integer = 1; -- window_size -1 
BEGIN
	drop table if exists temp_ids;
	create local temporary table temp_ids 
	(
		group_number bigint NOT NULL,
		open_time bigint NOT NULL,
		PRIMARY KEY (group_number)
	);
	insert into temp_ids
		select  ROW_NUMBER() OVER (ORDER BY "open_time")-1,"open_time"
	from
	(
		select "open_time"
		from onehour_binanceklines
		group by open_time
		order by random()
		limit max_elements
	) dummy;
	create index open_time_index on temp_ids (open_time);
	
	drop table if exists temp_results;
	create local temporary table temp_results
	(
		group_number bigint NOT NULL,
		sub_group int NULL,
		open_time bigint NOT NULL
	);
	
	insert into temp_results
	select group_number,0,open_time from temp_ids;
	if (window_size)>0 then
		insert into temp_results
		select tt.group_number,0,prev_row.open_time from temp_ids tt
		left outer join LATERAL
		(
			select distinct oo.open_time 
			from onehour_binanceklines oo
			where
				(oo.open_time < tt.open_time)
			order by oo.open_time desc
			limit window_size
		) prev_row on true;
	end if;
	create index group_number_open_time_index on temp_results (group_number,"open_time"); 
	update temp_results
	set sub_group=tt.sub_group
	from
	(
		select "group_number","open_time",ROW_NUMBER() OVER (PARTITION BY "group_number" ORDER BY "group_number","open_time" desc )-1 sub_group from temp_results
	) tt
	where temp_results."group_number"=tt."group_number"
	and temp_results."open_time"=tt."open_time";
END $$;
select tt."group_number",tt."sub_group",bk."open_time",pi."symbol",bk."open",bk."high",bk."low",bk."close" from temp_results tt
inner join onehour_binanceklines bk on (bk."open_time"=tt."open_time")
inner join pairinfos pi on (pi."id"=bk."pairinfo_id")


Join the conversation.

Great! Check your inbox and click the link
Great! Next, complete checkout for full access to Goat Review
Welcome back! You've successfully signed in
You've successfully subscribed to Goat Review
Success! Your account is fully activated, you now have access to all content
Success! Your billing info has been updated
Your billing was not updated