commit 921569827c0c44581ddb9ce7ddeb68c263f109e9
parent b434c05927b3e3179c2039f19affa5e0dcaec728
Author: Christian Grothoff <grothoff@gnunet.org>
Date: Sun, 23 Mar 2025 14:27:45 +0100
-more debugging, seems to work now
Diffstat:
2 files changed, 113 insertions(+), 60 deletions(-)
diff --git a/src/backenddb/pg_statistics_examples.sql b/src/backenddb/pg_statistics_examples.sql
@@ -14,8 +14,8 @@
-- TALER; see the file COPYING. If not, see <http://www.gnu.org/licenses/>
--
--- @file example-statistics-0001.sql
--- @brief examples for how to add statistics to the merchant backend
+-- @file pg_statistics_example.sql
+-- @brief example/test case for how to add statistics to the merchant backend, NOT for production!
-- @author Christian Grothoff
-- Everything in one big transaction
@@ -24,7 +24,7 @@ BEGIN;
-- Check patch versioning is in place.
-- SELECT _v.register_patch('example-statistics-0001', NULL, NULL);
--- CREATE SCHEMA example_statistics;
+CREATE SCHEMA IF NOT EXISTS example_statistics;
SET search_path TO example_statistics,merchant;
@@ -57,21 +57,25 @@ VALUES
,'sales (before refunds)'
,'amount'
,ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
- 60, 120, 180, 240, 300,
- 24 * 60 * 60, 30 * 24 * 60 * 60, 365 * 24 * 60 * 60] -- second, minute, day, month, year
+ 60, 120 -- , 180, 240, 300,
+-- 24 * 60 * 60, 30 * 24 * 60 * 60, 365 * 24 * 60 * 60
+] -- second, minute, day, month, year
,ARRAY[1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 5, 5, 5, 5, 5,
- 60, 60 * 60, 24 * 60 * 60]
+ 10, 10 --, 5, 5, 5,
+-- 60, 60 * 60, 24 * 60 * 60
+ ]
),
('products-sold'
,'number of products sold'
,'number'
,ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
- 60, 120, 180, 240, 300,
- 24 * 60 * 60, 30 * 24 * 60 * 60, 365 * 24 * 60 * 60] -- second, minute, day, month, year
+ 60 -- , 120, 180, 240, 300,
+-- 24 * 60 * 60, 30 * 24 * 60 * 60, 365 * 24 * 60 * 60
+] -- second, minute, day, month, year
,ARRAY[1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 5, 5, 5, 5, 5,
- 60, 60 * 60, 24 * 60 * 60]
+ 10 --, 5, 5, 5, 5,
+-- 60, 60 * 60, 24 * 60 * 60
+ ]
)
ON CONFLICT DO NOTHING;
diff --git a/src/backenddb/pg_statistics_helpers.sql b/src/backenddb/pg_statistics_helpers.sql
@@ -20,7 +20,7 @@ CREATE OR REPLACE FUNCTION interval_to_start (
IN in_timestamp TIMESTAMP,
IN in_range statistic_range,
OUT out_bucket_start INT8
-)
+)
LANGUAGE plpgsql
AS $$
BEGIN
@@ -566,7 +566,7 @@ BEGIN
-- events are obsolete, delete them
DELETE FROM merchant_statistic_counter_event
WHERE imeta_serial_id = my_meta
- AND instance_id = my_instance_id
+ AND merchant_serial = my_instance_id
AND slot < my_time - my_range;
END IF;
END IF;
@@ -771,7 +771,7 @@ BEGIN
-- events are obsolete, delete them
DELETE FROM merchant_statistic_amount_event
WHERE imeta_serial_id = my_meta
- AND instance_id = my_instance_id
+ AND merchant_serial = my_instance_id
AND slot < my_time - my_range;
END IF;
END IF;
@@ -795,7 +795,9 @@ CREATE OR REPLACE PROCEDURE merchant_statistic_counter_gc ()
LANGUAGE plpgsql
AS $$
DECLARE
+ my_time INT8 DEFAULT ROUND(EXTRACT(epoch FROM CURRENT_TIMESTAMP(0)::TIMESTAMP) * 1000000)::INT8 / 1000 / 1000;
my_instance INT8;
+ my_instance_name TEXT;
my_rec RECORD;
my_sum RECORD;
my_meta INT8;
@@ -818,58 +820,83 @@ BEGIN
SELECT imeta_serial_id
,ranges
,precisions
+ ,slug
FROM merchant_statistic_interval_meta
LOOP
+ -- First, we query the current interval statistic to update its counters
+ SELECT merchant_id
+ INTO my_instance_name
+ FROM merchant_instances
+ WHERE merchant_serial = my_instance;
+ PERFORM FROM merchant_statistic_interval_number_get (my_rec.slug, my_instance_name);
+
my_meta = my_rec.imeta_serial_id;
my_ranges = my_rec.ranges;
my_precisions = my_rec.precisions;
- FOR my_i IN 1..COALESCE(array_length(my_ranges,1)-1,0)
+ FOR my_i IN 1..COALESCE(array_length(my_ranges,1),0)
LOOP
- min_slot = my_ranges[my_i];
- end_slot = my_ranges[my_i + 1];
my_precision = my_precisions[my_i];
+ IF 1 >= my_precision
+ THEN
+ -- Cannot coarsen in this case
+ CONTINUE;
+ END IF;
+
+ IF 1 = my_i
+ THEN
+ min_slot = 0;
+ ELSE
+ min_slot = my_ranges[my_i - 1];
+ END IF;
+ end_slot = my_ranges[my_i];
+ RAISE NOTICE 'Coarsening from [%,%) at %', my_time - end_slot, my_time - min_slot, my_precision;
+
LOOP
EXIT WHEN min_slot >= end_slot;
max_slot = min_slot + my_precision;
SELECT SUM(delta) AS total,
- COUNT(*) AS matches
+ COUNT(*) AS matches,
+ MIN(nevent_serial_id) AS rep_serial_id
INTO my_sum
FROM merchant_statistic_counter_event
WHERE merchant_serial=my_instance
AND imeta_serial_id=my_meta
- AND slot >= min_slot
- AND slot < max_slot;
+ AND slot >= my_time - max_slot
+ AND slot < my_time - min_slot;
+
+ RAISE NOTICE 'Found % entries between [%,%)', my_sum.matches, my_time - max_slot, my_time - min_slot;
-- we only proceed if we had more then one match (optimization)
IF FOUND AND my_sum.matches > 1
THEN
my_total = my_sum.total;
+
+ RAISE NOTICE 'combining % entries to representative % for slots [%-%)', my_sum.matches, my_sum.rep_serial_id, my_time - max_slot, my_time - min_slot;
+
-- combine entries
DELETE FROM merchant_statistic_counter_event
WHERE merchant_serial=my_instance
AND imeta_serial_id=my_meta
- AND slot >= min_slot
- AND slot < max_slot;
- -- The previous delete ensures that this is a conflict-free INSERT
- INSERT INTO merchant_statistic_counter_event
- (imeta_serial_id
- ,merchant_serial
- ,slot
- ,delta
- ) VALUES
- (my_meta
- ,my_instance
- ,min_slot
- ,my_total);
+ AND slot >= my_time - max_slot
+ AND slot < my_time - min_slot
+ AND nevent_serial_id > my_sum.rep_serial_id;
+ -- Now update the representative to the sum
+ UPDATE merchant_statistic_counter_event SET
+ delta = my_total
+ WHERE imeta_serial_id = my_meta
+ AND merchant_serial = my_instance
+ AND nevent_serial_id = my_sum.rep_serial_id;
END IF;
min_slot = min_slot + my_precision;
END LOOP; -- min_slot to end_slot by precision loop
END LOOP; -- my_i loop
-- Finally, delete all events beyond the range we care about
+
+ RAISE NOTICE 'deleting entries of %/% before % - % = %', my_instance, my_meta, my_time, my_ranges[array_length(my_ranges,1)], my_time - my_ranges[array_length(my_ranges,1)];
DELETE FROM merchant_statistic_counter_event
WHERE merchant_serial=my_instance
AND imeta_serial_id=my_meta
- AND slot > my_ranges[array_length(my_ranges,1)];
+ AND slot < my_time - my_ranges[array_length(my_ranges,1)];
END LOOP; -- my_rec loop
END LOOP; -- my_instance loop
END $$;
@@ -883,7 +910,9 @@ CREATE OR REPLACE PROCEDURE merchant_statistic_amount_gc ()
LANGUAGE plpgsql
AS $$
DECLARE
+ my_time INT8 DEFAULT ROUND(EXTRACT(epoch FROM CURRENT_TIMESTAMP(0)::TIMESTAMP) * 1000000)::INT8 / 1000 / 1000;
my_instance INT8;
+ my_instance_name TEXT;
my_rec RECORD;
my_sum RECORD;
my_meta INT8;
@@ -908,73 +937,93 @@ BEGIN
SELECT imeta_serial_id
,ranges
,precisions
+ ,slug
FROM merchant_statistic_interval_meta
LOOP
-
+
+ -- First, we query the current interval statistic to update its counters
+ SELECT merchant_id
+ INTO my_instance_name
+ FROM merchant_instances
+ WHERE merchant_serial = my_instance;
+ PERFORM FROM merchant_statistic_interval_amount_get (my_rec.slug, my_instance_name);
+
my_meta = my_rec.imeta_serial_id;
my_ranges = my_rec.ranges;
- my_precisions = my_rec.precisions;
+ my_precisions = my_rec.precisions;
FOR my_currency IN
SELECT DISTINCT delta_curr
FROM merchant_statistic_amount_event
WHERE imeta_serial_id = my_meta
LOOP
- FOR my_i IN 1..COALESCE(array_length(my_ranges,1)-1,0)
+ FOR my_i IN 1..COALESCE(array_length(my_ranges,1),0)
LOOP
- min_slot = my_ranges[my_i];
- end_slot = my_ranges[my_i + 1];
my_precision = my_precisions[my_i];
+ IF 1 >= my_precision
+ THEN
+ -- Cannot coarsen in this case
+ CONTINUE;
+ END IF;
+
+ IF 1 = my_i
+ THEN
+ min_slot = 0;
+ ELSE
+ min_slot = my_ranges[my_i - 1];
+ END IF;
+ end_slot = my_ranges[my_i];
+
+ RAISE NOTICE 'Coarsening from [%,%) at %', my_time - end_slot, my_time - min_slot, my_precision;
LOOP
EXIT WHEN min_slot >= end_slot;
max_slot = min_slot + my_precision;
SELECT SUM(delta_value) AS total_val,
SUM(delta_frac) AS total_frac,
- COUNT(*) AS matches
+ COUNT(*) AS matches,
+ MIN(aevent_serial_id) AS rep_serial_id
INTO my_sum
FROM merchant_statistic_amount_event
WHERE imeta_serial_id=my_meta
AND merchant_serial=my_instance
AND delta_curr = my_currency
- AND slot >= min_slot
- AND slot < max_slot;
+ AND slot >= my_time - max_slot
+ AND slot < my_time - max_slot;
-- we only proceed if we had more then one match (optimization)
IF FOUND AND my_sum.matches > 1
THEN
+ -- normalize new total
my_total_frac = my_sum.total_frac % 100000000;
my_total_val = my_sum.total_val + my_sum.total_frac / 100000000;
+
-- combine entries
DELETE FROM merchant_statistic_amount_event
WHERE imeta_serial_id=my_meta
AND merchant_serial=my_instance
AND delta_curr = my_currency
- AND slot >= min_slot
- AND slot < max_slot;
- -- The previous delete ensures that this is a conflict-free INSERT
- INSERT INTO merchant_statistic_amount_event
- (imeta_serial_id
- ,merchant_serial
- ,slot
- ,delta_curr
- ,delta_value
- ,delta_frac
- ) VALUES
- (my_meta
- ,my_instance
- ,min_slot
- ,my_currency
- ,my_total_val
- ,my_total_frac);
+ AND slot >= my_time - max_slot
+ AND slot < my_time - max_slot
+ AND aevent_serial_id > my_sum.rep_serial_id;
+ -- Now update the representative to the sum
+ UPDATE merchant_statistic_amount_event SET
+ delta_value = my_total_value
+ ,delta_frac = my_total_frac
+ WHERE imeta_serial_id = my_meta
+ AND merchant_serial = my_instance
+ AND delta_curr = my_currency
+ AND aevent_serial_id = my_sum.rep_serial_id;
END IF;
min_slot = min_slot + my_precision;
END LOOP; -- min_slot to end_slot by precision loop
END LOOP; -- my_i loop
END LOOP; -- my_currency loop
-- Finally, delete all events beyond the range we care about
+
+ RAISE NOTICE 'deleting entries of %/% before % - % = %', my_instance, my_meta, my_time, my_ranges[array_length(my_ranges,1)], my_time - my_ranges[array_length(my_ranges,1)];
DELETE FROM merchant_statistic_amount_event
WHERE merchant_serial=my_instance
AND imeta_serial_id=my_meta
- AND slot > my_ranges[array_length(my_ranges,1)];
+ AND slot < my_time - my_ranges[array_length(my_ranges,1)];
END LOOP; -- my_rec loop
END LOOP; -- my_instance loop
END $$;