Syntax
APPROX_COUNT_DISTINCT(expr) OVER over_clause
Purpose
You can call this function to calculate the number of deduplicated rows for the expr expression. Only one value is returned, and this value is an approximate value. The function processes a large amount of data at a much faster speed than COUNT(), and the deviation from the exact result is negligible.
Examples
obclient> CREATE TABLE employees (manager_id INT,last_name varchar(50),hiredate varchar(50),SALARY INT);
INSERT INTO employees VALUES(100, 'Raphaely', '2017-07-01', 1700);
INSERT INTO employees VALUES(100, 'De Haan', '2018-05-01',11000);
INSERT INTO employees VALUES(100, 'Errazuriz', '2017-07-21', 1400);
INSERT INTO employees VALUES(100, 'Hartstein', '2019-05-01',14000);
INSERT INTO employees VALUES(100, 'Raphaely', '2017-07-22', 1700);
INSERT INTO employees VALUES(100, 'Weiss', '2019-07-11',13500);
INSERT INTO employees VALUES(100, 'Russell', '2019-10-05', 13000);
INSERT INTO employees VALUES(100, 'Partners', '2018-12-01',14000);
INSERT INTO employees VALUES(200, 'Ross', '2019-06-11',13500);
INSERT INTO employees VALUES(200, 'Bell', '2019-05-25', 13000);
INSERT INTO employees VALUES(200, 'Part', '2018-08-11',14000);
COMMIT;
obclient> SELECT last_name, salary, APPROX_COUNT_DISTINCT(salary) OVER (ORDER BY hiredate) "Variance"
FROM employees WHERE manager_id = 100 ORDER BY last_name, salary, "Variance";
+-----------+--------+----------+
LAST_NAME SALARY Variance
+-----------+--------+----------+
De Haan 11000 3
Errazuriz 1400 2
Hartstein 14000 4
Partners 14000 4
Raphaely 1700 1
Raphaely 1700 2
Russell 13000 6
Weiss 13500 5
+-----------+--------+----------+