67b1cb7a创建于 2024年2月1日历史提交
-- Prepare Data
CREATE SCHEMA sonic_hashjoin_test_string_nospill;
SET current_schema = sonic_hashjoin_test_string_nospill;

-- table with 10,000 rows
CREATE TABLE sonic_hashjoin_test_string_nospill.VEC_HASHJOIN_TABLE_STRING_01(
 C_INT INT,					--0 serial number from 1
 C_BIGINT BIGINT,			--1 big int positive or negative
 C_SMALLINT SMALLINT,		--2 small int positive or negative
 C_CHAR_1 CHAR,				--3 one char (store as dict in charDatumArray, NO NULL)
 C_CHAR_2 CHAR(1),			--4 sex (store as dict in charDatumArray, HAS NULL)
 C_CHAR_3 CHAR(10),			--5 only a-z (store as char in charDatumArray, NO NULL)
 C_CHAR_4 CHAR(100),		--6 simple chinese (store as char in charDatumArray, NO NULL)
 C_BPCHAR_1 BPCHAR,			--7 c_char_1 + c_char_3 (encodingDatumArray, NO NULL)
 C_BPCHAR_2 BPCHAR(10),		--8 date (store as char in charDatumArray, HAS NULL)
 C_VARCHAR_1 VARCHAR,		--9 simple chinese (encodingDatumArray, NO NULL)
 C_VARCHAR_2 VARCHAR(20),	--10 date (encodingDatumArray, HAS NULL)
 C_TEXT text,				--11 rare chinese +  random char + simple chinese (encodingDatumArray, NO NULL)
 C_CHAR_5 CHAR(4),			--12 rare chinese (store as dict in charDatumArray, HAS NULL)
 C_CHAR_6 CHAR(20),			--13 rare chinese (store as char in charDatumArray, HAS NULL)
 C_VARCHAR_3 VARCHAR		--14 rare chinese (store as encodingDatumArray, HAS NULL)
 --C_NAME name /not supported by column table
 --notice: those stored in charDatumArray when simple, will store in encodingDatumArray when as key
 ) with (orientation=column);

-- table with 3,000 rows
CREATE TABLE sonic_hashjoin_test_string_nospill.VEC_HASHJOIN_TABLE_STRING_02(
 C_INT INT,
 C_BIGINT BIGINT,
 C_SMALLINT SMALLINT,
 C_CHAR_1 CHAR,
 C_CHAR_2 CHAR(1),
 C_CHAR_3 CHAR(10),
 C_CHAR_4 CHAR(100),
 C_BPCHAR_1 BPCHAR,
 C_BPCHAR_2 BPCHAR(10),
 C_VARCHAR_1 VARCHAR,
 C_VARCHAR_2 VARCHAR(20),
 C_TEXT text,
 C_CHAR_5 CHAR(4),
 C_CHAR_6 CHAR(20),
 C_VARCHAR_3 VARCHAR
 --C_NAME name /not supported by column table
 ) WITH (orientation=column);

-- table with 10,000 rows and all C_INT is same
CREATE TABLE sonic_hashjoin_test_string_nospill.VEC_HASHJOIN_TABLE_STRING_03(
 C_INT INT,
 C_BIGINT BIGINT,
 C_SMALLINT SMALLINT,
 C_CHAR_1 CHAR,
 C_CHAR_2 CHAR(1),
 C_CHAR_3 CHAR(10),
 C_CHAR_4 CHAR(100),
 C_BPCHAR_1 BPCHAR,
 C_BPCHAR_2 BPCHAR(10),
 C_VARCHAR_1 VARCHAR,
 C_VARCHAR_2 VARCHAR(20),
 C_TEXT text,
 C_CHAR_5 CHAR(4),
 C_CHAR_6 CHAR(20),
 C_VARCHAR_3 VARCHAR
 ) WITH (orientation=column);

COPY VEC_HASHJOIN_TABLE_STRING_01 FROM '@abs_srcdir@/data/vec_sonic_hashjoin_string_1.data' DELIMITER AS '|';
COPY VEC_HASHJOIN_TABLE_STRING_02 FROM '@abs_srcdir@/data/vec_sonic_hashjoin_string_2.data' DELIMITER AS '|';
COPY VEC_HASHJOIN_TABLE_STRING_03 FROM '@abs_srcdir@/data/vec_sonic_hashjoin_string_3.data' DELIMITER AS '|';

analyze VEC_HASHJOIN_TABLE_STRING_01;
analyze VEC_HASHJOIN_TABLE_STRING_02;
analyze VEC_HASHJOIN_TABLE_STRING_03;

SET enable_nestloop to off;
SET enable_mergejoin to off;
SET enable_hashjoin to on;
SET enable_sonic_hashjoin to on;
SHOW enable_sonic_hashjoin;

-- TEST IN MEMORY JOIN
--- test data number of single DN > 64K
--- integer as join key
SELECT COUNT(*) FROM VEC_HASHJOIN_TABLE_STRING_01 A INNER JOIN VEC_HASHJOIN_TABLE_STRING_01 B ON
A.C_INT = B.C_INT AND A.C_BIGINT = B.C_BIGINT AND A.C_SMALLINT = B.C_SMALLINT;

SELECT * FROM VEC_HASHJOIN_TABLE_STRING_01 A INNER JOIN VEC_HASHJOIN_TABLE_STRING_01 B ON
A.C_INT = B.C_INT AND A.C_BIGINT = B.C_BIGINT AND A.C_SMALLINT = B.C_SMALLINT
ORDER BY 1,2,3,4,5,6,7,8,9,10,11 LIMIT 100;

--- char as join key
SELECT COUNT(*) FROM VEC_HASHJOIN_TABLE_STRING_01 A INNER JOIN VEC_HASHJOIN_TABLE_STRING_01 B ON
A.C_INT = B.C_INT AND A.C_BIGINT = B.C_BIGINT AND A.C_SMALLINT = B.C_SMALLINT AND
A.C_CHAR_1 = B.C_CHAR_1 AND A.C_CHAR_2 = B.C_CHAR_2 AND A.C_CHAR_3 = B.C_CHAR_3 AND A.C_CHAR_4 = B.C_CHAR_4 AND
A.C_BPCHAR_1 = B.C_BPCHAR_1 AND A.C_BPCHAR_2 = B.C_BPCHAR_2 AND
A.C_VARCHAR_1 = B.C_VARCHAR_1 AND A.C_VARCHAR_2 = B.C_VARCHAR_2 AND A.C_TEXT = B.C_TEXT;

SELECT * FROM VEC_HASHJOIN_TABLE_STRING_01 A INNER JOIN VEC_HASHJOIN_TABLE_STRING_01 B ON
A.C_INT = B.C_INT AND A.C_BIGINT = B.C_BIGINT AND A.C_SMALLINT = B.C_SMALLINT AND
A.C_CHAR_1 = B.C_CHAR_1 AND A.C_CHAR_2 = B.C_CHAR_2 AND A.C_CHAR_3 = B.C_CHAR_3 AND A.C_CHAR_4 = B.C_CHAR_4 AND
A.C_BPCHAR_1 = B.C_BPCHAR_1 AND A.C_BPCHAR_2 = B.C_BPCHAR_2 AND
A.C_VARCHAR_1 = B.C_VARCHAR_1 AND A.C_VARCHAR_2 = B.C_VARCHAR_2 AND A.C_TEXT = B.C_TEXT
ORDER BY 1,2,3,4,5,6,7,8,9,10,11 LIMIT 100;

--test complicate join key
SELECT * FROM VEC_HASHJOIN_TABLE_STRING_01 A INNER JOIN VEC_HASHJOIN_TABLE_STRING_01 B ON
A.C_INT + 5 = B.C_INT AND substring(A.C_BPCHAR_1 FROM 1 FOR 1) = left(B.C_CHAR_1, 1)
WHERE B.C_BPCHAR_2 IS NOT NULL AND A.C_INT > 1000 AND A.C_INT < 2000
ORDER BY 1,2,3,4,5,6,7,8,9,10,11;

-- test data number of single DN > 16K AND probe table is bigger
--- integer as join key
SELECT /*+ leading ((A B))*/ COUNT(*) FROM VEC_HASHJOIN_TABLE_STRING_01 A INNER JOIN VEC_HASHJOIN_TABLE_STRING_02 B ON
A.C_INT = B.C_INT AND A.C_BIGINT = B.C_BIGINT AND A.C_SMALLINT = B.C_SMALLINT;

SELECT /*+ leading ((A B))*/ * FROM VEC_HASHJOIN_TABLE_STRING_01 A INNER JOIN VEC_HASHJOIN_TABLE_STRING_02 B ON
A.C_INT = B.C_INT AND A.C_BIGINT = B.C_BIGINT AND A.C_SMALLINT = B.C_SMALLINT
ORDER BY 1,2,3,4,5,6,7,8,9,10,11 LIMIT 100;

--- char as join key
SELECT /*+ leading ((A B))*/ COUNT(*) FROM VEC_HASHJOIN_TABLE_STRING_01 A INNER JOIN VEC_HASHJOIN_TABLE_STRING_02 B ON
A.C_INT = B.C_INT AND A.C_BIGINT = B.C_BIGINT AND A.C_SMALLINT = B.C_SMALLINT AND
A.C_CHAR_1 = B.C_CHAR_1 AND A.C_CHAR_2 = B.C_CHAR_2 AND A.C_CHAR_3 = B.C_CHAR_3 AND A.C_CHAR_4 = B.C_CHAR_4 AND
A.C_BPCHAR_1 = B.C_BPCHAR_1 AND A.C_BPCHAR_2 = B.C_BPCHAR_2 AND
A.C_VARCHAR_1 = B.C_VARCHAR_1 AND A.C_VARCHAR_2 = B.C_VARCHAR_2 AND A.C_TEXT = B.C_TEXT;

SELECT /*+ leading ((A B))*/ * FROM VEC_HASHJOIN_TABLE_STRING_01 A INNER JOIN VEC_HASHJOIN_TABLE_STRING_02 B ON
A.C_INT = B.C_INT AND A.C_BIGINT = B.C_BIGINT AND A.C_SMALLINT = B.C_SMALLINT AND
A.C_CHAR_1 = B.C_CHAR_1 AND A.C_CHAR_2 = B.C_CHAR_2 AND A.C_CHAR_3 = B.C_CHAR_3 AND A.C_CHAR_4 = B.C_CHAR_4 AND
A.C_BPCHAR_1 = B.C_BPCHAR_1 AND A.C_BPCHAR_2 = B.C_BPCHAR_2 AND
A.C_VARCHAR_1 = B.C_VARCHAR_1 AND A.C_VARCHAR_2 = B.C_VARCHAR_2 AND A.C_TEXT = B.C_TEXT
ORDER BY 1,2,3,4,5,6,7,8,9,10,11 LIMIT 100;

--test complicate join key
SELECT /*+ leading ((A B))*/ * FROM VEC_HASHJOIN_TABLE_STRING_01 A INNER JOIN VEC_HASHJOIN_TABLE_STRING_02 B ON
A.C_INT + 5 = B.C_INT AND substring(A.C_BPCHAR_1 FROM 1 FOR 1) = left(B.C_CHAR_1, 1)
WHERE B.C_BPCHAR_2 IS NOT NULL AND A.C_INT > 1000 AND A.C_INT < 2000
ORDER BY 1,2,3,4,5,6,7,8,9,10,11;

-- test data number of one build side partition > 64k
-- SELECT /*+ leading ((A B))*/ * FROM VEC_HASHJOIN_TABLE_STRING_01 A INNER JOIN VEC_HASHJOIN_TABLE_STRING_03 B ON
-- A.C_INT = B.C_INT ORDER BY 1,2,3,4,5,6,7,8,9,10,11 LIMIT 100;

-- test other string type
CREATE TABLE VEC_HASHJOIN_TABLE_STRING_04(
	C_INT INTEGER,
	C_BIT BIT,
	C_VARBIT VARBIT,
	C_NVARCHAR2 NVARCHAR2,
	C_NCHAR NCHAR,
	C_BYTEA BYTEA)
WITH (orientation=column) DISTRIBUTE BY HASH(C_INT);

INSERT INTO VEC_HASHJOIN_TABLE_STRING_04 VALUES (1, '0', '0', 'A', 'A', 'A');
INSERT INTO VEC_HASHJOIN_TABLE_STRING_04 VALUES (1, '1', '0', 'B', 'C', 'D');
SELECT * FROM VEC_HASHJOIN_TABLE_STRING_04 A INNER JOIN VEC_HASHJOIN_TABLE_STRING_04 B on A.C_INT = B.C_INT
ORDER BY 1,2,3,4,5,6,7,8,9,10,11,12;
SELECT * FROM VEC_HASHJOIN_TABLE_STRING_04 A INNER JOIN VEC_HASHJOIN_TABLE_STRING_04 B on A.C_BIT = B.C_BIT
AND A.C_VARBIT = B.C_VARBIT AND A.C_NVARCHAR2 = B.C_NVARCHAR2 AND A.C_NCHAR = B.C_NCHAR AND A.C_BYTEA = B.C_BYTEA
ORDER BY 1,2,3;

-- test join on different type
START TRANSACTION;
INSERT INTO VEC_HASHJOIN_TABLE_STRING_01 VALUES (70000, 70000, 100, 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A');
INSERT INTO VEC_HASHJOIN_TABLE_STRING_01 VALUES (70001, 70000, 100, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
SELECT * FROM VEC_HASHJOIN_TABLE_STRING_04 A INNER JOIN VEC_HASHJOIN_TABLE_STRING_04 B ON A.C_BIT = B.C_VARBIT AND A.C_NVARCHAR2 = B.C_NCHAR;
SELECT /*+ leading ((B A))*/ * FROM VEC_HASHJOIN_TABLE_STRING_01 A INNER JOIN VEC_HASHJOIN_TABLE_STRING_01 B ON A.C_CHAR_1 = B.C_BPCHAR_1 AND A.C_CHAR_3 = B.C_BPCHAR_1
AND A.C_VARCHAR_1 = B.C_BPCHAR_1 AND A.C_BPCHAR_2 = B.C_BPCHAR_1 AND A.C_VARCHAR_2 = B.C_BPCHAR_1 AND A.C_TEXT = B.C_BPCHAR_1
ORDER BY 1,2,3;
SELECT /*+ leading ((B A))*/ * FROM VEC_HASHJOIN_TABLE_STRING_01 A INNER JOIN VEC_HASHJOIN_TABLE_STRING_01 B ON A.C_CHAR_1 = B.C_VARCHAR_1 AND A.C_CHAR_3 = B.C_VARCHAR_1
AND A.C_VARCHAR_1 = B.C_VARCHAR_2 AND A.C_TEXT = B.C_VARCHAR_1 AND A.C_VARCHAR_2 = B.C_BPCHAR_1 AND A.C_TEXT = B.C_BPCHAR_1
ORDER BY 1,2,3;
SELECT /*+ leading ((B A))*/ * FROM VEC_HASHJOIN_TABLE_STRING_01 A INNER JOIN VEC_HASHJOIN_TABLE_STRING_01 B ON A.C_CHAR_1 = B.C_TEXT AND A.C_CHAR_3 = B.C_TEXT
AND A.C_VARCHAR_2 = B.C_TEXT
ORDER BY 1,2,3;
SELECT /*+ leading ((B A))*/ * FROM VEC_HASHJOIN_TABLE_STRING_01 A INNER JOIN VEC_HASHJOIN_TABLE_STRING_04 B ON A.C_CHAR_1 = B.C_NVARCHAR2 AND A.C_CHAR_3 = B.C_NVARCHAR2
AND A.C_VARCHAR_1 = B.C_NVARCHAR2 AND A.C_VARCHAR_2 = B.C_NVARCHAR2 AND A.C_TEXT = B.C_NVARCHAR2
ORDER BY 1,2,3;
SELECT /*+ leading ((B A))*/ * FROM VEC_HASHJOIN_TABLE_STRING_01 A INNER JOIN VEC_HASHJOIN_TABLE_STRING_04 B ON A.C_CHAR_1 = B.C_NCHAR AND A.C_CHAR_3 = B.C_NCHAR
AND A.C_VARCHAR_1 = B.C_NCHAR AND A.C_VARCHAR_2 = B.C_NCHAR AND A.C_TEXT = B.C_NCHAR
ORDER BY 1,2,3;
ROLLBACK;

-- test hybrid search
EXPLAIN (costs off) WITH
WITHA AS (
	SELECT A.C_INT, A.C_CHAR_1 FROM VEC_HASHJOIN_TABLE_STRING_01 A INNER JOIN VEC_HASHJOIN_TABLE_STRING_01 B ON A.C_CHAR_3 = B.C_CHAR_3),
WITHB AS (SELECT /*+ leading ((AA BB)) */ AA.C_INT, AA.C_CHAR_2 FROM VEC_HASHJOIN_TABLE_STRING_03 AA INNER JOIN VEC_HASHJOIN_TABLE_STRING_01 BB ON AA.C_CHAR_3 = BB.C_CHAR_3
	WHERE EXISTS(SELECT C.C_INT FROM VEC_HASHJOIN_TABLE_STRING_03 C INNER JOIN VEC_HASHJOIN_TABLE_STRING_01 D ON C.C_CHAR_1 = D.C_CHAR_3))
SELECT C_INT, C_CHAR_1 FROM WITHA WHERE C_CHAR_1 IN (SELECT C_CHAR_2 FROM WITHB) ORDER BY C_INT;

WITH
WITHA AS (
	SELECT A.C_INT, A.C_CHAR_1 FROM VEC_HASHJOIN_TABLE_STRING_01 A INNER JOIN VEC_HASHJOIN_TABLE_STRING_01 B ON A.C_CHAR_3 = B.C_CHAR_3),
WITHB AS (SELECT /*+ leading ((AA BB)) */ AA.C_INT, AA.C_CHAR_2 FROM VEC_HASHJOIN_TABLE_STRING_03 AA INNER JOIN VEC_HASHJOIN_TABLE_STRING_01 BB ON AA.C_CHAR_3 = BB.C_CHAR_3
	WHERE EXISTS(SELECT C.C_INT FROM VEC_HASHJOIN_TABLE_STRING_03 C INNER JOIN VEC_HASHJOIN_TABLE_STRING_01 D ON C.C_CHAR_1 = D.C_CHAR_3))
SELECT C_INT, C_CHAR_1 FROM WITHA WHERE C_CHAR_1 IN (SELECT C_CHAR_2 FROM WITHB) ORDER BY C_INT LIMIT 100;

WITH
WITHA AS (
	SELECT A.C_INT, A.C_CHAR_1 FROM VEC_HASHJOIN_TABLE_STRING_01 A INNER JOIN VEC_HASHJOIN_TABLE_STRING_01 B ON A.C_CHAR_3 = B.C_CHAR_3),
WITHB AS (SELECT /*+ leading ((AA BB)) */ AA.C_INT, AA.C_CHAR_2 FROM VEC_HASHJOIN_TABLE_STRING_03 AA INNER JOIN VEC_HASHJOIN_TABLE_STRING_01 BB ON AA.C_CHAR_3 = BB.C_CHAR_3
	WHERE EXISTS(SELECT C.C_INT FROM VEC_HASHJOIN_TABLE_STRING_03 C INNER JOIN VEC_HASHJOIN_TABLE_STRING_01 D ON C.C_CHAR_1 = D.C_CHAR_3))
SELECT COUNT(*) FROM WITHA WHERE C_CHAR_1 IN (SELECT C_CHAR_2 FROM WITHB);

-- end
DROP SCHEMA sonic_hashjoin_test_string_nospill cascade;