-- Prepare Data
CREATE SCHEMA sonic_hashjoin_test_string_nospill;
SET current_schema = sonic_hashjoin_test_string_nospill;
-- table with 10,000 rows
CREATE TABLE sonic_hashjoin_test_string_nospill.VEC_HASHJOIN_TABLE_STRING_01(
C_INT INT, --0 serial number from 1
C_BIGINT BIGINT, --1 big int positive or negative
C_SMALLINT SMALLINT, --2 small int positive or negative
C_CHAR_1 CHAR, --3 one char (store as dict in charDatumArray, NO NULL)
C_CHAR_2 CHAR(1), --4 sex (store as dict in charDatumArray, HAS NULL)
C_CHAR_3 CHAR(10), --5 only a-z (store as char in charDatumArray, NO NULL)
C_CHAR_4 CHAR(100), --6 simple chinese (store as char in charDatumArray, NO NULL)
C_BPCHAR_1 BPCHAR, --7 c_char_1 + c_char_3 (encodingDatumArray, NO NULL)
C_BPCHAR_2 BPCHAR(10), --8 date (store as char in charDatumArray, HAS NULL)
C_VARCHAR_1 VARCHAR, --9 simple chinese (encodingDatumArray, NO NULL)
C_VARCHAR_2 VARCHAR(20), --10 date (encodingDatumArray, HAS NULL)
C_TEXT text, --11 rare chinese + random char + simple chinese (encodingDatumArray, NO NULL)
C_CHAR_5 CHAR(4), --12 rare chinese (store as dict in charDatumArray, HAS NULL)
C_CHAR_6 CHAR(20), --13 rare chinese (store as char in charDatumArray, HAS NULL)
C_VARCHAR_3 VARCHAR --14 rare chinese (store as encodingDatumArray, HAS NULL)
--C_NAME name /not supported by column table
--notice: those stored in charDatumArray when simple, will store in encodingDatumArray when as key
) with (orientation=column);
-- table with 3,000 rows
CREATE TABLE sonic_hashjoin_test_string_nospill.VEC_HASHJOIN_TABLE_STRING_02(
C_INT INT,
C_BIGINT BIGINT,
C_SMALLINT SMALLINT,
C_CHAR_1 CHAR,
C_CHAR_2 CHAR(1),
C_CHAR_3 CHAR(10),
C_CHAR_4 CHAR(100),
C_BPCHAR_1 BPCHAR,
C_BPCHAR_2 BPCHAR(10),
C_VARCHAR_1 VARCHAR,
C_VARCHAR_2 VARCHAR(20),
C_TEXT text,
C_CHAR_5 CHAR(4),
C_CHAR_6 CHAR(20),
C_VARCHAR_3 VARCHAR
--C_NAME name /not supported by column table
) WITH (orientation=column);
-- table with 10,000 rows and all C_INT is same
CREATE TABLE sonic_hashjoin_test_string_nospill.VEC_HASHJOIN_TABLE_STRING_03(
C_INT INT,
C_BIGINT BIGINT,
C_SMALLINT SMALLINT,
C_CHAR_1 CHAR,
C_CHAR_2 CHAR(1),
C_CHAR_3 CHAR(10),
C_CHAR_4 CHAR(100),
C_BPCHAR_1 BPCHAR,
C_BPCHAR_2 BPCHAR(10),
C_VARCHAR_1 VARCHAR,
C_VARCHAR_2 VARCHAR(20),
C_TEXT text,
C_CHAR_5 CHAR(4),
C_CHAR_6 CHAR(20),
C_VARCHAR_3 VARCHAR
) WITH (orientation=column);
COPY VEC_HASHJOIN_TABLE_STRING_01 FROM '@abs_srcdir@/data/vec_sonic_hashjoin_string_1.data' DELIMITER AS '|';
COPY VEC_HASHJOIN_TABLE_STRING_02 FROM '@abs_srcdir@/data/vec_sonic_hashjoin_string_2.data' DELIMITER AS '|';
COPY VEC_HASHJOIN_TABLE_STRING_03 FROM '@abs_srcdir@/data/vec_sonic_hashjoin_string_3.data' DELIMITER AS '|';
analyze VEC_HASHJOIN_TABLE_STRING_01;
analyze VEC_HASHJOIN_TABLE_STRING_02;
analyze VEC_HASHJOIN_TABLE_STRING_03;
SET enable_nestloop to off;
SET enable_mergejoin to off;
SET enable_hashjoin to on;
SET enable_sonic_hashjoin to on;
SHOW enable_sonic_hashjoin;
-- TEST IN MEMORY JOIN
--- test data number of single DN > 64K
--- integer as join key
SELECT COUNT(*) FROM VEC_HASHJOIN_TABLE_STRING_01 A INNER JOIN VEC_HASHJOIN_TABLE_STRING_01 B ON
A.C_INT = B.C_INT AND A.C_BIGINT = B.C_BIGINT AND A.C_SMALLINT = B.C_SMALLINT;
SELECT * FROM VEC_HASHJOIN_TABLE_STRING_01 A INNER JOIN VEC_HASHJOIN_TABLE_STRING_01 B ON
A.C_INT = B.C_INT AND A.C_BIGINT = B.C_BIGINT AND A.C_SMALLINT = B.C_SMALLINT
ORDER BY 1,2,3,4,5,6,7,8,9,10,11 LIMIT 100;
--- char as join key
SELECT COUNT(*) FROM VEC_HASHJOIN_TABLE_STRING_01 A INNER JOIN VEC_HASHJOIN_TABLE_STRING_01 B ON
A.C_INT = B.C_INT AND A.C_BIGINT = B.C_BIGINT AND A.C_SMALLINT = B.C_SMALLINT AND
A.C_CHAR_1 = B.C_CHAR_1 AND A.C_CHAR_2 = B.C_CHAR_2 AND A.C_CHAR_3 = B.C_CHAR_3 AND A.C_CHAR_4 = B.C_CHAR_4 AND
A.C_BPCHAR_1 = B.C_BPCHAR_1 AND A.C_BPCHAR_2 = B.C_BPCHAR_2 AND
A.C_VARCHAR_1 = B.C_VARCHAR_1 AND A.C_VARCHAR_2 = B.C_VARCHAR_2 AND A.C_TEXT = B.C_TEXT;
SELECT * FROM VEC_HASHJOIN_TABLE_STRING_01 A INNER JOIN VEC_HASHJOIN_TABLE_STRING_01 B ON
A.C_INT = B.C_INT AND A.C_BIGINT = B.C_BIGINT AND A.C_SMALLINT = B.C_SMALLINT AND
A.C_CHAR_1 = B.C_CHAR_1 AND A.C_CHAR_2 = B.C_CHAR_2 AND A.C_CHAR_3 = B.C_CHAR_3 AND A.C_CHAR_4 = B.C_CHAR_4 AND
A.C_BPCHAR_1 = B.C_BPCHAR_1 AND A.C_BPCHAR_2 = B.C_BPCHAR_2 AND
A.C_VARCHAR_1 = B.C_VARCHAR_1 AND A.C_VARCHAR_2 = B.C_VARCHAR_2 AND A.C_TEXT = B.C_TEXT
ORDER BY 1,2,3,4,5,6,7,8,9,10,11 LIMIT 100;
--test complicate join key
SELECT * FROM VEC_HASHJOIN_TABLE_STRING_01 A INNER JOIN VEC_HASHJOIN_TABLE_STRING_01 B ON
A.C_INT + 5 = B.C_INT AND substring(A.C_BPCHAR_1 FROM 1 FOR 1) = left(B.C_CHAR_1, 1)
WHERE B.C_BPCHAR_2 IS NOT NULL AND A.C_INT > 1000 AND A.C_INT < 2000
ORDER BY 1,2,3,4,5,6,7,8,9,10,11;
-- test data number of single DN > 16K AND probe table is bigger
--- integer as join key
SELECT /*+ leading ((A B))*/ COUNT(*) FROM VEC_HASHJOIN_TABLE_STRING_01 A INNER JOIN VEC_HASHJOIN_TABLE_STRING_02 B ON
A.C_INT = B.C_INT AND A.C_BIGINT = B.C_BIGINT AND A.C_SMALLINT = B.C_SMALLINT;
SELECT /*+ leading ((A B))*/ * FROM VEC_HASHJOIN_TABLE_STRING_01 A INNER JOIN VEC_HASHJOIN_TABLE_STRING_02 B ON
A.C_INT = B.C_INT AND A.C_BIGINT = B.C_BIGINT AND A.C_SMALLINT = B.C_SMALLINT
ORDER BY 1,2,3,4,5,6,7,8,9,10,11 LIMIT 100;
--- char as join key
SELECT /*+ leading ((A B))*/ COUNT(*) FROM VEC_HASHJOIN_TABLE_STRING_01 A INNER JOIN VEC_HASHJOIN_TABLE_STRING_02 B ON
A.C_INT = B.C_INT AND A.C_BIGINT = B.C_BIGINT AND A.C_SMALLINT = B.C_SMALLINT AND
A.C_CHAR_1 = B.C_CHAR_1 AND A.C_CHAR_2 = B.C_CHAR_2 AND A.C_CHAR_3 = B.C_CHAR_3 AND A.C_CHAR_4 = B.C_CHAR_4 AND
A.C_BPCHAR_1 = B.C_BPCHAR_1 AND A.C_BPCHAR_2 = B.C_BPCHAR_2 AND
A.C_VARCHAR_1 = B.C_VARCHAR_1 AND A.C_VARCHAR_2 = B.C_VARCHAR_2 AND A.C_TEXT = B.C_TEXT;
SELECT /*+ leading ((A B))*/ * FROM VEC_HASHJOIN_TABLE_STRING_01 A INNER JOIN VEC_HASHJOIN_TABLE_STRING_02 B ON
A.C_INT = B.C_INT AND A.C_BIGINT = B.C_BIGINT AND A.C_SMALLINT = B.C_SMALLINT AND
A.C_CHAR_1 = B.C_CHAR_1 AND A.C_CHAR_2 = B.C_CHAR_2 AND A.C_CHAR_3 = B.C_CHAR_3 AND A.C_CHAR_4 = B.C_CHAR_4 AND
A.C_BPCHAR_1 = B.C_BPCHAR_1 AND A.C_BPCHAR_2 = B.C_BPCHAR_2 AND
A.C_VARCHAR_1 = B.C_VARCHAR_1 AND A.C_VARCHAR_2 = B.C_VARCHAR_2 AND A.C_TEXT = B.C_TEXT
ORDER BY 1,2,3,4,5,6,7,8,9,10,11 LIMIT 100;
--test complicate join key
SELECT /*+ leading ((A B))*/ * FROM VEC_HASHJOIN_TABLE_STRING_01 A INNER JOIN VEC_HASHJOIN_TABLE_STRING_02 B ON
A.C_INT + 5 = B.C_INT AND substring(A.C_BPCHAR_1 FROM 1 FOR 1) = left(B.C_CHAR_1, 1)
WHERE B.C_BPCHAR_2 IS NOT NULL AND A.C_INT > 1000 AND A.C_INT < 2000
ORDER BY 1,2,3,4,5,6,7,8,9,10,11;
-- test data number of one build side partition > 64k
-- SELECT /*+ leading ((A B))*/ * FROM VEC_HASHJOIN_TABLE_STRING_01 A INNER JOIN VEC_HASHJOIN_TABLE_STRING_03 B ON
-- A.C_INT = B.C_INT ORDER BY 1,2,3,4,5,6,7,8,9,10,11 LIMIT 100;
-- test other string type
CREATE TABLE VEC_HASHJOIN_TABLE_STRING_04(
C_INT INTEGER,
C_BIT BIT,
C_VARBIT VARBIT,
C_NVARCHAR2 NVARCHAR2,
C_NCHAR NCHAR,
C_BYTEA BYTEA)
WITH (orientation=column) DISTRIBUTE BY HASH(C_INT);
INSERT INTO VEC_HASHJOIN_TABLE_STRING_04 VALUES (1, '0', '0', 'A', 'A', 'A');
INSERT INTO VEC_HASHJOIN_TABLE_STRING_04 VALUES (1, '1', '0', 'B', 'C', 'D');
SELECT * FROM VEC_HASHJOIN_TABLE_STRING_04 A INNER JOIN VEC_HASHJOIN_TABLE_STRING_04 B on A.C_INT = B.C_INT
ORDER BY 1,2,3,4,5,6,7,8,9,10,11,12;
SELECT * FROM VEC_HASHJOIN_TABLE_STRING_04 A INNER JOIN VEC_HASHJOIN_TABLE_STRING_04 B on A.C_BIT = B.C_BIT
AND A.C_VARBIT = B.C_VARBIT AND A.C_NVARCHAR2 = B.C_NVARCHAR2 AND A.C_NCHAR = B.C_NCHAR AND A.C_BYTEA = B.C_BYTEA
ORDER BY 1,2,3;
-- test join on different type
START TRANSACTION;
INSERT INTO VEC_HASHJOIN_TABLE_STRING_01 VALUES (70000, 70000, 100, 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A');
INSERT INTO VEC_HASHJOIN_TABLE_STRING_01 VALUES (70001, 70000, 100, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
SELECT * FROM VEC_HASHJOIN_TABLE_STRING_04 A INNER JOIN VEC_HASHJOIN_TABLE_STRING_04 B ON A.C_BIT = B.C_VARBIT AND A.C_NVARCHAR2 = B.C_NCHAR;
SELECT /*+ leading ((B A))*/ * FROM VEC_HASHJOIN_TABLE_STRING_01 A INNER JOIN VEC_HASHJOIN_TABLE_STRING_01 B ON A.C_CHAR_1 = B.C_BPCHAR_1 AND A.C_CHAR_3 = B.C_BPCHAR_1
AND A.C_VARCHAR_1 = B.C_BPCHAR_1 AND A.C_BPCHAR_2 = B.C_BPCHAR_1 AND A.C_VARCHAR_2 = B.C_BPCHAR_1 AND A.C_TEXT = B.C_BPCHAR_1
ORDER BY 1,2,3;
SELECT /*+ leading ((B A))*/ * FROM VEC_HASHJOIN_TABLE_STRING_01 A INNER JOIN VEC_HASHJOIN_TABLE_STRING_01 B ON A.C_CHAR_1 = B.C_VARCHAR_1 AND A.C_CHAR_3 = B.C_VARCHAR_1
AND A.C_VARCHAR_1 = B.C_VARCHAR_2 AND A.C_TEXT = B.C_VARCHAR_1 AND A.C_VARCHAR_2 = B.C_BPCHAR_1 AND A.C_TEXT = B.C_BPCHAR_1
ORDER BY 1,2,3;
SELECT /*+ leading ((B A))*/ * FROM VEC_HASHJOIN_TABLE_STRING_01 A INNER JOIN VEC_HASHJOIN_TABLE_STRING_01 B ON A.C_CHAR_1 = B.C_TEXT AND A.C_CHAR_3 = B.C_TEXT
AND A.C_VARCHAR_2 = B.C_TEXT
ORDER BY 1,2,3;
SELECT /*+ leading ((B A))*/ * FROM VEC_HASHJOIN_TABLE_STRING_01 A INNER JOIN VEC_HASHJOIN_TABLE_STRING_04 B ON A.C_CHAR_1 = B.C_NVARCHAR2 AND A.C_CHAR_3 = B.C_NVARCHAR2
AND A.C_VARCHAR_1 = B.C_NVARCHAR2 AND A.C_VARCHAR_2 = B.C_NVARCHAR2 AND A.C_TEXT = B.C_NVARCHAR2
ORDER BY 1,2,3;
SELECT /*+ leading ((B A))*/ * FROM VEC_HASHJOIN_TABLE_STRING_01 A INNER JOIN VEC_HASHJOIN_TABLE_STRING_04 B ON A.C_CHAR_1 = B.C_NCHAR AND A.C_CHAR_3 = B.C_NCHAR
AND A.C_VARCHAR_1 = B.C_NCHAR AND A.C_VARCHAR_2 = B.C_NCHAR AND A.C_TEXT = B.C_NCHAR
ORDER BY 1,2,3;
ROLLBACK;
-- test hybrid search
EXPLAIN (costs off) WITH
WITHA AS (
SELECT A.C_INT, A.C_CHAR_1 FROM VEC_HASHJOIN_TABLE_STRING_01 A INNER JOIN VEC_HASHJOIN_TABLE_STRING_01 B ON A.C_CHAR_3 = B.C_CHAR_3),
WITHB AS (SELECT /*+ leading ((AA BB)) */ AA.C_INT, AA.C_CHAR_2 FROM VEC_HASHJOIN_TABLE_STRING_03 AA INNER JOIN VEC_HASHJOIN_TABLE_STRING_01 BB ON AA.C_CHAR_3 = BB.C_CHAR_3
WHERE EXISTS(SELECT C.C_INT FROM VEC_HASHJOIN_TABLE_STRING_03 C INNER JOIN VEC_HASHJOIN_TABLE_STRING_01 D ON C.C_CHAR_1 = D.C_CHAR_3))
SELECT C_INT, C_CHAR_1 FROM WITHA WHERE C_CHAR_1 IN (SELECT C_CHAR_2 FROM WITHB) ORDER BY C_INT;
WITH
WITHA AS (
SELECT A.C_INT, A.C_CHAR_1 FROM VEC_HASHJOIN_TABLE_STRING_01 A INNER JOIN VEC_HASHJOIN_TABLE_STRING_01 B ON A.C_CHAR_3 = B.C_CHAR_3),
WITHB AS (SELECT /*+ leading ((AA BB)) */ AA.C_INT, AA.C_CHAR_2 FROM VEC_HASHJOIN_TABLE_STRING_03 AA INNER JOIN VEC_HASHJOIN_TABLE_STRING_01 BB ON AA.C_CHAR_3 = BB.C_CHAR_3
WHERE EXISTS(SELECT C.C_INT FROM VEC_HASHJOIN_TABLE_STRING_03 C INNER JOIN VEC_HASHJOIN_TABLE_STRING_01 D ON C.C_CHAR_1 = D.C_CHAR_3))
SELECT C_INT, C_CHAR_1 FROM WITHA WHERE C_CHAR_1 IN (SELECT C_CHAR_2 FROM WITHB) ORDER BY C_INT LIMIT 100;
WITH
WITHA AS (
SELECT A.C_INT, A.C_CHAR_1 FROM VEC_HASHJOIN_TABLE_STRING_01 A INNER JOIN VEC_HASHJOIN_TABLE_STRING_01 B ON A.C_CHAR_3 = B.C_CHAR_3),
WITHB AS (SELECT /*+ leading ((AA BB)) */ AA.C_INT, AA.C_CHAR_2 FROM VEC_HASHJOIN_TABLE_STRING_03 AA INNER JOIN VEC_HASHJOIN_TABLE_STRING_01 BB ON AA.C_CHAR_3 = BB.C_CHAR_3
WHERE EXISTS(SELECT C.C_INT FROM VEC_HASHJOIN_TABLE_STRING_03 C INNER JOIN VEC_HASHJOIN_TABLE_STRING_01 D ON C.C_CHAR_1 = D.C_CHAR_3))
SELECT COUNT(*) FROM WITHA WHERE C_CHAR_1 IN (SELECT C_CHAR_2 FROM WITHB);
-- end
DROP SCHEMA sonic_hashjoin_test_string_nospill cascade;