--test parquet foreign data wrapper.
--
-- Settings to make the result deterministic
SET datestyle = "ISO, YMD";
set enable_hdfs_predicate_pushdown=false;
--Unit 1. test plan without distributed clause
DROP FOREIGN TABLE IF EXISTS lineitem;
CREATE SERVER hdfs_server FOREIGN DATA WRAPPER HDFS_FDW OPTIONS (type 'hdfs', address '@hdfshostname@:@hdfsport@',hdfscfgpath '@hdfscfgpath@');
create foreign table lineitem (
L_ORDERKEY BIGINT
, L_PARTKEY BIGINT
, L_SUPPKEY BIGINT
, L_LINENUMBER BIGINT
, L_QUANTITY DECIMAL(15,2)
, L_EXTENDEDPRICE DECIMAL(15,2)
, L_DISCOUNT DECIMAL(15,2)
, L_TAX DECIMAL(15,2)
, L_RETURNFLAG CHAR(1)
, L_LINESTATUS CHAR(1)
, L_SHIPDATE DATE
, L_COMMITDATE DATE
, L_RECEIPTDATE DATE
, L_SHIPINSTRUCT CHAR(25)
, L_SHIPMODE CHAR(10)
, L_COMMENT VARCHAR(44)
)
SERVER hdfs_server
OPTIONS(format 'parquet', filenames '/user/hive/warehouse/mppdb.db/lineitem_parquet/000000_0,/user/hive/warehouse/mppdb.db/lineitem_parquet/000001_0') distribute by roundrobin;
DROP FOREIGN TABLE IF EXISTS nation;
create foreign table nation (
N_NATIONKEY INT
, N_NAME CHAR(25)
, N_REGIONKEY INT
, N_COMMENT VARCHAR(152)
)
SERVER hdfs_server
OPTIONS(format 'parquet', filenames '/user/hive/warehouse/mppdb.db/nation_parquet/000000_0') distribute by roundrobin;
DROP FOREIGN TABLE IF EXISTS region;
create foreign table region (
R_REGIONKEY INT
, R_NAME TEXT
, R_COMMENT VARCHAR(152)
)
SERVER hdfs_server
OPTIONS(format 'parquet', filenames '/user/hive/warehouse/mppdb.db/region_parquet/000000_0') distribute by roundrobin;
DROP FOREIGN TABLE IF EXISTS part;
create foreign table part (
P_PARTKEY BIGINT
, P_NAME VARCHAR(55)
, P_MFGR CHAR(25)
, P_BRAND CHAR(10)
, P_TYPE VARCHAR(25)
, P_SIZE BIGINT
, P_CONTAINER CHAR(10)
, P_RETAILPRICE DECIMAL(15,2)
, P_COMMENT VARCHAR(23)
)
SERVER hdfs_server
OPTIONS(format 'parquet', filenames '/user/hive/warehouse/mppdb.db/part_parquet/000000_0') distribute by roundrobin;
DROP FOREIGN TABLE IF EXISTS supplier;
create foreign table supplier (
S_SUPPKEY BIGINT
, S_NAME CHAR(25)
, S_ADDRESS VARCHAR(40)
, S_NATIONKEY INT
, S_PHONE CHAR(15)
, S_ACCTBAL DECIMAL(15,2)
, S_COMMENT VARCHAR(101)
)
SERVER hdfs_server
OPTIONS(format 'parquet', filenames '/user/hive/warehouse/mppdb.db/supplier_parquet/000000_0') distribute by roundrobin;
DROP FOREIGN TABLE IF EXISTS partsupp;
create foreign table partsupp (
PS_PARTKEY BIGINT
, PS_SUPPKEY BIGINT
, PS_AVAILQTY BIGINT
, PS_SUPPLYCOST DECIMAL(15,2)
, PS_COMMENT VARCHAR(199)
)
SERVER hdfs_server
OPTIONS(format 'parquet', filenames '/user/hive/warehouse/mppdb.db/partsupp_parquet/000000_0') distribute by roundrobin;
analyze foreign tables;
explain (verbose, costs off, nodes off) SELECT L_SHIPDATE FROM lineitem WHERE L_orderkey BETWEEN 509050 AND 509070;
explain (verbose, costs off, nodes off) SELECT count(L_SHIPDATE) FROM lineitem WHERE L_orderkey BETWEEN 509050 AND 509070;
explain (verbose, costs off, nodes off) select n_name, r_name from nation join region on n_regionkey=r_regionkey;
explain (verbose, costs off, nodes off)
select
s.s_acctbal, s.s_name, n.n_name, p.p_partkey, ps.ps_supplycost, p.p_mfgr, s.s_address, s.s_phone, s.s_comment
from
nation n join region r
on
n.n_regionkey = r.r_regionkey and n.n_name = 'FRANCE'
join
supplier s
on
s.s_nationkey = n.n_nationkey
join
partsupp ps
on
s.s_suppkey = ps.ps_suppkey
join
part p
on
p.p_partkey = ps.ps_partkey and p.p_size = 15 and p.p_type like '%BRASS';
--Test Unit 2: distributed foreign table
DROP FOREIGN TABLE IF EXISTS lineitem;
Create foreign table lineitem (
L_ORDERKEY BIGINT
, L_PARTKEY BIGINT
, L_SUPPKEY BIGINT
, L_LINENUMBER BIGINT
, L_QUANTITY DECIMAL(15,2)
, L_EXTENDEDPRICE DECIMAL(15,2)
, L_DISCOUNT DECIMAL(15,2)
, L_TAX DECIMAL(15,2)
, L_RETURNFLAG CHAR(1)
, L_LINESTATUS CHAR(1)
, L_SHIPDATE DATE
, L_COMMITDATE DATE
, L_RECEIPTDATE DATE
, L_SHIPINSTRUCT CHAR(25)
, L_SHIPMODE CHAR(10)
, L_COMMENT VARCHAR(44)
)
SERVER hdfs_server OPTIONS(format 'parquet', foldername '/user/hive/warehouse/mppdb.db/lineitem_parquet') distribute by roundrobin;
DROP FOREIGN TABLE IF EXISTS customer;
CREATE FOREIGN TABLE customer (
C_CUSTKEY BIGINT
, C_NAME VARCHAR(25)
, C_ADDRESS VARCHAR(40)
, C_NATIONKEY INT
, C_PHONE CHAR(15)
, C_ACCTBAL DECIMAL(15,2)
, C_MKTSEGMENT CHAR(10)
, C_COMMENT VARCHAR(117)
)
SERVER hdfs_server
OPTIONS(format 'parquet', filenames '/user/hive/warehouse/mppdb.db/customer_parquet/000000_0') distribute by roundrobin;
DROP FOREIGN TABLE IF EXISTS orders;
create foreign table orders (
O_ORDERKEY BIGINT
, O_CUSTKEY BIGINT
, O_ORDERSTATUS CHAR(1)
, O_TOTALPRICE DECIMAL(15,2)
, O_ORDERDATE DATE
, O_ORDERPRIORITY CHAR(15)
, O_CLERK CHAR(15)
, O_SHIPPRIORITY BIGINT
, O_COMMENT VARCHAR(79)
)
SERVER hdfs_server
OPTIONS(format 'parquet', foldername '/user/hive/warehouse/mppdb.db/orders_parquet')
distribute by roundrobin;
DROP FOREIGN TABLE IF EXISTS part;
create foreign table part (
P_PARTKEY BIGINT
, P_NAME VARCHAR(55)
, P_MFGR CHAR(25)
, P_BRAND CHAR(10)
, P_TYPE VARCHAR(25)
, P_SIZE BIGINT
, P_CONTAINER CHAR(10)
, P_RETAILPRICE DECIMAL(15,2)
, P_COMMENT VARCHAR(23)
)
SERVER hdfs_server
OPTIONS(format 'parquet', filenames '/user/hive/warehouse/mppdb.db/part_parquet/000000_0') distribute by roundrobin;
DROP FOREIGN TABLE IF EXISTS supplier;
create foreign table supplier (
S_SUPPKEY BIGINT
, S_NAME CHAR(25)
, S_ADDRESS VARCHAR(40)
, S_NATIONKEY INT
, S_PHONE CHAR(15)
, S_ACCTBAL DECIMAL(15,2)
, S_COMMENT VARCHAR(101)
)
SERVER hdfs_server
OPTIONS(format 'parquet', filenames '/user/hive/warehouse/mppdb.db/supplier_parquet/000000_0') distribute by roundrobin;
DROP FOREIGN TABLE IF EXISTS partsupp;
create foreign table partsupp (
PS_PARTKEY BIGINT
, PS_SUPPKEY BIGINT
, PS_AVAILQTY BIGINT
, PS_SUPPLYCOST DECIMAL(15,2)
, PS_COMMENT VARCHAR(199)
)
SERVER hdfs_server
OPTIONS(format 'parquet', filenames '/user/hive/warehouse/mppdb.db/partsupp_parquet/000000_0') distribute by roundrobin;
DROP FOREIGN TABLE IF EXISTS nation;
create foreign table nation (
N_NATIONKEY INT
, N_NAME CHAR(25)
, N_REGIONKEY INT
, N_COMMENT VARCHAR(152)
)
SERVER hdfs_server
OPTIONS(format 'parquet', filenames '/user/hive/warehouse/mppdb.db/nation_parquet/000000_0') distribute by roundrobin;
DROP FOREIGN TABLE IF EXISTS region;
create foreign table region (
R_REGIONKEY INT
, R_NAME TEXT
, R_COMMENT VARCHAR(152)
)
SERVER hdfs_server
OPTIONS(format 'parquet', filenames '/user/hive/warehouse/mppdb.db/region_parquet/000000_0') distribute by roundrobin;
analyze foreign tables;
-- there are bugs for the following three queries. cause EOF error from datanode
explain (verbose, costs off, nodes off) SELECT L_shipdate FROM lineitem WHERE L_orderkey between 2125190 and 2125200;
explain (verbose, costs off, nodes off) SELECT count(L_shipdate) FROM lineitem WHERE L_orderkey between 2125190 and 2125200;
set enable_broadcast=off;
-- TPCH - Q1
explain (verbose, costs off, nodes off) SELECT
L_RETURNFLAG, L_LINESTATUS, SUM(L_QUANTITY), SUM(L_EXTENDEDPRICE), SUM(L_EXTENDEDPRICE*(1-L_DISCOUNT)), SUM(L_EXTENDEDPRICE*(1-L_DISCOUNT)*(1+L_TAX)), AVG(L_QUANTITY), AVG(L_EXTENDEDPRICE), AVG(L_DISCOUNT), COUNT(1)
FROM
lineitem
WHERE
L_SHIPDATE<='1998-09-02'
GROUP BY L_RETURNFLAG, L_LINESTATUS
ORDER BY L_RETURNFLAG, L_LINESTATUS;
--TPCH - Q2
explain (verbose, costs off, nodes off) select
s.s_acctbal, s.s_name, n.n_name, p.p_partkey, ps.ps_supplycost, p.p_mfgr, s.s_address, s.s_phone, s.s_comment
from
nation n join region r
on
n.n_regionkey = r.r_regionkey and n.n_name = 'FRANCE'
join
supplier s
on
s.s_nationkey = n.n_nationkey
join
partsupp ps
on
s.s_suppkey = ps.ps_suppkey
join
part p
on
p.p_partkey = ps.ps_partkey and p.p_size = 15 and p.p_type like '%BRASS';
reset enable_broadcast;
DROP FOREIGN TABLE IF EXISTS lineitem CASCADE;
DROP FOREIGN TABLE IF EXISTS customer CASCADE;
DROP FOREIGN TABLE IF EXISTS orders CASCADE;
DROP FOREIGN TABLE IF EXISTS part CASCADE;
DROP FOREIGN TABLE IF EXISTS partsupp CASCADE;
DROP FOREIGN TABLE IF EXISTS supplier CASCADE;
DROP FOREIGN TABLE IF EXISTS nation CASCADE;
DROP FOREIGN TABLE IF EXISTS region CASCADE;
DROP SERVER hdfs_server CASCADE;