--
-- Test parquet foreign data wrapper.
--
-- Settings to make the result deterministic
SET datestyle = "ISO, YMD";
set enable_vector_engine=false;
-- validator tests
CREATE SERVER hdfs_server FOREIGN DATA WRAPPER HDFS_FDW OPTIONS (type 'hdfs', address '@hdfshostname@:@hdfsport@',HDFSCFGPATH '@hdfscfgpath@');
CREATE FOREIGN TABLE test_validator_filename_missing (a int)
SERVER hdfs_server distribute by roundrobin; -- ERROR
CREATE FOREIGN TABLE test_validator_invalid_option (a int)
SERVER hdfs_server
OPTIONS(format 'parquet', filenames 'bigrow.parquet', bad_option_name '1') distribute by roundrobin; -- ERROR
CREATE FOREIGN TABLE test_validator_tooMany_option (a int)
SERVER hdfs_server
OPTIONS(format 'parquet', filenames 'bigrow.parquet', foldername '/user/hdfs') distribute by roundrobin; -- ERROR
drop foreign table test_validator_filename_missing;
drop foreign table test_validator_invalid_option;
drop foreign table test_validator_tooMany_option;
--basic distributed foreign table test
-- error scenarios
DROP FOREIGN TABLE IF EXISTS test_missing_file;
CREATE FOREIGN TABLE test_missing_file (a int) SERVER hdfs_server
OPTIONS (format 'parquet', filenames '/user/hive/warehouse/mppdb.db/missing_file.parquet') distribute by roundrobin;
SELECT * FROM test_missing_file limit 10; -- ERROR
drop foreign table test_missing_file;
DROP FOREIGN TABLE IF EXISTS test_table_definition_mismatch;
CREATE FOREIGN TABLE test_table_definition_mismatch (type CHAR(6)) SERVER hdfs_server
OPTIONS (format 'parquet', filenames '/user/hive/warehouse/mppdb.db/bigrow.parquet') distribute by roundrobin;
SELECT * FROM test_table_definition_mismatch limit 10; -- ERROR
drop foreign table test_table_definition_mismatch;
DROP FOREIGN TABLE IF EXISTS lineitem_twoStripes;
Create foreign table lineitem_twoStripes (
L_ORDERKEY BIGINT
, L_PARTKEY BIGINT
, L_SUPPKEY BIGINT
, L_LINENUMBER BIGINT
, L_QUANTITY DECIMAL(15,2)
, L_EXTENDEDPRICE DECIMAL(15,2)
, L_DISCOUNT DECIMAL(15,2)
, L_TAX DECIMAL(15,2)
, L_RETURNFLAG CHAR(1)
, L_LINESTATUS CHAR(1)
, L_SHIPDATE DATE
, L_COMMITDATE DATE
, L_RECEIPTDATE DATE
, L_SHIPINSTRUCT CHAR(25)
, L_SHIPMODE CHAR(10)
, L_COMMENT VARCHAR(44)
)
SERVER hdfs_server
OPTIONS(format 'parquet', filenames '/user/hive/warehouse/mppdb.db/lineitem_parquet/000000_0,/user/hive/warehouse/mppdb.db/lineitem_parquet/000001_0') distribute by roundrobin;
SELECT L_SHIPDATE FROM lineitem_twoStripes WHERE L_orderkey BETWEEN 509050 AND 509070 order by l_shipdate;
SELECT L_ORDERKEY, L_SHIPDATE FROM lineitem_twoStripes for update; -- ERROR
select L_TAX from lineitem_twoStripes where L_LINESTATUS = 'V';
select L_LINESTATUS from lineitem_twoStripes where L_RETURNFLAG = 'N' order by 1 desc offset 1000 limit 20;
SELECT count(L_SHIPDATE) FROM lineitem_twoStripes WHERE L_orderkey BETWEEN 509050 AND 509070;
drop foreign table lineitem_twoStripes;
DROP FOREIGN TABLE IF EXISTS lineitem;
Create foreign table lineitem (
L_ORDERKEY BIGINT
, L_PARTKEY BIGINT
, L_SUPPKEY BIGINT
, L_LINENUMBER BIGINT
, L_QUANTITY DECIMAL(15,2)
, L_EXTENDEDPRICE DECIMAL(15,2)
, L_DISCOUNT DECIMAL(15,2)
, L_TAX DECIMAL(15,2)
, L_RETURNFLAG CHAR(1)
, L_LINESTATUS CHAR(1)
, L_SHIPDATE DATE
, L_COMMITDATE DATE
, L_RECEIPTDATE DATE
, L_SHIPINSTRUCT CHAR(25)
, L_SHIPMODE CHAR(10)
, L_COMMENT VARCHAR(44)
)
SERVER hdfs_server OPTIONS(format 'parquet', foldername '/user/hive/warehouse/mppdb.db/lineitem_parquet')
distribute by roundrobin;
DROP FOREIGN TABLE IF EXISTS customer;
CREATE FOREIGN TABLE customer (
C_CUSTKEY BIGINT
, C_NAME VARCHAR(25)
, C_ADDRESS VARCHAR(40)
, C_NATIONKEY INT
, C_PHONE CHAR(15)
, C_ACCTBAL DECIMAL(15,2)
, C_MKTSEGMENT CHAR(10)
, C_COMMENT VARCHAR(117)
)
SERVER hdfs_server
OPTIONS(format 'parquet', foldername '/user/hive/warehouse/mppdb.db/customer_parquet')
distribute by roundrobin;
DROP FOREIGN TABLE IF EXISTS orders;
create foreign table orders (
O_ORDERKEY BIGINT
, O_CUSTKEY BIGINT
, O_ORDERSTATUS CHAR(1)
, O_TOTALPRICE DECIMAL(15,2)
, O_ORDERDATE DATE
, O_ORDERPRIORITY CHAR(15)
, O_CLERK CHAR(15)
, O_SHIPPRIORITY BIGINT
, O_COMMENT VARCHAR(79)
)
SERVER hdfs_server
OPTIONS(format 'parquet', foldername '/user/hive/warehouse/mppdb.db/orders_parquet')
distribute by roundrobin;
DROP FOREIGN TABLE IF EXISTS part;
create foreign table part (
P_PARTKEY BIGINT
, P_NAME VARCHAR(55)
, P_MFGR CHAR(25)
, P_BRAND CHAR(10)
, P_TYPE VARCHAR(25)
, P_SIZE BIGINT
, P_CONTAINER CHAR(10)
, P_RETAILPRICE DECIMAL(15,2)
, P_COMMENT VARCHAR(23)
)
SERVER hdfs_server
OPTIONS(format 'parquet', foldername '/user/hive/warehouse/mppdb.db/part_parquet')
distribute by roundrobin;
DROP FOREIGN TABLE IF EXISTS supplier;
create foreign table supplier (
S_SUPPKEY BIGINT
, S_NAME CHAR(25)
, S_ADDRESS VARCHAR(40)
, S_NATIONKEY INT
, S_PHONE CHAR(15)
, S_ACCTBAL DECIMAL(15,2)
, S_COMMENT VARCHAR(101)
)
SERVER hdfs_server
OPTIONS(format 'parquet', foldername '/user/hive/warehouse/mppdb.db/supplier_parquet')
distribute by roundrobin;
DROP FOREIGN TABLE IF EXISTS partsupp;
create foreign table partsupp (
PS_PARTKEY BIGINT
, PS_SUPPKEY BIGINT
, PS_AVAILQTY BIGINT
, PS_SUPPLYCOST DECIMAL(15,2)
, PS_COMMENT VARCHAR(199)
)
SERVER hdfs_server
OPTIONS(format 'parquet', foldername '/user/hive/warehouse/mppdb.db/partsupp_parquet')
distribute by roundrobin;
DROP FOREIGN TABLE IF EXISTS nation;
create foreign table nation (
N_NATIONKEY INT
, N_NAME CHAR(25)
, N_REGIONKEY INT
, N_COMMENT VARCHAR(152)
)
SERVER hdfs_server
OPTIONS(format 'parquet', foldername '/user/hive/warehouse/mppdb.db/nation_parquet')
distribute by roundrobin;
DROP FOREIGN TABLE IF EXISTS region;
create foreign table region (
R_REGIONKEY INT
, R_NAME TEXT
, R_COMMENT VARCHAR(152)
)
SERVER hdfs_server
OPTIONS(format 'parquet', foldername '/user/hive/warehouse/mppdb.db/region_parquet')
distribute by roundrobin;
with recursive tmp1 as (select s_suppkey,s_nationkey,s_comment from supplier order by 1) select tmp1.s_nationkey, region.r_name from tmp1, region where tmp1.s_suppkey=region.r_regionkey group by tmp1.s_nationkey,region.r_name order by 1,2;
-- there are bugs for the following three queries. cause EOF error from datanode
SELECT L_shipdate FROM lineitem WHERE L_orderkey between 2125190 and 2125200 order by l_shipdate;
SELECT count(L_shipdate) FROM lineitem WHERE L_orderkey between 2125190 and 2125200;
SELECT * from lineitem WHERE L_orderkey between 2125190 and 2125200 order by l_orderkey, l_partkey;
create schema FVT_DATA_QUERY;
create foreign table FVT_DATA_QUERY.TABLE_EXP_FROM_SUBQUERY_T1(
D_ID int, D_W_ID int,
D_NAME varchar(10),
D_STREET_1 varchar(20))SERVER hdfs_server
OPTIONS(format 'parquet', foldername '/user/hive/warehouse/subquery.db/table_exp_from_subquery_t1_parquet')
distribute by roundrobin;
CREATE VIEW FVT_DATA_QUERY.TABLE_EXP_FROM_SUBQUERY_V1 as select * from FVT_DATA_QUERY.TABLE_EXP_FROM_SUBQUERY_T1;
SELECT DISTINCT SUM(D_ID) as result1 FROM FVT_DATA_QUERY.TABLE_EXP_FROM_SUBQUERY_V1 group by D_ID order by 1;
drop schema FVT_DATA_QUERY cascade;
create foreign table tenk1(unique1 int,unique2 int,two int,four int,ten int,twenty int,hundred int,thousand int,twothousand int,
fivethous int,tenthous int,odd int,even int,stringu1 varchar,stringu2 varchar,string4 varchar)SERVER hdfs_server
OPTIONS(format 'parquet', foldername '/user/hive/warehouse/tenk1_parquet')
distribute by roundrobin;
select count(distinct ss.ten) from (select ten from tenk1 a where unique1 IN (select hundred from tenk1 b)) ss;
select r_name from region left join nation on n_nationkey < 5;
drop foreign table tenk1;
create table region001 as select R_REGIONKEY+1 id, R_REGIONKEY from region;
select * from region001 order by 1;
update region001 a set a.R_REGIONKEY=34 from customer where a.R_REGIONKEY = customer.c_custkey;
select * from region001 order by 1;
drop table region001;
create or replace PROCEDURE testcase1()
AS
RESULT1 integer;
RESULT2 integer;
BEGIN
select N_NATIONKEY from nation limit 1 into RESULT1;
raise info'RESULT1 IS %',RESULT1;
return;
END;
/
select testcase1();
drop PROCEDURE testcase1;
create foreign table customer_demographics (
cd_demo_sk int ,
cd_gender char(1) ,
cd_marital_status char(1) ,
cd_education_status char(20) ,
cd_purchase_estimate int ,
cd_credit_rating char(10) ,
cd_dep_count int ,
cd_dep_employed_count int ,
cd_dep_college_count int )
server hdfs_server OPTIONS(format 'parquet', foldername '/user/hive/warehouse/customer_demographics_parquet')
distribute by roundrobin;
select count(1) from customer_demographics where cd_education_status = 'College';
select count(1) from customer_demographics where cd_education_status = 'College ';
select count(1) from customer_demographics where cd_education_status = 'College ';
drop foreign table customer_demographics;
CREATE FOREIGN TABLE region_errortable(
a int,b varchar,pt_dt varchar
)
SERVER hdfs_server OPTIONS(FORMAT 'parquet', FOLDERNAME '/user/hive/warehouse/fid.db/1234')
log into err_normaltxt remote log 'err_normaltxt' per node reject limit 'unlimited'
DISTRIBUTE BY roundrobin
PARTITION BY (pt_dt) AUTOMAPPED;
drop foreign table region_errortable;
CREATE FOREIGN TABLE region_errortable1(
a int,b varchar,pt_dt varchar
)
SERVER hdfs_server OPTIONS(FORMAT 'parquet', FOLDERNAME '/user/hive/warehouse/fid.db/1234')
WITH err_normaltxt2
DISTRIBUTE BY roundrobin
PARTITION BY (pt_dt) AUTOMAPPED;
drop foreign table region_errortable1;
reset enable_vector_engine;
DROP FOREIGN TABLE IF EXISTS lineitem CASCADE;
DROP FOREIGN TABLE IF EXISTS customer CASCADE;
DROP FOREIGN TABLE IF EXISTS orders CASCADE;
DROP FOREIGN TABLE IF EXISTS part CASCADE;
DROP FOREIGN TABLE IF EXISTS partsupp CASCADE;
DROP FOREIGN TABLE IF EXISTS supplier CASCADE;
DROP FOREIGN TABLE IF EXISTS nation CASCADE;
DROP FOREIGN TABLE IF EXISTS region CASCADE;
DROP SERVER hdfs_server CASCADE;