--
--Create table
--
--Foreign table
CREATE SERVER hdfs_server FOREIGN DATA WRAPPER 	HDFS_FDW OPTIONS (type 'hdfs', address '@hdfshostname@:@hdfsport@',hdfscfgpath '@hdfscfgpath@');
CREATE FOREIGN TABLE customer_reviews
        (
            customer_id TEXT ,
            review_date DATE ,
            review_rating INTEGER ,
            review_votes INTEGER,
            review_helpful_votes INTEGER,
            product_id CHAR(10) ,
            product_title TEXT ,
            product_sales_rank BIGINT,
            product_group TEXT,
            product_category TEXT,
            product_subcategory TEXT,
            similar_product_ids CHAR(10)
        )
        SERVER hdfs_server
        OPTIONS(format 'parquet', foldername '/user/hive/warehouse/customer_reviews_parquet') distribute by roundrobin;

DROP FOREIGN TABLE IF EXISTS d_bigrow;
CREATE FOREIGN TABLE d_bigrow(
    boolean1 BOOLEAN,
    short1 INT2,
    integer1 INT,
    long1 INT8,
    list1 INT,
    float1 FLOAT4,
    double1 FLOAT8,
    string1 TEXT,
    list2 TEXT,
    date1 DATE,
    timestamp1 TIMESTAMP
) SERVER hdfs_server
OPTIONS(format 'parquet', foldername '/user/hive/warehouse/d_bigrow_parquet') distribute by roundrobin;

--Regular table
create table t (c1 int, c2 int) distribute by roundrobin;
create table s (b1 int, b2 int, b3 int) distribute by roundrobin;
create table r (a1 int, a2 int, a3 int) distribute by roundrobin;
insert into t values (1, 2), (2, 3), (3, 4), (4, 5);
insert into s values (1, 2, 2), (2, 3, 3), (3, 4, 4), (4, 5, 5);
insert into r values (3, 2,1 ), (4,3,2), (5, 4, 3), (6, 5, 4);

--TPCH table
CREATE TABLE customer (
    c_custkey integer NOT NULL,
    c_name character varying(25) NOT NULL,
    c_address character varying(40) NOT NULL,
    c_nationkey integer NOT NULL,
    c_phone character(15) NOT NULL,
    c_acctbal numeric(15,2) NOT NULL,
    c_mktsegment character(10) NOT NULL,
    c_comment character varying(117) NOT NULL
)
DISTRIBUTE BY ROUNDROBIN ;

CREATE TABLE lineitem (
    l_orderkey integer NOT NULL,
    l_partkey integer NOT NULL,
    l_suppkey integer NOT NULL,
    l_linenumber integer NOT NULL,
    l_quantity numeric(15,2) NOT NULL,
    l_extendedprice numeric(15,2) NOT NULL,
    l_discount numeric(15,2) NOT NULL,
    l_tax numeric(15,2) NOT NULL,
    l_returnflag character(1) NOT NULL,
    l_linestatus character(1) NOT NULL,
    l_shipdate date NOT NULL,
    l_commitdate date NOT NULL,
    l_receiptdate date NOT NULL,
    l_shipinstruct character(25) NOT NULL,
    l_shipmode character(10) NOT NULL,
    l_comment character varying(44) NOT NULL
)
DISTRIBUTE BY ROUNDROBIN ;

CREATE TABLE nation (
    n_nationkey integer NOT NULL,
    n_name character(25) NOT NULL,
    n_regionkey integer NOT NULL,
    n_comment character varying(152)
)
DISTRIBUTE BY ROUNDROBIN ;
CREATE TABLE orders (
    o_orderkey integer NOT NULL,
    o_custkey integer NOT NULL,
    o_orderstatus character(1) NOT NULL,
    o_totalprice numeric(15,2) NOT NULL,
    o_orderdate date NOT NULL,
    o_orderpriority character(15) NOT NULL,
    o_clerk character(15) NOT NULL,
    o_shippriority integer NOT NULL,
    o_comment character varying(79) NOT NULL
)
DISTRIBUTE BY ROUNDROBIN ;

CREATE TABLE part (
    p_partkey integer NOT NULL,
    p_name character varying(55) NOT NULL,
    p_mfgr character(25) NOT NULL,
    p_brand character(10) NOT NULL,
    p_type character varying(25) NOT NULL,
    p_size integer NOT NULL,
    p_container character(10) NOT NULL,
    p_retailprice numeric(15,2) NOT NULL,
    p_comment character varying(23) NOT NULL
)
DISTRIBUTE BY ROUNDROBIN ;
CREATE TABLE partsupp (
    ps_partkey integer NOT NULL,
    ps_suppkey integer NOT NULL,
    ps_availqty integer NOT NULL,
    ps_supplycost numeric(15,2) NOT NULL,
    ps_comment character varying(199) NOT NULL
)
DISTRIBUTE BY ROUNDROBIN ;

CREATE TABLE region (
    r_regionkey integer NOT NULL,
    r_name character(25) NOT NULL,
    r_comment character varying(152)
)
DISTRIBUTE BY ROUNDROBIN ;

CREATE TABLE supplier (
    s_suppkey integer NOT NULL,
    s_name character(25) NOT NULL,
    s_address character varying(40) NOT NULL,
    s_nationkey integer NOT NULL,
    s_phone character(15) NOT NULL,
    s_acctbal numeric(15,2) NOT NULL,
    s_comment character varying(101) NOT NULL
)
DISTRIBUTE BY ROUNDROBIN ;
\i @abs_srcdir@/data/tpch.data

--
--Test Begin
--
--*Disable fstream
explain (verbose on, costs off, nodes off) select * from t where c1 in (select b1 from s where c2=b2) order by c1;
explain (verbose on, costs off, nodes off) select * from d_bigrow where short1 in  (select review_rating from customer_reviews where review_votes = integer1) order by short1;
select * from t where c1 in (select b1 from s where c2=b2) order by c1;
select * from d_bigrow where short1 in  (select review_rating from customer_reviews where review_votes = integer1) order by short1;
--Remote sort
explain (verbose on, costs off, nodes off) select * from (select all * from t where c1 < 3 order by c1);
select * from (select all * from t where c1 < 3 order by c1);
--Agg
with revenue (total_revenue) as  
( select sum(l_extendedprice * (1 - l_discount)) from lineitem where l_suppkey=1 )
select * from revenue;

--*Enable fstream

explain (verbose on, costs off, nodes off) select * from t where c1 in (select b1 from s where c2=b2) order by c1;
select * from t where c1 in (select b1 from s where c2=b2)  order by c1;
explain (verbose on, costs off, nodes off) select * from d_bigrow where short1 in  (select review_rating from customer_reviews where review_votes = integer1)  order by short1;
select * from d_bigrow where short1 in  (select review_rating from customer_reviews where review_votes = integer1)  order by short1;
explain (verbose on, costs off, nodes off) select * from t where c1 in (select sum(b2) sb2 from s group by b1 having c2 =  sum(b2)) order by c1;
explain (verbose on, costs off, nodes off)  select * from t where c1 in (select b1 from s group by b1 having sum(b2) = c2) order by c1;
select * from t where c1 in (select b1 from s group by b1 having sum(b2) = c2) order by c1;
explain (verbose on, costs off, nodes off) select * from t where c1 in (select b1 from s, r where a2 = b2  group by b1 having sum(b3) = c2) order by c1;
select * from t where c1 in (select b1 from s, r where a2 = b2  group by b1 having sum(b3) = c2)  order by c1;
--Remote sort
explain (verbose on, costs off, nodes off) select * from (select all * from t where c1 < 3 order by c1);
explain performance select * from (select all * from t where c1 < 3 order by c1);
select * from (select all * from t where c1 < 3 order by c1);
explain (verbose on, costs off, nodes off) select * from (select all * from orders where o_orderkey < 3 order by o_orderkey);
select * from (select all * from orders where o_orderkey < 3 order by o_orderkey);
--Agg
with revenue (total_revenue) as  
( select sum(l_extendedprice * (1 - l_discount)) from lineitem where l_suppkey=1 )
select * from revenue;

-- normal table
create table hotel1000 ( 
                        lkbh text,
                        xm text,
                        xb text,
                        csrq text,
                        zjlx text,
                        zjhm text,
                        rzsj text,
                        rzsj_yf float4,
                        rjfh text,
                        tfsj text,
                        lddm text,
                        ldmc text,
                        pcsdm text,
                        fjdm text,
                        rzsj_q8 text,
                        etlgxsj text,
                        hjszd text) distribute by roundrobin;

explain (verbose on, costs off, nodes off) select 
       dddd.xm || ' '|| dddd.ldmc || ' rzsj:' || to_char(dddd.rzsj) as info1,
       a.xm || ' '|| a.ldmc || ' rzsj:' || to_char(a.rzsj) as info2
from hotel1000 a,
       (select 
                            xm,ldmc,lddm,rzsj,zjhm
        from 
                            (select 
                                     xm,ldmc,lddm,rzsj,zjhm
                            from
                                     hotel1000 
                             where 
                                     rzsj >= '2014-01-01 01:00:00'
                                     and 
                                     rzsj <= '2014-12-30 23:00:00'
                            )
        where 
                            xm like 'wang%'
                            and
                            lddm in(
                                     select 
                                               lddm 
                                     from
                                               hotel1000
                                     where
                                               ldmc like '%shangwu%'
                            )
                   ) dddd
where 
         a.zjhm <> dddd.zjhm and
         ABS(INTERVALTONUM(to_timestamp(a.rzsj,'yyyy-mm-dd hh24:mi:ss') - to_timestamp(dddd.rzsj,'yyyy-mm-dd hh24:mi:ss'))) < 1
;
explain (verbose on, costs off, nodes off) select 
       dddd.xm || ' '|| dddd.ldmc || ' rzsj:' || to_char(dddd.rzsj) as info1,
       a.xm || ' '|| a.ldmc || ' rzsj:' || to_char(a.rzsj) as info2
from hotel1000 a,
       (select 
                            xm,ldmc,lddm,rzsj,zjhm
        from 
                            (select 
                                     xm,ldmc,lddm,rzsj,zjhm
                            from
                                     hotel1000 
                             where 
                                     rzsj >= '2014-01-01 01:00:00'
                                     and 
                                     rzsj <= '2014-12-30 23:00:00'
                            )
        where 
                            xm like 'wang%'
                            and
                            lddm in(
                                     select 
                                               lddm 
                                     from
                                               hotel1000
                                     where
                                               ldmc like '%shangwu%'
                            )
                   ) dddd
where 
         a.zjhm <> dddd.zjhm and
         ABS(INTERVALTONUM(to_timestamp(a.rzsj,'yyyy-mm-dd hh24:mi:ss') - to_timestamp(dddd.rzsj,'yyyy-mm-dd hh24:mi:ss'))) < 1
;
drop table hotel1000;
--create foreign table
create foreign table hotel1000 ( 
                        lkbh text,
                        xm text,
                        xb text,
                        csrq text,
                        zjlx text,
                        zjhm text,
                        rzsj text,
                        rzsj_yf float4,
                        rjfh text,
                        tfsj text,
                        lddm text,
                        ldmc text,
                        pcsdm text,
                        fjdm text,
                        rzsj_q8 text,
                        etlgxsj text,
                        hjszd text) 
server hdfs_server OPTIONS(format 'parquet', foldername '/user/hive/warehouse/fvt_data_query_hive.db/hotel_1000_parquet')
distribute by roundrobin;
explain (verbose on, costs off, nodes off) select 
       dddd.xm || ' '|| dddd.ldmc || ' rzsj:' || to_char(dddd.rzsj) as info1,
       a.xm || ' '|| a.ldmc || ' rzsj:' || to_char(a.rzsj) as info2
from hotel1000 a,
       (select 
                            xm,ldmc,lddm,rzsj,zjhm
        from 
                            (select 
                                     xm,ldmc,lddm,rzsj,zjhm
                            from
                                     hotel1000 
                             where 
                                     rzsj >= '2014-01-01 01:00:00'
                                     and 
                                     rzsj <= '2014-12-30 23:00:00'
                            )
        where 
                            xm like 'wang%'
                            and
                            lddm in(
                                     select 
                                               lddm 
                                     from
                                               hotel1000
                                     where
                                               ldmc like '%shangwu%'
                            )
                   ) dddd
where 
         a.zjhm <> dddd.zjhm and
         ABS(INTERVALTONUM(to_timestamp(a.rzsj,'yyyy-mm-dd hh24:mi:ss') - to_timestamp(dddd.rzsj,'yyyy-mm-dd hh24:mi:ss'))) < 1
;

--add LLT 
drop table if exists testllt001;

create table testllt001(id int, name text) distribute by replication;

explain (verbose on, costs off, nodes off) select * from t where c1 in (select id from testllt001 where id =2);

drop table if exists testllt003;

create table testllt003( id int);

explain (verbose on, costs off, nodes off) select * from testllt003 union select R_REGIONKEY from region;

drop table testllt001;
drop table testllt003;

drop table if exists testllt100t;
create table testllt100t(c1 int, c2 int) distribute by roundrobin ;
create index testllt100t_index on testllt100t (c1);

drop table if exists testllt100s;
create table testllt100s (b1 int, b2 int, b3 int) ;
create index testllt100s_index on testllt100s (b1);

explain (verbose on, costs off, nodes off) select c1 from testllt100t where c1 in (select b1 from testllt100s where b1=7);
explain (verbose on, costs off, nodes off) select c1 from testllt100t where c1 in (select b2 from testllt100s where b1=7);

drop table if exists testllt100t;
create table testllt100t(c1 int, c2 int) with (orientation=column) distribute by roundrobin ;

drop table if exists testllt100s;
create table testllt100s (b1 int, b2 int, b3 int) with (orientation=column);

explain (verbose on, costs off, nodes off) select c1 from testllt100t where c1 in (select b1 from testllt100s where b1=7);

create index testllt100t_index on testllt100t (c1);
create index testllt100s_index on testllt100s (b1);

explain (verbose on, costs off, nodes off) select c1 from testllt100t where c1 in (select b1 from testllt100s where b1=7);

drop table testllt100t;
drop table testllt100s;

create foreign table SUBQUERY_T1(
D_ID int,
D_W_ID int,
D_NAME varchar(10),
D_STREET_1 varchar(20))SERVER hdfs_server
OPTIONS(format 'parquet', foldername '/user/hive/warehouse/hive/subquery_t1_parquet')
distribute by roundrobin;
explain (verbose on, costs off, nodes off) WITH tmp1 as (select D_ID from SUBQUERY_T1 order by D_ID)SELECT DISTINCT * FROM ( SELECT ALL * FROM SUBQUERY_T1 where D_ID = D_W_ID group by D_ID,D_NAME,D_W_ID,D_STREET_1 having D_ID < 10 order by D_ID,D_NAME,D_W_ID,D_STREET_1 DESC NULLS FIRST LIMIT 1   ) group by D_ID,D_NAME,D_W_ID,D_STREET_1 order by D_ID,D_NAME,D_W_ID,D_STREET_1 FETCH NEXT 20 ROWS ONLY ;
drop foreign table SUBQUERY_T1;

create table test_mergeappend_1(a integer);
create table test_mergeappend_2(a integer);
explain (verbose on, costs off, nodes off)(select * from test_mergeappend_1 order by a) union all (select * from test_mergeappend_2 order by 1) order by 1;
drop table test_mergeappend_1;
drop table test_mergeappend_2;
create table t_subqueryscan (a int, b int);
explain (verbose on, costs off, nodes off) select a from (select count(a) as a, count(b) as b from t_subqueryscan) order by 1;
drop table t_subqueryscan;
--end LLT
create table test200(id200 int ) distribute by roundrobin;
explain (verbose on, costs off, nodes off)select * from (select id200 as id from test200 ) y, (select id200 from test200)x;
create view test200_view as select * from test200;
explain (verbose on, costs off, nodes off)select * from (select id200 as id from test200_view ) y, (select id200 from test200_view)x;

drop table test200 cascade;

drop foreign table if exists reason;
drop foreign table if exists store_sales;
create foreign table  reason ( 
       r_reason_sk     int       ,
       r_reason_id     char(16)  ,
       r_reason_desc   char(100) ) 
server hdfs_server OPTIONS(format 'parquet', foldername '/user/hive/warehouse/reason_parquet')
distribute by roundrobin;

create foreign table  store_sales ( 
       ss_sold_date_sk        int          ,
       ss_sold_time_sk        int          ,
       ss_item_sk             int          ,
       ss_customer_sk         int          ,
       ss_cdemo_sk            int          ,
       ss_hdemo_sk            int          ,
       ss_addr_sk             int          ,
       ss_store_sk            int          ,
       ss_promo_sk            int          ,
       ss_ticket_number       int          ,
       ss_quantity            int          ,
       ss_wholesale_cost      decimal(7,2) ,
       ss_list_price          decimal(7,2) ,
       ss_sales_price         decimal(7,2) ,
       ss_ext_discount_amt    decimal(7,2) ,
       ss_ext_sales_price     decimal(7,2) ,
       ss_ext_wholesale_cost  decimal(7,2) ,
       ss_ext_list_price      decimal(7,2) ,
       ss_ext_tax             decimal(7,2) ,
       ss_coupon_amt          decimal(7,2) ,
       ss_net_paid            decimal(7,2) ,
       ss_net_paid_inc_tax    decimal(7,2) ,
       ss_net_profit          decimal(7,2) ) 
server hdfs_server OPTIONS(format 'parquet', foldername '/user/hive/warehouse/store_sales_parquet')
distribute by roundrobin;

select  case when (select count(*) 
                  from store_sales 
                  where ss_quantity between 1 and 20) > 0.001*74129
            then (select avg(ss_ext_discount_amt) 
                  from store_sales 
                  where ss_quantity between 1 and 20) 
            else (select avg(ss_net_paid)
                  from store_sales
                  where ss_quantity between 1 and 20) end bucket1
from reason
where r_reason_sk = 1
limit 100;

drop foreign table if exists reason;
drop foreign table if exists store_sales;
--
--Drop table
--
drop foreign table d_bigrow;
drop foreign table customer_reviews;
drop foreign table hotel1000;
drop table t;
drop table s;
drop table r;

drop table customer;
drop table lineitem;
drop table part;
drop table partsupp;
drop table orders;
drop table supplier;
drop table nation;
drop table region;
DROP SERVER hdfs_server CASCADE;