-- Tests for Hbase - Load/Extract
-- Added April 2014
--
-- @@@ START COPYRIGHT @@@
--
-- Licensed to the Apache Software Foundation (ASF) under one
-- or more contributor license agreements.  See the NOTICE file
-- distributed with this work for additional information
-- regarding copyright ownership.  The ASF licenses this file
-- to you under the Apache License, Version 2.0 (the
-- "License"); you may not use this file except in compliance
-- with the License.  You may obtain a copy of the License at
--
--   http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing,
-- software distributed under the License is distributed on an
-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-- KIND, either express or implied.  See the License for the
-- specific language governing permissions and limitations
-- under the License.
--
-- @@@ END COPYRIGHT @@@

create schema trafodion.hbase;
set schema trafodion.hbase;
CQD COMP_BOOL_226 'ON';

cqd TRAF_LOAD_TAKE_SNAPSHOT 'on';

obey TEST015(clean_up);


log LOG015 clear;


obey TEST015(setup);

cqd COMPRESSED_INTERNAL_FORMAT 'ON';
cqd COMPRESSED_INTERNAL_FORMAT_BMO 'ON';
cqd COMPRESSED_INTERNAL_FORMAT_DEFRAG_RATIO '100';
cqd HIVE_NUM_ESPS_PER_DATANODE '3';

cqd auto_query_retry_warnings 'ON';

obey TEST015(test_bulk_load_simple);

log;

obey TEST015(clean_up);
exit;

?section clean_up
drop index cd_dep_count_IDX;
drop index cd_dep_college_count_IDX;
drop index cd_dep_count_IDX2;
drop index cd_dep_college_count_IDX2;
drop table customer_demographics cascade;
drop table customer_demographics_salt cascade;
drop table customer_address cascade;
drop table customer_address_NOPK cascade;
drop table t015t1 cascade ;
drop table t015t2 cascade;
drop table t015t3 cascade;
drop table t015t4 cascade;
drop table t015t5 cascade;
drop table t015t6 cascade;
drop table "customer_address_delim" ;

?section setup
--------------------------------------------------------------------------
create table customer_demographics
(
  cd_demo_sk              int not null,
  cd_gender               char(1),
  cd_marital_status       char(1),
  cd_education_status     char(20),
  cd_purchase_estimate    int,
  cd_credit_rating        char(10),
  cd_dep_count            int,
  cd_dep_employed_count   int,
  cd_dep_college_count    int,
  primary key (cd_demo_sk)
); 

create table customer_demographics_salt
(
  cd_demo_sk              int not null,
  cd_gender               char(1),
  cd_marital_status       char(1),
  cd_education_status     char(20),
  cd_purchase_estimate    int,
  cd_credit_rating        char(10),
  cd_dep_count            int,
  cd_dep_employed_count   int,
  cd_dep_college_count    int,
  primary key (cd_demo_sk)
)
salt using 4 partitions on (cd_demo_sk); 


create table customer_address
(
  ca_address_sk        int not null,
  ca_address_id        char(16),
  ca_street_number     char(10),
  ca_street_name       varchar(60),
  ca_street_type       char(15),
  ca_suite_number      char(10),
  ca_city              varchar(60),
  ca_county            varchar(30),
  ca_state             char(2),
  ca_zip               char(10),
  ca_country           varchar(30),
  ca_gmt_offset        decimal(5,2),
  ca_location_type     char(20),
  primary key (ca_address_sk)
);


create table customer_address_NOPK
(
  ca_address_sk        int not null,
  ca_address_id        char(16),
  ca_street_number     char(10),
  ca_street_name       varchar(60),
  ca_street_type       char(15),
  ca_suite_number      char(10),
  ca_city              varchar(60),
  ca_county            varchar(30),
  ca_state             char(2),
  ca_zip               char(10),
  ca_country           varchar(30),
  ca_gmt_offset        decimal(5,2),
  ca_location_type     char(20)
);


create table t015t1 (a int) ;
create table t015t2 (a int) ;
create table t015t3 ( a int not null primary key, b int, c int);
create table t015t4 ( a int not null primary key, b int, c int) salt using 2 partitions on (a);
create table t015t5 ( a int not null , b int, c int) store by (a);
create table t015t6 ( a int not null , b int, c int);

delete from t015t1;
insert into t015t1 values (1),(2),(3),(4),(5);



?section test_bulk_load_simple
--------------------------------------------------------------------------


prepare s from load cleanup for table t015t2;
explain options 'f' s;
execute s;
 
prepare s from load transform into t015t2 select * from t015t1;
explain options 'f' s;
execute s;


prepare s from load complete for table t015t2;
explain options 'f' s;
execute s;


select * from t015t2 order by a;

prepare s from load into t015t2 select * from t015t1;
explain options 'f' s;
execute s;

prepare s from load with no recovery into t015t2 select * from t015t1;
explain options 'f' s;
execute s;

--log error rows
prepare s from load with log error rows into t015t2 select * from t015t1;
explain options 'f' s;
execute s;

delete from t015t2;
insert into t015t2 values (100),(200),(300);
select * from t015t2 order by a;
load with truncate table into t015t2 select * from t015t1;

select * from t015t2 order by a;


load transform into t015t3 select a,a,a from t015t1;

create index t015t3idx on t015t3(b);

load transform into t015t3 select a,a,a from t015t1;


drop index t015t3idx ;

alter table t015t3  add  constraint t015t3_cnst check (b>100);

load transform into t015t3 select a,a,a from t015t1;


alter table t015t3  drop  constraint t015t3_cnst;

prepare s3 from load transform into t015t3 select a,a,a from t015t1  <<+ cardinality 10e0 >> ;
explain options 'f' s3;
log LOG015_plan.log clear;
explain s3;
log;
sh grep "sort_key" LOG015_PLAN.LOG  >> LOG015 ;
log LOG015;
prepare s4 from load transform into t015t4 select a,a,a from t015t1  <<+ cardinality 10e0 >> ;
explain options 'f' s4;
log;
log LOG015_plan.log clear;
explain s4;
log;
sh grep -A 2 "sort_key" LOG015_PLAN.LOG  >> LOG015 ;
log LOG015;
prepare s5 from load transform into t015t5 select a,a,a from t015t1  <<+ cardinality 10e0 >> ;
explain options 'f' s5;
log;
log LOG015_plan.log clear;
explain s5;
log;
sh grep "sort_key" LOG015_PLAN.LOG  >> LOG015 ;
log LOG015;
prepare s6 from load transform into t015t6 select a,a,a from t015t1;
explain options 'f' s6;

--------------------------------------------------------------------------

select count(*) from hive.hive.customer_address where ca_address_sk <= 5000;
select count(*) from customer_address;

prepare s from 
load transform into customer_address 
select * from hive.hive.customer_address where ca_address_sk <= 5000;                                                                                              
explain options 'f' s;

load   into customer_address 
select * from hive.hive.customer_address where ca_address_sk <= 5000;                                                                                                   

select count(*) from customer_address;

select [first 20] * from customer_address  where ca_address_sk <= 5000 order by ca_address_sk ;

--------------------------------------------------------------------------
--select count(*) from hive.hive.customer_address;
select count(*) from customer_address_NOPK;

prepare s from 
load transform into customer_address_NOPK 
select * from hive.hive.customer_address  where ca_address_sk <= 5000;                                                                                              
explain options 'f' s;

load   into customer_address_NOPK 
select * from hive.hive.customer_address where ca_address_sk <= 5000;                                                                                                   

select count(*) from customer_address_NOPK;

select [first 20] * from customer_address_NOPK order by ca_address_sk ;

---------------------

select count(*) from hive.hive.customer_demographics where cd_demo_sk <= 5000;
select count(*) from customer_demographics;

prepare s from 
load transform into customer_demographics 
select * from hive.hive.customer_demographics where cd_demo_sk <= 5000;
explain options 'f' s;

log LOG015_plan.log clear;
explain s;
log;
sh grep "ESP_EXCHANGE.*6" LOG015_PLAN.LOG  >> LOG015 ;
sh grep -A 18  "ESP_EXCHANGE.*6" LOG015_PLAN.LOG  | grep partitioning -A 1 >> LOG015 ;
sh grep "ESP_EXCHANGE.*2" LOG015_PLAN.LOG  >> LOG015 ;
sh grep -A 18  "ESP_EXCHANGE.*2" LOG015_PLAN.LOG  | grep parent_partitioning -A 1 >> LOG015 ;

log LOG015;

load   into customer_demographics 
select * from hive.hive.customer_demographics where cd_demo_sk <= 5000;                                                                                            

select count(*) from customer_demographics;

select [first 20] * from customer_demographics order by cd_demo_sk  ;


---------------------
select count(*) from hive.hive.customer_demographics where cd_demo_sk <= 5000;;
select count(*) from customer_demographics_salt;

prepare s from 
load transform into customer_demographics_salt 
select * from hive.hive.customer_demographics where cd_demo_sk <= 5000;;                                                                                              
explain options 'f' s;

log LOG015_plan.log clear;
explain s;
log;

sh grep "ESP_EXCHANGE.*6" LOG015_PLAN.LOG  >> LOG015 ;
sh grep -A 18  "ESP_EXCHANGE.*6" LOG015_PLAN.LOG  | grep partitioning -A 3 >> LOG015 ;
sh grep "ESP_EXCHANGE.*2" LOG015_PLAN.LOG  >> LOG015 ;
sh grep -A 18  "ESP_EXCHANGE.*2" LOG015_PLAN.LOG  | grep parent_partitioning -A 3 >> LOG015 ;
log LOG015;

load   into customer_demographics_salt 
select * from hive.hive.customer_demographics where cd_demo_sk <= 5000;;                                                                                              

select count(*) from customer_demographics_salt;

select [first 20] * from customer_demographics_salt order by cd_demo_sk  ;

----------------

-- with no duplicats option 


cqd  comp_bool_226 'on';
drop table t015t3 cascade;
drop table t015t4 cascade;
drop table t015t5 cascade;
drop table t015t6 cascade;

create table t015t3 ( a int not null primary key, b int, c int);
create table t015t4 ( a int not null primary key, b int, c int) salt using 2 partitions on (a);
create table t015t5 ( a int not null , b int, c int) store by (a);
create table t015t6 ( a int not null , b int, c int);
delete from t015t1;
insert into  t015t1 values (25),(24),(23),(7),(5),(5),(2),(10),(11),(12),(13),(14),(15);


----produces error 
prepare s from load transform into t015t3 select a,a,a from t015t1;
explain options 'f' s;
execute s;
load with no output into t015t3 select a,a,a from t015t1;
select * from t015t3 order by a;
---produces eror
prepare s from load transform into t015t4 select a,a,a from t015t1 <<+ cardinality 10e8 >>;
explain options 'f' s;
execute s;
load with no output into t015t4 select a,a,a from t015t1 <<+ cardinality 10e8 >>; 
select * from t015t4 order by a;
---
prepare s from load transform into t015t5 select a,a,a from t015t1 ;
explain options 'f' s;
execute s;
load with no output into t015t5 select a,a,a from t015t1 ; 
select * from t015t5 order by a;
---
prepare s from load transform into t015t6 select a,a,a from t015t1 ;
explain options 'f' s;
execute s;
load with no output into t015t6 select a,a,a from t015t1 ; 
select * from t015t6 order by a;
------------------------------

----
cqd TRAF_LOAD_PREP_SKIP_DUPLICATES 'ON';
prepare s from load transform into t015t3 select a,a,a from t015t1;
explain options 'f' s;
execute s;
cqd TRAF_LOAD_PREP_SKIP_DUPLICATES reset;
load with no output, no duplicate check  into t015t3 select a,a,a from t015t1;
select * from t015t3 order by a;
---
cqd TRAF_LOAD_PREP_SKIP_DUPLICATES 'ON';
prepare s from load transform into t015t4 select a,a,a from t015t1 <<+ cardinality 10e8 >>;
explain options 'f' s;
execute s;
cqd TRAF_LOAD_PREP_SKIP_DUPLICATES reset;
load with no output, no duplicate check into t015t4 select a,a,a from t015t1 <<+ cardinality 10e8 >>; 
select * from t015t4 order by a;
---
cqd TRAF_LOAD_PREP_SKIP_DUPLICATES 'ON';
prepare s from load transform into t015t5 select a,a,a from t015t1 ;
explain options 'f' s;
execute s;
cqd TRAF_LOAD_PREP_SKIP_DUPLICATES reset;
load with no output, no duplicate check into t015t5 select a,a,a from t015t1 ; 
select * from t015t5 order by a;
---
cqd TRAF_LOAD_PREP_SKIP_DUPLICATES 'ON';
prepare s from load transform into t015t6 select a,a,a from t015t1 ;
explain options 'f' s;
execute s;
cqd TRAF_LOAD_PREP_SKIP_DUPLICATES reset;
load with no output, no duplicate check into t015t6 select a,a,a from t015t1 ; 
select * from t015t4 order by a;


--Add  tests with indexes on bulkoaded tables
drop table customer_demographics_salt cascade;
drop table customer_demographics cascade;
create table customer_demographics
(
  cd_demo_sk              int not null,
  cd_gender               char(1),
  cd_marital_status       char(1),
  cd_education_status     char(20),
  cd_purchase_estimate    int,
  cd_credit_rating        char(10),
  cd_dep_count            int,
  cd_dep_employed_count   int,
  cd_dep_college_count    int
) store by (cd_demo_sk); 

create table customer_demographics_salt
(
  cd_demo_sk              int not null,
  cd_gender               char(1),
  cd_marital_status       char(1),
  cd_education_status     char(20),
  cd_purchase_estimate    int,
  cd_credit_rating        char(10),
  cd_dep_count            int,
  cd_dep_employed_count   int,
  cd_dep_college_count    int,
  primary key (cd_demo_sk)
)
salt using 4 partitions on (cd_demo_sk);
create index cd_dep_count_IDX on customer_demographics(cd_dep_count);
create index cd_dep_college_count_IDX on customer_demographics(cd_dep_college_count);

explain options 'f' 
load  transform into customer_demographics
select * from hive.hive.customer_demographics where cd_demo_sk <= 5000;

load  into customer_demographics 
select * from hive.hive.customer_demographics where cd_demo_sk <= 5000;

set parserflags 1;
select count(*) from customer_demographics;
select count(*) from table(index_table cd_dep_count_IDX);
select count(*) from table(index_table cd_dep_college_count_IDX);

load  with rebuild indexes into customer_demographics 
select * from hive.hive.customer_demographics where cd_demo_sk > 5000 and cd_demo_sk <= 6000 ;

select count(*) from customer_demographics;
select count(*) from table(index_table cd_dep_count_IDX);
select count(*) from table(index_table cd_dep_college_count_IDX);

create index cd_dep_count_IDX2 on customer_demographics_salt(cd_dep_count);
create index cd_dep_college_count_IDX2 on customer_demographics_salt(cd_dep_college_count);
---------------------------
load  into customer_demographics_salt 
select * from hive.hive.customer_demographics where cd_demo_sk <= 5000;

set parserflags 1;
select count(*) from customer_demographics_salt;
select count(*) from table(index_table cd_dep_count_IDX2);
select count(*) from table(index_table cd_dep_college_count_IDX2);

drop index cd_dep_count_IDX2;
drop index cd_dep_college_count_IDX2;

-- --load with upsert using load
-- drop table customer_demographics_salt cascade;
-- drop table customer_demographics cascade;
-- create table customer_demographics
-- (
--   cd_demo_sk              int not null,
--   cd_gender               char(1),
--   cd_marital_status       char(1),
--   cd_education_status     char(20),
--   cd_purchase_estimate    int,
--   cd_credit_rating        char(10),
--   cd_dep_count            int,
--   cd_dep_employed_count   int,
--   cd_dep_college_count    int,
--   primary key (cd_demo_sk)
-- ); 
-- 
-- create table customer_demographics_salt
-- (
--   cd_demo_sk              int not null,
--   cd_gender               char(1),
--   cd_marital_status       char(1),
--   cd_education_status     char(20),
--   cd_purchase_estimate    int,
--   cd_credit_rating        char(10),
--   cd_dep_count            int,
--   cd_dep_employed_count   int,
--   cd_dep_college_count    int,
--   primary key (cd_demo_sk)
-- )
-- salt using 4 partitions on (cd_demo_sk);
-- create index cd_dep_count_IDX on customer_demographics(cd_dep_count) no populate ;
-- create index cd_dep_college_count_IDX on customer_demographics(cd_dep_college_count) no populate;
-- 
-- load with upsert using load into customer_demographics 
-- select * from hive.hive.customer_demographics where cd_demo_sk <= 5000;
-- 
-- set parserflags 1;
-- select count(*) from customer_demographics;
-- select count(*) from table(index_table cd_dep_count_IDX);
-- select count(*) from table(index_table cd_dep_college_count_IDX);
-- 
-- drop index cd_dep_count_IDX;
-- drop index cd_dep_college_count_IDX;
-- 
-- create index cd_dep_count_IDX2 on customer_demographics_salt(cd_dep_count) no populate ;
-- create index cd_dep_college_count_IDX2 on customer_demographics_salt(cd_dep_college_count) no populate;
-- ---------------------------
-- load with upsert using load into customer_demographics_salt 
-- select * from hive.hive.customer_demographics where cd_demo_sk <= 5000;
-- 
-- set parserflags 1;
-- select count(*) from customer_demographics_salt;
-- select count(*) from table(index_table cd_dep_count_IDX2);
-- select count(*) from table(index_table cd_dep_college_count_IDX2);
-- 
-- drop index cd_dep_count_IDX2;
-- drop index cd_dep_college_count_IDX2;

-----
drop table customer_address cascade;

create table "customer_address_delim"
(
  ca_address_sk        int not null,
  ca_address_id        char(16),
  ca_street_number     char(10),
  ca_street_name       varchar(60),
  ca_street_type       char(15),
  ca_suite_number      char(10),
  ca_city              varchar(60),
  ca_county            varchar(30),
  ca_state             char(2),
  ca_zip               char(10),
  ca_country           varchar(30),
  ca_gmt_offset        decimal(5,2),
  ca_location_type     char(20),
  primary key (ca_address_sk)
)
salt using 4 partitions
HBASE_OPTIONS (data_block_encoding = 'FAST_DIFF', compression = 'GZ') 
;

select count(*) from hive.hive.customer_address  where ca_address_sk <= 5000;
select count(*) from "customer_address_delim";

prepare s from 
load transform into "customer_address_delim" 
select * from hive.hive.customer_address  where ca_address_sk <= 5000;                                                                                              
explain options 'f' s;

load   into "customer_address_delim" 
select * from hive.hive.customer_address  where ca_address_sk <= 5000;                                                                                                   

select count(*) from "customer_address_delim";

select [first 20] * from "customer_address_delim" order by ca_address_sk ;

cqd attempt_esp_parallelism 'off';
load with no output into t015t4 select a,a,a from t015t1;







