-- Tests for SQL on Hadoop PoC
-- Added April 2013
--
-- @@@ START COPYRIGHT @@@
--
-- Licensed to the Apache Software Foundation (ASF) under one
-- or more contributor license agreements.  See the NOTICE file
-- distributed with this work for additional information
-- regarding copyright ownership.  The ASF licenses this file
-- to you under the Apache License, Version 2.0 (the
-- "License"); you may not use this file except in compliance
-- with the License.  You may obtain a copy of the License at
--
--   http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing,
-- software distributed under the License is distributed on an
-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-- KIND, either express or implied.  See the License for the
-- specific language governing permissions and limitations
-- under the License.
--
-- @@@ END COPYRIGHT @@@

-- NEED to run TEST003 first

log LOG004 clear;

set schema hive.hive;
cqd attempt_esp_parallelism 'off';

cqd hive_max_esps  '1';
cqd PARALLEL_NUM_ESPS '1';
cqd parallel_num_esps '1';

cqd HIVE_MAX_STRING_LENGTH_IN_BYTES '25' ;
cqd mode_seahive 'ON';

select [first 100] * from hive.ins_customer order by c_customer_sk;
select * from hive.ins_promotion order by P_PROMO_SK;
select [first 100] * from hive.ins_customer_address order by CA_ADDRESS_SK;
select * from hive.ins_store order by S_STORE_SK;
log;

----------customer has 100000 rows
log LOG004_ins_customer.dat clear;
select [first 1000] * from hive.ins_customer order by c_customer_sk;
log;
log LOG004_orig_customer.dat clear;
select [first 1000] * from hive.customer order by c_customer_sk;
log;
sh  diff  LOG004_ORIG_CUSTOMER.DAT LOG004_INS_CUSTOMER.DAT 2>&1 >> LOG004;
----promotion has 300 rows

log LOG004_orig_promotion.dat clear;
select * from hive.promotion order by P_PROMO_SK;
log;
log LOG004_ins_promotion.dat clear;
select * from hive.ins_promotion order by P_PROMO_SK;
log;
sh  diff  LOG004_ORIG_PROMOTION.DAT LOG004_INS_PROMOTION.DAT 2>&1  >> LOG004;

---------customer_address has 50000 rows
log LOG004_orig_customer_address.dat clear;
select [first 1000] * from hive.customer_address order by CA_ADDRESS_SK;
log;
log LOG004_ins_customer_address.dat clear;
select [first 1000] * from hive.ins_customer_address order by CA_ADDRESS_SK;
log;
sh  diff -s LOG004_ORIG_CUSTOMER_ADDRESS.DAT LOG004_INS_CUSTOMER_ADDRESS.DAT 2>&1 >> LOG004;

----store has 12 rows
log LOG004_orig_store.dat clear;
select * from hive.store order by S_STORE_SK;
log;
log LOG004_ins_store.dat clear;
select * from hive.ins_store order by S_STORE_SK;
log;
sh  diff -s LOG004_ORIG_STORE.DAT LOG004_INS_STORE.DAT 2>&1 >>LOG004;

--THERE SEEMS TO BE AN ISSUE WITH FLOATING POINT TYPES --maybe coversion issue
-- using ceiling function to overcome this issue for now
----store_sales has xxxrows
log LOG004_orig_store_sales.dat clear;
select [first 500]
ss_sold_date_sk,ss_sold_time_sk, ss_item_sk, ss_customer_sk, 
ss_cdemo_sk, ss_hdemo_sk,ss_addr_sk,ss_store_sk, ss_promo_sk,
ss_ticket_number,ss_quantity, 
ceiling(ss_wholesale_cost) , 
ceiling(ss_list_price),
ceiling(ss_sales_price),
ceiling(ss_ext_discount_amt), 
ceiling(ss_ext_sales_price),
ceiling(ss_ext_wholesale_cost), 
ceiling(ss_ext_list_price),
ceiling(ss_ext_tax),
ceiling(ss_coupon_amt),
ceiling(ss_net_paid), 
ceiling(ss_net_paid_inc_tax),
ceiling(ss_net_profit)
from hive.store_sales order by  ss_sold_date_sk,ss_sold_time_sk, ss_item_sk, ss_customer_sk, ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk ,ss_store_sk, ss_promo_sk;
log;
log LOG004_ins_store_sales.dat clear;
select [first 500]
ss_sold_date_sk,ss_sold_time_sk, ss_item_sk, ss_customer_sk, 
ss_cdemo_sk, ss_hdemo_sk,ss_addr_sk,ss_store_sk, ss_promo_sk,
ss_ticket_number,ss_quantity, 
ceiling(ss_wholesale_cost) , 
ceiling(ss_list_price),
ceiling(ss_sales_price),
ceiling(ss_ext_discount_amt), 
ceiling(ss_ext_sales_price),
ceiling(ss_ext_wholesale_cost), 
ceiling(ss_ext_list_price),
ceiling(ss_ext_tax),
ceiling(ss_coupon_amt),
ceiling(ss_net_paid), 
ceiling(ss_net_paid_inc_tax),
ceiling(ss_net_profit)
from hive.ins_store_sales  order by  ss_sold_date_sk,ss_sold_time_sk, ss_item_sk, ss_customer_sk, ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk ,ss_store_sk, ss_promo_sk;
log;
sh diff  LOG004_ORIG_STORE_SALES.DAT LOG004_INS_STORE_SALES.DAT 2>&1 >>LOG004;


----customer_demographics has xxxrows
log LOG004_orig_customer_demographics.dat clear;
select  [first 500] * from hive.customer_demographics order by cd_demo_sk ;
log;
log LOG004_ins_customer_demographics.dat clear;
select  [first 500] * from hive.ins_customer_demographics  order by cd_demo_sk;
log;
sh diff  LOG004_ORIG_CUSTOMER_DEMOGRAPHICS.DAT LOG004_INS_CUSTOMER_DEMOGRAPHICS.DAT 2>&1 >>LOG004;



----date_dim has xxxrows
log LOG004_orig_date_dim.dat clear;
select  [first 500] * from hive.date_dim ;
log;
log LOG004_ins_date_dim.dat clear;
select  [first 500] * from hive.ins_date_dim;
log;
sh diff  LOG004_ORIG_DATE_DIM.DAT LOG004_INS_DATE_DIM.DAT 2>&1 >>LOG004;

----time_dim has xxxrows
log LOG004_orig_time_dim.dat clear;
select  [first 500] * from hive.time_dim ;
log;
log LOG004_ins_time_dim.dat clear;
select  [first 500] * from hive.ins_time_dim;
log;
sh diff  LOG004_ORIG_TIME_DIM.DAT LOG004_INS_TIME_DIM.DAT 2>&1 >>LOG004;


----item has xxxrows
log LOG004_orig_item.dat clear;
select  [first 500] i_item_sk,i_item_id,i_rec_start_date, i_rec_end_date  i_item_desc,ceiling(i_current_price),ceiling(i_wholesale_cost), i_brand_id,i_brand, i_class_id, i_class, i_category_id, i_category,i_manufact_id,i_manufact,i_size,i_formulation, i_color, i_units, i_container, i_manager_id, i_product_name
from hive.item order by i_item_sk, i_item_id ;
log;
log LOG004_ins_item.dat clear;
select  [first 500] i_item_sk,i_item_id,i_rec_start_date, i_rec_end_date  i_item_desc,ceiling(i_current_price),ceiling(i_wholesale_cost), i_brand_id,i_brand, i_class_id, i_class, i_category_id, i_category,i_manufact_id,i_manufact,i_size,i_formulation, i_color, i_units, i_container, i_manager_id, i_product_name
from hive.ins_item  order by i_item_sk, i_item_id;
log;
sh diff  LOG004_ORIG_ITEM.DAT LOG004_INS_ITEM.DAT 2>&1 >>LOG004;


----household_demographics_dim has xxxrows
log LOG004_orig_household_demographics.dat clear;
select  [first 500] * from hive.household_demographics order by hd_demo_sk ;
log;
log LOG004_ins_household_demographics.dat clear;
select  [first 500] * from hive.ins_household_demographics order by hd_demo_sk;
log;
sh diff  LOG004_ORIG_HOUSEHOLD_DEMOGRAPHICS.DAT LOG004_INS_HOUSEHOLD_DEMOGRAPHICS.DAT 2>&1 >>LOG004;



--insert into ins_customerNaddress select c_customer_id, c_salutation, c_first_name, c_last_name, ca_address_id, ca_street_number, ca_street_name, ca_street_type, ca_suite_number,ca_city,  ca_county, ca_state, ca_zip, ca_country  from customer a join customer_address b on a.c_current_addr_sk=b.ca_address_sk where a.c_customer_sk <=1000;



--insert into ins_store_sales_summary select ss_sold_date_sk ,s_store_id, s_store_name,ss_store_sk, sum (ss_quantity) , sum(ss_wholesale_cost) from store a join store_sales b on a.s_store_sk=b.ss_store_sk group by  ss_sold_date_sk ,s_store_id, s_store_name,ss_store_sk; 


