-- @@@ START COPYRIGHT @@@
--
-- Licensed to the Apache Software Foundation (ASF) under one
-- or more contributor license agreements.  See the NOTICE file
-- distributed with this work for additional information
-- regarding copyright ownership.  The ASF licenses this file
-- to you under the Apache License, Version 2.0 (the
-- "License"); you may not use this file except in compliance
-- with the License.  You may obtain a copy of the License at
--
--   http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing,
-- software distributed under the License is distributed on an
-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-- KIND, either express or implied.  See the License for the
-- specific language governing permissions and limitations
-- under the License.
--
-- @@@ END COPYRIGHT @@@
-- TESTDML14 (Histograms and MFVs)
-- Functionality: Tests MFVs and second MFVs generated in histograms
--                and their impact on cardinalities
-- Expected File: ETESTDML14
-- History: Created 10/06/2010
-- Owner: Renu Varshneya
-------------------------------------------------------------------

drop schema cat.maxcard cascade;

log ATESTDML14 clear;

cqd HIST_MISSING_STATS_WARNING_LEVEL '0';

cqd HIST_USE_SAMPLE_FOR_CARDINALITY_ESTIMATION 'off';

CQD POS 'OFF';

create schema cat.maxcard;

set schema cat.maxcard;

create table t0 (a int not null not droppable, b int, c int, primary key (a));
create table t1 (a int not null not droppable, b int, c int, primary key (a));
create table t2 (a int not null not droppable, b int, c int, primary key (a));
create table t3 (a int not null not droppable, b int, c int, primary key (a));
create table t4 (a int not null not droppable, b int, c int, primary key (a));
create table t5 (a int not null not droppable, b int, c int, primary key (a));
create table t6 (a int not null not droppable, b int, c int, primary key (a));

create table cube2
(a int not null not droppable, 
b int not null not droppable,
c int not null not droppable,
d int, e int, f int, txt char(100),
primary key (a,b,c));

CREATE TABLE TMFV2 (ssn largeint NOT NULL,
                 sid int not null
                );

CREATE TABLE TMFV3 (cssn char(40) NOT NULL,
                 sid int not null
                );

CREATE TABLE TMFV4 (cssn char(40) NOT NULL,
                 sid int not null
                );

create table TMFV1(a int not null, primary key(a));

insert into t0 values (0,0,0);
insert into t0 values (1,1,1);
insert into t0 values (2,2,2);
insert into t0 values (3,3,3);
insert into t0 values (4,4,4);
insert into t0 values (5,5,5);
insert into t0 values (6,6,6);
insert into t0 values (7,7,7);
insert into t0 values (8,8,8);
insert into t0 values (9,9,9);

-- t1,t2,t3,t4,t5 are 10 rows similar to t0
insert into t1  select  * from t0;
insert into t2  select  * from t0;
insert into t3  select  * from t0;
insert into t4  select  * from t0;
insert into t5  select  * from t0;

-- t6, t7 are 100 rows
insert into t6  select  t1.a+10*t2.a,t1.a,t2.a from t1,t2;

insert into cube2  select  t2.a, t6.a, t.a + t6.a, t2.a, t6.a, t.b, 'some text'
from t2, t6, t6 t  where t6.b < 9;

update cube2 set txt = 'blue sky' where a < 3;
update cube2 set txt = 'blue sky green grass' where a = 3;
update cube2 set txt = 'blue sky red sun' where a+b = 3;
update cube2 set txt = 'some text repeated' where a >=4 and a <= 8;
update cube2 set d = 11 where f in (5, 3);
update cube2 set d = 3 where f < 5;

insert into TMFV1 values(1);
-- Approx 197K rows inserted

insert into TMFV2
(
select c1,c10 from TMFV1
  transpose 1,1,1,2,2,3,4,5,6,7,8,9,10,12,12,13,13,15,15,16,16,17,17,18,18,18,19,19,20,20,21,21,21,
  22,22,22,23,23,24,24,24,25,26,27,28,28,29,29 as c1
  transpose 0,1,2,3,4,5,6,7 as c2
  transpose 0,1,2,3,4,5,6,7 as c3
  transpose 0,1,2,3,4,5,6,7 as c8
  transpose 0,1,2,3,4,5,6,7 as c10
);

-- Approx 197K rows inserted

insert into TMFV3
(
select c1,c10 from TMFV1
  transpose 
'1', '1', '1', '2', '2', '3', '4', '5', '6', '7', '8', '9', '10',
'123456789012345678901234567890',
'123456789012345678901234567890',
'13', '13', '15', '15', '16', '16', '17', '17', '18', '18', '18',
'19', '19', '20', '20', '21', '21', '21', '22', '22', '22', '23', '23',
'24', '24', '24', '25', '26', '27', '28', '28', '29', '29' as c1
  transpose 0,1,2,3,4,5,6,7 as c2
  transpose 0,1,2,3,4,5,6,7 as c3
  transpose 0,1,2,3,4,5,6,7 as c8
  transpose 0,1,2,3,4,5,6,7 as c10
);

-- Approx 197K rows inserted

insert into TMFV4 (select * from TMFV3);

cqd USTAT_PROCESS_GAPS 'off';

-- disable generation of single valued intervals. So we can capture MFVs and SMFVs

cqd USTAT_FREQ_SIZE_PERCENT '110';

update statistics for table cube2 on txt generate 3 intervals;

--Number    Rowcount         UEC Boundary
--====== =========== =========== ==============================
--     0           0           0 ('blue sky')
--     1       26700           1 ('blue sky')
--     2       18300           3 ('some text')
--     3       45000           1 ('some text repeated')

update statistics for table cube2 on d generate 5 intervals;

--Number    Rowcount         UEC Boundary
--====== =========== =========== =========
--     0           0           0 (0)
--     1       10800           3 (2)
--     2       48600           1 (3)
--     3        7200           2 (5)
--     4       10800           3 (8)
--     5       12600           2 (11)

update statistics for table TMFV2 on ssn generate 10 intervals;

--Number    Rowcount         UEC Boundary
--====== =========== =========== ===========
--     0           0           0 (1)
--     1       12288           1 (1)
--     2       20480           4 (5)
--     3       20480           5 (10)
--     4       16384           2 (13)
--     5       16384           2 (16)
--     6       20480           2 (18)
--     7       16384           2 (20)
--     8       24576           2 (22)
--     9       24576           3 (25)
--    10       24576           4 (29)

update statistics for table TMFV2 on sid generate 10 intervals;

--Number    Rowcount         UEC Boundary
--====== =========== =========== =========
--     0           0           0 (0)
--     1       24576           1 (0)
--     2       24576           1 (1)
--     3       24576           1 (2)
--     4       24576           1 (3)
--     5       24576           1 (4)
--     6       24576           1 (5)
--     7       24576           1 (6)
--     8       24576           1 (7)

update statistics for table TMFV3 on cssn generate 10 intervals;

--Number    Rowcount         UEC Boundary
--====== =========== =========== =========
--     0           0           0 ('1')
--     1       16384           2 ('10')
--     2       16384           2 ('13')
--     3       16384           2 ('16')
--     4       20480           2 ('18')
--     5       16384           2 ('2')
--     6       20480           2 ('21')
--     7       20480           2 ('23')
--     8       20480           3 ('26')
--     9       24576           4 ('3')
--    10       24576           6 ('9')

update statistics for table TMFV4 on cssn generate 10 intervals;

--Number    Rowcount         UEC Boundary
--====== =========== =========== =========
--     0           0           0 ('1')
--     1       16384           2 ('10')
--     2       16384           2 ('13')
--     3       16384           2 ('16')
--     4       20480           2 ('18')
--     5       16384           2 ('2')
--     6       20480           2 ('21')
--     7       20480           2 ('23')
--     8       20480           3 ('26')
--     9       24576           4 ('3')
--    10       24576           6 ('9')


-- Get MFV and second MFVs for each column wehave updated
cqd ustat_show_mfv_info 'on';

prepare report_scan_maxcard from
select cast(SEQ_NUM as char(4)) SEQN, cast(operator as char(26)) OPERATOR,
cast(
substring(description, position('max_card_est' in description),
      position('fragm' in description) - position('max_card_est' in description)
         ) as char(30))
from table(explain(NULL, 'XX')) where
description like '%max%' and
rtrim(operator) = 'FILE_SCAN'
order by 1
;

prepare explainCard from
select plan.num, plan.op, tab_name, plan.card "Expected Cardinality"
from (select cast(runningcount(seq_num) as char(4)),
                  cast(cardinality as char(10)),
                  cast(operator as char (30)),
                  substring (substring(tname from (1+locate('.',tname))), (1+locate('.',substring(tname from (1+locate('.',tname))))),15 )
      from table(explain(null,'XX'))
      where (operator like '%SCAN%') OR
                (operator like '%JOIN%')
      sequence by seq_num desc) plan(num, card, op, tab_name)
;

cqd query_cache '0';

cqd cache_histograms '0';

cqd hist_use_high_frequency_info 'on';

cqd ustat_show_mfv_info 'on';

---=========================================================================
--- Clear and regenerate statistics with same number of intervals.
--- The results were very consistent
--=========================================================================


-- =========================================================================
-- Equality predicates with MFVs are accurate

-- 1.2E4 rows returned
prepare xx from select count(*) from tmfv2 where ssn = 24;

execute report_scan_maxcard;

execute explainCard ;

-- 8192 rows returned
prepare xx from select count(*) from tmfv2 where ssn = 2;

execute report_scan_maxcard;

execute explainCard ;

-- 8192 rows returned
prepare xx from select count(*) from tmfv2 where ssn = 12;

execute report_scan_maxcard;

execute explainCard ;

-- 45K rows returned
prepare xx from select count(*) from cube2 where txt = 'some text repeated';

execute report_scan_maxcard;

execute explainCard ;

prepare xx from select count(*) from cube2 where lower(txt) = 'some text repeated';

execute report_scan_maxcard;

execute explainCard ;

-- 9000 rows returned for next 4 queries below
prepare xx from select count(*) from cube2 where txt = 'some text';

execute report_scan_maxcard;

execute explainCard ;

prepare xx from select count(*) from cube2 where upper(txt) = 'some text';

execute report_scan_maxcard;

execute explainCard ;

prepare xx from select count(*) from cube2 where cast(txt as char (5)) = 'some text';

execute report_scan_maxcard;

execute explainCard ;

prepare xx from select count(*) from cube2 where trim(txt) = 'some text';

execute report_scan_maxcard;

execute explainCard ;

-- turn hist_use_high_frequency_info 'off' and recompile the queries

cqd hist_use_high_frequency_info 'off';

-- 8190 rows returned
prepare xx from select count(*) from tmfv2 where ssn = 24;

execute report_scan_maxcard;

execute explainCard ;

-- 5110 rows returned
prepare xx from select count(*) from tmfv2 where ssn = 2;

execute report_scan_maxcard;

execute explainCard ;

-- 8190 rows returned
prepare xx from select count(*) from tmfv2 where ssn = 12;

execute report_scan_maxcard;

execute explainCard ;

-- 45K rows returned. No impact since it is a SVI
prepare xx from select count(*) from cube2 where txt = 'some text repeated';

execute report_scan_maxcard;

execute explainCard ;

prepare xx from select count(*) from cube2 where lower(txt) = 'some text repeated';

execute report_scan_maxcard;

execute explainCard ;

-- 45K rows returned for next 4 queries below, which is wrong. Because of encoded values
-- the optimizer is returning the rowcount from the next interval

prepare xx from select count(*) from cube2 where txt = 'some text';

execute report_scan_maxcard;

execute explainCard ;

prepare xx from select count(*) from cube2 where upper(txt) = 'some text';

execute report_scan_maxcard;

execute explainCard ;

prepare xx from select count(*) from cube2 where cast(txt as char (5)) = 'some text';

execute report_scan_maxcard;

execute explainCard ;

prepare xx from select count(*) from cube2 where trim(txt) = 'some text';

execute report_scan_maxcard;

execute explainCard ;

--=========================================================================


--=========================================================================
cqd hist_use_high_frequency_info 'on';

-- Equality predicates with non-MFVs. It should pick the average value for card estimate
-- max cardinality estimate should pick up the second MFV frequency

prepare xx from select count(*) from tmfv2 where ssn = 16;

execute report_scan_maxcard;

execute explainCard ;

prepare xx from select count(*) from tmfv2 where ssn = 20;

execute report_scan_maxcard;

execute explainCard ;

prepare xx from select count(*) from cube2 where txt = 'more text';

execute report_scan_maxcard;

execute explainCard ;

prepare xx from select count(*) from cube2 where lower(txt) = 'more text';

--=========================================================================

--=========================================================================

-- Non-equality predicates with MFVs

cqd hist_use_high_frequency_info 'on';

prepare xx from select count(*) from tmfv2 where ssn <> 24;

execute report_scan_maxcard;

execute explainCard ;

prepare xx from select count(*) from tmfv2 where ssn <> 2;

execute report_scan_maxcard;

execute explainCard ;

prepare xx from select count(*) from tmfv2 where ssn <> 12;

execute report_scan_maxcard;

execute explainCard ;

cqd hist_use_high_frequency_info 'off';

prepare xx from select count(*) from tmfv2 where ssn <> 24;

execute report_scan_maxcard;

execute explainCard ;

prepare xx from select count(*) from tmfv2 where ssn <> 2;

execute report_scan_maxcard;

execute explainCard ;

prepare xx from select count(*) from tmfv2 where ssn <> 12;

execute report_scan_maxcard;

execute explainCard ;


log;

exit;

