Hi,

Below is a table definition from a Oracle 11g instance (sorry for the long text).

CREATE TABLE "SCHEMA"."DOCUMENTS"
( "AA" NUMBER(10,0) NOT NULL ENABLE,
"ACC_YEAR" NUMBER(4,0) NOT NULL ENABLE,
"IN_DAY" DATE,
"DOC_NUMBER" VARCHAR2(50 BYTE),
"DOC_DATE" DATE,
"IN_PLACEOFCREATION" VARCHAR2(500 BYTE),
"IN_PERSON" VARCHAR2(500 BYTE),
"WORKER" VARCHAR2(50 BYTE),
"IN_DESCRIPTION" VARCHAR2(1000 BYTE),
"IN_RECEIVER" VARCHAR2(400 BYTE),
"IN_FORWARD" VARCHAR2(400 BYTE),
"ARCHIVE" VARCHAR2(250 BYTE),
"OUT_DAY" DATE,
"OUT_PLACEOFCREATION" VARCHAR2(500 BYTE),
"OUT_AUTHORITY" VARCHAR2(500 BYTE),
"OUT_DESTINATION" VARCHAR2(500 BYTE),
"OUT_DESCRIPTION" VARCHAR2(1000 BYTE),
"OUT_ANNOUNCE" VARCHAR2(500 BYTE),
"FAKELOS_AA" NUMBER(10,0),
"FAKELOS_DATE" DATE,
"ARCHIVE_DATE" DATE,
"SEND_TYPE" VARCHAR2(50 BYTE),
"COMMENTS" VARCHAR2(4000 BYTE),
"FAKELOS_YEAR" NUMBER(10,0),
"IN_PERSON_CODE" VARCHAR2(20 BYTE),
"DOC_PIN" VARCHAR2(10 BYTE),
"DOC_STATUS" VARCHAR2(50 BYTE),
"DOC_EXPIREDATE" DATE,
"DOC_RECEIVE_TYPE" VARCHAR2(50 BYTE),
"V_USAGE" VARCHAR2(20 BYTE) NOT NULL ENABLE,
"V_OLD_USAGE" VARCHAR2(20 BYTE) DEFAULT '',
"N_OLD_YEAR" NUMBER(4,0),
"N_OLD_AA" NUMBER(10,0),
"V_PRIORITY" VARCHAR2(50 BYTE) DEFAULT '',
"V_SCALING" VARCHAR2(50 BYTE) DEFAULT '',
"V_MASTERUSAGE" VARCHAR2(20 BYTE),
"N_MASTERYEAR" NUMBER(4,0),
"N_MASTERAA" NUMBER(10,0),
"DOC_DESTROYDATE" DATE,
"N_PAGECOUNT" NUMBER(10,0),
"V_BARCODE" VARCHAR2(50 BYTE),
"V_APPLITYPE" VARCHAR2(64 BYTE),
"V_BARCODE_OUT" VARCHAR2(50 BYTE),
"V_TEAM" VARCHAR2(200 BYTE),
"V_EMAIL" VARCHAR2(100 BYTE),
"V_MOBILEPHONE" VARCHAR2(30 BYTE),
"N_STATUSID" NUMBER(10,0),
"V_ORGANIZATION" VARCHAR2(200 BYTE),
"V_UNIT" VARCHAR2(200 BYTE),
"V_IN_SUBJECT_CHAR" VARCHAR2(200 BYTE),
"V_OUT_SUBJECT_CHAR" VARCHAR2(200 BYTE),
"V_IMPORT_COODE" VARCHAR2(200 BYTE),
"V_WRITER" VARCHAR2(200 BYTE),
"V_UNDERSIGNED" VARCHAR2(200 BYTE),
"UPDATE_STRING" VARCHAR2(200 BYTE),
"CREATED_BY" VARCHAR2(200 BYTE),
"UPDATED_BY" VARCHAR2(200 BYTE),
"CREATED_ON" DATE,
"UPDATED_ON" DATE,
"N_CANCELLED" NUMBER(1,0),
"FAKELOS_AA2" VARCHAR2(16 BYTE),
"N_IN_PERSON_PUBLICBODY_ID" NUMBER(10,0),
"N_OUT_DEST_PUBLICBODY_ID" NUMBER(10,0),
"N_OUT_ANNOUNCE_PUBLICBODY_ID" NUMBER(10,0),
"N_DOCUMENTRECEIVED" NUMBER(1,0),
"N_ANNOUNCEMENTRECEIVED" NUMBER(1,0),
"N_WEB_RAISED" NUMBER(1,0),
"V_WEB_RAISEDNO" VARCHAR2(30 BYTE),
"N_EXCLUDE_MASTER_UPDATE" NUMBER(1,0),
"N_EMPLOYEEID" NUMBER(10,0),
"N_TOPAA" NUMBER(10,0),
"N_TOPYEAR" NUMBER(4,0),
"V_TOPUSAGE" VARCHAR2(20 BYTE),
"V_DOCUMENTDESCR" VARCHAR2(300 BYTE),
"V_COPYDOCUMENT" VARCHAR2(100 BYTE),
CONSTRAINT "DOCUMENTS_PKEY" PRIMARY KEY ("V_USAGE", "ACC_YEAR", "AA")
USING INDEX PCTFREE 10 INITRANS 2 MAXTRANS 255 COMPUTE STATISTICS
STORAGE(INITIAL 655360 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645
PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL DEFAULT
FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT)
TABLESPACE "TS_PROTOCOL"  ENABLE,
CONSTRAINT "DOCUMENTS_STATUS_FKEY" FOREIGN KEY ("N_STATUSID")
REFERENCES "SCHEMA"."STATUS" ("N_ID") ENABLE,
CONSTRAINT "FK_DOCUMENTS_EMPLOYEES" FOREIGN KEY ("N_EMPLOYEEID")
REFERENCES "SCHEMA"."EMPLOYEE" ("N_ID") ON DELETE SET NULL ENABLE
) SEGMENT CREATION IMMEDIATE
PCTFREE 10 PCTUSED 40 INITRANS 1 MAXTRANS 255 NOCOMPRESS LOGGING
STORAGE(INITIAL 4194304 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645
PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL DEFAULT
FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT)
TABLESPACE "TS_PROTOCOL" ;

CREATE INDEX "SCHEMA"."DOUBLE_RECORDS_CHECK" ON "SCHEMA"."DOCUMENTS"
("IN_PERSON", "DOC_NUMBER", "DOC_DATE")
PCTFREE 10 INITRANS 2 MAXTRANS 255 COMPUTE STATISTICS
STORAGE(INITIAL 65536 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645
PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL DEFAULT
FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT)
TABLESPACE "TS_GENERAL" ;

CREATE INDEX "SCHEMA"."IDX_ACCYEAR" ON "SCHEMA"."DOCUMENTS" ("ACC_YEAR")
PCTFREE 10 INITRANS 2 MAXTRANS 255 COMPUTE STATISTICS
STORAGE(INITIAL 65536 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645
PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL DEFAULT
FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT)
TABLESPACE "TS_GENERAL" ;

CREATE INDEX "SCHEMA"."IDX_CREATEDBY" ON "SCHEMA"."DOCUMENTS" ("CREATED_BY")
PCTFREE 10 INITRANS 2 MAXTRANS 255 COMPUTE STATISTICS
STORAGE(INITIAL 65536 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645
PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL DEFAULT
FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT)
TABLESPACE "TS_GENERAL" ;

CREATE INDEX "SCHEMA"."IDX_INDAY" ON "SCHEMA"."DOCUMENTS" ("IN_DAY")
PCTFREE 10 INITRANS 2 MAXTRANS 255 COMPUTE STATISTICS
STORAGE(INITIAL 65536 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645
PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL DEFAULT
FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT)
TABLESPACE "TS_GENERAL" ;

CREATE INDEX "SCHEMA"."IDX_MASTERUSAGEYEAR" ON "SCHEMA"."DOCUMENTS"
("V_MASTERUSAGE", "N_MASTERYEAR")
PCTFREE 10 INITRANS 2 MAXTRANS 255 COMPUTE STATISTICS
STORAGE(INITIAL 65536 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645
PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL DEFAULT
FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT)
TABLESPACE "TS_GENERAL" ;

CREATE INDEX "SCHEMA"."IDX_NMASTERAA" ON "SCHEMA"."DOCUMENTS" ("N_MASTERAA")
PCTFREE 10 INITRANS 2 MAXTRANS 255 COMPUTE STATISTICS
STORAGE(INITIAL 65536 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645
PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL DEFAULT
FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT)
TABLESPACE "TS_GENERAL" ;

CREATE INDEX "SCHEMA"."IDX_UPDATED_BY" ON "SCHEMA"."DOCUMENTS" ("UPDATED_BY")
PCTFREE 10 INITRANS 2 MAXTRANS 255 COMPUTE STATISTICS
STORAGE(INITIAL 65536 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645
PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL DEFAULT
FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT)
TABLESPACE "TS_GENERAL" ;

CREATE INDEX "SCHEMA"."IDX_UPDATED_ON" ON "SCHEMA"."DOCUMENTS" ("UPDATED_ON")
PCTFREE 10 INITRANS 2 MAXTRANS 255 COMPUTE STATISTICS
STORAGE(INITIAL 65536 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645
PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL DEFAULT
FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT)
TABLESPACE "TS_GENERAL" ;

CREATE INDEX "SCHEMA"."INDAY_USAGE" ON "SCHEMA"."DOCUMENTS" ("IN_DAY",
"V_USAGE")
PCTFREE 10 INITRANS 2 MAXTRANS 255 COMPUTE STATISTICS
STORAGE(INITIAL 65536 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645
PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL DEFAULT
FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT)
TABLESPACE "TS_GENERAL" ;

CREATE UNIQUE INDEX "SCHEMA"."UNIQUERECORDS_INDEX" ON "SCHEMA"."DOCUMENTS"
("V_DOCUMENTDESCR")
PCTFREE 10 INITRANS 2 MAXTRANS 255 COMPUTE STATISTICS
STORAGE(INITIAL 65536 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645
PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL DEFAULT
FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT)
TABLESPACE "TS_GENERAL" ;

The following query:

SELECT COUNT(*) FROM DOCUMENTS WHERE V_USAGE='ANAT' AND ACC_YEAR=2013 AND
CREATED_ON>=TO_DATE('01/11/2013', 'DS') AND CREATED_BY='user';

generates this plan, which is normal I guess, since there's no index for the CREATED_ON column.

----------------------------------------------------------------------------------------------
| Id  | Operation                    | Name          | Rows  | Bytes | Cost (%CPU)| Time     |
----------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT             |               |     1 |    26 |   798   (1)| 00:00:10 |
|   1 |  SORT AGGREGATE              |               |     1 |    26 |            |          |
|*  2 |   TABLE ACCESS BY INDEX ROWID| DOCUMENTS     |     1 |    26 |   798   (1)| 00:00:10 |
|*  3 |    INDEX RANGE SCAN          | IDX_CREATEDBY |  2131 |       |    11   (0)| 00:00:01 |
----------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------

  2 - filter("CREATED_ON">=TO_DATE(' 2013-11-01 00:00:00', 'syyyy-mm-dd hh24:mi:ss') 
              AND "V_USAGE"='ANAT' AND "ACC_YEAR"=2013)
  3 - access("CREATED_BY"='user')

If I create an index for the CREATED_ON column, the generated plan is:

-----------------------------------------------------------------------------------------------
| Id  | Operation                    | Name           | Rows  | Bytes | Cost (%CPU)| Time     |
-----------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT             |                |     1 |    26 |   351   (0)| 00:00:05 |
|   1 |  SORT AGGREGATE              |                |     1 |    26 |            |          |
|*  2 |   TABLE ACCESS BY INDEX ROWID| DOCUMENTS      |     1 |    26 |   351   (0)| 00:00:05 |
|*  3 |    INDEX RANGE SCAN          | IDX_CREATED_ON |  3809 |       |    13   (0)| 00:00:01 |
-----------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------

  2 - filter("CREATED_BY"='user' AND "V_USAGE"='ANAT' AND "ACC_YEAR"=2013)
  3 - access("CREATED_ON">=TO_DATE(' 2013-11-01 00:00:00', 'syyyy-mm-dd hh24:mi:ss'))

This seems a bit strange, since the rows returned by the IDX_CREATED_ON index are 3809 (more than the no index solution), but the CPU cost is less than the first case.

If I create an index on both CREATED_ON and CREATED_BY, the plan is:

--------------------------------------------------------------------------------------------------
| Id  | Operation                    | Name              | Rows  | Bytes | Cost (%CPU)| Time     |
--------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT             |                   |     1 |    26 |     4   (0)| 00:00:01 |
|   1 |  SORT AGGREGATE              |                   |     1 |    26 |            |          |
|*  2 |   TABLE ACCESS BY INDEX ROWID| DOCUMENTS         |     1 |    26 |     4   (0)| 00:00:01 |
|*  3 |    INDEX SKIP SCAN           | IDX_CREATED_ON_BY |     2 |       |     3   (0)| 00:00:01 |
--------------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------

  2 - filter("V_USAGE"='ANAT' AND "ACC_YEAR"=2013)
  3 - access("CREATED_ON">=TO_DATE(' 2013-11-01 00:00:00', 'syyyy-mm-dd hh24:mi:ss') AND 
              "CREATED_BY"='user' AND "CREATED_ON" IS NOT NULL)
      filter("CREATED_BY"='user')

I suppose the last solution seems to be the best, seeing the cost in CPU and number of rows returned from the intermediate results. Forgot to mention this query returns 23.

So, the big question is: Could you think of a better index that fits this specific query?

asked Nov 25 '13 at 21:16

Vasilis's gravatar image

Vasilis
21114


One Answer:

So, the big question is: Could you think of a better index that fits this specific query?

Yes, you should to it the other way around: (CREATED_BY, CREATED_ON)

Rule of thumb: index equality before ranges.

To answer the question you didn't ask: why is the cost value of the first plan higher then the cost of the second although the index matches less rows?

The reason is the so called index clustering factor:

The clustering factor is a benchmark that expresses the correlation between the index row order and the table row order.

For example, an ORDERS table, that grows every day, might have an index on the order date and another one on the customer id. Because orders don’t get deleted there are no holes in the table so that each new order is added to the end. The table grows chronologically. An index on the order date has a very low clustering factor because the index order is essentially the same as the table order. The index on customer id has a higher clustering factor because the index order is different from the table order; the table row will be inserted at the end of the table, the corresponding index entry somewhere in the middle of the index—according to the customer id.

answered Nov 26 '13 at 11:39

Markus%20Winand's gravatar image

Markus Winand ♦♦
93651120

Hi Markus,

Thanks for your answer. After posting my question, I saw my initial query and realized what you say (also remembering the tip in your book, which btw is excellent). That's because the date could be null, but the created_by column always has a value. I changed the order of the index (created_by, created_on) and the results were even better. I also dropped another index on the created_by column which does not have any reason to exist now.

As the title says, use the index!

(Nov 26 '13 at 13:33) Vasilis