/************************************************
File : Ext_taxa.sql
Release : UB2.0.3
SIR     : 1683
Author : Ganesh Thadkamalla
Date :  04/27/2005
Description :  This script will extract Taxonomy data from ITIS table structure and populate ITISTAXA.
  It calls c_names.sql file to extract common names.  It generates Taxon Sort code by calling other procedures.  
  Reports data problems.

Comments :


*************************************************/
-- Session setup: do not echo script commands, and log output to the spool file.
SET ECHO OFF
SPOOL logs\ext_taxa.log

PROMPT *** Creating constraints and indexes on ITIS schema objects.

-- Primary key on the author lookup table, with its index stored in ITIS_INDS.
-- f_author_date looks rows up by exactly this column pair.
ALTER TABLE itis.taxon_authors_lkp
    ADD CONSTRAINT taxon_authors_lkp_pk
    PRIMARY KEY (kingdom_id, taxon_author_id)
    USING INDEX TABLESPACE itis_inds;

-- Supporting indexes on the ITIS source tables, all stored in ITIS_INDS.
CREATE INDEX i_taxa_units_tsn ON taxonomic_units (tsn)
    TABLESPACE itis_inds;
CREATE INDEX i_taxa_units_ptsn ON taxonomic_units (parent_tsn)
    TABLESPACE itis_inds;
CREATE INDEX isl_tsn_accepted ON synonym_links (tsn_accepted)
    TABLESPACE itis_inds;
CREATE INDEX i_comments ON comments (comment_id)
    TABLESPACE itis_inds;
CREATE INDEX i_tu_comments_links ON tu_comments_links (tsn, comment_id)
    TABLESPACE itis_inds;

-- NOTE(review): presumably an ITISTAXA index left over from a previous run
-- (the table itself is dropped and recreated later in this script). Confirm.
DROP INDEX i_itistaxa_sn;

PROMPT *** Analyze above created indexes.

-- Compute exact optimizer statistics for the five indexes created above.
ANALYZE INDEX itis.i_taxa_units_tsn    COMPUTE STATISTICS;
ANALYZE INDEX itis.i_taxa_units_ptsn   COMPUTE STATISTICS;
ANALYZE INDEX itis.isl_tsn_accepted    COMPUTE STATISTICS;
ANALYZE INDEX itis.i_comments          COMPUTE STATISTICS;
ANALYZE INDEX itis.i_tu_comments_links COMPUTE STATISTICS;

PROMPT *** Drop and Create ITIStaxa_seq sequence.

-- Recreate the sequence so numbering restarts at 1 on every extraction run.
-- The extraction block later in this script uses it to fill serial_number.
DROP SEQUENCE itistaxa_seq;
CREATE SEQUENCE itistaxa_seq
    START WITH 1
    INCREMENT BY 1;

PROMPT *** Drop and create ITISTaxa table.

-- Rebuild the ITISTAXA table that holds the data extracted from the ITIS tables.
-- The table is truncated before it is dropped.
TRUNCATE TABLE itistaxa;
DROP TABLE itistaxa;
CREATE TABLE itistaxa (
    tsn             INTEGER,
    parent_tsn      INTEGER,
    true_name_tsn   INTEGER,
    taxon_rank_code CHAR(8),
    taxon_rank_name CHAR(15),
    status          CHAR(1),
    taxon_sort_code VARCHAR2(45),
    itis_name       VARCHAR2(175),
    author_date     VARCHAR2(100),
    serial_number   INTEGER
)
TABLESPACE itis_tabs
STORAGE (INITIAL 30M NEXT 20M);


-- Remove the helper functions, procedure and package from any earlier run.
-- Three of the functions are recreated inline below. The remaining objects
-- are expected to be recreated by the scripts this file calls.
PROMPT *** Drop and Create functions and procedures for use with data extraction

DROP FUNCTION f_author_date;
DROP FUNCTION f_author_date_syn;
DROP FUNCTION f_itis_name;
DROP FUNCTION f_rank_name;
DROP PROCEDURE kng_hier_proc;
DROP PACKAGE sort_code_pkg;

CREATE OR REPLACE FUNCTION f_rank_name (
    p_kingdom_id IN NUMBER,
    p_rank_id    IN NUMBER
) RETURN VARCHAR2
IS
    -- Purpose : look up the rank name for a (kingdom, rank) pair.
    -- Params  : p_kingdom_id  kingdom_id to match in taxon_unit_types
    --           p_rank_id     rank_id to match in taxon_unit_types
    -- Returns : the matching rank_name, or a single space when no row matches.
    -- Raises  : TOO_MANY_ROWS is not handled here and propagates to the caller.
    v_rank taxon_unit_types.rank_name%TYPE;
BEGIN
    SELECT t.rank_name
      INTO v_rank
      FROM taxon_unit_types t
     WHERE t.rank_id    = p_rank_id
       AND t.kingdom_id = p_kingdom_id;

    RETURN v_rank;
EXCEPTION
    WHEN NO_DATA_FOUND THEN
        -- Unknown combination: hand back a one-space placeholder.
        RETURN ' ';
END f_rank_name;
/
SHOW ERRORS

CREATE OR REPLACE FUNCTION f_author_date (
    p_kingdom_id IN NUMBER,
    p_author_id  IN NUMBER
) RETURN VARCHAR2
IS
    -- Purpose : fetch the author text for a (kingdom, author id) pair.
    -- Params  : p_kingdom_id  kingdom_id to match in taxon_authors_lkp
    --           p_author_id   taxon_author_id to match in taxon_authors_lkp
    -- Returns : taxon_author of the matching row, or a single space when no
    --           row matches.
    -- Raises  : TOO_MANY_ROWS is not handled here and propagates to the caller.
    v_author taxon_authors_lkp.taxon_author%TYPE;
BEGIN
    SELECT lkp.taxon_author
      INTO v_author
      FROM taxon_authors_lkp lkp
     WHERE lkp.taxon_author_id = p_author_id
       AND lkp.kingdom_id      = p_kingdom_id;

    RETURN v_author;
EXCEPTION
    WHEN NO_DATA_FOUND THEN
        -- No such author: hand back a one-space placeholder.
        RETURN ' ';
END f_author_date;
/
SHOW ERRORS

-- Author/date lookup for synonyms, keyed by the synonym's own TSN.
CREATE OR REPLACE FUNCTION f_author_date_syn (p_tsn IN NUMBER)
RETURN VARCHAR2
IS
    -- Purpose : return the author text of the taxonomic unit identified by p_tsn.
    -- Params  : p_tsn  TSN of the row to read from taxonomic_units
    -- Returns : the unit's taxon_author, or a single space when either the
    --           unit or its author row does not exist.
    -- Raises  : TOO_MANY_ROWS is not handled here and propagates to the caller.
    -- Depends : f_author_date, which must be created before this function.
    lv_author_id  taxonomic_units.taxon_author_id%TYPE;
    lv_kingdom_id taxonomic_units.kingdom_id%TYPE;
BEGIN
    -- Step 1: find the author id and kingdom recorded for this TSN.
    SELECT x.taxon_author_id, x.kingdom_id
      INTO lv_author_id, lv_kingdom_id
      FROM taxonomic_units x
     WHERE x.tsn = p_tsn;

    -- Step 2: reuse the shared lookup instead of repeating its query here.
    -- f_author_date already returns a single space when the author is missing.
    RETURN f_author_date(lv_kingdom_id, lv_author_id);
EXCEPTION
    WHEN NO_DATA_FOUND THEN
        -- Raised by step 1 when p_tsn is not present in taxonomic_units.
        RETURN ' ';
END f_author_date_syn;
/
SHOW ERRORS

-- Run the f_itis_name script. The function f_itis_name is used by the
-- extraction block below to build the name of each synonym row.
@scripts\f_itis_name.sql
SHOW ERRORS
-- Create kng_hier_proc, which creates the hierarchy for a given kingdom.
@scripts\KNG_hier.sql
SHOW ERRORS
PROMPT ***
PROMPT ***  Extracting ITIS taxonomy and populating in ITISTAXA table.
PROMPT ***
SET SERVEROUTPUT ON
BEGIN
    -- Drive the extraction from every taxonomic unit whose rank is 'Kingdom'
    -- and whose usage is 'valid' or 'accepted'. For each one: insert the
    -- kingdom row, insert its synonyms, then let kng_hier_proc load the
    -- hierarchy below it. Work is committed after the synonym insert, after
    -- the hierarchy load, and once more when the loop ends.
    FOR kingdom_rec IN (
        SELECT DISTINCT
               tu.unit_name1,
               tut.rank_name,
               tu.tsn,
               tu.usage,
               tu.taxon_author_id,
               tu.kingdom_id
          FROM itis.taxonomic_units  tu,
               itis.taxon_unit_types tut
         WHERE tut.kingdom_id = tu.kingdom_id
           AND tut.rank_id    = tu.rank_id
           AND tut.rank_name  = 'Kingdom'
           AND tu.usage IN ('valid', 'accepted')
    ) LOOP
        -- The kingdom itself: status 'A', rank code 'KNG'.
        INSERT INTO itistaxa
            (tsn, taxon_rank_name, status, serial_number,
             author_date, taxon_rank_code, itis_name)
        VALUES
            (kingdom_rec.tsn, 'Kingdom', 'A', itistaxa_seq.NEXTVAL,
             f_author_date(kingdom_rec.kingdom_id, kingdom_rec.taxon_author_id),
             'KNG', kingdom_rec.unit_name1);

        -- The kingdom's synonyms: status 'S', pointing back at the kingdom
        -- through true_name_tsn. The inline view is ordered by tsn and both
        -- NO_MERGE hints are kept as written.
        -- NOTE(review): presumably the hints keep serial numbers in tsn order. Confirm.
        INSERT INTO itistaxa
            (tsn, true_name_tsn, serial_number, status,
             taxon_rank_name, taxon_rank_code, author_date, itis_name)
        SELECT /*+ NO_MERGE(A) */
               A.tsn,
               kingdom_rec.tsn,
               itistaxa_seq.NEXTVAL,
               'S',
               'Kingdom',
               'KNG',
               f_author_date_syn(A.tsn),
               f_itis_name(A.tsn)
          FROM (SELECT /*+ NO_MERGE */ sl.tsn
                  FROM synonym_links sl
                 WHERE sl.tsn_accepted = kingdom_rec.tsn
                 ORDER BY sl.tsn) A;

        COMMIT;

        -- Load the hierarchy beneath this kingdom.
        kng_hier_proc(kingdom_rec.tsn);

        COMMIT;
    END LOOP;

    COMMIT;
END;
/
SHOW ERRORS

PROMPT *** Creat indexes on ITISTAXA table and analyze them.

-- Index the freshly loaded ITISTAXA table on tsn and on itis_name.
CREATE INDEX i_itistsn ON itistaxa (tsn)
    TABLESPACE itis_inds;
CREATE INDEX i_itistaxa_name ON itis.itistaxa (itis_name)
    TABLESPACE itis_inds;

-- Compute exact optimizer statistics for both new indexes.
ANALYZE INDEX itis.i_itistsn       COMPUTE STATISTICS;
ANALYZE INDEX itis.i_itistaxa_name COMPUTE STATISTICS;

-- Close logs\ext_taxa.log. Output of the steps that follow is not written to it.
SPOOL OFF

-- Duplicate TSNs produced by the extraction: report them first, then fix them.
@scripts\rep_dupl_tsn.sql
@scripts\fix_dupl_tsn.sql

-- Create sort_code_pkg, which is used to build sort codes for the records.
@scripts\sort_code_pkg

PROMPT ***
PROMPT ***  Generating Taxon Sort Code for records in ITISTAXA table.
PROMPT ***

@scripts\taxon_sort_code.sql

-- Extract taxon common names into the itis_cnames table.
PROMPT ***
PROMPT ***  Extracting common name alias and populating them in ITIS_cnames table.
PROMPT ***

@scripts\c_names.sql


-- Add two derived name columns to ITISTAXA:
--   display_name     first 60 characters of the right-trimmed itis_name
--   itis_name_ascii  itis_name converted from WE8ISO8859P1 to US7ASCII
ALTER TABLE itistaxa
    ADD (display_name    VARCHAR2(60),
         itis_name_ascii VARCHAR2(175));

-- Start a fresh transaction assigned to rollback segment RBSBIG, then fill
-- both columns for every row. The UPDATE intentionally has no WHERE clause.
COMMIT;
SET TRANSACTION USE ROLLBACK SEGMENT RBSBIG;
UPDATE itistaxa
   SET display_name    = SUBSTR(RTRIM(itis_name), 1, 60),
       itis_name_ascii = CONVERT(itis_name, 'US7ASCII', 'WE8ISO8859P1');
COMMIT;

-- Index the new display_name column and compute statistics for that index.
CREATE INDEX idn_itistaxa ON itistaxa (display_name)
    TABLESPACE itis_inds;

ANALYZE INDEX itis.idn_itistaxa COMPUTE STATISTICS;

-- Compute statistics for the ITISTAXA table itself.
ANALYZE TABLE itis.itistaxa COMPUTE STATISTICS;

-- Report duplicate itis_names produced by the extraction.
-- The second report script is disabled and kept only for reference.
@scripts\rep_dupl_name.sql
--@scripts\Rep_IT_dupname.sql

-- Save the duplicates into the dupl_names table in the ITIS schema.
@scripts\dupl_names.sql

-- process storet.tsrchdup table
-- NOTE(review): no statement follows the heading above in this script. Confirm
-- whether that step runs elsewhere.

-- Compute statistics for the tables the called scripts are expected to load.
ANALYZE TABLE itis.dupl_names  COMPUTE STATISTICS;
ANALYZE TABLE itis.itis_cnames COMPUTE STATISTICS;


EXIT
