*********************************************************************
*  THIS FILE IS NAMED 'STORET.HELP.EXAMPLES.SAS.LIBRARY(TAXMAT00)'
*
***  DELETE THIS LINE AND ALL PRECEDING LINES BEFORE USING.
//III      JOB (AAAASTORP,MIII),'LIBRARY(TAXMAT00)',TIME=(2,00),
//             MSGLEVEL=(1,1),PRTY=2
/*ROUTE  PRINT HOLD
/*JOBPARM LINES=300
//*  NOTE(REVIEW): III, AAAA AND MIII LOOK LIKE PLACEHOLDERS FOR
//*  THE SUBMITTER ID AND ACCOUNT - CONFIRM AND REPLACE BEFORE USE.
//*------------------------------------------------------------*
//*               TAXMAT00    EPA SORT CODE/TSN LIST           *
//*------------------------------------------------------------*
//*     SEARCHES THE PUBLIC COPY OF THE NODC/EPA MASTER        *
//*     TAXONOMIC FILE, COMPARING IT TO A USER-SUPPLIED        *
//*     LIST OF SCIENTIFIC NAMES. THESE NAMES MUST BE IN       *
//*     A DATASET AND EACH NAME MUST START IN COLUMN ONE (1).  *
//*     THE FILE CAN CONTAIN AS MANY NAMES AS YOU DESIRE AND   *
//*     EACH NAME MUST BE ON A LINE BY ITSELF.                 *
//*                                                            *
//*     THE FULLY QUALIFIED DATASET NAME MUST BE ENTERED       *
//*     ON THE LINE BELOW WHICH BEGINS WITH                    *
//*     SAS.OTHER DD.  THE FULLY QUALIFIED NAME CONSISTS       *
//*     OF YOUR USERID AND ACCOUNT NUMBER AND THEN THE         *
//*     DATASET NAME AS SHOWN BELOW.                           *
//*                                                            *
//*     THE RESULT IS A REPORT OF MATCHED NAMES, MISSED        *
//*     NAMES, NEAR MISSES, AND AMBIGUOUS (MULTIPLE MATCH)     *
//*     NAMES.                                                 *
//*                                                            *
//*     FOR MATCHED NAMES, THE TAXON SERIAL NUMBER (TSN)       *
//*     IS SUPPLIED, TOGETHER WITH THE PARENT TSN, AND AN      *
//*     EPA SORT CODE WHICH INDICATES TRUE TAXONOMIC           *
//*     ORDER.                                                 *
//*                                                            *
//*     DOES -NOT- SHOW NODC 12-DIGIT CODES.                   *
//*                                                            *
//*------------------------------------------------------------*
//*  RUN THE SAS PROCEDURE.  MASTER AND OTHER ARE THE DDNAMES
//*  NAMED ON THE INFILE STATEMENTS OF THE SAS PROGRAM BELOW.
//TAXMAT00 EXEC SAS,WORK='5000,4000',GREGION=7000K,PRINT=A
//*  MASTER = TAXONOMIC MASTER FILE, READ BY THE TAXCODE STEP
//SAS.MASTER DD DISP=SHR,DSN=LJMA014.MASTER.SPECIES.PUBNFILE
//*
//*  OTHER = USER NAME LIST, READ BY THE OURLIST STEP.
//*  REPLACE THE DSN BELOW WITH YOUR OWN DATASET NAME.
//SAS.OTHER DD DISP=SHR,DSN=IIIAAAA.TAXNAMES
//*
//*  SYSIN = THE IN-STREAM SAS PROGRAM THAT FOLLOWS
//SAS.SYSIN DD DATA
OPTIONS NOCENTER;
*--------------------------------------------------------------------;
*  OURLIST - ONE OBSERVATION PER NON-BLANK LINE OF DDNAME OTHER.     ;
*    OUR_NAME  THE NAME AS TYPED (COLUMNS 1-45 OF THE LINE)          ;
*    SCI_NAME  MATCH KEY - UPPERCASED, CUT AT THE FIRST SP/SPP       ;
*              MARKER, THEN LEFT-ALIGNED                             ;
*    SEQ_NO    DATA STEP ITERATION COUNT (_N_), USED LATER TO PUT    ;
*              THE REPORT BACK INTO REQUEST ORDER                    ;
*--------------------------------------------------------------------;
DATA OURLIST;
FORMAT SEQ_NO 5.;
FORMAT SCI_NAME $45.;
INFILE OTHER LENGTH=L MISSOVER;
INPUT  @1   OUR_NAME   $45.
                ;
*  SKIP ZERO-LENGTH AND ALL-BLANK LINES                              ;
IF L=0 THEN DELETE;
IF OUR_NAME=' ' THEN DELETE;
SCI_NAME=UPCASE(OUR_NAME);

*  LOOK FOR A SPECIES MARKER.  THE FOUR PATTERNS ARE TRIED IN        ;
*  ORDER AND THE FIRST ONE FOUND SETS PTR.  A MARKER IN COLUMN 1     ;
*  (PTR=1) IS LEFT ALONE.                                            ;
PTR=INDEX(SCI_NAME,' SP ');
IF PTR=0 THEN PTR=INDEX(SCI_NAME,' SP.');
IF PTR=0 THEN PTR=INDEX(SCI_NAME,' SPP ');
IF PTR=0 THEN PTR=INDEX(SCI_NAME,' SPP.');
IF PTR>1 THEN SCI_NAME=SUBSTR(SCI_NAME,1,PTR-1);
DROP PTR;

*  REMOVE LEADING BLANKS.  LEFT IS USED INSTEAD OF A SHIFT LOOP      ;
*  BECAUSE A LOOP THAT WAITS FOR A NON-BLANK FIRST CHARACTER         ;
*  NEVER ENDS WHEN THE VALUE IS ALL BLANK.                           ;
SCI_NAME=LEFT(SCI_NAME);

*  IF THE CUT LEFT NOTHING (ONLY BLANKS PRECEDED THE MARKER) USE     ;
*  THE WHOLE NAME SO THE MATCH KEY IS NEVER BLANK.  OUR_NAME IS      ;
*  KNOWN TO BE NON-BLANK AT THIS POINT.                              ;
IF SCI_NAME=' ' THEN SCI_NAME=LEFT(UPCASE(OUR_NAME));

SEQ_NO=_N_;
*--------------------------------------------------------------------;
PROC SORT DATA=OURLIST; BY SCI_NAME;
*--------------------------------------------------------------------;
DATA TAXCODE;
*  FIXED-COLUMN READ OF THE MASTER FILE (DDNAME MASTER).  ONLY       ;
*  RECORDS WHOSE USE_FLAG IS BLANK, S OR C ARE KEPT.                 ;
FORMAT SCI_NAME $45.
       SRT_CODE $CHAR17.
       NODC_NUM $CHAR18.;
INFILE MASTER;
INPUT  @1  SERIAL      7.
       @9  PARENT      7.
      @17  AKA         7.
      @25  TYP_FLAG   $1.
      @26  LEVEL      $1.
      @28  NODC_NUM   $12.
      @40  USE_FLAG   $1.
      @42  SRT_CODE   $17.
      @60  SCI_NAME   $45.
      @106 AUTHOR     $30.
      @137 YY         $2.
      @139 MM         $2.
      @141 DD         $2.
                ;

IF USE_FLAG=' ' OR USE_FLAG='S' OR USE_FLAG='C';

*  OPTIONAL FILTER - REMOVE THE LEADING ASTERISK FROM THE NEXT       ;
*  LINE TO ELIMINATE NON-BIOS NAMES (THOSE WITH A BLANK LEVEL).      ;
 *IF LEVEL ^= ' ';

*  USE IS THE THREE-CHARACTER PRINT FORM OF USE_FLAG                 ;
FORMAT USE $3.;
IF      USE_FLAG='S' THEN USE='SYN';
ELSE IF USE_FLAG='C' THEN USE='COM';
ELSE                      USE=USE_FLAG;

*  RECORDS WITHOUT A SORT CODE GET A VISIBLE PLACEHOLDER.            ;
*  NOTE(REVIEW) - THE LITERAL IS 18 CHARACTERS BUT SRT_CODE IS       ;
*  DECLARED 17 WIDE, SO THE CLOSING ASTERISK IS PRESUMABLY LOST.     ;
IF SRT_CODE=' ' THEN SRT_CODE='*NAME NOT IN BIOS*';
*--------------------------------------------------------------------;
PROC SORT DATA=TAXCODE; BY SCI_NAME;
*--------------------------------------------------------------------;
DATA RESULTS; MERGE OURLIST(IN=INOURS) TAXCODE(IN=INEPA); BY SCI_NAME;
*  MATCH-MERGE OF THE USER LIST WITH THE MASTER NAMES BY SCI_NAME.   ;
*  INOURS / INEPA TELL WHICH INPUT SUPPLIED THE CURRENT              ;
*  OBSERVATION.  ROWS ARE WRITTEN ONLY BY THE EXPLICIT OUTPUT        ;
*  STATEMENTS BELOW, WITH STATUS SET TO ONE OF                       ;
*    MATCHED     NAME PRESENT IN BOTH INPUTS                         ;
*    MATCHED **  AS MATCHED, BUT LEVEL IS BLANK                      ;
*    DUPL MATCH  SAME NAME AS THE PREVIOUS OBSERVATION BUT A         ;
*                DIFFERENT SORT CODE                                 ;
*    ..MISSED    USER NAME WITH NO MASTER ENTRY                      ;
*    .PREV       VALUES HELD FROM THE OBSERVATION BEFORE A MISS      ;
*    ...NEXT     FIRST MASTER ENTRY FOLLOWING ONE OR MORE MISSES     ;
FORMAT STATUS $10.;
FORMAT HLD_NAME $45.;
FORMAT HLD_CODE $18.;
FORMAT HLD_CODN $18.;
FORMAT HLD_SER    9.;
FORMAT HLD_LEV   $1.;
*  NEARMISS IS 1 FROM A MISS UNTIL THE NEXT MASTER ENTRY IS SEEN.    ;
*  THE HLD_ VARIABLES CARRY THE PREVIOUS OBSERVATION FORWARD.        ;
RETAIN NEARMISS 0; DROP NEARMISS;
RETAIN HLD_NAME HLD_CODE HLD_CODN HLD_SER HLD_LEV;
DROP   HLD_NAME HLD_CODE HLD_CODN HLD_SER HLD_LEV;
*;
*  DEFAULT STATUS - THE BRANCHES BELOW OVERRIDE IT                   ;
STATUS='MATCHED';
*;
IF INOURS & INEPA THEN DO;
  IF HLD_NAME=SCI_NAME & HLD_CODE^=SRT_CODE
    THEN DO;
    *  NOTE(REVIEW) - THE THREE FIELDS ARE SAVED, OVERWRITTEN        ;
    *  WITH THE HELD VALUES AND RESTORED WITH NO OUTPUT IN           ;
    *  BETWEEN, SO THE ONLY LASTING EFFECT IS THE STATUS VALUE       ;
    *  (PLUS THE HTCODE / HTCODN / HSNAME SCRATCH VARIABLES).        ;
    HTCODE=SRT_CODE;
    HTCODN=NODC_NUM;
    HSNAME=SCI_NAME;
    SRT_CODE=HLD_CODE;
    NODC_NUM=HLD_CODN;
    SCI_NAME=HLD_NAME;
    STATUS='DUPL MATCH';
    SRT_CODE=HTCODE;
    NODC_NUM=HTCODN;
    SCI_NAME=HSNAME;
    END;
  END;
*  USER NAME WITH NO MASTER ENTRY                                    ;
IF ^ INEPA THEN DO;
  HSNAME=SCI_NAME;
  *  FIRST MISS SINCE THE LAST MASTER ENTRY - WRITE THE HELD         ;
  *  PREVIOUS ROW AS CONTEXT, WITH OUR_NAME SHOWN AS ---             ;
  IF NEARMISS=0 THEN DO;
    HTNAME=OUR_NAME;
    OUR_NAME='---';
    SRT_CODE=HLD_CODE; NODC_NUM=HLD_CODN;
    SERIAL=HLD_SER; LEVEL=HLD_LEV;
    SCI_NAME=HLD_NAME; STATUS='.PREV   ';
    OUTPUT;
    OUR_NAME=HTNAME;
    END;
  *  THE MISSED ROW ITSELF, WITH PLACEHOLDER CODES                   ;
  NODC_NUM=' - - - -          '; SERIAL=.; LEVEL=' ';
  SRT_CODE='*   MISSING   *'; SCI_NAME=HSNAME; STATUS='..MISSED';
  NEARMISS=1;
  OUTPUT;
  END;
*;
*  FIRST MASTER ENTRY AFTER A MISS - WRITE IT AS ...NEXT CONTEXT     ;
*  AND CLEAR THE FLAG.  STATUS GOES BACK TO MATCHED WHEN THE         ;
*  ENTRY IS ALSO ON THE USER LIST.                                   ;
IF NEARMISS=1 THEN DO;
  IF INEPA THEN DO;
    HTNAME=OUR_NAME;
    OUR_NAME='---';
    STATUS='...NEXT ';
    OUTPUT;
    OUR_NAME=HTNAME;
    IF INOURS THEN STATUS='MATCHED';
    NEARMISS=0;
    END;
  END;
*  WRITE THE MATCHES.  A MATCH WITH A BLANK LEVEL IS FLAGGED **      ;
IF INOURS & (STATUS='MATCHED'| STATUS='DUPL MATCH') THEN DO;
  IF LEVEL=' ' & STATUS='MATCHED' THEN
    STATUS='MATCHED **';
  OUTPUT;
  END;
*  REMEMBER THIS OBSERVATION FOR THE NEXT ITERATION                  ;
HLD_NAME=SCI_NAME;
HLD_CODE=SRT_CODE;
HLD_CODN=NODC_NUM;
HLD_SER =SERIAL;
HLD_LEV =LEVEL;
*  NOTE(REVIEW) - HLD_STAT IS NOT IN THE RETAIN OR DROP LISTS        ;
*  AND IS NOT READ ANYWHERE IN THIS STEP.                            ;
HLD_STAT=STATUS;
*--------------------------------------------------------------------;
TITLE1 '        CRITTER CODE CROSS-MATCH';
TITLE2 '                  - * -';
*--------------------------------------------------------------------;
*  FULL LISTING OF RESULTS, IDENTIFIED BY SERIAL.  AN ASTERISK IN    ;
*  A LABEL SPLITS THE COLUMN HEADING ONTO A SECOND LINE.             ;
PROC PRINT DATA=RESULTS LABEL SPLIT='*' UNIFORM;
ID SERIAL;
VAR PARENT AKA LEVEL SRT_CODE STATUS USE SCI_NAME;
LABEL SERIAL  ='TSN*-------'
      PARENT  ='PARENT*-------'
      AKA     ='POINTER*-------'
      SRT_CODE='SORT CODE*-----------------'
      NODC_NUM='NODC CODE*-----------------'
      SCI_NAME='CANDIDATE NAME*--------------------'
      STATUS  ='STATUS*-------'
      USE     ='S/C*---'
      LEVEL   ='LEV*---';
*--------------------------------------------------------------------;
DATA COUNTERS;
*  KEEP THE ROWS THAT ANSWER A USER REQUEST.  THE .PREV AND          ;
*  ...NEXT CONTEXT ROWS DO NOT PASS THIS TEST.                       ;
SET RESULTS;
IF STATUS= 'MATCHED  '  OR
   STATUS= 'MATCHED **' OR
   STATUS= 'DUPL MATCH' OR
   STATUS= '..MISSED  ';
*--------------------------------------------------------------------;
*  HOW MANY NAMES FELL INTO EACH STATUS                              ;
PROC FREQ DATA=COUNTERS;
TABLES STATUS;
*--------------------------------------------------------------------;
*  BACK INTO THE ORDER IN WHICH THE NAMES WERE SUPPLIED              ;
PROC SORT DATA=COUNTERS;
BY SEQ_NO;
*--------------------------------------------------------------------;
DATA _NULL_;
*  ONE PRINT LINE PER REQUESTED NAME - NO DATASET IS CREATED          ;
SET COUNTERS;
FILE PRINT;
TITLE1 '   STATUS OF ALL NAMES REQUESTED';
TITLE2 '      (ORIGINAL REQUEST ORDER)';
PUT  @1 STATUS   $10.  @12 LEVEL     $1.  @14 SRT_CODE $17.
    @32 USE       $3.  @36 OUR_NAME $45.;
*--------------------------------------------------------------------;
DATA MISS;
*  EVERYTHING EXCEPT THE PLAIN MATCHES AND THE CONTEXT ROWS          ;
SET RESULTS;
IF STATUS= 'MATCHED **' OR
   STATUS= 'DUPL MATCH' OR
   STATUS= '..MISSED  ';
*--------------------------------------------------------------------;
PROC SORT DATA=MISS;
BY STATUS SCI_NAME;
*--------------------------------------------------------------------;
PROC PRINT DATA=MISS LABEL SPLIT='*' UNIFORM;
TITLE1 '        CRITTER CODE CROSS-MATCH';
TITLE2 '                  - * -';
TITLE3 '   NAMES WITH IMPERFECT CODE MATCHES';
ID SERIAL;
VAR SRT_CODE STATUS OUR_NAME;
LABEL SERIAL  ='SERIAL*---------'
      SRT_CODE='SORT CODE*-----------------'
      NODC_NUM='NODC CODE*-----------------'
      SCI_NAME='CANDIDATE NAME*--------------------'
      OUR_NAME='CANDIDATE NAME*--------------------'
      STATUS  ='STATUS*-------';
*--------------------------------------------------------------------;
DATA CODES;
*  FOR EVERY MATCHED OR DUPL MATCH ROW WRITE SEVEN OBSERVATIONS,     ;
*  EACH HOLDING A LEADING PORTION OF THE SORT CODE.  THE LENGTHS     ;
*  PRESUMABLY MARK THE RANKS OF THE HIERARCHY - CONFIRM.             ;
SET RESULTS;
IF STATUS='MATCHED   ' OR STATUS='DUPL MATCH';
FORMAT TRY_CODE $17.;
KEEP SRT_CODE;
TRY_CODE=SRT_CODE;
DO PFXLEN=2,4,6,9,12,15,17;
  SRT_CODE=SUBSTR(TRY_CODE,1,PFXLEN);
  OUTPUT;
  END;
*--------------------------------------------------------------------;
PROC SORT DATA=CODES;
BY SRT_CODE;
*--------------------------------------------------------------------;
DATA KEEP;
*  ONE OBSERVATION PER DISTINCT NON-BLANK SRT_CODE.  CODES MUST      ;
*  ALREADY BE IN SRT_CODE ORDER.                                     ;
SET CODES;
BY SRT_CODE;
IF SRT_CODE=' ' THEN DELETE;
IF FIRST.SRT_CODE;
*--------------------------------------------------------------------;
PROC SORT DATA=TAXCODE;
BY SRT_CODE USE;
*--------------------------------------------------------------------;
*  EVERY MASTER ENTRY WHOSE SORT CODE APPEARS IN KEEP                ;
DATA NAMELIST(KEEP=SERIAL PARENT AKA LEVEL SRT_CODE USE SCI_NAME);
MERGE KEEP(IN=NEEDED) TAXCODE;
BY SRT_CODE;
IF NOT NEEDED THEN DELETE;
*--------------------------------------------------------------------;
TITLE1;
DATA _NULL_;
*  INDENTED LISTING OF NAMELIST.  H IS THE PAGE HEADING ROUTINE      ;
*  NAMED ON THE FILE STATEMENT.                                      ;
SET NAMELIST;
FILE PRINT NOTITLES HEADER=H;
FORMAT USAGE $5.;
FORMAT RANK  $7.;

*  SYNONYMS AND COMMON NAMES GET A TAG - OTHER ROWS LEAVE IT BLANK   ;
IF      USE='SYN' THEN USAGE='(SYN)';
ELSE IF USE='COM' THEN USAGE='(COM)';

*  RANK WORD FOR THE LEFT MARGIN - BLANK FOR ANY OTHER LEVEL         ;
IF      LEVEL='P' THEN RANK='PHYLUM ';
ELSE IF LEVEL='C' THEN RANK='CLASS  ';
ELSE IF LEVEL='O' THEN RANK='ORDER  ';
ELSE IF LEVEL='F' THEN RANK='FAMILY ';
ELSE IF LEVEL='G' THEN RANK='GENUS  ';
ELSE IF LEVEL='S' THEN RANK='SPECIES';
ELSE IF LEVEL='V' THEN RANK='VARIETY';

*  BLANK LINE AHEAD OF EACH PHYLUM THROUGH GENUS ENTRY WHOSE USE     ;
*  IS BLANK                                                          ;
IF USE = '  ' AND
  (LEVEL = 'P' OR LEVEL = 'C' OR LEVEL = 'O' OR
   LEVEL = 'F' OR LEVEL = 'G') THEN PUT;

*  STARTING COLUMN GROWS WITH DEPTH - 9 UNLESS LISTED BELOW          ;
IF      LEVEL='C' THEN INDENT=11;
ELSE IF LEVEL='O' THEN INDENT=13;
ELSE IF LEVEL='F' THEN INDENT=15;
ELSE IF LEVEL='G' OR LEVEL='S' OR LEVEL='V' THEN INDENT=17;
ELSE                   INDENT=9;
NAMECOL=INDENT+7;

*  THE RANK WORD IS PRINTED ONLY ON ROWS WHOSE USE IS BLANK          ;
IF USE='     ' THEN
  PUT @1 RANK $7.  @INDENT USAGE $5.  @NAMECOL SCI_NAME $45.;
ELSE
  PUT              @INDENT USAGE $5.  @NAMECOL SCI_NAME $45.;
RETURN;

*  PAGE HEADING                                                      ;
H:
PUT _PAGE_;
PUT '                  TAXONOMIC HIERARCHY OF MATCHED NAMES';
PUT; PUT;
PUT '   RANK             SCIENTIFIC/COMMON NAME';
PUT '---------       -----------------------------------------------';
RETURN;
*--------------------------------------------------------------------;
