audnet.gif (4937 bytes)

 

Home     FAQs     Guestbook     Jobs     Travel     Sponsors     Search     E-Mail

AuditNet Links
Audit Programs
AuditNet Newsletter
AuditNet Mailing Lists
AuditNet Library
Audit Jobs
Travel Links

Audit Career Links
Partner Discounts

Search AuditNet
Sign Guestbook

Sponsor Advertising
About AuditNet
About Jim Kaplan
AuditNet Seminars

Listing of SAS Program for Detecting Duplicate Addresses

Employee/Vendor Address Match
SAS Program

Revise JCL according to your organization's standards.
Revise data element formats according to your vendor file and employee file layouts

000001 //(Job Card)
000002 /*JOBPARM U=NNNN
000003 /*ROUTE PRINT FETCH
000004 //STEP1 EXEC SAS,SORT=15
000005 //SAS.SASLIST DD SYSOUT=(J,,1601)
000006 //INPUTFL1 DD DSN=(Employee Data File Here),DISP=SHR
000007 //INPUTFL2 DD DSN=(Vendor File Here),DISP=SHR
000008 //SYSIN DD *
000009 DATA EMPLADDR(DROP=ZIP AA B C D E F G H I J K CHEK Z); *Temp variables;
000010 INFILE INPUTFL1; *Employee data file;
000011 INPUT @20 NAME $28. @50 Z $1. @50 ADDRESS $28.
000012 @80 CITY $18. @98 ST $2. @100 ZIPCODE $5.;
000013 TYP='EMPLOYEE'; *Type of record;
000014 IF ZIPCODE GE 0 THEN DO; *If zipcode exists;
000015 AA=COMPRESS(ADDRESS,',.''&#%'); *Characters to be removed from address;
000016 B=INDEX(AA,'-'); *Returns position of any dashes in address;
000017 IF B GT 0 THEN DO;
000018 SUBSTR(AA,B,1)=' '; *Replace dash with space in address;
000019 END;
000020 J=COMPRESS(AA,' '); *Remove spaces from address;
000021 C=INDEX(J,'BOX'); *Returns position of 'BOX' in address (if any);
000022 D=INDEX(J,'DRAWER'); *Returns position of 'DRAWER' in address (if any);
000023 IF C=1 THEN DO; *If position of 'BOX' is 1;
000024 A=SUBSTR(J,C); *Returns address starting at 'BOX';
000025 RETURN; *Input next record;
000026 END;
000027 IF C GT 1 THEN DO; *If position of 'BOX' is > 1;
000028 A=SUBSTR(J,(C-1)); *Returns address starting 1 character before 'BOX';
000029 RETURN; *Input next record;
000030 END;
000031 IF D=1 THEN DO; *If position of 'DRAWER' is 1;
000032 A=SUBSTR(J,D); *Returns address starting at 'DRAWER';
000033 RETURN; *Input next record;
000034 END;
000035 IF D GT 1 THEN DO; *If position of 'DRAWER' is > 1;
000036 A=SUBSTR(J,(D-1)); *Returns address starting 1 character before 'DRAWER';
000037 RETURN; *Input next record;
000038 END;
000039 CHEK='0123456789'; *Numeric check string;
000040 E=VERIFY(Z,CHEK); *Check to see if 1st character of address is number;
000041 IF E NE 0 THEN DO; *If not numeric;
000042 F=SCAN(AA,1); *Returns 1st 'word' string in address (before 1st delimiter, if any);
000043 G=SCAN(AA,2); *Returns 2nd 'word' string in address (if any);
000044 H=LENGTH(F); *Returns length of 1st 'word' in address;
000045 I=LENGTH(G); *Returns length of 2nd 'word' in address;
000046 K=COMPRESS(AA,' ');*Remove spaces from address;
000047 A=SUBSTR(K,H,(I+1)); *Returns string starting with last character in 1st word for length of 2nd word in address + 1;
000048 RETURN; *Input next record;
000049 END;
000050 ELSE DO; *If numeric;
000051 F=SCAN(AA,1); *Returns 1st 'word' string in address (before 1st delimiter, if any);
000052 G=LENGTH(F); *Returns length of 1st word in address;
000053 K=COMPRESS(AA,' '); *Remove spaces from address;
000054 A=SUBSTR(K,1,(G+1)); *Returns string starting at 1 for length of 1st word in address + 1;
000055 RETURN; *Input next record;
000056 END;
000057 END;
000058 DELETE; *Delete if zipcode = 0, then input next record;
000059 DATA VENDADDR(DROP= AA B C D E F G H I J K CHEK Z ZIP ZIP1);*Temp variables;
000060 FORMAT ZIP1 $5.; *Format zipcode character field, 5 characters;
000061 INFILE INPUTFL2; *Input vendor data file;
000062 INPUT @1 VENDNO 6. @7 NAME $28. @45 ADDRESS $28. @45 Z $1.
000063 @101 CITY $19. @120 ST $2. @122 ZIP PD5.;TYP='VENDOR';
000064 ZIP1=ZIP;
000065 ZIPCODE=SUBSTR(ZIP1,1,5); *Returns first five characters of zipcode;
000066 IF ZIPCODE GE 0 THEN DO; *If zipcode exists;
000067 AA=COMPRESS(ADDRESS,',.''&#%'); *Characters to be removed from address;
000068 B=INDEX(AA,'-'); *Returns position of any dashes in address;
000069 IF B GT 0 THEN DO;
000070 SUBSTR(AA,B,1)=' '; *Replace dash with space in address;
000071 END;
000072 J=COMPRESS(AA,' '); *Remove spaces from address;
000073 C=INDEX(J,'BOX'); *Returns position of 'BOX' in address (if any);
000074 D=INDEX(J,'DRAWER'); *Returns position of 'DRAWER' in address (if any);
000075 IF C=1 THEN DO; *If position of 'BOX' is 1;
000076 A=SUBSTR(J,C); *Returns address starting at 'BOX';
000077 RETURN; *Input next record;
000078 END;
000079 IF C GT 1 THEN DO; *If position of 'BOX' is > 1;
000080 A=SUBSTR(J,(C-1)); *Returns address starting 1 character before 'BOX';
000081 RETURN; *Input next record;
000082 END;
000083 IF D=1 THEN DO; *If position of 'DRAWER' is 1;
000084 A=SUBSTR(J,D); *Returns address starting at 'DRAWER';
000085 RETURN; *Input next record;
000086 END;
000087 IF D GT 1 THEN DO; *If position of 'DRAWER' is > 1;
000088 A=SUBSTR(J,(D-1)); *Returns address starting 1 character before 'DRAWER';
000089 RETURN; *Input next record;
000090 END;
000091 CHEK='0123456789'; *Numeric check string;
000092 E=VERIFY(Z,CHEK); *Check to see if 1st character of address is number;
000093 IF E NE 0 THEN DO; *If not numeric;
000094 F=SCAN(AA,1); *Returns 1st 'word' string in address (before 1st delimiter, if any);
000095 G=SCAN(AA,2); *Returns 2nd 'word' string in address (if any);
000096 H=LENGTH(F); *Returns length of 1st 'word' in address;
000097 I=LENGTH(G); *Returns length of 2nd 'word' in address;
000098 K=COMPRESS(AA,' '); *Remove spaces from address;
000099 A=SUBSTR(K,H,(I+1)); *Returns string starting with last character in 1st word for length of 2nd word in address + 1;
000100 RETURN; *Input next record;
000101 END;
000102 ELSE DO; *If numeric;
000103 F=SCAN(AA,1); *Returns 1st 'word' string in address (before 1st delimiter, if any);
000104 G=LENGTH(F); *Returns length of 1st word in address;
000105 K=COMPRESS(AA,' '); *Remove spaces from address;
000106 A=SUBSTR(K,1,(G+1)); *Returns string starting at 1 for length of 1st word in address + 1;
000107 RETURN; *Input next record;
000108 END;
000109 END;
000110 DELETE; *Delete if zipcode = 0, then input next record;
000111 DATA JOIN; *Merge two data sets created above from employee & vendor data files;
000112 SET EMPLADDR VENDADDR;
000113 RETURN; *Input next record;
000114 PROC SORT; *Sort by zipcode, address string, record type;
000115 BY ZIPCODE A TYP;
000116 DATA MATCH1(DROP=ACT FLAG); *First match iteration;
000117 SET JOIN; *Input dataset with sorted, merged records;
000118 BY ZIPCODE A TYP; *Input records as sorted by zipcode, address string, record type;
000119 RETAIN FLAG; *Retain value of flag variable from one iteration of data step to the next (from one input record to the next);
000120 IF FIRST.A THEN DO; *If 1st occurrence of matching address strings (may be only 1);
000121 ACT=0; *Initialize activity variable to 0;
000122 FLAG='E'; *Set flag to E (employee record);
000123 END;
000124 ACT+1; *Increment activity variable by 1 for each matching string;
000125 IF LAST.A THEN DO; *If last occurrence of matching address strings (may be only 1);
000126 IF ACT=1 THEN DO; *If only 1 occurrence;
000127 DELETE; *Delete record since no match, then input next record;
000128 END;
000129 END;
000130 IF FIRST.A THEN DO; *If 1st occurrence of matching address strings (may be only 1);
000131 IF TYP='VENDOR' THEN DO; *If record type = vendor;
000132 FLAG='V'; *Set flag to V (vendor record);
000133 DELETE; *Delete record since no matching employee record, then input next record;
000134 END;
000135 END;
000136 IF FLAG='V' THEN DELETE; *Delete duplicate vendor record (if any), then input next record;
000137 RETURN; *Input next record;
000138 PROC SORT; *Sort by zipcode, address string, type record (vendor first);
000139 BY ZIPCODE A DESCENDING TYP;
000140 DATA MATCH2(DROP=FLAG A); *2nd match iteration;
000141 SET MATCH1; *Input matched records;
000142 BY ZIPCODE A DESCENDING TYP; *Input in sorted order;
000143 RETAIN FLAG; *Retain value of flag variable from one iteration of data step to the next (from one input record to the next);
000144 IF FIRST.A THEN DO; *If 1st occurrence of matching address strings (may be only 1);
000145 FLAG='V'; *Set flag to V for vendor record;
000146 IF TYP='EMPLOYEE' THEN DO; *If employee record type;
000147 FLAG='E'; *Set flag to E for employee record;
000148 DELETE; *Delete record since no matching vendor record, then input next record;
000149 END;
000150 END;
000151 IF FLAG='E' THEN DELETE; *Delete duplicate employee record, then input next record;
000152 RETURN; *Input next record;
000153 PROC PRINT; *Print final matched records (match2);
000154 ID VENDNO TYP; *List vendor number (for both employee and vendor) and record type 1st;
000155 OPTIONS MISSING=' '; *Output a space for any missing variables (instead of SAS default);
000156 FORMAT VENDNO Z6.; *Format vendor number to show leading zeroes;
000157 FORMAT ZIPCODE $5.; *Format zipcode character field 5 characters;
000158 TITLE1 'XXX CORPORATION';
000159 TITLE3 'ACCOUNTS PAYABLE DATA COLLECTION';
000160 TITLE5 'SIMILAR EMPLOYEE AND VENDOR ADDRESSES';
000161 TITLE8;

Copyright © 1999-2000 AuditNet.org.  

All rights reserved. No part of this Website may be reproduced in any form, by copying from the Internet, photostat, microfilm, xerography, or any other means, or incorporated into any information retrieval system, electronic or mechanical, without the written permission of the copyright owner.

Send comments to: editor@auditnet.org



Revised: January 14, 2008

Address of this Page is http://www.auditnet.org/