OME soundex dm()
From Ingres Community Wiki
Contents |
Introduction
The standard Ingres soundex function is a typical Knuth coded version of the Russell soundex (circa 1918). The Daitch-Mokotoff soundex (circa 1980) is an upgraded soundex more suitable to larger databases.
No special libraries are needed to support this function, hence this code should be portable to non-Linux systems.
This implementation of the Daitch-Mokotoff soundex was written by Martin Bowes.
Syntax
soundex_dm(
(varchar )string
[, (varchar )cheese]
)
Note that unlike the standard soundex function, the Daitch-Mokotoff soundex allows for hard/soft sounds. For example is the 'ch' soft as in 'cheese' or hard as in 'christmas'? The optional second parameter to the function may be used to direct the function to use either hard or soft versions. Permitted values for this parameter are: HARD, H, hard, h, SOFT, S, soft and s. The default value being 'SOFT'.
Return Value
The function returns a varchar(6) string representing the Daitch-Mokotoff soundex value. Note that the returned string consists only of digits and may have leading zeroes, so it should be kept as a string rather than converted to an integer.
Example
select soundex_dm('Nichols') gives 648400 and soundex_dm('Nicholson') gives 648460. Whereas, select soundex_dm('Nichols', 'h') gives 658400 and soundex_dm('Nicholson', 'h') gives 658460.
Contrast that with a standard soundex which returns 'N242' in both cases.
FOD
Add the following definition to the fod_id enum set: UDF_SOUNDEX_DM
Then add the following to the Function_Definitions array:
/* Excerpt of the Function_Definitions array: the single entry below registers
** the operation name "soundex_dm" under the id UDF_SOUNDEX_DM. The "..." lines
** stand for the other entries already present in the array.
*/
static IIADD_FO_DFN Function_Definitions[]={
...
{
II_O_OPERATION, /*fod_object_type*/
{"soundex_dm"}, /*fod_name: the name used in SQL, e.g. select soundex_dm('Nichols') */
UDF_SOUNDEX_DM, /*fod_id: the value added to the fod_id enum set */
II_NORMAL /*fod_type*/
},
...
}; /*Function_Definitions*/
FIDs
Add the following definitions to the fid_id enum set:
UDF_FI_SOUNDEX_DM_1, UDF_FI_SOUNDEX_DM_2
The FIDs rely on the following definition of parameter types.
/* Argument datatype list shared by both soundex_dm function instances:
** two varchar parameters (the string to encode and the HARD/SOFT flag).
*/
static II_DT_ID UD_2_VC[] = {II_VARCHAR, II_VARCHAR};
The FIDs are:
/* Excerpt of the Function_Instances array: two instances of the
** UDF_SOUNDEX_DM operation, one taking a single varchar and one taking two.
** Both return a fixed length varchar(6). The "..." line stands for the other
** entries already present in the array.
*/
static IIADD_FI_DFN Function_Instances[] = {
...
{/* soundex_dm(varchar) */
II_O_FUNCTION_INSTANCE, /* fid_object_type */
UDF_FI_SOUNDEX_DM_1, /* fid_id*/
II_NO_FI, /* fid_cmplmnt*/
UDF_SOUNDEX_DM, /* fid_opid=fod_id from function definition
** This is the minor sort field for this array
*/
II_NORMAL, /* fid_optype
** This is the major sort field for this array
*/
II_FID_F0_NOFLAGS, /* fid_attributes*/
0, /* fid_wslength*/
1, /* fid_numargs*/
UD_2_VC, /* fid_args, a pointer to an array of datatypes
** NOTE(review): presumably only the first entry is
** consulted because fid_numargs is 1 - confirm
*/
II_VARCHAR, /* fid_result, result is a varchar holding six digits */
II_RES_FIXED, /* fid_rltype*/
6 + sizeof(short), /* fid_rlength: six digits plus the short length prefix */
0, /* fid_rprec */
soundex_dm1, /* fid_routine: supplies the default 'SOFT' flag */
0 /* lenspec_routine */
}, /* soundex_dm(varchar) */
{/* soundex_dm(varchar, varchar) */
II_O_FUNCTION_INSTANCE, /* fid_object_type */
UDF_FI_SOUNDEX_DM_2, /* fid_id*/
II_NO_FI, /* fid_cmplmnt*/
UDF_SOUNDEX_DM, /* fid_opid=fod_id from function definition
** This is the minor sort field for this array
*/
II_NORMAL, /* fid_optype
** This is the major sort field for this array
*/
II_FID_F0_NOFLAGS, /* fid_attributes*/
0, /* fid_wslength*/
2, /* fid_numargs*/
UD_2_VC, /* fid_args, a pointer to an array of datatypes*/
II_VARCHAR, /* fid_result, result is a varchar holding six digits */
II_RES_FIXED, /* fid_rltype*/
6 + sizeof(short), /* fid_rlength: six digits plus the short length prefix */
0, /* fid_rprec */
soundex_dm, /* fid_routine */
0 /* lenspec_routine */
}, /* soundex_dm(varchar, varchar) */
};
Executor Code
/* Stuff used in soundex_dm() function.
** Each directive must sit on its own line: written on one line the second
** #define becomes part of the first macro's replacement text.
*/
#define SOUNDEX_INT_BUFFER 64 /* most letters of the input that are examined */
#define SOUNDEX_PAD_BUFFER 20 /* spare space-filled bytes after those letters */
Single Parameter Version
/* Forward declaration: the two parameter version is defined further down,
** and C99 onwards does not allow calling an undeclared function.
*/
II_STATUS soundex_dm(II_SCB *scb, II_DATA_VALUE *string,
                     II_DATA_VALUE *cheese, II_DATA_VALUE *rdv);

/* soundex_dm1:
** Single parameter entry point. Builds a varchar holding the default
** hardness flag 'SOFT' and hands everything to soundex_dm().
**
** scb    - session control block, passed through untouched
** string - varchar to encode
** rdv    - receives the varchar(6) result
**
** Returns whatever soundex_dm() returns.
*/
II_STATUS
soundex_dm1 (
II_SCB *scb,
II_DATA_VALUE *string, /* Generate the Daitch-Mokotoff soundex for this
** string
*/
II_DATA_VALUE *rdv /* varchar(6) is returned */
)
{
    /* In-memory image of a varchar: a short length followed by the text */
    struct soft_flag {
        short true_length;
        char text[4];
    } soft;
    II_DATA_VALUE cheese;

    soft.true_length = (short )sizeof soft.text;
    memcpy(soft.text, "SOFT", sizeof soft.text);

    cheese.db_datatype = II_VARCHAR;
    cheese.db_length = sizeof(short) + sizeof soft.text; /* 6 where short is 2 bytes */
    cheese.db_prec = 0; /* previously left indeterminate */
    cheese.db_data = (char *)&soft;

    return soundex_dm(scb, string, &cheese, rdv);
}
Two Parameter Version
/* Forward declaration of the look-ahead helper, which is defined after
** soundex_dm() in this file.
*/
int soundex_dm_vowelage(char *buffer, int b_ptr, int b_len, int skip);

enum {
    SOUNDEX_DM_DIGITS = 6,          /* digits in the returned code */
    SOUNDEX_DM_ERRCODE = 0x200011   /* error number handed to us_error() */
};

/* How a multi-letter rule picks between its two candidate codes */
enum soundex_dm_rule_kind {
    SDM_ALWAYS,     /* code_a, unconditionally */
    SDM_BY_START,   /* code_a at the start of the word, code_b elsewhere */
    SDM_BY_HARD,    /* code_a when HARD was requested, code_b when SOFT */
    SDM_VOWEL       /* code_a at the start of the word; elsewhere code_b, but
                    ** only when the pair is followed by a vowel. A NULL
                    ** code_b means "never coded away from the start".
                    */
};

struct soundex_dm_rule {
    const char *pattern;    /* upper case letter sequence to match */
    int kind;               /* one of enum soundex_dm_rule_kind */
    const char *code_a;
    const char *code_b;
};

/* Multi-letter rules. The first matching entry wins, so within one initial
** letter a longer pattern must precede any pattern that is a prefix of it.
*/
static const struct soundex_dm_rule soundex_dm_rules[] = {
    {"AI", SDM_VOWEL, "0", "1"}, {"AJ", SDM_VOWEL, "0", "1"},
    {"AY", SDM_VOWEL, "0", "1"}, {"AU", SDM_VOWEL, "0", "7"},

    {"CHS", SDM_BY_START, "5", "54"},
    {"CSZ", SDM_ALWAYS, "4", NULL}, {"CZS", SDM_ALWAYS, "4", NULL},
    {"CH", SDM_BY_HARD, "5", "4"},  /* KH when hard, TCH when soft */
    {"CK", SDM_BY_HARD, "5", "4"},  /* K when hard, TSK when soft */
    {"CS", SDM_ALWAYS, "4", NULL}, {"CZ", SDM_ALWAYS, "4", NULL},

    {"DRZ", SDM_ALWAYS, "4", NULL}, {"DRS", SDM_ALWAYS, "4", NULL},
    {"DSH", SDM_ALWAYS, "4", NULL}, {"DSZ", SDM_ALWAYS, "4", NULL},
    {"DZH", SDM_ALWAYS, "4", NULL}, {"DZS", SDM_ALWAYS, "4", NULL},
    {"DS", SDM_ALWAYS, "4", NULL}, {"DZ", SDM_ALWAYS, "4", NULL},
    {"DT", SDM_ALWAYS, "3", NULL},

    {"EI", SDM_VOWEL, "0", "1"}, {"EJ", SDM_VOWEL, "0", "1"},
    {"EY", SDM_VOWEL, "0", "1"}, {"EU", SDM_VOWEL, "1", "1"},

    {"FB", SDM_ALWAYS, "7", NULL},

    {"IA", SDM_VOWEL, "1", NULL}, {"IE", SDM_VOWEL, "1", NULL},
    {"IO", SDM_VOWEL, "1", NULL}, {"IU", SDM_VOWEL, "1", NULL},

    {"KS", SDM_BY_START, "5", "54"},
    {"KH", SDM_ALWAYS, "5", NULL},

    {"MN", SDM_ALWAYS, "66", NULL},
    {"NM", SDM_ALWAYS, "66", NULL},

    {"OI", SDM_VOWEL, "0", "1"}, {"OJ", SDM_VOWEL, "0", "1"},
    {"OY", SDM_VOWEL, "0", "1"},

    {"PF", SDM_ALWAYS, "7", NULL}, {"PH", SDM_ALWAYS, "7", NULL},

    {"RTZ", SDM_ALWAYS, "94", NULL},
    {"RS", SDM_BY_HARD, "94", "4"}, /* RTZ when hard, ZH when soft */
    {"RZ", SDM_BY_HARD, "94", "4"},

    {"SCHTSCH", SDM_BY_START, "2", "4"},
    {"SCHTSH", SDM_BY_START, "2", "4"}, {"SCHTCH", SDM_BY_START, "2", "4"},
    {"SHTCH", SDM_BY_START, "2", "4"}, {"SHTSH", SDM_BY_START, "2", "4"},
    {"STSCH", SDM_BY_START, "2", "4"},
    {"SHCH", SDM_BY_START, "2", "4"}, {"STRZ", SDM_BY_START, "2", "4"},
    {"STRS", SDM_BY_START, "2", "4"}, {"STSH", SDM_BY_START, "2", "4"},
    {"SCHT", SDM_BY_START, "2", "43"}, {"SCHD", SDM_BY_START, "2", "43"},
    {"STCH", SDM_BY_START, "2", "4"}, {"SZCZ", SDM_BY_START, "2", "4"},
    {"SZCS", SDM_BY_START, "2", "4"},
    {"SCH", SDM_ALWAYS, "4", NULL},
    {"SHT", SDM_BY_START, "2", "43"}, {"SZT", SDM_BY_START, "2", "43"},
    {"SHD", SDM_BY_START, "2", "43"}, {"SZD", SDM_BY_START, "2", "43"},
    {"SH", SDM_ALWAYS, "4", NULL}, {"SZ", SDM_ALWAYS, "4", NULL},
    {"SC", SDM_BY_START, "2", "4"}, {"SD", SDM_BY_START, "2", "4"},
    {"ST", SDM_BY_START, "2", "43"},

    {"TTSCH", SDM_ALWAYS, "4", NULL},
    {"TTCH", SDM_ALWAYS, "4", NULL}, {"TSCH", SDM_ALWAYS, "4", NULL},
    {"TTSZ", SDM_ALWAYS, "4", NULL},
    {"TCH", SDM_ALWAYS, "4", NULL}, {"TRZ", SDM_ALWAYS, "4", NULL},
    {"TRS", SDM_ALWAYS, "4", NULL}, {"TSH", SDM_ALWAYS, "4", NULL},
    {"TTS", SDM_ALWAYS, "4", NULL}, {"TTZ", SDM_ALWAYS, "4", NULL},
    {"TSZ", SDM_ALWAYS, "4", NULL}, {"TZS", SDM_ALWAYS, "4", NULL},
    {"TH", SDM_ALWAYS, "3", NULL},
    {"TC", SDM_ALWAYS, "4", NULL}, {"TZ", SDM_ALWAYS, "4", NULL},
    {"TS", SDM_ALWAYS, "4", NULL},

    {"UI", SDM_VOWEL, "0", "1"}, {"UJ", SDM_VOWEL, "0", "1"},
    {"UY", SDM_VOWEL, "0", "1"}, {"UE", SDM_VOWEL, "0", NULL},

    {"ZHDZH", SDM_BY_START, "2", "4"},
    {"ZDZH", SDM_BY_START, "2", "4"},
    {"ZSCH", SDM_ALWAYS, "4", NULL},
    {"ZDZ", SDM_BY_START, "2", "4"},
    {"ZHD", SDM_BY_START, "2", "43"},
    {"ZSH", SDM_ALWAYS, "4", NULL},
    {"ZD", SDM_BY_START, "2", "43"},
    {"ZH", SDM_ALWAYS, "4", NULL}, {"ZS", SDM_ALWAYS, "4", NULL}
};

/* Progress of the code being built */
struct soundex_dm_state {
    char *digits;       /* the six result characters, pre-filled with '0' */
    int used;           /* how many of them have been decided so far */
    int prior_letter;   /* non-zero when the previous input was a single letter */
    int prior_code;     /* code of that letter, consulted to drop duplicates */
};

/* Forget the previous letter so that the next one is never seen as a duplicate. */
static void
soundex_dm_forget(struct soundex_dm_state *st)
{
    st->prior_letter = 0;
    st->prior_code = -1;
}

/* Append the digits of `code`, silently dropping whatever does not fit. */
static void
soundex_dm_emit(struct soundex_dm_state *st, const char *code)
{
    while (*code != '\0' && st->used < SOUNDEX_DM_DIGITS)
        st->digits[st->used++] = *code++;
}

/* Append a single digit code unless the previous letter produced the same one. */
static void
soundex_dm_emit_once(struct soundex_dm_state *st, int code)
{
    char text[2];

    if (st->prior_letter && st->prior_code == code)
        return;
    text[0] = (char )('0' + code);
    text[1] = '\0';
    soundex_dm_emit(st, text);
    st->prior_letter = 1;
    st->prior_code = code;
}

/* Code of a consonant that needs no context, or -1 for any other character. */
static int
soundex_dm_simple_code(char letter)
{
    switch (letter) {
    case 'D': case 'T':
        return 3;
    case 'S': case 'Z':
        return 4;
    case 'G': case 'K': case 'Q':
        return 5;
    case 'M': case 'N':
        return 6;
    case 'B': case 'F': case 'P': case 'V': case 'W':
        return 7;
    case 'L':
        return 8;
    case 'R':
        return 9;
    default:
        return -1;
    }
}

/* Report `text` through us_error() and hand back II_ERROR. */
static II_STATUS
soundex_dm_fail(II_SCB *scb, const char *text)
{
    char msg[256];

    snprintf(msg, sizeof msg, "%s", text);
    us_error(scb, SOUNDEX_DM_ERRCODE, msg);
    return (II_ERROR);
}

/* soundex_dm:
** Two parameter entry point; computes the Daitch-Mokotoff soundex of a varchar.
**
** scb    - session control block, only used for error reporting
** string - varchar to encode. Blanks are skipped, letters are upper-cased,
**          and the first character that is neither ends the word. At most
**          SOUNDEX_INT_BUFFER letters are examined.
** cheese - varchar hardness flag: 'HARD', 'hard', 'H' or 'h' selects the hard
**          reading of C, CH, CK, J, RS and RZ; 'SOFT', 'soft', 'S' or 's'
**          selects the soft reading.
** rdv    - receives a varchar of exactly six digits, right padded with '0'.
**
** Returns II_OK, or II_ERROR (after calling us_error()) when the input has no
** leading letters or the hardness flag is not one of the values above.
**
** Differences from the earlier version of this routine:
**  - lower case 'soft' is accepted, as documented;
**  - a two digit code that does not fit is truncated to six digits instead of
**    failing with an "Internal buffer overflow" error (e.g. 'Nicholsonx');
**  - RTZ resets the duplicate check like every other multi-letter rule, and X
**    records its code so that the existing duplicate test for 54 can fire;
**  - characters are passed to the <ctype.h> routines as unsigned char;
**  - a letter outside A-Z (possible in some locales) is skipped rather than
**    stalling the scan.
*/
II_STATUS
soundex_dm (
II_SCB *scb,
II_DATA_VALUE *string, /* Generate the Daitch-Mokotoff soundex for this
** string
*/
II_DATA_VALUE *cheese, /* Optional flag, indicates if ch in cheese is:
** 'SOFT' | 'S' (as in cheese), or
** 'HARD' | 'H' (as in christmas)
** Default is SOFT.
*/
II_DATA_VALUE *rdv /* varchar(6) is returned */
)
{
    struct soundex_dm_state st;
    const char *in_text = (const char *)string->db_data + sizeof(short);
    const char *flag_text = (const char *)cheese->db_data + sizeof(short);
    char *out = (char *)rdv->db_data;
    char buffer[SOUNDEX_INT_BUFFER + SOUNDEX_PAD_BUFFER];
    short in_len, flag_len, out_len = SOUNDEX_DM_DIGITS;
    int go_hard = 0, length = 0;
    int i, pos, at_start;
    size_t r;

    /* Preprocess: collect the leading word, upper-cased, into buffer[] */
    memset(buffer, ' ', sizeof buffer);
    memcpy(&in_len, string->db_data, sizeof in_len);
    for (i = 0; i < in_len && length < SOUNDEX_INT_BUFFER; i++)
    {
        unsigned char ch = (unsigned char )in_text[i];

        if (isblank(ch)) continue;  /* ignore spaces */
        if (!isalpha(ch)) break;    /* anything else ends the word */
        buffer[length++] = (char )toupper(ch);
    }
    if (length == 0)
        return soundex_dm_fail(scb, "soundex_dm(): No valid word in input data.");

    /* Initialise the return data: length prefix then six '0' digits */
    rdv->db_prec = (short )0;
    memcpy(out, &out_len, sizeof out_len);
    memcpy(out + sizeof(short), "000000", SOUNDEX_DM_DIGITS);
    buffer[length] = '\0'; /* lets strncmp() stop at the end of the word */

    /* Do we have cheese? go_hard=1 for 'HARD'/'H', 0 for 'SOFT'/'S' */
    memcpy(&flag_len, cheese->db_data, sizeof flag_len);
    if (flag_len == 1)
    {
        switch (flag_text[0]) {
        case 'H': case 'h':
            go_hard = 1;
            break;
        case 'S': case 's':
            go_hard = 0;
            break;
        default:
            return soundex_dm_fail(scb, "soundex_dm(): Invalid 'hardness' indicator supplied. Use 'H' or 'S'");
        }
    }
    else if (flag_len == 4
        && (!memcmp(flag_text, "HARD", 4) || !memcmp(flag_text, "hard", 4)))
    {
        go_hard = 1;
    }
    else if (flag_len == 4
        && (!memcmp(flag_text, "SOFT", 4) || !memcmp(flag_text, "soft", 4)))
    {
        go_hard = 0;
    }
    else
    {
        return soundex_dm_fail(scb, "soundex_dm(): Invalid 'hardness' indicator supplied. Use 'HARD' or 'SOFT'");
    }

    /* Now process the data stored in the buffer */
    st.digits = out + sizeof(short);
    st.used = 0;
    soundex_dm_forget(&st);

    for (pos = 0, at_start = 1;
         pos < length && st.used < SOUNDEX_DM_DIGITS;
         at_start = 0)
    {
        const char *here = buffer + pos;
        const struct soundex_dm_rule *rule = NULL;
        int next_is_vowel = soundex_dm_vowelage(buffer, pos, length, 1);
        int code;

        /* Multi-letter rules take precedence over single letters */
        for (r = 0; r < sizeof soundex_dm_rules / sizeof soundex_dm_rules[0]; r++)
        {
            const char *pattern = soundex_dm_rules[r].pattern;

            if (strncmp(here, pattern, strlen(pattern)) == 0)
            {
                rule = &soundex_dm_rules[r];
                break;
            }
        }
        if (rule != NULL)
        {
            int span = (int )strlen(rule->pattern);
            const char *code_text = NULL;

            soundex_dm_forget(&st);
            switch (rule->kind) {
            case SDM_ALWAYS:
                code_text = rule->code_a;
                break;
            case SDM_BY_START:
                code_text = at_start ? rule->code_a : rule->code_b;
                break;
            case SDM_BY_HARD:
                code_text = go_hard ? rule->code_a : rule->code_b;
                break;
            default: /* SDM_VOWEL */
                if (at_start)
                    code_text = rule->code_a;
                else if (rule->code_b != NULL
                    && soundex_dm_vowelage(buffer, pos, length, span))
                    code_text = rule->code_b;
                break;
            }
            if (code_text != NULL)
                soundex_dm_emit(&st, code_text);
            pos += span;
            continue;
        }

        /* Single letters */
        switch (*here) {
        case 'A': case 'E': case 'I': case 'O': case 'U':
            /* A leading vowel occupies a digit, which stays '0' */
            if (at_start)
            {
                soundex_dm_emit(&st, "0");
                st.prior_letter = 1;
                st.prior_code = 0;
            }
            else
                soundex_dm_forget(&st);
            break;
        case 'Y':
            if (at_start)
            {
                soundex_dm_emit(&st, "1");
                st.prior_letter = 1;
                st.prior_code = 0;
            }
            else
                soundex_dm_forget(&st);
            break;
        case 'C':
            soundex_dm_emit_once(&st, go_hard ? 5 : 4); /* K or TZ */
            break;
        case 'H':
            /* Only coded at the start of the word or before a vowel;
            ** otherwise the duplicate check state is left untouched.
            */
            if (at_start || next_is_vowel)
                soundex_dm_emit_once(&st, 5);
            break;
        case 'J':
            if (!go_hard)
                soundex_dm_emit_once(&st, 4);   /* as DZH */
            else if (at_start)
                soundex_dm_emit_once(&st, 1);   /* as Y, leading only */
            break;
        case 'X':
            if (at_start)
                soundex_dm_emit_once(&st, 5);
            else if (!(st.prior_letter && st.prior_code == 54))
            {
                soundex_dm_emit(&st, "54");
                st.prior_letter = 1;
                st.prior_code = 54;
            }
            break;
        default:
            code = soundex_dm_simple_code(*here);
            if (code > 0)
                soundex_dm_emit_once(&st, code);
            else
                soundex_dm_forget(&st); /* letter outside A-Z: skip it */
            break;
        }
        pos++;
    } /* For Loop */

    return (II_OK);
} /* soundex_dm() */
Support function
/* soundex_dm_vowelage:
** Look-ahead test: is the character `skip` places beyond the current
** position a vowel? For this purpose the vowels are A, E, I, O, U, J and Y.
** Returns 1 when it is, 0 when it is not or when that position lies at or
** beyond the end of the buffer.
*/
int
soundex_dm_vowelage (
char *buffer, /* The buffer of characters to check */
int b_ptr, /* The current position in the buffer */
int b_len, /* The length of the buffer */
int skip /* How far ahead to check for a vowel */
)
{
    int target = b_ptr + skip;

    /* Nothing left to look at */
    if (target >= b_len)
        return 0;

    switch (buffer[target]) {
    case 'A':
    case 'E':
    case 'I':
    case 'O':
    case 'U':
    case 'J':
    case 'Y':
        return 1;
    default:
        return 0;
    }
} /* soundex_dm_vowelage */
