OME regex()
From Ingres Community Wiki
Contents |
Introduction
The Linux regex library can be used from within OME to provide a NORMAL function that allows grep-like regular expressions to be executed on varchar or long varchar strings.
By Martin Bowes.
Syntax
regex(
(varchar | long varchar) string,
(varchar ) pattern
)
Return Value
The function returns 1 if the regex pattern has a match in the string, and 0 otherwise.
Example
This allows SQL such as: select count(*) from a_table where regex(a_string, '^[+-]?[0-9]+$')=1.
This counts the rows in which the nominated string is an integer literal, optionally preceded by a '+' or '-' sign.
FOD
In the fod_id enum set include the identifier: UDF_REGEX
The following can then be added to the Function_Definitions array:
static IIADD_FO_DFN Function_Definitions[]={
...
{ /* regex() */
II_O_OPERATION, /*fod_object_type*/
{"regex"}, /*fod_name*/
UDF_REGEX, /*fod_id*/
II_NORMAL /*fod_type*/
},
}
FIDs
Add the following definitions to the fid_id enum set:
UDF_FI_REGEX_VARCHAR, UDF_FI_REGEX_LVARCHAR
You will need arrays of datatypes, to indicate what types are permitted for each parameter. The FIDs displayed use the following.
/* Argument types for regex(varchar, varchar): the string, then the pattern. */
static II_DT_ID UD_2_VC[] = {
    II_VARCHAR,
    II_VARCHAR
};
/* Argument types for regex(long varchar, varchar): the string, then the pattern. */
static II_DT_ID UD_LVC_N_VC[] = {
    II_LVCH,
    II_VARCHAR
};
The FIDs themselves are:
/*
** Function instance table: one entry per permitted argument signature of
** regex().  Both entries return a fixed-length 4-byte integer.
**
** The routines named in fid_routine (regex and long_regex) must be declared
** before this table so that their addresses can be taken here.
*/
static IIADD_FI_DFN Function_Instances[] = {
    {/* regex(varchar, varchar) */
        II_O_FUNCTION_INSTANCE, /* fid_object_type */
        UDF_FI_REGEX_VARCHAR,   /* fid_id*/
        II_NO_FI,               /* fid_cmplmnt*/
        UDF_REGEX,              /* fid_opid=fod_id from function definition
                                ** This is the minor sort field for this array
                                */
        II_NORMAL,              /* fid_optype
                                ** This is the major sort field for this array
                                */
        II_FID_F0_NOFLAGS,      /* fid_attributes*/
        0,                      /* fid_wslength*/
        2,                      /* fid_numargs*/
        UD_2_VC,                /* fid_args, a pointer to an array of datatypes*/
        II_INTEGER,             /* fid_result, result is an integer */
        II_RES_FIXED,           /* fid_rltype*/
        4,                      /* fid_rlength */
        0,                      /* fid_rprec */
        regex,                  /* fid_routine */
        0                       /* lenspec_routine */
    }, /* regex(varchar, varchar) */
    {/* regex(long varchar, varchar) */
        II_O_FUNCTION_INSTANCE, /* fid_object_type */
        UDF_FI_REGEX_LVARCHAR,  /* fid_id*/
        II_NO_FI,               /* fid_cmplmnt*/
        UDF_REGEX,              /* fid_opid=fod_id from function definition
                                ** This is the minor sort field for this array
                                */
        II_NORMAL,              /* fid_optype
                                ** This is the major sort field for this array
                                */
        II_FID_F0_NOFLAGS,      /* fid_attributes*/
        0,                      /* fid_wslength*/
        2,                      /* fid_numargs*/
        UD_LVC_N_VC,            /* fid_args, a pointer to an array of datatypes*/
        II_INTEGER,             /* fid_result, result is an integer */
        II_RES_FIXED,           /* fid_rltype*/
        4,                      /* fid_rlength */
        0,                      /* fid_rprec */
        long_regex,             /* fid_routine */
        0                       /* lenspec_routine */
    }, /* regex(long varchar, varchar) */
};
Executor Code
You will need...
#include <sys/types.h> /* Required for regex() */
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
And..
/* Longest regular expression, in bytes, that will be accepted. */
#define MAX_REGEX_LENGTH 256
/* Chunk size, in bytes, for processing a string piecewise. */
#define REGEX_STRING_CHUNK 1024
regex(varchar, varchar)
/*
** regex(varchar, varchar) -- test a varchar against a POSIX extended
** regular expression.
**
** Parameters:
**   scb    - session control block, passed through to us_error().
**   string - the value to search.  Its data is read as a 2-byte length
**            followed by that many bytes of text.
**   regex  - the pattern, in the same layout; at most MAX_REGEX_LENGTH
**            bytes.
**   rdv    - result value; an int is stored in rdv->db_data: 1 if the
**            pattern matches anywhere in the string, 0 otherwise.
**
** Returns II_OK when the match was attempted, or II_ERROR (after reporting
** through us_error()) when the pattern is too long, fails to compile, or a
** length is invalid, or memory cannot be obtained.
**
** The whole string is matched in a single regexec() call, so a match is
** found wherever it lies and anchored patterns work on strings of any
** length.  An empty string is matched too, so a pattern such as '^$' can
** succeed.  regexec() works on NUL-terminated text, so anything after an
** embedded NUL byte in the string is not examined.
*/
II_STATUS
regex(
    II_SCB *scb,
    II_DATA_VALUE *string,
    II_DATA_VALUE *regex,
    II_DATA_VALUE *rdv
)
{
    /* REG_NOSUB: only a yes/no answer is wanted, so no sub-match positions
    ** need to be recorded and regexec() can be given no pmatch array.
    */
    int cflags = REG_EXTENDED | REG_NEWLINE | REG_NOSUB;
    int ecode;
    int matched = 0;
    short count;                        /* 2-byte length prefix */
    regex_t preg;                       /* compiled pattern */
    char aregex[MAX_REGEX_LENGTH + 1];  /* NUL-terminated copy of the pattern */
    char *subject;                      /* NUL-terminated copy of the string */
    char msg[256];                      /* Used for error processing */

    rdv->db_prec = 0;                                 /* Set output precision */
    memcpy(rdv->db_data, &matched, sizeof(matched));  /* Set a no match default */

    /* Extract the pattern, check its length, terminate it and compile it.
    ** memcpy is used for the length so no alignment of db_data is assumed.
    */
    memcpy(&count, regex->db_data, sizeof(count));
    if (count < 0) {
        snprintf(msg, sizeof(msg),
            "regex(): regular expression has an invalid length!\n");
        us_error(scb, 0x200011, msg);
        return (II_ERROR);
    }
    if (count > MAX_REGEX_LENGTH) {
        snprintf(msg, sizeof(msg), "regex(): regular expression is too long!\n");
        us_error(scb, 0x200011, msg);
        return (II_ERROR);
    }
    memcpy(aregex, (char *)regex->db_data + sizeof(short), (size_t)count);
    aregex[count] = '\0';

    ecode = regcomp(&preg, aregex, cflags);
    if (ecode) {
        /* preg is not freed here: after a failed regcomp() its contents are
        ** undefined and there is nothing for regfree() to release.
        */
        regerror(ecode, &preg, msg, sizeof(msg));
        us_error(scb, 0x200011, msg);
        return (II_ERROR);
    }

    /* Take a terminated copy of the whole string; the input value itself is
    ** left untouched.
    */
    memcpy(&count, string->db_data, sizeof(count));
    if (count < 0) {
        snprintf(msg, sizeof(msg), "regex(): string has an invalid length!\n");
        us_error(scb, 0x200011, msg);
        regfree(&preg);
        return (II_ERROR);
    }
    subject = malloc((size_t)count + 1);
    if (subject == NULL) {
        snprintf(msg, sizeof(msg), "regex(): out of memory!\n");
        us_error(scb, 0x200011, msg);
        regfree(&preg);
        return (II_ERROR);
    }
    memcpy(subject, (char *)string->db_data + sizeof(short), (size_t)count);
    subject[count] = '\0';

    /* Any non-zero result (no match, or an internal failure) leaves the
    ** default of 0 in place.
    */
    if (regexec(&preg, subject, 0, NULL, 0) == 0) {
        matched = 1;
        memcpy(rdv->db_data, &matched, sizeof(matched));
    }

    free(subject);
    regfree(&preg);
    return (II_OK);
} /*regex*/
regex(long varchar, varchar)
/*
** regex(long varchar, varchar) -- test a long varchar against a POSIX
** extended regular expression.
**
** Parameters:
**   scb    - session control block, passed through to us_error().
**   string - the long value to search; it is handed to the large object
**            handler (usc_lo_handler) as the coupon to read from.
**   regex  - the pattern, read as a 2-byte length followed by that many
**            bytes of text; at most MAX_REGEX_LENGTH bytes.
**   rdv    - result value; an int is stored in rdv->db_data: 1 if the
**            pattern matches, 0 otherwise.
**
** Returns II_OK when the search ran to completion.  On failure the problem
** is reported through us_error(); the handler's status is returned if the
** II_INFORMATION request fails, and II_ERROR in every other case.
**
** The value is fetched and matched one segment at a time, stopping at the
** first segment that matches.  REG_NOTBOL / REG_NOTEOL tell regexec() when a
** segment is not the true start or end of the value.
**
** Known limitation: because each segment is matched on its own, a match
** that spans two segments is not found, and a pattern anchored at both ends
** can only match a value that fits in a single segment.
*/
II_STATUS
long_regex(
    II_SCB *scb,
    II_DATA_VALUE *string,
    II_DATA_VALUE *regex,
    II_DATA_VALUE *rdv
)
{
    /* REG_NOSUB: only a yes/no answer is wanted, so no sub-match positions
    ** need to be recorded and regexec() can be given no pmatch array.
    */
    int cflags = REG_EXTENDED | REG_NEWLINE | REG_NOSUB;
    int ecode, eflags;
    int matched = 0;
    short count;                        /* 2-byte length prefix */
    long seg_length;                    /* segment size reported by the handler */
    regex_t preg;                       /* compiled pattern */
    char aregex[MAX_REGEX_LENGTH + 1];  /* NUL-terminated copy of the pattern */
    char msg[256];                      /* Used for error processing */
    /* Used for reading a long varchar */
    char *segspace = NULL;
    II_STATUS status = II_OK;
    II_STATUS result = II_ERROR;
    II_POP_CB pop_cb;
    II_DATA_VALUE underdv, segment, coupon;

    rdv->db_prec = 0;                                 /* Set output precision */
    memcpy(rdv->db_data, &matched, sizeof(matched));  /* Set a no match default */

    /* Set coupon to be a copy of string */
    coupon.db_data = string->db_data;
    coupon.db_length = string->db_length;
    coupon.db_datatype = string->db_datatype;
    coupon.db_prec = 0;

    /* Initialise parts of the segment */
    segment.db_data = NULL;
    segment.db_length = 0;
    segment.db_datatype = string->db_datatype;
    segment.db_prec = 0;

    /* Extract the pattern, check its length, terminate it and compile it.
    ** memcpy is used for the length so no alignment of db_data is assumed.
    */
    memcpy(&count, regex->db_data, sizeof(count));
    if (count < 0) {
        snprintf(msg, sizeof(msg),
            "regex(): regular expression has an invalid length!\n");
        us_error(scb, 0x200011, msg);
        return (II_ERROR);
    }
    if (count > MAX_REGEX_LENGTH) {
        snprintf(msg, sizeof(msg), "regex(): regular expression is too long!\n");
        us_error(scb, 0x200011, msg);
        return (II_ERROR);
    }
    memcpy(aregex, (char *)regex->db_data + sizeof(short), (size_t)count);
    aregex[count] = '\0';

    ecode = regcomp(&preg, aregex, cflags);
    if (ecode) {
        /* preg is not freed here: after a failed regcomp() its contents are
        ** undefined and there is nothing for regfree() to release.
        */
        regerror(ecode, &preg, msg, sizeof(msg));
        us_error(scb, 0x200011, msg);
        return (II_ERROR);
    }

    /* From here on every exit goes through cleanup so that the compiled
    ** pattern and the segment buffer are always released.
    */

    /* Initialise the pop_cb, want to act on string */
    pop_cb.pop_length = sizeof(pop_cb);
    pop_cb.pop_type = II_POP_TYPE;
    pop_cb.pop_ascii_id = 0;
    pop_cb.pop_temporary = II_POP_SHORT_TEMP;
    pop_cb.pop_underdv = &underdv;
    underdv.db_datatype = II_VARCHAR;
    underdv.db_data = NULL;
    underdv.db_length = 0;
    pop_cb.pop_coupon = &coupon;
    pop_cb.pop_segment = &segment;
    /* Give the error code a defined value in case the handler only sets it
    ** when it has something to report; it is examined after every call.
    */
    pop_cb.pop_error.err_code = 0;

    /* Determine the size of the segments that may be used */
    status = (*usc_lo_handler)(II_INFORMATION, &pop_cb);
    if (status) {
        snprintf(msg, sizeof(msg),
            "regex(): Error %d encountered seeking INFORMATION on long segment length\n",
            (int)pop_cb.pop_error.err_code);
        us_error(scb, 0x200011, msg);
        result = status;
        goto cleanup;
    }

    /* Size the read buffer from what the handler reported rather than
    ** assuming a fixed maximum; one extra byte holds the NUL terminator.
    */
    seg_length = (long)underdv.db_length;
    if (seg_length <= (long)sizeof(short)) {
        snprintf(msg, sizeof(msg),
            "regex(): unusable long segment length %ld\n", seg_length);
        us_error(scb, 0x200011, msg);
        goto cleanup;
    }
    segspace = malloc((size_t)seg_length + 1);
    if (segspace == NULL) {
        snprintf(msg, sizeof(msg), "regex(): out of memory!\n");
        us_error(scb, 0x200011, msg);
        goto cleanup;
    }

    /* Now set to read that many bytes into a (II_DATA_VALUE )segment */
    segment.db_length = underdv.db_length;
    segment.db_datatype = underdv.db_datatype;
    segment.db_prec = underdv.db_prec;
    segment.db_data = segspace;
    pop_cb.pop_continuation = II_C_BEGIN_MASK;
    eflags = 0;

    do {
        /* Start from an empty segment so that a call which delivers no data
        ** is seen as zero length instead of stale bytes.
        */
        memset(segspace, 0, sizeof(short));

        status = (*usc_lo_handler)(II_GET, &pop_cb);
        if (status) {
            if ((status >= II_ERROR)
                || (pop_cb.pop_error.err_code != II_E_NOMORE)) {
                snprintf(msg, sizeof(msg),
                    "regex(): Unexpected error %d encountered processing long object\n",
                    (int)status);
                us_error(scb, 0x200010, msg);
                goto cleanup;
            }
        }

        /* Got a Segment */
        pop_cb.pop_continuation = 0;
        memcpy(&count, segspace, sizeof(count));
        if (count < 0 || (long)count > seg_length - (long)sizeof(short)) {
            snprintf(msg, sizeof(msg),
                "regex(): invalid segment length %d in long object\n",
                (int)count);
            us_error(scb, 0x200010, msg);
            goto cleanup;
        }
        segspace[sizeof(short) + (size_t)count] = '\0'; /* And terminate it */

        /* And indicate if there are more chunks to come */
        if (pop_cb.pop_error.err_code != II_E_NOMORE) {
            eflags = eflags | REG_NOTEOL;
        }

        if (regexec(&preg, segspace + sizeof(short), 0, NULL, eflags) == 0) {
            matched = 1;
            memcpy(rdv->db_data, &matched, sizeof(matched));
            break; /* First match breaks the loop! */
        }

        /* No longer the beginning of line on all subsequent reads */
        eflags = REG_NOTBOL;
    } while ((status <= II_ERROR)
        && (pop_cb.pop_error.err_code != II_E_NOMORE));

    result = II_OK;

cleanup:
    free(segspace);
    regfree(&preg);
    return (result);
} /*long_regex*/
