OME regex()

From Ingres Community Wiki

Jump to: navigation, search

Contents

Introduction

The Linux regex library can be used from within OME to provide a NORMAL function that allows grep-like regular expressions to be matched against varchar or long varchar strings.

By Martin Bowes.

Syntax

regex(
       (varchar | long varchar) string,
       (varchar ) pattern
       )

Return Value

The function returns 1 if the regex pattern has a match in the string, and 0 otherwise.

Example

This allows SQL such as: select count(*) from a_table where regex(a_string, '^[+-]?[0-9]+$')=1.

This would count the cases where the nominated string is an integer expression. The pattern allows a single optional preceding '+' or '-' sign.

FOD

In the fod_id enum set include the identifier: UDF_REGEX

The following can then be added to the Function_Definitions array:

static IIADD_FO_DFN Function_Definitions[]={
   ...
   { /* regex() */
   II_O_OPERATION,   /*fod_object_type*/
   {"regex"},        /*fod_name*/
   UDF_REGEX,        /*fod_id*/
   II_NORMAL         /*fod_type*/
   },
}

FIDs

Add the following definitions to the fid_id enum set:

UDF_FI_REGEX_VARCHAR,
UDF_FI_REGEX_LVARCHAR

You will need arrays of datatypes, to indicate what types are permitted for each parameter. The FIDs displayed use the following.

/* Argument datatypes for regex(varchar, varchar): string, then pattern. */
static II_DT_ID  UD_2_VC[] = {
   II_VARCHAR,
   II_VARCHAR
};

/* Argument datatypes for regex(long varchar, varchar): string, then pattern. */
static II_DT_ID  UD_LVC_N_VC[] = {
   II_LVCH,
   II_VARCHAR
};

The FIDs themselves are:

/*
** Function instance table for regex().
**
** Both instances belong to the UDF_REGEX operation, take two arguments
** and produce a fixed-length 4-byte integer result. They differ only in
** the datatype of the first argument and in the routine that is called:
**   regex(varchar, varchar)       -> regex()
**   regex(long varchar, varchar)  -> long_regex()
*/
static IIADD_FI_DFN Function_Instances[] = {
   {/* regex(varchar, varchar) */
   II_O_FUNCTION_INSTANCE,    /* fid_object_type */
   UDF_FI_REGEX_VARCHAR,      /* fid_id*/
   II_NO_FI,                  /* fid_cmplmnt*/
   UDF_REGEX,                 /* fid_opid=fod_id from function definition
                              ** This is the minor sort field for this array
                              */
   II_NORMAL,                 /* fid_optype
                              ** This is the major sort field for this array
                              */
   II_FID_F0_NOFLAGS,         /* fid_attributes*/
   0,                         /* fid_wslength*/
   2,                         /* fid_numargs*/
   UD_2_VC,                   /* fid_args, a pointer to an array of datatypes*/
   II_INTEGER,                /* fid_result, result is an integer */
   II_RES_FIXED,              /* fid_rltype*/
   4,                         /* fid_rlength */
   0,                         /* fid_rprec */
   regex,                     /* fid_routine */
   0                          /* lenspec_routine */
   }, /* regex(varchar, varchar) */

   {/* regex(long varchar, varchar) */
   II_O_FUNCTION_INSTANCE,    /* fid_object_type */
   UDF_FI_REGEX_LVARCHAR,     /* fid_id*/
   II_NO_FI,                  /* fid_cmplmnt*/
   UDF_REGEX,                 /* fid_opid=fod_id from function definition
                              ** This is the minor sort field for this array
                              */
   II_NORMAL,                 /* fid_optype
                              ** This is the major sort field for this array
                              */
   II_FID_F0_NOFLAGS,         /* fid_attributes*/
   0,                         /* fid_wslength*/
   2,                         /* fid_numargs*/
   UD_LVC_N_VC,               /* fid_args, a pointer to an array of datatypes*/
   II_INTEGER,                /* fid_result, result is an integer */
   II_RES_FIXED,              /* fid_rltype*/
   4,                         /* fid_rlength */
   0,                         /* fid_rprec */
   long_regex,                /* fid_routine */
   0                          /* lenspec_routine */
   }, /* regex(long varchar, varchar) */
};

Executor Code

You will need...

#include <sys/types.h>      /* Required for regex() */
#include <regex.h>
#include <stdio.h>          /* snprintf() */
#include <stdlib.h>         /* malloc(), free() */
#include <string.h>         /* memcpy() */

And..

/* Longest pattern, in bytes, that is accepted; longer patterns are
** rejected with an error before compilation.
*/
#define MAX_REGEX_LENGTH    256
/* Size, in bytes, of the on-stack buffer that regex() copies subject
** text into (one extra byte is added for the terminator).
*/
#define REGEX_STRING_CHUNK 1024

regex(varchar, varchar)

/*
** regex(varchar, varchar) -- report whether a POSIX extended regular
** expression has a match in a varchar value.
**
** Parameters:
**   scb     session control block, handed on to us_error() when reporting
**   string  varchar value to search
**   regex   varchar holding the pattern (at most MAX_REGEX_LENGTH bytes)
**   rdv     result; its integer data is set to 1 on a match, 0 otherwise
**
** Returns:
**   II_OK     the result has been stored in rdv
**   II_ERROR  a length prefix was negative, the pattern was too long or
**             did not compile, or working memory could not be obtained;
**             the reason has been reported through us_error()
**
** Notes:
**   - The pattern is compiled with REG_EXTENDED | REG_NEWLINE | REG_NOSUB.
**   - The whole value is matched with a single regexec() call, so anchors
**     and matches behave the same whatever the length of the value.
**     Values of up to REGEX_STRING_CHUNK bytes are copied to a stack
**     buffer; longer values use a temporary heap buffer.
**   - An empty value is still handed to regexec(), so a pattern that can
**     match the empty string (for example '^$') reports a match.
**   - regexec() works on NUL-terminated text, so matching stops at the
**     first NUL byte contained in the value.
*/
II_STATUS
regex(
   II_SCB          *scb,
   II_DATA_VALUE   *string,
   II_DATA_VALUE   *regex,
   II_DATA_VALUE   *rdv
   )
{
   regex_t    preg;           /* compiled pattern */
   char       aregex[MAX_REGEX_LENGTH + 1];
   char       stack_buffer[REGEX_STRING_CHUNK + 1];
   char      *heap_buffer = NULL;
   char      *subject = stack_buffer;
   char       msg[256];       /* Used for error processing */
   short      count;
   int        true_length;
   int        ecode;
   II_STATUS  status = II_OK;

   rdv->db_prec           = 0; /* Set output precision */
   *(int *)(rdv->db_data) = 0; /* Set a no match default */

   /* Extract the pattern: read the length prefix (via memcpy, so no
   ** alignment is assumed), validate it, then copy and terminate the text.
   */
   memcpy(&count, regex->db_data, sizeof(count));
   true_length = count;
   if (true_length < 0) {
       snprintf(msg, sizeof(msg),
           "regex(): regular expression has an invalid length!\n");
       us_error(scb, 0x200011, msg);
       return (II_ERROR);
   }
   if (true_length > MAX_REGEX_LENGTH) {
       snprintf(msg, sizeof(msg), "regex(): regular expression is too long!\n");
       us_error(scb, 0x200011, msg);
       return (II_ERROR);
   }
   memcpy(aregex, (char *)regex->db_data + sizeof(short), (size_t)true_length);
   aregex[true_length] = '\0';

   /* Only success/failure is wanted, so compile with REG_NOSUB and pass
   ** no match array to regexec().
   */
   ecode = regcomp(&preg, aregex, REG_EXTENDED | REG_NEWLINE | REG_NOSUB);
   if (ecode) {
       /* preg is not a compiled pattern after a failed regcomp(), so it
       ** must not be given to regfree().
       */
       regerror(ecode, &preg, msg, sizeof(msg));
       us_error(scb, 0x200011, msg);
       return (II_ERROR);
   }

   /* Extract the subject string in the same way */
   memcpy(&count, string->db_data, sizeof(count));
   true_length = count;
   if (true_length < 0) {
       snprintf(msg, sizeof(msg), "regex(): string has an invalid length!\n");
       us_error(scb, 0x200011, msg);
       status = II_ERROR;
       goto cleanup;
   }
   if (true_length > REGEX_STRING_CHUNK) {
       heap_buffer = malloc((size_t)true_length + 1);
       if (heap_buffer == NULL) {
           snprintf(msg, sizeof(msg),
               "regex(): unable to allocate %d bytes of working memory\n",
               true_length + 1);
           us_error(scb, 0x200011, msg);
           status = II_ERROR;
           goto cleanup;
       }
       subject = heap_buffer;
   }
   memcpy(subject, (char *)string->db_data + sizeof(short), (size_t)true_length);
   subject[true_length] = '\0'; /* It must be terminated */

   if (regexec(&preg, subject, 0, NULL, 0) == 0) {
       *(int *)(rdv->db_data) = 1;
   }

cleanup:
   free(heap_buffer);
   regfree(&preg);
   return (status);
} /*regex*/

regex(long varchar, varchar)

/*
** regex(long varchar, varchar) -- report whether a POSIX extended regular
** expression has a match in a long varchar value.
**
** The long value is read one segment at a time through usc_lo_handler
** and each segment is matched on its own; the first matching segment
** ends the scan.
**
** Parameters:
**   scb     session control block, handed on to us_error() when reporting
**   string  the long varchar value (used as the coupon for the handler)
**   regex   varchar holding the pattern (at most MAX_REGEX_LENGTH bytes)
**   rdv     result; its integer data is set to 1 on a match, 0 otherwise
**
** Returns:
**   II_OK on success. On failure the reason is reported through us_error()
**   and either II_ERROR or, when the II_INFORMATION request fails, the
**   handler's own status is returned.
**
** Notes:
**   - The pattern is compiled with REG_EXTENDED | REG_NEWLINE | REG_NOSUB.
**   - REG_NOTBOL / REG_NOTEOL tell regexec() when a segment is not the
**     start / end of the value.
**   - Limitation: because segments are matched independently, a match
**     that would span two segments is not found.
**   - regexec() works on NUL-terminated text, so matching within a segment
**     stops at the first NUL byte it contains.
*/
II_STATUS
long_regex(
   II_SCB          *scb,
   II_DATA_VALUE   *string,
   II_DATA_VALUE   *regex,
   II_DATA_VALUE   *rdv
   )
{
   regex_t    preg;           /* compiled pattern */
   char       aregex[MAX_REGEX_LENGTH + 1];
   char       msg[256];       /* Used for error processing */
   short      count;
   int        true_length;
   int        ecode, eflags;
   long       seg_length;
   II_STATUS  result = II_OK;

   /* Used for reading a long varchar */
   char           *segspace = NULL;
   char           *string_buffer;
   II_STATUS       status;
   II_POP_CB       pop_cb;
   II_DATA_VALUE   underdv, segment, coupon;

   rdv->db_prec           = 0; /* Set output precision */
   *(int *)(rdv->db_data) = 0; /* Set a no match default */

   /* Set coupon to be a copy of string */
   coupon.db_data     = string->db_data;
   coupon.db_length   = string->db_length;
   coupon.db_datatype = string->db_datatype;
   coupon.db_prec     = 0;

   /* Initialise parts of the segment */
   segment.db_data     = NULL;
   segment.db_length   = 0;
   segment.db_datatype = string->db_datatype;
   segment.db_prec     = 0;

   /* Extract the pattern: read the length prefix (via memcpy, so no
   ** alignment is assumed), validate it, then copy and terminate the text.
   */
   memcpy(&count, regex->db_data, sizeof(count));
   true_length = count;
   if (true_length < 0) {
       snprintf(msg, sizeof(msg),
           "regex(): regular expression has an invalid length!\n");
       us_error(scb, 0x200011, msg);
       return (II_ERROR);
   }
   if (true_length > MAX_REGEX_LENGTH) {
       snprintf(msg, sizeof(msg), "regex(): regular expression is too long!\n");
       us_error(scb, 0x200011, msg);
       return (II_ERROR);
   }
   memcpy(aregex, (char *)regex->db_data + sizeof(short), (size_t)true_length);
   aregex[true_length] = '\0';

   /* Only success/failure is wanted, so compile with REG_NOSUB and pass
   ** no match array to regexec().
   */
   ecode = regcomp(&preg, aregex, REG_EXTENDED | REG_NEWLINE | REG_NOSUB);
   if (ecode) {
       /* preg is not a compiled pattern after a failed regcomp(), so it
       ** must not be given to regfree().
       */
       regerror(ecode, &preg, msg, sizeof(msg));
       us_error(scb, 0x200011, msg);
       return (II_ERROR);
   }

   /* From here on every exit goes through cleanup so that the compiled
   ** pattern and the segment buffer are always released.
   */

   /* Initialise the pop_cb, want to act on string */
   pop_cb.pop_length       = sizeof(pop_cb);
   pop_cb.pop_type         = II_POP_TYPE;
   pop_cb.pop_ascii_id     = 0;
   pop_cb.pop_temporary    = II_POP_SHORT_TEMP;

   pop_cb.pop_underdv  = &underdv;
   underdv.db_datatype = II_VARCHAR;
   underdv.db_data     = NULL;
   underdv.db_length   = 0;

   pop_cb.pop_coupon   = &coupon;
   pop_cb.pop_segment  = &segment;

   /* Determine the size of the segments that may be used */
   status = (*usc_lo_handler)(II_INFORMATION, &pop_cb);
   if (status) {
       snprintf(msg, sizeof(msg),
           "regex(): Error %d encountered seeking INFORMATION on long segment length\n",
           pop_cb.pop_error.err_code);
       us_error(scb, 0x200011, msg);
       result = status;
       goto cleanup;
   }

   /* Size the segment buffer from what the handler reported, plus one
   ** byte for the terminator, rather than trusting a fixed-size array.
   */
   seg_length = (long)underdv.db_length;
   if (seg_length < (long)sizeof(short)) {
       snprintf(msg, sizeof(msg),
           "regex(): long segment length %ld is invalid\n", seg_length);
       us_error(scb, 0x200011, msg);
       result = II_ERROR;
       goto cleanup;
   }
   segspace = malloc((size_t)seg_length + 1);
   if (segspace == NULL) {
       snprintf(msg, sizeof(msg),
           "regex(): unable to allocate %ld bytes of working memory\n",
           seg_length + 1);
       us_error(scb, 0x200011, msg);
       result = II_ERROR;
       goto cleanup;
   }

   /* Now set to read that many bytes into a (II_DATA_VALUE )segment */
   segment.db_length    = underdv.db_length;
   segment.db_datatype  = underdv.db_datatype;
   segment.db_prec      = underdv.db_prec;
   segment.db_data      = segspace;

   pop_cb.pop_continuation = II_C_BEGIN_MASK;
   string_buffer = segspace + sizeof(short);
   eflags = 0;
   do {
       status = (*usc_lo_handler)(II_GET, &pop_cb);
       if (status
           && ((status >= II_ERROR)
               || (pop_cb.pop_error.err_code != II_E_NOMORE)))
       {
           snprintf(msg, sizeof(msg),
               "regex(): Unexpected error %d encountered processing long object\n",
               status);
           us_error(scb, 0x200010, msg);
           result = II_ERROR;
           goto cleanup;
       }

       /* Got a Segment */
       pop_cb.pop_continuation = 0;
       memcpy(&count, segspace, sizeof(count));
       true_length = count;
       if ((true_length < 0)
           || ((long)true_length > seg_length - (long)sizeof(short)))
       {
           snprintf(msg, sizeof(msg),
               "regex(): long segment has an invalid length %d\n",
               true_length);
           us_error(scb, 0x200011, msg);
           result = II_ERROR;
           goto cleanup;
       }
       string_buffer[true_length] = '\0'; /* And terminate it */

       /* And indicate if there are more chunks to come */
       if (pop_cb.pop_error.err_code != II_E_NOMORE) {
           eflags = eflags | REG_NOTEOL;
       }

       if (regexec(&preg, string_buffer, 0, NULL, eflags) == 0) {
           *(int *)(rdv->db_data) = 1;
           break; /* First match breaks the loop! */
       }
       eflags = REG_NOTBOL; /* No longer the beginning of line on all subsequent reads */
   } while ((status <= II_ERROR)
           && (pop_cb.pop_error.err_code != II_E_NOMORE));

cleanup:
   free(segspace);
   regfree(&preg);
   return (result);
} /*long_regex*/
Personal tools
Developing With