srcs/toolbox/lexer.c

00001 /* 
00002  * Copyright (c) 2005-2012 by KoanLogic s.r.l.
00003  */
00004 
00005 #include <ctype.h>
00006 
00007 #include <toolbox/carpal.h>
00008 #include <toolbox/lexer.h>
00009 #include <toolbox/memory.h>
00010 #include <toolbox/misc.h>
00011 
00012 /* Lexer context. */
00013 struct u_lexer_s
00014 {
00015     char *s;        /* NUL-terminated string, that shall be parsed. */
00016     size_t slen;    /* String length (excluding ending '\0'). */
00017     size_t pos;     /* Actual lexer position. */
00018     size_t lmatch;  /* Offset of current left side match. */
00019     size_t rmatch;  /* Offset of current right side match. */
00020     char err[U_LEXER_ERR_SZ];   /* Error string. */
00021 };
00022 
00023 static void u_lexer_incr (u_lexer_t *l);
00024 static int u_lexer_next_ex (u_lexer_t *l, int eat_ws, char *pb);
00025 static size_t u_lexer_strlen_match (u_lexer_t *l);
00026 
00042 int u_lexer_new (const char *s, u_lexer_t **pl)
00043 {
00044     u_lexer_t *l = NULL;
00045 
00046     dbg_return_if (s == NULL, ~0);
00047     dbg_return_if (pl == NULL, ~0);
00048     
00049     /* Make room for the object. */
00050     warn_err_sif ((l = u_zalloc(sizeof *l)) == NULL);
00051 
00052     /* Internalize the string to be parsed. */
00053     warn_err_if ((l->s = u_strdup(s)) == NULL);
00054     l->slen = strlen(s);
00055 
00056     /* Init offset counters. */
00057     l->pos = l->rmatch = l->lmatch = 0;
00058 
00059     /* Error string. */
00060     l->err[0] = '\0';
00061 
00062     /* Copy out. */
00063     *pl = l;
00064 
00065     return 0;
00066 err:
00067     u_lexer_free(l);
00068     return ~0;
00069 }
00070 
00078 const char *u_lexer_lookahead (u_lexer_t *l)
00079 {
00080     return &l->s[l->pos];
00081 }
00082 
00090 void u_lexer_free (u_lexer_t *l)
00091 {
00092     /* Don't moan on NULL objects. */
00093     if (l)
00094     {
00095         if (l->s)
00096             u_free(l->s);
00097         u_free(l);
00098     }
00099 
00100     return;
00101 }
00102 
00110 const char *u_lexer_geterr (u_lexer_t *l)
00111 {
00112     dbg_return_if (l == NULL, NULL);
00113 
00114     return l->err;
00115 }
00116 
00127 int u_lexer_seterr (u_lexer_t *l, const char *fmt, ...)
00128 {
00129     va_list ap;
00130 
00131     dbg_return_if (l == NULL, ~0);
00132     dbg_return_if (fmt == NULL, ~0);
00133 
00134     va_start(ap, fmt);
00135     (void) vsnprintf(l->err, sizeof l->err, fmt, ap); 
00136     va_end(ap);
00137 
00138     return 0;
00139 }
00140 
00150 int u_lexer_next (u_lexer_t *l, char *pb)
00151 {
00152     return u_lexer_next_ex(l, 0, pb);
00153 }
00154 
00165 int u_lexer_skip (u_lexer_t *l, char *pb)
00166 {
00167     return u_lexer_next_ex(l, 1, pb);
00168 }
00169 
00178 int u_lexer_eot (u_lexer_t *l)
00179 {
00180     return (l->pos >= l->slen);
00181 }
00182 
00191 int u_lexer_eat_ws (u_lexer_t *l)
00192 {
00193     dbg_return_if (l == NULL, -1);
00194     dbg_return_if (u_lexer_eot(l), -1);
00195 
00196     while (isspace((int) l->s[l->pos]))
00197     {
00198         dbg_return_if (u_lexer_eot(l), -1);
00199         u_lexer_incr(l);
00200     }
00201 
00202     return 0;
00203 }
00204 
00212 char u_lexer_peek (u_lexer_t *l)
00213 {
00214     return l->s[l->pos];
00215 }
00216 
00224 void u_lexer_record_lmatch (u_lexer_t *l)
00225 {
00226     l->lmatch = l->pos;
00227     return;
00228 }
00229 
00237 void u_lexer_record_rmatch (u_lexer_t *l)
00238 {
00239     l->rmatch = l->pos;
00240     return;
00241 }
00242 
00252 char *u_lexer_get_match (u_lexer_t *l, char match[U_TOKEN_SZ])
00253 {
00254     size_t len;
00255 
00256     dbg_return_if (match == NULL, NULL);
00257     dbg_return_if (l->rmatch < l->lmatch, NULL);
00258     dbg_return_if ((len = u_lexer_strlen_match(l)) >= U_TOKEN_SZ, NULL);
00259 
00260     memcpy(match, l->s + l->lmatch, len);
00261     match[len] = '\0';
00262 
00263     return match;
00264 }
00265 
00276 int u_lexer_expect_char (u_lexer_t *l, char expected)
00277 {
00278     char c = u_lexer_peek(l);
00279 
00280     /* The expected char must be under lexer cursor.  If matched, the
00281      * cursor is advanced by one position. */
00282     if (c == expected)
00283         U_LEXER_NEXT(l, NULL);
00284     else
00285         U_LEXER_ERR(l, "expecting \'%c\', got \'%c\' instead", expected, c);
00286 
00287     return 0;
00288 err:
00289     return ~0;
00290 }
00291 
00299 size_t u_lexer_pos (u_lexer_t *l)
00300 {
00301     return l->pos;
00302 }
00303 
00308 /* Must always be called after proper sanitization by u_lexer_eot. */
00309 static void u_lexer_incr (u_lexer_t *l)
00310 {
00311     l->pos += 1;
00312 #ifdef U_LEXER_DEBUG
00313     u_con("\'%c\' -> \'%c\'", *(l->s + l->pos), *(l->s + l->pos + 1));
00314 #endif  /* U_LEXER_DEBUG */
00315     return;
00316 }
00317 
00318 /* '-1' EOT, '0' ok */
00319 static int u_lexer_next_ex (u_lexer_t *l, int eat_ws, char *pb)
00320 {
00321     dbg_return_if (u_lexer_eot(l), -1);
00322 
00323     /* Consume at least one char. */
00324     u_lexer_incr(l);
00325 
00326     /* If requested, skip white spaces. */
00327     if (eat_ws)
00328         dbg_return_if (u_lexer_eat_ws(l) == -1, -1);
00329 
00330     /* If requested, copy out the accepted char. */
00331     if (pb)
00332         *pb = u_lexer_peek(l);
00333 
00334     return 0;
00335 }
00336 
00337 static size_t u_lexer_strlen_match (u_lexer_t *l)
00338 {
00339     return (l->rmatch - l->lmatch + 1);
00340 }

←Products
© 2005-2012 - KoanLogic S.r.l. - All rights reserved