srcs/toolbox/lexer.c
00001
00002
00003
00004
00005 #include <ctype.h>
00006
00007 #include <toolbox/carpal.h>
00008 #include <toolbox/lexer.h>
00009 #include <toolbox/memory.h>
00010 #include <toolbox/misc.h>
00011
00012
00013 struct u_lexer_s
00014 {
00015 char *s;
00016 size_t slen;
00017 size_t pos;
00018 size_t lmatch;
00019 size_t rmatch;
00020 char err[U_LEXER_ERR_SZ];
00021 };
00022
00023 static void u_lexer_incr (u_lexer_t *l);
00024 static int u_lexer_next_ex (u_lexer_t *l, int eat_ws, char *pb);
00025 static size_t u_lexer_strlen_match (u_lexer_t *l);
00026
00042 int u_lexer_new (const char *s, u_lexer_t **pl)
00043 {
00044 u_lexer_t *l = NULL;
00045
00046 dbg_return_if (s == NULL, ~0);
00047 dbg_return_if (pl == NULL, ~0);
00048
00049
00050 warn_err_sif ((l = u_zalloc(sizeof *l)) == NULL);
00051
00052
00053 warn_err_if ((l->s = u_strdup(s)) == NULL);
00054 l->slen = strlen(s);
00055
00056
00057 l->pos = l->rmatch = l->lmatch = 0;
00058
00059
00060 l->err[0] = '\0';
00061
00062
00063 *pl = l;
00064
00065 return 0;
00066 err:
00067 u_lexer_free(l);
00068 return ~0;
00069 }
00070
00078 const char *u_lexer_lookahead (u_lexer_t *l)
00079 {
00080 return &l->s[l->pos];
00081 }
00082
00090 void u_lexer_free (u_lexer_t *l)
00091 {
00092
00093 if (l)
00094 {
00095 if (l->s)
00096 u_free(l->s);
00097 u_free(l);
00098 }
00099
00100 return;
00101 }
00102
00110 const char *u_lexer_geterr (u_lexer_t *l)
00111 {
00112 dbg_return_if (l == NULL, NULL);
00113
00114 return l->err;
00115 }
00116
00127 int u_lexer_seterr (u_lexer_t *l, const char *fmt, ...)
00128 {
00129 va_list ap;
00130
00131 dbg_return_if (l == NULL, ~0);
00132 dbg_return_if (fmt == NULL, ~0);
00133
00134 va_start(ap, fmt);
00135 (void) vsnprintf(l->err, sizeof l->err, fmt, ap);
00136 va_end(ap);
00137
00138 return 0;
00139 }
00140
00150 int u_lexer_next (u_lexer_t *l, char *pb)
00151 {
00152 return u_lexer_next_ex(l, 0, pb);
00153 }
00154
00165 int u_lexer_skip (u_lexer_t *l, char *pb)
00166 {
00167 return u_lexer_next_ex(l, 1, pb);
00168 }
00169
00178 int u_lexer_eot (u_lexer_t *l)
00179 {
00180 return (l->pos >= l->slen);
00181 }
00182
00191 int u_lexer_eat_ws (u_lexer_t *l)
00192 {
00193 dbg_return_if (l == NULL, -1);
00194 dbg_return_if (u_lexer_eot(l), -1);
00195
00196 while (isspace((int) l->s[l->pos]))
00197 {
00198 dbg_return_if (u_lexer_eot(l), -1);
00199 u_lexer_incr(l);
00200 }
00201
00202 return 0;
00203 }
00204
00212 char u_lexer_peek (u_lexer_t *l)
00213 {
00214 return l->s[l->pos];
00215 }
00216
00224 void u_lexer_record_lmatch (u_lexer_t *l)
00225 {
00226 l->lmatch = l->pos;
00227 return;
00228 }
00229
00237 void u_lexer_record_rmatch (u_lexer_t *l)
00238 {
00239 l->rmatch = l->pos;
00240 return;
00241 }
00242
00252 char *u_lexer_get_match (u_lexer_t *l, char match[U_TOKEN_SZ])
00253 {
00254 size_t len;
00255
00256 dbg_return_if (match == NULL, NULL);
00257 dbg_return_if (l->rmatch < l->lmatch, NULL);
00258 dbg_return_if ((len = u_lexer_strlen_match(l)) >= U_TOKEN_SZ, NULL);
00259
00260 memcpy(match, l->s + l->lmatch, len);
00261 match[len] = '\0';
00262
00263 return match;
00264 }
00265
00276 int u_lexer_expect_char (u_lexer_t *l, char expected)
00277 {
00278 char c = u_lexer_peek(l);
00279
00280
00281
00282 if (c == expected)
00283 U_LEXER_NEXT(l, NULL);
00284 else
00285 U_LEXER_ERR(l, "expecting \'%c\', got \'%c\' instead", expected, c);
00286
00287 return 0;
00288 err:
00289 return ~0;
00290 }
00291
00299 size_t u_lexer_pos (u_lexer_t *l)
00300 {
00301 return l->pos;
00302 }
00303
00308
00309 static void u_lexer_incr (u_lexer_t *l)
00310 {
00311 l->pos += 1;
00312 #ifdef U_LEXER_DEBUG
00313 u_con("\'%c\' -> \'%c\'", *(l->s + l->pos), *(l->s + l->pos + 1));
00314 #endif
00315 return;
00316 }
00317
00318
00319 static int u_lexer_next_ex (u_lexer_t *l, int eat_ws, char *pb)
00320 {
00321 dbg_return_if (u_lexer_eot(l), -1);
00322
00323
00324 u_lexer_incr(l);
00325
00326
00327 if (eat_ws)
00328 dbg_return_if (u_lexer_eat_ws(l) == -1, -1);
00329
00330
00331 if (pb)
00332 *pb = u_lexer_peek(l);
00333
00334 return 0;
00335 }
00336
00337 static size_t u_lexer_strlen_match (u_lexer_t *l)
00338 {
00339 return (l->rmatch - l->lmatch + 1);
00340 }