parser.c

00001 /*
00002  * Copyright (c) 2005-2012 by KoanLogic s.r.l. <http://www.koanlogic.com>
00003  * All rights reserved.
00004  *
00005  * This file is part of KLone, and as such it is subject to the license stated
00006  * in the LICENSE file which you have received as part of this distribution.
00007  *
00008  * $Id: parser.c,v 1.14 2008/10/18 00:03:00 tat Exp $
00009  */
00010 
00011 #include "klone_conf.h"
00012 #include <stdio.h>
00013 #include <stdlib.h>
00014 #include <ctype.h>
00015 #include <u/libu.h>
00016 #include <klone/os.h>
00017 #include <klone/translat.h>
00018 #include <klone/parser.h>
00019 
/*
 * Parser states.
 *
 * The numeric values are spelled out so that the value stored in
 * parser_t::state / parser_t::prev_state is obvious when debugging.
 */
enum
{
    S_START           = 0,  /* set by parser_reset(); parser_run() leaves it
                               for S_HTML on the first character            */
    S_IN_DOUBLE_QUOTE = 1,  /* inside a "..." string                        */
    S_IN_SINGLE_QUOTE = 2,  /* inside a '...' string                        */
    S_HTML            = 3,  /* plain text outside of any <% %> block        */
    S_WAIT_PERC       = 4,  /* '<' seen, a following '%' opens a code block */
    S_START_CODE      = 5,  /* "<%" seen, next char may be a command char   */
    S_CODE            = 6,  /* inside a code block                          */
    S_WAIT_GT         = 7,  /* '%' seen in code, a following '>' closes it  */
    S_EAT_NEWLINE     = 8   /* skip CR/LF characters, then back to S_HTML   */
};
00032 
/* line terminator characters used for line counting and newline skipping */
enum
{
    LF = 10,    /* line feed       (0x0A) */
    CR = 13     /* carriage return (0x0D) */
};
00034 
00035 static int parser_on_block(parser_t *p, const char *buf, size_t sz)
00036 {
00037     dbg_err_if (p == NULL);
00038     dbg_err_if (buf == NULL);
00039 
00040     for(;;)
00041     {
00042         switch(p->state)
00043         {
00044         case S_START:
00045             /* empty file */
00046             return 0;
00047         case S_IN_DOUBLE_QUOTE:
00048         case S_IN_SINGLE_QUOTE: 
00049             if(p->state != p->prev_state)
00050             {
00051                 p->state = p->prev_state;
00052                 continue;
00053             } else
00054                 return 0;
00055         case S_HTML: 
00056         case S_WAIT_PERC:
00057             if(sz && p->cb_html)
00058                 dbg_err_if(p->cb_html(p, p->cb_arg, buf, sz));
00059             return 0;
00060         case S_START_CODE:
00061         case S_CODE:
00062         case S_WAIT_GT:
00063             if(sz && p->cb_code)
00064                 dbg_err_if(p->cb_code(p, p->cmd_code, p->cb_arg, buf, sz));
00065             return 0;
00066         }
00067     }
00068 
00069     return 0;
00070 err:
00071     return ~0;
00072 }
00073 
00074 int parser_run(parser_t *p)
00075 {
00076     enum { BUFSZ = 262144 }; /* a big buffer is good to better zip *.kl1 */
00077     #define set_state( s ) \
00078         do { tmp = p->state; p->state = s; p->prev_state = tmp; } while(0)
00079     #define fetch_next_char()                                           \
00080         do { prev = c;                                                  \
00081             dbg_err_if((rc = io_getc(p->in, &c)) < 0);                  \
00082             if(rc == 0) break;                                          \
00083             if( (c == CR || c == LF) && prev != (c == CR ? LF : CR))    \
00084                 p->line++;                                              \
00085         } while(0)
00086     int tmp;
00087     char c = 0, prev;
00088     char buf[BUFSZ];
00089     size_t idx = 0;
00090     ssize_t rc;
00091 
00092     dbg_err_if (p == NULL);
00093     
00094     buf[0] = 0;
00095     prev = 0;
00096 
00097     dbg_err_if(p->line > 1);
00098 
00099     fetch_next_char();
00100 
00101     while(rc > 0)
00102     {
00103         prev = c;
00104         switch(p->state)
00105         {
00106         case S_START:
00107             set_state(S_HTML);
00108             continue;
00109         case S_IN_DOUBLE_QUOTE:
00110             if(c == '"' && prev != '\\')
00111                 set_state(p->prev_state);
00112             break;
00113         case S_IN_SINGLE_QUOTE:
00114             if(c == '\'' && prev != '\\')
00115                 set_state(p->prev_state);
00116             break;
00117         case S_HTML:
00118             if(c == '<')
00119                 set_state(S_WAIT_PERC);
00120             break;
00121         case S_WAIT_PERC:
00122             if(c == '%')
00123             {
00124                 if(idx && --idx) /* erase < */
00125                 {
00126                     buf[idx] = 0;
00127                     dbg_err_if(parser_on_block(p, buf, idx));
00128                     buf[0] = 0; idx = 0;
00129                 }
00130                 set_state(S_START_CODE);
00131                 p->code_line = p->line; /* save start code line number  */
00132                 fetch_next_char();      /* get cmd char (!,@,etc.)      */
00133                 continue;
00134             } else {
00135                 set_state(S_HTML);
00136                 continue;
00137             }
00138             break;
00139         case S_START_CODE:
00140             if(isspace(c))
00141                 p->cmd_code = 0;
00142             else {
00143                 p->cmd_code = c;
00144                 fetch_next_char();
00145             }
00146             set_state(S_CODE);
00147             continue;
00148         case S_CODE:
00149             if(c == '%') 
00150                 set_state(S_WAIT_GT);
00151             break;
00152         case S_WAIT_GT:
00153             if(c == '>')
00154             {
00155                 if(idx && --idx) /* erase % */
00156                 {
00157                     buf[idx] = 0;
00158                     dbg_err_if(parser_on_block(p, buf, idx));
00159                     buf[0] = 0; idx = 0;
00160                 }
00161                 fetch_next_char();
00162                 p->cmd_code = 0;
00163                 set_state(S_HTML);
00164                 continue;
00165             } else {
00166                 set_state(S_CODE);
00167                 continue;
00168             }
00169             break;
00170         case S_EAT_NEWLINE:
00171             if(c == CR || c == LF)
00172             {
00173                 fetch_next_char();
00174                 continue; /* eat it */
00175             }
00176             set_state(S_HTML);
00177             continue;
00178         default:
00179             dbg_err_if("unknown parser state");
00180         }
00181         buf[idx++] = c;
00182         if(idx == BUFSZ - 1)
00183         {
00184             buf[idx] = 0;
00185             dbg_err_if(parser_on_block(p, buf, idx));
00186             buf[0] = 0; idx = 0;
00187         }
00188 
00189         fetch_next_char();
00190     }
00191 
00192     if(idx)
00193     {
00194         buf[idx] = 0;
00195         dbg_err_if(parser_on_block(p, buf, idx));
00196         buf[0] = 0; idx = 0;
00197     }
00198 
00199     return 0;
00200 err:
00201     return ~0;
00202 }
00203 
00204 void parser_set_cb_code(parser_t *p, parser_cb_code_t cb)
00205 {
00206     dbg_ifb (p == NULL) return;
00207     p->cb_code = cb;
00208 }
00209 
00210 void parser_set_cb_html(parser_t *p, parser_cb_html_t cb)
00211 {
00212     dbg_ifb (p == NULL) return;
00213     p->cb_html = cb;
00214 }
00215 
00216 void* parser_get_cb_arg(parser_t *p)
00217 {
00218     dbg_ifb (p == NULL) return NULL;
00219     return p->cb_arg;
00220 }
00221 
00222 void parser_set_cb_arg(parser_t *p, void *opaque)
00223 {
00224     dbg_ifb (p == NULL) return;
00225     p->cb_arg = opaque;
00226 }
00227 
00228 void parser_set_io(parser_t *p, io_t *in, io_t *out)
00229 {
00230     dbg_ifb (p == NULL) return;
00231     p->in = in;
00232     p->out = out;
00233 }
00234 
00235 int parser_free(parser_t *t)
00236 {
00237     U_FREE(t);
00238     return 0;
00239 }
00240 
00241 int parser_reset(parser_t *p)
00242 {
00243     dbg_return_if (p == NULL, ~0);
00244 
00245     p->line = 1;
00246     p->state = p->prev_state = S_START;
00247     p->cmd_code = 0;
00248 
00249     return 0;
00250 }
00251 
00252 int parser_create(parser_t **pt)
00253 {
00254     parser_t *p = NULL;
00255 
00256     dbg_return_if (pt == NULL, ~0);
00257     
00258     p = (parser_t*)u_zalloc(sizeof(parser_t));
00259     dbg_err_if(p == NULL);
00260 
00261     (void) parser_reset(p);
00262 
00263     *pt = p;
00264 
00265     return 0;
00266 err:
00267     if(p)
00268         parser_free(p);
00269     return ~0;
00270 }

←Products
Copyright © 2005-2012 - KoanLogic S.r.l. - All rights reserved