parser.c
00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include "klone_conf.h"
00012 #include <stdio.h>
00013 #include <stdlib.h>
00014 #include <ctype.h>
00015 #include <u/libu.h>
00016 #include <klone/os.h>
00017 #include <klone/translat.h>
00018 #include <klone/parser.h>
00019
00020
/*
 * Parser states, stored in parser_t::state and parser_t::prev_state.
 * Values are implicit and consecutive, starting from zero.
 */
enum {
    S_START,            /* initial state installed by parser_reset() */
    S_IN_DOUBLE_QUOTE,  /* inside a "..." literal (not entered by the code
                           visible in this file) */
    S_IN_SINGLE_QUOTE,  /* inside a '...' literal (not entered by the code
                           visible in this file) */
    S_HTML,             /* plain text outside of a code block */
    S_WAIT_PERC,        /* '<' seen, waiting for '%' to open a code block */
    S_START_CODE,       /* "<%" seen, next char may be a command code */
    S_CODE,             /* inside a code block */
    S_WAIT_GT,          /* '%' seen in code, waiting for '>' to close it */
    S_EAT_NEWLINE       /* skipping CR/LF characters */
};
00032
/* Line terminator character codes used for line counting. */
enum {
    LF = 0x0A,  /* line feed */
    CR = 0x0D   /* carriage return */
};
00034
00035 static int parser_on_block(parser_t *p, const char *buf, size_t sz)
00036 {
00037 dbg_err_if (p == NULL);
00038 dbg_err_if (buf == NULL);
00039
00040 for(;;)
00041 {
00042 switch(p->state)
00043 {
00044 case S_START:
00045
00046 return 0;
00047 case S_IN_DOUBLE_QUOTE:
00048 case S_IN_SINGLE_QUOTE:
00049 if(p->state != p->prev_state)
00050 {
00051 p->state = p->prev_state;
00052 continue;
00053 } else
00054 return 0;
00055 case S_HTML:
00056 case S_WAIT_PERC:
00057 if(sz && p->cb_html)
00058 dbg_err_if(p->cb_html(p, p->cb_arg, buf, sz));
00059 return 0;
00060 case S_START_CODE:
00061 case S_CODE:
00062 case S_WAIT_GT:
00063 if(sz && p->cb_code)
00064 dbg_err_if(p->cb_code(p, p->cmd_code, p->cb_arg, buf, sz));
00065 return 0;
00066 }
00067 }
00068
00069 return 0;
00070 err:
00071 return ~0;
00072 }
00073
00074 int parser_run(parser_t *p)
00075 {
00076 enum { BUFSZ = 262144 };
00077 #define set_state( s ) \
00078 do { tmp = p->state; p->state = s; p->prev_state = tmp; } while(0)
00079 #define fetch_next_char() \
00080 do { prev = c; \
00081 dbg_err_if((rc = io_getc(p->in, &c)) < 0); \
00082 if(rc == 0) break; \
00083 if( (c == CR || c == LF) && prev != (c == CR ? LF : CR)) \
00084 p->line++; \
00085 } while(0)
00086 int tmp;
00087 char c = 0, prev;
00088 char buf[BUFSZ];
00089 size_t idx = 0;
00090 ssize_t rc;
00091
00092 dbg_err_if (p == NULL);
00093
00094 buf[0] = 0;
00095 prev = 0;
00096
00097 dbg_err_if(p->line > 1);
00098
00099 fetch_next_char();
00100
00101 while(rc > 0)
00102 {
00103 prev = c;
00104 switch(p->state)
00105 {
00106 case S_START:
00107 set_state(S_HTML);
00108 continue;
00109 case S_IN_DOUBLE_QUOTE:
00110 if(c == '"' && prev != '\\')
00111 set_state(p->prev_state);
00112 break;
00113 case S_IN_SINGLE_QUOTE:
00114 if(c == '\'' && prev != '\\')
00115 set_state(p->prev_state);
00116 break;
00117 case S_HTML:
00118 if(c == '<')
00119 set_state(S_WAIT_PERC);
00120 break;
00121 case S_WAIT_PERC:
00122 if(c == '%')
00123 {
00124 if(idx && --idx)
00125 {
00126 buf[idx] = 0;
00127 dbg_err_if(parser_on_block(p, buf, idx));
00128 buf[0] = 0; idx = 0;
00129 }
00130 set_state(S_START_CODE);
00131 p->code_line = p->line;
00132 fetch_next_char();
00133 continue;
00134 } else {
00135 set_state(S_HTML);
00136 continue;
00137 }
00138 break;
00139 case S_START_CODE:
00140 if(isspace(c))
00141 p->cmd_code = 0;
00142 else {
00143 p->cmd_code = c;
00144 fetch_next_char();
00145 }
00146 set_state(S_CODE);
00147 continue;
00148 case S_CODE:
00149 if(c == '%')
00150 set_state(S_WAIT_GT);
00151 break;
00152 case S_WAIT_GT:
00153 if(c == '>')
00154 {
00155 if(idx && --idx)
00156 {
00157 buf[idx] = 0;
00158 dbg_err_if(parser_on_block(p, buf, idx));
00159 buf[0] = 0; idx = 0;
00160 }
00161 fetch_next_char();
00162 p->cmd_code = 0;
00163 set_state(S_HTML);
00164 continue;
00165 } else {
00166 set_state(S_CODE);
00167 continue;
00168 }
00169 break;
00170 case S_EAT_NEWLINE:
00171 if(c == CR || c == LF)
00172 {
00173 fetch_next_char();
00174 continue;
00175 }
00176 set_state(S_HTML);
00177 continue;
00178 default:
00179 dbg_err_if("unknown parser state");
00180 }
00181 buf[idx++] = c;
00182 if(idx == BUFSZ - 1)
00183 {
00184 buf[idx] = 0;
00185 dbg_err_if(parser_on_block(p, buf, idx));
00186 buf[0] = 0; idx = 0;
00187 }
00188
00189 fetch_next_char();
00190 }
00191
00192 if(idx)
00193 {
00194 buf[idx] = 0;
00195 dbg_err_if(parser_on_block(p, buf, idx));
00196 buf[0] = 0; idx = 0;
00197 }
00198
00199 return 0;
00200 err:
00201 return ~0;
00202 }
00203
00204 void parser_set_cb_code(parser_t *p, parser_cb_code_t cb)
00205 {
00206 dbg_ifb (p == NULL) return;
00207 p->cb_code = cb;
00208 }
00209
00210 void parser_set_cb_html(parser_t *p, parser_cb_html_t cb)
00211 {
00212 dbg_ifb (p == NULL) return;
00213 p->cb_html = cb;
00214 }
00215
00216 void* parser_get_cb_arg(parser_t *p)
00217 {
00218 dbg_ifb (p == NULL) return NULL;
00219 return p->cb_arg;
00220 }
00221
00222 void parser_set_cb_arg(parser_t *p, void *opaque)
00223 {
00224 dbg_ifb (p == NULL) return;
00225 p->cb_arg = opaque;
00226 }
00227
00228 void parser_set_io(parser_t *p, io_t *in, io_t *out)
00229 {
00230 dbg_ifb (p == NULL) return;
00231 p->in = in;
00232 p->out = out;
00233 }
00234
00235 int parser_free(parser_t *t)
00236 {
00237 U_FREE(t);
00238 return 0;
00239 }
00240
00241 int parser_reset(parser_t *p)
00242 {
00243 dbg_return_if (p == NULL, ~0);
00244
00245 p->line = 1;
00246 p->state = p->prev_state = S_START;
00247 p->cmd_code = 0;
00248
00249 return 0;
00250 }
00251
00252 int parser_create(parser_t **pt)
00253 {
00254 parser_t *p = NULL;
00255
00256 dbg_return_if (pt == NULL, ~0);
00257
00258 p = (parser_t*)u_zalloc(sizeof(parser_t));
00259 dbg_err_if(p == NULL);
00260
00261 (void) parser_reset(p);
00262
00263 *pt = p;
00264
00265 return 0;
00266 err:
00267 if(p)
00268 parser_free(p);
00269 return ~0;
00270 }