srcs/toolbox/uri.c

00001 /* 
00002  * Copyright (c) 2005-2012 by KoanLogic s.r.l. - All rights reserved.  
00003  */
00004 
00005 #include <stdlib.h>
00006 #include <string.h>
00007 #include <ctype.h>
00008 
00009 #include <toolbox/uri.h>
00010 #include <toolbox/carpal.h>
00011 #include <toolbox/misc.h>
00012 #include <toolbox/memory.h>
00013 #include <toolbox/lexer.h>
00014 
00015 /* Internal representation of an URI value. */
00016 struct u_uri_s
00017 {
00018     unsigned int opts, flags;
00019     char scheme[U_TOKEN_SZ];
00020     char userinfo[U_TOKEN_SZ];
00021     char user[U_TOKEN_SZ], pwd[U_TOKEN_SZ];
00022     char authority[U_TOKEN_SZ];
00023     char host[U_TOKEN_SZ];
00024     char port[U_TOKEN_SZ];
00025     char path[U_TOKEN_SZ];
00026     char query[U_TOKEN_SZ];
00027     char fragment[U_TOKEN_SZ];
00028 };
00029 
00030 static int u_uri_parser (u_lexer_t *l, u_uri_opts_t opts, u_uri_t **pu);
00031 static int u_uri_parse_scheme (u_lexer_t *l, u_uri_t *u);
00032 static int u_uri_parse_hier_part (u_lexer_t *l, u_uri_t *u);
00033 static int u_uri_parse_authority (u_lexer_t *l, u_uri_t *u);
00034 static int u_uri_parse_query (u_lexer_t *l, u_uri_t *u);
00035 static int u_uri_parse_fragment (u_lexer_t *l, u_uri_t *u);
00036 static int u_uri_parse_abempty (u_lexer_t *l, u_uri_t *u);
00037 static int u_uri_parse_userinfo (u_lexer_t *l, u_uri_t *u);
00038 static int u_uri_parse_host (u_lexer_t *l, u_uri_t *u);
00039 static int u_uri_parse_ipliteral (u_lexer_t *l);
00040 static int u_uri_parse_ipv4address (u_lexer_t *l);
00041 static int u_uri_parse_regname (u_lexer_t *l);
00042 static int u_uri_expect_colon (u_lexer_t *l);
00043 static int u_uri_expect_pct_encoded (u_lexer_t *l);
00044 static int u_uri_expect_segment_nz (u_lexer_t *l);
00045 static int u_uri_expect_path_abempty (u_lexer_t *l);
00046 static int u_uri_query_first (u_lexer_t *l);
00047 static int u_uri_fragment_first (u_lexer_t *l);
00048 static int u_uri_path_first (u_lexer_t *l);
00049 static int u_uri_ipliteral_first (u_lexer_t *l);
00050 static int u_uri_ipv4address_first (u_lexer_t *l);
00051 static int u_uri_regname_first (u_lexer_t *l);
00052 static int u_uri_port_first (u_lexer_t *l);
00053 static int u_uri_parse_port (u_lexer_t *l, u_uri_t *u);
00054 static int u_uri_match_pchar (u_lexer_t *l);
00055 static int u_uri_match_pchar_minus_at_sign (u_lexer_t *l);
00056 static int u_uri_match_ups (u_lexer_t *l);
00057 static int u_uri_adjust_greedy_match (u_lexer_t *l, char match[U_TOKEN_SZ]);
00058 static int u_uri_knead_authority (u_uri_t *u, char s[U_URI_STRMAX]);
00059 static int u_uri_crumble_user_password (u_uri_t *u);
00060 
00125 int u_uri_crumble (const char *uri, u_uri_opts_t opts, u_uri_t **pu)
00126 {
00127     u_lexer_t *l = NULL;
00128 
00129     warn_err_if (u_lexer_new(uri, &l));
00130     warn_err_if (u_uri_parser(l, opts, pu));
00131 
00132     u_lexer_free(l), l = NULL;
00133 
00134     return 0;
00135 err:
00136     u_lexer_free(l);
00137     return ~0;
00138 }
00139 
00152 int u_uri_knead (u_uri_t *u, char s[U_URI_STRMAX])
00153 {
00154     dbg_return_if (u == NULL, ~0);
00155     dbg_return_if (s == NULL, ~0);
00156 
00157     /* see RFC 3986, Section 5.3. Component Recomposition */
00158 
00159     *s = '\0';
00160 
00161     if (strlen(u->scheme))
00162     {
00163         dbg_err_if (u_strlcat(s, u->scheme, U_URI_STRMAX));
00164         dbg_err_if (u_strlcat(s, ":", U_URI_STRMAX));
00165     }
00166 
00167     if (strlen(u->authority))
00168     {
00169         dbg_err_if (u_strlcat(s, "//", U_URI_STRMAX));
00170         dbg_err_if (u_strlcat(s, u->authority, U_URI_STRMAX));
00171     }
00172     else /* try recompose authority through its atoms */
00173         dbg_err_if (u_uri_knead_authority(u, s));
00174 
00175     dbg_err_if (u_strlcat(s, u->path, U_URI_STRMAX));
00176 
00177     if (strlen(u->query))
00178     {
00179         dbg_err_if (u_strlcat(s, "?", U_URI_STRMAX));
00180         dbg_err_if (u_strlcat(s, u->query, U_URI_STRMAX));
00181     }
00182 
00183     if (strlen(u->fragment))
00184     {
00185         dbg_err_if (u_strlcat(s, "#", U_URI_STRMAX));
00186         dbg_err_if (u_strlcat(s, u->fragment, U_URI_STRMAX));
00187     }
00188  
00189     return 0;
00190 err:
00191     return ~0;
00192 }
00193 
00195 void u_uri_print (u_uri_t *u, int extended)
00196 {
00197     u_unused_args(extended);
00198 
00199     if (strlen(u->scheme))
00200         u_con("scheme: \"%s\"", u->scheme);
00201 
00202     if (strlen(u->userinfo))
00203     {
00204         u_con("userinfo: \"%s\"", u->userinfo);
00205 
00206         if (extended)
00207         {
00208             u_con("{");
00209             if (strlen(u->user))
00210                 u_con("  user: \"%s\"", u->user);
00211 
00212             if (strlen(u->pwd))
00213                 u_con("  pwd: \"%s\"", u->pwd);
00214             u_con("}");
00215         }
00216     }
00217 
00218     if (strlen(u->host))
00219         u_con("host: \"%s\"", u->host);
00220 
00221     if (strlen(u->port))
00222         u_con("port: \"%s\"", u->port);
00223 
00224     if (strlen(u->path))
00225         u_con("path: \"%s\"", u->path);
00226 
00227     if (strlen(u->query))
00228         u_con("query: \"%s\"", u->query);
00229 
00230     if (strlen(u->fragment))
00231         u_con("fragment: \"%s\"", u->fragment);
00232 
00233     return;
00234 }
00235 
00250 int u_uri_new (u_uri_opts_t opts, u_uri_t **pu)
00251 {
00252     u_uri_t *u = u_zalloc(sizeof *u);
00253     warn_err_sif (u == NULL);
00254 
00255     u->opts = opts;
00256     u->flags = U_URI_FLAGS_NONE;
00257     dbg_err_if (u_uri_set_path(u, "/"));    /* path is mandatory, so we set
00258                                                default value here */
00259     *pu = u;
00260     return 0;
00261 err:
00262     return ~0;
00263 }
00264 
00275 void u_uri_free (u_uri_t *u)
00276 {
00277     if (u)
00278         u_free(u);
00279     return;
00280 }
00281 
00282 #define U_URI_GETSET_F(field)                                       \
00283 const char *u_uri_get_##field (u_uri_t *uri)                        \
00284 {                                                                   \
00285     dbg_return_if (uri == NULL, NULL);                              \
00286     return uri->field;                                              \
00287 }                                                                   \
00288                                                                     \
00289 int u_uri_set_##field (u_uri_t *uri, const char *val)               \
00290 {                                                                   \
00291     dbg_return_if (uri == NULL, ~0);                                \
00292     dbg_return_if (val == NULL, ~0);                                \
00293                                                                     \
00294     dbg_err_if (u_strlcpy(uri->field, val, sizeof uri->field));     \
00295                                                                     \
00296     return 0;                                                       \
00297 err:                                                                \
00298     return ~0;                                                      \
00299 }
00300 
00301 U_URI_GETSET_F(scheme)
00302 U_URI_GETSET_F(userinfo)
00303 U_URI_GETSET_F(user)
00304 U_URI_GETSET_F(pwd)
00305 U_URI_GETSET_F(host)
00306 U_URI_GETSET_F(port)
00307 U_URI_GETSET_F(authority)
00308 U_URI_GETSET_F(path)
00309 U_URI_GETSET_F(query)
00310 U_URI_GETSET_F(fragment)
00311 
00312 u_uri_flags_t u_uri_get_flags (u_uri_t *uri) { return uri->flags; }
00313 
00318 static int u_uri_parser (u_lexer_t *l, u_uri_opts_t opts, u_uri_t **pu)
00319 {
00320     u_uri_t *u = NULL;
00321 
00322     warn_err_sif (u_uri_new(opts, &u));
00323 
00324     /* Get URI scheme. */
00325     warn_err_if (u_uri_parse_scheme(l, u));
00326 
00327     /* Scheme and hier-part are separated by a ':'. */
00328     warn_err_if (u_uri_expect_colon(l));
00329 
00330     /* Hierarchical part (authority and/or path). */
00331     warn_err_if (u_uri_parse_hier_part(l, u));
00332 
00333     /* Optional query is introduced by a '?'. */
00334     if (u_uri_query_first(l))
00335         warn_err_if (u_uri_parse_query(l, u));
00336 
00337     /* Optional fragment is introduced by a '#'. */
00338     if (u_uri_fragment_first(l))
00339         warn_err_if (u_uri_parse_fragment(l, u));
00340 
00341     *pu = u;
00342 
00343     return 0;
00344 err:
00345     u_uri_free(u);
00346     return ~0;
00347 }
00348 
00349 static int u_uri_expect_colon (u_lexer_t *l)
00350 {
00351     return u_lexer_expect_char(l, ':');
00352 }
00353 
00354 static int u_uri_query_first (u_lexer_t *l)
00355 {
00356     return (u_lexer_peek(l) == '?');
00357 }
00358 
00359 static int u_uri_fragment_first (u_lexer_t *l)
00360 {
00361     return (u_lexer_peek(l) == '#');
00362 }
00363 
00364 static int u_uri_port_first (u_lexer_t *l)
00365 {
00366     return (u_lexer_peek(l) == ':');
00367 }
00368 
00369 /* Check for both absolute and rootless paths. */
00370 static int u_uri_path_first (u_lexer_t *l)
00371 {
00372     return (u_lexer_peek(l) == '/' || u_uri_match_pchar(l));
00373 }
00374 
00375 /* scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) */
00376 static int u_uri_parse_scheme (u_lexer_t *l, u_uri_t *u)
00377 {
00378     char c = u_lexer_peek(l);
00379 
00380     u_lexer_record_lmatch(l);
00381 
00382     if (!isalpha((int) c))
00383         U_LEXER_ERR(l, "Expect an alpha char, got \'%c\' instead.", c);
00384 
00385     do {
00386         U_LEXER_NEXT(l, &c); 
00387     } while (isalnum((int) c) || c == '+' || c == '-' || c == '.');
00388 
00389     u_lexer_record_rmatch(l);
00390 
00391     /* The match includes the first non-scheme char. */
00392     (void) u_lexer_get_match(l, u->scheme);
00393     u->scheme[strlen(u->scheme) - 1] = '\0';
00394 
00395     return 0;
00396 err:
00397     return ~0;
00398 }
00399 
00400 /* "//" authority path-abempty */
00401 static int u_uri_parse_authority (u_lexer_t *l, u_uri_t *u)
00402 {
00403     char c, i = 0;
00404 
00405     /* Consume "//". */
00406     do {
00407         if ((c = u_lexer_peek(l)) != '/')
00408             U_LEXER_ERR(l, "Expect \'/\', got \'%c\' instead.", c);
00409         U_LEXER_NEXT(l, NULL);
00410     } while (++i < 2);
00411 
00412     /* [userinfo "@"] */
00413     if (strchr(u_lexer_lookahead(l), '@'))
00414         warn_err_if (u_uri_parse_userinfo(l, u));
00415     warn_err_if (u_uri_parse_host(l, u));
00416 
00417     /* Optional port is introduced by a ':' char. */
00418     if (u_uri_port_first(l))
00419         warn_err_if (u_uri_parse_port(l, u));
00420 
00421     /* path-abempty */
00422     warn_err_if (u_uri_parse_abempty(l, u));
00423 
00424     return 0;
00425 err:
00426     return ~0;
00427 }
00428 
00429 /* Much more relaxed than requested: we want to let service names, not just
00430  * port numbers.  See /etc/services. */
00431 static int u_uri_parse_port (u_lexer_t *l, u_uri_t *u)
00432 {
00433     char c;
00434 
00435     if ((c = u_lexer_peek(l)) != ':')
00436         U_LEXER_ERR(l, "Expect \':\', got \'%c\'", c);
00437 
00438     /* Consume useless ':'. */
00439     U_LEXER_NEXT(l, &c);
00440 
00441     u_lexer_record_lmatch(l);
00442 
00443     while (isalnum((int) c) || c == '_' || c == '-' || c == '.')
00444         U_LEXER_NEXT(l, &c);
00445 
00446     u_lexer_record_rmatch(l);
00447     (void) u_uri_adjust_greedy_match(l, u->port);
00448 
00449     return 0;
00450 err:
00451     return ~0;
00452 }
00453 
00454 static int u_uri_parse_host (u_lexer_t *l, u_uri_t *u)
00455 {
00456     /* u_uri_parse_ipliteral() will override this. */
00457     u_lexer_record_lmatch(l);
00458 
00459     if (u_uri_ipv4address_first(l))
00460     {
00461         warn_err_if (u_uri_parse_ipv4address(l));
00462         u->flags |= U_URI_FLAGS_HOST_IS_IPADDRESS;
00463     }
00464     else if (u_uri_regname_first(l))
00465         warn_err_if (u_uri_parse_regname(l));
00466     else if (u_uri_ipliteral_first(l))
00467     {
00468         /* The left pointer is handled inside the ipliteral parser as it must 
00469          * take care (i.e. skip) of starting '['. */
00470         warn_err_if (u_uri_parse_ipliteral(l));
00471 
00472         u->flags |= U_URI_FLAGS_HOST_IS_IPADDRESS | 
00473                     U_URI_FLAGS_HOST_IS_IPLITERAL;
00474     }
00475 
00476     u_lexer_record_rmatch(l);
00477 
00478     (void) u_uri_adjust_greedy_match(l, u->host);
00479 
00480     return 0;
00481 err:
00482     return ~0;
00483 }
00484 
00485 static int u_uri_adjust_greedy_match (u_lexer_t *l, char match[U_TOKEN_SZ])
00486 {
00487     size_t mlen;
00488 
00489     dbg_return_if (l == NULL, ~0);
00490     dbg_return_if (match == NULL, ~0);
00491 
00492     (void) u_lexer_get_match(l, match);
00493     mlen = strlen(match);
00494 
00495     if (!u_lexer_eot(l))
00496         --mlen;
00497 
00498     /* Take care of the one-char-extra greedy match in case of IP-literal's.
00499      * Please note that "This [Internet Protocol literal address] is the only 
00500      * place where square bracket characters are allowed in the URI syntax",
00501      * i.e. the following is safe as it doesn't impact no URI component other
00502      * than the IP-literal. */
00503     if (mlen && match[mlen - 1] == ']')
00504         --mlen;
00505 
00506     match[mlen] = '\0';
00507 
00508     return 0;
00509 }
00510 
00511 static int u_uri_ipliteral_first (u_lexer_t *l)
00512 {
00513     return (u_lexer_peek(l) == '[');
00514 }
00515 
00516 static int u_uri_parse_ipliteral (u_lexer_t *l)
00517 {
00518     char c;
00519 
00520     if ((c = u_lexer_peek(l)) != '[')
00521         U_LEXER_ERR(l, "Expect \'[\', got \'%c\' instead.", c);
00522 
00523     /* Consume useless '[' and reset left match. */
00524     U_LEXER_NEXT(l, &c);
00525     u_lexer_record_lmatch(l);
00526 
00527     while (u_uri_match_pchar(l) && c != ']')
00528         U_LEXER_NEXT(l, &c);
00529 
00530     /* We need to reach here with lexer cursor over the ']' char. */
00531     if ((c = u_lexer_peek(l)) != ']')
00532         U_LEXER_ERR(l, "Expect \']\', got \'%c\' instead.", c);
00533 
00534     /* Consume ending ']' and go out. */
00535     U_LEXER_NEXT(l, NULL);
00536 
00537     return 0;
00538 err:
00539     return ~0;
00540 }
00541 
00542 static int u_uri_ipv4address_first (u_lexer_t *l)
00543 {
00544     return (isdigit(u_lexer_peek(l)));
00545 }
00546 
00547 static int u_uri_parse_ipv4address (u_lexer_t *l)
00548 {
00549     char c;
00550 
00551     do {
00552         U_LEXER_NEXT(l, &c); 
00553     } while (isdigit(c) || c == '.');
00554 
00555     return 0;
00556 err:
00557     return ~0;
00558 }
00559 
00560 static int u_uri_regname_first (u_lexer_t *l)
00561 {
00562     return (u_uri_match_ups(l));
00563 }
00564 
00565 static int u_uri_parse_regname (u_lexer_t *l)
00566 {
00567     do {
00568         U_LEXER_NEXT(l, NULL); 
00569     } while (u_uri_match_ups(l));
00570 
00571     return 0;
00572 err:
00573     return ~0;
00574 }
00575 
00576 static int u_uri_parse_userinfo (u_lexer_t *l, u_uri_t *u)
00577 {
00578     char c;
00579 
00580     u_lexer_record_lmatch(l);
00581 
00582     do {
00583         U_LEXER_NEXT(l, &c);
00584     } while (u_uri_match_pchar_minus_at_sign(l));
00585 
00586     if (u_lexer_peek(l) != '@')
00587         U_LEXER_ERR(l, "Expect \'@\', got \'%c\' instead.", c);
00588 
00589     u_lexer_record_rmatch(l);
00590     (void) u_uri_adjust_greedy_match(l, u->userinfo);
00591 
00592     /* 3.2.1.  User Information: Use of the format "user:password" in the 
00593      * userinfo field is deprecated.  Anyway... */
00594     if (!(u->opts & U_URI_OPT_DONT_PARSE_USERINFO))
00595         dbg_if (u_uri_crumble_user_password(u)); 
00596 
00597     /* Consume '@' and go out. */
00598     U_LEXER_NEXT(l, NULL);
00599 
00600     return 0;
00601 err:
00602     return ~0;
00603 }
00604 
00605 /* TODO: refine ! */
00606 static int u_uri_crumble_user_password (u_uri_t *u)
00607 {
00608     char c;
00609     u_lexer_t *l = NULL;
00610 
00611     dbg_return_if (!strlen(u->userinfo), ~0);
00612 
00613     /* Create a disposable lexer. */
00614     dbg_err_if (u_lexer_new(u->userinfo, &l));
00615 
00616     /* User name. */
00617     u_lexer_record_lmatch(l);
00618 
00619     /* Assume there is at least one char available. */
00620     do u_lexer_next(l, &c); while (c != ':' && !u_lexer_eot(l));
00621 
00622     u_lexer_record_rmatch(l);
00623     (void) u_uri_adjust_greedy_match(l, u->user);
00624 
00625     /* Check if we've exhausted the userinfo string. */
00626     nop_goto_if (u_lexer_eot(l), end);
00627 
00628     /* Skip ':'. */
00629     U_LEXER_NEXT(l, NULL);
00630 
00631     /* Password. */
00632     u_lexer_record_lmatch(l);
00633 
00634     do u_lexer_next(l, &c); while (!u_lexer_eot(l));
00635 
00636     u_lexer_record_rmatch(l);
00637     (void) u_uri_adjust_greedy_match(l, u->pwd);
00638 
00639 end:
00640     u_lexer_free(l);
00641     return 0;
00642 err:
00643     u_lexer_free(l);
00644     return ~0;
00645 }
00646 
00647 static int u_uri_parse_abempty (u_lexer_t *l, u_uri_t *u)
00648 {
00649     u_lexer_record_lmatch(l);
00650     warn_err_if (u_uri_expect_path_abempty(l));
00651     u_lexer_record_rmatch(l);
00652     (void) u_uri_adjust_greedy_match(l, u->path);
00653 
00654     return 0;
00655 err:
00656     return ~0;
00657 }
00658 
00659 /* ['/'] segment-nz path-abempty 
00660  * Absorbs both rootless and absolute paths. */
00661 static int u_uri_parse_path (u_lexer_t *l, u_uri_t *u)
00662 {
00663     u_lexer_record_lmatch(l);
00664 
00665     if (u_lexer_peek(l) == '/')
00666         U_LEXER_NEXT(l, NULL);
00667     
00668     warn_err_if (u_uri_expect_segment_nz(l));
00669     warn_err_if (u_uri_expect_path_abempty(l));
00670 
00671     u_lexer_record_rmatch(l);
00672     (void) u_uri_adjust_greedy_match(l, u->path);
00673 
00674     return 0;
00675 err:
00676     return ~0;
00677 }
00678 
00679 /* See RFC3986 Appendix A. */
00680 static int u_uri_parse_hier_part (u_lexer_t *l, u_uri_t *u)
00681 {
00682     /* We need to look ahead two chars to see if we have the '//' string that 
00683      * introduces the authority token. */
00684     if (!strncmp(u_lexer_lookahead(l), "//", strlen("//")))
00685         warn_err_if (u_uri_parse_authority(l, u));
00686     else if (u_uri_path_first(l))
00687         warn_err_if (u_uri_parse_path(l, u));
00688     else /* Path empty. */
00689         u->path[0] = '\0';
00690 
00691     return 0;
00692 err:
00693     return ~0;
00694 }
00695 
00696 static int u_uri_parse_query (u_lexer_t *l, u_uri_t *u)
00697 {
00698     char c;
00699 
00700     if ((c = u_lexer_peek(l)) != '?')
00701         U_LEXER_ERR(l, "Expect \'?\', got \'%c\' instead.", c);
00702 
00703     /* Consume the starting '?' char. */
00704     U_LEXER_NEXT(l, &c);
00705     u_lexer_record_lmatch(l);
00706 
00707     while (c == '/' || c == '?' || u_uri_match_pchar(l))
00708         U_LEXER_NEXT(l, &c);
00709 
00710     u_lexer_record_rmatch(l);
00711     (void) u_uri_adjust_greedy_match(l, u->query);
00712 
00713     return 0;
00714 err:
00715     return ~0;
00716 }
00717 
00718 /* Same as query. */
00719 static int u_uri_parse_fragment (u_lexer_t *l, u_uri_t *u)
00720 {
00721     char c;
00722 
00723     if ((c = u_lexer_peek(l)) != '#')
00724         U_LEXER_ERR(l, "Expect \'#\', got \'%c\' instead.", c);
00725 
00726     /* Consume the starting '#' char. */
00727     U_LEXER_NEXT(l, &c);
00728     u_lexer_record_lmatch(l);
00729 
00730     while (c == '/' || c == '?' || u_uri_match_pchar(l))
00731         U_LEXER_NEXT(l, &c);
00732 
00733     u_lexer_record_rmatch(l);
00734     (void) u_lexer_get_match(l, u->fragment);
00735 
00736     return 0;
00737 err:
00738     return ~0;
00739 }
00740 
00741 /* unreserved / pct-encoded / sub-delims */
00742 static int u_uri_match_ups (u_lexer_t *l)
00743 {
00744     char c;
00745 
00746     switch ((c = u_lexer_peek(l)))
00747     {
00748         /* pct-encoded */
00749         case '%':
00750             warn_err_if (u_uri_expect_pct_encoded(l));
00751 
00752         /* unreserved */
00753         case '-': case '.': case '_': case '~': 
00754 
00755         /* sub-delims */
00756         case '!': case '$': case '&': case '\'': case '(':
00757         case ')': case '*': case '+': case ',': case ';': 
00758         case '=':
00759             return 1;
00760 
00761         /* ALPHA / DIGIT */
00762         default:
00763             return isalnum((int) c);
00764     }
00765 
00766     /* fallthrough */
00767 err:
00768     return 0;
00769 }
00770 
00771 static int u_uri_match_pchar (u_lexer_t *l)
00772 {
00773     switch (u_lexer_peek(l))
00774     {
00775         case '@':
00776             return 1;
00777         default:
00778             return u_uri_match_pchar_minus_at_sign(l);
00779     }
00780 }
00781 
00782 static int u_uri_match_pchar_minus_at_sign (u_lexer_t *l)
00783 {
00784     switch (u_lexer_peek(l))
00785     {
00786         case ':':
00787             return 1;
00788         default:
00789             return u_uri_match_ups(l);
00790     }
00791 }
00792 
00793 static int u_uri_expect_pct_encoded (u_lexer_t *l)
00794 {
00795     char i, c;
00796 
00797     if (u_lexer_peek(l) != '%')
00798         U_LEXER_ERR(l, "Expect \'%%\', got \'%c\' instead.", c);
00799 
00800     for (i = 0; i < 2; i++)
00801     {
00802         U_LEXER_NEXT(l, &c);
00803 
00804         if (!isxdigit((int) c))
00805             U_LEXER_ERR(l, "Non hex digit \'%c\' in percent encoding.", c);
00806     }
00807 
00808     return 0;
00809 err:
00810     return ~0;
00811 }
00812 
00813 /* 1*pchar */
00814 static int u_uri_expect_segment_nz (u_lexer_t *l)
00815 {
00816     /* Expect at least one pchar. */
00817     if (!u_uri_match_pchar(l))
00818         U_LEXER_ERR(l, "Expect a pchar, got \'%c\' instead.", u_lexer_peek(l));
00819 
00820     do { U_LEXER_NEXT(l, NULL); } while (u_uri_match_pchar(l));
00821 
00822     return 0;
00823 err:
00824     return ~0;
00825 }
00826 
00827 /* *pchar */
00828 static int u_uri_expect_segment (u_lexer_t *l)
00829 {
00830     do { U_LEXER_NEXT(l, NULL); } while (u_uri_match_pchar(l));
00831 
00832     return 0;
00833 err:
00834     return ~0;
00835 }
00836 
00837 /* *("/" segment) */
00838 static int u_uri_expect_path_abempty (u_lexer_t *l)
00839 {
00840     /* Could be empty. */
00841     if (u_lexer_peek(l) != '/')
00842         return 0;
00843 
00844     /* Consume '/'-separated segments. */
00845     do { u_uri_expect_segment(l); } while (u_lexer_peek(l) == '/'); 
00846 
00847     return 0;
00848 }
00849 
00850 static int u_uri_knead_authority (u_uri_t *u, char s[U_URI_STRMAX])
00851 {
00852     dbg_return_if (u == NULL, ~0);
00853     dbg_return_if (strlen(u->host) == 0, ~0);
00854     dbg_return_if (s == NULL, ~0);
00855 
00856     /* If host is IPv6 literal, automatically add the IPLITERAL flag.
00857      * XXX Lousy test, we'd better parse it through u_uri_parse_ipliteral() */
00858     if (strchr(u->host, ':'))
00859         u->flags |= U_URI_FLAGS_HOST_IS_IPLITERAL;
00860 
00861     dbg_err_if (u_strlcat(s, "//", U_URI_STRMAX));
00862 
00863     if (strlen(u->userinfo))
00864     {
00865         dbg_err_if (u_strlcat(s, u->userinfo, U_URI_STRMAX));
00866         dbg_err_if (u_strlcat(s, "@", U_URI_STRMAX));
00867     }
00868     else if (strlen(u->user))
00869     {
00870         dbg_err_if (u_strlcat(s, u->user, U_URI_STRMAX)); 
00871 
00872         if (strlen(u->pwd))
00873         {
00874             dbg_err_if (u_strlcat(s, ":", U_URI_STRMAX)); 
00875             dbg_err_if (u_strlcat(s, u->pwd, U_URI_STRMAX)); 
00876         }
00877 
00878         dbg_err_if (u_strlcat(s, "@", U_URI_STRMAX));
00879     }
00880 
00881     if (u->flags & U_URI_FLAGS_HOST_IS_IPLITERAL)
00882         dbg_err_if (u_strlcat(s, "[", U_URI_STRMAX));
00883 
00884     dbg_err_if (u_strlcat(s, u->host, U_URI_STRMAX));
00885 
00886     if (u->flags & U_URI_FLAGS_HOST_IS_IPLITERAL)
00887         dbg_err_if (u_strlcat(s, "]", U_URI_STRMAX));
00888 
00889     if (strlen(u->port))
00890     {
00891         dbg_err_if (u_strlcat(s, ":", U_URI_STRMAX));
00892         dbg_err_if (u_strlcat(s, u->port, U_URI_STRMAX));
00893     }
00894 
00895     return 0;
00896 err:
00897     return ~0;
00898 }
00899 
00900 

←Products
© 2005-2012 - KoanLogic S.r.l. - All rights reserved