mxml-entity.c

Go to the documentation of this file.
00001 /*
00002  * "$Id: mxml-entity.c 309 2007-09-21 04:46:02Z mike $"
00003  *
00004  * Character entity support code for Mini-XML, a small XML-like
00005  * file parsing library.
00006  *
00007  * Copyright 2003-2007 by Michael Sweet.
00008  *
00009  * This program is free software; you can redistribute it and/or
00010  * modify it under the terms of the GNU Library General Public
00011  * License as published by the Free Software Foundation; either
00012  * version 2, or (at your option) any later version.
00013  *
00014  * This program is distributed in the hope that it will be useful,
00015  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00016  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00017  * GNU General Public License for more details.
00018  *
00019  * Contents:
00020  *
00021  *   mxmlEntityAddCallback()    - Add a callback to convert entities to
00022  *                                Unicode.
00023  *   mxmlEntityGetName()        - Get the name that corresponds to the
00024  *                                character value.
00025  *   mxmlEntityGetValue()       - Get the character corresponding to a named
00026  *                                entity.
00027  *   mxmlEntityRemoveCallback() - Remove a callback.
00028  *   _mxml_entity_cb()          - Lookup standard (X)HTML entities.
00029  */
00030 
00031 /*
00032  * Include necessary headers...
00033  */
00034 
00035 #include "mxml-private.h"
00036 
00037 
00038 /*
00039  * 'mxmlEntityAddCallback()' - Add a callback to convert entities to Unicode.
00040  */
00041 
00042 int                 /* O - 0 on success, -1 on failure */
00043 mxmlEntityAddCallback(
00044     int (*cb)(const char *name))    /* I - Callback function to add */
00045 {
00046   _mxml_global_t *global = _mxml_global();
00047                     /* Global data */
00048 
00049 
00050   if (global->num_entity_cbs < (int)(sizeof(global->entity_cbs) / sizeof(global->entity_cbs[0])))
00051   {
00052     global->entity_cbs[global->num_entity_cbs] = cb;
00053     global->num_entity_cbs ++;
00054 
00055     return (0);
00056   }
00057   else
00058   {
00059     mxml_error("Unable to add entity callback!");
00060 
00061     return (-1);
00062   }
00063 }
00064 
00065 
/*
 * 'mxmlEntityGetName()' - Look up the entity name for a character value.
 *
 * Only the characters '&', '<', '>', and '"' have names ("amp", "lt",
 * "gt", and "quot" respectively).  NULL is returned for every other
 * value, meaning the character does not need a named entity.
 */

const char *                            /* O - Entity name or NULL */
mxmlEntityGetName(int val)              /* I - Character value */
{
  if (val == '&')
    return ("amp");

  if (val == '<')
    return ("lt");

  if (val == '>')
    return ("gt");

  if (val == '\"')
    return ("quot");

 /*
  * Anything else can be written as-is...
  */

  return (NULL);
}
00093 
00094 
00095 /*
00096  * 'mxmlEntityGetValue()' - Get the character corresponding to a named entity.
00097  *
00098  * The entity name can also be a numeric constant. -1 is returned if the
00099  * name is not known.
00100  */
00101 
00102 int                 /* O - Character value or -1 on error */
00103 mxmlEntityGetValue(const char *name)    /* I - Entity name */
00104 {
00105   int       i;          /* Looping var */
00106   int       ch;         /* Character value */
00107   _mxml_global_t *global = _mxml_global();
00108                     /* Global data */
00109 
00110 
00111   for (i = 0; i < global->num_entity_cbs; i ++)
00112     if ((ch = (global->entity_cbs[i])(name)) >= 0)
00113       return (ch);
00114 
00115   return (-1);
00116 }
00117 
00118 
00119 /*
00120  * 'mxmlEntityRemoveCallback()' - Remove a callback.
00121  */
00122 
00123 void
00124 mxmlEntityRemoveCallback(int (*cb)(const char *name))
00125                     /* I - Callback function to remove */
00126 {
00127   int       i;          /* Looping var */
00128   _mxml_global_t *global = _mxml_global();
00129                     /* Global data */
00130 
00131 
00132   for (i = 0; i < global->num_entity_cbs; i ++)
00133     if (cb == global->entity_cbs[i])
00134     {
00135      /*
00136       * Remove the callback...
00137       */
00138 
00139       global->num_entity_cbs --;
00140 
00141       if (i < global->num_entity_cbs)
00142         memmove(global->entity_cbs + i, global->entity_cbs + i + 1,
00143             (global->num_entity_cbs - i) * sizeof(global->entity_cbs[0]));
00144 
00145       return;
00146     }
00147 }
00148 
00149 
/*
 * '_mxml_entity_cb()' - Lookup standard (X)HTML entities.
 *
 * The name is matched exactly (case-sensitive, via strcmp()) against a
 * built-in table.  The table MUST stay sorted in strcmp() order because
 * it is searched with a binary search.
 */

int                                     /* O - Unicode value or -1 */
_mxml_entity_cb(const char *name)       /* I - Entity name */
{
  int   cmp,                            /* Result of name comparison */
        mid,                            /* Entry being examined */
        lo,                             /* Lowest candidate entry */
        hi;                             /* Highest candidate entry */
  static const struct
  {
    const char  *name;                  /* Entity name */
    int         val;                    /* Character value */
  } entities[] =
  {
    { "AElig",      198 },
    { "Aacute",     193 },
    { "Acirc",      194 },
    { "Agrave",     192 },
    { "Alpha",      913 },
    { "Aring",      197 },
    { "Atilde",     195 },
    { "Auml",       196 },
    { "Beta",       914 },
    { "Ccedil",     199 },
    { "Chi",        935 },
    { "Dagger",     8225 },
    { "Delta",      916 },
    { "Dstrok",     208 },
    { "ETH",        208 },
    { "Eacute",     201 },
    { "Ecirc",      202 },
    { "Egrave",     200 },
    { "Epsilon",    917 },
    { "Eta",        919 },
    { "Euml",       203 },
    { "Gamma",      915 },
    { "Iacute",     205 },
    { "Icirc",      206 },
    { "Igrave",     204 },
    { "Iota",       921 },
    { "Iuml",       207 },
    { "Kappa",      922 },
    { "Lambda",     923 },
    { "Mu",         924 },
    { "Ntilde",     209 },
    { "Nu",         925 },
    { "OElig",      338 },
    { "Oacute",     211 },
    { "Ocirc",      212 },
    { "Ograve",     210 },
    { "Omega",      937 },
    { "Omicron",    927 },
    { "Oslash",     216 },
    { "Otilde",     213 },
    { "Ouml",       214 },
    { "Phi",        934 },
    { "Pi",         928 },
    { "Prime",      8243 },
    { "Psi",        936 },
    { "Rho",        929 },
    { "Scaron",     352 },
    { "Sigma",      931 },
    { "THORN",      222 },
    { "Tau",        932 },
    { "Theta",      920 },
    { "Uacute",     218 },
    { "Ucirc",      219 },
    { "Ugrave",     217 },
    { "Upsilon",    933 },
    { "Uuml",       220 },
    { "Xi",         926 },
    { "Yacute",     221 },
    { "Yuml",       376 },
    { "Zeta",       918 },
    { "aacute",     225 },
    { "acirc",      226 },
    { "acute",      180 },
    { "aelig",      230 },
    { "agrave",     224 },
    { "alefsym",    8501 },
    { "alpha",      945 },
    { "amp",        '&' },
    { "and",        8743 },
    { "ang",        8736 },
    { "apos",       '\'' },
    { "aring",      229 },
    { "asymp",      8776 },
    { "atilde",     227 },
    { "auml",       228 },
    { "bdquo",      8222 },
    { "beta",       946 },
    { "brkbar",     166 },
    { "brvbar",     166 },
    { "bull",       8226 },
    { "cap",        8745 },
    { "ccedil",     231 },
    { "cedil",      184 },
    { "cent",       162 },
    { "chi",        967 },
    { "circ",       710 },
    { "clubs",      9827 },
    { "cong",       8773 },
    { "copy",       169 },
    { "crarr",      8629 },
    { "cup",        8746 },
    { "curren",     164 },
    { "dArr",       8659 },
    { "dagger",     8224 },
    { "darr",       8595 },
    { "deg",        176 },
    { "delta",      948 },
    { "diams",      9830 },
    { "die",        168 },
    { "divide",     247 },
    { "eacute",     233 },
    { "ecirc",      234 },
    { "egrave",     232 },
    { "empty",      8709 },
    { "emsp",       8195 },
    { "ensp",       8194 },
    { "epsilon",    949 },
    { "equiv",      8801 },
    { "eta",        951 },
    { "eth",        240 },
    { "euml",       235 },
    { "euro",       8364 },
    { "exist",      8707 },
    { "fnof",       402 },
    { "forall",     8704 },
    { "frac12",     189 },
    { "frac14",     188 },
    { "frac34",     190 },
    { "frasl",      8260 },
    { "gamma",      947 },
    { "ge",         8805 },
    { "gt",         '>' },
    { "hArr",       8660 },
    { "harr",       8596 },
    { "hearts",     9829 },
    { "hellip",     8230 },
    { "hibar",      175 },
    { "iacute",     237 },
    { "icirc",      238 },
    { "iexcl",      161 },
    { "igrave",     236 },
    { "image",      8465 },
    { "infin",      8734 },
    { "int",        8747 },
    { "iota",       953 },
    { "iquest",     191 },
    { "isin",       8712 },
    { "iuml",       239 },
    { "kappa",      954 },
    { "lArr",       8656 },
    { "lambda",     955 },
    { "lang",       9001 },
    { "laquo",      171 },
    { "larr",       8592 },
    { "lceil",      8968 },
    { "ldquo",      8220 },
    { "le",         8804 },
    { "lfloor",     8970 },
    { "lowast",     8727 },
    { "loz",        9674 },
    { "lrm",        8206 },
    { "lsaquo",     8249 },
    { "lsquo",      8216 },
    { "lt",         '<' },
    { "macr",       175 },
    { "mdash",      8212 },
    { "micro",      181 },
    { "middot",     183 },
    { "minus",      8722 },
    { "mu",         956 },
    { "nabla",      8711 },
    { "nbsp",       160 },
    { "ndash",      8211 },
    { "ne",         8800 },
    { "ni",         8715 },
    { "not",        172 },
    { "notin",      8713 },
    { "nsub",       8836 },
    { "ntilde",     241 },
    { "nu",         957 },
    { "oacute",     243 },
    { "ocirc",      244 },
    { "oelig",      339 },
    { "ograve",     242 },
    { "oline",      8254 },
    { "omega",      969 },
    { "omicron",    959 },
    { "oplus",      8853 },
    { "or",         8744 },
    { "ordf",       170 },
    { "ordm",       186 },
    { "oslash",     248 },
    { "otilde",     245 },
    { "otimes",     8855 },
    { "ouml",       246 },
    { "para",       182 },
    { "part",       8706 },
    { "permil",     8240 },
    { "perp",       8869 },
    { "phi",        966 },
    { "pi",         960 },
    { "piv",        982 },
    { "plusmn",     177 },
    { "pound",      163 },
    { "prime",      8242 },
    { "prod",       8719 },
    { "prop",       8733 },
    { "psi",        968 },
    { "quot",       '\"' },
    { "rArr",       8658 },
    { "radic",      8730 },
    { "rang",       9002 },
    { "raquo",      187 },
    { "rarr",       8594 },
    { "rceil",      8969 },
    { "rdquo",      8221 },
    { "real",       8476 },
    { "reg",        174 },
    { "rfloor",     8971 },
    { "rho",        961 },
    { "rlm",        8207 },
    { "rsaquo",     8250 },
    { "rsquo",      8217 },
    { "sbquo",      8218 },
    { "scaron",     353 },
    { "sdot",       8901 },
    { "sect",       167 },
    { "shy",        173 },
    { "sigma",      963 },
    { "sigmaf",     962 },
    { "sim",        8764 },
    { "spades",     9824 },
    { "sub",        8834 },
    { "sube",       8838 },
    { "sum",        8721 },
    { "sup",        8835 },
    { "sup1",       185 },
    { "sup2",       178 },
    { "sup3",       179 },
    { "supe",       8839 },
    { "szlig",      223 },
    { "tau",        964 },
    { "there4",     8756 },
    { "theta",      952 },
    { "thetasym",   977 },
    { "thinsp",     8201 },
    { "thorn",      254 },
    { "tilde",      732 },
    { "times",      215 },
    { "trade",      8482 },
    { "uArr",       8657 },
    { "uacute",     250 },
    { "uarr",       8593 },
    { "ucirc",      251 },
    { "ugrave",     249 },
    { "uml",        168 },
    { "upsih",      978 },
    { "upsilon",    965 },
    { "uuml",       252 },
    { "weierp",     8472 },
    { "xi",         958 },
    { "yacute",     253 },
    { "yen",        165 },
    { "yuml",       255 },
    { "zeta",       950 },
    { "zwj",        8205 },
    { "zwnj",       8204 }
  };


 /*
  * Binary search over the inclusive range [lo, hi]; the range shrinks
  * past the probed entry on every miss, so the loop ends as soon as the
  * candidates are exhausted...
  */

  lo = 0;
  hi = (int)(sizeof(entities) / sizeof(entities[0])) - 1;

  while (lo <= hi)
  {
    mid = lo + (hi - lo) / 2;
    cmp = strcmp(name, entities[mid].name);

    if (cmp == 0)
      return (entities[mid].val);

    if (cmp < 0)
      hi = mid - 1;
    else
      lo = mid + 1;
  }

 /*
  * Not a known entity name...
  */

  return (-1);
}

Generated by  doxygen 1.6.2