mxml-entity.c 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438
  1. /*
  2. * Character entity support code for Mini-XML, a small XML file parsing library.
  3. *
  4. * https://www.msweet.org/mxml
  5. *
  6. * Copyright © 2003-2019 by Michael R Sweet.
  7. *
  8. * Licensed under Apache License v2.0. See the file "LICENSE" for more
  9. * information.
  10. */
  11. /*
  12. * Include necessary headers...
  13. */
  14. #include "mxml-private.h"
  15. /*
  16. * 'mxmlEntityAddCallback()' - Add a callback to convert entities to Unicode.
  17. */
  18. int /* O - 0 on success, -1 on failure */
  19. mxmlEntityAddCallback(
  20. mxml_entity_cb_t cb) /* I - Callback function to add */
  21. {
  22. _mxml_global_t *global = _mxml_global();
  23. /* Global data */
  24. if (global->num_entity_cbs < (int)(sizeof(global->entity_cbs) / sizeof(global->entity_cbs[0])))
  25. {
  26. global->entity_cbs[global->num_entity_cbs] = cb;
  27. global->num_entity_cbs ++;
  28. return (0);
  29. }
  30. else
  31. {
  32. mxml_error("Unable to add entity callback!");
  33. return (-1);
  34. }
  35. }
  36. /*
  37. * 'mxmlEntityGetName()' - Get the name that corresponds to the character value.
  38. *
  39. * If val does not need to be represented by a named entity, @code NULL@ is returned.
  40. */
  41. const char * /* O - Entity name or @code NULL@ */
  42. mxmlEntityGetName(int val) /* I - Character value */
  43. {
  44. switch (val)
  45. {
  46. case '&' :
  47. return ("amp");
  48. case '<' :
  49. return ("lt");
  50. case '>' :
  51. return ("gt");
  52. case '\"' :
  53. return ("quot");
  54. default :
  55. return (NULL);
  56. }
  57. }
  58. /*
  59. * 'mxmlEntityGetValue()' - Get the character corresponding to a named entity.
  60. *
  61. * The entity name can also be a numeric constant. -1 is returned if the
  62. * name is not known.
  63. */
  64. int /* O - Character value or -1 on error */
  65. mxmlEntityGetValue(const char *name) /* I - Entity name */
  66. {
  67. int i; /* Looping var */
  68. int ch; /* Character value */
  69. _mxml_global_t *global = _mxml_global();
  70. /* Global data */
  71. for (i = 0; i < global->num_entity_cbs; i ++)
  72. if ((ch = (global->entity_cbs[i])(name)) >= 0)
  73. return (ch);
  74. return (-1);
  75. }
  76. /*
  77. * 'mxmlEntityRemoveCallback()' - Remove a callback.
  78. */
  79. void
  80. mxmlEntityRemoveCallback(
  81. mxml_entity_cb_t cb) /* I - Callback function to remove */
  82. {
  83. int i; /* Looping var */
  84. _mxml_global_t *global = _mxml_global();
  85. /* Global data */
  86. for (i = 0; i < global->num_entity_cbs; i ++)
  87. if (cb == global->entity_cbs[i])
  88. {
  89. /*
  90. * Remove the callback...
  91. */
  92. global->num_entity_cbs --;
  93. if (i < global->num_entity_cbs)
  94. memmove(global->entity_cbs + i, global->entity_cbs + i + 1,
  95. (global->num_entity_cbs - i) * sizeof(global->entity_cbs[0]));
  96. return;
  97. }
  98. }
  99. /*
  100. * '_mxml_entity_cb()' - Lookup standard (X)HTML entities.
  101. */
  102. int /* O - Unicode value or -1 */
  103. _mxml_entity_cb(const char *name) /* I - Entity name */
  104. {
  105. int diff, /* Difference between names */
  106. current, /* Current entity in search */
  107. first, /* First entity in search */
  108. last; /* Last entity in search */
  109. static const struct
  110. {
  111. const char *name; /* Entity name */
  112. int val; /* Character value */
  113. } entities[] =
  114. {
  115. { "AElig", 198 },
  116. { "Aacute", 193 },
  117. { "Acirc", 194 },
  118. { "Agrave", 192 },
  119. { "Alpha", 913 },
  120. { "Aring", 197 },
  121. { "Atilde", 195 },
  122. { "Auml", 196 },
  123. { "Beta", 914 },
  124. { "Ccedil", 199 },
  125. { "Chi", 935 },
  126. { "Dagger", 8225 },
  127. { "Delta", 916 },
  128. { "Dstrok", 208 },
  129. { "ETH", 208 },
  130. { "Eacute", 201 },
  131. { "Ecirc", 202 },
  132. { "Egrave", 200 },
  133. { "Epsilon", 917 },
  134. { "Eta", 919 },
  135. { "Euml", 203 },
  136. { "Gamma", 915 },
  137. { "Iacute", 205 },
  138. { "Icirc", 206 },
  139. { "Igrave", 204 },
  140. { "Iota", 921 },
  141. { "Iuml", 207 },
  142. { "Kappa", 922 },
  143. { "Lambda", 923 },
  144. { "Mu", 924 },
  145. { "Ntilde", 209 },
  146. { "Nu", 925 },
  147. { "OElig", 338 },
  148. { "Oacute", 211 },
  149. { "Ocirc", 212 },
  150. { "Ograve", 210 },
  151. { "Omega", 937 },
  152. { "Omicron", 927 },
  153. { "Oslash", 216 },
  154. { "Otilde", 213 },
  155. { "Ouml", 214 },
  156. { "Phi", 934 },
  157. { "Pi", 928 },
  158. { "Prime", 8243 },
  159. { "Psi", 936 },
  160. { "Rho", 929 },
  161. { "Scaron", 352 },
  162. { "Sigma", 931 },
  163. { "THORN", 222 },
  164. { "Tau", 932 },
  165. { "Theta", 920 },
  166. { "Uacute", 218 },
  167. { "Ucirc", 219 },
  168. { "Ugrave", 217 },
  169. { "Upsilon", 933 },
  170. { "Uuml", 220 },
  171. { "Xi", 926 },
  172. { "Yacute", 221 },
  173. { "Yuml", 376 },
  174. { "Zeta", 918 },
  175. { "aacute", 225 },
  176. { "acirc", 226 },
  177. { "acute", 180 },
  178. { "aelig", 230 },
  179. { "agrave", 224 },
  180. { "alefsym", 8501 },
  181. { "alpha", 945 },
  182. { "amp", '&' },
  183. { "and", 8743 },
  184. { "ang", 8736 },
  185. { "apos", '\'' },
  186. { "aring", 229 },
  187. { "asymp", 8776 },
  188. { "atilde", 227 },
  189. { "auml", 228 },
  190. { "bdquo", 8222 },
  191. { "beta", 946 },
  192. { "brkbar", 166 },
  193. { "brvbar", 166 },
  194. { "bull", 8226 },
  195. { "cap", 8745 },
  196. { "ccedil", 231 },
  197. { "cedil", 184 },
  198. { "cent", 162 },
  199. { "chi", 967 },
  200. { "circ", 710 },
  201. { "clubs", 9827 },
  202. { "cong", 8773 },
  203. { "copy", 169 },
  204. { "crarr", 8629 },
  205. { "cup", 8746 },
  206. { "curren", 164 },
  207. { "dArr", 8659 },
  208. { "dagger", 8224 },
  209. { "darr", 8595 },
  210. { "deg", 176 },
  211. { "delta", 948 },
  212. { "diams", 9830 },
  213. { "die", 168 },
  214. { "divide", 247 },
  215. { "eacute", 233 },
  216. { "ecirc", 234 },
  217. { "egrave", 232 },
  218. { "empty", 8709 },
  219. { "emsp", 8195 },
  220. { "ensp", 8194 },
  221. { "epsilon", 949 },
  222. { "equiv", 8801 },
  223. { "eta", 951 },
  224. { "eth", 240 },
  225. { "euml", 235 },
  226. { "euro", 8364 },
  227. { "exist", 8707 },
  228. { "fnof", 402 },
  229. { "forall", 8704 },
  230. { "frac12", 189 },
  231. { "frac14", 188 },
  232. { "frac34", 190 },
  233. { "frasl", 8260 },
  234. { "gamma", 947 },
  235. { "ge", 8805 },
  236. { "gt", '>' },
  237. { "hArr", 8660 },
  238. { "harr", 8596 },
  239. { "hearts", 9829 },
  240. { "hellip", 8230 },
  241. { "hibar", 175 },
  242. { "iacute", 237 },
  243. { "icirc", 238 },
  244. { "iexcl", 161 },
  245. { "igrave", 236 },
  246. { "image", 8465 },
  247. { "infin", 8734 },
  248. { "int", 8747 },
  249. { "iota", 953 },
  250. { "iquest", 191 },
  251. { "isin", 8712 },
  252. { "iuml", 239 },
  253. { "kappa", 954 },
  254. { "lArr", 8656 },
  255. { "lambda", 955 },
  256. { "lang", 9001 },
  257. { "laquo", 171 },
  258. { "larr", 8592 },
  259. { "lceil", 8968 },
  260. { "ldquo", 8220 },
  261. { "le", 8804 },
  262. { "lfloor", 8970 },
  263. { "lowast", 8727 },
  264. { "loz", 9674 },
  265. { "lrm", 8206 },
  266. { "lsaquo", 8249 },
  267. { "lsquo", 8216 },
  268. { "lt", '<' },
  269. { "macr", 175 },
  270. { "mdash", 8212 },
  271. { "micro", 181 },
  272. { "middot", 183 },
  273. { "minus", 8722 },
  274. { "mu", 956 },
  275. { "nabla", 8711 },
  276. { "nbsp", 160 },
  277. { "ndash", 8211 },
  278. { "ne", 8800 },
  279. { "ni", 8715 },
  280. { "not", 172 },
  281. { "notin", 8713 },
  282. { "nsub", 8836 },
  283. { "ntilde", 241 },
  284. { "nu", 957 },
  285. { "oacute", 243 },
  286. { "ocirc", 244 },
  287. { "oelig", 339 },
  288. { "ograve", 242 },
  289. { "oline", 8254 },
  290. { "omega", 969 },
  291. { "omicron", 959 },
  292. { "oplus", 8853 },
  293. { "or", 8744 },
  294. { "ordf", 170 },
  295. { "ordm", 186 },
  296. { "oslash", 248 },
  297. { "otilde", 245 },
  298. { "otimes", 8855 },
  299. { "ouml", 246 },
  300. { "para", 182 },
  301. { "part", 8706 },
  302. { "permil", 8240 },
  303. { "perp", 8869 },
  304. { "phi", 966 },
  305. { "pi", 960 },
  306. { "piv", 982 },
  307. { "plusmn", 177 },
  308. { "pound", 163 },
  309. { "prime", 8242 },
  310. { "prod", 8719 },
  311. { "prop", 8733 },
  312. { "psi", 968 },
  313. { "quot", '\"' },
  314. { "rArr", 8658 },
  315. { "radic", 8730 },
  316. { "rang", 9002 },
  317. { "raquo", 187 },
  318. { "rarr", 8594 },
  319. { "rceil", 8969 },
  320. { "rdquo", 8221 },
  321. { "real", 8476 },
  322. { "reg", 174 },
  323. { "rfloor", 8971 },
  324. { "rho", 961 },
  325. { "rlm", 8207 },
  326. { "rsaquo", 8250 },
  327. { "rsquo", 8217 },
  328. { "sbquo", 8218 },
  329. { "scaron", 353 },
  330. { "sdot", 8901 },
  331. { "sect", 167 },
  332. { "shy", 173 },
  333. { "sigma", 963 },
  334. { "sigmaf", 962 },
  335. { "sim", 8764 },
  336. { "spades", 9824 },
  337. { "sub", 8834 },
  338. { "sube", 8838 },
  339. { "sum", 8721 },
  340. { "sup", 8835 },
  341. { "sup1", 185 },
  342. { "sup2", 178 },
  343. { "sup3", 179 },
  344. { "supe", 8839 },
  345. { "szlig", 223 },
  346. { "tau", 964 },
  347. { "there4", 8756 },
  348. { "theta", 952 },
  349. { "thetasym", 977 },
  350. { "thinsp", 8201 },
  351. { "thorn", 254 },
  352. { "tilde", 732 },
  353. { "times", 215 },
  354. { "trade", 8482 },
  355. { "uArr", 8657 },
  356. { "uacute", 250 },
  357. { "uarr", 8593 },
  358. { "ucirc", 251 },
  359. { "ugrave", 249 },
  360. { "uml", 168 },
  361. { "upsih", 978 },
  362. { "upsilon", 965 },
  363. { "uuml", 252 },
  364. { "weierp", 8472 },
  365. { "xi", 958 },
  366. { "yacute", 253 },
  367. { "yen", 165 },
  368. { "yuml", 255 },
  369. { "zeta", 950 },
  370. { "zwj", 8205 },
  371. { "zwnj", 8204 }
  372. };
  373. /*
  374. * Do a binary search for the named entity...
  375. */
  376. first = 0;
  377. last = (int)(sizeof(entities) / sizeof(entities[0]) - 1);
  378. while ((last - first) > 1)
  379. {
  380. current = (first + last) / 2;
  381. if ((diff = strcmp(name, entities[current].name)) == 0)
  382. return (entities[current].val);
  383. else if (diff < 0)
  384. last = current;
  385. else
  386. first = current;
  387. }
  388. /*
  389. * If we get here, there is a small chance that there is still
  390. * a match; check first and last...
  391. */
  392. if (!strcmp(name, entities[first].name))
  393. return (entities[first].val);
  394. else if (!strcmp(name, entities[last].name))
  395. return (entities[last].val);
  396. else
  397. return (-1);
  398. }