2 //============================================================+
3 // File name : html_entity_decode_php4.php
5 // Last Update : 2008-04-01
6 // Author : Nicola Asuni
8 // License : GNU LGPL (http://www.gnu.org/copyleft/lesser.html)
9 // ----------------------------------------------------------------------------
10 // Copyright (C) 2002-2008 Nicola Asuni - Tecnick.com S.r.l.
12 // This program is free software: you can redistribute it and/or modify
13 // it under the terms of the GNU Lesser General Public License as published by
14 // the Free Software Foundation, either version 2.1 of the License, or
15 // (at your option) any later version.
17 // This program is distributed in the hope that it will be useful,
18 // but WITHOUT ANY WARRANTY; without even the implied warranty of
19 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 // GNU Lesser General Public License for more details.
22 // You should have received a copy of the GNU Lesser General Public License
23 // along with this program. If not, see <http://www.gnu.org/licenses/>.
25 // See LICENSE.TXT file for more information.
26 // ----------------------------------------------------------------------------
29 // Description : This is a PHP4 function that redefines the
30 // standard html_entity_decode function to support UTF-8 encoding.
34 // Author: Nicola Asuni
40 // 09044 Quartucciu (CA)
44 //============================================================+
47 * This is a PHP4 function that redefines the standard
48 * html_entity_decode function to support UTF-8 encoding.
49 * @package com.tecnick.tcpdf
50 * @author Nicola Asuni
51 * @copyright 2004-2008 Nicola Asuni - Tecnick.com S.r.l (www.tecnick.com) Via Della Pace, 11 - 09044 - Quartucciu (CA) - ITALY - www.tecnick.com - info@tecnick.com
52 * @link http://www.tcpdf.org
53 * @license http://www.gnu.org/copyleft/lesser.html LGPL
/**
 * Returns the UTF-8 byte sequence for a Unicode code point.
 *
 * Values that UTF-8 cannot represent (non-numeric or negative input, the
 * UTF-16 surrogate range 0xD800-0xDFFF, anything above 0x10FFFF) yield the
 * default value, a single space, instead of an invalid byte sequence.
 *
 * @param $num unicode value to convert.
 * @return string UTF-8 encoded character, or ' ' when $num is not encodable.
 */
function code_to_utf8($num) {
	// Range-check before the integer cast so that oversized floats (hexdec()
	// returns a float for very long hex strings) are rejected, not truncated.
	if (!is_numeric($num) || ($num < 0) || ($num > 0x10FFFF)) {
		return ' '; // default value
	}
	$num = (int) $num;
	if (($num >= 0xD800) && ($num <= 0xDFFF)) {
		// Surrogate halves are not valid scalar values and have no UTF-8 form.
		return ' '; // default value
	}
	if ($num <= 0x7F) {
		// 1 byte: plain ASCII.
		return chr($num);
	}
	if ($num <= 0x7FF) {
		// 2 bytes: 110xxxxx 10xxxxxx
		return chr(($num >> 0x06) + 0xC0).chr(($num & 0x3F) + 0x80);
	}
	if ($num <= 0xFFFF) {
		// 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
		return chr(($num >> 0x0C) + 0xE0).chr((($num >> 0x06) & 0x3F) + 0x80).chr(($num & 0x3F) + 0x80);
	}
	// 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
	return chr(($num >> 0x12) + 0xF0).chr((($num >> 0x0C) & 0x3F) + 0x80).chr((($num >> 0x06) & 0x3F) + 0x80).chr(($num & 0x3F) + 0x80);
}
/**
 * Reverse function for htmlentities.
 * Converts HTML 4.01 named entities and numeric character references
 * (decimal "&#NNN;" and hexadecimal "&#xHHH;") to UTF-8.
 *
 * The text is decoded in a single pass, so the output of one replacement is
 * never decoded a second time ("&amp;#65;" becomes "&#65;", not "A").
 * Entity names are case-sensitive; unknown names are left untouched.
 * "&quot;" is not part of the table and is therefore left as is.
 *
 * @param $text_to_convert Text to convert.
 * @return string converted
 */
function html_entity_decode_php4($text_to_convert) {
	$htmlentities_table = array(
		'&Aacute;' => chr(195).chr(129),
		'&aacute;' => chr(195).chr(161),
		'&Acirc;' => chr(195).chr(130),
		'&acirc;' => chr(195).chr(162),
		'&acute;' => chr(194).chr(180),
		'&AElig;' => chr(195).chr(134),
		'&aelig;' => chr(195).chr(166),
		'&Agrave;' => chr(195).chr(128),
		'&agrave;' => chr(195).chr(160),
		'&alefsym;' => chr(226).chr(132).chr(181),
		'&Alpha;' => chr(206).chr(145),
		'&alpha;' => chr(206).chr(177),
		'&amp;' => chr(38),
		'&and;' => chr(226).chr(136).chr(167),
		'&ang;' => chr(226).chr(136).chr(160),
		'&Aring;' => chr(195).chr(133),
		'&aring;' => chr(195).chr(165),
		'&asymp;' => chr(226).chr(137).chr(136),
		'&Atilde;' => chr(195).chr(131),
		'&atilde;' => chr(195).chr(163),
		'&Auml;' => chr(195).chr(132),
		'&auml;' => chr(195).chr(164),
		'&bdquo;' => chr(226).chr(128).chr(158),
		'&Beta;' => chr(206).chr(146),
		'&beta;' => chr(206).chr(178),
		'&brvbar;' => chr(194).chr(166),
		'&bull;' => chr(226).chr(128).chr(162),
		'&cap;' => chr(226).chr(136).chr(169),
		'&Ccedil;' => chr(195).chr(135),
		'&ccedil;' => chr(195).chr(167),
		'&cedil;' => chr(194).chr(184),
		'&cent;' => chr(194).chr(162),
		'&Chi;' => chr(206).chr(167),
		'&chi;' => chr(207).chr(135),
		'&circ;' => chr(203).chr(134),
		'&clubs;' => chr(226).chr(153).chr(163),
		'&cong;' => chr(226).chr(137).chr(133),
		'&copy;' => chr(194).chr(169),
		'&crarr;' => chr(226).chr(134).chr(181),
		'&cup;' => chr(226).chr(136).chr(170),
		'&curren;' => chr(194).chr(164),
		'&dagger;' => chr(226).chr(128).chr(160),
		'&Dagger;' => chr(226).chr(128).chr(161),
		'&darr;' => chr(226).chr(134).chr(147),
		'&dArr;' => chr(226).chr(135).chr(147),
		'&deg;' => chr(194).chr(176),
		'&Delta;' => chr(206).chr(148),
		'&delta;' => chr(206).chr(180),
		'&diams;' => chr(226).chr(153).chr(166),
		'&divide;' => chr(195).chr(183),
		'&Eacute;' => chr(195).chr(137),
		'&eacute;' => chr(195).chr(169),
		'&Ecirc;' => chr(195).chr(138),
		'&ecirc;' => chr(195).chr(170),
		'&Egrave;' => chr(195).chr(136),
		'&egrave;' => chr(195).chr(168),
		'&empty;' => chr(226).chr(136).chr(133),
		'&emsp;' => chr(226).chr(128).chr(131),
		'&ensp;' => chr(226).chr(128).chr(130),
		'&Epsilon;' => chr(206).chr(149),
		'&epsilon;' => chr(206).chr(181),
		'&equiv;' => chr(226).chr(137).chr(161),
		'&Eta;' => chr(206).chr(151),
		'&eta;' => chr(206).chr(183),
		'&ETH;' => chr(195).chr(144),
		'&eth;' => chr(195).chr(176),
		'&Euml;' => chr(195).chr(139),
		'&euml;' => chr(195).chr(171),
		'&euro;' => chr(226).chr(130).chr(172),
		'&exist;' => chr(226).chr(136).chr(131),
		'&fnof;' => chr(198).chr(146),
		'&forall;' => chr(226).chr(136).chr(128),
		'&frac12;' => chr(194).chr(189),
		'&frac14;' => chr(194).chr(188),
		'&frac34;' => chr(194).chr(190),
		'&frasl;' => chr(226).chr(129).chr(132),
		'&Gamma;' => chr(206).chr(147),
		'&gamma;' => chr(206).chr(179),
		'&ge;' => chr(226).chr(137).chr(165),
		'&harr;' => chr(226).chr(134).chr(148),
		'&hArr;' => chr(226).chr(135).chr(148),
		'&hearts;' => chr(226).chr(153).chr(165),
		'&hellip;' => chr(226).chr(128).chr(166),
		'&Iacute;' => chr(195).chr(141),
		'&iacute;' => chr(195).chr(173),
		'&Icirc;' => chr(195).chr(142),
		'&icirc;' => chr(195).chr(174),
		'&iexcl;' => chr(194).chr(161),
		'&Igrave;' => chr(195).chr(140),
		'&igrave;' => chr(195).chr(172),
		'&image;' => chr(226).chr(132).chr(145),
		'&infin;' => chr(226).chr(136).chr(158),
		'&int;' => chr(226).chr(136).chr(171),
		'&Iota;' => chr(206).chr(153),
		'&iota;' => chr(206).chr(185),
		'&iquest;' => chr(194).chr(191),
		'&isin;' => chr(226).chr(136).chr(136),
		'&Iuml;' => chr(195).chr(143),
		'&iuml;' => chr(195).chr(175),
		'&Kappa;' => chr(206).chr(154),
		'&kappa;' => chr(206).chr(186),
		'&Lambda;' => chr(206).chr(155),
		'&lambda;' => chr(206).chr(187),
		'&lang;' => chr(226).chr(140).chr(169),
		'&laquo;' => chr(194).chr(171),
		'&larr;' => chr(226).chr(134).chr(144),
		'&lArr;' => chr(226).chr(135).chr(144),
		'&lceil;' => chr(226).chr(140).chr(136),
		'&ldquo;' => chr(226).chr(128).chr(156),
		'&le;' => chr(226).chr(137).chr(164),
		'&lfloor;' => chr(226).chr(140).chr(138),
		'&lowast;' => chr(226).chr(136).chr(151),
		'&loz;' => chr(226).chr(151).chr(138),
		'&lrm;' => chr(226).chr(128).chr(142),
		'&lsaquo;' => chr(226).chr(128).chr(185),
		'&lsquo;' => chr(226).chr(128).chr(152),
		'&macr;' => chr(194).chr(175),
		'&mdash;' => chr(226).chr(128).chr(148),
		'&micro;' => chr(194).chr(181),
		'&middot;' => chr(194).chr(183),
		'&minus;' => chr(226).chr(136).chr(146),
		'&Mu;' => chr(206).chr(156),
		'&mu;' => chr(206).chr(188),
		'&nabla;' => chr(226).chr(136).chr(135),
		'&nbsp;' => chr(194).chr(160),
		'&ndash;' => chr(226).chr(128).chr(147),
		'&ne;' => chr(226).chr(137).chr(160),
		'&ni;' => chr(226).chr(136).chr(139),
		'&not;' => chr(194).chr(172),
		'&notin;' => chr(226).chr(136).chr(137),
		'&nsub;' => chr(226).chr(138).chr(132),
		'&Ntilde;' => chr(195).chr(145),
		'&ntilde;' => chr(195).chr(177),
		'&Nu;' => chr(206).chr(157),
		'&nu;' => chr(206).chr(189),
		'&Oacute;' => chr(195).chr(147),
		'&oacute;' => chr(195).chr(179),
		'&Ocirc;' => chr(195).chr(148),
		'&ocirc;' => chr(195).chr(180),
		'&OElig;' => chr(197).chr(146),
		'&oelig;' => chr(197).chr(147),
		'&Ograve;' => chr(195).chr(146),
		'&ograve;' => chr(195).chr(178),
		'&oline;' => chr(226).chr(128).chr(190),
		'&Omega;' => chr(206).chr(169),
		'&omega;' => chr(207).chr(137),
		'&Omicron;' => chr(206).chr(159),
		'&omicron;' => chr(206).chr(191),
		'&oplus;' => chr(226).chr(138).chr(149),
		'&or;' => chr(226).chr(136).chr(168),
		'&ordf;' => chr(194).chr(170),
		'&ordm;' => chr(194).chr(186),
		'&Oslash;' => chr(195).chr(152),
		'&oslash;' => chr(195).chr(184),
		'&Otilde;' => chr(195).chr(149),
		'&otilde;' => chr(195).chr(181),
		'&otimes;' => chr(226).chr(138).chr(151),
		'&Ouml;' => chr(195).chr(150),
		'&ouml;' => chr(195).chr(182),
		'&para;' => chr(194).chr(182),
		'&part;' => chr(226).chr(136).chr(130),
		'&permil;' => chr(226).chr(128).chr(176),
		'&perp;' => chr(226).chr(138).chr(165),
		'&Phi;' => chr(206).chr(166),
		'&phi;' => chr(207).chr(134),
		'&Pi;' => chr(206).chr(160),
		'&pi;' => chr(207).chr(128),
		'&piv;' => chr(207).chr(150),
		'&plusmn;' => chr(194).chr(177),
		'&pound;' => chr(194).chr(163),
		'&prime;' => chr(226).chr(128).chr(178),
		'&Prime;' => chr(226).chr(128).chr(179),
		'&prod;' => chr(226).chr(136).chr(143),
		'&prop;' => chr(226).chr(136).chr(157),
		'&Psi;' => chr(206).chr(168),
		'&psi;' => chr(207).chr(136),
		'&radic;' => chr(226).chr(136).chr(154),
		'&rang;' => chr(226).chr(140).chr(170),
		'&raquo;' => chr(194).chr(187),
		'&rarr;' => chr(226).chr(134).chr(146),
		'&rArr;' => chr(226).chr(135).chr(146),
		'&rceil;' => chr(226).chr(140).chr(137),
		'&rdquo;' => chr(226).chr(128).chr(157),
		'&real;' => chr(226).chr(132).chr(156),
		'&reg;' => chr(194).chr(174),
		'&rfloor;' => chr(226).chr(140).chr(139),
		'&Rho;' => chr(206).chr(161),
		'&rho;' => chr(207).chr(129),
		'&rlm;' => chr(226).chr(128).chr(143),
		'&rsaquo;' => chr(226).chr(128).chr(186),
		'&rsquo;' => chr(226).chr(128).chr(153),
		'&sbquo;' => chr(226).chr(128).chr(154),
		'&Scaron;' => chr(197).chr(160),
		'&scaron;' => chr(197).chr(161),
		'&sdot;' => chr(226).chr(139).chr(133),
		'&sect;' => chr(194).chr(167),
		'&shy;' => chr(194).chr(173),
		'&Sigma;' => chr(206).chr(163),
		'&sigma;' => chr(207).chr(131),
		'&sigmaf;' => chr(207).chr(130),
		'&sim;' => chr(226).chr(136).chr(188),
		'&spades;' => chr(226).chr(153).chr(160),
		'&sub;' => chr(226).chr(138).chr(130),
		'&sube;' => chr(226).chr(138).chr(134),
		'&sum;' => chr(226).chr(136).chr(145),
		'&sup1;' => chr(194).chr(185),
		'&sup2;' => chr(194).chr(178),
		'&sup3;' => chr(194).chr(179),
		'&sup;' => chr(226).chr(138).chr(131),
		'&supe;' => chr(226).chr(138).chr(135),
		'&szlig;' => chr(195).chr(159),
		'&Tau;' => chr(206).chr(164),
		'&tau;' => chr(207).chr(132),
		'&there4;' => chr(226).chr(136).chr(180),
		'&Theta;' => chr(206).chr(152),
		'&theta;' => chr(206).chr(184),
		'&thetasym;' => chr(207).chr(145),
		'&thinsp;' => chr(226).chr(128).chr(137),
		'&THORN;' => chr(195).chr(158),
		'&thorn;' => chr(195).chr(190),
		'&tilde;' => chr(203).chr(156),
		'&times;' => chr(195).chr(151),
		'&trade;' => chr(226).chr(132).chr(162),
		'&Uacute;' => chr(195).chr(154),
		'&uacute;' => chr(195).chr(186),
		'&uarr;' => chr(226).chr(134).chr(145),
		'&uArr;' => chr(226).chr(135).chr(145),
		'&Ucirc;' => chr(195).chr(155),
		'&ucirc;' => chr(195).chr(187),
		'&Ugrave;' => chr(195).chr(153),
		'&ugrave;' => chr(195).chr(185),
		'&uml;' => chr(194).chr(168),
		'&upsih;' => chr(207).chr(146),
		'&Upsilon;' => chr(206).chr(165),
		'&upsilon;' => chr(207).chr(133),
		'&Uuml;' => chr(195).chr(156),
		'&uuml;' => chr(195).chr(188),
		'&weierp;' => chr(226).chr(132).chr(152),
		'&Xi;' => chr(206).chr(158),
		'&xi;' => chr(206).chr(190),
		'&Yacute;' => chr(195).chr(157),
		'&yacute;' => chr(195).chr(189),
		'&yen;' => chr(194).chr(165),
		'&yuml;' => chr(195).chr(191),
		'&Yuml;' => chr(197).chr(184),
		'&Zeta;' => chr(206).chr(150),
		'&zeta;' => chr(206).chr(182),
		'&zwj;' => chr(226).chr(128).chr(141),
		'&zwnj;' => chr(226).chr(128).chr(140),
		// Markup delimiters, which htmlentities() also produces.
		'&gt;' => '>',
		'&lt;' => '<'
	);
	// Split on anything shaped like an entity and keep the delimiters:
	// even indexes hold literal text, odd indexes hold one entity token each.
	// This replaces the former preg_replace() calls with the /e (eval)
	// modifier, which PHP 7 no longer supports.
	$chunks = preg_split('~(&(?:#x[0-9a-f]+|#[0-9]+|[a-z][a-z0-9]*);)~i', $text_to_convert, -1, PREG_SPLIT_DELIM_CAPTURE);
	if (!is_array($chunks)) {
		// PCRE failure: hand the text back unchanged rather than losing it.
		return $text_to_convert;
	}
	$return_text = '';
	$total = count($chunks);
	for ($i = 0; $i < $total; ++$i) {
		$chunk = $chunks[$i];
		if (($i % 2) == 0) {
			// literal text between entities
			$return_text .= $chunk;
		} elseif (substr($chunk, 1, 1) === '#') {
			// Numeric character reference; code_to_utf8() decides what to
			// emit for values it cannot encode.
			$marker = substr($chunk, 2, 1);
			if (($marker === 'x') || ($marker === 'X')) {
				$return_text .= code_to_utf8(hexdec(substr($chunk, 3, -1)));
			} else {
				// Explicit base-10 cast: leading zeros must not be read as octal.
				$return_text .= code_to_utf8((int) substr($chunk, 2, -1));
			}
		} elseif (isset($htmlentities_table[$chunk])) {
			// known named entity (case-sensitive lookup)
			$return_text .= $htmlentities_table[$chunk];
		} else {
			// unknown name: keep it verbatim
			$return_text .= $chunk;
		}
	}
	return $return_text;
}
340 //============================================================+
342 //============================================================+