1 : <?php
2 :
// If we want to implement error collecting here, we'll need to use some sort
// of global data (probably trigger_error), because it's impossible to pass
// $config or $context to the callback functions.
6 :
/**
 * Handles referencing and dereferencing character entities.
 *
 * Entities are handled in two groups: the "special" ones listed in
 * $_special_dec2str / $_special_ent2dec (quote, ampersand, apostrophe,
 * less-than, greater-than) and all remaining ("non-special") entities.
 * Each public method decodes exactly one group and leaves the other
 * group's entities untouched in the string.
 */
class HTMLPurifier_EntityParser
{

    /**
     * Reference to entity lookup table. Populated lazily with
     * HTMLPurifier_EntityLookup::instance() the first time a named,
     * non-special entity has to be resolved.
     */
    protected $_entity_lookup;

    /**
     * Callback regex string for parsing entities.
     *
     * Capture groups:
     *  1. hex digits of a "&#x...;" reference
     *  2. decimal digits of a "&#...;" reference (leading zeros skipped)
     *  3. entity name (XML style)
     * The trailing semicolon is optional.
     */
    protected $_substituteEntitiesRegex =
'/&(?:[#]x([a-fA-F0-9]+)|[#]0*(\d+)|([A-Za-z_:][A-Za-z0-9.\-_:]*));?/';

    /**
     * Decimal to parsed string conversion table for special entities.
     */
    protected $_special_dec2str =
            array(
                    34 => '"',
                    38 => '&',
                    39 => "'",
                    60 => '<',
                    62 => '>'
            );

    /**
     * Stripped entity names to decimal conversion table for special entities.
     */
    protected $_special_ent2dec =
            array(
                    'quot' => 34,
                    'amp'  => 38,
                    'lt'   => 60,
                    'gt'   => 62
            );

    /**
     * Substitutes non-special entities with their parsed equivalents.
     * Special entities are left in the string exactly as written.
     *
     * @param string $string String to have non-special entities parsed.
     * @return string Parsed string.
     */
    public function substituteNonSpecialEntities($string) {
        // it will try to detect missing semicolons, but don't rely on it
        return preg_replace_callback(
            $this->_substituteEntitiesRegex,
            array($this, 'nonSpecialEntityCallback'),
            $string
        );
    }

    /**
     * Callback function for substituteNonSpecialEntities() that does the work.
     *
     * @param array $matches PCRE matches array, with 0 the entire match, and
     *                       either index 1, 2 or 3 set with a hex value, dec
     *                       value, or string (respectively).
     * @return string Replacement string; the original entity text when the
     *                entity is special or its name is unknown.
     */
    protected function nonSpecialEntityCallback($matches) {
        // replaces all but the special entities
        $entity = $matches[0];
        $code = $this->numericEntityCode($matches);

        if ($code !== null) {
            // abort for special characters
            if (is_int($code) && isset($this->_special_dec2str[$code])) {
                return $entity;
            }
            return HTMLPurifier_Encoder::unichr($code);
        }

        $name = $matches[3];
        if (isset($this->_special_ent2dec[$name])) {
            return $entity;
        }
        if (!$this->_entity_lookup) {
            $this->_entity_lookup = HTMLPurifier_EntityLookup::instance();
        }
        if (isset($this->_entity_lookup->table[$name])) {
            return $this->_entity_lookup->table[$name];
        }
        return $entity;
    }

    /**
     * Substitutes only special entities with their parsed equivalents.
     * Every other entity is left in the string exactly as written.
     *
     * @notice We try to avoid calling this function because otherwise, it
     *         would have to be called a lot (for every parsed section).
     *
     * @param string $string String to have special entities parsed.
     * @return string Parsed string.
     */
    public function substituteSpecialEntities($string) {
        return preg_replace_callback(
            $this->_substituteEntitiesRegex,
            array($this, 'specialEntityCallback'),
            $string
        );
    }

    /**
     * Callback function for substituteSpecialEntities() that does the work.
     *
     * This callback has same syntax as nonSpecialEntityCallback().
     *
     * @param array $matches PCRE-style matches array, with 0 the entire match,
     *                       and either index 1, 2 or 3 set with a hex value,
     *                       dec value, or string (respectively).
     * @return string Replacement string; the original entity text when the
     *                entity is not a special one.
     */
    protected function specialEntityCallback($matches) {
        $entity = $matches[0];
        $code = $this->numericEntityCode($matches);

        if ($code === null) {
            $name = $matches[3];
            if (!isset($this->_special_ent2dec[$name])) {
                return $entity;
            }
            // Named entities resolve to a code point first; the character
            // itself comes from the dec2str table below. Returning the code
            // point directly would emit e.g. "34" instead of a quote.
            $code = $this->_special_ent2dec[$name];
        }

        if (is_int($code) && isset($this->_special_dec2str[$code])) {
            return $this->_special_dec2str[$code];
        }
        return $entity;
    }

    /**
     * Extracts the code point from a numeric entity match.
     *
     * @param array $matches Matches array produced by $_substituteEntitiesRegex.
     * @return int|float|null Code point of a "&#...;" / "&#x...;" reference, or
     *                        null when the match is a named entity. hexdec()
     *                        yields a float when the value does not fit in an
     *                        integer, so callers check is_int() before using
     *                        the result as an array key.
     */
    private function numericEntityCode($matches) {
        $entity = $matches[0];
        if (strncmp($entity, '&#', 2) !== 0) {
            return null;
        }
        $is_hex = isset($entity[2]) && $entity[2] === 'x';
        return $is_hex ? hexdec($matches[1]) : (int) $matches[2];
    }

}
143 :
|