1 : <?php
2 :
3 : /**
4 : * Definition of the purified HTML that describes allowed children,
5 : * attributes, and many other things.
6 : *
7 : * Conventions:
8 : *
9 : * All member variables that are prefixed with info
10 : * (including the main $info array) are used by HTML Purifier internals
11 : * and should not be directly edited when customizing the HTMLDefinition.
12 : * They can usually be set via configuration directives or custom
13 : * modules.
14 : *
15 : * On the other hand, member variables without the info prefix are used
16 : * internally by the HTMLDefinition and MUST NOT be used by other HTML
17 : * Purifier internals. Many of them, however, are public, and may be
18 : * edited by userspace code to tweak the behavior of HTMLDefinition.
19 : *
20 : * @note This class is inspected by Printer_HTMLDefinition; please
21 : * update that class if things here change.
22 : *
23 : * @warning Directives that change this object's structure must be in
24 : * the HTML or Attr namespace!
25 : */
class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
{

    // FULLY-PUBLIC VARIABLES ---------------------------------------------

    /**
     * Associative array of element names to HTMLPurifier_ElementDef
     */
    public $info = array();

    /**
     * Associative array of global attribute name to attribute definition.
     */
    public $info_global_attr = array();

    /**
     * String name of parent element HTML will be going into.
     */
    public $info_parent = 'div';

    /**
     * Definition for parent element, allows parent element to be a
     * tag that's not allowed inside the HTML fragment.
     */
    public $info_parent_def;

    /**
     * String name of element used to wrap inline elements in block context
     * @note This is rarely used except for BLOCKQUOTEs in strict mode
     */
    public $info_block_wrapper = 'p';

    /**
     * Associative array of deprecated tag name to HTMLPurifier_TagTransform
     */
    public $info_tag_transform = array();

    /**
     * Indexed list of HTMLPurifier_AttrTransform to be performed before validation.
     */
    public $info_attr_transform_pre = array();

    /**
     * Indexed list of HTMLPurifier_AttrTransform to be performed after validation.
     */
    public $info_attr_transform_post = array();

    /**
     * Nested lookup array of content set name (Block, Inline) to
     * element name to whether or not it belongs in that content set.
     */
    public $info_content_sets = array();

    /**
     * Indexed list of HTMLPurifier_Injector to be used.
     */
    public $info_injector = array();

    /**
     * Doctype object
     */
    public $doctype;



    // RAW CUSTOMIZATION STUFF --------------------------------------------

    /**
     * Adds a custom attribute to a pre-existing element
     * @note This is strictly convenience, and does not have a corresponding
     *       method in HTMLPurifier_HTMLModule
     * @param $element_name String element name to add attribute to
     * @param $attr_name String name of attribute
     * @param $def Attribute definition, can be string or object, see
     *             HTMLPurifier_AttrTypes for details
     */
    public function addAttribute($element_name, $attr_name, $def) {
        $module = $this->getAnonymousModule();
        // Reuse the element if the anonymous module already carries it,
        // otherwise register a blank one to hang the attribute on.
        if (!isset($module->info[$element_name])) {
            $element = $module->addBlankElement($element_name);
        } else {
            $element = $module->info[$element_name];
        }
        $element->attr[$attr_name] = $def;
    }

    /**
     * Adds a custom element to your HTML definition
     * @note See HTMLPurifier_HTMLModule::addElement for detailed
     *       parameter and return value descriptions.
     * @param $element_name    Forwarded unchanged to the anonymous module
     * @param $type            Forwarded unchanged to the anonymous module
     * @param $contents        Forwarded unchanged to the anonymous module
     * @param $attr_collections Forwarded unchanged to the anonymous module
     * @param $attributes      Forwarded unchanged to the anonymous module;
     *                         optional, defaults to an empty array
     * @return Whatever the anonymous module's addElement() returns
     */
    public function addElement($element_name, $type, $contents, $attr_collections, $attributes = array()) {
        $module = $this->getAnonymousModule();
        // assume that if the user is calling this, the element
        // is safe. This may not be a good idea
        $element = $module->addElement($element_name, $type, $contents, $attr_collections, $attributes);
        return $element;
    }

    /**
     * Adds a blank element to your HTML definition, for overriding
     * existing behavior
     * @note See HTMLPurifier_HTMLModule::addBlankElement for detailed
     *       parameter and return value descriptions.
     */
    public function addBlankElement($element_name) {
        $module = $this->getAnonymousModule();
        $element = $module->addBlankElement($element_name);
        return $element;
    }

    /**
     * Retrieves a reference to the anonymous module, so you can
     * bust out advanced features without having to make your own
     * module. The module is created lazily on first use and named
     * 'Anonymous'.
     */
    public function getAnonymousModule() {
        // empty() instead of a bare read: processModules() unsets this
        // property once it has been handed to the manager, and reading an
        // unset property directly raises an undefined-property notice.
        if (empty($this->_anonModule)) {
            $this->_anonModule = new HTMLPurifier_HTMLModule();
            $this->_anonModule->name = 'Anonymous';
        }
        return $this->_anonModule;
    }

    /**
     * Lazily created module holding the customizations made through
     * addAttribute() / addElement() / addBlankElement().
     */
    private $_anonModule;


    // PUBLIC BUT INTERNAL VARIABLES --------------------------------------

    public $type = 'HTML';
    public $manager; /**< Instance of HTMLPurifier_HTMLModuleManager */

    /**
     * Performs low-cost, preliminary initialization.
     */
    public function __construct() {
        $this->manager = new HTMLPurifier_HTMLModuleManager();
    }

    /**
     * Builds the definition: pulls data out of the module manager, applies
     * configuration, then drops the manager and the per-element
     * content_model / content_model_type fields, which are not needed once
     * setup has finished.
     * @param $config Configuration object, passed on to the setup steps
     */
    protected function doSetup($config) {
        $this->processModules($config);
        $this->setupConfigStuff($config);
        unset($this->manager);

        // cleanup some of the element definitions
        foreach ($this->info as $k => $v) {
            unset($this->info[$k]->content_model);
            unset($this->info[$k]->content_model_type);
        }
    }

    /**
     * Extract out the information from the manager
     * @param $config Configuration object handed to the manager's setup()
     */
    protected function processModules($config) {

        if (!empty($this->_anonModule)) {
            // for user specific changes
            // this is late-loaded so we don't have to deal with PHP4
            // reference wonky-ness
            $this->manager->addModule($this->_anonModule);
            unset($this->_anonModule);
        }

        $this->manager->setup($config);
        $this->doctype = $this->manager->doctype;

        // Each module may add entries to these lookups, or remove an entry
        // contributed earlier by setting its value to exactly false.
        $merged_properties = array(
            'info_tag_transform',
            'info_attr_transform_pre',
            'info_attr_transform_post',
            'info_injector',
        );
        foreach ($this->manager->modules as $module) {
            foreach ($merged_properties as $property) {
                foreach ($module->{$property} as $k => $v) {
                    if ($v === false) unset($this->{$property}[$k]);
                    else $this->{$property}[$k] = $v;
                }
            }
        }

        $this->info = $this->manager->getElements();
        $this->info_content_sets = $this->manager->contentSets->lookup;

    }

    /**
     * Sets up stuff based on config. We need a better way of doing this.
     *
     * Reads, in order, the HTML.BlockWrapper, HTML.Parent,
     * HTML.AllowedElements, HTML.AllowedAttributes, HTML.Allowed,
     * HTML.ForbiddenElements and HTML.ForbiddenAttributes directives, prunes
     * $info accordingly, and finally drops injectors whose checkNeeded()
     * does not return false. Problems are reported through trigger_error().
     *
     * @param $config Configuration object the directives are read from
     */
    protected function setupConfigStuff($config) {

        // setup block wrapper and parent ---------------------------------

        $block_wrapper = $config->get('HTML', 'BlockWrapper');
        if (isset($this->info_content_sets['Block'][$block_wrapper])) {
            $this->info_block_wrapper = $block_wrapper;
        } else {
            trigger_error('Cannot use non-block element as block wrapper',
                E_USER_ERROR);
        }

        $parent = $config->get('HTML', 'Parent');
        $def = $this->manager->getElement($parent, true);
        if ($def) {
            $this->info_parent = $parent;
            $this->info_parent_def = $def;
        } else {
            trigger_error('Cannot use unrecognized element as parent',
                E_USER_ERROR);
            // if the error handler lets execution continue, fall back to
            // the definition of the current (default) parent
            $this->info_parent_def = $this->manager->getElement($this->info_parent, true);
        }

        // support template text
        $support = "(for information on implementing this, see the ".
                   "support forums) ";

        // setup allowed elements -----------------------------------------

        $allowed_elements = $config->get('HTML', 'AllowedElements');
        $allowed_attributes = $config->get('HTML', 'AllowedAttributes'); // retrieve early

        // HTML.Allowed is only consulted when neither of the two more
        // specific directives supplied an array.
        if (!is_array($allowed_elements) && !is_array($allowed_attributes)) {
            $allowed = $config->get('HTML', 'Allowed');
            if (is_string($allowed)) {
                list($allowed_elements, $allowed_attributes) = $this->parseTinyMCEAllowedList($allowed);
            }
        }

        if (is_array($allowed_elements)) {
            foreach ($this->info as $name => $d) {
                if (!isset($allowed_elements[$name])) unset($this->info[$name]);
                // whatever is left afterwards names elements we do not know
                unset($allowed_elements[$name]);
            }
            // emit errors
            foreach ($allowed_elements as $element => $d) {
                $element = htmlspecialchars($element); // PHP doesn't escape errors, be careful!
                trigger_error("Element '$element' is not supported $support", E_USER_WARNING);
            }
        }

        // setup allowed attributes ---------------------------------------

        // Working copy: every key that matches something is removed from
        // it, so the leftovers are the entries that matched nothing.
        $allowed_attributes_mutable = $allowed_attributes; // by copy!
        if (is_array($allowed_attributes)) {

            // This actually doesn't do anything, since we went away from
            // global attributes.  It's possible that userland code uses
            // it, but HTMLModuleManager doesn't!
            foreach ($this->info_global_attr as $attr => $x) {
                $keys = array($attr, "*@$attr", "*.$attr");
                $delete = true;
                foreach ($keys as $key) {
                    if (isset($allowed_attributes[$key])) {
                        $delete = false;
                    }
                    unset($allowed_attributes_mutable[$key]);
                }
                if ($delete) unset($this->info_global_attr[$attr]);
            }

            foreach ($this->info as $tag => $info) {
                foreach ($info->attr as $attr => $x) {
                    $keys = array("$tag@$attr", $attr, "*@$attr", "$tag.$attr", "*.$attr");
                    $delete = true;
                    foreach ($keys as $key) {
                        if (isset($allowed_attributes[$key])) {
                            $delete = false;
                        }
                        unset($allowed_attributes_mutable[$key]);
                    }
                    if ($delete) unset($this->info[$tag]->attr[$attr]);
                }
            }

            // emit errors
            foreach ($allowed_attributes_mutable as $elattr => $d) {
                // at most two pieces: "element" and "attribute"
                $bits = preg_split('/[.@]/', $elattr, 2);
                $c = count($bits);
                switch ($c) {
                    case 2:
                        if ($bits[0] !== '*') {
                            $element = htmlspecialchars($bits[0]);
                            $attribute = htmlspecialchars($bits[1]);
                            if (!isset($this->info[$bits[0]])) {
                                // E_USER_NOTICE is trigger_error()'s default
                                // level; it is spelled out here to show the
                                // lower severity is intentional.
                                trigger_error("Cannot allow attribute '$attribute' if element '$element' is not allowed/supported $support",
                                    E_USER_NOTICE);
                            } else {
                                trigger_error("Attribute '$attribute' in element '$element' not supported $support",
                                    E_USER_WARNING);
                            }
                            break;
                        }
                        // otherwise fall through
                    case 1:
                        // Use the last piece so that "*.attr" / "*@attr"
                        // report the attribute name rather than the '*'.
                        $attribute = htmlspecialchars($bits[$c - 1]);
                        trigger_error("Global attribute '$attribute' is not ".
                            "supported in any elements $support",
                            E_USER_WARNING);
                        break;
                }
            }

        }

        // setup forbidden elements ---------------------------------------

        $forbidden_elements   = $config->get('HTML', 'ForbiddenElements');
        $forbidden_attributes = $config->get('HTML', 'ForbiddenAttributes');

        foreach ($this->info as $tag => $info) {
            if (isset($forbidden_elements[$tag])) {
                unset($this->info[$tag]);
                continue;
            }
            foreach ($info->attr as $attr => $x) {
                if (
                    isset($forbidden_attributes["$tag@$attr"]) ||
                    isset($forbidden_attributes["*@$attr"]) ||
                    isset($forbidden_attributes[$attr])
                ) {
                    unset($this->info[$tag]->attr[$attr]);
                    continue;
                } // this segment might get removed eventually
                elseif (isset($forbidden_attributes["$tag.$attr"])) {
                    // $tag.$attr are not user supplied, so no worries!
                    trigger_error("Error with $tag.$attr: tag.attr syntax not supported for HTML.ForbiddenAttributes; use tag@attr instead", E_USER_WARNING);
                }
            }
        }
        foreach ($forbidden_attributes as $key => $v) {
            // numeric-looking array keys come back as integers; normalise
            // so the character offsets below are always valid
            $key = (string) $key;
            if (strlen($key) < 2) continue;
            if ($key[0] != '*') continue;
            if ($key[1] == '.') {
                trigger_error("Error with $key: *.attr syntax not supported for HTML.ForbiddenAttributes; use attr instead", E_USER_WARNING);
            }
        }

        // setup injectors -----------------------------------------------------
        foreach ($this->info_injector as $i => $injector) {
            if ($injector->checkNeeded($config) !== false) {
                // remove injector that does not have its required
                // elements/attributes present, and is thus not needed.
                unset($this->info_injector[$i]);
            }
        }
    }

    /**
     * Parses a TinyMCE-flavored Allowed Elements and Attributes list into
     * separate lists for processing. Format is element[attr1|attr2],element2...
     *
     * Spaces and tabs are stripped first; chunks are separated by commas or
     * line breaks. The element name '*' is never added to the element list,
     * but its attributes are recorded under "*.attr".
     *
     * @warning Although it's largely drawn from TinyMCE's implementation,
     *      it is different, and you'll probably have to modify your lists
     * @param $list String list to parse
     * @return array($allowed_elements, $allowed_attributes), both lookup
     *      arrays whose values are true; attribute keys have the form
     *      "element.attr"
     * @todo Give this its own class, probably static interface
     */
    public function parseTinyMCEAllowedList($list) {

        $list = str_replace(array(' ', "\t"), '', $list);

        $elements   = array();
        $attributes = array();

        $chunks = preg_split('/(,|[\n\r]+)/', $list);
        foreach ($chunks as $chunk) {
            if (empty($chunk)) continue;
            // A chunk only carries an attribute list when '[' appears after
            // at least one character of element name; a leading '[' leaves
            // the whole chunk to be treated as an element name.
            $bracket = strpos($chunk, '[');
            if ($bracket === false || $bracket === 0) {
                $element = $chunk;
                $attr = false;
            } else {
                list($element, $attr) = explode('[', $chunk);
            }
            if ($element !== '*') $elements[$element] = true;
            if (!$attr) continue;
            // remove trailing ], but only if it is really there, so a
            // missing bracket does not eat the last attribute's final letter
            if (substr($attr, -1) === ']') {
                $attr = substr($attr, 0, -1);
            }
            $attr = explode('|', $attr);
            foreach ($attr as $key) {
                $attributes["$element.$key"] = true;
            }
        }

        return array($elements, $attributes);

    }


}
419 :
420 :
|