1 : <?php
2 :
3 : /**
4 : * Removes all unrecognized tags from the list of tokens.
5 : *
6 : * This strategy iterates through all the tokens and removes unrecognized
7 : * tokens. If a token is not recognized but a TagTransform is defined for
8 : * that element, the element will be transformed accordingly.
9 : */
10 :
class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
{

    /**
     * Walks the token list once and returns only the tokens that survive.
     *
     * For each token:
     *  - tag tokens with an entry in the definition's info_tag_transform map
     *    are transformed first;
     *  - tags present in the definition's info map are kept, unless one of
     *    their required attributes is missing after attribute validation;
     *  - tags absent from the info map are either converted to a text token
     *    (Core.EscapeInvalidTags) or dropped; when a dropped start tag is
     *    listed in Core.HiddenElements, everything up to the next tag with
     *    the same name is dropped as well;
     *  - comments are dropped, except while inside a kept hidden element,
     *    where they are converted to text tokens;
     *  - text tokens are kept; anything else is dropped.
     *
     * @param array  $tokens  Tokens to filter (iterated with foreach).
     * @param object $config  Configuration; read via get() and getHTMLDefinition().
     * @param object $context Context; 'CurrentToken' is registered for the
     *                        duration of the call and destroyed before returning.
     * @return array The tokens that were kept, in their original order.
     */
    public function execute($tokens, $config, $context) {
        $html_def  = $config->getHTMLDefinition();
        $generator = new HTMLPurifier_Generator($config, $context);
        $kept      = array();

        $escape_foreign = $config->get('Core', 'EscapeInvalidTags');
        $strip_bad_img  = $config->get('Core', 'RemoveInvalidImg');

        $script_compat = $config->get('Core', 'RemoveScriptContents');
        $hidden        = $config->get('Core', 'HiddenElements');

        // Core.RemoveScriptContents, when it is strictly a boolean, overrides
        // whatever Core.HiddenElements says about 'script'.
        if ($script_compat === true) {
            $hidden['script'] = true;
        } elseif ($script_compat === false && isset($hidden['script'])) {
            unset($hidden['script']);
        }

        $validator = new HTMLPurifier_AttrValidator();

        // Name of the tag whose next occurrence ends the current skip run,
        // or false when nothing is being skipped.
        $skip_until = false;

        // Name of the kept hidden element we are currently inside (comments
        // become text there), or false when outside any such element.
        $textify_inside = false;

        // NOTE(review): register() presumably binds $token by reference so the
        // context always exposes the token being processed -- confirm. Because
        // of that, every send() below is deliberately kept on the same side of
        // each $token reassignment as before.
        $token = false;
        $context->register('CurrentToken', $token);

        $e = false;
        if ($config->get('Core', 'CollectErrors')) {
            $e =& $context->get('ErrorCollector');
        }

        foreach ($tokens as $token) {
            $is_tag = !empty($token->is_tag);

            // While skipping, only a tag with the awaited name gets through.
            if ($skip_until && (!$is_tag || $token->name !== $skip_until)) {
                continue;
            }

            // ---- non-tag tokens ------------------------------------------
            if (!$is_tag) {
                if ($token instanceof HTMLPurifier_Token_Comment) {
                    if ($textify_inside === false) {
                        // ordinary comment: strip it
                        if ($e) $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
                        continue;
                    }
                    // comment inside a kept hidden element: keep it as text
                    $token = new HTMLPurifier_Token_Text($token->data);
                } elseif (!($token instanceof HTMLPurifier_Token_Text)) {
                    // neither tag, comment nor text: drop silently
                    continue;
                }
                $kept[] = $token;
                continue;
            }

            // ---- tag tokens ----------------------------------------------
            // DEFINITION CALL: give a registered transform the first shot.
            if (isset($html_def->info_tag_transform[$token->name])) {
                $name_before = $token->name;
                $transformer = $html_def->info_tag_transform[$name_before];
                $token = $transformer->transform($token, $config, $context);
                if ($e) $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Tag transform', $name_before);
            }

            if (!isset($html_def->info[$token->name])) {
                if ($escape_foreign) {
                    // Unknown tag: emit its generated HTML form as plain text.
                    if ($e) $e->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text');
                    $token = new HTMLPurifier_Token_Text(
                        $generator->generateFromToken($token)
                    );
                    $kept[] = $token;
                    continue;
                }

                if (!isset($hidden[$token->name])) {
                    if ($e) $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign element removed');
                    continue;
                }

                // Unknown hidden element: its children must go too.
                if ($token instanceof HTMLPurifier_Token_Start) {
                    $skip_until = $token->name;
                } elseif (!($token instanceof HTMLPurifier_Token_Empty)) {
                    // any non-start, non-empty tag ends the skip run;
                    // an empty tag leaves the state untouched
                    $skip_until = false;
                }
                if ($e) $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign meta element removed');
                continue;
            }

            // Known tag. Start/empty tags with required attributes are
            // validated now so that missing ones can be detected; 'img' only
            // takes part when Core.RemoveInvalidImg is enabled.
            $opens_element = $token instanceof HTMLPurifier_Token_Start
                          || $token instanceof HTMLPurifier_Token_Empty;
            if (
                $opens_element &&
                $html_def->info[$token->name]->required_attr &&
                ($token->name != 'img' || $strip_bad_img)
            ) {
                $validator->validateToken($token, $config, $context);
                foreach ($html_def->info[$token->name]->required_attr as $required) {
                    if (!isset($token->attr[$required])) {
                        if ($e) $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Missing required attribute', $required);
                        // drop this token and move on to the next one
                        continue 2;
                    }
                }
                // Flag the token as already attribute-validated.
                $token->armor['ValidateAttributes'] = true;
            }

            // Track whether we are inside a kept hidden element.
            if (isset($hidden[$token->name]) && $token instanceof HTMLPurifier_Token_Start) {
                $textify_inside = $token->name;
            } elseif ($token->name === $textify_inside && $token instanceof HTMLPurifier_Token_End) {
                $textify_inside = false;
            }

            $kept[] = $token;
        }

        // Still skipping at end of input: the awaited tag never arrived.
        if ($e && $skip_until) {
            $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', $skip_until);
        }

        $context->destroy('CurrentToken');

        return $kept;
    }

}
150 :
|