1 : <?php
2 :
/*! @mainpage
 *
 * HTML Purifier is an HTML filter that will take an arbitrary snippet of
 * HTML and rigorously test, validate and filter it into a version that
 * is safe for output onto webpages. It achieves this by:
 *
 *  -# Lexing (parsing into tokens) the document,
 *  -# Executing various strategies on the tokens:
 *      -# Removing all elements not in the whitelist,
 *      -# Making the tokens well-formed,
 *      -# Fixing the nesting of the nodes, and
 *      -# Validating attributes of the nodes; and
 *  -# Generating HTML from the purified tokens.
 *
 * However, most users will only need to interface with the HTMLPurifier
 * and HTMLPurifier_Config classes.
 */
20 :
21 : /*
22 : HTML Purifier 3.1.1 - Standards Compliant HTML Filtering
23 : Copyright (C) 2006-2008 Edward Z. Yang
24 :
25 : This library is free software; you can redistribute it and/or
26 : modify it under the terms of the GNU Lesser General Public
27 : License as published by the Free Software Foundation; either
28 : version 2.1 of the License, or (at your option) any later version.
29 :
30 : This library is distributed in the hope that it will be useful,
31 : but WITHOUT ANY WARRANTY; without even the implied warranty of
32 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
33 : Lesser General Public License for more details.
34 :
35 : You should have received a copy of the GNU Lesser General Public
36 : License along with this library; if not, write to the Free Software
37 : Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
38 : */
39 :
/**
 * Facade that coordinates HTML Purifier's subsystems in order to purify HTML.
 *
 * @note Configuration can be supplied at two points. Listed from lowest to
 *       highest precedence, they are:
 *          -# Instance: new HTMLPurifier($config)
 *          -# Invocation: purify($html, $config)
 *       The two configurations are entirely independent of each other and
 *       are *not* merged (this behavior may change in the future).
 *
 * @todo We need an easier way to inject strategies using the configuration
 *       object.
 */
class HTMLPurifier
{

    /** Version of HTML Purifier */
    public $version = '3.1.1';

    /** Constant with version of HTML Purifier */
    const VERSION = '3.1.1';

    /** Global configuration object */
    public $config;

    /**
     * Extra HTMLPurifier_Filter objects registered through the deprecated
     * addFilter() method; kept for backwards compatibility.
     */
    private $filters = array();

    /** Single shared instance handed out by instance() */
    private static $instance;

    /** HTMLPurifier_Strategy_Core object created by the constructor */
    protected $strategy;

    /** HTMLPurifier_Generator object created by the most recent purify() call */
    protected $generator;

    /**
     * HTMLPurifier_Context produced by the last purify() call. Holds an
     * array of contexts instead if the last called method was purifyArray().
     */
    public $context;

    /**
     * Initializes the purifier.
     * @param $config Optional HTMLPurifier_Config object used by default for
     *                every purification done with this instance (it can be
     *                overridden on a per-call basis). Any type accepted by
     *                HTMLPurifier_Config::create() may be passed; when
     *                omitted, null is handed to that factory.
     */
    public function __construct($config = null)
    {
        $this->config   = HTMLPurifier_Config::create($config);
        $this->strategy = new HTMLPurifier_Strategy_Core();
    }

    /**
     * Adds a filter to process the output. Filters run in the order in which
     * they were added (first come, first served).
     * @deprecated Emits an E_USER_WARNING on every call; use configuration
     *             directives in the Filter namespace or Filter.Custom.
     * @param $filter HTMLPurifier_Filter object
     */
    public function addFilter($filter)
    {
        trigger_error(
            'HTMLPurifier->addFilter() is deprecated, use configuration directives in the Filter namespace or Filter.Custom',
            E_USER_WARNING
        );
        $this->filters[] = $filter;
    }

    /**
     * Filters an HTML snippet/document to be XSS-free and standards-compliant.
     *
     * The context built for this run is stored in $this->context before
     * returning.
     *
     * @param $html   String of HTML to purify
     * @param $config HTMLPurifier_Config object for this operation. When it
     *                is omitted (or otherwise falsy) the configuration given
     *                at construction time is used instead. Any type accepted
     *                by HTMLPurifier_Config::create() may be passed.
     * @return Purified HTML
     */
    public function purify($html, $config = null)
    {
        // :TODO: make the config merge in, instead of replace
        if ($config) {
            $config = HTMLPurifier_Config::create($config);
        } else {
            $config = $this->config;
        }

        // implementation is partially environment dependent, partially
        // configuration dependent
        $lexer = HTMLPurifier_Lexer::create($config);

        $context = new HTMLPurifier_Context();

        // the generator is kept on the instance and also exposed via the context
        $this->generator = new HTMLPurifier_Generator($config, $context);
        $context->register('Generator', $this->generator);

        // error collection facilities are only set up when requested
        // (may get moved out if other facilities use it)
        if ($config->get('Core', 'CollectErrors')) {
            $locale = HTMLPurifier_LanguageFactory::instance()->create($config, $context);
            $context->register('Locale', $locale);

            $collector = new HTMLPurifier_ErrorCollector($context);
            $context->register('ErrorCollector', $collector);
        }

        // the ID accumulator lives in the context because AttrValidator
        // can be called from many places
        $accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
        $context->register('IDAccumulator', $accumulator);

        $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);

        // Assemble the filter list: flag-enabled filters first, then the
        // Filter.Custom entries, then anything added through addFilter().
        $flags  = $config->getBatch('Filter');
        $custom = $flags['Custom'];
        unset($flags['Custom']);

        $active = array();
        foreach ($flags as $name => $enabled) {
            if ($enabled) {
                $class    = "HTMLPurifier_Filter_$name";
                $active[] = new $class;
            }
        }
        foreach ($custom as $extra) {
            // maybe "HTMLPurifier_Filter_$extra", but be consistent with AutoFormat
            $active[] = $extra;
        }
        $active = array_merge($active, $this->filters);
        // maybe prepare(), but later

        // pre-filters run in registration order
        foreach ($active as $filter) {
            $html = $filter->preFilter($html, $config, $context);
        }

        // un-purified HTML -> un-purified tokens -> purified tokens -> purified HTML
        $tokens = $lexer->tokenizeHTML($html, $config, $context);
        $tokens = $this->strategy->execute($tokens, $config, $context);
        $html   = $this->generator->generateFromTokens($tokens);

        // post-filters run in the opposite order to the pre-filters
        foreach (array_reverse($active) as $filter) {
            $html = $filter->postFilter($html, $config, $context);
        }

        $html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context);

        // reference assignment retained from the original implementation
        $this->context =& $context;

        return $html;
    }

    /**
     * Filters an array of HTML snippets. Keys are preserved, and
     * $this->context is set to an array of per-snippet contexts under the
     * same keys.
     * @param $array_of_html Array of HTML strings to purify
     * @param $config Optional HTMLPurifier_Config object for this operation.
     *                See HTMLPurifier::purify() for more details.
     * @return Array of purified HTML
     */
    public function purifyArray($array_of_html, $config = null)
    {
        $contexts = array();
        foreach ($array_of_html as $index => $snippet) {
            $array_of_html[$index] = $this->purify($snippet, $config);
            // purify() leaves the context of the run it just did in $this->context
            $contexts[$index] = $this->context;
        }
        $this->context = $contexts;

        return $array_of_html;
    }

    /**
     * Singleton for enforcing just one HTML Purifier in your system.
     * @param $prototype Optional prototype HTMLPurifier instance to
     *                   overload singleton with, or HTMLPurifier_Config
     *                   instance to configure the generated version with.
     *                   Any truthy value replaces the current singleton.
     * @return The shared HTMLPurifier instance
     */
    public static function instance($prototype = null)
    {
        if ($prototype instanceof HTMLPurifier) {
            // adopt the supplied purifier as the singleton
            self::$instance = $prototype;
        } elseif ($prototype) {
            // anything else truthy is treated as configuration for a new purifier
            self::$instance = new HTMLPurifier($prototype);
        } elseif (!self::$instance) {
            // first use without a prototype: create a default purifier
            self::$instance = new HTMLPurifier();
        }

        return self::$instance;
    }

    /**
     * @note Backwards compatibility, see instance()
     */
    public static function getInstance($prototype = null)
    {
        return self::instance($prototype);
    }

}
|