1 : <?php
2 :
3 : /**
4 : * Generates HTML from tokens.
5 : * @todo Refactor interface so that configuration/context is determined
6 : * upon instantiation, no need for messy generateFromTokens() calls
7 : * @todo Make some of the more internal functions protected, and have
8 : * unit tests work around that
9 : */
class HTMLPurifier_Generator
{

    /**
     * Whether or not generator should produce XML output.
     * Overwritten in the constructor from the HTML definition's doctype.
     */
    private $_xhtml = true;

    /**
     * :HACK: Whether or not generator should comment the insides of <script> tags.
     * Overwritten in the constructor from Output.CommentScriptContents.
     */
    private $_scriptFix = false;

    /**
     * Cache of HTMLDefinition during HTML output to determine whether or
     * not attributes should be minimized.
     */
    private $_def;

    /**
     * Configuration for the generator
     */
    protected $config;

    /**
     * Reads the output-related settings and the HTML definition from the
     * configuration once, so the generate*() methods need no config argument.
     *
     * @param $config Instance of HTMLPurifier_Config
     * @param $context Instance of HTMLPurifier_Context (accepted for
     *                 interface compatibility; not used by this class)
     */
    public function __construct($config, $context) {
        $this->config     = $config;
        $this->_scriptFix = $config->get('Output', 'CommentScriptContents');
        $this->_def       = $config->getHTMLDefinition();
        $this->_xhtml     = $this->_def->doctype->xml;
    }

    /**
     * Generates HTML from an array of tokens.
     *
     * After concatenating the per-token output, the result is optionally
     * pretty-printed with Tidy (when the extension is loaded and
     * Output.TidyFormat is enabled) and its "\n" newlines are replaced by
     * Output.Newline, or PHP_EOL when that setting is null.
     *
     * @param $tokens Array of HTMLPurifier_Token, indexed sequentially from 0
     * @return Generated HTML; empty string when $tokens is empty
     */
    public function generateFromTokens($tokens) {
        if (!$tokens) return '';

        // Basic algorithm
        $html = '';
        for ($i = 0, $size = count($tokens); $i < $size; $i++) {
            // Script special case: <script>, ONE content token, </script>.
            // The token must be a *start* tag: without this check a closing
            // </script> followed by text and any end tag would match too, and
            // ordinary text would be wrapped in the script comment hack.
            // Checking the type first also avoids reading ->name on tokens
            // that are not tags.
            if ($this->_scriptFix
                && $tokens[$i] instanceof HTMLPurifier_Token_Start
                && $tokens[$i]->name === 'script'
                && $i + 2 < $size
                && $tokens[$i + 2] instanceof HTMLPurifier_Token_End
            ) {
                // the contents of the script block must be ONE token
                // for this to work.
                $html .= $this->generateFromToken($tokens[$i++]);
                $html .= $this->generateScriptFromToken($tokens[$i++]);
                // $i now points at the end tag, emitted by the line below
            }
            $html .= $this->generateFromToken($tokens[$i]);
        }

        // Tidy cleanup
        if (extension_loaded('tidy') && $this->config->get('Output', 'TidyFormat')) {
            $tidy = new Tidy;
            $tidy->parseString($html, array(
                'indent'         => true,
                'output-xhtml'   => $this->_xhtml,
                'show-body-only' => true,
                'indent-spaces'  => 2,
                'wrap'           => 68,
            ), 'utf8');
            $tidy->cleanRepair();
            $html = (string) $tidy; // explicit cast necessary
        }

        // Normalize newlines to system defined value
        $nl = $this->config->get('Output', 'Newline');
        if ($nl === null) $nl = PHP_EOL;
        if ($nl !== "\n") $html = str_replace("\n", $nl, $html);
        return $html;
    }

    /**
     * Generates HTML from a single token.
     *
     * Start and empty tags are emitted with their attributes; empty tags get
     * a trailing " /" in XML mode. Text is escaped without touching quotes.
     * Comment data is emitted verbatim between <!-- and -->, so it is NOT
     * escaped here.
     *
     * @param $token HTMLPurifier_Token object.
     * @return Generated HTML; empty string (plus an E_USER_WARNING) when
     *         $token is not an HTMLPurifier_Token, and empty string for
     *         token types this method does not recognize
     */
    public function generateFromToken($token) {
        if (!$token instanceof HTMLPurifier_Token) {
            trigger_error('Cannot generate HTML from non-HTMLPurifier_Token object', E_USER_WARNING);
            return '';

        } elseif ($token instanceof HTMLPurifier_Token_Start) {
            $attr = $this->generateAttributes($token->attr, $token->name);
            return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';

        } elseif ($token instanceof HTMLPurifier_Token_End) {
            return '</' . $token->name . '>';

        } elseif ($token instanceof HTMLPurifier_Token_Empty) {
            $attr = $this->generateAttributes($token->attr, $token->name);
            return '<' . $token->name . ($attr ? ' ' : '') . $attr .
                ( $this->_xhtml ? ' /': '' ) // <br /> v. <br>
                . '>';

        } elseif ($token instanceof HTMLPurifier_Token_Text) {
            return $this->escape($token->data, ENT_NOQUOTES);

        } elseif ($token instanceof HTMLPurifier_Token_Comment) {
            return '<!--' . $token->data . '-->';

        } else {
            return '';

        }
    }

    /**
     * Special case processor for the contents of script tags.
     *
     * Text tokens are trimmed, stripped of a trailing "//", and wrapped in a
     * comment/CDATA guard. Any other token is passed to generateFromToken().
     *
     * @warning This runs into problems if there's already a literal
     *          --> somewhere inside the script contents.
     * @param $token HTMLPurifier_Token object holding the script contents
     * @return Generated HTML
     */
    public function generateScriptFromToken($token) {
        if (!$token instanceof HTMLPurifier_Token_Text) return $this->generateFromToken($token);
        // Thanks <http://lachy.id.au/log/2005/05/script-comments>
        // Drop a dangling "//" so it cannot combine with the closing guard
        $data = preg_replace('#//\s*$#', '', $token->data);
        return '<!--//--><![CDATA[//><!--' . "\n" . trim($data) . "\n" . '//--><!]]>';
    }

    /**
     * Generates attribute declarations from attribute array.
     *
     * In non-XML mode, attributes whose name contains ':' are skipped and
     * attributes flagged as minimized in the HTML definition are emitted as
     * a bare name.
     *
     * @note This does not include the leading or trailing space.
     * @param $assoc_array_of_attributes Attribute array
     * @param $element Name of element attributes are for, used to check
     *        attribute minimization.
     * @return Generate HTML fragment for insertion.
     */
    public function generateAttributes($assoc_array_of_attributes, $element = false) {
        $html = '';
        foreach ($assoc_array_of_attributes as $key => $value) {
            if (!$this->_xhtml) {
                // Remove namespaced attributes
                if (strpos($key, ':') !== false) continue;
                // Check if we should minimize the attribute: val="val" -> val
                if ($element && !empty($this->_def->info[$element]->attr[$key]->minimized)) {
                    $html .= $key . ' ';
                    continue;
                }
            }
            $html .= $key.'="'.$this->escape($value).'" ';
        }
        // every attribute was appended with a trailing space; drop the last one
        return rtrim($html);
    }

    /**
     * Escapes raw text data.
     * @todo This really ought to be protected, but until we have a facility
     *       for properly generating HTML here w/o using tokens, it stays
     *       public.
     * @param $string String data to escape for HTML.
     * @param $quote Quoting style, like htmlspecialchars. ENT_NOQUOTES is
     *               permissible for non-attribute output.
     * @return String escaped data.
     */
    public function escape($string, $quote = ENT_COMPAT) {
        return htmlspecialchars($string, $quote, 'UTF-8');
    }

}
175 :
|