1 : <?php
2 :
3 : /**
4 : * Class that handles operations involving percent-encoding in URIs.
5 : *
6 : * @warning
7 : * Be careful when reusing instances of PercentEncoder. The object
8 : * you use for normalize() SHOULD NOT be used for encode(), or
9 : * vice-versa.
10 : */
class HTMLPurifier_PercentEncoder
{

    /**
     * Lookup table of bytes that are left unescaped by encode().
     *
     * Keys are byte values (as returned by ord()); every stored value is
     * true, so membership is tested with isset(). Filled in by the
     * constructor.
     */
    protected $preserve = array();

    /**
     * Builds the table of bytes that must not be percent-encoded.
     *
     * The table always contains ASCII digits, upper- and lower-case letters,
     * dash, period, underscore and tilde. Each byte of $preserve is added on
     * top of that set.
     *
     * @param $preserve String of extra characters that should be preserved
     *                  while using encode(), or false for no extras.
     */
    public function __construct($preserve = false) {
        // unreserved letters, ought to const-ify
        for ($i = 48; $i <= 57; $i++) { // digits
            $this->preserve[$i] = true;
        }
        for ($i = 65; $i <= 90; $i++) { // upper-case
            $this->preserve[$i] = true;
        }
        for ($i = 97; $i <= 122; $i++) { // lower-case
            $this->preserve[$i] = true;
        }
        $this->preserve[45]  = true; // Dash         -
        $this->preserve[46]  = true; // Period       .
        $this->preserve[95]  = true; // Underscore   _
        $this->preserve[126] = true; // Tilde        ~

        // extra letters not to escape
        if ($preserve !== false) {
            for ($i = 0, $c = strlen($preserve); $i < $c; $i++) {
                $this->preserve[ord($preserve[$i])] = true;
            }
        }
    }

    /**
     * Our replacement for urlencode: percent-encodes every byte that is not
     * in the preserve table, writing escapes as '%' plus two upper-case hex
     * digits.
     * @note
     *      Assumes that the string has already been normalized, making any
     *      and all percent escape sequences valid. Percents will not be
     *      re-escaped, regardless of their status in $preserve
     * @param $string String to be encoded
     * @return Encoded string.
     */
    public function encode($string) {
        $ret = '';
        for ($i = 0, $c = strlen($string); $i < $c; $i++) {
            $char = $string[$i];
            // A literal percent is copied through untouched (see @note).
            if ($char === '%' || isset($this->preserve[ord($char)])) {
                $ret .= $char;
            } else {
                $ret .= sprintf('%%%02X', ord($char));
            }
        }
        return $ret;
    }

    /**
     * Fix up percent-encoding by decoding unreserved characters and normalizing.
     *
     * The string is split on '%'. For each piece that followed a percent:
     *  - if fewer than two characters follow, or the first two are not hex
     *    digits, the percent itself is escaped as %25 and the piece is kept;
     *  - if the escape denotes a byte in the preserve table (other than '%'),
     *    it is replaced by that literal byte;
     *  - otherwise the escape is kept with its hex digits upper-cased.
     *
     * @warning This function is affected by $preserve, even though the
     *          usual desired behavior is for this not to preserve those
     *          characters. Be careful when reusing instances of PercentEncoder!
     *          The one exception is '%': %25 is never decoded, even when '%'
     *          was passed to the constructor.
     * @param $string String to normalize
     * @return Normalized string ('' when $string is empty).
     */
    public function normalize($string) {
        if ($string == '') return '';
        $parts = explode('%', $string);
        // Everything before the first percent is copied verbatim.
        $ret = array_shift($parts);
        foreach ($parts as $part) {
            $length = strlen($part);
            if ($length < 2) {
                // Truncated escape: neutralize the stray percent.
                $ret .= '%25' . $part;
                continue;
            }
            $encoding = substr($part, 0, 2);
            $text     = substr($part, 2);
            if (!ctype_xdigit($encoding)) {
                // Not a hex pair: neutralize the stray percent.
                $ret .= '%25' . $part;
                continue;
            }
            $int = hexdec($encoding);
            // Never decode %25 (37 is '%'): turning it into a bare percent
            // would create a new escape sequence out of the following text
            // (e.g. "%2541" -> "%41"), i.e. double decoding.
            if ($int !== 37 && isset($this->preserve[$int])) {
                $ret .= chr($int) . $text;
                continue;
            }
            $encoding = strtoupper($encoding);
            $ret .= '%' . $encoding . $text;
        }
        return $ret;
    }

}
97 :
|