1 : <?php
2 :
/**
 * Parses a URI into the components and fragment identifier as specified
 * by RFC 3986.
 */
class HTMLPurifier_URIParser
{

    /**
     * Instance of HTMLPurifier_PercentEncoder to do normalization with.
     */
    protected $percentEncoder;

    /**
     * Creates the percent encoder that parse() uses to normalize its input.
     */
    public function __construct() {
        $this->percentEncoder = new HTMLPurifier_PercentEncoder();
    }

    /**
     * Parses a URI.
     *
     * The URI is first run through the percent encoder's normalize(), then
     * split with the regular expression from RFC 3986, Appendix B. The
     * authority, when present, is split further into userinfo, host and port.
     *
     * @param $uri string URI to parse
     * @return HTMLPurifier_URI representation of URI, or false if the
     *         splitting expression did not match. This representation has
     *         not been validated yet and may not conform to RFC.
     */
    public function parse($uri) {

        $uri = $this->percentEncoder->normalize($uri);

        // Regexp is as per Appendix B.
        // Note that ["<>] are an addition to the RFC's recommended
        // characters, because they represent external delimiters.
        // The pattern is not anchored at the end, so anything from the
        // first such delimiter onward is simply not captured.
        $r_URI = '!'.
            '(([^:/?#"<>]+):)?'. // 2. Scheme
            '(//([^/?#"<>]*))?'. // 4. Authority
            '([^?#"<>]*)'.       // 5. Path
            '(\?([^#"<>]*))?'.   // 7. Query
            '(#([^"<>]*))?'.     // 8. Fragment
            '!';

        $matches = array();
        $result = preg_match($r_URI, $uri, $matches);

        if (!$result) return false; // *really* invalid URI

        // separate out parts
        // The odd-numbered wrapper groups include their delimiter (":", "//",
        // "?", "#"), so a non-empty wrapper means the component is present,
        // even when the component itself is the empty string.
        $scheme     = !empty($matches[1]) ? $matches[2] : null;
        $authority  = !empty($matches[3]) ? $matches[4] : null;
        $path       = $matches[5]; // always present, can be empty
        $query      = !empty($matches[6]) ? $matches[7] : null;
        $fragment   = !empty($matches[8]) ? $matches[9] : null;

        // further parse authority
        if ($authority !== null) {
            $r_authority = "/^((.+?)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
            $matches = array();
            preg_match($r_authority, $authority, $matches);
            $userinfo   = !empty($matches[1]) ? $matches[2] : null;
            // Do not use empty() here: the host is captured without any
            // delimiter, and empty('0') is true in PHP, which would turn
            // the legitimate host "0" into an empty host.
            $host       = isset($matches[3]) ? $matches[3] : '';
            // A bare trailing ":" yields port 0, because (int) '' is 0.
            $port       = !empty($matches[4]) ? (int) $matches[5] : null;
        } else {
            $port = $host = $userinfo = null;
        }

        return new HTMLPurifier_URI(
            $scheme, $userinfo, $host, $port, $path, $query, $fragment);
    }

}
69 :
|