aboutsummaryrefslogtreecommitdiffstats
path: root/vendor/sabre/vobject/lib/StringUtil.php
blob: b8615f2ba12aa8e1394de233d6fd38cfb8ede443 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
<?php

namespace Sabre\VObject;

/**
 * Useful utilities for working with various strings.
 *
 * @copyright Copyright (C) fruux GmbH (https://fruux.com/)
 * @author Evert Pot (http://evertpot.com/)
 * @license http://sabre.io/license/ Modified BSD License
 */
class StringUtil {

    /**
     * Returns true or false depending on if a string is valid UTF-8.
     *
     * Besides checking that the byte sequence is well-formed UTF-8, this
     * method also rejects any string containing one of the low control
     * bytes 0x00-0x08, 0x0B, 0x0C, 0x0E or 0x0F, even though those bytes
     * are technically legal in UTF-8.
     *
     * @param string $str
     *
     * @return bool
     */
    public static function isUTF8($str) {

        // Control characters
        if (preg_match('%[\x00-\x08\x0B-\x0C\x0E\x0F]%', $str)) {
            return false;
        }

        // An empty pattern with the "u" modifier forces PCRE to validate the
        // entire subject as UTF-8: preg_match() yields 1 for a valid subject
        // and false (cast to false here) for a malformed one.
        return (bool)preg_match('%%u', $str);

    }

    /**
     * This method tries its best to convert the input string to UTF-8.
     *
     * Currently only ISO-8859-1 input and UTF-8 input is supported, but this
     * may be expanded upon if we receive other examples.
     *
     * Input that is already well-formed UTF-8 is not re-encoded. Any other
     * input is assumed to be ISO-8859-1, because a byte string cannot be
     * reliably told apart from WINDOWS-1252 by inspection alone.
     *
     * After conversion, the control characters 0x00-0x08, 0x0B, 0x0C,
     * 0x0E-0x1F and 0x7F are stripped from the result. Tab (0x09), line feed
     * (0x0A) and carriage return (0x0D) are kept.
     *
     * @param string $str
     *
     * @return string
     */
    public static function convertToUTF8($str) {

        // mb_check_encoding() is deterministic, unlike the heuristic
        // mb_detect_encoding(), and mb_convert_encoding() replaces
        // utf8_encode(), which is deprecated since PHP 8.2.
        if (!mb_check_encoding($str, 'UTF-8') && mb_check_encoding($str, 'ISO-8859-1')) {
            $str = mb_convert_encoding($str, 'UTF-8', 'ISO-8859-1');
        }

        // Removing any control characters. Matching byte-wise is safe here:
        // bytes below 0x80 never appear inside a UTF-8 multi-byte sequence.
        return preg_replace('%(?:[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F])%', '', $str);

    }

}