aboutsummaryrefslogtreecommitdiffstats
path: root/vendor/ezyang/htmlpurifier/maintenance/generate-entity-file.php
diff options
context:
space:
mode:
authorMario Vavti <mario@mariovavti.com>2017-05-31 09:56:35 +0200
committerMario Vavti <mario@mariovavti.com>2017-05-31 09:56:35 +0200
commit47d55694a4c84b6c12c0db61a69bcac8b671b20e (patch)
treeb15e96f4ea67e2214a66a9d28dafaf53d25b98ec /vendor/ezyang/htmlpurifier/maintenance/generate-entity-file.php
parent087f9784e3c5a860ed2b86e7f9e8e9f312038546 (diff)
parentf0e615dee529e031663576286345141ad2996974 (diff)
downloadvolse-hubzilla-47d55694a4c84b6c12c0db61a69bcac8b671b20e.tar.gz
volse-hubzilla-47d55694a4c84b6c12c0db61a69bcac8b671b20e.tar.bz2
volse-hubzilla-47d55694a4c84b6c12c0db61a69bcac8b671b20e.zip
Merge branch '2.4RC'2.4
Diffstat (limited to 'vendor/ezyang/htmlpurifier/maintenance/generate-entity-file.php')
-rwxr-xr-xvendor/ezyang/htmlpurifier/maintenance/generate-entity-file.php75
1 files changed, 75 insertions, 0 deletions
diff --git a/vendor/ezyang/htmlpurifier/maintenance/generate-entity-file.php b/vendor/ezyang/htmlpurifier/maintenance/generate-entity-file.php
new file mode 100755
index 000000000..ff1713e39
--- /dev/null
+++ b/vendor/ezyang/htmlpurifier/maintenance/generate-entity-file.php
@@ -0,0 +1,75 @@
+#!/usr/bin/php
+<?php
+
+chdir(dirname(__FILE__)); // work from this script's own directory so the relative paths below resolve
+require_once 'common.php';
+assertCli(); // NOTE(review): not defined in this file (presumably in common.php); name suggests a CLI-only guard - confirm
+
+/**
+ * @file
+ * Parses *.ent files into an entity lookup table, and then serializes and
+ * writes the whole kaboodle to a file. The resulting file is cached so
+ * that this script does not need to be run. This script should rarely,
+ * if ever, be run, since HTML's entities are fairly immutable.
+ */
+
+// Directory holding the *.ent files, relative to this script's directory (the
+// cwd after the chdir above). Needs trailing slash: file names are appended as-is.
+$entity_dir = '../docs/entities/';
+
+// Path of the file that receives the serialized entity table (same base directory).
+$output_file = '../library/HTMLPurifier/EntityLookup/entities.ser';
+
+/**
+ * Encodes a Unicode code point as a UTF-8 byte string.
+ *
+ * Originally courtesy of a PHP manual comment; extended with a four-byte
+ * branch so that code points above U+FFFF no longer overflow the three-byte
+ * lead byte and yield invalid output.
+ *
+ * No range validation is performed: callers are expected to pass a value
+ * between 0 and 0x10FFFF.
+ *
+ * @param int|string $dec Code point, as an integer or a decimal numeric
+ *                        string (e.g. a regex capture).
+ * @return string The UTF-8 encoding of the code point (one to four bytes).
+ */
+function unichr($dec)
+{
+    // Regex captures arrive as strings; normalise so the bit operations
+    // below always work on an integer.
+    $dec = (int) $dec;
+
+    if ($dec < 0x80) {
+        // 0xxxxxxx
+        return chr($dec);
+    }
+    if ($dec < 0x800) {
+        // 110xxxxx 10xxxxxx
+        return chr(0xC0 | ($dec >> 6))
+            . chr(0x80 | ($dec & 0x3F));
+    }
+    if ($dec < 0x10000) {
+        // 1110xxxx 10xxxxxx 10xxxxxx
+        return chr(0xE0 | ($dec >> 12))
+            . chr(0x80 | (($dec >> 6) & 0x3F))
+            . chr(0x80 | ($dec & 0x3F));
+    }
+    // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+    return chr(0xF0 | ($dec >> 18))
+        . chr(0x80 | (($dec >> 12) & 0x3F))
+        . chr(0x80 | (($dec >> 6) & 0x3F))
+        . chr(0x80 | ($dec & 0x3F));
+}
+
+// Preconditions: the entity directory must exist, and an existing output
+// file is never overwritten (remove it by hand to regenerate).
+if ( !is_dir($entity_dir) ) exit("Fatal Error: Can't find entity directory.\n");
+if ( file_exists($output_file) ) exit("Fatal Error: output file already exists.\n");
+
+$dh = @opendir($entity_dir);
+if ( !$dh ) exit("Fatal Error: Cannot read entity directory.\n");
+
+// Collect the names of all non-hidden files whose extension is "ent".
+$entity_files = array();
+while (($file = readdir($dh)) !== false) {
+    if (@$file[0] === '.') continue;
+    if (substr(strrchr($file, "."), 1) !== 'ent') continue;
+    $entity_files[] = $file;
+}
+closedir($dh);
+
+if ( !$entity_files ) exit("Fatal Error: No entity files to parse.\n");
+
+// readdir() order depends on the filesystem; sort so the generated table
+// (and which file wins for a duplicated entity name) is reproducible.
+sort($entity_files);
+
+$entity_table = array();
+// Capture 1 is the entity name, capture 2 the decimal code point. The
+// optional "38;#" also accepts values written in the "&#38;#NNN;" form.
+$regexp = '/<!ENTITY\s+([A-Za-z0-9]+)\s+"&#(?:38;#)?([0-9]+);">/';
+
+foreach ( $entity_files as $file ) {
+    $contents = file_get_contents($entity_dir . $file);
+    // An unreadable file must abort the run; otherwise its entities would
+    // be silently missing from the cached table.
+    if ( $contents === false ) exit("Fatal Error: Cannot read entity file $file.\n");
+
+    $matches = array();
+    if ( preg_match_all($regexp, $contents, $matches, PREG_SET_ORDER) === false ) {
+        exit("Fatal Error: Cannot parse entity file $file.\n");
+    }
+    foreach ($matches as $match) {
+        // Later definitions of the same entity name overwrite earlier ones.
+        $entity_table[$match[1]] = unichr($match[2]);
+    }
+}
+
+// Refuse to cache an empty table: it would be useless, and its existence
+// would block the next run via the "already exists" guard above.
+if ( !$entity_table ) exit("Fatal Error: No entities found in entity files.\n");
+
+$output = serialize($entity_table);
+
+$fh = fopen($output_file, 'w');
+if ( !$fh ) exit("Fatal Error: Cannot open output file for writing.\n");
+$written = fwrite($fh, $output);
+fclose($fh);
+if ( $written !== strlen($output) ) {
+    // Do not leave a truncated serialized file behind for the library to load.
+    @unlink($output_file);
+    exit("Fatal Error: Cannot write output file.\n");
+}
+
+echo "Completed successfully.";
+
+// vim: et sw=4 sts=4