aboutsummaryrefslogtreecommitdiffstats
path: root/lib/htmlpurifier/tests/HTMLPurifier/AttrDef/LangTest.php
diff options
context:
space:
mode:
Diffstat (limited to 'lib/htmlpurifier/tests/HTMLPurifier/AttrDef/LangTest.php')
-rw-r--r-- lib/htmlpurifier/tests/HTMLPurifier/AttrDef/LangTest.php | 85
1 files changed, 85 insertions, 0 deletions
diff --git a/lib/htmlpurifier/tests/HTMLPurifier/AttrDef/LangTest.php b/lib/htmlpurifier/tests/HTMLPurifier/AttrDef/LangTest.php
new file mode 100644
index 000000000..c59175556
--- /dev/null
+++ b/lib/htmlpurifier/tests/HTMLPurifier/AttrDef/LangTest.php
@@ -0,0 +1,85 @@
+<?php
+
+class HTMLPurifier_AttrDef_LangTest extends HTMLPurifier_AttrDefHarness
+{
+ // Test case for HTMLPurifier_AttrDef_Lang, the language-code attribute definition.
+ function test() {
+ // NOTE(review): assertDef() is inherited; presumably arg 2 is the expected output (false = rejected) - confirm.
+ $this->def = new HTMLPurifier_AttrDef_Lang();
+
+ // basic good uses
+ $this->assertDef('en');
+ $this->assertDef('en-us');
+
+ $this->assertDef(' en ', 'en'); // surrounding whitespace is trimmed
+ $this->assertDef('EN', 'en'); // case insensitivity
+
+ // (thanks Eugen Pankratz for noticing the typos!)
+ $this->assertDef('En-Us-Edison', 'en-us-edison'); // case insensitivity across several subtags
+
+ $this->assertDef('fr en', false); // multiple languages
+ $this->assertDef('%', false); // bad character
+
+ // test a language code that is overlong according to the syntax
+ $this->assertDef('thisistoolongsoitgetscut', false);
+
+ // primary subtag rules
+ // I'm somewhat hesitant to allow x and i as primary language codes,
+ // because they are almost never used in real life. However,
+ // theoretically speaking, having them alone is permissible, so
+ // I'll be lenient. No XML parser is going to complain anyway.
+ $this->assertDef('x');
+ $this->assertDef('i');
+ // real world use-cases
+ $this->assertDef('x-klingon');
+ $this->assertDef('i-mingo');
+ // Because the RFC only defines two- and three-letter primary codes,
+ // anything with a length of four or greater is invalid, despite
+ // the syntax stipulation of 1 to 8 characters. Because the RFC
+ // specifically states that this reservation is in order to allow
+ // for future versions to expand, the adoption of a new RFC will
+ // require these test cases to be rewritten, even if backwards-
+ // compatibility is largely retained (i.e. this is not forwards
+ // compatible).
+ $this->assertDef('four', false);
+ // for similar reasons, disallow any other one-character language
+ $this->assertDef('f', false);
+
+ // second subtag rules
+ // One-letter subtags are prohibited until revision. This is, however,
+ // less volatile than the restrictions on the primary subtags.
+ // Also note that this test-case tests fix-behavior: chop
+ // off subtags until you get a valid language code.
+ $this->assertDef('en-a', 'en');
+ // however, x is a reserved single-letter subtag that is allowed
+ $this->assertDef('en-x', 'en-x');
+ // 2-8 chars are permitted, but have special meaning that cannot
+ // be checked without maintaining country code lookup tables (for
+ // two characters) or special registration tables (for all above).
+ $this->assertDef('en-uk', true);
+
+ // further subtag rules: only syntactic constraints
+ $this->assertDef('en-us-edison');
+ $this->assertDef('en-us-toolonghaha', 'en-us');
+ $this->assertDef('en-us-a-silly-long-one');
+
+ // RFC 3066 stipulates that if a three-letter and a two-letter code
+ // are available, the two-letter one MUST be used. Without a language
+ // code lookup table, we cannot implement this functionality.
+
+ // Although the HTML specification, technically speaking, allows you to
+ // omit language tags, this implicitly means that the parent element's
+ // language is the one applicable, which, in some cases, is incorrect.
+ // Thus, we allow und, only slightly defying the RFC's SHOULD NOT
+ // designation.
+ $this->assertDef('und');
+
+ // Because attributes only allow one language, mul is allowed, complying
+ // with the RFC's SHOULD NOT designation.
+ $this->assertDef('mul');
+
+ }
+
+}
+
+// vim: et sw=4 sts=4