From fe5f1e4d67d999ed3c6ef78dc4d49f5dd1a93056 Mon Sep 17 00:00:00 2001 From: Klaus Weidenbach Date: Tue, 24 Oct 2017 16:33:58 +0200 Subject: :white_check_mark: Some unit test for include/language.php Unfortunately not so much unit testable, but added in preparation to update LanguageDetect and Intl library. --- tests/unit/includes/LanguageTest.php | 165 +++++++++++++++++++++++++++++++++++ 1 file changed, 165 insertions(+) create mode 100644 tests/unit/includes/LanguageTest.php (limited to 'tests/unit/includes/LanguageTest.php') diff --git a/tests/unit/includes/LanguageTest.php b/tests/unit/includes/LanguageTest.php new file mode 100644 index 000000000..789dbe80b --- /dev/null +++ b/tests/unit/includes/LanguageTest.php @@ -0,0 +1,165 @@ +getFunctionMock(__NAMESPACE__, 'get_config'); + //$gc->expects($this->once())->willReturn(10) + //$cg = $this->getFunctionMock('Zotlabs\Lib\Config', 'Get'); + //$cg->expects($this->once())->willReturn(10); + //$this->assertEquals($langCode, detect_language($text)); + + + // Can not unit test detect_language(), therefore test the used library + // only for now to find regressions on library updates. + require_once('library/langdet/Text/LanguageDetect.php'); + $l = new Text_LanguageDetect; + // return 2-letter ISO 639-1 (en) language code + $l->setNameMode(2); + $lng = $l->detectConfidence($text); + + $this->assertEquals($langCode, $lng['language']); + $this->assertEquals($confidence, round($lng['confidence'], 6)); + } + + public function languageExamplesProvider() { + return [ + 'empty text' => [ + '', + '', + null + ], + 'English' => [ + 'English is a West Germanic language that was first spoken in early medieval England and is now a global lingua franca.[4][5] Named after the Angles, one of the Germanic tribes that migrated to England, it ultimately derives its name from the Anglia (Angeln) peninsula in the Baltic Sea. It is closely related to the Frisian languages, but its vocabulary has been significantly influenced by other Germanic languages, particularly Norse (a North Germanic language), as well as by Latin and Romance languages, especially French.', + 'da', // nearly impossible to find a wikipedia article which is detected as english + 0.000367 // minimum confidence is checked in detect_language(), but that is not yet unit testable + ], + 'German' => [ + 'Deutschland ist ein Bundesstaat in Mitteleuropa. Er besteht aus 16 Ländern und ist als freiheitlich-demokratischer und sozialer Rechtsstaat verfasst. Die Bundesrepublik Deutschland stellt die jüngste Ausprägung des deutschen Nationalstaates dar. Mit rund 82,8 Millionen Einwohnern (31. Dezember 2016) zählt Deutschland zu den dicht besiedelten Flächenstaaten.', + 'de', + 0.134339 + ], + 'Norwegian' => [ + 'Kongeriket Norge er et nordisk, europeisk land og en selvstendig stat vest på Den skandinaviske halvøy. Landet er langt og smalt, og kysten strekker seg langs Nord-Atlanteren, hvor også Norges kjente fjorder befinner seg. Totalt dekker det relativt tynt befolkede landet 385 000 kvadratkilometer med litt over fem millioner innbyggere (2016).', + 'no', + 0.007076 + ] + ]; + } + + + /** + * @covers ::get_language_name + * @dataProvider getLanguageNameProvider + */ + public function testGetLanguageName($lang, $name, $trans) { + $this->assertEquals($name, get_language_name($lang)); + foreach ($trans as $k => $v) { + //echo "$k -> $v"; + $this->assertEquals($v, get_language_name($lang, $k)); + } + } + + public function getLanguageNameProvider() { + return [ + 'empty language code' => [ + '', + '', + ['de' => ''] + ], + 'invalid language code' => [ + 'zz', + 'zz', + ['de' => 'zz'] + ], + 'de' => [ + 'de', + 'German', + [ + 'de' => 'Deutsch', + 'nb' => 'tysk' + ] + ], + 'de-de' => [ + 'de-de', + 'German', + [ + 'de-de' => 'Deutsch', + 'nb' => 'Deutsch' // should be tysk, seems to be a bug upstream + ] + ], + 'en' => [ + 'en', + 'English', + [ + 'de' => 'Englisch', + 'nb' => 'engelsk' + ] + ], + 'en-gb' => [ + 'en-gb', + 'British English', + [ + 'de' => 'Britisches Englisch', + 'nb' => 'britisk engelsk' + ] + ], + 'en-au' => [ + 'en-au', + 'Australian English', + [ + 'de' => 'Australisches Englisch', + 'nb' => 'australsk engelsk' + ] + ], + 'nb' => [ + 'nb', + 'Norwegian Bokmål', + [ + 'de' => 'Norwegisch Bokmål', + 'nb' => 'norsk bokmål' + ] + ] + ]; + } +} \ No newline at end of file -- cgit v1.2.3 From 8e4c5db766ce23d05b8507991b04fece743147de Mon Sep 17 00:00:00 2001 From: Klaus Weidenbach Date: Wed, 25 Oct 2017 01:57:18 +0200 Subject: :arrow_up: Update Text_LanguageDetect. Update from v0.3.0 (2012) to v1.0.0 (2017) which should remove some warnings and improve PHP7 support. Using composer to handle this PEAR library now. Fix a problem in FeedutilsTest. --- tests/unit/includes/LanguageTest.php | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'tests/unit/includes/LanguageTest.php') diff --git a/tests/unit/includes/LanguageTest.php b/tests/unit/includes/LanguageTest.php index 789dbe80b..8bf9ca766 100644 --- a/tests/unit/includes/LanguageTest.php +++ b/tests/unit/includes/LanguageTest.php @@ -52,7 +52,6 @@ class LanguageTest extends UnitTestCase { // Can not unit test detect_language(), therefore test the used library // only for now to find regressions on library updates. - require_once('library/langdet/Text/LanguageDetect.php'); $l = new Text_LanguageDetect; // return 2-letter ISO 639-1 (en) language code $l->setNameMode(2); @@ -71,8 +70,8 @@ class LanguageTest extends UnitTestCase { ], 'English' => [ 'English is a West Germanic language that was first spoken in early medieval England and is now a global lingua franca.[4][5] Named after the Angles, one of the Germanic tribes that migrated to England, it ultimately derives its name from the Anglia (Angeln) peninsula in the Baltic Sea. It is closely related to the Frisian languages, but its vocabulary has been significantly influenced by other Germanic languages, particularly Norse (a North Germanic language), as well as by Latin and Romance languages, especially French.', - 'da', // nearly impossible to find a wikipedia article which is detected as english - 0.000367 // minimum confidence is checked in detect_language(), but that is not yet unit testable + 'en', + 0.078422 ], 'German' => [ 'Deutschland ist ein Bundesstaat in Mitteleuropa. Er besteht aus 16 Ländern und ist als freiheitlich-demokratischer und sozialer Rechtsstaat verfasst. Die Bundesrepublik Deutschland stellt die jüngste Ausprägung des deutschen Nationalstaates dar. Mit rund 82,8 Millionen Einwohnern (31. Dezember 2016) zählt Deutschland zu den dicht besiedelten Flächenstaaten.', -- cgit v1.2.3 From 66832c41e9fff481c20ca219b3cc0a4e53b8b551 Mon Sep 17 00:00:00 2001 From: Klaus Weidenbach Date: Wed, 25 Oct 2017 23:21:07 +0200 Subject: :arrow_up: Update intl library. Update intl library from v0.4? (2014) to v0.7.4 (2016). Use global composer autoloader now. --- tests/unit/includes/LanguageTest.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tests/unit/includes/LanguageTest.php') diff --git a/tests/unit/includes/LanguageTest.php b/tests/unit/includes/LanguageTest.php index 8bf9ca766..dd71115c4 100644 --- a/tests/unit/includes/LanguageTest.php +++ b/tests/unit/includes/LanguageTest.php @@ -140,7 +140,7 @@ class LanguageTest extends UnitTestCase { 'British English', [ 'de' => 'Britisches Englisch', - 'nb' => 'britisk engelsk' + 'nb' => 'engelsk (Storbritannia)' ] ], 'en-au' => [ @@ -148,7 +148,7 @@ class LanguageTest extends UnitTestCase { 'Australian English', [ 'de' => 'Australisches Englisch', - 'nb' => 'australsk engelsk' + 'nb' => 'engelsk (Australia)' ] ], 'nb' => [ -- cgit v1.2.3