From 62727012d37ef3d3cacc413d5667dc2d7bbf9cbb Mon Sep 17 00:00:00 2001 From: friendica Date: Sat, 12 May 2012 01:55:18 -0700 Subject: language detection library --- library/langdet/docs/example_web.php | 72 ++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 library/langdet/docs/example_web.php (limited to 'library/langdet/docs/example_web.php') diff --git a/library/langdet/docs/example_web.php b/library/langdet/docs/example_web.php new file mode 100644 index 000000000..1e155fef2 --- /dev/null +++ b/library/langdet/docs/example_web.php @@ -0,0 +1,72 @@ + + + +Text_LanguageDetect demonstration + + +

Text_LanguageDetect

+Supported languages:\n"; +$langs = $l->getLanguages(); +sort($langs); +foreach ($langs as $lang) { + echo ucfirst($lang), ', '; + $i++; +} + +echo "
total $i

"; + +?> +
+Enter text to identify language (at least a couple of sentences):
+ +
+ +
+utf8strlen($q); + if ($len < 20) { // this value picked somewhat arbitrarily + echo "Warning: string not very long ($len chars)
\n"; + } + + $result = $l->detectConfidence($q); + + if ($result == null) { + echo "Text_LanguageDetect cannot identify this piece of text.

\n"; + } else { + echo "Text_LanguageDetect thinks this text is written in {$result['language']} ({$result['similarity']}, {$result['confidence']})

\n"; + } + + $result = $l->detectUnicodeBlocks($q, false); + if (!empty($result)) { + arsort($result); + echo "Unicode blocks present: ", join(', ', array_keys($result)), "\n

"; + } +} + +unset($l); + +/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */ + +?> + -- cgit v1.2.3