diff options
author | friendica <info@friendica.com> | 2012-05-12 01:55:18 -0700 |
---|---|---|
committer | friendica <info@friendica.com> | 2012-05-12 01:55:18 -0700 |
commit | 62727012d37ef3d3cacc413d5667dc2d7bbf9cbb (patch) | |
tree | 14c222386842938da02f3d43ecb8da12c63e0dc5 /library/langdet/docs | |
parent | 99e4ea19e733f86259e39f5a22d64f1521abc5ae (diff) | |
download | volse-hubzilla-62727012d37ef3d3cacc413d5667dc2d7bbf9cbb.tar.gz volse-hubzilla-62727012d37ef3d3cacc413d5667dc2d7bbf9cbb.tar.bz2 volse-hubzilla-62727012d37ef3d3cacc413d5667dc2d7bbf9cbb.zip |
language detection library
Diffstat (limited to 'library/langdet/docs')
-rw-r--r-- | library/langdet/docs/example_clui.php | 35 | ||||
-rw-r--r-- | library/langdet/docs/example_web.php | 72 | ||||
-rw-r--r-- | library/langdet/docs/iso.php | 21 |
3 files changed, 128 insertions, 0 deletions
diff --git a/library/langdet/docs/example_clui.php b/library/langdet/docs/example_clui.php
new file mode 100644
index 000000000..8e7d8577d
--- /dev/null
+++ b/library/langdet/docs/example_clui.php
@@ -0,0 +1,35 @@
+<?php
+
+/**
+ * Example usage (CLI): prints the supported languages, then passes each line
+ * read from stdin to detect() and detectUnicodeBlocks() and prints the results.
+ *
+ * @package Text_LanguageDetect
+ * @version CVS: $Id: example_clui.php 322305 2012-01-15 00:04:17Z clockwerx $
+ */
+
+require_once 'Text/LanguageDetect.php';
+
+$l = new Text_LanguageDetect;
+
+$stdin = fopen('php://stdin', 'r');
+
+echo "Supported languages:\n";
+$langs = $l->getLanguages();
+sort($langs);
+echo join(', ', $langs);
+
+echo "\ntotal ", count($langs), "\n\n";
+
+// Compare against false explicitly so a final line consisting of "0" is still processed.
+while (($line = fgets($stdin)) !== false) {
+    $result = $l->detect($line, 4);
+    print_r($result);
+    $blocks = $l->detectUnicodeBlocks($line, true);
+    print_r($blocks);
+}
+
+fclose($stdin);
+unset($l);
+
+/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
diff --git a/library/langdet/docs/example_web.php b/library/langdet/docs/example_web.php
new file mode 100644
index 000000000..1e155fef2
--- /dev/null
+++ b/library/langdet/docs/example_web.php
@@ -0,0 +1,72 @@
+<?php
+
+/**
+ * Example usage (web): lists the supported languages and shows a form whose
+ * submitted text is passed to detectConfidence() and detectUnicodeBlocks().
+ *
+ * @package Text_LanguageDetect
+ * @version CVS: $Id: example_web.php 205493 2006-01-18 00:26:57Z taak $
+ */
+
+// browsers will encode multi-byte characters wrong unless they think the page is utf8-encoded
+header('Content-type: text/html; charset=utf-8', true);
+
+require_once 'Text/LanguageDetect.php';
+
+$l = new Text_LanguageDetect;
+
+// $q is always defined so the template never reads an undefined variable; non-string input
+// (e.g. q[]=x) is ignored, and slashes are stripped only when magic quotes added them.
+$q = '';
+if (isset($_REQUEST['q']) && is_string($_REQUEST['q'])) {
+    $q = ini_get('magic_quotes_gpc') ? stripslashes($_REQUEST['q']) : $_REQUEST['q'];
+}
+
+?>
+<html>
+<head>
+<title>Text_LanguageDetect demonstration</title>
+</head>
+<body>
+<h2>Text_LanguageDetect</h2>
+<?php
+// Full open tags are used here: the short form is only parsed when short_open_tag is enabled.
+echo "<small>Supported languages:\n";
+$langs = $l->getLanguages();
+sort($langs);
+echo implode(', ', array_map('ucfirst', $langs));
+echo "<br />total ", count($langs), "</small><br /><br />";
+?>
+<form method="post">
+Enter text to identify language (at least a couple of sentences):<br />
+<textarea name="q" wrap="virtual" cols="80" rows="8"><?php echo htmlspecialchars($q, ENT_QUOTES, 'UTF-8'); ?></textarea>
+<br />
+<input type="submit" value="Submit" />
+</form>
+<?php
+if ($q !== '') {
+    $len = $l->utf8strlen($q);
+    if ($len < 20) { // this value picked somewhat arbitrarily
+        echo "Warning: string not very long ($len chars)<br />\n";
+    }
+
+    $result = $l->detectConfidence($q);
+
+    if ($result == null) {
+        echo "Text_LanguageDetect cannot identify this piece of text. <br /><br />\n";
+    } else {
+        echo "Text_LanguageDetect thinks this text is written in <b>{$result['language']}</b> ({$result['similarity']}, {$result['confidence']})<br /><br />\n";
+    }
+
+    $result = $l->detectUnicodeBlocks($q, false);
+    if (!empty($result)) {
+        arsort($result);
+        echo "Unicode blocks present: ", join(', ', array_keys($result)), "\n<br /><br />";
+    }
+}
+
+unset($l);
+
+/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
+?>
+</body></html>
diff --git a/library/langdet/docs/iso.php b/library/langdet/docs/iso.php
new file mode 100644
index 000000000..6d7ec1d2e
--- /dev/null
+++ b/library/langdet/docs/iso.php
@@ -0,0 +1,21 @@
+<?php
+/**
+ * Demonstrates how to use ISO language codes.
+ *
+ * The "name mode" changes the way languages are accepted and returned.
+ */
+require_once 'Text/LanguageDetect.php';
+$l = new Text_LanguageDetect();
+
+// The same sample is used for both calls, so only the name mode differs between the outputs.
+$sample = 'Das ist ein kleiner Text';
+
+// will output the ISO 639-1 two-letter language code
+// "de"
+$l->setNameMode(2);
+echo $l->detectSimple($sample) . "\n";
+
+// will output the ISO 639-2 three-letter language code
+// "deu"
+$l->setNameMode(3);
+echo $l->detectSimple($sample) . "\n";