aboutsummaryrefslogtreecommitdiffstats
path: root/library/langdet/docs
diff options
context:
space:
mode:
authorfriendica <info@friendica.com>2012-05-12 01:55:18 -0700
committerfriendica <info@friendica.com>2012-05-12 01:55:18 -0700
commit62727012d37ef3d3cacc413d5667dc2d7bbf9cbb (patch)
tree14c222386842938da02f3d43ecb8da12c63e0dc5 /library/langdet/docs
parent99e4ea19e733f86259e39f5a22d64f1521abc5ae (diff)
downloadvolse-hubzilla-62727012d37ef3d3cacc413d5667dc2d7bbf9cbb.tar.gz
volse-hubzilla-62727012d37ef3d3cacc413d5667dc2d7bbf9cbb.tar.bz2
volse-hubzilla-62727012d37ef3d3cacc413d5667dc2d7bbf9cbb.zip
language detection library
Diffstat (limited to 'library/langdet/docs')
-rw-r--r--library/langdet/docs/example_clui.php35
-rw-r--r--library/langdet/docs/example_web.php72
-rw-r--r--library/langdet/docs/iso.php21
3 files changed, 128 insertions, 0 deletions
diff --git a/library/langdet/docs/example_clui.php b/library/langdet/docs/example_clui.php
new file mode 100644
index 000000000..8e7d8577d
--- /dev/null
+++ b/library/langdet/docs/example_clui.php
@@ -0,0 +1,35 @@
+<?php
+
+/**
+ * example usage (CLI)
+ *
+ * @package Text_LanguageDetect
+ * @version CVS: $Id: example_clui.php 322305 2012-01-15 00:04:17Z clockwerx $
+ */
+
+require_once 'Text/LanguageDetect.php';
+
+$l = new Text_LanguageDetect;
+$stdin = fopen('php://stdin', 'r');
+
+// Print the alphabetically sorted list of languages the detector reports, then how many there are.
+echo "Supported languages:\n";
+$langs = $l->getLanguages();
+sort($langs);
+echo join(', ', $langs);
+echo "\ntotal ", count($langs), "\n\n";
+
+// Compare against false explicitly: a bare truthiness test would also stop on a final line "0".
+while (($line = fgets($stdin)) !== false) {
+    $result = $l->detect($line, 4);
+    print_r($result);
+    $blocks = $l->detectUnicodeBlocks($line, true);
+    print_r($blocks);
+}
+
+fclose($stdin);
+unset($l);
+
+/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
+
+?>
diff --git a/library/langdet/docs/example_web.php b/library/langdet/docs/example_web.php
new file mode 100644
index 000000000..1e155fef2
--- /dev/null
+++ b/library/langdet/docs/example_web.php
@@ -0,0 +1,72 @@
+<?php
+
+/**
+ * example usage (web)
+ *
+ * @package Text_LanguageDetect
+ * @version CVS: $Id: example_web.php 205493 2006-01-18 00:26:57Z taak $
+ */
+
+// browsers will encode multi-byte characters wrong unless they think the page is utf8-encoded
+header('Content-type: text/html; charset=utf-8', true);
+
+require_once 'Text/LanguageDetect.php';
+
+$l = new Text_LanguageDetect;
+// Default to an empty string so the form below never reads an undefined variable.
+$q = '';
+if (isset($_REQUEST['q']) && is_string($_REQUEST['q'])) {
+    $q = $_REQUEST['q'];
+    // Only undo magic quotes where they can exist (PHP < 5.4); elsewhere stripslashes() would eat real backslashes.
+    if (version_compare(PHP_VERSION, '5.4.0', '<') && get_magic_quotes_gpc()) {
+        $q = stripslashes($q);
+    }
+}
+?>
+<html>
+<head>
+<title>Text_LanguageDetect demonstration</title>
+</head>
+<body>
+<h2>Text_LanguageDetect</h2>
+<?php
+echo "<small>Supported languages:\n";
+$langs = $l->getLanguages();
+sort($langs);
+foreach ($langs as $lang) {
+    echo ucfirst($lang), ', ';
+}
+// count() gives the total directly; no hand-maintained counter that could be left uninitialised.
+echo "<br />total ", count($langs), "</small><br /><br />";
+?>
+<form method="post">
+Enter text to identify language (at least a couple of sentences):<br />
+<textarea name="q" wrap="virtual" cols="80" rows="8"><?php echo htmlspecialchars($q, ENT_QUOTES, 'UTF-8'); ?></textarea>
+<br />
+<input type="submit" value="Submit" />
+</form>
+<?php
+if ($q !== '') {
+    $len = $l->utf8strlen($q);
+    if ($len < 20) { // this value picked somewhat arbitrarily
+        echo "Warning: string not very long ($len chars)<br />\n";
+    }
+
+    $result = $l->detectConfidence($q);
+    if ($result == null) {
+        echo "Text_LanguageDetect cannot identify this piece of text. <br /><br />\n";
+    } else {
+        echo "Text_LanguageDetect thinks this text is written in <b>{$result['language']}</b> ({$result['similarity']}, {$result['confidence']})<br /><br />\n";
+    }
+
+    $result = $l->detectUnicodeBlocks($q, false);
+    if (!empty($result)) {
+        arsort($result);
+        echo "Unicode blocks present: ", join(', ', array_keys($result)), "\n<br /><br />";
+    }
+}
+
+unset($l);
+/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
+?>
+</body></html>
diff --git a/library/langdet/docs/iso.php b/library/langdet/docs/iso.php
new file mode 100644
index 000000000..6d7ec1d2e
--- /dev/null
+++ b/library/langdet/docs/iso.php
@@ -0,0 +1,21 @@
+<?php
+/**
+ * Shows language detection reporting ISO codes instead of language names.
+ *
+ * Switching the detector's "name mode" changes how languages are accepted
+ * and returned.
+ */
+require_once 'Text/LanguageDetect.php';
+
+$detector = new Text_LanguageDetect();
+$sample   = 'Das ist ein kleiner Text';
+
+// Name mode 2: prints the ISO 639-1 two-letter code, "de".
+$detector->setNameMode(2);
+echo $detector->detectSimple($sample), "\n";
+
+// Name mode 3: prints the ISO 639-2 three-letter code, "deu".
+$detector->setNameMode(3);
+echo $detector->detectSimple($sample), "\n";
+
+?>
\ No newline at end of file