tiny-lang-detect

Generate tiny models for language detection  https://p.ce9e.org/tiny-lang-detect/demo/
git clone https://git.ce9e.org/tiny-lang-detect.git

commit
a3e48ff6c5886c46a18cbb9fa15646a3b1ce4041
parent
1bf85abcec4588d0e3f0806016148a68ffb13918
Author
Tobias Bengfort <tobias.bengfort@posteo.de>
Date
2025-05-06 06:10
add js demo

Diffstat

A demo/demo.js 36 ++++++++++++++++++++++++++++++++++++
A demo/index.html 16 ++++++++++++++++
A demo/style.css 27 +++++++++++++++++++++++++++

3 files changed, 79 insertions, 0 deletions


diff --git a/demo/demo.js b/demo/demo.js

@@ -0,0 +1,36 @@
   // Hard-coded detection model. `ngrams` lists the character sequences that
   // are counted in the input; `freq` maps a language code to one reference
   // value per n-gram, stored in the same order as `ngrams`.
   const model = {
       ngrams: [' t', 'ch', ' th', 'er', 'en ', 'a', 'e', 'o'],
       freq: {
           de: [0.0005, 0.0194, 0.0002, 0.0299, 0.0202, 0.0574, 0.1465, 0.0311],
           en: [0.0163, 0.0035, 0.0161, 0.0135, 0.0022, 0.0897, 0.1067, 0.0715],
       },
   };
   -1     8 
   /**
    * Count the non-overlapping occurrences of `ngram` in `text`.
    *
    * @param {string} text - Text to search.
    * @param {string} ngram - Character sequence to look for, matched literally.
    * @returns {number} Number of matches found (0 when there are none).
    */
   const count = (text, ngram) => {
       // Escape regex metacharacters: an n-gram such as '.' or '+' must be
       // matched as plain text, not interpreted as pattern syntax.
       const literal = ngram.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
       // String.prototype.match returns null (not an empty array) on no match.
       return (text.match(new RegExp(literal, 'g')) || []).length;
   };
   -1    12 
   /**
    * Sum of squared differences between `a` and `b`, compared index by index.
    *
    * @param {number[]} a - Values whose indices drive the iteration.
    * @param {number[]} b - Values read at the same indices as `a`.
    * @returns {number} The accumulated squared difference.
    */
   const dist = (a, b) => {
       let total = 0;
       a.forEach((value, i) => {
           const delta = value - b[i];
           total += Math.pow(delta, 2);
       });
       return total;
   };
   -1    16 
   /**
    * Pick the language in `model.freq` whose reference vector is closest to
    * the n-gram frequencies measured in `text`.
    *
    * @param {string} text - Text to classify.
    * @returns {?string} A key of `model.freq`, or null for empty text.
    */
   const classify = text => {
       // Nothing to measure: keep reporting "no result" for empty input.
       if (text.length === 0) {
           return null;
       }

       const n = text.length + 1;
       const freq = model.ngrams.map(g => {
           // n - g.length is the number of positions at which an n-gram of
           // this length can start. If none fits, the frequency is 0; dividing
           // would yield 0/0 = NaN and make every distance comparison fail.
           const windows = n - g.length;
           return windows > 0 ? count(text, g) / windows : 0;
       });

       let best = null;
       let bestDist = Infinity;
       for (const [lang, expected] of Object.entries(model.freq)) {
           const d = dist(expected, freq);
           // Strict comparison: on a tie the language listed first wins.
           if (d < bestDist) {
               bestDist = d;
               best = lang;
           }
       }
       return best;
   };
   -1    31 
   // Wire the page together: every edit of the textarea re-runs the
   // classifier and writes its result into the <output> element.
   const textarea = document.querySelector('textarea');
   const output = document.querySelector('output');

   function showDetectedLanguage() {
       output.textContent = classify(textarea.value);
   }

   textarea.addEventListener('input', showDetectedLanguage);

diff --git a/demo/index.html b/demo/index.html

@@ -0,0 +1,16 @@
   <!DOCTYPE html>
   <html lang="en">
   <head>
       <meta charset="UTF-8">
       <meta name="viewport" content="width=device-width, initial-scale=1">
       <meta http-equiv="Content-Security-Policy" content="default-src 'self'">
       <title>Language Detection</title>
       <link rel="stylesheet" href="style.css">
   </head>
   <body>
       <!-- demo.js selects the first <textarea> and the first <output> on the page. -->
       <textarea id="text" aria-label="Text to classify"></textarea>
       <output for="text"></output>

       <script src="demo.js" type="module"></script>
   </body>
   </html>

diff --git a/demo/style.css b/demo/style.css

@@ -0,0 +1,27 @@
   /* Size every element by its border box so padding never widens it. */
   * {
       box-sizing: border-box;
   }

   /* Centered column with a capped inline size and side padding. */
   body {
       max-inline-size: 60em;
       margin-inline: auto;
       padding-inline: 1em;
   }

   /* Input area: spans the column, resizable along the block axis only. */
   textarea {
       resize: block;
       inline-size: 100%;
       min-block-size: 50vb;
   }

   /* Result: large text on its own line, aligned to the inline end. */
   output {
       display: block;
       font-size: 200%;
       text-align: end;
   }

   /* Follow the user's dark-mode preference. */
   @media (prefers-color-scheme: dark) {
       :root {
           color-scheme: dark;
       }
   }