tiny-lang-detect

Generate tiny models for language detection  https://p.ce9e.org/tiny-lang-detect/demo/
git clone https://git.ce9e.org/tiny-lang-detect.git

commit
e9340ad6f80c3c09853e5860ef694ba036de52ba
parent
1698810e8a923192a8f15347644155f7a39eafe5
Author
Tobias Bengfort <tobias.bengfort@posteo.de>
Date
2026-01-28 07:30
add long cli arguments

Diffstat

M gen_model.py 10 +++++-----

1 file changed, 5 insertions, 5 deletions


diff --git a/gen_model.py b/gen_model.py

@@ -12,7 +12,7 @@ def abs_diff(arr):
   12    12     return max(arr) - min(arr)
   13    13 
   14    14 
   15    -1 def get_model(*langs, n=8, ndigits=None):
   -1    15 def get_model(*langs, n=8, precision=None):
   16    16     data = {lang: get_data(lang) for lang in langs}
   17    17 
   18    18     ngrams = set()
@@ -27,7 +27,7 @@ def get_model(*langs, n=8, ndigits=None):
   27    27     return {
   28    28         'ngrams': ngrams,
   29    29         'freq': {
   30    -1             lang: [round(d.get(g, 0), ndigits) for g in ngrams]
   -1    30             lang: [round(d.get(g, 0), precision) for g in ngrams]
   31    31             for lang, d in data.items()
   32    32         },
   33    33     }
@@ -36,9 +36,9 @@ def get_model(*langs, n=8, ndigits=None):
   36    36 if __name__ == '__main__':
   37    37     parser = argparse.ArgumentParser()
   38    38     parser.add_argument('lang', nargs='+')
   39    -1     parser.add_argument('-n', type=int, default=8)
   40    -1     parser.add_argument('-p', type=int, default=4)
   -1    39     parser.add_argument('-n', '--ngrams', type=int, default=8)
   -1    40     parser.add_argument('-p', '--precision', type=int, default=4)
   41    41     args = parser.parse_args()
   42    42 
   43    -1     model = get_model(*args.lang, n=args.n, ndigits=args.p)
   -1    43     model = get_model(*args.lang, n=args.ngrams, precision=args.precision)
   44    44     print(json.dumps(model, ensure_ascii=False))