- commit
- 2479e1d6933082faf7d4fc5527d76f55c5087f3e
- parent
- 127bc181a6b4c10fd266504951d68dbc2e9aa4ef
- Author
- Tobias Bengfort <tobias.bengfort@posteo.de>
- Date
- 2025-05-10 16:50
limit precision
Diffstat
| M | README.md | 26 | ++------------------------ |
| M | gen_model.py | 9 | +++++---- |
2 files changed, 7 insertions, 28 deletions
diff --git a/README.md b/README.md
@@ -23,30 +23,8 @@ A model might look like this:
 {
     "ngrams": ["ei", "en", " t", "ch", " th", "er", "en ", "a", "e", "o"],
     "freq": {
-        "en": [
-            0.0008847549205632559,
-            0.007865767270512856,
-            0.01639325502081986,
-            0.0035863210810589343,
-            0.016136794462706813,
-            0.01354675763365741,
-            0.002292672343996773,
-            0.0897445255534594,
-            0.10672365966622427,
-            0.07156346253706898
-        ],
-        "de": [
-            0.015897498950157848,
-            0.023261162650169673,
-            0.0005690935513966353,
-            0.019468205994060662,
-            0.00021883618283788822,
-            0.02992300137058795,
-            0.02022536188476834,
-            0.057449835679986086,
-            0.14656171354570646,
-            0.031128414709526073
-        ]
+        "en": [0.0009, 0.0079, 0.0164, 0.0036, 0.0161, 0.0135, 0.0023, 0.0897, 0.1067, 0.0716],
+        "de": [0.0159, 0.0233, 0.0006, 0.0195, 0.0002, 0.0299, 0.0202, 0.0574, 0.1466, 0.0311]
     }
 }
 ```
diff --git a/gen_model.py b/gen_model.py
@@ -8,7 +8,7 @@ def get_data(lang):
     return {k: v / raw['n_words'][len(k) - 1] for k, v in raw['freq'].items()}
 
 
-def get_model(lang1, lang2, n=8):
+def get_model(lang1, lang2, n=8, ndigits=None):
     data1 = get_data(lang1)
     data2 = get_data(lang2)
 
@@ -21,8 +21,8 @@ def get_model(lang1, lang2, n=8):
     return {
         'ngrams': ngrams,
         'freq': {
-            lang1: [data1.get(g, 0) for g in ngrams],
-            lang2: [data2.get(g, 0) for g in ngrams],
+            lang1: [round(data1.get(g, 0), ndigits) for g in ngrams],
+            lang2: [round(data2.get(g, 0), ndigits) for g in ngrams],
         },
     }
 
@@ -31,7 +31,8 @@ if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument('lang', nargs=2)
     parser.add_argument('-n', type=int, default=8)
+    parser.add_argument('-p', type=int, default=4)
     args = parser.parse_args()
 
-    model = get_model(*args.lang, n=args.n)
+    model = get_model(*args.lang, n=args.n, ndigits=args.p)
     print(json.dumps(model))