- commit
- e160d8cb04fb6d6e6c762edcb27cbc4f704fb1d1
- parent
- 315458d0a09b6e790689e7e56ccb0428eca4a4c2
- Author
- Tobias Bengfort <tobias.bengfort@posteo.de>
- Date
- 2025-05-10 18:32
gen_model: most significant first
Diffstat
| M | README.md | 6 | +++--- |
| M | gen_model.py | 4 | ++-- |
2 files changed, 5 insertions, 5 deletions
diff --git a/README.md b/README.md
@@ -21,10 +21,10 @@ A model might look like this:
 
 ```json
 {
-  "ngrams": ["ei", "en", " t", "ch", " th", "er", "en ", "a", "e", "o"],
+  "ngrams": ["o", "e", "a", "en ", "er", " th", "ch", " t", "en", "ei"],
   "freq": {
-    "en": [0.0009, 0.0079, 0.0164, 0.0036, 0.0161, 0.0135, 0.0023, 0.0897, 0.1067, 0.0716],
-    "de": [0.0159, 0.0233, 0.0006, 0.0195, 0.0002, 0.0299, 0.0202, 0.0574, 0.1466, 0.0311]
+    "en": [0.0716, 0.1067, 0.0897, 0.0023, 0.0135, 0.0161, 0.0036, 0.0164, 0.0079, 0.0009],
+    "de": [0.0311, 0.1466, 0.0574, 0.0202, 0.0299, 0.0002, 0.0195, 0.0006, 0.0233, 0.0159]
   }
 }
 ```
diff --git a/gen_model.py b/gen_model.py
@@ -21,8 +21,8 @@ def get_model(*langs, n=8, ndigits=None):
     ngrams = list(ngrams)
 
     # prioritize by biggest absolute difference
-    ngrams.sort(key=lambda k: abs_diff([d.get(k, 0) for d in data.values()]))
-    ngrams = ngrams[-n:]
+    ngrams.sort(key=lambda k: -abs_diff([d.get(k, 0) for d in data.values()]))
+    ngrams = ngrams[:n]
 
     return {
         'ngrams': ngrams,