- commit
- c70e23e44137b076ce3db100ac66b940358e4bd8
- parent
- 8184ad526ec1080060f790a1fa99b0533fdb1b8a
- Author
- Tobias Bengfort <tobias.bengfort@posteo.de>
- Date
- 2025-05-19 09:17
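simplify distance

`prod (pi/qi)**pi` is how much more likely it is that observation p was created by model p than by model q. Applying log() turns this into the KL divergence. `prod qi**pi` is the probability that observation p was created by model q. Since `prod (pi/qi)**pi * prod qi**pi = prod pi**pi` is the same for each q, the model q with the smallest KL divergence is also the one with the largest `prod qi**pi`, so `1 / prod qi**pi` ranks the models in the same order and can be used as the distance directly.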
Diffstat
| M | README.md | 5 | ++--- |
| M | demo/demo.js | 11 | +++-------- |
| M | test.py | 6 | +----- |
3 files changed, 6 insertions, 16 deletions
diff --git a/README.md b/README.md
@@ -33,10 +33,9 @@ You can use the model like this:
33 33
34 34 ```py
35 35 def dist(p, q):
36 -1 # https://en.wikipedia.org/wiki/Kullback-Leibler_divergence
37 -1 pp = [pi + 0.0000001 for pi in p]
-1 36 # 0 does not mean impossible, just very unlikely
38 37 qq = [qi + 0.0000001 for qi in q]
39 -1 return sum(pi * math.log(pi / qi) for pi, qi in zip(pp, qq)) / sum(pp)
-1 38 return 1 / math.prod(qi ** (pi / sum(p)) for pi, qi in zip(p, qq))
40 39
41 40 def classify(model, text):
42 41 n = len(text) + 1
diff --git a/demo/demo.js b/demo/demo.js
@@ -10,22 +10,17 @@ var count = (text, ngram) => {
10 10 return (text.match(new RegExp(ngram, 'g')) || []).length;
11 11 };
12 12
13 -1 var sum = a => {
14 -1 return a.reduce((s, v) => s + v, 0);
15 -1 };
-1 13 var sum = a => a.reduce((s, v) => s + v, 0);
-1 14 var prod = a => a.reduce((s, v) => s * v, 1);
16 15
17 16 var dist = (p, q) => {
18 -1 // KL divergence breaks down for a single value
19 17 if (p.length === 1) {
20 18 return Math.abs(p[0] - q[0]);
21 19 }
22 20
23 21 // 0 does not mean impossible, just very unlikely
24 -1 var pp = p.map(pi => pi + 0.0000001);
25 22 var qq = q.map(qi => qi + 0.0000001);
26 -1
27 -1 // https://en.wikipedia.org/wiki/Kullback-Leibler_divergence
28 -1 return sum(pp.map((pi, i) => pi * Math.log(pi / qq[i]))) / sum(pp);
-1 23 return 1 / prod(p.map((pi, i) => Math.pow(qq[i], pi / sum(p))));
29 24 };
30 25
31 26 var classify = text => {
diff --git a/test.py b/test.py
@@ -62,16 +62,12 @@ LANG_MAP = {
62 62
63 63
64 64 def dist(p, q):
65 -1 # KL divergence breaks down for a single value
66 65 if len(p) == 1:
67 66 return abs(p[0] - q[0])
68 67
69 68 # 0 does not mean impossible, just very unlikely
70 -1 pp = [pi + 0.0000001 for pi in p]
71 69 qq = [qi + 0.0000001 for qi in q]
72 -1
73 -1 # https://en.wikipedia.org/wiki/Kullback-Leibler_divergence
74 -1 return sum(pi * math.log(pi / qi) for pi, qi in zip(pp, qq)) / sum(pp)
-1 70 return 1 / math.prod(qi ** (pi / sum(p)) for pi, qi in zip(p, qq))
75 71
76 72
77 73 def classify(model, text):