import argparse
import json
import math

LANG_MAP = {
    'afr': 'af',
    'ara': 'ar',
    'bul': 'bg',
    'ben': 'bn',
    'cat': 'ca',
    'ces': 'cs',
    'cym': 'cy',
    'dan': 'da',
    'deu': 'de',
    'ell': 'el',
    'eng': 'en',
    'spa': 'es',
    'est': 'et',
    'fas': 'fa',
    'fin': 'fi',
    'fra': 'fr',
    'guj': 'gu',
    'heb': 'he',
    'hin': 'hi',
    'hrv': 'hr',
    'hun': 'hu',
    'ind': 'id',
    'ita': 'it',
    'jpn': 'ja',
    'kan': 'kn',
    'kor': 'ko',
    'lit': 'lt',
    'lav': 'lv',
    'mkd': 'mk',
    'mal': 'ml',
    'mar': 'mr',
    'nep': 'ne',
    'nld': 'nl',
    'nor': 'no',
    'pan': 'pa',
    'pol': 'pl',
    'por': 'pt',
    'ron': 'ro',
    'rus': 'ru',
    'slk': 'sk',
    'slv': 'sl',
    'som': 'so',
    'sqi': 'sq',
    'swe': 'sv',
    'swa': 'sw',
    'tam': 'ta',
    'tel': 'te',
    'tha': 'th',
    'tgl': 'tl',
    'tur': 'tr',
    'ukr': 'uk',
    'urd': 'ur',
    'vie': 'vi',
    'zho': 'zh-cn',
    # 'zho': 'zh-tw',
}


def probability(p, q):
    # 0 does not mean impossible, just very unlikely
    a = 0.0000001
    qq = [qi * (1 - 2 * a) + a for qi in q]
    return math.prod(qi ** pi * (1 - qi) ** (1 - pi) for pi, qi in zip(p, qq))


def classify(model, text):
    n = len(text) + 1
    freq = [text.count(g) / (n - len(g)) for g in model['ngrams']]
    return max(model['freq'], key=lambda lang: probability(freq, model['freq'][lang]))


def test(model):
    total = 0
    correct = 0

    with open('data/wili/x_test.txt') as fh:
        with open('data/wili/y_test.txt') as fh2:
            for lang, text in zip(fh2, fh):
                lang = LANG_MAP.get(lang.rstrip())
                text = text.rstrip()
                if lang in model['freq']:
                    actual = classify(model, text)
                    total += 1
                    if actual == lang:
                        correct += 1

    print(
        f'{correct} out of {total} samples were detected correctly'
        f' ({correct / total:.1%})'
    )


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('model')
    args = parser.parse_args()

    with open(args.model) as fh:
        model = json.load(fh)

    test(model)