tiny-lang-detect

Generate tiny models for language detection  https://p.ce9e.org/tiny-lang-detect/demo/
git clone https://git.ce9e.org/tiny-lang-detect.git

commit
c2755acb5d4af6c990d63f827e18c83e33e20c69
parent
4520c824216c9f7c751f95989fe2076a061ec3e5
Author
Tobias Bengfort <tobias.bengfort@posteo.de>
Date
2025-05-06 05:03
download data

Diffstat

A .gitignore 1 +
A Makefile 18 ++++++++++++++++++

2 files changed, 19 insertions, 0 deletions


diff --git a/.gitignore b/.gitignore

@@ -0,0 +1 @@
   -1     1 data

diff --git a/Makefile b/Makefile

@@ -0,0 +1,18 @@
   -1     1 .PHONY: all
   -1     2 all: data/wili data/profiles  # first target in the file, so plain `make` builds both data directories
   -1     3 # Files matching *_test.txt from wili-2018.zip; unpacked into $@.tmp and renamed last so a failed run never leaves a data/wili that looks up to date.
   -1     4 data/wili:
   -1     5 	wget 'https://zenodo.org/records/841984/files/wili-2018.zip?download=1' -O /tmp/wili.zip
   -1     6 	@rm -rf $@ $@.tmp && mkdir -p $@.tmp
   -1     7 	unzip /tmp/wili.zip '*_test.txt' -d $@.tmp
   -1     8 	@rm -f /tmp/wili.zip && mv $@.tmp $@
   -1     9 # Profile files from the langdetect master archive, flattened by unzip -j; unpacked into $@.tmp and renamed last so a failed run never leaves a data/profiles that looks up to date.
   -1    10 data/profiles:
   -1    11 	wget https://github.com/DoodleBears/langdetect/archive/refs/heads/master.zip -O /tmp/langdetect.zip
   -1    12 	@rm -rf $@ $@.tmp && mkdir -p $@.tmp
   -1    13 	unzip -j /tmp/langdetect.zip 'langdetect-master/langdetect/profiles/*' -d $@.tmp
   -1    14 	@rm -f /tmp/langdetect.zip && mv $@.tmp $@
   -1    15 # Delete the data directory, which contains both download targets (data/wili and data/profiles).
   -1    16 .PHONY: clean
   -1    17 clean:
   -1    18 	rm -rf data