From 735e1e4b56e657447a5aa3e6a6ee3681c6154120 Mon Sep 17 00:00:00 2001 From: lm Date: Thu, 25 Apr 2024 16:24:44 +0200 Subject: [PATCH 01/11] fast loading for bitext mining --- mteb/abstasks/CrosslingualTask.py | 23 +- .../multilingual/TatoebaBitextMining.py | 5 +- .../Tatoeba_fast.json | 792 ++++++++++++++++++ .../Tatoeba_slow.json | 792 ++++++++++++++++++ 4 files changed, 1609 insertions(+), 3 deletions(-) create mode 100644 results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/Tatoeba_fast.json create mode 100644 results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/Tatoeba_slow.json diff --git a/mteb/abstasks/CrosslingualTask.py b/mteb/abstasks/CrosslingualTask.py index 38ed327fce..1b6dc9e817 100644 --- a/mteb/abstasks/CrosslingualTask.py +++ b/mteb/abstasks/CrosslingualTask.py @@ -1,7 +1,7 @@ from __future__ import annotations import datasets - +import polars as pl from .AbsTask import AbsTask @@ -20,6 +20,27 @@ def load_data(self, **kwargs): """Load dataset from HuggingFace hub""" if self.data_loaded: return + + fast_loading = self.fast_loading if hasattr(self, 'fast_loading') else False + if fast_loading: + self.fast_load() + else: + self.slow_load() + + def fast_load(self, **kwargs): + """Load all subsets at once, then group by language with Polars""" + self.dataset = {} + merged_dataset = datasets.load_dataset(**self.metadata_dict["dataset"]) # load "default" subset + for split in self.metadata.eval_splits: + grouped_by_lang = dict(merged_dataset[split].to_polars().group_by('lang')) + for lang in self.langs: + if lang not in self.dataset: + self.dataset[lang] = dict() + self.dataset[lang][split] = datasets.Dataset.from_polars(grouped_by_lang[lang].drop('lang')) # Remove lang column and convert back to HF datasets, not strictly necessary but better for compatibility + self.data_loaded = True + + def slow_load(self, **kwargs): + """Each subsets is loaded iteratively""" self.dataset = {} for lang in self.langs: self.dataset[lang] = datasets.load_dataset( diff --git a/mteb/tasks/BitextMining/multilingual/TatoebaBitextMining.py b/mteb/tasks/BitextMining/multilingual/TatoebaBitextMining.py index ad37ad918d..44b0246691 100644 --- a/mteb/tasks/BitextMining/multilingual/TatoebaBitextMining.py +++ b/mteb/tasks/BitextMining/multilingual/TatoebaBitextMining.py @@ -124,8 +124,8 @@ class TatoebaBitextMining(AbsTaskBitextMining, CrosslingualTask): metadata = TaskMetadata( name="Tatoeba", dataset={ - "path": "mteb/tatoeba-bitext-mining", - "revision": "9080400076fbadbb4c4dcb136ff4eddc40b42553", + "path": "loicmagne/tatoeba-bitext-mining", + "revision": "482264e767155e1f8baf2c27815db6cba8e4efa3", }, description="1,000 English-aligned sentence pairs for each language based on the Tatoeba corpus", reference="https://github.com/facebookresearch/LASER/tree/main/data/tatoeba/v1", @@ -147,3 +147,4 @@ class TatoebaBitextMining(AbsTaskBitextMining, CrosslingualTask): n_samples={"test": 2000}, avg_character_length={"test": 39.4}, ) + fast_loading = True diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/Tatoeba_fast.json b/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/Tatoeba_fast.json new file mode 100644 index 0000000000..adb1bb4190 --- /dev/null +++ b/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/Tatoeba_fast.json @@ -0,0 +1,792 @@ +{ + "dataset_revision": "482264e767155e1f8baf2c27815db6cba8e4efa3", + "mteb_dataset_name": "Tatoeba", + "mteb_version": "1.7.32", + "test": { + "afr-eng": { + "accuracy": 0.64, + "f1": 0.5822107142857144, + "main_score": 0.5822107142857144, + "precision": 0.5610540043290043, + "recall": 0.64 + }, + "amh-eng": { + "accuracy": 0.42857142857142855, + "f1": 0.3621362433862434, + "main_score": 0.3621362433862434, + "precision": 0.34078089569160996, + "recall": 0.42857142857142855 + }, + "ang-eng": { + "accuracy": 0.15671641791044777, + "f1": 0.10240275893260965, + "main_score": 0.10240275893260965, + "precision": 0.09278972783143108, + "recall": 0.15671641791044777 + }, + "ara-eng": { + "accuracy": 0.906, + "f1": 0.8793333333333334, + "main_score": 0.8793333333333334, + "precision": 0.867, + "recall": 0.906 + }, + "arq-eng": { + "accuracy": 0.2349066959385291, + "f1": 0.1860239246671392, + "main_score": 0.1860239246671392, + "precision": 0.1710372305460121, + "recall": 0.2349066959385291 + }, + "arz-eng": { + "accuracy": 0.5660377358490566, + "f1": 0.5126153720493344, + "main_score": 0.5126153720493344, + "precision": 0.4917549257171899, + "recall": 0.5660377358490566 + }, + "ast-eng": { + "accuracy": 0.6771653543307087, + "f1": 0.6216972878390201, + "main_score": 0.6216972878390201, + "precision": 0.6003280839895013, + "recall": 0.6771653543307087 + }, + "awa-eng": { + "accuracy": 0.38961038961038963, + "f1": 0.3342918985776129, + "main_score": 0.3342918985776129, + "precision": 0.31577206640231853, + "recall": 0.38961038961038963 + }, + "aze-eng": { + "accuracy": 0.678, + "f1": 0.6209757936507937, + "main_score": 0.6209757936507937, + "precision": 0.599275, + "recall": 0.678 + }, + "bel-eng": { + "accuracy": 0.73, + "f1": 0.6773117826617827, + "main_score": 0.6773117826617827, + "precision": 0.657272619047619, + "recall": 0.73 + }, + "ben-eng": { + "accuracy": 0.424, + "f1": 0.36483772101921225, + "main_score": 0.36483772101921225, + "precision": 0.3457729908979909, + "recall": 0.424 + }, + "ber-eng": { + "accuracy": 0.055, + "f1": 0.04431875208571482, + "main_score": 0.04431875208571482, + "precision": 0.04244082144977572, + "recall": 0.055 + }, + "bos-eng": { + "accuracy": 0.9491525423728814, + "f1": 0.9326741996233521, + "main_score": 0.9326741996233521, + "precision": 0.9246704331450094, + "recall": 0.9491525423728814 + }, + "bre-eng": { + "accuracy": 0.07, + "f1": 0.05564912903188765, + "main_score": 0.05564912903188765, + "precision": 0.05271678663713547, + "recall": 0.07 + }, + "bul-eng": { + "accuracy": 0.943, + "f1": 0.9265, + "main_score": 0.9265, + "precision": 0.9185, + "recall": 0.943 + }, + "cat-eng": { + "accuracy": 0.956, + "f1": 0.9442333333333334, + "main_score": 0.9442333333333334, + "precision": 0.9385833333333332, + "recall": 0.956 + }, + "cbk-eng": { + "accuracy": 0.616, + "f1": 0.5537172077922078, + "main_score": 0.5537172077922078, + "precision": 0.5302890572390573, + "recall": 0.616 + }, + "ceb-eng": { + "accuracy": 0.10333333333333333, + "f1": 0.08054866862924506, + "main_score": 0.08054866862924506, + "precision": 0.07502244878137736, + "recall": 0.10333333333333333 + }, + "ces-eng": { + "accuracy": 0.963, + "f1": 0.9511666666666666, + "main_score": 0.9511666666666666, + "precision": 0.9453333333333332, + "recall": 0.963 + }, + "cha-eng": { + "accuracy": 0.20437956204379562, + "f1": 0.15976711852624262, + "main_score": 0.15976711852624262, + "precision": 0.14914252378716458, + "recall": 0.20437956204379562 + }, + "cmn-eng": { + "accuracy": 0.961, + "f1": 0.9493333333333333, + "main_score": 0.9493333333333333, + "precision": 0.9435, + "recall": 0.961 + }, + "cor-eng": { + "accuracy": 0.046, + "f1": 0.03422572331050592, + "main_score": 0.03422572331050592, + "precision": 0.03162596984336115, + "recall": 0.046 + }, + "csb-eng": { + "accuracy": 0.2766798418972332, + "f1": 0.21564781784063444, + "main_score": 0.21564781784063444, + "precision": 0.19755559548049667, + "recall": 0.2766798418972332 + }, + "cym-eng": { + "accuracy": 0.16521739130434782, + "f1": 0.13245287443752915, + "main_score": 0.13245287443752915, + "precision": 0.12422163707860082, + "recall": 0.16521739130434782 + }, + "dan-eng": { + "accuracy": 0.96, + "f1": 0.948, + "main_score": 0.948, + "precision": 0.9421666666666666, + "recall": 0.96 + }, + "deu-eng": { + "accuracy": 0.977, + "f1": 0.9701666666666667, + "main_score": 0.9701666666666667, + "precision": 0.9668333333333333, + "recall": 0.977 + }, + "dsb-eng": { + "accuracy": 0.3966597077244259, + "f1": 0.3342911512873443, + "main_score": 0.3342911512873443, + "precision": 0.3149256362982877, + "recall": 0.3966597077244259 + }, + "dtp-eng": { + "accuracy": 0.076, + "f1": 0.05692902980992225, + "main_score": 0.05692902980992225, + "precision": 0.052474262895911165, + "recall": 0.076 + }, + "ell-eng": { + "accuracy": 0.965, + "f1": 0.9543333333333334, + "main_score": 0.9543333333333334, + "precision": 0.949, + "recall": 0.965 + }, + "epo-eng": { + "accuracy": 0.468, + "f1": 0.4172759631154753, + "main_score": 0.4172759631154753, + "precision": 0.39916119127785793, + "recall": 0.468 + }, + "est-eng": { + "accuracy": 0.98, + "f1": 0.9733333333333333, + "main_score": 0.9733333333333333, + "precision": 0.97, + "recall": 0.98 + }, + "eus-eng": { + "accuracy": 0.274, + "f1": 0.23177456696198112, + "main_score": 0.23177456696198112, + "precision": 0.2188077901134505, + "recall": 0.274 + }, + "evaluation_time": 83.7, + "fao-eng": { + "accuracy": 0.3320610687022901, + "f1": 0.2750817884405671, + "main_score": 0.2750817884405671, + "precision": 0.25766949335651623, + "recall": 0.3320610687022901 + }, + "fin-eng": { + "accuracy": 0.947, + "f1": 0.9309666666666667, + "main_score": 0.9309666666666667, + "precision": 0.9233333333333332, + "recall": 0.947 + }, + "fra-eng": { + "accuracy": 0.935, + "f1": 0.9172333333333333, + "main_score": 0.9172333333333333, + "precision": 0.9090833333333332, + "recall": 0.935 + }, + "fry-eng": { + "accuracy": 0.37572254335260113, + "f1": 0.3113198458574181, + "main_score": 0.3113198458574181, + "precision": 0.28718689788053947, + "recall": 0.37572254335260113 + }, + "gla-eng": { + "accuracy": 0.05186972255729795, + "f1": 0.03614286125547736, + "main_score": 0.03614286125547736, + "precision": 0.032920330515058396, + "recall": 0.05186972255729795 + }, + "gle-eng": { + "accuracy": 0.141, + "f1": 0.11617234238344076, + "main_score": 0.11617234238344076, + "precision": 0.10977639432639433, + "recall": 0.141 + }, + "glg-eng": { + "accuracy": 0.954, + "f1": 0.9399666666666667, + "main_score": 0.9399666666666667, + "precision": 0.9333333333333332, + "recall": 0.954 + }, + "gsw-eng": { + "accuracy": 0.3162393162393162, + "f1": 0.2574183651106728, + "main_score": 0.2574183651106728, + "precision": 0.23740440845704006, + "recall": 0.3162393162393162 + }, + "heb-eng": { + "accuracy": 0.895, + "f1": 0.8687999999999999, + "main_score": 0.8687999999999999, + "precision": 0.8563333333333334, + "recall": 0.895 + }, + "hin-eng": { + "accuracy": 0.981, + "f1": 0.9761666666666667, + "main_score": 0.9761666666666667, + "precision": 0.9738333333333333, + "recall": 0.981 + }, + "hrv-eng": { + "accuracy": 0.969, + "f1": 0.9598333333333332, + "main_score": 0.9598333333333332, + "precision": 0.9553333333333333, + "recall": 0.969 + }, + "hsb-eng": { + "accuracy": 0.42028985507246375, + "f1": 0.36097980694253984, + "main_score": 0.36097980694253984, + "precision": 0.3393177560879424, + "recall": 0.42028985507246375 + }, + "hun-eng": { + "accuracy": 0.933, + "f1": 0.9158, + "main_score": 0.9158, + "precision": 0.9078666666666667, + "recall": 0.933 + }, + "hye-eng": { + "accuracy": 0.9487870619946092, + "f1": 0.9328391734052113, + "main_score": 0.9328391734052113, + "precision": 0.9249775381850854, + "recall": 0.9487870619946092 + }, + "ido-eng": { + "accuracy": 0.463, + "f1": 0.4025264453846807, + "main_score": 0.4025264453846807, + "precision": 0.38122698051948056, + "recall": 0.463 + }, + "ile-eng": { + "accuracy": 0.644, + "f1": 0.5770626984126984, + "main_score": 0.5770626984126984, + "precision": 0.5510559523809524, + "recall": 0.644 + }, + "ina-eng": { + "accuracy": 0.835, + "f1": 0.7912555555555555, + "main_score": 0.7912555555555555, + "precision": 0.7719416666666666, + "recall": 0.835 + }, + "ind-eng": { + "accuracy": 0.942, + "f1": 0.9274, + "main_score": 0.9274, + "precision": 0.9204166666666667, + "recall": 0.942 + }, + "isl-eng": { + "accuracy": 0.287, + "f1": 0.24065607022176788, + "main_score": 0.24065607022176788, + "precision": 0.2266953861605192, + "recall": 0.287 + }, + "ita-eng": { + "accuracy": 0.945, + "f1": 0.9305, + "main_score": 0.9305, + "precision": 0.9236666666666667, + "recall": 0.945 + }, + "jav-eng": { + "accuracy": 0.21951219512195122, + "f1": 0.17040714930958833, + "main_score": 0.17040714930958833, + "precision": 0.15645673087458636, + "recall": 0.21951219512195122 + }, + "jpn-eng": { + "accuracy": 0.925, + "f1": 0.9041333333333333, + "main_score": 0.9041333333333333, + "precision": 0.8946666666666667, + "recall": 0.925 + }, + "kab-eng": { + "accuracy": 0.018, + "f1": 0.011553926316289228, + "main_score": 0.011553926316289228, + "precision": 0.010367042966611933, + "recall": 0.018 + }, + "kat-eng": { + "accuracy": 0.9651474530831099, + "f1": 0.9544235924932976, + "main_score": 0.9544235924932976, + "precision": 0.9495084897229669, + "recall": 0.9651474530831099 + }, + "kaz-eng": { + "accuracy": 0.40695652173913044, + "f1": 0.3489310689310689, + "main_score": 0.3489310689310689, + "precision": 0.33022277432712216, + "recall": 0.40695652173913044 + }, + "khm-eng": { + "accuracy": 0.3767313019390582, + "f1": 0.32111076451865495, + "main_score": 0.32111076451865495, + "precision": 0.30287463575205953, + "recall": 0.3767313019390582 + }, + "kor-eng": { + "accuracy": 0.942, + "f1": 0.9252333333333334, + "main_score": 0.9252333333333334, + "precision": 0.91725, + "recall": 0.942 + }, + "kur-eng": { + "accuracy": 0.5390243902439025, + "f1": 0.46938830816879595, + "main_score": 0.46938830816879595, + "precision": 0.44402439024390244, + "recall": 0.5390243902439025 + }, + "kzj-eng": { + "accuracy": 0.08, + "f1": 0.062375485721297745, + "main_score": 0.062375485721297745, + "precision": 0.05787513947998687, + "recall": 0.08 + }, + "lat-eng": { + "accuracy": 0.237, + "f1": 0.19471638676795924, + "main_score": 0.19471638676795924, + "precision": 0.18242831031455645, + "recall": 0.237 + }, + "lfn-eng": { + "accuracy": 0.526, + "f1": 0.4702239538239538, + "main_score": 0.4702239538239538, + "precision": 0.45036479076479075, + "recall": 0.526 + }, + "lit-eng": { + "accuracy": 0.947, + "f1": 0.9315666666666667, + "main_score": 0.9315666666666667, + "precision": 0.9240833333333333, + "recall": 0.947 + }, + "lvs-eng": { + "accuracy": 0.984, + "f1": 0.9786666666666668, + "main_score": 0.9786666666666668, + "precision": 0.976, + "recall": 0.984 + }, + "mal-eng": { + "accuracy": 0.3609898107714702, + "f1": 0.32197442737643095, + "main_score": 0.32197442737643095, + "precision": 0.31035989355801963, + "recall": 0.3609898107714702 + }, + "mar-eng": { + "accuracy": 0.941, + "f1": 0.9238333333333333, + "main_score": 0.9238333333333333, + "precision": 0.9153333333333332, + "recall": 0.941 + }, + "max-eng": { + "accuracy": 0.5105633802816901, + "f1": 0.45245361055220207, + "main_score": 0.45245361055220207, + "precision": 0.4310446009389671, + "recall": 0.5105633802816901 + }, + "mhr-eng": { + "accuracy": 0.088, + "f1": 0.06890123680241327, + "main_score": 0.06890123680241327, + "precision": 0.06336032075067789, + "recall": 0.088 + }, + "mkd-eng": { + "accuracy": 0.93, + "f1": 0.91, + "main_score": 0.91, + "precision": 0.9005, + "recall": 0.93 + }, + "mon-eng": { + "accuracy": 0.9613636363636363, + "f1": 0.9503787878787878, + "main_score": 0.9503787878787878, + "precision": 0.9454545454545454, + "recall": 0.9613636363636363 + }, + "nds-eng": { + "accuracy": 0.377, + "f1": 0.32158799892917533, + "main_score": 0.32158799892917533, + "precision": 0.30228686974789915, + "recall": 0.377 + }, + "nld-eng": { + "accuracy": 0.959, + "f1": 0.9458333333333333, + "main_score": 0.9458333333333333, + "precision": 0.9393333333333332, + "recall": 0.959 + }, + "nno-eng": { + "accuracy": 0.805, + "f1": 0.7634056277056277, + "main_score": 0.7634056277056277, + "precision": 0.7460833333333332, + "recall": 0.805 + }, + "nob-eng": { + "accuracy": 0.983, + "f1": 0.9773333333333333, + "main_score": 0.9773333333333333, + "precision": 0.9745, + "recall": 0.983 + }, + "nov-eng": { + "accuracy": 0.5408560311284046, + "f1": 0.47992740074452145, + "main_score": 0.47992740074452145, + "precision": 0.4571457077293653, + "recall": 0.5408560311284046 + }, + "oci-eng": { + "accuracy": 0.446, + "f1": 0.3856950591103735, + "main_score": 0.3856950591103735, + "precision": 0.3645026629072681, + "recall": 0.446 + }, + "orv-eng": { + "accuracy": 0.18802395209580838, + "f1": 0.15103004434906303, + "main_score": 0.15103004434906303, + "precision": 0.139949044011544, + "recall": 0.18802395209580838 + }, + "pam-eng": { + "accuracy": 0.067, + "f1": 0.054106224221972254, + "main_score": 0.054106224221972254, + "precision": 0.050884173669467785, + "recall": 0.067 + }, + "pes-eng": { + "accuracy": 0.942, + "f1": 0.9259, + "main_score": 0.9259, + "precision": 0.9180833333333334, + "recall": 0.942 + }, + "pms-eng": { + "accuracy": 0.3638095238095238, + "f1": 0.30698337112622825, + "main_score": 0.30698337112622825, + "precision": 0.29048287691144836, + "recall": 0.3638095238095238 + }, + "pol-eng": { + "accuracy": 0.955, + "f1": 0.9428333333333334, + "main_score": 0.9428333333333334, + "precision": 0.937, + "recall": 0.955 + }, + "por-eng": { + "accuracy": 0.937, + "f1": 0.9213333333333332, + "main_score": 0.9213333333333332, + "precision": 0.9136666666666666, + "recall": 0.937 + }, + "ron-eng": { + "accuracy": 0.963, + "f1": 0.953, + "main_score": 0.953, + "precision": 0.948, + "recall": 0.963 + }, + "rus-eng": { + "accuracy": 0.938, + "f1": 0.9187333333333334, + "main_score": 0.9187333333333334, + "precision": 0.9094166666666668, + "recall": 0.938 + }, + "slk-eng": { + "accuracy": 0.962, + "f1": 0.9515, + "main_score": 0.9515, + "precision": 0.9465, + "recall": 0.962 + }, + "slv-eng": { + "accuracy": 0.976913730255164, + "f1": 0.9692183070068854, + "main_score": 0.9692183070068854, + "precision": 0.965370595382746, + "recall": 0.976913730255164 + }, + "spa-eng": { + "accuracy": 0.965, + "f1": 0.9541666666666667, + "main_score": 0.9541666666666667, + "precision": 0.949, + "recall": 0.965 + }, + "sqi-eng": { + "accuracy": 0.986, + "f1": 0.9816666666666666, + "main_score": 0.9816666666666666, + "precision": 0.9796666666666668, + "recall": 0.986 + }, + "srp-eng": { + "accuracy": 0.94, + "f1": 0.9224, + "main_score": 0.9224, + "precision": 0.9139166666666667, + "recall": 0.94 + }, + "swe-eng": { + "accuracy": 0.956, + "f1": 0.9441666666666666, + "main_score": 0.9441666666666666, + "precision": 0.9383333333333334, + "recall": 0.956 + }, + "swg-eng": { + "accuracy": 0.3125, + "f1": 0.26308944032158316, + "main_score": 0.26308944032158316, + "precision": 0.25082908163265305, + "recall": 0.3125 + }, + "swh-eng": { + "accuracy": 0.20256410256410257, + "f1": 0.14482168116783503, + "main_score": 0.14482168116783503, + "precision": 0.12913330133918371, + "recall": 0.20256410256410257 + }, + "tam-eng": { + "accuracy": 0.28664495114006516, + "f1": 0.24641722402809285, + "main_score": 0.24641722402809285, + "precision": 0.2364329085559867, + "recall": 0.28664495114006516 + }, + "tat-eng": { + "accuracy": 0.124, + "f1": 0.10248605006105005, + "main_score": 0.10248605006105005, + "precision": 0.09633368686868687, + "recall": 0.124 + }, + "tel-eng": { + "accuracy": 0.41452991452991456, + "f1": 0.36401725055571205, + "main_score": 0.36401725055571205, + "precision": 0.35070672687825644, + "recall": 0.41452991452991456 + }, + "tgl-eng": { + "accuracy": 0.156, + "f1": 0.13086049173049172, + "main_score": 0.13086049173049172, + "precision": 0.12408971861471862, + "recall": 0.156 + }, + "tha-eng": { + "accuracy": 0.9744525547445255, + "f1": 0.9671532846715328, + "main_score": 0.9671532846715328, + "precision": 0.9635036496350365, + "recall": 0.9744525547445255 + }, + "tuk-eng": { + "accuracy": 0.2019704433497537, + "f1": 0.15155072809764625, + "main_score": 0.15155072809764625, + "precision": 0.1379720853858785, + "recall": 0.2019704433497537 + }, + "tur-eng": { + "accuracy": 0.962, + "f1": 0.9508333333333334, + "main_score": 0.9508333333333334, + "precision": 0.9453333333333334, + "recall": 0.962 + }, + "tzl-eng": { + "accuracy": 0.3076923076923077, + "f1": 0.2545718170718171, + "main_score": 0.2545718170718171, + "precision": 0.2393429487179487, + "recall": 0.3076923076923077 + }, + "uig-eng": { + "accuracy": 0.289, + "f1": 0.24385631303394462, + "main_score": 0.24385631303394462, + "precision": 0.23044607973725623, + "recall": 0.289 + }, + "ukr-eng": { + "accuracy": 0.944, + "f1": 0.9281666666666667, + "main_score": 0.9281666666666667, + "precision": 0.9203333333333333, + "recall": 0.944 + }, + "urd-eng": { + "accuracy": 0.958, + "f1": 0.9456666666666668, + "main_score": 0.9456666666666668, + "precision": 0.9396666666666668, + "recall": 0.958 + }, + "uzb-eng": { + "accuracy": 0.21261682242990654, + "f1": 0.17144804183355583, + "main_score": 0.17144804183355583, + "precision": 0.15958045070194601, + "recall": 0.21261682242990654 + }, + "vie-eng": { + "accuracy": 0.963, + "f1": 0.9512333333333334, + "main_score": 0.9512333333333334, + "precision": 0.9455833333333333, + "recall": 0.963 + }, + "war-eng": { + "accuracy": 0.09, + "f1": 0.07253528138528138, + "main_score": 0.07253528138528138, + "precision": 0.06787196062341712, + "recall": 0.09 + }, + "wuu-eng": { + "accuracy": 0.799, + "f1": 0.7599642857142858, + "main_score": 0.7599642857142858, + "precision": 0.7441697802197802, + "recall": 0.799 + }, + "xho-eng": { + "accuracy": 0.056338028169014086, + "f1": 0.04524114383269313, + "main_score": 0.04524114383269313, + "precision": 0.0425888665325285, + "recall": 0.056338028169014086 + }, + "yid-eng": { + "accuracy": 0.17452830188679244, + "f1": 0.14381645917725996, + "main_score": 0.14381645917725996, + "precision": 0.13567534889243407, + "recall": 0.17452830188679244 + }, + "yue-eng": { + "accuracy": 0.761, + "f1": 0.7144706349206349, + "main_score": 0.7144706349206349, + "precision": 0.6962428571428572, + "recall": 0.761 + }, + "zsm-eng": { + "accuracy": 0.963, + "f1": 0.9530666666666666, + "main_score": 0.9530666666666666, + "precision": 0.94825, + "recall": 0.963 + } + } +} \ No newline at end of file diff --git a/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/Tatoeba_slow.json b/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/Tatoeba_slow.json new file mode 100644 index 0000000000..75b075e7dd --- /dev/null +++ b/results/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/Tatoeba_slow.json @@ -0,0 +1,792 @@ +{ + "dataset_revision": "482264e767155e1f8baf2c27815db6cba8e4efa3", + "mteb_dataset_name": "Tatoeba", + "mteb_version": "1.7.32", + "test": { + "afr-eng": { + "accuracy": 0.64, + "f1": 0.5822107142857144, + "main_score": 0.5822107142857144, + "precision": 0.5610540043290043, + "recall": 0.64 + }, + "amh-eng": { + "accuracy": 0.42857142857142855, + "f1": 0.3621362433862434, + "main_score": 0.3621362433862434, + "precision": 0.34078089569160996, + "recall": 0.42857142857142855 + }, + "ang-eng": { + "accuracy": 0.15671641791044777, + "f1": 0.10240275893260965, + "main_score": 0.10240275893260965, + "precision": 0.09278972783143108, + "recall": 0.15671641791044777 + }, + "ara-eng": { + "accuracy": 0.906, + "f1": 0.8793333333333334, + "main_score": 0.8793333333333334, + "precision": 0.867, + "recall": 0.906 + }, + "arq-eng": { + "accuracy": 0.2349066959385291, + "f1": 0.1860239246671392, + "main_score": 0.1860239246671392, + "precision": 0.1710372305460121, + "recall": 0.2349066959385291 + }, + "arz-eng": { + "accuracy": 0.5660377358490566, + "f1": 0.5126153720493344, + "main_score": 0.5126153720493344, + "precision": 0.4917549257171899, + "recall": 0.5660377358490566 + }, + "ast-eng": { + "accuracy": 0.6771653543307087, + "f1": 0.6216972878390201, + "main_score": 0.6216972878390201, + "precision": 0.6003280839895013, + "recall": 0.6771653543307087 + }, + "awa-eng": { + "accuracy": 0.38961038961038963, + "f1": 0.3342918985776129, + "main_score": 0.3342918985776129, + "precision": 0.31577206640231853, + "recall": 0.38961038961038963 + }, + "aze-eng": { + "accuracy": 0.678, + "f1": 0.6209757936507937, + "main_score": 0.6209757936507937, + "precision": 0.599275, + "recall": 0.678 + }, + "bel-eng": { + "accuracy": 0.73, + "f1": 0.6773117826617827, + "main_score": 0.6773117826617827, + "precision": 0.657272619047619, + "recall": 0.73 + }, + "ben-eng": { + "accuracy": 0.424, + "f1": 0.36483772101921225, + "main_score": 0.36483772101921225, + "precision": 0.3457729908979909, + "recall": 0.424 + }, + "ber-eng": { + "accuracy": 0.055, + "f1": 0.04431875208571482, + "main_score": 0.04431875208571482, + "precision": 0.04244082144977572, + "recall": 0.055 + }, + "bos-eng": { + "accuracy": 0.9491525423728814, + "f1": 0.9326741996233521, + "main_score": 0.9326741996233521, + "precision": 0.9246704331450094, + "recall": 0.9491525423728814 + }, + "bre-eng": { + "accuracy": 0.07, + "f1": 0.05564912903188765, + "main_score": 0.05564912903188765, + "precision": 0.05271678663713547, + "recall": 0.07 + }, + "bul-eng": { + "accuracy": 0.943, + "f1": 0.9265, + "main_score": 0.9265, + "precision": 0.9185, + "recall": 0.943 + }, + "cat-eng": { + "accuracy": 0.956, + "f1": 0.9442333333333334, + "main_score": 0.9442333333333334, + "precision": 0.9385833333333332, + "recall": 0.956 + }, + "cbk-eng": { + "accuracy": 0.616, + "f1": 0.5537172077922078, + "main_score": 0.5537172077922078, + "precision": 0.5302890572390573, + "recall": 0.616 + }, + "ceb-eng": { + "accuracy": 0.10333333333333333, + "f1": 0.08054866862924506, + "main_score": 0.08054866862924506, + "precision": 0.07502244878137736, + "recall": 0.10333333333333333 + }, + "ces-eng": { + "accuracy": 0.963, + "f1": 0.9511666666666666, + "main_score": 0.9511666666666666, + "precision": 0.9453333333333332, + "recall": 0.963 + }, + "cha-eng": { + "accuracy": 0.20437956204379562, + "f1": 0.15976711852624262, + "main_score": 0.15976711852624262, + "precision": 0.14914252378716458, + "recall": 0.20437956204379562 + }, + "cmn-eng": { + "accuracy": 0.961, + "f1": 0.9493333333333333, + "main_score": 0.9493333333333333, + "precision": 0.9435, + "recall": 0.961 + }, + "cor-eng": { + "accuracy": 0.046, + "f1": 0.03422572331050592, + "main_score": 0.03422572331050592, + "precision": 0.03162596984336115, + "recall": 0.046 + }, + "csb-eng": { + "accuracy": 0.2766798418972332, + "f1": 0.21564781784063444, + "main_score": 0.21564781784063444, + "precision": 0.19755559548049667, + "recall": 0.2766798418972332 + }, + "cym-eng": { + "accuracy": 0.16521739130434782, + "f1": 0.13245287443752915, + "main_score": 0.13245287443752915, + "precision": 0.12422163707860082, + "recall": 0.16521739130434782 + }, + "dan-eng": { + "accuracy": 0.96, + "f1": 0.948, + "main_score": 0.948, + "precision": 0.9421666666666666, + "recall": 0.96 + }, + "deu-eng": { + "accuracy": 0.977, + "f1": 0.9701666666666667, + "main_score": 0.9701666666666667, + "precision": 0.9668333333333333, + "recall": 0.977 + }, + "dsb-eng": { + "accuracy": 0.3966597077244259, + "f1": 0.3342911512873443, + "main_score": 0.3342911512873443, + "precision": 0.3149256362982877, + "recall": 0.3966597077244259 + }, + "dtp-eng": { + "accuracy": 0.076, + "f1": 0.05692902980992225, + "main_score": 0.05692902980992225, + "precision": 0.052474262895911165, + "recall": 0.076 + }, + "ell-eng": { + "accuracy": 0.965, + "f1": 0.9543333333333334, + "main_score": 0.9543333333333334, + "precision": 0.949, + "recall": 0.965 + }, + "epo-eng": { + "accuracy": 0.468, + "f1": 0.4172759631154753, + "main_score": 0.4172759631154753, + "precision": 0.39916119127785793, + "recall": 0.468 + }, + "est-eng": { + "accuracy": 0.98, + "f1": 0.9733333333333333, + "main_score": 0.9733333333333333, + "precision": 0.97, + "recall": 0.98 + }, + "eus-eng": { + "accuracy": 0.274, + "f1": 0.23177456696198112, + "main_score": 0.23177456696198112, + "precision": 0.2188077901134505, + "recall": 0.274 + }, + "evaluation_time": 92.57, + "fao-eng": { + "accuracy": 0.3320610687022901, + "f1": 0.2750817884405671, + "main_score": 0.2750817884405671, + "precision": 0.25766949335651623, + "recall": 0.3320610687022901 + }, + "fin-eng": { + "accuracy": 0.947, + "f1": 0.9309666666666667, + "main_score": 0.9309666666666667, + "precision": 0.9233333333333332, + "recall": 0.947 + }, + "fra-eng": { + "accuracy": 0.935, + "f1": 0.9172333333333333, + "main_score": 0.9172333333333333, + "precision": 0.9090833333333332, + "recall": 0.935 + }, + "fry-eng": { + "accuracy": 0.37572254335260113, + "f1": 0.3113198458574181, + "main_score": 0.3113198458574181, + "precision": 0.28718689788053947, + "recall": 0.37572254335260113 + }, + "gla-eng": { + "accuracy": 0.05186972255729795, + "f1": 0.03614286125547736, + "main_score": 0.03614286125547736, + "precision": 0.032920330515058396, + "recall": 0.05186972255729795 + }, + "gle-eng": { + "accuracy": 0.141, + "f1": 0.11617234238344076, + "main_score": 0.11617234238344076, + "precision": 0.10977639432639433, + "recall": 0.141 + }, + "glg-eng": { + "accuracy": 0.954, + "f1": 0.9399666666666667, + "main_score": 0.9399666666666667, + "precision": 0.9333333333333332, + "recall": 0.954 + }, + "gsw-eng": { + "accuracy": 0.3162393162393162, + "f1": 0.2574183651106728, + "main_score": 0.2574183651106728, + "precision": 0.23740440845704006, + "recall": 0.3162393162393162 + }, + "heb-eng": { + "accuracy": 0.895, + "f1": 0.8687999999999999, + "main_score": 0.8687999999999999, + "precision": 0.8563333333333334, + "recall": 0.895 + }, + "hin-eng": { + "accuracy": 0.981, + "f1": 0.9761666666666667, + "main_score": 0.9761666666666667, + "precision": 0.9738333333333333, + "recall": 0.981 + }, + "hrv-eng": { + "accuracy": 0.969, + "f1": 0.9598333333333332, + "main_score": 0.9598333333333332, + "precision": 0.9553333333333333, + "recall": 0.969 + }, + "hsb-eng": { + "accuracy": 0.42028985507246375, + "f1": 0.36097980694253984, + "main_score": 0.36097980694253984, + "precision": 0.3393177560879424, + "recall": 0.42028985507246375 + }, + "hun-eng": { + "accuracy": 0.933, + "f1": 0.9158, + "main_score": 0.9158, + "precision": 0.9078666666666667, + "recall": 0.933 + }, + "hye-eng": { + "accuracy": 0.9487870619946092, + "f1": 0.9328391734052113, + "main_score": 0.9328391734052113, + "precision": 0.9249775381850854, + "recall": 0.9487870619946092 + }, + "ido-eng": { + "accuracy": 0.463, + "f1": 0.4025264453846807, + "main_score": 0.4025264453846807, + "precision": 0.38122698051948056, + "recall": 0.463 + }, + "ile-eng": { + "accuracy": 0.644, + "f1": 0.5770626984126984, + "main_score": 0.5770626984126984, + "precision": 0.5510559523809524, + "recall": 0.644 + }, + "ina-eng": { + "accuracy": 0.835, + "f1": 0.7912555555555555, + "main_score": 0.7912555555555555, + "precision": 0.7719416666666666, + "recall": 0.835 + }, + "ind-eng": { + "accuracy": 0.942, + "f1": 0.9274, + "main_score": 0.9274, + "precision": 0.9204166666666667, + "recall": 0.942 + }, + "isl-eng": { + "accuracy": 0.287, + "f1": 0.24065607022176788, + "main_score": 0.24065607022176788, + "precision": 0.2266953861605192, + "recall": 0.287 + }, + "ita-eng": { + "accuracy": 0.945, + "f1": 0.9305, + "main_score": 0.9305, + "precision": 0.9236666666666667, + "recall": 0.945 + }, + "jav-eng": { + "accuracy": 0.21951219512195122, + "f1": 0.17040714930958833, + "main_score": 0.17040714930958833, + "precision": 0.15645673087458636, + "recall": 0.21951219512195122 + }, + "jpn-eng": { + "accuracy": 0.925, + "f1": 0.9041333333333333, + "main_score": 0.9041333333333333, + "precision": 0.8946666666666667, + "recall": 0.925 + }, + "kab-eng": { + "accuracy": 0.018, + "f1": 0.011553926316289228, + "main_score": 0.011553926316289228, + "precision": 0.010367042966611933, + "recall": 0.018 + }, + "kat-eng": { + "accuracy": 0.9651474530831099, + "f1": 0.9544235924932976, + "main_score": 0.9544235924932976, + "precision": 0.9495084897229669, + "recall": 0.9651474530831099 + }, + "kaz-eng": { + "accuracy": 0.40695652173913044, + "f1": 0.3489310689310689, + "main_score": 0.3489310689310689, + "precision": 0.33022277432712216, + "recall": 0.40695652173913044 + }, + "khm-eng": { + "accuracy": 0.3767313019390582, + "f1": 0.32111076451865495, + "main_score": 0.32111076451865495, + "precision": 0.30287463575205953, + "recall": 0.3767313019390582 + }, + "kor-eng": { + "accuracy": 0.942, + "f1": 0.9252333333333334, + "main_score": 0.9252333333333334, + "precision": 0.91725, + "recall": 0.942 + }, + "kur-eng": { + "accuracy": 0.5390243902439025, + "f1": 0.46938830816879595, + "main_score": 0.46938830816879595, + "precision": 0.44402439024390244, + "recall": 0.5390243902439025 + }, + "kzj-eng": { + "accuracy": 0.08, + "f1": 0.062375485721297745, + "main_score": 0.062375485721297745, + "precision": 0.05787513947998687, + "recall": 0.08 + }, + "lat-eng": { + "accuracy": 0.237, + "f1": 0.19471638676795924, + "main_score": 0.19471638676795924, + "precision": 0.18242831031455645, + "recall": 0.237 + }, + "lfn-eng": { + "accuracy": 0.526, + "f1": 0.4702239538239538, + "main_score": 0.4702239538239538, + "precision": 0.45036479076479075, + "recall": 0.526 + }, + "lit-eng": { + "accuracy": 0.947, + "f1": 0.9315666666666667, + "main_score": 0.9315666666666667, + "precision": 0.9240833333333333, + "recall": 0.947 + }, + "lvs-eng": { + "accuracy": 0.984, + "f1": 0.9786666666666668, + "main_score": 0.9786666666666668, + "precision": 0.976, + "recall": 0.984 + }, + "mal-eng": { + "accuracy": 0.3609898107714702, + "f1": 0.32197442737643095, + "main_score": 0.32197442737643095, + "precision": 0.31035989355801963, + "recall": 0.3609898107714702 + }, + "mar-eng": { + "accuracy": 0.941, + "f1": 0.9238333333333333, + "main_score": 0.9238333333333333, + "precision": 0.9153333333333332, + "recall": 0.941 + }, + "max-eng": { + "accuracy": 0.5105633802816901, + "f1": 0.45245361055220207, + "main_score": 0.45245361055220207, + "precision": 0.4310446009389671, + "recall": 0.5105633802816901 + }, + "mhr-eng": { + "accuracy": 0.088, + "f1": 0.06890123680241327, + "main_score": 0.06890123680241327, + "precision": 0.06336032075067789, + "recall": 0.088 + }, + "mkd-eng": { + "accuracy": 0.93, + "f1": 0.91, + "main_score": 0.91, + "precision": 0.9005, + "recall": 0.93 + }, + "mon-eng": { + "accuracy": 0.9613636363636363, + "f1": 0.9503787878787878, + "main_score": 0.9503787878787878, + "precision": 0.9454545454545454, + "recall": 0.9613636363636363 + }, + "nds-eng": { + "accuracy": 0.377, + "f1": 0.32158799892917533, + "main_score": 0.32158799892917533, + "precision": 0.30228686974789915, + "recall": 0.377 + }, + "nld-eng": { + "accuracy": 0.959, + "f1": 0.9458333333333333, + "main_score": 0.9458333333333333, + "precision": 0.9393333333333332, + "recall": 0.959 + }, + "nno-eng": { + "accuracy": 0.805, + "f1": 0.7634056277056277, + "main_score": 0.7634056277056277, + "precision": 0.7460833333333332, + "recall": 0.805 + }, + "nob-eng": { + "accuracy": 0.983, + "f1": 0.9773333333333333, + "main_score": 0.9773333333333333, + "precision": 0.9745, + "recall": 0.983 + }, + "nov-eng": { + "accuracy": 0.5408560311284046, + "f1": 0.47992740074452145, + "main_score": 0.47992740074452145, + "precision": 0.4571457077293653, + "recall": 0.5408560311284046 + }, + "oci-eng": { + "accuracy": 0.446, + "f1": 0.3856950591103735, + "main_score": 0.3856950591103735, + "precision": 0.3645026629072681, + "recall": 0.446 + }, + "orv-eng": { + "accuracy": 0.18802395209580838, + "f1": 0.15103004434906303, + "main_score": 0.15103004434906303, + "precision": 0.139949044011544, + "recall": 0.18802395209580838 + }, + "pam-eng": { + "accuracy": 0.067, + "f1": 0.054106224221972254, + "main_score": 0.054106224221972254, + "precision": 0.050884173669467785, + "recall": 0.067 + }, + "pes-eng": { + "accuracy": 0.942, + "f1": 0.9259, + "main_score": 0.9259, + "precision": 0.9180833333333334, + "recall": 0.942 + }, + "pms-eng": { + "accuracy": 0.3638095238095238, + "f1": 0.30698337112622825, + "main_score": 0.30698337112622825, + "precision": 0.29048287691144836, + "recall": 0.3638095238095238 + }, + "pol-eng": { + "accuracy": 0.955, + "f1": 0.9428333333333334, + "main_score": 0.9428333333333334, + "precision": 0.937, + "recall": 0.955 + }, + "por-eng": { + "accuracy": 0.937, + "f1": 0.9213333333333332, + "main_score": 0.9213333333333332, + "precision": 0.9136666666666666, + "recall": 0.937 + }, + "ron-eng": { + "accuracy": 0.963, + "f1": 0.953, + "main_score": 0.953, + "precision": 0.948, + "recall": 0.963 + }, + "rus-eng": { + "accuracy": 0.938, + "f1": 0.9187333333333334, + "main_score": 0.9187333333333334, + "precision": 0.9094166666666668, + "recall": 0.938 + }, + "slk-eng": { + "accuracy": 0.962, + "f1": 0.9515, + "main_score": 0.9515, + "precision": 0.9465, + "recall": 0.962 + }, + "slv-eng": { + "accuracy": 0.976913730255164, + "f1": 0.9692183070068854, + "main_score": 0.9692183070068854, + "precision": 0.965370595382746, + "recall": 0.976913730255164 + }, + "spa-eng": { + "accuracy": 0.965, + "f1": 0.9541666666666667, + "main_score": 0.9541666666666667, + "precision": 0.949, + "recall": 0.965 + }, + "sqi-eng": { + "accuracy": 0.986, + "f1": 0.9816666666666666, + "main_score": 0.9816666666666666, + "precision": 0.9796666666666668, + "recall": 0.986 + }, + "srp-eng": { + "accuracy": 0.94, + "f1": 0.9224, + "main_score": 0.9224, + "precision": 0.9139166666666667, + "recall": 0.94 + }, + "swe-eng": { + "accuracy": 0.956, + "f1": 0.9441666666666666, + "main_score": 0.9441666666666666, + "precision": 0.9383333333333334, + "recall": 0.956 + }, + "swg-eng": { + "accuracy": 0.3125, + "f1": 0.26308944032158316, + "main_score": 0.26308944032158316, + "precision": 0.25082908163265305, + "recall": 0.3125 + }, + "swh-eng": { + "accuracy": 0.20256410256410257, + "f1": 0.14482168116783503, + "main_score": 0.14482168116783503, + "precision": 0.12913330133918371, + "recall": 0.20256410256410257 + }, + "tam-eng": { + "accuracy": 0.28664495114006516, + "f1": 0.24641722402809285, + "main_score": 0.24641722402809285, + "precision": 0.2364329085559867, + "recall": 0.28664495114006516 + }, + "tat-eng": { + "accuracy": 0.124, + "f1": 0.10248605006105005, + "main_score": 0.10248605006105005, + "precision": 0.09633368686868687, + "recall": 0.124 + }, + "tel-eng": { + "accuracy": 0.41452991452991456, + "f1": 0.36401725055571205, + "main_score": 0.36401725055571205, + "precision": 0.35070672687825644, + "recall": 0.41452991452991456 + }, + "tgl-eng": { + "accuracy": 0.156, + "f1": 0.13086049173049172, + "main_score": 0.13086049173049172, + "precision": 0.12408971861471862, + "recall": 0.156 + }, + "tha-eng": { + "accuracy": 0.9744525547445255, + "f1": 0.9671532846715328, + "main_score": 0.9671532846715328, + "precision": 0.9635036496350365, + "recall": 0.9744525547445255 + }, + "tuk-eng": { + "accuracy": 0.2019704433497537, + "f1": 0.15155072809764625, + "main_score": 0.15155072809764625, + "precision": 0.1379720853858785, + "recall": 0.2019704433497537 + }, + "tur-eng": { + "accuracy": 0.962, + "f1": 0.9508333333333334, + "main_score": 0.9508333333333334, + "precision": 0.9453333333333334, + "recall": 0.962 + }, + "tzl-eng": { + "accuracy": 0.3076923076923077, + "f1": 0.2545718170718171, + "main_score": 0.2545718170718171, + "precision": 0.2393429487179487, + "recall": 0.3076923076923077 + }, + "uig-eng": { + "accuracy": 0.289, + "f1": 0.24385631303394462, + "main_score": 0.24385631303394462, + "precision": 0.23044607973725623, + "recall": 0.289 + }, + "ukr-eng": { + "accuracy": 0.944, + "f1": 0.9281666666666667, + "main_score": 0.9281666666666667, + "precision": 0.9203333333333333, + "recall": 0.944 + }, + "urd-eng": { + "accuracy": 0.958, + "f1": 0.9456666666666668, + "main_score": 0.9456666666666668, + "precision": 0.9396666666666668, + "recall": 0.958 + }, + "uzb-eng": { + "accuracy": 0.21261682242990654, + "f1": 0.17144804183355583, + "main_score": 0.17144804183355583, + "precision": 0.15958045070194601, + "recall": 0.21261682242990654 + }, + "vie-eng": { + "accuracy": 0.963, + "f1": 0.9512333333333334, + "main_score": 0.9512333333333334, + "precision": 0.9455833333333333, + "recall": 0.963 + }, + "war-eng": { + "accuracy": 0.09, + "f1": 0.07253528138528138, + "main_score": 0.07253528138528138, + "precision": 0.06787196062341712, + "recall": 0.09 + }, + "wuu-eng": { + "accuracy": 0.799, + "f1": 0.7599642857142858, + "main_score": 0.7599642857142858, + "precision": 0.7441697802197802, + "recall": 0.799 + }, + "xho-eng": { + "accuracy": 0.056338028169014086, + "f1": 0.04524114383269313, + "main_score": 0.04524114383269313, + "precision": 0.0425888665325285, + "recall": 0.056338028169014086 + }, + "yid-eng": { + "accuracy": 0.17452830188679244, + "f1": 0.14381645917725996, + "main_score": 0.14381645917725996, + "precision": 0.13567534889243407, + "recall": 0.17452830188679244 + }, + "yue-eng": { + "accuracy": 0.761, + "f1": 0.7144706349206349, + "main_score": 0.7144706349206349, + "precision": 0.6962428571428572, + "recall": 0.761 + }, + "zsm-eng": { + "accuracy": 0.963, + "f1": 0.9530666666666666, + "main_score": 0.9530666666666666, + "precision": 0.94825, + "recall": 0.963 + } + } +} \ No newline at end of file From e722dce270dfe691be23ac0fc4460c0d7c6eec48 Mon Sep 17 00:00:00 2001 From: lm Date: Thu, 25 Apr 2024 16:37:44 +0200 Subject: [PATCH 02/11] lint --- mteb/abstasks/CrosslingualTask.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/mteb/abstasks/CrosslingualTask.py b/mteb/abstasks/CrosslingualTask.py index 1b6dc9e817..70f0477b47 100644 --- a/mteb/abstasks/CrosslingualTask.py +++ b/mteb/abstasks/CrosslingualTask.py @@ -1,7 +1,7 @@ from __future__ import annotations import datasets -import polars as pl + from .AbsTask import AbsTask @@ -21,7 +21,7 @@ def load_data(self, **kwargs): if self.data_loaded: return - fast_loading = self.fast_loading if hasattr(self, 'fast_loading') else False + fast_loading = self.fast_loading if hasattr(self, "fast_loading") else False if fast_loading: self.fast_load() else: @@ -30,13 +30,17 @@ def load_data(self, **kwargs): def fast_load(self, **kwargs): """Load all subsets at once, then group by language with Polars""" self.dataset = {} - merged_dataset = datasets.load_dataset(**self.metadata_dict["dataset"]) # load "default" subset + merged_dataset = datasets.load_dataset( + **self.metadata_dict["dataset"] + ) # load "default" subset for split in self.metadata.eval_splits: - grouped_by_lang = dict(merged_dataset[split].to_polars().group_by('lang')) + grouped_by_lang = dict(merged_dataset[split].to_polars().group_by("lang")) for lang in self.langs: if lang not in self.dataset: self.dataset[lang] = dict() - self.dataset[lang][split] = datasets.Dataset.from_polars(grouped_by_lang[lang].drop('lang')) # Remove lang column and convert back to HF datasets, not strictly necessary but better for compatibility + self.dataset[lang][split] = datasets.Dataset.from_polars( + grouped_by_lang[lang].drop("lang") + ) # Remove lang column and convert back to HF datasets, not strictly necessary but better for compatibility self.data_loaded = True def slow_load(self, **kwargs): From f65cdcbeb1d761e2bfdb446ebbea36ed80f0e95c Mon Sep 17 00:00:00 2001 From: lm Date: Thu, 25 Apr 2024 16:58:15 +0200 Subject: [PATCH 03/11] consistency --- mteb/abstasks/CrosslingualTask.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mteb/abstasks/CrosslingualTask.py b/mteb/abstasks/CrosslingualTask.py index 70f0477b47..6fe41570c6 100644 --- a/mteb/abstasks/CrosslingualTask.py +++ b/mteb/abstasks/CrosslingualTask.py @@ -37,7 +37,7 @@ def fast_load(self, **kwargs): grouped_by_lang = dict(merged_dataset[split].to_polars().group_by("lang")) for lang in self.langs: if lang not in self.dataset: - self.dataset[lang] = dict() + self.dataset[lang] = {} self.dataset[lang][split] = datasets.Dataset.from_polars( grouped_by_lang[lang].drop("lang") ) # Remove lang column and convert back to HF datasets, not strictly necessary but better for compatibility From 862b1289d09bca6c16366b1c8d2478511dda0dd3 Mon Sep 17 00:00:00 2001 From: lm Date: Thu, 25 Apr 2024 17:03:00 +0200 Subject: [PATCH 04/11] bump datasets version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 2ff649cb04..9d292a6e02 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ classifiers = [ ] requires-python = ">=3.8" dependencies = [ - "datasets>=2.2.0", + "datasets>=2.19.0", "jsonlines", "numpy", "requests>=2.26.0", From 54c01dbb70e545c7fcb503b0a2b9d31895f1b03b Mon Sep 17 00:00:00 2001 From: lm Date: Thu, 25 Apr 2024 17:05:45 +0200 Subject: [PATCH 05/11] add polars dependency --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 9d292a6e02..69481e0a23 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,6 +39,7 @@ dependencies = [ "pydantic>=2.0.0", "typing_extensions", "eval_type_backport", + "polars>=0.20.22", ] From ca1210ca9c1826f9cab7a495662f8f1df7f759df Mon Sep 17 00:00:00 2001 From: lm Date: Sun, 28 Apr 2024 16:29:27 +0200 Subject: [PATCH 06/11] loader mixin --- mteb/abstasks/CrosslingualTask.py | 41 ++-------------------------- mteb/abstasks/MultiSubsetLoader.py | 43 ++++++++++++++++++++++++++++++ mteb/abstasks/MultilingualTask.py | 18 ++----------- 3 files changed, 47 insertions(+), 55 deletions(-) create mode 100644 mteb/abstasks/MultiSubsetLoader.py diff --git a/mteb/abstasks/CrosslingualTask.py b/mteb/abstasks/CrosslingualTask.py index 6fe41570c6..a884b98bb0 100644 --- a/mteb/abstasks/CrosslingualTask.py +++ b/mteb/abstasks/CrosslingualTask.py @@ -1,11 +1,10 @@ from __future__ import annotations -import datasets - from .AbsTask import AbsTask +from .MultiSubsetLoader import MultiSubsetLoader -class CrosslingualTask(AbsTask): +class CrosslingualTask(MultiSubsetLoader, AbsTask): def __init__(self, langs=None, **kwargs): super().__init__(**kwargs) if isinstance(langs, list): @@ -15,39 +14,3 @@ def __init__(self, langs=None, **kwargs): else: self.langs = self.metadata_dict["eval_langs"] self.is_crosslingual = True - - def load_data(self, **kwargs): - """Load dataset from HuggingFace hub""" - if self.data_loaded: - return - - fast_loading = self.fast_loading if hasattr(self, "fast_loading") else False - if fast_loading: - self.fast_load() - else: - self.slow_load() - - def fast_load(self, **kwargs): - """Load all subsets at once, then group by language with Polars""" - self.dataset = {} - merged_dataset = datasets.load_dataset( - **self.metadata_dict["dataset"] - ) # load "default" subset - for split in self.metadata.eval_splits: - grouped_by_lang = dict(merged_dataset[split].to_polars().group_by("lang")) - for lang in self.langs: - if lang not in self.dataset: - self.dataset[lang] = {} - self.dataset[lang][split] = datasets.Dataset.from_polars( - grouped_by_lang[lang].drop("lang") - ) # Remove lang column and convert back to HF datasets, not strictly necessary but better for compatibility - self.data_loaded = True - - def slow_load(self, **kwargs): - """Each subsets is loaded iteratively""" - self.dataset = {} - for lang in self.langs: - self.dataset[lang] = datasets.load_dataset( - name=lang, **self.metadata_dict["dataset"] - ) - self.data_loaded = True diff --git a/mteb/abstasks/MultiSubsetLoader.py b/mteb/abstasks/MultiSubsetLoader.py new file mode 100644 index 0000000000..4b88cf9061 --- /dev/null +++ b/mteb/abstasks/MultiSubsetLoader.py @@ -0,0 +1,43 @@ +from __future__ import annotations + +import datasets + + +class MultiSubsetLoader: + def load_data(self, **kwargs): + """Load dataset containing multiple subsets from HuggingFace hub""" + if self.data_loaded: + return + + fast_loading = self.fast_loading if hasattr(self, "fast_loading") else False + if fast_loading: + self.fast_load() + else: + self.slow_load() + + self.dataset_transform() + self.data_loaded = True + + def fast_load(self, **kwargs): + """Load all subsets at once, then group by language with Polars""" + self.dataset = {} + merged_dataset = datasets.load_dataset( + **self.metadata_dict["dataset"] + ) # load "default" subset + for split in self.metadata.eval_splits: + grouped_by_lang = dict(merged_dataset[split].to_polars().group_by("lang")) + for lang in self.langs: + if lang not in self.dataset: + self.dataset[lang] = {} + self.dataset[lang][split] = datasets.Dataset.from_polars( + grouped_by_lang[lang].drop("lang") + ) # Remove lang column and convert back to HF datasets, not strictly necessary but better for compatibility + + def slow_load(self, **kwargs): + """Load each subsets iteratively""" + self.dataset = {} + for lang in self.langs: + self.dataset[lang] = datasets.load_dataset( + name=lang, + **self.metadata_dict.get("dataset", None), + ) diff --git a/mteb/abstasks/MultilingualTask.py b/mteb/abstasks/MultilingualTask.py index d28777e26b..699fa64d2d 100644 --- a/mteb/abstasks/MultilingualTask.py +++ b/mteb/abstasks/MultilingualTask.py @@ -1,11 +1,10 @@ from __future__ import annotations -import datasets - from .AbsTask import AbsTask +from .MultiSubsetLoader import MultiSubsetLoader -class MultilingualTask(AbsTask): +class MultilingualTask(MultiSubsetLoader, AbsTask): def __init__(self, langs=None, **kwargs): super().__init__(**kwargs) if isinstance(langs, list): @@ -17,16 +16,3 @@ def __init__(self, langs=None, **kwargs): else: self.langs = self.metadata_dict["eval_langs"] self.is_multilingual = True - - def load_data(self, **kwargs): - """Load dataset from HuggingFace hub""" - if self.data_loaded: - return - self.dataset = {} - for lang in self.langs: - self.dataset[lang] = datasets.load_dataset( - name=lang, - **self.metadata_dict.get("dataset", None), - ) - self.dataset_transform() - self.data_loaded = True From 1c0e246e773eb75d9f9429dc02114c1f83676f02 Mon Sep 17 00:00:00 2001 From: lm Date: Sun, 28 Apr 2024 16:49:26 +0200 Subject: [PATCH 07/11] documentation for fast loading --- mteb/abstasks/MultiSubsetLoader.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mteb/abstasks/MultiSubsetLoader.py b/mteb/abstasks/MultiSubsetLoader.py index 4b88cf9061..c89b39d465 100644 --- a/mteb/abstasks/MultiSubsetLoader.py +++ b/mteb/abstasks/MultiSubsetLoader.py @@ -19,7 +19,10 @@ def load_data(self, **kwargs): self.data_loaded = True def fast_load(self, **kwargs): - """Load all subsets at once, then group by language with Polars""" + """Load all subsets at once, then group by language with Polars. Using fast loading has two requirements: + - Each row in the dataset should have a 'lang' feature giving the corresponding language/language pair + - The datasets must have a 'default' config that loads all the subsets of the dataset (see https://huggingface.co/docs/datasets/en/repository_structure#configurations) + """ self.dataset = {} merged_dataset = datasets.load_dataset( **self.metadata_dict["dataset"] From c2b208c7b9b196c418e3df4dbece7f3c8356e53c Mon Sep 17 00:00:00 2001 From: lm Date: Sun, 28 Apr 2024 16:49:58 +0200 Subject: [PATCH 08/11] detail --- mteb/tasks/BitextMining/multilingual/TatoebaBitextMining.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mteb/tasks/BitextMining/multilingual/TatoebaBitextMining.py b/mteb/tasks/BitextMining/multilingual/TatoebaBitextMining.py index 44b0246691..042f46ae12 100644 --- a/mteb/tasks/BitextMining/multilingual/TatoebaBitextMining.py +++ b/mteb/tasks/BitextMining/multilingual/TatoebaBitextMining.py @@ -121,6 +121,7 @@ class TatoebaBitextMining(AbsTaskBitextMining, CrosslingualTask): + fast_loading = True metadata = TaskMetadata( name="Tatoeba", dataset={ @@ -147,4 +148,3 @@ class TatoebaBitextMining(AbsTaskBitextMining, CrosslingualTask): n_samples={"test": 2000}, avg_character_length={"test": 39.4}, ) - fast_loading = True From d08cdf61d9f676227316fa3de64ca34b862c566c Mon Sep 17 00:00:00 2001 From: lm Date: Sun, 28 Apr 2024 17:27:46 +0200 Subject: [PATCH 09/11] fix tests --- tests/test_all_abstasks.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/test_all_abstasks.py b/tests/test_all_abstasks.py index b18307bf29..c03a65bc31 100644 --- a/tests/test_all_abstasks.py +++ b/tests/test_all_abstasks.py @@ -11,6 +11,7 @@ from mteb.abstasks import AbsTask from mteb.abstasks.AbsTaskInstructionRetrieval import AbsTaskInstructionRetrieval from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval +from mteb.abstasks.MultiSubsetLoader import MultiSubsetLoader logging.basicConfig(level=logging.INFO) @@ -19,8 +20,10 @@ @patch("datasets.load_dataset") def test_load_data(mock_load_dataset: Mock, task: AbsTask): # TODO: We skip because this load_data is completely different. - if isinstance(task, AbsTaskRetrieval) or isinstance( - task, AbsTaskInstructionRetrieval + if ( + isinstance(task, AbsTaskRetrieval) + or isinstance(task, AbsTaskInstructionRetrieval) + or isinstance(task, MultiSubsetLoader) ): pytest.skip() with patch.object(task, "dataset_transform") as mock_dataset_transform: From 2012f676c3e2eba14cbbf178934ec47b10c5b198 Mon Sep 17 00:00:00 2001 From: lm Date: Mon, 29 Apr 2024 14:38:55 +0200 Subject: [PATCH 10/11] tatoeba dataset in mteb org --- mteb/tasks/BitextMining/multilingual/TatoebaBitextMining.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mteb/tasks/BitextMining/multilingual/TatoebaBitextMining.py b/mteb/tasks/BitextMining/multilingual/TatoebaBitextMining.py index 042f46ae12..14ab699a76 100644 --- a/mteb/tasks/BitextMining/multilingual/TatoebaBitextMining.py +++ b/mteb/tasks/BitextMining/multilingual/TatoebaBitextMining.py @@ -125,8 +125,8 @@ class TatoebaBitextMining(AbsTaskBitextMining, CrosslingualTask): metadata = TaskMetadata( name="Tatoeba", dataset={ - "path": "loicmagne/tatoeba-bitext-mining", - "revision": "482264e767155e1f8baf2c27815db6cba8e4efa3", + "path": "mteb/tatoeba-bitext-mining", + "revision": "69e8f12da6e31d59addadda9a9c8a2e601a0e282", }, description="1,000 English-aligned sentence pairs for each language based on the Tatoeba corpus", reference="https://github.com/facebookresearch/LASER/tree/main/data/tatoeba/v1", From 6033f01260e04e4ede1e9d5a4a2333dd7cf34033 Mon Sep 17 00:00:00 2001 From: lm Date: Mon, 29 Apr 2024 14:44:40 +0200 Subject: [PATCH 11/11] added points --- docs/mmteb/points/572.jsonl | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 docs/mmteb/points/572.jsonl diff --git a/docs/mmteb/points/572.jsonl b/docs/mmteb/points/572.jsonl new file mode 100644 index 0000000000..4ae02a8a53 --- /dev/null +++ b/docs/mmteb/points/572.jsonl @@ -0,0 +1,3 @@ +{"GitHub": "loicmagne", "Bug fixes": 8} +{"GitHub": "KennethEnevoldsen", "Review PR": 2} +{"GitHub": "imenelydiaker", "Review PR": 2} \ No newline at end of file