🚀 EVALITA-LLM Leaderboard 🚀
Evalita-LLM is a benchmark designed to evaluate Large Language Models (LLMs) on Italian tasks. The distinguishing features of Evalita-LLM are the following: (i) all tasks are native Italian, avoiding translation issues and potential cultural biases; (ii) the benchmark includes generative tasks, enabling more natural interaction with LLMs; (iii) all tasks are evaluated against multiple prompts, mitigating model sensitivity to specific prompt wordings and allowing a fairer evaluation.
Multiple-choice tasks: 📊TE (Textual Entailment), 😃SA (Sentiment Analysis), ⚠️HS (Hate Speech Detection), 🏥AT (Admission Test), 🔤WIC (Word in Context), ❓FAQ (Frequently Asked Questions)
Generative tasks: 🔄LS (Lexical Substitution), 📝SU (Summarization), 🏷️NER (Named Entity Recognition), 🔗REL (Relation Extraction)
- "headers": [
- "FS",
- "IS_FS",
- "Model",
- "Avg. Combined Performance ⬆️",
- "TE",
- "TE Prompt Average",
- "TE Best Prompt",
- "TE Best Prompt Id",
- "SA",
- "SA Prompt Average",
- "SA Best Prompt",
- "SA Best Prompt Id",
- "HS",
- "HS Prompt Average",
- "HS Best Prompt",
- "HS Best Prompt Id",
- "AT",
- "AT Prompt Average",
- "AT Best Prompt",
- "AT Best Prompt Id",
- "WIC",
- "WIC Prompt Average",
- "WIC Best Prompt",
- "WIC Best Prompt Id",
- "FAQ",
- "FAQ Prompt Average",
- "FAQ Best Prompt",
- "FAQ Best Prompt Id",
- "LS",
- "LS Prompt Average",
- "LS Best Prompt",
- "LS Best Prompt Id",
- "SU",
- "SU Prompt Average",
- "SU Best Prompt",
- "SU Best Prompt Id",
- "NER",
- "NER Prompt Average",
- "NER Best Prompt",
- "NER Best Prompt Id",
- "REL",
- "REL Prompt Average",
- "REL Best Prompt",
- "REL Best Prompt Id",
- "Architecture",
- "Hub License",
- "#Params (B)",
- "Hub ❤️",
- "Available on the hub",
- "Model sha"
- "data": [
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2.5-14B-Instruct-1M" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Qwen/Qwen2.5-14B-Instruct-1M</a>",
- 54.72,
- 85.02,
- 84.79,
- 86.5,
- 4,
- 73.17,
- 71.75,
- 78.26,
- 3,
- 72.8,
- 71.8,
- 75.96,
- 4,
- 59.75,
- 55.4,
- 85.6,
- 3,
- 63.59,
- 61.7,
- 67.52,
- 6,
- 52.61,
- 52.37,
- 99.5,
- 4,
- 35.03,
- 34.65,
- 35.24,
- 1,
- 25.9,
- 25.51,
- 26.04,
- 2,
- 35.1,
- 35.03,
- 35.14,
- 1,
- 44.23,
- 42.63,
- 45.56,
- 1,
- "Qwen2ForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/google/gemma-3-12b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-3-12b-it</a>",
- 53.92,
- 79.52,
- 79.12,
- 81.25,
- 1,
- 78.34,
- 78.17,
- 78.98,
- 6,
- 71.4,
- 70.14,
- 75.24,
- 4,
- 65.9,
- 62.5,
- 84.6,
- 4,
- 66.5,
- 64.94,
- 70.16,
- 3,
- 52.35,
- 52,
- 99.25,
- 3,
- 26.77,
- 26.64,
- 26.81,
- 2,
- 18.93,
- 18.74,
- 18.98,
- 2,
- 37.86,
- 37.82,
- 37.89,
- 1,
- 41.59,
- 41.46,
- 41.69,
- 1,
- "?",
- "?",
- 13,
- 0,
- false,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/google/gemma-2-9b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-2-9b-it</a>",
- 53.64,
- 79.97,
- 79.62,
- 81.5,
- 2,
- 72.85,
- 72.49,
- 73.89,
- 1,
- 71.99,
- 70.71,
- 76.03,
- 3,
- 59.96,
- 54.9,
- 75.8,
- 3,
- 57.79,
- 55.64,
- 61.17,
- 3,
- 51.76,
- 51.16,
- 98.75,
- 3,
- 22.36,
- 21.97,
- 22.47,
- 2,
- 30.43,
- 29.12,
- 31.02,
- 1,
- 38.03,
- 37.92,
- 38.09,
- 1,
- 51.26,
- 50.98,
- 51.56,
- 1,
- "Gemma2ForCausalLM",
- "?",
- 10,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2.5-7B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Qwen/Qwen2.5-7B-Instruct</a>",
- 52.62,
- 83.65,
- 83.42,
- 85,
- 2,
- 74.16,
- 73.77,
- 75.35,
- 4,
- 68.51,
- 67.14,
- 72.04,
- 4,
- 53.92,
- 46.93,
- 75.8,
- 4,
- 62.7,
- 60.38,
- 67.5,
- 6,
- 52.46,
- 51.87,
- 98.75,
- 4,
- 28.17,
- 27.85,
- 28.29,
- 2,
- 30.79,
- 30.36,
- 30.98,
- 2,
- 35.18,
- 34.45,
- 35.58,
- 1,
- 36.69,
- 36.64,
- 36.72,
- 2,
- "Qwen2ForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/microsoft/phi-4" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/phi-4</a>",
- 51.84,
- 76.84,
- 76.21,
- 79.25,
- 3,
- 75.21,
- 74.44,
- 77.95,
- 4,
- 69.2,
- 67.08,
- 75.89,
- 4,
- 61.91,
- 57.87,
- 85.2,
- 4,
- 59.38,
- 57.17,
- 63.16,
- 4,
- 54.3,
- 53.41,
- 98,
- 4,
- 35.1,
- 35.1,
- 35.11,
- 2,
- 21.24,
- 21.06,
- 21.29,
- 1,
- 28.05,
- 27.93,
- 28.1,
- 2,
- 37.15,
- 36.83,
- 37.34,
- 1,
- "Phi3ForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/arcee-ai/Llama-3.1-SuperNova-Lite" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">arcee-ai/Llama-3.1-SuperNova-Lite</a>",
- 51.71,
- 74.19,
- 73.62,
- 76,
- 3,
- 75.24,
- 74.23,
- 79.03,
- 4,
- 69.24,
- 67.58,
- 73.93,
- 4,
- 54.83,
- 48.43,
- 69.2,
- 4,
- 62.58,
- 61.35,
- 64.86,
- 6,
- 54.43,
- 51,
- 91.52,
- 3,
- 29.5,
- 29.28,
- 29.59,
- 2,
- 21.6,
- 21.38,
- 21.66,
- 2,
- 39.49,
- 38.24,
- 40.34,
- 2,
- 36.02,
- 35.67,
- 36.21,
- 1,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/ibm-granite/granite-3.1-8b-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">ibm-granite/granite-3.1-8b-instruct</a>",
- 51.7,
- 72.23,
- 70.92,
- 76.5,
- 4,
- 74.76,
- 74.47,
- 75.67,
- 1,
- 66.88,
- 64.05,
- 75.68,
- 4,
- 54.23,
- 48.17,
- 66,
- 4,
- 65.03,
- 63.71,
- 67.83,
- 4,
- 53.49,
- 52.58,
- 98,
- 3,
- 25.74,
- 25.6,
- 25.79,
- 1,
- 32.88,
- 32.36,
- 33.14,
- 1,
- 31.8,
- 30.59,
- 32.38,
- 2,
- 39.91,
- 39.7,
- 40.06,
- 1,
- "GraniteForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/microsoft/Phi-3-medium-4k-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/Phi-3-medium-4k-instruct</a>",
- 51.22,
- 76.27,
- 75.25,
- 80.5,
- 4,
- 70.48,
- 69.85,
- 72.1,
- 3,
- 62.83,
- 58.75,
- 75.2,
- 3,
- 61.47,
- 56.9,
- 81.2,
- 4,
- 68.92,
- 67.56,
- 72.47,
- 2,
- 52.17,
- 45.47,
- 81.3,
- 4,
- 35.87,
- 35.46,
- 36.11,
- 1,
- 21.59,
- 21.35,
- 21.65,
- 2,
- 25.67,
- 25.55,
- 25.71,
- 2,
- 36.95,
- 36.78,
- 37.04,
- 2,
- "Phi3ForCausalLM",
- "?",
- 14,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">meta-llama/Meta-Llama-3.1-8B-Instruct</a>",
- 50.37,
- 70.53,
- 69.04,
- 75,
- 4,
- 73.77,
- 72.72,
- 77.35,
- 4,
- 65.72,
- 63.22,
- 72.22,
- 4,
- 53.89,
- 46.97,
- 70.2,
- 4,
- 59.03,
- 57.65,
- 61.2,
- 2,
- 53.73,
- 51.16,
- 94.01,
- 3,
- 30.32,
- 29.76,
- 30.57,
- 2,
- 20.44,
- 20.2,
- 20.5,
- 2,
- 40.3,
- 39.7,
- 40.72,
- 2,
- 35.99,
- 35.89,
- 36.05,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/microsoft/Phi-3.5-mini-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/Phi-3.5-mini-instruct</a>",
- 50.06,
- 77.35,
- 76.92,
- 79,
- 2,
- 72.81,
- 72.21,
- 74.57,
- 5,
- 67.05,
- 65.31,
- 71.39,
- 4,
- 53.19,
- 46.5,
- 66.4,
- 3,
- 65.33,
- 63.97,
- 68.25,
- 6,
- 52.72,
- 47.92,
- 88.03,
- 4,
- 24.07,
- 23.86,
- 24.14,
- 2,
- 19.85,
- 19.71,
- 19.89,
- 1,
- 31.98,
- 31.5,
- 32.21,
- 1,
- 36.25,
- 36.15,
- 36.3,
- 2,
- "Phi3ForCausalLM",
- "?",
- 4,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/DeepMount00/Llama-3-8b-Ita" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">DeepMount00/Llama-3-8b-Ita</a>",
- 49.41,
- 72.61,
- 71.54,
- 76,
- 1,
- 71.9,
- 70.46,
- 76.59,
- 4,
- 68.95,
- 67.49,
- 72.86,
- 4,
- 52.96,
- 46.33,
- 65.6,
- 3,
- 57.04,
- 54.37,
- 61.26,
- 1,
- 53.68,
- 51.41,
- 94.76,
- 4,
- 25.08,
- 24.86,
- 25.16,
- 1,
- 24.3,
- 24.26,
- 24.31,
- 2,
- 30.98,
- 30.27,
- 31.31,
- 2,
- 36.56,
- 36.56,
- 36.56,
- 1,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA</a>",
- 49.39,
- 72.91,
- 71.75,
- 76.75,
- 1,
- 72.07,
- 70.82,
- 76.05,
- 4,
- 70.99,
- 69.88,
- 74.19,
- 4,
- 52.81,
- 46.4,
- 64.4,
- 4,
- 56.82,
- 52.88,
- 63.74,
- 1,
- 56.55,
- 52.41,
- 88.53,
- 3,
- 23.26,
- 22.92,
- 23.37,
- 1,
- 29.1,
- 28.92,
- 29.18,
- 2,
- 17.2,
- 15.77,
- 17.51,
- 2,
- 42.14,
- 41.31,
- 42.76,
- 1,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/mii-llm/maestrale-chat-v0.4-beta" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">mii-llm/maestrale-chat-v0.4-beta</a>",
- 49.37,
- 73.89,
- 72.62,
- 78.5,
- 4,
- 72.23,
- 71.75,
- 73.57,
- 2,
- 66.86,
- 64.99,
- 71.56,
- 3,
- 55.62,
- 50.7,
- 64.6,
- 3,
- 60.35,
- 59.48,
- 61.76,
- 6,
- 51.66,
- 43.81,
- 74.06,
- 3,
- 22.54,
- 22.36,
- 22.59,
- 1,
- 21.18,
- 21.18,
- 21.18,
- 2,
- 33.53,
- 33.24,
- 33.68,
- 1,
- 35.81,
- 35.36,
- 36.06,
- 2,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/CohereForAI/aya-expanse-8b" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">CohereForAI/aya-expanse-8b</a>",
- 49.3,
- 72.91,
- 71.88,
- 76.25,
- 2,
- 72.28,
- 71.9,
- 73.3,
- 5,
- 67.71,
- 65.83,
- 72.71,
- 4,
- 52.54,
- 46.27,
- 63.4,
- 4,
- 62.02,
- 59.58,
- 66.98,
- 3,
- 54.39,
- 51.87,
- 94.01,
- 4,
- 20.55,
- 20.37,
- 20.6,
- 2,
- 18.32,
- 18.27,
- 18.33,
- 1,
- 35.73,
- 35.67,
- 35.77,
- 2,
- 36.56,
- 36.56,
- 36.56,
- 1,
- "CohereForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/google/gemma-2-9b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-2-9b-it</a>",
- 47.54,
- 71.99,
- 70.12,
- 79,
- 2,
- 64.35,
- 61.74,
- 70.63,
- 4,
- 64.22,
- 61.92,
- 69.42,
- 4,
- 57.94,
- 52.27,
- 77.8,
- 4,
- 42.48,
- 30.36,
- 64.67,
- 5,
- 51.53,
- 50.58,
- 98,
- 4,
- 25.45,
- 25.29,
- 25.5,
- 2,
- 30.48,
- 30.1,
- 30.65,
- 2,
- 32.01,
- 31.63,
- 32.2,
- 1,
- 34.97,
- 34.56,
- 35.19,
- 1,
- "Gemma2ForCausalLM",
- "?",
- 10,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/google/gemma-3-12b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-3-12b-it</a>",
- 47.35,
- 71.3,
- 69.25,
- 79,
- 4,
- 61.91,
- 57.59,
- 74.6,
- 4,
- 67.19,
- 65.51,
- 71.4,
- 3,
- 58.57,
- 53.63,
- 83.4,
- 4,
- 46.06,
- 35.76,
- 66.76,
- 5,
- 52.91,
- 52.33,
- 98.75,
- 3,
- 21.85,
- 20.17,
- 22.33,
- 2,
- 29.93,
- 29.46,
- 30.13,
- 2,
- 31.5,
- 29.49,
- 32.47,
- 1,
- 32.23,
- 32.16,
- 32.27,
- 1,
- "?",
- "?",
- 13,
- 0,
- false,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/google/gemma-3-4b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-3-4b-it</a>",
- 46.57,
- 68.28,
- 66.46,
- 73.25,
- 2,
- 75.3,
- 74.96,
- 76.37,
- 4,
- 65.02,
- 62.43,
- 71.53,
- 4,
- 51.92,
- 46.53,
- 60,
- 4,
- 49.02,
- 41.31,
- 61.17,
- 6,
- 51.66,
- 50.04,
- 96.51,
- 3,
- 14.55,
- 14.06,
- 14.64,
- 1,
- 17.96,
- 17.95,
- 17.96,
- 1,
- 36,
- 35.79,
- 36.12,
- 1,
- 36.02,
- 35.98,
- 36.04,
- 1,
- "?",
- "?",
- 0,
- 0,
- false,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2.5-7B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Qwen/Qwen2.5-7B-Instruct</a>",
- 45.5,
- 75.21,
- 73.67,
- 82.5,
- 4,
- 57.18,
- 51.62,
- 70.43,
- 4,
- 64.56,
- 61.54,
- 72.54,
- 4,
- 49.73,
- 41.03,
- 70.4,
- 3,
- 45.89,
- 35.48,
- 67.11,
- 5,
- 52.83,
- 51.58,
- 97.26,
- 4,
- 32.96,
- 32.42,
- 33.22,
- 2,
- 25.39,
- 24.9,
- 25.56,
- 2,
- 14.97,
- 10.24,
- 15.86,
- 2,
- 36.26,
- 36.11,
- 36.35,
- 1,
- "Qwen2ForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/FairMind/Llama-3-8B-4bit-UltraChat-Ita" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">FairMind/Llama-3-8B-4bit-UltraChat-Ita</a>",
- 44.93,
- 65.05,
- 62.62,
- 71,
- 1,
- 69.44,
- 68.97,
- 70.56,
- 4,
- 61.51,
- 60.03,
- 64.16,
- 4,
- 47.01,
- 39.57,
- 56.8,
- 3,
- 52.79,
- 47.59,
- 60.88,
- 3,
- 52.88,
- 48.26,
- 88.53,
- 4,
- 22.47,
- 21.8,
- 22.67,
- 2,
- 21.41,
- 21.25,
- 21.45,
- 1,
- 29.77,
- 29.17,
- 30.02,
- 2,
- 27,
- 26.27,
- 27.27,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/google/gemma-3-4b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-3-4b-it</a>",
- 44.59,
- 66.22,
- 64.88,
- 69.25,
- 3,
- 60.41,
- 57.66,
- 65.65,
- 5,
- 59.8,
- 57.32,
- 64.25,
- 5,
- 46.01,
- 38.87,
- 54.6,
- 3,
- 50.8,
- 44.13,
- 61.44,
- 5,
- 52.55,
- 48.67,
- 90.77,
- 3,
- 32.37,
- 30.72,
- 33.18,
- 1,
- 29.24,
- 28.93,
- 29.37,
- 2,
- 29.76,
- 28.92,
- 30.13,
- 2,
- 18.7,
- 18.34,
- 18.78,
- 2,
- "?",
- "?",
- 0,
- 0,
- false,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/microsoft/Phi-3.5-mini-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/Phi-3.5-mini-instruct</a>",
- 44.4,
- 72.16,
- 70.04,
- 81.5,
- 4,
- 51.63,
- 43.72,
- 70.59,
- 3,
- 65.93,
- 64.49,
- 69.17,
- 4,
- 48.9,
- 40.67,
- 62.8,
- 3,
- 60.37,
- 56.97,
- 67.41,
- 5,
- 51.52,
- 44.22,
- 79.05,
- 3,
- 20.38,
- 18.28,
- 20.94,
- 2,
- 23.24,
- 22.7,
- 23.4,
- 2,
- 30.58,
- 30.35,
- 30.68,
- 2,
- 19.33,
- 14.4,
- 20.61,
- 1,
- "Phi3ForCausalLM",
- "?",
- 4,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2.5-14B-Instruct-1M" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Qwen/Qwen2.5-14B-Instruct-1M</a>",
- 44.36,
- 69.51,
- 66.88,
- 86.75,
- 4,
- 54.07,
- 47.09,
- 74.98,
- 3,
- 59.97,
- 54.95,
- 75.11,
- 4,
- 53.29,
- 47.3,
- 83.4,
- 3,
- 48.97,
- 40.07,
- 67.2,
- 6,
- 54.41,
- 53.74,
- 98.5,
- 3,
- 36.76,
- 35.98,
- 37.23,
- 2,
- 25.17,
- 25.1,
- 25.19,
- 2,
- 8.15,
- 6.63,
- 8.28,
- 2,
- 33.32,
- 33.06,
- 33.45,
- 1,
- "Qwen2ForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/MoxoffSpA/Volare" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">MoxoffSpA/Volare</a>",
- 43.73,
- 66.86,
- 65.17,
- 71,
- 2,
- 68.39,
- 66.88,
- 72.33,
- 4,
- 49.59,
- 45.34,
- 54.73,
- 4,
- 48.92,
- 41.83,
- 59.2,
- 3,
- 44.42,
- 36.76,
- 53.1,
- 1,
- 49.76,
- 41.36,
- 75.06,
- 4,
- 28,
- 27.25,
- 28.29,
- 1,
- 17.48,
- 17.37,
- 17.5,
- 2,
- 31.55,
- 31.49,
- 31.57,
- 2,
- 32.34,
- 31.3,
- 32.85,
- 1,
- "GemmaForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/occiglot/occiglot-7b-it-en-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">occiglot/occiglot-7b-it-en-instruct</a>",
- 43.69,
- 64.34,
- 61.83,
- 70.25,
- 2,
- 69.04,
- 68.43,
- 70.48,
- 4,
- 61.77,
- 58.3,
- 69.78,
- 4,
- 54.16,
- 49.43,
- 61.8,
- 4,
- 55.72,
- 51.2,
- 63.65,
- 3,
- 51.6,
- 43.68,
- 73.07,
- 4,
- 5.6,
- 5.4,
- 5.61,
- 2,
- 21.25,
- 20.55,
- 21.44,
- 2,
- 25.15,
- 24.93,
- 25.22,
- 2,
- 28.28,
- 27.2,
- 28.71,
- 1,
- "MistralForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/Almawave/Velvet-14B" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Almawave/Velvet-14B</a>",
- 42.69,
- 74.99,
- 74.46,
- 76.75,
- 2,
- 68.28,
- 67.99,
- 68.93,
- 1,
- 67.17,
- 66.35,
- 68.97,
- 4,
- 54.18,
- 49.73,
- 61.2,
- 4,
- 29.29,
- 15.41,
- 37.69,
- 6,
- 49.28,
- 41.73,
- 80.55,
- 4,
- 9.43,
- 9.21,
- 9.45,
- 1,
- 34.66,
- 34.25,
- 34.88,
- 2,
- 24.83,
- 24.48,
- 24.94,
- 2,
- 14.79,
- 13.08,
- 15.1,
- 1,
- "MistralForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/arcee-ai/Llama-3.1-SuperNova-Lite" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">arcee-ai/Llama-3.1-SuperNova-Lite</a>",
- 42.66,
- 70.06,
- 68.54,
- 74.5,
- 4,
- 66.85,
- 63.8,
- 77.12,
- 4,
- 56.41,
- 52.21,
- 63.8,
- 5,
- 51.47,
- 43.63,
- 68.6,
- 3,
- 50.8,
- 42.88,
- 66.58,
- 6,
- 54.89,
- 51.16,
- 90.52,
- 4,
- 23.74,
- 22.04,
- 24.29,
- 2,
- 22.77,
- 22.74,
- 22.78,
- 2,
- 9.98,
- 8.86,
- 10.11,
- 2,
- 19.61,
- 17.82,
- 20.06,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/microsoft/Phi-3-medium-4k-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/Phi-3-medium-4k-instruct</a>",
- 42.09,
- 66.01,
- 62.58,
- 78.5,
- 3,
- 55.31,
- 48.78,
- 72.58,
- 3,
- 65.19,
- 62.35,
- 72.75,
- 4,
- 54.11,
- 47.67,
- 80.2,
- 3,
- 53.5,
- 46.92,
- 66.67,
- 1,
- 53.24,
- 48.67,
- 88.53,
- 4,
- 0.34,
- 0.31,
- 0.34,
- 2,
- 27.64,
- 26.52,
- 28.08,
- 2,
- 18.6,
- 17.48,
- 18.87,
- 1,
- 27.01,
- 26.99,
- 27.01,
- 2,
- "Phi3ForCausalLM",
- "?",
- 14,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA</a>",
- 41.74,
- 62.14,
- 58.92,
- 69.5,
- 2,
- 64.05,
- 60.96,
- 72.04,
- 4,
- 48.59,
- 39.59,
- 66.32,
- 6,
- 48.85,
- 40.73,
- 62.2,
- 4,
- 57.27,
- 52.6,
- 66.57,
- 6,
- 51.01,
- 42.85,
- 71.82,
- 4,
- 19.37,
- 19.35,
- 19.37,
- 1,
- 22.63,
- 22.37,
- 22.71,
- 2,
- 22.65,
- 19.02,
- 23.79,
- 1,
- 20.79,
- 17.81,
- 21.61,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">mistralai/Mistral-7B-Instruct-v0.3</a>",
- 41.56,
- 63.83,
- 60.83,
- 71.25,
- 1,
- 61.23,
- 58.71,
- 66.14,
- 6,
- 59.16,
- 55.86,
- 65.39,
- 5,
- 46.03,
- 39.23,
- 54,
- 3,
- 62.89,
- 61.08,
- 66.49,
- 2,
- 53.28,
- 47.63,
- 84.79,
- 4,
- 9.87,
- 6.47,
- 10.26,
- 2,
- 28.1,
- 27.28,
- 28.43,
- 2,
- 9.72,
- 9.64,
- 9.73,
- 1,
- 21.49,
- 20.29,
- 21.83,
- 2,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/mii-llm/maestrale-chat-v0.4-beta" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">mii-llm/maestrale-chat-v0.4-beta</a>",
- 41.04,
- 60.19,
- 55.42,
- 73.25,
- 4,
- 63.23,
- 61.22,
- 67.4,
- 4,
- 61.53,
- 60.62,
- 63.07,
- 6,
- 51.91,
- 45.17,
- 63.8,
- 3,
- 46.35,
- 36.19,
- 66.84,
- 6,
- 52.36,
- 46.09,
- 83.04,
- 4,
- 20.38,
- 19.22,
- 20.68,
- 2,
- 25.69,
- 25.14,
- 25.89,
- 2,
- 5.02,
- 4.62,
- 5.05,
- 1,
- 23.76,
- 21.98,
- 24.33,
- 2,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/DeepMount00/Llama-3-8b-Ita" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">DeepMount00/Llama-3-8b-Ita</a>",
- 41.02,
- 58.59,
- 56,
- 63,
- 2,
- 62.59,
- 60.33,
- 67.21,
- 5,
- 49.12,
- 40.78,
- 63.86,
- 6,
- 49.04,
- 40.83,
- 63,
- 4,
- 55.44,
- 49.72,
- 67.12,
- 6,
- 51.22,
- 46.22,
- 88.03,
- 4,
- 22.59,
- 21.46,
- 22.93,
- 2,
- 22.76,
- 22.51,
- 22.84,
- 1,
- 25.08,
- 20.84,
- 26.62,
- 1,
- 13.78,
- 12.08,
- 14.06,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">meta-llama/Meta-Llama-3.1-8B-Instruct</a>",
- 40.23,
- 62.73,
- 60.62,
- 67,
- 1,
- 63.07,
- 59.8,
- 71.12,
- 5,
- 51.39,
- 44.49,
- 63.26,
- 5,
- 50.55,
- 42.27,
- 69,
- 4,
- 55.86,
- 50.4,
- 66.93,
- 6,
- 53.12,
- 48.29,
- 87.78,
- 4,
- 19.94,
- 15.75,
- 21.06,
- 2,
- 22.33,
- 22.3,
- 22.34,
- 1,
- 7.93,
- 7.57,
- 7.96,
- 1,
- 15.42,
- 15.08,
- 15.48,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/CohereForAI/aya-expanse-8b" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">CohereForAI/aya-expanse-8b</a>",
- 39.85,
- 64,
- 60.33,
- 75,
- 1,
- 63.23,
- 60.16,
- 70.62,
- 4,
- 61.4,
- 60.48,
- 62.97,
- 4,
- 46.66,
- 38.53,
- 57.8,
- 4,
- 52.79,
- 45.95,
- 66.19,
- 3,
- 47.56,
- 38.03,
- 66.33,
- 4,
- 15.24,
- 11.62,
- 15.92,
- 2,
- 19.14,
- 18.84,
- 19.21,
- 2,
- 19.08,
- 17.36,
- 19.5,
- 2,
- 9.37,
- 7.52,
- 9.57,
- 2,
- "CohereForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/Almawave/Velvet-14B" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Almawave/Velvet-14B</a>",
- 39.48,
- 69.96,
- 67.62,
- 78.5,
- 1,
- 62.53,
- 60.49,
- 66.59,
- 5,
- 59.27,
- 55.93,
- 65.64,
- 3,
- 48.79,
- 42.4,
- 57.4,
- 4,
- 47.33,
- 37.91,
- 64.31,
- 5,
- 50.08,
- 45.47,
- 89.53,
- 4,
- 0.13,
- 0.06,
- 0.13,
- 1,
- 31.1,
- 31.09,
- 31.11,
- 2,
- 15.89,
- 15.75,
- 15.91,
- 2,
- 9.68,
- 6.96,
- 9.98,
- 1,
- "MistralForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/microsoft/phi-4" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/phi-4</a>",
- 38.37,
- 63.85,
- 60.42,
- 73.25,
- 4,
- 50.54,
- 42.31,
- 74.26,
- 4,
- 61.42,
- 60.41,
- 63.15,
- 4,
- 55.69,
- 50.13,
- 83.2,
- 3,
- 54.67,
- 48.23,
- 69.01,
- 3,
- 53.53,
- 51.45,
- 95.26,
- 4,
- 0,
- 0,
- 0,
- 1,
- 22.5,
- 13.28,
- 25.69,
- 1,
- 0.36,
- 0.18,
- 0.36,
- 1,
- 21.17,
- 19.38,
- 21.66,
- 1,
- "Phi3ForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/occiglot/occiglot-7b-it-en-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">occiglot/occiglot-7b-it-en-instruct</a>",
- 38,
- 52.59,
- 49.92,
- 56,
- 4,
- 55.46,
- 51.87,
- 61.09,
- 6,
- 53.79,
- 48.48,
- 62.72,
- 1,
- 47.95,
- 42.8,
- 54,
- 3,
- 50.78,
- 42.86,
- 66.49,
- 5,
- 49.64,
- 42.89,
- 83.29,
- 4,
- 2.84,
- 1.56,
- 2.88,
- 2,
- 27.68,
- 25.5,
- 28.56,
- 2,
- 6.96,
- 5.26,
- 7.09,
- 1,
- 32.31,
- 32.03,
- 32.45,
- 2,
- "MistralForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/ibm-granite/granite-3.1-8b-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">ibm-granite/granite-3.1-8b-instruct</a>",
- 37.26,
- 56.34,
- 51.08,
- 67,
- 2,
- 54.43,
- 47.82,
- 69.45,
- 4,
- 48.15,
- 39.51,
- 62.64,
- 1,
- 49.04,
- 41.97,
- 59.4,
- 3,
- 50.24,
- 42,
- 66.85,
- 5,
- 54.7,
- 51.29,
- 91.52,
- 4,
- 0.17,
- 0.08,
- 0.17,
- 2,
- 30.28,
- 30.04,
- 30.39,
- 1,
- 18.09,
- 16.03,
- 18.57,
- 2,
- 11.21,
- 9.17,
- 11.47,
- 2,
- "GraniteForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/FairMind/Llama-3-8B-4bit-UltraChat-Ita" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">FairMind/Llama-3-8B-4bit-UltraChat-Ita</a>",
- 36.28,
- 60.29,
- 58.08,
- 64.25,
- 6,
- 52.97,
- 46.01,
- 67.3,
- 5,
- 54.09,
- 48.72,
- 63.38,
- 5,
- 41.41,
- 33.37,
- 49.2,
- 3,
- 66.22,
- 66.04,
- 66.58,
- 4,
- 43.89,
- 34.54,
- 55.61,
- 4,
- 0,
- 0,
- 0,
- 1,
- 24.17,
- 23.6,
- 24.35,
- 2,
- 15.59,
- 13.72,
- 15.95,
- 2,
- 4.2,
- 2.62,
- 4.27,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/sapienzanlp/Minerva-7B-instruct-v1.0" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">sapienzanlp/Minerva-7B-instruct-v1.0</a>",
- 35.7,
- 67.25,
- 65.33,
- 72.25,
- 3,
- 57.68,
- 54.91,
- 62.25,
- 6,
- 41.59,
- 30.89,
- 54.29,
- 5,
- 37.84,
- 37.27,
- 38.2,
- 2,
- 45.48,
- 36.89,
- 56.73,
- 5,
- 29.88,
- 28.64,
- 30.42,
- 4,
- 5,
- 4.88,
- 5.01,
- 2,
- 15.36,
- 15.12,
- 15.4,
- 2,
- 27.24,
- 27.04,
- 27.32,
- 2,
- 29.69,
- 29.36,
- 29.83,
- 1,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">sapienzanlp/Minerva-7B-base-v1.0</a>",
- 35.06,
- 68.71,
- 66.79,
- 74.25,
- 4,
- 53.85,
- 53.14,
- 54.7,
- 5,
- 39.49,
- 34.45,
- 43.34,
- 5,
- 38.04,
- 36.83,
- 38.8,
- 6,
- 43.99,
- 34.55,
- 56,
- 5,
- 28.45,
- 27.27,
- 28.93,
- 1,
- 9.53,
- 9.51,
- 9.53,
- 2,
- 15.94,
- 15.92,
- 15.95,
- 1,
- 24.37,
- 24.29,
- 24.4,
- 1,
- 28.2,
- 26.64,
- 28.83,
- 1,
- "MistralForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/sapienzanlp/Minerva-7B-instruct-v1.0" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">sapienzanlp/Minerva-7B-instruct-v1.0</a>",
- 32.5,
- 55.16,
- 54.12,
- 56.5,
- 4,
- 50.57,
- 44.51,
- 59.46,
- 6,
- 47.62,
- 39.22,
- 60.48,
- 2,
- 32.25,
- 28.87,
- 34,
- 6,
- 57.7,
- 53.42,
- 66.04,
- 5,
- 35.17,
- 31.05,
- 37.66,
- 4,
- 0,
- 0,
- 0,
- 1,
- 16.34,
- 16.22,
- 16.36,
- 2,
- 9.61,
- 9.16,
- 9.66,
- 1,
- 20.6,
- 17.75,
- 21.38,
- 2,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">sapienzanlp/Minerva-7B-base-v1.0</a>",
- 32.36,
- 56.93,
- 54.46,
- 60.75,
- 1,
- 56.56,
- 50.62,
- 71.33,
- 6,
- 53.3,
- 48.04,
- 61.82,
- 2,
- 31.37,
- 27.7,
- 33.2,
- 5,
- 61.49,
- 59.12,
- 66.13,
- 5,
- 29.12,
- 27.81,
- 29.68,
- 5,
- 0.01,
- 0,
- 0.01,
- 2,
- 16.27,
- 16.04,
- 16.31,
- 1,
- 9.64,
- 9.62,
- 9.64,
- 2,
- 8.91,
- 8.09,
- 8.99,
- 2,
- "MistralForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/MoxoffSpA/Volare" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">MoxoffSpA/Volare</a>",
- 30.23,
- 53.14,
- 51.25,
- 55.5,
- 6,
- 50.1,
- 44.15,
- 58.49,
- 6,
- 50.7,
- 43.57,
- 62.7,
- 2,
- 26.13,
- 22.77,
- 27.4,
- 3,
- 40.77,
- 27.8,
- 66.4,
- 5,
- 27.95,
- 27.35,
- 28.18,
- 1,
- 0.04,
- 0.02,
- 0.04,
- 1,
- 23.28,
- 23.15,
- 23.32,
- 1,
- 10.77,
- 10.63,
- 10.78,
- 2,
- 19.47,
- 15.95,
- 20.37,
- 1,
- "GemmaForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "metadata": null
Groups
evalita-mp: All tasks (perplexity and non-perplexity based).
evalita-mp_gen: Only generative tasks.
evalita-mp_mc: Only multiple-choice tasks.
Tasks
The following Evalita-LLM tasks can also be evaluated in isolation:
evalita-mp_te: Textual Entailment (TE)
evalita-mp_sa: Sentiment Analysis (SA)
evalita-mp_wic: Word in Context (WIC)
evalita-mp_hs: Hate Speech Detection (HS)
evalita-mp_at: Admission Tests (AT)
evalita-mp_faq: Frequently Asked Questions & Question Answering (FAQ)
evalita-mp_sum_fp: Summarization (SU)
evalita-mp_ls: Lexical Substitution (LS)
evalita-mp_ner_group: Named Entity Recognition (NER)
evalita-mp_re: Relation Extraction (REL)
Usage
lm_eval --model hf --model_args pretrained=meta-llama/Llama-2-7b-hf --tasks evalita-mp --device cuda:0 --batch_size 1
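Individual tasks or groups from the lists above can be evaluated the same way by passing their names (comma-separated) to --tasks, e.g.:
lm_eval --model hf --model_args pretrained=meta-llama/Llama-2-7b-hf --tasks evalita-mp_te,evalita-mp_sa --device cuda:0 --batch_size 1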
Textual Entailment (TE) --- Multiple-choice task
The input consists of two sentences: the text (T) and the hypothesis (H). The model must determine whether the meaning of the hypothesis is logically entailed by the text.
# | Prompt | Answer Choices |
---|---|---|
1 | La frase: '{{text1}}' implica logicamente che la frase: '{{text2}}' sia vera? | ["Sì", "No"] |
2 | Devi risolvere un compito di inferenza semantica. La frase: '{{text1}}' implica logicamente che la frase: '{{text2}}' sia vera? | ["Sì", "No"] |
3 | La frase: '{{text1}}' implica logicamente che la frase: '{{text2}}' sia vera?\nA: Sì\nB: No\nRisposta: | ["A", "B"] |
4 | Devi risolvere un compito di inferenza semantica. La frase: '{{text1}}' implica logicamente che la frase: '{{text2}}' sia vera?\nA: Sì\nB: No\nRisposta: | ["A", "B"] |
5 | Frase 1: '{{text1}}' Frase 2: '{{text2}}' | ["La frase 1 implica logicamente che la frase 2 sia vera", "La frase 1 non implica logicamente che la frase 2 sia vera"] |
6 | Devi risolvere un compito di inferenza semantica. Frase 1: '{{text1}}' Frase 2: '{{text2}}' | ["La frase 1 implica logicamente che la frase 2 sia vera", "La frase 1 non implica logicamente che la frase 2 sia vera"] |
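A minimal sketch of how one of these templates is instantiated, assuming the {{...}} placeholders are standard Jinja2 syntax (the example sentence pair below is invented):

from jinja2 import Template

# Prompt 1 from the table above: {{text1}} is the text (T), {{text2}} the hypothesis (H).
template = Template("La frase: '{{text1}}' implica logicamente che la frase: '{{text2}}' sia vera?")
prompt = template.render(
    text1="Il gatto dorme sul divano.",  # hypothetical text (T)
    text2="Un animale sta dormendo.",    # hypothetical hypothesis (H)
)
print(prompt)
# For a multiple-choice task, the model scores each answer choice ("Sì", "No")
# and the highest-scoring choice is taken as its prediction.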
Combined Performance = (1 - (Best Prompt - Prompt Average) / 100) * Best Prompt. Prompt Average = accuracy averaged over the 6 prompts. Best Prompt = accuracy of the best prompt. Prompt ID = ID of the best prompt (see legend above).
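The same Combined Performance formula is applied to every task; a minimal sketch in Python, checked against the TE scores of the top-ranked model above:

def combined_performance(best_prompt: float, prompt_average: float) -> float:
    """Combined Performance = (1 - (Best Prompt - Prompt Average) / 100) * Best Prompt.

    Both arguments are percentages; the gap between the best prompt and the
    prompt average acts as a penalty for prompt sensitivity.
    """
    return (1 - (best_prompt - prompt_average) / 100) * best_prompt

# Qwen/Qwen2.5-14B-Instruct-1M on TE: Prompt Average = 84.79, Best Prompt = 86.5
print(round(combined_performance(86.5, 84.79), 2))  # 85.02, matching the leaderboard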
- "headers": [
- "FS",
- "IS_FS",
- "Model",
- "Avg. Combined Performance ⬆️",
- "Combined Performance",
- "Prompt Average",
- "Best Prompt",
- "Best Prompt Id",
- "SA",
- "SA Prompt Average",
- "SA Best Prompt",
- "SA Best Prompt Id",
- "HS",
- "HS Prompt Average",
- "HS Best Prompt",
- "HS Best Prompt Id",
- "AT",
- "AT Prompt Average",
- "AT Best Prompt",
- "AT Best Prompt Id",
- "WIC",
- "WIC Prompt Average",
- "WIC Best Prompt",
- "WIC Best Prompt Id",
- "FAQ",
- "FAQ Prompt Average",
- "FAQ Best Prompt",
- "FAQ Best Prompt Id",
- "LS",
- "LS Prompt Average",
- "LS Best Prompt",
- "LS Best Prompt Id",
- "SU",
- "SU Prompt Average",
- "SU Best Prompt",
- "SU Best Prompt Id",
- "NER",
- "NER Prompt Average",
- "NER Best Prompt",
- "NER Best Prompt Id",
- "REL",
- "REL Prompt Average",
- "REL Best Prompt",
- "REL Best Prompt Id",
- "Architecture",
- "Hub License",
- "#Params (B)",
- "Hub ❤️",
- "Available on the hub",
- "Model sha"
- "data": [
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2.5-14B-Instruct-1M" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Qwen/Qwen2.5-14B-Instruct-1M</a>",
- 54.72,
- 85.02,
- 84.79,
- 86.5,
- 4,
- 73.17,
- 71.75,
- 78.26,
- 3,
- 72.8,
- 71.8,
- 75.96,
- 4,
- 59.75,
- 55.4,
- 85.6,
- 3,
- 63.59,
- 61.7,
- 67.52,
- 6,
- 52.61,
- 52.37,
- 99.5,
- 4,
- 35.03,
- 34.65,
- 35.24,
- 1,
- 25.9,
- 25.51,
- 26.04,
- 2,
- 35.1,
- 35.03,
- 35.14,
- 1,
- 44.23,
- 42.63,
- 45.56,
- 1,
- "Qwen2ForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2.5-7B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Qwen/Qwen2.5-7B-Instruct</a>",
- 52.62,
- 83.65,
- 83.42,
- 85,
- 2,
- 74.16,
- 73.77,
- 75.35,
- 4,
- 68.51,
- 67.14,
- 72.04,
- 4,
- 53.92,
- 46.93,
- 75.8,
- 4,
- 62.7,
- 60.38,
- 67.5,
- 6,
- 52.46,
- 51.87,
- 98.75,
- 4,
- 28.17,
- 27.85,
- 28.29,
- 2,
- 30.79,
- 30.36,
- 30.98,
- 2,
- 35.18,
- 34.45,
- 35.58,
- 1,
- 36.69,
- 36.64,
- 36.72,
- 2,
- "Qwen2ForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/google/gemma-2-9b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-2-9b-it</a>",
- 53.64,
- 79.97,
- 79.62,
- 81.5,
- 2,
- 72.85,
- 72.49,
- 73.89,
- 1,
- 71.99,
- 70.71,
- 76.03,
- 3,
- 59.96,
- 54.9,
- 75.8,
- 3,
- 57.79,
- 55.64,
- 61.17,
- 3,
- 51.76,
- 51.16,
- 98.75,
- 3,
- 22.36,
- 21.97,
- 22.47,
- 2,
- 30.43,
- 29.12,
- 31.02,
- 1,
- 38.03,
- 37.92,
- 38.09,
- 1,
- 51.26,
- 50.98,
- 51.56,
- 1,
- "Gemma2ForCausalLM",
- "?",
- 10,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/google/gemma-3-12b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-3-12b-it</a>",
- 53.92,
- 79.52,
- 79.12,
- 81.25,
- 1,
- 78.34,
- 78.17,
- 78.98,
- 6,
- 71.4,
- 70.14,
- 75.24,
- 4,
- 65.9,
- 62.5,
- 84.6,
- 4,
- 66.5,
- 64.94,
- 70.16,
- 3,
- 52.35,
- 52,
- 99.25,
- 3,
- 26.77,
- 26.64,
- 26.81,
- 2,
- 18.93,
- 18.74,
- 18.98,
- 2,
- 37.86,
- 37.82,
- 37.89,
- 1,
- 41.59,
- 41.46,
- 41.69,
- 1,
- "?",
- "?",
- 13,
- 0,
- false,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/microsoft/Phi-3.5-mini-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/Phi-3.5-mini-instruct</a>",
- 50.06,
- 77.35,
- 76.92,
- 79,
- 2,
- 72.81,
- 72.21,
- 74.57,
- 5,
- 67.05,
- 65.31,
- 71.39,
- 4,
- 53.19,
- 46.5,
- 66.4,
- 3,
- 65.33,
- 63.97,
- 68.25,
- 6,
- 52.72,
- 47.92,
- 88.03,
- 4,
- 24.07,
- 23.86,
- 24.14,
- 2,
- 19.85,
- 19.71,
- 19.89,
- 1,
- 31.98,
- 31.5,
- 32.21,
- 1,
- 36.25,
- 36.15,
- 36.3,
- 2,
- "Phi3ForCausalLM",
- "?",
- 4,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/microsoft/phi-4" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/phi-4</a>",
- 51.84,
- 76.84,
- 76.21,
- 79.25,
- 3,
- 75.21,
- 74.44,
- 77.95,
- 4,
- 69.2,
- 67.08,
- 75.89,
- 4,
- 61.91,
- 57.87,
- 85.2,
- 4,
- 59.38,
- 57.17,
- 63.16,
- 4,
- 54.3,
- 53.41,
- 98,
- 4,
- 35.1,
- 35.1,
- 35.11,
- 2,
- 21.24,
- 21.06,
- 21.29,
- 1,
- 28.05,
- 27.93,
- 28.1,
- 2,
- 37.15,
- 36.83,
- 37.34,
- 1,
- "Phi3ForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/microsoft/Phi-3-medium-4k-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/Phi-3-medium-4k-instruct</a>",
- 51.22,
- 76.27,
- 75.25,
- 80.5,
- 4,
- 70.48,
- 69.85,
- 72.1,
- 3,
- 62.83,
- 58.75,
- 75.2,
- 3,
- 61.47,
- 56.9,
- 81.2,
- 4,
- 68.92,
- 67.56,
- 72.47,
- 2,
- 52.17,
- 45.47,
- 81.3,
- 4,
- 35.87,
- 35.46,
- 36.11,
- 1,
- 21.59,
- 21.35,
- 21.65,
- 2,
- 25.67,
- 25.55,
- 25.71,
- 2,
- 36.95,
- 36.78,
- 37.04,
- 2,
- "Phi3ForCausalLM",
- "?",
- 14,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2.5-7B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Qwen/Qwen2.5-7B-Instruct</a>",
- 45.5,
- 75.21,
- 73.67,
- 82.5,
- 4,
- 57.18,
- 51.62,
- 70.43,
- 4,
- 64.56,
- 61.54,
- 72.54,
- 4,
- 49.73,
- 41.03,
- 70.4,
- 3,
- 45.89,
- 35.48,
- 67.11,
- 5,
- 52.83,
- 51.58,
- 97.26,
- 4,
- 32.96,
- 32.42,
- 33.22,
- 2,
- 25.39,
- 24.9,
- 25.56,
- 2,
- 14.97,
- 10.24,
- 15.86,
- 2,
- 36.26,
- 36.11,
- 36.35,
- 1,
- "Qwen2ForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/Almawave/Velvet-14B" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Almawave/Velvet-14B</a>",
- 42.69,
- 74.99,
- 74.46,
- 76.75,
- 2,
- 68.28,
- 67.99,
- 68.93,
- 1,
- 67.17,
- 66.35,
- 68.97,
- 4,
- 54.18,
- 49.73,
- 61.2,
- 4,
- 29.29,
- 15.41,
- 37.69,
- 6,
- 49.28,
- 41.73,
- 80.55,
- 4,
- 9.43,
- 9.21,
- 9.45,
- 1,
- 34.66,
- 34.25,
- 34.88,
- 2,
- 24.83,
- 24.48,
- 24.94,
- 2,
- 14.79,
- 13.08,
- 15.1,
- 1,
- "MistralForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/arcee-ai/Llama-3.1-SuperNova-Lite" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">arcee-ai/Llama-3.1-SuperNova-Lite</a>",
- 51.71,
- 74.19,
- 73.62,
- 76,
- 3,
- 75.24,
- 74.23,
- 79.03,
- 4,
- 69.24,
- 67.58,
- 73.93,
- 4,
- 54.83,
- 48.43,
- 69.2,
- 4,
- 62.58,
- 61.35,
- 64.86,
- 6,
- 54.43,
- 51,
- 91.52,
- 3,
- 29.5,
- 29.28,
- 29.59,
- 2,
- 21.6,
- 21.38,
- 21.66,
- 2,
- 39.49,
- 38.24,
- 40.34,
- 2,
- 36.02,
- 35.67,
- 36.21,
- 1,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/mii-llm/maestrale-chat-v0.4-beta" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">mii-llm/maestrale-chat-v0.4-beta</a>",
- 49.37,
- 73.89,
- 72.62,
- 78.5,
- 4,
- 72.23,
- 71.75,
- 73.57,
- 2,
- 66.86,
- 64.99,
- 71.56,
- 3,
- 55.62,
- 50.7,
- 64.6,
- 3,
- 60.35,
- 59.48,
- 61.76,
- 6,
- 51.66,
- 43.81,
- 74.06,
- 3,
- 22.54,
- 22.36,
- 22.59,
- 1,
- 21.18,
- 21.18,
- 21.18,
- 2,
- 33.53,
- 33.24,
- 33.68,
- 1,
- 35.81,
- 35.36,
- 36.06,
- 2,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA</a>",
- 49.39,
- 72.91,
- 71.75,
- 76.75,
- 1,
- 72.07,
- 70.82,
- 76.05,
- 4,
- 70.99,
- 69.88,
- 74.19,
- 4,
- 52.81,
- 46.4,
- 64.4,
- 4,
- 56.82,
- 52.88,
- 63.74,
- 1,
- 56.55,
- 52.41,
- 88.53,
- 3,
- 23.26,
- 22.92,
- 23.37,
- 1,
- 29.1,
- 28.92,
- 29.18,
- 2,
- 17.2,
- 15.77,
- 17.51,
- 2,
- 42.14,
- 41.31,
- 42.76,
- 1,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/CohereForAI/aya-expanse-8b" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">CohereForAI/aya-expanse-8b</a>",
- 49.3,
- 72.91,
- 71.88,
- 76.25,
- 2,
- 72.28,
- 71.9,
- 73.3,
- 5,
- 67.71,
- 65.83,
- 72.71,
- 4,
- 52.54,
- 46.27,
- 63.4,
- 4,
- 62.02,
- 59.58,
- 66.98,
- 3,
- 54.39,
- 51.87,
- 94.01,
- 4,
- 20.55,
- 20.37,
- 20.6,
- 2,
- 18.32,
- 18.27,
- 18.33,
- 1,
- 35.73,
- 35.67,
- 35.77,
- 2,
- 36.56,
- 36.56,
- 36.56,
- 1,
- "CohereForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/DeepMount00/Llama-3-8b-Ita" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">DeepMount00/Llama-3-8b-Ita</a>",
- 49.41,
- 72.61,
- 71.54,
- 76,
- 1,
- 71.9,
- 70.46,
- 76.59,
- 4,
- 68.95,
- 67.49,
- 72.86,
- 4,
- 52.96,
- 46.33,
- 65.6,
- 3,
- 57.04,
- 54.37,
- 61.26,
- 1,
- 53.68,
- 51.41,
- 94.76,
- 4,
- 25.08,
- 24.86,
- 25.16,
- 1,
- 24.3,
- 24.26,
- 24.31,
- 2,
- 30.98,
- 30.27,
- 31.31,
- 2,
- 36.56,
- 36.56,
- 36.56,
- 1,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/ibm-granite/granite-3.1-8b-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">ibm-granite/granite-3.1-8b-instruct</a>",
- 51.7,
- 72.23,
- 70.92,
- 76.5,
- 4,
- 74.76,
- 74.47,
- 75.67,
- 1,
- 66.88,
- 64.05,
- 75.68,
- 4,
- 54.23,
- 48.17,
- 66,
- 4,
- 65.03,
- 63.71,
- 67.83,
- 4,
- 53.49,
- 52.58,
- 98,
- 3,
- 25.74,
- 25.6,
- 25.79,
- 1,
- 32.88,
- 32.36,
- 33.14,
- 1,
- 31.8,
- 30.59,
- 32.38,
- 2,
- 39.91,
- 39.7,
- 40.06,
- 1,
- "GraniteForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/microsoft/Phi-3.5-mini-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/Phi-3.5-mini-instruct</a>",
- 44.4,
- 72.16,
- 70.04,
- 81.5,
- 4,
- 51.63,
- 43.72,
- 70.59,
- 3,
- 65.93,
- 64.49,
- 69.17,
- 4,
- 48.9,
- 40.67,
- 62.8,
- 3,
- 60.37,
- 56.97,
- 67.41,
- 5,
- 51.52,
- 44.22,
- 79.05,
- 3,
- 20.38,
- 18.28,
- 20.94,
- 2,
- 23.24,
- 22.7,
- 23.4,
- 2,
- 30.58,
- 30.35,
- 30.68,
- 2,
- 19.33,
- 14.4,
- 20.61,
- 1,
- "Phi3ForCausalLM",
- "?",
- 4,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/google/gemma-2-9b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-2-9b-it</a>",
- 47.54,
- 71.99,
- 70.12,
- 79,
- 2,
- 64.35,
- 61.74,
- 70.63,
- 4,
- 64.22,
- 61.92,
- 69.42,
- 4,
- 57.94,
- 52.27,
- 77.8,
- 4,
- 42.48,
- 30.36,
- 64.67,
- 5,
- 51.53,
- 50.58,
- 98,
- 4,
- 25.45,
- 25.29,
- 25.5,
- 2,
- 30.48,
- 30.1,
- 30.65,
- 2,
- 32.01,
- 31.63,
- 32.2,
- 1,
- 34.97,
- 34.56,
- 35.19,
- 1,
- "Gemma2ForCausalLM",
- "?",
- 10,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/google/gemma-3-12b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-3-12b-it</a>",
- 47.35,
- 71.3,
- 69.25,
- 79,
- 4,
- 61.91,
- 57.59,
- 74.6,
- 4,
- 67.19,
- 65.51,
- 71.4,
- 3,
- 58.57,
- 53.63,
- 83.4,
- 4,
- 46.06,
- 35.76,
- 66.76,
- 5,
- 52.91,
- 52.33,
- 98.75,
- 3,
- 21.85,
- 20.17,
- 22.33,
- 2,
- 29.93,
- 29.46,
- 30.13,
- 2,
- 31.5,
- 29.49,
- 32.47,
- 1,
- 32.23,
- 32.16,
- 32.27,
- 1,
- "?",
- "?",
- 13,
- 0,
- false,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">meta-llama/Meta-Llama-3.1-8B-Instruct</a>",
- 50.37,
- 70.53,
- 69.04,
- 75,
- 4,
- 73.77,
- 72.72,
- 77.35,
- 4,
- 65.72,
- 63.22,
- 72.22,
- 4,
- 53.89,
- 46.97,
- 70.2,
- 4,
- 59.03,
- 57.65,
- 61.2,
- 2,
- 53.73,
- 51.16,
- 94.01,
- 3,
- 30.32,
- 29.76,
- 30.57,
- 2,
- 20.44,
- 20.2,
- 20.5,
- 2,
- 40.3,
- 39.7,
- 40.72,
- 2,
- 35.99,
- 35.89,
- 36.05,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/arcee-ai/Llama-3.1-SuperNova-Lite" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">arcee-ai/Llama-3.1-SuperNova-Lite</a>",
- 42.66,
- 70.06,
- 68.54,
- 74.5,
- 4,
- 66.85,
- 63.8,
- 77.12,
- 4,
- 56.41,
- 52.21,
- 63.8,
- 5,
- 51.47,
- 43.63,
- 68.6,
- 3,
- 50.8,
- 42.88,
- 66.58,
- 6,
- 54.89,
- 51.16,
- 90.52,
- 4,
- 23.74,
- 22.04,
- 24.29,
- 2,
- 22.77,
- 22.74,
- 22.78,
- 2,
- 9.98,
- 8.86,
- 10.11,
- 2,
- 19.61,
- 17.82,
- 20.06,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/Almawave/Velvet-14B" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Almawave/Velvet-14B</a>",
- 39.48,
- 69.96,
- 67.62,
- 78.5,
- 1,
- 62.53,
- 60.49,
- 66.59,
- 5,
- 59.27,
- 55.93,
- 65.64,
- 3,
- 48.79,
- 42.4,
- 57.4,
- 4,
- 47.33,
- 37.91,
- 64.31,
- 5,
- 50.08,
- 45.47,
- 89.53,
- 4,
- 0.13,
- 0.06,
- 0.13,
- 1,
- 31.1,
- 31.09,
- 31.11,
- 2,
- 15.89,
- 15.75,
- 15.91,
- 2,
- 9.68,
- 6.96,
- 9.98,
- 1,
- "MistralForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2.5-14B-Instruct-1M" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Qwen/Qwen2.5-14B-Instruct-1M</a>",
- 44.36,
- 69.51,
- 66.88,
- 86.75,
- 4,
- 54.07,
- 47.09,
- 74.98,
- 3,
- 59.97,
- 54.95,
- 75.11,
- 4,
- 53.29,
- 47.3,
- 83.4,
- 3,
- 48.97,
- 40.07,
- 67.2,
- 6,
- 54.41,
- 53.74,
- 98.5,
- 3,
- 36.76,
- 35.98,
- 37.23,
- 2,
- 25.17,
- 25.1,
- 25.19,
- 2,
- 8.15,
- 6.63,
- 8.28,
- 2,
- 33.32,
- 33.06,
- 33.45,
- 1,
- "Qwen2ForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">sapienzanlp/Minerva-7B-base-v1.0</a>",
- 35.06,
- 68.71,
- 66.79,
- 74.25,
- 4,
- 53.85,
- 53.14,
- 54.7,
- 5,
- 39.49,
- 34.45,
- 43.34,
- 5,
- 38.04,
- 36.83,
- 38.8,
- 6,
- 43.99,
- 34.55,
- 56,
- 5,
- 28.45,
- 27.27,
- 28.93,
- 1,
- 9.53,
- 9.51,
- 9.53,
- 2,
- 15.94,
- 15.92,
- 15.95,
- 1,
- 24.37,
- 24.29,
- 24.4,
- 1,
- 28.2,
- 26.64,
- 28.83,
- 1,
- "MistralForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/google/gemma-3-4b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-3-4b-it</a>",
- 46.57,
- 68.28,
- 66.46,
- 73.25,
- 2,
- 75.3,
- 74.96,
- 76.37,
- 4,
- 65.02,
- 62.43,
- 71.53,
- 4,
- 51.92,
- 46.53,
- 60,
- 4,
- 49.02,
- 41.31,
- 61.17,
- 6,
- 51.66,
- 50.04,
- 96.51,
- 3,
- 14.55,
- 14.06,
- 14.64,
- 1,
- 17.96,
- 17.95,
- 17.96,
- 1,
- 36,
- 35.79,
- 36.12,
- 1,
- 36.02,
- 35.98,
- 36.04,
- 1,
- "?",
- "?",
- 0,
- 0,
- false,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/sapienzanlp/Minerva-7B-instruct-v1.0" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">sapienzanlp/Minerva-7B-instruct-v1.0</a>",
- 35.7,
- 67.25,
- 65.33,
- 72.25,
- 3,
- 57.68,
- 54.91,
- 62.25,
- 6,
- 41.59,
- 30.89,
- 54.29,
- 5,
- 37.84,
- 37.27,
- 38.2,
- 2,
- 45.48,
- 36.89,
- 56.73,
- 5,
- 29.88,
- 28.64,
- 30.42,
- 4,
- 5,
- 4.88,
- 5.01,
- 2,
- 15.36,
- 15.12,
- 15.4,
- 2,
- 27.24,
- 27.04,
- 27.32,
- 2,
- 29.69,
- 29.36,
- 29.83,
- 1,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/MoxoffSpA/Volare" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">MoxoffSpA/Volare</a>",
- 43.73,
- 66.86,
- 65.17,
- 71,
- 2,
- 68.39,
- 66.88,
- 72.33,
- 4,
- 49.59,
- 45.34,
- 54.73,
- 4,
- 48.92,
- 41.83,
- 59.2,
- 3,
- 44.42,
- 36.76,
- 53.1,
- 1,
- 49.76,
- 41.36,
- 75.06,
- 4,
- 28,
- 27.25,
- 28.29,
- 1,
- 17.48,
- 17.37,
- 17.5,
- 2,
- 31.55,
- 31.49,
- 31.57,
- 2,
- 32.34,
- 31.3,
- 32.85,
- 1,
- "GemmaForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/google/gemma-3-4b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-3-4b-it</a>",
- 44.59,
- 66.22,
- 64.88,
- 69.25,
- 3,
- 60.41,
- 57.66,
- 65.65,
- 5,
- 59.8,
- 57.32,
- 64.25,
- 5,
- 46.01,
- 38.87,
- 54.6,
- 3,
- 50.8,
- 44.13,
- 61.44,
- 5,
- 52.55,
- 48.67,
- 90.77,
- 3,
- 32.37,
- 30.72,
- 33.18,
- 1,
- 29.24,
- 28.93,
- 29.37,
- 2,
- 29.76,
- 28.92,
- 30.13,
- 2,
- 18.7,
- 18.34,
- 18.78,
- 2,
- "?",
- "?",
- 0,
- 0,
- false,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/microsoft/Phi-3-medium-4k-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/Phi-3-medium-4k-instruct</a>",
- 42.09,
- 66.01,
- 62.58,
- 78.5,
- 3,
- 55.31,
- 48.78,
- 72.58,
- 3,
- 65.19,
- 62.35,
- 72.75,
- 4,
- 54.11,
- 47.67,
- 80.2,
- 3,
- 53.5,
- 46.92,
- 66.67,
- 1,
- 53.24,
- 48.67,
- 88.53,
- 4,
- 0.34,
- 0.31,
- 0.34,
- 2,
- 27.64,
- 26.52,
- 28.08,
- 2,
- 18.6,
- 17.48,
- 18.87,
- 1,
- 27.01,
- 26.99,
- 27.01,
- 2,
- "Phi3ForCausalLM",
- "?",
- 14,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/FairMind/Llama-3-8B-4bit-UltraChat-Ita" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">FairMind/Llama-3-8B-4bit-UltraChat-Ita</a>",
- 44.93,
- 65.05,
- 62.62,
- 71,
- 1,
- 69.44,
- 68.97,
- 70.56,
- 4,
- 61.51,
- 60.03,
- 64.16,
- 4,
- 47.01,
- 39.57,
- 56.8,
- 3,
- 52.79,
- 47.59,
- 60.88,
- 3,
- 52.88,
- 48.26,
- 88.53,
- 4,
- 22.47,
- 21.8,
- 22.67,
- 2,
- 21.41,
- 21.25,
- 21.45,
- 1,
- 29.77,
- 29.17,
- 30.02,
- 2,
- 27,
- 26.27,
- 27.27,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/occiglot/occiglot-7b-it-en-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">occiglot/occiglot-7b-it-en-instruct</a>",
- 43.69,
- 64.34,
- 61.83,
- 70.25,
- 2,
- 69.04,
- 68.43,
- 70.48,
- 4,
- 61.77,
- 58.3,
- 69.78,
- 4,
- 54.16,
- 49.43,
- 61.8,
- 4,
- 55.72,
- 51.2,
- 63.65,
- 3,
- 51.6,
- 43.68,
- 73.07,
- 4,
- 5.6,
- 5.4,
- 5.61,
- 2,
- 21.25,
- 20.55,
- 21.44,
- 2,
- 25.15,
- 24.93,
- 25.22,
- 2,
- 28.28,
- 27.2,
- 28.71,
- 1,
- "MistralForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/CohereForAI/aya-expanse-8b" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">CohereForAI/aya-expanse-8b</a>",
- 39.85,
- 64,
- 60.33,
- 75,
- 1,
- 63.23,
- 60.16,
- 70.62,
- 4,
- 61.4,
- 60.48,
- 62.97,
- 4,
- 46.66,
- 38.53,
- 57.8,
- 4,
- 52.79,
- 45.95,
- 66.19,
- 3,
- 47.56,
- 38.03,
- 66.33,
- 4,
- 15.24,
- 11.62,
- 15.92,
- 2,
- 19.14,
- 18.84,
- 19.21,
- 2,
- 19.08,
- 17.36,
- 19.5,
- 2,
- 9.37,
- 7.52,
- 9.57,
- 2,
- "CohereForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/microsoft/phi-4" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/phi-4</a>",
- 38.37,
- 63.85,
- 60.42,
- 73.25,
- 4,
- 50.54,
- 42.31,
- 74.26,
- 4,
- 61.42,
- 60.41,
- 63.15,
- 4,
- 55.69,
- 50.13,
- 83.2,
- 3,
- 54.67,
- 48.23,
- 69.01,
- 3,
- 53.53,
- 51.45,
- 95.26,
- 4,
- 0,
- 0,
- 0,
- 1,
- 22.5,
- 13.28,
- 25.69,
- 1,
- 0.36,
- 0.18,
- 0.36,
- 1,
- 21.17,
- 19.38,
- 21.66,
- 1,
- "Phi3ForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">mistralai/Mistral-7B-Instruct-v0.3</a>",
- 41.56,
- 63.83,
- 60.83,
- 71.25,
- 1,
- 61.23,
- 58.71,
- 66.14,
- 6,
- 59.16,
- 55.86,
- 65.39,
- 5,
- 46.03,
- 39.23,
- 54,
- 3,
- 62.89,
- 61.08,
- 66.49,
- 2,
- 53.28,
- 47.63,
- 84.79,
- 4,
- 9.87,
- 6.47,
- 10.26,
- 2,
- 28.1,
- 27.28,
- 28.43,
- 2,
- 9.72,
- 9.64,
- 9.73,
- 1,
- 21.49,
- 20.29,
- 21.83,
- 2,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">meta-llama/Meta-Llama-3.1-8B-Instruct</a>",
- 40.23,
- 62.73,
- 60.62,
- 67,
- 1,
- 63.07,
- 59.8,
- 71.12,
- 5,
- 51.39,
- 44.49,
- 63.26,
- 5,
- 50.55,
- 42.27,
- 69,
- 4,
- 55.86,
- 50.4,
- 66.93,
- 6,
- 53.12,
- 48.29,
- 87.78,
- 4,
- 19.94,
- 15.75,
- 21.06,
- 2,
- 22.33,
- 22.3,
- 22.34,
- 1,
- 7.93,
- 7.57,
- 7.96,
- 1,
- 15.42,
- 15.08,
- 15.48,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA</a>",
- 41.74,
- 62.14,
- 58.92,
- 69.5,
- 2,
- 64.05,
- 60.96,
- 72.04,
- 4,
- 48.59,
- 39.59,
- 66.32,
- 6,
- 48.85,
- 40.73,
- 62.2,
- 4,
- 57.27,
- 52.6,
- 66.57,
- 6,
- 51.01,
- 42.85,
- 71.82,
- 4,
- 19.37,
- 19.35,
- 19.37,
- 1,
- 22.63,
- 22.37,
- 22.71,
- 2,
- 22.65,
- 19.02,
- 23.79,
- 1,
- 20.79,
- 17.81,
- 21.61,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/FairMind/Llama-3-8B-4bit-UltraChat-Ita" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">FairMind/Llama-3-8B-4bit-UltraChat-Ita</a>",
- 36.28,
- 60.29,
- 58.08,
- 64.25,
- 6,
- 52.97,
- 46.01,
- 67.3,
- 5,
- 54.09,
- 48.72,
- 63.38,
- 5,
- 41.41,
- 33.37,
- 49.2,
- 3,
- 66.22,
- 66.04,
- 66.58,
- 4,
- 43.89,
- 34.54,
- 55.61,
- 4,
- 0,
- 0,
- 0,
- 1,
- 24.17,
- 23.6,
- 24.35,
- 2,
- 15.59,
- 13.72,
- 15.95,
- 2,
- 4.2,
- 2.62,
- 4.27,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/mii-llm/maestrale-chat-v0.4-beta" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">mii-llm/maestrale-chat-v0.4-beta</a>",
- 41.04,
- 60.19,
- 55.42,
- 73.25,
- 4,
- 63.23,
- 61.22,
- 67.4,
- 4,
- 61.53,
- 60.62,
- 63.07,
- 6,
- 51.91,
- 45.17,
- 63.8,
- 3,
- 46.35,
- 36.19,
- 66.84,
- 6,
- 52.36,
- 46.09,
- 83.04,
- 4,
- 20.38,
- 19.22,
- 20.68,
- 2,
- 25.69,
- 25.14,
- 25.89,
- 2,
- 5.02,
- 4.62,
- 5.05,
- 1,
- 23.76,
- 21.98,
- 24.33,
- 2,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/DeepMount00/Llama-3-8b-Ita" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">DeepMount00/Llama-3-8b-Ita</a>",
- 41.02,
- 58.59,
- 56,
- 63,
- 2,
- 62.59,
- 60.33,
- 67.21,
- 5,
- 49.12,
- 40.78,
- 63.86,
- 6,
- 49.04,
- 40.83,
- 63,
- 4,
- 55.44,
- 49.72,
- 67.12,
- 6,
- 51.22,
- 46.22,
- 88.03,
- 4,
- 22.59,
- 21.46,
- 22.93,
- 2,
- 22.76,
- 22.51,
- 22.84,
- 1,
- 25.08,
- 20.84,
- 26.62,
- 1,
- 13.78,
- 12.08,
- 14.06,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">sapienzanlp/Minerva-7B-base-v1.0</a>",
- 32.36,
- 56.93,
- 54.46,
- 60.75,
- 1,
- 56.56,
- 50.62,
- 71.33,
- 6,
- 53.3,
- 48.04,
- 61.82,
- 2,
- 31.37,
- 27.7,
- 33.2,
- 5,
- 61.49,
- 59.12,
- 66.13,
- 5,
- 29.12,
- 27.81,
- 29.68,
- 5,
- 0.01,
- 0,
- 0.01,
- 2,
- 16.27,
- 16.04,
- 16.31,
- 1,
- 9.64,
- 9.62,
- 9.64,
- 2,
- 8.91,
- 8.09,
- 8.99,
- 2,
- "MistralForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/ibm-granite/granite-3.1-8b-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">ibm-granite/granite-3.1-8b-instruct</a>",
- 37.26,
- 56.34,
- 51.08,
- 67,
- 2,
- 54.43,
- 47.82,
- 69.45,
- 4,
- 48.15,
- 39.51,
- 62.64,
- 1,
- 49.04,
- 41.97,
- 59.4,
- 3,
- 50.24,
- 42,
- 66.85,
- 5,
- 54.7,
- 51.29,
- 91.52,
- 4,
- 0.17,
- 0.08,
- 0.17,
- 2,
- 30.28,
- 30.04,
- 30.39,
- 1,
- 18.09,
- 16.03,
- 18.57,
- 2,
- 11.21,
- 9.17,
- 11.47,
- 2,
- "GraniteForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/sapienzanlp/Minerva-7B-instruct-v1.0" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">sapienzanlp/Minerva-7B-instruct-v1.0</a>",
- 32.5,
- 55.16,
- 54.12,
- 56.5,
- 4,
- 50.57,
- 44.51,
- 59.46,
- 6,
- 47.62,
- 39.22,
- 60.48,
- 2,
- 32.25,
- 28.87,
- 34,
- 6,
- 57.7,
- 53.42,
- 66.04,
- 5,
- 35.17,
- 31.05,
- 37.66,
- 4,
- 0,
- 0,
- 0,
- 1,
- 16.34,
- 16.22,
- 16.36,
- 2,
- 9.61,
- 9.16,
- 9.66,
- 1,
- 20.6,
- 17.75,
- 21.38,
- 2,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/MoxoffSpA/Volare" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">MoxoffSpA/Volare</a>",
- 30.23,
- 53.14,
- 51.25,
- 55.5,
- 6,
- 50.1,
- 44.15,
- 58.49,
- 6,
- 50.7,
- 43.57,
- 62.7,
- 2,
- 26.13,
- 22.77,
- 27.4,
- 3,
- 40.77,
- 27.8,
- 66.4,
- 5,
- 27.95,
- 27.35,
- 28.18,
- 1,
- 0.04,
- 0.02,
- 0.04,
- 1,
- 23.28,
- 23.15,
- 23.32,
- 1,
- 10.77,
- 10.63,
- 10.78,
- 2,
- 19.47,
- 15.95,
- 20.37,
- 1,
- "GemmaForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/occiglot/occiglot-7b-it-en-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">occiglot/occiglot-7b-it-en-instruct</a>",
- 38,
- 52.59,
- 49.92,
- 56,
- 4,
- 55.46,
- 51.87,
- 61.09,
- 6,
- 53.79,
- 48.48,
- 62.72,
- 1,
- 47.95,
- 42.8,
- 54,
- 3,
- 50.78,
- 42.86,
- 66.49,
- 5,
- 49.64,
- 42.89,
- 83.29,
- 4,
- 2.84,
- 1.56,
- 2.88,
- 2,
- 27.68,
- 25.5,
- 28.56,
- 2,
- 6.96,
- 5.26,
- 7.09,
- 1,
- 32.31,
- 32.03,
- 32.45,
- 2,
- "MistralForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "metadata": null
Sentiment Analysis (SA) --- Multiple-choice task
The input is a tweet. The model must classify the sentiment polarity of the text into one of four classes: positive, negative, neutral, or mixed.
# | Prompt | Answer Choices |
---|---|---|
1 | Qual è il sentiment espresso nel seguente tweet: '{{text}}'? | ["Positivo", "Negativo", "Neutro", "Misto"] |
2 | Devi svolgere un compito di analisi del sentiment. Qual è il sentiment espresso nel seguente tweet: '{{text}}'? | ["Positivo", "Negativo", "Neutro", "Misto"] |
3 | Qual è il sentiment espresso nel seguente tweet: '{{text}}'?\nA: Positivo\nB: Negativo\nC: Neutro\nD: Misto\nRisposta: | ["A", "B", "C", "D"] |
4 | Devi svolgere un compito di analisi del sentiment. Qual è il sentiment espresso nel seguente tweet: '{{text}}'?\nA: Positivo\nB: Negativo\nC: Neutro\nD: Misto\nRisposta: | ["A", "B", "C", "D"] |
5 | Il seguente tweet: '{{text}}' esprime un sentiment | ["Positivo", "Negativo", "Neutro", "Misto"] |
6 | Devi svolgere un compito di analisi del sentiment. Il seguente tweet: '{{text}}' esprime un sentiment | ["Positivo", "Negativo", "Neutro", "Misto"] |
Combined Performance = (1 - (Best Prompt - Prompt Average) / 100) * Best Prompt. Prompt Average = F1-macro averaged over the 6 prompts. Best Prompt = F1-macro of the best prompt. Prompt ID = ID of the best prompt (see legend above).
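The formula rewards robustness to prompt wording: the larger the gap between the best prompt and the prompt average, the more the best score is discounted. Below is a minimal Python sketch of the computation (illustrative, not the official scoring code), checked against the SA figures reported in the table below for Qwen/Qwen2.5-14B-Instruct-1M.

```python
def combined_performance(prompt_average: float, best_prompt: float) -> float:
    """Discount the best prompt's score (0-100 scale) by its gap from the
    average over all prompts, so prompt-sensitive models are penalized."""
    return (1 - (best_prompt - prompt_average) / 100) * best_prompt

# SA row for Qwen/Qwen2.5-14B-Instruct-1M in the table below:
# Prompt Average = 71.75, Best Prompt = 78.26.
print(round(combined_performance(71.75, 78.26), 2))  # 73.17, as reported
```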
- "headers": [
- "FS",
- "IS_FS",
- "Model",
- "Avg. Combined Performance ⬆️",
- "TE",
- "TE Prompt Average",
- "TE Best Prompt",
- "TE Best Prompt Id",
- "Combined Performance",
- "Prompt Average",
- "Best Prompt",
- "Best Prompt Id",
- "HS",
- "HS Prompt Average",
- "HS Best Prompt",
- "HS Best Prompt Id",
- "AT",
- "AT Prompt Average",
- "AT Best Prompt",
- "AT Best Prompt Id",
- "WIC",
- "WIC Prompt Average",
- "WIC Best Prompt",
- "WIC Best Prompt Id",
- "FAQ",
- "FAQ Prompt Average",
- "FAQ Best Prompt",
- "FAQ Best Prompt Id",
- "LS",
- "LS Prompt Average",
- "LS Best Prompt",
- "LS Best Prompt Id",
- "SU",
- "SU Prompt Average",
- "SU Best Prompt",
- "SU Best Prompt Id",
- "NER",
- "NER Prompt Average",
- "NER Best Prompt",
- "NER Best Prompt Id",
- "REL",
- "REL Prompt Average",
- "REL Best Prompt",
- "REL Best Prompt Id",
- "Architecture",
- "Hub License",
- "#Params (B)",
- "Hub ❤️",
- "Available on the hub",
- "Model sha"
- "data": [
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/google/gemma-3-12b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-3-12b-it</a>",
- 53.92,
- 79.52,
- 79.12,
- 81.25,
- 1,
- 78.34,
- 78.17,
- 78.98,
- 6,
- 71.4,
- 70.14,
- 75.24,
- 4,
- 65.9,
- 62.5,
- 84.6,
- 4,
- 66.5,
- 64.94,
- 70.16,
- 3,
- 52.35,
- 52,
- 99.25,
- 3,
- 26.77,
- 26.64,
- 26.81,
- 2,
- 18.93,
- 18.74,
- 18.98,
- 2,
- 37.86,
- 37.82,
- 37.89,
- 1,
- 41.59,
- 41.46,
- 41.69,
- 1,
- "?",
- "?",
- 13,
- 0,
- false,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/google/gemma-3-4b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-3-4b-it</a>",
- 46.57,
- 68.28,
- 66.46,
- 73.25,
- 2,
- 75.3,
- 74.96,
- 76.37,
- 4,
- 65.02,
- 62.43,
- 71.53,
- 4,
- 51.92,
- 46.53,
- 60,
- 4,
- 49.02,
- 41.31,
- 61.17,
- 6,
- 51.66,
- 50.04,
- 96.51,
- 3,
- 14.55,
- 14.06,
- 14.64,
- 1,
- 17.96,
- 17.95,
- 17.96,
- 1,
- 36,
- 35.79,
- 36.12,
- 1,
- 36.02,
- 35.98,
- 36.04,
- 1,
- "?",
- "?",
- 0,
- 0,
- false,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/arcee-ai/Llama-3.1-SuperNova-Lite" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">arcee-ai/Llama-3.1-SuperNova-Lite</a>",
- 51.71,
- 74.19,
- 73.62,
- 76,
- 3,
- 75.24,
- 74.23,
- 79.03,
- 4,
- 69.24,
- 67.58,
- 73.93,
- 4,
- 54.83,
- 48.43,
- 69.2,
- 4,
- 62.58,
- 61.35,
- 64.86,
- 6,
- 54.43,
- 51,
- 91.52,
- 3,
- 29.5,
- 29.28,
- 29.59,
- 2,
- 21.6,
- 21.38,
- 21.66,
- 2,
- 39.49,
- 38.24,
- 40.34,
- 2,
- 36.02,
- 35.67,
- 36.21,
- 1,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/microsoft/phi-4" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/phi-4</a>",
- 51.84,
- 76.84,
- 76.21,
- 79.25,
- 3,
- 75.21,
- 74.44,
- 77.95,
- 4,
- 69.2,
- 67.08,
- 75.89,
- 4,
- 61.91,
- 57.87,
- 85.2,
- 4,
- 59.38,
- 57.17,
- 63.16,
- 4,
- 54.3,
- 53.41,
- 98,
- 4,
- 35.1,
- 35.1,
- 35.11,
- 2,
- 21.24,
- 21.06,
- 21.29,
- 1,
- 28.05,
- 27.93,
- 28.1,
- 2,
- 37.15,
- 36.83,
- 37.34,
- 1,
- "Phi3ForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/ibm-granite/granite-3.1-8b-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">ibm-granite/granite-3.1-8b-instruct</a>",
- 51.7,
- 72.23,
- 70.92,
- 76.5,
- 4,
- 74.76,
- 74.47,
- 75.67,
- 1,
- 66.88,
- 64.05,
- 75.68,
- 4,
- 54.23,
- 48.17,
- 66,
- 4,
- 65.03,
- 63.71,
- 67.83,
- 4,
- 53.49,
- 52.58,
- 98,
- 3,
- 25.74,
- 25.6,
- 25.79,
- 1,
- 32.88,
- 32.36,
- 33.14,
- 1,
- 31.8,
- 30.59,
- 32.38,
- 2,
- 39.91,
- 39.7,
- 40.06,
- 1,
- "GraniteForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2.5-7B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Qwen/Qwen2.5-7B-Instruct</a>",
- 52.62,
- 83.65,
- 83.42,
- 85,
- 2,
- 74.16,
- 73.77,
- 75.35,
- 4,
- 68.51,
- 67.14,
- 72.04,
- 4,
- 53.92,
- 46.93,
- 75.8,
- 4,
- 62.7,
- 60.38,
- 67.5,
- 6,
- 52.46,
- 51.87,
- 98.75,
- 4,
- 28.17,
- 27.85,
- 28.29,
- 2,
- 30.79,
- 30.36,
- 30.98,
- 2,
- 35.18,
- 34.45,
- 35.58,
- 1,
- 36.69,
- 36.64,
- 36.72,
- 2,
- "Qwen2ForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">meta-llama/Meta-Llama-3.1-8B-Instruct</a>",
- 50.37,
- 70.53,
- 69.04,
- 75,
- 4,
- 73.77,
- 72.72,
- 77.35,
- 4,
- 65.72,
- 63.22,
- 72.22,
- 4,
- 53.89,
- 46.97,
- 70.2,
- 4,
- 59.03,
- 57.65,
- 61.2,
- 2,
- 53.73,
- 51.16,
- 94.01,
- 3,
- 30.32,
- 29.76,
- 30.57,
- 2,
- 20.44,
- 20.2,
- 20.5,
- 2,
- 40.3,
- 39.7,
- 40.72,
- 2,
- 35.99,
- 35.89,
- 36.05,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2.5-14B-Instruct-1M" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Qwen/Qwen2.5-14B-Instruct-1M</a>",
- 54.72,
- 85.02,
- 84.79,
- 86.5,
- 4,
- 73.17,
- 71.75,
- 78.26,
- 3,
- 72.8,
- 71.8,
- 75.96,
- 4,
- 59.75,
- 55.4,
- 85.6,
- 3,
- 63.59,
- 61.7,
- 67.52,
- 6,
- 52.61,
- 52.37,
- 99.5,
- 4,
- 35.03,
- 34.65,
- 35.24,
- 1,
- 25.9,
- 25.51,
- 26.04,
- 2,
- 35.1,
- 35.03,
- 35.14,
- 1,
- 44.23,
- 42.63,
- 45.56,
- 1,
- "Qwen2ForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/google/gemma-2-9b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-2-9b-it</a>",
- 53.64,
- 79.97,
- 79.62,
- 81.5,
- 2,
- 72.85,
- 72.49,
- 73.89,
- 1,
- 71.99,
- 70.71,
- 76.03,
- 3,
- 59.96,
- 54.9,
- 75.8,
- 3,
- 57.79,
- 55.64,
- 61.17,
- 3,
- 51.76,
- 51.16,
- 98.75,
- 3,
- 22.36,
- 21.97,
- 22.47,
- 2,
- 30.43,
- 29.12,
- 31.02,
- 1,
- 38.03,
- 37.92,
- 38.09,
- 1,
- 51.26,
- 50.98,
- 51.56,
- 1,
- "Gemma2ForCausalLM",
- "?",
- 10,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/microsoft/Phi-3.5-mini-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/Phi-3.5-mini-instruct</a>",
- 50.06,
- 77.35,
- 76.92,
- 79,
- 2,
- 72.81,
- 72.21,
- 74.57,
- 5,
- 67.05,
- 65.31,
- 71.39,
- 4,
- 53.19,
- 46.5,
- 66.4,
- 3,
- 65.33,
- 63.97,
- 68.25,
- 6,
- 52.72,
- 47.92,
- 88.03,
- 4,
- 24.07,
- 23.86,
- 24.14,
- 2,
- 19.85,
- 19.71,
- 19.89,
- 1,
- 31.98,
- 31.5,
- 32.21,
- 1,
- 36.25,
- 36.15,
- 36.3,
- 2,
- "Phi3ForCausalLM",
- "?",
- 4,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/CohereForAI/aya-expanse-8b" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">CohereForAI/aya-expanse-8b</a>",
- 49.3,
- 72.91,
- 71.88,
- 76.25,
- 2,
- 72.28,
- 71.9,
- 73.3,
- 5,
- 67.71,
- 65.83,
- 72.71,
- 4,
- 52.54,
- 46.27,
- 63.4,
- 4,
- 62.02,
- 59.58,
- 66.98,
- 3,
- 54.39,
- 51.87,
- 94.01,
- 4,
- 20.55,
- 20.37,
- 20.6,
- 2,
- 18.32,
- 18.27,
- 18.33,
- 1,
- 35.73,
- 35.67,
- 35.77,
- 2,
- 36.56,
- 36.56,
- 36.56,
- 1,
- "CohereForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/mii-llm/maestrale-chat-v0.4-beta" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">mii-llm/maestrale-chat-v0.4-beta</a>",
- 49.37,
- 73.89,
- 72.62,
- 78.5,
- 4,
- 72.23,
- 71.75,
- 73.57,
- 2,
- 66.86,
- 64.99,
- 71.56,
- 3,
- 55.62,
- 50.7,
- 64.6,
- 3,
- 60.35,
- 59.48,
- 61.76,
- 6,
- 51.66,
- 43.81,
- 74.06,
- 3,
- 22.54,
- 22.36,
- 22.59,
- 1,
- 21.18,
- 21.18,
- 21.18,
- 2,
- 33.53,
- 33.24,
- 33.68,
- 1,
- 35.81,
- 35.36,
- 36.06,
- 2,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA</a>",
- 49.39,
- 72.91,
- 71.75,
- 76.75,
- 1,
- 72.07,
- 70.82,
- 76.05,
- 4,
- 70.99,
- 69.88,
- 74.19,
- 4,
- 52.81,
- 46.4,
- 64.4,
- 4,
- 56.82,
- 52.88,
- 63.74,
- 1,
- 56.55,
- 52.41,
- 88.53,
- 3,
- 23.26,
- 22.92,
- 23.37,
- 1,
- 29.1,
- 28.92,
- 29.18,
- 2,
- 17.2,
- 15.77,
- 17.51,
- 2,
- 42.14,
- 41.31,
- 42.76,
- 1,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/DeepMount00/Llama-3-8b-Ita" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">DeepMount00/Llama-3-8b-Ita</a>",
- 49.41,
- 72.61,
- 71.54,
- 76,
- 1,
- 71.9,
- 70.46,
- 76.59,
- 4,
- 68.95,
- 67.49,
- 72.86,
- 4,
- 52.96,
- 46.33,
- 65.6,
- 3,
- 57.04,
- 54.37,
- 61.26,
- 1,
- 53.68,
- 51.41,
- 94.76,
- 4,
- 25.08,
- 24.86,
- 25.16,
- 1,
- 24.3,
- 24.26,
- 24.31,
- 2,
- 30.98,
- 30.27,
- 31.31,
- 2,
- 36.56,
- 36.56,
- 36.56,
- 1,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/microsoft/Phi-3-medium-4k-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/Phi-3-medium-4k-instruct</a>",
- 51.22,
- 76.27,
- 75.25,
- 80.5,
- 4,
- 70.48,
- 69.85,
- 72.1,
- 3,
- 62.83,
- 58.75,
- 75.2,
- 3,
- 61.47,
- 56.9,
- 81.2,
- 4,
- 68.92,
- 67.56,
- 72.47,
- 2,
- 52.17,
- 45.47,
- 81.3,
- 4,
- 35.87,
- 35.46,
- 36.11,
- 1,
- 21.59,
- 21.35,
- 21.65,
- 2,
- 25.67,
- 25.55,
- 25.71,
- 2,
- 36.95,
- 36.78,
- 37.04,
- 2,
- "Phi3ForCausalLM",
- "?",
- 14,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/FairMind/Llama-3-8B-4bit-UltraChat-Ita" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">FairMind/Llama-3-8B-4bit-UltraChat-Ita</a>",
- 44.93,
- 65.05,
- 62.62,
- 71,
- 1,
- 69.44,
- 68.97,
- 70.56,
- 4,
- 61.51,
- 60.03,
- 64.16,
- 4,
- 47.01,
- 39.57,
- 56.8,
- 3,
- 52.79,
- 47.59,
- 60.88,
- 3,
- 52.88,
- 48.26,
- 88.53,
- 4,
- 22.47,
- 21.8,
- 22.67,
- 2,
- 21.41,
- 21.25,
- 21.45,
- 1,
- 29.77,
- 29.17,
- 30.02,
- 2,
- 27,
- 26.27,
- 27.27,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/occiglot/occiglot-7b-it-en-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">occiglot/occiglot-7b-it-en-instruct</a>",
- 43.69,
- 64.34,
- 61.83,
- 70.25,
- 2,
- 69.04,
- 68.43,
- 70.48,
- 4,
- 61.77,
- 58.3,
- 69.78,
- 4,
- 54.16,
- 49.43,
- 61.8,
- 4,
- 55.72,
- 51.2,
- 63.65,
- 3,
- 51.6,
- 43.68,
- 73.07,
- 4,
- 5.6,
- 5.4,
- 5.61,
- 2,
- 21.25,
- 20.55,
- 21.44,
- 2,
- 25.15,
- 24.93,
- 25.22,
- 2,
- 28.28,
- 27.2,
- 28.71,
- 1,
- "MistralForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/MoxoffSpA/Volare" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">MoxoffSpA/Volare</a>",
- 43.73,
- 66.86,
- 65.17,
- 71,
- 2,
- 68.39,
- 66.88,
- 72.33,
- 4,
- 49.59,
- 45.34,
- 54.73,
- 4,
- 48.92,
- 41.83,
- 59.2,
- 3,
- 44.42,
- 36.76,
- 53.1,
- 1,
- 49.76,
- 41.36,
- 75.06,
- 4,
- 28,
- 27.25,
- 28.29,
- 1,
- 17.48,
- 17.37,
- 17.5,
- 2,
- 31.55,
- 31.49,
- 31.57,
- 2,
- 32.34,
- 31.3,
- 32.85,
- 1,
- "GemmaForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/Almawave/Velvet-14B" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Almawave/Velvet-14B</a>",
- 42.69,
- 74.99,
- 74.46,
- 76.75,
- 2,
- 68.28,
- 67.99,
- 68.93,
- 1,
- 67.17,
- 66.35,
- 68.97,
- 4,
- 54.18,
- 49.73,
- 61.2,
- 4,
- 29.29,
- 15.41,
- 37.69,
- 6,
- 49.28,
- 41.73,
- 80.55,
- 4,
- 9.43,
- 9.21,
- 9.45,
- 1,
- 34.66,
- 34.25,
- 34.88,
- 2,
- 24.83,
- 24.48,
- 24.94,
- 2,
- 14.79,
- 13.08,
- 15.1,
- 1,
- "MistralForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/arcee-ai/Llama-3.1-SuperNova-Lite" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">arcee-ai/Llama-3.1-SuperNova-Lite</a>",
- 42.66,
- 70.06,
- 68.54,
- 74.5,
- 4,
- 66.85,
- 63.8,
- 77.12,
- 4,
- 56.41,
- 52.21,
- 63.8,
- 5,
- 51.47,
- 43.63,
- 68.6,
- 3,
- 50.8,
- 42.88,
- 66.58,
- 6,
- 54.89,
- 51.16,
- 90.52,
- 4,
- 23.74,
- 22.04,
- 24.29,
- 2,
- 22.77,
- 22.74,
- 22.78,
- 2,
- 9.98,
- 8.86,
- 10.11,
- 2,
- 19.61,
- 17.82,
- 20.06,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/google/gemma-2-9b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-2-9b-it</a>",
- 47.54,
- 71.99,
- 70.12,
- 79,
- 2,
- 64.35,
- 61.74,
- 70.63,
- 4,
- 64.22,
- 61.92,
- 69.42,
- 4,
- 57.94,
- 52.27,
- 77.8,
- 4,
- 42.48,
- 30.36,
- 64.67,
- 5,
- 51.53,
- 50.58,
- 98,
- 4,
- 25.45,
- 25.29,
- 25.5,
- 2,
- 30.48,
- 30.1,
- 30.65,
- 2,
- 32.01,
- 31.63,
- 32.2,
- 1,
- 34.97,
- 34.56,
- 35.19,
- 1,
- "Gemma2ForCausalLM",
- "?",
- 10,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA</a>",
- 41.74,
- 62.14,
- 58.92,
- 69.5,
- 2,
- 64.05,
- 60.96,
- 72.04,
- 4,
- 48.59,
- 39.59,
- 66.32,
- 6,
- 48.85,
- 40.73,
- 62.2,
- 4,
- 57.27,
- 52.6,
- 66.57,
- 6,
- 51.01,
- 42.85,
- 71.82,
- 4,
- 19.37,
- 19.35,
- 19.37,
- 1,
- 22.63,
- 22.37,
- 22.71,
- 2,
- 22.65,
- 19.02,
- 23.79,
- 1,
- 20.79,
- 17.81,
- 21.61,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/mii-llm/maestrale-chat-v0.4-beta" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">mii-llm/maestrale-chat-v0.4-beta</a>",
- 41.04,
- 60.19,
- 55.42,
- 73.25,
- 4,
- 63.23,
- 61.22,
- 67.4,
- 4,
- 61.53,
- 60.62,
- 63.07,
- 6,
- 51.91,
- 45.17,
- 63.8,
- 3,
- 46.35,
- 36.19,
- 66.84,
- 6,
- 52.36,
- 46.09,
- 83.04,
- 4,
- 20.38,
- 19.22,
- 20.68,
- 2,
- 25.69,
- 25.14,
- 25.89,
- 2,
- 5.02,
- 4.62,
- 5.05,
- 1,
- 23.76,
- 21.98,
- 24.33,
- 2,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/CohereForAI/aya-expanse-8b" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">CohereForAI/aya-expanse-8b</a>",
- 39.85,
- 64,
- 60.33,
- 75,
- 1,
- 63.23,
- 60.16,
- 70.62,
- 4,
- 61.4,
- 60.48,
- 62.97,
- 4,
- 46.66,
- 38.53,
- 57.8,
- 4,
- 52.79,
- 45.95,
- 66.19,
- 3,
- 47.56,
- 38.03,
- 66.33,
- 4,
- 15.24,
- 11.62,
- 15.92,
- 2,
- 19.14,
- 18.84,
- 19.21,
- 2,
- 19.08,
- 17.36,
- 19.5,
- 2,
- 9.37,
- 7.52,
- 9.57,
- 2,
- "CohereForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">meta-llama/Meta-Llama-3.1-8B-Instruct</a>",
- 40.23,
- 62.73,
- 60.62,
- 67,
- 1,
- 63.07,
- 59.8,
- 71.12,
- 5,
- 51.39,
- 44.49,
- 63.26,
- 5,
- 50.55,
- 42.27,
- 69,
- 4,
- 55.86,
- 50.4,
- 66.93,
- 6,
- 53.12,
- 48.29,
- 87.78,
- 4,
- 19.94,
- 15.75,
- 21.06,
- 2,
- 22.33,
- 22.3,
- 22.34,
- 1,
- 7.93,
- 7.57,
- 7.96,
- 1,
- 15.42,
- 15.08,
- 15.48,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/DeepMount00/Llama-3-8b-Ita" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">DeepMount00/Llama-3-8b-Ita</a>",
- 41.02,
- 58.59,
- 56,
- 63,
- 2,
- 62.59,
- 60.33,
- 67.21,
- 5,
- 49.12,
- 40.78,
- 63.86,
- 6,
- 49.04,
- 40.83,
- 63,
- 4,
- 55.44,
- 49.72,
- 67.12,
- 6,
- 51.22,
- 46.22,
- 88.03,
- 4,
- 22.59,
- 21.46,
- 22.93,
- 2,
- 22.76,
- 22.51,
- 22.84,
- 1,
- 25.08,
- 20.84,
- 26.62,
- 1,
- 13.78,
- 12.08,
- 14.06,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/Almawave/Velvet-14B" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Almawave/Velvet-14B</a>",
- 39.48,
- 69.96,
- 67.62,
- 78.5,
- 1,
- 62.53,
- 60.49,
- 66.59,
- 5,
- 59.27,
- 55.93,
- 65.64,
- 3,
- 48.79,
- 42.4,
- 57.4,
- 4,
- 47.33,
- 37.91,
- 64.31,
- 5,
- 50.08,
- 45.47,
- 89.53,
- 4,
- 0.13,
- 0.06,
- 0.13,
- 1,
- 31.1,
- 31.09,
- 31.11,
- 2,
- 15.89,
- 15.75,
- 15.91,
- 2,
- 9.68,
- 6.96,
- 9.98,
- 1,
- "MistralForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/google/gemma-3-12b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-3-12b-it</a>",
- 47.35,
- 71.3,
- 69.25,
- 79,
- 4,
- 61.91,
- 57.59,
- 74.6,
- 4,
- 67.19,
- 65.51,
- 71.4,
- 3,
- 58.57,
- 53.63,
- 83.4,
- 4,
- 46.06,
- 35.76,
- 66.76,
- 5,
- 52.91,
- 52.33,
- 98.75,
- 3,
- 21.85,
- 20.17,
- 22.33,
- 2,
- 29.93,
- 29.46,
- 30.13,
- 2,
- 31.5,
- 29.49,
- 32.47,
- 1,
- 32.23,
- 32.16,
- 32.27,
- 1,
- "?",
- "?",
- 13,
- 0,
- false,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">mistralai/Mistral-7B-Instruct-v0.3</a>",
- 41.56,
- 63.83,
- 60.83,
- 71.25,
- 1,
- 61.23,
- 58.71,
- 66.14,
- 6,
- 59.16,
- 55.86,
- 65.39,
- 5,
- 46.03,
- 39.23,
- 54,
- 3,
- 62.89,
- 61.08,
- 66.49,
- 2,
- 53.28,
- 47.63,
- 84.79,
- 4,
- 9.87,
- 6.47,
- 10.26,
- 2,
- 28.1,
- 27.28,
- 28.43,
- 2,
- 9.72,
- 9.64,
- 9.73,
- 1,
- 21.49,
- 20.29,
- 21.83,
- 2,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/google/gemma-3-4b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-3-4b-it</a>",
- 44.59,
- 66.22,
- 64.88,
- 69.25,
- 3,
- 60.41,
- 57.66,
- 65.65,
- 5,
- 59.8,
- 57.32,
- 64.25,
- 5,
- 46.01,
- 38.87,
- 54.6,
- 3,
- 50.8,
- 44.13,
- 61.44,
- 5,
- 52.55,
- 48.67,
- 90.77,
- 3,
- 32.37,
- 30.72,
- 33.18,
- 1,
- 29.24,
- 28.93,
- 29.37,
- 2,
- 29.76,
- 28.92,
- 30.13,
- 2,
- 18.7,
- 18.34,
- 18.78,
- 2,
- "?",
- "?",
- 0,
- 0,
- false,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/sapienzanlp/Minerva-7B-instruct-v1.0" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">sapienzanlp/Minerva-7B-instruct-v1.0</a>",
- 35.7,
- 67.25,
- 65.33,
- 72.25,
- 3,
- 57.68,
- 54.91,
- 62.25,
- 6,
- 41.59,
- 30.89,
- 54.29,
- 5,
- 37.84,
- 37.27,
- 38.2,
- 2,
- 45.48,
- 36.89,
- 56.73,
- 5,
- 29.88,
- 28.64,
- 30.42,
- 4,
- 5,
- 4.88,
- 5.01,
- 2,
- 15.36,
- 15.12,
- 15.4,
- 2,
- 27.24,
- 27.04,
- 27.32,
- 2,
- 29.69,
- 29.36,
- 29.83,
- 1,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2.5-7B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Qwen/Qwen2.5-7B-Instruct</a>",
- 45.5,
- 75.21,
- 73.67,
- 82.5,
- 4,
- 57.18,
- 51.62,
- 70.43,
- 4,
- 64.56,
- 61.54,
- 72.54,
- 4,
- 49.73,
- 41.03,
- 70.4,
- 3,
- 45.89,
- 35.48,
- 67.11,
- 5,
- 52.83,
- 51.58,
- 97.26,
- 4,
- 32.96,
- 32.42,
- 33.22,
- 2,
- 25.39,
- 24.9,
- 25.56,
- 2,
- 14.97,
- 10.24,
- 15.86,
- 2,
- 36.26,
- 36.11,
- 36.35,
- 1,
- "Qwen2ForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">sapienzanlp/Minerva-7B-base-v1.0</a>",
- 32.36,
- 56.93,
- 54.46,
- 60.75,
- 1,
- 56.56,
- 50.62,
- 71.33,
- 6,
- 53.3,
- 48.04,
- 61.82,
- 2,
- 31.37,
- 27.7,
- 33.2,
- 5,
- 61.49,
- 59.12,
- 66.13,
- 5,
- 29.12,
- 27.81,
- 29.68,
- 5,
- 0.01,
- 0,
- 0.01,
- 2,
- 16.27,
- 16.04,
- 16.31,
- 1,
- 9.64,
- 9.62,
- 9.64,
- 2,
- 8.91,
- 8.09,
- 8.99,
- 2,
- "MistralForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/occiglot/occiglot-7b-it-en-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">occiglot/occiglot-7b-it-en-instruct</a>",
- 38,
- 52.59,
- 49.92,
- 56,
- 4,
- 55.46,
- 51.87,
- 61.09,
- 6,
- 53.79,
- 48.48,
- 62.72,
- 1,
- 47.95,
- 42.8,
- 54,
- 3,
- 50.78,
- 42.86,
- 66.49,
- 5,
- 49.64,
- 42.89,
- 83.29,
- 4,
- 2.84,
- 1.56,
- 2.88,
- 2,
- 27.68,
- 25.5,
- 28.56,
- 2,
- 6.96,
- 5.26,
- 7.09,
- 1,
- 32.31,
- 32.03,
- 32.45,
- 2,
- "MistralForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/microsoft/Phi-3-medium-4k-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/Phi-3-medium-4k-instruct</a>",
- 42.09,
- 66.01,
- 62.58,
- 78.5,
- 3,
- 55.31,
- 48.78,
- 72.58,
- 3,
- 65.19,
- 62.35,
- 72.75,
- 4,
- 54.11,
- 47.67,
- 80.2,
- 3,
- 53.5,
- 46.92,
- 66.67,
- 1,
- 53.24,
- 48.67,
- 88.53,
- 4,
- 0.34,
- 0.31,
- 0.34,
- 2,
- 27.64,
- 26.52,
- 28.08,
- 2,
- 18.6,
- 17.48,
- 18.87,
- 1,
- 27.01,
- 26.99,
- 27.01,
- 2,
- "Phi3ForCausalLM",
- "?",
- 14,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/ibm-granite/granite-3.1-8b-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">ibm-granite/granite-3.1-8b-instruct</a>",
- 37.26,
- 56.34,
- 51.08,
- 67,
- 2,
- 54.43,
- 47.82,
- 69.45,
- 4,
- 48.15,
- 39.51,
- 62.64,
- 1,
- 49.04,
- 41.97,
- 59.4,
- 3,
- 50.24,
- 42,
- 66.85,
- 5,
- 54.7,
- 51.29,
- 91.52,
- 4,
- 0.17,
- 0.08,
- 0.17,
- 2,
- 30.28,
- 30.04,
- 30.39,
- 1,
- 18.09,
- 16.03,
- 18.57,
- 2,
- 11.21,
- 9.17,
- 11.47,
- 2,
- "GraniteForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2.5-14B-Instruct-1M" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Qwen/Qwen2.5-14B-Instruct-1M</a>",
- 44.36,
- 69.51,
- 66.88,
- 86.75,
- 4,
- 54.07,
- 47.09,
- 74.98,
- 3,
- 59.97,
- 54.95,
- 75.11,
- 4,
- 53.29,
- 47.3,
- 83.4,
- 3,
- 48.97,
- 40.07,
- 67.2,
- 6,
- 54.41,
- 53.74,
- 98.5,
- 3,
- 36.76,
- 35.98,
- 37.23,
- 2,
- 25.17,
- 25.1,
- 25.19,
- 2,
- 8.15,
- 6.63,
- 8.28,
- 2,
- 33.32,
- 33.06,
- 33.45,
- 1,
- "Qwen2ForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">sapienzanlp/Minerva-7B-base-v1.0</a>",
- 35.06,
- 68.71,
- 66.79,
- 74.25,
- 4,
- 53.85,
- 53.14,
- 54.7,
- 5,
- 39.49,
- 34.45,
- 43.34,
- 5,
- 38.04,
- 36.83,
- 38.8,
- 6,
- 43.99,
- 34.55,
- 56,
- 5,
- 28.45,
- 27.27,
- 28.93,
- 1,
- 9.53,
- 9.51,
- 9.53,
- 2,
- 15.94,
- 15.92,
- 15.95,
- 1,
- 24.37,
- 24.29,
- 24.4,
- 1,
- 28.2,
- 26.64,
- 28.83,
- 1,
- "MistralForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/FairMind/Llama-3-8B-4bit-UltraChat-Ita" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">FairMind/Llama-3-8B-4bit-UltraChat-Ita</a>",
- 36.28,
- 60.29,
- 58.08,
- 64.25,
- 6,
- 52.97,
- 46.01,
- 67.3,
- 5,
- 54.09,
- 48.72,
- 63.38,
- 5,
- 41.41,
- 33.37,
- 49.2,
- 3,
- 66.22,
- 66.04,
- 66.58,
- 4,
- 43.89,
- 34.54,
- 55.61,
- 4,
- 0,
- 0,
- 0,
- 1,
- 24.17,
- 23.6,
- 24.35,
- 2,
- 15.59,
- 13.72,
- 15.95,
- 2,
- 4.2,
- 2.62,
- 4.27,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/microsoft/Phi-3.5-mini-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/Phi-3.5-mini-instruct</a>",
- 44.4,
- 72.16,
- 70.04,
- 81.5,
- 4,
- 51.63,
- 43.72,
- 70.59,
- 3,
- 65.93,
- 64.49,
- 69.17,
- 4,
- 48.9,
- 40.67,
- 62.8,
- 3,
- 60.37,
- 56.97,
- 67.41,
- 5,
- 51.52,
- 44.22,
- 79.05,
- 3,
- 20.38,
- 18.28,
- 20.94,
- 2,
- 23.24,
- 22.7,
- 23.4,
- 2,
- 30.58,
- 30.35,
- 30.68,
- 2,
- 19.33,
- 14.4,
- 20.61,
- 1,
- "Phi3ForCausalLM",
- "?",
- 4,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/sapienzanlp/Minerva-7B-instruct-v1.0" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">sapienzanlp/Minerva-7B-instruct-v1.0</a>",
- 32.5,
- 55.16,
- 54.12,
- 56.5,
- 4,
- 50.57,
- 44.51,
- 59.46,
- 6,
- 47.62,
- 39.22,
- 60.48,
- 2,
- 32.25,
- 28.87,
- 34,
- 6,
- 57.7,
- 53.42,
- 66.04,
- 5,
- 35.17,
- 31.05,
- 37.66,
- 4,
- 0,
- 0,
- 0,
- 1,
- 16.34,
- 16.22,
- 16.36,
- 2,
- 9.61,
- 9.16,
- 9.66,
- 1,
- 20.6,
- 17.75,
- 21.38,
- 2,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/microsoft/phi-4" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/phi-4</a>",
- 38.37,
- 63.85,
- 60.42,
- 73.25,
- 4,
- 50.54,
- 42.31,
- 74.26,
- 4,
- 61.42,
- 60.41,
- 63.15,
- 4,
- 55.69,
- 50.13,
- 83.2,
- 3,
- 54.67,
- 48.23,
- 69.01,
- 3,
- 53.53,
- 51.45,
- 95.26,
- 4,
- 0,
- 0,
- 0,
- 1,
- 22.5,
- 13.28,
- 25.69,
- 1,
- 0.36,
- 0.18,
- 0.36,
- 1,
- 21.17,
- 19.38,
- 21.66,
- 1,
- "Phi3ForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/MoxoffSpA/Volare" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">MoxoffSpA/Volare</a>",
- 30.23,
- 53.14,
- 51.25,
- 55.5,
- 6,
- 50.1,
- 44.15,
- 58.49,
- 6,
- 50.7,
- 43.57,
- 62.7,
- 2,
- 26.13,
- 22.77,
- 27.4,
- 3,
- 40.77,
- 27.8,
- 66.4,
- 5,
- 27.95,
- 27.35,
- 28.18,
- 1,
- 0.04,
- 0.02,
- 0.04,
- 1,
- 23.28,
- 23.15,
- 23.32,
- 1,
- 10.77,
- 10.63,
- 10.78,
- 2,
- 19.47,
- 15.95,
- 20.37,
- 1,
- "GemmaForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "metadata": null
Hate Speech (HS) --- Multiple-choice task
The input is a tweet. The model must determine whether the text contains hateful content directed at marginalized or minority groups. The output is a binary classification: hateful or not hateful.
# | Prompt | Answer Choices |
---|---|---|
1 | C'è incitamento all'odio nel seguente tweet: '{{full_text}}'? | ["Falso", "Vero"] |
2 | Devi svolgere un compito di identificazione di incitamento all'odio. C'è incitamento all'odio nel seguente tweet: '{{full_text}}'? | ["Falso", "Vero"] |
3 | C'è incitamento all'odio nel seguente tweet: '{{full_text}}'?\nA: Vero\nB: Falso\nRisposta: | ["B", "A"] |
4 | Devi svolgere un compito di identificazione di incitamento all'odio. C'è incitamento all'odio nel seguente tweet: '{{full_text}}'?\nA: Vero\nB: Falso\nRisposta: | ["B", "A"] |
5 | Il tweet: '{{full_text}}' | ["non contiene incitamento all'odio", "contiene incitamento all'odio"] |
6 | Devi svolgere un compito di identificazione di incitamento all'odio. Il tweet: '{{full_text}}' | ["non contiene incitamento all'odio", "contiene incitamento all'odio"] |
Combined Performance = (1 - (Best Prompt - Prompt Average) / 100) * Best Prompt. Prompt Average = F1-micro averaged over the 6 prompts. Best Prompt = F1-micro of the best prompt. Prompt ID = ID of the best prompt (see legend above).
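For multiple-choice tasks such as HS, each prompt is instantiated by substituting the tweet into the `{{full_text}}` placeholder, and the model is scored on the listed answer choices. The sketch below shows one plausible instantiation, assuming the usual log-likelihood comparison over choices; `score_loglikelihood` is a hypothetical stand-in, since the harness internals are not shown on this page.

```python
# Illustrative only: score_loglikelihood is a hypothetical callable, e.g.
# score_loglikelihood(context, continuation) -> float (higher = more likely).
HS_PROMPT_5 = "Il tweet: '{{full_text}}'"
HS_CHOICES_5 = ["non contiene incitamento all'odio", "contiene incitamento all'odio"]

def render(template: str, tweet: str) -> str:
    """Fill the '{{full_text}}' placeholder with the actual tweet text."""
    return template.replace("{{full_text}}", tweet)

def predict(tweet: str, score_loglikelihood) -> str:
    """Pick the answer choice the model rates most likely as a continuation."""
    prompt = render(HS_PROMPT_5, tweet)
    scores = [score_loglikelihood(prompt, choice) for choice in HS_CHOICES_5]
    return HS_CHOICES_5[scores.index(max(scores))]
```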
- "headers": [
- "FS",
- "IS_FS",
- "Model",
- "Avg. Combined Performance ⬆️",
- "TE",
- "TE Prompt Average",
- "TE Best Prompt",
- "TE Best Prompt Id",
- "SA",
- "SA Prompt Average",
- "SA Best Prompt",
- "SA Best Prompt Id",
- "Combined Performance",
- "Prompt Average",
- "Best Prompt",
- "Best Prompt Id",
- "AT",
- "AT Prompt Average",
- "AT Best Prompt",
- "AT Best Prompt Id",
- "WIC",
- "WIC Prompt Average",
- "WIC Best Prompt",
- "WIC Best Prompt Id",
- "FAQ",
- "FAQ Prompt Average",
- "FAQ Best Prompt",
- "FAQ Best Prompt Id",
- "LS",
- "LS Prompt Average",
- "LS Best Prompt",
- "LS Best Prompt Id",
- "SU",
- "SU Prompt Average",
- "SU Best Prompt",
- "SU Best Prompt Id",
- "NER",
- "NER Prompt Average",
- "NER Best Prompt",
- "NER Best Prompt Id",
- "REL",
- "REL Prompt Average",
- "REL Best Prompt",
- "REL Best Prompt Id",
- "Architecture",
- "Hub License",
- "#Params (B)",
- "Hub ❤️",
- "Available on the hub",
- "Model sha"
- "data": [
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2.5-14B-Instruct-1M" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Qwen/Qwen2.5-14B-Instruct-1M</a>",
- 54.72,
- 85.02,
- 84.79,
- 86.5,
- 4,
- 73.17,
- 71.75,
- 78.26,
- 3,
- 72.8,
- 71.8,
- 75.96,
- 4,
- 59.75,
- 55.4,
- 85.6,
- 3,
- 63.59,
- 61.7,
- 67.52,
- 6,
- 52.61,
- 52.37,
- 99.5,
- 4,
- 35.03,
- 34.65,
- 35.24,
- 1,
- 25.9,
- 25.51,
- 26.04,
- 2,
- 35.1,
- 35.03,
- 35.14,
- 1,
- 44.23,
- 42.63,
- 45.56,
- 1,
- "Qwen2ForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/google/gemma-2-9b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-2-9b-it</a>",
- 53.64,
- 79.97,
- 79.62,
- 81.5,
- 2,
- 72.85,
- 72.49,
- 73.89,
- 1,
- 71.99,
- 70.71,
- 76.03,
- 3,
- 59.96,
- 54.9,
- 75.8,
- 3,
- 57.79,
- 55.64,
- 61.17,
- 3,
- 51.76,
- 51.16,
- 98.75,
- 3,
- 22.36,
- 21.97,
- 22.47,
- 2,
- 30.43,
- 29.12,
- 31.02,
- 1,
- 38.03,
- 37.92,
- 38.09,
- 1,
- 51.26,
- 50.98,
- 51.56,
- 1,
- "Gemma2ForCausalLM",
- "?",
- 10,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/google/gemma-3-12b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-3-12b-it</a>",
- 53.92,
- 79.52,
- 79.12,
- 81.25,
- 1,
- 78.34,
- 78.17,
- 78.98,
- 6,
- 71.4,
- 70.14,
- 75.24,
- 4,
- 65.9,
- 62.5,
- 84.6,
- 4,
- 66.5,
- 64.94,
- 70.16,
- 3,
- 52.35,
- 52,
- 99.25,
- 3,
- 26.77,
- 26.64,
- 26.81,
- 2,
- 18.93,
- 18.74,
- 18.98,
- 2,
- 37.86,
- 37.82,
- 37.89,
- 1,
- 41.59,
- 41.46,
- 41.69,
- 1,
- "?",
- "?",
- 13,
- 0,
- false,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA</a>",
- 49.39,
- 72.91,
- 71.75,
- 76.75,
- 1,
- 72.07,
- 70.82,
- 76.05,
- 4,
- 70.99,
- 69.88,
- 74.19,
- 4,
- 52.81,
- 46.4,
- 64.4,
- 4,
- 56.82,
- 52.88,
- 63.74,
- 1,
- 56.55,
- 52.41,
- 88.53,
- 3,
- 23.26,
- 22.92,
- 23.37,
- 1,
- 29.1,
- 28.92,
- 29.18,
- 2,
- 17.2,
- 15.77,
- 17.51,
- 2,
- 42.14,
- 41.31,
- 42.76,
- 1,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/arcee-ai/Llama-3.1-SuperNova-Lite" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">arcee-ai/Llama-3.1-SuperNova-Lite</a>",
- 51.71,
- 74.19,
- 73.62,
- 76,
- 3,
- 75.24,
- 74.23,
- 79.03,
- 4,
- 69.24,
- 67.58,
- 73.93,
- 4,
- 54.83,
- 48.43,
- 69.2,
- 4,
- 62.58,
- 61.35,
- 64.86,
- 6,
- 54.43,
- 51,
- 91.52,
- 3,
- 29.5,
- 29.28,
- 29.59,
- 2,
- 21.6,
- 21.38,
- 21.66,
- 2,
- 39.49,
- 38.24,
- 40.34,
- 2,
- 36.02,
- 35.67,
- 36.21,
- 1,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/microsoft/phi-4" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/phi-4</a>",
- 51.84,
- 76.84,
- 76.21,
- 79.25,
- 3,
- 75.21,
- 74.44,
- 77.95,
- 4,
- 69.2,
- 67.08,
- 75.89,
- 4,
- 61.91,
- 57.87,
- 85.2,
- 4,
- 59.38,
- 57.17,
- 63.16,
- 4,
- 54.3,
- 53.41,
- 98,
- 4,
- 35.1,
- 35.1,
- 35.11,
- 2,
- 21.24,
- 21.06,
- 21.29,
- 1,
- 28.05,
- 27.93,
- 28.1,
- 2,
- 37.15,
- 36.83,
- 37.34,
- 1,
- "Phi3ForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/DeepMount00/Llama-3-8b-Ita" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">DeepMount00/Llama-3-8b-Ita</a>",
- 49.41,
- 72.61,
- 71.54,
- 76,
- 1,
- 71.9,
- 70.46,
- 76.59,
- 4,
- 68.95,
- 67.49,
- 72.86,
- 4,
- 52.96,
- 46.33,
- 65.6,
- 3,
- 57.04,
- 54.37,
- 61.26,
- 1,
- 53.68,
- 51.41,
- 94.76,
- 4,
- 25.08,
- 24.86,
- 25.16,
- 1,
- 24.3,
- 24.26,
- 24.31,
- 2,
- 30.98,
- 30.27,
- 31.31,
- 2,
- 36.56,
- 36.56,
- 36.56,
- 1,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2.5-7B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Qwen/Qwen2.5-7B-Instruct</a>",
- 52.62,
- 83.65,
- 83.42,
- 85,
- 2,
- 74.16,
- 73.77,
- 75.35,
- 4,
- 68.51,
- 67.14,
- 72.04,
- 4,
- 53.92,
- 46.93,
- 75.8,
- 4,
- 62.7,
- 60.38,
- 67.5,
- 6,
- 52.46,
- 51.87,
- 98.75,
- 4,
- 28.17,
- 27.85,
- 28.29,
- 2,
- 30.79,
- 30.36,
- 30.98,
- 2,
- 35.18,
- 34.45,
- 35.58,
- 1,
- 36.69,
- 36.64,
- 36.72,
- 2,
- "Qwen2ForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/CohereForAI/aya-expanse-8b" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">CohereForAI/aya-expanse-8b</a>",
- 49.3,
- 72.91,
- 71.88,
- 76.25,
- 2,
- 72.28,
- 71.9,
- 73.3,
- 5,
- 67.71,
- 65.83,
- 72.71,
- 4,
- 52.54,
- 46.27,
- 63.4,
- 4,
- 62.02,
- 59.58,
- 66.98,
- 3,
- 54.39,
- 51.87,
- 94.01,
- 4,
- 20.55,
- 20.37,
- 20.6,
- 2,
- 18.32,
- 18.27,
- 18.33,
- 1,
- 35.73,
- 35.67,
- 35.77,
- 2,
- 36.56,
- 36.56,
- 36.56,
- 1,
- "CohereForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/google/gemma-3-12b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-3-12b-it</a>",
- 47.35,
- 71.3,
- 69.25,
- 79,
- 4,
- 61.91,
- 57.59,
- 74.6,
- 4,
- 67.19,
- 65.51,
- 71.4,
- 3,
- 58.57,
- 53.63,
- 83.4,
- 4,
- 46.06,
- 35.76,
- 66.76,
- 5,
- 52.91,
- 52.33,
- 98.75,
- 3,
- 21.85,
- 20.17,
- 22.33,
- 2,
- 29.93,
- 29.46,
- 30.13,
- 2,
- 31.5,
- 29.49,
- 32.47,
- 1,
- 32.23,
- 32.16,
- 32.27,
- 1,
- "?",
- "?",
- 13,
- 0,
- false,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/Almawave/Velvet-14B" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Almawave/Velvet-14B</a>",
- 42.69,
- 74.99,
- 74.46,
- 76.75,
- 2,
- 68.28,
- 67.99,
- 68.93,
- 1,
- 67.17,
- 66.35,
- 68.97,
- 4,
- 54.18,
- 49.73,
- 61.2,
- 4,
- 29.29,
- 15.41,
- 37.69,
- 6,
- 49.28,
- 41.73,
- 80.55,
- 4,
- 9.43,
- 9.21,
- 9.45,
- 1,
- 34.66,
- 34.25,
- 34.88,
- 2,
- 24.83,
- 24.48,
- 24.94,
- 2,
- 14.79,
- 13.08,
- 15.1,
- 1,
- "MistralForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/microsoft/Phi-3.5-mini-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/Phi-3.5-mini-instruct</a>",
- 50.06,
- 77.35,
- 76.92,
- 79,
- 2,
- 72.81,
- 72.21,
- 74.57,
- 5,
- 67.05,
- 65.31,
- 71.39,
- 4,
- 53.19,
- 46.5,
- 66.4,
- 3,
- 65.33,
- 63.97,
- 68.25,
- 6,
- 52.72,
- 47.92,
- 88.03,
- 4,
- 24.07,
- 23.86,
- 24.14,
- 2,
- 19.85,
- 19.71,
- 19.89,
- 1,
- 31.98,
- 31.5,
- 32.21,
- 1,
- 36.25,
- 36.15,
- 36.3,
- 2,
- "Phi3ForCausalLM",
- "?",
- 4,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/ibm-granite/granite-3.1-8b-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">ibm-granite/granite-3.1-8b-instruct</a>",
- 51.7,
- 72.23,
- 70.92,
- 76.5,
- 4,
- 74.76,
- 74.47,
- 75.67,
- 1,
- 66.88,
- 64.05,
- 75.68,
- 4,
- 54.23,
- 48.17,
- 66,
- 4,
- 65.03,
- 63.71,
- 67.83,
- 4,
- 53.49,
- 52.58,
- 98,
- 3,
- 25.74,
- 25.6,
- 25.79,
- 1,
- 32.88,
- 32.36,
- 33.14,
- 1,
- 31.8,
- 30.59,
- 32.38,
- 2,
- 39.91,
- 39.7,
- 40.06,
- 1,
- "GraniteForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/mii-llm/maestrale-chat-v0.4-beta" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">mii-llm/maestrale-chat-v0.4-beta</a>",
- 49.37,
- 73.89,
- 72.62,
- 78.5,
- 4,
- 72.23,
- 71.75,
- 73.57,
- 2,
- 66.86,
- 64.99,
- 71.56,
- 3,
- 55.62,
- 50.7,
- 64.6,
- 3,
- 60.35,
- 59.48,
- 61.76,
- 6,
- 51.66,
- 43.81,
- 74.06,
- 3,
- 22.54,
- 22.36,
- 22.59,
- 1,
- 21.18,
- 21.18,
- 21.18,
- 2,
- 33.53,
- 33.24,
- 33.68,
- 1,
- 35.81,
- 35.36,
- 36.06,
- 2,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/microsoft/Phi-3.5-mini-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/Phi-3.5-mini-instruct</a>",
- 44.4,
- 72.16,
- 70.04,
- 81.5,
- 4,
- 51.63,
- 43.72,
- 70.59,
- 3,
- 65.93,
- 64.49,
- 69.17,
- 4,
- 48.9,
- 40.67,
- 62.8,
- 3,
- 60.37,
- 56.97,
- 67.41,
- 5,
- 51.52,
- 44.22,
- 79.05,
- 3,
- 20.38,
- 18.28,
- 20.94,
- 2,
- 23.24,
- 22.7,
- 23.4,
- 2,
- 30.58,
- 30.35,
- 30.68,
- 2,
- 19.33,
- 14.4,
- 20.61,
- 1,
- "Phi3ForCausalLM",
- "?",
- 4,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">meta-llama/Meta-Llama-3.1-8B-Instruct</a>",
- 50.37,
- 70.53,
- 69.04,
- 75,
- 4,
- 73.77,
- 72.72,
- 77.35,
- 4,
- 65.72,
- 63.22,
- 72.22,
- 4,
- 53.89,
- 46.97,
- 70.2,
- 4,
- 59.03,
- 57.65,
- 61.2,
- 2,
- 53.73,
- 51.16,
- 94.01,
- 3,
- 30.32,
- 29.76,
- 30.57,
- 2,
- 20.44,
- 20.2,
- 20.5,
- 2,
- 40.3,
- 39.7,
- 40.72,
- 2,
- 35.99,
- 35.89,
- 36.05,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/microsoft/Phi-3-medium-4k-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/Phi-3-medium-4k-instruct</a>",
- 42.09,
- 66.01,
- 62.58,
- 78.5,
- 3,
- 55.31,
- 48.78,
- 72.58,
- 3,
- 65.19,
- 62.35,
- 72.75,
- 4,
- 54.11,
- 47.67,
- 80.2,
- 3,
- 53.5,
- 46.92,
- 66.67,
- 1,
- 53.24,
- 48.67,
- 88.53,
- 4,
- 0.34,
- 0.31,
- 0.34,
- 2,
- 27.64,
- 26.52,
- 28.08,
- 2,
- 18.6,
- 17.48,
- 18.87,
- 1,
- 27.01,
- 26.99,
- 27.01,
- 2,
- "Phi3ForCausalLM",
- "?",
- 14,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/google/gemma-3-4b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-3-4b-it</a>",
- 46.57,
- 68.28,
- 66.46,
- 73.25,
- 2,
- 75.3,
- 74.96,
- 76.37,
- 4,
- 65.02,
- 62.43,
- 71.53,
- 4,
- 51.92,
- 46.53,
- 60,
- 4,
- 49.02,
- 41.31,
- 61.17,
- 6,
- 51.66,
- 50.04,
- 96.51,
- 3,
- 14.55,
- 14.06,
- 14.64,
- 1,
- 17.96,
- 17.95,
- 17.96,
- 1,
- 36,
- 35.79,
- 36.12,
- 1,
- 36.02,
- 35.98,
- 36.04,
- 1,
- "?",
- "?",
- 0,
- 0,
- false,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2.5-7B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Qwen/Qwen2.5-7B-Instruct</a>",
- 45.5,
- 75.21,
- 73.67,
- 82.5,
- 4,
- 57.18,
- 51.62,
- 70.43,
- 4,
- 64.56,
- 61.54,
- 72.54,
- 4,
- 49.73,
- 41.03,
- 70.4,
- 3,
- 45.89,
- 35.48,
- 67.11,
- 5,
- 52.83,
- 51.58,
- 97.26,
- 4,
- 32.96,
- 32.42,
- 33.22,
- 2,
- 25.39,
- 24.9,
- 25.56,
- 2,
- 14.97,
- 10.24,
- 15.86,
- 2,
- 36.26,
- 36.11,
- 36.35,
- 1,
- "Qwen2ForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/google/gemma-2-9b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-2-9b-it</a>",
- 47.54,
- 71.99,
- 70.12,
- 79,
- 2,
- 64.35,
- 61.74,
- 70.63,
- 4,
- 64.22,
- 61.92,
- 69.42,
- 4,
- 57.94,
- 52.27,
- 77.8,
- 4,
- 42.48,
- 30.36,
- 64.67,
- 5,
- 51.53,
- 50.58,
- 98,
- 4,
- 25.45,
- 25.29,
- 25.5,
- 2,
- 30.48,
- 30.1,
- 30.65,
- 2,
- 32.01,
- 31.63,
- 32.2,
- 1,
- 34.97,
- 34.56,
- 35.19,
- 1,
- "Gemma2ForCausalLM",
- "?",
- 10,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/microsoft/Phi-3-medium-4k-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/Phi-3-medium-4k-instruct</a>",
- 51.22,
- 76.27,
- 75.25,
- 80.5,
- 4,
- 70.48,
- 69.85,
- 72.1,
- 3,
- 62.83,
- 58.75,
- 75.2,
- 3,
- 61.47,
- 56.9,
- 81.2,
- 4,
- 68.92,
- 67.56,
- 72.47,
- 2,
- 52.17,
- 45.47,
- 81.3,
- 4,
- 35.87,
- 35.46,
- 36.11,
- 1,
- 21.59,
- 21.35,
- 21.65,
- 2,
- 25.67,
- 25.55,
- 25.71,
- 2,
- 36.95,
- 36.78,
- 37.04,
- 2,
- "Phi3ForCausalLM",
- "?",
- 14,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/occiglot/occiglot-7b-it-en-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">occiglot/occiglot-7b-it-en-instruct</a>",
- 43.69,
- 64.34,
- 61.83,
- 70.25,
- 2,
- 69.04,
- 68.43,
- 70.48,
- 4,
- 61.77,
- 58.3,
- 69.78,
- 4,
- 54.16,
- 49.43,
- 61.8,
- 4,
- 55.72,
- 51.2,
- 63.65,
- 3,
- 51.6,
- 43.68,
- 73.07,
- 4,
- 5.6,
- 5.4,
- 5.61,
- 2,
- 21.25,
- 20.55,
- 21.44,
- 2,
- 25.15,
- 24.93,
- 25.22,
- 2,
- 28.28,
- 27.2,
- 28.71,
- 1,
- "MistralForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/mii-llm/maestrale-chat-v0.4-beta" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">mii-llm/maestrale-chat-v0.4-beta</a>",
- 41.04,
- 60.19,
- 55.42,
- 73.25,
- 4,
- 63.23,
- 61.22,
- 67.4,
- 4,
- 61.53,
- 60.62,
- 63.07,
- 6,
- 51.91,
- 45.17,
- 63.8,
- 3,
- 46.35,
- 36.19,
- 66.84,
- 6,
- 52.36,
- 46.09,
- 83.04,
- 4,
- 20.38,
- 19.22,
- 20.68,
- 2,
- 25.69,
- 25.14,
- 25.89,
- 2,
- 5.02,
- 4.62,
- 5.05,
- 1,
- 23.76,
- 21.98,
- 24.33,
- 2,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/FairMind/Llama-3-8B-4bit-UltraChat-Ita" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">FairMind/Llama-3-8B-4bit-UltraChat-Ita</a>",
- 44.93,
- 65.05,
- 62.62,
- 71,
- 1,
- 69.44,
- 68.97,
- 70.56,
- 4,
- 61.51,
- 60.03,
- 64.16,
- 4,
- 47.01,
- 39.57,
- 56.8,
- 3,
- 52.79,
- 47.59,
- 60.88,
- 3,
- 52.88,
- 48.26,
- 88.53,
- 4,
- 22.47,
- 21.8,
- 22.67,
- 2,
- 21.41,
- 21.25,
- 21.45,
- 1,
- 29.77,
- 29.17,
- 30.02,
- 2,
- 27,
- 26.27,
- 27.27,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/microsoft/phi-4" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/phi-4</a>",
- 38.37,
- 63.85,
- 60.42,
- 73.25,
- 4,
- 50.54,
- 42.31,
- 74.26,
- 4,
- 61.42,
- 60.41,
- 63.15,
- 4,
- 55.69,
- 50.13,
- 83.2,
- 3,
- 54.67,
- 48.23,
- 69.01,
- 3,
- 53.53,
- 51.45,
- 95.26,
- 4,
- 0,
- 0,
- 0,
- 1,
- 22.5,
- 13.28,
- 25.69,
- 1,
- 0.36,
- 0.18,
- 0.36,
- 1,
- 21.17,
- 19.38,
- 21.66,
- 1,
- "Phi3ForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/CohereForAI/aya-expanse-8b" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">CohereForAI/aya-expanse-8b</a>",
- 39.85,
- 64,
- 60.33,
- 75,
- 1,
- 63.23,
- 60.16,
- 70.62,
- 4,
- 61.4,
- 60.48,
- 62.97,
- 4,
- 46.66,
- 38.53,
- 57.8,
- 4,
- 52.79,
- 45.95,
- 66.19,
- 3,
- 47.56,
- 38.03,
- 66.33,
- 4,
- 15.24,
- 11.62,
- 15.92,
- 2,
- 19.14,
- 18.84,
- 19.21,
- 2,
- 19.08,
- 17.36,
- 19.5,
- 2,
- 9.37,
- 7.52,
- 9.57,
- 2,
- "CohereForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2.5-14B-Instruct-1M" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Qwen/Qwen2.5-14B-Instruct-1M</a>",
- 44.36,
- 69.51,
- 66.88,
- 86.75,
- 4,
- 54.07,
- 47.09,
- 74.98,
- 3,
- 59.97,
- 54.95,
- 75.11,
- 4,
- 53.29,
- 47.3,
- 83.4,
- 3,
- 48.97,
- 40.07,
- 67.2,
- 6,
- 54.41,
- 53.74,
- 98.5,
- 3,
- 36.76,
- 35.98,
- 37.23,
- 2,
- 25.17,
- 25.1,
- 25.19,
- 2,
- 8.15,
- 6.63,
- 8.28,
- 2,
- 33.32,
- 33.06,
- 33.45,
- 1,
- "Qwen2ForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/google/gemma-3-4b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-3-4b-it</a>",
- 44.59,
- 66.22,
- 64.88,
- 69.25,
- 3,
- 60.41,
- 57.66,
- 65.65,
- 5,
- 59.8,
- 57.32,
- 64.25,
- 5,
- 46.01,
- 38.87,
- 54.6,
- 3,
- 50.8,
- 44.13,
- 61.44,
- 5,
- 52.55,
- 48.67,
- 90.77,
- 3,
- 32.37,
- 30.72,
- 33.18,
- 1,
- 29.24,
- 28.93,
- 29.37,
- 2,
- 29.76,
- 28.92,
- 30.13,
- 2,
- 18.7,
- 18.34,
- 18.78,
- 2,
- "?",
- "?",
- 0,
- 0,
- false,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/Almawave/Velvet-14B" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Almawave/Velvet-14B</a>",
- 39.48,
- 69.96,
- 67.62,
- 78.5,
- 1,
- 62.53,
- 60.49,
- 66.59,
- 5,
- 59.27,
- 55.93,
- 65.64,
- 3,
- 48.79,
- 42.4,
- 57.4,
- 4,
- 47.33,
- 37.91,
- 64.31,
- 5,
- 50.08,
- 45.47,
- 89.53,
- 4,
- 0.13,
- 0.06,
- 0.13,
- 1,
- 31.1,
- 31.09,
- 31.11,
- 2,
- 15.89,
- 15.75,
- 15.91,
- 2,
- 9.68,
- 6.96,
- 9.98,
- 1,
- "MistralForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">mistralai/Mistral-7B-Instruct-v0.3</a>",
- 41.56,
- 63.83,
- 60.83,
- 71.25,
- 1,
- 61.23,
- 58.71,
- 66.14,
- 6,
- 59.16,
- 55.86,
- 65.39,
- 5,
- 46.03,
- 39.23,
- 54,
- 3,
- 62.89,
- 61.08,
- 66.49,
- 2,
- 53.28,
- 47.63,
- 84.79,
- 4,
- 9.87,
- 6.47,
- 10.26,
- 2,
- 28.1,
- 27.28,
- 28.43,
- 2,
- 9.72,
- 9.64,
- 9.73,
- 1,
- 21.49,
- 20.29,
- 21.83,
- 2,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/arcee-ai/Llama-3.1-SuperNova-Lite" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">arcee-ai/Llama-3.1-SuperNova-Lite</a>",
- 42.66,
- 70.06,
- 68.54,
- 74.5,
- 4,
- 66.85,
- 63.8,
- 77.12,
- 4,
- 56.41,
- 52.21,
- 63.8,
- 5,
- 51.47,
- 43.63,
- 68.6,
- 3,
- 50.8,
- 42.88,
- 66.58,
- 6,
- 54.89,
- 51.16,
- 90.52,
- 4,
- 23.74,
- 22.04,
- 24.29,
- 2,
- 22.77,
- 22.74,
- 22.78,
- 2,
- 9.98,
- 8.86,
- 10.11,
- 2,
- 19.61,
- 17.82,
- 20.06,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/FairMind/Llama-3-8B-4bit-UltraChat-Ita" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">FairMind/Llama-3-8B-4bit-UltraChat-Ita</a>",
- 36.28,
- 60.29,
- 58.08,
- 64.25,
- 6,
- 52.97,
- 46.01,
- 67.3,
- 5,
- 54.09,
- 48.72,
- 63.38,
- 5,
- 41.41,
- 33.37,
- 49.2,
- 3,
- 66.22,
- 66.04,
- 66.58,
- 4,
- 43.89,
- 34.54,
- 55.61,
- 4,
- 0,
- 0,
- 0,
- 1,
- 24.17,
- 23.6,
- 24.35,
- 2,
- 15.59,
- 13.72,
- 15.95,
- 2,
- 4.2,
- 2.62,
- 4.27,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/occiglot/occiglot-7b-it-en-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">occiglot/occiglot-7b-it-en-instruct</a>",
- 38,
- 52.59,
- 49.92,
- 56,
- 4,
- 55.46,
- 51.87,
- 61.09,
- 6,
- 53.79,
- 48.48,
- 62.72,
- 1,
- 47.95,
- 42.8,
- 54,
- 3,
- 50.78,
- 42.86,
- 66.49,
- 5,
- 49.64,
- 42.89,
- 83.29,
- 4,
- 2.84,
- 1.56,
- 2.88,
- 2,
- 27.68,
- 25.5,
- 28.56,
- 2,
- 6.96,
- 5.26,
- 7.09,
- 1,
- 32.31,
- 32.03,
- 32.45,
- 2,
- "MistralForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">sapienzanlp/Minerva-7B-base-v1.0</a>",
- 32.36,
- 56.93,
- 54.46,
- 60.75,
- 1,
- 56.56,
- 50.62,
- 71.33,
- 6,
- 53.3,
- 48.04,
- 61.82,
- 2,
- 31.37,
- 27.7,
- 33.2,
- 5,
- 61.49,
- 59.12,
- 66.13,
- 5,
- 29.12,
- 27.81,
- 29.68,
- 5,
- 0.01,
- 0,
- 0.01,
- 2,
- 16.27,
- 16.04,
- 16.31,
- 1,
- 9.64,
- 9.62,
- 9.64,
- 2,
- 8.91,
- 8.09,
- 8.99,
- 2,
- "MistralForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">meta-llama/Meta-Llama-3.1-8B-Instruct</a>",
- 40.23,
- 62.73,
- 60.62,
- 67,
- 1,
- 63.07,
- 59.8,
- 71.12,
- 5,
- 51.39,
- 44.49,
- 63.26,
- 5,
- 50.55,
- 42.27,
- 69,
- 4,
- 55.86,
- 50.4,
- 66.93,
- 6,
- 53.12,
- 48.29,
- 87.78,
- 4,
- 19.94,
- 15.75,
- 21.06,
- 2,
- 22.33,
- 22.3,
- 22.34,
- 1,
- 7.93,
- 7.57,
- 7.96,
- 1,
- 15.42,
- 15.08,
- 15.48,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/MoxoffSpA/Volare" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">MoxoffSpA/Volare</a>",
- 30.23,
- 53.14,
- 51.25,
- 55.5,
- 6,
- 50.1,
- 44.15,
- 58.49,
- 6,
- 50.7,
- 43.57,
- 62.7,
- 2,
- 26.13,
- 22.77,
- 27.4,
- 3,
- 40.77,
- 27.8,
- 66.4,
- 5,
- 27.95,
- 27.35,
- 28.18,
- 1,
- 0.04,
- 0.02,
- 0.04,
- 1,
- 23.28,
- 23.15,
- 23.32,
- 1,
- 10.77,
- 10.63,
- 10.78,
- 2,
- 19.47,
- 15.95,
- 20.37,
- 1,
- "GemmaForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/MoxoffSpA/Volare" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">MoxoffSpA/Volare</a>",
- 43.73,
- 66.86,
- 65.17,
- 71,
- 2,
- 68.39,
- 66.88,
- 72.33,
- 4,
- 49.59,
- 45.34,
- 54.73,
- 4,
- 48.92,
- 41.83,
- 59.2,
- 3,
- 44.42,
- 36.76,
- 53.1,
- 1,
- 49.76,
- 41.36,
- 75.06,
- 4,
- 28,
- 27.25,
- 28.29,
- 1,
- 17.48,
- 17.37,
- 17.5,
- 2,
- 31.55,
- 31.49,
- 31.57,
- 2,
- 32.34,
- 31.3,
- 32.85,
- 1,
- "GemmaForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/DeepMount00/Llama-3-8b-Ita" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">DeepMount00/Llama-3-8b-Ita</a>",
- 41.02,
- 58.59,
- 56,
- 63,
- 2,
- 62.59,
- 60.33,
- 67.21,
- 5,
- 49.12,
- 40.78,
- 63.86,
- 6,
- 49.04,
- 40.83,
- 63,
- 4,
- 55.44,
- 49.72,
- 67.12,
- 6,
- 51.22,
- 46.22,
- 88.03,
- 4,
- 22.59,
- 21.46,
- 22.93,
- 2,
- 22.76,
- 22.51,
- 22.84,
- 1,
- 25.08,
- 20.84,
- 26.62,
- 1,
- 13.78,
- 12.08,
- 14.06,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA</a>",
- 41.74,
- 62.14,
- 58.92,
- 69.5,
- 2,
- 64.05,
- 60.96,
- 72.04,
- 4,
- 48.59,
- 39.59,
- 66.32,
- 6,
- 48.85,
- 40.73,
- 62.2,
- 4,
- 57.27,
- 52.6,
- 66.57,
- 6,
- 51.01,
- 42.85,
- 71.82,
- 4,
- 19.37,
- 19.35,
- 19.37,
- 1,
- 22.63,
- 22.37,
- 22.71,
- 2,
- 22.65,
- 19.02,
- 23.79,
- 1,
- 20.79,
- 17.81,
- 21.61,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/ibm-granite/granite-3.1-8b-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">ibm-granite/granite-3.1-8b-instruct</a>",
- 37.26,
- 56.34,
- 51.08,
- 67,
- 2,
- 54.43,
- 47.82,
- 69.45,
- 4,
- 48.15,
- 39.51,
- 62.64,
- 1,
- 49.04,
- 41.97,
- 59.4,
- 3,
- 50.24,
- 42,
- 66.85,
- 5,
- 54.7,
- 51.29,
- 91.52,
- 4,
- 0.17,
- 0.08,
- 0.17,
- 2,
- 30.28,
- 30.04,
- 30.39,
- 1,
- 18.09,
- 16.03,
- 18.57,
- 2,
- 11.21,
- 9.17,
- 11.47,
- 2,
- "GraniteForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/sapienzanlp/Minerva-7B-instruct-v1.0" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">sapienzanlp/Minerva-7B-instruct-v1.0</a>",
- 32.5,
- 55.16,
- 54.12,
- 56.5,
- 4,
- 50.57,
- 44.51,
- 59.46,
- 6,
- 47.62,
- 39.22,
- 60.48,
- 2,
- 32.25,
- 28.87,
- 34,
- 6,
- 57.7,
- 53.42,
- 66.04,
- 5,
- 35.17,
- 31.05,
- 37.66,
- 4,
- 0,
- 0,
- 0,
- 1,
- 16.34,
- 16.22,
- 16.36,
- 2,
- 9.61,
- 9.16,
- 9.66,
- 1,
- 20.6,
- 17.75,
- 21.38,
- 2,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/sapienzanlp/Minerva-7B-instruct-v1.0" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">sapienzanlp/Minerva-7B-instruct-v1.0</a>",
- 35.7,
- 67.25,
- 65.33,
- 72.25,
- 3,
- 57.68,
- 54.91,
- 62.25,
- 6,
- 41.59,
- 30.89,
- 54.29,
- 5,
- 37.84,
- 37.27,
- 38.2,
- 2,
- 45.48,
- 36.89,
- 56.73,
- 5,
- 29.88,
- 28.64,
- 30.42,
- 4,
- 5,
- 4.88,
- 5.01,
- 2,
- 15.36,
- 15.12,
- 15.4,
- 2,
- 27.24,
- 27.04,
- 27.32,
- 2,
- 29.69,
- 29.36,
- 29.83,
- 1,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">sapienzanlp/Minerva-7B-base-v1.0</a>",
- 35.06,
- 68.71,
- 66.79,
- 74.25,
- 4,
- 53.85,
- 53.14,
- 54.7,
- 5,
- 39.49,
- 34.45,
- 43.34,
- 5,
- 38.04,
- 36.83,
- 38.8,
- 6,
- 43.99,
- 34.55,
- 56,
- 5,
- 28.45,
- 27.27,
- 28.93,
- 1,
- 9.53,
- 9.51,
- 9.53,
- 2,
- 15.94,
- 15.92,
- 15.95,
- 1,
- 24.37,
- 24.29,
- 24.4,
- 1,
- 28.2,
- 26.64,
- 28.83,
- 1,
- "MistralForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "metadata": null
Admission Tests (AT) --- Multiple-choice task
The input is a multiple-choice question from Italian medical specialty entrance exams, with five answer options (A-E); the model must identify the correct one.
# | Prompt | Answer Choices |
---|---|---|
1 | Dato il seguente quesito di medicina: '{{Question}}' qual è la risposta corretta? | ["A", "B", "C", "D", "E"] |
2 | Devi risolvere un compito di risposte a domande. Dato il seguente quesito di medicina: '{{Question}}' qual è la risposta corretta? | ["A", "B", "C", "D", "E"] |
3 | Dato il seguente quesito di medicina: '{{Question}}' qual è la risposta corretta?\nA: {{A}}\nB: {{B}}\nC: {{C}}\nD: {{D}}\nE: {{E}}\nRisposta: | ["A", "B", "C", "D", "E"] |
4 | Devi risolvere un compito a scelta multipla. Dato il seguente quesito di medicina: '{{Question}}' qual è la risposta corretta?\nA: {{A}}\nB: {{B}}\nC: {{C}}\nD: {{D}}\nE: {{E}}\nRisposta: | ["A", "B", "C", "D", "E"] |
5 | Dato il seguente quesito di medicina '{{Question}}' la risposta corretta è: | ["A", "B", "C", "D", "E"] |
6 | Devi risolvere un compito di risposte a domande. Dato il seguente quesito di medicina '{{Question}}' la risposta corretta è: | ["A", "B", "C", "D", "E"] |
Combined Performance = (1 - (Best Prompt - Prompt Average) / 100) * Best Prompt. Prompt Average = accuracy averaged over the 6 prompts. Best Prompt = accuracy of the best prompt. Prompt ID = ID of the best prompt (see legend above).
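As a quick illustration of this formula, here is a small Python sketch; the six per-prompt accuracies are made-up values, not figures from the leaderboard.

```python
# Combined Performance as defined above, applied to six made-up prompt accuracies.
def combined_performance(prompt_scores):
    avg = sum(prompt_scores) / len(prompt_scores)   # Prompt Average
    best = max(prompt_scores)                       # Best Prompt
    return (1 - (best - avg) / 100) * best          # penalize prompt sensitivity

scores = [55.4, 52.1, 85.6, 59.0, 48.3, 50.2]       # accuracy (%) on prompts 1..6
best_id = max(range(len(scores)), key=lambda i: scores[i]) + 1  # 1-based prompt ID
print(f"Prompt Average: {sum(scores) / len(scores):.2f}")       # 58.43
print(f"Best Prompt:    {max(scores):.2f} (Prompt Id {best_id})")  # 85.60 (Id 3)
print(f"Combined:       {combined_performance(scores):.2f}")    # 62.35
```

Note how a large gap between the best prompt and the prompt average pulls the combined score well below the best-prompt accuracy; that is the intended penalty for prompt sensitivity.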
- "headers": [
- "FS",
- "IS_FS",
- "Model",
- "Avg. Combined Performance ⬆️",
- "TE",
- "TE Prompt Average",
- "TE Best Prompt",
- "TE Best Prompt Id",
- "SA",
- "SA Prompt Average",
- "SA Best Prompt",
- "SA Best Prompt Id",
- "HS",
- "HS Prompt Average",
- "HS Best Prompt",
- "HS Best Prompt Id",
- "Combined Performance",
- "Prompt Average",
- "Best Prompt",
- "Best Prompt Id",
- "WIC",
- "WIC Prompt Average",
- "WIC Best Prompt",
- "WIC Best Prompt Id",
- "FAQ",
- "FAQ Prompt Average",
- "FAQ Best Prompt",
- "FAQ Best Prompt Id",
- "LS",
- "LS Prompt Average",
- "LS Best Prompt",
- "LS Best Prompt Id",
- "SU",
- "SU Prompt Average",
- "SU Best Prompt",
- "SU Best Prompt Id",
- "NER",
- "NER Prompt Average",
- "NER Best Prompt",
- "NER Best Prompt Id",
- "REL",
- "REL Prompt Average",
- "REL Best Prompt",
- "REL Best Prompt Id",
- "Architecture",
- "Hub License",
- "#Params (B)",
- "Hub ❤️",
- "Available on the hub",
- "Model sha"
- "data": [
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/google/gemma-3-12b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-3-12b-it</a>",
- 53.92,
- 79.52,
- 79.12,
- 81.25,
- 1,
- 78.34,
- 78.17,
- 78.98,
- 6,
- 71.4,
- 70.14,
- 75.24,
- 4,
- 65.9,
- 62.5,
- 84.6,
- 4,
- 66.5,
- 64.94,
- 70.16,
- 3,
- 52.35,
- 52,
- 99.25,
- 3,
- 26.77,
- 26.64,
- 26.81,
- 2,
- 18.93,
- 18.74,
- 18.98,
- 2,
- 37.86,
- 37.82,
- 37.89,
- 1,
- 41.59,
- 41.46,
- 41.69,
- 1,
- "?",
- "?",
- 13,
- 0,
- false,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/microsoft/phi-4" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/phi-4</a>",
- 51.84,
- 76.84,
- 76.21,
- 79.25,
- 3,
- 75.21,
- 74.44,
- 77.95,
- 4,
- 69.2,
- 67.08,
- 75.89,
- 4,
- 61.91,
- 57.87,
- 85.2,
- 4,
- 59.38,
- 57.17,
- 63.16,
- 4,
- 54.3,
- 53.41,
- 98,
- 4,
- 35.1,
- 35.1,
- 35.11,
- 2,
- 21.24,
- 21.06,
- 21.29,
- 1,
- 28.05,
- 27.93,
- 28.1,
- 2,
- 37.15,
- 36.83,
- 37.34,
- 1,
- "Phi3ForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/microsoft/Phi-3-medium-4k-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/Phi-3-medium-4k-instruct</a>",
- 51.22,
- 76.27,
- 75.25,
- 80.5,
- 4,
- 70.48,
- 69.85,
- 72.1,
- 3,
- 62.83,
- 58.75,
- 75.2,
- 3,
- 61.47,
- 56.9,
- 81.2,
- 4,
- 68.92,
- 67.56,
- 72.47,
- 2,
- 52.17,
- 45.47,
- 81.3,
- 4,
- 35.87,
- 35.46,
- 36.11,
- 1,
- 21.59,
- 21.35,
- 21.65,
- 2,
- 25.67,
- 25.55,
- 25.71,
- 2,
- 36.95,
- 36.78,
- 37.04,
- 2,
- "Phi3ForCausalLM",
- "?",
- 14,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/google/gemma-2-9b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-2-9b-it</a>",
- 53.64,
- 79.97,
- 79.62,
- 81.5,
- 2,
- 72.85,
- 72.49,
- 73.89,
- 1,
- 71.99,
- 70.71,
- 76.03,
- 3,
- 59.96,
- 54.9,
- 75.8,
- 3,
- 57.79,
- 55.64,
- 61.17,
- 3,
- 51.76,
- 51.16,
- 98.75,
- 3,
- 22.36,
- 21.97,
- 22.47,
- 2,
- 30.43,
- 29.12,
- 31.02,
- 1,
- 38.03,
- 37.92,
- 38.09,
- 1,
- 51.26,
- 50.98,
- 51.56,
- 1,
- "Gemma2ForCausalLM",
- "?",
- 10,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2.5-14B-Instruct-1M" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Qwen/Qwen2.5-14B-Instruct-1M</a>",
- 54.72,
- 85.02,
- 84.79,
- 86.5,
- 4,
- 73.17,
- 71.75,
- 78.26,
- 3,
- 72.8,
- 71.8,
- 75.96,
- 4,
- 59.75,
- 55.4,
- 85.6,
- 3,
- 63.59,
- 61.7,
- 67.52,
- 6,
- 52.61,
- 52.37,
- 99.5,
- 4,
- 35.03,
- 34.65,
- 35.24,
- 1,
- 25.9,
- 25.51,
- 26.04,
- 2,
- 35.1,
- 35.03,
- 35.14,
- 1,
- 44.23,
- 42.63,
- 45.56,
- 1,
- "Qwen2ForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/google/gemma-3-12b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-3-12b-it</a>",
- 47.35,
- 71.3,
- 69.25,
- 79,
- 4,
- 61.91,
- 57.59,
- 74.6,
- 4,
- 67.19,
- 65.51,
- 71.4,
- 3,
- 58.57,
- 53.63,
- 83.4,
- 4,
- 46.06,
- 35.76,
- 66.76,
- 5,
- 52.91,
- 52.33,
- 98.75,
- 3,
- 21.85,
- 20.17,
- 22.33,
- 2,
- 29.93,
- 29.46,
- 30.13,
- 2,
- 31.5,
- 29.49,
- 32.47,
- 1,
- 32.23,
- 32.16,
- 32.27,
- 1,
- "?",
- "?",
- 13,
- 0,
- false,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/google/gemma-2-9b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-2-9b-it</a>",
- 47.54,
- 71.99,
- 70.12,
- 79,
- 2,
- 64.35,
- 61.74,
- 70.63,
- 4,
- 64.22,
- 61.92,
- 69.42,
- 4,
- 57.94,
- 52.27,
- 77.8,
- 4,
- 42.48,
- 30.36,
- 64.67,
- 5,
- 51.53,
- 50.58,
- 98,
- 4,
- 25.45,
- 25.29,
- 25.5,
- 2,
- 30.48,
- 30.1,
- 30.65,
- 2,
- 32.01,
- 31.63,
- 32.2,
- 1,
- 34.97,
- 34.56,
- 35.19,
- 1,
- "Gemma2ForCausalLM",
- "?",
- 10,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/microsoft/phi-4" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/phi-4</a>",
- 38.37,
- 63.85,
- 60.42,
- 73.25,
- 4,
- 50.54,
- 42.31,
- 74.26,
- 4,
- 61.42,
- 60.41,
- 63.15,
- 4,
- 55.69,
- 50.13,
- 83.2,
- 3,
- 54.67,
- 48.23,
- 69.01,
- 3,
- 53.53,
- 51.45,
- 95.26,
- 4,
- 0,
- 0,
- 0,
- 1,
- 22.5,
- 13.28,
- 25.69,
- 1,
- 0.36,
- 0.18,
- 0.36,
- 1,
- 21.17,
- 19.38,
- 21.66,
- 1,
- "Phi3ForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/mii-llm/maestrale-chat-v0.4-beta" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">mii-llm/maestrale-chat-v0.4-beta</a>",
- 49.37,
- 73.89,
- 72.62,
- 78.5,
- 4,
- 72.23,
- 71.75,
- 73.57,
- 2,
- 66.86,
- 64.99,
- 71.56,
- 3,
- 55.62,
- 50.7,
- 64.6,
- 3,
- 60.35,
- 59.48,
- 61.76,
- 6,
- 51.66,
- 43.81,
- 74.06,
- 3,
- 22.54,
- 22.36,
- 22.59,
- 1,
- 21.18,
- 21.18,
- 21.18,
- 2,
- 33.53,
- 33.24,
- 33.68,
- 1,
- 35.81,
- 35.36,
- 36.06,
- 2,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/arcee-ai/Llama-3.1-SuperNova-Lite" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">arcee-ai/Llama-3.1-SuperNova-Lite</a>",
- 51.71,
- 74.19,
- 73.62,
- 76,
- 3,
- 75.24,
- 74.23,
- 79.03,
- 4,
- 69.24,
- 67.58,
- 73.93,
- 4,
- 54.83,
- 48.43,
- 69.2,
- 4,
- 62.58,
- 61.35,
- 64.86,
- 6,
- 54.43,
- 51,
- 91.52,
- 3,
- 29.5,
- 29.28,
- 29.59,
- 2,
- 21.6,
- 21.38,
- 21.66,
- 2,
- 39.49,
- 38.24,
- 40.34,
- 2,
- 36.02,
- 35.67,
- 36.21,
- 1,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/ibm-granite/granite-3.1-8b-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">ibm-granite/granite-3.1-8b-instruct</a>",
- 51.7,
- 72.23,
- 70.92,
- 76.5,
- 4,
- 74.76,
- 74.47,
- 75.67,
- 1,
- 66.88,
- 64.05,
- 75.68,
- 4,
- 54.23,
- 48.17,
- 66,
- 4,
- 65.03,
- 63.71,
- 67.83,
- 4,
- 53.49,
- 52.58,
- 98,
- 3,
- 25.74,
- 25.6,
- 25.79,
- 1,
- 32.88,
- 32.36,
- 33.14,
- 1,
- 31.8,
- 30.59,
- 32.38,
- 2,
- 39.91,
- 39.7,
- 40.06,
- 1,
- "GraniteForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/Almawave/Velvet-14B" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Almawave/Velvet-14B</a>",
- 42.69,
- 74.99,
- 74.46,
- 76.75,
- 2,
- 68.28,
- 67.99,
- 68.93,
- 1,
- 67.17,
- 66.35,
- 68.97,
- 4,
- 54.18,
- 49.73,
- 61.2,
- 4,
- 29.29,
- 15.41,
- 37.69,
- 6,
- 49.28,
- 41.73,
- 80.55,
- 4,
- 9.43,
- 9.21,
- 9.45,
- 1,
- 34.66,
- 34.25,
- 34.88,
- 2,
- 24.83,
- 24.48,
- 24.94,
- 2,
- 14.79,
- 13.08,
- 15.1,
- 1,
- "MistralForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/occiglot/occiglot-7b-it-en-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">occiglot/occiglot-7b-it-en-instruct</a>",
- 43.69,
- 64.34,
- 61.83,
- 70.25,
- 2,
- 69.04,
- 68.43,
- 70.48,
- 4,
- 61.77,
- 58.3,
- 69.78,
- 4,
- 54.16,
- 49.43,
- 61.8,
- 4,
- 55.72,
- 51.2,
- 63.65,
- 3,
- 51.6,
- 43.68,
- 73.07,
- 4,
- 5.6,
- 5.4,
- 5.61,
- 2,
- 21.25,
- 20.55,
- 21.44,
- 2,
- 25.15,
- 24.93,
- 25.22,
- 2,
- 28.28,
- 27.2,
- 28.71,
- 1,
- "MistralForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/microsoft/Phi-3-medium-4k-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/Phi-3-medium-4k-instruct</a>",
- 42.09,
- 66.01,
- 62.58,
- 78.5,
- 3,
- 55.31,
- 48.78,
- 72.58,
- 3,
- 65.19,
- 62.35,
- 72.75,
- 4,
- 54.11,
- 47.67,
- 80.2,
- 3,
- 53.5,
- 46.92,
- 66.67,
- 1,
- 53.24,
- 48.67,
- 88.53,
- 4,
- 0.34,
- 0.31,
- 0.34,
- 2,
- 27.64,
- 26.52,
- 28.08,
- 2,
- 18.6,
- 17.48,
- 18.87,
- 1,
- 27.01,
- 26.99,
- 27.01,
- 2,
- "Phi3ForCausalLM",
- "?",
- 14,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2.5-7B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Qwen/Qwen2.5-7B-Instruct</a>",
- 52.62,
- 83.65,
- 83.42,
- 85,
- 2,
- 74.16,
- 73.77,
- 75.35,
- 4,
- 68.51,
- 67.14,
- 72.04,
- 4,
- 53.92,
- 46.93,
- 75.8,
- 4,
- 62.7,
- 60.38,
- 67.5,
- 6,
- 52.46,
- 51.87,
- 98.75,
- 4,
- 28.17,
- 27.85,
- 28.29,
- 2,
- 30.79,
- 30.36,
- 30.98,
- 2,
- 35.18,
- 34.45,
- 35.58,
- 1,
- 36.69,
- 36.64,
- 36.72,
- 2,
- "Qwen2ForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">meta-llama/Meta-Llama-3.1-8B-Instruct</a>",
- 50.37,
- 70.53,
- 69.04,
- 75,
- 4,
- 73.77,
- 72.72,
- 77.35,
- 4,
- 65.72,
- 63.22,
- 72.22,
- 4,
- 53.89,
- 46.97,
- 70.2,
- 4,
- 59.03,
- 57.65,
- 61.2,
- 2,
- 53.73,
- 51.16,
- 94.01,
- 3,
- 30.32,
- 29.76,
- 30.57,
- 2,
- 20.44,
- 20.2,
- 20.5,
- 2,
- 40.3,
- 39.7,
- 40.72,
- 2,
- 35.99,
- 35.89,
- 36.05,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2.5-14B-Instruct-1M" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Qwen/Qwen2.5-14B-Instruct-1M</a>",
- 44.36,
- 69.51,
- 66.88,
- 86.75,
- 4,
- 54.07,
- 47.09,
- 74.98,
- 3,
- 59.97,
- 54.95,
- 75.11,
- 4,
- 53.29,
- 47.3,
- 83.4,
- 3,
- 48.97,
- 40.07,
- 67.2,
- 6,
- 54.41,
- 53.74,
- 98.5,
- 3,
- 36.76,
- 35.98,
- 37.23,
- 2,
- 25.17,
- 25.1,
- 25.19,
- 2,
- 8.15,
- 6.63,
- 8.28,
- 2,
- 33.32,
- 33.06,
- 33.45,
- 1,
- "Qwen2ForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/microsoft/Phi-3.5-mini-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/Phi-3.5-mini-instruct</a>",
- 50.06,
- 77.35,
- 76.92,
- 79,
- 2,
- 72.81,
- 72.21,
- 74.57,
- 5,
- 67.05,
- 65.31,
- 71.39,
- 4,
- 53.19,
- 46.5,
- 66.4,
- 3,
- 65.33,
- 63.97,
- 68.25,
- 6,
- 52.72,
- 47.92,
- 88.03,
- 4,
- 24.07,
- 23.86,
- 24.14,
- 2,
- 19.85,
- 19.71,
- 19.89,
- 1,
- 31.98,
- 31.5,
- 32.21,
- 1,
- 36.25,
- 36.15,
- 36.3,
- 2,
- "Phi3ForCausalLM",
- "?",
- 4,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/DeepMount00/Llama-3-8b-Ita" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">DeepMount00/Llama-3-8b-Ita</a>",
- 49.41,
- 72.61,
- 71.54,
- 76,
- 1,
- 71.9,
- 70.46,
- 76.59,
- 4,
- 68.95,
- 67.49,
- 72.86,
- 4,
- 52.96,
- 46.33,
- 65.6,
- 3,
- 57.04,
- 54.37,
- 61.26,
- 1,
- 53.68,
- 51.41,
- 94.76,
- 4,
- 25.08,
- 24.86,
- 25.16,
- 1,
- 24.3,
- 24.26,
- 24.31,
- 2,
- 30.98,
- 30.27,
- 31.31,
- 2,
- 36.56,
- 36.56,
- 36.56,
- 1,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA</a>",
- 49.39,
- 72.91,
- 71.75,
- 76.75,
- 1,
- 72.07,
- 70.82,
- 76.05,
- 4,
- 70.99,
- 69.88,
- 74.19,
- 4,
- 52.81,
- 46.4,
- 64.4,
- 4,
- 56.82,
- 52.88,
- 63.74,
- 1,
- 56.55,
- 52.41,
- 88.53,
- 3,
- 23.26,
- 22.92,
- 23.37,
- 1,
- 29.1,
- 28.92,
- 29.18,
- 2,
- 17.2,
- 15.77,
- 17.51,
- 2,
- 42.14,
- 41.31,
- 42.76,
- 1,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/CohereForAI/aya-expanse-8b" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">CohereForAI/aya-expanse-8b</a>",
- 49.3,
- 72.91,
- 71.88,
- 76.25,
- 2,
- 72.28,
- 71.9,
- 73.3,
- 5,
- 67.71,
- 65.83,
- 72.71,
- 4,
- 52.54,
- 46.27,
- 63.4,
- 4,
- 62.02,
- 59.58,
- 66.98,
- 3,
- 54.39,
- 51.87,
- 94.01,
- 4,
- 20.55,
- 20.37,
- 20.6,
- 2,
- 18.32,
- 18.27,
- 18.33,
- 1,
- 35.73,
- 35.67,
- 35.77,
- 2,
- 36.56,
- 36.56,
- 36.56,
- 1,
- "CohereForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/google/gemma-3-4b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-3-4b-it</a>",
- 46.57,
- 68.28,
- 66.46,
- 73.25,
- 2,
- 75.3,
- 74.96,
- 76.37,
- 4,
- 65.02,
- 62.43,
- 71.53,
- 4,
- 51.92,
- 46.53,
- 60,
- 4,
- 49.02,
- 41.31,
- 61.17,
- 6,
- 51.66,
- 50.04,
- 96.51,
- 3,
- 14.55,
- 14.06,
- 14.64,
- 1,
- 17.96,
- 17.95,
- 17.96,
- 1,
- 36,
- 35.79,
- 36.12,
- 1,
- 36.02,
- 35.98,
- 36.04,
- 1,
- "?",
- "?",
- 0,
- 0,
- false,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/mii-llm/maestrale-chat-v0.4-beta" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">mii-llm/maestrale-chat-v0.4-beta</a>",
- 41.04,
- 60.19,
- 55.42,
- 73.25,
- 4,
- 63.23,
- 61.22,
- 67.4,
- 4,
- 61.53,
- 60.62,
- 63.07,
- 6,
- 51.91,
- 45.17,
- 63.8,
- 3,
- 46.35,
- 36.19,
- 66.84,
- 6,
- 52.36,
- 46.09,
- 83.04,
- 4,
- 20.38,
- 19.22,
- 20.68,
- 2,
- 25.69,
- 25.14,
- 25.89,
- 2,
- 5.02,
- 4.62,
- 5.05,
- 1,
- 23.76,
- 21.98,
- 24.33,
- 2,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/arcee-ai/Llama-3.1-SuperNova-Lite" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">arcee-ai/Llama-3.1-SuperNova-Lite</a>",
- 42.66,
- 70.06,
- 68.54,
- 74.5,
- 4,
- 66.85,
- 63.8,
- 77.12,
- 4,
- 56.41,
- 52.21,
- 63.8,
- 5,
- 51.47,
- 43.63,
- 68.6,
- 3,
- 50.8,
- 42.88,
- 66.58,
- 6,
- 54.89,
- 51.16,
- 90.52,
- 4,
- 23.74,
- 22.04,
- 24.29,
- 2,
- 22.77,
- 22.74,
- 22.78,
- 2,
- 9.98,
- 8.86,
- 10.11,
- 2,
- 19.61,
- 17.82,
- 20.06,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">meta-llama/Meta-Llama-3.1-8B-Instruct</a>",
- 40.23,
- 62.73,
- 60.62,
- 67,
- 1,
- 63.07,
- 59.8,
- 71.12,
- 5,
- 51.39,
- 44.49,
- 63.26,
- 5,
- 50.55,
- 42.27,
- 69,
- 4,
- 55.86,
- 50.4,
- 66.93,
- 6,
- 53.12,
- 48.29,
- 87.78,
- 4,
- 19.94,
- 15.75,
- 21.06,
- 2,
- 22.33,
- 22.3,
- 22.34,
- 1,
- 7.93,
- 7.57,
- 7.96,
- 1,
- 15.42,
- 15.08,
- 15.48,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2.5-7B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Qwen/Qwen2.5-7B-Instruct</a>",
- 45.5,
- 75.21,
- 73.67,
- 82.5,
- 4,
- 57.18,
- 51.62,
- 70.43,
- 4,
- 64.56,
- 61.54,
- 72.54,
- 4,
- 49.73,
- 41.03,
- 70.4,
- 3,
- 45.89,
- 35.48,
- 67.11,
- 5,
- 52.83,
- 51.58,
- 97.26,
- 4,
- 32.96,
- 32.42,
- 33.22,
- 2,
- 25.39,
- 24.9,
- 25.56,
- 2,
- 14.97,
- 10.24,
- 15.86,
- 2,
- 36.26,
- 36.11,
- 36.35,
- 1,
- "Qwen2ForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/DeepMount00/Llama-3-8b-Ita" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">DeepMount00/Llama-3-8b-Ita</a>",
- 41.02,
- 58.59,
- 56,
- 63,
- 2,
- 62.59,
- 60.33,
- 67.21,
- 5,
- 49.12,
- 40.78,
- 63.86,
- 6,
- 49.04,
- 40.83,
- 63,
- 4,
- 55.44,
- 49.72,
- 67.12,
- 6,
- 51.22,
- 46.22,
- 88.03,
- 4,
- 22.59,
- 21.46,
- 22.93,
- 2,
- 22.76,
- 22.51,
- 22.84,
- 1,
- 25.08,
- 20.84,
- 26.62,
- 1,
- 13.78,
- 12.08,
- 14.06,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/ibm-granite/granite-3.1-8b-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">ibm-granite/granite-3.1-8b-instruct</a>",
- 37.26,
- 56.34,
- 51.08,
- 67,
- 2,
- 54.43,
- 47.82,
- 69.45,
- 4,
- 48.15,
- 39.51,
- 62.64,
- 1,
- 49.04,
- 41.97,
- 59.4,
- 3,
- 50.24,
- 42,
- 66.85,
- 5,
- 54.7,
- 51.29,
- 91.52,
- 4,
- 0.17,
- 0.08,
- 0.17,
- 2,
- 30.28,
- 30.04,
- 30.39,
- 1,
- 18.09,
- 16.03,
- 18.57,
- 2,
- 11.21,
- 9.17,
- 11.47,
- 2,
- "GraniteForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/MoxoffSpA/Volare" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">MoxoffSpA/Volare</a>",
- 43.73,
- 66.86,
- 65.17,
- 71,
- 2,
- 68.39,
- 66.88,
- 72.33,
- 4,
- 49.59,
- 45.34,
- 54.73,
- 4,
- 48.92,
- 41.83,
- 59.2,
- 3,
- 44.42,
- 36.76,
- 53.1,
- 1,
- 49.76,
- 41.36,
- 75.06,
- 4,
- 28,
- 27.25,
- 28.29,
- 1,
- 17.48,
- 17.37,
- 17.5,
- 2,
- 31.55,
- 31.49,
- 31.57,
- 2,
- 32.34,
- 31.3,
- 32.85,
- 1,
- "GemmaForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/microsoft/Phi-3.5-mini-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/Phi-3.5-mini-instruct</a>",
- 44.4,
- 72.16,
- 70.04,
- 81.5,
- 4,
- 51.63,
- 43.72,
- 70.59,
- 3,
- 65.93,
- 64.49,
- 69.17,
- 4,
- 48.9,
- 40.67,
- 62.8,
- 3,
- 60.37,
- 56.97,
- 67.41,
- 5,
- 51.52,
- 44.22,
- 79.05,
- 3,
- 20.38,
- 18.28,
- 20.94,
- 2,
- 23.24,
- 22.7,
- 23.4,
- 2,
- 30.58,
- 30.35,
- 30.68,
- 2,
- 19.33,
- 14.4,
- 20.61,
- 1,
- "Phi3ForCausalLM",
- "?",
- 4,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA</a>",
- 41.74,
- 62.14,
- 58.92,
- 69.5,
- 2,
- 64.05,
- 60.96,
- 72.04,
- 4,
- 48.59,
- 39.59,
- 66.32,
- 6,
- 48.85,
- 40.73,
- 62.2,
- 4,
- 57.27,
- 52.6,
- 66.57,
- 6,
- 51.01,
- 42.85,
- 71.82,
- 4,
- 19.37,
- 19.35,
- 19.37,
- 1,
- 22.63,
- 22.37,
- 22.71,
- 2,
- 22.65,
- 19.02,
- 23.79,
- 1,
- 20.79,
- 17.81,
- 21.61,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/Almawave/Velvet-14B" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Almawave/Velvet-14B</a>",
- 39.48,
- 69.96,
- 67.62,
- 78.5,
- 1,
- 62.53,
- 60.49,
- 66.59,
- 5,
- 59.27,
- 55.93,
- 65.64,
- 3,
- 48.79,
- 42.4,
- 57.4,
- 4,
- 47.33,
- 37.91,
- 64.31,
- 5,
- 50.08,
- 45.47,
- 89.53,
- 4,
- 0.13,
- 0.06,
- 0.13,
- 1,
- 31.1,
- 31.09,
- 31.11,
- 2,
- 15.89,
- 15.75,
- 15.91,
- 2,
- 9.68,
- 6.96,
- 9.98,
- 1,
- "MistralForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/occiglot/occiglot-7b-it-en-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">occiglot/occiglot-7b-it-en-instruct</a>",
- 38,
- 52.59,
- 49.92,
- 56,
- 4,
- 55.46,
- 51.87,
- 61.09,
- 6,
- 53.79,
- 48.48,
- 62.72,
- 1,
- 47.95,
- 42.8,
- 54,
- 3,
- 50.78,
- 42.86,
- 66.49,
- 5,
- 49.64,
- 42.89,
- 83.29,
- 4,
- 2.84,
- 1.56,
- 2.88,
- 2,
- 27.68,
- 25.5,
- 28.56,
- 2,
- 6.96,
- 5.26,
- 7.09,
- 1,
- 32.31,
- 32.03,
- 32.45,
- 2,
- "MistralForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/FairMind/Llama-3-8B-4bit-UltraChat-Ita" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">FairMind/Llama-3-8B-4bit-UltraChat-Ita</a>",
- 44.93,
- 65.05,
- 62.62,
- 71,
- 1,
- 69.44,
- 68.97,
- 70.56,
- 4,
- 61.51,
- 60.03,
- 64.16,
- 4,
- 47.01,
- 39.57,
- 56.8,
- 3,
- 52.79,
- 47.59,
- 60.88,
- 3,
- 52.88,
- 48.26,
- 88.53,
- 4,
- 22.47,
- 21.8,
- 22.67,
- 2,
- 21.41,
- 21.25,
- 21.45,
- 1,
- 29.77,
- 29.17,
- 30.02,
- 2,
- 27,
- 26.27,
- 27.27,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/CohereForAI/aya-expanse-8b" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">CohereForAI/aya-expanse-8b</a>",
- 39.85,
- 64,
- 60.33,
- 75,
- 1,
- 63.23,
- 60.16,
- 70.62,
- 4,
- 61.4,
- 60.48,
- 62.97,
- 4,
- 46.66,
- 38.53,
- 57.8,
- 4,
- 52.79,
- 45.95,
- 66.19,
- 3,
- 47.56,
- 38.03,
- 66.33,
- 4,
- 15.24,
- 11.62,
- 15.92,
- 2,
- 19.14,
- 18.84,
- 19.21,
- 2,
- 19.08,
- 17.36,
- 19.5,
- 2,
- 9.37,
- 7.52,
- 9.57,
- 2,
- "CohereForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">mistralai/Mistral-7B-Instruct-v0.3</a>",
- 41.56,
- 63.83,
- 60.83,
- 71.25,
- 1,
- 61.23,
- 58.71,
- 66.14,
- 6,
- 59.16,
- 55.86,
- 65.39,
- 5,
- 46.03,
- 39.23,
- 54,
- 3,
- 62.89,
- 61.08,
- 66.49,
- 2,
- 53.28,
- 47.63,
- 84.79,
- 4,
- 9.87,
- 6.47,
- 10.26,
- 2,
- 28.1,
- 27.28,
- 28.43,
- 2,
- 9.72,
- 9.64,
- 9.73,
- 1,
- 21.49,
- 20.29,
- 21.83,
- 2,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/google/gemma-3-4b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-3-4b-it</a>",
- 44.59,
- 66.22,
- 64.88,
- 69.25,
- 3,
- 60.41,
- 57.66,
- 65.65,
- 5,
- 59.8,
- 57.32,
- 64.25,
- 5,
- 46.01,
- 38.87,
- 54.6,
- 3,
- 50.8,
- 44.13,
- 61.44,
- 5,
- 52.55,
- 48.67,
- 90.77,
- 3,
- 32.37,
- 30.72,
- 33.18,
- 1,
- 29.24,
- 28.93,
- 29.37,
- 2,
- 29.76,
- 28.92,
- 30.13,
- 2,
- 18.7,
- 18.34,
- 18.78,
- 2,
- "?",
- "?",
- 0,
- 0,
- false,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/FairMind/Llama-3-8B-4bit-UltraChat-Ita" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">FairMind/Llama-3-8B-4bit-UltraChat-Ita</a>",
- 36.28,
- 60.29,
- 58.08,
- 64.25,
- 6,
- 52.97,
- 46.01,
- 67.3,
- 5,
- 54.09,
- 48.72,
- 63.38,
- 5,
- 41.41,
- 33.37,
- 49.2,
- 3,
- 66.22,
- 66.04,
- 66.58,
- 4,
- 43.89,
- 34.54,
- 55.61,
- 4,
- 0,
- 0,
- 0,
- 1,
- 24.17,
- 23.6,
- 24.35,
- 2,
- 15.59,
- 13.72,
- 15.95,
- 2,
- 4.2,
- 2.62,
- 4.27,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">sapienzanlp/Minerva-7B-base-v1.0</a>",
- 35.06,
- 68.71,
- 66.79,
- 74.25,
- 4,
- 53.85,
- 53.14,
- 54.7,
- 5,
- 39.49,
- 34.45,
- 43.34,
- 5,
- 38.04,
- 36.83,
- 38.8,
- 6,
- 43.99,
- 34.55,
- 56,
- 5,
- 28.45,
- 27.27,
- 28.93,
- 1,
- 9.53,
- 9.51,
- 9.53,
- 2,
- 15.94,
- 15.92,
- 15.95,
- 1,
- 24.37,
- 24.29,
- 24.4,
- 1,
- 28.2,
- 26.64,
- 28.83,
- 1,
- "MistralForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/sapienzanlp/Minerva-7B-instruct-v1.0" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">sapienzanlp/Minerva-7B-instruct-v1.0</a>",
- 35.7,
- 67.25,
- 65.33,
- 72.25,
- 3,
- 57.68,
- 54.91,
- 62.25,
- 6,
- 41.59,
- 30.89,
- 54.29,
- 5,
- 37.84,
- 37.27,
- 38.2,
- 2,
- 45.48,
- 36.89,
- 56.73,
- 5,
- 29.88,
- 28.64,
- 30.42,
- 4,
- 5,
- 4.88,
- 5.01,
- 2,
- 15.36,
- 15.12,
- 15.4,
- 2,
- 27.24,
- 27.04,
- 27.32,
- 2,
- 29.69,
- 29.36,
- 29.83,
- 1,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/sapienzanlp/Minerva-7B-instruct-v1.0" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">sapienzanlp/Minerva-7B-instruct-v1.0</a>",
- 32.5,
- 55.16,
- 54.12,
- 56.5,
- 4,
- 50.57,
- 44.51,
- 59.46,
- 6,
- 47.62,
- 39.22,
- 60.48,
- 2,
- 32.25,
- 28.87,
- 34,
- 6,
- 57.7,
- 53.42,
- 66.04,
- 5,
- 35.17,
- 31.05,
- 37.66,
- 4,
- 0,
- 0,
- 0,
- 1,
- 16.34,
- 16.22,
- 16.36,
- 2,
- 9.61,
- 9.16,
- 9.66,
- 1,
- 20.6,
- 17.75,
- 21.38,
- 2,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">sapienzanlp/Minerva-7B-base-v1.0</a>",
- 32.36,
- 56.93,
- 54.46,
- 60.75,
- 1,
- 56.56,
- 50.62,
- 71.33,
- 6,
- 53.3,
- 48.04,
- 61.82,
- 2,
- 31.37,
- 27.7,
- 33.2,
- 5,
- 61.49,
- 59.12,
- 66.13,
- 5,
- 29.12,
- 27.81,
- 29.68,
- 5,
- 0.01,
- 0,
- 0.01,
- 2,
- 16.27,
- 16.04,
- 16.31,
- 1,
- 9.64,
- 9.62,
- 9.64,
- 2,
- 8.91,
- 8.09,
- 8.99,
- 2,
- "MistralForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/MoxoffSpA/Volare" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">MoxoffSpA/Volare</a>",
- 30.23,
- 53.14,
- 51.25,
- 55.5,
- 6,
- 50.1,
- 44.15,
- 58.49,
- 6,
- 50.7,
- 43.57,
- 62.7,
- 2,
- 26.13,
- 22.77,
- 27.4,
- 3,
- 40.77,
- 27.8,
- 66.4,
- 5,
- 27.95,
- 27.35,
- 28.18,
- 1,
- 0.04,
- 0.02,
- 0.04,
- 1,
- 23.28,
- 23.15,
- 23.32,
- 1,
- 10.77,
- 10.63,
- 10.78,
- 2,
- 19.47,
- 15.95,
- 20.37,
- 1,
- "GemmaForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "metadata": null
Word in Context (WIC) --- Multiple-choice task
The input consists of a word (w) and two sentences. The model must determine whether w has the same meaning in both sentences. The output is binary: 1 (same meaning) or 0 (different meaning).
# | Prompt | Answer Choices |
---|---|---|
1 | La parola: '{{sentence1[start1:end1]}}' nella frase: '{{sentence1}}' ha lo stesso significato della parola: '{{sentence2[start2:end2]}}' nella frase: '{{sentence2}}'? | ["No", "Sì"] |
2 | Devi determinare se una stessa parola usata in due frasi differenti ha lo stesso significato in entrambi i contesti. La parola: '{{sentence1[start1:end1]}}' nella frase: '{{sentence1}}' ha lo stesso significato della parola: '{{sentence2[start2:end2]}}' nella frase: '{{sentence2}}'? | ["No", "Sì"] |
3 | La parola: '{{sentence1[start1:end1]}}' nella frase: '{{sentence1}}' ha lo stesso significato della parola: '{{sentence2[start2:end2]}}' nella frase: '{{sentence2}}'?\nA: Sì\nB: No\nRisposta: | ["B", "A"] |
4 | Devi determinare se una stessa parola usata in due frasi differenti ha lo stesso significato in entrambi i contesti. La parola: '{{sentence1[start1:end1]}}' nella frase: '{{sentence1}}' ha lo stesso significato della parola: '{{sentence2[start2:end2]}}' nella frase: '{{sentence2}}'?\nA: Sì\nB: No\nRisposta: | ["B", "A"] |
5 | La parola: '{{sentence1[start1:end1]}}' nella frase: '{{sentence1}}' e la parola: '{{sentence2[start2:end2]}}' nella frase: '{{sentence2}}' | ["non hanno lo stesso significato", "hanno lo stesso significato"] |
6 | Devi determinare se una stessa parola usata in due frasi differenti ha lo stesso significato in entrambi i contesti. La parola: '{{sentence1[start1:end1]}}' nella frase: '{{sentence1}}' e la parola: '{{sentence2[start2:end2]}}' nella frase: '{{sentence2}}' | ["non hanno lo stesso significato", "hanno lo stesso significato"] |
Combined Performance = (1 - (Best Prompt - Prompt Average) / 100) * Best Prompt. Prompt Average = F1-macro averaged over the 6 prompts. Best Prompt = F1-macro of the best prompt. Prompt ID = ID of the best prompt (see legend above).
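Since WIC is scored with F1-macro rather than accuracy, the per-prompt scores feeding the same formula can be computed as in the sketch below; the gold labels and predictions are toy data, not actual model outputs.

```python
# Toy sketch: macro-F1 per prompt for WIC, then the Combined Performance formula.
from sklearn.metrics import f1_score

def combined_performance(prompt_scores):
    avg = sum(prompt_scores) / len(prompt_scores)
    best = max(prompt_scores)
    return (1 - (best - avg) / 100) * best

gold = [1, 0, 1, 1, 0, 0, 1, 0]   # 1 = same meaning, 0 = different meaning
preds_per_prompt = [              # one prediction list per prompt (6 prompts)
    [1, 0, 1, 0, 0, 0, 1, 0],
    [1, 1, 1, 1, 0, 0, 1, 0],
    [1, 0, 0, 1, 0, 1, 1, 0],
    [1, 0, 1, 1, 0, 0, 0, 0],
    [0, 0, 1, 1, 0, 0, 1, 1],
    [1, 0, 1, 1, 1, 0, 1, 0],
]
f1s = [100 * f1_score(gold, p, average="macro") for p in preds_per_prompt]
print(f"Combined Performance: {combined_performance(f1s):.2f}")
```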
- "headers": [
- "FS",
- "IS_FS",
- "Model",
- "Avg. Combined Performance ⬆️",
- "TE",
- "TE Prompt Average",
- "TE Best Prompt",
- "TE Best Prompt Id",
- "SA",
- "SA Prompt Average",
- "SA Best Prompt",
- "SA Best Prompt Id",
- "HS",
- "HS Prompt Average",
- "HS Best Prompt",
- "HS Best Prompt Id",
- "AT",
- "AT Prompt Average",
- "AT Best Prompt",
- "AT Best Prompt Id",
- "Combined Performance",
- "Prompt Average",
- "Best Prompt",
- "Best Prompt Id",
- "FAQ",
- "FAQ Prompt Average",
- "FAQ Best Prompt",
- "FAQ Best Prompt Id",
- "LS",
- "LS Prompt Average",
- "LS Best Prompt",
- "LS Best Prompt Id",
- "SU",
- "SU Prompt Average",
- "SU Best Prompt",
- "SU Best Prompt Id",
- "NER",
- "NER Prompt Average",
- "NER Best Prompt",
- "NER Best Prompt Id",
- "REL",
- "REL Prompt Average",
- "REL Best Prompt",
- "REL Best Prompt Id",
- "Architecture",
- "Hub License",
- "#Params (B)",
- "Hub ❤️",
- "Available on the hub",
- "Model sha"
- "data": [
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/microsoft/Phi-3-medium-4k-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/Phi-3-medium-4k-instruct</a>",
- 51.22,
- 76.27,
- 75.25,
- 80.5,
- 4,
- 70.48,
- 69.85,
- 72.1,
- 3,
- 62.83,
- 58.75,
- 75.2,
- 3,
- 61.47,
- 56.9,
- 81.2,
- 4,
- 68.92,
- 67.56,
- 72.47,
- 2,
- 52.17,
- 45.47,
- 81.3,
- 4,
- 35.87,
- 35.46,
- 36.11,
- 1,
- 21.59,
- 21.35,
- 21.65,
- 2,
- 25.67,
- 25.55,
- 25.71,
- 2,
- 36.95,
- 36.78,
- 37.04,
- 2,
- "Phi3ForCausalLM",
- "?",
- 14,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/google/gemma-3-12b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-3-12b-it</a>",
- 53.92,
- 79.52,
- 79.12,
- 81.25,
- 1,
- 78.34,
- 78.17,
- 78.98,
- 6,
- 71.4,
- 70.14,
- 75.24,
- 4,
- 65.9,
- 62.5,
- 84.6,
- 4,
- 66.5,
- 64.94,
- 70.16,
- 3,
- 52.35,
- 52,
- 99.25,
- 3,
- 26.77,
- 26.64,
- 26.81,
- 2,
- 18.93,
- 18.74,
- 18.98,
- 2,
- 37.86,
- 37.82,
- 37.89,
- 1,
- 41.59,
- 41.46,
- 41.69,
- 1,
- "?",
- "?",
- 13,
- 0,
- false,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/FairMind/Llama-3-8B-4bit-UltraChat-Ita" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">FairMind/Llama-3-8B-4bit-UltraChat-Ita</a>",
- 36.28,
- 60.29,
- 58.08,
- 64.25,
- 6,
- 52.97,
- 46.01,
- 67.3,
- 5,
- 54.09,
- 48.72,
- 63.38,
- 5,
- 41.41,
- 33.37,
- 49.2,
- 3,
- 66.22,
- 66.04,
- 66.58,
- 4,
- 43.89,
- 34.54,
- 55.61,
- 4,
- 0,
- 0,
- 0,
- 1,
- 24.17,
- 23.6,
- 24.35,
- 2,
- 15.59,
- 13.72,
- 15.95,
- 2,
- 4.2,
- 2.62,
- 4.27,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/microsoft/Phi-3.5-mini-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/Phi-3.5-mini-instruct</a>",
- 50.06,
- 77.35,
- 76.92,
- 79,
- 2,
- 72.81,
- 72.21,
- 74.57,
- 5,
- 67.05,
- 65.31,
- 71.39,
- 4,
- 53.19,
- 46.5,
- 66.4,
- 3,
- 65.33,
- 63.97,
- 68.25,
- 6,
- 52.72,
- 47.92,
- 88.03,
- 4,
- 24.07,
- 23.86,
- 24.14,
- 2,
- 19.85,
- 19.71,
- 19.89,
- 1,
- 31.98,
- 31.5,
- 32.21,
- 1,
- 36.25,
- 36.15,
- 36.3,
- 2,
- "Phi3ForCausalLM",
- "?",
- 4,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/ibm-granite/granite-3.1-8b-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">ibm-granite/granite-3.1-8b-instruct</a>",
- 51.7,
- 72.23,
- 70.92,
- 76.5,
- 4,
- 74.76,
- 74.47,
- 75.67,
- 1,
- 66.88,
- 64.05,
- 75.68,
- 4,
- 54.23,
- 48.17,
- 66,
- 4,
- 65.03,
- 63.71,
- 67.83,
- 4,
- 53.49,
- 52.58,
- 98,
- 3,
- 25.74,
- 25.6,
- 25.79,
- 1,
- 32.88,
- 32.36,
- 33.14,
- 1,
- 31.8,
- 30.59,
- 32.38,
- 2,
- 39.91,
- 39.7,
- 40.06,
- 1,
- "GraniteForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2.5-14B-Instruct-1M" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Qwen/Qwen2.5-14B-Instruct-1M</a>",
- 54.72,
- 85.02,
- 84.79,
- 86.5,
- 4,
- 73.17,
- 71.75,
- 78.26,
- 3,
- 72.8,
- 71.8,
- 75.96,
- 4,
- 59.75,
- 55.4,
- 85.6,
- 3,
- 63.59,
- 61.7,
- 67.52,
- 6,
- 52.61,
- 52.37,
- 99.5,
- 4,
- 35.03,
- 34.65,
- 35.24,
- 1,
- 25.9,
- 25.51,
- 26.04,
- 2,
- 35.1,
- 35.03,
- 35.14,
- 1,
- 44.23,
- 42.63,
- 45.56,
- 1,
- "Qwen2ForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">mistralai/Mistral-7B-Instruct-v0.3</a>",
- 41.56,
- 63.83,
- 60.83,
- 71.25,
- 1,
- 61.23,
- 58.71,
- 66.14,
- 6,
- 59.16,
- 55.86,
- 65.39,
- 5,
- 46.03,
- 39.23,
- 54,
- 3,
- 62.89,
- 61.08,
- 66.49,
- 2,
- 53.28,
- 47.63,
- 84.79,
- 4,
- 9.87,
- 6.47,
- 10.26,
- 2,
- 28.1,
- 27.28,
- 28.43,
- 2,
- 9.72,
- 9.64,
- 9.73,
- 1,
- 21.49,
- 20.29,
- 21.83,
- 2,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2.5-7B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Qwen/Qwen2.5-7B-Instruct</a>",
- 52.62,
- 83.65,
- 83.42,
- 85,
- 2,
- 74.16,
- 73.77,
- 75.35,
- 4,
- 68.51,
- 67.14,
- 72.04,
- 4,
- 53.92,
- 46.93,
- 75.8,
- 4,
- 62.7,
- 60.38,
- 67.5,
- 6,
- 52.46,
- 51.87,
- 98.75,
- 4,
- 28.17,
- 27.85,
- 28.29,
- 2,
- 30.79,
- 30.36,
- 30.98,
- 2,
- 35.18,
- 34.45,
- 35.58,
- 1,
- 36.69,
- 36.64,
- 36.72,
- 2,
- "Qwen2ForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/arcee-ai/Llama-3.1-SuperNova-Lite" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">arcee-ai/Llama-3.1-SuperNova-Lite</a>",
- 51.71,
- 74.19,
- 73.62,
- 76,
- 3,
- 75.24,
- 74.23,
- 79.03,
- 4,
- 69.24,
- 67.58,
- 73.93,
- 4,
- 54.83,
- 48.43,
- 69.2,
- 4,
- 62.58,
- 61.35,
- 64.86,
- 6,
- 54.43,
- 51,
- 91.52,
- 3,
- 29.5,
- 29.28,
- 29.59,
- 2,
- 21.6,
- 21.38,
- 21.66,
- 2,
- 39.49,
- 38.24,
- 40.34,
- 2,
- 36.02,
- 35.67,
- 36.21,
- 1,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/CohereForAI/aya-expanse-8b" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">CohereForAI/aya-expanse-8b</a>",
- 49.3,
- 72.91,
- 71.88,
- 76.25,
- 2,
- 72.28,
- 71.9,
- 73.3,
- 5,
- 67.71,
- 65.83,
- 72.71,
- 4,
- 52.54,
- 46.27,
- 63.4,
- 4,
- 62.02,
- 59.58,
- 66.98,
- 3,
- 54.39,
- 51.87,
- 94.01,
- 4,
- 20.55,
- 20.37,
- 20.6,
- 2,
- 18.32,
- 18.27,
- 18.33,
- 1,
- 35.73,
- 35.67,
- 35.77,
- 2,
- 36.56,
- 36.56,
- 36.56,
- 1,
- "CohereForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">sapienzanlp/Minerva-7B-base-v1.0</a>",
- 32.36,
- 56.93,
- 54.46,
- 60.75,
- 1,
- 56.56,
- 50.62,
- 71.33,
- 6,
- 53.3,
- 48.04,
- 61.82,
- 2,
- 31.37,
- 27.7,
- 33.2,
- 5,
- 61.49,
- 59.12,
- 66.13,
- 5,
- 29.12,
- 27.81,
- 29.68,
- 5,
- 0.01,
- 0,
- 0.01,
- 2,
- 16.27,
- 16.04,
- 16.31,
- 1,
- 9.64,
- 9.62,
- 9.64,
- 2,
- 8.91,
- 8.09,
- 8.99,
- 2,
- "MistralForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/microsoft/Phi-3.5-mini-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/Phi-3.5-mini-instruct</a>",
- 44.4,
- 72.16,
- 70.04,
- 81.5,
- 4,
- 51.63,
- 43.72,
- 70.59,
- 3,
- 65.93,
- 64.49,
- 69.17,
- 4,
- 48.9,
- 40.67,
- 62.8,
- 3,
- 60.37,
- 56.97,
- 67.41,
- 5,
- 51.52,
- 44.22,
- 79.05,
- 3,
- 20.38,
- 18.28,
- 20.94,
- 2,
- 23.24,
- 22.7,
- 23.4,
- 2,
- 30.58,
- 30.35,
- 30.68,
- 2,
- 19.33,
- 14.4,
- 20.61,
- 1,
- "Phi3ForCausalLM",
- "?",
- 4,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/mii-llm/maestrale-chat-v0.4-beta" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">mii-llm/maestrale-chat-v0.4-beta</a>",
- 49.37,
- 73.89,
- 72.62,
- 78.5,
- 4,
- 72.23,
- 71.75,
- 73.57,
- 2,
- 66.86,
- 64.99,
- 71.56,
- 3,
- 55.62,
- 50.7,
- 64.6,
- 3,
- 60.35,
- 59.48,
- 61.76,
- 6,
- 51.66,
- 43.81,
- 74.06,
- 3,
- 22.54,
- 22.36,
- 22.59,
- 1,
- 21.18,
- 21.18,
- 21.18,
- 2,
- 33.53,
- 33.24,
- 33.68,
- 1,
- 35.81,
- 35.36,
- 36.06,
- 2,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/microsoft/phi-4" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/phi-4</a>",
- 51.84,
- 76.84,
- 76.21,
- 79.25,
- 3,
- 75.21,
- 74.44,
- 77.95,
- 4,
- 69.2,
- 67.08,
- 75.89,
- 4,
- 61.91,
- 57.87,
- 85.2,
- 4,
- 59.38,
- 57.17,
- 63.16,
- 4,
- 54.3,
- 53.41,
- 98,
- 4,
- 35.1,
- 35.1,
- 35.11,
- 2,
- 21.24,
- 21.06,
- 21.29,
- 1,
- 28.05,
- 27.93,
- 28.1,
- 2,
- 37.15,
- 36.83,
- 37.34,
- 1,
- "Phi3ForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">meta-llama/Meta-Llama-3.1-8B-Instruct</a>",
- 50.37,
- 70.53,
- 69.04,
- 75,
- 4,
- 73.77,
- 72.72,
- 77.35,
- 4,
- 65.72,
- 63.22,
- 72.22,
- 4,
- 53.89,
- 46.97,
- 70.2,
- 4,
- 59.03,
- 57.65,
- 61.2,
- 2,
- 53.73,
- 51.16,
- 94.01,
- 3,
- 30.32,
- 29.76,
- 30.57,
- 2,
- 20.44,
- 20.2,
- 20.5,
- 2,
- 40.3,
- 39.7,
- 40.72,
- 2,
- 35.99,
- 35.89,
- 36.05,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/google/gemma-2-9b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-2-9b-it</a>",
- 53.64,
- 79.97,
- 79.62,
- 81.5,
- 2,
- 72.85,
- 72.49,
- 73.89,
- 1,
- 71.99,
- 70.71,
- 76.03,
- 3,
- 59.96,
- 54.9,
- 75.8,
- 3,
- 57.79,
- 55.64,
- 61.17,
- 3,
- 51.76,
- 51.16,
- 98.75,
- 3,
- 22.36,
- 21.97,
- 22.47,
- 2,
- 30.43,
- 29.12,
- 31.02,
- 1,
- 38.03,
- 37.92,
- 38.09,
- 1,
- 51.26,
- 50.98,
- 51.56,
- 1,
- "Gemma2ForCausalLM",
- "?",
- 10,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/sapienzanlp/Minerva-7B-instruct-v1.0" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">sapienzanlp/Minerva-7B-instruct-v1.0</a>",
- 32.5,
- 55.16,
- 54.12,
- 56.5,
- 4,
- 50.57,
- 44.51,
- 59.46,
- 6,
- 47.62,
- 39.22,
- 60.48,
- 2,
- 32.25,
- 28.87,
- 34,
- 6,
- 57.7,
- 53.42,
- 66.04,
- 5,
- 35.17,
- 31.05,
- 37.66,
- 4,
- 0,
- 0,
- 0,
- 1,
- 16.34,
- 16.22,
- 16.36,
- 2,
- 9.61,
- 9.16,
- 9.66,
- 1,
- 20.6,
- 17.75,
- 21.38,
- 2,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA</a>",
- 41.74,
- 62.14,
- 58.92,
- 69.5,
- 2,
- 64.05,
- 60.96,
- 72.04,
- 4,
- 48.59,
- 39.59,
- 66.32,
- 6,
- 48.85,
- 40.73,
- 62.2,
- 4,
- 57.27,
- 52.6,
- 66.57,
- 6,
- 51.01,
- 42.85,
- 71.82,
- 4,
- 19.37,
- 19.35,
- 19.37,
- 1,
- 22.63,
- 22.37,
- 22.71,
- 2,
- 22.65,
- 19.02,
- 23.79,
- 1,
- 20.79,
- 17.81,
- 21.61,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/DeepMount00/Llama-3-8b-Ita" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">DeepMount00/Llama-3-8b-Ita</a>",
- 49.41,
- 72.61,
- 71.54,
- 76,
- 1,
- 71.9,
- 70.46,
- 76.59,
- 4,
- 68.95,
- 67.49,
- 72.86,
- 4,
- 52.96,
- 46.33,
- 65.6,
- 3,
- 57.04,
- 54.37,
- 61.26,
- 1,
- 53.68,
- 51.41,
- 94.76,
- 4,
- 25.08,
- 24.86,
- 25.16,
- 1,
- 24.3,
- 24.26,
- 24.31,
- 2,
- 30.98,
- 30.27,
- 31.31,
- 2,
- 36.56,
- 36.56,
- 36.56,
- 1,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA</a>",
- 49.39,
- 72.91,
- 71.75,
- 76.75,
- 1,
- 72.07,
- 70.82,
- 76.05,
- 4,
- 70.99,
- 69.88,
- 74.19,
- 4,
- 52.81,
- 46.4,
- 64.4,
- 4,
- 56.82,
- 52.88,
- 63.74,
- 1,
- 56.55,
- 52.41,
- 88.53,
- 3,
- 23.26,
- 22.92,
- 23.37,
- 1,
- 29.1,
- 28.92,
- 29.18,
- 2,
- 17.2,
- 15.77,
- 17.51,
- 2,
- 42.14,
- 41.31,
- 42.76,
- 1,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">meta-llama/Meta-Llama-3.1-8B-Instruct</a>",
- 40.23,
- 62.73,
- 60.62,
- 67,
- 1,
- 63.07,
- 59.8,
- 71.12,
- 5,
- 51.39,
- 44.49,
- 63.26,
- 5,
- 50.55,
- 42.27,
- 69,
- 4,
- 55.86,
- 50.4,
- 66.93,
- 6,
- 53.12,
- 48.29,
- 87.78,
- 4,
- 19.94,
- 15.75,
- 21.06,
- 2,
- 22.33,
- 22.3,
- 22.34,
- 1,
- 7.93,
- 7.57,
- 7.96,
- 1,
- 15.42,
- 15.08,
- 15.48,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/occiglot/occiglot-7b-it-en-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">occiglot/occiglot-7b-it-en-instruct</a>",
- 43.69,
- 64.34,
- 61.83,
- 70.25,
- 2,
- 69.04,
- 68.43,
- 70.48,
- 4,
- 61.77,
- 58.3,
- 69.78,
- 4,
- 54.16,
- 49.43,
- 61.8,
- 4,
- 55.72,
- 51.2,
- 63.65,
- 3,
- 51.6,
- 43.68,
- 73.07,
- 4,
- 5.6,
- 5.4,
- 5.61,
- 2,
- 21.25,
- 20.55,
- 21.44,
- 2,
- 25.15,
- 24.93,
- 25.22,
- 2,
- 28.28,
- 27.2,
- 28.71,
- 1,
- "MistralForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/DeepMount00/Llama-3-8b-Ita" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">DeepMount00/Llama-3-8b-Ita</a>",
- 41.02,
- 58.59,
- 56,
- 63,
- 2,
- 62.59,
- 60.33,
- 67.21,
- 5,
- 49.12,
- 40.78,
- 63.86,
- 6,
- 49.04,
- 40.83,
- 63,
- 4,
- 55.44,
- 49.72,
- 67.12,
- 6,
- 51.22,
- 46.22,
- 88.03,
- 4,
- 22.59,
- 21.46,
- 22.93,
- 2,
- 22.76,
- 22.51,
- 22.84,
- 1,
- 25.08,
- 20.84,
- 26.62,
- 1,
- 13.78,
- 12.08,
- 14.06,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/microsoft/phi-4" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/phi-4</a>",
- 38.37,
- 63.85,
- 60.42,
- 73.25,
- 4,
- 50.54,
- 42.31,
- 74.26,
- 4,
- 61.42,
- 60.41,
- 63.15,
- 4,
- 55.69,
- 50.13,
- 83.2,
- 3,
- 54.67,
- 48.23,
- 69.01,
- 3,
- 53.53,
- 51.45,
- 95.26,
- 4,
- 0,
- 0,
- 0,
- 1,
- 22.5,
- 13.28,
- 25.69,
- 1,
- 0.36,
- 0.18,
- 0.36,
- 1,
- 21.17,
- 19.38,
- 21.66,
- 1,
- "Phi3ForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/microsoft/Phi-3-medium-4k-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/Phi-3-medium-4k-instruct</a>",
- 42.09,
- 66.01,
- 62.58,
- 78.5,
- 3,
- 55.31,
- 48.78,
- 72.58,
- 3,
- 65.19,
- 62.35,
- 72.75,
- 4,
- 54.11,
- 47.67,
- 80.2,
- 3,
- 53.5,
- 46.92,
- 66.67,
- 1,
- 53.24,
- 48.67,
- 88.53,
- 4,
- 0.34,
- 0.31,
- 0.34,
- 2,
- 27.64,
- 26.52,
- 28.08,
- 2,
- 18.6,
- 17.48,
- 18.87,
- 1,
- 27.01,
- 26.99,
- 27.01,
- 2,
- "Phi3ForCausalLM",
- "?",
- 14,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/FairMind/Llama-3-8B-4bit-UltraChat-Ita" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">FairMind/Llama-3-8B-4bit-UltraChat-Ita</a>",
- 44.93,
- 65.05,
- 62.62,
- 71,
- 1,
- 69.44,
- 68.97,
- 70.56,
- 4,
- 61.51,
- 60.03,
- 64.16,
- 4,
- 47.01,
- 39.57,
- 56.8,
- 3,
- 52.79,
- 47.59,
- 60.88,
- 3,
- 52.88,
- 48.26,
- 88.53,
- 4,
- 22.47,
- 21.8,
- 22.67,
- 2,
- 21.41,
- 21.25,
- 21.45,
- 1,
- 29.77,
- 29.17,
- 30.02,
- 2,
- 27,
- 26.27,
- 27.27,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/CohereForAI/aya-expanse-8b" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">CohereForAI/aya-expanse-8b</a>",
- 39.85,
- 64,
- 60.33,
- 75,
- 1,
- 63.23,
- 60.16,
- 70.62,
- 4,
- 61.4,
- 60.48,
- 62.97,
- 4,
- 46.66,
- 38.53,
- 57.8,
- 4,
- 52.79,
- 45.95,
- 66.19,
- 3,
- 47.56,
- 38.03,
- 66.33,
- 4,
- 15.24,
- 11.62,
- 15.92,
- 2,
- 19.14,
- 18.84,
- 19.21,
- 2,
- 19.08,
- 17.36,
- 19.5,
- 2,
- 9.37,
- 7.52,
- 9.57,
- 2,
- "CohereForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/google/gemma-3-4b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-3-4b-it</a>",
- 44.59,
- 66.22,
- 64.88,
- 69.25,
- 3,
- 60.41,
- 57.66,
- 65.65,
- 5,
- 59.8,
- 57.32,
- 64.25,
- 5,
- 46.01,
- 38.87,
- 54.6,
- 3,
- 50.8,
- 44.13,
- 61.44,
- 5,
- 52.55,
- 48.67,
- 90.77,
- 3,
- 32.37,
- 30.72,
- 33.18,
- 1,
- 29.24,
- 28.93,
- 29.37,
- 2,
- 29.76,
- 28.92,
- 30.13,
- 2,
- 18.7,
- 18.34,
- 18.78,
- 2,
- "?",
- "?",
- 0,
- 0,
- false,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/arcee-ai/Llama-3.1-SuperNova-Lite" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">arcee-ai/Llama-3.1-SuperNova-Lite</a>",
- 42.66,
- 70.06,
- 68.54,
- 74.5,
- 4,
- 66.85,
- 63.8,
- 77.12,
- 4,
- 56.41,
- 52.21,
- 63.8,
- 5,
- 51.47,
- 43.63,
- 68.6,
- 3,
- 50.8,
- 42.88,
- 66.58,
- 6,
- 54.89,
- 51.16,
- 90.52,
- 4,
- 23.74,
- 22.04,
- 24.29,
- 2,
- 22.77,
- 22.74,
- 22.78,
- 2,
- 9.98,
- 8.86,
- 10.11,
- 2,
- 19.61,
- 17.82,
- 20.06,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/occiglot/occiglot-7b-it-en-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">occiglot/occiglot-7b-it-en-instruct</a>",
- 38,
- 52.59,
- 49.92,
- 56,
- 4,
- 55.46,
- 51.87,
- 61.09,
- 6,
- 53.79,
- 48.48,
- 62.72,
- 1,
- 47.95,
- 42.8,
- 54,
- 3,
- 50.78,
- 42.86,
- 66.49,
- 5,
- 49.64,
- 42.89,
- 83.29,
- 4,
- 2.84,
- 1.56,
- 2.88,
- 2,
- 27.68,
- 25.5,
- 28.56,
- 2,
- 6.96,
- 5.26,
- 7.09,
- 1,
- 32.31,
- 32.03,
- 32.45,
- 2,
- "MistralForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/ibm-granite/granite-3.1-8b-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">ibm-granite/granite-3.1-8b-instruct</a>",
- 37.26,
- 56.34,
- 51.08,
- 67,
- 2,
- 54.43,
- 47.82,
- 69.45,
- 4,
- 48.15,
- 39.51,
- 62.64,
- 1,
- 49.04,
- 41.97,
- 59.4,
- 3,
- 50.24,
- 42,
- 66.85,
- 5,
- 54.7,
- 51.29,
- 91.52,
- 4,
- 0.17,
- 0.08,
- 0.17,
- 2,
- 30.28,
- 30.04,
- 30.39,
- 1,
- 18.09,
- 16.03,
- 18.57,
- 2,
- 11.21,
- 9.17,
- 11.47,
- 2,
- "GraniteForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/google/gemma-3-4b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-3-4b-it</a>",
- 46.57,
- 68.28,
- 66.46,
- 73.25,
- 2,
- 75.3,
- 74.96,
- 76.37,
- 4,
- 65.02,
- 62.43,
- 71.53,
- 4,
- 51.92,
- 46.53,
- 60,
- 4,
- 49.02,
- 41.31,
- 61.17,
- 6,
- 51.66,
- 50.04,
- 96.51,
- 3,
- 14.55,
- 14.06,
- 14.64,
- 1,
- 17.96,
- 17.95,
- 17.96,
- 1,
- 36,
- 35.79,
- 36.12,
- 1,
- 36.02,
- 35.98,
- 36.04,
- 1,
- "?",
- "?",
- 0,
- 0,
- false,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2.5-14B-Instruct-1M" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Qwen/Qwen2.5-14B-Instruct-1M</a>",
- 44.36,
- 69.51,
- 66.88,
- 86.75,
- 4,
- 54.07,
- 47.09,
- 74.98,
- 3,
- 59.97,
- 54.95,
- 75.11,
- 4,
- 53.29,
- 47.3,
- 83.4,
- 3,
- 48.97,
- 40.07,
- 67.2,
- 6,
- 54.41,
- 53.74,
- 98.5,
- 3,
- 36.76,
- 35.98,
- 37.23,
- 2,
- 25.17,
- 25.1,
- 25.19,
- 2,
- 8.15,
- 6.63,
- 8.28,
- 2,
- 33.32,
- 33.06,
- 33.45,
- 1,
- "Qwen2ForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/Almawave/Velvet-14B" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Almawave/Velvet-14B</a>",
- 39.48,
- 69.96,
- 67.62,
- 78.5,
- 1,
- 62.53,
- 60.49,
- 66.59,
- 5,
- 59.27,
- 55.93,
- 65.64,
- 3,
- 48.79,
- 42.4,
- 57.4,
- 4,
- 47.33,
- 37.91,
- 64.31,
- 5,
- 50.08,
- 45.47,
- 89.53,
- 4,
- 0.13,
- 0.06,
- 0.13,
- 1,
- 31.1,
- 31.09,
- 31.11,
- 2,
- 15.89,
- 15.75,
- 15.91,
- 2,
- 9.68,
- 6.96,
- 9.98,
- 1,
- "MistralForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/mii-llm/maestrale-chat-v0.4-beta" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">mii-llm/maestrale-chat-v0.4-beta</a>",
- 41.04,
- 60.19,
- 55.42,
- 73.25,
- 4,
- 63.23,
- 61.22,
- 67.4,
- 4,
- 61.53,
- 60.62,
- 63.07,
- 6,
- 51.91,
- 45.17,
- 63.8,
- 3,
- 46.35,
- 36.19,
- 66.84,
- 6,
- 52.36,
- 46.09,
- 83.04,
- 4,
- 20.38,
- 19.22,
- 20.68,
- 2,
- 25.69,
- 25.14,
- 25.89,
- 2,
- 5.02,
- 4.62,
- 5.05,
- 1,
- 23.76,
- 21.98,
- 24.33,
- 2,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/google/gemma-3-12b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-3-12b-it</a>",
- 47.35,
- 71.3,
- 69.25,
- 79,
- 4,
- 61.91,
- 57.59,
- 74.6,
- 4,
- 67.19,
- 65.51,
- 71.4,
- 3,
- 58.57,
- 53.63,
- 83.4,
- 4,
- 46.06,
- 35.76,
- 66.76,
- 5,
- 52.91,
- 52.33,
- 98.75,
- 3,
- 21.85,
- 20.17,
- 22.33,
- 2,
- 29.93,
- 29.46,
- 30.13,
- 2,
- 31.5,
- 29.49,
- 32.47,
- 1,
- 32.23,
- 32.16,
- 32.27,
- 1,
- "?",
- "?",
- 13,
- 0,
- false,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2.5-7B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Qwen/Qwen2.5-7B-Instruct</a>",
- 45.5,
- 75.21,
- 73.67,
- 82.5,
- 4,
- 57.18,
- 51.62,
- 70.43,
- 4,
- 64.56,
- 61.54,
- 72.54,
- 4,
- 49.73,
- 41.03,
- 70.4,
- 3,
- 45.89,
- 35.48,
- 67.11,
- 5,
- 52.83,
- 51.58,
- 97.26,
- 4,
- 32.96,
- 32.42,
- 33.22,
- 2,
- 25.39,
- 24.9,
- 25.56,
- 2,
- 14.97,
- 10.24,
- 15.86,
- 2,
- 36.26,
- 36.11,
- 36.35,
- 1,
- "Qwen2ForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/sapienzanlp/Minerva-7B-instruct-v1.0" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">sapienzanlp/Minerva-7B-instruct-v1.0</a>",
- 35.7,
- 67.25,
- 65.33,
- 72.25,
- 3,
- 57.68,
- 54.91,
- 62.25,
- 6,
- 41.59,
- 30.89,
- 54.29,
- 5,
- 37.84,
- 37.27,
- 38.2,
- 2,
- 45.48,
- 36.89,
- 56.73,
- 5,
- 29.88,
- 28.64,
- 30.42,
- 4,
- 5,
- 4.88,
- 5.01,
- 2,
- 15.36,
- 15.12,
- 15.4,
- 2,
- 27.24,
- 27.04,
- 27.32,
- 2,
- 29.69,
- 29.36,
- 29.83,
- 1,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/MoxoffSpA/Volare" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">MoxoffSpA/Volare</a>",
- 43.73,
- 66.86,
- 65.17,
- 71,
- 2,
- 68.39,
- 66.88,
- 72.33,
- 4,
- 49.59,
- 45.34,
- 54.73,
- 4,
- 48.92,
- 41.83,
- 59.2,
- 3,
- 44.42,
- 36.76,
- 53.1,
- 1,
- 49.76,
- 41.36,
- 75.06,
- 4,
- 28,
- 27.25,
- 28.29,
- 1,
- 17.48,
- 17.37,
- 17.5,
- 2,
- 31.55,
- 31.49,
- 31.57,
- 2,
- 32.34,
- 31.3,
- 32.85,
- 1,
- "GemmaForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">sapienzanlp/Minerva-7B-base-v1.0</a>",
- 35.06,
- 68.71,
- 66.79,
- 74.25,
- 4,
- 53.85,
- 53.14,
- 54.7,
- 5,
- 39.49,
- 34.45,
- 43.34,
- 5,
- 38.04,
- 36.83,
- 38.8,
- 6,
- 43.99,
- 34.55,
- 56,
- 5,
- 28.45,
- 27.27,
- 28.93,
- 1,
- 9.53,
- 9.51,
- 9.53,
- 2,
- 15.94,
- 15.92,
- 15.95,
- 1,
- 24.37,
- 24.29,
- 24.4,
- 1,
- 28.2,
- 26.64,
- 28.83,
- 1,
- "MistralForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/google/gemma-2-9b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-2-9b-it</a>",
- 47.54,
- 71.99,
- 70.12,
- 79,
- 2,
- 64.35,
- 61.74,
- 70.63,
- 4,
- 64.22,
- 61.92,
- 69.42,
- 4,
- 57.94,
- 52.27,
- 77.8,
- 4,
- 42.48,
- 30.36,
- 64.67,
- 5,
- 51.53,
- 50.58,
- 98,
- 4,
- 25.45,
- 25.29,
- 25.5,
- 2,
- 30.48,
- 30.1,
- 30.65,
- 2,
- 32.01,
- 31.63,
- 32.2,
- 1,
- 34.97,
- 34.56,
- 35.19,
- 1,
- "Gemma2ForCausalLM",
- "?",
- 10,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/MoxoffSpA/Volare" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">MoxoffSpA/Volare</a>",
- 30.23,
- 53.14,
- 51.25,
- 55.5,
- 6,
- 50.1,
- 44.15,
- 58.49,
- 6,
- 50.7,
- 43.57,
- 62.7,
- 2,
- 26.13,
- 22.77,
- 27.4,
- 3,
- 40.77,
- 27.8,
- 66.4,
- 5,
- 27.95,
- 27.35,
- 28.18,
- 1,
- 0.04,
- 0.02,
- 0.04,
- 1,
- 23.28,
- 23.15,
- 23.32,
- 1,
- 10.77,
- 10.63,
- 10.78,
- 2,
- 19.47,
- 15.95,
- 20.37,
- 1,
- "GemmaForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/Almawave/Velvet-14B" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Almawave/Velvet-14B</a>",
- 42.69,
- 74.99,
- 74.46,
- 76.75,
- 2,
- 68.28,
- 67.99,
- 68.93,
- 1,
- 67.17,
- 66.35,
- 68.97,
- 4,
- 54.18,
- 49.73,
- 61.2,
- 4,
- 29.29,
- 15.41,
- 37.69,
- 6,
- 49.28,
- 41.73,
- 80.55,
- 4,
- 9.43,
- 9.21,
- 9.45,
- 1,
- 34.66,
- 34.25,
- 34.88,
- 2,
- 24.83,
- 24.48,
- 24.94,
- 2,
- 14.79,
- 13.08,
- 15.1,
- 1,
- "MistralForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "metadata": null
Frequently Asked Questions & Question Answering (FAQ) --- Multiple-choice task
The input is a user query regarding the water supply service. The model must identify the correct answer from the 4 available options.
# | Prompt | Answer Choices |
---|---|---|
1 | Rispondi alla seguente domanda: '{{question}}' | {{[A, B, C, D]}} |
2 | Devi risolvere un compito di risposte a domande. Rispondi alla seguente domanda: '{{question}}' | {{[A, B, C, D]}} |
3 | Rispondi alla seguente domanda: '{{question}}'\nA: {{A}}\nB: {{B}}\nC: {{C}}\nD: {{D}}\nRisposta: | ["A", "B", "C", "D"] |
4 | Devi risolvere un compito a scelta multipla. Rispondi alla seguente domanda: '{{question}}'\nA: {{A}}\nB: {{B}}\nC: {{C}}\nD: {{D}}\nRisposta: | ["A", "B", "C", "D"] |
5 | La risposta alla domanda: '{{question}}' è: | {{[A, B, C, D]}} |
6 | Devi risolvere un compito di risposte a domande. La risposta alla domanda: '{{question}}' è: | {{[A, B, C, D]}} |
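The `{{...}}` placeholders are Jinja-style template slots filled with each instance's question and answer options. As a minimal illustration of how prompt 3 could be instantiated (assuming Jinja2 templating, which the placeholder syntax suggests but the source does not confirm; all field values are hypothetical):

```python
# Sketch of rendering prompt 3 above; assumes Jinja2-style templating and
# hypothetical field values -- not necessarily the harness actually used.
from jinja2 import Template

prompt_3 = Template(
    "Rispondi alla seguente domanda: '{{question}}'\n"
    "A: {{A}}\nB: {{B}}\nC: {{C}}\nD: {{D}}\nRisposta:"
)

print(prompt_3.render(
    question="Come posso segnalare un guasto?",  # "How can I report a fault?"
    A="Chiamando il numero verde",
    B="Inviando una lettera",
    C="Recandosi allo sportello",
    D="Non è possibile segnalarlo",
))
```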
Combined Performance = (1 - (Best Prompt - Prompt Average) / 100) * Best Prompt. Prompt Average = accuracy averaged over the 6 prompts. Best Prompt = accuracy of the best prompt. Prompt ID = ID of the best prompt (see legend above).
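A minimal sketch of this metric, assuming the formula above with scores on a 0–100 scale:

```python
def combined_performance(prompt_average: float, best_prompt: float) -> float:
    # (1 - (Best Prompt - Prompt Average) / 100) * Best Prompt: the
    # best-prompt score is discounted by its gap from the prompt average,
    # rewarding models that are robust to prompt phrasing.
    return (1 - (best_prompt - prompt_average) / 100) * best_prompt

# Reproduces the FAQ entry of Qwen/Qwen2.5-14B-Instruct-1M (5-shot) in the
# table below: Prompt Average = 52.37, Best Prompt = 99.5.
print(round(combined_performance(52.37, 99.5), 2))  # 52.61
```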
- "headers": [
- "FS",
- "IS_FS",
- "Model",
- "Avg. Combined Performance ⬆️",
- "TE",
- "TE Prompt Average",
- "TE Best Prompt",
- "TE Best Prompt Id",
- "SA",
- "SA Prompt Average",
- "SA Best Prompt",
- "SA Best Prompt Id",
- "HS",
- "HS Prompt Average",
- "HS Best Prompt",
- "HS Best Prompt Id",
- "AT",
- "AT Prompt Average",
- "AT Best Prompt",
- "AT Best Prompt Id",
- "WIC",
- "WIC Prompt Average",
- "WIC Best Prompt",
- "WIC Best Prompt Id",
- "Combined Performance",
- "Prompt Average",
- "Best Prompt",
- "Best Prompt Id",
- "LS",
- "LS Prompt Average",
- "LS Best Prompt",
- "LS Best Prompt Id",
- "SU",
- "SU Prompt Average",
- "SU Best Prompt",
- "SU Best Prompt Id",
- "NER",
- "NER Prompt Average",
- "NER Best Prompt",
- "NER Best Prompt Id",
- "REL",
- "REL Prompt Average",
- "REL Best Prompt",
- "REL Best Prompt Id",
- "Architecture",
- "Hub License",
- "#Params (B)",
- "Hub ❤️",
- "Available on the hub",
- "Model sha"
- "data": [
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA</a>",
- 49.39,
- 72.91,
- 71.75,
- 76.75,
- 1,
- 72.07,
- 70.82,
- 76.05,
- 4,
- 70.99,
- 69.88,
- 74.19,
- 4,
- 52.81,
- 46.4,
- 64.4,
- 4,
- 56.82,
- 52.88,
- 63.74,
- 1,
- 56.55,
- 52.41,
- 88.53,
- 3,
- 23.26,
- 22.92,
- 23.37,
- 1,
- 29.1,
- 28.92,
- 29.18,
- 2,
- 17.2,
- 15.77,
- 17.51,
- 2,
- 42.14,
- 41.31,
- 42.76,
- 1,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/arcee-ai/Llama-3.1-SuperNova-Lite" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">arcee-ai/Llama-3.1-SuperNova-Lite</a>",
- 42.66,
- 70.06,
- 68.54,
- 74.5,
- 4,
- 66.85,
- 63.8,
- 77.12,
- 4,
- 56.41,
- 52.21,
- 63.8,
- 5,
- 51.47,
- 43.63,
- 68.6,
- 3,
- 50.8,
- 42.88,
- 66.58,
- 6,
- 54.89,
- 51.16,
- 90.52,
- 4,
- 23.74,
- 22.04,
- 24.29,
- 2,
- 22.77,
- 22.74,
- 22.78,
- 2,
- 9.98,
- 8.86,
- 10.11,
- 2,
- 19.61,
- 17.82,
- 20.06,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/ibm-granite/granite-3.1-8b-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">ibm-granite/granite-3.1-8b-instruct</a>",
- 37.26,
- 56.34,
- 51.08,
- 67,
- 2,
- 54.43,
- 47.82,
- 69.45,
- 4,
- 48.15,
- 39.51,
- 62.64,
- 1,
- 49.04,
- 41.97,
- 59.4,
- 3,
- 50.24,
- 42,
- 66.85,
- 5,
- 54.7,
- 51.29,
- 91.52,
- 4,
- 0.17,
- 0.08,
- 0.17,
- 2,
- 30.28,
- 30.04,
- 30.39,
- 1,
- 18.09,
- 16.03,
- 18.57,
- 2,
- 11.21,
- 9.17,
- 11.47,
- 2,
- "GraniteForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/arcee-ai/Llama-3.1-SuperNova-Lite" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">arcee-ai/Llama-3.1-SuperNova-Lite</a>",
- 51.71,
- 74.19,
- 73.62,
- 76,
- 3,
- 75.24,
- 74.23,
- 79.03,
- 4,
- 69.24,
- 67.58,
- 73.93,
- 4,
- 54.83,
- 48.43,
- 69.2,
- 4,
- 62.58,
- 61.35,
- 64.86,
- 6,
- 54.43,
- 51,
- 91.52,
- 3,
- 29.5,
- 29.28,
- 29.59,
- 2,
- 21.6,
- 21.38,
- 21.66,
- 2,
- 39.49,
- 38.24,
- 40.34,
- 2,
- 36.02,
- 35.67,
- 36.21,
- 1,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2.5-14B-Instruct-1M" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Qwen/Qwen2.5-14B-Instruct-1M</a>",
- 44.36,
- 69.51,
- 66.88,
- 86.75,
- 4,
- 54.07,
- 47.09,
- 74.98,
- 3,
- 59.97,
- 54.95,
- 75.11,
- 4,
- 53.29,
- 47.3,
- 83.4,
- 3,
- 48.97,
- 40.07,
- 67.2,
- 6,
- 54.41,
- 53.74,
- 98.5,
- 3,
- 36.76,
- 35.98,
- 37.23,
- 2,
- 25.17,
- 25.1,
- 25.19,
- 2,
- 8.15,
- 6.63,
- 8.28,
- 2,
- 33.32,
- 33.06,
- 33.45,
- 1,
- "Qwen2ForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/CohereForAI/aya-expanse-8b" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">CohereForAI/aya-expanse-8b</a>",
- 49.3,
- 72.91,
- 71.88,
- 76.25,
- 2,
- 72.28,
- 71.9,
- 73.3,
- 5,
- 67.71,
- 65.83,
- 72.71,
- 4,
- 52.54,
- 46.27,
- 63.4,
- 4,
- 62.02,
- 59.58,
- 66.98,
- 3,
- 54.39,
- 51.87,
- 94.01,
- 4,
- 20.55,
- 20.37,
- 20.6,
- 2,
- 18.32,
- 18.27,
- 18.33,
- 1,
- 35.73,
- 35.67,
- 35.77,
- 2,
- 36.56,
- 36.56,
- 36.56,
- 1,
- "CohereForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/microsoft/phi-4" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/phi-4</a>",
- 51.84,
- 76.84,
- 76.21,
- 79.25,
- 3,
- 75.21,
- 74.44,
- 77.95,
- 4,
- 69.2,
- 67.08,
- 75.89,
- 4,
- 61.91,
- 57.87,
- 85.2,
- 4,
- 59.38,
- 57.17,
- 63.16,
- 4,
- 54.3,
- 53.41,
- 98,
- 4,
- 35.1,
- 35.1,
- 35.11,
- 2,
- 21.24,
- 21.06,
- 21.29,
- 1,
- 28.05,
- 27.93,
- 28.1,
- 2,
- 37.15,
- 36.83,
- 37.34,
- 1,
- "Phi3ForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">meta-llama/Meta-Llama-3.1-8B-Instruct</a>",
- 50.37,
- 70.53,
- 69.04,
- 75,
- 4,
- 73.77,
- 72.72,
- 77.35,
- 4,
- 65.72,
- 63.22,
- 72.22,
- 4,
- 53.89,
- 46.97,
- 70.2,
- 4,
- 59.03,
- 57.65,
- 61.2,
- 2,
- 53.73,
- 51.16,
- 94.01,
- 3,
- 30.32,
- 29.76,
- 30.57,
- 2,
- 20.44,
- 20.2,
- 20.5,
- 2,
- 40.3,
- 39.7,
- 40.72,
- 2,
- 35.99,
- 35.89,
- 36.05,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/DeepMount00/Llama-3-8b-Ita" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">DeepMount00/Llama-3-8b-Ita</a>",
- 49.41,
- 72.61,
- 71.54,
- 76,
- 1,
- 71.9,
- 70.46,
- 76.59,
- 4,
- 68.95,
- 67.49,
- 72.86,
- 4,
- 52.96,
- 46.33,
- 65.6,
- 3,
- 57.04,
- 54.37,
- 61.26,
- 1,
- 53.68,
- 51.41,
- 94.76,
- 4,
- 25.08,
- 24.86,
- 25.16,
- 1,
- 24.3,
- 24.26,
- 24.31,
- 2,
- 30.98,
- 30.27,
- 31.31,
- 2,
- 36.56,
- 36.56,
- 36.56,
- 1,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/microsoft/phi-4" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/phi-4</a>",
- 38.37,
- 63.85,
- 60.42,
- 73.25,
- 4,
- 50.54,
- 42.31,
- 74.26,
- 4,
- 61.42,
- 60.41,
- 63.15,
- 4,
- 55.69,
- 50.13,
- 83.2,
- 3,
- 54.67,
- 48.23,
- 69.01,
- 3,
- 53.53,
- 51.45,
- 95.26,
- 4,
- 0,
- 0,
- 0,
- 1,
- 22.5,
- 13.28,
- 25.69,
- 1,
- 0.36,
- 0.18,
- 0.36,
- 1,
- 21.17,
- 19.38,
- 21.66,
- 1,
- "Phi3ForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/ibm-granite/granite-3.1-8b-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">ibm-granite/granite-3.1-8b-instruct</a>",
- 51.7,
- 72.23,
- 70.92,
- 76.5,
- 4,
- 74.76,
- 74.47,
- 75.67,
- 1,
- 66.88,
- 64.05,
- 75.68,
- 4,
- 54.23,
- 48.17,
- 66,
- 4,
- 65.03,
- 63.71,
- 67.83,
- 4,
- 53.49,
- 52.58,
- 98,
- 3,
- 25.74,
- 25.6,
- 25.79,
- 1,
- 32.88,
- 32.36,
- 33.14,
- 1,
- 31.8,
- 30.59,
- 32.38,
- 2,
- 39.91,
- 39.7,
- 40.06,
- 1,
- "GraniteForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">mistralai/Mistral-7B-Instruct-v0.3</a>",
- 41.56,
- 63.83,
- 60.83,
- 71.25,
- 1,
- 61.23,
- 58.71,
- 66.14,
- 6,
- 59.16,
- 55.86,
- 65.39,
- 5,
- 46.03,
- 39.23,
- 54,
- 3,
- 62.89,
- 61.08,
- 66.49,
- 2,
- 53.28,
- 47.63,
- 84.79,
- 4,
- 9.87,
- 6.47,
- 10.26,
- 2,
- 28.1,
- 27.28,
- 28.43,
- 2,
- 9.72,
- 9.64,
- 9.73,
- 1,
- 21.49,
- 20.29,
- 21.83,
- 2,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/microsoft/Phi-3-medium-4k-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/Phi-3-medium-4k-instruct</a>",
- 42.09,
- 66.01,
- 62.58,
- 78.5,
- 3,
- 55.31,
- 48.78,
- 72.58,
- 3,
- 65.19,
- 62.35,
- 72.75,
- 4,
- 54.11,
- 47.67,
- 80.2,
- 3,
- 53.5,
- 46.92,
- 66.67,
- 1,
- 53.24,
- 48.67,
- 88.53,
- 4,
- 0.34,
- 0.31,
- 0.34,
- 2,
- 27.64,
- 26.52,
- 28.08,
- 2,
- 18.6,
- 17.48,
- 18.87,
- 1,
- 27.01,
- 26.99,
- 27.01,
- 2,
- "Phi3ForCausalLM",
- "?",
- 14,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">meta-llama/Meta-Llama-3.1-8B-Instruct</a>",
- 40.23,
- 62.73,
- 60.62,
- 67,
- 1,
- 63.07,
- 59.8,
- 71.12,
- 5,
- 51.39,
- 44.49,
- 63.26,
- 5,
- 50.55,
- 42.27,
- 69,
- 4,
- 55.86,
- 50.4,
- 66.93,
- 6,
- 53.12,
- 48.29,
- 87.78,
- 4,
- 19.94,
- 15.75,
- 21.06,
- 2,
- 22.33,
- 22.3,
- 22.34,
- 1,
- 7.93,
- 7.57,
- 7.96,
- 1,
- 15.42,
- 15.08,
- 15.48,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/google/gemma-3-12b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-3-12b-it</a>",
- 47.35,
- 71.3,
- 69.25,
- 79,
- 4,
- 61.91,
- 57.59,
- 74.6,
- 4,
- 67.19,
- 65.51,
- 71.4,
- 3,
- 58.57,
- 53.63,
- 83.4,
- 4,
- 46.06,
- 35.76,
- 66.76,
- 5,
- 52.91,
- 52.33,
- 98.75,
- 3,
- 21.85,
- 20.17,
- 22.33,
- 2,
- 29.93,
- 29.46,
- 30.13,
- 2,
- 31.5,
- 29.49,
- 32.47,
- 1,
- 32.23,
- 32.16,
- 32.27,
- 1,
- "?",
- "?",
- 13,
- 0,
- false,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/FairMind/Llama-3-8B-4bit-UltraChat-Ita" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">FairMind/Llama-3-8B-4bit-UltraChat-Ita</a>",
- 44.93,
- 65.05,
- 62.62,
- 71,
- 1,
- 69.44,
- 68.97,
- 70.56,
- 4,
- 61.51,
- 60.03,
- 64.16,
- 4,
- 47.01,
- 39.57,
- 56.8,
- 3,
- 52.79,
- 47.59,
- 60.88,
- 3,
- 52.88,
- 48.26,
- 88.53,
- 4,
- 22.47,
- 21.8,
- 22.67,
- 2,
- 21.41,
- 21.25,
- 21.45,
- 1,
- 29.77,
- 29.17,
- 30.02,
- 2,
- 27,
- 26.27,
- 27.27,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2.5-7B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Qwen/Qwen2.5-7B-Instruct</a>",
- 45.5,
- 75.21,
- 73.67,
- 82.5,
- 4,
- 57.18,
- 51.62,
- 70.43,
- 4,
- 64.56,
- 61.54,
- 72.54,
- 4,
- 49.73,
- 41.03,
- 70.4,
- 3,
- 45.89,
- 35.48,
- 67.11,
- 5,
- 52.83,
- 51.58,
- 97.26,
- 4,
- 32.96,
- 32.42,
- 33.22,
- 2,
- 25.39,
- 24.9,
- 25.56,
- 2,
- 14.97,
- 10.24,
- 15.86,
- 2,
- 36.26,
- 36.11,
- 36.35,
- 1,
- "Qwen2ForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/microsoft/Phi-3.5-mini-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/Phi-3.5-mini-instruct</a>",
- 50.06,
- 77.35,
- 76.92,
- 79,
- 2,
- 72.81,
- 72.21,
- 74.57,
- 5,
- 67.05,
- 65.31,
- 71.39,
- 4,
- 53.19,
- 46.5,
- 66.4,
- 3,
- 65.33,
- 63.97,
- 68.25,
- 6,
- 52.72,
- 47.92,
- 88.03,
- 4,
- 24.07,
- 23.86,
- 24.14,
- 2,
- 19.85,
- 19.71,
- 19.89,
- 1,
- 31.98,
- 31.5,
- 32.21,
- 1,
- 36.25,
- 36.15,
- 36.3,
- 2,
- "Phi3ForCausalLM",
- "?",
- 4,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2.5-14B-Instruct-1M" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Qwen/Qwen2.5-14B-Instruct-1M</a>",
- 54.72,
- 85.02,
- 84.79,
- 86.5,
- 4,
- 73.17,
- 71.75,
- 78.26,
- 3,
- 72.8,
- 71.8,
- 75.96,
- 4,
- 59.75,
- 55.4,
- 85.6,
- 3,
- 63.59,
- 61.7,
- 67.52,
- 6,
- 52.61,
- 52.37,
- 99.5,
- 4,
- 35.03,
- 34.65,
- 35.24,
- 1,
- 25.9,
- 25.51,
- 26.04,
- 2,
- 35.1,
- 35.03,
- 35.14,
- 1,
- 44.23,
- 42.63,
- 45.56,
- 1,
- "Qwen2ForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/google/gemma-3-4b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-3-4b-it</a>",
- 44.59,
- 66.22,
- 64.88,
- 69.25,
- 3,
- 60.41,
- 57.66,
- 65.65,
- 5,
- 59.8,
- 57.32,
- 64.25,
- 5,
- 46.01,
- 38.87,
- 54.6,
- 3,
- 50.8,
- 44.13,
- 61.44,
- 5,
- 52.55,
- 48.67,
- 90.77,
- 3,
- 32.37,
- 30.72,
- 33.18,
- 1,
- 29.24,
- 28.93,
- 29.37,
- 2,
- 29.76,
- 28.92,
- 30.13,
- 2,
- 18.7,
- 18.34,
- 18.78,
- 2,
- "?",
- "?",
- 0,
- 0,
- false,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2.5-7B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Qwen/Qwen2.5-7B-Instruct</a>",
- 52.62,
- 83.65,
- 83.42,
- 85,
- 2,
- 74.16,
- 73.77,
- 75.35,
- 4,
- 68.51,
- 67.14,
- 72.04,
- 4,
- 53.92,
- 46.93,
- 75.8,
- 4,
- 62.7,
- 60.38,
- 67.5,
- 6,
- 52.46,
- 51.87,
- 98.75,
- 4,
- 28.17,
- 27.85,
- 28.29,
- 2,
- 30.79,
- 30.36,
- 30.98,
- 2,
- 35.18,
- 34.45,
- 35.58,
- 1,
- 36.69,
- 36.64,
- 36.72,
- 2,
- "Qwen2ForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/mii-llm/maestrale-chat-v0.4-beta" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">mii-llm/maestrale-chat-v0.4-beta</a>",
- 41.04,
- 60.19,
- 55.42,
- 73.25,
- 4,
- 63.23,
- 61.22,
- 67.4,
- 4,
- 61.53,
- 60.62,
- 63.07,
- 6,
- 51.91,
- 45.17,
- 63.8,
- 3,
- 46.35,
- 36.19,
- 66.84,
- 6,
- 52.36,
- 46.09,
- 83.04,
- 4,
- 20.38,
- 19.22,
- 20.68,
- 2,
- 25.69,
- 25.14,
- 25.89,
- 2,
- 5.02,
- 4.62,
- 5.05,
- 1,
- 23.76,
- 21.98,
- 24.33,
- 2,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/google/gemma-3-12b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-3-12b-it</a>",
- 53.92,
- 79.52,
- 79.12,
- 81.25,
- 1,
- 78.34,
- 78.17,
- 78.98,
- 6,
- 71.4,
- 70.14,
- 75.24,
- 4,
- 65.9,
- 62.5,
- 84.6,
- 4,
- 66.5,
- 64.94,
- 70.16,
- 3,
- 52.35,
- 52,
- 99.25,
- 3,
- 26.77,
- 26.64,
- 26.81,
- 2,
- 18.93,
- 18.74,
- 18.98,
- 2,
- 37.86,
- 37.82,
- 37.89,
- 1,
- 41.59,
- 41.46,
- 41.69,
- 1,
- "?",
- "?",
- 13,
- 0,
- false,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/microsoft/Phi-3-medium-4k-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/Phi-3-medium-4k-instruct</a>",
- 51.22,
- 76.27,
- 75.25,
- 80.5,
- 4,
- 70.48,
- 69.85,
- 72.1,
- 3,
- 62.83,
- 58.75,
- 75.2,
- 3,
- 61.47,
- 56.9,
- 81.2,
- 4,
- 68.92,
- 67.56,
- 72.47,
- 2,
- 52.17,
- 45.47,
- 81.3,
- 4,
- 35.87,
- 35.46,
- 36.11,
- 1,
- 21.59,
- 21.35,
- 21.65,
- 2,
- 25.67,
- 25.55,
- 25.71,
- 2,
- 36.95,
- 36.78,
- 37.04,
- 2,
- "Phi3ForCausalLM",
- "?",
- 14,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/google/gemma-2-9b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-2-9b-it</a>",
- 53.64,
- 79.97,
- 79.62,
- 81.5,
- 2,
- 72.85,
- 72.49,
- 73.89,
- 1,
- 71.99,
- 70.71,
- 76.03,
- 3,
- 59.96,
- 54.9,
- 75.8,
- 3,
- 57.79,
- 55.64,
- 61.17,
- 3,
- 51.76,
- 51.16,
- 98.75,
- 3,
- 22.36,
- 21.97,
- 22.47,
- 2,
- 30.43,
- 29.12,
- 31.02,
- 1,
- 38.03,
- 37.92,
- 38.09,
- 1,
- 51.26,
- 50.98,
- 51.56,
- 1,
- "Gemma2ForCausalLM",
- "?",
- 10,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/google/gemma-3-4b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-3-4b-it</a>",
- 46.57,
- 68.28,
- 66.46,
- 73.25,
- 2,
- 75.3,
- 74.96,
- 76.37,
- 4,
- 65.02,
- 62.43,
- 71.53,
- 4,
- 51.92,
- 46.53,
- 60,
- 4,
- 49.02,
- 41.31,
- 61.17,
- 6,
- 51.66,
- 50.04,
- 96.51,
- 3,
- 14.55,
- 14.06,
- 14.64,
- 1,
- 17.96,
- 17.95,
- 17.96,
- 1,
- 36,
- 35.79,
- 36.12,
- 1,
- 36.02,
- 35.98,
- 36.04,
- 1,
- "?",
- "?",
- 0,
- 0,
- false,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/mii-llm/maestrale-chat-v0.4-beta" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">mii-llm/maestrale-chat-v0.4-beta</a>",
- 49.37,
- 73.89,
- 72.62,
- 78.5,
- 4,
- 72.23,
- 71.75,
- 73.57,
- 2,
- 66.86,
- 64.99,
- 71.56,
- 3,
- 55.62,
- 50.7,
- 64.6,
- 3,
- 60.35,
- 59.48,
- 61.76,
- 6,
- 51.66,
- 43.81,
- 74.06,
- 3,
- 22.54,
- 22.36,
- 22.59,
- 1,
- 21.18,
- 21.18,
- 21.18,
- 2,
- 33.53,
- 33.24,
- 33.68,
- 1,
- 35.81,
- 35.36,
- 36.06,
- 2,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/occiglot/occiglot-7b-it-en-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">occiglot/occiglot-7b-it-en-instruct</a>",
- 43.69,
- 64.34,
- 61.83,
- 70.25,
- 2,
- 69.04,
- 68.43,
- 70.48,
- 4,
- 61.77,
- 58.3,
- 69.78,
- 4,
- 54.16,
- 49.43,
- 61.8,
- 4,
- 55.72,
- 51.2,
- 63.65,
- 3,
- 51.6,
- 43.68,
- 73.07,
- 4,
- 5.6,
- 5.4,
- 5.61,
- 2,
- 21.25,
- 20.55,
- 21.44,
- 2,
- 25.15,
- 24.93,
- 25.22,
- 2,
- 28.28,
- 27.2,
- 28.71,
- 1,
- "MistralForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/google/gemma-2-9b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-2-9b-it</a>",
- 47.54,
- 71.99,
- 70.12,
- 79,
- 2,
- 64.35,
- 61.74,
- 70.63,
- 4,
- 64.22,
- 61.92,
- 69.42,
- 4,
- 57.94,
- 52.27,
- 77.8,
- 4,
- 42.48,
- 30.36,
- 64.67,
- 5,
- 51.53,
- 50.58,
- 98,
- 4,
- 25.45,
- 25.29,
- 25.5,
- 2,
- 30.48,
- 30.1,
- 30.65,
- 2,
- 32.01,
- 31.63,
- 32.2,
- 1,
- 34.97,
- 34.56,
- 35.19,
- 1,
- "Gemma2ForCausalLM",
- "?",
- 10,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/microsoft/Phi-3.5-mini-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/Phi-3.5-mini-instruct</a>",
- 44.4,
- 72.16,
- 70.04,
- 81.5,
- 4,
- 51.63,
- 43.72,
- 70.59,
- 3,
- 65.93,
- 64.49,
- 69.17,
- 4,
- 48.9,
- 40.67,
- 62.8,
- 3,
- 60.37,
- 56.97,
- 67.41,
- 5,
- 51.52,
- 44.22,
- 79.05,
- 3,
- 20.38,
- 18.28,
- 20.94,
- 2,
- 23.24,
- 22.7,
- 23.4,
- 2,
- 30.58,
- 30.35,
- 30.68,
- 2,
- 19.33,
- 14.4,
- 20.61,
- 1,
- "Phi3ForCausalLM",
- "?",
- 4,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/DeepMount00/Llama-3-8b-Ita" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">DeepMount00/Llama-3-8b-Ita</a>",
- 41.02,
- 58.59,
- 56,
- 63,
- 2,
- 62.59,
- 60.33,
- 67.21,
- 5,
- 49.12,
- 40.78,
- 63.86,
- 6,
- 49.04,
- 40.83,
- 63,
- 4,
- 55.44,
- 49.72,
- 67.12,
- 6,
- 51.22,
- 46.22,
- 88.03,
- 4,
- 22.59,
- 21.46,
- 22.93,
- 2,
- 22.76,
- 22.51,
- 22.84,
- 1,
- 25.08,
- 20.84,
- 26.62,
- 1,
- 13.78,
- 12.08,
- 14.06,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA</a>",
- 41.74,
- 62.14,
- 58.92,
- 69.5,
- 2,
- 64.05,
- 60.96,
- 72.04,
- 4,
- 48.59,
- 39.59,
- 66.32,
- 6,
- 48.85,
- 40.73,
- 62.2,
- 4,
- 57.27,
- 52.6,
- 66.57,
- 6,
- 51.01,
- 42.85,
- 71.82,
- 4,
- 19.37,
- 19.35,
- 19.37,
- 1,
- 22.63,
- 22.37,
- 22.71,
- 2,
- 22.65,
- 19.02,
- 23.79,
- 1,
- 20.79,
- 17.81,
- 21.61,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/Almawave/Velvet-14B" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Almawave/Velvet-14B</a>",
- 39.48,
- 69.96,
- 67.62,
- 78.5,
- 1,
- 62.53,
- 60.49,
- 66.59,
- 5,
- 59.27,
- 55.93,
- 65.64,
- 3,
- 48.79,
- 42.4,
- 57.4,
- 4,
- 47.33,
- 37.91,
- 64.31,
- 5,
- 50.08,
- 45.47,
- 89.53,
- 4,
- 0.13,
- 0.06,
- 0.13,
- 1,
- 31.1,
- 31.09,
- 31.11,
- 2,
- 15.89,
- 15.75,
- 15.91,
- 2,
- 9.68,
- 6.96,
- 9.98,
- 1,
- "MistralForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/MoxoffSpA/Volare" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">MoxoffSpA/Volare</a>",
- 43.73,
- 66.86,
- 65.17,
- 71,
- 2,
- 68.39,
- 66.88,
- 72.33,
- 4,
- 49.59,
- 45.34,
- 54.73,
- 4,
- 48.92,
- 41.83,
- 59.2,
- 3,
- 44.42,
- 36.76,
- 53.1,
- 1,
- 49.76,
- 41.36,
- 75.06,
- 4,
- 28,
- 27.25,
- 28.29,
- 1,
- 17.48,
- 17.37,
- 17.5,
- 2,
- 31.55,
- 31.49,
- 31.57,
- 2,
- 32.34,
- 31.3,
- 32.85,
- 1,
- "GemmaForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/occiglot/occiglot-7b-it-en-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">occiglot/occiglot-7b-it-en-instruct</a>",
- 38,
- 52.59,
- 49.92,
- 56,
- 4,
- 55.46,
- 51.87,
- 61.09,
- 6,
- 53.79,
- 48.48,
- 62.72,
- 1,
- 47.95,
- 42.8,
- 54,
- 3,
- 50.78,
- 42.86,
- 66.49,
- 5,
- 49.64,
- 42.89,
- 83.29,
- 4,
- 2.84,
- 1.56,
- 2.88,
- 2,
- 27.68,
- 25.5,
- 28.56,
- 2,
- 6.96,
- 5.26,
- 7.09,
- 1,
- 32.31,
- 32.03,
- 32.45,
- 2,
- "MistralForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/Almawave/Velvet-14B" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Almawave/Velvet-14B</a>",
- 42.69,
- 74.99,
- 74.46,
- 76.75,
- 2,
- 68.28,
- 67.99,
- 68.93,
- 1,
- 67.17,
- 66.35,
- 68.97,
- 4,
- 54.18,
- 49.73,
- 61.2,
- 4,
- 29.29,
- 15.41,
- 37.69,
- 6,
- 49.28,
- 41.73,
- 80.55,
- 4,
- 9.43,
- 9.21,
- 9.45,
- 1,
- 34.66,
- 34.25,
- 34.88,
- 2,
- 24.83,
- 24.48,
- 24.94,
- 2,
- 14.79,
- 13.08,
- 15.1,
- 1,
- "MistralForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/CohereForAI/aya-expanse-8b" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">CohereForAI/aya-expanse-8b</a>",
- 39.85,
- 64,
- 60.33,
- 75,
- 1,
- 63.23,
- 60.16,
- 70.62,
- 4,
- 61.4,
- 60.48,
- 62.97,
- 4,
- 46.66,
- 38.53,
- 57.8,
- 4,
- 52.79,
- 45.95,
- 66.19,
- 3,
- 47.56,
- 38.03,
- 66.33,
- 4,
- 15.24,
- 11.62,
- 15.92,
- 2,
- 19.14,
- 18.84,
- 19.21,
- 2,
- 19.08,
- 17.36,
- 19.5,
- 2,
- 9.37,
- 7.52,
- 9.57,
- 2,
- "CohereForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/FairMind/Llama-3-8B-4bit-UltraChat-Ita" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">FairMind/Llama-3-8B-4bit-UltraChat-Ita</a>",
- 36.28,
- 60.29,
- 58.08,
- 64.25,
- 6,
- 52.97,
- 46.01,
- 67.3,
- 5,
- 54.09,
- 48.72,
- 63.38,
- 5,
- 41.41,
- 33.37,
- 49.2,
- 3,
- 66.22,
- 66.04,
- 66.58,
- 4,
- 43.89,
- 34.54,
- 55.61,
- 4,
- 0,
- 0,
- 0,
- 1,
- 24.17,
- 23.6,
- 24.35,
- 2,
- 15.59,
- 13.72,
- 15.95,
- 2,
- 4.2,
- 2.62,
- 4.27,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/sapienzanlp/Minerva-7B-instruct-v1.0" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">sapienzanlp/Minerva-7B-instruct-v1.0</a>",
- 32.5,
- 55.16,
- 54.12,
- 56.5,
- 4,
- 50.57,
- 44.51,
- 59.46,
- 6,
- 47.62,
- 39.22,
- 60.48,
- 2,
- 32.25,
- 28.87,
- 34,
- 6,
- 57.7,
- 53.42,
- 66.04,
- 5,
- 35.17,
- 31.05,
- 37.66,
- 4,
- 0,
- 0,
- 0,
- 1,
- 16.34,
- 16.22,
- 16.36,
- 2,
- 9.61,
- 9.16,
- 9.66,
- 1,
- 20.6,
- 17.75,
- 21.38,
- 2,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/sapienzanlp/Minerva-7B-instruct-v1.0" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">sapienzanlp/Minerva-7B-instruct-v1.0</a>",
- 35.7,
- 67.25,
- 65.33,
- 72.25,
- 3,
- 57.68,
- 54.91,
- 62.25,
- 6,
- 41.59,
- 30.89,
- 54.29,
- 5,
- 37.84,
- 37.27,
- 38.2,
- 2,
- 45.48,
- 36.89,
- 56.73,
- 5,
- 29.88,
- 28.64,
- 30.42,
- 4,
- 5,
- 4.88,
- 5.01,
- 2,
- 15.36,
- 15.12,
- 15.4,
- 2,
- 27.24,
- 27.04,
- 27.32,
- 2,
- 29.69,
- 29.36,
- 29.83,
- 1,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">sapienzanlp/Minerva-7B-base-v1.0</a>",
- 32.36,
- 56.93,
- 54.46,
- 60.75,
- 1,
- 56.56,
- 50.62,
- 71.33,
- 6,
- 53.3,
- 48.04,
- 61.82,
- 2,
- 31.37,
- 27.7,
- 33.2,
- 5,
- 61.49,
- 59.12,
- 66.13,
- 5,
- 29.12,
- 27.81,
- 29.68,
- 5,
- 0.01,
- 0,
- 0.01,
- 2,
- 16.27,
- 16.04,
- 16.31,
- 1,
- 9.64,
- 9.62,
- 9.64,
- 2,
- 8.91,
- 8.09,
- 8.99,
- 2,
- "MistralForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">sapienzanlp/Minerva-7B-base-v1.0</a>",
- 35.06,
- 68.71,
- 66.79,
- 74.25,
- 4,
- 53.85,
- 53.14,
- 54.7,
- 5,
- 39.49,
- 34.45,
- 43.34,
- 5,
- 38.04,
- 36.83,
- 38.8,
- 6,
- 43.99,
- 34.55,
- 56,
- 5,
- 28.45,
- 27.27,
- 28.93,
- 1,
- 9.53,
- 9.51,
- 9.53,
- 2,
- 15.94,
- 15.92,
- 15.95,
- 1,
- 24.37,
- 24.29,
- 24.4,
- 1,
- 28.2,
- 26.64,
- 28.83,
- 1,
- "MistralForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/MoxoffSpA/Volare" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">MoxoffSpA/Volare</a>",
- 30.23,
- 53.14,
- 51.25,
- 55.5,
- 6,
- 50.1,
- 44.15,
- 58.49,
- 6,
- 50.7,
- 43.57,
- 62.7,
- 2,
- 26.13,
- 22.77,
- 27.4,
- 3,
- 40.77,
- 27.8,
- 66.4,
- 5,
- 27.95,
- 27.35,
- 28.18,
- 1,
- 0.04,
- 0.02,
- 0.04,
- 1,
- 23.28,
- 23.15,
- 23.32,
- 1,
- 10.77,
- 10.63,
- 10.78,
- 2,
- 19.47,
- 15.95,
- 20.37,
- 1,
- "GemmaForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "metadata": null
Lexical Substitution (LS) --- Generative task
The input is a sentence containing a target word (w). The model must replace the target word w with its most suitable, contextually relevant synonyms.
# | Prompt |
---|---|
1 | Trova 10 parole che possono sostituire la parola racchiusa tra i marcatori <head> nella seguente frase: '{{context}}', mantenendo lo stesso significato. Elenca i lemmi (forme base) di queste parole, separandoli con una virgola, ad esempio: lemma1, lemma2, lemma3, lemma4, lemma5. Non aggiungere commenti o altro testo. Risposta: |
2 | Devi risolvere un compito di sostituzione lessicale. Trova 10 parole che possono sostituire la parola racchiusa tra i marcatori <head> nella seguente frase: '{{context}}', mantenendo lo stesso significato. Elenca i lemmi (forme base) di queste parole, separandoli con una virgola, ad esempio: lemma1, lemma2, lemma3, lemma4, lemma5. Non aggiungere commenti o altro testo. Risposta: |
Combined Performance = (1 - (Best Prompt - Prompt Average) / 100) * Best Prompt. Prompt Average = F1 averaged over the 2 prompts. Best Prompt = F1 of the best prompt. Prompt ID = ID of the best prompt (see legend above).
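Because the prompts above demand a comma-separated list of lemmas, scoring has to start by parsing that list out of the raw generation. A minimal post-processing sketch (an assumption for illustration, not the official Evalita-LLM scorer):

```python
def parse_substitutes(model_output: str, max_items: int = 10) -> list[str]:
    # Split the comma-separated answer required by the prompts above,
    # normalising case and whitespace; keep at most `max_items` lemmas.
    lemmas = (tok.strip().lower() for tok in model_output.split(","))
    return [lemma for lemma in lemmas if lemma][:max_items]

# Hypothetical generation for a target word such as "casa" (house):
print(parse_substitutes("abitazione, dimora, alloggio"))
# ['abitazione', 'dimora', 'alloggio']
```

The parsed lemmas are presumably then compared against the gold substitutes to obtain the per-prompt F1 scores that feed the Combined Performance formula above.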
- "headers": [
- "FS",
- "IS_FS",
- "Model",
- "Avg. Combined Performance ⬆️",
- "TE",
- "TE Prompt Average",
- "TE Best Prompt",
- "TE Best Prompt Id",
- "SA",
- "SA Prompt Average",
- "SA Best Prompt",
- "SA Best Prompt Id",
- "HS",
- "HS Prompt Average",
- "HS Best Prompt",
- "HS Best Prompt Id",
- "AT",
- "AT Prompt Average",
- "AT Best Prompt",
- "AT Best Prompt Id",
- "WIC",
- "WIC Prompt Average",
- "WIC Best Prompt",
- "WIC Best Prompt Id",
- "FAQ",
- "FAQ Prompt Average",
- "FAQ Best Prompt",
- "FAQ Best Prompt Id",
- "Combined Performance",
- "Prompt Average",
- "Best Prompt",
- "Best Prompt Id",
- "SU",
- "SU Prompt Average",
- "SU Best Prompt",
- "SU Best Prompt Id",
- "NER",
- "NER Prompt Average",
- "NER Best Prompt",
- "NER Best Prompt Id",
- "REL",
- "REL Prompt Average",
- "REL Best Prompt",
- "REL Best Prompt Id",
- "Architecture",
- "Hub License",
- "#Params (B)",
- "Hub ❤️",
- "Available on the hub",
- "Model sha"
- "data": [
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2.5-14B-Instruct-1M" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Qwen/Qwen2.5-14B-Instruct-1M</a>",
- 44.36,
- 69.51,
- 66.88,
- 86.75,
- 4,
- 54.07,
- 47.09,
- 74.98,
- 3,
- 59.97,
- 54.95,
- 75.11,
- 4,
- 53.29,
- 47.3,
- 83.4,
- 3,
- 48.97,
- 40.07,
- 67.2,
- 6,
- 54.41,
- 53.74,
- 98.5,
- 3,
- 36.76,
- 35.98,
- 37.23,
- 2,
- 25.17,
- 25.1,
- 25.19,
- 2,
- 8.15,
- 6.63,
- 8.28,
- 2,
- 33.32,
- 33.06,
- 33.45,
- 1,
- "Qwen2ForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/microsoft/Phi-3-medium-4k-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/Phi-3-medium-4k-instruct</a>",
- 51.22,
- 76.27,
- 75.25,
- 80.5,
- 4,
- 70.48,
- 69.85,
- 72.1,
- 3,
- 62.83,
- 58.75,
- 75.2,
- 3,
- 61.47,
- 56.9,
- 81.2,
- 4,
- 68.92,
- 67.56,
- 72.47,
- 2,
- 52.17,
- 45.47,
- 81.3,
- 4,
- 35.87,
- 35.46,
- 36.11,
- 1,
- 21.59,
- 21.35,
- 21.65,
- 2,
- 25.67,
- 25.55,
- 25.71,
- 2,
- 36.95,
- 36.78,
- 37.04,
- 2,
- "Phi3ForCausalLM",
- "?",
- 14,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/microsoft/phi-4" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/phi-4</a>",
- 51.84,
- 76.84,
- 76.21,
- 79.25,
- 3,
- 75.21,
- 74.44,
- 77.95,
- 4,
- 69.2,
- 67.08,
- 75.89,
- 4,
- 61.91,
- 57.87,
- 85.2,
- 4,
- 59.38,
- 57.17,
- 63.16,
- 4,
- 54.3,
- 53.41,
- 98,
- 4,
- 35.1,
- 35.1,
- 35.11,
- 2,
- 21.24,
- 21.06,
- 21.29,
- 1,
- 28.05,
- 27.93,
- 28.1,
- 2,
- 37.15,
- 36.83,
- 37.34,
- 1,
- "Phi3ForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2.5-14B-Instruct-1M" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Qwen/Qwen2.5-14B-Instruct-1M</a>",
- 54.72,
- 85.02,
- 84.79,
- 86.5,
- 4,
- 73.17,
- 71.75,
- 78.26,
- 3,
- 72.8,
- 71.8,
- 75.96,
- 4,
- 59.75,
- 55.4,
- 85.6,
- 3,
- 63.59,
- 61.7,
- 67.52,
- 6,
- 52.61,
- 52.37,
- 99.5,
- 4,
- 35.03,
- 34.65,
- 35.24,
- 1,
- 25.9,
- 25.51,
- 26.04,
- 2,
- 35.1,
- 35.03,
- 35.14,
- 1,
- 44.23,
- 42.63,
- 45.56,
- 1,
- "Qwen2ForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2.5-7B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Qwen/Qwen2.5-7B-Instruct</a>",
- 45.5,
- 75.21,
- 73.67,
- 82.5,
- 4,
- 57.18,
- 51.62,
- 70.43,
- 4,
- 64.56,
- 61.54,
- 72.54,
- 4,
- 49.73,
- 41.03,
- 70.4,
- 3,
- 45.89,
- 35.48,
- 67.11,
- 5,
- 52.83,
- 51.58,
- 97.26,
- 4,
- 32.96,
- 32.42,
- 33.22,
- 2,
- 25.39,
- 24.9,
- 25.56,
- 2,
- 14.97,
- 10.24,
- 15.86,
- 2,
- 36.26,
- 36.11,
- 36.35,
- 1,
- "Qwen2ForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/google/gemma-3-4b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-3-4b-it</a>",
- 44.59,
- 66.22,
- 64.88,
- 69.25,
- 3,
- 60.41,
- 57.66,
- 65.65,
- 5,
- 59.8,
- 57.32,
- 64.25,
- 5,
- 46.01,
- 38.87,
- 54.6,
- 3,
- 50.8,
- 44.13,
- 61.44,
- 5,
- 52.55,
- 48.67,
- 90.77,
- 3,
- 32.37,
- 30.72,
- 33.18,
- 1,
- 29.24,
- 28.93,
- 29.37,
- 2,
- 29.76,
- 28.92,
- 30.13,
- 2,
- 18.7,
- 18.34,
- 18.78,
- 2,
- "?",
- "?",
- 0,
- 0,
- false,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">meta-llama/Meta-Llama-3.1-8B-Instruct</a>",
- 50.37,
- 70.53,
- 69.04,
- 75,
- 4,
- 73.77,
- 72.72,
- 77.35,
- 4,
- 65.72,
- 63.22,
- 72.22,
- 4,
- 53.89,
- 46.97,
- 70.2,
- 4,
- 59.03,
- 57.65,
- 61.2,
- 2,
- 53.73,
- 51.16,
- 94.01,
- 3,
- 30.32,
- 29.76,
- 30.57,
- 2,
- 20.44,
- 20.2,
- 20.5,
- 2,
- 40.3,
- 39.7,
- 40.72,
- 2,
- 35.99,
- 35.89,
- 36.05,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/arcee-ai/Llama-3.1-SuperNova-Lite" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">arcee-ai/Llama-3.1-SuperNova-Lite</a>",
- 51.71,
- 74.19,
- 73.62,
- 76,
- 3,
- 75.24,
- 74.23,
- 79.03,
- 4,
- 69.24,
- 67.58,
- 73.93,
- 4,
- 54.83,
- 48.43,
- 69.2,
- 4,
- 62.58,
- 61.35,
- 64.86,
- 6,
- 54.43,
- 51,
- 91.52,
- 3,
- 29.5,
- 29.28,
- 29.59,
- 2,
- 21.6,
- 21.38,
- 21.66,
- 2,
- 39.49,
- 38.24,
- 40.34,
- 2,
- 36.02,
- 35.67,
- 36.21,
- 1,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2.5-7B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Qwen/Qwen2.5-7B-Instruct</a>",
- 52.62,
- 83.65,
- 83.42,
- 85,
- 2,
- 74.16,
- 73.77,
- 75.35,
- 4,
- 68.51,
- 67.14,
- 72.04,
- 4,
- 53.92,
- 46.93,
- 75.8,
- 4,
- 62.7,
- 60.38,
- 67.5,
- 6,
- 52.46,
- 51.87,
- 98.75,
- 4,
- 28.17,
- 27.85,
- 28.29,
- 2,
- 30.79,
- 30.36,
- 30.98,
- 2,
- 35.18,
- 34.45,
- 35.58,
- 1,
- 36.69,
- 36.64,
- 36.72,
- 2,
- "Qwen2ForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/MoxoffSpA/Volare" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">MoxoffSpA/Volare</a>",
- 43.73,
- 66.86,
- 65.17,
- 71,
- 2,
- 68.39,
- 66.88,
- 72.33,
- 4,
- 49.59,
- 45.34,
- 54.73,
- 4,
- 48.92,
- 41.83,
- 59.2,
- 3,
- 44.42,
- 36.76,
- 53.1,
- 1,
- 49.76,
- 41.36,
- 75.06,
- 4,
- 28,
- 27.25,
- 28.29,
- 1,
- 17.48,
- 17.37,
- 17.5,
- 2,
- 31.55,
- 31.49,
- 31.57,
- 2,
- 32.34,
- 31.3,
- 32.85,
- 1,
- "GemmaForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/google/gemma-3-12b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-3-12b-it</a>",
- 53.92,
- 79.52,
- 79.12,
- 81.25,
- 1,
- 78.34,
- 78.17,
- 78.98,
- 6,
- 71.4,
- 70.14,
- 75.24,
- 4,
- 65.9,
- 62.5,
- 84.6,
- 4,
- 66.5,
- 64.94,
- 70.16,
- 3,
- 52.35,
- 52,
- 99.25,
- 3,
- 26.77,
- 26.64,
- 26.81,
- 2,
- 18.93,
- 18.74,
- 18.98,
- 2,
- 37.86,
- 37.82,
- 37.89,
- 1,
- 41.59,
- 41.46,
- 41.69,
- 1,
- "?",
- "?",
- 13,
- 0,
- false,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/ibm-granite/granite-3.1-8b-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">ibm-granite/granite-3.1-8b-instruct</a>",
- 51.7,
- 72.23,
- 70.92,
- 76.5,
- 4,
- 74.76,
- 74.47,
- 75.67,
- 1,
- 66.88,
- 64.05,
- 75.68,
- 4,
- 54.23,
- 48.17,
- 66,
- 4,
- 65.03,
- 63.71,
- 67.83,
- 4,
- 53.49,
- 52.58,
- 98,
- 3,
- 25.74,
- 25.6,
- 25.79,
- 1,
- 32.88,
- 32.36,
- 33.14,
- 1,
- 31.8,
- 30.59,
- 32.38,
- 2,
- 39.91,
- 39.7,
- 40.06,
- 1,
- "GraniteForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/google/gemma-2-9b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-2-9b-it</a>",
- 47.54,
- 71.99,
- 70.12,
- 79,
- 2,
- 64.35,
- 61.74,
- 70.63,
- 4,
- 64.22,
- 61.92,
- 69.42,
- 4,
- 57.94,
- 52.27,
- 77.8,
- 4,
- 42.48,
- 30.36,
- 64.67,
- 5,
- 51.53,
- 50.58,
- 98,
- 4,
- 25.45,
- 25.29,
- 25.5,
- 2,
- 30.48,
- 30.1,
- 30.65,
- 2,
- 32.01,
- 31.63,
- 32.2,
- 1,
- 34.97,
- 34.56,
- 35.19,
- 1,
- "Gemma2ForCausalLM",
- "?",
- 10,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/DeepMount00/Llama-3-8b-Ita" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">DeepMount00/Llama-3-8b-Ita</a>",
- 49.41,
- 72.61,
- 71.54,
- 76,
- 1,
- 71.9,
- 70.46,
- 76.59,
- 4,
- 68.95,
- 67.49,
- 72.86,
- 4,
- 52.96,
- 46.33,
- 65.6,
- 3,
- 57.04,
- 54.37,
- 61.26,
- 1,
- 53.68,
- 51.41,
- 94.76,
- 4,
- 25.08,
- 24.86,
- 25.16,
- 1,
- 24.3,
- 24.26,
- 24.31,
- 2,
- 30.98,
- 30.27,
- 31.31,
- 2,
- 36.56,
- 36.56,
- 36.56,
- 1,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/microsoft/Phi-3.5-mini-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/Phi-3.5-mini-instruct</a>",
- 50.06,
- 77.35,
- 76.92,
- 79,
- 2,
- 72.81,
- 72.21,
- 74.57,
- 5,
- 67.05,
- 65.31,
- 71.39,
- 4,
- 53.19,
- 46.5,
- 66.4,
- 3,
- 65.33,
- 63.97,
- 68.25,
- 6,
- 52.72,
- 47.92,
- 88.03,
- 4,
- 24.07,
- 23.86,
- 24.14,
- 2,
- 19.85,
- 19.71,
- 19.89,
- 1,
- 31.98,
- 31.5,
- 32.21,
- 1,
- 36.25,
- 36.15,
- 36.3,
- 2,
- "Phi3ForCausalLM",
- "?",
- 4,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/arcee-ai/Llama-3.1-SuperNova-Lite" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">arcee-ai/Llama-3.1-SuperNova-Lite</a>",
- 42.66,
- 70.06,
- 68.54,
- 74.5,
- 4,
- 66.85,
- 63.8,
- 77.12,
- 4,
- 56.41,
- 52.21,
- 63.8,
- 5,
- 51.47,
- 43.63,
- 68.6,
- 3,
- 50.8,
- 42.88,
- 66.58,
- 6,
- 54.89,
- 51.16,
- 90.52,
- 4,
- 23.74,
- 22.04,
- 24.29,
- 2,
- 22.77,
- 22.74,
- 22.78,
- 2,
- 9.98,
- 8.86,
- 10.11,
- 2,
- 19.61,
- 17.82,
- 20.06,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA</a>",
- 49.39,
- 72.91,
- 71.75,
- 76.75,
- 1,
- 72.07,
- 70.82,
- 76.05,
- 4,
- 70.99,
- 69.88,
- 74.19,
- 4,
- 52.81,
- 46.4,
- 64.4,
- 4,
- 56.82,
- 52.88,
- 63.74,
- 1,
- 56.55,
- 52.41,
- 88.53,
- 3,
- 23.26,
- 22.92,
- 23.37,
- 1,
- 29.1,
- 28.92,
- 29.18,
- 2,
- 17.2,
- 15.77,
- 17.51,
- 2,
- 42.14,
- 41.31,
- 42.76,
- 1,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/DeepMount00/Llama-3-8b-Ita" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">DeepMount00/Llama-3-8b-Ita</a>",
- 41.02,
- 58.59,
- 56,
- 63,
- 2,
- 62.59,
- 60.33,
- 67.21,
- 5,
- 49.12,
- 40.78,
- 63.86,
- 6,
- 49.04,
- 40.83,
- 63,
- 4,
- 55.44,
- 49.72,
- 67.12,
- 6,
- 51.22,
- 46.22,
- 88.03,
- 4,
- 22.59,
- 21.46,
- 22.93,
- 2,
- 22.76,
- 22.51,
- 22.84,
- 1,
- 25.08,
- 20.84,
- 26.62,
- 1,
- 13.78,
- 12.08,
- 14.06,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/mii-llm/maestrale-chat-v0.4-beta" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">mii-llm/maestrale-chat-v0.4-beta</a>",
- 49.37,
- 73.89,
- 72.62,
- 78.5,
- 4,
- 72.23,
- 71.75,
- 73.57,
- 2,
- 66.86,
- 64.99,
- 71.56,
- 3,
- 55.62,
- 50.7,
- 64.6,
- 3,
- 60.35,
- 59.48,
- 61.76,
- 6,
- 51.66,
- 43.81,
- 74.06,
- 3,
- 22.54,
- 22.36,
- 22.59,
- 1,
- 21.18,
- 21.18,
- 21.18,
- 2,
- 33.53,
- 33.24,
- 33.68,
- 1,
- 35.81,
- 35.36,
- 36.06,
- 2,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/FairMind/Llama-3-8B-4bit-UltraChat-Ita" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">FairMind/Llama-3-8B-4bit-UltraChat-Ita</a>",
- 44.93,
- 65.05,
- 62.62,
- 71,
- 1,
- 69.44,
- 68.97,
- 70.56,
- 4,
- 61.51,
- 60.03,
- 64.16,
- 4,
- 47.01,
- 39.57,
- 56.8,
- 3,
- 52.79,
- 47.59,
- 60.88,
- 3,
- 52.88,
- 48.26,
- 88.53,
- 4,
- 22.47,
- 21.8,
- 22.67,
- 2,
- 21.41,
- 21.25,
- 21.45,
- 1,
- 29.77,
- 29.17,
- 30.02,
- 2,
- 27,
- 26.27,
- 27.27,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/google/gemma-2-9b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-2-9b-it</a>",
- 53.64,
- 79.97,
- 79.62,
- 81.5,
- 2,
- 72.85,
- 72.49,
- 73.89,
- 1,
- 71.99,
- 70.71,
- 76.03,
- 3,
- 59.96,
- 54.9,
- 75.8,
- 3,
- 57.79,
- 55.64,
- 61.17,
- 3,
- 51.76,
- 51.16,
- 98.75,
- 3,
- 22.36,
- 21.97,
- 22.47,
- 2,
- 30.43,
- 29.12,
- 31.02,
- 1,
- 38.03,
- 37.92,
- 38.09,
- 1,
- 51.26,
- 50.98,
- 51.56,
- 1,
- "Gemma2ForCausalLM",
- "?",
- 10,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/google/gemma-3-12b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-3-12b-it</a>",
- 47.35,
- 71.3,
- 69.25,
- 79,
- 4,
- 61.91,
- 57.59,
- 74.6,
- 4,
- 67.19,
- 65.51,
- 71.4,
- 3,
- 58.57,
- 53.63,
- 83.4,
- 4,
- 46.06,
- 35.76,
- 66.76,
- 5,
- 52.91,
- 52.33,
- 98.75,
- 3,
- 21.85,
- 20.17,
- 22.33,
- 2,
- 29.93,
- 29.46,
- 30.13,
- 2,
- 31.5,
- 29.49,
- 32.47,
- 1,
- 32.23,
- 32.16,
- 32.27,
- 1,
- "?",
- "?",
- 13,
- 0,
- false,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/CohereForAI/aya-expanse-8b" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">CohereForAI/aya-expanse-8b</a>",
- 49.3,
- 72.91,
- 71.88,
- 76.25,
- 2,
- 72.28,
- 71.9,
- 73.3,
- 5,
- 67.71,
- 65.83,
- 72.71,
- 4,
- 52.54,
- 46.27,
- 63.4,
- 4,
- 62.02,
- 59.58,
- 66.98,
- 3,
- 54.39,
- 51.87,
- 94.01,
- 4,
- 20.55,
- 20.37,
- 20.6,
- 2,
- 18.32,
- 18.27,
- 18.33,
- 1,
- 35.73,
- 35.67,
- 35.77,
- 2,
- 36.56,
- 36.56,
- 36.56,
- 1,
- "CohereForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/mii-llm/maestrale-chat-v0.4-beta" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">mii-llm/maestrale-chat-v0.4-beta</a>",
- 41.04,
- 60.19,
- 55.42,
- 73.25,
- 4,
- 63.23,
- 61.22,
- 67.4,
- 4,
- 61.53,
- 60.62,
- 63.07,
- 6,
- 51.91,
- 45.17,
- 63.8,
- 3,
- 46.35,
- 36.19,
- 66.84,
- 6,
- 52.36,
- 46.09,
- 83.04,
- 4,
- 20.38,
- 19.22,
- 20.68,
- 2,
- 25.69,
- 25.14,
- 25.89,
- 2,
- 5.02,
- 4.62,
- 5.05,
- 1,
- 23.76,
- 21.98,
- 24.33,
- 2,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/microsoft/Phi-3.5-mini-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/Phi-3.5-mini-instruct</a>",
- 44.4,
- 72.16,
- 70.04,
- 81.5,
- 4,
- 51.63,
- 43.72,
- 70.59,
- 3,
- 65.93,
- 64.49,
- 69.17,
- 4,
- 48.9,
- 40.67,
- 62.8,
- 3,
- 60.37,
- 56.97,
- 67.41,
- 5,
- 51.52,
- 44.22,
- 79.05,
- 3,
- 20.38,
- 18.28,
- 20.94,
- 2,
- 23.24,
- 22.7,
- 23.4,
- 2,
- 30.58,
- 30.35,
- 30.68,
- 2,
- 19.33,
- 14.4,
- 20.61,
- 1,
- "Phi3ForCausalLM",
- "?",
- 4,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">meta-llama/Meta-Llama-3.1-8B-Instruct</a>",
- 40.23,
- 62.73,
- 60.62,
- 67,
- 1,
- 63.07,
- 59.8,
- 71.12,
- 5,
- 51.39,
- 44.49,
- 63.26,
- 5,
- 50.55,
- 42.27,
- 69,
- 4,
- 55.86,
- 50.4,
- 66.93,
- 6,
- 53.12,
- 48.29,
- 87.78,
- 4,
- 19.94,
- 15.75,
- 21.06,
- 2,
- 22.33,
- 22.3,
- 22.34,
- 1,
- 7.93,
- 7.57,
- 7.96,
- 1,
- 15.42,
- 15.08,
- 15.48,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA</a>",
- 41.74,
- 62.14,
- 58.92,
- 69.5,
- 2,
- 64.05,
- 60.96,
- 72.04,
- 4,
- 48.59,
- 39.59,
- 66.32,
- 6,
- 48.85,
- 40.73,
- 62.2,
- 4,
- 57.27,
- 52.6,
- 66.57,
- 6,
- 51.01,
- 42.85,
- 71.82,
- 4,
- 19.37,
- 19.35,
- 19.37,
- 1,
- 22.63,
- 22.37,
- 22.71,
- 2,
- 22.65,
- 19.02,
- 23.79,
- 1,
- 20.79,
- 17.81,
- 21.61,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/CohereForAI/aya-expanse-8b" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">CohereForAI/aya-expanse-8b</a>",
- 39.85,
- 64,
- 60.33,
- 75,
- 1,
- 63.23,
- 60.16,
- 70.62,
- 4,
- 61.4,
- 60.48,
- 62.97,
- 4,
- 46.66,
- 38.53,
- 57.8,
- 4,
- 52.79,
- 45.95,
- 66.19,
- 3,
- 47.56,
- 38.03,
- 66.33,
- 4,
- 15.24,
- 11.62,
- 15.92,
- 2,
- 19.14,
- 18.84,
- 19.21,
- 2,
- 19.08,
- 17.36,
- 19.5,
- 2,
- 9.37,
- 7.52,
- 9.57,
- 2,
- "CohereForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/google/gemma-3-4b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-3-4b-it</a>",
- 46.57,
- 68.28,
- 66.46,
- 73.25,
- 2,
- 75.3,
- 74.96,
- 76.37,
- 4,
- 65.02,
- 62.43,
- 71.53,
- 4,
- 51.92,
- 46.53,
- 60,
- 4,
- 49.02,
- 41.31,
- 61.17,
- 6,
- 51.66,
- 50.04,
- 96.51,
- 3,
- 14.55,
- 14.06,
- 14.64,
- 1,
- 17.96,
- 17.95,
- 17.96,
- 1,
- 36,
- 35.79,
- 36.12,
- 1,
- 36.02,
- 35.98,
- 36.04,
- 1,
- "?",
- "?",
- 0,
- 0,
- false,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">mistralai/Mistral-7B-Instruct-v0.3</a>",
- 41.56,
- 63.83,
- 60.83,
- 71.25,
- 1,
- 61.23,
- 58.71,
- 66.14,
- 6,
- 59.16,
- 55.86,
- 65.39,
- 5,
- 46.03,
- 39.23,
- 54,
- 3,
- 62.89,
- 61.08,
- 66.49,
- 2,
- 53.28,
- 47.63,
- 84.79,
- 4,
- 9.87,
- 6.47,
- 10.26,
- 2,
- 28.1,
- 27.28,
- 28.43,
- 2,
- 9.72,
- 9.64,
- 9.73,
- 1,
- 21.49,
- 20.29,
- 21.83,
- 2,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">sapienzanlp/Minerva-7B-base-v1.0</a>",
- 35.06,
- 68.71,
- 66.79,
- 74.25,
- 4,
- 53.85,
- 53.14,
- 54.7,
- 5,
- 39.49,
- 34.45,
- 43.34,
- 5,
- 38.04,
- 36.83,
- 38.8,
- 6,
- 43.99,
- 34.55,
- 56,
- 5,
- 28.45,
- 27.27,
- 28.93,
- 1,
- 9.53,
- 9.51,
- 9.53,
- 2,
- 15.94,
- 15.92,
- 15.95,
- 1,
- 24.37,
- 24.29,
- 24.4,
- 1,
- 28.2,
- 26.64,
- 28.83,
- 1,
- "MistralForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/Almawave/Velvet-14B" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Almawave/Velvet-14B</a>",
- 42.69,
- 74.99,
- 74.46,
- 76.75,
- 2,
- 68.28,
- 67.99,
- 68.93,
- 1,
- 67.17,
- 66.35,
- 68.97,
- 4,
- 54.18,
- 49.73,
- 61.2,
- 4,
- 29.29,
- 15.41,
- 37.69,
- 6,
- 49.28,
- 41.73,
- 80.55,
- 4,
- 9.43,
- 9.21,
- 9.45,
- 1,
- 34.66,
- 34.25,
- 34.88,
- 2,
- 24.83,
- 24.48,
- 24.94,
- 2,
- 14.79,
- 13.08,
- 15.1,
- 1,
- "MistralForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/occiglot/occiglot-7b-it-en-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">occiglot/occiglot-7b-it-en-instruct</a>",
- 43.69,
- 64.34,
- 61.83,
- 70.25,
- 2,
- 69.04,
- 68.43,
- 70.48,
- 4,
- 61.77,
- 58.3,
- 69.78,
- 4,
- 54.16,
- 49.43,
- 61.8,
- 4,
- 55.72,
- 51.2,
- 63.65,
- 3,
- 51.6,
- 43.68,
- 73.07,
- 4,
- 5.6,
- 5.4,
- 5.61,
- 2,
- 21.25,
- 20.55,
- 21.44,
- 2,
- 25.15,
- 24.93,
- 25.22,
- 2,
- 28.28,
- 27.2,
- 28.71,
- 1,
- "MistralForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/sapienzanlp/Minerva-7B-instruct-v1.0" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">sapienzanlp/Minerva-7B-instruct-v1.0</a>",
- 35.7,
- 67.25,
- 65.33,
- 72.25,
- 3,
- 57.68,
- 54.91,
- 62.25,
- 6,
- 41.59,
- 30.89,
- 54.29,
- 5,
- 37.84,
- 37.27,
- 38.2,
- 2,
- 45.48,
- 36.89,
- 56.73,
- 5,
- 29.88,
- 28.64,
- 30.42,
- 4,
- 5,
- 4.88,
- 5.01,
- 2,
- 15.36,
- 15.12,
- 15.4,
- 2,
- 27.24,
- 27.04,
- 27.32,
- 2,
- 29.69,
- 29.36,
- 29.83,
- 1,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/occiglot/occiglot-7b-it-en-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">occiglot/occiglot-7b-it-en-instruct</a>",
- 38,
- 52.59,
- 49.92,
- 56,
- 4,
- 55.46,
- 51.87,
- 61.09,
- 6,
- 53.79,
- 48.48,
- 62.72,
- 1,
- 47.95,
- 42.8,
- 54,
- 3,
- 50.78,
- 42.86,
- 66.49,
- 5,
- 49.64,
- 42.89,
- 83.29,
- 4,
- 2.84,
- 1.56,
- 2.88,
- 2,
- 27.68,
- 25.5,
- 28.56,
- 2,
- 6.96,
- 5.26,
- 7.09,
- 1,
- 32.31,
- 32.03,
- 32.45,
- 2,
- "MistralForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/microsoft/Phi-3-medium-4k-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/Phi-3-medium-4k-instruct</a>",
- 42.09,
- 66.01,
- 62.58,
- 78.5,
- 3,
- 55.31,
- 48.78,
- 72.58,
- 3,
- 65.19,
- 62.35,
- 72.75,
- 4,
- 54.11,
- 47.67,
- 80.2,
- 3,
- 53.5,
- 46.92,
- 66.67,
- 1,
- 53.24,
- 48.67,
- 88.53,
- 4,
- 0.34,
- 0.31,
- 0.34,
- 2,
- 27.64,
- 26.52,
- 28.08,
- 2,
- 18.6,
- 17.48,
- 18.87,
- 1,
- 27.01,
- 26.99,
- 27.01,
- 2,
- "Phi3ForCausalLM",
- "?",
- 14,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/ibm-granite/granite-3.1-8b-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">ibm-granite/granite-3.1-8b-instruct</a>",
- 37.26,
- 56.34,
- 51.08,
- 67,
- 2,
- 54.43,
- 47.82,
- 69.45,
- 4,
- 48.15,
- 39.51,
- 62.64,
- 1,
- 49.04,
- 41.97,
- 59.4,
- 3,
- 50.24,
- 42,
- 66.85,
- 5,
- 54.7,
- 51.29,
- 91.52,
- 4,
- 0.17,
- 0.08,
- 0.17,
- 2,
- 30.28,
- 30.04,
- 30.39,
- 1,
- 18.09,
- 16.03,
- 18.57,
- 2,
- 11.21,
- 9.17,
- 11.47,
- 2,
- "GraniteForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/Almawave/Velvet-14B" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Almawave/Velvet-14B</a>",
- 39.48,
- 69.96,
- 67.62,
- 78.5,
- 1,
- 62.53,
- 60.49,
- 66.59,
- 5,
- 59.27,
- 55.93,
- 65.64,
- 3,
- 48.79,
- 42.4,
- 57.4,
- 4,
- 47.33,
- 37.91,
- 64.31,
- 5,
- 50.08,
- 45.47,
- 89.53,
- 4,
- 0.13,
- 0.06,
- 0.13,
- 1,
- 31.1,
- 31.09,
- 31.11,
- 2,
- 15.89,
- 15.75,
- 15.91,
- 2,
- 9.68,
- 6.96,
- 9.98,
- 1,
- "MistralForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/MoxoffSpA/Volare" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">MoxoffSpA/Volare</a>",
- 30.23,
- 53.14,
- 51.25,
- 55.5,
- 6,
- 50.1,
- 44.15,
- 58.49,
- 6,
- 50.7,
- 43.57,
- 62.7,
- 2,
- 26.13,
- 22.77,
- 27.4,
- 3,
- 40.77,
- 27.8,
- 66.4,
- 5,
- 27.95,
- 27.35,
- 28.18,
- 1,
- 0.04,
- 0.02,
- 0.04,
- 1,
- 23.28,
- 23.15,
- 23.32,
- 1,
- 10.77,
- 10.63,
- 10.78,
- 2,
- 19.47,
- 15.95,
- 20.37,
- 1,
- "GemmaForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">sapienzanlp/Minerva-7B-base-v1.0</a>",
- 32.36,
- 56.93,
- 54.46,
- 60.75,
- 1,
- 56.56,
- 50.62,
- 71.33,
- 6,
- 53.3,
- 48.04,
- 61.82,
- 2,
- 31.37,
- 27.7,
- 33.2,
- 5,
- 61.49,
- 59.12,
- 66.13,
- 5,
- 29.12,
- 27.81,
- 29.68,
- 5,
- 0.01,
- 0,
- 0.01,
- 2,
- 16.27,
- 16.04,
- 16.31,
- 1,
- 9.64,
- 9.62,
- 9.64,
- 2,
- 8.91,
- 8.09,
- 8.99,
- 2,
- "MistralForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/microsoft/phi-4" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/phi-4</a>",
- 38.37,
- 63.85,
- 60.42,
- 73.25,
- 4,
- 50.54,
- 42.31,
- 74.26,
- 4,
- 61.42,
- 60.41,
- 63.15,
- 4,
- 55.69,
- 50.13,
- 83.2,
- 3,
- 54.67,
- 48.23,
- 69.01,
- 3,
- 53.53,
- 51.45,
- 95.26,
- 4,
- 0,
- 0,
- 0,
- 1,
- 22.5,
- 13.28,
- 25.69,
- 1,
- 0.36,
- 0.18,
- 0.36,
- 1,
- 21.17,
- 19.38,
- 21.66,
- 1,
- "Phi3ForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/FairMind/Llama-3-8B-4bit-UltraChat-Ita" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">FairMind/Llama-3-8B-4bit-UltraChat-Ita</a>",
- 36.28,
- 60.29,
- 58.08,
- 64.25,
- 6,
- 52.97,
- 46.01,
- 67.3,
- 5,
- 54.09,
- 48.72,
- 63.38,
- 5,
- 41.41,
- 33.37,
- 49.2,
- 3,
- 66.22,
- 66.04,
- 66.58,
- 4,
- 43.89,
- 34.54,
- 55.61,
- 4,
- 0,
- 0,
- 0,
- 1,
- 24.17,
- 23.6,
- 24.35,
- 2,
- 15.59,
- 13.72,
- 15.95,
- 2,
- 4.2,
- 2.62,
- 4.27,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/sapienzanlp/Minerva-7B-instruct-v1.0" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">sapienzanlp/Minerva-7B-instruct-v1.0</a>",
- 32.5,
- 55.16,
- 54.12,
- 56.5,
- 4,
- 50.57,
- 44.51,
- 59.46,
- 6,
- 47.62,
- 39.22,
- 60.48,
- 2,
- 32.25,
- 28.87,
- 34,
- 6,
- 57.7,
- 53.42,
- 66.04,
- 5,
- 35.17,
- 31.05,
- 37.66,
- 4,
- 0,
- 0,
- 0,
- 1,
- 16.34,
- 16.22,
- 16.36,
- 2,
- 9.61,
- 9.16,
- 9.66,
- 1,
- 20.6,
- 17.75,
- 21.38,
- 2,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "metadata": null
Summarization (SU) --- Generative task
The input is a news article. The model must generate a concise summary of the input text, capturing the key information and main points.
# | Prompt |
---|---|
1 | Riassumi il seguente articolo di giornale: '{{source}}'\nRiassunto: |
2 | Devi risolvere un compito di sintesi automatica del testo. Riassumi il seguente articolo di giornale: '{{source}}'\nRiassunto: |
Combined Performance = (1 - (Best Prompt - Prompt Average) / 100) * Best Prompt. Prompt Average = F1 averaged over the 2 prompts. Best Prompt = F1 of the best prompt. Prompt ID = ID of the best prompt (see legend above).
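To make the scoring concrete, here is a minimal Python sketch of the formula (the function and variable names are ours, not part of the leaderboard code). It reproduces the SU scores of google/gemma-2-9b-it from the 5-shot rows below, and also checks that the Avg. Combined Performance column matches the plain mean of that model's ten per-task combined scores, which is what the column appears to be.

```python
def combined_performance(prompt_avg: float, best_prompt: float) -> float:
    """Combined Performance = (1 - (Best Prompt - Prompt Average) / 100) * Best Prompt."""
    return (1 - (best_prompt - prompt_avg) / 100) * best_prompt

# SU row for google/gemma-2-9b-it (5-shot): Prompt Average = 29.12, Best Prompt = 31.02
print(round(combined_performance(29.12, 31.02), 2))  # 30.43, as reported

# Avg. Combined Performance appears to be the mean of the ten per-task
# combined scores; for google/gemma-2-9b-it (5-shot):
per_task = [79.97, 72.85, 71.99, 59.96, 57.79, 51.76, 22.36, 30.43, 38.03, 51.26]
print(round(sum(per_task) / len(per_task), 2))  # 53.64, as reported
```

The penalty term vanishes when both prompts score identically and grows with the gap between best and average, so prompt-sensitive models end up below their best-prompt F1.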
- "headers": [
- "FS",
- "IS_FS",
- "Model",
- "Avg. Combined Performance ⬆️",
- "TE",
- "TE Prompt Average",
- "TE Best Prompt",
- "TE Best Prompt Id",
- "SA",
- "SA Prompt Average",
- "SA Best Prompt",
- "SA Best Prompt Id",
- "HS",
- "HS Prompt Average",
- "HS Best Prompt",
- "HS Best Prompt Id",
- "AT",
- "AT Prompt Average",
- "AT Best Prompt",
- "AT Best Prompt Id",
- "WIC",
- "WIC Prompt Average",
- "WIC Best Prompt",
- "WIC Best Prompt Id",
- "FAQ",
- "FAQ Prompt Average",
- "FAQ Best Prompt",
- "FAQ Best Prompt Id",
- "LS",
- "LS Prompt Average",
- "LS Best Prompt",
- "LS Best Prompt Id",
- "Combined Performance",
- "Prompt Average",
- "Best Prompt",
- "Best Prompt Id",
- "NER",
- "NER Prompt Average",
- "NER Best Prompt",
- "NER Best Prompt Id",
- "REL",
- "REL Prompt Average",
- "REL Best Prompt",
- "REL Best Prompt Id",
- "Architecture",
- "Hub License",
- "#Params (B)",
- "Hub ❤️",
- "Available on the hub",
- "Model sha"
- "data": [
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/Almawave/Velvet-14B" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Almawave/Velvet-14B</a>",
- 42.69,
- 74.99,
- 74.46,
- 76.75,
- 2,
- 68.28,
- 67.99,
- 68.93,
- 1,
- 67.17,
- 66.35,
- 68.97,
- 4,
- 54.18,
- 49.73,
- 61.2,
- 4,
- 29.29,
- 15.41,
- 37.69,
- 6,
- 49.28,
- 41.73,
- 80.55,
- 4,
- 9.43,
- 9.21,
- 9.45,
- 1,
- 34.66,
- 34.25,
- 34.88,
- 2,
- 24.83,
- 24.48,
- 24.94,
- 2,
- 14.79,
- 13.08,
- 15.1,
- 1,
- "MistralForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/ibm-granite/granite-3.1-8b-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">ibm-granite/granite-3.1-8b-instruct</a>",
- 51.7,
- 72.23,
- 70.92,
- 76.5,
- 4,
- 74.76,
- 74.47,
- 75.67,
- 1,
- 66.88,
- 64.05,
- 75.68,
- 4,
- 54.23,
- 48.17,
- 66,
- 4,
- 65.03,
- 63.71,
- 67.83,
- 4,
- 53.49,
- 52.58,
- 98,
- 3,
- 25.74,
- 25.6,
- 25.79,
- 1,
- 32.88,
- 32.36,
- 33.14,
- 1,
- 31.8,
- 30.59,
- 32.38,
- 2,
- 39.91,
- 39.7,
- 40.06,
- 1,
- "GraniteForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/Almawave/Velvet-14B" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Almawave/Velvet-14B</a>",
- 39.48,
- 69.96,
- 67.62,
- 78.5,
- 1,
- 62.53,
- 60.49,
- 66.59,
- 5,
- 59.27,
- 55.93,
- 65.64,
- 3,
- 48.79,
- 42.4,
- 57.4,
- 4,
- 47.33,
- 37.91,
- 64.31,
- 5,
- 50.08,
- 45.47,
- 89.53,
- 4,
- 0.13,
- 0.06,
- 0.13,
- 1,
- 31.1,
- 31.09,
- 31.11,
- 2,
- 15.89,
- 15.75,
- 15.91,
- 2,
- 9.68,
- 6.96,
- 9.98,
- 1,
- "MistralForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2.5-7B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Qwen/Qwen2.5-7B-Instruct</a>",
- 52.62,
- 83.65,
- 83.42,
- 85,
- 2,
- 74.16,
- 73.77,
- 75.35,
- 4,
- 68.51,
- 67.14,
- 72.04,
- 4,
- 53.92,
- 46.93,
- 75.8,
- 4,
- 62.7,
- 60.38,
- 67.5,
- 6,
- 52.46,
- 51.87,
- 98.75,
- 4,
- 28.17,
- 27.85,
- 28.29,
- 2,
- 30.79,
- 30.36,
- 30.98,
- 2,
- 35.18,
- 34.45,
- 35.58,
- 1,
- 36.69,
- 36.64,
- 36.72,
- 2,
- "Qwen2ForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/google/gemma-2-9b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-2-9b-it</a>",
- 47.54,
- 71.99,
- 70.12,
- 79,
- 2,
- 64.35,
- 61.74,
- 70.63,
- 4,
- 64.22,
- 61.92,
- 69.42,
- 4,
- 57.94,
- 52.27,
- 77.8,
- 4,
- 42.48,
- 30.36,
- 64.67,
- 5,
- 51.53,
- 50.58,
- 98,
- 4,
- 25.45,
- 25.29,
- 25.5,
- 2,
- 30.48,
- 30.1,
- 30.65,
- 2,
- 32.01,
- 31.63,
- 32.2,
- 1,
- 34.97,
- 34.56,
- 35.19,
- 1,
- "Gemma2ForCausalLM",
- "?",
- 10,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/google/gemma-2-9b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-2-9b-it</a>",
- 53.64,
- 79.97,
- 79.62,
- 81.5,
- 2,
- 72.85,
- 72.49,
- 73.89,
- 1,
- 71.99,
- 70.71,
- 76.03,
- 3,
- 59.96,
- 54.9,
- 75.8,
- 3,
- 57.79,
- 55.64,
- 61.17,
- 3,
- 51.76,
- 51.16,
- 98.75,
- 3,
- 22.36,
- 21.97,
- 22.47,
- 2,
- 30.43,
- 29.12,
- 31.02,
- 1,
- 38.03,
- 37.92,
- 38.09,
- 1,
- 51.26,
- 50.98,
- 51.56,
- 1,
- "Gemma2ForCausalLM",
- "?",
- 10,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/ibm-granite/granite-3.1-8b-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">ibm-granite/granite-3.1-8b-instruct</a>",
- 37.26,
- 56.34,
- 51.08,
- 67,
- 2,
- 54.43,
- 47.82,
- 69.45,
- 4,
- 48.15,
- 39.51,
- 62.64,
- 1,
- 49.04,
- 41.97,
- 59.4,
- 3,
- 50.24,
- 42,
- 66.85,
- 5,
- 54.7,
- 51.29,
- 91.52,
- 4,
- 0.17,
- 0.08,
- 0.17,
- 2,
- 30.28,
- 30.04,
- 30.39,
- 1,
- 18.09,
- 16.03,
- 18.57,
- 2,
- 11.21,
- 9.17,
- 11.47,
- 2,
- "GraniteForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/google/gemma-3-12b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-3-12b-it</a>",
- 47.35,
- 71.3,
- 69.25,
- 79,
- 4,
- 61.91,
- 57.59,
- 74.6,
- 4,
- 67.19,
- 65.51,
- 71.4,
- 3,
- 58.57,
- 53.63,
- 83.4,
- 4,
- 46.06,
- 35.76,
- 66.76,
- 5,
- 52.91,
- 52.33,
- 98.75,
- 3,
- 21.85,
- 20.17,
- 22.33,
- 2,
- 29.93,
- 29.46,
- 30.13,
- 2,
- 31.5,
- 29.49,
- 32.47,
- 1,
- 32.23,
- 32.16,
- 32.27,
- 1,
- "?",
- "?",
- 13,
- 0,
- false,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/google/gemma-3-4b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-3-4b-it</a>",
- 44.59,
- 66.22,
- 64.88,
- 69.25,
- 3,
- 60.41,
- 57.66,
- 65.65,
- 5,
- 59.8,
- 57.32,
- 64.25,
- 5,
- 46.01,
- 38.87,
- 54.6,
- 3,
- 50.8,
- 44.13,
- 61.44,
- 5,
- 52.55,
- 48.67,
- 90.77,
- 3,
- 32.37,
- 30.72,
- 33.18,
- 1,
- 29.24,
- 28.93,
- 29.37,
- 2,
- 29.76,
- 28.92,
- 30.13,
- 2,
- 18.7,
- 18.34,
- 18.78,
- 2,
- "?",
- "?",
- 0,
- 0,
- false,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA</a>",
- 49.39,
- 72.91,
- 71.75,
- 76.75,
- 1,
- 72.07,
- 70.82,
- 76.05,
- 4,
- 70.99,
- 69.88,
- 74.19,
- 4,
- 52.81,
- 46.4,
- 64.4,
- 4,
- 56.82,
- 52.88,
- 63.74,
- 1,
- 56.55,
- 52.41,
- 88.53,
- 3,
- 23.26,
- 22.92,
- 23.37,
- 1,
- 29.1,
- 28.92,
- 29.18,
- 2,
- 17.2,
- 15.77,
- 17.51,
- 2,
- 42.14,
- 41.31,
- 42.76,
- 1,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">mistralai/Mistral-7B-Instruct-v0.3</a>",
- 41.56,
- 63.83,
- 60.83,
- 71.25,
- 1,
- 61.23,
- 58.71,
- 66.14,
- 6,
- 59.16,
- 55.86,
- 65.39,
- 5,
- 46.03,
- 39.23,
- 54,
- 3,
- 62.89,
- 61.08,
- 66.49,
- 2,
- 53.28,
- 47.63,
- 84.79,
- 4,
- 9.87,
- 6.47,
- 10.26,
- 2,
- 28.1,
- 27.28,
- 28.43,
- 2,
- 9.72,
- 9.64,
- 9.73,
- 1,
- 21.49,
- 20.29,
- 21.83,
- 2,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/occiglot/occiglot-7b-it-en-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">occiglot/occiglot-7b-it-en-instruct</a>",
- 38,
- 52.59,
- 49.92,
- 56,
- 4,
- 55.46,
- 51.87,
- 61.09,
- 6,
- 53.79,
- 48.48,
- 62.72,
- 1,
- 47.95,
- 42.8,
- 54,
- 3,
- 50.78,
- 42.86,
- 66.49,
- 5,
- 49.64,
- 42.89,
- 83.29,
- 4,
- 2.84,
- 1.56,
- 2.88,
- 2,
- 27.68,
- 25.5,
- 28.56,
- 2,
- 6.96,
- 5.26,
- 7.09,
- 1,
- 32.31,
- 32.03,
- 32.45,
- 2,
- "MistralForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/microsoft/Phi-3-medium-4k-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/Phi-3-medium-4k-instruct</a>",
- 42.09,
- 66.01,
- 62.58,
- 78.5,
- 3,
- 55.31,
- 48.78,
- 72.58,
- 3,
- 65.19,
- 62.35,
- 72.75,
- 4,
- 54.11,
- 47.67,
- 80.2,
- 3,
- 53.5,
- 46.92,
- 66.67,
- 1,
- 53.24,
- 48.67,
- 88.53,
- 4,
- 0.34,
- 0.31,
- 0.34,
- 2,
- 27.64,
- 26.52,
- 28.08,
- 2,
- 18.6,
- 17.48,
- 18.87,
- 1,
- 27.01,
- 26.99,
- 27.01,
- 2,
- "Phi3ForCausalLM",
- "?",
- 14,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2.5-14B-Instruct-1M" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Qwen/Qwen2.5-14B-Instruct-1M</a>",
- 54.72,
- 85.02,
- 84.79,
- 86.5,
- 4,
- 73.17,
- 71.75,
- 78.26,
- 3,
- 72.8,
- 71.8,
- 75.96,
- 4,
- 59.75,
- 55.4,
- 85.6,
- 3,
- 63.59,
- 61.7,
- 67.52,
- 6,
- 52.61,
- 52.37,
- 99.5,
- 4,
- 35.03,
- 34.65,
- 35.24,
- 1,
- 25.9,
- 25.51,
- 26.04,
- 2,
- 35.1,
- 35.03,
- 35.14,
- 1,
- 44.23,
- 42.63,
- 45.56,
- 1,
- "Qwen2ForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/mii-llm/maestrale-chat-v0.4-beta" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">mii-llm/maestrale-chat-v0.4-beta</a>",
- 41.04,
- 60.19,
- 55.42,
- 73.25,
- 4,
- 63.23,
- 61.22,
- 67.4,
- 4,
- 61.53,
- 60.62,
- 63.07,
- 6,
- 51.91,
- 45.17,
- 63.8,
- 3,
- 46.35,
- 36.19,
- 66.84,
- 6,
- 52.36,
- 46.09,
- 83.04,
- 4,
- 20.38,
- 19.22,
- 20.68,
- 2,
- 25.69,
- 25.14,
- 25.89,
- 2,
- 5.02,
- 4.62,
- 5.05,
- 1,
- 23.76,
- 21.98,
- 24.33,
- 2,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2.5-7B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Qwen/Qwen2.5-7B-Instruct</a>",
- 45.5,
- 75.21,
- 73.67,
- 82.5,
- 4,
- 57.18,
- 51.62,
- 70.43,
- 4,
- 64.56,
- 61.54,
- 72.54,
- 4,
- 49.73,
- 41.03,
- 70.4,
- 3,
- 45.89,
- 35.48,
- 67.11,
- 5,
- 52.83,
- 51.58,
- 97.26,
- 4,
- 32.96,
- 32.42,
- 33.22,
- 2,
- 25.39,
- 24.9,
- 25.56,
- 2,
- 14.97,
- 10.24,
- 15.86,
- 2,
- 36.26,
- 36.11,
- 36.35,
- 1,
- "Qwen2ForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2.5-14B-Instruct-1M" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Qwen/Qwen2.5-14B-Instruct-1M</a>",
- 44.36,
- 69.51,
- 66.88,
- 86.75,
- 4,
- 54.07,
- 47.09,
- 74.98,
- 3,
- 59.97,
- 54.95,
- 75.11,
- 4,
- 53.29,
- 47.3,
- 83.4,
- 3,
- 48.97,
- 40.07,
- 67.2,
- 6,
- 54.41,
- 53.74,
- 98.5,
- 3,
- 36.76,
- 35.98,
- 37.23,
- 2,
- 25.17,
- 25.1,
- 25.19,
- 2,
- 8.15,
- 6.63,
- 8.28,
- 2,
- 33.32,
- 33.06,
- 33.45,
- 1,
- "Qwen2ForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/DeepMount00/Llama-3-8b-Ita" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">DeepMount00/Llama-3-8b-Ita</a>",
- 49.41,
- 72.61,
- 71.54,
- 76,
- 1,
- 71.9,
- 70.46,
- 76.59,
- 4,
- 68.95,
- 67.49,
- 72.86,
- 4,
- 52.96,
- 46.33,
- 65.6,
- 3,
- 57.04,
- 54.37,
- 61.26,
- 1,
- 53.68,
- 51.41,
- 94.76,
- 4,
- 25.08,
- 24.86,
- 25.16,
- 1,
- 24.3,
- 24.26,
- 24.31,
- 2,
- 30.98,
- 30.27,
- 31.31,
- 2,
- 36.56,
- 36.56,
- 36.56,
- 1,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/FairMind/Llama-3-8B-4bit-UltraChat-Ita" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">FairMind/Llama-3-8B-4bit-UltraChat-Ita</a>",
- 36.28,
- 60.29,
- 58.08,
- 64.25,
- 6,
- 52.97,
- 46.01,
- 67.3,
- 5,
- 54.09,
- 48.72,
- 63.38,
- 5,
- 41.41,
- 33.37,
- 49.2,
- 3,
- 66.22,
- 66.04,
- 66.58,
- 4,
- 43.89,
- 34.54,
- 55.61,
- 4,
- 0,
- 0,
- 0,
- 1,
- 24.17,
- 23.6,
- 24.35,
- 2,
- 15.59,
- 13.72,
- 15.95,
- 2,
- 4.2,
- 2.62,
- 4.27,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/MoxoffSpA/Volare" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">MoxoffSpA/Volare</a>",
- 30.23,
- 53.14,
- 51.25,
- 55.5,
- 6,
- 50.1,
- 44.15,
- 58.49,
- 6,
- 50.7,
- 43.57,
- 62.7,
- 2,
- 26.13,
- 22.77,
- 27.4,
- 3,
- 40.77,
- 27.8,
- 66.4,
- 5,
- 27.95,
- 27.35,
- 28.18,
- 1,
- 0.04,
- 0.02,
- 0.04,
- 1,
- 23.28,
- 23.15,
- 23.32,
- 1,
- 10.77,
- 10.63,
- 10.78,
- 2,
- 19.47,
- 15.95,
- 20.37,
- 1,
- "GemmaForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/microsoft/Phi-3.5-mini-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/Phi-3.5-mini-instruct</a>",
- 44.4,
- 72.16,
- 70.04,
- 81.5,
- 4,
- 51.63,
- 43.72,
- 70.59,
- 3,
- 65.93,
- 64.49,
- 69.17,
- 4,
- 48.9,
- 40.67,
- 62.8,
- 3,
- 60.37,
- 56.97,
- 67.41,
- 5,
- 51.52,
- 44.22,
- 79.05,
- 3,
- 20.38,
- 18.28,
- 20.94,
- 2,
- 23.24,
- 22.7,
- 23.4,
- 2,
- 30.58,
- 30.35,
- 30.68,
- 2,
- 19.33,
- 14.4,
- 20.61,
- 1,
- "Phi3ForCausalLM",
- "?",
- 4,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/arcee-ai/Llama-3.1-SuperNova-Lite" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">arcee-ai/Llama-3.1-SuperNova-Lite</a>",
- 42.66,
- 70.06,
- 68.54,
- 74.5,
- 4,
- 66.85,
- 63.8,
- 77.12,
- 4,
- 56.41,
- 52.21,
- 63.8,
- 5,
- 51.47,
- 43.63,
- 68.6,
- 3,
- 50.8,
- 42.88,
- 66.58,
- 6,
- 54.89,
- 51.16,
- 90.52,
- 4,
- 23.74,
- 22.04,
- 24.29,
- 2,
- 22.77,
- 22.74,
- 22.78,
- 2,
- 9.98,
- 8.86,
- 10.11,
- 2,
- 19.61,
- 17.82,
- 20.06,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/DeepMount00/Llama-3-8b-Ita" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">DeepMount00/Llama-3-8b-Ita</a>",
- 41.02,
- 58.59,
- 56,
- 63,
- 2,
- 62.59,
- 60.33,
- 67.21,
- 5,
- 49.12,
- 40.78,
- 63.86,
- 6,
- 49.04,
- 40.83,
- 63,
- 4,
- 55.44,
- 49.72,
- 67.12,
- 6,
- 51.22,
- 46.22,
- 88.03,
- 4,
- 22.59,
- 21.46,
- 22.93,
- 2,
- 22.76,
- 22.51,
- 22.84,
- 1,
- 25.08,
- 20.84,
- 26.62,
- 1,
- 13.78,
- 12.08,
- 14.06,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA</a>",
- 41.74,
- 62.14,
- 58.92,
- 69.5,
- 2,
- 64.05,
- 60.96,
- 72.04,
- 4,
- 48.59,
- 39.59,
- 66.32,
- 6,
- 48.85,
- 40.73,
- 62.2,
- 4,
- 57.27,
- 52.6,
- 66.57,
- 6,
- 51.01,
- 42.85,
- 71.82,
- 4,
- 19.37,
- 19.35,
- 19.37,
- 1,
- 22.63,
- 22.37,
- 22.71,
- 2,
- 22.65,
- 19.02,
- 23.79,
- 1,
- 20.79,
- 17.81,
- 21.61,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/microsoft/phi-4" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/phi-4</a>",
- 38.37,
- 63.85,
- 60.42,
- 73.25,
- 4,
- 50.54,
- 42.31,
- 74.26,
- 4,
- 61.42,
- 60.41,
- 63.15,
- 4,
- 55.69,
- 50.13,
- 83.2,
- 3,
- 54.67,
- 48.23,
- 69.01,
- 3,
- 53.53,
- 51.45,
- 95.26,
- 4,
- 0,
- 0,
- 0,
- 1,
- 22.5,
- 13.28,
- 25.69,
- 1,
- 0.36,
- 0.18,
- 0.36,
- 1,
- 21.17,
- 19.38,
- 21.66,
- 1,
- "Phi3ForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">meta-llama/Meta-Llama-3.1-8B-Instruct</a>",
- 40.23,
- 62.73,
- 60.62,
- 67,
- 1,
- 63.07,
- 59.8,
- 71.12,
- 5,
- 51.39,
- 44.49,
- 63.26,
- 5,
- 50.55,
- 42.27,
- 69,
- 4,
- 55.86,
- 50.4,
- 66.93,
- 6,
- 53.12,
- 48.29,
- 87.78,
- 4,
- 19.94,
- 15.75,
- 21.06,
- 2,
- 22.33,
- 22.3,
- 22.34,
- 1,
- 7.93,
- 7.57,
- 7.96,
- 1,
- 15.42,
- 15.08,
- 15.48,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/arcee-ai/Llama-3.1-SuperNova-Lite" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">arcee-ai/Llama-3.1-SuperNova-Lite</a>",
- 51.71,
- 74.19,
- 73.62,
- 76,
- 3,
- 75.24,
- 74.23,
- 79.03,
- 4,
- 69.24,
- 67.58,
- 73.93,
- 4,
- 54.83,
- 48.43,
- 69.2,
- 4,
- 62.58,
- 61.35,
- 64.86,
- 6,
- 54.43,
- 51,
- 91.52,
- 3,
- 29.5,
- 29.28,
- 29.59,
- 2,
- 21.6,
- 21.38,
- 21.66,
- 2,
- 39.49,
- 38.24,
- 40.34,
- 2,
- 36.02,
- 35.67,
- 36.21,
- 1,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/microsoft/Phi-3-medium-4k-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/Phi-3-medium-4k-instruct</a>",
- 51.22,
- 76.27,
- 75.25,
- 80.5,
- 4,
- 70.48,
- 69.85,
- 72.1,
- 3,
- 62.83,
- 58.75,
- 75.2,
- 3,
- 61.47,
- 56.9,
- 81.2,
- 4,
- 68.92,
- 67.56,
- 72.47,
- 2,
- 52.17,
- 45.47,
- 81.3,
- 4,
- 35.87,
- 35.46,
- 36.11,
- 1,
- 21.59,
- 21.35,
- 21.65,
- 2,
- 25.67,
- 25.55,
- 25.71,
- 2,
- 36.95,
- 36.78,
- 37.04,
- 2,
- "Phi3ForCausalLM",
- "?",
- 14,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/FairMind/Llama-3-8B-4bit-UltraChat-Ita" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">FairMind/Llama-3-8B-4bit-UltraChat-Ita</a>",
- 44.93,
- 65.05,
- 62.62,
- 71,
- 1,
- 69.44,
- 68.97,
- 70.56,
- 4,
- 61.51,
- 60.03,
- 64.16,
- 4,
- 47.01,
- 39.57,
- 56.8,
- 3,
- 52.79,
- 47.59,
- 60.88,
- 3,
- 52.88,
- 48.26,
- 88.53,
- 4,
- 22.47,
- 21.8,
- 22.67,
- 2,
- 21.41,
- 21.25,
- 21.45,
- 1,
- 29.77,
- 29.17,
- 30.02,
- 2,
- 27,
- 26.27,
- 27.27,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/occiglot/occiglot-7b-it-en-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">occiglot/occiglot-7b-it-en-instruct</a>",
- 43.69,
- 64.34,
- 61.83,
- 70.25,
- 2,
- 69.04,
- 68.43,
- 70.48,
- 4,
- 61.77,
- 58.3,
- 69.78,
- 4,
- 54.16,
- 49.43,
- 61.8,
- 4,
- 55.72,
- 51.2,
- 63.65,
- 3,
- 51.6,
- 43.68,
- 73.07,
- 4,
- 5.6,
- 5.4,
- 5.61,
- 2,
- 21.25,
- 20.55,
- 21.44,
- 2,
- 25.15,
- 24.93,
- 25.22,
- 2,
- 28.28,
- 27.2,
- 28.71,
- 1,
- "MistralForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/microsoft/phi-4" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/phi-4</a>",
- 51.84,
- 76.84,
- 76.21,
- 79.25,
- 3,
- 75.21,
- 74.44,
- 77.95,
- 4,
- 69.2,
- 67.08,
- 75.89,
- 4,
- 61.91,
- 57.87,
- 85.2,
- 4,
- 59.38,
- 57.17,
- 63.16,
- 4,
- 54.3,
- 53.41,
- 98,
- 4,
- 35.1,
- 35.1,
- 35.11,
- 2,
- 21.24,
- 21.06,
- 21.29,
- 1,
- 28.05,
- 27.93,
- 28.1,
- 2,
- 37.15,
- 36.83,
- 37.34,
- 1,
- "Phi3ForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/mii-llm/maestrale-chat-v0.4-beta" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">mii-llm/maestrale-chat-v0.4-beta</a>",
- 49.37,
- 73.89,
- 72.62,
- 78.5,
- 4,
- 72.23,
- 71.75,
- 73.57,
- 2,
- 66.86,
- 64.99,
- 71.56,
- 3,
- 55.62,
- 50.7,
- 64.6,
- 3,
- 60.35,
- 59.48,
- 61.76,
- 6,
- 51.66,
- 43.81,
- 74.06,
- 3,
- 22.54,
- 22.36,
- 22.59,
- 1,
- 21.18,
- 21.18,
- 21.18,
- 2,
- 33.53,
- 33.24,
- 33.68,
- 1,
- 35.81,
- 35.36,
- 36.06,
- 2,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">meta-llama/Meta-Llama-3.1-8B-Instruct</a>",
- 50.37,
- 70.53,
- 69.04,
- 75,
- 4,
- 73.77,
- 72.72,
- 77.35,
- 4,
- 65.72,
- 63.22,
- 72.22,
- 4,
- 53.89,
- 46.97,
- 70.2,
- 4,
- 59.03,
- 57.65,
- 61.2,
- 2,
- 53.73,
- 51.16,
- 94.01,
- 3,
- 30.32,
- 29.76,
- 30.57,
- 2,
- 20.44,
- 20.2,
- 20.5,
- 2,
- 40.3,
- 39.7,
- 40.72,
- 2,
- 35.99,
- 35.89,
- 36.05,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/microsoft/Phi-3.5-mini-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/Phi-3.5-mini-instruct</a>",
- 50.06,
- 77.35,
- 76.92,
- 79,
- 2,
- 72.81,
- 72.21,
- 74.57,
- 5,
- 67.05,
- 65.31,
- 71.39,
- 4,
- 53.19,
- 46.5,
- 66.4,
- 3,
- 65.33,
- 63.97,
- 68.25,
- 6,
- 52.72,
- 47.92,
- 88.03,
- 4,
- 24.07,
- 23.86,
- 24.14,
- 2,
- 19.85,
- 19.71,
- 19.89,
- 1,
- 31.98,
- 31.5,
- 32.21,
- 1,
- 36.25,
- 36.15,
- 36.3,
- 2,
- "Phi3ForCausalLM",
- "?",
- 4,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/CohereForAI/aya-expanse-8b" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">CohereForAI/aya-expanse-8b</a>",
- 39.85,
- 64,
- 60.33,
- 75,
- 1,
- 63.23,
- 60.16,
- 70.62,
- 4,
- 61.4,
- 60.48,
- 62.97,
- 4,
- 46.66,
- 38.53,
- 57.8,
- 4,
- 52.79,
- 45.95,
- 66.19,
- 3,
- 47.56,
- 38.03,
- 66.33,
- 4,
- 15.24,
- 11.62,
- 15.92,
- 2,
- 19.14,
- 18.84,
- 19.21,
- 2,
- 19.08,
- 17.36,
- 19.5,
- 2,
- 9.37,
- 7.52,
- 9.57,
- 2,
- "CohereForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/google/gemma-3-12b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-3-12b-it</a>",
- 53.92,
- 79.52,
- 79.12,
- 81.25,
- 1,
- 78.34,
- 78.17,
- 78.98,
- 6,
- 71.4,
- 70.14,
- 75.24,
- 4,
- 65.9,
- 62.5,
- 84.6,
- 4,
- 66.5,
- 64.94,
- 70.16,
- 3,
- 52.35,
- 52,
- 99.25,
- 3,
- 26.77,
- 26.64,
- 26.81,
- 2,
- 18.93,
- 18.74,
- 18.98,
- 2,
- 37.86,
- 37.82,
- 37.89,
- 1,
- 41.59,
- 41.46,
- 41.69,
- 1,
- "?",
- "?",
- 13,
- 0,
- false,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/CohereForAI/aya-expanse-8b" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">CohereForAI/aya-expanse-8b</a>",
- 49.3,
- 72.91,
- 71.88,
- 76.25,
- 2,
- 72.28,
- 71.9,
- 73.3,
- 5,
- 67.71,
- 65.83,
- 72.71,
- 4,
- 52.54,
- 46.27,
- 63.4,
- 4,
- 62.02,
- 59.58,
- 66.98,
- 3,
- 54.39,
- 51.87,
- 94.01,
- 4,
- 20.55,
- 20.37,
- 20.6,
- 2,
- 18.32,
- 18.27,
- 18.33,
- 1,
- 35.73,
- 35.67,
- 35.77,
- 2,
- 36.56,
- 36.56,
- 36.56,
- 1,
- "CohereForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/google/gemma-3-4b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-3-4b-it</a>",
- 46.57,
- 68.28,
- 66.46,
- 73.25,
- 2,
- 75.3,
- 74.96,
- 76.37,
- 4,
- 65.02,
- 62.43,
- 71.53,
- 4,
- 51.92,
- 46.53,
- 60,
- 4,
- 49.02,
- 41.31,
- 61.17,
- 6,
- 51.66,
- 50.04,
- 96.51,
- 3,
- 14.55,
- 14.06,
- 14.64,
- 1,
- 17.96,
- 17.95,
- 17.96,
- 1,
- 36,
- 35.79,
- 36.12,
- 1,
- 36.02,
- 35.98,
- 36.04,
- 1,
- "?",
- "?",
- 0,
- 0,
- false,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/MoxoffSpA/Volare" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">MoxoffSpA/Volare</a>",
- 43.73,
- 66.86,
- 65.17,
- 71,
- 2,
- 68.39,
- 66.88,
- 72.33,
- 4,
- 49.59,
- 45.34,
- 54.73,
- 4,
- 48.92,
- 41.83,
- 59.2,
- 3,
- 44.42,
- 36.76,
- 53.1,
- 1,
- 49.76,
- 41.36,
- 75.06,
- 4,
- 28,
- 27.25,
- 28.29,
- 1,
- 17.48,
- 17.37,
- 17.5,
- 2,
- 31.55,
- 31.49,
- 31.57,
- 2,
- 32.34,
- 31.3,
- 32.85,
- 1,
- "GemmaForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/sapienzanlp/Minerva-7B-instruct-v1.0" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">sapienzanlp/Minerva-7B-instruct-v1.0</a>",
- 32.5,
- 55.16,
- 54.12,
- 56.5,
- 4,
- 50.57,
- 44.51,
- 59.46,
- 6,
- 47.62,
- 39.22,
- 60.48,
- 2,
- 32.25,
- 28.87,
- 34,
- 6,
- 57.7,
- 53.42,
- 66.04,
- 5,
- 35.17,
- 31.05,
- 37.66,
- 4,
- 0,
- 0,
- 0,
- 1,
- 16.34,
- 16.22,
- 16.36,
- 2,
- 9.61,
- 9.16,
- 9.66,
- 1,
- 20.6,
- 17.75,
- 21.38,
- 2,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">sapienzanlp/Minerva-7B-base-v1.0</a>",
- 32.36,
- 56.93,
- 54.46,
- 60.75,
- 1,
- 56.56,
- 50.62,
- 71.33,
- 6,
- 53.3,
- 48.04,
- 61.82,
- 2,
- 31.37,
- 27.7,
- 33.2,
- 5,
- 61.49,
- 59.12,
- 66.13,
- 5,
- 29.12,
- 27.81,
- 29.68,
- 5,
- 0.01,
- 0,
- 0.01,
- 2,
- 16.27,
- 16.04,
- 16.31,
- 1,
- 9.64,
- 9.62,
- 9.64,
- 2,
- 8.91,
- 8.09,
- 8.99,
- 2,
- "MistralForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">sapienzanlp/Minerva-7B-base-v1.0</a>",
- 35.06,
- 68.71,
- 66.79,
- 74.25,
- 4,
- 53.85,
- 53.14,
- 54.7,
- 5,
- 39.49,
- 34.45,
- 43.34,
- 5,
- 38.04,
- 36.83,
- 38.8,
- 6,
- 43.99,
- 34.55,
- 56,
- 5,
- 28.45,
- 27.27,
- 28.93,
- 1,
- 9.53,
- 9.51,
- 9.53,
- 2,
- 15.94,
- 15.92,
- 15.95,
- 1,
- 24.37,
- 24.29,
- 24.4,
- 1,
- 28.2,
- 26.64,
- 28.83,
- 1,
- "MistralForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/sapienzanlp/Minerva-7B-instruct-v1.0" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">sapienzanlp/Minerva-7B-instruct-v1.0</a>",
- 35.7,
- 67.25,
- 65.33,
- 72.25,
- 3,
- 57.68,
- 54.91,
- 62.25,
- 6,
- 41.59,
- 30.89,
- 54.29,
- 5,
- 37.84,
- 37.27,
- 38.2,
- 2,
- 45.48,
- 36.89,
- 56.73,
- 5,
- 29.88,
- 28.64,
- 30.42,
- 4,
- 5,
- 4.88,
- 5.01,
- 2,
- 15.36,
- 15.12,
- 15.4,
- 2,
- 27.24,
- 27.04,
- 27.32,
- 2,
- 29.69,
- 29.36,
- 29.83,
- 1,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "metadata": null
Named Entity Recognition (NER) --- Generative task
The input is a sentence. The model must identify and classify named entities into predefined categories such as person (PER), organization (ORG), and location (LOC).
# | Prompt |
---|---|
1 | Estrai tutte le entità di tipo PER (persona), LOC (luogo) e ORG (organizzazione) dal testo seguente. Riporta ogni entità con il formato: Entità$Tipo, separando ciascuna coppia con ','. Se non ci sono entità da estrarre, rispondi con '&&NOENT&&'.\nTesto: '{{text}}'\nEntità: |
2 | Devi svolgere un compito di riconoscimento delle entità nei testi. Estrai tutte le entità di tipo PER (persona), LOC (luogo) e ORG (organizzazione) dal testo seguente. Riporta ogni entità con il formato: Entità$Tipo, separando ciascuna coppia con ','. Se non ci sono entità da estrarre, rispondi con '&&NOENT&&'.\nTesto: '{{text}}'\nEntità: |
Combined Performance = (1 - (Best Prompt - Prompt Average) / 100) * Best Prompt. Prompt Average = F1 averaged over the 2 prompts. Best Prompt = F1 of the best prompt. Prompt ID = ID of the best prompt (see legend above).
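The prompts above fix a strict output format: 'Entità$Tipo' (Entity$Type) pairs separated by ',', with the sentinel '&&NOENT&&' when no entity is present. A minimal Python sketch of a parser for that format follows; the function name and the example strings are ours, and the actual evaluation harness may normalize outputs differently.

```python
NOENT = "&&NOENT&&"

def parse_ner_output(raw: str) -> list[tuple[str, str]]:
    """Parse 'Entità$Tipo' pairs separated by ',' as the NER prompts request."""
    raw = raw.strip()
    if not raw or raw == NOENT:
        return []
    pairs = []
    for chunk in raw.split(","):
        entity, sep, etype = chunk.strip().partition("$")
        etype = etype.strip()
        if sep and etype in {"PER", "LOC", "ORG"}:  # the three types the prompts allow
            pairs.append((entity.strip(), etype))
    return pairs

print(parse_ner_output("Mario Rossi$PER, Roma$LOC"))  # [('Mario Rossi', 'PER'), ('Roma', 'LOC')]
print(parse_ner_output(NOENT))                        # []
```

The per-prompt F1 in the table would then be computed over such (entity, type) pairs against the gold annotations.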
- "headers": [
- "FS",
- "IS_FS",
- "Model",
- "Avg. Combined Performance ⬆️",
- "TE",
- "TE Prompt Average",
- "TE Best Prompt",
- "TE Best Prompt Id",
- "SA",
- "SA Prompt Average",
- "SA Best Prompt",
- "SA Best Prompt Id",
- "HS",
- "HS Prompt Average",
- "HS Best Prompt",
- "HS Best Prompt Id",
- "AT",
- "AT Prompt Average",
- "AT Best Prompt",
- "AT Best Prompt Id",
- "WIC",
- "WIC Prompt Average",
- "WIC Best Prompt",
- "WIC Best Prompt Id",
- "FAQ",
- "FAQ Prompt Average",
- "FAQ Best Prompt",
- "FAQ Best Prompt Id",
- "LS",
- "LS Prompt Average",
- "LS Best Prompt",
- "LS Best Prompt Id",
- "SU",
- "SU Prompt Average",
- "SU Best Prompt",
- "SU Best Prompt Id",
- "Combined Performance",
- "Prompt Average",
- "Best Prompt",
- "Best Prompt Id",
- "REL",
- "REL Prompt Average",
- "REL Best Prompt",
- "REL Best Prompt Id",
- "Architecture",
- "Hub License",
- "#Params (B)",
- "Hub ❤️",
- "Available on the hub",
- "Model sha"
- "data": [
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">meta-llama/Meta-Llama-3.1-8B-Instruct</a>",
- 50.37,
- 70.53,
- 69.04,
- 75,
- 4,
- 73.77,
- 72.72,
- 77.35,
- 4,
- 65.72,
- 63.22,
- 72.22,
- 4,
- 53.89,
- 46.97,
- 70.2,
- 4,
- 59.03,
- 57.65,
- 61.2,
- 2,
- 53.73,
- 51.16,
- 94.01,
- 3,
- 30.32,
- 29.76,
- 30.57,
- 2,
- 20.44,
- 20.2,
- 20.5,
- 2,
- 40.3,
- 39.7,
- 40.72,
- 2,
- 35.99,
- 35.89,
- 36.05,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/arcee-ai/Llama-3.1-SuperNova-Lite" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">arcee-ai/Llama-3.1-SuperNova-Lite</a>",
- 51.71,
- 74.19,
- 73.62,
- 76,
- 3,
- 75.24,
- 74.23,
- 79.03,
- 4,
- 69.24,
- 67.58,
- 73.93,
- 4,
- 54.83,
- 48.43,
- 69.2,
- 4,
- 62.58,
- 61.35,
- 64.86,
- 6,
- 54.43,
- 51,
- 91.52,
- 3,
- 29.5,
- 29.28,
- 29.59,
- 2,
- 21.6,
- 21.38,
- 21.66,
- 2,
- 39.49,
- 38.24,
- 40.34,
- 2,
- 36.02,
- 35.67,
- 36.21,
- 1,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/google/gemma-2-9b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-2-9b-it</a>",
- 53.64,
- 79.97,
- 79.62,
- 81.5,
- 2,
- 72.85,
- 72.49,
- 73.89,
- 1,
- 71.99,
- 70.71,
- 76.03,
- 3,
- 59.96,
- 54.9,
- 75.8,
- 3,
- 57.79,
- 55.64,
- 61.17,
- 3,
- 51.76,
- 51.16,
- 98.75,
- 3,
- 22.36,
- 21.97,
- 22.47,
- 2,
- 30.43,
- 29.12,
- 31.02,
- 1,
- 38.03,
- 37.92,
- 38.09,
- 1,
- 51.26,
- 50.98,
- 51.56,
- 1,
- "Gemma2ForCausalLM",
- "?",
- 10,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/google/gemma-3-12b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-3-12b-it</a>",
- 53.92,
- 79.52,
- 79.12,
- 81.25,
- 1,
- 78.34,
- 78.17,
- 78.98,
- 6,
- 71.4,
- 70.14,
- 75.24,
- 4,
- 65.9,
- 62.5,
- 84.6,
- 4,
- 66.5,
- 64.94,
- 70.16,
- 3,
- 52.35,
- 52,
- 99.25,
- 3,
- 26.77,
- 26.64,
- 26.81,
- 2,
- 18.93,
- 18.74,
- 18.98,
- 2,
- 37.86,
- 37.82,
- 37.89,
- 1,
- 41.59,
- 41.46,
- 41.69,
- 1,
- "?",
- "?",
- 13,
- 0,
- false,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/google/gemma-3-4b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-3-4b-it</a>",
- 46.57,
- 68.28,
- 66.46,
- 73.25,
- 2,
- 75.3,
- 74.96,
- 76.37,
- 4,
- 65.02,
- 62.43,
- 71.53,
- 4,
- 51.92,
- 46.53,
- 60,
- 4,
- 49.02,
- 41.31,
- 61.17,
- 6,
- 51.66,
- 50.04,
- 96.51,
- 3,
- 14.55,
- 14.06,
- 14.64,
- 1,
- 17.96,
- 17.95,
- 17.96,
- 1,
- 36,
- 35.79,
- 36.12,
- 1,
- 36.02,
- 35.98,
- 36.04,
- 1,
- "?",
- "?",
- 0,
- 0,
- false,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/CohereForAI/aya-expanse-8b" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">CohereForAI/aya-expanse-8b</a>",
- 49.3,
- 72.91,
- 71.88,
- 76.25,
- 2,
- 72.28,
- 71.9,
- 73.3,
- 5,
- 67.71,
- 65.83,
- 72.71,
- 4,
- 52.54,
- 46.27,
- 63.4,
- 4,
- 62.02,
- 59.58,
- 66.98,
- 3,
- 54.39,
- 51.87,
- 94.01,
- 4,
- 20.55,
- 20.37,
- 20.6,
- 2,
- 18.32,
- 18.27,
- 18.33,
- 1,
- 35.73,
- 35.67,
- 35.77,
- 2,
- 36.56,
- 36.56,
- 36.56,
- 1,
- "CohereForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2.5-7B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Qwen/Qwen2.5-7B-Instruct</a>",
- 52.62,
- 83.65,
- 83.42,
- 85,
- 2,
- 74.16,
- 73.77,
- 75.35,
- 4,
- 68.51,
- 67.14,
- 72.04,
- 4,
- 53.92,
- 46.93,
- 75.8,
- 4,
- 62.7,
- 60.38,
- 67.5,
- 6,
- 52.46,
- 51.87,
- 98.75,
- 4,
- 28.17,
- 27.85,
- 28.29,
- 2,
- 30.79,
- 30.36,
- 30.98,
- 2,
- 35.18,
- 34.45,
- 35.58,
- 1,
- 36.69,
- 36.64,
- 36.72,
- 2,
- "Qwen2ForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2.5-14B-Instruct-1M" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Qwen/Qwen2.5-14B-Instruct-1M</a>",
- 54.72,
- 85.02,
- 84.79,
- 86.5,
- 4,
- 73.17,
- 71.75,
- 78.26,
- 3,
- 72.8,
- 71.8,
- 75.96,
- 4,
- 59.75,
- 55.4,
- 85.6,
- 3,
- 63.59,
- 61.7,
- 67.52,
- 6,
- 52.61,
- 52.37,
- 99.5,
- 4,
- 35.03,
- 34.65,
- 35.24,
- 1,
- 25.9,
- 25.51,
- 26.04,
- 2,
- 35.1,
- 35.03,
- 35.14,
- 1,
- 44.23,
- 42.63,
- 45.56,
- 1,
- "Qwen2ForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/mii-llm/maestrale-chat-v0.4-beta" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">mii-llm/maestrale-chat-v0.4-beta</a>",
- 49.37,
- 73.89,
- 72.62,
- 78.5,
- 4,
- 72.23,
- 71.75,
- 73.57,
- 2,
- 66.86,
- 64.99,
- 71.56,
- 3,
- 55.62,
- 50.7,
- 64.6,
- 3,
- 60.35,
- 59.48,
- 61.76,
- 6,
- 51.66,
- 43.81,
- 74.06,
- 3,
- 22.54,
- 22.36,
- 22.59,
- 1,
- 21.18,
- 21.18,
- 21.18,
- 2,
- 33.53,
- 33.24,
- 33.68,
- 1,
- 35.81,
- 35.36,
- 36.06,
- 2,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/google/gemma-2-9b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-2-9b-it</a>",
- 47.54,
- 71.99,
- 70.12,
- 79,
- 2,
- 64.35,
- 61.74,
- 70.63,
- 4,
- 64.22,
- 61.92,
- 69.42,
- 4,
- 57.94,
- 52.27,
- 77.8,
- 4,
- 42.48,
- 30.36,
- 64.67,
- 5,
- 51.53,
- 50.58,
- 98,
- 4,
- 25.45,
- 25.29,
- 25.5,
- 2,
- 30.48,
- 30.1,
- 30.65,
- 2,
- 32.01,
- 31.63,
- 32.2,
- 1,
- 34.97,
- 34.56,
- 35.19,
- 1,
- "Gemma2ForCausalLM",
- "?",
- 10,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/microsoft/Phi-3.5-mini-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/Phi-3.5-mini-instruct</a>",
- 50.06,
- 77.35,
- 76.92,
- 79,
- 2,
- 72.81,
- 72.21,
- 74.57,
- 5,
- 67.05,
- 65.31,
- 71.39,
- 4,
- 53.19,
- 46.5,
- 66.4,
- 3,
- 65.33,
- 63.97,
- 68.25,
- 6,
- 52.72,
- 47.92,
- 88.03,
- 4,
- 24.07,
- 23.86,
- 24.14,
- 2,
- 19.85,
- 19.71,
- 19.89,
- 1,
- 31.98,
- 31.5,
- 32.21,
- 1,
- 36.25,
- 36.15,
- 36.3,
- 2,
- "Phi3ForCausalLM",
- "?",
- 4,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/ibm-granite/granite-3.1-8b-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">ibm-granite/granite-3.1-8b-instruct</a>",
- 51.7,
- 72.23,
- 70.92,
- 76.5,
- 4,
- 74.76,
- 74.47,
- 75.67,
- 1,
- 66.88,
- 64.05,
- 75.68,
- 4,
- 54.23,
- 48.17,
- 66,
- 4,
- 65.03,
- 63.71,
- 67.83,
- 4,
- 53.49,
- 52.58,
- 98,
- 3,
- 25.74,
- 25.6,
- 25.79,
- 1,
- 32.88,
- 32.36,
- 33.14,
- 1,
- 31.8,
- 30.59,
- 32.38,
- 2,
- 39.91,
- 39.7,
- 40.06,
- 1,
- "GraniteForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/MoxoffSpA/Volare" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">MoxoffSpA/Volare</a>",
- 43.73,
- 66.86,
- 65.17,
- 71,
- 2,
- 68.39,
- 66.88,
- 72.33,
- 4,
- 49.59,
- 45.34,
- 54.73,
- 4,
- 48.92,
- 41.83,
- 59.2,
- 3,
- 44.42,
- 36.76,
- 53.1,
- 1,
- 49.76,
- 41.36,
- 75.06,
- 4,
- 28,
- 27.25,
- 28.29,
- 1,
- 17.48,
- 17.37,
- 17.5,
- 2,
- 31.55,
- 31.49,
- 31.57,
- 2,
- 32.34,
- 31.3,
- 32.85,
- 1,
- "GemmaForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/google/gemma-3-12b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-3-12b-it</a>",
- 47.35,
- 71.3,
- 69.25,
- 79,
- 4,
- 61.91,
- 57.59,
- 74.6,
- 4,
- 67.19,
- 65.51,
- 71.4,
- 3,
- 58.57,
- 53.63,
- 83.4,
- 4,
- 46.06,
- 35.76,
- 66.76,
- 5,
- 52.91,
- 52.33,
- 98.75,
- 3,
- 21.85,
- 20.17,
- 22.33,
- 2,
- 29.93,
- 29.46,
- 30.13,
- 2,
- 31.5,
- 29.49,
- 32.47,
- 1,
- 32.23,
- 32.16,
- 32.27,
- 1,
- "?",
- "?",
- 13,
- 0,
- false,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/DeepMount00/Llama-3-8b-Ita" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">DeepMount00/Llama-3-8b-Ita</a>",
- 49.41,
- 72.61,
- 71.54,
- 76,
- 1,
- 71.9,
- 70.46,
- 76.59,
- 4,
- 68.95,
- 67.49,
- 72.86,
- 4,
- 52.96,
- 46.33,
- 65.6,
- 3,
- 57.04,
- 54.37,
- 61.26,
- 1,
- 53.68,
- 51.41,
- 94.76,
- 4,
- 25.08,
- 24.86,
- 25.16,
- 1,
- 24.3,
- 24.26,
- 24.31,
- 2,
- 30.98,
- 30.27,
- 31.31,
- 2,
- 36.56,
- 36.56,
- 36.56,
- 1,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/microsoft/Phi-3.5-mini-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/Phi-3.5-mini-instruct</a>",
- 44.4,
- 72.16,
- 70.04,
- 81.5,
- 4,
- 51.63,
- 43.72,
- 70.59,
- 3,
- 65.93,
- 64.49,
- 69.17,
- 4,
- 48.9,
- 40.67,
- 62.8,
- 3,
- 60.37,
- 56.97,
- 67.41,
- 5,
- 51.52,
- 44.22,
- 79.05,
- 3,
- 20.38,
- 18.28,
- 20.94,
- 2,
- 23.24,
- 22.7,
- 23.4,
- 2,
- 30.58,
- 30.35,
- 30.68,
- 2,
- 19.33,
- 14.4,
- 20.61,
- 1,
- "Phi3ForCausalLM",
- "?",
- 4,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/FairMind/Llama-3-8B-4bit-UltraChat-Ita" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">FairMind/Llama-3-8B-4bit-UltraChat-Ita</a>",
- 44.93,
- 65.05,
- 62.62,
- 71,
- 1,
- 69.44,
- 68.97,
- 70.56,
- 4,
- 61.51,
- 60.03,
- 64.16,
- 4,
- 47.01,
- 39.57,
- 56.8,
- 3,
- 52.79,
- 47.59,
- 60.88,
- 3,
- 52.88,
- 48.26,
- 88.53,
- 4,
- 22.47,
- 21.8,
- 22.67,
- 2,
- 21.41,
- 21.25,
- 21.45,
- 1,
- 29.77,
- 29.17,
- 30.02,
- 2,
- 27,
- 26.27,
- 27.27,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/google/gemma-3-4b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-3-4b-it</a>",
- 44.59,
- 66.22,
- 64.88,
- 69.25,
- 3,
- 60.41,
- 57.66,
- 65.65,
- 5,
- 59.8,
- 57.32,
- 64.25,
- 5,
- 46.01,
- 38.87,
- 54.6,
- 3,
- 50.8,
- 44.13,
- 61.44,
- 5,
- 52.55,
- 48.67,
- 90.77,
- 3,
- 32.37,
- 30.72,
- 33.18,
- 1,
- 29.24,
- 28.93,
- 29.37,
- 2,
- 29.76,
- 28.92,
- 30.13,
- 2,
- 18.7,
- 18.34,
- 18.78,
- 2,
- "?",
- "?",
- 0,
- 0,
- false,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/microsoft/phi-4" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/phi-4</a>",
- 51.84,
- 76.84,
- 76.21,
- 79.25,
- 3,
- 75.21,
- 74.44,
- 77.95,
- 4,
- 69.2,
- 67.08,
- 75.89,
- 4,
- 61.91,
- 57.87,
- 85.2,
- 4,
- 59.38,
- 57.17,
- 63.16,
- 4,
- 54.3,
- 53.41,
- 98,
- 4,
- 35.1,
- 35.1,
- 35.11,
- 2,
- 21.24,
- 21.06,
- 21.29,
- 1,
- 28.05,
- 27.93,
- 28.1,
- 2,
- 37.15,
- 36.83,
- 37.34,
- 1,
- "Phi3ForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/sapienzanlp/Minerva-7B-instruct-v1.0" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">sapienzanlp/Minerva-7B-instruct-v1.0</a>",
- 35.7,
- 67.25,
- 65.33,
- 72.25,
- 3,
- 57.68,
- 54.91,
- 62.25,
- 6,
- 41.59,
- 30.89,
- 54.29,
- 5,
- 37.84,
- 37.27,
- 38.2,
- 2,
- 45.48,
- 36.89,
- 56.73,
- 5,
- 29.88,
- 28.64,
- 30.42,
- 4,
- 5,
- 4.88,
- 5.01,
- 2,
- 15.36,
- 15.12,
- 15.4,
- 2,
- 27.24,
- 27.04,
- 27.32,
- 2,
- 29.69,
- 29.36,
- 29.83,
- 1,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/microsoft/Phi-3-medium-4k-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/Phi-3-medium-4k-instruct</a>",
- 51.22,
- 76.27,
- 75.25,
- 80.5,
- 4,
- 70.48,
- 69.85,
- 72.1,
- 3,
- 62.83,
- 58.75,
- 75.2,
- 3,
- 61.47,
- 56.9,
- 81.2,
- 4,
- 68.92,
- 67.56,
- 72.47,
- 2,
- 52.17,
- 45.47,
- 81.3,
- 4,
- 35.87,
- 35.46,
- 36.11,
- 1,
- 21.59,
- 21.35,
- 21.65,
- 2,
- 25.67,
- 25.55,
- 25.71,
- 2,
- 36.95,
- 36.78,
- 37.04,
- 2,
- "Phi3ForCausalLM",
- "?",
- 14,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/occiglot/occiglot-7b-it-en-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">occiglot/occiglot-7b-it-en-instruct</a>",
- 43.69,
- 64.34,
- 61.83,
- 70.25,
- 2,
- 69.04,
- 68.43,
- 70.48,
- 4,
- 61.77,
- 58.3,
- 69.78,
- 4,
- 54.16,
- 49.43,
- 61.8,
- 4,
- 55.72,
- 51.2,
- 63.65,
- 3,
- 51.6,
- 43.68,
- 73.07,
- 4,
- 5.6,
- 5.4,
- 5.61,
- 2,
- 21.25,
- 20.55,
- 21.44,
- 2,
- 25.15,
- 24.93,
- 25.22,
- 2,
- 28.28,
- 27.2,
- 28.71,
- 1,
- "MistralForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/DeepMount00/Llama-3-8b-Ita" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">DeepMount00/Llama-3-8b-Ita</a>",
- 41.02,
- 58.59,
- 56,
- 63,
- 2,
- 62.59,
- 60.33,
- 67.21,
- 5,
- 49.12,
- 40.78,
- 63.86,
- 6,
- 49.04,
- 40.83,
- 63,
- 4,
- 55.44,
- 49.72,
- 67.12,
- 6,
- 51.22,
- 46.22,
- 88.03,
- 4,
- 22.59,
- 21.46,
- 22.93,
- 2,
- 22.76,
- 22.51,
- 22.84,
- 1,
- 25.08,
- 20.84,
- 26.62,
- 1,
- 13.78,
- 12.08,
- 14.06,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/Almawave/Velvet-14B" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Almawave/Velvet-14B</a>",
- 42.69,
- 74.99,
- 74.46,
- 76.75,
- 2,
- 68.28,
- 67.99,
- 68.93,
- 1,
- 67.17,
- 66.35,
- 68.97,
- 4,
- 54.18,
- 49.73,
- 61.2,
- 4,
- 29.29,
- 15.41,
- 37.69,
- 6,
- 49.28,
- 41.73,
- 80.55,
- 4,
- 9.43,
- 9.21,
- 9.45,
- 1,
- 34.66,
- 34.25,
- 34.88,
- 2,
- 24.83,
- 24.48,
- 24.94,
- 2,
- 14.79,
- 13.08,
- 15.1,
- 1,
- "MistralForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">sapienzanlp/Minerva-7B-base-v1.0</a>",
- 35.06,
- 68.71,
- 66.79,
- 74.25,
- 4,
- 53.85,
- 53.14,
- 54.7,
- 5,
- 39.49,
- 34.45,
- 43.34,
- 5,
- 38.04,
- 36.83,
- 38.8,
- 6,
- 43.99,
- 34.55,
- 56,
- 5,
- 28.45,
- 27.27,
- 28.93,
- 1,
- 9.53,
- 9.51,
- 9.53,
- 2,
- 15.94,
- 15.92,
- 15.95,
- 1,
- 24.37,
- 24.29,
- 24.4,
- 1,
- 28.2,
- 26.64,
- 28.83,
- 1,
- "MistralForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA</a>",
- 41.74,
- 62.14,
- 58.92,
- 69.5,
- 2,
- 64.05,
- 60.96,
- 72.04,
- 4,
- 48.59,
- 39.59,
- 66.32,
- 6,
- 48.85,
- 40.73,
- 62.2,
- 4,
- 57.27,
- 52.6,
- 66.57,
- 6,
- 51.01,
- 42.85,
- 71.82,
- 4,
- 19.37,
- 19.35,
- 19.37,
- 1,
- 22.63,
- 22.37,
- 22.71,
- 2,
- 22.65,
- 19.02,
- 23.79,
- 1,
- 20.79,
- 17.81,
- 21.61,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/CohereForAI/aya-expanse-8b" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">CohereForAI/aya-expanse-8b</a>",
- 39.85,
- 64,
- 60.33,
- 75,
- 1,
- 63.23,
- 60.16,
- 70.62,
- 4,
- 61.4,
- 60.48,
- 62.97,
- 4,
- 46.66,
- 38.53,
- 57.8,
- 4,
- 52.79,
- 45.95,
- 66.19,
- 3,
- 47.56,
- 38.03,
- 66.33,
- 4,
- 15.24,
- 11.62,
- 15.92,
- 2,
- 19.14,
- 18.84,
- 19.21,
- 2,
- 19.08,
- 17.36,
- 19.5,
- 2,
- 9.37,
- 7.52,
- 9.57,
- 2,
- "CohereForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/microsoft/Phi-3-medium-4k-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/Phi-3-medium-4k-instruct</a>",
- 42.09,
- 66.01,
- 62.58,
- 78.5,
- 3,
- 55.31,
- 48.78,
- 72.58,
- 3,
- 65.19,
- 62.35,
- 72.75,
- 4,
- 54.11,
- 47.67,
- 80.2,
- 3,
- 53.5,
- 46.92,
- 66.67,
- 1,
- 53.24,
- 48.67,
- 88.53,
- 4,
- 0.34,
- 0.31,
- 0.34,
- 2,
- 27.64,
- 26.52,
- 28.08,
- 2,
- 18.6,
- 17.48,
- 18.87,
- 1,
- 27.01,
- 26.99,
- 27.01,
- 2,
- "Phi3ForCausalLM",
- "?",
- 14,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/ibm-granite/granite-3.1-8b-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">ibm-granite/granite-3.1-8b-instruct</a>",
- 37.26,
- 56.34,
- 51.08,
- 67,
- 2,
- 54.43,
- 47.82,
- 69.45,
- 4,
- 48.15,
- 39.51,
- 62.64,
- 1,
- 49.04,
- 41.97,
- 59.4,
- 3,
- 50.24,
- 42,
- 66.85,
- 5,
- 54.7,
- 51.29,
- 91.52,
- 4,
- 0.17,
- 0.08,
- 0.17,
- 2,
- 30.28,
- 30.04,
- 30.39,
- 1,
- 18.09,
- 16.03,
- 18.57,
- 2,
- 11.21,
- 9.17,
- 11.47,
- 2,
- "GraniteForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA</a>",
- 49.39,
- 72.91,
- 71.75,
- 76.75,
- 1,
- 72.07,
- 70.82,
- 76.05,
- 4,
- 70.99,
- 69.88,
- 74.19,
- 4,
- 52.81,
- 46.4,
- 64.4,
- 4,
- 56.82,
- 52.88,
- 63.74,
- 1,
- 56.55,
- 52.41,
- 88.53,
- 3,
- 23.26,
- 22.92,
- 23.37,
- 1,
- 29.1,
- 28.92,
- 29.18,
- 2,
- 17.2,
- 15.77,
- 17.51,
- 2,
- 42.14,
- 41.31,
- 42.76,
- 1,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/Almawave/Velvet-14B" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Almawave/Velvet-14B</a>",
- 39.48,
- 69.96,
- 67.62,
- 78.5,
- 1,
- 62.53,
- 60.49,
- 66.59,
- 5,
- 59.27,
- 55.93,
- 65.64,
- 3,
- 48.79,
- 42.4,
- 57.4,
- 4,
- 47.33,
- 37.91,
- 64.31,
- 5,
- 50.08,
- 45.47,
- 89.53,
- 4,
- 0.13,
- 0.06,
- 0.13,
- 1,
- 31.1,
- 31.09,
- 31.11,
- 2,
- 15.89,
- 15.75,
- 15.91,
- 2,
- 9.68,
- 6.96,
- 9.98,
- 1,
- "MistralForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/FairMind/Llama-3-8B-4bit-UltraChat-Ita" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">FairMind/Llama-3-8B-4bit-UltraChat-Ita</a>",
- 36.28,
- 60.29,
- 58.08,
- 64.25,
- 6,
- 52.97,
- 46.01,
- 67.3,
- 5,
- 54.09,
- 48.72,
- 63.38,
- 5,
- 41.41,
- 33.37,
- 49.2,
- 3,
- 66.22,
- 66.04,
- 66.58,
- 4,
- 43.89,
- 34.54,
- 55.61,
- 4,
- 0,
- 0,
- 0,
- 1,
- 24.17,
- 23.6,
- 24.35,
- 2,
- 15.59,
- 13.72,
- 15.95,
- 2,
- 4.2,
- 2.62,
- 4.27,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2.5-7B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Qwen/Qwen2.5-7B-Instruct</a>",
- 45.5,
- 75.21,
- 73.67,
- 82.5,
- 4,
- 57.18,
- 51.62,
- 70.43,
- 4,
- 64.56,
- 61.54,
- 72.54,
- 4,
- 49.73,
- 41.03,
- 70.4,
- 3,
- 45.89,
- 35.48,
- 67.11,
- 5,
- 52.83,
- 51.58,
- 97.26,
- 4,
- 32.96,
- 32.42,
- 33.22,
- 2,
- 25.39,
- 24.9,
- 25.56,
- 2,
- 14.97,
- 10.24,
- 15.86,
- 2,
- 36.26,
- 36.11,
- 36.35,
- 1,
- "Qwen2ForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/MoxoffSpA/Volare" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">MoxoffSpA/Volare</a>",
- 30.23,
- 53.14,
- 51.25,
- 55.5,
- 6,
- 50.1,
- 44.15,
- 58.49,
- 6,
- 50.7,
- 43.57,
- 62.7,
- 2,
- 26.13,
- 22.77,
- 27.4,
- 3,
- 40.77,
- 27.8,
- 66.4,
- 5,
- 27.95,
- 27.35,
- 28.18,
- 1,
- 0.04,
- 0.02,
- 0.04,
- 1,
- 23.28,
- 23.15,
- 23.32,
- 1,
- 10.77,
- 10.63,
- 10.78,
- 2,
- 19.47,
- 15.95,
- 20.37,
- 1,
- "GemmaForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/arcee-ai/Llama-3.1-SuperNova-Lite" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">arcee-ai/Llama-3.1-SuperNova-Lite</a>",
- 42.66,
- 70.06,
- 68.54,
- 74.5,
- 4,
- 66.85,
- 63.8,
- 77.12,
- 4,
- 56.41,
- 52.21,
- 63.8,
- 5,
- 51.47,
- 43.63,
- 68.6,
- 3,
- 50.8,
- 42.88,
- 66.58,
- 6,
- 54.89,
- 51.16,
- 90.52,
- 4,
- 23.74,
- 22.04,
- 24.29,
- 2,
- 22.77,
- 22.74,
- 22.78,
- 2,
- 9.98,
- 8.86,
- 10.11,
- 2,
- 19.61,
- 17.82,
- 20.06,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">mistralai/Mistral-7B-Instruct-v0.3</a>",
- 41.56,
- 63.83,
- 60.83,
- 71.25,
- 1,
- 61.23,
- 58.71,
- 66.14,
- 6,
- 59.16,
- 55.86,
- 65.39,
- 5,
- 46.03,
- 39.23,
- 54,
- 3,
- 62.89,
- 61.08,
- 66.49,
- 2,
- 53.28,
- 47.63,
- 84.79,
- 4,
- 9.87,
- 6.47,
- 10.26,
- 2,
- 28.1,
- 27.28,
- 28.43,
- 2,
- 9.72,
- 9.64,
- 9.73,
- 1,
- 21.49,
- 20.29,
- 21.83,
- 2,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">sapienzanlp/Minerva-7B-base-v1.0</a>",
- 32.36,
- 56.93,
- 54.46,
- 60.75,
- 1,
- 56.56,
- 50.62,
- 71.33,
- 6,
- 53.3,
- 48.04,
- 61.82,
- 2,
- 31.37,
- 27.7,
- 33.2,
- 5,
- 61.49,
- 59.12,
- 66.13,
- 5,
- 29.12,
- 27.81,
- 29.68,
- 5,
- 0.01,
- 0,
- 0.01,
- 2,
- 16.27,
- 16.04,
- 16.31,
- 1,
- 9.64,
- 9.62,
- 9.64,
- 2,
- 8.91,
- 8.09,
- 8.99,
- 2,
- "MistralForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/sapienzanlp/Minerva-7B-instruct-v1.0" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">sapienzanlp/Minerva-7B-instruct-v1.0</a>",
- 32.5,
- 55.16,
- 54.12,
- 56.5,
- 4,
- 50.57,
- 44.51,
- 59.46,
- 6,
- 47.62,
- 39.22,
- 60.48,
- 2,
- 32.25,
- 28.87,
- 34,
- 6,
- 57.7,
- 53.42,
- 66.04,
- 5,
- 35.17,
- 31.05,
- 37.66,
- 4,
- 0,
- 0,
- 0,
- 1,
- 16.34,
- 16.22,
- 16.36,
- 2,
- 9.61,
- 9.16,
- 9.66,
- 1,
- 20.6,
- 17.75,
- 21.38,
- 2,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2.5-14B-Instruct-1M" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Qwen/Qwen2.5-14B-Instruct-1M</a>",
- 44.36,
- 69.51,
- 66.88,
- 86.75,
- 4,
- 54.07,
- 47.09,
- 74.98,
- 3,
- 59.97,
- 54.95,
- 75.11,
- 4,
- 53.29,
- 47.3,
- 83.4,
- 3,
- 48.97,
- 40.07,
- 67.2,
- 6,
- 54.41,
- 53.74,
- 98.5,
- 3,
- 36.76,
- 35.98,
- 37.23,
- 2,
- 25.17,
- 25.1,
- 25.19,
- 2,
- 8.15,
- 6.63,
- 8.28,
- 2,
- 33.32,
- 33.06,
- 33.45,
- 1,
- "Qwen2ForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">meta-llama/Meta-Llama-3.1-8B-Instruct</a>",
- 40.23,
- 62.73,
- 60.62,
- 67,
- 1,
- 63.07,
- 59.8,
- 71.12,
- 5,
- 51.39,
- 44.49,
- 63.26,
- 5,
- 50.55,
- 42.27,
- 69,
- 4,
- 55.86,
- 50.4,
- 66.93,
- 6,
- 53.12,
- 48.29,
- 87.78,
- 4,
- 19.94,
- 15.75,
- 21.06,
- 2,
- 22.33,
- 22.3,
- 22.34,
- 1,
- 7.93,
- 7.57,
- 7.96,
- 1,
- 15.42,
- 15.08,
- 15.48,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/occiglot/occiglot-7b-it-en-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">occiglot/occiglot-7b-it-en-instruct</a>",
- 38,
- 52.59,
- 49.92,
- 56,
- 4,
- 55.46,
- 51.87,
- 61.09,
- 6,
- 53.79,
- 48.48,
- 62.72,
- 1,
- 47.95,
- 42.8,
- 54,
- 3,
- 50.78,
- 42.86,
- 66.49,
- 5,
- 49.64,
- 42.89,
- 83.29,
- 4,
- 2.84,
- 1.56,
- 2.88,
- 2,
- 27.68,
- 25.5,
- 28.56,
- 2,
- 6.96,
- 5.26,
- 7.09,
- 1,
- 32.31,
- 32.03,
- 32.45,
- 2,
- "MistralForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/mii-llm/maestrale-chat-v0.4-beta" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">mii-llm/maestrale-chat-v0.4-beta</a>",
- 41.04,
- 60.19,
- 55.42,
- 73.25,
- 4,
- 63.23,
- 61.22,
- 67.4,
- 4,
- 61.53,
- 60.62,
- 63.07,
- 6,
- 51.91,
- 45.17,
- 63.8,
- 3,
- 46.35,
- 36.19,
- 66.84,
- 6,
- 52.36,
- 46.09,
- 83.04,
- 4,
- 20.38,
- 19.22,
- 20.68,
- 2,
- 25.69,
- 25.14,
- 25.89,
- 2,
- 5.02,
- 4.62,
- 5.05,
- 1,
- 23.76,
- 21.98,
- 24.33,
- 2,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/microsoft/phi-4" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/phi-4</a>",
- 38.37,
- 63.85,
- 60.42,
- 73.25,
- 4,
- 50.54,
- 42.31,
- 74.26,
- 4,
- 61.42,
- 60.41,
- 63.15,
- 4,
- 55.69,
- 50.13,
- 83.2,
- 3,
- 54.67,
- 48.23,
- 69.01,
- 3,
- 53.53,
- 51.45,
- 95.26,
- 4,
- 0,
- 0,
- 0,
- 1,
- 22.5,
- 13.28,
- 25.69,
- 1,
- 0.36,
- 0.18,
- 0.36,
- 1,
- 21.17,
- 19.38,
- 21.66,
- 1,
- "Phi3ForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "metadata": null
Relation Extraction (REL) --- Generative task
The input is a sentence from a clinical text. The model must identify and extract relations between laboratory measurements (e.g., a blood pressure reading) and the corresponding tests or procedures that produced them (e.g., a blood pressure test).
# | Prompt |
---|---|
1 | Dato un documento medico devi estrarre tutte le misurazioni degli esami medici presenti. Riporta ogni relazione nel formato: misurazione$esame, separando ciascuna coppia con '%'. Se non ci sono relazioni da estrarre, rispondi con '&&NOREL&&'.\nTesto: '{{text}}'\nRelazioni: |
2 | Devi svolgere un compito di estrazione di relazioni da documenti medici. Dato un documento medico devi estrarre tutte le misurazioni degli esami medici presenti. Riporta ogni relazione nel formato: misurazione$esame, separando ciascuna coppia con '%'. Se non ci sono relazioni da estrarre, rispondi con '&&NOREL&&'.\nTesto: '{{text}}'\nRelazioni: |
Both prompts are the literal Italian templates used by the benchmark. In English they read, roughly: "Given a medical document, you must extract all the measurements of the medical tests it contains. Report each relation in the format: measurement$test, separating each pair with '%'. If there are no relations to extract, answer with '&&NOREL&&'.\nText: '{{text}}'\nRelations:"; prompt 2 additionally opens with an explicit task statement ("You must perform a relation extraction task on medical documents."). A sketch of how an answer in this format could be parsed follows the table.
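Since the prompts fix a strict output format (pairs 'misurazione$esame' joined by '%', with the sentinel '&&NOREL&&' for the empty case), a small parser is enough to recover the predicted relations from a raw answer string. The Python sketch below is illustrative only, not the official Evalita-LLM scorer; the function name and the example answer are made up.

```python
# Minimal sketch of parsing a REL answer back into (measurement, test) pairs.

def parse_rel_answer(answer: str) -> list[tuple[str, str]]:
    """Parse 'measurement$test' pairs separated by '%'; '&&NOREL&&' means none."""
    answer = answer.strip()
    if answer == "&&NOREL&&":
        return []  # the sentinel the prompts require when no relation exists
    pairs = []
    for chunk in answer.split("%"):
        # each chunk should look like "misurazione$esame"
        if "$" in chunk:
            measurement, test = chunk.split("$", 1)
            pairs.append((measurement.strip(), test.strip()))
    return pairs

# Hypothetical answer containing two relations:
print(parse_rel_answer("120/80$pressione arteriosa%37.5$temperatura corporea"))
# -> [('120/80', 'pressione arteriosa'), ('37.5', 'temperatura corporea')]
```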
Combined Performance = (1 - (Best Prompt - Prompt Average) / 100) * Best Prompt. Prompt Average = F1 averaged over the 2 prompts. Best Prompt = F1 of the best prompt. Prompt ID = ID of the best prompt (see legend above).
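As a quick sanity check of the formula, this Python sketch recomputes the REL Combined Performance of google/gemma-2-9b-it (5-shot) from its Prompt Average (50.98) and Best Prompt (51.56) scores in the table below; the helper name is made up.

```python
# Combined Performance = (1 - (Best Prompt - Prompt Average) / 100) * Best Prompt

def combined_performance(prompt_average: float, best_prompt: float) -> float:
    """Penalize the best-prompt score by the gap between best and average prompt."""
    return (1 - (best_prompt - prompt_average) / 100) * best_prompt

cp = combined_performance(prompt_average=50.98, best_prompt=51.56)
print(round(cp, 2))  # 51.26, matching the Combined Performance column
```

The wider the spread between the best prompt and the prompt average, the larger the penalty, so the metric rewards models that are robust to prompt wording rather than models that excel on a single prompt.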
- "headers": [
- "FS",
- "IS_FS",
- "Model",
- "Avg. Combined Performance ⬆️",
- "TE",
- "TE Prompt Average",
- "TE Best Prompt",
- "TE Best Prompt Id",
- "SA",
- "SA Prompt Average",
- "SA Best Prompt",
- "SA Best Prompt Id",
- "HS",
- "HS Prompt Average",
- "HS Best Prompt",
- "HS Best Prompt Id",
- "AT",
- "AT Prompt Average",
- "AT Best Prompt",
- "AT Best Prompt Id",
- "WIC",
- "WIC Prompt Average",
- "WIC Best Prompt",
- "WIC Best Prompt Id",
- "FAQ",
- "FAQ Prompt Average",
- "FAQ Best Prompt",
- "FAQ Best Prompt Id",
- "LS",
- "LS Prompt Average",
- "LS Best Prompt",
- "LS Best Prompt Id",
- "SU",
- "SU Prompt Average",
- "SU Best Prompt",
- "SU Best Prompt Id",
- "NER",
- "NER Prompt Average",
- "NER Best Prompt",
- "NER Best Prompt Id",
- "Combined Performance",
- "Prompt Average",
- "Best Prompt",
- "Best Prompt Id",
- "Architecture",
- "Hub License",
- "#Params (B)",
- "Hub ❤️",
- "Available on the hub",
- "Model sha"
- "data": [
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/google/gemma-2-9b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-2-9b-it</a>",
- 53.64,
- 79.97,
- 79.62,
- 81.5,
- 2,
- 72.85,
- 72.49,
- 73.89,
- 1,
- 71.99,
- 70.71,
- 76.03,
- 3,
- 59.96,
- 54.9,
- 75.8,
- 3,
- 57.79,
- 55.64,
- 61.17,
- 3,
- 51.76,
- 51.16,
- 98.75,
- 3,
- 22.36,
- 21.97,
- 22.47,
- 2,
- 30.43,
- 29.12,
- 31.02,
- 1,
- 38.03,
- 37.92,
- 38.09,
- 1,
- 51.26,
- 50.98,
- 51.56,
- 1,
- "Gemma2ForCausalLM",
- "?",
- 10,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2.5-14B-Instruct-1M" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Qwen/Qwen2.5-14B-Instruct-1M</a>",
- 54.72,
- 85.02,
- 84.79,
- 86.5,
- 4,
- 73.17,
- 71.75,
- 78.26,
- 3,
- 72.8,
- 71.8,
- 75.96,
- 4,
- 59.75,
- 55.4,
- 85.6,
- 3,
- 63.59,
- 61.7,
- 67.52,
- 6,
- 52.61,
- 52.37,
- 99.5,
- 4,
- 35.03,
- 34.65,
- 35.24,
- 1,
- 25.9,
- 25.51,
- 26.04,
- 2,
- 35.1,
- 35.03,
- 35.14,
- 1,
- 44.23,
- 42.63,
- 45.56,
- 1,
- "Qwen2ForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA</a>",
- 49.39,
- 72.91,
- 71.75,
- 76.75,
- 1,
- 72.07,
- 70.82,
- 76.05,
- 4,
- 70.99,
- 69.88,
- 74.19,
- 4,
- 52.81,
- 46.4,
- 64.4,
- 4,
- 56.82,
- 52.88,
- 63.74,
- 1,
- 56.55,
- 52.41,
- 88.53,
- 3,
- 23.26,
- 22.92,
- 23.37,
- 1,
- 29.1,
- 28.92,
- 29.18,
- 2,
- 17.2,
- 15.77,
- 17.51,
- 2,
- 42.14,
- 41.31,
- 42.76,
- 1,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/google/gemma-3-12b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-3-12b-it</a>",
- 53.92,
- 79.52,
- 79.12,
- 81.25,
- 1,
- 78.34,
- 78.17,
- 78.98,
- 6,
- 71.4,
- 70.14,
- 75.24,
- 4,
- 65.9,
- 62.5,
- 84.6,
- 4,
- 66.5,
- 64.94,
- 70.16,
- 3,
- 52.35,
- 52,
- 99.25,
- 3,
- 26.77,
- 26.64,
- 26.81,
- 2,
- 18.93,
- 18.74,
- 18.98,
- 2,
- 37.86,
- 37.82,
- 37.89,
- 1,
- 41.59,
- 41.46,
- 41.69,
- 1,
- "?",
- "?",
- 13,
- 0,
- false,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/ibm-granite/granite-3.1-8b-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">ibm-granite/granite-3.1-8b-instruct</a>",
- 51.7,
- 72.23,
- 70.92,
- 76.5,
- 4,
- 74.76,
- 74.47,
- 75.67,
- 1,
- 66.88,
- 64.05,
- 75.68,
- 4,
- 54.23,
- 48.17,
- 66,
- 4,
- 65.03,
- 63.71,
- 67.83,
- 4,
- 53.49,
- 52.58,
- 98,
- 3,
- 25.74,
- 25.6,
- 25.79,
- 1,
- 32.88,
- 32.36,
- 33.14,
- 1,
- 31.8,
- 30.59,
- 32.38,
- 2,
- 39.91,
- 39.7,
- 40.06,
- 1,
- "GraniteForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/microsoft/phi-4" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/phi-4</a>",
- 51.84,
- 76.84,
- 76.21,
- 79.25,
- 3,
- 75.21,
- 74.44,
- 77.95,
- 4,
- 69.2,
- 67.08,
- 75.89,
- 4,
- 61.91,
- 57.87,
- 85.2,
- 4,
- 59.38,
- 57.17,
- 63.16,
- 4,
- 54.3,
- 53.41,
- 98,
- 4,
- 35.1,
- 35.1,
- 35.11,
- 2,
- 21.24,
- 21.06,
- 21.29,
- 1,
- 28.05,
- 27.93,
- 28.1,
- 2,
- 37.15,
- 36.83,
- 37.34,
- 1,
- "Phi3ForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/microsoft/Phi-3-medium-4k-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/Phi-3-medium-4k-instruct</a>",
- 51.22,
- 76.27,
- 75.25,
- 80.5,
- 4,
- 70.48,
- 69.85,
- 72.1,
- 3,
- 62.83,
- 58.75,
- 75.2,
- 3,
- 61.47,
- 56.9,
- 81.2,
- 4,
- 68.92,
- 67.56,
- 72.47,
- 2,
- 52.17,
- 45.47,
- 81.3,
- 4,
- 35.87,
- 35.46,
- 36.11,
- 1,
- 21.59,
- 21.35,
- 21.65,
- 2,
- 25.67,
- 25.55,
- 25.71,
- 2,
- 36.95,
- 36.78,
- 37.04,
- 2,
- "Phi3ForCausalLM",
- "?",
- 14,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2.5-7B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Qwen/Qwen2.5-7B-Instruct</a>",
- 52.62,
- 83.65,
- 83.42,
- 85,
- 2,
- 74.16,
- 73.77,
- 75.35,
- 4,
- 68.51,
- 67.14,
- 72.04,
- 4,
- 53.92,
- 46.93,
- 75.8,
- 4,
- 62.7,
- 60.38,
- 67.5,
- 6,
- 52.46,
- 51.87,
- 98.75,
- 4,
- 28.17,
- 27.85,
- 28.29,
- 2,
- 30.79,
- 30.36,
- 30.98,
- 2,
- 35.18,
- 34.45,
- 35.58,
- 1,
- 36.69,
- 36.64,
- 36.72,
- 2,
- "Qwen2ForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/DeepMount00/Llama-3-8b-Ita" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">DeepMount00/Llama-3-8b-Ita</a>",
- 49.41,
- 72.61,
- 71.54,
- 76,
- 1,
- 71.9,
- 70.46,
- 76.59,
- 4,
- 68.95,
- 67.49,
- 72.86,
- 4,
- 52.96,
- 46.33,
- 65.6,
- 3,
- 57.04,
- 54.37,
- 61.26,
- 1,
- 53.68,
- 51.41,
- 94.76,
- 4,
- 25.08,
- 24.86,
- 25.16,
- 1,
- 24.3,
- 24.26,
- 24.31,
- 2,
- 30.98,
- 30.27,
- 31.31,
- 2,
- 36.56,
- 36.56,
- 36.56,
- 1,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/CohereForAI/aya-expanse-8b" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">CohereForAI/aya-expanse-8b</a>",
- 49.3,
- 72.91,
- 71.88,
- 76.25,
- 2,
- 72.28,
- 71.9,
- 73.3,
- 5,
- 67.71,
- 65.83,
- 72.71,
- 4,
- 52.54,
- 46.27,
- 63.4,
- 4,
- 62.02,
- 59.58,
- 66.98,
- 3,
- 54.39,
- 51.87,
- 94.01,
- 4,
- 20.55,
- 20.37,
- 20.6,
- 2,
- 18.32,
- 18.27,
- 18.33,
- 1,
- 35.73,
- 35.67,
- 35.77,
- 2,
- 36.56,
- 36.56,
- 36.56,
- 1,
- "CohereForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2.5-7B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Qwen/Qwen2.5-7B-Instruct</a>",
- 45.5,
- 75.21,
- 73.67,
- 82.5,
- 4,
- 57.18,
- 51.62,
- 70.43,
- 4,
- 64.56,
- 61.54,
- 72.54,
- 4,
- 49.73,
- 41.03,
- 70.4,
- 3,
- 45.89,
- 35.48,
- 67.11,
- 5,
- 52.83,
- 51.58,
- 97.26,
- 4,
- 32.96,
- 32.42,
- 33.22,
- 2,
- 25.39,
- 24.9,
- 25.56,
- 2,
- 14.97,
- 10.24,
- 15.86,
- 2,
- 36.26,
- 36.11,
- 36.35,
- 1,
- "Qwen2ForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/microsoft/Phi-3.5-mini-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/Phi-3.5-mini-instruct</a>",
- 50.06,
- 77.35,
- 76.92,
- 79,
- 2,
- 72.81,
- 72.21,
- 74.57,
- 5,
- 67.05,
- 65.31,
- 71.39,
- 4,
- 53.19,
- 46.5,
- 66.4,
- 3,
- 65.33,
- 63.97,
- 68.25,
- 6,
- 52.72,
- 47.92,
- 88.03,
- 4,
- 24.07,
- 23.86,
- 24.14,
- 2,
- 19.85,
- 19.71,
- 19.89,
- 1,
- 31.98,
- 31.5,
- 32.21,
- 1,
- 36.25,
- 36.15,
- 36.3,
- 2,
- "Phi3ForCausalLM",
- "?",
- 4,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/arcee-ai/Llama-3.1-SuperNova-Lite" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">arcee-ai/Llama-3.1-SuperNova-Lite</a>",
- 51.71,
- 74.19,
- 73.62,
- 76,
- 3,
- 75.24,
- 74.23,
- 79.03,
- 4,
- 69.24,
- 67.58,
- 73.93,
- 4,
- 54.83,
- 48.43,
- 69.2,
- 4,
- 62.58,
- 61.35,
- 64.86,
- 6,
- 54.43,
- 51,
- 91.52,
- 3,
- 29.5,
- 29.28,
- 29.59,
- 2,
- 21.6,
- 21.38,
- 21.66,
- 2,
- 39.49,
- 38.24,
- 40.34,
- 2,
- 36.02,
- 35.67,
- 36.21,
- 1,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/google/gemma-3-4b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-3-4b-it</a>",
- 46.57,
- 68.28,
- 66.46,
- 73.25,
- 2,
- 75.3,
- 74.96,
- 76.37,
- 4,
- 65.02,
- 62.43,
- 71.53,
- 4,
- 51.92,
- 46.53,
- 60,
- 4,
- 49.02,
- 41.31,
- 61.17,
- 6,
- 51.66,
- 50.04,
- 96.51,
- 3,
- 14.55,
- 14.06,
- 14.64,
- 1,
- 17.96,
- 17.95,
- 17.96,
- 1,
- 36,
- 35.79,
- 36.12,
- 1,
- 36.02,
- 35.98,
- 36.04,
- 1,
- "?",
- "?",
- 0,
- 0,
- false,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">meta-llama/Meta-Llama-3.1-8B-Instruct</a>",
- 50.37,
- 70.53,
- 69.04,
- 75,
- 4,
- 73.77,
- 72.72,
- 77.35,
- 4,
- 65.72,
- 63.22,
- 72.22,
- 4,
- 53.89,
- 46.97,
- 70.2,
- 4,
- 59.03,
- 57.65,
- 61.2,
- 2,
- 53.73,
- 51.16,
- 94.01,
- 3,
- 30.32,
- 29.76,
- 30.57,
- 2,
- 20.44,
- 20.2,
- 20.5,
- 2,
- 40.3,
- 39.7,
- 40.72,
- 2,
- 35.99,
- 35.89,
- 36.05,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/mii-llm/maestrale-chat-v0.4-beta" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">mii-llm/maestrale-chat-v0.4-beta</a>",
- 49.37,
- 73.89,
- 72.62,
- 78.5,
- 4,
- 72.23,
- 71.75,
- 73.57,
- 2,
- 66.86,
- 64.99,
- 71.56,
- 3,
- 55.62,
- 50.7,
- 64.6,
- 3,
- 60.35,
- 59.48,
- 61.76,
- 6,
- 51.66,
- 43.81,
- 74.06,
- 3,
- 22.54,
- 22.36,
- 22.59,
- 1,
- 21.18,
- 21.18,
- 21.18,
- 2,
- 33.53,
- 33.24,
- 33.68,
- 1,
- 35.81,
- 35.36,
- 36.06,
- 2,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/google/gemma-2-9b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-2-9b-it</a>",
- 47.54,
- 71.99,
- 70.12,
- 79,
- 2,
- 64.35,
- 61.74,
- 70.63,
- 4,
- 64.22,
- 61.92,
- 69.42,
- 4,
- 57.94,
- 52.27,
- 77.8,
- 4,
- 42.48,
- 30.36,
- 64.67,
- 5,
- 51.53,
- 50.58,
- 98,
- 4,
- 25.45,
- 25.29,
- 25.5,
- 2,
- 30.48,
- 30.1,
- 30.65,
- 2,
- 32.01,
- 31.63,
- 32.2,
- 1,
- 34.97,
- 34.56,
- 35.19,
- 1,
- "Gemma2ForCausalLM",
- "?",
- 10,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2.5-14B-Instruct-1M" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Qwen/Qwen2.5-14B-Instruct-1M</a>",
- 44.36,
- 69.51,
- 66.88,
- 86.75,
- 4,
- 54.07,
- 47.09,
- 74.98,
- 3,
- 59.97,
- 54.95,
- 75.11,
- 4,
- 53.29,
- 47.3,
- 83.4,
- 3,
- 48.97,
- 40.07,
- 67.2,
- 6,
- 54.41,
- 53.74,
- 98.5,
- 3,
- 36.76,
- 35.98,
- 37.23,
- 2,
- 25.17,
- 25.1,
- 25.19,
- 2,
- 8.15,
- 6.63,
- 8.28,
- 2,
- 33.32,
- 33.06,
- 33.45,
- 1,
- "Qwen2ForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/MoxoffSpA/Volare" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">MoxoffSpA/Volare</a>",
- 43.73,
- 66.86,
- 65.17,
- 71,
- 2,
- 68.39,
- 66.88,
- 72.33,
- 4,
- 49.59,
- 45.34,
- 54.73,
- 4,
- 48.92,
- 41.83,
- 59.2,
- 3,
- 44.42,
- 36.76,
- 53.1,
- 1,
- 49.76,
- 41.36,
- 75.06,
- 4,
- 28,
- 27.25,
- 28.29,
- 1,
- 17.48,
- 17.37,
- 17.5,
- 2,
- 31.55,
- 31.49,
- 31.57,
- 2,
- 32.34,
- 31.3,
- 32.85,
- 1,
- "GemmaForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/occiglot/occiglot-7b-it-en-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">occiglot/occiglot-7b-it-en-instruct</a>",
- 38,
- 52.59,
- 49.92,
- 56,
- 4,
- 55.46,
- 51.87,
- 61.09,
- 6,
- 53.79,
- 48.48,
- 62.72,
- 1,
- 47.95,
- 42.8,
- 54,
- 3,
- 50.78,
- 42.86,
- 66.49,
- 5,
- 49.64,
- 42.89,
- 83.29,
- 4,
- 2.84,
- 1.56,
- 2.88,
- 2,
- 27.68,
- 25.5,
- 28.56,
- 2,
- 6.96,
- 5.26,
- 7.09,
- 1,
- 32.31,
- 32.03,
- 32.45,
- 2,
- "MistralForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/google/gemma-3-12b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-3-12b-it</a>",
- 47.35,
- 71.3,
- 69.25,
- 79,
- 4,
- 61.91,
- 57.59,
- 74.6,
- 4,
- 67.19,
- 65.51,
- 71.4,
- 3,
- 58.57,
- 53.63,
- 83.4,
- 4,
- 46.06,
- 35.76,
- 66.76,
- 5,
- 52.91,
- 52.33,
- 98.75,
- 3,
- 21.85,
- 20.17,
- 22.33,
- 2,
- 29.93,
- 29.46,
- 30.13,
- 2,
- 31.5,
- 29.49,
- 32.47,
- 1,
- 32.23,
- 32.16,
- 32.27,
- 1,
- "?",
- "?",
- 13,
- 0,
- false,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/sapienzanlp/Minerva-7B-instruct-v1.0" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">sapienzanlp/Minerva-7B-instruct-v1.0</a>",
- 35.7,
- 67.25,
- 65.33,
- 72.25,
- 3,
- 57.68,
- 54.91,
- 62.25,
- 6,
- 41.59,
- 30.89,
- 54.29,
- 5,
- 37.84,
- 37.27,
- 38.2,
- 2,
- 45.48,
- 36.89,
- 56.73,
- 5,
- 29.88,
- 28.64,
- 30.42,
- 4,
- 5,
- 4.88,
- 5.01,
- 2,
- 15.36,
- 15.12,
- 15.4,
- 2,
- 27.24,
- 27.04,
- 27.32,
- 2,
- 29.69,
- 29.36,
- 29.83,
- 1,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/occiglot/occiglot-7b-it-en-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">occiglot/occiglot-7b-it-en-instruct</a>",
- 43.69,
- 64.34,
- 61.83,
- 70.25,
- 2,
- 69.04,
- 68.43,
- 70.48,
- 4,
- 61.77,
- 58.3,
- 69.78,
- 4,
- 54.16,
- 49.43,
- 61.8,
- 4,
- 55.72,
- 51.2,
- 63.65,
- 3,
- 51.6,
- 43.68,
- 73.07,
- 4,
- 5.6,
- 5.4,
- 5.61,
- 2,
- 21.25,
- 20.55,
- 21.44,
- 2,
- 25.15,
- 24.93,
- 25.22,
- 2,
- 28.28,
- 27.2,
- 28.71,
- 1,
- "MistralForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">sapienzanlp/Minerva-7B-base-v1.0</a>",
- 35.06,
- 68.71,
- 66.79,
- 74.25,
- 4,
- 53.85,
- 53.14,
- 54.7,
- 5,
- 39.49,
- 34.45,
- 43.34,
- 5,
- 38.04,
- 36.83,
- 38.8,
- 6,
- 43.99,
- 34.55,
- 56,
- 5,
- 28.45,
- 27.27,
- 28.93,
- 1,
- 9.53,
- 9.51,
- 9.53,
- 2,
- 15.94,
- 15.92,
- 15.95,
- 1,
- 24.37,
- 24.29,
- 24.4,
- 1,
- 28.2,
- 26.64,
- 28.83,
- 1,
- "MistralForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/microsoft/Phi-3-medium-4k-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/Phi-3-medium-4k-instruct</a>",
- 42.09,
- 66.01,
- 62.58,
- 78.5,
- 3,
- 55.31,
- 48.78,
- 72.58,
- 3,
- 65.19,
- 62.35,
- 72.75,
- 4,
- 54.11,
- 47.67,
- 80.2,
- 3,
- 53.5,
- 46.92,
- 66.67,
- 1,
- 53.24,
- 48.67,
- 88.53,
- 4,
- 0.34,
- 0.31,
- 0.34,
- 2,
- 27.64,
- 26.52,
- 28.08,
- 2,
- 18.6,
- 17.48,
- 18.87,
- 1,
- 27.01,
- 26.99,
- 27.01,
- 2,
- "Phi3ForCausalLM",
- "?",
- 14,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/FairMind/Llama-3-8B-4bit-UltraChat-Ita" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">FairMind/Llama-3-8B-4bit-UltraChat-Ita</a>",
- 44.93,
- 65.05,
- 62.62,
- 71,
- 1,
- 69.44,
- 68.97,
- 70.56,
- 4,
- 61.51,
- 60.03,
- 64.16,
- 4,
- 47.01,
- 39.57,
- 56.8,
- 3,
- 52.79,
- 47.59,
- 60.88,
- 3,
- 52.88,
- 48.26,
- 88.53,
- 4,
- 22.47,
- 21.8,
- 22.67,
- 2,
- 21.41,
- 21.25,
- 21.45,
- 1,
- 29.77,
- 29.17,
- 30.02,
- 2,
- 27,
- 26.27,
- 27.27,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/mii-llm/maestrale-chat-v0.4-beta" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">mii-llm/maestrale-chat-v0.4-beta</a>",
- 41.04,
- 60.19,
- 55.42,
- 73.25,
- 4,
- 63.23,
- 61.22,
- 67.4,
- 4,
- 61.53,
- 60.62,
- 63.07,
- 6,
- 51.91,
- 45.17,
- 63.8,
- 3,
- 46.35,
- 36.19,
- 66.84,
- 6,
- 52.36,
- 46.09,
- 83.04,
- 4,
- 20.38,
- 19.22,
- 20.68,
- 2,
- 25.69,
- 25.14,
- 25.89,
- 2,
- 5.02,
- 4.62,
- 5.05,
- 1,
- 23.76,
- 21.98,
- 24.33,
- 2,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">mistralai/Mistral-7B-Instruct-v0.3</a>",
- 41.56,
- 63.83,
- 60.83,
- 71.25,
- 1,
- 61.23,
- 58.71,
- 66.14,
- 6,
- 59.16,
- 55.86,
- 65.39,
- 5,
- 46.03,
- 39.23,
- 54,
- 3,
- 62.89,
- 61.08,
- 66.49,
- 2,
- 53.28,
- 47.63,
- 84.79,
- 4,
- 9.87,
- 6.47,
- 10.26,
- 2,
- 28.1,
- 27.28,
- 28.43,
- 2,
- 9.72,
- 9.64,
- 9.73,
- 1,
- 21.49,
- 20.29,
- 21.83,
- 2,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/microsoft/phi-4" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/phi-4</a>",
- 38.37,
- 63.85,
- 60.42,
- 73.25,
- 4,
- 50.54,
- 42.31,
- 74.26,
- 4,
- 61.42,
- 60.41,
- 63.15,
- 4,
- 55.69,
- 50.13,
- 83.2,
- 3,
- 54.67,
- 48.23,
- 69.01,
- 3,
- 53.53,
- 51.45,
- 95.26,
- 4,
- 0,
- 0,
- 0,
- 1,
- 22.5,
- 13.28,
- 25.69,
- 1,
- 0.36,
- 0.18,
- 0.36,
- 1,
- 21.17,
- 19.38,
- 21.66,
- 1,
- "Phi3ForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA</a>",
- 41.74,
- 62.14,
- 58.92,
- 69.5,
- 2,
- 64.05,
- 60.96,
- 72.04,
- 4,
- 48.59,
- 39.59,
- 66.32,
- 6,
- 48.85,
- 40.73,
- 62.2,
- 4,
- 57.27,
- 52.6,
- 66.57,
- 6,
- 51.01,
- 42.85,
- 71.82,
- 4,
- 19.37,
- 19.35,
- 19.37,
- 1,
- 22.63,
- 22.37,
- 22.71,
- 2,
- 22.65,
- 19.02,
- 23.79,
- 1,
- 20.79,
- 17.81,
- 21.61,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/sapienzanlp/Minerva-7B-instruct-v1.0" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">sapienzanlp/Minerva-7B-instruct-v1.0</a>",
- 32.5,
- 55.16,
- 54.12,
- 56.5,
- 4,
- 50.57,
- 44.51,
- 59.46,
- 6,
- 47.62,
- 39.22,
- 60.48,
- 2,
- 32.25,
- 28.87,
- 34,
- 6,
- 57.7,
- 53.42,
- 66.04,
- 5,
- 35.17,
- 31.05,
- 37.66,
- 4,
- 0,
- 0,
- 0,
- 1,
- 16.34,
- 16.22,
- 16.36,
- 2,
- 9.61,
- 9.16,
- 9.66,
- 1,
- 20.6,
- 17.75,
- 21.38,
- 2,
- "MistralForCausalLM",
- "?",
- 8,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/arcee-ai/Llama-3.1-SuperNova-Lite" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">arcee-ai/Llama-3.1-SuperNova-Lite</a>",
- 42.66,
- 70.06,
- 68.54,
- 74.5,
- 4,
- 66.85,
- 63.8,
- 77.12,
- 4,
- 56.41,
- 52.21,
- 63.8,
- 5,
- 51.47,
- 43.63,
- 68.6,
- 3,
- 50.8,
- 42.88,
- 66.58,
- 6,
- 54.89,
- 51.16,
- 90.52,
- 4,
- 23.74,
- 22.04,
- 24.29,
- 2,
- 22.77,
- 22.74,
- 22.78,
- 2,
- 9.98,
- 8.86,
- 10.11,
- 2,
- 19.61,
- 17.82,
- 20.06,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/MoxoffSpA/Volare" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">MoxoffSpA/Volare</a>",
- 30.23,
- 53.14,
- 51.25,
- 55.5,
- 6,
- 50.1,
- 44.15,
- 58.49,
- 6,
- 50.7,
- 43.57,
- 62.7,
- 2,
- 26.13,
- 22.77,
- 27.4,
- 3,
- 40.77,
- 27.8,
- 66.4,
- 5,
- 27.95,
- 27.35,
- 28.18,
- 1,
- 0.04,
- 0.02,
- 0.04,
- 1,
- 23.28,
- 23.15,
- 23.32,
- 1,
- 10.77,
- 10.63,
- 10.78,
- 2,
- 19.47,
- 15.95,
- 20.37,
- 1,
- "GemmaForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/microsoft/Phi-3.5-mini-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/Phi-3.5-mini-instruct</a>",
- 44.4,
- 72.16,
- 70.04,
- 81.5,
- 4,
- 51.63,
- 43.72,
- 70.59,
- 3,
- 65.93,
- 64.49,
- 69.17,
- 4,
- 48.9,
- 40.67,
- 62.8,
- 3,
- 60.37,
- 56.97,
- 67.41,
- 5,
- 51.52,
- 44.22,
- 79.05,
- 3,
- 20.38,
- 18.28,
- 20.94,
- 2,
- 23.24,
- 22.7,
- 23.4,
- 2,
- 30.58,
- 30.35,
- 30.68,
- 2,
- 19.33,
- 14.4,
- 20.61,
- 1,
- "Phi3ForCausalLM",
- "?",
- 4,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/google/gemma-3-4b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma-3-4b-it</a>",
- 44.59,
- 66.22,
- 64.88,
- 69.25,
- 3,
- 60.41,
- 57.66,
- 65.65,
- 5,
- 59.8,
- 57.32,
- 64.25,
- 5,
- 46.01,
- 38.87,
- 54.6,
- 3,
- 50.8,
- 44.13,
- 61.44,
- 5,
- 52.55,
- 48.67,
- 90.77,
- 3,
- 32.37,
- 30.72,
- 33.18,
- 1,
- 29.24,
- 28.93,
- 29.37,
- 2,
- 29.76,
- 28.92,
- 30.13,
- 2,
- 18.7,
- 18.34,
- 18.78,
- 2,
- "?",
- "?",
- 0,
- 0,
- false,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">meta-llama/Meta-Llama-3.1-8B-Instruct</a>",
- 40.23,
- 62.73,
- 60.62,
- 67,
- 1,
- 63.07,
- 59.8,
- 71.12,
- 5,
- 51.39,
- 44.49,
- 63.26,
- 5,
- 50.55,
- 42.27,
- 69,
- 4,
- 55.86,
- 50.4,
- 66.93,
- 6,
- 53.12,
- 48.29,
- 87.78,
- 4,
- 19.94,
- 15.75,
- 21.06,
- 2,
- 22.33,
- 22.3,
- 22.34,
- 1,
- 7.93,
- 7.57,
- 7.96,
- 1,
- 15.42,
- 15.08,
- 15.48,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "5️⃣",
- true,
- "<a target="_blank" href="https://huggingface.co/Almawave/Velvet-14B" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Almawave/Velvet-14B</a>",
- 42.69,
- 74.99,
- 74.46,
- 76.75,
- 2,
- 68.28,
- 67.99,
- 68.93,
- 1,
- 67.17,
- 66.35,
- 68.97,
- 4,
- 54.18,
- 49.73,
- 61.2,
- 4,
- 29.29,
- 15.41,
- 37.69,
- 6,
- 49.28,
- 41.73,
- 80.55,
- 4,
- 9.43,
- 9.21,
- 9.45,
- 1,
- 34.66,
- 34.25,
- 34.88,
- 2,
- 24.83,
- 24.48,
- 24.94,
- 2,
- 14.79,
- 13.08,
- 15.1,
- 1,
- "MistralForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/DeepMount00/Llama-3-8b-Ita" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">DeepMount00/Llama-3-8b-Ita</a>",
- 41.02,
- 58.59,
- 56,
- 63,
- 2,
- 62.59,
- 60.33,
- 67.21,
- 5,
- 49.12,
- 40.78,
- 63.86,
- 6,
- 49.04,
- 40.83,
- 63,
- 4,
- 55.44,
- 49.72,
- 67.12,
- 6,
- 51.22,
- 46.22,
- 88.03,
- 4,
- 22.59,
- 21.46,
- 22.93,
- 2,
- 22.76,
- 22.51,
- 22.84,
- 1,
- 25.08,
- 20.84,
- 26.62,
- 1,
- 13.78,
- 12.08,
- 14.06,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/ibm-granite/granite-3.1-8b-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">ibm-granite/granite-3.1-8b-instruct</a>",
- 37.26,
- 56.34,
- 51.08,
- 67,
- 2,
- 54.43,
- 47.82,
- 69.45,
- 4,
- 48.15,
- 39.51,
- 62.64,
- 1,
- 49.04,
- 41.97,
- 59.4,
- 3,
- 50.24,
- 42,
- 66.85,
- 5,
- 54.7,
- 51.29,
- 91.52,
- 4,
- 0.17,
- 0.08,
- 0.17,
- 2,
- 30.28,
- 30.04,
- 30.39,
- 1,
- 18.09,
- 16.03,
- 18.57,
- 2,
- 11.21,
- 9.17,
- 11.47,
- 2,
- "GraniteForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/Almawave/Velvet-14B" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Almawave/Velvet-14B</a>",
- 39.48,
- 69.96,
- 67.62,
- 78.5,
- 1,
- 62.53,
- 60.49,
- 66.59,
- 5,
- 59.27,
- 55.93,
- 65.64,
- 3,
- 48.79,
- 42.4,
- 57.4,
- 4,
- 47.33,
- 37.91,
- 64.31,
- 5,
- 50.08,
- 45.47,
- 89.53,
- 4,
- 0.13,
- 0.06,
- 0.13,
- 1,
- 31.1,
- 31.09,
- 31.11,
- 2,
- 15.89,
- 15.75,
- 15.91,
- 2,
- 9.68,
- 6.96,
- 9.98,
- 1,
- "MistralForCausalLM",
- "?",
- 15,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/CohereForAI/aya-expanse-8b" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">CohereForAI/aya-expanse-8b</a>",
- 39.85,
- 64,
- 60.33,
- 75,
- 1,
- 63.23,
- 60.16,
- 70.62,
- 4,
- 61.4,
- 60.48,
- 62.97,
- 4,
- 46.66,
- 38.53,
- 57.8,
- 4,
- 52.79,
- 45.95,
- 66.19,
- 3,
- 47.56,
- 38.03,
- 66.33,
- 4,
- 15.24,
- 11.62,
- 15.92,
- 2,
- 19.14,
- 18.84,
- 19.21,
- 2,
- 19.08,
- 17.36,
- 19.5,
- 2,
- 9.37,
- 7.52,
- 9.57,
- 2,
- "CohereForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">sapienzanlp/Minerva-7B-base-v1.0</a>",
- 32.36,
- 56.93,
- 54.46,
- 60.75,
- 1,
- 56.56,
- 50.62,
- 71.33,
- 6,
- 53.3,
- 48.04,
- 61.82,
- 2,
- 31.37,
- 27.7,
- 33.2,
- 5,
- 61.49,
- 59.12,
- 66.13,
- 5,
- 29.12,
- 27.81,
- 29.68,
- 5,
- 0.01,
- 0,
- 0.01,
- 2,
- 16.27,
- 16.04,
- 16.31,
- 1,
- 9.64,
- 9.62,
- 9.64,
- 2,
- 8.91,
- 8.09,
- 8.99,
- 2,
- "MistralForCausalLM",
- "?",
- 0,
- 0,
- true,
- ""
- [
- "0️⃣",
- false,
- "<a target="_blank" href="https://huggingface.co/FairMind/Llama-3-8B-4bit-UltraChat-Ita" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">FairMind/Llama-3-8B-4bit-UltraChat-Ita</a>",
- 36.28,
- 60.29,
- 58.08,
- 64.25,
- 6,
- 52.97,
- 46.01,
- 67.3,
- 5,
- 54.09,
- 48.72,
- 63.38,
- 5,
- 41.41,
- 33.37,
- 49.2,
- 3,
- 66.22,
- 66.04,
- 66.58,
- 4,
- 43.89,
- 34.54,
- 55.61,
- 4,
- 0,
- 0,
- 0,
- 1,
- 24.17,
- 23.6,
- 24.35,
- 2,
- 15.59,
- 13.72,
- 15.95,
- 2,
- 4.2,
- 2.62,
- 4.27,
- 2,
- "LlamaForCausalLM",
- "?",
- 9,
- 0,
- true,
- ""
- [
- "metadata": null