Fino1 Leaderboard

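The Fino1 Leaderboard evaluates large language models (LLMs), both general-purpose and reasoning-enhanced, on complex financial tasks. These tasks, such as mathematical question answering and equation execution, assess an LLM’s ability to perform structured financial reasoning and numerical computation. Each model is scored on four benchmarks (FinQA, DM-Simplong, XBRL-Math, and DM-Complong); the Average column is the unweighted mean of those four scores, and the Type column marks each model as Instruction-tuned or Reasoning-enhanced.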

{
  • "headers": [
    • "Models",
    • "Average",
    • "FinQA",
    • "DM-Simplong",
    • "XBRL-Math",
    • "DM-Complong",
    • "Type"
    ],
  • "data": [
    • [
      • "GPT-4o",
      • 61.01,
      • 72.49,
      • 60,
      • 72.22,
      • 39.33,
      • "Instruction-tuned"
      ],
    • [
      • "GPT-o1",
      • 54.045,
      • 49.07,
      • 56,
      • 74.44,
      • 36.67,
      • "Reasoning-enhanced"
      ],
    • [
      • "GPT-o3-mini",
      • 57.885,
      • 60.87,
      • 59,
      • 76.67,
      • 35,
      • "Reasoning-enhanced"
      ],
    • [
      • "DeepSeek-V3",
      • 61.3,
      • 73.2,
      • 53,
      • 76.67,
      • 42.33,
      • "Instruction-tuned"
      ],
    • [
      • "DeepSeek-R1",
      • 60.8675,
      • 65.13,
      • 53,
      • 86.67,
      • 38.67,
      • "Reasoning-enhanced"
      ],
    • [
      • "GPT-4.5",
      • 60.4275,
      • 68.94,
      • 59,
      • 74.44,
      • 39.33,
      • "Instruction-tuned"
      ],
    • [
      • "DeepSeek-R1-Distill-Llama-70B",
      • 59.2675,
      • 66.73,
      • 53,
      • 86.67,
      • 30.67,
      • "Reasoning-enhanced"
      ],
    • [
      • "Llama-3-70B-Instruct",
      • 42.565,
      • 58.92,
      • 41,
      • 56.67,
      • 13.67,
      • "Instruction-tuned"
      ],
    • [
      • "Llama-3.1-70B-Instruct",
      • 52.21,
      • 63.18,
      • 48,
      • 63.33,
      • 34.33,
      • "Instruction-tuned"
      ],
    • [
      • "Llama-3.3-70B-Instruct",
      • 56.0375,
      • 68.15,
      • 54,
      • 70,
      • 32,
      • "Instruction-tuned"
      ],
    • [
      • "DeepSeek-R1-Distill-Qwen-32B",
      • 57.3975,
      • 65.48,
      • 55,
      • 84.44,
      • 24.67,
      • "Reasoning-enhanced"
      ],
    • [
      • "DeepSeek-R1-Distill-Qwen-14B",
      • 53.1775,
      • 63.27,
      • 44,
      • 84.44,
      • 21,
      • "Reasoning-enhanced"
      ],
    • [
      • "DeepSeek-R1-Distill-Llama-8B",
      • 43.935,
      • 45.96,
      • 33,
      • 81.11,
      • 15.67,
      • "Reasoning-enhanced"
      ],
    • [
      • "Llama-3-8B-Instruct",
      • 31.465,
      • 41.97,
      • 29,
      • 48.89,
      • 6,
      • "Instruction-tuned"
      ],
    • [
      • "Llama-3.1-8B-Instruct",
      • 41.1625,
      • 54.13,
      • 34,
      • 62.22,
      • 14.3,
      • "Instruction-tuned"
      ],
    • [
      • "Qwen2.5-7B-Instruct",
      • 39.065,
      • 55.37,
      • 41,
      • 42.22,
      • 17.67,
      • "Instruction-tuned"
      ],
    • [
      • "Qwen2.5-14B-Instruct",
      • 52.7225,
      • 67.44,
      • 59,
      • 57.78,
      • 26.67,
      • "Instruction-tuned"
      ],
    • [
      • "Qwen2.5-32B-Instruct",
      • 56.1675,
      • 73.11,
      • 56,
      • 65.56,
      • 30,
      • "Instruction-tuned"
      ],
    • [
      • "Qwen2.5-72B-Instruct",
      • 53.7075,
      • 73.38,
      • 59,
      • 67.78,
      • 14.67,
      • "Instruction-tuned"
      ],
    • [
      • "Qwen2.5-Math-72B-Instruct",
      • 50.0175,
      • 69.74,
      • 42,
      • 83.33,
      • 5,
      • "Reasoning-enhanced"
      ],
    • [
      • "LIMO",
      • 46.22,
      • 63.44,
      • 45,
      • 61.11,
      • 15.33,
      • "Reasoning-enhanced"
      ],
    • [
      • "S1",
      • 57.0625,
      • 66.81,
      • 53,
      • 84.44,
      • 24,
      • "Reasoning-enhanced"
      ],
    • [
      • "QwQ-32B",
      • 52.915,
      • 61.22,
      • 46,
      • 84.44,
      • 20,
      • "Reasoning-enhanced"
      ],
    • [
      • "FinR1-7B",
      • 34.8525,
      • 58.74,
      • 37,
      • 30,
      • 13.67,
      • "Reasoning-enhanced"
      ],
    • [
      • "Fino1-8B",
      • 50.7725,
      • 60.87,
      • 40,
      • 82.22,
      • 20,
      • "Reasoning-enhanced"
      ],
    • [
      • "Fino1-14B",
      • 60.2525,
      • 70.01,
      • 60,
      • 86.67,
      • 24.33,
      • "Reasoning-enhanced"
      ]
    ],
  • "metadata": null
}