🎭 RoleRMBench Leaderboard

RoleRMBench evaluates reward models on role-playing scenarios across multiple dimensions.

For more information, please refer to: https://github.com/Dear-Sloth/RoleRMBench

Feel free to submit your results to our 🤗 HuggingFace leaderboard.

{
  "headers": [
    "Model",
    "Avg",
    "Nar",
    "MT",
    "Con",
    "IF",
    "Scn",
    "Saf",
    "Att"
  ],
  "data": [
    [
      "Youtu-RoleRM",
      88.32,
      90.74,
      82.54,
      80.28,
      94,
      90.91,
      91.53,
      88.24
    ],
    [
      "<a target=\"_blank\" href=\"https://huggingface.co/internlm/internlm2-20b-reward\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">internlm/internlm2-20b-reward</a>",
      70.58,
      70.37,
      68.25,
      67.61,
      76,
      72.73,
      66.1,
      75
    ],
    [
      "<a target=\"_blank\" href=\"https://huggingface.co/allenai/Llama-3.1-Tulu-3-70B-SFT-RM-RB2\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">allenai/Llama-3.1-Tulu-3-70B-SFT-RM-RB2</a>",
      70.36,
      66.67,
      71.43,
      70.42,
      70,
      65.15,
      76.27,
      70.59
    ],
    [
      "<a target=\"_blank\" href=\"https://huggingface.co/Skywork/Skywork-Reward-V2-Qwen3-8B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Skywork/Skywork-Reward-V2-Qwen3-8B</a>",
      70.07,
      64.81,
      69.84,
      67.61,
      66,
      75.76,
      74.58,
      77.94
    ],
    [
      "GPT-5-mini-2025-08-07",
      69.3,
      68.52,
      73.02,
      59.86,
      83,
      68.94,
      70.34,
      65.44
    ],
    [
      "GPT-4o-2024-08-06",
      69.12,
      66.67,
      66.67,
      66.9,
      71,
      68.18,
      78.81,
      67.65
    ],
    [
      "<a target=\"_blank\" href=\"https://huggingface.co/internlm/internlm2-7b-reward\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">internlm/internlm2-7b-reward</a>",
      67.72,
      64.81,
      63.49,
      64.79,
      68,
      72.73,
      72.88,
      66.18
    ],
    [
      "GPT-5-2025-08-07",
      67.55,
      69.44,
      66.67,
      66.2,
      82,
      65.91,
      60.17,
      62.5
    ],
    [
      "<a target=\"_blank\" href=\"https://huggingface.co/allenai/Llama-3.1-Tulu-3-8B-DPO-RM-RB2\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">allenai/Llama-3.1-Tulu-3-8B-DPO-RM-RB2</a>",
      67.53,
      70.37,
      65.08,
      60.56,
      76,
      71.21,
      67.8,
      61.76
    ],
    [
      "<a target=\"_blank\" href=\"https://huggingface.co/allenai/Llama-3.1-70B-Instruct-RM-RB2\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">allenai/Llama-3.1-70B-Instruct-RM-RB2</a>",
      66.39,
      72.22,
      65.08,
      56.34,
      62,
      65.15,
      76.27,
      67.65
    ],
    [
      "<a target=\"_blank\" href=\"https://huggingface.co/allenai/Llama-3.1-Tulu-3-8B-RL-RM-RB2\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">allenai/Llama-3.1-Tulu-3-8B-RL-RM-RB2</a>",
      66.34,
      70.37,
      61.9,
      60.56,
      72,
      72.73,
      69.49,
      60.29
    ],
    [
      "Claude-3-7-sonnet-20250219",
      65.24,
      68.52,
      62.7,
      65.49,
      75,
      62.88,
      61.02,
      61.76
    ],
    [
      "<a target=\"_blank\" href=\"https://huggingface.co/allenai/Llama-3.1-8B-Instruct-RM-RB2\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">allenai/Llama-3.1-8B-Instruct-RM-RB2</a>",
      65.06,
      59.26,
      61.94,
      59.15,
      70,
      72.73,
      71.19,
      61.16
    ],
    [
      "<a target=\"_blank\" href=\"https://huggingface.co/allenai/Llama-3.1-Tulu-3-8B-SFT-RM-RB2\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">allenai/Llama-3.1-Tulu-3-8B-SFT-RM-RB2</a>",
      64.89,
      66.67,
      60.32,
      57.75,
      70,
      66.67,
      66.1,
      64.71
    ],
    [
      "<a target=\"_blank\" href=\"https://huggingface.co/Skywork/Skywork-Reward-V2-Llama-3.1-8B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Skywork/Skywork-Reward-V2-Llama-3.1-8B</a>",
      64.17,
      53.7,
      63.49,
      60.56,
      66,
      71.21,
      69.49,
      64.71
    ],
    [
      "<a target=\"_blank\" href=\"https://huggingface.co/morecry/BaichuanCharRM\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">CharacterRM</a>",
      61.11,
      59.26,
      65.08,
      56.34,
      72,
      66.67,
      52.54,
      55.88
    ],
    [
      "<a target=\"_blank\" href=\"https://huggingface.co/infly/INF-ORM-Llama3.1-70B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">infly/INF-ORM-Llama3.1-70B</a>",
      58.51,
      61.11,
      61.9,
      50.7,
      58,
      56.06,
      64.41,
      57.35
    ],
    [
      "<a target=\"_blank\" href=\"https://huggingface.co/Ray2333/GRM_Llama3.1_8B_rewardmodel-ft\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Ray2333/GRM_Llama3.1_8B_rewardmodel-ft</a>",
      56.5,
      53.7,
      58.73,
      57.75,
      56,
      56.06,
      59.32,
      52.94
    ],
    [
      "<a target=\"_blank\" href=\"https://huggingface.co/Skywork/Skywork-Reward-Llama-3.1-8B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Skywork/Skywork-Reward-Llama-3.1-8B</a>",
      53.5,
      48.15,
      50.79,
      50.7,
      58,
      59.09,
      55.93,
      50
    ],
    [
      "<a target=\"_blank\" href=\"https://huggingface.co/Skywork/Skywork-Reward-Llama-3.1-8B-v0.2\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Skywork/Skywork-Reward-Llama-3.1-8B-v0.2</a>",
      51.97,
      42.58,
      50.79,
      45.07,
      60,
      50.06,
      55.93,
      57.35
    ],
    [
      "<a target=\"_blank\" href=\"https://huggingface.co/nicolinho/QRM-Llama3.1-8B-v2\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">nicolinho/QRM-Llama3.1-8B-v2</a>",
      47.42,
      44.44,
      58.73,
      40.85,
      46,
      50,
      43.37,
      48.53
    ],
    [
      "<a target=\"_blank\" href=\"https://huggingface.co/NCSOFT/Llama-3-OffsetBias-RM-8B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">NCSOFT/Llama-3-OffsetBias-RM-8B</a>",
      47.17,
      44.44,
      49.21,
      39.44,
      32,
      50,
      69.49,
      45.59
    ]
  ],
  "metadata": null
}