🎠 RoleRMBench Leaderboard
RoleRMBench evaluates reward models on role-playing scenarios across multiple dimensions.
For more information, please refer to: https://github.com/Dear-Sloth/RoleRMBench
Feel free to submit your results to our 🤗 HuggingFace leaderboard.
{
  "headers": [
    "Model",
    "Avg",
    "Nar",
    "MT",
    "Con",
    "IF",
    "Scn",
    "Saf",
    "Att"
  ],
  "data": [
    ["Youtu-RoleRM", 88.32, 90.74, 82.54, 80.28, 94, 90.91, 91.53, 88.24],
    ["<a target=\"_blank\" href=\"https://huggingface.co/internlm/internlm2-20b-reward\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">internlm/internlm2-20b-reward</a>", 70.58, 70.37, 68.25, 67.61, 76, 72.73, 66.1, 75],
    ["<a target=\"_blank\" href=\"https://huggingface.co/allenai/Llama-3.1-Tulu-3-70B-SFT-RM-RB2\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">allenai/Llama-3.1-Tulu-3-70B-SFT-RM-RB2</a>", 70.36, 66.67, 71.43, 70.42, 70, 65.15, 76.27, 70.59],
    ["<a target=\"_blank\" href=\"https://huggingface.co/Skywork/Skywork-Reward-V2-Qwen3-8B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Skywork/Skywork-Reward-V2-Qwen3-8B</a>", 70.07, 64.81, 69.84, 67.61, 66, 75.76, 74.58, 77.94],
    ["GPT-5-mini-2025-08-07", 69.3, 68.52, 73.02, 59.86, 83, 68.94, 70.34, 65.44],
    ["GPT-4o-2024-08-06", 69.12, 66.67, 66.67, 66.9, 71, 68.18, 78.81, 67.65],
    ["<a target=\"_blank\" href=\"https://huggingface.co/internlm/internlm2-7b-reward\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">internlm/internlm2-7b-reward</a>", 67.72, 64.81, 63.49, 64.79, 68, 72.73, 72.88, 66.18],
    ["GPT-5-2025-08-07", 67.55, 69.44, 66.67, 66.2, 82, 65.91, 60.17, 62.5],
    ["<a target=\"_blank\" href=\"https://huggingface.co/allenai/Llama-3.1-Tulu-3-8B-DPO-RM-RB2\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">allenai/Llama-3.1-Tulu-3-8B-DPO-RM-RB2</a>", 67.53, 70.37, 65.08, 60.56, 76, 71.21, 67.8, 61.76],
    ["<a target=\"_blank\" href=\"https://huggingface.co/allenai/Llama-3.1-70B-Instruct-RM-RB2\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">allenai/Llama-3.1-70B-Instruct-RM-RB2</a>", 66.39, 72.22, 65.08, 56.34, 62, 65.15, 76.27, 67.65],
    ["<a target=\"_blank\" href=\"https://huggingface.co/allenai/Llama-3.1-Tulu-3-8B-RL-RM-RB2\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">allenai/Llama-3.1-Tulu-3-8B-RL-RM-RB2</a>", 66.34, 70.37, 61.9, 60.56, 72, 72.73, 69.49, 60.29],
    ["Claude-3-7-sonnet-20250219", 65.24, 68.52, 62.7, 65.49, 75, 62.88, 61.02, 61.76],
    ["<a target=\"_blank\" href=\"https://huggingface.co/allenai/Llama-3.1-8B-Instruct-RM-RB2\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">allenai/Llama-3.1-8B-Instruct-RM-RB2</a>", 65.06, 59.26, 61.94, 59.15, 70, 72.73, 71.19, 61.16],
    ["<a target=\"_blank\" href=\"https://huggingface.co/allenai/Llama-3.1-Tulu-3-8B-SFT-RM-RB2\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">allenai/Llama-3.1-Tulu-3-8B-SFT-RM-RB2</a>", 64.89, 66.67, 60.32, 57.75, 70, 66.67, 66.1, 64.71],
    ["<a target=\"_blank\" href=\"https://huggingface.co/Skywork/Skywork-Reward-V2-Llama-3.1-8B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Skywork/Skywork-Reward-V2-Llama-3.1-8B</a>", 64.17, 53.7, 63.49, 60.56, 66, 71.21, 69.49, 64.71],
    ["<a target=\"_blank\" href=\"https://huggingface.co/morecry/BaichuanCharRM\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">CharacterRM</a>", 61.11, 59.26, 65.08, 56.34, 72, 66.67, 52.54, 55.88],
    ["<a target=\"_blank\" href=\"https://huggingface.co/infly/INF-ORM-Llama3.1-70B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">infly/INF-ORM-Llama3.1-70B</a>", 58.51, 61.11, 61.9, 50.7, 58, 56.06, 64.41, 57.35],
    ["<a target=\"_blank\" href=\"https://huggingface.co/Ray2333/GRM_Llama3.1_8B_rewardmodel-ft\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Ray2333/GRM_Llama3.1_8B_rewardmodel-ft</a>", 56.5, 53.7, 58.73, 57.75, 56, 56.06, 59.32, 52.94],
    ["<a target=\"_blank\" href=\"https://huggingface.co/Skywork/Skywork-Reward-Llama-3.1-8B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Skywork/Skywork-Reward-Llama-3.1-8B</a>", 53.5, 48.15, 50.79, 50.7, 58, 59.09, 55.93, 50],
    ["<a target=\"_blank\" href=\"https://huggingface.co/Skywork/Skywork-Reward-Llama-3.1-8B-v0.2\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Skywork/Skywork-Reward-Llama-3.1-8B-v0.2</a>", 51.97, 42.58, 50.79, 45.07, 60, 50.06, 55.93, 57.35],
    ["<a target=\"_blank\" href=\"https://huggingface.co/nicolinho/QRM-Llama3.1-8B-v2\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">nicolinho/QRM-Llama3.1-8B-v2</a>", 47.42, 44.44, 58.73, 40.85, 46, 50, 43.37, 48.53],
    ["<a target=\"_blank\" href=\"https://huggingface.co/NCSOFT/Llama-3-OffsetBias-RM-8B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">NCSOFT/Llama-3-OffsetBias-RM-8B</a>", 47.17, 44.44, 49.21, 39.44, 32, 50, 69.49, 45.59]
  ],
  "metadata": null
}