🎠 RoleRMBench Leaderboard
RoleRMBench evaluates reward models on role-playing scenarios across multiple dimensions.
For more information, please refer to: https://github.com/Dear-Sloth/RoleRMBench
Feel free to submit your results to our 🤗 HuggingFace leaderboard.
{
  "headers": [
    "Model",
    "Avg",
    "Nar",
    "MT",
    "Con",
    "IF",
    "Scn",
    "Saf",
    "Att"
  ],
  "data": [
    ["Youtu-RoleRM", 88.32, 90.74, 82.54, 80.28, 94, 90.91, 91.53, 88.24],
    ["<a target=\"_blank\" href=\"https://huggingface.co/internlm/internlm2-20b-reward\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">internlm/internlm2-20b-reward</a>", 70.58, 70.37, 68.25, 67.61, 76, 72.73, 66.1, 75],
    ["<a target=\"_blank\" href=\"https://huggingface.co/allenai/Llama-3.1-Tulu-3-70B-SFT-RM-RB2\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">allenai/Llama-3.1-Tulu-3-70B-SFT-RM-RB2</a>", 70.36, 66.67, 71.43, 70.42, 70, 65.15, 76.27, 70.59],
    ["<a target=\"_blank\" href=\"https://huggingface.co/Skywork/Skywork-Reward-V2-Qwen3-8B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Skywork/Skywork-Reward-V2-Qwen3-8B</a>", 70.07, 64.81, 69.84, 67.61, 66, 75.76, 74.58, 77.94],
    ["GPT-5-mini-2025-08-07", 69.3, 68.52, 73.02, 59.86, 83, 68.94, 70.34, 65.44],
    ["GPT-4o-2024-08-06", 69.12, 66.67, 66.67, 66.9, 71, 68.18, 78.81, 67.65],
    ["<a target=\"_blank\" href=\"https://huggingface.co/internlm/internlm2-7b-reward\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">internlm/internlm2-7b-reward</a>", 67.72, 64.81, 63.49, 64.79, 68, 72.73, 72.88, 66.18],
    ["GPT-5-2025-08-07", 67.55, 69.44, 66.67, 66.2, 82, 65.91, 60.17, 62.5],
    ["<a target=\"_blank\" href=\"https://huggingface.co/allenai/Llama-3.1-Tulu-3-8B-DPO-RM-RB2\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">allenai/Llama-3.1-Tulu-3-8B-DPO-RM-RB2</a>", 67.53, 70.37, 65.08, 60.56, 76, 71.21, 67.8, 61.76],
    ["<a target=\"_blank\" href=\"https://huggingface.co/allenai/Llama-3.1-70B-Instruct-RM-RB2\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">allenai/Llama-3.1-70B-Instruct-RM-RB2</a>", 66.39, 72.22, 65.08, 56.34, 62, 65.15, 76.27, 67.65],
    ["<a target=\"_blank\" href=\"https://huggingface.co/allenai/Llama-3.1-Tulu-3-8B-RL-RM-RB2\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">allenai/Llama-3.1-Tulu-3-8B-RL-RM-RB2</a>", 66.34, 70.37, 61.9, 60.56, 72, 72.73, 69.49, 60.29],
    ["Claude-3-7-sonnet-20250219", 65.24, 68.52, 62.7, 65.49, 75, 62.88, 61.02, 61.76],
    ["<a target=\"_blank\" href=\"https://huggingface.co/allenai/Llama-3.1-8B-Instruct-RM-RB2\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">allenai/Llama-3.1-8B-Instruct-RM-RB2</a>", 65.06, 59.26, 61.94, 59.15, 70, 72.73, 71.19, 61.16],
    ["<a target=\"_blank\" href=\"https://huggingface.co/allenai/Llama-3.1-Tulu-3-8B-SFT-RM-RB2\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">allenai/Llama-3.1-Tulu-3-8B-SFT-RM-RB2</a>", 64.89, 66.67, 60.32, 57.75, 70, 66.67, 66.1, 64.71],
    ["<a target=\"_blank\" href=\"https://huggingface.co/Skywork/Skywork-Reward-V2-Llama-3.1-8B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Skywork/Skywork-Reward-V2-Llama-3.1-8B</a>", 64.17, 53.7, 63.49, 60.56, 66, 71.21, 69.49, 64.71],
    ["<a target=\"_blank\" href=\"https://huggingface.co/morecry/BaichuanCharRM\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">CharacterRM</a>", 61.11, 59.26, 65.08, 56.34, 72, 66.67, 52.54, 55.88],
    ["<a target=\"_blank\" href=\"https://huggingface.co/infly/INF-ORM-Llama3.1-70B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">infly/INF-ORM-Llama3.1-70B</a>", 58.51, 61.11, 61.9, 50.7, 58, 56.06, 64.41, 57.35],
    ["<a target=\"_blank\" href=\"https://huggingface.co/Ray2333/GRM_Llama3.1_8B_rewardmodel-ft\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Ray2333/GRM_Llama3.1_8B_rewardmodel-ft</a>", 56.5, 53.7, 58.73, 57.75, 56, 56.06, 59.32, 52.94],
    ["<a target=\"_blank\" href=\"https://huggingface.co/Skywork/Skywork-Reward-Llama-3.1-8B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Skywork/Skywork-Reward-Llama-3.1-8B</a>", 53.5, 48.15, 50.79, 50.7, 58, 59.09, 55.93, 50],
    ["<a target=\"_blank\" href=\"https://huggingface.co/Skywork/Skywork-Reward-Llama-3.1-8B-v0.2\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Skywork/Skywork-Reward-Llama-3.1-8B-v0.2</a>", 51.97, 42.58, 50.79, 45.07, 60, 50.06, 55.93, 57.35],
    ["<a target=\"_blank\" href=\"https://huggingface.co/nicolinho/QRM-Llama3.1-8B-v2\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">nicolinho/QRM-Llama3.1-8B-v2</a>", 47.42, 44.44, 58.73, 40.85, 46, 50, 43.37, 48.53],
    ["<a target=\"_blank\" href=\"https://huggingface.co/NCSOFT/Llama-3-OffsetBias-RM-8B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">NCSOFT/Llama-3-OffsetBias-RM-8B</a>", 47.17, 44.44, 49.21, 39.44, 32, 50, 69.49, 45.59]
  ],
  "metadata": null
}