LongBench is a benchmarking tool for evaluating the performance of long text generation models.
LongBench v2 and LongBench (ACL 2025 and ACL 2024) are datasets and evaluation frameworks from Tsinghua University, designed to systematically assess and compare the performance of long text generation models. This tool not only provides rich datasets but also includes detailed evaluation metrics and methods, suitable for model evaluation in both academic research and industrial applications.
This is the machine-readable structured data for this agent. AI systems and search engines use this to understand the agent's capabilities.
[
{
"@context": "https://schema.org",
"@type": "SoftwareApplication",
"@id": "https://agentsignals.ai/agents/longbench",
"name": "LongBench",
"description": "LongBench v2 and LongBench (ACL 2025 and ACL 2024) are datasets and evaluation frameworks from Tsinghua University, designed to systematically assess and compare the performance of long text generation models. This tool not only provides rich datasets but also includes detailed evaluation metrics and methods, suitable for model evaluation in both academic research and industrial applications.",
"url": "https://agentsignals.ai/agents/longbench",
"applicationCategory": "Research",
"operatingSystem": "GitHub",
"sameAs": "https://github.com/THUDM/LongBench",
"installUrl": "https://github.com/THUDM/LongBench",
"offers": {
"@type": "Offer",
"price": "0",
"priceCurrency": "USD",
"description": "Free",
"availability": "https://schema.org/InStock"
},
"featureList": [
"Rich long text datasets",
"Detailed evaluation metrics",
"Supports multiple models"
],
"datePublished": "2025-12-05T17:17:42.91736+00:00",
"dateModified": "2025-12-20T18:04:50.895678+00:00",
"publisher": {
"@type": "Organization",
"name": "Agent Signals",
"url": "https://agentsignals.ai"
}
},
{
"@context": "https://schema.org",
"@type": "BreadcrumbList",
"itemListElement": [
{
"@type": "ListItem",
"position": 1,
"name": "Home",
"item": "https://agentsignals.ai"
},
{
"@type": "ListItem",
"position": 2,
"name": "Agents",
"item": "https://agentsignals.ai/agents"
},
{
"@type": "ListItem",
"position": 3,
"name": "LongBench",
"item": "https://agentsignals.ai/agents/longbench"
}
]
},
{
"@context": "https://schema.org",
"@type": "FAQPage",
"mainEntity": [
{
"@type": "Question",
"name": "What is LongBench?",
"acceptedAnswer": {
"@type": "Answer",
"text": "LongBench is a benchmarking tool for evaluating the performance of long text generation models."
}
},
{
"@type": "Question",
"name": "What features does LongBench offer?",
"acceptedAnswer": {
"@type": "Answer",
"text": "Rich long text datasets, Detailed evaluation metrics, Supports multiple models"
}
},
{
"@type": "Question",
"name": "What are the use cases for LongBench?",
"acceptedAnswer": {
"@type": "Answer",
"text": "Model Performance Evaluation, Academic Research Support, Industrial Application Testing"
}
},
{
"@type": "Question",
"name": "What are the advantages of LongBench?",
"acceptedAnswer": {
"@type": "Answer",
"text": "Comprehensive datasets, Clear evaluation criteria, Easy to use"
}
},
{
"@type": "Question",
"name": "What are the limitations of LongBench?",
"acceptedAnswer": {
"@type": "Answer",
"text": "May require substantial computing resources, Primarily suited to research settings"
}
}
]
}
]