Tools and datasets for generating and validating OLMo pre-training data
Dolma is a project developed by the Allen Institute for AI, aimed at providing tools and datasets to help researchers generate and verify data used for OLMo (Open Language Model) pre-training. The project is open-source on GitHub, offering valuable resources to researchers in natural language processing.
This is the machine-readable structured data for this agent. AI systems and search engines use this to understand the agent's capabilities.
[
{
"@context": "https://schema.org",
"@type": "SoftwareApplication",
"@id": "https://agentsignals.ai/agents/dolma",
"name": "dolma",
"description": "Dolma is a project developed by the Allen Institute for AI, aimed at providing tools and datasets to help researchers generate and verify data used for OLMo (One Language Model) pre-training. The project is open-source on GitHub, offering valuable resources to researchers in natural language processing.",
"url": "https://agentsignals.ai/agents/dolma",
"applicationCategory": "研究",
"operatingSystem": "GitHub",
"sameAs": "https://github.com/allenai/dolma",
"installUrl": "https://github.com/allenai/dolma",
"offers": {
"@type": "Offer",
"price": "0",
"priceCurrency": "USD",
"description": "免费",
"availability": "https://schema.org/InStock"
},
"featureList": [
"Generate pre-training data",
"Check data quality",
"Open-source tools"
],
"datePublished": "2025-12-05T17:16:28.5669+00:00",
"dateModified": "2025-12-19T05:06:22.219736+00:00",
"publisher": {
"@type": "Organization",
"name": "Agent Signals",
"url": "https://agentsignals.ai"
}
},
{
"@context": "https://schema.org",
"@type": "BreadcrumbList",
"itemListElement": [
{
"@type": "ListItem",
"position": 1,
"name": "Home",
"item": "https://agentsignals.ai"
},
{
"@type": "ListItem",
"position": 2,
"name": "Agents",
"item": "https://agentsignals.ai/agents"
},
{
"@type": "ListItem",
"position": 3,
"name": "dolma",
"item": "https://agentsignals.ai/agents/dolma"
}
]
},
{
"@context": "https://schema.org",
"@type": "FAQPage",
"mainEntity": [
{
"@type": "Question",
"name": "What is dolma?",
"acceptedAnswer": {
"@type": "Answer",
"text": "Tools and datasets for generating and validating OLMo pre-training data"
}
},
{
"@type": "Question",
"name": "What features does dolma offer?",
"acceptedAnswer": {
"@type": "Answer",
"text": "Generate pre-training data, Check data quality, Open-source tools"
}
},
{
"@type": "Question",
"name": "What are the use cases for dolma?",
"acceptedAnswer": {
"@type": "Answer",
"text": "Natural Language Processing Research, Pre-trained Model Development, Data Quality Evaluation"
}
},
{
"@type": "Question",
"name": "What are the advantages of dolma?",
"acceptedAnswer": {
"@type": "Answer",
"text": "开源,社区支持, 专为研究设计, 灵活的数据处理工具"
}
},
{
"@type": "Question",
"name": "What are the limitations of dolma?",
"acceptedAnswer": {
"@type": "Answer",
"text": "可能需要一定的编程知识, 主要针对专业研究者"
}
}
]
}
]