{"id": 1012325, "name": "Coding competitions", "unit": "%", "createdAt": "2025-02-17T13:09:24.000Z", "updatedAt": "2025-04-22T21:23:29.000Z", "coverage": "", "timespan": "", "datasetId": 6944, "shortUnit": "%", "columnOrder": 0, "shortName": "performance_code_any_competition", "catalogPath": "grapher/artificial_intelligence/2025-02-17/papers_with_code_coding/papers_with_code_coding#performance_code_any_competition", "type": "float", "dataChecksum": "13474649716071204593", "metadataChecksum": "-8356064199487462467", "datasetName": "AI Performance on Coding Problems", "datasetVersion": "2025-02-17", "nonRedistributable": false, "display": {"unit": "%", "zeroDay": "2021-05-20", "shortUnit": "%", "yearIsDay": true, "numDecimalPlaces": 1}, "schemaVersion": 2, "processingLevel": "major", "presentation": {"topicTagsLinks": ["Artificial Intelligence"]}, "descriptionKey": ["This benchmark measures the accuracy of models in coding competitions based on the APPS benchmark. The APPS benchmark focuses on coding ability and problem-solving in a natural language context. It aims to replicate the evaluation process used for human programmers by presenting coding problems in unrestricted natural language and assessing the correctness of solutions.", "The coding tasks included in this benchmark are sourced from open-access coding websites such as Codeforces and Kattis. These tasks span a range of difficulty levels, from introductory to collegiate competition level. The benchmark evaluates the accuracy of models in solving programming tasks specifically designed for coding competitions."], "dimensions": {"years": {"values": [{"id": 0}, {"id": 48}, {"id": 264}, {"id": 427}, {"id": 482}, {"id": 1094}]}, "entities": {"values": [{"id": 369310, "name": "State of the art", "code": null}]}}, "origins": [{"id": 2881, "title": "AI Performance on Coding Problems", "descriptionSnapshot": "Performance on the APPS coding dataset. \nThe APPS dataset consists of problems collected from different open-access coding websites such as Codeforces, Kattis, and more. The APPS benchmark attempts to mirror how human programmers are evaluated by posing coding problems in unrestricted natural language and evaluating the correctness of solutions. The problems range in difficulty from introductory to collegiate competition level and measure coding ability as well as problem-solving.\n", "producer": "Papers with Code", "citationFull": "Code Generation on APPS. Papers with Code (2025)", "urlMain": "https://paperswithcode.com/sota/code-generation-on-apps", "urlDownload": "https://paperswithcode.com/sota/code-generation-on-apps", "dateAccessed": "2025-02-17", "datePublished": "2025-02-17", "license": {"url": "https://creativecommons.org/licenses/by-sa/4.0/", "name": "CC BY-SA 4.0"}}]}