{"id": 1012324, "name": "Coding interviews", "unit": "%", "createdAt": "2025-02-17T13:09:24.000Z", "updatedAt": "2025-04-22T21:23:29.000Z", "coverage": "", "timespan": "", "datasetId": 6944, "shortUnit": "%", "columnOrder": 0, "shortName": "performance_code_any_interview", "catalogPath": "grapher/artificial_intelligence/2025-02-17/papers_with_code_coding/papers_with_code_coding#performance_code_any_interview", "type": "float", "dataChecksum": "8267720812551793537", "metadataChecksum": "-7318216833685364619", "datasetName": "AI Performance on Coding Problems", "datasetVersion": "2025-02-17", "nonRedistributable": false, "display": {"unit": "%", "zeroDay": "2021-05-20", "shortUnit": "%", "yearIsDay": true, "numDecimalPlaces": 1}, "schemaVersion": 2, "processingLevel": "major", "presentation": {"topicTagsLinks": ["Artificial Intelligence"]}, "descriptionKey": ["This benchmark assesses the accuracy of models in coding interviews based on the APPS benchmark. The APPS benchmark focuses on coding ability and problem-solving in a natural language context, simulating the evaluation process employed during human programmer interviews. It presents coding problems in unrestricted natural language and evaluates the correctness of solutions.", "The coding tasks within this benchmark are sourced from open-access coding websites such as Codeforces and Kattis. These tasks cover a spectrum of difficulty levels, ranging from introductory to collegiate competition level. The benchmark measures the accuracy of models in solving programming tasks specifically tailored for coding interviews."], "dimensions": {"years": {"values": [{"id": 0}, {"id": 48}, {"id": 264}, {"id": 427}]}, "entities": {"values": [{"id": 369310, "name": "State of the art", "code": null}]}}, "origins": [{"id": 2881, "title": "AI Performance on Coding Problems", "descriptionSnapshot": "Performance on the APPS coding dataset. 
The APPS dataset consists of problems collected from different open-access coding websites such as Codeforces, Kattis, and more. The APPS benchmark attempts to mirror how human programmers are evaluated by posing coding problems in unrestricted natural language and evaluating the correctness of solutions. The problems range in difficulty from introductory to collegiate competition level and measure coding ability as well as problem-solving.\n", "producer": "Papers with Code", "citationFull": "Code Generation on APPS. Papers with Code (2025)", "urlMain": "https://paperswithcode.com/sota/code-generation-on-apps", "urlDownload": "https://paperswithcode.com/sota/code-generation-on-apps", "dateAccessed": "2025-02-17", "datePublished": "2025-02-17", "license": {"url": "https://creativecommons.org/licenses/by-sa/4.0/", "name": "CC BY-SA 4.0"}}]}