{"payload":{"feedbackUrl":"https://github.com/orgs/community/discussions/53140","repo":{"id":778921150,"defaultBranch":"main","name":"eval-dev-quality","ownerLogin":"symflower","currentUserCanPush":false,"isFork":false,"isEmpty":false,"createdAt":"2024-03-28T17:09:34.000Z","ownerAvatar":"https://avatars.githubusercontent.com/u/20767533?v=4","public":true,"private":false,"isOrgOwned":true},"refInfo":{"name":"","listCacheKey":"v0:1716984493.0","currentOid":""},"activityList":{"items":[{"before":"3346f2c95da44880c97e74168ebb3d7c80bc9877","after":"a17ff6853d12541b92c3c8df68f8a88040b12769","ref":"refs/heads/cases-with-more-logic","pushedAt":"2024-05-29T16:56:49.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"ruiAzevedo19","name":"Rui Azevedo","path":"/ruiAzevedo19","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/36680263?s=80&v=4"},"commit":{"message":"Additional Go \"light\" test scenarios from exercism.org","shortMessageHtmlLink":"Additional Go \"light\" test scenarios from exercism.org"}},{"before":"10ab6f6e766ac4ca8b227b84d17d3efb9b03f8d0","after":"3346f2c95da44880c97e74168ebb3d7c80bc9877","ref":"refs/heads/cases-with-more-logic","pushedAt":"2024-05-29T13:29:49.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"bauersimon","name":"Simon Bauer","path":"/bauersimon","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/22770887?s=80&v=4"},"commit":{"message":"Option to disable disqualification to avoid disabling models for mistakes during a single run","shortMessageHtmlLink":"Option to disable disqualification to avoid disabling models for mist…"}},{"before":"b56cc87d8a701e49a4dad71dac68e75ae9c8d034","after":"10ab6f6e766ac4ca8b227b84d17d3efb9b03f8d0","ref":"refs/heads/cases-with-more-logic","pushedAt":"2024-05-29T12:12:05.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"bauersimon","name":"Simon Bauer","path":"/bauersimon","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/22770887?s=80&v=4"},"commit":{"message":"Option to disable disqualification to avoid disabling models for mistakes during a single run","shortMessageHtmlLink":"Option to disable disqualification to avoid disabling models for mist…"}},{"before":"5926574c52c0fece47bb08f043a3712f708e6d3a","after":null,"ref":"refs/heads/147-reset-repository-per-task","pushedAt":"2024-05-29T12:08:13.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"Munsio","name":"Martin Treml","path":"/Munsio","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/646513?s=80&v=4"}},{"before":"b4d248a177a4007a3bbd6637509b5241903a5887","after":"96debe183bfc64b25ef6b1782b7f26a81a258ce5","ref":"refs/heads/main","pushedAt":"2024-05-29T12:08:12.000Z","pushType":"pr_merge","commitsCount":6,"pusher":{"login":"Munsio","name":"Martin Treml","path":"/Munsio","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/646513?s=80&v=4"},"commit":{"message":"Merge pull request #148 from symflower/147-reset-repository-per-task\n\nReset repository per task","shortMessageHtmlLink":"Merge pull request #148 from symflower/147-reset-repository-per-task"}},{"before":"52d3bd55427ba12a2347b032dcd94d0a75365d59","after":"b56cc87d8a701e49a4dad71dac68e75ae9c8d034","ref":"refs/heads/cases-with-more-logic","pushedAt":"2024-05-29T09:49:06.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"bauersimon","name":"Simon Bauer","path":"/bauersimon","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/22770887?s=80&v=4"},"commit":{"message":"Option to disable disqualification to avoid disabling models for mistakes during a single run","shortMessageHtmlLink":"Option to disable disqualification to avoid disabling models for mist…"}},{"before":"54d7b303879de38d1951821775eb75bd5536f8ad","after":"5926574c52c0fece47bb08f043a3712f708e6d3a","ref":"refs/heads/147-reset-repository-per-task","pushedAt":"2024-05-29T09:44:50.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"bauersimon","name":"Simon Bauer","path":"/bauersimon","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/22770887?s=80&v=4"},"commit":{"message":"fix, Add repository reset mechanism inside repository evaluation so it resets on every task\n\nFixes #147","shortMessageHtmlLink":"fix, Add repository reset mechanism inside repository evaluation so i…"}},{"before":"0f3e1134b4884caee3b3b07f1b0b65efc5e6fc78","after":"54d7b303879de38d1951821775eb75bd5536f8ad","ref":"refs/heads/147-reset-repository-per-task","pushedAt":"2024-05-29T09:36:45.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"bauersimon","name":"Simon Bauer","path":"/bauersimon","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/22770887?s=80&v=4"},"commit":{"message":"fix, Add repository reset mechanism inside repository evaluation so it resets on every task\n\nFixes #147","shortMessageHtmlLink":"fix, Add repository reset mechanism inside repository evaluation so i…"}},{"before":"ba81d71f0d50e93b6262c58111d876613683aed0","after":"0f3e1134b4884caee3b3b07f1b0b65efc5e6fc78","ref":"refs/heads/147-reset-repository-per-task","pushedAt":"2024-05-29T09:25:20.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"bauersimon","name":"Simon Bauer","path":"/bauersimon","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/22770887?s=80&v=4"},"commit":{"message":"fix, Move repository reset mechanism into repository evaluation so it resets on every task\n\nFixes #147","shortMessageHtmlLink":"fix, Move repository reset mechanism into repository evaluation so it…"}},{"before":"7937f984dd898651011f2b68b02f3954cd7091d8","after":"52d3bd55427ba12a2347b032dcd94d0a75365d59","ref":"refs/heads/cases-with-more-logic","pushedAt":"2024-05-29T09:09:05.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"bauersimon","name":"Simon Bauer","path":"/bauersimon","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/22770887?s=80&v=4"},"commit":{"message":"Option to disable disqualification to avoid disabling models for mistakes during a single run","shortMessageHtmlLink":"Option to disable disqualification to avoid disabling models for mist…"}},{"before":null,"after":"ba81d71f0d50e93b6262c58111d876613683aed0","ref":"refs/heads/147-reset-repository-per-task","pushedAt":"2024-05-29T08:52:53.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"bauersimon","name":"Simon Bauer","path":"/bauersimon","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/22770887?s=80&v=4"},"commit":{"message":"fix, Move repository reset mechanism into repository evaluation so it resets on every task\n\nFixes #147","shortMessageHtmlLink":"fix, Move repository reset mechanism into repository evaluation so it…"}},{"before":"34f99fd022eb87ccddcd278f4ed25707356f4b8d","after":"7937f984dd898651011f2b68b02f3954cd7091d8","ref":"refs/heads/cases-with-more-logic","pushedAt":"2024-05-28T13:59:19.000Z","pushType":"push","commitsCount":3,"pusher":{"login":"bauersimon","name":"Simon Bauer","path":"/bauersimon","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/22770887?s=80&v=4"},"commit":{"message":"TODO, git repository cleanup is not working\n\nIn the evaluation, one can see that Gemini Flash 1.5 makes its first\nmistake in some task but the corresponding compilation error also\nshows up in the NEXT task, meaning that the test file from the task\nbefore is still in the repository, making all subsequent ones fail.","shortMessageHtmlLink":"TODO, git repository cleanup is not working"}},{"before":"001d41afc231bb68b0e4ee9cded0e87e58a6dd33","after":null,"ref":"refs/heads/145-empty-git-config","pushedAt":"2024-05-28T13:43:00.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"Munsio","name":"Martin Treml","path":"/Munsio","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/646513?s=80&v=4"}},{"before":"6b3b143b5c06371edcc63a1918e046d651acc3b1","after":"b4d248a177a4007a3bbd6637509b5241903a5887","ref":"refs/heads/main","pushedAt":"2024-05-28T13:42:59.000Z","pushType":"pr_merge","commitsCount":2,"pusher":{"login":"Munsio","name":"Martin Treml","path":"/Munsio","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/646513?s=80&v=4"},"commit":{"message":"Merge pull request #146 from symflower/145-empty-git-config\n\nUse empty Git config in temporary repositories","shortMessageHtmlLink":"Merge pull request #146 from symflower/145-empty-git-config"}},{"before":null,"after":"001d41afc231bb68b0e4ee9cded0e87e58a6dd33","ref":"refs/heads/145-empty-git-config","pushedAt":"2024-05-28T09:55:35.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"bauersimon","name":"Simon Bauer","path":"/bauersimon","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/22770887?s=80&v=4"},"commit":{"message":"fix, Use empty Git config in temporary repositories to avoid user-defined behavior\n\nIn our case, we would get asked for GPG signing.\n\nCloses #145","shortMessageHtmlLink":"fix, Use empty Git config in temporary repositories to avoid user-def…"}},{"before":"0b0458d19894c75e5c9804a769b08a08d51304a0","after":null,"ref":"refs/heads/128-character-count","pushedAt":"2024-05-28T09:22:13.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"bauersimon","name":"Simon Bauer","path":"/bauersimon","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/22770887?s=80&v=4"}},{"before":"3f70a04b7454c86cf125366d3555d41023f6210a","after":"6b3b143b5c06371edcc63a1918e046d651acc3b1","ref":"refs/heads/main","pushedAt":"2024-05-28T09:22:12.000Z","pushType":"pr_merge","commitsCount":2,"pusher":{"login":"bauersimon","name":"Simon Bauer","path":"/bauersimon","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/22770887?s=80&v=4"},"commit":{"message":"Merge pull request #142 from symflower/128-character-count\n\nTrack how many characters were present in a model response and generated test files","shortMessageHtmlLink":"Merge pull request #142 from symflower/128-character-count"}},{"before":"a49a293379ff95feb9ec39ece1a0b0a3a7feb2c5","after":"0b0458d19894c75e5c9804a769b08a08d51304a0","ref":"refs/heads/128-character-count","pushedAt":"2024-05-28T09:05:36.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"ruiAzevedo19","name":"Rui Azevedo","path":"/ruiAzevedo19","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/36680263?s=80&v=4"},"commit":{"message":"Track how many characters were present in a model response and generated test files\n\nCloses #128","shortMessageHtmlLink":"Track how many characters were present in a model response and genera…"}},{"before":"2db8b7d6f1c62f59f00f987f2b423bccea86a1ce","after":"34f99fd022eb87ccddcd278f4ed25707356f4b8d","ref":"refs/heads/cases-with-more-logic","pushedAt":"2024-05-28T07:54:18.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"ahumenberger","name":null,"path":"/ahumenberger","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/2736221?s=80&v=4"},"commit":{"message":"Additional Java \"light\" test scenarios from exercism.org","shortMessageHtmlLink":"Additional Java \"light\" test scenarios from exercism.org"}},{"before":"bbeab7d492bb5f78d5c169bce90273d982e2e1bd","after":"a49a293379ff95feb9ec39ece1a0b0a3a7feb2c5","ref":"refs/heads/128-character-count","pushedAt":"2024-05-28T07:47:14.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"ruiAzevedo19","name":"Rui Azevedo","path":"/ruiAzevedo19","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/36680263?s=80&v=4"},"commit":{"message":"Track how many characters were present in a model response and generated test files\n\nCloses #128","shortMessageHtmlLink":"Track how many characters were present in a model response and genera…"}},{"before":"8dcd379253a5ce6e40c4447d1ed7d444a6a4694f","after":"3f70a04b7454c86cf125366d3555d41023f6210a","ref":"refs/heads/main","pushedAt":"2024-05-28T06:38:38.000Z","pushType":"pr_merge","commitsCount":5,"pusher":{"login":"Munsio","name":"Martin Treml","path":"/Munsio","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/646513?s=80&v=4"},"commit":{"message":"Merge pull request #112 from symflower/111-generic-openai-api\n\nGeneric OpenAI API provider","shortMessageHtmlLink":"Merge pull request #112 from symflower/111-generic-openai-api"}},{"before":"cd93bb461bdf3c364925e415c6862c04696a520e","after":null,"ref":"refs/heads/111-generic-openai-api","pushedAt":"2024-05-28T06:38:38.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"Munsio","name":"Martin Treml","path":"/Munsio","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/646513?s=80&v=4"}},{"before":"20d0b34459eeaf019b8c75b0dfd2de6c7442bbca","after":"2db8b7d6f1c62f59f00f987f2b423bccea86a1ce","ref":"refs/heads/cases-with-more-logic","pushedAt":"2024-05-28T05:59:21.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"ahumenberger","name":null,"path":"/ahumenberger","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/2736221?s=80&v=4"},"commit":{"message":"Additional Java \"light\" test scenarios from exercism.org","shortMessageHtmlLink":"Additional Java \"light\" test scenarios from exercism.org"}},{"before":"3232e88a2297c93ecbff5898a8b0122c1f942d26","after":"bbeab7d492bb5f78d5c169bce90273d982e2e1bd","ref":"refs/heads/128-character-count","pushedAt":"2024-05-27T16:22:41.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"ruiAzevedo19","name":"Rui Azevedo","path":"/ruiAzevedo19","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/36680263?s=80&v=4"},"commit":{"message":"Track how many characters were present in a model response and generated test files\n\nCloses #128","shortMessageHtmlLink":"Track how many characters were present in a model response and genera…"}},{"before":"2647be713a268c59d6b1dee737e1bb04af814fe2","after":"3232e88a2297c93ecbff5898a8b0122c1f942d26","ref":"refs/heads/128-character-count","pushedAt":"2024-05-27T16:02:34.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"ruiAzevedo19","name":"Rui Azevedo","path":"/ruiAzevedo19","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/36680263?s=80&v=4"},"commit":{"message":"Track how many characters were present in a model response and generated test files\n\nCloses #128","shortMessageHtmlLink":"Track how many characters were present in a model response and genera…"}},{"before":"3dff4c4adb9e63b3a4076539dad98faeb4e4bede","after":"cd93bb461bdf3c364925e415c6862c04696a520e","ref":"refs/heads/111-generic-openai-api","pushedAt":"2024-05-27T13:37:09.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"bauersimon","name":"Simon Bauer","path":"/bauersimon","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/22770887?s=80&v=4"},"commit":{"message":"refactor, Extract default Ollama test model to avoud thousand magic constants","shortMessageHtmlLink":"refactor, Extract default Ollama test model to avoud thousand magic c…"}},{"before":"b0aca22b5e93211785323376a844311eed14dd2b","after":null,"ref":"refs/heads/symflower-36800","pushedAt":"2024-05-27T13:18:04.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"Munsio","name":"Martin Treml","path":"/Munsio","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/646513?s=80&v=4"}},{"before":"32fdec699de81eeab1a0da6192d815b0e04cdba1","after":"8dcd379253a5ce6e40c4447d1ed7d444a6a4694f","ref":"refs/heads/main","pushedAt":"2024-05-27T13:18:03.000Z","pushType":"pr_merge","commitsCount":2,"pusher":{"login":"Munsio","name":"Martin Treml","path":"/Munsio","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/646513?s=80&v=4"},"commit":{"message":"Merge pull request #144 from symflower/symflower-36800\n\nRequire at least symflower v36800","shortMessageHtmlLink":"Merge pull request #144 from symflower/symflower-36800"}},{"before":"db8459dba90613827ac9e10711147f4bf4a157c4","after":"2647be713a268c59d6b1dee737e1bb04af814fe2","ref":"refs/heads/128-character-count","pushedAt":"2024-05-27T10:52:20.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"ruiAzevedo19","name":"Rui Azevedo","path":"/ruiAzevedo19","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/36680263?s=80&v=4"},"commit":{"message":"Track how many characters were present in a model response and generated test files\n\nCloses #128","shortMessageHtmlLink":"Track how many characters were present in a model response and genera…"}},{"before":null,"after":"b0aca22b5e93211785323376a844311eed14dd2b","ref":"refs/heads/symflower-36800","pushedAt":"2024-05-27T09:27:37.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"bauersimon","name":"Simon Bauer","path":"/bauersimon","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/22770887?s=80&v=4"},"commit":{"message":"Require at least symflower v36800 as it includes a fix for coverage extraction with exceptions","shortMessageHtmlLink":"Require at least symflower v36800 as it includes a fix for coverage e…"}}],"hasNextPage":true,"hasPreviousPage":false,"activityType":"all","actor":null,"timePeriod":"all","sort":"DESC","perPage":30,"cursor":"djE6ks8AAAAEVzGNbwA","startCursor":null,"endCursor":null}},"title":"Activity · symflower/eval-dev-quality"}