{"payload":{"feedbackUrl":"https://github.com/orgs/community/discussions/53140","repo":{"id":222138340,"defaultBranch":"dev","name":"deepworlds","ownerLogin":"aidudezzz","currentUserCanPush":false,"isFork":false,"isEmpty":false,"createdAt":"2019-11-16T17:56:16.000Z","ownerAvatar":"https://avatars.githubusercontent.com/u/57842071?v=4","public":true,"private":false,"isOrgOwned":true},"refInfo":{"name":"","listCacheKey":"v0:1707393869.0","currentOid":""},"activityList":{"items":[{"before":"67e33bb9feffa2fbe0773c7d57c73cccc16cf939","after":"60b8fc4ad613a32d72f4238472d87f72bbfcd9ed","ref":"refs/heads/dev","pushedAt":"2024-03-22T22:03:39.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"tsampazk","name":"Kostas Tsampazis","path":"/tsampazk","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/27914645?s=80&v=4"},"commit":{"message":"Added paper link and minor text modification","shortMessageHtmlLink":"Added paper link and minor text modification"}},{"before":"4ac0b314e33185e12901f3f733a883785c64e3fe","after":null,"ref":"refs/heads/find-and-avoid-v2","pushedAt":"2024-02-08T12:04:29.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"tsampazk","name":"Kostas Tsampazis","path":"/tsampazk","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/27914645?s=80&v=4"}},{"before":"703ade43549a4b5bc3b560cd5f02bd75111cb58d","after":"67e33bb9feffa2fbe0773c7d57c73cccc16cf939","ref":"refs/heads/dev","pushedAt":"2023-10-10T13:00:42.000Z","pushType":"pr_merge","commitsCount":2,"pusher":{"login":"KelvinYang0320","name":"Jiun Kai Yang","path":"/KelvinYang0320","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49781698?s=80&v=4"},"commit":{"message":"Updated SB3<=1.8.0 for gym","shortMessageHtmlLink":"Updated SB3&lt;=1.8.0 for gym"}},{"before":"68c797f61ac0add58550b61922338a7121c70389","after":"37d4ddaeb665605b58d00613cbc3f0af4e2053b3","ref":"refs/heads/double-cartpole","pushedAt":"2023-07-07T14:27:51.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"tsampazk","name":"Kostas Tsampazis","path":"/tsampazk","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/27914645?s=80&v=4"},"commit":{"message":"Removed middle pole bounding object that was causing collisions with robot2 node tree","shortMessageHtmlLink":"Removed middle pole bounding object that was causing collisions with …"}},{"before":"886fbd43424c82d90e3c4e731d2ba266d1144e48","after":"68c797f61ac0add58550b61922338a7121c70389","ref":"refs/heads/double-cartpole","pushedAt":"2023-07-07T12:05:48.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"tsampazk","name":"Kostas Tsampazis","path":"/tsampazk","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/27914645?s=80&v=4"},"commit":{"message":"Added missing position sensor and endpoint solid for middle pole","shortMessageHtmlLink":"Added missing position sensor and endpoint solid for middle pole"}},{"before":"501fdfb0963cfc2dcb41b5144ea625cf7def2d20","after":null,"ref":"refs/heads/find_avoid_v2_anim","pushedAt":"2023-07-07T10:45:56.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"tsampazk","name":"Kostas Tsampazis","path":"/tsampazk","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/27914645?s=80&v=4"}},{"before":"277b19af500e71dfb079742359e12c5dbcd07786","after":"703ade43549a4b5bc3b560cd5f02bd75111cb58d","ref":"refs/heads/dev","pushedAt":"2023-07-07T10:45:52.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"tsampazk","name":"Kostas Tsampazis","path":"/tsampazk","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/27914645?s=80&v=4"},"commit":{"message":"Added animation of trained agent in find and avoid v2","shortMessageHtmlLink":"Added animation of trained agent in find and avoid v2"}},{"before":"9c6230cca39c621cf6d2656e5d3c195f2f67a045","after":"501fdfb0963cfc2dcb41b5144ea625cf7def2d20","ref":"refs/heads/find_avoid_v2_anim","pushedAt":"2023-07-07T10:45:04.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"tsampazk","name":"Kostas Tsampazis","path":"/tsampazk","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/27914645?s=80&v=4"},"commit":{"message":"Added animation of trained agent in find and avoid v2","shortMessageHtmlLink":"Added animation of trained agent in find and avoid v2"}},{"before":null,"after":"9c6230cca39c621cf6d2656e5d3c195f2f67a045","ref":"refs/heads/find_avoid_v2_anim","pushedAt":"2023-07-07T10:41:59.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"tsampazk","name":"Kostas Tsampazis","path":"/tsampazk","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/27914645?s=80&v=4"},"commit":{"message":"Find and Avoid V2\n\nAdded animation of trained agent in find and avoid v2\n\nUpdated wbt file for latest webots version (2023b)\n\nRemoved .csv from test results filename resulting in double postfix\n\nRemoved dynamic angle threshold in masking\n\nMinor fixes in action-related comments\n\nClarified wording for sb3 evaluation\n\nApply suggestions from code review\n\nCo-authored-by: Jiun Kai Yang <kelvin777320@gmail.com>\n\nAdded general problem description in environment docstring\n\nRenamed deprivation to denial\n\nRenamed deprivation to denial\n\nRenamed deprivation to denial and fixed minor bug for starting ds values\n\nUpdated readme\n\nAdded trained agent with exported parameters and training tensorboard logs\n\nAdded deprivation list as argument for trainer run, in case one needs to train with ds denial\n\nFix in deprivation list attribute setting\n\nFixed masking, re-added conditional for printing at the end of episode\n\nAdded distance sensor deprivation, with explanation. Only used in testing.\n\nAdded distance sensor deprivation\n\nMajor updates:\n1. Moved all user modifiable params to run function arguments\n2. Modified logging with new metrics\n3. Refactored and simplified code that runs training, removed code duplication\n\nMoved all user modifiable parameters here\n\nCleanup, refactoring and improvements\n\nMajor updates on robot:\n1. Slightly changed distance sensors positions\n2. Added second touch sensor, now have separate left/right touch sensors\n\nMajor updates on environment:\n1. Modified reward function by a lot\n2. Added left-right touch sensors\n3. Added more parameterization for distance sensors\n4. Heavily modified masking method, crucial difference from previous version\n5. Slightly changed episode termination logic, cleaned up\n6. Changed action scheme to incremental actions instead of separate motor control\n7. Major cleanups, docstring finalization and refactoring\n\nFixed rotation bugs and fixed and simplified smoothness reward\n\nAdded a method to set the reward weights\n\nAdded setting of reward weights before testing\n\nFix in distance reward\n\nRemoved unused stuff in comments\n\nFixed initialization of distance and angle bugs\n\nRemoved todos from observation testing and distance reward, added smoothness reward\n\nAdded smoothness weight\n\nRemoved deleted method\n\nDeleted leftover stuff\n\nRemoved box difficulty\n\nAdded requirements\n\nAdded initial version of example README\n\nRemoved obs turning reward stuff and normed reward\n\nRemoved multiple testing done prints\n\nRemoved done reason printing\n\nMoved sb3 testing to this file, added more prints, added success percentage during custom testing and refactored it a bit, added sb3 rew and std to csv file, added a simple loop to keep the sim running after finishing evaluation\n\nAdded option to skip training and go straight to testing, added some general stuff as arguments, removed sb3 testing and moved it to testing script\n\nAdded some general easy to access parameters for starting experiments\n\nRemoved unused plot function\n\nRemoved unused stuff, fixed env class name after rename, added some prints for testing\n\nRenamed class, left some TODOs and updated docstrings\n\nRenamed to find and avoid v2 and deleted unused stuff\n\nAdded stuff from map branch:\n1. Removed obstacle markers\n2. Added more entries to ds sensors to properly set noise\n\nAdded stuff from map branch:\n1. Changed manual control motor speeds\n2. Removed unused virtual angle stuff in comments\n3. Added proper ds noise\n\nFixed testing seed to get identical test maps\n\nFixed minor 1-off bug with diluted observation\n\nNew baseline params\n\nChangelog:\n1. Added HPARAMS on tensorboard logging\n2. Minor fix on n_steps value to be multiple of 2\n3. Added Monitor wrapper as it is required by sb3 evaluation\n4. Added sb3 policy evaluation\n\nAdded some prints\n\nRefactored to run testing sequentially after training\n\nChangelog:\n1. Refactored to run sequentially with tester\n2. Increased base maximum episode steps\n3. Got rid of timeout wrapper as env is incorporating it\n\nChangelog:\n1. Refactored to run sequentially with trainer\n2. Removed unused imports\n3. Automated testing now runs 100 episodes per difficulty\n4. Added printing of test progress\n\nChangelog:\n1. Incorporated timeout properly\n2. Fixed done reason timeout stuff\n3. Fixed not reach reward normalization range\n4. Added method to properly set the max episode steps\n\nChangelog:\n1. Added maximum episode steps argument to identify timeouts\n2. added a not reach weight that penalizes robot based on distance achieved at timeout, to do this minimum distance is tracked\n3. added touch sensor to observation\n4. added metrics for reset count, reach target count, collision termination count\n5. modified unmasking of backward action\n6. changed distance to target obs normalization to be based on initial distance as max\n7. changed distance reward to be based on minimum distance achieved\n8. changed dist sensor reward to take the mean of the all sensor penalties, but normalized it from realistic max value\n9. added option for setting reset_on_collisions to -1 to never reset from collisions\n10. some updates to export parameters\n\nAdded conv1d actor and critic in comments\n\nRemoved unused stuff\n\nSome improvements\n\nAdded the two new runners\n\nSeparated training and testing in separate runners\n\nMajor rearrangement of parameters, modified and fixed some difficulty stuff, new baseline default parameters\n\nAdded obstacle turning reward and obstacle ds markers stuff in comments\n\nAdded distance sensor markers\n\nAdded obstacle turning weight\n\nAdded special case when the target is right on top of the robot\n\nMinor adjustments in resetting angle and distance observations and readded some rounding in the observation\n\nimproved testing\n\nMinor fix in action comment\n\nNew default multipliers\n\nAdded csv output of test results\n\nOverhauled testing after training\n\nAdded printing of difficulty key, added info return done reason, moved trigger_done flag reset to reset, fixed walls position resetting\n\nExperiment name and diff readded to logger on rollout end\n\nMinor refactoring based on large refactoring on the env\n\nMajor cleanup, docstrings, ds noise argument, fix minor bug in apply_action, fixed minor bug not resetting observation counter, observation memory and applying zero motor velocities\n\nAdded load path, increased default ds threshold, changed test diff to random, added reward printing for test\n\nAdded ds threshold on export params\n\nMore masking, backward\n\nImproved manual control\n\nBetter parameter defaults\n\nBetter masking for left-right, added various new parameters to export\n\nDifferent action scheme, actions now directly change motor speeds\n\nAdded multipliers for target rewards when obstacles are detected.\n\nAdded ds type argument\n\nFixed reset_on_collisions argument and reset of counter\n\nFixed corridor walls bounding box\n\nAdded random corridor map feature\n\nAdded corridor difficulty\n\nAdded corridor walls\n\nReduced steps timelimit, normalized reward weights a bit, fixed saved model names, added flag for continuous lines in tensorboard\n\nSimplified ds reward and added controls to print obs and rew\n\nDistance sensor reward is now based on rate of change as before\n\nAdded a threshold for the distance sensor rewards\n\nAdded a threshold for the distance sensor rewards\n\nRearranged imports and added torch and numpy seeding\n\nMinor fixes and changed default parameters\n\nCommented out dynamic action masking, which doesn't work that well anymore with proper sonar distance sensors\n\nAdded random seed, fixed initial tar d reward, added normed reward metric calculation and resetting, removed tar ang dist reward decrease when obstacles are detected\n\nBetter logger management, added seed, added avg normed reward metric\n\nDecreased arena size\n\nAdded ds range in export params\n\nRemoved reward normalization\n\nModified masking, added dynamic max ds range via argument\n\nAdded max range argument\n\nModified sensors to ultrasonic and modified lookup table\n\nIncreased time limit by a lot and modified reward weights slightly\n\nAdded a printing callback\n\nAdded add_action_to_obs to export params\n\nSome pep8\n\nReduced threshold to unmask backward action\n\nModified backward action masking to accomodate longer ds range\n\nAdded add action one-hot to obs flag\n\nAdded add action one-hot to obs flag\n\nAdded latest action one-hot to observation\n\nAdded random test difficulty\n\nFixed action masking for rearrange backwards/stop\n\nNow usign experiment_dir properly\n\nRearranged stop and backward action and removed stop action\n\nIncreased ds range\n\nReward weights are now normalized to add to 1\n\nSlightly modified base weights\n\nFixed export parameters for sb3\n\nAdded more hyperparams, added export of setup, cleanup\n\nMore sane n_steps etc\n\nMaskablePPO\n\nDense+diluted observation, collision reset now can happen after multiple steps, time penalty added and dist/ang reward now symmetric, removed rounding from observation\n\nsb3 initial\n\nAdded support for action masking, box difficulty, changed defaults\n\nAction masking and larger networks\n\nAdjusted touch sensor bounding box\n\nMajor cleanup refactoring, reward improvements and action masking, alternative difficulty \"box\"\n\nAdded support for action masking on ppo agent\n\nAdded manual control flag and additive reach target reward\n\nIncreased epsilon for adam optimizer as suggested for ppo\n\nRevert to custom implementation\n\nAdded reset on collision flag\n\nTime limit wrapper\n\nModified ds reward\n\nDS reward modified to reward minimum sensor value increasing or decreasing.\n\nRemove progress bar\n\nSb3 ppo\n\nReset on col, fixed starting obs bug, simple reward return for sb3\n\nModified distance sensors\n\nFix last training per episode\n\nAdded expanded action plotting\n\nFixed up the train on done stuff to be more logical\n\nLarger network by default\n\nAdded possibility of setting z when adding new node\n\nImplemented expanded action space option\n\nBaseline setup\n\nBaseline setup\n\nBaseline setup\n\nChanged some defaults and added some batch size options\n\nAdded all agent parameters in this file to get passed to agent ctor\n\nAdded all parameters in this file to get passed to env ctor\n\nAdded gamma explanation in docstring\n\nMajor refactoring, collision weight is additive, removed touch sensor observation, verbose flag to print reward per step, removed rounding from observation values\n\nModified distance reward to discourage the agent standing still\n\nNow using print method on test and fixed test bug where it didn't terminate properly\n\nFixed test cuda wrong device bug\n\nreach target reward gets added when non-zero instead of checking threshold\n\nDistance sensor reward defaults to additive with target dist/angle reward\n\nDefault save to disk to True\n\nAdded some setup parameters for convenience in loading and testing\n\nresults file name now is added automatically\n\nDecreased arena size to avoid going around exploit\n\nMajor cleanup/refactoring and save training metrics per checkpoint\n\nRemoved episode score stuff and solved method\n\nReset viewpoint to get rid of new reset shenanigans\n\nDef on viewpoint\n\nFixed bug with path nodes not resetting\n\nReplaced json with pickle for results for hopefully smaller filesizes\n\nAdded pickle loading option\n\nDefault difficulty now includes all obstacles since the start\n\nIncreased target reach and collision reward weights\n\nFixed bug in results plotting saving\n\nReset now doesn't reset simulation to gain performance\n\nRearranged initialization and added experiment description\n\nAdded experiment description\n\nAdded todo for new reward breakdown\n\nHandle and print reward dict\n\nReturn reward dict\n\nFixed export params\n\nrenamed target stop stuff to target reach\n\nRemoved stopping on target reward stuff, when target is reached episode is terminated instantly and agent is rewarded\n\nFixed max action selection to return the actual probability of the max action\n\nAdded option to entirely disable saving anything to disk\n\nFixed export parameters usage of cuda to get model size\n\nFixed saving plots\n\nAdded actual arguments\n\nRemoved set path\n\nAdded save capability to results plotting\n\nAdded result plotting script\n\nRemoved indent from results json dump to reduce filesize\n\nAdded some more prints at the end of episodes\n\nSome comments\n\nAdded touch sensor and refactored distance sensor init\n\nAdded more distance sensors\n\ntemp\n\nDecreased penalty for not stopping on target by a factor of 10 and reverted distance decrease/increase asymmetric normalization\n\nImproved experiment name on plot and added it into the per episode prints\n\nAdded higher negative reward for increasing angle and distance to potentially mitigate exploit for higher window agents, also stop reward gets multiplied by number of steps it has stopped.\n\nAdded experiment name on pop-up plot\n\nAdded more stats saving into json for later plotting\n\nAdded a call to export_parameters to save experiment setup\n\nAdded a get size method for the networks\n\nAdded an export parameters function\n\nAdded window observation\n\nAdded negative reward when not stopping under target threshold\n\nOverwrite target reward with distance reward and disable path reward\n\nSimplified reward weights to a basic baseline\n\nDecreased angle reward weight\n\nIncreased stop reward weight\n\nModified obstacles in difficulty\n\nAdded extra obstacles to bring the total to 25\n\nSlightly modified reward weights\n\nModified dist sensor reward weight and calculation\n\nFixed find_dist_to_path when path has only 1 node\n\nOverhauled ds reward, added path proximity and angle reward, added separate max values for dsensors, some refactoring\n\nChanged sensors lookup table to form a rectangle\n\nadded position mode to get_angle_to_target instead of node only\n\nChanged  reward weights and removed ds thresholds\n\nReduced collision reward weight\n\nImproved total reward calculation and avoided stop/turn exploit\n\nRemoved negative reward from stop\n\nFixed conditions for stop\n\nFixed apply_action docstring\n\nAdded backwards movement action\n\nRemoved overriding action\n\nConverted angle reward to discrete value\n\nAdded step limit for reset when agent is deployed\n\nFixed saving checkpoint just before training ends\n\nRemoved leftover action override\n\nConditional reward\n\nRemoved facing target condition and increased on target threshold\n\nFixed stop exploit\n\nSimplified stopping on target-facing if-else\n\nReset on target counter on not stopping action in addition to distance and facing\n\nModified stopping reward calculation\n\nRemoved leftover action overwrite\n\nReduced collision weight, modified dist thres multipliers, obstacle reward is now proportional to the ds value\n\nIncreased collision weight\n\nFixed checkpoint naming\n\nIncreased facing threshold, added multipliers for dist sensor thresholds\n\nFixed has stopped reward\n\nAdded customizable reward weights and reimplemented counting multiple steps on stop to complete episode\n\nTidy-up\n\nAdded distance sensor reward and improved total reward calculation and weights\n\nModified episode limit, checkpoint frequency and difficulties\n\nOnly target-robot difficulty no obstacles\n\nReworked distance/angle stuff, no obstacle currently\n\nRemoved not stopping on target penalty, decreased on target limit\n\nDoubled checkpoint frequency\n\nDoubled steps per episode\n\nAdded smoothness reward to punish too much turning\n\nRemoved forced action\n\nRemoved print\n\nFixed add near/neighbors, new more continuous reward function\n\nModified agent model and hyperparameters\n\nFixed add_near\n\nModified some rewards to stop the robot learning to turn in place, also added some keyboard controls for debugging\n\nAngle reward now when not detecting obstacles\n\nAdded small punishment for turning in place when far away\n\nSlightly modified rewards\n\nFixed viewpoint\n\nAdded new sensors\n\nAdded another layer in agent models\n\nIncreased number of distance sensors\n\nModified checkpoint naming\n\nModified difficulty\n\nDisabled solved entirely\n\nGreatly increased solved condition temporarily\n\nOvehauled reward\n\nChanged view and view follows robot\n\nModified angle reward to apply only when near the target, changed some thresholds\n\nModified difficulty\n\nDecreased dist sensor range\n\nIncreased lr again\n\nFixed starting difficulty\n\nDecreased agent learning rate\n\nIncreased decimals of rounding in the observation\n\nModified difficulty\n\nFixed bug when robot is outside grid map\n\nDecreased model size\n\nAdded small logos to show path\n\nAdditional functionality for grid and angle reward based on next node on path instead of final target\n\nAdded changing difficulty dictionary\n\nArena is now smaller to match the size of the grid map\n\nSome renamind and set difficulty method\n\nVarious tidy-up, fixed observation normalization bugs, recalculated maximum achievable reward per episode for the solved condition, greatly improved target randomization\n\nSet default viewpoint\n\nIncrease actor/critic network size\n\nSome fixes\n\nAdded randomizable map with obstacles and randomizable starting and target positions, as well as path finding to ensure that target is reachable, added touch sensor to terminate episode when robot collides.\n\nAdded obstacles etc\n\nDistance sensor values added to observation\n\nNew distance sensors and available objects for obstacles\n\nFix a minor comment\n\nFixed done condition\n\nClean-up\n\nFixed warning for logo\n\nTidy-up and clean-up\n\nMinor bug fix in testing and put plot in try except\n\nDecreased found target thresholds and minor fix in reward\n\nFixed bug on found target\n\nRemoved extra underscore\n\nDecreased saving interval\n\nRenamed is_done to on_target\n\nReset on target counter is thresholds are exceeded\n\nDecreased decreasing distance reward to make robot learn to move more straight\n\nAdded checkpoints\n\nRenamed to path following\n\nSome fixes\n\nSome more progress\n\nAdded agent saving\n\nDisabled camera and improved reward function\n\nVarious fixes\n\nPath following new example initial version WIP","shortMessageHtmlLink":"Find and Avoid V2"}},{"before":"7c83cd04a304f097ee0d37f544f7622a5caee04d","after":"4ac0b314e33185e12901f3f733a883785c64e3fe","ref":"refs/heads/find-and-avoid-v2","pushedAt":"2023-07-07T10:33:34.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"tsampazk","name":"Kostas Tsampazis","path":"/tsampazk","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/27914645?s=80&v=4"},"commit":{"message":"Replaced animation","shortMessageHtmlLink":"Replaced animation"}},{"before":"0b79401d71227bc85ad1d9894933d7ffbff94383","after":"7c83cd04a304f097ee0d37f544f7622a5caee04d","ref":"refs/heads/find-and-avoid-v2","pushedAt":"2023-07-07T10:31:43.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"tsampazk","name":"Kostas Tsampazis","path":"/tsampazk","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/27914645?s=80&v=4"},"commit":{"message":"Delete find_avoid_v2_trained.gif","shortMessageHtmlLink":"Delete find_avoid_v2_trained.gif"}},{"before":"b40abeb4dfc20972c37b0147e894ecc0af182522","after":"0b79401d71227bc85ad1d9894933d7ffbff94383","ref":"refs/heads/find-and-avoid-v2","pushedAt":"2023-07-07T10:31:23.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"tsampazk","name":"Kostas Tsampazis","path":"/tsampazk","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/27914645?s=80&v=4"},"commit":{"message":"Update README.md","shortMessageHtmlLink":"Update README.md"}},{"before":"80861484f52d331c799c26a8b1dc06c737f31934","after":"b40abeb4dfc20972c37b0147e894ecc0af182522","ref":"refs/heads/find-and-avoid-v2","pushedAt":"2023-07-07T10:05:03.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"tsampazk","name":"Kostas Tsampazis","path":"/tsampazk","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/27914645?s=80&v=4"},"commit":{"message":"Merge remote-tracking branch 'origin/find-and-avoid-v2' into find-and-avoid-v2","shortMessageHtmlLink":"Merge remote-tracking branch 'origin/find-and-avoid-v2' into find-and…"}},{"before":"66df21e8ee750914a7d967b501ffd7a594ad0b43","after":"80861484f52d331c799c26a8b1dc06c737f31934","ref":"refs/heads/find-and-avoid-v2","pushedAt":"2023-07-07T09:55:58.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"tsampazk","name":"Kostas Tsampazis","path":"/tsampazk","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/27914645?s=80&v=4"},"commit":{"message":"Update README.md","shortMessageHtmlLink":"Update README.md"}},{"before":"1d39d7d96d11348e2258373d080fdae1a9a2bca8","after":"66df21e8ee750914a7d967b501ffd7a594ad0b43","ref":"refs/heads/find-and-avoid-v2","pushedAt":"2023-07-07T09:55:36.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"tsampazk","name":"Kostas Tsampazis","path":"/tsampazk","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/27914645?s=80&v=4"},"commit":{"message":"Update README.md","shortMessageHtmlLink":"Update README.md"}},{"before":"6bc2724738238b99d1f34b206b7fe34224b28ebb","after":"1d39d7d96d11348e2258373d080fdae1a9a2bca8","ref":"refs/heads/find-and-avoid-v2","pushedAt":"2023-07-07T09:54:43.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"tsampazk","name":"Kostas Tsampazis","path":"/tsampazk","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/27914645?s=80&v=4"},"commit":{"message":"Update README.md","shortMessageHtmlLink":"Update README.md"}},{"before":"4f0da5da06a599ad3eddeba33444da5955b80b4f","after":"6bc2724738238b99d1f34b206b7fe34224b28ebb","ref":"refs/heads/find-and-avoid-v2","pushedAt":"2023-07-07T09:52:25.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"tsampazk","name":"Kostas Tsampazis","path":"/tsampazk","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/27914645?s=80&v=4"},"commit":{"message":"Added showcase gif","shortMessageHtmlLink":"Added showcase gif"}},{"before":"6d495f8c05678bc2684d04cfee6ce5e6c38ebe8e","after":"4f0da5da06a599ad3eddeba33444da5955b80b4f","ref":"refs/heads/find-and-avoid-v2","pushedAt":"2023-07-07T09:12:11.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"tsampazk","name":"Kostas Tsampazis","path":"/tsampazk","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/27914645?s=80&v=4"},"commit":{"message":"Find and Avoid V2\n\nUpdated wbt file for latest webots version (2023b)\n\nRemoved .csv from test results filename resulting in double postfix\n\nRemoved dynamic angle threshold in masking\n\nMinor fixes in action-related comments\n\nClarified wording for sb3 evaluation\n\nApply suggestions from code review\n\nCo-authored-by: Jiun Kai Yang <kelvin777320@gmail.com>\n\nAdded general problem description in environment docstring\n\nRenamed deprivation to denial\n\nRenamed deprivation to denial\n\nRenamed deprivation to denial and fixed minor bug for starting ds values\n\nUpdated readme\n\nAdded trained agent with exported parameters and training tensorboard logs\n\nAdded deprivation list as argument for trainer run, in case one needs to train with ds denial\n\nFix in deprivation list attribute setting\n\nFixed masking, re-added conditional for printing at the end of episode\n\nAdded distance sensor deprivation, with explanation. Only used in testing.\n\nAdded distance sensor deprivation\n\nMajor updates:\n1. Moved all user modifiable params to run function arguments\n2. Modified logging with new metrics\n3. Refactored and simplified code that runs training, removed code duplication\n\nMoved all user modifiable parameters here\n\nCleanup, refactoring and improvements\n\nMajor updates on robot:\n1. Slightly changed distance sensors positions\n2. Added second touch sensor, now have separate left/right touch sensors\n\nMajor updates on environment:\n1. Modified reward function by a lot\n2. Added left-right touch sensors\n3. Added more parameterization for distance sensors\n4. Heavily modified masking method, crucial difference from previous version\n5. Slightly changed episode termination logic, cleaned up\n6. Changed action scheme to incremental actions instead of separate motor control\n7. Major cleanups, docstring finalization and refactoring\n\nFixed rotation bugs and fixed and simplified smoothness reward\n\nAdded a method to set the reward weights\n\nAdded setting of reward weights before testing\n\nFix in distance reward\n\nRemoved unused stuff in comments\n\nFixed initialization of distance and angle bugs\n\nRemoved todos from observation testing and distance reward, added smoothness reward\n\nAdded smoothness weight\n\nRemoved deleted method\n\nDeleted leftover stuff\n\nRemoved box difficulty\n\nAdded requirements\n\nAdded initial version of example README\n\nRemoved obs turning reward stuff and normed reward\n\nRemoved multiple testing done prints\n\nRemoved done reason printing\n\nMoved sb3 testing to this file, added more prints, added success percentage during custom testing and refactored it a bit, added sb3 rew and std to csv file, added a simple loop to keep the sim running after finishing evaluation\n\nAdded option to skip training and go straight to testing, added some general stuff as arguments, removed sb3 testing and moved it to testing script\n\nAdded some general easy to access parameters for starting experiments\n\nRemoved unused plot function\n\nRemoved unused stuff, fixed env class name after rename, added some prints for testing\n\nRenamed class, left some TODOs and updated docstrings\n\nRenamed to find and avoid v2 and deleted unused stuff\n\nAdded stuff from map branch:\n1. Removed obstacle markers\n2. Added more entries to ds sensors to properly set noise\n\nAdded stuff from map branch:\n1. Changed manual control motor speeds\n2. Removed unused virtual angle stuff in comments\n3. Added proper ds noise\n\nFixed testing seed to get identical test maps\n\nFixed minor 1-off bug with diluted observation\n\nNew baseline params\n\nChangelog:\n1. Added HPARAMS on tensorboard logging\n2. Minor fix on n_steps value to be multiple of 2\n3. Added Monitor wrapper as it is required by sb3 evaluation\n4. Added sb3 policy evaluation\n\nAdded some prints\n\nRefactored to run testing sequentially after training\n\nChangelog:\n1. Refactored to run sequentially with tester\n2. Increased base maximum episode steps\n3. Got rid of timeout wrapper as env is incorporating it\n\nChangelog:\n1. Refactored to run sequentially with trainer\n2. Removed unused imports\n3. Automated testing now runs 100 episodes per difficulty\n4. Added printing of test progress\n\nChangelog:\n1. Incorporated timeout properly\n2. Fixed done reason timeout stuff\n3. Fixed not reach reward normalization range\n4. Added method to properly set the max episode steps\n\nChangelog:\n1. Added maximum episode steps argument to identify timeouts\n2. added a not reach weight that penalizes robot based on distance achieved at timeout, to do this minimum distance is tracked\n3. added touch sensor to observation\n4. added metrics for reset count, reach target count, collision termination count\n5. modified unmasking of backward action\n6. changed distance to target obs normalization to be based on initial distance as max\n7. changed distance reward to be based on minimum distance achieved\n8. changed dist sensor reward to take the mean of the all sensor penalties, but normalized it from realistic max value\n9. added option for setting reset_on_collisions to -1 to never reset from collisions\n10. some updates to export parameters\n\nAdded conv1d actor and critic in comments\n\nRemoved unused stuff\n\nSome improvements\n\nAdded the two new runners\n\nSeparated training and testing in separate runners\n\nMajor rearrangement of parameters, modified and fixed some difficulty stuff, new baseline default parameters\n\nAdded obstacle turning reward and obstacle ds markers stuff in comments\n\nAdded distance sensor markers\n\nAdded obstacle turning weight\n\nAdded special case when the target is right on top of the robot\n\nMinor adjustments in resetting angle and distance observations and readded some rounding in the observation\n\nimproved testing\n\nMinor fix in action comment\n\nNew default multipliers\n\nAdded csv output of test results\n\nOverhauled testing after training\n\nAdded printing of difficulty key, added info return done reason, moved trigger_done flag reset to reset, fixed walls position resetting\n\nExperiment name and diff readded to logger on rollout end\n\nMinor refactoring based on large refactoring on the env\n\nMajor cleanup, docstrings, ds noise argument, fix minor bug in apply_action, fixed minor bug not resetting observation counter, observation memory and applying zero motor velocities\n\nAdded load path, increased default ds threshold, changed test diff to random, added reward printing for test\n\nAdded ds threshold on export params\n\nMore masking, backward\n\nImproved manual control\n\nBetter parameter defaults\n\nBetter masking for left-right, added various new parameters to export\n\nDifferent action scheme, actions now directly change motor speeds\n\nAdded multipliers for target rewards when obstacles are detected.\n\nAdded ds type argument\n\nFixed reset_on_collisions argument and reset of counter\n\nFixed corridor walls bounding box\n\nAdded random corridor map feature\n\nAdded corridor difficulty\n\nAdded corridor walls\n\nReduced steps timelimit, normalized reward weights a bit, fixed saved model names, added flag for continuous lines in tensorboard\n\nSimplified ds reward and added controls to print obs and rew\n\nDistance sensor reward is now based on rate of change as before\n\nAdded a threshold for the distance sensor rewards\n\nAdded a threshold for the distance sensor rewards\n\nRearranged imports and added torch and numpy seeding\n\nMinor fixes and changed default parameters\n\nCommented out dynamic action masking, which doesn't work that well anymore with proper sonar distance sensors\n\nAdded random seed, fixed initial tar d reward, added normed reward metric calculation and resetting, removed tar ang dist reward decrease when obstacles are detected\n\nBetter logger management, added seed, added avg normed reward metric\n\nDecreased arena size\n\nAdded ds range in export params\n\nRemoved reward normalization\n\nModified masking, added dynamic max ds range via argument\n\nAdded max range argument\n\nModified sensors to ultrasonic and modified lookup table\n\nIncreased time limit by a lot and modified reward weights slightly\n\nAdded a printing callback\n\nAdded add_action_to_obs to export params\n\nSome pep8\n\nReduced threshold to unmask backward action\n\nModified backward action masking to accomodate longer ds range\n\nAdded add action one-hot to obs flag\n\nAdded add action one-hot to obs flag\n\nAdded latest action one-hot to observation\n\nAdded random test difficulty\n\nFixed action masking for rearrange backwards/stop\n\nNow usign experiment_dir properly\n\nRearranged stop and backward action and removed stop action\n\nIncreased ds range\n\nReward weights are now normalized to add to 1\n\nSlightly modified base weights\n\nFixed export parameters for sb3\n\nAdded more hyperparams, added export of setup, cleanup\n\nMore sane n_steps etc\n\nMaskablePPO\n\nDense+diluted observation, collision reset now can happen after multiple steps, time penalty added and dist/ang reward now symmetric, removed rounding from observation\n\nsb3 initial\n\nAdded support for action masking, box difficulty, changed defaults\n\nAction masking and larger networks\n\nAdjusted touch sensor bounding box\n\nMajor cleanup refactoring, reward improvements and action masking, alternative difficulty \"box\"\n\nAdded support for action masking on ppo agent\n\nAdded manual control flag and additive reach target reward\n\nIncreased epsilon for adam optimizer as suggested for ppo\n\nRevert to custom implementation\n\nAdded reset on collision flag\n\nTime limit wrapper\n\nModified ds reward\n\nDS reward modified to reward minimum sensor value increasing or decreasing.\n\nRemove progress bar\n\nSb3 ppo\n\nReset on col, fixed starting obs bug, simple reward return for sb3\n\nModified distance sensors\n\nFix last training per episode\n\nAdded expanded action plotting\n\nFixed up the train on done stuff to be more logical\n\nLarger network by default\n\nAdded possibility of setting z when adding new node\n\nImplemented expanded action space option\n\nBaseline setup\n\nBaseline setup\n\nBaseline setup\n\nChanged some defaults and added some batch size options\n\nAdded all agent parameters in this file to get passed to agent ctor\n\nAdded all parameters in this file to get passed to env ctor\n\nAdded gamma explanation in docstring\n\nMajor refactoring, collision weight is additive, removed touch sensor observation, verbose flag to print reward per step, removed rounding from observation values\n\nModified distance reward to discourage the agent standing still\n\nNow using print method on test and fixed test bug where it didn't terminate properly\n\nFixed test cuda wrong device bug\n\nreach target reward gets added when non-zero instead of checking threshold\n\nDistance sensor reward defaults to additive with target dist/angle reward\n\nDefault save to disk to True\n\nAdded some setup parameters for convenience in loading and testing\n\nresults file name now is added automatically\n\nDecreased arena size to avoid going around exploit\n\nMajor cleanup/refactoring and save training metrics per checkpoint\n\nRemoved episode score stuff and solved method\n\nReset viewpoint to get rid of new reset shenanigans\n\nDef on viewpoint\n\nFixed bug with path nodes not resetting\n\nReplaced json with pickle for results for hopefully smaller filesizes\n\nAdded pickle loading option\n\nDefault difficulty now includes all obstacles since the start\n\nIncreased target reach and collision reward weights\n\nFixed bug in results plotting saving\n\nReset now doesn't reset simulation to gain performance\n\nRearranged initialization and added experiment description\n\nAdded experiment description\n\nAdded todo for new reward breakdown\n\nHandle and print reward dict\n\nReturn reward dict\n\nFixed export params\n\nrenamed target stop stuff to target reach\n\nRemoved stopping on target reward stuff, when target is reached episode is terminated instantly and agent is rewarded\n\nFixed max action selection to return the actual probability of the max action\n\nAdded option to entirely disable saving anything to disk\n\nFixed export parameters usage of cuda to get model size\n\nFixed saving plots\n\nAdded actual arguments\n\nRemoved set path\n\nAdded save capability to results plotting\n\nAdded result plotting script\n\nRemoved indent from results json dump to reduce filesize\n\nAdded some more prints at the end of episodes\n\nSome comments\n\nAdded touch sensor and refactored distance sensor init\n\nAdded more distance sensors\n\ntemp\n\nDecreased penalty for not stopping on target by a factor of 10 and reverted distance decrease/increase asymmetric normalization\n\nImproved experiment name on plot and added it into the per episode prints\n\nAdded higher negative reward for increasing angle and distance to potentially mitigate exploit for higher window agents, also stop reward gets multiplied by number of steps it has stopped.\n\nAdded experiment name on pop-up plot\n\nAdded more stats saving into json for later plotting\n\nAdded a call to export_parameters to save experiment setup\n\nAdded a get size method for the networks\n\nAdded an export parameters function\n\nAdded window observation\n\nAdded negative reward when not stopping under target threshold\n\nOverwrite target reward with distance reward and disable path reward\n\nSimplified reward weights to a basic baseline\n\nDecreased angle reward weight\n\nIncreased stop reward weight\n\nModified obstacles in difficulty\n\nAdded extra obstacles to bring the total to 25\n\nSlightly modified reward weights\n\nModified dist sensor reward weight and calculation\n\nFixed find_dist_to_path when path has only 1 node\n\nOverhauled ds reward, added path proximity and angle reward, added separate max values for dsensors, some refactoring\n\nChanged sensors lookup table to form a rectangle\n\nadded position mode to get_angle_to_target instead of node only\n\nChanged  reward weights and removed ds thresholds\n\nReduced collision reward weight\n\nImproved total reward calculation and avoided stop/turn exploit\n\nRemoved negative reward from stop\n\nFixed conditions for stop\n\nFixed apply_action docstring\n\nAdded backwards movement action\n\nRemoved overriding action\n\nConverted angle reward to discrete value\n\nAdded step limit for reset when agent is deployed\n\nFixed saving checkpoint just before training ends\n\nRemoved leftover action override\n\nConditional reward\n\nRemoved facing target condition and increased on target threshold\n\nFixed stop exploit\n\nSimplified stopping on target-facing if-else\n\nReset on target counter on not stopping action in addition to distance and facing\n\nModified stopping reward calculation\n\nRemoved leftover action overwrite\n\nReduced collision weight, modified dist thres multipliers, obstacle reward is now proportional to the ds value\n\nIncreased collision weight\n\nFixed checkpoint naming\n\nIncreased facing threshold, added multipliers for dist sensor thresholds\n\nFixed has stopped reward\n\nAdded customizable reward weights and reimplemented counting multiple steps on stop to complete episode\n\nTidy-up\n\nAdded distance sensor reward and improved total reward calculation and weights\n\nModified episode limit, checkpoint frequency and difficulties\n\nOnly target-robot difficulty no obstacles\n\nReworked distance/angle stuff, no obstacle currently\n\nRemoved not stopping on target penalty, decreased on target limit\n\nDoubled checkpoint frequency\n\nDoubled steps per episode\n\nAdded smoothness reward to punish too much turning\n\nRemoved forced action\n\nRemoved print\n\nFixed add near/neighbors, new more continuous reward function\n\nModified agent model and hyperparameters\n\nFixed add_near\n\nModified some rewards to stop the robot learning to turn in place, also added some keyboard controls for debugging\n\nAngle reward now when not detecting obstacles\n\nAdded small punishment for turning in place when far away\n\nSlightly modified rewards\n\nFixed viewpoint\n\nAdded new sensors\n\nAdded another layer in agent models\n\nIncreased number of distance sensors\n\nModified checkpoint naming\n\nModified difficulty\n\nDisabled solved entirely\n\nGreatly increased solved condition temporarily\n\nOvehauled reward\n\nChanged view and view follows robot\n\nModified angle reward to apply only when near the target, changed some thresholds\n\nModified difficulty\n\nDecreased dist sensor range\n\nIncreased lr again\n\nFixed starting difficulty\n\nDecreased agent learning rate\n\nIncreased decimals of rounding in the observation\n\nModified difficulty\n\nFixed bug when robot is outside grid map\n\nDecreased model size\n\nAdded small logos to show path\n\nAdditional functionality for grid and angle reward based on next node on path instead of final target\n\nAdded changing difficulty dictionary\n\nArena is now smaller to match the size of the grid map\n\nSome renamind and set difficulty method\n\nVarious tidy-up, fixed observation normalization bugs, recalculated maximum achievable reward per episode for the solved condition, greatly improved target randomization\n\nSet default viewpoint\n\nIncrease actor/critic network size\n\nSome fixes\n\nAdded randomizable map with obstacles and randomizable starting and target positions, as well as path finding to ensure that target is reachable, added touch sensor to terminate episode when robot collides.\n\nAdded obstacles etc\n\nDistance sensor values added to observation\n\nNew distance sensors and available objects for obstacles\n\nFix a minor comment\n\nFixed done condition\n\nClean-up\n\nFixed warning for logo\n\nTidy-up and clean-up\n\nMinor bug fix in testing and put plot in try except\n\nDecreased found target thresholds and minor fix in reward\n\nFixed bug on found target\n\nRemoved extra underscore\n\nDecreased saving interval\n\nRenamed is_done to on_target\n\nReset on target counter is thresholds are exceeded\n\nDecreased decreasing distance reward to make robot learn to move more straight\n\nAdded checkpoints\n\nRenamed to path following\n\nSome fixes\n\nSome more progress\n\nAdded agent saving\n\nDisabled camera and improved reward function\n\nVarious fixes\n\nPath following new example initial version WIP","shortMessageHtmlLink":"Find and Avoid V2"}},{"before":"4f0da5da06a599ad3eddeba33444da5955b80b4f","after":"6d495f8c05678bc2684d04cfee6ce5e6c38ebe8e","ref":"refs/heads/find-and-avoid-v2","pushedAt":"2023-07-07T09:08:18.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"tsampazk","name":"Kostas Tsampazis","path":"/tsampazk","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/27914645?s=80&v=4"},"commit":{"message":"Added animation","shortMessageHtmlLink":"Added animation"}},{"before":"fda0934c28e4f4aadf59afcdcda4665a246ccdc9","after":"277b19af500e71dfb079742359e12c5dbcd07786","ref":"refs/heads/dev","pushedAt":"2023-07-07T08:37:32.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"tsampazk","name":"Kostas Tsampazis","path":"/tsampazk","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/27914645?s=80&v=4"},"commit":{"message":"Find and Avoid V2\n\nUpdated wbt file for latest webots version (2023b)\n\nRemoved .csv from test results filename resulting in double postfix\n\nRemoved dynamic angle threshold in masking\n\nMinor fixes in action-related comments\n\nClarified wording for sb3 evaluation\n\nApply suggestions from code review\n\nCo-authored-by: Jiun Kai Yang <kelvin777320@gmail.com>\n\nAdded general problem description in environment docstring\n\nRenamed deprivation to denial\n\nRenamed deprivation to denial\n\nRenamed deprivation to denial and fixed minor bug for starting ds values\n\nUpdated readme\n\nAdded trained agent with exported parameters and training tensorboard logs\n\nAdded deprivation list as argument for trainer run, in case one needs to train with ds denial\n\nFix in deprivation list attribute setting\n\nFixed masking, re-added conditional for printing at the end of episode\n\nAdded distance sensor deprivation, with explanation. Only used in testing.\n\nAdded distance sensor deprivation\n\nMajor updates:\n1. Moved all user modifiable params to run function arguments\n2. Modified logging with new metrics\n3. Refactored and simplified code that runs training, removed code duplication\n\nMoved all user modifiable parameters here\n\nCleanup, refactoring and improvements\n\nMajor updates on robot:\n1. Slightly changed distance sensors positions\n2. Added second touch sensor, now have separate left/right touch sensors\n\nMajor updates on environment:\n1. Modified reward function by a lot\n2. Added left-right touch sensors\n3. Added more parameterization for distance sensors\n4. Heavily modified masking method, crucial difference from previous version\n5. Slightly changed episode termination logic, cleaned up\n6. Changed action scheme to incremental actions instead of separate motor control\n7. Major cleanups, docstring finalization and refactoring\n\nFixed rotation bugs and fixed and simplified smoothness reward\n\nAdded a method to set the reward weights\n\nAdded setting of reward weights before testing\n\nFix in distance reward\n\nRemoved unused stuff in comments\n\nFixed initialization of distance and angle bugs\n\nRemoved todos from observation testing and distance reward, added smoothness reward\n\nAdded smoothness weight\n\nRemoved deleted method\n\nDeleted leftover stuff\n\nRemoved box difficulty\n\nAdded requirements\n\nAdded initial version of example README\n\nRemoved obs turning reward stuff and normed reward\n\nRemoved multiple testing done prints\n\nRemoved done reason printing\n\nMoved sb3 testing to this file, added more prints, added success percentage during custom testing and refactored it a bit, added sb3 rew and std to csv file, added a simple loop to keep the sim running after finishing evaluation\n\nAdded option to skip training and go straight to testing, added some general stuff as arguments, removed sb3 testing and moved it to testing script\n\nAdded some general easy to access parameters for starting experiments\n\nRemoved unused plot function\n\nRemoved unused stuff, fixed env class name after rename, added some prints for testing\n\nRenamed class, left some TODOs and updated docstrings\n\nRenamed to find and avoid v2 and deleted unused stuff\n\nAdded stuff from map branch:\n1. Removed obstacle markers\n2. Added more entries to ds sensors to properly set noise\n\nAdded stuff from map branch:\n1. Changed manual control motor speeds\n2. Removed unused virtual angle stuff in comments\n3. Added proper ds noise\n\nFixed testing seed to get identical test maps\n\nFixed minor 1-off bug with diluted observation\n\nNew baseline params\n\nChangelog:\n1. Added HPARAMS on tensorboard logging\n2. Minor fix on n_steps value to be multiple of 2\n3. Added Monitor wrapper as it is required by sb3 evaluation\n4. Added sb3 policy evaluation\n\nAdded some prints\n\nRefactored to run testing sequentially after training\n\nChangelog:\n1. Refactored to run sequentially with tester\n2. Increased base maximum episode steps\n3. Got rid of timeout wrapper as env is incorporating it\n\nChangelog:\n1. Refactored to run sequentially with trainer\n2. Removed unused imports\n3. Automated testing now runs 100 episodes per difficulty\n4. Added printing of test progress\n\nChangelog:\n1. Incorporated timeout properly\n2. Fixed done reason timeout stuff\n3. Fixed not reach reward normalization range\n4. Added method to properly set the max episode steps\n\nChangelog:\n1. Added maximum episode steps argument to identify timeouts\n2. added a not reach weight that penalizes robot based on distance achieved at timeout, to do this minimum distance is tracked\n3. added touch sensor to observation\n4. added metrics for reset count, reach target count, collision termination count\n5. modified unmasking of backward action\n6. changed distance to target obs normalization to be based on initial distance as max\n7. changed distance reward to be based on minimum distance achieved\n8. changed dist sensor reward to take the mean of the all sensor penalties, but normalized it from realistic max value\n9. added option for setting reset_on_collisions to -1 to never reset from collisions\n10. some updates to export parameters\n\nAdded conv1d actor and critic in comments\n\nRemoved unused stuff\n\nSome improvements\n\nAdded the two new runners\n\nSeparated training and testing in separate runners\n\nMajor rearrangement of parameters, modified and fixed some difficulty stuff, new baseline default parameters\n\nAdded obstacle turning reward and obstacle ds markers stuff in comments\n\nAdded distance sensor markers\n\nAdded obstacle turning weight\n\nAdded special case when the target is right on top of the robot\n\nMinor adjustments in resetting angle and distance observations and readded some rounding in the observation\n\nimproved testing\n\nMinor fix in action comment\n\nNew default multipliers\n\nAdded csv output of test results\n\nOverhauled testing after training\n\nAdded printing of difficulty key, added info return done reason, moved trigger_done flag reset to reset, fixed walls position resetting\n\nExperiment name and diff readded to logger on rollout end\n\nMinor refactoring based on large refactoring on the env\n\nMajor cleanup, docstrings, ds noise argument, fix minor bug in apply_action, fixed minor bug not resetting observation counter, observation memory and applying zero motor velocities\n\nAdded load path, increased default ds threshold, changed test diff to random, added reward printing for test\n\nAdded ds threshold on export params\n\nMore masking, backward\n\nImproved manual control\n\nBetter parameter defaults\n\nBetter masking for left-right, added various new parameters to export\n\nDifferent action scheme, actions now directly change motor speeds\n\nAdded multipliers for target rewards when obstacles are detected.\n\nAdded ds type argument\n\nFixed reset_on_collisions argument and reset of counter\n\nFixed corridor walls bounding box\n\nAdded random corridor map feature\n\nAdded corridor difficulty\n\nAdded corridor walls\n\nReduced steps timelimit, normalized reward weights a bit, fixed saved model names, added flag for continuous lines in tensorboard\n\nSimplified ds reward and added controls to print obs and rew\n\nDistance sensor reward is now based on rate of change as before\n\nAdded a threshold for the distance sensor rewards\n\nAdded a threshold for the distance sensor rewards\n\nRearranged imports and added torch and numpy seeding\n\nMinor fixes and changed default parameters\n\nCommented out dynamic action masking, which doesn't work that well anymore with proper sonar distance sensors\n\nAdded random seed, fixed initial tar d reward, added normed reward metric calculation and resetting, removed tar ang dist reward decrease when obstacles are detected\n\nBetter logger management, added seed, added avg normed reward metric\n\nDecreased arena size\n\nAdded ds range in export params\n\nRemoved reward normalization\n\nModified masking, added dynamic max ds range via argument\n\nAdded max range argument\n\nModified sensors to ultrasonic and modified lookup table\n\nIncreased time limit by a lot and modified reward weights slightly\n\nAdded a printing callback\n\nAdded add_action_to_obs to export params\n\nSome pep8\n\nReduced threshold to unmask backward action\n\nModified backward action masking to accomodate longer ds range\n\nAdded add action one-hot to obs flag\n\nAdded add action one-hot to obs flag\n\nAdded latest action one-hot to observation\n\nAdded random test difficulty\n\nFixed action masking for rearrange backwards/stop\n\nNow usign experiment_dir properly\n\nRearranged stop and backward action and removed stop action\n\nIncreased ds range\n\nReward weights are now normalized to add to 1\n\nSlightly modified base weights\n\nFixed export parameters for sb3\n\nAdded more hyperparams, added export of setup, cleanup\n\nMore sane n_steps etc\n\nMaskablePPO\n\nDense+diluted observation, collision reset now can happen after multiple steps, time penalty added and dist/ang reward now symmetric, removed rounding from observation\n\nsb3 initial\n\nAdded support for action masking, box difficulty, changed defaults\n\nAction masking and larger networks\n\nAdjusted touch sensor bounding box\n\nMajor cleanup refactoring, reward improvements and action masking, alternative difficulty \"box\"\n\nAdded support for action masking on ppo agent\n\nAdded manual control flag and additive reach target reward\n\nIncreased epsilon for adam optimizer as suggested for ppo\n\nRevert to custom implementation\n\nAdded reset on collision flag\n\nTime limit wrapper\n\nModified ds reward\n\nDS reward modified to reward minimum sensor value increasing or decreasing.\n\nRemove progress bar\n\nSb3 ppo\n\nReset on col, fixed starting obs bug, simple reward return for sb3\n\nModified distance sensors\n\nFix last training per episode\n\nAdded expanded action plotting\n\nFixed up the train on done stuff to be more logical\n\nLarger network by default\n\nAdded possibility of setting z when adding new node\n\nImplemented expanded action space option\n\nBaseline setup\n\nBaseline setup\n\nBaseline setup\n\nChanged some defaults and added some batch size options\n\nAdded all agent parameters in this file to get passed to agent ctor\n\nAdded all parameters in this file to get passed to env ctor\n\nAdded gamma explanation in docstring\n\nMajor refactoring, collision weight is additive, removed touch sensor observation, verbose flag to print reward per step, removed rounding from observation values\n\nModified distance reward to discourage the agent standing still\n\nNow using print method on test and fixed test bug where it didn't terminate properly\n\nFixed test cuda wrong device bug\n\nreach target reward gets added when non-zero instead of checking threshold\n\nDistance sensor reward defaults to additive with target dist/angle reward\n\nDefault save to disk to True\n\nAdded some setup parameters for convenience in loading and testing\n\nresults file name now is added automatically\n\nDecreased arena size to avoid going around exploit\n\nMajor cleanup/refactoring and save training metrics per checkpoint\n\nRemoved episode score stuff and solved method\n\nReset viewpoint to get rid of new reset shenanigans\n\nDef on viewpoint\n\nFixed bug with path nodes not resetting\n\nReplaced json with pickle for results for hopefully smaller filesizes\n\nAdded pickle loading option\n\nDefault difficulty now includes all obstacles since the start\n\nIncreased target reach and collision reward weights\n\nFixed bug in results plotting saving\n\nReset now doesn't reset simulation to gain performance\n\nRearranged initialization and added experiment description\n\nAdded experiment description\n\nAdded todo for new reward breakdown\n\nHandle and print reward dict\n\nReturn reward dict\n\nFixed export params\n\nrenamed target stop stuff to target reach\n\nRemoved stopping on target reward stuff, when target is reached episode is terminated instantly and agent is rewarded\n\nFixed max action selection to return the actual probability of the max action\n\nAdded option to entirely disable saving anything to disk\n\nFixed export parameters usage of cuda to get model size\n\nFixed saving plots\n\nAdded actual arguments\n\nRemoved set path\n\nAdded save capability to results plotting\n\nAdded result plotting script\n\nRemoved indent from results json dump to reduce filesize\n\nAdded some more prints at the end of episodes\n\nSome comments\n\nAdded touch sensor and refactored distance sensor init\n\nAdded more distance sensors\n\ntemp\n\nDecreased penalty for not stopping on target by a factor of 10 and reverted distance decrease/increase asymmetric normalization\n\nImproved experiment name on plot and added it into the per episode prints\n\nAdded higher negative reward for increasing angle and distance to potentially mitigate exploit for higher window agents, also stop reward gets multiplied by number of steps it has stopped.\n\nAdded experiment name on pop-up plot\n\nAdded more stats saving into json for later plotting\n\nAdded a call to export_parameters to save experiment setup\n\nAdded a get size method for the networks\n\nAdded an export parameters function\n\nAdded window observation\n\nAdded negative reward when not stopping under target threshold\n\nOverwrite target reward with distance reward and disable path reward\n\nSimplified reward weights to a basic baseline\n\nDecreased angle reward weight\n\nIncreased stop reward weight\n\nModified obstacles in difficulty\n\nAdded extra obstacles to bring the total to 25\n\nSlightly modified reward weights\n\nModified dist sensor reward weight and calculation\n\nFixed find_dist_to_path when path has only 1 node\n\nOverhauled ds reward, added path proximity and angle reward, added separate max values for dsensors, some refactoring\n\nChanged sensors lookup table to form a rectangle\n\nadded position mode to get_angle_to_target instead of node only\n\nChanged  reward weights and removed ds thresholds\n\nReduced collision reward weight\n\nImproved total reward calculation and avoided stop/turn exploit\n\nRemoved negative reward from stop\n\nFixed conditions for stop\n\nFixed apply_action docstring\n\nAdded backwards movement action\n\nRemoved overriding action\n\nConverted angle reward to discrete value\n\nAdded step limit for reset when agent is deployed\n\nFixed saving checkpoint just before training ends\n\nRemoved leftover action override\n\nConditional reward\n\nRemoved facing target condition and increased on target threshold\n\nFixed stop exploit\n\nSimplified stopping on target-facing if-else\n\nReset on target counter on not stopping action in addition to distance and facing\n\nModified stopping reward calculation\n\nRemoved leftover action overwrite\n\nReduced collision weight, modified dist thres multipliers, obstacle reward is now proportional to the ds value\n\nIncreased collision weight\n\nFixed checkpoint naming\n\nIncreased facing threshold, added multipliers for dist sensor thresholds\n\nFixed has stopped reward\n\nAdded customizable reward weights and reimplemented counting multiple steps on stop to complete episode\n\nTidy-up\n\nAdded distance sensor reward and improved total reward calculation and weights\n\nModified episode limit, checkpoint frequency and difficulties\n\nOnly target-robot difficulty no obstacles\n\nReworked distance/angle stuff, no obstacle currently\n\nRemoved not stopping on target penalty, decreased on target limit\n\nDoubled checkpoint frequency\n\nDoubled steps per episode\n\nAdded smoothness reward to punish too much turning\n\nRemoved forced action\n\nRemoved print\n\nFixed add near/neighbors, new more continuous reward function\n\nModified agent model and hyperparameters\n\nFixed add_near\n\nModified some rewards to stop the robot learning to turn in place, also added some keyboard controls for debugging\n\nAngle reward now when not detecting obstacles\n\nAdded small punishment for turning in place when far away\n\nSlightly modified rewards\n\nFixed viewpoint\n\nAdded new sensors\n\nAdded another layer in agent models\n\nIncreased number of distance sensors\n\nModified checkpoint naming\n\nModified difficulty\n\nDisabled solved entirely\n\nGreatly increased solved condition temporarily\n\nOvehauled reward\n\nChanged view and view follows robot\n\nModified angle reward to apply only when near the target, changed some thresholds\n\nModified difficulty\n\nDecreased dist sensor range\n\nIncreased lr again\n\nFixed starting difficulty\n\nDecreased agent learning rate\n\nIncreased decimals of rounding in the observation\n\nModified difficulty\n\nFixed bug when robot is outside grid map\n\nDecreased model size\n\nAdded small logos to show path\n\nAdditional functionality for grid and angle reward based on next node on path instead of final target\n\nAdded changing difficulty dictionary\n\nArena is now smaller to match the size of the grid map\n\nSome renamind and set difficulty method\n\nVarious tidy-up, fixed observation normalization bugs, recalculated maximum achievable reward per episode for the solved condition, greatly improved target randomization\n\nSet default viewpoint\n\nIncrease actor/critic network size\n\nSome fixes\n\nAdded randomizable map with obstacles and randomizable starting and target positions, as well as path finding to ensure that target is reachable, added touch sensor to terminate episode when robot collides.\n\nAdded obstacles etc\n\nDistance sensor values added to observation\n\nNew distance sensors and available objects for obstacles\n\nFix a minor comment\n\nFixed done condition\n\nClean-up\n\nFixed warning for logo\n\nTidy-up and clean-up\n\nMinor bug fix in testing and put plot in try except\n\nDecreased found target thresholds and minor fix in reward\n\nFixed bug on found target\n\nRemoved extra underscore\n\nDecreased saving interval\n\nRenamed is_done to on_target\n\nReset on target counter is thresholds are exceeded\n\nDecreased decreasing distance reward to make robot learn to move more straight\n\nAdded checkpoints\n\nRenamed to path following\n\nSome fixes\n\nSome more progress\n\nAdded agent saving\n\nDisabled camera and improved reward function\n\nVarious fixes\n\nPath following new example initial version WIP","shortMessageHtmlLink":"Find and Avoid V2"}},{"before":"9fa2c927e675480f282cfeda33a9944edd190464","after":"4f0da5da06a599ad3eddeba33444da5955b80b4f","ref":"refs/heads/find-and-avoid-v2","pushedAt":"2023-07-07T08:12:57.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"tsampazk","name":"Kostas Tsampazis","path":"/tsampazk","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/27914645?s=80&v=4"},"commit":{"message":"Find and Avoid V2\n\nUpdated wbt file for latest webots version (2023b)\n\nRemoved .csv from test results filename resulting in double postfix\n\nRemoved dynamic angle threshold in masking\n\nMinor fixes in action-related comments\n\nClarified wording for sb3 evaluation\n\nApply suggestions from code review\n\nCo-authored-by: Jiun Kai Yang <kelvin777320@gmail.com>\n\nAdded general problem description in environment docstring\n\nRenamed deprivation to denial\n\nRenamed deprivation to denial\n\nRenamed deprivation to denial and fixed minor bug for starting ds values\n\nUpdated readme\n\nAdded trained agent with exported parameters and training tensorboard logs\n\nAdded deprivation list as argument for trainer run, in case one needs to train with ds denial\n\nFix in deprivation list attribute setting\n\nFixed masking, re-added conditional for printing at the end of episode\n\nAdded distance sensor deprivation, with explanation. Only used in testing.\n\nAdded distance sensor deprivation\n\nMajor updates:\n1. Moved all user modifiable params to run function arguments\n2. Modified logging with new metrics\n3. Refactored and simplified code that runs training, removed code duplication\n\nMoved all user modifiable parameters here\n\nCleanup, refactoring and improvements\n\nMajor updates on robot:\n1. Slightly changed distance sensors positions\n2. Added second touch sensor, now have separate left/right touch sensors\n\nMajor updates on environment:\n1. Modified reward function by a lot\n2. Added left-right touch sensors\n3. Added more parameterization for distance sensors\n4. Heavily modified masking method, crucial difference from previous version\n5. Slightly changed episode termination logic, cleaned up\n6. Changed action scheme to incremental actions instead of separate motor control\n7. Major cleanups, docstring finalization and refactoring\n\nFixed rotation bugs and fixed and simplified smoothness reward\n\nAdded a method to set the reward weights\n\nAdded setting of reward weights before testing\n\nFix in distance reward\n\nRemoved unused stuff in comments\n\nFixed initialization of distance and angle bugs\n\nRemoved todos from observation testing and distance reward, added smoothness reward\n\nAdded smoothness weight\n\nRemoved deleted method\n\nDeleted leftover stuff\n\nRemoved box difficulty\n\nAdded requirements\n\nAdded initial version of example README\n\nRemoved obs turning reward stuff and normed reward\n\nRemoved multiple testing done prints\n\nRemoved done reason printing\n\nMoved sb3 testing to this file, added more prints, added success percentage during custom testing and refactored it a bit, added sb3 rew and std to csv file, added a simple loop to keep the sim running after finishing evaluation\n\nAdded option to skip training and go straight to testing, added some general stuff as arguments, removed sb3 testing and moved it to testing script\n\nAdded some general easy to access parameters for starting experiments\n\nRemoved unused plot function\n\nRemoved unused stuff, fixed env class name after rename, added some prints for testing\n\nRenamed class, left some TODOs and updated docstrings\n\nRenamed to find and avoid v2 and deleted unused stuff\n\nAdded stuff from map branch:\n1. Removed obstacle markers\n2. Added more entries to ds sensors to properly set noise\n\nAdded stuff from map branch:\n1. Changed manual control motor speeds\n2. Removed unused virtual angle stuff in comments\n3. Added proper ds noise\n\nFixed testing seed to get identical test maps\n\nFixed minor 1-off bug with diluted observation\n\nNew baseline params\n\nChangelog:\n1. Added HPARAMS on tensorboard logging\n2. Minor fix on n_steps value to be multiple of 2\n3. Added Monitor wrapper as it is required by sb3 evaluation\n4. Added sb3 policy evaluation\n\nAdded some prints\n\nRefactored to run testing sequentially after training\n\nChangelog:\n1. Refactored to run sequentially with tester\n2. Increased base maximum episode steps\n3. Got rid of timeout wrapper as env is incorporating it\n\nChangelog:\n1. Refactored to run sequentially with trainer\n2. Removed unused imports\n3. Automated testing now runs 100 episodes per difficulty\n4. Added printing of test progress\n\nChangelog:\n1. Incorporated timeout properly\n2. Fixed done reason timeout stuff\n3. Fixed not reach reward normalization range\n4. Added method to properly set the max episode steps\n\nChangelog:\n1. Added maximum episode steps argument to identify timeouts\n2. added a not reach weight that penalizes robot based on distance achieved at timeout, to do this minimum distance is tracked\n3. added touch sensor to observation\n4. added metrics for reset count, reach target count, collision termination count\n5. modified unmasking of backward action\n6. changed distance to target obs normalization to be based on initial distance as max\n7. changed distance reward to be based on minimum distance achieved\n8. changed dist sensor reward to take the mean of the all sensor penalties, but normalized it from realistic max value\n9. added option for setting reset_on_collisions to -1 to never reset from collisions\n10. some updates to export parameters\n\nAdded conv1d actor and critic in comments\n\nRemoved unused stuff\n\nSome improvements\n\nAdded the two new runners\n\nSeparated training and testing in separate runners\n\nMajor rearrangement of parameters, modified and fixed some difficulty stuff, new baseline default parameters\n\nAdded obstacle turning reward and obstacle ds markers stuff in comments\n\nAdded distance sensor markers\n\nAdded obstacle turning weight\n\nAdded special case when the target is right on top of the robot\n\nMinor adjustments in resetting angle and distance observations and readded some rounding in the observation\n\nimproved testing\n\nMinor fix in action comment\n\nNew default multipliers\n\nAdded csv output of test results\n\nOverhauled testing after training\n\nAdded printing of difficulty key, added info return done reason, moved trigger_done flag reset to reset, fixed walls position resetting\n\nExperiment name and diff readded to logger on rollout end\n\nMinor refactoring based on large refactoring on the env\n\nMajor cleanup, docstrings, ds noise argument, fix minor bug in apply_action, fixed minor bug not resetting observation counter, observation memory and applying zero motor velocities\n\nAdded load path, increased default ds threshold, changed test diff to random, added reward printing for test\n\nAdded ds threshold on export params\n\nMore masking, backward\n\nImproved manual control\n\nBetter parameter defaults\n\nBetter masking for left-right, added various new parameters to export\n\nDifferent action scheme, actions now directly change motor speeds\n\nAdded multipliers for target rewards when obstacles are detected.\n\nAdded ds type argument\n\nFixed reset_on_collisions argument and reset of counter\n\nFixed corridor walls bounding box\n\nAdded random corridor map feature\n\nAdded corridor difficulty\n\nAdded corridor walls\n\nReduced steps timelimit, normalized reward weights a bit, fixed saved model names, added flag for continuous lines in tensorboard\n\nSimplified ds reward and added controls to print obs and rew\n\nDistance sensor reward is now based on rate of change as before\n\nAdded a threshold for the distance sensor rewards\n\nAdded a threshold for the distance sensor rewards\n\nRearranged imports and added torch and numpy seeding\n\nMinor fixes and changed default parameters\n\nCommented out dynamic action masking, which doesn't work that well anymore with proper sonar distance sensors\n\nAdded random seed, fixed initial tar d reward, added normed reward metric calculation and resetting, removed tar ang dist reward decrease when obstacles are detected\n\nBetter logger management, added seed, added avg normed reward metric\n\nDecreased arena size\n\nAdded ds range in export params\n\nRemoved reward normalization\n\nModified masking, added dynamic max ds range via argument\n\nAdded max range argument\n\nModified sensors to ultrasonic and modified lookup table\n\nIncreased time limit by a lot and modified reward weights slightly\n\nAdded a printing callback\n\nAdded add_action_to_obs to export params\n\nSome pep8\n\nReduced threshold to unmask backward action\n\nModified backward action masking to accomodate longer ds range\n\nAdded add action one-hot to obs flag\n\nAdded add action one-hot to obs flag\n\nAdded latest action one-hot to observation\n\nAdded random test difficulty\n\nFixed action masking for rearrange backwards/stop\n\nNow usign experiment_dir properly\n\nRearranged stop and backward action and removed stop action\n\nIncreased ds range\n\nReward weights are now normalized to add to 1\n\nSlightly modified base weights\n\nFixed export parameters for sb3\n\nAdded more hyperparams, added export of setup, cleanup\n\nMore sane n_steps etc\n\nMaskablePPO\n\nDense+diluted observation, collision reset now can happen after multiple steps, time penalty added and dist/ang reward now symmetric, removed rounding from observation\n\nsb3 initial\n\nAdded support for action masking, box difficulty, changed defaults\n\nAction masking and larger networks\n\nAdjusted touch sensor bounding box\n\nMajor cleanup refactoring, reward improvements and action masking, alternative difficulty \"box\"\n\nAdded support for action masking on ppo agent\n\nAdded manual control flag and additive reach target reward\n\nIncreased epsilon for adam optimizer as suggested for ppo\n\nRevert to custom implementation\n\nAdded reset on collision flag\n\nTime limit wrapper\n\nModified ds reward\n\nDS reward modified to reward minimum sensor value increasing or decreasing.\n\nRemove progress bar\n\nSb3 ppo\n\nReset on col, fixed starting obs bug, simple reward return for sb3\n\nModified distance sensors\n\nFix last training per episode\n\nAdded expanded action plotting\n\nFixed up the train on done stuff to be more logical\n\nLarger network by default\n\nAdded possibility of setting z when adding new node\n\nImplemented expanded action space option\n\nBaseline setup\n\nBaseline setup\n\nBaseline setup\n\nChanged some defaults and added some batch size options\n\nAdded all agent parameters in this file to get passed to agent ctor\n\nAdded all parameters in this file to get passed to env ctor\n\nAdded gamma explanation in docstring\n\nMajor refactoring, collision weight is additive, removed touch sensor observation, verbose flag to print reward per step, removed rounding from observation values\n\nModified distance reward to discourage the agent standing still\n\nNow using print method on test and fixed test bug where it didn't terminate properly\n\nFixed test cuda wrong device bug\n\nreach target reward gets added when non-zero instead of checking threshold\n\nDistance sensor reward defaults to additive with target dist/angle reward\n\nDefault save to disk to True\n\nAdded some setup parameters for convenience in loading and testing\n\nresults file name now is added automatically\n\nDecreased arena size to avoid going around exploit\n\nMajor cleanup/refactoring and save training metrics per checkpoint\n\nRemoved episode score stuff and solved method\n\nReset viewpoint to get rid of new reset shenanigans\n\nDef on viewpoint\n\nFixed bug with path nodes not resetting\n\nReplaced json with pickle for results for hopefully smaller filesizes\n\nAdded pickle loading option\n\nDefault difficulty now includes all obstacles since the start\n\nIncreased target reach and collision reward weights\n\nFixed bug in results plotting saving\n\nReset now doesn't reset simulation to gain performance\n\nRearranged initialization and added experiment description\n\nAdded experiment description\n\nAdded todo for new reward breakdown\n\nHandle and print reward dict\n\nReturn reward dict\n\nFixed export params\n\nrenamed target stop stuff to target reach\n\nRemoved stopping on target reward stuff, when target is reached episode is terminated instantly and agent is rewarded\n\nFixed max action selection to return the actual probability of the max action\n\nAdded option to entirely disable saving anything to disk\n\nFixed export parameters usage of cuda to get model size\n\nFixed saving plots\n\nAdded actual arguments\n\nRemoved set path\n\nAdded save capability to results plotting\n\nAdded result plotting script\n\nRemoved indent from results json dump to reduce filesize\n\nAdded some more prints at the end of episodes\n\nSome comments\n\nAdded touch sensor and refactored distance sensor init\n\nAdded more distance sensors\n\ntemp\n\nDecreased penalty for not stopping on target by a factor of 10 and reverted distance decrease/increase asymmetric normalization\n\nImproved experiment name on plot and added it into the per episode prints\n\nAdded higher negative reward for increasing angle and distance to potentially mitigate exploit for higher window agents, also stop reward gets multiplied by number of steps it has stopped.\n\nAdded experiment name on pop-up plot\n\nAdded more stats saving into json for later plotting\n\nAdded a call to export_parameters to save experiment setup\n\nAdded a get size method for the networks\n\nAdded an export parameters function\n\nAdded window observation\n\nAdded negative reward when not stopping under target threshold\n\nOverwrite target reward with distance reward and disable path reward\n\nSimplified reward weights to a basic baseline\n\nDecreased angle reward weight\n\nIncreased stop reward weight\n\nModified obstacles in difficulty\n\nAdded extra obstacles to bring the total to 25\n\nSlightly modified reward weights\n\nModified dist sensor reward weight and calculation\n\nFixed find_dist_to_path when path has only 1 node\n\nOverhauled ds reward, added path proximity and angle reward, added separate max values for dsensors, some refactoring\n\nChanged sensors lookup table to form a rectangle\n\nadded position mode to get_angle_to_target instead of node only\n\nChanged  reward weights and removed ds thresholds\n\nReduced collision reward weight\n\nImproved total reward calculation and avoided stop/turn exploit\n\nRemoved negative reward from stop\n\nFixed conditions for stop\n\nFixed apply_action docstring\n\nAdded backwards movement action\n\nRemoved overriding action\n\nConverted angle reward to discrete value\n\nAdded step limit for reset when agent is deployed\n\nFixed saving checkpoint just before training ends\n\nRemoved leftover action override\n\nConditional reward\n\nRemoved facing target condition and increased on target threshold\n\nFixed stop exploit\n\nSimplified stopping on target-facing if-else\n\nReset on target counter on not stopping action in addition to distance and facing\n\nModified stopping reward calculation\n\nRemoved leftover action overwrite\n\nReduced collision weight, modified dist thres multipliers, obstacle reward is now proportional to the ds value\n\nIncreased collision weight\n\nFixed checkpoint naming\n\nIncreased facing threshold, added multipliers for dist sensor thresholds\n\nFixed has stopped reward\n\nAdded customizable reward weights and reimplemented counting multiple steps on stop to complete episode\n\nTidy-up\n\nAdded distance sensor reward and improved total reward calculation and weights\n\nModified episode limit, checkpoint frequency and difficulties\n\nOnly target-robot difficulty no obstacles\n\nReworked distance/angle stuff, no obstacle currently\n\nRemoved not stopping on target penalty, decreased on target limit\n\nDoubled checkpoint frequency\n\nDoubled steps per episode\n\nAdded smoothness reward to punish too much turning\n\nRemoved forced action\n\nRemoved print\n\nFixed add near/neighbors, new more continuous reward function\n\nModified agent model and hyperparameters\n\nFixed add_near\n\nModified some rewards to stop the robot learning to turn in place, also added some keyboard controls for debugging\n\nAngle reward now when not detecting obstacles\n\nAdded small punishment for turning in place when far away\n\nSlightly modified rewards\n\nFixed viewpoint\n\nAdded new sensors\n\nAdded another layer in agent models\n\nIncreased number of distance sensors\n\nModified checkpoint naming\n\nModified difficulty\n\nDisabled solved entirely\n\nGreatly increased solved condition temporarily\n\nOvehauled reward\n\nChanged view and view follows robot\n\nModified angle reward to apply only when near the target, changed some thresholds\n\nModified difficulty\n\nDecreased dist sensor range\n\nIncreased lr again\n\nFixed starting difficulty\n\nDecreased agent learning rate\n\nIncreased decimals of rounding in the observation\n\nModified difficulty\n\nFixed bug when robot is outside grid map\n\nDecreased model size\n\nAdded small logos to show path\n\nAdditional functionality for grid and angle reward based on next node on path instead of final target\n\nAdded changing difficulty dictionary\n\nArena is now smaller to match the size of the grid map\n\nSome renamind and set difficulty method\n\nVarious tidy-up, fixed observation normalization bugs, recalculated maximum achievable reward per episode for the solved condition, greatly improved target randomization\n\nSet default viewpoint\n\nIncrease actor/critic network size\n\nSome fixes\n\nAdded randomizable map with obstacles and randomizable starting and target positions, as well as path finding to ensure that target is reachable, added touch sensor to terminate episode when robot collides.\n\nAdded obstacles etc\n\nDistance sensor values added to observation\n\nNew distance sensors and available objects for obstacles\n\nFix a minor comment\n\nFixed done condition\n\nClean-up\n\nFixed warning for logo\n\nTidy-up and clean-up\n\nMinor bug fix in testing and put plot in try except\n\nDecreased found target thresholds and minor fix in reward\n\nFixed bug on found target\n\nRemoved extra underscore\n\nDecreased saving interval\n\nRenamed is_done to on_target\n\nReset on target counter is thresholds are exceeded\n\nDecreased decreasing distance reward to make robot learn to move more straight\n\nAdded checkpoints\n\nRenamed to path following\n\nSome fixes\n\nSome more progress\n\nAdded agent saving\n\nDisabled camera and improved reward function\n\nVarious fixes\n\nPath following new example initial version WIP","shortMessageHtmlLink":"Find and Avoid V2"}},{"before":"e8a7ba788020f16ba87dce2c23f74769f538ce45","after":"886fbd43424c82d90e3c4e731d2ba266d1144e48","ref":"refs/heads/double-cartpole","pushedAt":"2023-07-06T14:31:00.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"tsampazk","name":"Kostas Tsampazis","path":"/tsampazk","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/27914645?s=80&v=4"},"commit":{"message":"Added double pendulum cartpole","shortMessageHtmlLink":"Added double pendulum cartpole"}},{"before":"ffd6f8c9b4b76e3c5f208b384554bf01e8c2186f","after":"e8a7ba788020f16ba87dce2c23f74769f538ce45","ref":"refs/heads/double-cartpole","pushedAt":"2023-07-06T14:18:46.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"tsampazk","name":"Kostas Tsampazis","path":"/tsampazk","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/27914645?s=80&v=4"},"commit":{"message":"Updated world file for latest Webots version (2023b)","shortMessageHtmlLink":"Updated world file for latest Webots version (2023b)"}},{"before":"f4ce9b9450ea573d181bae700364b37534d78ad8","after":"9fa2c927e675480f282cfeda33a9944edd190464","ref":"refs/heads/find-and-avoid-v2","pushedAt":"2023-07-06T09:43:01.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"tsampazk","name":"Kostas Tsampazis","path":"/tsampazk","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/27914645?s=80&v=4"},"commit":{"message":"Updated wbt file for latest webots version (2023b)","shortMessageHtmlLink":"Updated wbt file for latest webots version (2023b)"}},{"before":"cd8cb76a2f068e9a69df44cabe8ebc86ec121091","after":"f4ce9b9450ea573d181bae700364b37534d78ad8","ref":"refs/heads/find-and-avoid-v2","pushedAt":"2023-07-06T09:41:04.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"tsampazk","name":"Kostas Tsampazis","path":"/tsampazk","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/27914645?s=80&v=4"},"commit":{"message":"Removed .csv from test results filename resulting in double postfix","shortMessageHtmlLink":"Removed .csv from test results filename resulting in double postfix"}},{"before":"57698d3a7a9b0ca92a342da47ff5c4c0faa1ee67","after":"fda0934c28e4f4aadf59afcdcda4665a246ccdc9","ref":"refs/heads/dev","pushedAt":"2023-06-09T10:13:43.697Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"KelvinYang0320","name":"Jiun Kai Yang","path":"/KelvinYang0320","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49781698?s=80&v=4"},"commit":{"message":"Fixed PPO ImportError and removed requests==2.28.0","shortMessageHtmlLink":"Fixed PPO ImportError and removed requests==2.28.0"}},{"before":"f938df29628e8cc2c32fdbced3c8dfdeb6d712d5","after":null,"ref":"refs/heads/dependabot/pip/examples/khr-3hv/khr-3hv_continuous/requirements/requests-2.31.0","pushedAt":"2023-06-09T06:39:25.626Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"dependabot[bot]","name":null,"path":"/apps/dependabot","primaryAvatarUrl":"https://avatars.githubusercontent.com/in/29110?s=80&v=4"}},{"before":"f198343f9d1f96248542de43b4be05038dcccac1","after":"cd8cb76a2f068e9a69df44cabe8ebc86ec121091","ref":"refs/heads/find-and-avoid-v2","pushedAt":"2023-05-24T11:18:47.250Z","pushType":"push","commitsCount":2,"pusher":{"login":"tsampazk","name":"Kostas Tsampazis","path":"/tsampazk","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/27914645?s=80&v=4"},"commit":{"message":"Merge branch 'dev' into find-and-avoid-v2","shortMessageHtmlLink":"Merge branch 'dev' into find-and-avoid-v2"}},{"before":null,"after":"f938df29628e8cc2c32fdbced3c8dfdeb6d712d5","ref":"refs/heads/dependabot/pip/examples/khr-3hv/khr-3hv_continuous/requirements/requests-2.31.0","pushedAt":"2023-05-23T04:24:16.263Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"dependabot[bot]","name":null,"path":"/apps/dependabot","primaryAvatarUrl":"https://avatars.githubusercontent.com/in/29110?s=80&v=4"},"commit":{"message":"Bump requests in /examples/khr-3hv/khr-3hv_continuous/requirements\n\nBumps [requests](https://github.com/psf/requests) from 2.28.0 to 2.31.0.\n- [Release notes](https://github.com/psf/requests/releases)\n- [Changelog](https://github.com/psf/requests/blob/main/HISTORY.md)\n- [Commits](https://github.com/psf/requests/compare/v2.28.0...v2.31.0)\n\n---\nupdated-dependencies:\n- dependency-name: requests\n  dependency-type: direct:production\n...\n\nSigned-off-by: dependabot[bot] <support@github.com>","shortMessageHtmlLink":"Bump requests in /examples/khr-3hv/khr-3hv_continuous/requirements"}},{"before":"6ea9102fb0dfae62c18e4e00b840b51b334cdf92","after":"f198343f9d1f96248542de43b4be05038dcccac1","ref":"refs/heads/find-and-avoid-v2","pushedAt":"2023-05-11T10:31:57.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"tsampazk","name":"Kostas Tsampazis","path":"/tsampazk","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/27914645?s=80&v=4"},"commit":{"message":"Removed .csv from test results filename resulting in double postfix","shortMessageHtmlLink":"Removed .csv from test results filename resulting in double postfix"}}],"hasNextPage":true,"hasPreviousPage":false,"activityType":"all","actor":null,"timePeriod":"all","sort":"DESC","perPage":30,"cursor":"djE6ks8AAAAEHSmgTQA","startCursor":null,"endCursor":null}},"title":"Activity · aidudezzz/deepworlds"}