Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 54 additions & 0 deletions config/docking.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
[base]
env_name = docking

[vec]
# Total number of agents stepped in parallel during each rollout.
total_agents = 4096
# Number of rollout buffers for overlap.
num_buffers = 2
# CPU threads used for env stepping.
num_threads = 4

[policy]
# Hidden width for the policy network.
hidden_size = 128
# Number of recurrent layers.
num_layers = 1

[env]
# World width in simulation units.
width = 256
# World height in simulation units.
height = 192
# Max steps before timing out.
max_ticks = 1024
# Max ship speed.
max_speed = 6.0
# Heading change applied by turn actions.
turn_rate = 0.10
# Forward speed delta applied by thrust and brake.
accel = 0.55
# Passive speed decay applied every step.
drag = 0.92
# Distance threshold for dock contact.
dock_radius = 18.0
# Max speed allowed for a clean dock.
dock_speed_threshold = 0.72
# Max heading error allowed for a clean dock.
dock_heading_threshold = 0.28
# Small per-step penalty to encourage efficiency.
step_penalty = -0.01
# Scale for distance-progress shaping reward.
progress_reward_scale = 0.25

[train]
# Total agent steps for a default run.
total_timesteps = 100_000_000
# Discount factor.
gamma = 0.99
# Optimizer learning rate.
learning_rate = 0.003
# PPO minibatch size.
minibatch_size = 32768
# Entropy bonus for exploration.
ent_coef = 0.01
38 changes: 38 additions & 0 deletions ocean/docking/binding.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#include "docking.h"

#define OBS_SIZE DOCKING_OBS_SIZE
#define NUM_ATNS 1
#define ACT_SIZES {5}
#define OBS_TENSOR_T FloatTensor

#define Env Docking
#include "vecenv.h"

// Configure a Docking environment from kwargs and initialise it.
//
// env:    environment instance to fill in; it is set up with a single agent.
// kwargs: dictionary of settings. Every key read below must be present,
//         since the result of dict_get is dereferenced directly.
void my_init(Env* env, Dict* kwargs) {
    // World size and episode length are converted to integers.
    const int world_w = (int)dict_get(kwargs, "width")->value;
    const int world_h = (int)dict_get(kwargs, "height")->value;
    const int tick_limit = (int)dict_get(kwargs, "max_ticks")->value;

    // Ship motion parameters.
    const float top_speed = (float)dict_get(kwargs, "max_speed")->value;
    const float turn_step = (float)dict_get(kwargs, "turn_rate")->value;
    const float speed_step = (float)dict_get(kwargs, "accel")->value;
    const float drag_factor = (float)dict_get(kwargs, "drag")->value;

    // Docking thresholds.
    const float radius = (float)dict_get(kwargs, "dock_radius")->value;
    const float speed_limit = (float)dict_get(kwargs, "dock_speed_threshold")->value;
    const float heading_limit = (float)dict_get(kwargs, "dock_heading_threshold")->value;

    // Reward terms.
    const float per_step = (float)dict_get(kwargs, "step_penalty")->value;
    const float progress_scale = (float)dict_get(kwargs, "progress_reward_scale")->value;

    env->num_agents = 1;
    env->width = world_w;
    env->height = world_h;
    env->max_ticks = tick_limit;
    env->max_speed = top_speed;
    env->turn_rate = turn_step;
    env->accel = speed_step;
    env->drag = drag_factor;
    env->dock_radius = radius;
    env->dock_speed_threshold = speed_limit;
    env->dock_heading_threshold = heading_limit;
    env->step_penalty = per_step;
    env->progress_reward_scale = progress_scale;

    // All fields are populated; hand over to the environment's own setup.
    c_init(env);
}

// Copy every metric held in log into the output dictionary.
//
// log: source of the metric values; only read here.
// out: dictionary that receives one entry per metric, keyed by metric name.
void my_log(Log* log, Dict* out) {
    const struct {
        const char* key;
        double value;
    } metrics[] = {
        {"perf", log->perf},
        {"score", log->score},
        {"episode_return", log->episode_return},
        {"episode_length", log->episode_length},
        {"success_rate", log->success_rate},
        {"crash_rate", log->crash_rate},
        {"timeout_rate", log->timeout_rate},
        {"final_distance", log->final_distance},
        {"alignment_error", log->alignment_error},
    };
    const int metric_count = (int)(sizeof(metrics) / sizeof(metrics[0]));

    // Entries are written in the order they are listed in the table.
    for (int i = 0; i < metric_count; i++) {
        dict_set(out, metrics[i].key, metrics[i].value);
    }
}
53 changes: 53 additions & 0 deletions ocean/docking/docking.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#include "docking.h"

int main() {
Docking env = {0};
env.width = 256;
env.height = 192;
env.max_ticks = 1024;
env.max_speed = 6.0f;
env.turn_rate = 0.10f;
env.accel = 0.55f;
env.drag = 0.90f;
env.dock_radius = 18.0f;
env.dock_speed_threshold = 0.72f;
env.dock_heading_threshold = 0.28f;
env.step_penalty = -0.01f;
env.progress_reward_scale = 0.25f;

env.observations = (float*)calloc(DOCKING_OBS_SIZE, sizeof(float));
env.actions = (float*)calloc(1, sizeof(float));
env.rewards = (float*)calloc(1, sizeof(float));
env.terminals = (float*)calloc(1, sizeof(float));

c_init(&env);
c_reset(&env);
c_render(&env);

while (!WindowShouldClose()) {
env.actions[0] = DOCK_NOOP;
if (IsKeyDown(KEY_LEFT) || IsKeyDown(KEY_A)) {
env.actions[0] = DOCK_TURN_LEFT;
} else if (IsKeyDown(KEY_RIGHT) || IsKeyDown(KEY_D)) {
env.actions[0] = DOCK_TURN_RIGHT;
} else if (IsKeyDown(KEY_UP) || IsKeyDown(KEY_W)) {
env.actions[0] = DOCK_THRUST;
} else if (IsKeyDown(KEY_DOWN) || IsKeyDown(KEY_S)) {
env.actions[0] = DOCK_BRAKE;
}

if (IsKeyPressed(KEY_R)) {
c_reset(&env);
} else {
c_step(&env);
}
c_render(&env);
}

free(env.observations);
free(env.actions);
free(env.rewards);
free(env.terminals);
c_close(&env);
return 0;
}
Loading