Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 46 additions & 15 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ if [ -z "$1" ]; then
fi
ENV=$1
shift
# Interpreter used for every later "-c" query. The preferred candidate is
# $PYTHON when set and non-empty, otherwise "python"; if that candidate is
# not resolvable by the shell, "python3" is used instead.
PYTHON_BIN=python3
if command -v "${PYTHON:-python}" >/dev/null 2>&1; then
  PYTHON_BIN="${PYTHON:-python}"
fi

for arg in "$@"; do
case $arg in
Expand Down Expand Up @@ -52,9 +56,22 @@ fi

# Linux/mac
PLATFORM="$(uname -s)"
ARCH_NAME="$(uname -m)"
if [ "$PLATFORM" = "Linux" ]; then
RAYLIB_NAME='raylib-5.5_linux_amd64'
OMP_LIB=-lomp5
if [ "$ARCH_NAME" = "aarch64" ] || [ "$ARCH_NAME" = "arm64" ]; then
RAYLIB_NAME='raylib-5.5_linux_aarch64'
else
RAYLIB_NAME='raylib-5.5_linux_amd64'
fi
# Choose the OpenMP link flag from the dynamic linker cache.
# The cache is listed once and matched in-process: this avoids launching
# ldconfig up to three times, and avoids `ldconfig | grep -q` pipelines, which
# can report failure on a successful match when `pipefail` is enabled (grep
# exits early and ldconfig is killed by SIGPIPE).
# A missing or failing ldconfig yields an empty listing and therefore -lgomp.
omp_ld_cache=$(ldconfig -p 2>/dev/null) || omp_ld_cache=""
case "$omp_ld_cache" in
  *libomp5.so*) OMP_LIB=-lomp5 ;;
  # Only the versioned file is listed: link it by exact file name.
  *libomp.so.5*) OMP_LIB=-l:libomp.so.5 ;;
  *libomp.so*) OMP_LIB=-lomp ;;
  *) OMP_LIB=-lgomp ;;
esac
unset omp_ld_cache
SANITIZE_FLAGS=(-fsanitize=address,undefined,bounds,pointer-overflow,leak -fno-omit-frame-pointer)
STANDALONE_LDFLAGS=(-lGL)
SHARED_LDFLAGS=(-Bsymbolic-functions -Wl,--gc-sections)
Expand All @@ -79,10 +96,16 @@ CLANG_WARN=(
#######################################
# Make a third-party dependency directory available in the current directory.
# Lookup order:
#   1. ./<name> already exists            -> nothing to do
#   2. $HOME/pufferlib/<name> or
#      $HOME/pufferlib-4.0/<name> exists  -> symlink it as ./<name>
#   3. otherwise download the archive at <url> and unpack it here
# Arguments:
#   $1 - directory name (the archive is presumably expected to unpack to it)
#   $2 - archive URL; "*.zip" is unpacked with unzip, anything else with tar
# Outputs:
#   "Downloading <name>..." on stdout when a download is attempted
# Returns:
#   0 if the directory already exists; the status of `ln -s` when linking;
#   otherwise the status of the curl/unpack/cleanup chain.
#######################################
download() {
  local name=$1 url=$2 fallback
  [ -d "$name" ] && return
  for fallback in "$HOME/pufferlib/$name" "$HOME/pufferlib-4.0/$name"; do
    if [ -d "$fallback" ]; then
      ln -s "$fallback" "$name"
      return
    fi
  done
  echo "Downloading $name..."
  # -f makes curl fail on HTTP errors instead of saving the error page as the
  # archive; -sS keeps it quiet but still prints real errors; -L follows
  # redirects.
  case "$url" in
    *.zip) curl -fLsS "$url" -o "$name.zip" && unzip -q "$name.zip" && rm "$name.zip" ;;
    *) curl -fLsS "$url" -o "$name.tar.gz" && tar xf "$name.tar.gz" && rm "$name.tar.gz" ;;
  esac
}

Expand Down Expand Up @@ -172,6 +195,7 @@ fi
CUDA_HOME=${CUDA_HOME:-${CUDA_PATH:-$(dirname "$(dirname "$(which nvcc)")")}}
CUDNN_IFLAG=""
CUDNN_LFLAG=""
CUDNN_LIB="-lcudnn"
for dir in /usr/local/cuda/include /usr/include; do
if [ -f "$dir/cudnn.h" ]; then
CUDNN_IFLAG="-I$dir"
Expand All @@ -185,10 +209,13 @@ for dir in /usr/local/cuda/lib64 /usr/lib/x86_64-linux-gnu; do
fi
done
# cuDNN fallback: when the directory scans above left a flag empty, ask the
# importable nvidia.cudnn Python package for its include/lib directories.
# Each lookup runs once, through the selected interpreter ($PYTHON_BIN), not
# a hard-coded `python`. Lookup errors are deliberately silenced; the flag
# then simply stays empty.
if [ -z "$CUDNN_IFLAG" ]; then
  CUDNN_IFLAG=$("$PYTHON_BIN" -c "import nvidia.cudnn, os; print('-I' + os.path.join(nvidia.cudnn.__path__[0], 'include'))" 2>/dev/null || echo "")
fi
if [ -z "$CUDNN_LFLAG" ]; then
  CUDNN_LFLAG=$("$PYTHON_BIN" -c "import nvidia.cudnn, os; print('-L' + os.path.join(nvidia.cudnn.__path__[0], 'lib'))" 2>/dev/null || echo "")
fi
# When the library directory holds only the versioned libcudnn.so.9 (no
# unversioned libcudnn.so for -lcudnn to find), link it by exact file name.
if [ -n "$CUDNN_LFLAG" ] \
  && [ ! -f "${CUDNN_LFLAG#-L}/libcudnn.so" ] \
  && [ -f "${CUDNN_LFLAG#-L}/libcudnn.so.9" ]; then
  CUDNN_LIB="-l:libcudnn.so.9"
fi

# NCCL include/lib fallback (mirrors the cuDNN fallback above).
Expand All @@ -202,10 +229,10 @@ for dir in /usr/lib/x86_64-linux-gnu /usr/local/cuda/lib64; do
if [ -f "$dir/libnccl.so" ] || [ -f "$dir/libnccl.so.2" ]; then NCCL_LFLAG="-L$dir"; break; fi
done
# NCCL fallback: when the directory scans above left a flag empty, ask the
# importable nvidia.nccl Python package for its include/lib directories.
# Each lookup runs once, through the selected interpreter ($PYTHON_BIN), not
# a hard-coded `python`. Lookup errors are deliberately silenced; the flag
# then simply stays empty.
if [ -z "$NCCL_IFLAG" ]; then
  NCCL_IFLAG=$("$PYTHON_BIN" -c "import nvidia.nccl, os; print('-I' + os.path.join(nvidia.nccl.__path__[0], 'include'))" 2>/dev/null || echo "")
fi
if [ -z "$NCCL_LFLAG" ]; then
  NCCL_LFLAG=$("$PYTHON_BIN" -c "import nvidia.nccl, os; print('-L' + os.path.join(nvidia.nccl.__path__[0], 'lib'))" 2>/dev/null || echo "")
fi

WHEEL_RPATH_FLAGS=()
Expand All @@ -218,14 +245,18 @@ done
# ccache environment: cache directory (caller-overridable), base directory set
# to the directory the script runs from, and compiler identity checked by
# content.
export CCACHE_DIR="${CCACHE_DIR:-$HOME/.ccache}"
export CCACHE_BASEDIR="$(pwd)"
export CCACHE_COMPILERCHECK=content

# Prefix the compilers with ccache only when ccache is installed.
# NVCC is always derived from $CUDA_HOME; a caller-provided $CC is kept as is.
if command -v ccache >/dev/null; then
  NVCC="ccache $CUDA_HOME/bin/nvcc"
  CC="${CC:-ccache clang}"
else
  NVCC="$CUDA_HOME/bin/nvcc"
  CC="${CC:-clang}"
fi

# GPU architecture handed to nvcc; NVCC_ARCH overrides the "native" default.
ARCH=${NVCC_ARCH:-native}

# Header directories and extension-module suffix, each queried exactly once
# from the selected interpreter ($PYTHON_BIN). A hard-coded `python` is not
# used here: its results would be overwritten anyway, and it may not exist on
# hosts that only ship `python3`.
PYTHON_INCLUDE=$("$PYTHON_BIN" -c "import sysconfig; print(sysconfig.get_path('include'))")
PYBIND_INCLUDE=$("$PYTHON_BIN" -c "import pybind11; print(pybind11.get_include())")
NUMPY_INCLUDE=$("$PYTHON_BIN" -c "import numpy; print(numpy.get_include())")
EXT_SUFFIX=$("$PYTHON_BIN" -c "import sysconfig; print(sysconfig.get_config_var('EXT_SUFFIX'))")
# Path of the extension module to produce.
OUTPUT="pufferlib/_C${EXT_SUFFIX}"

BINDING_SRC="$SRC_DIR/binding.c"
Expand Down Expand Up @@ -273,7 +304,7 @@ if [ -z "$MODE" ]; then
build/bindings.o "$RAYLIB_A"
-L$CUDA_HOME/lib64 $CUDNN_LFLAG $NCCL_LFLAG
"${WHEEL_RPATH_FLAGS[@]}"
-lcudart -lnccl -lnvidia-ml -lcublas -lcusolver -lcurand -lcudnn
-lcudart -lnccl -lnvidia-ml -lcublas -lcusolver -lcurand $CUDNN_LIB
$OMP_LIB $LINK_OPT
"${SHARED_LDFLAGS[@]}"
-o "$OUTPUT"
Expand Down Expand Up @@ -314,7 +345,7 @@ elif [ "$MODE" = "profile" ]; then
-Xcompiler=-fopenmp \
tests/profile_kernels.cu vendor/ini.c \
"$RAYLIB_A" \
-lnccl -lnvidia-ml -lcublas -lcurand -lcudnn \
-lnccl -lnvidia-ml -lcublas -lcurand $CUDNN_LIB \
-lGL -lm -lpthread $OMP_LIB \
-o profile
echo "Built: ./profile"
Expand Down
2 changes: 2 additions & 0 deletions config/default.ini
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ gamma = 0.995
gae_lambda = 0.90
replay_ratio = 1.0
clip_coef = 0.2
target_kl = 0.0
vf_coef = 2.0
vf_clip_coef = 0.2
max_grad_norm = 1.5
Expand All @@ -89,6 +90,7 @@ prio_alpha = 0.8
prio_beta0 = 0.2
state_buffer_size = 0
cl_frac = 0
anneal_cl = 0
warmup_states = 0
explore_alpha = 0.0
explore_beta = 0.0
Expand Down
40 changes: 21 additions & 19 deletions config/password.ini
Original file line number Diff line number Diff line change
Expand Up @@ -3,31 +3,33 @@ package = ocean
env_name = password

[vec]
total_agents = 4096
num_buffers = 2
num_threads = 8
total_agents = 8192
num_buffers = 4
num_threads = 4

[policy]
hidden_size = 128
num_layers = 2
hidden_size = 256
num_layers = 0

[train]
total_timesteps = 100_000_000
learning_rate = 0.003
gamma = 0.99
gae_lambda = 0.95
replay_ratio = 1.0
minibatch_size = 32768
horizon = 64
ent_coef = 0.01
total_timesteps = 300_000_000
learning_rate = 0.00993091
min_lr_ratio = 0.05
ent_coef = 0.0
gamma = 0.998755
replay_ratio = 1
minibatch_size = 8192
vf_coef = 3.50617
max_grad_norm = 0.1
prio_alpha = 0.0
prio_beta0 = 1.0
prio_beta0 = 0.0

#state_buffer_size = 100_000
#cl_frac = 0.8
#warmup_states = 10_000
#explore_alpha = 0.8
#explore_beta = 0.0
state_buffer_size = 1_000_000
cl_frac = 0.9
anneal_cl = 1
warmup_states = 100_000
explore_alpha = 0.99
explore_beta = 0.0

[sweep]
metric = score
Expand Down
1 change: 1 addition & 0 deletions ocean/password/binding.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#define OBS_TENSOR_T ByteTensor
#define PUFFER_HAS_STATE 1
#define PUFFER_STATE_REFRESH(env) refresh_state(env)
#define PUFFER_STATE_SCORE(env) ((env)->state.pos)

#define Env Password
#include "vecenv.h"
Expand Down
4 changes: 2 additions & 2 deletions ocean/password/password.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
#include <stdio.h>
#include "raylib.h"

#define LENGTH 64
#define LENGTH 128
#define NUM_DIGITS 9
#define PASSWORD_SEED 42u
#define CORRECT_REWARD 0.1f
#define CORRECT_REWARD 1.0f
#define WRONG_REWARD -1.0f

typedef struct {
Expand Down
4 changes: 3 additions & 1 deletion pufferlib/pufferl.py
Original file line number Diff line number Diff line change
Expand Up @@ -617,7 +617,9 @@ def load_config(env_name):
p = configparser.ConfigParser()
p.read(puffer_default_config)
else:
for path in glob.glob(puffer_config_dir, recursive=True):
paths = sorted(glob.glob(puffer_config_dir, recursive=True))
paths.sort(key=lambda path: os.path.splitext(os.path.basename(path))[0] != env_name)
for path in paths:
p = configparser.ConfigParser()
p.read([puffer_default_config, path])
if env_name in p['base']['env_name'].split(): break
Expand Down
8 changes: 8 additions & 0 deletions src/bindings.cu
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ pybind11::dict puf_log(pybind11::object pufferl_obj) {
for (int i = 0; i < env_out->size; i++) {
env_dict[env_out->items[i].key] = env_out->items[i].value;
}
if (pufferl.curriculum_enabled) {
env_dict["state_size"] = pufferl.state_buf.size;
env_dict["state_max"] = pufferl.state_buf.max_priority;
}
result["env"] = env_dict;

// Losses
Expand Down Expand Up @@ -426,6 +430,7 @@ std::unique_ptr<PuffeRL> create_pufferl(py::dict args) {
hypers.max_grad_norm = get_config(train_kwargs, "max_grad_norm");
// PPO
hypers.clip_coef = get_config(train_kwargs, "clip_coef");
hypers.target_kl = get_config(train_kwargs, "target_kl");
hypers.vf_clip_coef = get_config(train_kwargs, "vf_clip_coef");
hypers.vf_coef = get_config(train_kwargs, "vf_coef");
hypers.ent_coef = get_config(train_kwargs, "ent_coef");
Expand All @@ -443,6 +448,7 @@ std::unique_ptr<PuffeRL> create_pufferl(py::dict args) {
// Curriculum state buffer
hypers.state_buffer_size = get_config(train_kwargs, "state_buffer_size");
hypers.cl_frac = get_config(train_kwargs, "cl_frac");
hypers.anneal_cl = get_config(train_kwargs, "anneal_cl");
hypers.warmup_states = get_config(train_kwargs, "warmup_states");
hypers.explore_alpha = get_config(train_kwargs, "explore_alpha");
hypers.explore_beta = get_config(train_kwargs, "explore_beta");
Expand Down Expand Up @@ -566,6 +572,7 @@ PYBIND11_MODULE(_C, m) {
.def_readwrite("total_timesteps", &HypersT::total_timesteps)
.def_readwrite("max_grad_norm", &HypersT::max_grad_norm)
.def_readwrite("clip_coef", &HypersT::clip_coef)
.def_readwrite("target_kl", &HypersT::target_kl)
.def_readwrite("vf_clip_coef", &HypersT::vf_clip_coef)
.def_readwrite("vf_coef", &HypersT::vf_coef)
.def_readwrite("ent_coef", &HypersT::ent_coef)
Expand All @@ -579,6 +586,7 @@ PYBIND11_MODULE(_C, m) {
.def_readwrite("prio_beta0", &HypersT::prio_beta0)
.def_readwrite("state_buffer_size", &HypersT::state_buffer_size)
.def_readwrite("cl_frac", &HypersT::cl_frac)
.def_readwrite("anneal_cl", &HypersT::anneal_cl)
.def_readwrite("warmup_states", &HypersT::warmup_states)
.def_readwrite("explore_alpha", &HypersT::explore_alpha)
.def_readwrite("explore_beta", &HypersT::explore_beta)
Expand Down
Loading
Loading