4 changes: 3 additions & 1 deletion .gitignore
@@ -252,4 +252,6 @@ paket-files/
 **/reranker_m3_onnx
 **/reranker_m3_onnx_gpu
 **/bge_m3_onnx
-**/bge_m3_onnx_gpu
+**/bge_m3_onnx_gpu
+**/llama3.1_8b_onnx_gpu
+**/llama3.2_3b_onnx_gpu
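The two new `**/` globs can be sanity-checked in a scratch repository with `git check-ignore`; the `models/…` path below is a hypothetical example, not a path from this repo.

```shell
# Sketch: confirm the new .gitignore globs match model-output dirs at any depth.
tmp=$(mktemp -d)
git init -q "$tmp"
printf '%s\n' '**/llama3.1_8b_onnx_gpu' '**/llama3.2_3b_onnx_gpu' > "$tmp/.gitignore"
mkdir -p "$tmp/models/llama3.1_8b_onnx_gpu"
# check-ignore prints the path and exits 0 when a pattern matches
git -C "$tmp" check-ignore models/llama3.1_8b_onnx_gpu && echo matched
```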
2 changes: 2 additions & 0 deletions OrtForge.sln
@@ -11,6 +11,8 @@ EndProject
 Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "docs", "docs", "{63CDC6A4-3C2D-499F-B3F9-6B75D40887E1}"
 ProjectSection(SolutionItems) = preProject
 docs\INSTALL_AMD_ROCm.md = docs\INSTALL_AMD_ROCm.md
+docs\INSTALL.md = docs\INSTALL.md
+docs\INSTALL_NVIDIA_CUDA.md = docs\INSTALL_NVIDIA_CUDA.md
 EndProjectSection
 EndProject
 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "OrtForge.AI.Models.Astractions", "OrtForge.AI.Models.Astractions\OrtForge.AI.Models.Astractions.csproj", "{40A4313C-6826-4E8D-9A01-DA760DE4CE26}"
90 changes: 18 additions & 72 deletions docs/INSTALL.md
@@ -1,87 +1,33 @@
-# Install AMD ROCm accelerator on Linux/WSL environment.
-Beware of if you have integrated AMD graphics (most likely you do with AMD CPUs), you must turn it off in order for ROCm accelerators to function with ONNX Runtime.
+# Install Optimum CLI for model conversion and optimization
 
-Here is the instruction on how to install version 6.4.2 of ROCm, and it works with an open source AMD driver on Ubuntu 24.04.
 ```bash
-wget https://repo.radeon.com/amdgpu-install/6.4.2/ubuntu/noble/amdgpu-install_6.4.60402-1_all.deb
-sudo apt update
-sudo apt install ./amdgpu-install_6.4.60402-1_all.deb
-sudo amdgpu-install --usecase=rocm,hiplibsdk,graphics,opencl -y --vulkan=amdvlk --no-dkms
 sudo apt install build-essential flex bison libssl-dev libelf-dev bc python3 pahole cpio python3.12-venv python3-pip
+mkdir optimum
+cd optimum
+python3 -m venv .
+source ./bin/activate
 ```
 
-Sample for version 6.4.3
-```bash
-wget https://repo.radeon.com/amdgpu-install/6.4.3/ubuntu/noble/amdgpu-install_6.4.60403-1_all.deb
-sudo apt update
-sudo apt install ./amdgpu-install_6.4.60403-1_all.deb
-sudo amdgpu-install --usecase=rocm,hiplibsdk,graphics,opencl -y --vulkan=amdvlk --no-dkms
-```
+For AMD GPU support (running and optimizing models with ONNX Runtime), follow the instructions in [AMD GPU Support](INSTALL_AMD_ROCm.md).
 
-And to check if the installation succeeded.
+## ROCm
 ```bash
-rocminfo #make note of your GPU uuid, to whitelist only CPU and discreet GPU on the next step
+pip install torch torchvision --index-url https://download.pytorch.org/whl/rocm6.4
+pip install onnxruntime_genai onnx-ir
+python3 -m onnxruntime_genai.models.builder -i . -o ./onnx_opt_i4 -p int4 -e rocm
 ```
 
-`rocminfo` DOESN'T fail if integrated GPU is enabled, but a lot of features may not be supported to a point when it will crash a driver at runtime.
-Your options are: disable iGPU in UEFI/BIOS or export environment variable to whitelist CPU and discreet GPU only.
-```bash
-export ROCR_VISIBLE_DEVICES="0,GPU-deadbeefdeadbeef" #0 - CPU, GPU-deadbeefdeadbeef - GPU.
-```
+For Nvidia GPU (CUDA) support (running and optimizing models with ONNX Runtime), follow the instructions in [CUDA GPU Support](INSTALL_NVIDIA_CUDA.md).
 
-The source for instruction was taken from version 6.4.1 — it does not exist for higher versions. But it works with pretty much all versions.
-
-## Instructions source
-https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.1/install/install-methods/amdgpu-installer/amdgpu-installer-ubuntu.html
-
-# Building ONNX Runtime for ROCm
-
-The build process for ROCm target accelerator is extremely heavy and may take 3+ hours on Ryzen 9 9950X and peaks at ~50 Gb memory usage (with 96 Gb total RAM).
-Considering the above, choose your targets from the beginning. I recommend building all targets in one go (Python and .NET) — this will save a lot of time.
-
-Clone repo
+## CUDA
 ```bash
-git clone --recursive https://github.com/ROCm/onnxruntime.git
-git checkout tags/v1.22.1
-cd onnxruntime
+pip install torch torchvision
+pip install onnxruntime_genai onnx-ir onnxruntime_gpu
+python3 -m onnxruntime_genai.models.builder -i . -o ./onnx_opt_i4 -p int4 -e cuda
 ```
 
-Build for .NET only to run models
+Optimize a model for inference on GPU using FP16 precision:
 ```bash
-./build.sh --update --build --config Release --build_nuget --parallel --use_rocm --rocm_home /opt/rocm --skip_tests
-```
-
-Build for .NET and for Python stack with PyTorch and any other toolset that may utilize GPU accelerators on AMD
-
-```bash
-python3 -m venv .
-source ./bin/activate
-pip install 'cmake>=3.28,<4'
-pip install -r requirements.txt
-pip install setuptools
-./build.sh --update --build --config Release --build_wheel --build_nuget --parallel --use_rocm --rocm_home /opt/rocm --skip_tests
-```
-
-Install wheel for python to use in the venv
-```bash
-pip install ./build/Linux/Release/dist/*.whl
-```
-Instructions primary source
-https://onnxruntime.ai/docs/build/eps.html#amd-rocm
-
-### Pre-built .NET packages are linked to the repo
-
-### Optimum[onnx] CLI can use ROCm but would actually call accelerator/target as CUDA and work for parts of workloads, please hold on tight and brace yourself, this may get fixed at some point in the future.
-Also, AMD has a CUDA translation layer for non-precompiled code, so it may simply work sometimes.
-```text
-   .-'---`-.
- ,'          `.
- |             \
- |              \
- \           _  \
- ,\  _    ,'-,/-)\
- ( * \ \,' ,' ,'-)
-  `._,)     -',-')
-    \/         ''/
-     )        / /
-    /       ,'-'
+optimum-cli export onnx --model . --dtype fp16 --task default --device cuda --optimize O4 ./onnx_fp16
 ```
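The ROCm and CUDA flows added here differ only in the execution-provider flag passed to the onnxruntime-genai model builder. A small shell helper (hypothetical, not part of this repo) makes that symmetry explicit:

```shell
# Hypothetical helper: compose the model-builder invocation per backend.
# Only the -e (execution provider) flag differs between the ROCm and CUDA flows.
builder_cmd() {
  backend="$1"   # rocm or cuda
  echo "python3 -m onnxruntime_genai.models.builder -i . -o ./onnx_opt_i4 -p int4 -e $backend"
}
builder_cmd rocm
builder_cmd cuda
```

Printing rather than executing keeps the sketch runnable without a GPU stack installed; pipe the output to `sh` once the matching backend packages are in the venv.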
16 changes: 16 additions & 0 deletions docs/INSTALL_NVIDIA_CUDA.md
@@ -0,0 +1,16 @@
+# Install Nvidia CUDA accelerator on a Linux/WSL environment
+
+1. Update the drivers to the latest version on Windows.
+2. Install CUDA Toolkit 13.0.
+3. Install ONNX Runtime for CUDA.
+
+```bash
+sudo apt-key del 7fa2af80
+wget https://developer.download.nvidia.com/compute/cuda/repos/wsl-ubuntu/x86_64/cuda-keyring_1.1-1_all.deb
+sudo dpkg -i cuda-keyring_1.1-1_all.deb
+sudo apt-get update
+sudo apt-get -y install cuda-toolkit-13-0
+```
+
+## Instructions source
+https://docs.nvidia.com/cuda/wsl-user-guide/index.html#getting-started-with-cuda-on-wsl
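After the toolkit install in the new doc, a quick sanity check is to look for `nvcc` on the PATH; the `/usr/local/cuda-13.0/bin` path below is the toolkit's usual default and is an assumption, not something stated in the doc.

```shell
# Sketch: sanity-check the CUDA toolkit install.
check_cuda() {
  if command -v nvcc >/dev/null 2>&1; then
    # nvcc --version reports a "release" line with the toolkit version
    nvcc --version | grep -i 'release'
  else
    echo "nvcc not on PATH; try: export PATH=/usr/local/cuda-13.0/bin:\$PATH"
  fi
}
check_cuda
```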