ROCm is an open-source software platform for AMD GPUs. As of Debian Trixie, many ROCm libraries are available directly within the Debian repositories.

Getting Started

To check if your system has any ROCm devices available, start by installing rocminfo:

sudo apt install rocminfo

The use of ROCm requires access to devices in /dev/kfd and /dev/dri/*, which are owned by the video and render groups. You will need to add your user to the video and render groups to ensure you can access them:

sudo usermod -aG video,render $USER

This group change will take effect the next time you log in, so you may need to refresh your session by logging out and back in.

You can check that everything is working correctly by running rocminfo and checking that your GPU is detected.

Hardware Compatibility

The list of GPUs supported for ROCm on Debian is different from the upstream AMD ROCm project and varies depending on the Debian release. In general, there is wider support for older GPUs on Debian and wider support for newer GPUs upstream.

On Debian Trixie, the ROCm libraries included in the distribution have been built with support for all discrete Vega, RDNA 1, RDNA 2, RDNA 3, CDNA 1 and CDNA 2 GPUs. Integrated Vega and RDNA 2 GPUs are also enabled, but may have some limitations.

Building HIP Programs

Building with CMake and hipcc

sudo apt install hipcc cmake

cat > main.cpp << END
#include <stdio.h>
#include <stdlib.h>
#include <hip/hip_runtime.h>

/* Evaluates a HIP runtime call once; if it did not return hipSuccess, prints
   the source location, error string and numeric code to stderr and exits. */
#define CHECK_HIP(expr) do {                          \
  hipError_t hip_status_ = (expr);                    \
  if (hip_status_ != hipSuccess) {                    \
    fprintf(stderr, "%s:%d: %s (%d)\n",               \
            __FILE__, __LINE__,                       \
            hipGetErrorString(hip_status_),           \
            hip_status_);                             \
    exit(EXIT_FAILURE);                               \
  }                                                   \
} while(0)

// Kernel: replaces each of the first n elements of arr with its square,
// handling one element per thread.
__global__ void sq_arr(float *arr, int n) {
  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
  // Threads whose index falls outside the array have nothing to do.
  if (idx >= n) {
    return;
  }
  const float value = arr[idx];
  arr[idx] = value * value;
}

int main() {
  enum { N = 5 };
  float hArr[N] = { 1, 2, 3, 4, 5 };
  float *dArr;
  CHECK_HIP(hipMalloc(&dArr, sizeof(float) * N));
  CHECK_HIP(hipMemcpy(dArr, hArr, sizeof(float) * N, hipMemcpyHostToDevice));
  sq_arr<<<dim3(1), dim3(32,1,1), 0, 0>>>(dArr, N);
  CHECK_HIP(hipMemcpy(hArr, dArr, sizeof(float) * N, hipMemcpyDeviceToHost));
  for (int i = 0; i < N; ++i) {
    printf("%f\n", hArr[i]);
  }
  CHECK_HIP(hipFree(dArr));
  return 0;
}
END

cat > CMakeLists.txt << END
# Build script for the example when main.cpp is compiled as ordinary C++.
cmake_minimum_required(VERSION 3.22)
# Only the CXX language is enabled; the C++ compiler is chosen when cmake is invoked.
project(example LANGUAGES CXX)
# Locate the hip CMake package; configuration fails if it cannot be found (REQUIRED).
find_package(hip REQUIRED)
add_executable(ex main.cpp)
# Link the executable against the hip::device target.
target_link_libraries(ex PRIVATE hip::device)
END

# autodetects your GPU, but can be manually specified with -DAMDGPU_TARGETS=<list-of-gfxids>
CXX=hipcc cmake -S. -Bbuild
make -C build
./build/ex

Building with CMake support for the HIP language

sudo apt install hipcc cmake

cat > main.hip << END
#include <stdio.h>
#include <stdlib.h>
#include <hip/hip_runtime.h>

/* Evaluates a HIP runtime call once; if it did not return hipSuccess, prints
   the source location, error string and numeric code to stderr and exits. */
#define CHECK_HIP(expr) do {                          \
  hipError_t hip_status_ = (expr);                    \
  if (hip_status_ != hipSuccess) {                    \
    fprintf(stderr, "%s:%d: %s (%d)\n",               \
            __FILE__, __LINE__,                       \
            hipGetErrorString(hip_status_),           \
            hip_status_);                             \
    exit(EXIT_FAILURE);                               \
  }                                                   \
} while(0)

// Kernel: replaces each of the first n elements of arr with its square,
// handling one element per thread.
__global__ void sq_arr(float *arr, int n) {
  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
  // Threads whose index falls outside the array have nothing to do.
  if (idx >= n) {
    return;
  }
  const float value = arr[idx];
  arr[idx] = value * value;
}

int main() {
  enum { N = 5 };
  float hArr[N] = { 1, 2, 3, 4, 5 };
  float *dArr;
  CHECK_HIP(hipMalloc(&dArr, sizeof(float) * N));
  CHECK_HIP(hipMemcpy(dArr, hArr, sizeof(float) * N, hipMemcpyHostToDevice));
  sq_arr<<<dim3(1), dim3(32,1,1), 0, 0>>>(dArr, N);
  CHECK_HIP(hipMemcpy(hArr, dArr, sizeof(float) * N, hipMemcpyDeviceToHost));
  for (int i = 0; i < N; ++i) {
    printf("%f\n", hArr[i]);
  }
  CHECK_HIP(hipFree(dArr));
  return 0;
}
END

cat > CMakeLists.txt << END
# Build script for the example using CMake's HIP language support.
cmake_minimum_required(VERSION 3.22)
# HIP is the only enabled language, so main.hip is built by the HIP compiler
# selected when cmake is invoked.
project(example LANGUAGES HIP)
add_executable(ex main.hip)
END

# Bookworm uses clang++-15
# Trixie uses clang++-17
# autodetects your GPU, but can be manually specified with -DCMAKE_HIP_ARCHITECTURES=<list-of-gfxids>
HIPCXX=clang++-17 cmake -S. -Bbuild
make -C build
./build/ex

Using llama.cpp

The llama.cpp utility is available on Debian Unstable. It can be installed and run with ROCm support through the libggml-hip backend. Note that models are not provided within Debian and may need to be downloaded from a third party such as Hugging Face. This is an example of installing llama.cpp, downloading a model, and using it.

sudo apt install llama.cpp libggml0-backend-hip wget
wget https://huggingface.co/TheBloke/dolphin-2.2.1-mistral-7B-GGUF/resolve/main/dolphin-2.2.1-mistral-7b.Q5_K_M.gguf?download=true -O dolphin-2.2.1-mistral-7b.Q5_K_M.gguf

export HSA_OVERRIDE_GFX_VERSION=10.3.0 # Set GFX version for unsupported AMD GPUs. Adjust as needed.
llama-cli -ngl 32 --color -c 2048 --temp 0.7 --repeat_penalty 1.1 -n -1 -m dolphin-2.2.1-mistral-7b.Q5_K_M.gguf --prompt "Once upon a time"

References