Lava makes it easy to use Vulkan compute shaders from Python without writing any C++. Like CUDA kernels, Vulkan compute shaders can be used to move heavy computation onto the GPU. For most use cases, Lava expects and returns NumPy arrays. Overall, Lava is similar to PyCUDA or PyOpenCL.
pip install lava
The Vulkan SDK needs to be installed and its environment variables need to be set. See the LunarG guide for Linux and Windows.
See release 0.4.0 for some older Vulkan SDK versions which are no longer available on the LunarG download page.
So far, lava
has been tested on:
Below is the lava
version of Erkaman's Vulkan minimal compute example:
import cv2 as cv
import lava as lv
import numpy as np
# Render a Mandelbrot image on the GPU with the compute shader in
# "shader.comp" and save it as "mandelbrot.png".

# start session on first device
session = lv.Session(lv.devices()[0])

# compile glsl code
shader_path = "shader.comp"
shader = lv.Shader(session, lv.compile_glsl(shader_path))

# allocate buffer and take its definition from the shader (see glsl: float[HEIGHT][WIDTH][3] imageData)
buffer_out = lv.StagedBuffer.from_shader(session, shader, binding=0)

# compute minimum amount of work groups that covers the whole image;
# ceiling division is used because rounding to nearest would leave pixels
# uncomputed whenever a dimension is not a multiple of the work group size
width, height = 3200, 2400  # must match WIDTH / HEIGHT in the shader
work_group_size = 32  # must match WORKGROUP_SIZE in the shader
x = -(-width // work_group_size)
y = -(-height // work_group_size)
z = 1

# bind buffer to binding 0 (see glsl: layout(std430, binding = 0) buffer buf)
stage = lv.Stage(shader, {0: buffer_out})
stage.record(x, y, z)
stage.run_and_wait()

# retrieve output as numpy array
im = buffer_out["imageData"]

# the shader stores the channels as R, G, B, whereas OpenCV writes
# 3-channel images in B, G, R order, so swap the channels before saving
rgb = np.around(255 * im).astype(np.uint8)
cv.imwrite("mandelbrot.png", cv.cvtColor(rgb, cv.COLOR_RGB2BGR))
#version 450
#extension GL_ARB_separate_shader_objects : enable
// Compute shader preamble: output image size, work group size and the
// storage buffer the result is written to.

// Output image dimensions in pixels.
#define WIDTH 3200
#define HEIGHT 2400
// Edge length of one square work group; used for both local_size_x and
// local_size_y below.
#define WORKGROUP_SIZE 32
// Each work group is WORKGROUP_SIZE x WORKGROUP_SIZE x 1 invocations.
layout ( local_size_x = WORKGROUP_SIZE, local_size_y = WORKGROUP_SIZE, local_size_z = 1 ) in;
// Output storage buffer at binding 0 (std430 layout): three floats per
// pixel, indexed as imageData[row][column][channel].
layout(std430, binding = 0) buffer buf {
float[HEIGHT][WIDTH][3] imageData;
};
// Computes the colour of one Mandelbrot pixel per invocation and stores it
// as three floats in imageData[row][column].
void main() {
    // Invocations that fall outside the image have nothing to write.
    uvec2 pixel = gl_GlobalInvocationID.xy;
    if (pixel.x >= WIDTH || pixel.y >= HEIGHT) {
        return;
    }

    // Pixel position scaled by the image size.
    float x = float(pixel.x) / float(WIDTH);
    float y = float(pixel.y) / float(HEIGHT);
    vec2 uv = vec2(x, y);

    // Point of the complex plane sampled by this pixel.
    vec2 c = vec2(-.445, 0.0) + (uv - 0.5) * (2.0 + 1.7 * 0.2);
    vec2 z = vec2(0.0);

    // Iterate z <- z^2 + c for at most M steps; n counts the steps that
    // completed without leaving the loop.
    const int M = 128;
    float n = 0.0;
    for (int i = 0; i < M; i++) {
        z = vec2(z.x * z.x - z.y * z.y, 2. * z.x * z.y) + c;
        // NOTE(review): this compares the squared magnitude with 2; the
        // conventional bailout |z| > 2 would be dot(z, z) > 4 - confirm
        // whether the tighter radius is intended.
        if (dot(z, z) > 2) {
            break;
        }
        n++;
    }

    // Fraction of the iteration budget used, fed into a cosine palette.
    float t = n / float(M);
    vec3 paletteOffset = vec3(0.3, 0.3 ,0.5);
    vec3 paletteAmplitude = vec3(-0.2, -0.3 ,-0.5);
    vec3 paletteFrequency = vec3(2.1, 2.0, 3.0);
    vec3 palettePhase = vec3(0.0, 0.1, 0.0);
    vec3 color = paletteOffset + paletteAmplitude * cos( 6.28318 * (paletteFrequency * t + palettePhase) );

    // Store the colour at imageData[row][column] as r, g, b.
    int column = int(pixel.x);
    int row = int(pixel.y);
    imageData[row][column][0] = color.r;
    imageData[row][column][1] = color.g;
    imageData[row][column][2] = color.b;
}