Release 260111

This commit is contained in:
Comma Device
2026-01-11 18:23:29 +08:00
commit 3721ecbf8a
2601 changed files with 855070 additions and 0 deletions

View File

@@ -0,0 +1,47 @@
// Number of bytes in one half-resolution plane.
// TRANSFORMED_WIDTH / TRANSFORMED_HEIGHT are not defined in this file; they are
// expected to be supplied from outside (presumably as compiler build options — confirm).
#define UV_SIZE ((TRANSFORMED_WIDTH/2)*(TRANSFORMED_HEIGHT/2))

// Splits the Y plane into four half-resolution planes stored back to back in
// `out`, starting at `out_offset`. Pixels are routed by (row parity, column parity):
//
//   plane 0: even row, even column     plane 2: even row, odd column
//   plane 1: odd row,  even column     plane 3: odd row,  odd column
//
// Each work-item reads one uchar8 (8 consecutive Y pixels) and writes 4 bytes
// into each of two planes.
//
// Y          : source luma, indexed in uchar8 units by the global id.
// out        : destination byte buffer.
// out_offset : offset (in uchar elements) of plane 0 inside `out`.
//
// NOTE(review): assumes TRANSFORMED_WIDTH is a multiple of 8 so that an 8-pixel
// group never straddles two rows — confirm against the host-side launch size.
__kernel void loadys(__global uchar8 const * const Y,
                     __global uchar * out,
                     int out_offset)
{
  const int item = get_global_id(0);
  const uchar8 pixels = Y[item];

  // Position of the first of this work-item's 8 pixels in the full-size image.
  const int first_px = item * 8;
  const int row = first_px / TRANSFORMED_WIDTH;
  const int col = first_px % TRANSFORMED_WIDTH;

  // 0 for even rows, 1 for odd rows: selects plane 0/1 for the even columns
  // and plane 2/3 for the odd columns.
  const int row_parity = ((row & 1) == 0) ? 0 : 1;
  __global uchar* even_col_plane = out + out_offset + UV_SIZE * row_parity;
  __global uchar* odd_col_plane = out + out_offset + UV_SIZE * (row_parity + 2);

  // Both destination planes share the same half-resolution coordinates.
  const int plane_pos = (row / 2) * (TRANSFORMED_WIDTH / 2) + col / 2;

  vstore4(pixels.s0246, 0, even_col_plane + plane_pos);
  vstore4(pixels.s1357, 0, odd_col_plane + plane_pos);
}
// Copies one uchar8 per work-item from `in` to `out`, shifting the destination
// by `out_offset`.
//
// in         : source buffer, indexed in uchar8 units by the global id.
// out        : destination buffer, indexed in uchar8 units.
// out_offset : destination offset; it is divided by 8 to obtain a uchar8 index,
//              so it is presumably a byte offset that should be a multiple of 8
//              (integer division truncates otherwise) — confirm with the caller.
__kernel void loaduv(__global uchar8 const * const in,
                     __global uchar8 * out,
                     int out_offset)
{
  const int src_idx = get_global_id(0);
  const int dst_idx = src_idx + out_offset / 8;
  out[dst_idx] = in[src_idx];
}
// Copies one uchar8 per work-item from `in` to `out`, with independent source
// and destination offsets.
//
// in         : source buffer, indexed in uchar8 units.
// out        : destination buffer, indexed in uchar8 units.
// in_offset  : source offset, divided by 8 to obtain a uchar8 index.
// out_offset : destination offset, divided by 8 to obtain a uchar8 index.
//
// NOTE(review): both offsets are presumably byte offsets that should be
// multiples of 8 (integer division truncates otherwise) — confirm with the caller.
__kernel void copy(__global uchar8 * in,
                   __global uchar8 * out,
                   int in_offset,
                   int out_offset)
{
  const int item = get_global_id(0);
  const int src_idx = item + in_offset / 8;
  const int dst_idx = item + out_offset / 8;
  out[dst_idx] = in[src_idx];
}

View File

@@ -0,0 +1,20 @@
#pragma once
#include "common/clutil.h"
// State shared by the loadyuv_* functions declared in this header.
typedef struct {
  // Frame dimensions handed to loadyuv_init (presumably in pixels — confirm).
  int width;
  int height;
  // OpenCL kernel handles; the names suggest one handle per kernel
  // (loadys / loaduv / copy) — confirm against the implementation.
  cl_kernel loadys_krnl;
  cl_kernel loaduv_krnl;
  cl_kernel copy_krnl;
} LoadYUVState;
// Prepares `s` for frames of the given width/height on the given OpenCL
// context and device.
// NOTE(review): the implementation is not in this header; presumably it fills in
// the kernel handles of LoadYUVState — confirm.
void loadyuv_init(LoadYUVState* s, cl_context ctx, cl_device_id device_id, int width, int height);
// Tears down a state previously set up with loadyuv_init
// (presumably releases its kernels — confirm against the implementation).
void loadyuv_destroy(LoadYUVState* s);
// Queues work on `q` that takes the three input buffers y_cl / u_cl / v_cl and
// writes into out_cl.
// NOTE(review): the exact output layout is defined by the implementation and the
// kernels it launches, not by this header — confirm there.
void loadyuv_queue(LoadYUVState* s, cl_command_queue q,
cl_mem y_cl, cl_mem u_cl, cl_mem v_cl,
cl_mem out_cl);
// Queues on `q` a copy of `size` units from `src` (starting at src_offset) to
// `dst` (starting at dst_offset).
// NOTE(review): units are presumably bytes, and offsets/size presumably need to
// be multiples of 8 if this uses a uchar8-based kernel — confirm.
void copy_queue(LoadYUVState* s, cl_command_queue q, cl_mem src, cl_mem dst,
size_t src_offset, size_t dst_offset, size_t size);

View File

@@ -0,0 +1,54 @@
// Fixed-point parameters for bilinear interpolation.

// Number of fractional (sub-pixel) bits kept in source coordinates.
#define INTER_BITS 5
// Number of sub-pixel positions per pixel (2^INTER_BITS).
#define INTER_TAB_SIZE (1 << INTER_BITS)
// Size of one sub-pixel step, in pixels. The expansion is parenthesized so the
// macro behaves as a single value inside larger expressions: without the
// parentheses `x / INTER_SCALE` would expand to `x / 1.f / INTER_TAB_SIZE`.
#define INTER_SCALE (1.f / INTER_TAB_SIZE)
// Number of fractional bits in the integer interpolation weights.
#define INTER_REMAP_COEF_BITS 15
// Integer value that represents a weight of 1.0 (2^INTER_REMAP_COEF_BITS).
#define INTER_REMAP_COEF_SCALE (1 << INTER_REMAP_COEF_BITS)
// Perspective warp of a single-channel 8-bit image with bilinear interpolation.
//
// Each work-item produces one destination pixel (dx, dy): the 3x3 matrix M maps
// the destination coordinate to a source coordinate, the four neighbouring
// source pixels are read, and they are blended with fixed-point weights.
//
// src            : source bytes.
// src_row_stride : element distance between consecutive source rows.
// src_px_stride  : element distance between consecutive source pixels in a row.
// src_offset     : index of the first source pixel inside `src`.
// src_rows/cols  : source size, used to clamp sampling coordinates.
// dst            : destination bytes (one byte per pixel; consecutive columns
//                  are adjacent, as the index uses `dx` without a pixel stride).
// dst_row_stride : element distance between consecutive destination rows.
// dst_offset     : index of the first destination pixel inside `dst`.
// dst_rows/cols  : destination size; work-items outside it do nothing.
// M              : 9 floats read as a row-major 3x3 matrix that maps
//                  (dx, dy, 1) to homogeneous source coordinates (X0, Y0, W).
__kernel void warpPerspective(__global const uchar * src,
int src_row_stride, int src_px_stride, int src_offset, int src_rows, int src_cols,
__global uchar * dst,
int dst_row_stride, int dst_offset, int dst_rows, int dst_cols,
__constant float * M)
{
int dx = get_global_id(0);
int dy = get_global_id(1);
// Skip work-items that fall outside the destination image.
if (dx < dst_cols && dy < dst_rows)
{
// Homogeneous source coordinates for this destination pixel.
float X0 = M[0] * dx + M[1] * dy + M[2];
float Y0 = M[3] * dx + M[4] * dy + M[5];
float W = M[6] * dx + M[7] * dy + M[8];
// Fold the perspective divide and the fixed-point scale into one factor;
// a zero W yields a factor of 0 instead of a division by zero.
W = W != 0.0f ? INTER_TAB_SIZE / W : 0.0f;
// Source coordinates in fixed point with INTER_BITS fractional bits.
int X = rint(X0 * W), Y = rint(Y0 * W);
// Integer pixel coordinates, saturated to the range of a short.
int sx = convert_short_sat(X >> INTER_BITS);
int sy = convert_short_sat(Y >> INTER_BITS);
// Clamp the 2x2 neighbourhood to the image, so samples outside the source
// read the nearest edge pixel.
short sx_clamp = clamp(sx, 0, src_cols - 1);
short sx_p1_clamp = clamp(sx + 1, 0, src_cols - 1);
short sy_clamp = clamp(sy, 0, src_rows - 1);
short sy_p1_clamp = clamp(sy + 1, 0, src_rows - 1);
// v0 = (x, y), v1 = (x+1, y), v2 = (x, y+1), v3 = (x+1, y+1).
int v0 = convert_int(src[mad24(sy_clamp, src_row_stride, src_offset + sx_clamp*src_px_stride)]);
int v1 = convert_int(src[mad24(sy_clamp, src_row_stride, src_offset + sx_p1_clamp*src_px_stride)]);
int v2 = convert_int(src[mad24(sy_p1_clamp, src_row_stride, src_offset + sx_clamp*src_px_stride)]);
int v3 = convert_int(src[mad24(sy_p1_clamp, src_row_stride, src_offset + sx_p1_clamp*src_px_stride)]);
// Fractional part of the coordinates: the low INTER_BITS bits.
short ay = (short)(Y & (INTER_TAB_SIZE - 1));
short ax = (short)(X & (INTER_TAB_SIZE - 1));
// Same fractions as floats in [0, 1).
float taby = 1.f/INTER_TAB_SIZE*ay;
float tabx = 1.f/INTER_TAB_SIZE*ax;
int dst_index = mad24(dy, dst_row_stride, dst_offset + dx);
// Bilinear weights scaled by INTER_REMAP_COEF_SCALE, rounded to nearest even
// and saturated to a short (so a weight of exactly 1.0 becomes 32767).
int itab0 = convert_short_sat_rte( (1.0f-taby)*(1.0f-tabx) * INTER_REMAP_COEF_SCALE );
int itab1 = convert_short_sat_rte( (1.0f-taby)*tabx * INTER_REMAP_COEF_SCALE );
int itab2 = convert_short_sat_rte( taby*(1.0f-tabx) * INTER_REMAP_COEF_SCALE );
int itab3 = convert_short_sat_rte( taby*tabx * INTER_REMAP_COEF_SCALE );
// Weighted sum in fixed point; adding half of the scale before the shift
// rounds to nearest, then the result is saturated to 8 bits.
int val = v0 * itab0 + v1 * itab1 + v2 * itab2 + v3 * itab3;
uchar pix = convert_uchar_sat((val + (1 << (INTER_REMAP_COEF_BITS-1))) >> INTER_REMAP_COEF_BITS);
dst[dst_index] = pix;
}
}

View File

@@ -0,0 +1,25 @@
#pragma once
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
#ifdef __APPLE__
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif
#include "common/mat.h"
// State shared by the transform_* functions declared in this header.
typedef struct {
  // OpenCL kernel handle (presumably the warp kernel — confirm against the implementation).
  cl_kernel krnl;
  // OpenCL buffers; the names suggest per-plane (Y and UV) projection matrices
  // uploaded for the kernel — confirm against the implementation.
  cl_mem m_y_cl;
  cl_mem m_uv_cl;
} Transform;
// Prepares `s` for use on the given OpenCL context and device.
// NOTE(review): the implementation is not in this header; presumably it creates
// the kernel and the buffers held by Transform — confirm.
void transform_init(Transform* s, cl_context ctx, cl_device_id device_id);
// Tears down a Transform previously set up with transform_init
// (presumably releases its kernel and buffers — confirm).
void transform_destroy(Transform* transform);
// Queues work on `q` that warps the input image `yuv` into the three output
// buffers out_y / out_u / out_v of size out_width x out_height, using `projection`.
//   in_width, in_height : input image size.
//   in_stride           : input row stride.
//   in_uv_offset        : offset of the chroma data inside `yuv`.
// NOTE(review): units (bytes vs. pixels), the chroma layout of `yuv`, and the
// direction of `projection` are defined by the implementation — confirm there.
void transform_queue(Transform* s, cl_command_queue q,
cl_mem yuv, int in_width, int in_height, int in_stride, int in_uv_offset,
cl_mem out_y, cl_mem out_u, cl_mem out_v,
int out_width, int out_height,
const mat3& projection);