Release 260111
This commit is contained in:
47
selfdrive/modeld/transforms/loadyuv.cl
Normal file
47
selfdrive/modeld/transforms/loadyuv.cl
Normal file
@@ -0,0 +1,47 @@
|
||||
#define UV_SIZE ((TRANSFORMED_WIDTH/2)*(TRANSFORMED_HEIGHT/2))
|
||||
|
||||
__kernel void loadys(__global uchar8 const * const Y,
|
||||
__global uchar * out,
|
||||
int out_offset)
|
||||
{
|
||||
const int gid = get_global_id(0);
|
||||
const int ois = gid * 8;
|
||||
const int oy = ois / TRANSFORMED_WIDTH;
|
||||
const int ox = ois % TRANSFORMED_WIDTH;
|
||||
|
||||
const uchar8 ys = Y[gid];
|
||||
|
||||
// 02
|
||||
// 13
|
||||
|
||||
__global uchar* outy0;
|
||||
__global uchar* outy1;
|
||||
if ((oy & 1) == 0) {
|
||||
outy0 = out + out_offset; //y0
|
||||
outy1 = out + out_offset + UV_SIZE*2; //y2
|
||||
} else {
|
||||
outy0 = out + out_offset + UV_SIZE; //y1
|
||||
outy1 = out + out_offset + UV_SIZE*3; //y3
|
||||
}
|
||||
|
||||
vstore4(ys.s0246, 0, outy0 + (oy/2) * (TRANSFORMED_WIDTH/2) + ox/2);
|
||||
vstore4(ys.s1357, 0, outy1 + (oy/2) * (TRANSFORMED_WIDTH/2) + ox/2);
|
||||
}
|
||||
|
||||
__kernel void loaduv(__global uchar8 const * const in,
|
||||
__global uchar8 * out,
|
||||
int out_offset)
|
||||
{
|
||||
const int gid = get_global_id(0);
|
||||
const uchar8 inv = in[gid];
|
||||
out[gid + out_offset / 8] = inv;
|
||||
}
|
||||
|
||||
__kernel void copy(__global uchar8 * in,
|
||||
__global uchar8 * out,
|
||||
int in_offset,
|
||||
int out_offset)
|
||||
{
|
||||
const int gid = get_global_id(0);
|
||||
out[gid + out_offset / 8] = in[gid + in_offset / 8];
|
||||
}
|
||||
20
selfdrive/modeld/transforms/loadyuv.h
Normal file
20
selfdrive/modeld/transforms/loadyuv.h
Normal file
@@ -0,0 +1,20 @@
|
||||
#pragma once
|
||||
|
||||
#include "common/clutil.h"
|
||||
|
||||
typedef struct {
|
||||
int width, height;
|
||||
cl_kernel loadys_krnl, loaduv_krnl, copy_krnl;
|
||||
} LoadYUVState;
|
||||
|
||||
void loadyuv_init(LoadYUVState* s, cl_context ctx, cl_device_id device_id, int width, int height);
|
||||
|
||||
void loadyuv_destroy(LoadYUVState* s);
|
||||
|
||||
void loadyuv_queue(LoadYUVState* s, cl_command_queue q,
|
||||
cl_mem y_cl, cl_mem u_cl, cl_mem v_cl,
|
||||
cl_mem out_cl);
|
||||
|
||||
|
||||
void copy_queue(LoadYUVState* s, cl_command_queue q, cl_mem src, cl_mem dst,
|
||||
size_t src_offset, size_t dst_offset, size_t size);
|
||||
54
selfdrive/modeld/transforms/transform.cl
Normal file
54
selfdrive/modeld/transforms/transform.cl
Normal file
@@ -0,0 +1,54 @@
|
||||
#define INTER_BITS 5
|
||||
#define INTER_TAB_SIZE (1 << INTER_BITS)
|
||||
#define INTER_SCALE 1.f / INTER_TAB_SIZE
|
||||
|
||||
#define INTER_REMAP_COEF_BITS 15
|
||||
#define INTER_REMAP_COEF_SCALE (1 << INTER_REMAP_COEF_BITS)
|
||||
|
||||
__kernel void warpPerspective(__global const uchar * src,
|
||||
int src_row_stride, int src_px_stride, int src_offset, int src_rows, int src_cols,
|
||||
__global uchar * dst,
|
||||
int dst_row_stride, int dst_offset, int dst_rows, int dst_cols,
|
||||
__constant float * M)
|
||||
{
|
||||
int dx = get_global_id(0);
|
||||
int dy = get_global_id(1);
|
||||
|
||||
if (dx < dst_cols && dy < dst_rows)
|
||||
{
|
||||
float X0 = M[0] * dx + M[1] * dy + M[2];
|
||||
float Y0 = M[3] * dx + M[4] * dy + M[5];
|
||||
float W = M[6] * dx + M[7] * dy + M[8];
|
||||
W = W != 0.0f ? INTER_TAB_SIZE / W : 0.0f;
|
||||
int X = rint(X0 * W), Y = rint(Y0 * W);
|
||||
|
||||
int sx = convert_short_sat(X >> INTER_BITS);
|
||||
int sy = convert_short_sat(Y >> INTER_BITS);
|
||||
|
||||
short sx_clamp = clamp(sx, 0, src_cols - 1);
|
||||
short sx_p1_clamp = clamp(sx + 1, 0, src_cols - 1);
|
||||
short sy_clamp = clamp(sy, 0, src_rows - 1);
|
||||
short sy_p1_clamp = clamp(sy + 1, 0, src_rows - 1);
|
||||
int v0 = convert_int(src[mad24(sy_clamp, src_row_stride, src_offset + sx_clamp*src_px_stride)]);
|
||||
int v1 = convert_int(src[mad24(sy_clamp, src_row_stride, src_offset + sx_p1_clamp*src_px_stride)]);
|
||||
int v2 = convert_int(src[mad24(sy_p1_clamp, src_row_stride, src_offset + sx_clamp*src_px_stride)]);
|
||||
int v3 = convert_int(src[mad24(sy_p1_clamp, src_row_stride, src_offset + sx_p1_clamp*src_px_stride)]);
|
||||
|
||||
short ay = (short)(Y & (INTER_TAB_SIZE - 1));
|
||||
short ax = (short)(X & (INTER_TAB_SIZE - 1));
|
||||
float taby = 1.f/INTER_TAB_SIZE*ay;
|
||||
float tabx = 1.f/INTER_TAB_SIZE*ax;
|
||||
|
||||
int dst_index = mad24(dy, dst_row_stride, dst_offset + dx);
|
||||
|
||||
int itab0 = convert_short_sat_rte( (1.0f-taby)*(1.0f-tabx) * INTER_REMAP_COEF_SCALE );
|
||||
int itab1 = convert_short_sat_rte( (1.0f-taby)*tabx * INTER_REMAP_COEF_SCALE );
|
||||
int itab2 = convert_short_sat_rte( taby*(1.0f-tabx) * INTER_REMAP_COEF_SCALE );
|
||||
int itab3 = convert_short_sat_rte( taby*tabx * INTER_REMAP_COEF_SCALE );
|
||||
|
||||
int val = v0 * itab0 + v1 * itab1 + v2 * itab2 + v3 * itab3;
|
||||
|
||||
uchar pix = convert_uchar_sat((val + (1 << (INTER_REMAP_COEF_BITS-1))) >> INTER_REMAP_COEF_BITS);
|
||||
dst[dst_index] = pix;
|
||||
}
|
||||
}
|
||||
25
selfdrive/modeld/transforms/transform.h
Normal file
25
selfdrive/modeld/transforms/transform.h
Normal file
@@ -0,0 +1,25 @@
|
||||
#pragma once
|
||||
|
||||
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
|
||||
#ifdef __APPLE__
|
||||
#include <OpenCL/cl.h>
|
||||
#else
|
||||
#include <CL/cl.h>
|
||||
#endif
|
||||
|
||||
#include "common/mat.h"
|
||||
|
||||
typedef struct {
|
||||
cl_kernel krnl;
|
||||
cl_mem m_y_cl, m_uv_cl;
|
||||
} Transform;
|
||||
|
||||
void transform_init(Transform* s, cl_context ctx, cl_device_id device_id);
|
||||
|
||||
void transform_destroy(Transform* transform);
|
||||
|
||||
void transform_queue(Transform* s, cl_command_queue q,
|
||||
cl_mem yuv, int in_width, int in_height, int in_stride, int in_uv_offset,
|
||||
cl_mem out_y, cl_mem out_u, cl_mem out_v,
|
||||
int out_width, int out_height,
|
||||
const mat3& projection);
|
||||
Reference in New Issue
Block a user