/**
 * Prototype for convolve_5x5; the definition is not visible in this file
 * section, so the notes below are assumptions drawn from the names only.
 *
 * NOTE(review): presumably applies a 5x5 filter from `kernel` to `input`
 * and stores the result in `output` -- confirm against the definition.
 *
 * @param output   float buffer; presumably the destination -- TODO confirm.
 * @param input    float buffer; presumably the source data -- TODO confirm.
 * @param kernel   float buffer; presumably 25 filter coefficients -- TODO confirm.
 * @param outRows  presumably the number of output rows -- TODO confirm.
 * @param outCols  presumably the number of output columns -- TODO confirm.
 * @param inCols   presumably the row stride (columns) of `input` -- TODO confirm.
 */
void convolve_5x5(float *output,
                  float *input,
                  float *kernel,
                  long outRows,
                  long outCols,
                  long inCols);