diff --git a/benchmark/rapp_benchmark.c b/benchmark/rapp_benchmark.c index 60d61d7..f20de10 100644 --- a/benchmark/rapp_benchmark.c +++ b/benchmark/rapp_benchmark.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2011, Axis Communications AB, LUND, SWEDEN +/* Copyright (C) 2005-2011, 2016 Axis Communications AB, LUND, SWEDEN * * This file is part of RAPP. * @@ -193,6 +193,9 @@ static void rapp_bmark_exec_u8_u8_p(int (*func)(), const int *args); static void +rapp_bmark_exec_thresh_pixel(int (*func)(), const int *args); + +static void rapp_bmark_exec_expand(int (*func)(), const int *args); static void @@ -268,10 +271,11 @@ static const rapp_bmark_table_t rapp_bmark_suite[] = { RAPP_BMARK_ENTRY(type_u8_to_bin, NULL, u8_bin, 0, 0), RAPP_BMARK_ENTRY(type_bin_to_u8, NULL, bin_u8, 0, 0), /* rapp_thresh functions */ - RAPP_BMARK_ENTRY(thresh_gt_u8, NULL, u8_bin, 7, 0), - RAPP_BMARK_ENTRY(thresh_lt_u8, NULL, u8_bin, 7, 0), - RAPP_BMARK_ENTRY(thresh_gtlt_u8, NULL, u8_bin, 7, 9), - RAPP_BMARK_ENTRY(thresh_ltgt_u8, NULL, u8_bin, 7, 9), + RAPP_BMARK_ENTRY(thresh_gt_u8, NULL, u8_bin, 7, 0), + RAPP_BMARK_ENTRY(thresh_lt_u8, NULL, u8_bin, 7, 0), + RAPP_BMARK_ENTRY(thresh_gtlt_u8, NULL, u8_bin, 7, 9), + RAPP_BMARK_ENTRY(thresh_ltgt_u8, NULL, u8_bin, 7, 9), + RAPP_BMARK_ENTRY(thresh_lt_pixel_u8, NULL, thresh_pixel, 0, 0), /* rapp_reduce functions */ RAPP_BMARK_ENTRY(reduce_1x2_u8, NULL, u8_u8, 0, 0), RAPP_BMARK_ENTRY(reduce_2x1_u8, NULL, u8_u8, 0, 0), @@ -753,6 +757,22 @@ rapp_bmark_exec_u8_u8_p(int (*func)(), const int *args) } static void +rapp_bmark_exec_thresh_pixel(int (*func)(), const int *args) +{ + const rapp_bmark_data_t *data = &rapp_bmark_data; + (void)args; + /* The speed is not dependent of the content or calculation results + * so the aux buffer is reused for both high and low thresholds. + * This minimize changes of the entire benchmark test, + i.e. only require a single aux buffer. 
*/ + (*func)(data->dst, data->dim_bin, + data->set, data->dim_u8, + data->width, data->height, + data->aux, data->dim_u8, + data->aux, data->dim_u8); +} + +static void rapp_bmark_exec_expand(int (*func)(), const int *args) { const rapp_bmark_data_t *data = &rapp_bmark_data; diff --git a/compute/generic/Makefile.am b/compute/generic/Makefile.am index 1b5813d..f6de031 100644 --- a/compute/generic/Makefile.am +++ b/compute/generic/Makefile.am @@ -37,34 +37,35 @@ noinst_LTLIBRARIES = librappcompute_gen.la librappcompute_gen_la_LDFLAGS = -no-undefined # The source files to use -librappcompute_gen_la_SOURCES = rc_impl_cfg.h \ - rc_template.h \ - rc_bitblt_rop.h \ - rc_bitblt_wa.c \ - rc_bitblt_wm.c \ - rc_pixop.c \ - rc_type.c \ - rc_thresh.c \ - rc_stat.c \ - rc_moment_bin.c \ - rc_reduce.c \ - rc_reduce_bin.c \ - rc_expand_bin.c \ - rc_rotate.c \ - rc_rotate_bin.c \ - rc_filter.c \ - rc_morph_bin.c \ - rc_fill.c \ - rc_pad.c \ - rc_pad_bin.c \ - rc_margin.c \ - rc_crop.c \ - rc_contour.c \ - rc_rasterize.c \ - rc_cond.c \ - rc_gather.c \ - rc_gather_bin.c \ - rc_scatter.c \ - rc_scatter_bin.c\ - rc_integral.c \ +librappcompute_gen_la_SOURCES = rc_impl_cfg.h \ + rc_thresh_tpl.h \ + rc_thresh_pixel_tpl.h \ + rc_bitblt_rop.h \ + rc_bitblt_wa.c \ + rc_bitblt_wm.c \ + rc_pixop.c \ + rc_type.c \ + rc_thresh.c \ + rc_stat.c \ + rc_moment_bin.c \ + rc_reduce.c \ + rc_reduce_bin.c \ + rc_expand_bin.c \ + rc_rotate.c \ + rc_rotate_bin.c \ + rc_filter.c \ + rc_morph_bin.c \ + rc_fill.c \ + rc_pad.c \ + rc_pad_bin.c \ + rc_margin.c \ + rc_crop.c \ + rc_contour.c \ + rc_rasterize.c \ + rc_cond.c \ + rc_gather.c \ + rc_gather_bin.c \ + rc_scatter.c \ + rc_scatter_bin.c \ + rc_integral.c \ rc_integral_bin.c diff --git a/compute/generic/rc_template.h b/compute/generic/rc_template.h deleted file mode 100644 index 0cd5031..0000000 --- a/compute/generic/rc_template.h +++ /dev/null @@ -1,214 +0,0 @@ -/* Copyright (C) 2005-2010, Axis Communications AB, LUND, SWEDEN - * - * This file is 
part of RAPP. - * - * RAPP is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * You can use the comments under either the terms of the GNU Lesser General - * Public License version 3 as published by the Free Software Foundation, - * either version 3 of the License or (at your option) any later version, or - * the GNU Free Documentation License version 1.3 or any later version - * published by the Free Software Foundation; with no Invariant Sections, no - * Front-Cover Texts, and no Back-Cover Texts. - * A copy of the license is included in the documentation section entitled - * "GNU Free Documentation License". - * - * RAPP is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License and a copy of the GNU Free Documentation License along - * with RAPP. If not, see <http://www.gnu.org/licenses/>. - */ - -/** - * @file rc_template.h - * @brief RAPP Compute layer common templates, generic implementation. - */ - -#ifndef RC_TEMPLATE_H -#define RC_TEMPLATE_H - -#include "rc_word.h" /* Word operations */ - -/* - * ------------------------------------------------------------- - * Thresholding templates - * ------------------------------------------------------------- - */ - -/** - * Threshold-to-binary template. 
- */ -#define RC_TEMPLATE_THRESH(dst, dst_dim, src, src_dim, \ - width, height, low, high, cmp, unroll) \ -do { \ - int blk_ = (width) / (8*RC_WORD_SIZE); /* Full dst blocks */ \ - int end_ = (width) % (8*RC_WORD_SIZE); /* Partial dst blocks */ \ - \ - if ((unroll) == 4 && /* Constant */ \ - (blk_ > 0 || end_ >= 4)) /* Variable */ \ - { \ - int len_ = end_ / 4; \ - int rem_ = end_ % 4; \ - RC_TEMPLATE_THRESH_(dst, dst_dim, src, src_dim, height, \ - blk_, len_, rem_, low, high, cmp, \ - RC_TEMPLATE_THRESH_BLK_X4_, \ - RC_TEMPLATE_THRESH_REM_X4_); \ - } \ - else if ((unroll) >= 2 && /* Constant */ \ - (blk_ > 0 || end_ >= 2)) /* Variable */ \ - { \ - int len_ = end_ / 2; \ - int rem_ = end_ % 2; \ - RC_TEMPLATE_THRESH_(dst, dst_dim, src, src_dim, height, \ - blk_, len_, rem_, low, high, cmp, \ - RC_TEMPLATE_THRESH_BLK_X2_, \ - RC_TEMPLATE_THRESH_REM_X2_); \ - } \ - else { \ - RC_TEMPLATE_THRESH_(dst, dst_dim, src, src_dim, height, \ - blk_, 0, end_, low, high, cmp, \ - RC_TEMPLATE_THRESH_BLK_X1_, \ - RC_TEMPLATE_THRESH_REM_NONE_); \ - } \ -} while (0) - - -/* - * ------------------------------------------------------------- - * Internal support macros for thresholding template - * ------------------------------------------------------------- - */ - -/** - * Thresholding template driver. 
- */ -#define RC_TEMPLATE_THRESH_(dst, dst_dim, src, src_dim, height, \ - blk, len, rem, thr1, thr2, cmp, \ - loop_blk, loop_rem) \ -do { \ - int y_; \ - for (y_ = 0; y_ < (height); y_++) { \ - int i_ = y_*(src_dim); \ - int j_ = y_*(dst_dim); \ - int x_; \ - \ - /* Handle all full destination word blocks */ \ - for (x_ = 0; x_ < (blk); x_++, j_ += RC_WORD_SIZE) { \ - loop_blk(&(dst)[j_], src, i_, cmp, thr1, thr2); \ - } \ - \ - /* Handle partial destination words */ \ - if ((len) || (rem)) { \ - rc_word_t acc_ = RC_WORD_ZERO; \ - int pos_ = 0; \ - \ - /* Handle unrolled source pixels */ \ - loop_rem(src, acc_, i_, pos_, len, cmp, thr1, thr2); \ - \ - /* Handle any remaining source pixels */ \ - for (x_ = 0; x_ < (rem); x_++) { \ - RC_TEMPLATE_THRESH_ITER_(src, acc_, i_, pos_, \ - cmp, thr1, thr2); \ - } \ - \ - /* Store the partial word */ \ - RC_WORD_STORE(&(dst)[j_], acc_); \ - } \ - } \ -} while (0) - -/** - * Thresholding template block iterator, no unrolling. - */ -#define RC_TEMPLATE_THRESH_BLK_X1_(dst, src, idx, cmp, thr1, thr2) \ -do { \ - rc_word_t acc_ = RC_WORD_ZERO; \ - int k_, b_; \ - for (k_ = 0, b_ = 0; k_ < (int)(8*RC_WORD_SIZE); k_++) { \ - RC_TEMPLATE_THRESH_ITER_(src, acc_, idx, b_, cmp, thr1, thr2); \ - } \ - RC_WORD_STORE(dst, acc_); \ -} while (0) - -/** - * Thresholding template block iterator, unrolled two times. - */ -#define RC_TEMPLATE_THRESH_BLK_X2_(dst, src, idx, cmp, thr1, thr2) \ -do { \ - rc_word_t acc_ = RC_WORD_ZERO; \ - int k_, b_; \ - for (k_ = 0, b_ = 0; k_ < (int)(8*RC_WORD_SIZE); k_ += 2) { \ - RC_TEMPLATE_THRESH_ITER_(src, acc_, idx, b_, cmp, thr1, thr2); \ - RC_TEMPLATE_THRESH_ITER_(src, acc_, idx, b_, cmp, thr1, thr2); \ - } \ - RC_WORD_STORE(dst, acc_); \ -} while (0) - -/** - * Thresholding template block iterator, unrolled four times. 
- */ -#define RC_TEMPLATE_THRESH_BLK_X4_(dst, src, idx, cmp, thr1, thr2) \ -do { \ - rc_word_t acc_ = RC_WORD_ZERO; \ - int k_, b_; \ - for (k_ = 0, b_ = 0; k_ < (int)(8*RC_WORD_SIZE); k_ += 4) { \ - RC_TEMPLATE_THRESH_ITER_(src, acc_, idx, b_, cmp, thr1, thr2); \ - RC_TEMPLATE_THRESH_ITER_(src, acc_, idx, b_, cmp, thr1, thr2); \ - RC_TEMPLATE_THRESH_ITER_(src, acc_, idx, b_, cmp, thr1, thr2); \ - RC_TEMPLATE_THRESH_ITER_(src, acc_, idx, b_, cmp, thr1, thr2); \ - } \ - RC_WORD_STORE(dst, acc_); \ -} while (0) - -/** - * Thresholding template empty remainder iterator. - */ -#define RC_TEMPLATE_THRESH_REM_NONE_(src, acc, idx, pos, \ - len, cmp, thr1, thr2) - -/** - * Thresholding template remainder iterator, unrolled two times. - */ -#define RC_TEMPLATE_THRESH_REM_X2_(src, acc, idx, pos, \ - len, cmp, thr1, thr2) \ -do { \ - int k_; \ - for (k_ = 0; k_ < (len); k_++) { \ - RC_TEMPLATE_THRESH_ITER_(src, acc, idx, pos, cmp, thr1, thr2); \ - RC_TEMPLATE_THRESH_ITER_(src, acc, idx, pos, cmp, thr1, thr2); \ - } \ -} while (0) - -/** - * Thresholding template remainder iterator, unrolled four times. - */ -#define RC_TEMPLATE_THRESH_REM_X4_(src, acc, idx, pos, \ - len, cmp, thr1, thr2) \ -do { \ - int k_; \ - for (k_ = 0; k_ < (len); k_++) { \ - RC_TEMPLATE_THRESH_ITER_(src, acc, idx, pos, cmp, thr1, thr2); \ - RC_TEMPLATE_THRESH_ITER_(src, acc, idx, pos, cmp, thr1, thr2); \ - RC_TEMPLATE_THRESH_ITER_(src, acc, idx, pos, cmp, thr1, thr2); \ - RC_TEMPLATE_THRESH_ITER_(src, acc, idx, pos, cmp, thr1, thr2); \ - } \ -} while (0) - -/** - * Thresholding template iteration. 
- */ -#define RC_TEMPLATE_THRESH_ITER_(src, acc, idx, pos, cmp, thr1, thr2) \ -do { \ - int bit_ = cmp((src)[idx], thr1, thr2); \ - (acc) |= RC_WORD_INSERT(bit_, pos, 1); \ - (idx)++; \ - (pos)++; \ -} while (0) - -#endif /* RC_TEMPLATE_H */ diff --git a/compute/generic/rc_thresh.c b/compute/generic/rc_thresh.c index 5e235c7..cf331d8 100644 --- a/compute/generic/rc_thresh.c +++ b/compute/generic/rc_thresh.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2010, Axis Communications AB, LUND, SWEDEN +/* Copyright (C) 2005-2010, 2016, Axis Communications AB, LUND, SWEDEN * * This file is part of RAPP. * @@ -30,9 +30,11 @@ * @brief RAPP Compute layer thresholding to binary, generic implementation. */ -#include "rc_impl_cfg.h" /* Implementation cfg */ -#include "rc_template.h" /* Thresholding templates */ -#include "rc_thresh.h" /* Thresholding API */ +#include +#include "rc_impl_cfg.h" /* Implementation cfg */ +#include "rc_thresh_tpl.h" /* Thresholding templates */ +#include "rc_thresh_pixel_tpl.h" /* Pixelwise thresholding templates */ +#include "rc_thresh.h" /* Thresholding API */ /* * ------------------------------------------------------------- @@ -82,7 +84,7 @@ rc_thresh_gt_u8(uint8_t *restrict dst, int dst_dim, const uint8_t *restrict src, int src_dim, int width, int height, int thresh) { - RC_TEMPLATE_THRESH(dst, dst_dim, src, src_dim, width, height, + RC_THRESH_TEMPLATE(dst, dst_dim, src, src_dim, width, height, thresh, 0, RC_THRESH_CMPGT, RC_UNROLL(rc_thresh_gt_u8)); } @@ -98,7 +100,7 @@ rc_thresh_lt_u8(uint8_t *restrict dst, int dst_dim, const uint8_t *restrict src, int src_dim, int width, int height, int thresh) { - RC_TEMPLATE_THRESH(dst, dst_dim, src, src_dim, width, height, + RC_THRESH_TEMPLATE(dst, dst_dim, src, src_dim, width, height, thresh, 0, RC_THRESH_CMPLT, RC_UNROLL(rc_thresh_lt_u8)); } @@ -114,7 +116,7 @@ rc_thresh_gtlt_u8(uint8_t *restrict dst, int dst_dim, const uint8_t *restrict src, int src_dim, int width, int height, int low, int high) { - 
RC_TEMPLATE_THRESH(dst, dst_dim, src, src_dim, width, height, + RC_THRESH_TEMPLATE(dst, dst_dim, src, src_dim, width, height, low, high, RC_THRESH_CMPGTLT, RC_UNROLL(rc_thresh_gtlt_u8)); } @@ -130,8 +132,29 @@ rc_thresh_ltgt_u8(uint8_t *restrict dst, int dst_dim, const uint8_t *restrict src, int src_dim, int width, int height, int low, int high) { - RC_TEMPLATE_THRESH(dst, dst_dim, src, src_dim, width, height, + RC_THRESH_TEMPLATE(dst, dst_dim, src, src_dim, width, height, low, high, RC_THRESH_CMPLTGT, RC_UNROLL(rc_thresh_ltgt_u8)); } #endif + + +/** + * Pixelwise single thresholding less-than. + */ +#if RC_IMPL(rc_thresh_lt_pixel_u8, 1) +void +rc_thresh_lt_pixel_u8(uint8_t *restrict dst, int dst_dim, + const uint8_t *restrict src, int src_dim, + int width, int height, + const uint8_t *restrict thresh, int thresh_dim) +{ + const uint8_t *thresh_high = NULL; + RC_THRESH_PIXEL_TEMPLATE(dst, dst_dim, src, src_dim, width, height, + thresh, thresh_dim, thresh_high, 0, + RC_THRESH_CMPLT, + RC_THRESH_PIXEL_SINGLE_ARG, + RC_UNROLL(rc_thresh_lt_pixel_u8)); +} +#endif + diff --git a/compute/generic/rc_thresh_pixel_tpl.h b/compute/generic/rc_thresh_pixel_tpl.h new file mode 100644 index 0000000..9c28544 --- /dev/null +++ b/compute/generic/rc_thresh_pixel_tpl.h @@ -0,0 +1,255 @@ +/* Copyright (C) 2016, Axis Communications AB, LUND, SWEDEN + * + * This file is part of RAPP. + * + * RAPP is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * You can use the comments under either the terms of the GNU Lesser General + * Public License version 3 as published by the Free Software Foundation, + * either version 3 of the License or (at your option) any later version, or + * the GNU Free Documentation License version 1.3 or any later version + * published by the Free Software Foundation; with no Invariant Sections, no + * Front-Cover Texts, and no Back-Cover Texts. + * A copy of the license is included in the documentation section entitled + * "GNU Free Documentation License". + * + * RAPP is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License and a copy of the GNU Free Documentation License along + * with RAPP. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * @file rc_thresh_pixel_tpl.h + * @brief RAPP Compute layer pixelwise threshold templates, generic implementation. + */ + +#ifndef RC_THRESH_PIXEL_TPL_H +#define RC_THRESH_PIXEL_TPL_H + +#include "rc_word.h" /* Word operations */ + +/* + * ------------------------------------------------------------- + * Pixelwise thresholding templates + * ------------------------------------------------------------- + */ + +#define RC_THRESH_PIXEL_SINGLE_ARG (1) +#define RC_THRESH_PIXEL_DOUBLE_ARG (2) + + +/** + * Pixelwise threshold-to-binary template. 
+ */ +#define RC_THRESH_PIXEL_TEMPLATE(dst, dst_dim, src, src_dim, width, height, \ + low, low_dim, high, high_dim, \ + cmp, num_args, unroll) \ +do { \ + int blk_ = (width) / (8*RC_WORD_SIZE); /* Full dst blocks */ \ + int end_ = (width) % (8*RC_WORD_SIZE); /* Partial dst blocks */ \ + \ + if ((unroll) == 4 && /* Constant */ \ + (blk_ > 0 || end_ >= 4)) /* Variable */ \ + { \ + int len_ = end_ / 4; \ + int rem_ = end_ % 4; \ + RC_THRESH_PIXEL_(dst, dst_dim, src, src_dim, height, \ + low, low_dim, high, high_dim, \ + blk_, len_, rem_, cmp, num_args, \ + RC_THRESH_PIXEL_BLK_X4_, \ + RC_THRESH_PIXEL_REM_X4_); \ + } \ + else if ((unroll) >= 2 && /* Constant */ \ + (blk_ > 0 || end_ >= 2)) /* Variable */ \ + { \ + int len_ = end_ / 2; \ + int rem_ = end_ % 2; \ + RC_THRESH_PIXEL_(dst, dst_dim, src, src_dim, height, \ + low, low_dim, high, high_dim, \ + blk_, len_, rem_, cmp, num_args, \ + RC_THRESH_PIXEL_BLK_X2_, \ + RC_THRESH_PIXEL_REM_X2_); \ + } \ + else { \ + RC_THRESH_PIXEL_(dst, dst_dim, src, src_dim, height, \ + low, low_dim, high, high_dim, \ + blk_, 0, end_, cmp, num_args, \ + RC_THRESH_PIXEL_BLK_X1_, \ + RC_THRESH_PIXEL_REM_NONE_); \ + } \ +} while (0) + + +/* + * ------------------------------------------------------------- + * Internal support macros for pixelwise thresholding template + * ------------------------------------------------------------- + */ + +/** + * Pixelwise thresholding template driver. 
+ */ +#define RC_THRESH_PIXEL_(dst, dst_dim, src, src_dim, height, \ + thr1, thr1_dim, thr2, thr2_dim, \ + blk, len, rem, cmp, num_args, \ + loop_blk, loop_rem) \ +do { \ + int y_; \ + for (y_ = 0; y_ < (height); y_++) { \ + int i_ = y_*(src_dim); \ + int j_ = y_*(dst_dim); \ + int l_ = y_*(thr1_dim); \ + int m_ = y_*(thr2_dim); \ + int x_; \ + \ + /* Handle all full destination word blocks */ \ + for (x_ = 0; x_ < (blk); x_++, j_ += RC_WORD_SIZE) { \ + loop_blk(&(dst)[j_], src, i_, cmp, thr1, l_, thr2, m_, num_args); \ + } \ + \ + /* Handle partial destination words */ \ + if ((len) || (rem)) { \ + rc_word_t acc_ = RC_WORD_ZERO; \ + int pos_ = 0; \ + \ + /* Handle unrolled source pixels */ \ + loop_rem(src, acc_, i_, pos_, len, cmp, thr1, l_, thr2, m_, num_args); \ + \ + /* Handle any remaining source pixels */ \ + for (x_ = 0; x_ < (rem); x_++) { \ + RC_THRESH_PIXEL_ITER_(src, acc_, i_, pos_, \ + cmp, thr1, l_, thr2, m_, num_args); \ + } \ + \ + /* Store the partial word */ \ + RC_WORD_STORE(&(dst)[j_], acc_); \ + } \ + } \ +} while (0) + +/** + * Thresholding template block iterator, no unrolling. + */ +#define RC_THRESH_PIXEL_BLK_X1_(dst, src, idx, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args) \ +do { \ + rc_word_t acc_ = RC_WORD_ZERO; \ + int k_, b_; \ + for (k_ = 0, b_ = 0; k_ < (int)(8*RC_WORD_SIZE); k_++) { \ + RC_THRESH_PIXEL_ITER_(src, acc_, idx, b_, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + } \ + RC_WORD_STORE(dst, acc_); \ +} while (0) + +/** + * Thresholding template block iterator, unrolled two times. 
+ */ +#define RC_THRESH_PIXEL_BLK_X2_(dst, src, idx, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args) \ +do { \ + rc_word_t acc_ = RC_WORD_ZERO; \ + int k_, b_; \ + for (k_ = 0, b_ = 0; k_ < (int)(8*RC_WORD_SIZE); k_ += 2) { \ + RC_THRESH_PIXEL_ITER_(src, acc_, idx, b_, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + RC_THRESH_PIXEL_ITER_(src, acc_, idx, b_, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + } \ + RC_WORD_STORE(dst, acc_); \ +} while (0) + +/** + * Thresholding template block iterator, unrolled four times. + */ +#define RC_THRESH_PIXEL_BLK_X4_(dst, src, idx, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args) \ +do { \ + rc_word_t acc_ = RC_WORD_ZERO; \ + int k_, b_; \ + for (k_ = 0, b_ = 0; k_ < (int)(8*RC_WORD_SIZE); k_ += 4) { \ + RC_THRESH_PIXEL_ITER_(src, acc_, idx, b_, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + RC_THRESH_PIXEL_ITER_(src, acc_, idx, b_, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + RC_THRESH_PIXEL_ITER_(src, acc_, idx, b_, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + RC_THRESH_PIXEL_ITER_(src, acc_, idx, b_, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + } \ + RC_WORD_STORE(dst, acc_); \ +} while (0) + +/** + * Thresholding template empty remainder iterator. + */ +#define RC_THRESH_PIXEL_REM_NONE_(src, acc, idx, pos, len, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args) \ + +/** + * Thresholding template remainder iterator, unrolled two times. + */ +#define RC_THRESH_PIXEL_REM_X2_(src, acc, idx, pos, len, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args) \ +do { \ + int k_; \ + for (k_ = 0; k_ < (len); k_++) { \ + RC_THRESH_PIXEL_ITER_(src, acc, idx, pos, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + RC_THRESH_PIXEL_ITER_(src, acc, idx, pos, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + } \ +} while (0) + +/** + * Thresholding template remainder iterator, unrolled four times. 
+ */ +#define RC_THRESH_PIXEL_REM_X4_(src, acc, idx, pos, len, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args) \ +do { \ + int k_; \ + for (k_ = 0; k_ < (len); k_++) { \ + RC_THRESH_PIXEL_ITER_(src, acc, idx, pos, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + RC_THRESH_PIXEL_ITER_(src, acc, idx, pos, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + RC_THRESH_PIXEL_ITER_(src, acc, idx, pos, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + RC_THRESH_PIXEL_ITER_(src, acc, idx, pos, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + } \ +} while (0) + +/** + * Pixelwise thresholding template iteration. + */ +#define RC_THRESH_PIXEL_ITER_(src, acc, idx, pos, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args) \ +do { \ + int thr2_value_; \ + if ((num_args) == RC_THRESH_PIXEL_DOUBLE_ARG) { \ + thr2_value_ = (thr2)[thr2_idx]; \ + } \ + else { \ + (void)thr2_value_; \ + } \ + \ + int bit_ = cmp((src)[idx], (thr1)[thr1_idx], thr2_value_); \ + (acc) |= RC_WORD_INSERT(bit_, pos, 1); \ + (idx)++; \ + (thr1_idx)++; \ + if ((num_args) == RC_THRESH_PIXEL_DOUBLE_ARG) { \ + (thr2_idx)++; \ + } \ + (pos)++; \ +} while (0) + +#endif /* RC_THRESH_PIXEL_TPL_H */ + diff --git a/compute/generic/rc_thresh_tpl.h b/compute/generic/rc_thresh_tpl.h new file mode 100644 index 0000000..399b8e6 --- /dev/null +++ b/compute/generic/rc_thresh_tpl.h @@ -0,0 +1,211 @@ +/* Copyright (C) 2005-2016, Axis Communications AB, LUND, SWEDEN + * + * This file is part of RAPP. + * + * RAPP is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * You can use the comments under either the terms of the GNU Lesser General + * Public License version 3 as published by the Free Software Foundation, + * either version 3 of the License or (at your option) any later version, or + * the GNU Free Documentation License version 1.3 or any later version + * published by the Free Software Foundation; with no Invariant Sections, no + * Front-Cover Texts, and no Back-Cover Texts. + * A copy of the license is included in the documentation section entitled + * "GNU Free Documentation License". + * + * RAPP is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License and a copy of the GNU Free Documentation License along + * with RAPP. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * @file rc_thresh_tpl.h + * @brief RAPP Compute layer threshold templates, generic implementation. + */ + +#ifndef RC_THRESH_TPL_H +#define RC_THRESH_TPL_H + +#include "rc_word.h" /* Word operations */ + +/* + * ------------------------------------------------------------- + * Thresholding templates + * ------------------------------------------------------------- + */ + +/** + * Threshold-to-binary template. 
+ */ +#define RC_THRESH_TEMPLATE(dst, dst_dim, src, src_dim, \ + width, height, low, high, cmp, unroll) \ +do { \ + int blk_ = (width) / (8*RC_WORD_SIZE); /* Full dst blocks */ \ + int end_ = (width) % (8*RC_WORD_SIZE); /* Partial dst blocks */ \ + \ + if ((unroll) == 4 && /* Constant */ \ + (blk_ > 0 || end_ >= 4)) /* Variable */ \ + { \ + int len_ = end_ / 4; \ + int rem_ = end_ % 4; \ + RC_THRESH_(dst, dst_dim, src, src_dim, height, \ + blk_, len_, rem_, low, high, cmp, \ + RC_THRESH_BLK_X4_, RC_THRESH_REM_X4_); \ + } \ + else if ((unroll) >= 2 && /* Constant */ \ + (blk_ > 0 || end_ >= 2)) /* Variable */ \ + { \ + int len_ = end_ / 2; \ + int rem_ = end_ % 2; \ + RC_THRESH_(dst, dst_dim, src, src_dim, height, \ + blk_, len_, rem_, low, high, cmp, \ + RC_THRESH_BLK_X2_, RC_THRESH_REM_X2_); \ + } \ + else { \ + RC_THRESH_(dst, dst_dim, src, src_dim, height, \ + blk_, 0, end_, low, high, cmp, \ + RC_THRESH_BLK_X1_, RC_THRESH_REM_NONE_); \ + } \ +} while (0) + + +/* + * ------------------------------------------------------------- + * Internal support macros for thresholding template + * ------------------------------------------------------------- + */ + +/** + * Thresholding template driver. 
+ */ +#define RC_THRESH_(dst, dst_dim, src, src_dim, height, \ + blk, len, rem, thr1, thr2, cmp, \ + loop_blk, loop_rem) \ +do { \ + int y_; \ + for (y_ = 0; y_ < (height); y_++) { \ + int i_ = y_*(src_dim); \ + int j_ = y_*(dst_dim); \ + int x_; \ + \ + /* Handle all full destination word blocks */ \ + for (x_ = 0; x_ < (blk); x_++, j_ += RC_WORD_SIZE) { \ + loop_blk(&(dst)[j_], src, i_, cmp, thr1, thr2); \ + } \ + \ + /* Handle partial destination words */ \ + if ((len) || (rem)) { \ + rc_word_t acc_ = RC_WORD_ZERO; \ + int pos_ = 0; \ + \ + /* Handle unrolled source pixels */ \ + loop_rem(src, acc_, i_, pos_, len, cmp, thr1, thr2); \ + \ + /* Handle any remaining source pixels */ \ + for (x_ = 0; x_ < (rem); x_++) { \ + RC_THRESH_ITER_(src, acc_, i_, pos_, \ + cmp, thr1, thr2); \ + } \ + \ + /* Store the partial word */ \ + RC_WORD_STORE(&(dst)[j_], acc_); \ + } \ + } \ +} while (0) + +/** + * Thresholding template block iterator, no unrolling. + */ +#define RC_THRESH_BLK_X1_(dst, src, idx, cmp, thr1, thr2) \ +do { \ + rc_word_t acc_ = RC_WORD_ZERO; \ + int k_, b_; \ + for (k_ = 0, b_ = 0; k_ < (int)(8*RC_WORD_SIZE); k_++) { \ + RC_THRESH_ITER_(src, acc_, idx, b_, cmp, thr1, thr2); \ + } \ + RC_WORD_STORE(dst, acc_); \ +} while (0) + +/** + * Thresholding template block iterator, unrolled two times. + */ +#define RC_THRESH_BLK_X2_(dst, src, idx, cmp, thr1, thr2) \ +do { \ + rc_word_t acc_ = RC_WORD_ZERO; \ + int k_, b_; \ + for (k_ = 0, b_ = 0; k_ < (int)(8*RC_WORD_SIZE); k_ += 2) { \ + RC_THRESH_ITER_(src, acc_, idx, b_, cmp, thr1, thr2); \ + RC_THRESH_ITER_(src, acc_, idx, b_, cmp, thr1, thr2); \ + } \ + RC_WORD_STORE(dst, acc_); \ +} while (0) + +/** + * Thresholding template block iterator, unrolled four times. 
+ */ +#define RC_THRESH_BLK_X4_(dst, src, idx, cmp, thr1, thr2) \ +do { \ + rc_word_t acc_ = RC_WORD_ZERO; \ + int k_, b_; \ + for (k_ = 0, b_ = 0; k_ < (int)(8*RC_WORD_SIZE); k_ += 4) { \ + RC_THRESH_ITER_(src, acc_, idx, b_, cmp, thr1, thr2); \ + RC_THRESH_ITER_(src, acc_, idx, b_, cmp, thr1, thr2); \ + RC_THRESH_ITER_(src, acc_, idx, b_, cmp, thr1, thr2); \ + RC_THRESH_ITER_(src, acc_, idx, b_, cmp, thr1, thr2); \ + } \ + RC_WORD_STORE(dst, acc_); \ +} while (0) + +/** + * Thresholding template empty remainder iterator. + */ +#define RC_THRESH_REM_NONE_(src, acc, idx, pos, \ + len, cmp, thr1, thr2) + +/** + * Thresholding template remainder iterator, unrolled two times. + */ +#define RC_THRESH_REM_X2_(src, acc, idx, pos, \ + len, cmp, thr1, thr2) \ +do { \ + int k_; \ + for (k_ = 0; k_ < (len); k_++) { \ + RC_THRESH_ITER_(src, acc, idx, pos, cmp, thr1, thr2); \ + RC_THRESH_ITER_(src, acc, idx, pos, cmp, thr1, thr2); \ + } \ +} while (0) + +/** + * Thresholding template remainder iterator, unrolled four times. + */ +#define RC_THRESH_REM_X4_(src, acc, idx, pos, \ + len, cmp, thr1, thr2) \ +do { \ + int k_; \ + for (k_ = 0; k_ < (len); k_++) { \ + RC_THRESH_ITER_(src, acc, idx, pos, cmp, thr1, thr2); \ + RC_THRESH_ITER_(src, acc, idx, pos, cmp, thr1, thr2); \ + RC_THRESH_ITER_(src, acc, idx, pos, cmp, thr1, thr2); \ + RC_THRESH_ITER_(src, acc, idx, pos, cmp, thr1, thr2); \ + } \ +} while (0) + +/** + * Thresholding template iteration. + */ +#define RC_THRESH_ITER_(src, acc, idx, pos, cmp, thr1, thr2) \ +do { \ + int bit_ = cmp((src)[idx], thr1, thr2); \ + (acc) |= RC_WORD_INSERT(bit_, pos, 1); \ + (idx)++; \ + (pos)++; \ +} while (0) + +#endif /* RC_THRESH_TPL_H */ diff --git a/compute/generic/rc_type.c b/compute/generic/rc_type.c index d89f9d9..772b6d4 100644 --- a/compute/generic/rc_type.c +++ b/compute/generic/rc_type.c @@ -30,10 +30,10 @@ * @brief RAPP Compute layer type conversions, generic implementation. 
*/ -#include "rc_impl_cfg.h" /* Implementation cfg */ -#include "rc_template.h" /* Thresholding templates */ -#include "rc_table.h" /* Lookup tables */ -#include "rc_type.h" /* Type conversion API */ +#include "rc_impl_cfg.h" /* Implementation cfg */ +#include "rc_thresh_tpl.h" /* Thresholding templates */ +#include "rc_table.h" /* Lookup tables */ +#include "rc_type.h" /* Type conversion API */ /* * ------------------------------------------------------------- @@ -82,7 +82,7 @@ rc_type_u8_to_bin(uint8_t *restrict dst, int dst_dim, const uint8_t *restrict src, int src_dim, int width, int height) { - RC_TEMPLATE_THRESH(dst, dst_dim, src, src_dim, + RC_THRESH_TEMPLATE(dst, dst_dim, src, src_dim, width, height, 0, 0, RC_TYPE_U8_TO_BIN, RC_UNROLL(rc_type_u8_to_bin)); } diff --git a/compute/include/rc_thresh.h b/compute/include/rc_thresh.h index 45a8e75..9e1faad 100644 --- a/compute/include/rc_thresh.h +++ b/compute/include/rc_thresh.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2010, Axis Communications AB, LUND, SWEDEN +/* Copyright (C) 2005-2010, 2016, Axis Communications AB, LUND, SWEDEN * * This file is part of RAPP. * @@ -112,6 +112,23 @@ rc_thresh_ltgt_u8(uint8_t *restrict dst, int dst_dim, const uint8_t *restrict src, int src_dim, int width, int height, int low, int high); +/** + * Pixelwise single thresholding less-than. + * + * @param[out] dst Destination pixel buffer. + * @param dst_dim Row dimension of the destination buffer. + * @param[in] src Source pixel buffer. + * @param src_dim Row dimension of the source buffer. + * @param width Image width in pixels. + * @param height Image height in pixels. + * @param[in] thresh Threshold pixel buffer. + * @param thresh_dim Row dimension of the threshold buffer. 
+ */ +RC_EXPORT void +rc_thresh_lt_pixel_u8(uint8_t *restrict dst, int dst_dim, + const uint8_t *restrict src, int src_dim, + int width, int height, + const uint8_t *restrict thresh, int thresh_dim); #ifdef __cplusplus }; diff --git a/compute/tune/arch/rapptune-x86_64-gnu-sse2.h b/compute/tune/arch/rapptune-x86_64-gnu-sse2.h index 88befb7..8f3f9e8 100644 --- a/compute/tune/arch/rapptune-x86_64-gnu-sse2.h +++ b/compute/tune/arch/rapptune-x86_64-gnu-sse2.h @@ -2,7 +2,7 @@ * @file rapptune.h * @brief RAPP Compute implementation tuning config. * Auto-generated by RAPP Compute performance tuner on - * Tue Mar 20 01:57:39 2012 + * Mon Apr 4 17:19:48 2016 */ #ifndef RAPPTUNE_H @@ -10,998 +10,1002 @@ #include "rc_impl.h" /* Implementation names */ -#define rc_compiler_version 404 +#define rc_compiler_version 407 #define rc_bitblt_wm_copy_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wm_copy_bin_UNROLL 2 -#define rc_bitblt_wm_copy_bin_SCORE 5.15e+10 +#define rc_bitblt_wm_copy_bin_UNROLL 4 +#define rc_bitblt_wm_copy_bin_SCORE 4.69e+10 #define rc_bitblt_wm_not_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wm_not_bin_UNROLL 1 -#define rc_bitblt_wm_not_bin_SCORE 4.82e+10 +#define rc_bitblt_wm_not_bin_UNROLL 4 +#define rc_bitblt_wm_not_bin_SCORE 4.46e+10 #define rc_bitblt_wm_and_bin_IMPL RC_IMPL_GEN #define rc_bitblt_wm_and_bin_UNROLL 1 -#define rc_bitblt_wm_and_bin_SCORE 4.74e+10 +#define rc_bitblt_wm_and_bin_SCORE 3.85e+10 #define rc_bitblt_wm_or_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wm_or_bin_UNROLL 1 -#define rc_bitblt_wm_or_bin_SCORE 4.82e+10 +#define rc_bitblt_wm_or_bin_UNROLL 4 +#define rc_bitblt_wm_or_bin_SCORE 4.59e+10 #define rc_bitblt_wm_xor_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wm_xor_bin_UNROLL 1 -#define rc_bitblt_wm_xor_bin_SCORE 4.76e+10 +#define rc_bitblt_wm_xor_bin_UNROLL 2 +#define rc_bitblt_wm_xor_bin_SCORE 3.93e+10 #define rc_bitblt_wm_nand_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wm_nand_bin_UNROLL 1 -#define rc_bitblt_wm_nand_bin_SCORE 4.41e+10 +#define 
rc_bitblt_wm_nand_bin_UNROLL 4 +#define rc_bitblt_wm_nand_bin_SCORE 4.09e+10 #define rc_bitblt_wm_nor_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wm_nor_bin_UNROLL 1 -#define rc_bitblt_wm_nor_bin_SCORE 4.36e+10 +#define rc_bitblt_wm_nor_bin_UNROLL 2 +#define rc_bitblt_wm_nor_bin_SCORE 4.52e+10 #define rc_bitblt_wm_xnor_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wm_xnor_bin_UNROLL 1 -#define rc_bitblt_wm_xnor_bin_SCORE 4.39e+10 +#define rc_bitblt_wm_xnor_bin_UNROLL 4 +#define rc_bitblt_wm_xnor_bin_SCORE 4.07e+10 #define rc_bitblt_wm_andn_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wm_andn_bin_UNROLL 1 -#define rc_bitblt_wm_andn_bin_SCORE 4.30e+10 +#define rc_bitblt_wm_andn_bin_UNROLL 2 +#define rc_bitblt_wm_andn_bin_SCORE 4.40e+10 #define rc_bitblt_wm_orn_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wm_orn_bin_UNROLL 1 -#define rc_bitblt_wm_orn_bin_SCORE 4.31e+10 +#define rc_bitblt_wm_orn_bin_UNROLL 2 +#define rc_bitblt_wm_orn_bin_SCORE 4.41e+10 #define rc_bitblt_wm_nandn_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wm_nandn_bin_UNROLL 1 -#define rc_bitblt_wm_nandn_bin_SCORE 4.28e+10 +#define rc_bitblt_wm_nandn_bin_UNROLL 2 +#define rc_bitblt_wm_nandn_bin_SCORE 4.39e+10 #define rc_bitblt_wm_norn_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wm_norn_bin_UNROLL 1 -#define rc_bitblt_wm_norn_bin_SCORE 4.22e+10 +#define rc_bitblt_wm_norn_bin_UNROLL 2 +#define rc_bitblt_wm_norn_bin_SCORE 4.40e+10 #define rc_bitblt_wa_copy_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wa_copy_bin_UNROLL 4 -#define rc_bitblt_wa_copy_bin_SCORE 8.27e+10 +#define rc_bitblt_wa_copy_bin_UNROLL 2 +#define rc_bitblt_wa_copy_bin_SCORE 1.99e+11 #define rc_bitblt_wa_not_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wa_not_bin_UNROLL 1 -#define rc_bitblt_wa_not_bin_SCORE 9.14e+10 +#define rc_bitblt_wa_not_bin_UNROLL 4 +#define rc_bitblt_wa_not_bin_SCORE 1.34e+11 #define rc_bitblt_wa_and_bin_IMPL RC_IMPL_GEN #define rc_bitblt_wa_and_bin_UNROLL 1 -#define rc_bitblt_wa_and_bin_SCORE 6.50e+10 +#define rc_bitblt_wa_and_bin_SCORE 1.04e+11 
#define rc_bitblt_wa_or_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wa_or_bin_UNROLL 1 -#define rc_bitblt_wa_or_bin_SCORE 6.46e+10 +#define rc_bitblt_wa_or_bin_UNROLL 4 +#define rc_bitblt_wa_or_bin_SCORE 1.03e+11 #define rc_bitblt_wa_xor_bin_IMPL RC_IMPL_GEN #define rc_bitblt_wa_xor_bin_UNROLL 1 -#define rc_bitblt_wa_xor_bin_SCORE 6.52e+10 +#define rc_bitblt_wa_xor_bin_SCORE 1.03e+11 #define rc_bitblt_wa_nand_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wa_nand_bin_UNROLL 1 -#define rc_bitblt_wa_nand_bin_SCORE 6.29e+10 +#define rc_bitblt_wa_nand_bin_UNROLL 4 +#define rc_bitblt_wa_nand_bin_SCORE 9.92e+10 #define rc_bitblt_wa_nor_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wa_nor_bin_UNROLL 1 -#define rc_bitblt_wa_nor_bin_SCORE 6.30e+10 +#define rc_bitblt_wa_nor_bin_UNROLL 4 +#define rc_bitblt_wa_nor_bin_SCORE 9.82e+10 #define rc_bitblt_wa_xnor_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wa_xnor_bin_UNROLL 1 -#define rc_bitblt_wa_xnor_bin_SCORE 6.30e+10 +#define rc_bitblt_wa_xnor_bin_UNROLL 4 +#define rc_bitblt_wa_xnor_bin_SCORE 9.79e+10 #define rc_bitblt_wa_andn_bin_IMPL RC_IMPL_GEN #define rc_bitblt_wa_andn_bin_UNROLL 1 -#define rc_bitblt_wa_andn_bin_SCORE 6.38e+10 +#define rc_bitblt_wa_andn_bin_SCORE 1.00e+11 #define rc_bitblt_wa_orn_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wa_orn_bin_UNROLL 1 -#define rc_bitblt_wa_orn_bin_SCORE 6.30e+10 +#define rc_bitblt_wa_orn_bin_UNROLL 4 +#define rc_bitblt_wa_orn_bin_SCORE 9.86e+10 #define rc_bitblt_wa_nandn_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wa_nandn_bin_UNROLL 1 -#define rc_bitblt_wa_nandn_bin_SCORE 6.29e+10 +#define rc_bitblt_wa_nandn_bin_UNROLL 4 +#define rc_bitblt_wa_nandn_bin_SCORE 9.89e+10 #define rc_bitblt_wa_norn_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wa_norn_bin_UNROLL 1 -#define rc_bitblt_wa_norn_bin_SCORE 6.30e+10 +#define rc_bitblt_wa_norn_bin_UNROLL 4 +#define rc_bitblt_wa_norn_bin_SCORE 1.00e+11 #define rc_bitblt_vm_copy_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_vm_copy_bin_UNROLL 1 -#define rc_bitblt_vm_copy_bin_SCORE 
6.22e+10 +#define rc_bitblt_vm_copy_bin_SCORE 1.97e+11 #define rc_bitblt_vm_not_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_vm_not_bin_UNROLL 1 -#define rc_bitblt_vm_not_bin_SCORE 5.56e+10 +#define rc_bitblt_vm_not_bin_SCORE 1.96e+11 #define rc_bitblt_vm_and_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_vm_and_bin_UNROLL 1 -#define rc_bitblt_vm_and_bin_SCORE 5.43e+10 +#define rc_bitblt_vm_and_bin_SCORE 1.74e+11 #define rc_bitblt_vm_or_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_vm_or_bin_UNROLL 1 -#define rc_bitblt_vm_or_bin_SCORE 5.43e+10 +#define rc_bitblt_vm_or_bin_SCORE 1.73e+11 #define rc_bitblt_vm_xor_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_vm_xor_bin_UNROLL 1 -#define rc_bitblt_vm_xor_bin_SCORE 5.43e+10 +#define rc_bitblt_vm_xor_bin_SCORE 1.74e+11 #define rc_bitblt_vm_nand_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_vm_nand_bin_UNROLL 1 -#define rc_bitblt_vm_nand_bin_SCORE 5.13e+10 +#define rc_bitblt_vm_nand_bin_SCORE 1.59e+11 #define rc_bitblt_vm_nor_bin_IMPL RC_IMPL_SIMD -#define rc_bitblt_vm_nor_bin_UNROLL 1 -#define rc_bitblt_vm_nor_bin_SCORE 5.10e+10 +#define rc_bitblt_vm_nor_bin_UNROLL 2 +#define rc_bitblt_vm_nor_bin_SCORE 1.53e+11 #define rc_bitblt_vm_xnor_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_vm_xnor_bin_UNROLL 1 -#define rc_bitblt_vm_xnor_bin_SCORE 5.07e+10 +#define rc_bitblt_vm_xnor_bin_SCORE 1.59e+11 #define rc_bitblt_vm_andn_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_vm_andn_bin_UNROLL 1 -#define rc_bitblt_vm_andn_bin_SCORE 5.43e+10 +#define rc_bitblt_vm_andn_bin_SCORE 1.69e+11 #define rc_bitblt_vm_orn_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_vm_orn_bin_UNROLL 1 -#define rc_bitblt_vm_orn_bin_SCORE 5.10e+10 +#define rc_bitblt_vm_orn_bin_SCORE 1.52e+11 #define rc_bitblt_vm_nandn_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_vm_nandn_bin_UNROLL 1 -#define rc_bitblt_vm_nandn_bin_SCORE 5.41e+10 +#define rc_bitblt_vm_nandn_bin_SCORE 1.54e+11 #define rc_bitblt_vm_norn_bin_IMPL RC_IMPL_SIMD -#define rc_bitblt_vm_norn_bin_UNROLL 2 -#define rc_bitblt_vm_norn_bin_SCORE 5.28e+10 
+#define rc_bitblt_vm_norn_bin_UNROLL 1 +#define rc_bitblt_vm_norn_bin_SCORE 1.70e+11 #define rc_bitblt_va_copy_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_va_copy_bin_UNROLL 1 -#define rc_bitblt_va_copy_bin_SCORE 1.54e+11 +#define rc_bitblt_va_copy_bin_SCORE 2.51e+11 #define rc_bitblt_va_not_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_va_not_bin_UNROLL 1 -#define rc_bitblt_va_not_bin_SCORE 1.42e+11 +#define rc_bitblt_va_not_bin_SCORE 2.31e+11 #define rc_bitblt_va_and_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_va_and_bin_UNROLL 1 -#define rc_bitblt_va_and_bin_SCORE 1.08e+11 +#define rc_bitblt_va_and_bin_SCORE 1.81e+11 #define rc_bitblt_va_or_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_va_or_bin_UNROLL 1 -#define rc_bitblt_va_or_bin_SCORE 1.08e+11 +#define rc_bitblt_va_or_bin_SCORE 1.82e+11 #define rc_bitblt_va_xor_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_va_xor_bin_UNROLL 1 -#define rc_bitblt_va_xor_bin_SCORE 1.08e+11 +#define rc_bitblt_va_xor_bin_SCORE 1.81e+11 #define rc_bitblt_va_nand_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_va_nand_bin_UNROLL 1 -#define rc_bitblt_va_nand_bin_SCORE 1.05e+11 +#define rc_bitblt_va_nand_bin_SCORE 1.73e+11 #define rc_bitblt_va_nor_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_va_nor_bin_UNROLL 1 -#define rc_bitblt_va_nor_bin_SCORE 1.06e+11 +#define rc_bitblt_va_nor_bin_SCORE 1.70e+11 #define rc_bitblt_va_xnor_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_va_xnor_bin_UNROLL 1 -#define rc_bitblt_va_xnor_bin_SCORE 1.07e+11 +#define rc_bitblt_va_xnor_bin_SCORE 1.73e+11 #define rc_bitblt_va_andn_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_va_andn_bin_UNROLL 1 -#define rc_bitblt_va_andn_bin_SCORE 1.07e+11 +#define rc_bitblt_va_andn_bin_SCORE 1.76e+11 #define rc_bitblt_va_orn_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_va_orn_bin_UNROLL 1 -#define rc_bitblt_va_orn_bin_SCORE 1.07e+11 +#define rc_bitblt_va_orn_bin_SCORE 1.72e+11 #define rc_bitblt_va_nandn_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_va_nandn_bin_UNROLL 1 -#define rc_bitblt_va_nandn_bin_SCORE 1.06e+11 +#define 
rc_bitblt_va_nandn_bin_SCORE 1.74e+11 #define rc_bitblt_va_norn_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_va_norn_bin_UNROLL 1 -#define rc_bitblt_va_norn_bin_SCORE 1.08e+11 +#define rc_bitblt_va_norn_bin_SCORE 1.77e+11 #define rc_pixop_set_u8_IMPL RC_IMPL_SIMD -#define rc_pixop_set_u8_UNROLL 1 -#define rc_pixop_set_u8_SCORE 2.54e+10 +#define rc_pixop_set_u8_UNROLL 4 +#define rc_pixop_set_u8_SCORE 3.79e+10 #define rc_pixop_not_u8_IMPL RC_IMPL_SIMD #define rc_pixop_not_u8_UNROLL 2 -#define rc_pixop_not_u8_SCORE 2.36e+10 +#define rc_pixop_not_u8_SCORE 3.93e+10 #define rc_pixop_flip_u8_IMPL RC_IMPL_SIMD #define rc_pixop_flip_u8_UNROLL 2 -#define rc_pixop_flip_u8_SCORE 2.40e+10 +#define rc_pixop_flip_u8_SCORE 3.93e+10 #define rc_pixop_lut_u8_IMPL RC_IMPL_GEN -#define rc_pixop_lut_u8_UNROLL 2 -#define rc_pixop_lut_u8_SCORE 1.52e+09 +#define rc_pixop_lut_u8_UNROLL 4 +#define rc_pixop_lut_u8_SCORE 2.33e+09 #define rc_pixop_abs_u8_IMPL RC_IMPL_SIMD -#define rc_pixop_abs_u8_UNROLL 2 -#define rc_pixop_abs_u8_SCORE 1.51e+10 +#define rc_pixop_abs_u8_UNROLL 4 +#define rc_pixop_abs_u8_SCORE 2.14e+10 #define rc_pixop_addc_u8_IMPL RC_IMPL_SIMD #define rc_pixop_addc_u8_UNROLL 4 -#define rc_pixop_addc_u8_SCORE 2.32e+10 +#define rc_pixop_addc_u8_SCORE 3.95e+10 #define rc_pixop_lerpc_u8_IMPL RC_IMPL_SIMD #define rc_pixop_lerpc_u8_UNROLL 4 -#define rc_pixop_lerpc_u8_SCORE 2.32e+10 +#define rc_pixop_lerpc_u8_SCORE 3.98e+10 #define rc_pixop_lerpnc_u8_IMPL RC_IMPL_SIMD #define rc_pixop_lerpnc_u8_UNROLL 4 -#define rc_pixop_lerpnc_u8_SCORE 4.01e+09 +#define rc_pixop_lerpnc_u8_SCORE 7.82e+09 #define rc_pixop_add_u8_IMPL RC_IMPL_SIMD -#define rc_pixop_add_u8_UNROLL 1 -#define rc_pixop_add_u8_SCORE 1.10e+10 +#define rc_pixop_add_u8_UNROLL 4 +#define rc_pixop_add_u8_SCORE 2.13e+10 #define rc_pixop_avg_u8_IMPL RC_IMPL_SIMD -#define rc_pixop_avg_u8_UNROLL 1 -#define rc_pixop_avg_u8_SCORE 1.10e+10 +#define rc_pixop_avg_u8_UNROLL 4 +#define rc_pixop_avg_u8_SCORE 2.14e+10 #define rc_pixop_sub_u8_IMPL 
RC_IMPL_SIMD -#define rc_pixop_sub_u8_UNROLL 1 -#define rc_pixop_sub_u8_SCORE 1.10e+10 +#define rc_pixop_sub_u8_UNROLL 4 +#define rc_pixop_sub_u8_SCORE 2.11e+10 #define rc_pixop_subh_u8_IMPL RC_IMPL_SIMD -#define rc_pixop_subh_u8_UNROLL 1 -#define rc_pixop_subh_u8_SCORE 1.10e+10 +#define rc_pixop_subh_u8_UNROLL 4 +#define rc_pixop_subh_u8_SCORE 1.93e+10 #define rc_pixop_suba_u8_IMPL RC_IMPL_SIMD -#define rc_pixop_suba_u8_UNROLL 1 -#define rc_pixop_suba_u8_SCORE 1.14e+10 +#define rc_pixop_suba_u8_UNROLL 2 +#define rc_pixop_suba_u8_SCORE 1.85e+10 #define rc_pixop_lerp_u8_IMPL RC_IMPL_SIMD #define rc_pixop_lerp_u8_UNROLL 4 -#define rc_pixop_lerp_u8_SCORE 3.21e+09 +#define rc_pixop_lerp_u8_SCORE 6.24e+09 #define rc_pixop_lerpn_u8_IMPL RC_IMPL_SIMD -#define rc_pixop_lerpn_u8_UNROLL 4 -#define rc_pixop_lerpn_u8_SCORE 2.61e+09 +#define rc_pixop_lerpn_u8_UNROLL 2 +#define rc_pixop_lerpn_u8_SCORE 4.98e+09 #define rc_pixop_lerpi_u8_IMPL RC_IMPL_SIMD #define rc_pixop_lerpi_u8_UNROLL 4 -#define rc_pixop_lerpi_u8_SCORE 2.97e+09 +#define rc_pixop_lerpi_u8_SCORE 5.97e+09 #define rc_pixop_norm_u8_IMPL RC_IMPL_SIMD #define rc_pixop_norm_u8_UNROLL 1 -#define rc_pixop_norm_u8_SCORE 7.72e+09 +#define rc_pixop_norm_u8_SCORE 1.08e+10 #define rc_type_u8_to_bin_IMPL RC_IMPL_SIMD #define rc_type_u8_to_bin_UNROLL 4 -#define rc_type_u8_to_bin_SCORE 1.86e+10 +#define rc_type_u8_to_bin_SCORE 3.26e+10 #define rc_type_bin_to_u8_IMPL RC_IMPL_SIMD -#define rc_type_bin_to_u8_UNROLL 1 -#define rc_type_bin_to_u8_SCORE 5.98e+09 +#define rc_type_bin_to_u8_UNROLL 2 +#define rc_type_bin_to_u8_SCORE 1.95e+10 #define rc_thresh_gt_u8_IMPL RC_IMPL_SIMD -#define rc_thresh_gt_u8_UNROLL 1 -#define rc_thresh_gt_u8_SCORE 1.26e+10 +#define rc_thresh_gt_u8_UNROLL 4 +#define rc_thresh_gt_u8_SCORE 2.17e+10 #define rc_thresh_lt_u8_IMPL RC_IMPL_SIMD -#define rc_thresh_lt_u8_UNROLL 1 -#define rc_thresh_lt_u8_SCORE 1.34e+10 +#define rc_thresh_lt_u8_UNROLL 4 +#define rc_thresh_lt_u8_SCORE 2.47e+10 #define 
rc_thresh_gtlt_u8_IMPL RC_IMPL_SIMD -#define rc_thresh_gtlt_u8_UNROLL 1 -#define rc_thresh_gtlt_u8_SCORE 1.05e+10 +#define rc_thresh_gtlt_u8_UNROLL 4 +#define rc_thresh_gtlt_u8_SCORE 1.55e+10 #define rc_thresh_ltgt_u8_IMPL RC_IMPL_SIMD #define rc_thresh_ltgt_u8_UNROLL 1 -#define rc_thresh_ltgt_u8_SCORE 1.00e+10 +#define rc_thresh_ltgt_u8_SCORE 1.45e+10 + +#define rc_thresh_lt_pixel_u8_IMPL RC_IMPL_SIMD +#define rc_thresh_lt_pixel_u8_UNROLL 4 +#define rc_thresh_lt_pixel_u8_SCORE 1.63e+10 #define rc_stat_sum_bin_IMPL RC_IMPL_SIMD #define rc_stat_sum_bin_UNROLL 1 -#define rc_stat_sum_bin_SCORE 2.88e+10 +#define rc_stat_sum_bin_SCORE 4.48e+10 #define rc_stat_sum_u8_IMPL RC_IMPL_SIMD #define rc_stat_sum_u8_UNROLL 1 -#define rc_stat_sum_u8_SCORE 1.08e+10 +#define rc_stat_sum_u8_SCORE 2.26e+10 #define rc_stat_sum2_u8_IMPL RC_IMPL_SIMD #define rc_stat_sum2_u8_UNROLL 1 -#define rc_stat_sum2_u8_SCORE 7.13e+09 +#define rc_stat_sum2_u8_SCORE 1.36e+10 #define rc_stat_xsum_u8_IMPL RC_IMPL_SIMD #define rc_stat_xsum_u8_UNROLL 4 -#define rc_stat_xsum_u8_SCORE 2.80e+09 +#define rc_stat_xsum_u8_SCORE 5.15e+09 -#define rc_stat_min_bin_IMPL RC_IMPL_SIMD -#define rc_stat_min_bin_UNROLL 2 -#define rc_stat_min_bin_SCORE 1.25e+11 +#define rc_stat_min_bin_IMPL RC_IMPL_GEN +#define rc_stat_min_bin_UNROLL 4 +#define rc_stat_min_bin_SCORE 2.23e+11 -#define rc_stat_max_bin_IMPL RC_IMPL_SIMD -#define rc_stat_max_bin_UNROLL 2 -#define rc_stat_max_bin_SCORE 1.24e+11 +#define rc_stat_max_bin_IMPL RC_IMPL_GEN +#define rc_stat_max_bin_UNROLL 4 +#define rc_stat_max_bin_SCORE 2.26e+11 #define rc_stat_min_u8_IMPL RC_IMPL_SIMD #define rc_stat_min_u8_UNROLL 4 -#define rc_stat_min_u8_SCORE 2.59e+10 +#define rc_stat_min_u8_SCORE 3.22e+10 #define rc_stat_max_u8_IMPL RC_IMPL_SIMD #define rc_stat_max_u8_UNROLL 4 -#define rc_stat_max_u8_SCORE 2.60e+10 +#define rc_stat_max_u8_SCORE 3.24e+10 #define rc_reduce_1x2_u8_IMPL RC_IMPL_SIMD -#define rc_reduce_1x2_u8_UNROLL 1 -#define rc_reduce_1x2_u8_SCORE 1.24e+10 
+#define rc_reduce_1x2_u8_UNROLL 2 +#define rc_reduce_1x2_u8_SCORE 2.28e+10 #define rc_reduce_2x1_u8_IMPL RC_IMPL_SIMD -#define rc_reduce_2x1_u8_UNROLL 1 -#define rc_reduce_2x1_u8_SCORE 1.67e+10 +#define rc_reduce_2x1_u8_UNROLL 4 +#define rc_reduce_2x1_u8_SCORE 3.44e+10 #define rc_reduce_2x2_u8_IMPL RC_IMPL_SIMD #define rc_reduce_2x2_u8_UNROLL 1 -#define rc_reduce_2x2_u8_SCORE 1.36e+10 +#define rc_reduce_2x2_u8_SCORE 2.43e+10 #define rc_reduce_1x2_rk1_bin_IMPL RC_IMPL_GEN -#define rc_reduce_1x2_rk1_bin_UNROLL 2 -#define rc_reduce_1x2_rk1_bin_SCORE 1.28e+10 +#define rc_reduce_1x2_rk1_bin_UNROLL 1 +#define rc_reduce_1x2_rk1_bin_SCORE 1.98e+10 #define rc_reduce_1x2_rk2_bin_IMPL RC_IMPL_GEN #define rc_reduce_1x2_rk2_bin_UNROLL 1 -#define rc_reduce_1x2_rk2_bin_SCORE 1.32e+10 +#define rc_reduce_1x2_rk2_bin_SCORE 2.02e+10 #define rc_reduce_2x1_rk1_bin_IMPL RC_IMPL_GEN #define rc_reduce_2x1_rk1_bin_UNROLL 1 -#define rc_reduce_2x1_rk1_bin_SCORE 1.21e+11 +#define rc_reduce_2x1_rk1_bin_SCORE 1.96e+11 #define rc_reduce_2x1_rk2_bin_IMPL RC_IMPL_GEN #define rc_reduce_2x1_rk2_bin_UNROLL 1 -#define rc_reduce_2x1_rk2_bin_SCORE 1.21e+11 +#define rc_reduce_2x1_rk2_bin_SCORE 1.96e+11 #define rc_reduce_2x2_rk1_bin_IMPL RC_IMPL_GEN -#define rc_reduce_2x2_rk1_bin_UNROLL 1 -#define rc_reduce_2x2_rk1_bin_SCORE 2.04e+10 +#define rc_reduce_2x2_rk1_bin_UNROLL 2 +#define rc_reduce_2x2_rk1_bin_SCORE 3.37e+10 #define rc_reduce_2x2_rk2_bin_IMPL RC_IMPL_GEN #define rc_reduce_2x2_rk2_bin_UNROLL 1 -#define rc_reduce_2x2_rk2_bin_SCORE 1.70e+10 +#define rc_reduce_2x2_rk2_bin_SCORE 2.85e+10 #define rc_reduce_2x2_rk3_bin_IMPL RC_IMPL_GEN #define rc_reduce_2x2_rk3_bin_UNROLL 1 -#define rc_reduce_2x2_rk3_bin_SCORE 1.70e+10 +#define rc_reduce_2x2_rk3_bin_SCORE 2.81e+10 #define rc_reduce_2x2_rk4_bin_IMPL RC_IMPL_GEN -#define rc_reduce_2x2_rk4_bin_UNROLL 1 -#define rc_reduce_2x2_rk4_bin_SCORE 2.11e+10 +#define rc_reduce_2x2_rk4_bin_UNROLL 2 +#define rc_reduce_2x2_rk4_bin_SCORE 3.40e+10 #define 
rc_expand_1x2_bin_IMPL RC_IMPL_GEN #define rc_expand_1x2_bin_UNROLL 1 -#define rc_expand_1x2_bin_SCORE 1.11e+10 +#define rc_expand_1x2_bin_SCORE 1.62e+10 #define rc_expand_2x1_bin_IMPL RC_IMPL_GEN #define rc_expand_2x1_bin_UNROLL 1 -#define rc_expand_2x1_bin_SCORE 4.52e+10 +#define rc_expand_2x1_bin_SCORE 6.32e+10 #define rc_expand_2x2_bin_IMPL RC_IMPL_GEN #define rc_expand_2x2_bin_UNROLL 1 -#define rc_expand_2x2_bin_SCORE 1.01e+10 +#define rc_expand_2x2_bin_SCORE 1.54e+10 #define rc_rotate_cw_u8_IMPL RC_IMPL_GEN -#define rc_rotate_cw_u8_UNROLL 4 -#define rc_rotate_cw_u8_SCORE 6.38e+08 +#define rc_rotate_cw_u8_UNROLL 1 +#define rc_rotate_cw_u8_SCORE 1.04e+09 #define rc_rotate_ccw_u8_IMPL RC_IMPL_GEN -#define rc_rotate_ccw_u8_UNROLL 4 -#define rc_rotate_ccw_u8_SCORE 6.29e+08 +#define rc_rotate_ccw_u8_UNROLL 1 +#define rc_rotate_ccw_u8_SCORE 1.06e+09 #define rc_filter_diff_1x2_horz_u8_IMPL RC_IMPL_SIMD -#define rc_filter_diff_1x2_horz_u8_UNROLL 2 -#define rc_filter_diff_1x2_horz_u8_SCORE 8.92e+09 +#define rc_filter_diff_1x2_horz_u8_UNROLL 4 +#define rc_filter_diff_1x2_horz_u8_SCORE 1.46e+10 #define rc_filter_diff_1x2_horz_abs_u8_IMPL RC_IMPL_SIMD -#define rc_filter_diff_1x2_horz_abs_u8_UNROLL 4 -#define rc_filter_diff_1x2_horz_abs_u8_SCORE 8.33e+09 +#define rc_filter_diff_1x2_horz_abs_u8_UNROLL 2 +#define rc_filter_diff_1x2_horz_abs_u8_SCORE 1.41e+10 #define rc_filter_diff_2x1_vert_u8_IMPL RC_IMPL_SIMD -#define rc_filter_diff_2x1_vert_u8_UNROLL 1 -#define rc_filter_diff_2x1_vert_u8_SCORE 9.99e+09 +#define rc_filter_diff_2x1_vert_u8_UNROLL 4 +#define rc_filter_diff_2x1_vert_u8_SCORE 1.86e+10 #define rc_filter_diff_2x1_vert_abs_u8_IMPL RC_IMPL_SIMD -#define rc_filter_diff_2x1_vert_abs_u8_UNROLL 1 -#define rc_filter_diff_2x1_vert_abs_u8_SCORE 9.72e+09 +#define rc_filter_diff_2x1_vert_abs_u8_UNROLL 4 +#define rc_filter_diff_2x1_vert_abs_u8_SCORE 1.81e+10 #define rc_filter_diff_2x2_magn_u8_IMPL RC_IMPL_SIMD -#define rc_filter_diff_2x2_magn_u8_UNROLL 1 -#define 
rc_filter_diff_2x2_magn_u8_SCORE 6.22e+09 +#define rc_filter_diff_2x2_magn_u8_UNROLL 2 +#define rc_filter_diff_2x2_magn_u8_SCORE 1.06e+10 #define rc_filter_sobel_3x3_horz_u8_IMPL RC_IMPL_SIMD #define rc_filter_sobel_3x3_horz_u8_UNROLL 1 -#define rc_filter_sobel_3x3_horz_u8_SCORE 2.94e+09 +#define rc_filter_sobel_3x3_horz_u8_SCORE 6.35e+09 #define rc_filter_sobel_3x3_horz_abs_u8_IMPL RC_IMPL_SIMD #define rc_filter_sobel_3x3_horz_abs_u8_UNROLL 1 -#define rc_filter_sobel_3x3_horz_abs_u8_SCORE 3.59e+09 +#define rc_filter_sobel_3x3_horz_abs_u8_SCORE 8.55e+09 #define rc_filter_sobel_3x3_vert_u8_IMPL RC_IMPL_SIMD #define rc_filter_sobel_3x3_vert_u8_UNROLL 1 -#define rc_filter_sobel_3x3_vert_u8_SCORE 3.60e+09 +#define rc_filter_sobel_3x3_vert_u8_SCORE 7.22e+09 #define rc_filter_sobel_3x3_vert_abs_u8_IMPL RC_IMPL_SIMD #define rc_filter_sobel_3x3_vert_abs_u8_UNROLL 1 -#define rc_filter_sobel_3x3_vert_abs_u8_SCORE 3.22e+09 +#define rc_filter_sobel_3x3_vert_abs_u8_SCORE 5.93e+09 #define rc_filter_sobel_3x3_magn_u8_IMPL RC_IMPL_SIMD #define rc_filter_sobel_3x3_magn_u8_UNROLL 1 -#define rc_filter_sobel_3x3_magn_u8_SCORE 1.92e+09 +#define rc_filter_sobel_3x3_magn_u8_SCORE 3.53e+09 #define rc_filter_gauss_3x3_u8_IMPL RC_IMPL_SIMD #define rc_filter_gauss_3x3_u8_UNROLL 1 -#define rc_filter_gauss_3x3_u8_SCORE 2.74e+09 +#define rc_filter_gauss_3x3_u8_SCORE 6.02e+09 #define rc_filter_laplace_3x3_u8_IMPL RC_IMPL_SIMD #define rc_filter_laplace_3x3_u8_UNROLL 1 -#define rc_filter_laplace_3x3_u8_SCORE 3.03e+09 +#define rc_filter_laplace_3x3_u8_SCORE 5.71e+09 #define rc_filter_laplace_3x3_abs_u8_IMPL RC_IMPL_SIMD #define rc_filter_laplace_3x3_abs_u8_UNROLL 1 -#define rc_filter_laplace_3x3_abs_u8_SCORE 3.39e+09 +#define rc_filter_laplace_3x3_abs_u8_SCORE 7.14e+09 #define rc_filter_highpass_3x3_u8_IMPL RC_IMPL_SIMD #define rc_filter_highpass_3x3_u8_UNROLL 1 -#define rc_filter_highpass_3x3_u8_SCORE 1.58e+09 +#define rc_filter_highpass_3x3_u8_SCORE 2.60e+09 #define 
rc_filter_highpass_3x3_abs_u8_IMPL RC_IMPL_SIMD #define rc_filter_highpass_3x3_abs_u8_UNROLL 1 -#define rc_filter_highpass_3x3_abs_u8_SCORE 1.86e+09 +#define rc_filter_highpass_3x3_abs_u8_SCORE 3.55e+09 #define rc_morph_erode_line_1x2_bin_IMPL RC_IMPL_GEN #define rc_morph_erode_line_1x2_bin_UNROLL 4 -#define rc_morph_erode_line_1x2_bin_SCORE 2.35e+10 +#define rc_morph_erode_line_1x2_bin_SCORE 8.98e+10 #define rc_morph_dilate_line_1x2_bin_IMPL RC_IMPL_GEN #define rc_morph_dilate_line_1x2_bin_UNROLL 4 -#define rc_morph_dilate_line_1x2_bin_SCORE 5.51e+10 +#define rc_morph_dilate_line_1x2_bin_SCORE 7.40e+10 #define rc_morph_erode_line_1x3_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_line_1x3_bin_UNROLL 1 -#define rc_morph_erode_line_1x3_bin_SCORE 1.88e+10 +#define rc_morph_erode_line_1x3_bin_UNROLL 4 +#define rc_morph_erode_line_1x3_bin_SCORE 5.23e+10 #define rc_morph_dilate_line_1x3_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_line_1x3_bin_UNROLL 1 -#define rc_morph_dilate_line_1x3_bin_SCORE 1.91e+10 +#define rc_morph_dilate_line_1x3_bin_UNROLL 4 +#define rc_morph_dilate_line_1x3_bin_SCORE 5.20e+10 #define rc_morph_erode_line_1x3_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_line_1x3_p_bin_UNROLL 1 -#define rc_morph_erode_line_1x3_p_bin_SCORE 1.98e+10 +#define rc_morph_erode_line_1x3_p_bin_UNROLL 2 +#define rc_morph_erode_line_1x3_p_bin_SCORE 5.65e+10 #define rc_morph_dilate_line_1x3_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_line_1x3_p_bin_UNROLL 1 -#define rc_morph_dilate_line_1x3_p_bin_SCORE 1.98e+10 +#define rc_morph_dilate_line_1x3_p_bin_UNROLL 2 +#define rc_morph_dilate_line_1x3_p_bin_SCORE 5.62e+10 #define rc_morph_erode_line_1x5_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_line_1x5_p_bin_UNROLL 1 -#define rc_morph_erode_line_1x5_p_bin_SCORE 1.96e+10 +#define rc_morph_erode_line_1x5_p_bin_UNROLL 2 +#define rc_morph_erode_line_1x5_p_bin_SCORE 5.45e+10 #define rc_morph_dilate_line_1x5_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_line_1x5_p_bin_UNROLL 
1 -#define rc_morph_dilate_line_1x5_p_bin_SCORE 2.00e+10 +#define rc_morph_dilate_line_1x5_p_bin_UNROLL 4 +#define rc_morph_dilate_line_1x5_p_bin_SCORE 5.48e+10 #define rc_morph_erode_line_1x7_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_line_1x7_p_bin_UNROLL 1 -#define rc_morph_erode_line_1x7_p_bin_SCORE 1.96e+10 +#define rc_morph_erode_line_1x7_p_bin_UNROLL 4 +#define rc_morph_erode_line_1x7_p_bin_SCORE 5.47e+10 #define rc_morph_dilate_line_1x7_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_line_1x7_p_bin_UNROLL 1 -#define rc_morph_dilate_line_1x7_p_bin_SCORE 2.00e+10 +#define rc_morph_dilate_line_1x7_p_bin_UNROLL 4 +#define rc_morph_dilate_line_1x7_p_bin_SCORE 5.49e+10 #define rc_morph_erode_line_1x9_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_line_1x9_p_bin_UNROLL 1 -#define rc_morph_erode_line_1x9_p_bin_SCORE 1.96e+10 +#define rc_morph_erode_line_1x9_p_bin_UNROLL 2 +#define rc_morph_erode_line_1x9_p_bin_SCORE 5.20e+10 #define rc_morph_dilate_line_1x9_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_line_1x9_p_bin_UNROLL 1 -#define rc_morph_dilate_line_1x9_p_bin_SCORE 2.00e+10 +#define rc_morph_dilate_line_1x9_p_bin_UNROLL 4 +#define rc_morph_dilate_line_1x9_p_bin_SCORE 5.08e+10 #define rc_morph_erode_line_1x13_p_bin_IMPL RC_IMPL_GEN #define rc_morph_erode_line_1x13_p_bin_UNROLL 2 -#define rc_morph_erode_line_1x13_p_bin_SCORE 1.98e+10 +#define rc_morph_erode_line_1x13_p_bin_SCORE 5.20e+10 #define rc_morph_dilate_line_1x13_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_line_1x13_p_bin_UNROLL 1 -#define rc_morph_dilate_line_1x13_p_bin_SCORE 2.00e+10 +#define rc_morph_dilate_line_1x13_p_bin_UNROLL 4 +#define rc_morph_dilate_line_1x13_p_bin_SCORE 5.20e+10 #define rc_morph_erode_line_1x15_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_line_1x15_p_bin_UNROLL 1 -#define rc_morph_erode_line_1x15_p_bin_SCORE 1.96e+10 +#define rc_morph_erode_line_1x15_p_bin_UNROLL 2 +#define rc_morph_erode_line_1x15_p_bin_SCORE 5.20e+10 #define rc_morph_dilate_line_1x15_p_bin_IMPL 
RC_IMPL_GEN -#define rc_morph_dilate_line_1x15_p_bin_UNROLL 1 -#define rc_morph_dilate_line_1x15_p_bin_SCORE 2.00e+10 +#define rc_morph_dilate_line_1x15_p_bin_UNROLL 4 +#define rc_morph_dilate_line_1x15_p_bin_SCORE 5.13e+10 #define rc_morph_erode_line_1x17_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_line_1x17_p_bin_UNROLL 1 -#define rc_morph_erode_line_1x17_p_bin_SCORE 2.11e+10 +#define rc_morph_erode_line_1x17_p_bin_UNROLL 2 +#define rc_morph_erode_line_1x17_p_bin_SCORE 5.13e+10 #define rc_morph_dilate_line_1x17_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_line_1x17_p_bin_UNROLL 1 -#define rc_morph_dilate_line_1x17_p_bin_SCORE 2.13e+10 +#define rc_morph_dilate_line_1x17_p_bin_UNROLL 4 +#define rc_morph_dilate_line_1x17_p_bin_SCORE 5.15e+10 #define rc_morph_erode_line_1x25_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_line_1x25_p_bin_UNROLL 1 -#define rc_morph_erode_line_1x25_p_bin_SCORE 1.96e+10 +#define rc_morph_erode_line_1x25_p_bin_UNROLL 4 +#define rc_morph_erode_line_1x25_p_bin_SCORE 5.12e+10 #define rc_morph_dilate_line_1x25_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_line_1x25_p_bin_UNROLL 1 -#define rc_morph_dilate_line_1x25_p_bin_SCORE 2.00e+10 +#define rc_morph_dilate_line_1x25_p_bin_UNROLL 4 +#define rc_morph_dilate_line_1x25_p_bin_SCORE 5.18e+10 #define rc_morph_erode_line_1x29_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_line_1x29_p_bin_UNROLL 1 -#define rc_morph_erode_line_1x29_p_bin_SCORE 1.96e+10 +#define rc_morph_erode_line_1x29_p_bin_UNROLL 2 +#define rc_morph_erode_line_1x29_p_bin_SCORE 5.13e+10 #define rc_morph_dilate_line_1x29_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_line_1x29_p_bin_UNROLL 1 -#define rc_morph_dilate_line_1x29_p_bin_SCORE 2.00e+10 +#define rc_morph_dilate_line_1x29_p_bin_UNROLL 4 +#define rc_morph_dilate_line_1x29_p_bin_SCORE 5.17e+10 #define rc_morph_erode_line_1x31_p_bin_IMPL RC_IMPL_GEN #define rc_morph_erode_line_1x31_p_bin_UNROLL 2 -#define rc_morph_erode_line_1x31_p_bin_SCORE 1.98e+10 +#define 
rc_morph_erode_line_1x31_p_bin_SCORE 5.22e+10 #define rc_morph_dilate_line_1x31_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_line_1x31_p_bin_UNROLL 1 -#define rc_morph_dilate_line_1x31_p_bin_SCORE 2.00e+10 +#define rc_morph_dilate_line_1x31_p_bin_UNROLL 4 +#define rc_morph_dilate_line_1x31_p_bin_SCORE 5.19e+10 #define rc_morph_erode_line_2x1_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_line_2x1_bin_UNROLL 1 -#define rc_morph_erode_line_2x1_bin_SCORE 6.78e+10 +#define rc_morph_erode_line_2x1_bin_UNROLL 2 +#define rc_morph_erode_line_2x1_bin_SCORE 1.06e+11 #define rc_morph_dilate_line_2x1_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_line_2x1_bin_UNROLL 1 -#define rc_morph_dilate_line_2x1_bin_SCORE 6.82e+10 +#define rc_morph_dilate_line_2x1_bin_UNROLL 4 +#define rc_morph_dilate_line_2x1_bin_SCORE 1.09e+11 #define rc_morph_erode_line_3x1_bin_IMPL RC_IMPL_GEN #define rc_morph_erode_line_3x1_bin_UNROLL 1 -#define rc_morph_erode_line_3x1_bin_SCORE 5.11e+10 +#define rc_morph_erode_line_3x1_bin_SCORE 7.99e+10 #define rc_morph_dilate_line_3x1_bin_IMPL RC_IMPL_GEN #define rc_morph_dilate_line_3x1_bin_UNROLL 1 -#define rc_morph_dilate_line_3x1_bin_SCORE 5.11e+10 +#define rc_morph_dilate_line_3x1_bin_SCORE 7.98e+10 #define rc_morph_erode_line_3x1_p_bin_IMPL RC_IMPL_GEN #define rc_morph_erode_line_3x1_p_bin_UNROLL 1 -#define rc_morph_erode_line_3x1_p_bin_SCORE 6.48e+10 +#define rc_morph_erode_line_3x1_p_bin_SCORE 9.87e+10 #define rc_morph_dilate_line_3x1_p_bin_IMPL RC_IMPL_GEN #define rc_morph_dilate_line_3x1_p_bin_UNROLL 1 -#define rc_morph_dilate_line_3x1_p_bin_SCORE 6.49e+10 +#define rc_morph_dilate_line_3x1_p_bin_SCORE 1.00e+11 #define rc_morph_erode_line_5x1_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_line_5x1_p_bin_UNROLL 1 -#define rc_morph_erode_line_5x1_p_bin_SCORE 6.31e+10 +#define rc_morph_erode_line_5x1_p_bin_UNROLL 2 +#define rc_morph_erode_line_5x1_p_bin_SCORE 9.69e+10 #define rc_morph_dilate_line_5x1_p_bin_IMPL RC_IMPL_GEN -#define 
rc_morph_dilate_line_5x1_p_bin_UNROLL 1 -#define rc_morph_dilate_line_5x1_p_bin_SCORE 6.29e+10 +#define rc_morph_dilate_line_5x1_p_bin_UNROLL 4 +#define rc_morph_dilate_line_5x1_p_bin_SCORE 1.03e+11 #define rc_morph_erode_line_7x1_p_bin_IMPL RC_IMPL_GEN #define rc_morph_erode_line_7x1_p_bin_UNROLL 1 -#define rc_morph_erode_line_7x1_p_bin_SCORE 6.32e+10 +#define rc_morph_erode_line_7x1_p_bin_SCORE 9.99e+10 #define rc_morph_dilate_line_7x1_p_bin_IMPL RC_IMPL_GEN #define rc_morph_dilate_line_7x1_p_bin_UNROLL 1 -#define rc_morph_dilate_line_7x1_p_bin_SCORE 6.32e+10 +#define rc_morph_dilate_line_7x1_p_bin_SCORE 1.00e+11 #define rc_morph_erode_line_9x1_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_line_9x1_p_bin_UNROLL 1 -#define rc_morph_erode_line_9x1_p_bin_SCORE 6.35e+10 +#define rc_morph_erode_line_9x1_p_bin_UNROLL 2 +#define rc_morph_erode_line_9x1_p_bin_SCORE 1.05e+11 #define rc_morph_dilate_line_9x1_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_line_9x1_p_bin_UNROLL 1 -#define rc_morph_dilate_line_9x1_p_bin_SCORE 6.50e+10 +#define rc_morph_dilate_line_9x1_p_bin_UNROLL 4 +#define rc_morph_dilate_line_9x1_p_bin_SCORE 1.05e+11 #define rc_morph_erode_line_13x1_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_line_13x1_p_bin_UNROLL 1 -#define rc_morph_erode_line_13x1_p_bin_SCORE 6.31e+10 +#define rc_morph_erode_line_13x1_p_bin_UNROLL 2 +#define rc_morph_erode_line_13x1_p_bin_SCORE 1.06e+11 #define rc_morph_dilate_line_13x1_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_line_13x1_p_bin_UNROLL 1 -#define rc_morph_dilate_line_13x1_p_bin_SCORE 6.30e+10 +#define rc_morph_dilate_line_13x1_p_bin_UNROLL 4 +#define rc_morph_dilate_line_13x1_p_bin_SCORE 1.06e+11 #define rc_morph_erode_line_15x1_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_line_15x1_p_bin_UNROLL 1 -#define rc_morph_erode_line_15x1_p_bin_SCORE 6.49e+10 +#define rc_morph_erode_line_15x1_p_bin_UNROLL 2 +#define rc_morph_erode_line_15x1_p_bin_SCORE 1.07e+11 #define rc_morph_dilate_line_15x1_p_bin_IMPL RC_IMPL_GEN 
-#define rc_morph_dilate_line_15x1_p_bin_UNROLL 1 -#define rc_morph_dilate_line_15x1_p_bin_SCORE 6.43e+10 +#define rc_morph_dilate_line_15x1_p_bin_UNROLL 2 +#define rc_morph_dilate_line_15x1_p_bin_SCORE 1.06e+11 #define rc_morph_erode_line_17x1_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_line_17x1_p_bin_UNROLL 1 -#define rc_morph_erode_line_17x1_p_bin_SCORE 6.48e+10 +#define rc_morph_erode_line_17x1_p_bin_UNROLL 2 +#define rc_morph_erode_line_17x1_p_bin_SCORE 1.08e+11 #define rc_morph_dilate_line_17x1_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_line_17x1_p_bin_UNROLL 1 -#define rc_morph_dilate_line_17x1_p_bin_SCORE 6.49e+10 +#define rc_morph_dilate_line_17x1_p_bin_UNROLL 4 +#define rc_morph_dilate_line_17x1_p_bin_SCORE 1.06e+11 #define rc_morph_erode_line_25x1_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_line_25x1_p_bin_UNROLL 1 -#define rc_morph_erode_line_25x1_p_bin_SCORE 6.47e+10 +#define rc_morph_erode_line_25x1_p_bin_UNROLL 2 +#define rc_morph_erode_line_25x1_p_bin_SCORE 1.04e+11 #define rc_morph_dilate_line_25x1_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_line_25x1_p_bin_UNROLL 1 -#define rc_morph_dilate_line_25x1_p_bin_SCORE 6.30e+10 +#define rc_morph_dilate_line_25x1_p_bin_UNROLL 4 +#define rc_morph_dilate_line_25x1_p_bin_SCORE 1.06e+11 #define rc_morph_erode_line_29x1_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_line_29x1_p_bin_UNROLL 1 -#define rc_morph_erode_line_29x1_p_bin_SCORE 6.46e+10 +#define rc_morph_erode_line_29x1_p_bin_UNROLL 2 +#define rc_morph_erode_line_29x1_p_bin_SCORE 1.05e+11 #define rc_morph_dilate_line_29x1_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_line_29x1_p_bin_UNROLL 1 -#define rc_morph_dilate_line_29x1_p_bin_SCORE 6.48e+10 +#define rc_morph_dilate_line_29x1_p_bin_UNROLL 4 +#define rc_morph_dilate_line_29x1_p_bin_SCORE 1.05e+11 #define rc_morph_erode_line_31x1_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_line_31x1_p_bin_UNROLL 1 -#define rc_morph_erode_line_31x1_p_bin_SCORE 6.46e+10 +#define 
rc_morph_erode_line_31x1_p_bin_UNROLL 2 +#define rc_morph_erode_line_31x1_p_bin_SCORE 1.07e+11 #define rc_morph_dilate_line_31x1_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_line_31x1_p_bin_UNROLL 1 -#define rc_morph_dilate_line_31x1_p_bin_SCORE 6.23e+10 +#define rc_morph_dilate_line_31x1_p_bin_UNROLL 2 +#define rc_morph_dilate_line_31x1_p_bin_SCORE 1.07e+11 #define rc_morph_erode_square_2x2_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_square_2x2_bin_UNROLL 1 -#define rc_morph_erode_square_2x2_bin_SCORE 1.81e+10 +#define rc_morph_erode_square_2x2_bin_UNROLL 2 +#define rc_morph_erode_square_2x2_bin_SCORE 4.75e+10 #define rc_morph_dilate_square_2x2_bin_IMPL RC_IMPL_GEN #define rc_morph_dilate_square_2x2_bin_UNROLL 1 -#define rc_morph_dilate_square_2x2_bin_SCORE 3.29e+10 +#define rc_morph_dilate_square_2x2_bin_SCORE 4.51e+10 #define rc_morph_erode_square_3x3_bin_IMPL RC_IMPL_GEN #define rc_morph_erode_square_3x3_bin_UNROLL 1 -#define rc_morph_erode_square_3x3_bin_SCORE 1.19e+10 +#define rc_morph_erode_square_3x3_bin_SCORE 1.96e+10 #define rc_morph_dilate_square_3x3_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_square_3x3_bin_UNROLL 1 -#define rc_morph_dilate_square_3x3_bin_SCORE 1.09e+10 +#define rc_morph_dilate_square_3x3_bin_UNROLL 4 +#define rc_morph_dilate_square_3x3_bin_SCORE 1.84e+10 #define rc_morph_erode_square_3x3_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_square_3x3_p_bin_UNROLL 4 -#define rc_morph_erode_square_3x3_p_bin_SCORE 2.33e+10 +#define rc_morph_erode_square_3x3_p_bin_UNROLL 1 +#define rc_morph_erode_square_3x3_p_bin_SCORE 3.14e+10 #define rc_morph_dilate_square_3x3_p_bin_IMPL RC_IMPL_GEN #define rc_morph_dilate_square_3x3_p_bin_UNROLL 4 -#define rc_morph_dilate_square_3x3_p_bin_SCORE 2.18e+10 +#define rc_morph_dilate_square_3x3_p_bin_SCORE 3.11e+10 #define rc_morph_erode_diamond_3x3_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_diamond_3x3_bin_UNROLL 1 -#define rc_morph_erode_diamond_3x3_bin_SCORE 1.66e+10 +#define 
rc_morph_erode_diamond_3x3_bin_UNROLL 4 +#define rc_morph_erode_diamond_3x3_bin_SCORE 4.13e+10 #define rc_morph_dilate_diamond_3x3_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_diamond_3x3_bin_UNROLL 1 -#define rc_morph_dilate_diamond_3x3_bin_SCORE 1.71e+10 +#define rc_morph_dilate_diamond_3x3_bin_UNROLL 4 +#define rc_morph_dilate_diamond_3x3_bin_SCORE 4.23e+10 #define rc_morph_erode_diamond_3x3_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_diamond_3x3_p_bin_UNROLL 1 -#define rc_morph_erode_diamond_3x3_p_bin_SCORE 1.75e+10 +#define rc_morph_erode_diamond_3x3_p_bin_UNROLL 4 +#define rc_morph_erode_diamond_3x3_p_bin_SCORE 4.51e+10 #define rc_morph_dilate_diamond_3x3_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_diamond_3x3_p_bin_UNROLL 1 -#define rc_morph_dilate_diamond_3x3_p_bin_SCORE 1.76e+10 +#define rc_morph_dilate_diamond_3x3_p_bin_UNROLL 4 +#define rc_morph_dilate_diamond_3x3_p_bin_SCORE 4.50e+10 #define rc_morph_erode_diamond_5x5_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_diamond_5x5_p_bin_UNROLL 1 -#define rc_morph_erode_diamond_5x5_p_bin_SCORE 1.75e+10 +#define rc_morph_erode_diamond_5x5_p_bin_UNROLL 4 +#define rc_morph_erode_diamond_5x5_p_bin_SCORE 4.43e+10 #define rc_morph_dilate_diamond_5x5_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_diamond_5x5_p_bin_UNROLL 1 -#define rc_morph_dilate_diamond_5x5_p_bin_SCORE 1.75e+10 +#define rc_morph_dilate_diamond_5x5_p_bin_UNROLL 4 +#define rc_morph_dilate_diamond_5x5_p_bin_SCORE 4.50e+10 #define rc_morph_erode_diamond_7x7_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_diamond_7x7_p_bin_UNROLL 1 -#define rc_morph_erode_diamond_7x7_p_bin_SCORE 1.75e+10 +#define rc_morph_erode_diamond_7x7_p_bin_UNROLL 4 +#define rc_morph_erode_diamond_7x7_p_bin_SCORE 4.34e+10 #define rc_morph_dilate_diamond_7x7_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_diamond_7x7_p_bin_UNROLL 1 -#define rc_morph_dilate_diamond_7x7_p_bin_SCORE 1.75e+10 +#define rc_morph_dilate_diamond_7x7_p_bin_UNROLL 4 +#define 
rc_morph_dilate_diamond_7x7_p_bin_SCORE 4.57e+10 #define rc_morph_erode_diamond_9x9_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_diamond_9x9_p_bin_UNROLL 1 -#define rc_morph_erode_diamond_9x9_p_bin_SCORE 1.68e+10 +#define rc_morph_erode_diamond_9x9_p_bin_UNROLL 4 +#define rc_morph_erode_diamond_9x9_p_bin_SCORE 4.12e+10 #define rc_morph_dilate_diamond_9x9_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_diamond_9x9_p_bin_UNROLL 1 -#define rc_morph_dilate_diamond_9x9_p_bin_SCORE 1.75e+10 +#define rc_morph_dilate_diamond_9x9_p_bin_UNROLL 4 +#define rc_morph_dilate_diamond_9x9_p_bin_SCORE 4.20e+10 #define rc_morph_erode_diamond_13x13_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_diamond_13x13_p_bin_UNROLL 1 -#define rc_morph_erode_diamond_13x13_p_bin_SCORE 1.68e+10 +#define rc_morph_erode_diamond_13x13_p_bin_UNROLL 4 +#define rc_morph_erode_diamond_13x13_p_bin_SCORE 4.07e+10 #define rc_morph_dilate_diamond_13x13_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_diamond_13x13_p_bin_UNROLL 1 -#define rc_morph_dilate_diamond_13x13_p_bin_SCORE 1.75e+10 +#define rc_morph_dilate_diamond_13x13_p_bin_UNROLL 4 +#define rc_morph_dilate_diamond_13x13_p_bin_SCORE 4.18e+10 #define rc_morph_erode_diamond_15x15_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_diamond_15x15_p_bin_UNROLL 1 -#define rc_morph_erode_diamond_15x15_p_bin_SCORE 1.68e+10 +#define rc_morph_erode_diamond_15x15_p_bin_UNROLL 4 +#define rc_morph_erode_diamond_15x15_p_bin_SCORE 4.14e+10 #define rc_morph_dilate_diamond_15x15_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_diamond_15x15_p_bin_UNROLL 1 -#define rc_morph_dilate_diamond_15x15_p_bin_SCORE 1.75e+10 +#define rc_morph_dilate_diamond_15x15_p_bin_UNROLL 4 +#define rc_morph_dilate_diamond_15x15_p_bin_SCORE 4.12e+10 #define rc_morph_erode_diamond_17x17_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_diamond_17x17_p_bin_UNROLL 1 -#define rc_morph_erode_diamond_17x17_p_bin_SCORE 1.77e+10 +#define rc_morph_erode_diamond_17x17_p_bin_UNROLL 4 +#define 
rc_morph_erode_diamond_17x17_p_bin_SCORE 4.08e+10 #define rc_morph_dilate_diamond_17x17_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_diamond_17x17_p_bin_UNROLL 1 -#define rc_morph_dilate_diamond_17x17_p_bin_SCORE 1.84e+10 +#define rc_morph_dilate_diamond_17x17_p_bin_UNROLL 4 +#define rc_morph_dilate_diamond_17x17_p_bin_SCORE 4.16e+10 #define rc_morph_erode_diamond_25x25_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_diamond_25x25_p_bin_UNROLL 1 -#define rc_morph_erode_diamond_25x25_p_bin_SCORE 1.68e+10 +#define rc_morph_erode_diamond_25x25_p_bin_UNROLL 4 +#define rc_morph_erode_diamond_25x25_p_bin_SCORE 4.08e+10 #define rc_morph_dilate_diamond_25x25_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_diamond_25x25_p_bin_UNROLL 1 -#define rc_morph_dilate_diamond_25x25_p_bin_SCORE 1.76e+10 +#define rc_morph_dilate_diamond_25x25_p_bin_UNROLL 4 +#define rc_morph_dilate_diamond_25x25_p_bin_SCORE 4.14e+10 #define rc_morph_erode_diamond_29x29_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_diamond_29x29_p_bin_UNROLL 1 -#define rc_morph_erode_diamond_29x29_p_bin_SCORE 1.68e+10 +#define rc_morph_erode_diamond_29x29_p_bin_UNROLL 4 +#define rc_morph_erode_diamond_29x29_p_bin_SCORE 4.10e+10 #define rc_morph_dilate_diamond_29x29_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_diamond_29x29_p_bin_UNROLL 1 -#define rc_morph_dilate_diamond_29x29_p_bin_SCORE 1.75e+10 +#define rc_morph_dilate_diamond_29x29_p_bin_UNROLL 4 +#define rc_morph_dilate_diamond_29x29_p_bin_SCORE 4.17e+10 #define rc_morph_erode_diamond_31x31_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_diamond_31x31_p_bin_UNROLL 1 -#define rc_morph_erode_diamond_31x31_p_bin_SCORE 1.68e+10 +#define rc_morph_erode_diamond_31x31_p_bin_UNROLL 4 +#define rc_morph_erode_diamond_31x31_p_bin_SCORE 4.07e+10 #define rc_morph_dilate_diamond_31x31_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_diamond_31x31_p_bin_UNROLL 1 -#define rc_morph_dilate_diamond_31x31_p_bin_SCORE 1.75e+10 +#define rc_morph_dilate_diamond_31x31_p_bin_UNROLL 4 
+#define rc_morph_dilate_diamond_31x31_p_bin_SCORE 4.08e+10 #define rc_morph_erode_octagon_5x5_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_octagon_5x5_p_bin_UNROLL 4 -#define rc_morph_erode_octagon_5x5_p_bin_SCORE 1.27e+10 +#define rc_morph_erode_octagon_5x5_p_bin_UNROLL 1 +#define rc_morph_erode_octagon_5x5_p_bin_SCORE 1.62e+10 #define rc_morph_dilate_octagon_5x5_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_octagon_5x5_p_bin_UNROLL 4 -#define rc_morph_dilate_octagon_5x5_p_bin_SCORE 1.17e+10 +#define rc_morph_dilate_octagon_5x5_p_bin_UNROLL 1 +#define rc_morph_dilate_octagon_5x5_p_bin_SCORE 1.55e+10 #define rc_morph_erode_octagon_7x7_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_octagon_7x7_p_bin_UNROLL 4 -#define rc_morph_erode_octagon_7x7_p_bin_SCORE 1.29e+10 +#define rc_morph_erode_octagon_7x7_p_bin_UNROLL 1 +#define rc_morph_erode_octagon_7x7_p_bin_SCORE 1.63e+10 #define rc_morph_dilate_octagon_7x7_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_octagon_7x7_p_bin_UNROLL 4 -#define rc_morph_dilate_octagon_7x7_p_bin_SCORE 1.17e+10 +#define rc_morph_dilate_octagon_7x7_p_bin_UNROLL 1 +#define rc_morph_dilate_octagon_7x7_p_bin_SCORE 1.55e+10 #define rc_morph_erode_octagon_9x9_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_octagon_9x9_p_bin_UNROLL 4 -#define rc_morph_erode_octagon_9x9_p_bin_SCORE 1.26e+10 +#define rc_morph_erode_octagon_9x9_p_bin_UNROLL 1 +#define rc_morph_erode_octagon_9x9_p_bin_SCORE 1.54e+10 #define rc_morph_dilate_octagon_9x9_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_octagon_9x9_p_bin_UNROLL 4 -#define rc_morph_dilate_octagon_9x9_p_bin_SCORE 1.17e+10 +#define rc_morph_dilate_octagon_9x9_p_bin_UNROLL 1 +#define rc_morph_dilate_octagon_9x9_p_bin_SCORE 1.54e+10 #define rc_morph_erode_octagon_13x13_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_octagon_13x13_p_bin_UNROLL 4 -#define rc_morph_erode_octagon_13x13_p_bin_SCORE 1.26e+10 +#define rc_morph_erode_octagon_13x13_p_bin_UNROLL 1 +#define rc_morph_erode_octagon_13x13_p_bin_SCORE 
1.53e+10 #define rc_morph_dilate_octagon_13x13_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_octagon_13x13_p_bin_UNROLL 4 -#define rc_morph_dilate_octagon_13x13_p_bin_SCORE 1.18e+10 +#define rc_morph_dilate_octagon_13x13_p_bin_UNROLL 1 +#define rc_morph_dilate_octagon_13x13_p_bin_SCORE 1.56e+10 #define rc_morph_erode_octagon_15x15_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_octagon_15x15_p_bin_UNROLL 4 -#define rc_morph_erode_octagon_15x15_p_bin_SCORE 1.25e+10 +#define rc_morph_erode_octagon_15x15_p_bin_UNROLL 1 +#define rc_morph_erode_octagon_15x15_p_bin_SCORE 1.56e+10 #define rc_morph_dilate_octagon_15x15_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_octagon_15x15_p_bin_UNROLL 4 -#define rc_morph_dilate_octagon_15x15_p_bin_SCORE 1.17e+10 +#define rc_morph_dilate_octagon_15x15_p_bin_UNROLL 1 +#define rc_morph_dilate_octagon_15x15_p_bin_SCORE 1.54e+10 #define rc_morph_erode_octagon_17x17_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_octagon_17x17_p_bin_UNROLL 4 -#define rc_morph_erode_octagon_17x17_p_bin_SCORE 1.27e+10 +#define rc_morph_erode_octagon_17x17_p_bin_UNROLL 1 +#define rc_morph_erode_octagon_17x17_p_bin_SCORE 1.55e+10 #define rc_morph_dilate_octagon_17x17_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_octagon_17x17_p_bin_UNROLL 4 -#define rc_morph_dilate_octagon_17x17_p_bin_SCORE 1.20e+10 +#define rc_morph_dilate_octagon_17x17_p_bin_UNROLL 1 +#define rc_morph_dilate_octagon_17x17_p_bin_SCORE 1.56e+10 #define rc_morph_erode_octagon_25x25_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_octagon_25x25_p_bin_UNROLL 4 -#define rc_morph_erode_octagon_25x25_p_bin_SCORE 1.24e+10 +#define rc_morph_erode_octagon_25x25_p_bin_UNROLL 1 +#define rc_morph_erode_octagon_25x25_p_bin_SCORE 1.53e+10 #define rc_morph_dilate_octagon_25x25_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_octagon_25x25_p_bin_UNROLL 4 -#define rc_morph_dilate_octagon_25x25_p_bin_SCORE 1.17e+10 +#define rc_morph_dilate_octagon_25x25_p_bin_UNROLL 1 +#define 
rc_morph_dilate_octagon_25x25_p_bin_SCORE 1.54e+10 #define rc_morph_erode_octagon_29x29_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_octagon_29x29_p_bin_UNROLL 4 -#define rc_morph_erode_octagon_29x29_p_bin_SCORE 1.23e+10 +#define rc_morph_erode_octagon_29x29_p_bin_UNROLL 1 +#define rc_morph_erode_octagon_29x29_p_bin_SCORE 1.54e+10 #define rc_morph_dilate_octagon_29x29_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_octagon_29x29_p_bin_UNROLL 4 -#define rc_morph_dilate_octagon_29x29_p_bin_SCORE 1.17e+10 +#define rc_morph_dilate_octagon_29x29_p_bin_UNROLL 1 +#define rc_morph_dilate_octagon_29x29_p_bin_SCORE 1.54e+10 #define rc_morph_erode_octagon_31x31_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_octagon_31x31_p_bin_UNROLL 4 -#define rc_morph_erode_octagon_31x31_p_bin_SCORE 1.24e+10 +#define rc_morph_erode_octagon_31x31_p_bin_UNROLL 1 +#define rc_morph_erode_octagon_31x31_p_bin_SCORE 1.52e+10 #define rc_morph_dilate_octagon_31x31_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_octagon_31x31_p_bin_UNROLL 4 -#define rc_morph_dilate_octagon_31x31_p_bin_SCORE 1.17e+10 +#define rc_morph_dilate_octagon_31x31_p_bin_UNROLL 1 +#define rc_morph_dilate_octagon_31x31_p_bin_SCORE 1.54e+10 #define rc_morph_erode_disc_7x7_bin_IMPL RC_IMPL_GEN #define rc_morph_erode_disc_7x7_bin_UNROLL 1 -#define rc_morph_erode_disc_7x7_bin_SCORE 4.37e+09 +#define rc_morph_erode_disc_7x7_bin_SCORE 5.70e+09 #define rc_morph_dilate_disc_7x7_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_disc_7x7_bin_UNROLL 1 -#define rc_morph_dilate_disc_7x7_bin_SCORE 3.72e+09 +#define rc_morph_dilate_disc_7x7_bin_UNROLL 4 +#define rc_morph_dilate_disc_7x7_bin_SCORE 5.25e+09 #define rc_morph_erode_disc_7x7_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_disc_7x7_p_bin_UNROLL 1 -#define rc_morph_erode_disc_7x7_p_bin_SCORE 1.10e+10 +#define rc_morph_erode_disc_7x7_p_bin_UNROLL 4 +#define rc_morph_erode_disc_7x7_p_bin_SCORE 1.76e+10 #define rc_morph_dilate_disc_7x7_p_bin_IMPL RC_IMPL_GEN -#define 
rc_morph_dilate_disc_7x7_p_bin_UNROLL 1 -#define rc_morph_dilate_disc_7x7_p_bin_SCORE 1.05e+10 +#define rc_morph_dilate_disc_7x7_p_bin_UNROLL 4 +#define rc_morph_dilate_disc_7x7_p_bin_SCORE 1.76e+10 #define rc_morph_erode_disc_9x9_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_disc_9x9_p_bin_UNROLL 1 -#define rc_morph_erode_disc_9x9_p_bin_SCORE 1.12e+10 +#define rc_morph_erode_disc_9x9_p_bin_UNROLL 4 +#define rc_morph_erode_disc_9x9_p_bin_SCORE 1.79e+10 #define rc_morph_dilate_disc_9x9_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_disc_9x9_p_bin_UNROLL 1 -#define rc_morph_dilate_disc_9x9_p_bin_SCORE 1.07e+10 +#define rc_morph_dilate_disc_9x9_p_bin_UNROLL 4 +#define rc_morph_dilate_disc_9x9_p_bin_SCORE 1.77e+10 #define rc_morph_erode_disc_11x11_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_disc_11x11_p_bin_UNROLL 4 -#define rc_morph_erode_disc_11x11_p_bin_SCORE 6.75e+09 +#define rc_morph_erode_disc_11x11_p_bin_UNROLL 2 +#define rc_morph_erode_disc_11x11_p_bin_SCORE 8.35e+09 #define rc_morph_dilate_disc_11x11_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_disc_11x11_p_bin_UNROLL 4 -#define rc_morph_dilate_disc_11x11_p_bin_SCORE 6.06e+09 +#define rc_morph_dilate_disc_11x11_p_bin_UNROLL 1 +#define rc_morph_dilate_disc_11x11_p_bin_SCORE 8.07e+09 #define rc_morph_erode_disc_13x13_p_bin_IMPL RC_IMPL_GEN #define rc_morph_erode_disc_13x13_p_bin_UNROLL 4 -#define rc_morph_erode_disc_13x13_p_bin_SCORE 5.06e+09 +#define rc_morph_erode_disc_13x13_p_bin_SCORE 6.60e+09 #define rc_morph_dilate_disc_13x13_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_disc_13x13_p_bin_UNROLL 4 -#define rc_morph_dilate_disc_13x13_p_bin_SCORE 4.85e+09 +#define rc_morph_dilate_disc_13x13_p_bin_UNROLL 1 +#define rc_morph_dilate_disc_13x13_p_bin_SCORE 6.58e+09 #define rc_morph_erode_disc_15x15_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_disc_15x15_p_bin_UNROLL 4 -#define rc_morph_erode_disc_15x15_p_bin_SCORE 5.15e+09 +#define rc_morph_erode_disc_15x15_p_bin_UNROLL 1 +#define 
rc_morph_erode_disc_15x15_p_bin_SCORE 6.55e+09 #define rc_morph_dilate_disc_15x15_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_disc_15x15_p_bin_UNROLL 4 -#define rc_morph_dilate_disc_15x15_p_bin_SCORE 4.87e+09 +#define rc_morph_dilate_disc_15x15_p_bin_UNROLL 1 +#define rc_morph_dilate_disc_15x15_p_bin_SCORE 6.55e+09 #define rc_morph_erode_disc_17x17_p_bin_IMPL RC_IMPL_GEN #define rc_morph_erode_disc_17x17_p_bin_UNROLL 4 -#define rc_morph_erode_disc_17x17_p_bin_SCORE 5.15e+09 +#define rc_morph_erode_disc_17x17_p_bin_SCORE 6.71e+09 #define rc_morph_dilate_disc_17x17_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_disc_17x17_p_bin_UNROLL 4 -#define rc_morph_dilate_disc_17x17_p_bin_SCORE 4.84e+09 +#define rc_morph_dilate_disc_17x17_p_bin_UNROLL 1 +#define rc_morph_dilate_disc_17x17_p_bin_SCORE 6.52e+09 #define rc_morph_erode_disc_19x19_p_bin_IMPL RC_IMPL_GEN #define rc_morph_erode_disc_19x19_p_bin_UNROLL 4 -#define rc_morph_erode_disc_19x19_p_bin_SCORE 5.11e+09 +#define rc_morph_erode_disc_19x19_p_bin_SCORE 6.79e+09 #define rc_morph_dilate_disc_19x19_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_disc_19x19_p_bin_UNROLL 4 -#define rc_morph_dilate_disc_19x19_p_bin_SCORE 4.83e+09 +#define rc_morph_dilate_disc_19x19_p_bin_UNROLL 1 +#define rc_morph_dilate_disc_19x19_p_bin_SCORE 6.59e+09 #define rc_morph_erode_disc_25x25_p_bin_IMPL RC_IMPL_GEN #define rc_morph_erode_disc_25x25_p_bin_UNROLL 4 -#define rc_morph_erode_disc_25x25_p_bin_SCORE 3.14e+09 +#define rc_morph_erode_disc_25x25_p_bin_SCORE 4.69e+09 #define rc_morph_dilate_disc_25x25_p_bin_IMPL RC_IMPL_GEN #define rc_morph_dilate_disc_25x25_p_bin_UNROLL 4 -#define rc_morph_dilate_disc_25x25_p_bin_SCORE 3.27e+09 +#define rc_morph_dilate_disc_25x25_p_bin_SCORE 4.36e+09 #define rc_morph_hmt_golay_l_3x3_c48_r0_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_l_3x3_c48_r0_bin_UNROLL 4 -#define rc_morph_hmt_golay_l_3x3_c48_r0_bin_SCORE 2.05e+10 +#define rc_morph_hmt_golay_l_3x3_c48_r0_bin_UNROLL 1 +#define 
rc_morph_hmt_golay_l_3x3_c48_r0_bin_SCORE 2.79e+10 #define rc_morph_hmt_golay_l_3x3_c48_r90_bin_IMPL RC_IMPL_GEN #define rc_morph_hmt_golay_l_3x3_c48_r90_bin_UNROLL 1 -#define rc_morph_hmt_golay_l_3x3_c48_r90_bin_SCORE 1.14e+10 +#define rc_morph_hmt_golay_l_3x3_c48_r90_bin_SCORE 2.05e+10 #define rc_morph_hmt_golay_l_3x3_c48_r180_bin_IMPL RC_IMPL_GEN #define rc_morph_hmt_golay_l_3x3_c48_r180_bin_UNROLL 4 -#define rc_morph_hmt_golay_l_3x3_c48_r180_bin_SCORE 1.95e+10 +#define rc_morph_hmt_golay_l_3x3_c48_r180_bin_SCORE 2.69e+10 #define rc_morph_hmt_golay_l_3x3_c48_r270_bin_IMPL RC_IMPL_GEN #define rc_morph_hmt_golay_l_3x3_c48_r270_bin_UNROLL 1 -#define rc_morph_hmt_golay_l_3x3_c48_r270_bin_SCORE 1.10e+10 +#define rc_morph_hmt_golay_l_3x3_c48_r270_bin_SCORE 1.97e+10 #define rc_morph_hmt_golay_l_3x3_c4_r45_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_l_3x3_c4_r45_bin_UNROLL 1 -#define rc_morph_hmt_golay_l_3x3_c4_r45_bin_SCORE 1.51e+10 +#define rc_morph_hmt_golay_l_3x3_c4_r45_bin_UNROLL 4 +#define rc_morph_hmt_golay_l_3x3_c4_r45_bin_SCORE 3.31e+10 #define rc_morph_hmt_golay_l_3x3_c4_r135_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_l_3x3_c4_r135_bin_UNROLL 1 -#define rc_morph_hmt_golay_l_3x3_c4_r135_bin_SCORE 1.50e+10 +#define rc_morph_hmt_golay_l_3x3_c4_r135_bin_UNROLL 2 +#define rc_morph_hmt_golay_l_3x3_c4_r135_bin_SCORE 3.29e+10 #define rc_morph_hmt_golay_l_3x3_c4_r225_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_l_3x3_c4_r225_bin_UNROLL 1 -#define rc_morph_hmt_golay_l_3x3_c4_r225_bin_SCORE 1.46e+10 +#define rc_morph_hmt_golay_l_3x3_c4_r225_bin_UNROLL 2 +#define rc_morph_hmt_golay_l_3x3_c4_r225_bin_SCORE 3.07e+10 #define rc_morph_hmt_golay_l_3x3_c4_r315_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_l_3x3_c4_r315_bin_UNROLL 1 -#define rc_morph_hmt_golay_l_3x3_c4_r315_bin_SCORE 1.44e+10 +#define rc_morph_hmt_golay_l_3x3_c4_r315_bin_UNROLL 2 +#define rc_morph_hmt_golay_l_3x3_c4_r315_bin_SCORE 3.00e+10 #define rc_morph_hmt_golay_l_3x3_c8_r45_bin_IMPL 
RC_IMPL_GEN -#define rc_morph_hmt_golay_l_3x3_c8_r45_bin_UNROLL 1 -#define rc_morph_hmt_golay_l_3x3_c8_r45_bin_SCORE 1.54e+10 +#define rc_morph_hmt_golay_l_3x3_c8_r45_bin_UNROLL 2 +#define rc_morph_hmt_golay_l_3x3_c8_r45_bin_SCORE 3.25e+10 #define rc_morph_hmt_golay_l_3x3_c8_r135_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_l_3x3_c8_r135_bin_UNROLL 1 -#define rc_morph_hmt_golay_l_3x3_c8_r135_bin_SCORE 1.55e+10 +#define rc_morph_hmt_golay_l_3x3_c8_r135_bin_UNROLL 4 +#define rc_morph_hmt_golay_l_3x3_c8_r135_bin_SCORE 3.16e+10 #define rc_morph_hmt_golay_l_3x3_c8_r225_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_l_3x3_c8_r225_bin_UNROLL 1 -#define rc_morph_hmt_golay_l_3x3_c8_r225_bin_SCORE 1.49e+10 +#define rc_morph_hmt_golay_l_3x3_c8_r225_bin_UNROLL 2 +#define rc_morph_hmt_golay_l_3x3_c8_r225_bin_SCORE 3.02e+10 #define rc_morph_hmt_golay_l_3x3_c8_r315_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_l_3x3_c8_r315_bin_UNROLL 1 -#define rc_morph_hmt_golay_l_3x3_c8_r315_bin_SCORE 1.41e+10 +#define rc_morph_hmt_golay_l_3x3_c8_r315_bin_UNROLL 2 +#define rc_morph_hmt_golay_l_3x3_c8_r315_bin_SCORE 3.08e+10 #define rc_morph_hmt_golay_e_3x3_c4_r0_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_e_3x3_c4_r0_bin_UNROLL 1 -#define rc_morph_hmt_golay_e_3x3_c4_r0_bin_SCORE 1.70e+10 +#define rc_morph_hmt_golay_e_3x3_c4_r0_bin_UNROLL 4 +#define rc_morph_hmt_golay_e_3x3_c4_r0_bin_SCORE 4.76e+10 #define rc_morph_hmt_golay_e_3x3_c4_r90_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_e_3x3_c4_r90_bin_UNROLL 1 -#define rc_morph_hmt_golay_e_3x3_c4_r90_bin_SCORE 3.85e+10 +#define rc_morph_hmt_golay_e_3x3_c4_r90_bin_UNROLL 4 +#define rc_morph_hmt_golay_e_3x3_c4_r90_bin_SCORE 5.73e+10 #define rc_morph_hmt_golay_e_3x3_c4_r180_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_e_3x3_c4_r180_bin_UNROLL 2 -#define rc_morph_hmt_golay_e_3x3_c4_r180_bin_SCORE 1.76e+10 +#define rc_morph_hmt_golay_e_3x3_c4_r180_bin_UNROLL 4 +#define rc_morph_hmt_golay_e_3x3_c4_r180_bin_SCORE 4.80e+10 #define 
rc_morph_hmt_golay_e_3x3_c4_r270_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_e_3x3_c4_r270_bin_UNROLL 1 -#define rc_morph_hmt_golay_e_3x3_c4_r270_bin_SCORE 1.87e+10 +#define rc_morph_hmt_golay_e_3x3_c4_r270_bin_UNROLL 4 +#define rc_morph_hmt_golay_e_3x3_c4_r270_bin_SCORE 6.18e+10 #define rc_morph_hmt_golay_e_3x3_c8_r0_bin_IMPL RC_IMPL_GEN #define rc_morph_hmt_golay_e_3x3_c8_r0_bin_UNROLL 1 -#define rc_morph_hmt_golay_e_3x3_c8_r0_bin_SCORE 1.20e+10 +#define rc_morph_hmt_golay_e_3x3_c8_r0_bin_SCORE 2.31e+10 #define rc_morph_hmt_golay_e_3x3_c8_r90_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_e_3x3_c8_r90_bin_UNROLL 1 -#define rc_morph_hmt_golay_e_3x3_c8_r90_bin_SCORE 1.85e+10 +#define rc_morph_hmt_golay_e_3x3_c8_r90_bin_UNROLL 4 +#define rc_morph_hmt_golay_e_3x3_c8_r90_bin_SCORE 2.57e+10 #define rc_morph_hmt_golay_e_3x3_c8_r180_bin_IMPL RC_IMPL_GEN #define rc_morph_hmt_golay_e_3x3_c8_r180_bin_UNROLL 1 -#define rc_morph_hmt_golay_e_3x3_c8_r180_bin_SCORE 1.24e+10 +#define rc_morph_hmt_golay_e_3x3_c8_r180_bin_SCORE 2.21e+10 #define rc_morph_hmt_golay_e_3x3_c8_r270_bin_IMPL RC_IMPL_GEN #define rc_morph_hmt_golay_e_3x3_c8_r270_bin_UNROLL 1 -#define rc_morph_hmt_golay_e_3x3_c8_r270_bin_SCORE 1.25e+10 +#define rc_morph_hmt_golay_e_3x3_c8_r270_bin_SCORE 2.76e+10 #define rc_morph_hmt_golay_e_3x3_c8_r45_bin_IMPL RC_IMPL_GEN #define rc_morph_hmt_golay_e_3x3_c8_r45_bin_UNROLL 1 -#define rc_morph_hmt_golay_e_3x3_c8_r45_bin_SCORE 1.33e+10 +#define rc_morph_hmt_golay_e_3x3_c8_r45_bin_SCORE 2.79e+10 #define rc_morph_hmt_golay_e_3x3_c8_r135_bin_IMPL RC_IMPL_GEN #define rc_morph_hmt_golay_e_3x3_c8_r135_bin_UNROLL 1 -#define rc_morph_hmt_golay_e_3x3_c8_r135_bin_SCORE 1.32e+10 +#define rc_morph_hmt_golay_e_3x3_c8_r135_bin_SCORE 2.25e+10 #define rc_morph_hmt_golay_e_3x3_c8_r225_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_e_3x3_c8_r225_bin_UNROLL 1 -#define rc_morph_hmt_golay_e_3x3_c8_r225_bin_SCORE 1.85e+10 +#define rc_morph_hmt_golay_e_3x3_c8_r225_bin_UNROLL 4 +#define 
rc_morph_hmt_golay_e_3x3_c8_r225_bin_SCORE 2.59e+10 #define rc_morph_hmt_golay_e_3x3_c8_r315_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_e_3x3_c8_r315_bin_UNROLL 1 -#define rc_morph_hmt_golay_e_3x3_c8_r315_bin_SCORE 1.13e+10 +#define rc_morph_hmt_golay_e_3x3_c8_r315_bin_UNROLL 4 +#define rc_morph_hmt_golay_e_3x3_c8_r315_bin_SCORE 2.34e+10 #define rc_margin_horz_bin_IMPL RC_IMPL_SIMD #define rc_margin_horz_bin_UNROLL 1 -#define rc_margin_horz_bin_SCORE 1.19e+11 +#define rc_margin_horz_bin_SCORE 1.93e+11 #define rc_margin_vert_bin_IMPL RC_IMPL_GEN #define rc_margin_vert_bin_UNROLL 4 -#define rc_margin_vert_bin_SCORE 1.01e+11 +#define rc_margin_vert_bin_SCORE 1.86e+11 #endif /* RAPPTUNE_H */ diff --git a/compute/tune/benchmark/rc_benchmark.c b/compute/tune/benchmark/rc_benchmark.c index aadca70..dafae6a 100644 --- a/compute/tune/benchmark/rc_benchmark.c +++ b/compute/tune/benchmark/rc_benchmark.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2012, Axis Communications AB, LUND, SWEDEN +/* Copyright (C) 2005-2012, 2016 Axis Communications AB, LUND, SWEDEN * * This file is part of RAPP. * @@ -186,6 +186,9 @@ rc_bmark_exec_u8_u8_p(int (*func)(), const int *args); static void rc_bmark_exec_rotate(int (*func)(), const int *args); +static void +rc_bmark_exec_thresh_pixel(int (*func)(), const int *args); + /* * ------------------------------------------------------------- @@ -204,268 +207,269 @@ static const rc_bmark_table_t rc_bmark_suite[] = { * first comma. 
*/ /* Word-misaligned bitblt */ - RC_BMARK_ENTRY(rc_bitblt_wm_copy_bin, bin_bin_m, 3, 0), - RC_BMARK_ENTRY(rc_bitblt_wm_not_bin, bin_bin_m, 3, 0), - RC_BMARK_ENTRY(rc_bitblt_wm_and_bin, bin_bin_m, 3, 0), - RC_BMARK_ENTRY(rc_bitblt_wm_or_bin, bin_bin_m, 3, 0), - RC_BMARK_ENTRY(rc_bitblt_wm_xor_bin, bin_bin_m, 3, 0), - RC_BMARK_ENTRY(rc_bitblt_wm_nand_bin, bin_bin_m, 3, 0), - RC_BMARK_ENTRY(rc_bitblt_wm_nor_bin, bin_bin_m, 3, 0), - RC_BMARK_ENTRY(rc_bitblt_wm_xnor_bin, bin_bin_m, 3, 0), - RC_BMARK_ENTRY(rc_bitblt_wm_andn_bin, bin_bin_m, 3, 0), - RC_BMARK_ENTRY(rc_bitblt_wm_orn_bin, bin_bin_m, 3, 0), - RC_BMARK_ENTRY(rc_bitblt_wm_nandn_bin, bin_bin_m, 3, 0), - RC_BMARK_ENTRY(rc_bitblt_wm_norn_bin, bin_bin_m, 3, 0), + RC_BMARK_ENTRY(rc_bitblt_wm_copy_bin, bin_bin_m, 3, 0), + RC_BMARK_ENTRY(rc_bitblt_wm_not_bin, bin_bin_m, 3, 0), + RC_BMARK_ENTRY(rc_bitblt_wm_and_bin, bin_bin_m, 3, 0), + RC_BMARK_ENTRY(rc_bitblt_wm_or_bin, bin_bin_m, 3, 0), + RC_BMARK_ENTRY(rc_bitblt_wm_xor_bin, bin_bin_m, 3, 0), + RC_BMARK_ENTRY(rc_bitblt_wm_nand_bin, bin_bin_m, 3, 0), + RC_BMARK_ENTRY(rc_bitblt_wm_nor_bin, bin_bin_m, 3, 0), + RC_BMARK_ENTRY(rc_bitblt_wm_xnor_bin, bin_bin_m, 3, 0), + RC_BMARK_ENTRY(rc_bitblt_wm_andn_bin, bin_bin_m, 3, 0), + RC_BMARK_ENTRY(rc_bitblt_wm_orn_bin, bin_bin_m, 3, 0), + RC_BMARK_ENTRY(rc_bitblt_wm_nandn_bin, bin_bin_m, 3, 0), + RC_BMARK_ENTRY(rc_bitblt_wm_norn_bin, bin_bin_m, 3, 0), /* Word-aligned bitblt */ - RC_BMARK_ENTRY(rc_bitblt_wa_copy_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_wa_not_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_wa_and_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_wa_or_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_wa_xor_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_wa_nand_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_wa_nor_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_wa_xnor_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_wa_andn_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_wa_orn_bin, bin_bin, 0, 0), - 
RC_BMARK_ENTRY(rc_bitblt_wa_nandn_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_wa_norn_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_wa_copy_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_wa_not_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_wa_and_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_wa_or_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_wa_xor_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_wa_nand_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_wa_nor_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_wa_xnor_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_wa_andn_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_wa_orn_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_wa_nandn_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_wa_norn_bin, bin_bin, 0, 0), /* Vector-misaligned bitblt */ - RC_BMARK_ENTRY(rc_bitblt_vm_copy_bin, bin_bin_m, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_vm_not_bin, bin_bin_m, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_vm_and_bin, bin_bin_m, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_vm_or_bin, bin_bin_m, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_vm_xor_bin, bin_bin_m, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_vm_nand_bin, bin_bin_m, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_vm_nor_bin, bin_bin_m, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_vm_xnor_bin, bin_bin_m, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_vm_andn_bin, bin_bin_m, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_vm_orn_bin, bin_bin_m, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_vm_nandn_bin, bin_bin_m, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_vm_norn_bin, bin_bin_m, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_vm_copy_bin, bin_bin_m, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_vm_not_bin, bin_bin_m, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_vm_and_bin, bin_bin_m, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_vm_or_bin, bin_bin_m, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_vm_xor_bin, bin_bin_m, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_vm_nand_bin, bin_bin_m, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_vm_nor_bin, bin_bin_m, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_vm_xnor_bin, bin_bin_m, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_vm_andn_bin, 
bin_bin_m, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_vm_orn_bin, bin_bin_m, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_vm_nandn_bin, bin_bin_m, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_vm_norn_bin, bin_bin_m, 0, 0), /* Vector-aligned bitblt */ - RC_BMARK_ENTRY(rc_bitblt_va_copy_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_va_not_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_va_and_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_va_or_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_va_xor_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_va_nand_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_va_nor_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_va_xnor_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_va_andn_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_va_orn_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_va_nandn_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_va_norn_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_va_copy_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_va_not_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_va_and_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_va_or_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_va_xor_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_va_nand_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_va_nor_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_va_xnor_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_va_andn_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_va_orn_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_va_nandn_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_va_norn_bin, bin_bin, 0, 0), /* Pixelwise operations */ - RC_BMARK_ENTRY(rc_pixop_set_u8, u8, 0, 0), - RC_BMARK_ENTRY(rc_pixop_not_u8, u8, 0, 0), - RC_BMARK_ENTRY(rc_pixop_flip_u8, u8, 0, 0), - RC_BMARK_ENTRY(rc_pixop_lut_u8, u8_p, 0, 0), - RC_BMARK_ENTRY(rc_pixop_abs_u8, u8, 0, 0), - RC_BMARK_ENTRY(rc_pixop_addc_u8, u8, 7, 0), - RC_BMARK_ENTRY(rc_pixop_lerpc_u8, u8, 12, 0x80), - RC_BMARK_ENTRY(rc_pixop_lerpnc_u8, u8, 12, 0x80), - RC_BMARK_ENTRY(rc_pixop_add_u8, u8_u8, 0, 
0), - RC_BMARK_ENTRY(rc_pixop_avg_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_pixop_sub_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_pixop_subh_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_pixop_suba_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_pixop_lerp_u8, u8_u8, 12, 0), - RC_BMARK_ENTRY(rc_pixop_lerpn_u8, u8_u8, 12, 0), - RC_BMARK_ENTRY(rc_pixop_lerpi_u8, u8_u8, 12, 0), - RC_BMARK_ENTRY(rc_pixop_norm_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_pixop_set_u8, u8, 0, 0), + RC_BMARK_ENTRY(rc_pixop_not_u8, u8, 0, 0), + RC_BMARK_ENTRY(rc_pixop_flip_u8, u8, 0, 0), + RC_BMARK_ENTRY(rc_pixop_lut_u8, u8_p, 0, 0), + RC_BMARK_ENTRY(rc_pixop_abs_u8, u8, 0, 0), + RC_BMARK_ENTRY(rc_pixop_addc_u8, u8, 7, 0), + RC_BMARK_ENTRY(rc_pixop_lerpc_u8, u8, 12, 0x80), + RC_BMARK_ENTRY(rc_pixop_lerpnc_u8, u8, 12, 0x80), + RC_BMARK_ENTRY(rc_pixop_add_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_pixop_avg_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_pixop_sub_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_pixop_subh_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_pixop_suba_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_pixop_lerp_u8, u8_u8, 12, 0), + RC_BMARK_ENTRY(rc_pixop_lerpn_u8, u8_u8, 12, 0), + RC_BMARK_ENTRY(rc_pixop_lerpi_u8, u8_u8, 12, 0), + RC_BMARK_ENTRY(rc_pixop_norm_u8, u8_u8, 0, 0), /* Type conversions */ - RC_BMARK_ENTRY(rc_type_u8_to_bin, u8_bin, 0, 0), - RC_BMARK_ENTRY(rc_type_bin_to_u8, bin_u8, 0, 0), + RC_BMARK_ENTRY(rc_type_u8_to_bin, u8_bin, 0, 0), + RC_BMARK_ENTRY(rc_type_bin_to_u8, bin_u8, 0, 0), /* Thresholding */ - RC_BMARK_ENTRY(rc_thresh_gt_u8, u8_bin, 0, 0), - RC_BMARK_ENTRY(rc_thresh_lt_u8, u8_bin, 0, 0), - RC_BMARK_ENTRY(rc_thresh_gtlt_u8, u8_bin, 0, 0), - RC_BMARK_ENTRY(rc_thresh_ltgt_u8, u8_bin, 0, 0), + RC_BMARK_ENTRY(rc_thresh_gt_u8, u8_bin, 0, 0), + RC_BMARK_ENTRY(rc_thresh_lt_u8, u8_bin, 0, 0), + RC_BMARK_ENTRY(rc_thresh_gtlt_u8, u8_bin, 0, 0), + RC_BMARK_ENTRY(rc_thresh_ltgt_u8, u8_bin, 0, 0), + RC_BMARK_ENTRY(rc_thresh_lt_pixel_u8, thresh_pixel, 0, 0), /* Statistics */ - RC_BMARK_ENTRY(rc_stat_sum_bin, bin, 0, 0), - 
RC_BMARK_ENTRY(rc_stat_sum_u8, u8, 0, 0), - RC_BMARK_ENTRY(rc_stat_sum2_u8, u8_p, 0, 0), - RC_BMARK_ENTRY(rc_stat_xsum_u8, u8_u8_p, 0, 0), - RC_BMARK_ENTRY(rc_stat_min_bin, bin, 0, 0), - RC_BMARK_ENTRY(rc_stat_max_bin, bin, 0, 0), - RC_BMARK_ENTRY(rc_stat_min_u8, u8, 0, 0), - RC_BMARK_ENTRY(rc_stat_max_u8, u8, 0, 0), + RC_BMARK_ENTRY(rc_stat_sum_bin, bin, 0, 0), + RC_BMARK_ENTRY(rc_stat_sum_u8, u8, 0, 0), + RC_BMARK_ENTRY(rc_stat_sum2_u8, u8_p, 0, 0), + RC_BMARK_ENTRY(rc_stat_xsum_u8, u8_u8_p, 0, 0), + RC_BMARK_ENTRY(rc_stat_min_bin, bin, 0, 0), + RC_BMARK_ENTRY(rc_stat_max_bin, bin, 0, 0), + RC_BMARK_ENTRY(rc_stat_min_u8, u8, 0, 0), + RC_BMARK_ENTRY(rc_stat_max_u8, u8, 0, 0), /* 8-bit 2x reductions */ - RC_BMARK_ENTRY(rc_reduce_1x2_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_reduce_2x1_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_reduce_2x2_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_reduce_1x2_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_reduce_2x1_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_reduce_2x2_u8, u8_u8, 0, 0), /* Binary 2x reductions */ - RC_BMARK_ENTRY(rc_reduce_1x2_rk1_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_reduce_1x2_rk2_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_reduce_2x1_rk1_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_reduce_2x1_rk2_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_reduce_2x2_rk1_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_reduce_2x2_rk2_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_reduce_2x2_rk3_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_reduce_2x2_rk4_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_reduce_1x2_rk1_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_reduce_1x2_rk2_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_reduce_2x1_rk1_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_reduce_2x1_rk2_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_reduce_2x2_rk1_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_reduce_2x2_rk2_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_reduce_2x2_rk3_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_reduce_2x2_rk4_bin, bin_bin, 0, 0), /* Binary 2x expansions */ - RC_BMARK_ENTRY(rc_expand_1x2_bin, 
bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_expand_2x1_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_expand_2x2_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_expand_1x2_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_expand_2x1_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_expand_2x2_bin, bin_bin, 0, 0), /* 8-bit rotation */ - RC_BMARK_ENTRY(rc_rotate_cw_u8, rotate, 0, 0), - RC_BMARK_ENTRY(rc_rotate_ccw_u8, rotate, 0, 0), + RC_BMARK_ENTRY(rc_rotate_cw_u8, rotate, 0, 0), + RC_BMARK_ENTRY(rc_rotate_ccw_u8, rotate, 0, 0), /* Fixed-filter convolutions */ - RC_BMARK_ENTRY(rc_filter_diff_1x2_horz_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_filter_diff_1x2_horz_abs_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_filter_diff_2x1_vert_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_filter_diff_2x1_vert_abs_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_filter_diff_2x2_magn_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_filter_sobel_3x3_horz_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_filter_sobel_3x3_horz_abs_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_filter_sobel_3x3_vert_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_filter_sobel_3x3_vert_abs_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_filter_sobel_3x3_magn_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_filter_gauss_3x3_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_filter_laplace_3x3_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_filter_laplace_3x3_abs_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_filter_highpass_3x3_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_filter_highpass_3x3_abs_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_filter_diff_1x2_horz_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_filter_diff_1x2_horz_abs_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_filter_diff_2x1_vert_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_filter_diff_2x1_vert_abs_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_filter_diff_2x2_magn_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_filter_sobel_3x3_horz_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_filter_sobel_3x3_horz_abs_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_filter_sobel_3x3_vert_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_filter_sobel_3x3_vert_abs_u8, u8_u8, 0, 0), + 
RC_BMARK_ENTRY(rc_filter_sobel_3x3_magn_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_filter_gauss_3x3_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_filter_laplace_3x3_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_filter_laplace_3x3_abs_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_filter_highpass_3x3_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_filter_highpass_3x3_abs_u8, u8_u8, 0, 0), /* Binary morphology */ - RC_BMARK_ENTRY(rc_morph_erode_line_1x2_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_1x2_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_1x3_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_1x3_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_1x3_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_1x3_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_1x5_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_1x5_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_1x7_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_1x7_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_1x9_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_1x9_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_1x13_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_1x13_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_1x15_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_1x15_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_1x17_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_1x17_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_1x25_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_1x25_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_1x29_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_1x29_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_1x31_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_1x31_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_2x1_bin, bin_bin, 0, 0), - 
RC_BMARK_ENTRY(rc_morph_dilate_line_2x1_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_3x1_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_3x1_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_3x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_3x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_5x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_5x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_7x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_7x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_9x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_9x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_13x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_13x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_15x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_15x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_17x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_17x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_25x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_25x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_29x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_29x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_31x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_31x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_square_2x2_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_square_2x2_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_square_3x3_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_square_3x3_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_square_3x3_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_square_3x3_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_diamond_3x3_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_diamond_3x3_bin, 
bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_diamond_3x3_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_diamond_3x3_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_diamond_5x5_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_diamond_5x5_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_diamond_7x7_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_diamond_7x7_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_diamond_9x9_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_diamond_9x9_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_diamond_13x13_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_diamond_13x13_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_diamond_15x15_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_diamond_15x15_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_diamond_17x17_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_diamond_17x17_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_diamond_25x25_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_diamond_25x25_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_diamond_29x29_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_diamond_29x29_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_diamond_31x31_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_diamond_31x31_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_octagon_5x5_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_octagon_5x5_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_octagon_7x7_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_octagon_7x7_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_octagon_9x9_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_octagon_9x9_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_octagon_13x13_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_octagon_13x13_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_octagon_15x15_p_bin, bin_bin, 0, 
0), - RC_BMARK_ENTRY(rc_morph_dilate_octagon_15x15_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_octagon_17x17_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_octagon_17x17_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_octagon_25x25_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_octagon_25x25_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_octagon_29x29_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_octagon_29x29_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_octagon_31x31_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_octagon_31x31_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_disc_7x7_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_disc_7x7_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_disc_7x7_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_disc_7x7_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_disc_9x9_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_disc_9x9_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_disc_11x11_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_disc_11x11_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_disc_13x13_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_disc_13x13_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_disc_15x15_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_disc_15x15_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_disc_17x17_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_disc_17x17_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_disc_19x19_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_disc_19x19_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_disc_25x25_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_disc_25x25_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c48_r0_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c48_r90_bin, bin_bin, 0, 0), - 
RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c48_r180_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c48_r270_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c4_r45_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c4_r135_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c4_r225_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c4_r315_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c8_r45_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c8_r135_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c8_r225_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c8_r315_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c4_r0_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c4_r90_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c4_r180_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c4_r270_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c8_r0_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c8_r90_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c8_r180_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c8_r270_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c8_r45_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c8_r135_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c8_r225_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c8_r315_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_1x2_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_1x2_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_1x3_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_1x3_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_1x3_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_1x3_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_1x5_p_bin, bin_bin, 0, 0), + 
RC_BMARK_ENTRY(rc_morph_dilate_line_1x5_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_1x7_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_1x7_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_1x9_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_1x9_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_1x13_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_1x13_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_1x15_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_1x15_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_1x17_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_1x17_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_1x25_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_1x25_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_1x29_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_1x29_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_1x31_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_1x31_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_2x1_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_2x1_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_3x1_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_3x1_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_3x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_3x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_5x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_5x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_7x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_7x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_9x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_9x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_13x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_13x1_p_bin, bin_bin, 0, 
0), + RC_BMARK_ENTRY(rc_morph_erode_line_15x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_15x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_17x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_17x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_25x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_25x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_29x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_29x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_31x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_31x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_square_2x2_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_square_2x2_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_square_3x3_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_square_3x3_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_square_3x3_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_square_3x3_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_diamond_3x3_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_diamond_3x3_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_diamond_3x3_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_diamond_3x3_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_diamond_5x5_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_diamond_5x5_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_diamond_7x7_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_diamond_7x7_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_diamond_9x9_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_diamond_9x9_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_diamond_13x13_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_diamond_13x13_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_diamond_15x15_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_diamond_15x15_p_bin, bin_bin, 0, 0), 
+ RC_BMARK_ENTRY(rc_morph_erode_diamond_17x17_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_diamond_17x17_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_diamond_25x25_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_diamond_25x25_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_diamond_29x29_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_diamond_29x29_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_diamond_31x31_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_diamond_31x31_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_octagon_5x5_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_octagon_5x5_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_octagon_7x7_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_octagon_7x7_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_octagon_9x9_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_octagon_9x9_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_octagon_13x13_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_octagon_13x13_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_octagon_15x15_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_octagon_15x15_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_octagon_17x17_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_octagon_17x17_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_octagon_25x25_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_octagon_25x25_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_octagon_29x29_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_octagon_29x29_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_octagon_31x31_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_octagon_31x31_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_disc_7x7_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_disc_7x7_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_disc_7x7_p_bin, bin_bin, 0, 0), + 
RC_BMARK_ENTRY(rc_morph_dilate_disc_7x7_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_disc_9x9_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_disc_9x9_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_disc_11x11_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_disc_11x11_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_disc_13x13_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_disc_13x13_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_disc_15x15_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_disc_15x15_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_disc_17x17_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_disc_17x17_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_disc_19x19_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_disc_19x19_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_disc_25x25_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_disc_25x25_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c48_r0_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c48_r90_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c48_r180_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c48_r270_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c4_r45_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c4_r135_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c4_r225_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c4_r315_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c8_r45_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c8_r135_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c8_r225_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c8_r315_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c4_r0_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c4_r90_bin, bin_bin, 0, 0), + 
RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c4_r180_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c4_r270_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c8_r0_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c8_r90_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c8_r180_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c8_r270_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c8_r45_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c8_r135_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c8_r225_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c8_r315_bin, bin_bin, 0, 0), /* Binary logical margins */ - RC_BMARK_ENTRY(rc_margin_horz_bin, p_bin, 0, 0), - RC_BMARK_ENTRY(rc_margin_vert_bin, p_bin, 0, 0) + RC_BMARK_ENTRY(rc_margin_horz_bin, p_bin, 0, 0), + RC_BMARK_ENTRY(rc_margin_vert_bin, p_bin, 0, 0) }; @@ -838,3 +842,18 @@ rc_bmark_exec_rotate(int (*func)(), const int *args) rc_bmark_data.src, rc_bmark_data.dim_u8, rc_bmark_data.width, rc_bmark_data.height); } + +static void +rc_bmark_exec_thresh_pixel(int (*func)(), const int *args) +{ + (void)args; + /* The speed does not depend on the content or calculation results, + * so the aux buffer is reused for both the high and low thresholds. + * This minimizes changes to the benchmark as a whole, + * i.e. only a single aux buffer is required. 
*/ + (*func)(rc_bmark_data.dst, rc_bmark_data.dim_bin, + rc_bmark_data.src, rc_bmark_data.dim_u8, + rc_bmark_data.width, rc_bmark_data.height, + rc_bmark_data.aux, rc_bmark_data.dim_u8, + rc_bmark_data.aux, rc_bmark_data.dim_u8); +} diff --git a/compute/vector/Makefile.am b/compute/vector/Makefile.am index 75b9646..93f8408 100644 --- a/compute/vector/Makefile.am +++ b/compute/vector/Makefile.am @@ -40,17 +40,18 @@ librappcompute_swar_la_LDFLAGS = -no-undefined librappcompute_simd_la_LDFLAGS = -no-undefined # The source files are the same for both vector implementations -librappcompute_swar_la_SOURCES = rc_impl_cfg.h \ - rc_bitblt_rop.h \ - rc_bitblt_va.c \ - rc_bitblt_vm.c \ - rc_pixop.c \ - rc_type.c \ - rc_thresh.c \ - rc_thresh_tpl.h \ - rc_reduce.c \ - rc_stat.c \ - rc_filter.c \ +librappcompute_swar_la_SOURCES = rc_impl_cfg.h \ + rc_bitblt_rop.h \ + rc_bitblt_va.c \ + rc_bitblt_vm.c \ + rc_pixop.c \ + rc_type.c \ + rc_thresh.c \ + rc_thresh_tpl.h \ + rc_thresh_pixel_tpl.h \ + rc_reduce.c \ + rc_stat.c \ + rc_filter.c \ rc_margin.c librappcompute_simd_la_SOURCES = $(librappcompute_swar_la_SOURCES) diff --git a/compute/vector/rc_thresh.c b/compute/vector/rc_thresh.c index 73a560f..dc0e1e2 100644 --- a/compute/vector/rc_thresh.c +++ b/compute/vector/rc_thresh.c @@ -30,10 +30,12 @@ * @brief RAPP Compute layer thresholding to binary, vector implementation. 
*/ -#include "rc_impl_cfg.h" /* Implementation config */ -#include "rc_vector.h" /* Vector operations */ -#include "rc_thresh.h" /* Thresholding API */ -#include "rc_thresh_tpl.h" /* Thresholding templates */ +#include "rc_impl_cfg.h" /* Implementation config */ +#include "rc_vector.h" /* Vector operations */ +#include "rc_thresh.h" /* Thresholding API */ +#include "rc_thresh_tpl.h" /* Thresholding templates */ +#include "rc_thresh_pixel_tpl.h" /* Thresholding pixelwise templates */ +#include <stddef.h> /* NULL */ #ifdef RC_THRESH_TEMPLATE @@ -235,4 +237,24 @@ rc_thresh_ltgt_u8(uint8_t *restrict dst, int dst_dim, #endif #endif +/** + * Pixelwise less-than thresholding against a single threshold buffer. + */ +#if RC_IMPL(rc_thresh_lt_pixel_u8, 1) +#if defined RC_THRESH_CMPLT && defined RC_THRESH_PIXEL_TEMPLATE +void +rc_thresh_lt_pixel_u8(uint8_t *restrict dst, int dst_dim, + const uint8_t *restrict src, int src_dim, + int width, int height, + const uint8_t *restrict thresh, int thresh_dim) +{ + const uint8_t *thresh_high = NULL; + RC_THRESH_PIXEL_TEMPLATE(dst, dst_dim, src, src_dim, width, height, + thresh, thresh_dim, thresh_high, 0, + RC_THRESH_CMPLT, RC_THRESH_PIXEL_SINGLE_ARG, + RC_UNROLL(rc_thresh_lt_pixel_u8)); +} +#endif +#endif + #endif /* RC_THRESH_TEMPLATE */ diff --git a/compute/vector/rc_thresh_pixel_tpl.h b/compute/vector/rc_thresh_pixel_tpl.h new file mode 100644 index 0000000..9f0b8d3 --- /dev/null +++ b/compute/vector/rc_thresh_pixel_tpl.h @@ -0,0 +1,521 @@ +/* Copyright (C) 2016, Axis Communications AB, LUND, SWEDEN + * + * This file is part of RAPP. + * + * RAPP is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * You can use the comments under either the terms of the GNU Lesser General + * Public License version 3 as published by the Free Software Foundation, + * either version 3 of the License or (at your option) any later version, or + * the GNU Free Documentation License version 1.3 or any later version + * published by the Free Software Foundation; with no Invariant Sections, no + * Front-Cover Texts, and no Back-Cover Texts. + * A copy of the license is included in the documentation section entitled + * "GNU Free Documentation License". + * + * RAPP is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License and a copy of the GNU Free Documentation License along + * with RAPP. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * @file rc_thresh_pixel_tpl.h + * @brief RAPP Compute layer pixelwise thresholding templates. + */ + +#ifndef RC_THRESH_PIXEL_TPL_H +#define RC_THRESH_PIXEL_TPL_H + +#include "rc_util.h" /* RC_DIV_CEIL() */ +#include "rc_word.h" /* Word operations */ +#include "rc_vector.h" /* Vector operations */ + +/* + * ------------------------------------------------------------- + * Exported pixelwise thresholding template + * ------------------------------------------------------------- + */ +#define RC_THRESH_PIXEL_SINGLE_ARG (1) +#define RC_THRESH_PIXEL_DOUBLE_ARG (2) + +/** + * The pixelwise thresholding template. + * Use the word version unless hinted otherwise. 
+ */ + +#if RC_VEC_SIZE >= 8 && defined RC_VEC_GETMASKV && \ + (defined RC_VEC_HINT_GETMASKV || !defined RC_VEC_GETMASKW) && \ + defined RC_VEC_SPLAT && defined RC_VEC_ALIGNC && \ + defined RC_VEC_SHINIT && defined RC_VEC_SHL && \ + defined RC_VEC_ZERO +#define RC_THRESH_PIXEL_TEMPLATE RC_THRESH_PIXEL_VEC + +#elif defined RC_VEC_SPLAT && defined RC_VEC_GETMASKW +#define RC_THRESH_PIXEL_TEMPLATE RC_THRESH_PIXEL_WORD +#endif + + +/* + * ------------------------------------------------------------- + * Internal thresholding templates + * ------------------------------------------------------------- + */ + +/** + * Pixelwise threshold-to-binary template using a word accumulator; picks the 4x, 2x or non-unrolled word driver from unroll and the row width. + */ +#define RC_THRESH_PIXEL_WORD(dst, dst_dim, src, src_dim, width, height, \ + low, low_dim, high, high_dim, cmp, num_args, unroll) \ +do { \ + int div_ = (8*RC_WORD_SIZE) / RC_VEC_SIZE; /* Src vectors per dst word */ \ + int tot_ = RC_DIV_CEIL(width, RC_VEC_SIZE); /* Num src vectors per row */ \ + int blk_ = tot_ / div_; /* Full dst words per row */ \ + int end_ = tot_ % div_; /* Src vectors in the partial dst word */ \ + \ + RC_VEC_DECLARE(); \ + \ + if ((unroll) == 4 && /* Compile-time constant */ \ + (8*RC_WORD_SIZE) / RC_VEC_SIZE >= 4 && /* Compile-time constant */ \ + (blk_ > 0 || end_ >= 4)) /* Depends on the row width */ \ + { \ + int len_ = end_ / 4; \ + int rem_ = end_ % 4; \ + RC_THRESH_PIXEL_WORD_DRV_(dst, dst_dim, src, src_dim, height, \ + blk_, len_, rem_, low, low_dim, high, high_dim, \ + cmp, RC_THRESH_PIXEL_WORD_BLK_X4_, \ + RC_THRESH_PIXEL_WORD_REM_X4_, num_args); \ + } \ + else if ((unroll) >= 2 && /* Compile-time constant */ \ + (8*RC_WORD_SIZE) / RC_VEC_SIZE >= 2 && /* Compile-time constant */ \ + (blk_ > 0 || end_ >= 2)) /* Depends on the row width */ \ + { \ + int len_ = end_ / 2; \ + int rem_ = end_ % 2; \ + RC_THRESH_PIXEL_WORD_DRV_(dst, dst_dim, src, src_dim, height, \ + blk_, len_, rem_, low, low_dim, high, high_dim, \ + cmp, RC_THRESH_PIXEL_WORD_BLK_X2_, \ + RC_THRESH_PIXEL_WORD_REM_X2_, num_args); \ + } \ + else { \ + RC_THRESH_PIXEL_WORD_DRV_(dst, dst_dim, src, src_dim, height, \ + 
blk_, 0, end_, low, low_dim, high, high_dim, \ + cmp, RC_THRESH_PIXEL_WORD_BLK_X1_, \ + RC_THRESH_PIXEL_WORD_REM_NONE_, num_args); \ + } \ + RC_VEC_CLEANUP(); \ +} while (0) + + +/** + * Pixelwise threshold-to-binary template using a vector accumulator; picks the 4x, 2x or non-unrolled vector driver from unroll and the row width. + */ +#define RC_THRESH_PIXEL_VEC(dst, dst_dim, src, src_dim, width, height, \ + low, low_dim, high, high_dim, cmp, num_args, unroll) \ +do { \ + int tot_ = RC_DIV_CEIL(width, RC_VEC_SIZE); /* Num src vectors per row */ \ + int blk_ = tot_ / 8; /* Full dst vectors (8 src vectors each) */ \ + int end_ = tot_ % 8; /* Src vectors in the partial dst vector */ \ + \ + RC_VEC_DECLARE(); \ + \ + if ((unroll) == 4 && /* Compile-time constant */ \ + (blk_ > 0 || end_ >= 4)) /* Depends on the row width */ \ + { \ + int len_ = end_ / 4; \ + int rem_ = end_ % 4; \ + RC_THRESH_PIXEL_VEC_DRV_(dst, dst_dim, src, src_dim, height, \ + blk_, end_, len_, rem_, low, low_dim, \ + high, high_dim, cmp, \ + RC_THRESH_PIXEL_VEC_BLK_X4_, \ + RC_THRESH_PIXEL_VEC_REM_X4_, num_args); \ + } \ + else if ((unroll) >= 2 && /* Compile-time constant */ \ + (blk_ > 0 || end_ >= 2)) /* Depends on the row width */ \ + { \ + int len_ = end_ / 2; \ + int rem_ = end_ % 2; \ + RC_THRESH_PIXEL_VEC_DRV_(dst, dst_dim, src, src_dim, height, \ + blk_, end_, len_, rem_, low, low_dim, \ + high, high_dim, cmp, \ + RC_THRESH_PIXEL_VEC_BLK_X2_, \ + RC_THRESH_PIXEL_VEC_REM_X2_, num_args); \ + } \ + else { \ + RC_THRESH_PIXEL_VEC_DRV_(dst, dst_dim, src, src_dim, height, \ + blk_, end_, 0, end_, low, low_dim, \ + high, high_dim, cmp, \ + RC_THRESH_PIXEL_VEC_BLK_X1_, \ + RC_THRESH_PIXEL_VEC_REM_NONE_, num_args); \ + } \ + RC_VEC_CLEANUP(); \ +} while (0) + + +/* + * ------------------------------------------------------------- + * Internal support macros for word-based thresholding template + * ------------------------------------------------------------- + */ + +/** + * Pixelwise thresholding template word driver. 
+ */ +#define RC_THRESH_PIXEL_WORD_DRV_(dst, dst_dim, src, src_dim, height, \ + blk, len, rem, thr1, thr1_dim, thr2, thr2_dim, \ + cmp, loop_blk, loop_rem, num_args) \ +do { \ + int y_; \ + for (y_ = 0; y_ < (height); y_++) { \ + int i_ = y_*(src_dim); /* Row offset into src */ \ + int j_ = y_*(dst_dim); /* Row offset into dst */ \ + int m_ = y_*(thr1_dim); /* Row offset into thr1 */ \ + int n_ = y_*(thr2_dim); /* Row offset into thr2 */ \ + int x_; \ + \ + /* Handle all full destination words, one loop_blk pass per word */ \ + for (x_ = 0; x_ < (blk); x_++, j_ += RC_WORD_SIZE) { \ + loop_blk(&(dst)[j_], src, i_, cmp, thr1, m_, thr2, n_, num_args); \ + } \ + \ + /* Handle the trailing partial destination word, if any */ \ + if ((len) || (rem)) { \ + rc_word_t acc_ = RC_WORD_ZERO; \ + int pos_ = 0; \ + \ + /* Handle unrolled source vectors (len passes of loop_rem) */ \ + loop_rem(src, acc_, i_, pos_, len, cmp, thr1, m_, thr2, n_, num_args); \ + \ + /* Handle any remaining source vectors one at a time */ \ + for (x_ = 0; x_ < (rem); x_++) { \ + RC_THRESH_PIXEL_WORD_ITER_(src, acc_, i_, pos_, \ + cmp, thr1, m_, thr2, n_, num_args); \ + } \ + \ + /* Store the partial word */ \ + RC_WORD_STORE(&(dst)[j_], acc_); \ + } \ + } \ +} while (0) + +/** + * Pixelwise thresholding template block iterator, no unrolling; fills one destination word from (8*RC_WORD_SIZE)/RC_VEC_SIZE source vectors and stores it. + */ +#define RC_THRESH_PIXEL_WORD_BLK_X1_(dst, src, idx, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args) \ +do { \ + rc_word_t acc_ = RC_WORD_ZERO; \ + int k_, b_; \ + for (k_ = 0, b_ = 0; \ + k_ < (int)((8*RC_WORD_SIZE) / RC_VEC_SIZE); \ + k_++) \ + { \ + RC_THRESH_PIXEL_WORD_ITER_(src, acc_, idx, b_, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + } \ + RC_WORD_STORE(dst, acc_); \ +} while (0) + +/** + * Pixelwise thresholding template block iterator, unrolled two times. 
+ */ +#define RC_THRESH_PIXEL_WORD_BLK_X2_(dst, src, idx, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args) \ +do { \ + rc_word_t acc_ = RC_WORD_ZERO; \ + int k_, b_; \ + for (k_ = 0, b_ = 0; \ + k_ < (int)((8*RC_WORD_SIZE) / RC_VEC_SIZE); \ + k_ += 2) /* Two source vectors per pass */ \ + { \ + RC_THRESH_PIXEL_WORD_ITER_(src, acc_, idx, b_, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + RC_THRESH_PIXEL_WORD_ITER_(src, acc_, idx, b_, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + } \ + RC_WORD_STORE(dst, acc_); \ +} while (0) + +/** + * Pixelwise thresholding template block iterator, unrolled four times; fills and stores one full destination word. + */ +#define RC_THRESH_PIXEL_WORD_BLK_X4_(dst, src, idx, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args) \ +do { \ + rc_word_t acc_ = RC_WORD_ZERO; \ + int k_, b_; \ + for (k_ = 0, b_ = 0; \ + k_ < (int)((8*RC_WORD_SIZE) / RC_VEC_SIZE); \ + k_ += 4) /* Four source vectors per pass */ \ + { \ + RC_THRESH_PIXEL_WORD_ITER_(src, acc_, idx, b_, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + RC_THRESH_PIXEL_WORD_ITER_(src, acc_, idx, b_, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + RC_THRESH_PIXEL_WORD_ITER_(src, acc_, idx, b_, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + RC_THRESH_PIXEL_WORD_ITER_(src, acc_, idx, b_, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + } \ + RC_WORD_STORE(dst, acc_); \ +} while (0) + +/** + * Pixelwise thresholding template empty remainder iterator; expands to nothing. + */ +#define RC_THRESH_PIXEL_WORD_REM_NONE_(src, acc, idx, pos, \ + len, cmp, thr1, thr1_idx, \ + thr2, thr2_idx, num_args) + +/** + * Pixelwise thresholding template remainder iterator, unrolled two times. 
+ */ +#define RC_THRESH_PIXEL_WORD_REM_X2_(src, acc, idx, pos, \ + len, cmp, thr1, thr1_idx, \ + thr2, thr2_idx, num_args) \ +do { \ + int k_; \ + for (k_ = 0; k_ < (len); k_++) { /* len passes of two vectors */ \ + RC_THRESH_PIXEL_WORD_ITER_(src, acc, idx, pos, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + RC_THRESH_PIXEL_WORD_ITER_(src, acc, idx, pos, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + } \ +} while (0) + +/** + * Pixelwise thresholding template remainder iterator, unrolled four times; accumulates into acc without storing it. + */ +#define RC_THRESH_PIXEL_WORD_REM_X4_(src, acc, idx, pos, \ + len, cmp, thr1, thr1_idx, thr2, thr2_idx, num_args) \ +do { \ + int k_; \ + for (k_ = 0; k_ < (len); k_++) { /* len passes of four vectors */ \ + RC_THRESH_PIXEL_WORD_ITER_(src, acc, idx, pos, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + RC_THRESH_PIXEL_WORD_ITER_(src, acc, idx, pos, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + RC_THRESH_PIXEL_WORD_ITER_(src, acc, idx, pos, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + RC_THRESH_PIXEL_WORD_ITER_(src, acc, idx, pos, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + } \ +} while (0) + +/** + * Pixelwise thresholding template iteration. 
+ */
+#define RC_THRESH_PIXEL_WORD_ITER_(src, acc, idx, pos, cmp, \
+                                   thr1, thr1_idx, thr2, thr2_idx, num_args) \
+do { \
+    rc_vec_t vec_; /* Source pixels, then the compare result */ \
+    rc_vec_t thr1_vec_; \
+    rc_vec_t thr2_vec_; /* Only loaded in double-threshold mode */ \
+    unsigned mask_; \
+    \
+    RC_VEC_LOAD(vec_, &(src)[(idx)]); /* Load source vector */ \
+    if ((num_args) == RC_THRESH_PIXEL_DOUBLE_ARG) { \
+        RC_VEC_LOAD(thr1_vec_, &(thr1)[(thr1_idx)]); /* Load threshold 1 vector */ \
+        RC_VEC_LOAD(thr2_vec_, &(thr2)[(thr2_idx)]); /* Load threshold 2 vector */ \
+        (thr1_idx) += RC_VEC_SIZE; /* Advance both threshold indices */ \
+        (thr2_idx) += RC_VEC_SIZE; \
+    } \
+    else { \
+        RC_VEC_LOAD(thr1_vec_, &(thr1)[(thr1_idx)]); /* Single threshold: thr2 is not read */ \
+        (thr1_idx) += RC_VEC_SIZE; \
+        (void)thr2_vec_; /* Presumably silences an unused-variable warning */ \
+    } \
+    cmp(vec_, vec_, thr1_vec_, thr2_vec_); /* Cmp to thresholds; NOTE(review): thr2_vec_ is uninitialized in single mode, confirm cmp ignores it */ \
+    RC_VEC_GETMASKW(mask_, vec_); /* Pack to binary */ \
+    (acc) |= RC_WORD_INSERT(mask_, pos, RC_VEC_SIZE); /* Update accum */ \
+    (idx) += RC_VEC_SIZE; /* Advance src index */ \
+    (pos) += RC_VEC_SIZE; /* Advance acc pos */ \
+} while (0)
+
+
+/*
+ * -------------------------------------------------------------
+ *  Internal support macros for vector-based thresholding
+ * -------------------------------------------------------------
+ */
+
+/**
+ * Pixelwise thresholding template vector driver.
+ */
+#define RC_THRESH_PIXEL_VEC_DRV_(dst, dst_dim, src, src_dim, height, \
+                                 blk, end, len, rem, thr1, thr1_dim, \
+                                 thr2, thr2_dim, cmp, \
+                                 loop_blk, loop_rem, num_args) \
+do { \
+    rc_vec_t shv_; \
+    int y_; \
+    \
+    /* Set the remainder alignment shift vector (used only for the partial vector below) */ \
+    RC_VEC_SHINIT(shv_, RC_VEC_SIZE - (end)*RC_VEC_SIZE/8); \
+    \
+    /* Process all rows */ \
+    for (y_ = 0; y_ < (height); y_++) { \
+        int i_ = y_*(src_dim); /* Source index, advanced by the iterators */ \
+        int j_ = y_*(dst_dim); /* Destination index */ \
+        int m_ = y_*(thr1_dim); /* Threshold 1 index, advanced by the iterators */ \
+        int n_ = y_*(thr2_dim); /* Threshold 2 index, advanced in double-threshold mode */ \
+        int x_; \
+        \
+        /* Handle all full destination vector blocks */ \
+        for (x_ = 0; x_ < (blk); x_++, j_ += RC_VEC_SIZE) { \
+            loop_blk(&(dst)[j_], src, i_, cmp, thr1, m_, thr2, n_, num_args); \
+        } \
+        \
+        /* Handle the partial destination vector, if any */ \
+        if ((len) || (rem)) { \
+            rc_vec_t acc_; \
+            \
+            /* Handle unrolled source vectors (len passes of loop_rem) */ \
+            RC_VEC_ZERO(acc_); \
+            loop_rem(src, acc_, i_, len, cmp, thr1, m_, thr2, n_, num_args); \
+            \
+            /* Handle any remaining source vectors, one at a time */ \
+            for (x_ = 0; x_ < (rem); x_++) { \
+                RC_THRESH_PIXEL_VEC_ITER_(src, acc_, i_, cmp, \
+                    thr1, m_, thr2, n_, num_args); \
+            } \
+            \
+            /* Shift and store the partial dst vector */ \
+            RC_VEC_SHL(acc_, acc_, shv_); \
+            RC_VEC_STORE(&(dst)[j_], acc_); \
+        } \
+    } \
+} while (0)
+
+/**
+ * Pixelwise thresholding template block iterator, no unrolling: eight iterations per stored vector.
+ */
+#define RC_THRESH_PIXEL_VEC_BLK_X1_(dst, src, idx, cmp, \
+                                    thr1, thr1_idx, thr2, thr2_idx, num_args) \
+do { \
+    rc_vec_t acc_; /* Output vector under construction */ \
+    int k_; \
+    RC_VEC_ZERO(acc_); \
+    for (k_ = 0; k_ < 8; k_++) { /* 8 x 1 iterations */ \
+        RC_THRESH_PIXEL_VEC_ITER_(src, acc_, idx, cmp, \
+            thr1, thr1_idx, thr2, thr2_idx, num_args); \
+    } \
+    RC_VEC_STORE(dst, acc_); /* Write the finished vector to dst */ \
+} while (0)
+
+/**
+ * Pixelwise thresholding template block iterator, unrolled two times.
+ */
+#define RC_THRESH_PIXEL_VEC_BLK_X2_(dst, src, idx, cmp, \
+                                    thr1, thr1_idx, thr2, thr2_idx, num_args) \
+do { \
+    rc_vec_t acc_; /* Output vector under construction */ \
+    int k_; \
+    RC_VEC_ZERO(acc_); \
+    for (k_ = 0; k_ < 4; k_++) { /* 4 x 2 = eight iterations */ \
+        RC_THRESH_PIXEL_VEC_ITER_(src, acc_, idx, cmp, \
+            thr1, thr1_idx, thr2, thr2_idx, num_args); \
+        RC_THRESH_PIXEL_VEC_ITER_(src, acc_, idx, cmp, \
+            thr1, thr1_idx, thr2, thr2_idx, num_args); \
+    } \
+    RC_VEC_STORE(dst, acc_); /* Write the finished vector to dst */ \
+} while (0)
+
+/**
+ * Pixelwise thresholding template block iterator, unrolled four times: eight iterations per stored vector.
+ */
+#define RC_THRESH_PIXEL_VEC_BLK_X4_(dst, src, idx, cmp, \
+                                    thr1, thr1_idx, thr2, thr2_idx, num_args) \
+do { \
+    rc_vec_t acc_; /* Output vector under construction */ \
+    int k_; \
+    RC_VEC_ZERO(acc_); \
+    for (k_ = 0; k_ < 2; k_++) { /* 2 x 4 = eight iterations */ \
+        RC_THRESH_PIXEL_VEC_ITER_(src, acc_, idx, cmp, \
+            thr1, thr1_idx, thr2, thr2_idx, num_args); \
+        RC_THRESH_PIXEL_VEC_ITER_(src, acc_, idx, cmp, \
+            thr1, thr1_idx, thr2, thr2_idx, num_args); \
+        RC_THRESH_PIXEL_VEC_ITER_(src, acc_, idx, cmp, \
+            thr1, thr1_idx, thr2, thr2_idx, num_args); \
+        RC_THRESH_PIXEL_VEC_ITER_(src, acc_, idx, cmp, \
+            thr1, thr1_idx, thr2, thr2_idx, num_args); \
+    } \
+    RC_VEC_STORE(dst, acc_); /* Write the finished vector to dst */ \
+} while (0)
+
+/**
+ * Pixelwise thresholding template empty remainder iterator; expands to nothing.
+ */
+#define RC_THRESH_PIXEL_VEC_REM_NONE_(src, acc, idx, len, cmp, \
+                                      thr1, thr1_idx, thr2, thr2_idx, num_args)
+
+/**
+ * Pixelwise thresholding template remainder iterator, unrolled two times; updates the caller's acc.
+ */
+#define RC_THRESH_PIXEL_VEC_REM_X2_(src, acc, idx, len, cmp, \
+                                    thr1, thr1_idx, thr2, thr2_idx, num_args) \
+do { \
+    int k_; \
+    for (k_ = 0; k_ < (len); k_++) { /* len counts passes of two iterations each */ \
+        RC_THRESH_PIXEL_VEC_ITER_(src, acc, idx, cmp, \
+            thr1, thr1_idx, thr2, thr2_idx, num_args); \
+        RC_THRESH_PIXEL_VEC_ITER_(src, acc, idx, cmp, \
+            thr1, thr1_idx, thr2, thr2_idx, num_args); \
+    } \
+} while (0)
+
+/**
+ * Pixelwise thresholding template remainder iterator, unrolled four times.
+ */
+#define RC_THRESH_PIXEL_VEC_REM_X4_(src, acc, idx, len, cmp, \
+                                    thr1, thr1_idx, thr2, thr2_idx, num_args) \
+do { \
+    int k_; \
+    for (k_ = 0; k_ < (len); k_++) { /* len counts passes of four iterations each */ \
+        RC_THRESH_PIXEL_VEC_ITER_(src, acc, idx, cmp, \
+            thr1, thr1_idx, thr2, thr2_idx, num_args); \
+        RC_THRESH_PIXEL_VEC_ITER_(src, acc, idx, cmp, \
+            thr1, thr1_idx, thr2, thr2_idx, num_args); \
+        RC_THRESH_PIXEL_VEC_ITER_(src, acc, idx, cmp, \
+            thr1, thr1_idx, thr2, thr2_idx, num_args); \
+        RC_THRESH_PIXEL_VEC_ITER_(src, acc, idx, cmp, \
+            thr1, thr1_idx, thr2, thr2_idx, num_args); \
+    } \
+} while (0)
+
+/**
+ * Pixelwise thresholding template iteration: compares one source vector and merges its mask into acc.
+ */
+#define RC_THRESH_PIXEL_VEC_ITER_(src, acc, idx, cmp, \
+                                  thr1, thr1_idx, thr2, thr2_idx, num_args) \
+do { \
+    rc_vec_t vec_; /* Source pixels, then the compare result */ \
+    rc_vec_t thr1_vec_; \
+    rc_vec_t thr2_vec_; /* Only loaded in double-threshold mode */ \
+    rc_vec_t mask_; \
+    \
+    RC_VEC_LOAD(vec_, &(src)[(idx)]); /* Load source vector */ \
+    if ((num_args) == RC_THRESH_PIXEL_DOUBLE_ARG) { \
+        RC_VEC_LOAD(thr1_vec_, &(thr1)[(thr1_idx)]); /* Load threshold 1 vector */ \
+        RC_VEC_LOAD(thr2_vec_, &(thr2)[(thr2_idx)]); /* Load threshold 2 vector */ \
+        (thr1_idx) += RC_VEC_SIZE; /* Advance both threshold indices */ \
+        (thr2_idx) += RC_VEC_SIZE; \
+    } \
+    else { \
+        RC_VEC_LOAD(thr1_vec_, &(thr1)[(thr1_idx)]); /* Single threshold: thr2 is not read */ \
+        (thr1_idx) += RC_VEC_SIZE; \
+        (void)thr2_vec_; /* Presumably silences an unused-variable warning */ \
+    } \
+    cmp(vec_, vec_, thr1_vec_, thr2_vec_); /* Cmp to thresholds; NOTE(review): thr2_vec_ is uninitialized in single mode, confirm cmp ignores it */ \
+    RC_VEC_GETMASKV(mask_, vec_); /* Pack to binary */ \
+    RC_VEC_ALIGNC(acc, acc, mask_, RC_VEC_SIZE / 8); /* Update accum */ \
+    (idx) += RC_VEC_SIZE; /* Advance src index */ \
+} while (0)
+
+#endif /* RC_THRESH_PIXEL_TPL_H */
diff --git a/driver/rapp_thresh.c b/driver/rapp_thresh.c
index 7fa6104..d387807 100644
--- a/driver/rapp_thresh.c
+++ b/driver/rapp_thresh.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2005-2011, 2014 Axis Communications AB, LUND, SWEDEN
+/* Copyright (C) 2005-2011, 2014, 2016, Axis Communications AB, LUND, SWEDEN
  *
  * This file is part of RAPP.
*
@@ -314,3 +314,48 @@ RAPP_API(int, rapp_thresh_ltgt_u8,
 
     return RAPP_OK;
 }
+
+/**
+ *  Pixelwise single thresholding less-than: validates the arguments, then calls rc_thresh_lt_pixel_u8().
+ */
+RAPP_API(int, rapp_thresh_lt_pixel_u8,
+         (uint8_t *restrict dst, int dst_dim,
+          const uint8_t *restrict src, int src_dim,
+          int width, int height,
+          const uint8_t *restrict thresh, int thresh_dim))
+{
+    if (!RAPP_INITIALIZED()) {
+        RAPP_ABORT_FOR_ASSERTED_RETURNS();
+        return RAPP_ERR_UNINITIALIZED;
+    }
+
+    /* Overlap checks: dst vs src, dst vs thresh, then src vs thresh */
+    if (!RAPP_VALIDATE_RESTRICT_PLUS(dst, dst_dim, src, src_dim, height,
+                                     rc_align((width + 7) / 8),
+                                     rc_align(width)) ||
+        !RAPP_VALIDATE_RESTRICT_PLUS(dst, dst_dim, thresh, thresh_dim, height,
+                                     rc_align((width + 7) / 8),
+                                     rc_align(width)) ||
+        !RAPP_VALIDATE_RESTRICT(src, src_dim, thresh, thresh_dim, height, width))
+    {
+        RAPP_ABORT_FOR_ASSERTED_RETURNS();
+        return RAPP_ERR_OVERLAP;
+    }
+
+    if (!RAPP_VALIDATE_BIN(dst, dst_dim, width, height) ||
+        !RAPP_VALIDATE_U8(src, src_dim, width, height))
+    {
+        /* dst or src failed validation: rapp_error_bin_u8() supplies the error code */
+        return rapp_error_bin_u8(dst, dst_dim, src, src_dim, width, height);
+    }
+
+    if (!RAPP_VALIDATE_U8(thresh, thresh_dim, width, height)) {
+        /* thresh failed validation: rapp_error_u8() supplies the error code */
+        return rapp_error_u8(thresh, thresh_dim, width, height);
+    }
+
+    /* Perform thresholding */
+    rc_thresh_lt_pixel_u8(dst, dst_dim, src, src_dim, width, height, thresh, thresh_dim);
+
+    return RAPP_OK;
+}
diff --git a/include/rapp_thresh.h b/include/rapp_thresh.h
index c6948c8..4c579c6 100644
--- a/include/rapp_thresh.h
+++ b/include/rapp_thresh.h
@@ -132,6 +132,26 @@ rapp_thresh_ltgt_u8(uint8_t *restrict dst, int dst_dim,
                     const uint8_t *restrict src, int src_dim,
                     int width, int height, int low, int high);
 
+/**
+ *  Pixelwise single thresholding less-than.
+ *  Computes dst[i] = src[i] < thresh[i].
+ *
+ *  @param[out] dst         Destination binary pixel buffer.
+ *  @param      dst_dim     Row dimension in bytes of the destination buffer.
+ *  @param[in]  src         Source 8-bit pixel buffer.
+ *  @param      src_dim     Row dimension in bytes of the source buffer.
+ *  @param      width       Image width in pixels.
+ *  @param      height      Image height in pixels.
+ *  @param[in]  thresh      Threshold pixel buffer (8-bit, one threshold per source pixel).
+ *  @param      thresh_dim  Row dimension in bytes of the threshold buffer.
+ *  @return                 A negative error code on error, zero otherwise.
+ */
+RAPP_EXPORT int
+rapp_thresh_lt_pixel_u8(uint8_t *restrict dst, int dst_dim,
+                        const uint8_t *restrict src, int src_dim,
+                        int width, int height,
+                        const uint8_t *restrict thresh, int thresh_dim);
+
 /** @} */
 
 #ifdef __cplusplus
diff --git a/test/rapp_test_thresh.c b/test/rapp_test_thresh.c
index d86a407..ffee3d2 100644
--- a/test/rapp_test_thresh.c
+++ b/test/rapp_test_thresh.c
@@ -1,4 +1,4 @@
-/*  Copyright (C) 2005-2010, 2014, Axis Communications AB, LUND, SWEDEN
+/*  Copyright (C) 2005-2010, 2014, 2016 Axis Communications AB, LUND, SWEDEN
  *
  *  This file is part of RAPP.
  *
@@ -67,6 +67,9 @@
 static bool
 rapp_test_thresh_driver(int (*test)(), void (*ref)());
 
+static bool
+rapp_test_thresh_pixel_driver(int (*test)(), void (*ref)(), int nbr_thresholds);
+
 
 /*
  * -------------------------------------------------------------
@@ -102,6 +105,12 @@ rapp_test_thresh_ltgt_u8(void)
                                    &rapp_ref_thresh_ltgt_u8);
 }
 
+bool
+rapp_test_thresh_lt_pixel_u8(void)
+{
+    return rapp_test_thresh_pixel_driver(&rapp_thresh_lt_pixel_u8,
+                                         &rapp_ref_thresh_lt_pixel_u8, 1); /* 1 = single threshold buffer */
+}
 
 /*
  * -------------------------------------------------------------
@@ -259,3 +268,150 @@ Done:
 
     return ok;
 }
+
+static bool
+rapp_test_thresh_pixel_driver(int (*test)(), void (*ref)(), int nbr_thresholds)
+{
+    /* Special boundary {low, high} threshold pairs, used by the first iterations */
+    static const uint8_t special[][2] = {{0,    0xfe }, {1,    0xfe },
+                                         {0,    0xff }, {1,    0xff },
+                                         {0xfe, 0}, {0xfe, 1},
+                                         {0xff, 0}, {0xff, 1},
+                                         {0x80, 0x80}, {0x81, 0x80}, {0x80, 0x81}};
+
+    int      dst_dim  = rapp_align(RAPP_TEST_WIDTH);
+    int      src_dim  = rapp_align(RAPP_TEST_WIDTH) + rapp_alignment;
+    int      low_dim  = rapp_align(RAPP_TEST_WIDTH) + rapp_alignment;
+    int      high_dim = rapp_align(RAPP_TEST_WIDTH) + rapp_alignment;
+    uint8_t *dst_buf  = rapp_malloc(dst_dim*RAPP_TEST_HEIGHT, 0);
+    uint8_t *src_buf  = rapp_malloc(src_dim*RAPP_TEST_HEIGHT, 0);
+    uint8_t *low_buf  = rapp_malloc(low_dim*RAPP_TEST_HEIGHT, 0);
+    uint8_t *high_buf = rapp_malloc(high_dim*RAPP_TEST_HEIGHT, 0);
+    uint8_t *ref_buf  = rapp_malloc(dst_dim*RAPP_TEST_HEIGHT, 0); /* Reference output, same layout as dst_buf */
+    int      k;
+    bool     ok = false; /* Set to true only when every iteration passes */
+
+    /* Initialize the source buffer */
+    rapp_test_init(src_buf, 0, src_dim*RAPP_TEST_HEIGHT, 1, false);
+
+    for (k = 0; k < RAPP_TEST_ITER; k++) {
+        int width  = rapp_test_rand(1, RAPP_TEST_WIDTH);
+        int height = rapp_test_rand(1, RAPP_TEST_HEIGHT);
+        uint8_t low, high; /* Uniform threshold values for this iteration */
+
+        /* Verify that we get an overlap error when src overlaps dst */
+        if (/* src == dst */
+            (*test)(dst_buf, dst_dim, dst_buf, src_dim,
+                    width, height, low_buf, low_dim,
+                    high_buf, high_dim) != RAPP_ERR_OVERLAP
+            /* src = far end of dst_buf */
+            || (*test)(dst_buf, dst_dim,
+                       dst_buf + dst_dim*(height - 1) +
+                       rapp_align((width + 7)/8) - rapp_alignment, src_dim,
+                       width, height, low_buf, low_dim,
+                       high_buf, high_dim) != RAPP_ERR_OVERLAP
+            /* src = before dst, but not long enough */
+            || (*test)(dst_buf, dst_dim,
+                       dst_buf - (src_dim*(height - 1) +
+                       rapp_align((width + 7)/8) - rapp_alignment), src_dim,
+                       width, height, low_buf, low_dim,
+                       high_buf, high_dim) != RAPP_ERR_OVERLAP)
+        {
+            DBG("Src/dst overlap undetected\n");
+            goto Done;
+        }
+
+        if (/* low == dst */
+            (*test)(dst_buf, dst_dim, src_buf, src_dim,
+                    width, height, dst_buf, low_dim,
+                    high_buf, high_dim) != RAPP_ERR_OVERLAP
+            /* low = far end of dst_buf */
+            || (*test)(dst_buf, dst_dim, src_buf, src_dim,
+                       width, height,
+                       dst_buf + dst_dim*(height - 1) +
+                       rapp_align((width + 7)/8) - rapp_alignment, low_dim,
+                       high_buf, high_dim) != RAPP_ERR_OVERLAP
+            /* low = before dst, but not long enough */
+            || (*test)(dst_buf, dst_dim, src_buf, src_dim,
+                       width, height,
+                       dst_buf - (low_dim*(height - 1) +
+                       rapp_align((width + 7)/8) - rapp_alignment), low_dim,
+                       high_buf, high_dim) != RAPP_ERR_OVERLAP)
+        {
+            DBG("Low/dst overlap undetected\n");
+            goto Done;
+        }
+
+        if (nbr_thresholds == 2) { /* The high buffer is only checked for two-threshold functions */
+            if (/* high == dst */
+                (*test)(dst_buf, dst_dim, src_buf, src_dim,
+                        width, height, low_buf, low_dim,
+                        dst_buf, high_dim) != RAPP_ERR_OVERLAP
+                /* high = far end of dst_buf */
+                || (*test)(dst_buf, dst_dim, src_buf, src_dim,
+                           width, height, low_buf, low_dim,
+                           dst_buf + dst_dim*(height - 1) +
+                           rapp_align((width + 7)/8) - rapp_alignment, high_dim) != RAPP_ERR_OVERLAP
+                /* high = before dst, but not long enough */
+                || (*test)(dst_buf, dst_dim, src_buf, src_dim,
+                           width, height, low_buf, low_dim,
+                           dst_buf - (high_dim*(height - 1) +
+                           rapp_align((width + 7)/8) - rapp_alignment), high_dim) != RAPP_ERR_OVERLAP)
+            {
+                DBG("High/dst overlap undetected\n");
+                goto Done;
+            }
+        }
+
+        if (k < (int)(sizeof special / sizeof special[0])) {
+            /* Test special cases */
+            low  = special[k][0];
+            high = special[k][1];
+        }
+        else {
+            /* Test random cases */
+            low  = rapp_test_rand(0, 0xff);
+            high = rapp_test_rand(0, 0xff);
+        }
+
+        memset(low_buf, low, low_dim*RAPP_TEST_HEIGHT); /* Every pixel gets the same low threshold */
+        memset(high_buf, high, high_dim*RAPP_TEST_HEIGHT); /* Every pixel gets the same high threshold */
+
+        /* Call RAPP function. NOTE(review): high_buf/high_dim are surplus for single-threshold callees; formally undefined in ISO C */
+        if ((*test)(dst_buf, dst_dim, src_buf, src_dim, width, height,
+                    low_buf, low_dim, high_buf, high_dim) < 0) {
+            DBG("Got FAIL return value\n");
+            goto Done;
+        }
+
+        /* Call reference function with the same argument list as the RAPP function */
+        (*ref)(ref_buf, dst_dim, src_buf, src_dim, width, height,
+               low_buf, low_dim, high_buf, high_dim);
+
+        /* Check result */
+        if (!rapp_test_compare_bin(dst_buf, dst_dim, ref_buf,
+                                   dst_dim, 0, width, height))
+        {
+            DBG("Invalid result\n");
+            DBG("src=\n");
+            rapp_test_dump_u8(src_buf, src_dim, width, height);
+            DBG("dst=\n");
+            rapp_test_dump_bin(dst_buf, dst_dim, 0, width, height);
+            DBG("ref=\n");
+            rapp_test_dump_bin(ref_buf, dst_dim, 0, width, height);
+
+            goto Done;
+        }
+    }
+
+    ok = true;
+
+Done:
+    rapp_free(dst_buf); /* Reached on both success and failure */
+    rapp_free(src_buf);
+    rapp_free(low_buf);
+    rapp_free(high_buf);
+    rapp_free(ref_buf);
+
+    return ok;
+}
\ No newline at 
end of file
diff --git a/test/rapp_tests.def b/test/rapp_tests.def
index a07577f..406c0cc 100644
--- a/test/rapp_tests.def
+++ b/test/rapp_tests.def
@@ -52,6 +52,7 @@ RAPP_TESTH(thresh_gt_u8, "rapp_thresh - thresholding to binary")
 RAPP_TEST(thresh_lt_u8)
 RAPP_TEST(thresh_gtlt_u8)
 RAPP_TEST(thresh_ltgt_u8)
+RAPP_TEST(thresh_lt_pixel_u8)
 
 /* Test cases for rapp_stat functions */
 RAPP_TESTH(stat_sum_bin, "rapp_stat - statistical operations")
diff --git a/test/reference/rapp_ref_thresh.c b/test/reference/rapp_ref_thresh.c
index 567d36e..920ffe2 100644
--- a/test/reference/rapp_ref_thresh.c
+++ b/test/reference/rapp_ref_thresh.c
@@ -1,4 +1,4 @@
-/*  Copyright (C) 2005-2010, Axis Communications AB, LUND, SWEDEN
+/*  Copyright (C) 2005-2016, Axis Communications AB, LUND, SWEDEN
  *
  *  This file is part of RAPP.
  *
@@ -58,6 +58,14 @@ rapp_ref_thresh_driver(uint8_t *dst, int dst_dim,
                        int width, int height, int low, int high,
                        int (*cmp)(int, int, int));
 
+static void
+rapp_ref_thresh_pixel_driver(uint8_t *dst, int dst_dim,
+                             const uint8_t *src, int src_dim,
+                             const uint8_t *low, int low_dim,
+                             const uint8_t *high, int high_dim, /* high may be NULL */
+                             int width, int height,
+                             int (*cmp)(int, int, int));
+
 /*
  * -------------------------------------------------------------
  *  Exported functions
@@ -100,6 +108,16 @@ rapp_ref_thresh_ltgt_u8(uint8_t *dst, int dst_dim,
                            low, high, &rapp_ref_thresh_ltgt);
 }
 
+void
+rapp_ref_thresh_lt_pixel_u8(uint8_t *dst, int dst_dim,
+                            const uint8_t *src, int src_dim,
+                            int width, int height,
+                            const uint8_t *thresh, int thresh_dim)
+{
+    rapp_ref_thresh_pixel_driver(dst, dst_dim, src, src_dim, thresh, thresh_dim, /* thresh is the low buffer */
+                                 NULL, 0, width, height, &rapp_ref_thresh_lt); /* No high threshold buffer */
+}
+
 
 /*
  * -------------------------------------------------------------
@@ -150,3 +168,31 @@ rapp_ref_thresh_driver(uint8_t *dst, int dst_dim,
         }
     }
 }
+
+static void
+rapp_ref_thresh_pixel_driver(uint8_t *dst, int dst_dim,
+                             const uint8_t *src, int src_dim,
+                             const uint8_t *low, int low_dim,
+                             const uint8_t *high, int high_dim,
+                             int width, int height,
+                             int (*cmp)(int, int, int))
+{
+    int x, y;
+
+    for (y = 0; y < height; y++) { /* Visit every pixel, one row at a time */
+        for (x = 0; x < width; x++) {
+            int val = rapp_pixel_get_u8(src, src_dim, x, y); /* Source pixel */
+
+            int thresh_low = rapp_pixel_get_u8(low, low_dim, x, y); /* Per-pixel low threshold */
+
+            int thresh_high = 0; /* Stays 0 when no high buffer is supplied */
+            if (high != NULL) {
+                thresh_high = rapp_pixel_get_u8(high, high_dim, x, y);
+            }
+
+            int bit = (*cmp)(val, thresh_low, thresh_high); /* Comparison result for this pixel */
+
+            rapp_pixel_set_bin(dst, dst_dim, 0, x, y, bit); /* Store it as the binary output pixel */
+        }
+    }
+}
diff --git a/test/reference/rapp_ref_thresh.h b/test/reference/rapp_ref_thresh.h
index f091a02..4e423d9 100644
--- a/test/reference/rapp_ref_thresh.h
+++ b/test/reference/rapp_ref_thresh.h
@@ -1,4 +1,4 @@
-/*  Copyright (C) 2005-2010, Axis Communications AB, LUND, SWEDEN
+/*  Copyright (C) 2005-2016, Axis Communications AB, LUND, SWEDEN
  *
  *  This file is part of RAPP.
  *
@@ -65,6 +65,12 @@ rapp_ref_thresh_ltgt_u8(uint8_t *dst, int dst_dim,
                         const uint8_t *src, int src_dim,
                         int width, int height, int low, int high);
 
+void
+rapp_ref_thresh_lt_pixel_u8(uint8_t *dst, int dst_dim,
+                            const uint8_t *src, int src_dim,
+                            int width, int height,
+                            const uint8_t *thresh, int thresh_dim);
+
 #ifdef __cplusplus
 };
 #endif