Hanjie's Blog

An alpaca with ideals

We want to apply a distortion to an input image using the following Distort() routine:

void Distort(const float &x_src,
             const float &y_src,
             const cv::Mat &control_points,
             const float &r2,
             const cv::Mat &W,
             float &x_dst,
             float &y_dst) {
    int pts_num = control_points.rows;

    x_dst = 0;
    y_dst = 0;
    for (int i = 0; i < pts_num; i++) {
        float x_diff = x_src - control_points.at<cv::Vec2f>(i)[0];
        float y_diff = y_src - control_points.at<cv::Vec2f>(i)[1];

        float kernel = 1.f / sqrt(x_diff * x_diff + y_diff * y_diff + r2);

        x_dst += kernel * W.at<float>(i, 0);
        y_dst += kernel * W.at<float>(i, 1);
    }

    x_dst += (W.at<float>(pts_num, 0) + W.at<float>(pts_num + 1, 0) * x_src + W.at<float>(pts_num + 2, 0) * y_src);
    y_dst += (W.at<float>(pts_num, 1) + W.at<float>(pts_num + 1, 1) * x_src + W.at<float>(pts_num + 2, 1) * y_src);
}
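In my notation (derived from the code above, not the author's), Distort() evaluates an inverse-multiquadric radial-basis-function warp with an affine tail, where $\mathbf{c}_i$ are the control points and $\mathbf{w}_i \in \mathbb{R}^2$ is row $i$ of $W$:

$$
\begin{pmatrix} x_{dst} \\ y_{dst} \end{pmatrix}
= \sum_{i=0}^{n-1} \frac{\mathbf{w}_i}{\sqrt{\lVert (x_{src},\, y_{src}) - \mathbf{c}_i \rVert^2 + r^2}}
+ \mathbf{w}_{n} + \mathbf{w}_{n+1}\, x_{src} + \mathbf{w}_{n+2}\, y_{src},
\qquad n = \texttt{pts\_num}
$$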
Input image: chessboard_input → Processed image: chessboard_warp

From the Distort() routine we can generate the map1 and map2 mapping matrices required by OpenCV's cv::remap function, and then process the input image with cv::remap. We would like to implement the equivalent of cv::remap in Unity.
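As a reference point, here is a minimal sketch of that OpenCV-side step (my own illustration, assuming the control_points, r2, W, width, and height from the code above and an input image named input; map_x and map_y play the roles of map1 and map2):

// Sketch: build absolute-pixel remap tables from Distort().
cv::Mat map_x(height, width, CV_32FC1);
cv::Mat map_y(height, width, CV_32FC1);
for (int y = 0; y < height; y++) {
    for (int x = 0; x < width; x++) {
        float x_distort, y_distort;
        Distort(float(x), float(y), control_points, r2, W, x_distort, y_distort);
        map_x.at<float>(y, x) = x_distort; // source x for destination pixel (x, y)
        map_y.at<float>(y, x) = y_distort; // source y for destination pixel (x, y)
    }
}
cv::Mat warped;
cv::remap(input, warped, map_x, map_y, cv::INTER_LINEAR);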

First, we generate the per-pixel LUT mapping matrix from Distort() and save it to the file screen_calibration_lut.bin:

void GenerateLut(const cv::Mat &control_points,
                 const float &r2,
                 const cv::Mat &W,
                 const int width,
                 const int height,
                 cv::Mat &lut) {
    // Generate Texcoords LUT
    // -------------------------------------------------------------------------------------------
    // By default, gl_FragCoord assumes a lower-left origin for window coordinates and assumes
    // pixel centers are located at half-pixel coordinates. For example, the (x, y) location
    // (0.5, 0.5) is returned for the lower-left-most pixel in a window.
    lut = cv::Mat::zeros(height, width, CV_32FC2);
    float x_distort, y_distort, x_cv, y_cv, x_tex, y_tex;
    // float x_frag, y_frag;
    for (int y = 0; y < height; y++) {
        // y_frag = y + 0.5f;
        // y_cv = float(img_height) - y_frag - 0.5f;
        y_cv = float(height) - y - 1;
        for (int x = 0; x < width; x++) {
            // x_frag = x + 0.5f;
            // x_cv = x_frag - 0.5f;
            x_cv = x;

            Distort(x_cv, y_cv, control_points, r2, W, x_distort, y_distort);
            // std::cout<<"["<<x<<", "<<y<<"] -> ["<<x_distort<<", "<<y_distort<<"]"<<std::endl;

            x_tex = (x_distort + 0.5f) / float(width);
            y_tex = 1.f - (y_distort + 0.5f) / float(height);

            lut.at<cv::Vec2f>(y, x)[0] = x_tex;
            lut.at<cv::Vec2f>(y, x)[1] = y_tex;
        }
    }
    // -------------------------------------------------------------------------------------------
}

int main(int argc, char *argv[]) {
    // ...
    cv::Mat lut;
    GenerateLut(control_points, r2, weights, screen_size.width, screen_size.height, lut);

    std::string screen_calibration_lut_file = calibration_result_save_path + "/screen_calibration_lut.bin";

    std::ofstream out(screen_calibration_lut_file, std::ios::out | std::ios::binary | std::ios::trunc);
    if (!lut.isContinuous()) { lut = lut.clone(); }
    out.write((char *)lut.data, 2 * lut.cols * lut.rows * sizeof(float));
    out.close();
    // ...
}
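Before moving to Unity, the LUT can be sanity-checked back in OpenCV by converting the normalized texture coordinates back to pixel coordinates and feeding them to cv::remap. A minimal sketch (VerifyLut and its variable names are mine; it assumes the half-pixel and y-flip conventions used by GenerateLut above):

void VerifyLut(const cv::Mat &lut, const cv::Mat &input, cv::Mat &warped) {
    cv::Mat map_x(lut.size(), CV_32FC1);
    cv::Mat map_y(lut.size(), CV_32FC1);
    for (int y = 0; y < lut.rows; y++) {
        int y_cv = lut.rows - 1 - y; // GenerateLut fills rows bottom-up (GL convention)
        for (int x = 0; x < lut.cols; x++) {
            const cv::Vec2f uv = lut.at<cv::Vec2f>(y, x);
            map_x.at<float>(y_cv, x) = uv[0] * lut.cols - 0.5f;         // invert x_tex
            map_y.at<float>(y_cv, x) = (1.f - uv[1]) * lut.rows - 0.5f; // invert y_tex
        }
    }
    cv::remap(input, warped, map_x, map_y, cv::INTER_LINEAR);
}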

On the Unity side, we read the screen_calibration_lut.bin file and use the Unity screen-effect callback OnRenderImage()[1] to remap the image after rendering completes:

using System.Collections;
using System.Collections.Generic;
using System.IO;
using UnityEngine;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;

public class ScreenUndistortion : MonoBehaviour
{
    public bool enable_screen_undistortion = true;
    public string screen_calibration_file = "screen_calibration_lut.bin";
    public int screen_width = 1920;
    public int screen_height = 1080;

    private Material material;

    void Start()
    {
        if (enable_screen_undistortion == false) return;

        // ReadCalibrationData
        // ------------------------------------------------------------------------
        float[] lut = new float[screen_width * screen_height * 2];
        if (!ReadCalibrationData(screen_calibration_file, lut))
        {
            Debug.Log("Cannot open file: " + screen_calibration_file);
            return;
        }
        // ------------------------------------------------------------------------

        // material
        // ------------------------------------------------------------------------
        Shader shader = Shader.Find("Hidden/ScreenUndistortion");
        if (shader == null) {
            Debug.Log("Cannot find shader!");
            return;
        }

        material = new Material(shader);

        Texture2D tex = new Texture2D(screen_width, screen_height, TextureFormat.RGFloat, true);
        // public void SetPixelData(NativeArray<T> data, int mipLevel, int sourceDataStartIndex = 0);
        // - data: Data array to initialize texture pixels with.
        // - mipLevel: Mip level to fill.
        // - sourceDataStartIndex: Index in the source array to start copying from (default 0).
        tex.SetPixelData(lut, 0, 0);
        // public void Apply(bool updateMipmaps = true, bool makeNoLongerReadable = false);
        // - updateMipmaps: When set to true, mipmap levels are recalculated.
        // - makeNoLongerReadable: When set to true, Unity discards the copy of pixel data in CPU-addressable memory after this operation.
        tex.Apply(false, false);
        material.SetTexture("_LutTex", tex);

        MeshRenderer meshRenderer = gameObject.AddComponent<MeshRenderer>();
        meshRenderer.material = material;
    }

    bool ReadCalibrationData(string file, float[] lut)
    {
        if (lut == null || !File.Exists(file))
        {
            return false;
        }

        using (BinaryReader reader = new BinaryReader(File.Open(file, FileMode.Open)))
        {
            for (int i = 0; i < lut.Length; i++)
            {
                lut[i] = reader.ReadSingle();
            }
        }

        // Debug.Log(lut[0] + "," + lut[1] + "," + lut[2] + "," + lut[3]);

        return true;
    }

    void OnRenderImage(RenderTexture src, RenderTexture dest)
    {
        if (enable_screen_undistortion == false || material == null)
        {
            Graphics.Blit(src, dest);
        }
        else
        {
            Graphics.Blit(src, dest, material);
        }
    }
}

The shader file ScreenUndistortion.shader used above is:

Shader "Hidden/ScreenUndistortion"
{
    Properties
    {
        _MainTex ("Texture", 2D) = "white" {}
        _LutTex ("Texture", 2D) = "white" {}
    }
    SubShader
    {
        // No culling or depth
        Cull Off ZWrite Off ZTest Always

        Pass
        {
            CGPROGRAM
            #pragma vertex vert
            #pragma fragment frag

            #include "UnityCG.cginc"

            struct appdata
            {
                float4 vertex : POSITION;
                float2 uv : TEXCOORD0;
            };

            struct v2f
            {
                // Unity stores UVs in 0-1 space. [0,0] represents the bottom-left corner of the
                // texture, and [1,1] represents the top-right. Values are not clamped; you can
                // use values below 0 and above 1 if needed.
                float2 uv : TEXCOORD0;
                float4 vertex : SV_POSITION;
            };

            v2f vert (appdata v)
            {
                v2f o;
                o.vertex = UnityObjectToClipPos(v.vertex);
                o.uv = v.uv;
                return o;
            }

            sampler2D _MainTex;
            sampler2D _LutTex;

            fixed4 frag (v2f i) : SV_Target
            {
                float4 uv_distort = tex2D(_LutTex, i.uv);

                float4 col;

                if (uv_distort.x <= 0.0f || uv_distort.y <= 0.0f || uv_distort.x >= 1.0f || uv_distort.y >= 1.0f)
                {
                    col = float4(0, 0, 0, 1);
                }
                else
                {
                    col = tex2D(_MainTex, float2(uv_distort.x, uv_distort.y));
                }

                return col;
            }
            ENDCG
        }
    }
}

Background

We want to detect whether an input image is sharp. A traditional approach applies the Laplacian operator to compute the second-derivative image of the input, which captures its edge information, and then takes the variance of that image; the variance measures how blurry the image is[1]. The smaller the variance, the blurrier the image.

The problem with this method is that it is hard to choose a threshold that separates sharp from blurry images: the threshold between sharp and blurry shifts from scene to scene.

The downside is that the Laplacian method required significant manual tuning to define the “threshold” at which an image was considered blurry or not. If you could control your lighting conditions, environment, and image capturing process, it worked quite well — but if not, you would obtain mixed results, to say the least.[2]
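For completeness, a minimal OpenCV sketch of this variance-of-Laplacian measure (my own illustration, not the author's code; the helper name is mine):

// Variance of the Laplacian: smaller values indicate a blurrier image.
double LaplacianBlurValue(const cv::Mat &img) {
    cv::Mat gray, lap;
    if (img.channels() == 3) cv::cvtColor(img, gray, cv::COLOR_BGR2GRAY);
    else gray = img;
    cv::Laplacian(gray, lap, CV_64F);  // second-derivative (edge) image
    cv::Scalar mean, stddev;
    cv::meanStdDev(lap, mean, stddev);
    return stddev[0] * stddev[0];
}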

Example results: detecting_blur_result_006, detecting_blur_result_004

Following the methods proposed in [3][4], we implemented an FFT-based image blur detection algorithm. The algorithm first transforms the input image into its frequency spectrum; in the (shifted) spectrum, the center represents low frequencies, with frequency increasing outward in every direction. Masking out the central region of the spectrum high-pass filters the image, keeping high-frequency content such as edges. We then take the mean of the magnitude to obtain the average high-frequency amplitude of the image, and use this mean amplitude to judge whether the image is blurry. The smaller the mean amplitude, the blurrier the image.
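In terms of the code below, the resulting score can be summarized as (my notation; $M$ is the high-pass mask and $I$ the grayscale input):

$$
\text{blur\_value} = 20 \cdot \operatorname{mean}\Big( \log \big\lvert \mathcal{F}^{-1}\big[ M \odot \mathcal{F}(I) \big] \big\rvert \Big),
\qquad \text{blurry} \iff \text{blur\_value} < \text{thresh}
$$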

Code

  • For an all-black image, we set the Blur Value to 100 and treat the image as blurry.
cv::Mat ColorMat(const cv::Mat &mat_float, bool draw_colorbar = false,
                 const bool is_white_background = false,
                 double min_val = 1, double max_val = 0,
                 const cv::Mat &user_color = cv::Mat(),
                 int colorbar_width = 50, int colorbar_gap = 5) {
    // The defaults min_val = 1 > max_val = 0 force an automatic value range.
    if (min_val > max_val) {
        cv::minMaxLoc(mat_float, &min_val, &max_val);
    }

    cv::Mat mat;
    mat_float.convertTo(mat, CV_8UC1, 255 / (max_val - min_val), -255 * min_val / (max_val - min_val));

    cv::Mat mat_show;

    if (user_color.empty()) {
        cv::applyColorMap(mat, mat_show, cv::COLORMAP_JET);
    } else {
        cv::applyColorMap(mat, mat_show, user_color);
    }

    if (is_white_background) {
        cv::Mat mask;
        cv::threshold(mat, mask, 0, 255, cv::THRESH_BINARY_INV);
        cv::Mat img_white(mat.size(), CV_8UC3, cv::Scalar(255, 255, 255));
        img_white.copyTo(mat_show, mask);
    }

    if (draw_colorbar) {
        cv::Mat color_bar_value(cv::Size(colorbar_width, mat_show.rows), CV_8UC1);
        cv::Mat color_bar;

        for (int i = 0; i < mat_show.rows; i++) {
            uchar value = 255 - 255 * float(i) / float(mat_show.rows);
            for (int j = 0; j < colorbar_width; j++) {
                color_bar_value.at<uchar>(i, j) = value;
            }
        }

        if (user_color.empty()) {
            cv::applyColorMap(color_bar_value, color_bar, cv::COLORMAP_JET);
        } else {
            cv::applyColorMap(color_bar_value, color_bar, user_color);
        }

        cv::Mat mat_colorbar_show(cv::Size(mat_show.cols + colorbar_width + colorbar_gap, mat_show.rows), CV_8UC3, cv::Scalar(255, 255, 255));

        mat_show.copyTo(mat_colorbar_show(cv::Rect(0, 0, mat_show.cols, mat_show.rows)));
        color_bar.copyTo(mat_colorbar_show(cv::Rect(mat_show.cols + colorbar_gap, 0, color_bar.cols, color_bar.rows)));

        // ToStr: number-to-string helper from the author's codebase.
        cv::putText(mat_colorbar_show, ToStr(max_val), cv::Point(mat_show.cols + colorbar_gap, 20), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(255, 255, 255), 1);
        cv::putText(mat_colorbar_show, ToStr(min_val), cv::Point(mat_show.cols + colorbar_gap, mat_show.rows - 10), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(255, 255, 255), 1);

        mat_show = mat_colorbar_show;
    }

    return mat_show;
}

cv::Mat ShowColorMat(const std::string &name, const cv::Mat &mat_float, bool draw_colorbar = false,
                     const float scale = 1, const bool is_white_background = false,
                     double min_val = 1, double max_val = 0,
                     const cv::Mat &user_color = cv::Mat(),
                     int colorbar_width = 50, int colorbar_gap = 5) {
    cv::Mat mat_resize;
    cv::resize(mat_float, mat_resize, cv::Size(), scale, scale, cv::INTER_NEAREST);
    cv::Mat img = ColorMat(mat_resize, draw_colorbar, is_white_background, min_val, max_val, user_color, colorbar_width, colorbar_gap);
    cv::imshow(name, img);
    return img;
}

cv::Mat FilterMask(const float radius, const cv::Size &mask_size) {
    cv::Mat mask = cv::Mat::ones(mask_size, CV_8UC1);
    cv::circle(mask, cv::Point(mask_size.width / 2, mask_size.height / 2), radius, cv::Scalar(0), -1);

    // https://datahacker.rs/opencv-discrete-fourier-transform-part2/
    cv::Mat mask_float, filter_mask;
    mask.convertTo(mask_float, CV_32F);
    std::vector<cv::Mat> mask_merge = {mask_float, mask_float};
    cv::merge(mask_merge, filter_mask);
    return filter_mask;
}

bool BlurDetection(const cv::Mat &img,
                   const cv::Mat &filter_mask,
                   float &blur_value,
                   const float thresh = 10,
                   const bool debug_show = false) {
    // https://pyimagesearch.com/2020/06/15/opencv-fast-fourier-transform-fft-for-blur-detection-in-images-and-video-streams/
    // https://github.com/Qengineering/Blur-detection-with-FFT-in-C/blob/master/main.cpp
    // https://docs.opencv.org/3.4/d8/d01/tutorial_discrete_fourier_transform.html

    cv::Mat img_scale, img_gray, img_fft;
    cv::resize(img, img_scale, filter_mask.size());

    if (img_scale.channels() == 3) {
        cv::cvtColor(img_scale, img_gray, cv::COLOR_BGR2GRAY);
    } else {
        img_gray = img_scale;
    }
    img_gray.convertTo(img_fft, CV_32F);

    // If DFT_SCALE is set, the scaling is done after the transformation.
    // When DFT_COMPLEX_OUTPUT is set, the output is a complex matrix of the same size as input.
    cv::dft(img_fft, img_fft, cv::DFT_COMPLEX_OUTPUT); // cv::DFT_SCALE |

    // Zero-out the center of the FFT shift (i.e., remove low frequencies), apply the inverse
    // shift so that the DC component once again becomes the top-left, and then apply the
    // inverse FFT.

    // Rearrange the quadrants of the result so that the origin (0, 0) corresponds to the image center.
    int cx = img_gray.cols / 2;
    int cy = img_gray.rows / 2;

    // Center the low frequencies in the middle by shuffling the quadrants.
    cv::Mat q0(img_fft, cv::Rect(0, 0, cx, cy));   // Top-Left - Create a ROI per quadrant
    cv::Mat q1(img_fft, cv::Rect(cx, 0, cx, cy));  // Top-Right
    cv::Mat q2(img_fft, cv::Rect(0, cy, cx, cy));  // Bottom-Left
    cv::Mat q3(img_fft, cv::Rect(cx, cy, cx, cy)); // Bottom-Right

    cv::Mat tmp; // swap quadrants (Top-Left with Bottom-Right)
    q0.copyTo(tmp);
    q3.copyTo(q0);
    tmp.copyTo(q3);

    q1.copyTo(tmp); // swap quadrants (Top-Right with Bottom-Left)
    q2.copyTo(q1);
    tmp.copyTo(q2);

    if (debug_show) {
        std::vector<cv::Mat> planes;
        cv::Mat fft_mag;
        cv::split(img_fft, planes); // planes[0] = Re(DFT(I)), planes[1] = Im(DFT(I))
        cv::magnitude(planes[0], planes[1], fft_mag);
        fft_mag += cv::Scalar::all(1); // switch to logarithmic scale
        cv::log(fft_mag, fft_mag);
        ShowColorMat("fft", fft_mag, true, 1, false, 0, 20);
    }

    // Block the low frequencies.
    cv::mulSpectrums(img_fft, filter_mask, img_fft, 0); // multiply 2 spectrums

    if (debug_show) {
        std::vector<cv::Mat> planes;
        cv::Mat fft_mag;
        cv::split(img_fft, planes); // planes[0] = Re(DFT(I)), planes[1] = Im(DFT(I))
        cv::magnitude(planes[0], planes[1], fft_mag);
        fft_mag += cv::Scalar::all(1); // switch to logarithmic scale
        cv::log(fft_mag, fft_mag);
        ShowColorMat("fft block low frequencies", fft_mag, true, 1, false, 0, 20);
    }

    // Shuffle the quadrants back to their original positions.
    cv::Mat p0(img_fft, cv::Rect(0, 0, cx, cy));   // Top-Left - Create a ROI per quadrant
    cv::Mat p1(img_fft, cv::Rect(cx, 0, cx, cy));  // Top-Right
    cv::Mat p2(img_fft, cv::Rect(0, cy, cx, cy));  // Bottom-Left
    cv::Mat p3(img_fft, cv::Rect(cx, cy, cx, cy)); // Bottom-Right

    p0.copyTo(tmp);
    p3.copyTo(p0);
    tmp.copyTo(p3);

    p1.copyTo(tmp); // swap quadrants (Top-Right with Bottom-Left)
    p2.copyTo(p1);
    tmp.copyTo(p2);

    cv::dft(img_fft, img_fft, cv::DFT_SCALE | cv::DFT_INVERSE);

    std::vector<cv::Mat> complex_number;
    cv::Mat img_blur;
    cv::split(img_fft, complex_number);
    cv::magnitude(complex_number[0], complex_number[1], img_blur); // abs of complex number

    double min_val, max_val;
    cv::minMaxLoc(img_blur, &min_val, &max_val);

    if (max_val <= 0.f) {
        // All-black image.
        blur_value = 100;
        return true;
    }

    cv::log(img_blur, img_blur);
    blur_value = cv::mean(img_blur)[0] * 20.f;

    return blur_value < thresh;
}

int main(int argc, char *argv[]) {
    float blur_thresh = 14;
    float filter_mask_radius = 50;
    cv::Size mask_size = cv::Size(640, 360);

    cv::Mat img = cv::Mat::zeros(cv::Size(1280, 720), CV_8UC1);
    cv::Mat filter_mask = FilterMask(filter_mask_radius, mask_size);
    float blur_value;
    BlurDetection(img, filter_mask, blur_value, blur_thresh, true);
    cv::imshow("input", img);
    std::cout << blur_value << std::endl;
    cv::waitKey(0);

    return 0;
}

Validation

Parameters: radius of filter_mask = 50, size of filter_mask = (640, 360), blur_thresh = 16

Input | FFT | FFT without low frq. | Blur Value | Blur?
mesuem_18_1034 | fft_mesuem_18_1034 | fft_block_mesuem_18_1034 | 16.24 | False
mesuem_18_1012 | fft_mesuem_18_1012 | fft_block_mesuem_18_1012 | 11.67 | True
mesuem_18_18711 | fft_mesuem_18_18711 | fft_block_mesuem_18_18711 | 16.86 | False
mesuem_18_18727 | fft_mesuem_18_18727 | fft_block_mesuem_18_18727 | 12.91 | True

Research notes on the 3D object tracking algorithm SRT3D, covering the following papers:

Papers
Cremers, D., Rousson, M., Deriche, R., 2007. A Review of Statistical Approaches to Level Set Segmentation: Integrating Color, Texture, Motion and Shape. Int J Comput Vision 72, 195–215. https://doi.org/10.1007/s11263-006-8711-1
Bibby, C., Reid, I., 2008. Robust Real-Time Visual Tracking Using Pixel-Wise Posteriors, in: Forsyth, D., Zisserman, A. (Eds.), Computer Vision – ECCV 2008, Lecture Notes in Computer Science. Springer Berlin Heidelberg, Berlin, Heidelberg, pp. 831–844. https://doi.org/10.1007/978-3-540-88688-4_61
Hexner, J., Hagege, R.R., 2016. 2D-3D Pose Estimation of Heterogeneous Objects Using a Region Based Approach. Int J Comput Vis 118, 95–112. https://doi.org/10.1007/s11263-015-0873-2
Tjaden, H., Schwanecke, U., Schomer, E., 2017. Real-Time Monocular Pose Estimation of 3D Objects Using Temporally Consistent Local Color Histograms, in: 2017 IEEE International Conference on Computer Vision (ICCV). Presented at the 2017 IEEE International Conference on Computer Vision (ICCV), IEEE, Venice, pp. 124–132. https://doi.org/10.1109/ICCV.2017.23
Kehl, W., Tombari, F., Ilic, S., Navab, N., 2017. Real-Time 3D Model Tracking in Color and Depth on a Single CPU Core, in: 2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR). Presented at the 2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), IEEE, Honolulu, HI, pp. 465–473. https://doi.org/10.1109/CVPR.2017.57
Tjaden, H., Schwanecke, U., Schömer, E., Cremers, D., 2019. A Region-based Gauss-Newton Approach to Real-Time Monocular Multiple Object Tracking. IEEE Trans. Pattern Anal. Mach. Intell. 41, 1797–1812. https://doi.org/10.1109/TPAMI.2018.2884990
Huang, H., Zhong, F., Qin, X., 2021. Pixel-Wise Weighted Region-Based 3D Object Tracking using Contour Constraints. IEEE Trans. Visual. Comput. Graphics 1–1. https://doi.org/10.1109/TVCG.2021.3085197
Stoiber, M., Pfanne, M., Strobl, K.H., Triebel, R. and Albu-Schäffer, A., 2020. A sparse gaussian approach to region-based 6DoF object tracking. In Proceedings of the Asian Conference on Computer Vision.

Research notes: SRT3D 3D Object Tracking Algorithm Study Notes
