Prev Tutorial: Changing the contrast and brightness of an image!
Next Tutorial: File Input and Output using XML / YAML / JSON files
| |
Original author | Bernát Gábor |
Compatibility | OpenCV >= 3.0 |
Goal
We'll seek answers for the following questions:
Source code
C++
You can download this from here or find it in the samples/cpp/tutorial_code/core/discrete_fourier_transform/discrete_fourier_transform.cpp
of the OpenCV source code library.
Java
You can download this from here or find it in the samples/java/tutorial_code/core/discrete_fourier_transform/DiscreteFourierTransform.java
of the OpenCV source code library.
Python
You can download this from here or find it in the samples/python/tutorial_code/core/discrete_fourier_transform/discrete_fourier_transform.py
of the OpenCV source code library.
Here's a sample usage of dft() :
C++
#include <iostream>
static void help(char ** argv)
{
cout << endl
<< "This program demonstrated the use of the discrete Fourier transform (DFT). " << endl
<< "The dft of an image is taken and it's power spectrum is displayed." << endl << endl
<< "Usage:" << endl
<< argv[0] << " [image_name -- default lena.jpg]" << endl << endl;
}
int main(
int argc,
char ** argv)
{
help(argv);
const char* filename = argc >=2 ? argv[1] : "lena.jpg";
if( I.empty()){
cout << "Error opening image" << endl;
return EXIT_FAILURE;
}
Mat padded;
Mat planes[] = {Mat_<float>(padded), Mat::zeros(padded.size(),
CV_32F)};
Mat complexI;
merge(planes, 2, complexI);
Mat magI = planes[0];
magI += Scalar::all(1);
magI = magI(
Rect(0, 0, magI.cols & -2, magI.rows & -2));
int cx = magI.cols/2;
int cy = magI.rows/2;
Mat q0(magI,
Rect(0, 0, cx, cy));
Mat q1(magI,
Rect(cx, 0, cx, cy));
Mat q2(magI,
Rect(0, cy, cx, cy));
Mat q3(magI,
Rect(cx, cy, cx, cy));
Mat tmp;
q0.copyTo(tmp);
q3.copyTo(q0);
tmp.copyTo(q3);
q1.copyTo(tmp);
q2.copyTo(q1);
tmp.copyTo(q2);
imshow(
"spectrum magnitude", magI);
return EXIT_SUCCESS;
}
void split(const Mat &src, Mat *mvbegin)
Divides a multi-channel array into several single-channel arrays.
void copyMakeBorder(InputArray src, OutputArray dst, int top, int bottom, int left, int right, int borderType, const Scalar &value=Scalar())
Forms a border around an image.
int getOptimalDFTSize(int vecsize)
Returns the optimal DFT size for a given vector size.
void magnitude(InputArray x, InputArray y, OutputArray magnitude)
Calculates the magnitude of 2D vectors.
void merge(const Mat *mv, size_t count, OutputArray dst)
Creates one multi-channel array out of several single-channel ones.
void normalize(InputArray src, InputOutputArray dst, double alpha=1, double beta=0, int norm_type=NORM_L2, int dtype=-1, InputArray mask=noArray())
Normalizes the norm or value range of an array.
void log(InputArray src, OutputArray dst)
Calculates the natural logarithm of every array element.
void dft(InputArray src, OutputArray dst, int flags=0, int nonzeroRows=0)
Performs a forward or inverse Discrete Fourier transform of a 1D or 2D floating-point array.
@ BORDER_CONSTANT
iiiiii|abcdefgh|iiiiiii with some specified i
Definition: core/include/opencv2/core/base.hpp:269
@ NORM_MINMAX
flag
Definition: core/include/opencv2/core/base.hpp:207
Rect2i Rect
Definition: modules/core/include/opencv2/core/types.hpp:496
#define CV_32F
Definition: core/include/opencv2/core/hal/interface.h:78
cv::String findFile(const cv::String &relative_path, bool required=true, bool silentMode=false)
Try to find requested data file.
void imshow(const String &winname, InputArray mat)
Displays an image in the specified window.
int waitKey(int delay=0)
Waits for a pressed key.
@ IMREAD_GRAYSCALE
If set, always convert image to the single channel grayscale image (codec internal conversion).
Definition: imgcodecs.hpp:70
CV_EXPORTS_W Mat imread(const String &filename, int flags=IMREAD_COLOR_BGR)
Loads an image from a file.
int main(int argc, char *argv[])
Definition: highgui_qt.cpp:3
Definition: core/include/opencv2/core.hpp:107
Java
import org.opencv.core.*;
import org.opencv.highgui.HighGui;
import org.opencv.imgcodecs.Imgcodecs;
import java.util.List;
import java.util.*;
class DiscreteFourierTransformRun{
private void help() {
System.out.println("" +
"This program demonstrated the use of the discrete Fourier transform (DFT). \n" +
"The dft of an image is taken and it's power spectrum is displayed.\n" +
"Usage:\n" +
"./DiscreteFourierTransform [image_name -- default ../data/lena.jpg]");
}
public void run(
String[] args){
help();
String filename = ((args.length > 0) ? args[0] :
"../data/lena.jpg");
Mat I = Imgcodecs.imread(filename, Imgcodecs.IMREAD_GRAYSCALE);
if( I.empty() ) {
System.out.println("Error opening image");
System.exit(-1);
}
Mat padded = new Mat();
int m = Core.getOptimalDFTSize( I.rows() );
int n = Core.getOptimalDFTSize( I.cols() );
Core.copyMakeBorder(I, padded, 0, m - I.rows(), 0, n - I.cols(), Core.BORDER_CONSTANT,
Scalar.
all(0));
List<Mat> planes = new ArrayList<Mat>();
padded.convertTo(padded, CvType.CV_32F);
planes.add(padded);
planes.add(Mat.zeros(padded.size(), CvType.CV_32F));
Mat complexI = new Mat();
Core.merge(planes, complexI);
Core.dft(complexI, complexI);
Core.split(complexI, planes);
Core.magnitude(planes.get(0), planes.get(1), planes.get(0));
Mat magI = planes.get(0);
Mat matOfOnes = Mat.ones(magI.size(), magI.type());
Core.add(matOfOnes, magI, magI);
Core.log(magI, magI);
magI = magI.submat(
new Rect(0, 0, magI.cols() & -2, magI.rows() & -2));
int cx = magI.cols()/2;
int cy = magI.rows()/2;
Mat q0 =
new Mat(magI,
new Rect(0, 0, cx, cy));
Mat q1 =
new Mat(magI,
new Rect(cx, 0, cx, cy));
Mat q2 =
new Mat(magI,
new Rect(0, cy, cx, cy));
Mat q3 =
new Mat(magI,
new Rect(cx, cy, cx, cy));
Mat tmp = new Mat();
q0.copyTo(tmp);
q3.copyTo(q0);
tmp.copyTo(q3);
q1.copyTo(tmp);
q2.copyTo(q1);
tmp.copyTo(q2);
magI.convertTo(magI, CvType.CV_8UC1);
Core.normalize(magI, magI, 0, 255, Core.NORM_MINMAX, CvType.CV_8UC1);
HighGui.imshow("Input Image" , I );
HighGui.imshow("Spectrum Magnitude", magI);
HighGui.waitKey();
System.exit(0);
}
}
public class DiscreteFourierTransform {
System.loadLibrary(Core.NATIVE_LIBRARY_NAME);
new DiscreteFourierTransformRun().run(args);
}
}
static Scalar_< _Tp > all(_Tp v0)
returns a scalar with all elements set to v0
std::string String
Definition: cvstd.hpp:149
Scalar_< double > Scalar
Definition: modules/core/include/opencv2/core/types.hpp:709
Python
from __future__ import print_function
import sys
import cv2 as cv
import numpy as np
def print_help():
This program demonstrated the use of the discrete Fourier transform (DFT).
The dft of an image is taken and it's power spectrum is displayed.
Usage:
discrete_fourier_transform.py [image_name -- default lena.jpg]''')
def main(argv):
print_help()
filename = argv[0] if len(argv) > 0 else 'lena.jpg'
if I is None:
print(
'Error opening image')
return -1
rows, cols = I.shape
padded =
cv.copyMakeBorder(I, 0, m - rows, 0, n - cols, cv.BORDER_CONSTANT, value=[0, 0, 0])
planes = [np.float32(padded), np.zeros(padded.shape, np.float32)]
magI = planes[0]
matOfOnes = np.ones(magI.shape, dtype=magI.dtype)
magI_rows, magI_cols = magI.shape
magI = magI[0:(magI_rows & -2), 0:(magI_cols & -2)]
cx = int(magI_rows/2)
cy = int(magI_cols/2)
q0 = magI[0:cx, 0:cy]
q1 = magI[cx:cx+cx, 0:cy]
q2 = magI[0:cx, cy:cy+cy]
q3 = magI[cx:cx+cx, cy:cy+cy]
tmp = np.copy(q0)
magI[0:cx, 0:cy] = q3
magI[cx:cx + cx, cy:cy + cy] = tmp
tmp = np.copy(q1)
magI[cx:cx + cx, 0:cy] = q2
magI[0:cx, cy:cy + cy] = tmp
if __name__ == "__main__":
void add(InputArray src1, InputArray src2, OutputArray dst, InputArray mask=noArray(), int dtype=-1)
Calculates the per-element sum of two arrays or an array and a scalar.
static void print(const std::vector< _Tp > &shape, const String &name="")
Definition: shape_utils.hpp:228
Explanation
The Fourier Transform will decompose an image into its sinus and cosines components. In other words, it will transform an image from its spatial domain to its frequency domain. The idea is that any function may be approximated exactly with the sum of infinite sinus and cosines functions. The Fourier Transform is a way how to do this. Mathematically a two dimensional images Fourier transform is:
\[F(k,l) = \displaystyle\sum\limits_{i=0}^{N-1}\sum\limits_{j=0}^{N-1} f(i,j)e^{-i2\pi(\frac{ki}{N}+\frac{lj}{N})}\]
\[e^{ix} = \cos{x} + i\sin {x}\]
Here f is the image value in its spatial domain and F in its frequency domain. The result of the transformation is complex numbers. Displaying this is possible either via a real image and a complex image or via a magnitude and a phase image. However, throughout the image processing algorithms only the magnitude image is interesting as this contains all the information we need about the images geometric structure. Nevertheless, if you intend to make some modifications of the image in these forms and then you need to retransform it you'll need to preserve both of these.
In this sample I'll show how to calculate and show the magnitude image of a Fourier Transform. In case of digital images are discrete. This means they may take up a value from a given domain value. For example in a basic gray scale image values usually are between zero and 255. Therefore the Fourier Transform too needs to be of a discrete type resulting in a Discrete Fourier Transform (DFT). You'll want to use this whenever you need to determine the structure of an image from a geometrical point of view. Here are the steps to follow (in case of a gray scale input image I):
Expand the image to an optimal size
The performance of a DFT is dependent of the image size. It tends to be the fastest for image sizes that are multiple of the numbers two, three and five. Therefore, to achieve maximal performance it is generally a good idea to pad border values to the image to get a size with such traits. The getOptimalDFTSize() returns this optimal size and we can use the copyMakeBorder() function to expand the borders of an image (the appended pixels are initialized with zero):
C++
Java
Mat padded = new Mat();
int m = Core.getOptimalDFTSize( I.rows() );
int n = Core.getOptimalDFTSize( I.cols() );
Core.copyMakeBorder(I, padded, 0, m - I.rows(), 0, n - I.cols(), Core.BORDER_CONSTANT,
Scalar.
all(0));
Python
rows, cols = I.shape
padded =
cv.copyMakeBorder(I, 0, m - rows, 0, n - cols, cv.BORDER_CONSTANT, value=[0, 0, 0])
Make place for both the complex and the real values
The result of a Fourier Transform is complex. This implies that for each image value the result is two image values (one per component). Moreover, the frequency domains range is much larger than its spatial counterpart. Therefore, we store these usually at least in a float format. Therefore we'll convert our input image to this type and expand it with another channel to hold the complex values:
C++
Mat planes[] = {Mat_<float>(padded), Mat::zeros(padded.size(),
CV_32F)};
Mat complexI;
merge(planes, 2, complexI);
Java
List<Mat> planes = new ArrayList<Mat>();
padded.convertTo(padded, CvType.CV_32F);
planes.add(padded);
planes.add(Mat.zeros(padded.size(), CvType.CV_32F));
Mat complexI = new Mat();
Core.merge(planes, complexI);
Python
planes = [np.float32(padded), np.zeros(padded.shape, np.float32)]
Make the Discrete Fourier Transform
It's possible an in-place calculation (same input as output):
C++
Java
Core.dft(complexI, complexI);
Python
Transform the real and complex values to magnitude
A complex number has a real (Re) and a complex (imaginary - Im) part. The results of a DFT are complex numbers. The magnitude of a DFT is:
\[M = \sqrt[2]{ {Re(DFT(I))}^2 + {Im(DFT(I))}^2}\]
Translated to OpenCV code:
C++
Java
Core.split(complexI, planes);
Core.magnitude(planes.get(0), planes.get(1), planes.get(0));
Mat magI = planes.get(0);
Python
Switch to a logarithmic scale
It turns out that the dynamic range of the Fourier coefficients is too large to be displayed on the screen. We have some small and some high changing values that we can't observe like this. Therefore the high values will all turn out as white points, while the small ones as black. To use the gray scale values to for visualization we can transform our linear scale to a logarithmic one:
\[M_1 = \log{(1 + M)}\]
Translated to OpenCV code:
C++
Java
Mat matOfOnes = Mat.ones(magI.size(), magI.type());
Core.add(matOfOnes, magI, magI);
Core.log(magI, magI);
Python
matOfOnes = np.ones(magI.shape, dtype=magI.dtype)
Crop and rearrange
Remember, that at the first step, we expanded the image? Well, it's time to throw away the newly introduced values. For visualization purposes we may also rearrange the quadrants of the result, so that the origin (zero, zero) corresponds with the image center.
C++
magI = magI(
Rect(0, 0, magI.cols & -2, magI.rows & -2));
int cx = magI.cols/2;
int cy = magI.rows/2;
Mat q0(magI,
Rect(0, 0, cx, cy));
Mat q1(magI,
Rect(cx, 0, cx, cy));
Mat q2(magI,
Rect(0, cy, cx, cy));
Mat q3(magI,
Rect(cx, cy, cx, cy));
Mat tmp;
q0.copyTo(tmp);
q3.copyTo(q0);
tmp.copyTo(q3);
q1.copyTo(tmp);
q2.copyTo(q1);
tmp.copyTo(q2);
Java
magI = magI.submat(
new Rect(0, 0, magI.cols() & -2, magI.rows() & -2));
int cx = magI.cols()/2;
int cy = magI.rows()/2;
Mat q0 =
new Mat(magI,
new Rect(0, 0, cx, cy));
Mat q1 =
new Mat(magI,
new Rect(cx, 0, cx, cy));
Mat q2 =
new Mat(magI,
new Rect(0, cy, cx, cy));
Mat q3 =
new Mat(magI,
new Rect(cx, cy, cx, cy));
Mat tmp = new Mat();
q0.copyTo(tmp);
q3.copyTo(q0);
tmp.copyTo(q3);
q1.copyTo(tmp);
q2.copyTo(q1);
tmp.copyTo(q2);
Python
magI_rows, magI_cols = magI.shape
magI = magI[0:(magI_rows & -2), 0:(magI_cols & -2)]
cx = int(magI_rows/2)
cy = int(magI_cols/2)
q0 = magI[0:cx, 0:cy]
q1 = magI[cx:cx+cx, 0:cy]
q2 = magI[0:cx, cy:cy+cy]
q3 = magI[cx:cx+cx, cy:cy+cy]
tmp = np.copy(q0)
magI[0:cx, 0:cy] = q3
magI[cx:cx + cx, cy:cy + cy] = tmp
tmp = np.copy(q1)
magI[cx:cx + cx, 0:cy] = q2
magI[0:cx, cy:cy + cy] = tmp
Normalize
This is done again for visualization purposes. We now have the magnitudes, however this are still out of our image display range of zero to one. We normalize our values to this range using the cv::normalize() function.
C++
Java
Core.normalize(magI, magI, 0, 255, Core.NORM_MINMAX, CvType.CV_8UC1);
Python
Result
An application idea would be to determine the geometrical orientation present in the image. For example, let us find out if a text is horizontal or not? Looking at some text you'll notice that the text lines sort of form also horizontal lines and the letters form sort of vertical lines. These two main components of a text snippet may be also seen in case of the Fourier transform. Let us use this horizontal and this rotated image about a text.
In case of the horizontal text:
In case of a rotated text:
You can see that the most influential components of the frequency domain (brightest dots on the magnitude image) follow the geometric rotation of objects on the image. From this we may calculate the offset and perform an image rotation to correct eventual miss alignments.