#include <iostream>
#include <fstream>
// Command-line option table handed to CommandLineParser in main().
// Each entry is written as "{ name [alias] | default | help text }"; an empty
// default field means the option has no default value.
const char* keys =
    "{ help h | | Print help message. }"
    "{ input i | | Path to input image or video file. Skip this argument to capture frames from a camera.}"
    "{ detModel dmp | | Path to a binary .pb file contains trained detector network.}"
    "{ width | 320 | Preprocess input image by resizing to a specific width. It should be a multiple of 32. }"
    "{ height | 320 | Preprocess input image by resizing to a specific height. It should be a multiple of 32. }"
    "{ thr | 0.5 | Confidence threshold. }"
    "{ nms | 0.4 | Non-maximum suppression threshold. }"
    "{ recModel rmp | | Path to a binary .onnx file contains trained CRNN text recognition model. "
    "Download links are provided in doc/tutorials/dnn/dnn_text_spotting/dnn_text_spotting.markdown}"
    "{ RGBInput rgb |0| 0: imread with flags=IMREAD_GRAYSCALE; 1: imread with flags=IMREAD_COLOR. }"
    // The file named here is read line by line into the recognizer's vocabulary,
    // so the help text describes it as a vocabulary file rather than a benchmark.
    "{ vocabularyPath vp | alphabet_36.txt | Path to the recognition vocabulary file, one entry per line. "
    "Download links are provided in doc/tutorials/dnn/dnn_text_spotting/dnn_text_spotting.markdown}";
// Forward declaration; the definition follows main().
// main() calls this with the recognition input image, a pointer to the first of
// the quadrangle's corner points, and a Mat that receives the output.
void fourPointsTransform(const Mat& frame, const Point2f vertices[], Mat& result);
// Entry point of the text-spotting sample: parses the command line, configures
// an EAST text detector and a text recognizer, then reads frames from a
// VideoCapture, detects text quadrangles and prints the recognized strings.
//
// Returns 0 on normal exit (including the help path) and 1 when the parser
// reports errors.
//
// NOTE(review): this listing looks like an excerpt with lines elided. Several
// identifiers used below (detModelPath, recModelPath, vocLine, recInputSize,
// recMean, detMean) have no visible declaration, and the frame-processing block
// has no visible loop header. Confirm against the complete source before
// relying on it.
int main(
int argc,
char** argv)
{
// Parse the arguments against the option table in `keys`.
CommandLineParser parser(argc, argv, keys);
parser.about("Use this script to run TensorFlow implementation (https://github.com/argman/EAST) of "
"EAST: An Efficient and Accurate Scene Text Detector (https://arxiv.org/abs/1704.03155v2)");
// No arguments at all, or an explicit help request: print usage and exit successfully.
if (argc == 1 || parser.has("help"))
{
parser.printMessage();
return 0;
}
// Numeric options; their defaults are supplied by `keys`.
float confThreshold = parser.get<float>("thr");
float nmsThreshold = parser.get<float>("nms");
int width = parser.get<int>("width");
int height = parser.get<int>("height");
int imreadRGB = parser.get<int>("RGBInput");
// Stop with a non-zero status if the parser recorded any error.
if (!parser.check())
{
parser.printErrors();
return 1;
}
// Both model paths must be non-empty.
// NOTE(review): detModelPath / recModelPath are not declared in the visible
// lines; presumably they are read from the detModel / recModel options.
CV_Assert(!detModelPath.empty() && !recModelPath.empty());
// Detector: EAST model with the requested confidence and NMS thresholds.
TextDetectionModel_EAST detector(detModelPath);
detector.setConfidenceThreshold(confThreshold)
.setNMSThreshold(nmsThreshold);
// Recognizer: its vocabulary is loaded one entry per line from vocFile.
TextRecognitionModel recognizer(recModelPath);
// NOTE(review): vocFile is never opened and vocLine is never declared in the
// visible lines; presumably the file named by vocabularyPath is opened here.
std::ifstream vocFile;
std::vector<String> vocabulary;
while (std::getline(vocFile, vocLine)) {
vocabulary.push_back(vocLine);
}
recognizer.setVocabulary(vocabulary);
recognizer.setDecodeType("CTC-greedy");
// Input preprocessing parameters for the recognizer.
// NOTE(review): recInputSize and recMean are not declared in the visible lines.
double recScale = 1.0 / 127.5;
recognizer.setInputParams(recScale, recInputSize, recMean);
// Input preprocessing parameters for the detector; the input size comes from
// the width/height options.
// NOTE(review): detMean is not declared in the visible lines.
double detScale = 1.0;
Size detInputSize =
Size(width, height);
bool swapRB = true;
detector.setInputParams(detScale, detInputSize, detMean, swapRB);
// Open the given input if one was supplied, otherwise capture device 0.
// NOTE(review): openSuccess is not checked in the visible lines.
VideoCapture cap;
bool openSuccess = parser.has(
"input") ? cap.open(parser.get<
String>(
"input")) : cap.open(0);
// NOTE(review): kWinName is not used in the visible lines; presumably it names
// a display window in the elided code.
static const std::string kWinName = "EAST: An Efficient and Accurate Scene Text Detector";
Mat frame;
// NOTE(review): the `break` below implies an enclosing loop whose header is not
// visible in this excerpt.
{
// Grab the next frame; an empty frame ends processing.
cap >> frame;
if (frame.empty())
{
break;
}
std::cout << frame.size << std::endl;
// Detect text regions; each result is a polygon given as a vector of points.
std::vector< std::vector<Point> > detResults;
detector.detect(frame, detResults);
// NOTE(review): frame2 is not used in the visible lines.
Mat frame2 = frame.clone();
if (detResults.size() > 0) {
// Choose the image handed to the recognizer.
// NOTE(review): the !imreadRGB branch is empty here, which leaves recInput
// unset when RGBInput is 0; presumably a grayscale conversion was elided.
Mat recInput;
if (!imreadRGB) {
} else {
recInput = frame;
}
// NOTE(review): contours is filled but not consumed in the visible lines.
std::vector< std::vector<Point> > contours;
for (
uint i = 0; i < detResults.size(); i++)
{
const auto& quadrangle = detResults[i];
contours.emplace_back(quadrangle);
// Convert the first four corners to Point2f for fourPointsTransform.
// This indexes quadrangle[0..3] without a size check, so it assumes each
// detection has at least four points.
std::vector<Point2f> quadrangle_2f;
for (int j = 0; j < 4; j++)
quadrangle_2f.emplace_back(quadrangle[j]);
// Extract the region and run recognition on it, then print
// "<index>: '<text>'".
Mat cropped;
fourPointsTransform(recInput, &quadrangle_2f[0], cropped);
std::string recognitionResult = recognizer.recognize(cropped);
std::cout << i << ": '" << recognitionResult << "'" << std::endl;
}
}
}
return 0;
}
// Definition of the helper forward-declared above main(); main() calls it with
// the recognition input image, four quadrangle corners and an output Mat.
// NOTE(review): only a fragment of the body is visible in this listing, so the
// actual transform cannot be described from here. Presumably it maps the
// quadrangle in `frame` onto an upright rectangle written to `result`; confirm
// against the complete source.
void fourPointsTransform(
const Mat& frame,
const Point2f vertices[], Mat& result)
{
// NOTE(review): the two Point(...) entries and the closing `};` look like the
// tail of an array initializer whose opening lines are missing; outputSize is
// not declared in the visible lines. The entries are the bottom-left and
// bottom-right corners of an outputSize-sized rectangle.
Point(0, outputSize.height - 1),
Point(outputSize.width - 1, outputSize.height - 1)
};
}
#define CV_CheckEQ(v1, v2, msg)
Supported values of these types: int, float, double.
Definition: check.hpp:118
Point2i Point
Definition: modules/core/include/opencv2/core/types.hpp:209
std::string String
Definition: cvstd.hpp:149
Size2i Size
Definition: modules/core/include/opencv2/core/types.hpp:370
Scalar_< double > Scalar
Definition: modules/core/include/opencv2/core/types.hpp:709
Point_< float > Point2f
Definition: modules/core/include/opencv2/core/types.hpp:207
uint32_t uint
Definition: core/include/opencv2/core/hal/interface.h:42
cv::String findFile(const cv::String &relative_path, bool required=true, bool silentMode=false)
Try to find requested data file.
#define CV_Assert(expr)
Checks a condition at runtime and throws exception if it fails.
Definition: core/include/opencv2/core/base.hpp:359
void imshow(const String &winname, InputArray mat)
Displays an image in the specified window.
int waitKey(int delay=0)
Waits for a pressed key.
void cvtColor(InputArray src, OutputArray dst, int code, int dstCn=0, AlgorithmHint hint=cv::ALGO_HINT_DEFAULT)
Converts an image from one color space to another.
@ COLOR_BGR2GRAY
convert between RGB/BGR and grayscale, color conversions
Definition: imgproc/include/opencv2/imgproc.hpp:555
void putText(InputOutputArray img, const String &text, Point org, int fontFace, double fontScale, Scalar color, int thickness=1, int lineType=LINE_8, bool bottomLeftOrigin=false)
Draws a text string.
void polylines(InputOutputArray img, InputArrayOfArrays pts, bool isClosed, const Scalar &color, int thickness=1, int lineType=LINE_8, int shift=0)
Draws several polygonal curves.
@ FONT_HERSHEY_SIMPLEX
normal size sans-serif font
Definition: imgproc/include/opencv2/imgproc.hpp:901
int main(int argc, char *argv[])
Definition: highgui_qt.cpp:3
Definition: all_layers.hpp:47
Definition: core/include/opencv2/core.hpp:107