From 426dddd553d71070f53bd2d04454bf28a5983aee Mon Sep 17 00:00:00 2001 From: "N.N." Date: Mon, 13 Jul 2009 18:17:19 +0000 Subject: added a simple OCR object based on cvKNearest : pdp_opencv_cvk svn path=/trunk/externals/pdp_opencv/; revision=11858 --- pdp_opencv_cvk.cc | 532 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 532 insertions(+) create mode 100755 pdp_opencv_cvk.cc (limited to 'pdp_opencv_cvk.cc') diff --git a/pdp_opencv_cvk.cc b/pdp_opencv_cvk.cc new file mode 100755 index 0000000..e75accd --- /dev/null +++ b/pdp_opencv_cvk.cc @@ -0,0 +1,532 @@ +/* + * Pure Data Packet module. + * Copyright (c) by Tom Schouten + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * + * pdp_opencv_cvk : OCR like pattern recognition + * based on basic OCR with Open CV tutorial + * by damiles : http://blog.damiles.com/?p=93 + * + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "pdp.h" + +#include "cv.h" +#include "highgui.h" +#include "ml.h" + +#define MAX_MARKERS 10 + +const int MAX_COUNT = 500; + +typedef struct pdp_opencv_cvk_struct +{ + t_object x_obj; + t_float x_f; + + t_outlet *x_outlet0; + t_outlet *x_outlet1; + + int x_packet0; + int x_packet1; + int x_dropped; + int x_queue_id; + + int x_width; + int x_height; + int x_size; + + int x_classify; + int x_ROIx; + int x_ROIy; + int x_ROIw; + int x_ROIh; + + // The output and temporary images + IplImage *rgb, *grey; + + // open cv classifier data + char *x_filepath; + int x_nsamples; + int x_rsamples; + CvMat* trainData; + CvMat* trainClasses; + CvMat* x_nearest; + CvMat* x_dist; + int x_pwidth; + int x_pheight; + CvKNearest *knn; + +} t_pdp_opencv_cvk; + +IplImage pdp_opencv_cvk_preprocessing(IplImage* imgSrc,int new_width, int new_height); + +static void pdp_opencv_cvk_process_rgb(t_pdp_opencv_cvk *x) +{ + t_pdp *header = pdp_packet_header(x->x_packet0); + short int *data = (short int *)pdp_packet_data(x->x_packet0); + t_pdp *newheader = pdp_packet_header(x->x_packet1); + short int *newdata = (short int *)pdp_packet_data(x->x_packet1); + int i,j,k,im; + + if ((x->x_width != (t_int)header->info.image.width) || + (x->x_height != (t_int)header->info.image.height)) + { + + post("pdp_opencv_cvk :: resizing"); + + x->x_width = header->info.image.width; + x->x_height = header->info.image.height; + x->x_size = x->x_width*x->x_height; + x->x_ROIx = 0; + x->x_ROIy = 0; + x->x_ROIw = header->info.image.width; + x->x_ROIh = header->info.image.height; + + cvReleaseImage( &x->rgb ); + cvReleaseImage( &x->grey ); + + // create cv_images + x->rgb = cvCreateImage( cvSize(x->x_width, x->x_height), 8, 3 ); + x->grey = cvCreateImage( cvSize(x->x_width, x->x_height), 8, 1 ); + + } + + newheader->info.image.encoding = header->info.image.encoding; + newheader->info.image.width = x->x_width; + newheader->info.image.height = x->x_height; + + memcpy( x->rgb->imageData, data, x->x_size*3 ); + + cvCvtColor( x->rgb, x->grey, CV_BGR2GRAY ); + + if ( x->x_classify ) + { + IplImage prs_image; + float result; + CvMat row_header, *row1, odata; + + // post( "pdp_opencv_cvk : size : (%dx%d)", x->x_pwidth, x->x_pheight); + + // process file + prs_image = pdp_opencv_cvk_preprocessing(x->grey, x->x_pwidth, x->x_pheight); + + //Set data + IplImage* img32 = cvCreateImage( cvSize( x->x_pwidth, x->x_pheight ), IPL_DEPTH_32F, 1 ); + cvConvertScale(&prs_image, img32, 0.0039215, 0); + cvGetSubRect(img32, &odata, cvRect(0,0, x->x_pwidth, x->x_pheight)); + row1 = cvReshape( &odata, &row_header, 0, 1 ); + + result=x->knn->find_nearest(row1,x->x_nsamples,0,0,x->x_nearest,x->x_dist); + for ( i=0; ix_nsamples; i++ ) + { + // post( "pdp_opencv_cvk : distance : %f", x->x_dist->data.fl[i] ); + } + outlet_float(x->x_outlet1, x->x_dist->data.fl[0]); + + // cvReleaseImage( &img32 ); + x->x_classify = 0; + } + + memcpy( newdata, x->rgb->imageData, x->x_size*3 ); + return; +} + +static void pdp_opencv_cvk_sendpacket(t_pdp_opencv_cvk *x) +{ + /* release the packet */ + pdp_packet_mark_unused(x->x_packet0); + x->x_packet0 = -1; + + /* unregister and propagate if valid dest packet */ + pdp_packet_pass_if_valid(x->x_outlet0, &x->x_packet1); +} + +static void pdp_opencv_cvk_process(t_pdp_opencv_cvk *x) +{ + int encoding; + t_pdp *header = 0; + char *parname; + unsigned pi; + int partype; + float pardefault; + t_atom plist[2]; + t_atom tlist[2]; + t_atom vlist[2]; + + /* check if image data packets are compatible */ + if ( (header = pdp_packet_header(x->x_packet0)) + && (PDP_BITMAP == header->type)){ + + /* pdp_opencv_cvk_process inputs and write into active inlet */ + switch(pdp_packet_header(x->x_packet0)->info.image.encoding){ + + case PDP_BITMAP_RGB: + x->x_packet1 = pdp_packet_clone_rw(x->x_packet0); + pdp_queue_add(x, (void*)pdp_opencv_cvk_process_rgb, (void*)pdp_opencv_cvk_sendpacket, &x->x_queue_id); + break; + + default: + /* don't know the type, so dont pdp_opencv_cvk_process */ + break; + + } + } + +} + +static void pdp_opencv_cvk_input_0(t_pdp_opencv_cvk *x, t_symbol *s, t_floatarg f) +{ + /* if this is a register_ro message or register_rw message, register with packet factory */ + + if (s == gensym("register_rw")) + x->x_dropped = pdp_packet_convert_ro_or_drop(&x->x_packet0, (int)f, pdp_gensym("bitmap/rgb/*") ); + + if ((s == gensym("process")) && (-1 != x->x_packet0) && (!x->x_dropped)) + { + /* add the process method and callback to the process queue */ + pdp_opencv_cvk_process(x); + } +} + +static void pdp_opencv_cvk_free(t_pdp_opencv_cvk *x) +{ + int i; + + pdp_queue_finish(x->x_queue_id); + pdp_packet_mark_unused(x->x_packet0); + + // destroy cv_images + cvReleaseImage( &x->rgb ); + cvReleaseImage( &x->grey ); + +} + +t_class *pdp_opencv_cvk_class; + +// +// +// Find the min box. The min box respect original aspect ratio image +// The image is a binary data and background is white. +// +// + + +void pdp_opencv_cvk_findX(IplImage* imgSrc,int* min, int* max) +{ + int i; + int minFound=0; + CvMat data; + CvScalar maxVal=cvRealScalar(imgSrc->width * 255); + CvScalar val=cvRealScalar(0); + + // for each col sum, if sum < width*255 then we find the min + // then continue to end to search the max, if sum< width*255 then is new max + for (i=0; i< imgSrc->width; i++) + { + cvGetCol(imgSrc, &data, i); + val= cvSum(&data); + if(val.val[0] < maxVal.val[0]) + { + *max= i; + if(!minFound) + { + *min= i; + minFound= 1; + } + } + } +} + +void pdp_opencv_cvk_findY(IplImage* imgSrc,int* min, int* max) +{ + int i; + int minFound=0; + CvMat data; + CvScalar maxVal=cvRealScalar(imgSrc->width * 255); + CvScalar val=cvRealScalar(0); + + // for each col sum, if sum < width*255 then we find the min + // then continue to end to search the max, if sum< width*255 then is new max + for (i=0; i< imgSrc->height; i++) + { + cvGetRow(imgSrc, &data, i); + val= cvSum(&data); + if(val.val[0] < maxVal.val[0]) + { + *max=i; + if(!minFound) + { + *min= i; + minFound= 1; + } + } + } +} + +// +// +// Find the bounding box. +// +// + +CvRect pdp_opencv_cvk_findBB(IplImage* imgSrc) +{ + CvRect aux; + int xmin, xmax, ymin, ymax; + xmin=xmax=ymin=ymax=0; + + pdp_opencv_cvk_findX(imgSrc, &xmin, &xmax); + pdp_opencv_cvk_findY(imgSrc, &ymin, &ymax); + + aux=cvRect(xmin, ymin, xmax-xmin, ymax-ymin); + + return aux; +} + +IplImage pdp_opencv_cvk_preprocessing(IplImage* imgSrc,int new_width, int new_height) +{ + IplImage* result; + IplImage* scaledResult; + + CvMat data; + CvMat dataA; + CvRect bb;//bounding box + CvRect bba;//boundinb box maintain aspect ratio + + // find bounding box + bb=pdp_opencv_cvk_findBB(imgSrc); + + if ( ( bb.width == 0 ) || ( bb.height == 0 ) ) + { + bb.x = 0; + bb.y = 0; + bb.width = imgSrc->width; + bb.height = imgSrc->height; + } + + // get bounding box data and no with aspect ratio, the x and y can be corrupted + cvGetSubRect(imgSrc, &data, cvRect(bb.x, bb.y, bb.width, bb.height)); + // create image with this data with width and height with aspect ratio 1 + // then we get highest size betwen width and height of our bounding box + int size=(bb.width>bb.height)?bb.width:bb.height; + result=cvCreateImage( cvSize( size, size ), 8, 1 ); + cvSet(result,CV_RGB(255,255,255),NULL); + // copy de data in center of image + int x=(int)floor((float)(size-bb.width)/2.0f); + int y=(int)floor((float)(size-bb.height)/2.0f); + cvGetSubRect(result, &dataA, cvRect(x,y,bb.width, bb.height)); + cvCopy(&data, &dataA, NULL); + // scale result + scaledResult=cvCreateImage( cvSize( new_width, new_height ), 8, 1 ); + cvResize(result, scaledResult, CV_INTER_NN); + + // return processed data + return *scaledResult; + +} + +void pdp_opencv_cvk_load(t_pdp_opencv_cvk *x) +{ + IplImage* src_image; + IplImage prs_image; + CvMat row,data; + char file[255]; + int i=0,j; + CvMat row_header, *row1; + + x->x_rsamples = 0; + + for( j = 0; j< x->x_nsamples; j++) + { + + // load file + sprintf(file,"%s/%03d.png",x->x_filepath, j); + src_image = cvLoadImage(file,0); + if(!src_image) + { + post("pdp_opencv_cvk : error: couldn't load image %s\n", file); + continue; + } + if ( ( x->x_pwidth == -1 ) || ( x->x_pheight == -1 ) ) + { + x->x_pwidth = src_image->width; + x->x_pheight = src_image->height; + // post( "pdp_opencv_cvk : loaded : %s (%dx%d)", file, src_image->width, src_image->height); + x->x_rsamples++; + } + else if ( ( src_image->width != x->x_pwidth ) || ( src_image->height != x->x_pheight ) ) + { + post( "pdp_opencv_cvk : error : %s (%dx%d) : wrong size ( should be %dx%d )", file, src_image->width, src_image->height, x->x_pwidth, x->x_pheight); + continue; + } + else + { + // post( "pdp_opencv_cvk : loaded : %s (%dx%d)", file, src_image->width, src_image->height); + x->x_rsamples++; + } + + // process file + prs_image = pdp_opencv_cvk_preprocessing(src_image, x->x_pwidth, x->x_pheight); + // post( "pdp_opencv_cvk : preprocessed : %s (%dx%d)", file, x->x_pwidth, x->x_pheight); + + if ( ( x->trainData == NULL ) || ( x->trainClasses == NULL )) + { + x->trainData = cvCreateMat(x->x_nsamples, x->x_pwidth*x->x_pheight, CV_32FC1); + x->trainClasses = cvCreateMat(x->x_nsamples, 1, CV_32FC1); + } + + // set class label + cvGetRow(x->trainClasses, &row, j); + cvSet(&row, cvRealScalar(i), NULL); + // set data + cvGetRow(x->trainData, &row, j); + + IplImage* img = cvCreateImage( cvSize( x->x_pwidth, x->x_pheight ), IPL_DEPTH_32F, 1 ); + // convert 8 bits image to 32 float image + cvConvertScale(&prs_image, img, 0.0039215, 0); + + cvGetSubRect(img, &data, cvRect( 0, 0, x->x_pwidth, x->x_pheight) ); + + // convert data matrix sizexsize to vecor + row1 = cvReshape( &data, &row_header, 0, 1 ); + cvCopy(row1, &row, NULL); + } + + // create the classifier + post( "pdp_opencv_cvk : loaded : %d samples", x->x_rsamples); + if ( x->x_rsamples == x->x_nsamples ) + { + x->knn=new CvKNearest( x->trainData, x->trainClasses, 0, false, x->x_nsamples ); + x->x_nearest=cvCreateMat(1,x->x_nsamples,CV_32FC1); + x->x_dist=cvCreateMat(1,x->x_nsamples,CV_32FC1); + } +} + +static void pdp_opencv_cvk_classify(t_pdp_opencv_cvk *x) +{ + if ( x->trainData == NULL ) + { + post( "pdp_opencv_cvk : no patterns loaded : cannot process" ); + return; + } + x->x_classify=1; +} + +void *pdp_opencv_cvk_new(t_symbol *s, int argc, t_atom *argv ) +{ + int i; + + t_pdp_opencv_cvk *x = (t_pdp_opencv_cvk *)pd_new(pdp_opencv_cvk_class); + + x->x_outlet0 = outlet_new(&x->x_obj, &s_anything); + x->x_outlet1 = outlet_new(&x->x_obj, &s_anything); + + x->x_packet0 = -1; + x->x_packet1 = -1; + x->x_queue_id = -1; + + x->x_width = 320; + x->x_height = 240; + x->x_size = x->x_width * x->x_height; + x->x_ROIx = 0; + x->x_ROIy = 0; + x->x_ROIw = x->x_width; + x->x_ROIh = x->x_height; + + x->rgb = cvCreateImage( cvSize(x->x_width, x->x_height), 8, 3 ); + x->grey = cvCreateImage( cvSize(x->x_width, x->x_height), 8, 1 ); + + x->x_filepath = ( char * ) getbytes( 1024 ); + sprintf( x->x_filepath, "./data" ); + x->x_nsamples = 10; + + if ( argc >= 1 ) + { + if ( argv[0].a_type != A_SYMBOL ) + { + error( "pdp_opencv_cvk : wrong argument (file path : 1)" ); + return NULL; + } + if ( !strcmp( argv[0].a_w.w_symbol->s_name, "" ) ) + { + error( "pdp_opencv_cvk : no extension specified" ); + error( "pdp_opencv_cvk : usage : pdp_opencv_cvk_new " ); + return NULL; + } + strcpy( x->x_filepath, argv[0].a_w.w_symbol->s_name ); + } + if ( argc >= 2 ) + { + if ( argv[1].a_type != A_FLOAT ) + { + error( "pdp_opencv_cvk : wrong argument (nsamples : 2)" ); + return NULL; + } + if ( (int)argv[1].a_w.w_float <= 0 ) + { + error( "pdp_opencv_cvk : wrong number of samples (%d)", (t_int)(int)argv[1].a_w.w_float ); + error( "pdp_opencv_cvk : usage : pdp_opencv_cvk_new " ); + return NULL; + } + x->x_nsamples = (int)argv[1].a_w.w_float; + } + + x->x_classify = 0; + x->x_pwidth = -1; + x->x_pheight = -1; + + x->trainData = NULL; + x->trainClasses = NULL; + + error( "pdp_opencv_cvk : loading %d samples from : %s", x->x_nsamples, x->x_filepath ); + pdp_opencv_cvk_load( x ); + + return (void *)x; +} + + +#ifdef __cplusplus +extern "C" +{ +#endif + + +void pdp_opencv_cvk_setup(void) +{ + + post( " pdp_opencv_cvk"); + pdp_opencv_cvk_class = class_new(gensym("pdp_opencv_cvk"), (t_newmethod)pdp_opencv_cvk_new, + (t_method)pdp_opencv_cvk_free, sizeof(t_pdp_opencv_cvk), CLASS_DEFAULT, A_GIMME, A_NULL); + + class_addmethod(pdp_opencv_cvk_class, (t_method)pdp_opencv_cvk_input_0, gensym("pdp"), A_SYMBOL, A_DEFFLOAT, A_NULL); + class_addmethod(pdp_opencv_cvk_class, (t_method)pdp_opencv_cvk_classify, gensym("bang"), A_NULL); + +} + +#ifdef __cplusplus +} +#endif -- cgit v1.2.1