mitk: ANN.h Source File

Go to the documentation of this file.
00001 //----------------------------------------------------------------------
00002 // File:                        ANN.h
00003 // Programmer:          Sunil Arya and David Mount
00004 // Last modified:       05/03/05 (Release 1.1)
00005 // Description:         Basic include file for approximate nearest
00006 //                                      neighbor searching.
00007 //----------------------------------------------------------------------
00008 // Copyright (c) 1997-2005 University of Maryland and Sunil Arya and
00009 // David Mount.  All Rights Reserved.
00010 //
00011 // This software and related documentation is part of the 
00012 // Approximate Nearest Neighbor Library (ANN).
00013 // 
00014 // Permission to use, copy, and distribute this software and its 
00015 // documentation is hereby granted free of charge, provided that 
00016 // (1) it is not a component of a commercial product, and 
00017 // (2) this notice appears in all copies of the software and
00018 //   related documentation. 
00019 // 
00020 // The University of Maryland (U.M.) and the authors make no representations
00021 // about the suitability or fitness of this software for any purpose.  It is
00022 // provided "as is" without express or implied warranty.
00023 //----------------------------------------------------------------------
00024 // History:
00025 //      Revision 0.1  03/04/98
00026 //              Initial release
00027 //      Revision 1.0  04/01/05
00028 //              Added copyright and revision information
00029 //              Added ANNcoordPrec for coordinate precision.
00030 //              Added methods theDim, nPoints, maxPoints, thePoints to ANNpointSet.
00031 //              Cleaned up C++ structure for modern compilers
00032 //      Revision 1.1  05/03/05
00033 //              Added fixed-radius k-NN searching
00034 //----------------------------------------------------------------------
00035 
00036 //----------------------------------------------------------------------
00037 // ANN - approximate nearest neighbor searching
00038 //      ANN is a library for approximate nearest neighbor searching,
00039 //      based on the use of standard and priority search in kd-trees
00040 //      and balanced box-decomposition (bbd) trees. Here are some
00041 //      references to the main algorithmic techniques used here:
00042 //
00043 //              kd-trees:
00044 //                      Friedman, Bentley, and Finkel, ``An algorithm for finding
00045 //                              best matches in logarithmic expected time,'' ACM
00046 //                              Transactions on Mathematical Software, 3(3):209-226, 1977.
00047 //
00048 //              Priority search in kd-trees:
00049 //                      Arya and Mount, ``Algorithms for fast vector quantization,''
00050 //                              Proc. of DCC '93: Data Compression Conference, eds. J. A.
00051 //                              Storer and M. Cohn, IEEE Press, 1993, 381-390.
00052 //
00053 //              Approximate nearest neighbor search and bbd-trees:
00054 //                      Arya, Mount, Netanyahu, Silverman, and Wu, ``An optimal
00055 //                              algorithm for approximate nearest neighbor searching,''
00056 //                              5th Ann. ACM-SIAM Symposium on Discrete Algorithms,
00057 //                              1994, 573-582.
00058 //----------------------------------------------------------------------
00059 
00060 #ifndef ANN_H
00061 #define ANN_H
00062 
00063 #ifdef WIN32
00064   //----------------------------------------------------------------------
00065   // For Microsoft Visual C++, externally accessible symbols must be
00066   // explicitly indicated with DLL_API, which is somewhat like "extern."
00067   //
00068   // The following ifdef block is the standard way of creating macros
00069   // which make exporting from a DLL simpler. All files within this DLL
00070   // are compiled with the DLL_EXPORTS preprocessor symbol defined on the
00071   // command line. In contrast, projects that use (or import) the DLL
00072   // objects do not define the DLL_EXPORTS symbol. This way any other
00073   // project whose source files include this file see DLL_API functions as
00074   // being imported from a DLL, wheras this DLL sees symbols defined with
00075   // this macro as being exported.
00076   // CHANGE: allow build as static lib when ANN_STATIC_LIB is defined
00077   //         change DLL_EXPORTS to ann_EXPORTS
00078   //----------------------------------------------------------------------
00079   #ifdef ANN_STATIC_LIB
00080     #define DLL_API
00081   #else
00082     #ifdef ann_EXPORTS
00083             #define DLL_API __declspec(dllexport)
00084     #else
00085             #define DLL_API 
00086 //__declspec(dllimport)
00087     #endif
00088   #endif
00089   //----------------------------------------------------------------------
00090   // DLL_API is ignored for all other systems
00091   //----------------------------------------------------------------------
00092 #else
00093   #define DLL_API
00094 #endif
00095 
00096 //----------------------------------------------------------------------
00097 //  basic includes
00098 //----------------------------------------------------------------------
00099 
00100 #include <cmath>                        // math includes
00101 #include <iostream>                     // I/O streams
00102 
00103 //----------------------------------------------------------------------
00104 // Limits
00105 // There are a number of places where we use the maximum double value as
00106 // default initializers (and others may be used, depending on the
00107 // data/distance representation). These can usually be found in limits.h
00108 // (as LONG_MAX, INT_MAX) or in float.h (as DBL_MAX, FLT_MAX).
00109 //
00110 // Not all systems have these files.  If you are using such a system,
00111 // you should set the preprocessor symbol ANN_NO_LIMITS_H when
00112 // compiling, and modify the statements below to generate the
00113 // appropriate value. For practical purposes, this does not need to be
00114 // the maximum double value. It is sufficient that it be at least as
00115 // large than the maximum squared distance between between any two
00116 // points.
00117 //----------------------------------------------------------------------
00118 #ifdef ANN_NO_LIMITS_H                                  // limits.h unavailable
00119   #include <cvalues>                                    // replacement for limits.h
00120   const double ANN_DBL_MAX = MAXDOUBLE; // insert maximum double
00121 #else
00122   #include <climits>
00123   #include <cfloat>
00124   const double ANN_DBL_MAX = DBL_MAX;
00125 #endif
00126 
00127 #define ANNversion              "1.0"                   // ANN version and information
00128 #define ANNversionCmt   ""
00129 #define ANNcopyright    "David M. Mount and Sunil Arya"
00130 #define ANNlatestRev    "Mar 1, 2005"
00131 
00132 //----------------------------------------------------------------------
00133 //      ANNbool
00134 //      This is a simple boolean type. Although ANSI C++ is supposed
00135 //      to support the type bool, some compilers do not have it.
00136 //----------------------------------------------------------------------
00137 
00138 enum ANNbool {ANNfalse = 0, ANNtrue = 1}; // ANN boolean type (non ANSI C++)
00139 
00140 //----------------------------------------------------------------------
00141 //      ANNcoord, ANNdist
00142 //              ANNcoord and ANNdist are the types used for representing
00143 //              point coordinates and distances.  They can be modified by the
00144 //              user, with some care.  It is assumed that they are both numeric
00145 //              types, and that ANNdist is generally of an equal or higher type
00146 //              from ANNcoord.  A variable of type ANNdist should be large
00147 //              enough to store the sum of squared components of a variable
00148 //              of type ANNcoord for the number of dimensions needed in the
00149 //              application.  For example, the following combinations are
00150 //              legal:
00151 //
00152 //              ANNcoord                ANNdist
00153 //              ---------               -------------------------------
00154 //              short                   short, int, long, float, double
00155 //              int                             int, long, float, double
00156 //              long                    long, float, double
00157 //              float                   float, double
00158 //              double                  double
00159 //
00160 //              It is the user's responsibility to make sure that overflow does
00161 //              not occur in distance calculation.
00162 //----------------------------------------------------------------------
00163 
00164 typedef float   ANNcoord;                               // coordinate data type
00165 typedef float   ANNdist;                                // distance data type
00166 
00167 //----------------------------------------------------------------------
00168 //      ANNidx
00169 //              ANNidx is a point index.  When the data structure is built, the
00170 //              points are given as an array.  Nearest neighbor results are
00171 //              returned as an integer index into this array.  To make it
00172 //              clearer when this is happening, we define the integer type
00173 //              ANNidx.  Indexing starts from 0.
00174 //              
00175 //              For fixed-radius near neighbor searching, it is possible that
00176 //              there are not k nearest neighbors within the search radius.  To
00177 //              indicate this, the algorithm returns ANN_NULL_IDX as its result.
00178 //              It should be distinguishable from any valid array index.
00179 //----------------------------------------------------------------------
00180 
00181 typedef int             ANNidx;                                 // point index
00182 const ANNidx    ANN_NULL_IDX = -1;              // a NULL point index
00183 
00184 //----------------------------------------------------------------------
00185 //      Infinite distance:
00186 //              The code assumes that there is an "infinite distance" which it
00187 //              uses to initialize distances before performing nearest neighbor
00188 //              searches.  It should be as larger or larger than any legitimate
00189 //              nearest neighbor distance.
00190 //
00191 //              On most systems, these should be found in the standard include
00192 //              file <limits.h> or possibly <float.h>.  If you do not have these
00193 //              file, some suggested values are listed below, assuming 64-bit
00194 //              long, 32-bit int and 16-bit short.
00195 //
00196 //              ANNdist ANN_DIST_INF    Values (see <limits.h> or <float.h>)
00197 //              ------- ------------    ------------------------------------
00198 //              double  DBL_MAX                 1.79769313486231570e+308
00199 //              float   FLT_MAX                 3.40282346638528860e+38
00200 //              long    LONG_MAX                0x7fffffffffffffff
00201 //              int             INT_MAX                 0x7fffffff
00202 //              short   SHRT_MAX                0x7fff
00203 //----------------------------------------------------------------------
00204 
00205 const ANNdist   ANN_DIST_INF = FLT_MAX;
00206 
00207 //----------------------------------------------------------------------
00208 //      Significant digits for tree dumps:
00209 //              When floating point coordinates are used, the routine that dumps
00210 //              a tree needs to know roughly how many significant digits there
00211 //              are in a ANNcoord, so it can output points to full precision.
00212 //              This is defined to be ANNcoordPrec.  On most systems these
00213 //              values can be found in the standard include files <limits.h> or
00214 //              <float.h>.  For integer types, the value is essentially ignored.
00215 //
00216 //              ANNcoord ANNcoordPrec   Values (see <limits.h> or <float.h>)
00217 //              -------- ------------   ------------------------------------
00218 //              double   DBL_DIG                15
00219 //              float    FLT_DIG                6
00220 //              long     doesn't matter 19
00221 //              int              doesn't matter 10
00222 //              short    doesn't matter 5
00223 //----------------------------------------------------------------------
00224 
00225 #ifdef DBL_DIG                                                  // number of sig. bits in ANNcoord
00226         const int        ANNcoordPrec   = DBL_DIG;
00227 #else
00228         const int        ANNcoordPrec   = 15;   // default precision
00229 #endif
00230 
00231 //----------------------------------------------------------------------
00232 // Self match?
00233 //      In some applications, the nearest neighbor of a point is not
00234 //      allowed to be the point itself. This occurs, for example, when
00235 //      computing all nearest neighbors in a set.  By setting the
00236 //      parameter ANN_ALLOW_SELF_MATCH to ANNfalse, the nearest neighbor
00237 //      is the closest point whose distance from the query point is
00238 //      strictly positive.
00239 //----------------------------------------------------------------------
00240 
00241 const ANNbool   ANN_ALLOW_SELF_MATCH    = ANNtrue;
00242 
00243 //----------------------------------------------------------------------
00244 //      Norms and metrics:
00245 //              ANN supports any Minkowski norm for defining distance.  In
00246 //              particular, for any p >= 1, the L_p Minkowski norm defines the
00247 //              length of a d-vector (v0, v1, ..., v(d-1)) to be
00248 //
00249 //                              (|v0|^p + |v1|^p + ... + |v(d-1)|^p)^(1/p),
00250 //
00251 //              (where ^ denotes exponentiation, and |.| denotes absolute
00252 //              value).  The distance between two points is defined to be the
00253 //              norm of the vector joining them.  Some common distance metrics
00254 //              include
00255 //
00256 //                              Euclidean metric                p = 2
00257 //                              Manhattan metric                p = 1
00258 //                              Max metric                              p = infinity
00259 //
00260 //              In the case of the max metric, the norm is computed by taking
00261 //              the maxima of the absolute values of the components.  ANN is
00262 //              highly "coordinate-based" and does not support general distances
00263 //              functions (e.g. those obeying just the triangle inequality).  It
00264 //              also does not support distance functions based on
00265 //              inner-products.
00266 //
00267 //              For the purpose of computing nearest neighbors, it is not
00268 //              necessary to compute the final power (1/p).  Thus the only
00269 //              component that is used by the program is |v(i)|^p.
00270 //
00271 //              ANN parameterizes the distance computation through the following
00272 //              macros.  (Macros are used rather than procedures for
00273 //              efficiency.) Recall that the distance between two points is
00274 //              given by the length of the vector joining them, and the length
00275 //              or norm of a vector v is given by formula:
00276 //
00277 //                              |v| = ROOT(POW(v0) # POW(v1) # ... # POW(v(d-1)))
00278 //
00279 //              where ROOT, POW are unary functions and # is an associative and
00280 //              commutative binary operator mapping the following types:
00281 //
00282 //                      **      POW:    ANNcoord                                --> ANNdist
00283 //                      **      #:              ANNdist x ANNdist               --> ANNdist
00284 //                      **      ROOT:   ANNdist (>0)                    --> double
00285 //
00286 //              For early termination in distance calculation (partial distance
00287 //              calculation) we assume that POW and # together are monotonically
00288 //              increasing on sequences of arguments, meaning that for all
00289 //              v0..vk and y:
00290 //
00291 //              POW(v0) #...# POW(vk) <= (POW(v0) #...# POW(vk)) # POW(y).
00292 //
00293 //      Incremental Distance Calculation:
00294 //              The program uses an optimized method of computing distances for
00295 //              kd-trees and bd-trees, called incremental distance calculation.
00296 //              It is used when distances are to be updated when only a single
00297 //              coordinate of a point has been changed.  In order to use this,
00298 //              we assume that there is an incremental update function DIFF(x,y)
00299 //              for #, such that if:
00300 //
00301 //                                      s = x0 # ... # xi # ... # xk 
00302 //
00303 //              then if s' is equal to s but with xi replaced by y, that is, 
00304 //              
00305 //                                      s' = x0 # ... # y # ... # xk
00306 //
00307 //              then the length of s' can be computed by:
00308 //
00309 //                                      |s'| = |s| # DIFF(xi,y).
00310 //
00311 //              Thus, if # is + then DIFF(xi,y) is (yi-x).  For the L_infinity
00312 //              norm we make use of the fact that in the program this function
00313 //              is only invoked when y > xi, and hence DIFF(xi,y)=y.
00314 //
00315 //              Finally, for approximate nearest neighbor queries we assume
00316 //              that POW and ROOT are related such that
00317 //
00318 //                                      v*ROOT(x) = ROOT(POW(v)*x)
00319 //
00320 //              Here are the values for the various Minkowski norms:
00321 //
00322 //              L_p:    p even:                                                 p odd:
00323 //                              -------------------------               ------------------------
00324 //                              POW(v)                  = v^p                   POW(v)                  = |v|^p
00325 //                              ROOT(x)                 = x^(1/p)               ROOT(x)                 = x^(1/p)
00326 //                              #                               = +                             #                               = +
00327 //                              DIFF(x,y)               = y - x                 DIFF(x,y)               = y - x 
00328 //
00329 //              L_inf:
00330 //                              POW(v)                  = |v|
00331 //                              ROOT(x)                 = x
00332 //                              #                               = max
00333 //                              DIFF(x,y)               = y
00334 //
00335 //              By default the Euclidean norm is assumed.  To change the norm,
00336 //              uncomment the appropriate set of macros below.
00337 //----------------------------------------------------------------------
00338 
00339 //----------------------------------------------------------------------
00340 //      Use the following for the Euclidean norm
00341 //----------------------------------------------------------------------
00342 #define ANN_POW(v)                      ((v)*(v))
00343 #define ANN_ROOT(x)                     sqrt(x)
00344 #define ANN_SUM(x,y)            ((x) + (y))
00345 #define ANN_DIFF(x,y)           ((y) - (x))
00346 
00347 //----------------------------------------------------------------------
00348 //      Use the following for the L_1 (Manhattan) norm
00349 //----------------------------------------------------------------------
00350 // #define ANN_POW(v)           fabs(v)
00351 // #define ANN_ROOT(x)          (x)
00352 // #define ANN_SUM(x,y)         ((x) + (y))
00353 // #define ANN_DIFF(x,y)        ((y) - (x))
00354 
00355 //----------------------------------------------------------------------
00356 //      Use the following for a general L_p norm
00357 //----------------------------------------------------------------------
00358 // #define ANN_POW(v)           pow(fabs(v),p)
00359 // #define ANN_ROOT(x)          pow(fabs(x),1/p)
00360 // #define ANN_SUM(x,y)         ((x) + (y))
00361 // #define ANN_DIFF(x,y)        ((y) - (x))
00362 
00363 //----------------------------------------------------------------------
00364 //      Use the following for the L_infinity (Max) norm
00365 //----------------------------------------------------------------------
00366 // #define ANN_POW(v)           fabs(v)
00367 // #define ANN_ROOT(x)          (x)
00368 // #define ANN_SUM(x,y)         ((x) > (y) ? (x) : (y))
00369 // #define ANN_DIFF(x,y)        (y)
00370 
00371 //----------------------------------------------------------------------
00372 //      Array types
00373 //              The following array types are of basic interest.  A point is
00374 //              just a dimensionless array of coordinates, a point array is a
00375 //              dimensionless array of points.  A distance array is a
00376 //              dimensionless array of distances and an index array is a
00377 //              dimensionless array of point indices.  The latter two are used
00378 //              when returning the results of k-nearest neighbor queries.
00379 //----------------------------------------------------------------------
00380 
00381 typedef ANNcoord* ANNpoint;                     // a point
00382 typedef ANNpoint* ANNpointArray;        // an array of points 
00383 typedef ANNdist*  ANNdistArray;         // an array of distances 
00384 typedef ANNidx*   ANNidxArray;          // an array of point indices
00385 
00386 //----------------------------------------------------------------------
00387 //      Basic point and array utilities:
00388 //              The following procedures are useful supplements to ANN's nearest
00389 //              neighbor capabilities.
00390 //
00391 //              annDist():
00392 //                      Computes the (squared) distance between a pair of points.
00393 //                      Note that this routine is not used internally by ANN for
00394 //                      computing distance calculations.  For reasons of efficiency
00395 //                      this is done using incremental distance calculation.  Thus,
00396 //                      this routine cannot be modified as a method of changing the
00397 //                      metric.
00398 //
00399 //              Because points (somewhat like strings in C) are stored as
00400 //              pointers.  Consequently, creating and destroying copies of
00401 //              points may require storage allocation.  These procedures do
00402 //              this.
00403 //
00404 //              annAllocPt() and annDeallocPt():
00405 //                              Allocate a deallocate storage for a single point, and
00406 //                              return a pointer to it.  The argument to AllocPt() is
00407 //                              used to initialize all components.
00408 //
00409 //              annAllocPts() and annDeallocPts():
00410 //                              Allocate and deallocate an array of points as well a
00411 //                              place to store their coordinates, and initializes the
00412 //                              points to point to their respective coordinates.  It
00413 //                              allocates point storage in a contiguous block large
00414 //                              enough to store all the points.  It performs no
00415 //                              initialization.
00416 //
00417 //              annCopyPt():
00418 //                              Creates a copy of a given point, allocating space for
00419 //                              the new point.  It returns a pointer to the newly
00420 //                              allocated copy.
00421 //----------------------------------------------------------------------
00422    
00423 DLL_API ANNdist annDist(
00424         int                             dim,            // dimension of space
00425         ANNpoint                p,                      // points
00426         ANNpoint                q);
00427 
00428 DLL_API ANNpoint annAllocPt(
00429         int                             dim,            // dimension
00430         ANNcoord                c = 0);         // coordinate value (all equal)
00431 
00432 DLL_API ANNpointArray annAllocPts(
00433         int                             n,                      // number of points
00434         int                             dim);           // dimension
00435 
00436 DLL_API void annDeallocPt(
00437         ANNpoint                &p);            // deallocate 1 point
00438    
00439 DLL_API void annDeallocPts(
00440         ANNpointArray   &pa);           // point array
00441 
00442 DLL_API ANNpoint annCopyPt(
00443         int                             dim,            // dimension
00444         ANNpoint                source);        // point to copy
00445 
00446 //----------------------------------------------------------------------
00447 //Overall structure: ANN supports a number of different data structures
00448 //for approximate and exact nearest neighbor searching.  These are:
00449 //
00450 //              ANNbruteForce   A simple brute-force search structure.
00451 //              ANNkd_tree              A kd-tree tree search structure.  ANNbd_tree
00452 //              A bd-tree tree search structure (a kd-tree with shrink
00453 //              capabilities).
00454 //
00455 //              At a minimum, each of these data structures support k-nearest
00456 //              neighbor queries.  The nearest neighbor query, annkSearch,
00457 //              returns an integer identifier and the distance to the nearest
00458 //              neighbor(s) and annRangeSearch returns the nearest points that
00459 //              lie within a given query ball.
00460 //
00461 //              Each structure is built by invoking the appropriate constructor
00462 //              and passing it (at a minimum) the array of points, the total
00463 //              number of points and the dimension of the space.  Each structure
00464 //              is also assumed to support a destructor and member functions
00465 //              that return basic information about the point set.
00466 //
00467 //              Note that the array of points is not copied by the data
00468 //              structure (for reasons of space efficiency), and it is assumed
00469 //              to be constant throughout the lifetime of the search structure.
00470 //
00471 //              The search algorithm, annkSearch, is given the query point (q),
00472 //              and the desired number of nearest neighbors to report (k), and
00473 //              the error bound (eps) (whose default value is 0, implying exact
00474 //              nearest neighbors).  It returns two arrays which are assumed to
00475 //              contain at least k elements: one (nn_idx) contains the indices
00476 //              (within the point array) of the nearest neighbors and the other
00477 //              (dd) contains the squared distances to these nearest neighbors.
00478 //
00479 //              The search algorithm, annkFRSearch, is a fixed-radius kNN
00480 //              search.  In addition to a query point, it is given a (squared)
00481 //              radius bound.  (This is done for consistency, because the search
00482 //              returns distances as squared quantities.) It does two things.
00483 //              First, it computes the k nearest neighbors within the radius
00484 //              bound, and second, it returns the total number of points lying
00485 //              within the radius bound. It is permitted to set k = 0, in which
00486 //              case it effectively answers a range counting query.  If the
00487 //              error bound epsilon is positive, then the search is approximate
00488 //              in the sense that it is free to ignore any point that lies
00489 //              outside a ball of radius r/(1+epsilon), where r is the given
00490 //              (unsquared) radius bound.
00491 //
00492 //              The generic object from which all the search structures are
00493 //              dervied is given below.  It is a virtual object, and is useless
00494 //              by itself.
00495 //----------------------------------------------------------------------
00496 
00497 class DLL_API ANNpointSet {
00498 public:
00499         virtual ~ANNpointSet() {}                       // virtual distructor
00500 
00501         virtual void annkSearch(                        // approx k near neighbor search
00502                 ANNpoint                q,                              // query point
00503                 int                             k,                              // number of near neighbors to return
00504                 ANNidxArray             nn_idx,                 // nearest neighbor array (modified)
00505                 ANNdistArray    dd,                             // dist to near neighbors (modified)
00506                 double                  eps=0.0                 // error bound
00507                 ) = 0;                                                  // pure virtual (defined elsewhere)
00508 
00509         virtual int annkFRSearch(                       // approx fixed-radius kNN search
00510                 ANNpoint                q,                              // query point
00511                 ANNdist                 sqRad,                  // squared radius
00512                 int                             k = 0,                  // number of near neighbors to return
00513                 ANNidxArray             nn_idx = NULL,  // nearest neighbor array (modified)
00514                 ANNdistArray    dd = NULL,              // dist to near neighbors (modified)
00515                 double                  eps=0.0                 // error bound
00516                 ) = 0;                                                  // pure virtual (defined elsewhere)
00517 
00518         virtual int theDim() = 0;                       // return dimension of space
00519         virtual int nPoints() = 0;                      // return number of points
00520                                                                                 // return pointer to points
00521         virtual ANNpointArray thePoints() = 0;
00522 };
00523 
00524 //----------------------------------------------------------------------
00525 //      Brute-force nearest neighbor search:
00526 //              The brute-force search structure is very simple but inefficient.
00527 //              It has been provided primarily for the sake of comparison with
00528 //              and validation of the more complex search structures.
00529 //
00530 //              Query processing is the same as described above, but the value
00531 //              of epsilon is ignored, since all distance calculations are
00532 //              performed exactly.
00533 //
00534 //              WARNING: This data structure is very slow, and should not be
00535 //              used unless the number of points is very small.
00536 //
00537 //              Internal information:
00538 //              ---------------------
00539 //              This data structure bascially consists of the array of points
00540 //              (each a pointer to an array of coordinates).  The search is
00541 //              performed by a simple linear scan of all the points.
00542 //----------------------------------------------------------------------
00543 
00544 class DLL_API ANNbruteForce: public ANNpointSet {
00545         int                             dim;                            // dimension
00546         int                             n_pts;                          // number of points
00547         ANNpointArray   pts;                            // point array
00548 public:
00549         ANNbruteForce(                                          // constructor from point array
00550                 ANNpointArray   pa,                             // point array
00551                 int                             n,                              // number of points
00552                 int                             dd);                    // dimension
00553 
00554         ~ANNbruteForce();                                       // destructor
00555 
00556         void annkSearch(                                        // approx k near neighbor search
00557                 ANNpoint                q,                              // query point
00558                 int                             k,                              // number of near neighbors to return
00559                 ANNidxArray             nn_idx,                 // nearest neighbor array (modified)
00560                 ANNdistArray    dd,                             // dist to near neighbors (modified)
00561                 double                  eps=0.0);               // error bound
00562 
00563         int annkFRSearch(                                       // approx fixed-radius kNN search
00564                 ANNpoint                q,                              // query point
00565                 ANNdist                 sqRad,                  // squared radius
00566                 int                             k = 0,                  // number of near neighbors to return
00567                 ANNidxArray             nn_idx = NULL,  // nearest neighbor array (modified)
00568                 ANNdistArray    dd = NULL,              // dist to near neighbors (modified)
00569                 double                  eps=0.0);               // error bound
00570 
00571         int theDim()                                            // return dimension of space
00572                 { return dim; }
00573 
00574         int nPoints()                                           // return number of points
00575                 { return n_pts; }
00576 
00577         ANNpointArray thePoints()                       // return pointer to points
00578                 {  return pts;  }
00579 };
00580 
00581 //----------------------------------------------------------------------
00582 // kd- and bd-tree splitting and shrinking rules
00583 //              kd-trees supports a collection of different splitting rules.
00584 //              In addition to the standard kd-tree splitting rule proposed
00585 //              by Friedman, Bentley, and Finkel, we have introduced a
00586 //              number of other splitting rules, which seem to perform
00587 //              as well or better (for the distributions we have tested).
00588 //
00589 //              The splitting methods given below allow the user to tailor
00590 //              the data structure to the particular data set.  They are
00591 //              are described in greater details in the kd_split.cc source
00592 //              file.  The method ANN_KD_SUGGEST is the method chosen (rather
00593 //              subjectively) by the implementors as the one giving the
00594 //              fastest performance, and is the default splitting method.
00595 //
00596 //              As with splitting rules, there are a number of different
00597 //              shrinking rules.  The shrinking rule ANN_BD_NONE does no
00598 //              shrinking (and hence produces a kd-tree tree).  The rule
00599 //              ANN_BD_SUGGEST uses the implementors favorite rule.
00600 //----------------------------------------------------------------------
00601 
00602 enum ANNsplitRule {
00603                 ANN_KD_STD                              = 0,    // the optimized kd-splitting rule
00604                 ANN_KD_MIDPT                    = 1,    // midpoint split
00605                 ANN_KD_FAIR                             = 2,    // fair split
00606                 ANN_KD_SL_MIDPT                 = 3,    // sliding midpoint splitting method
00607                 ANN_KD_SL_FAIR                  = 4,    // sliding fair split method
00608                 ANN_KD_SUGGEST                  = 5};   // the authors' suggestion for best
00609 const int ANN_N_SPLIT_RULES             = 6;    // number of split rules
00610 
00611 enum ANNshrinkRule {
00612                 ANN_BD_NONE                             = 0,    // no shrinking at all (just kd-tree)
00613                 ANN_BD_SIMPLE                   = 1,    // simple splitting
00614                 ANN_BD_CENTROID                 = 2,    // centroid splitting
00615                 ANN_BD_SUGGEST                  = 3};   // the authors' suggested choice
00616 const int ANN_N_SHRINK_RULES    = 4;    // number of shrink rules
00617 
00618 //----------------------------------------------------------------------
00619 //      kd-tree:
00620 //              The main search data structure supported by ANN is a kd-tree.
00621 //              The main constructor is given a set of points and a choice of
00622 //              splitting method to use in building the tree.
00623 //
00624 //              Construction:
00625 //              -------------
00626 //              The constructor is given the point array, number of points,
00627 //              dimension, bucket size (default = 1), and the splitting rule
00628 //              (default = ANN_KD_SUGGEST).  The point array is not copied, and
00629 //              is assumed to be kept constant throughout the lifetime of the
00630 //              search structure.  There is also a "load" constructor that
00631 //              builds a tree from a file description that was created by the
00632 //              Dump operation.
00633 //
00634 //              Search:
00635 //              -------
00636 //              There are two search methods:
00637 //
00638 //                      Standard search (annkSearch()):
00639 //                              Searches nodes in tree-traversal order, always visiting
00640 //                              the closer child first.
00641 //                      Priority search (annkPriSearch()):
00642 //                              Searches nodes in order of increasing distance of the
00643 //                              associated cell from the query point.  For many
00644 //                              distributions the standard search seems to work just
00645 //                              fine, but priority search is safer for worst-case
00646 //                              performance.
00647 //
00648 //              Printing:
00649 //              ---------
00650 //              There are two methods provided for printing the tree.  Print()
00651 //              is used to produce a "human-readable" display of the tree, with
00652 //              indenation, which is handy for debugging.  Dump() produces a
00653 //              format that is suitable reading by another program.  There is a
00654 //              "load" constructor, which constructs a tree which is assumed to
00655 //              have been saved by the Dump() procedure.
00656 //              
00657 //              Performance and Structure Statistics:
00658 //              -------------------------------------
00659 //              The procedure getStats() collects statistics information on the
00660 //              tree (its size, height, etc.)  See ANNperf.h for information on
00661 //              the stats structure it returns.
00662 //
00663 //              Internal information:
00664 //              ---------------------
00665 //              The data structure consists of three major chunks of storage.
00666 //              The first (implicit) storage are the points themselves (pts),
00667 //              which have been provided by the users as an argument to the
00668 //              constructor, or are allocated dynamically if the tree is built
00669 //              using the load constructor).  These should not be changed during
00670 //              the lifetime of the search structure.  It is the user's
00671 //              responsibility to delete these after the tree is destroyed.
00672 //
00673 //              The second is the tree itself (which is dynamically allocated in
00674 //              the constructor) and is given as a pointer to its root node
00675 //              (root).  These nodes are automatically deallocated when the tree
00676 //              is deleted.  See the file src/kd_tree.h for further information
00677 //              on the structure of the tree nodes.
00678 //
00679 //              Each leaf of the tree does not contain a pointer directly to a
00680 //              point, but rather contains a pointer to a "bucket", which is an
00681 //              array consisting of point indices.  The third major chunk of
00682 //              storage is an array (pidx), which is a large array in which all
00683 //              these bucket subarrays reside.  (The reason for storing them
00684 //              separately is the buckets are typically small, but of varying
00685 //              sizes.  This was done to avoid fragmentation.)  This array is
00686 //              also deallocated when the tree is deleted.
00687 //
00688 //              In addition to this, the tree consists of a number of other
00689 //              pieces of information which are used in searching and for
00690 //              subsequent tree operations.  These consist of the following:
00691 //
00692 //              dim                                             Dimension of space
00693 //              n_pts                                   Number of points currently in the tree
00694 //              n_max                                   Maximum number of points that are allowed
00695 //                                                              in the tree
00696 //              bkt_size                                Maximum bucket size (no. of points per leaf)
00697 //              bnd_box_lo                              Bounding box low point
00698 //              bnd_box_hi                              Bounding box high point
00699 //              splitRule                               Splitting method used
00700 //
00701 //----------------------------------------------------------------------
00702 
00703 //----------------------------------------------------------------------
00704 // Some types and objects used by kd-tree functions
00705 // See src/kd_tree.h and src/kd_tree.cpp for definitions
00706 //----------------------------------------------------------------------
00707 class ANNkdStats;                               // stats on kd-tree
00708 class ANNkd_node;                               // generic node in a kd-tree
00709 typedef ANNkd_node*     ANNkd_ptr;      // pointer to a kd-tree node
00710 
00711 class DLL_API ANNkd_tree: public ANNpointSet {
00712 protected:
00713         int                             dim;                            // dimension of space
00714         int                             n_pts;                          // number of points in tree
00715         int                             bkt_size;                       // bucket size
00716         ANNpointArray   pts;                            // the points
00717         ANNidxArray             pidx;                           // point indices (to pts array)
00718         ANNkd_ptr               root;                           // root of kd-tree
00719         ANNpoint                bnd_box_lo;                     // bounding box low point
00720         ANNpoint                bnd_box_hi;                     // bounding box high point
00721 
00722         void SkeletonTree(                                      // construct skeleton tree
00723                 int                             n,                              // number of points
00724                 int                             dd,                             // dimension
00725                 int                             bs,                             // bucket size
00726                 ANNpointArray pa = NULL,                // point array (optional)
00727                 ANNidxArray pi = NULL);                 // point indices (optional)
00728 
00729 public:
00730         ANNkd_tree(                                                     // build skeleton tree
00731                 int                             n = 0,                  // number of points
00732                 int                             dd = 0,                 // dimension
00733                 int                             bs = 1);                // bucket size
00734 
00735         ANNkd_tree(                                                     // build from point array
00736                 ANNpointArray   pa,                             // point array
00737                 int                             n,                              // number of points
00738                 int                             dd,                             // dimension
00739                 int                             bs = 1,                 // bucket size
00740                 ANNsplitRule    split = ANN_KD_SUGGEST);        // splitting method
00741 
00742         ANNkd_tree(                                                     // build from dump file
00743                 std::istream&   in);                    // input stream for dump file
00744 
00745         ~ANNkd_tree();                                          // tree destructor
00746 
00747         void annkSearch(                                        // approx k near neighbor search
00748                 ANNpoint                q,                              // query point
00749                 int                             k,                              // number of near neighbors to return
00750                 ANNidxArray             nn_idx,                 // nearest neighbor array (modified)
00751                 ANNdistArray    dd,                             // dist to near neighbors (modified)
00752                 double                  eps=0.0);               // error bound
00753 
00754         void annkPriSearch(                             // priority k near neighbor search
00755                 ANNpoint                q,                              // query point
00756                 int                             k,                              // number of near neighbors to return
00757                 ANNidxArray             nn_idx,                 // nearest neighbor array (modified)
00758                 ANNdistArray    dd,                             // dist to near neighbors (modified)
00759                 double                  eps=0.0);               // error bound
00760 
00761         int annkFRSearch(                                       // approx fixed-radius kNN search
00762                 ANNpoint                q,                              // the query point
00763                 ANNdist                 sqRad,                  // squared radius of query ball
00764                 int                             k,                              // number of neighbors to return
00765                 ANNidxArray             nn_idx = NULL,  // nearest neighbor array (modified)
00766                 ANNdistArray    dd = NULL,              // dist to near neighbors (modified)
00767                 double                  eps=0.0);               // error bound
00768 
00769         int theDim()                                            // return dimension of space
00770                 { return dim; }
00771 
00772         int nPoints()                                           // return number of points
00773                 { return n_pts; }
00774 
00775         ANNpointArray thePoints()                       // return pointer to points
00776                 {  return pts;  }
00777 
00778         virtual void Print(                                     // print the tree (for debugging)
00779                 ANNbool                 with_pts,               // print points as well?
00780                 std::ostream&   out);                   // output stream
00781 
00782         virtual void Dump(                                      // dump entire tree
00783                 ANNbool                 with_pts,               // print points as well?
00784                 std::ostream&   out);                   // output stream
00785                                                                 
00786         virtual void getStats(                          // compute tree statistics
00787                 ANNkdStats&             st);                    // the statistics (modified)
00788 };                                                              
00789 
00790 //----------------------------------------------------------------------
00791 //      Box decomposition tree (bd-tree)
00792 //              The bd-tree is inherited from a kd-tree.  The main difference
00793 //              in the bd-tree and the kd-tree is a new type of internal node
00794 //              called a shrinking node (in the kd-tree there is only one type
00795 //              of internal node, a splitting node).  The shrinking node
00796 //              makes it possible to generate balanced trees in which the
00797 //              cells have bounded aspect ratio, by allowing the decomposition
00798 //              to zoom in on regions of dense point concentration.  Although
00799 //              this is a nice idea in theory, few point distributions are so
00800 //              densely clustered that this is really needed.
00801 //----------------------------------------------------------------------
00802 
00803 class DLL_API ANNbd_tree: public ANNkd_tree {
00804 public:
00805         ANNbd_tree(                                                     // build skeleton tree
00806                 int                             n,                              // number of points
00807                 int                             dd,                             // dimension
00808                 int                             bs = 1)                 // bucket size
00809                 : ANNkd_tree(n, dd, bs) {}              // build base kd-tree
00810 
00811         ANNbd_tree(                                                     // build from point array
00812                 ANNpointArray   pa,                             // point array
00813                 int                             n,                              // number of points
00814                 int                             dd,                             // dimension
00815                 int                             bs = 1,                 // bucket size
00816                 ANNsplitRule    split  = ANN_KD_SUGGEST,        // splitting rule
00817                 ANNshrinkRule   shrink = ANN_BD_SUGGEST);       // shrinking rule
00818 
00819         ANNbd_tree(                                                     // build from dump file
00820                 std::istream&   in);                    // input stream for dump file
00821 };
00822 
00823 //----------------------------------------------------------------------
00824 //      Other functions
00825 //      annMaxPtsVisit          Sets a limit on the maximum number of points
00826 //                                              to visit in the search.
00827 //  annClose                    Can be called when all use of ANN is finished.
00828 //                                              It clears up a minor memory leak.
00829 //----------------------------------------------------------------------
00830 
00831 DLL_API void annMaxPtsVisit(    // max. pts to visit in search
00832         int                             maxPts);        // the limit
00833 
00834 DLL_API void annClose();                // called to end use of ANN
00835 
00836 #endif