00001 //---------------------------------------------------------------------- 00002 // File: ANNperf.h 00003 // Programmer: Sunil Arya and David Mount 00004 // Last modified: 03/04/98 (Release 0.1) 00005 // Description: Include file for ANN performance stats 00006 // 00007 // Some of the code for statistics gathering has been adapted 00008 // from the SmplStat.h package in the g++ library. 00009 //---------------------------------------------------------------------- 00010 // Copyright (c) 1997-1998 University of Maryland and Sunil Arya and David 00011 // Mount. All Rights Reserved. 00012 // 00013 // This software and related documentation is part of the 00014 // Approximate Nearest Neighbor Library (ANN). 00015 // 00016 // Permission to use, copy, and distribute this software and its 00017 // documentation is hereby granted free of charge, provided that 00018 // (1) it is not a component of a commercial product, and 00019 // (2) this notice appears in all copies of the software and 00020 // related documentation. 00021 // 00022 // The University of Maryland (U.M.) and the authors make no representations 00023 // about the suitability or fitness of this software for any purpose. It is 00024 // provided "as is" without express or implied warranty. 00025 //---------------------------------------------------------------------- 00026 // History: 00027 // Revision 0.1 03/04/98 00028 // Initial release 00029 // Revision 1.0 04/01/05 00030 // Added ANN_ prefix to avoid name conflicts. 00031 //---------------------------------------------------------------------- 00032 00033 #ifndef ANNperf_H 00034 #define ANNperf_H 00035 00036 //---------------------------------------------------------------------- 00037 // basic includes 00038 //---------------------------------------------------------------------- 00039 00040 #include <ANN/ANN.h> // basic ANN includes 00041 00042 //---------------------------------------------------------------------- 00043 // kd-tree stats object 00044 // This object is used for collecting information about a kd-tree 00045 // or bd-tree. 00046 //---------------------------------------------------------------------- 00047 00048 class ANNkdStats { // stats on kd-tree 00049 public: 00050 int dim; // dimension of space 00051 int n_pts; // no. of points 00052 int bkt_size; // bucket size 00053 int n_lf; // no. of leaves (including trivial) 00054 int n_tl; // no. of trivial leaves (no points) 00055 int n_spl; // no. of splitting nodes 00056 int n_shr; // no. of shrinking nodes (for bd-trees) 00057 int depth; // depth of tree 00058 float sum_ar; // sum of leaf aspect ratios 00059 float avg_ar; // average leaf aspect ratio 00060 // 00061 // reset stats 00062 void reset(int d=0, int n=0, int bs=0) 00063 { 00064 dim = d; n_pts = n; bkt_size = bs; 00065 n_lf = n_tl = n_spl = n_shr = depth = 0; 00066 sum_ar = avg_ar = 0.0; 00067 } 00068 00069 ANNkdStats() // basic constructor 00070 { reset(); } 00071 00072 void merge(const ANNkdStats &st); // merge stats from child 00073 }; 00074 00075 //---------------------------------------------------------------------- 00076 // ANNsampStat 00077 // A sample stat collects numeric (double) samples and returns some 00078 // simple statistics. Its main functions are: 00079 // 00080 // reset() Reset to no samples. 00081 // += x Include sample x. 00082 // samples() Return number of samples. 00083 // mean() Return mean of samples. 00084 // stdDev() Return standard deviation 00085 // min() Return minimum of samples. 00086 // max() Return maximum of samples. 00087 //---------------------------------------------------------------------- 00088 class DLL_API ANNsampStat { 00089 int n; // number of samples 00090 double sum; // sum 00091 double sum2; // sum of squares 00092 double minVal, maxVal; // min and max 00093 public : 00094 void reset() // reset everything 00095 { 00096 n = 0; 00097 sum = sum2 = 0; 00098 minVal = ANN_DBL_MAX; 00099 maxVal = -ANN_DBL_MAX; 00100 } 00101 00102 ANNsampStat() { reset(); } // constructor 00103 00104 void operator+=(double x) // add sample 00105 { 00106 n++; sum += x; sum2 += x*x; 00107 if (x < minVal) minVal = x; 00108 if (x > maxVal) maxVal = x; 00109 } 00110 00111 int samples() { return n; } // number of samples 00112 00113 double mean() { return sum/n; } // mean 00114 00115 // standard deviation 00116 double stdDev() { return sqrt((sum2 - (sum*sum)/n)/(n-1));} 00117 00118 double min() { return minVal; } // minimum 00119 double max() { return maxVal; } // maximum 00120 }; 00121 00122 //---------------------------------------------------------------------- 00123 // Operation count updates 00124 //---------------------------------------------------------------------- 00125 00126 #ifdef ANN_PERF 00127 #define ANN_FLOP(n) {ann_Nfloat_ops += (n);} 00128 #define ANN_LEAF(n) {ann_Nvisit_lfs += (n);} 00129 #define ANN_SPL(n) {ann_Nvisit_spl += (n);} 00130 #define ANN_SHR(n) {ann_Nvisit_shr += (n);} 00131 #define ANN_PTS(n) {ann_Nvisit_pts += (n);} 00132 #define ANN_COORD(n) {ann_Ncoord_hts += (n);} 00133 #else 00134 #define ANN_FLOP(n) 00135 #define ANN_LEAF(n) 00136 #define ANN_SPL(n) 00137 #define ANN_SHR(n) 00138 #define ANN_PTS(n) 00139 #define ANN_COORD(n) 00140 #endif 00141 00142 //---------------------------------------------------------------------- 00143 // Performance statistics 00144 // The following data and routines are used for computing performance 00145 // statistics for nearest neighbor searching. Because these routines 00146 // can slow the code down, they can be activated and deactiviated by 00147 // defining the ANN_PERF variable, by compiling with the option: 00148 // -DANN_PERF 00149 //---------------------------------------------------------------------- 00150 00151 //---------------------------------------------------------------------- 00152 // Global counters for performance measurement 00153 // 00154 // visit_lfs The number of leaf nodes visited in the 00155 // tree. 00156 // 00157 // visit_spl The number of splitting nodes visited in the 00158 // tree. 00159 // 00160 // visit_shr The number of shrinking nodes visited in the 00161 // tree. 00162 // 00163 // visit_pts The number of points visited in all the 00164 // leaf nodes visited. Equivalently, this 00165 // is the number of points for which distance 00166 // calculations are performed. 00167 // 00168 // coord_hts The number of times a coordinate of a 00169 // data point is accessed. This is generally 00170 // less than visit_pts*d if partial distance 00171 // calculation is used. This count is low 00172 // in the sense that if a coordinate is hit 00173 // many times in the same routine we may 00174 // count it only once. 00175 // 00176 // float_ops The number of floating point operations. 00177 // This includes all operations in the heap 00178 // as well as distance calculations to boxes. 00179 // 00180 // average_err The average error of each query (the 00181 // error of the reported point to the true 00182 // nearest neighbor). For k nearest neighbors 00183 // the error is computed k times. 00184 // 00185 // rank_err The rank error of each query (the difference 00186 // in the rank of the reported point and its 00187 // true rank). 00188 // 00189 // data_pts The number of data points. This is not 00190 // a counter, but used in stats computation. 00191 //---------------------------------------------------------------------- 00192 00193 extern int ann_Ndata_pts; // number of data points 00194 extern int ann_Nvisit_lfs; // number of leaf nodes visited 00195 extern int ann_Nvisit_spl; // number of splitting nodes visited 00196 extern int ann_Nvisit_shr; // number of shrinking nodes visited 00197 extern int ann_Nvisit_pts; // visited points for one query 00198 extern int ann_Ncoord_hts; // coordinate hits for one query 00199 extern int ann_Nfloat_ops; // floating ops for one query 00200 extern ANNsampStat ann_visit_lfs; // stats on leaf nodes visits 00201 extern ANNsampStat ann_visit_spl; // stats on splitting nodes visits 00202 extern ANNsampStat ann_visit_shr; // stats on shrinking nodes visits 00203 extern ANNsampStat ann_visit_nds; // stats on total nodes visits 00204 extern ANNsampStat ann_visit_pts; // stats on points visited 00205 extern ANNsampStat ann_coord_hts; // stats on coordinate hits 00206 extern ANNsampStat ann_float_ops; // stats on floating ops 00207 //---------------------------------------------------------------------- 00208 // The following need to be part of the public interface, because 00209 // they are accessed outside the DLL in ann_test.cpp. 00210 //---------------------------------------------------------------------- 00211 DLL_API extern ANNsampStat ann_average_err; // average error 00212 DLL_API extern ANNsampStat ann_rank_err; // rank error 00213 00214 //---------------------------------------------------------------------- 00215 // Declaration of externally accessible routines for statistics 00216 //---------------------------------------------------------------------- 00217 00218 DLL_API void annResetStats(int data_size); // reset stats for a set of queries 00219 00220 DLL_API void annResetCounts(); // reset counts for one queries 00221 00222 DLL_API void annUpdateStats(); // update stats with current counts 00223 00224 DLL_API void annPrintStats(ANNbool validate); // print statistics for a run 00225 00226 #endif