NCBI C++ ToolKit
clusterer_unit_test.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: clusterer_unit_test.cpp 100474 2023-08-04 15:13:04Z boratyng $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Greg Boratyn
27 *
28 * File Description:
29 * Unit tests for CClusterer
30 *
31 *
32 * ===========================================================================
33 */
34 
35 #include <ncbi_pch.hpp>
36 
37 #include <corelib/ncbi_system.hpp>
39 
40 #include <math.h>
41 
42 // This macro should be defined before inclusion of test_boost.hpp in all
43 // "*.cpp" files inside executable except one. It is like function main() for
44 // non-Boost.Test executables is defined only in one *.cpp file - other files
45 // should not include it. If NCBI_BOOST_NO_AUTO_TEST_MAIN will not be defined
46 // then test_boost.hpp will define such "main()" function for tests.
47 //
48 // Usually if your unit tests contain only one *.cpp file you should not
49 // care about this macro at all.
50 //
51 #define NCBI_BOOST_NO_AUTO_TEST_MAIN
52 
53 
54 // This header must be included before all Boost.Test headers if there are any
55 #include <corelib/test_boost.hpp>
56 
57 #ifndef SKIP_DOXYGEN_PROCESSING
58 
60 USING_SCOPE(cobalt);
61 
62 BOOST_AUTO_TEST_SUITE(clusterer)
63 
64 BOOST_AUTO_TEST_CASE(TestSingleCluster)
65 {
66  // Empty cluster
68  BOOST_CHECK_EQUAL((int)cluster.size(), 0);
69  BOOST_CHECK(cluster.GetPrototype() < 0);
70 
71  // Index out of range throws
72  BOOST_CHECK_THROW(cluster[0], CClustererException);
73 
74  // Finding center element of an empty cluster throws exception
75  BOOST_CHECK_THROW(
76  cluster.FindCenterElement(CClusterer::TDistMatrix(2, 2, 0.0)),
78 }
79 
80 BOOST_AUTO_TEST_CASE(TestClusterer)
81 {
82  // Empty clusterer is created with no clusters and distance matrix
83  CClusterer clusterer;
84  BOOST_CHECK_EQUAL((int)clusterer.GetClusters().size(), 0);
85  BOOST_CHECK_THROW(clusterer.GetDistMatrix(), CClustererException);
86 
87  // One element distance matrix yields one one-element cluster
88  CClusterer::TDistMatrix dmat(1, 1, 0.0);
89  clusterer.SetDistMatrix(dmat);
90  clusterer.ComputeClusters(1.0);
91  BOOST_CHECK_EQUAL((int)clusterer.GetClusters().size(), 1);
92  BOOST_CHECK_EQUAL((int)clusterer.GetSingleCluster(0).size(), 1);
93 
94  // For two elements...
95  dmat.Resize(2, 2, 0.0);
96  dmat(0, 1) = 0.1;
97  dmat(1, 0) = dmat(0, 1);
98  clusterer.SetDistMatrix(dmat);
99 
100  // ... larger max distance yields one cluster
101  clusterer.ComputeClusters(1.0);
102  BOOST_CHECK_EQUAL((int)clusterer.GetClusters().size(), 1);
103 
104  // ... smaller max distance yields two clusters
105  clusterer.ComputeClusters(0.01);
106  BOOST_CHECK_EQUAL((int)clusterer.GetClusters().size(), 2);
107 
108  // Accessing cluster by index out of range yields exception
109  BOOST_CHECK_THROW(clusterer.GetSingleCluster(3), CClustererException);
110 
111  // The settings below produce more than 1 cluster
112  dmat.Resize(4, 4, 0.0);
113  dmat(0, 1) = dmat(1, 0) = 0.1;
114  dmat(0, 2) = dmat(2, 0) = 0.3;
115  dmat(0, 3) = dmat(3, 0) = 0.2;
116  dmat(1, 2) = dmat(2, 1) = 0.5;
117  dmat(1, 3) = dmat(3, 1) = 0.2;
118  dmat(2, 3) = dmat(3, 2) = 0.1;
119  clusterer.SetDistMatrix(dmat);
120  clusterer.ComputeClusters(0.4);
121  BOOST_CHECK((int)clusterer.GetClusters().size() > 1);
122 
123  // Cluster distance matrix is square with size equal to number of cluster
124  // elements
125  clusterer.GetClusterDistMatrix(0, dmat);
126  BOOST_CHECK_EQUAL(dmat.GetRows(), dmat.GetCols());
127  BOOST_CHECK_EQUAL(dmat.GetRows(), clusterer.GetSingleCluster(0).size());
128  clusterer.GetClusterDistMatrix(1, dmat);
129  BOOST_CHECK_EQUAL(dmat.GetRows(), dmat.GetCols());
130  BOOST_CHECK_EQUAL(dmat.GetRows(), clusterer.GetSingleCluster(1).size());
131 
132  // Incompatible cluster elements and distance matrix cause exception
133  clusterer.SetClusters()[0].AddElement(10);
134  BOOST_CHECK_THROW(clusterer.GetClusterDistMatrix(0, dmat),
136 
137  // Supplying non-square distance matrix yields exception
138  dmat.Resize(2, 3, 0.0);
139  BOOST_CHECK_THROW(clusterer.SetDistMatrix(dmat), CClustererException);
140 
141  // Accessing non-existing distance matrix yields exception
142  clusterer.PurgeDistMatrix();
143  BOOST_CHECK_THROW(clusterer.GetDistMatrix(), CClustererException);
144 }
145 
146 
147 ///Read distance matrix from file
148 ///@param filename Filename [in]
149 ///@param dmat Distance matrix [out]
150 static void s_ReadDistMatrix(const string& filename,
152 {
153  CNcbiIfstream istr(filename.c_str());
154  BOOST_REQUIRE(istr);
155  vector<double> dists;
156  while (!istr.eof()) {
157  double val = DBL_MAX;
158  istr >> val;
159 
160  if (val != DBL_MAX) {
161  dists.push_back(val);
162  }
163  }
164 
165  int num_elements = (int)sqrt((double)dists.size());
166 
167  // distances must form a square matrix
168  BOOST_REQUIRE_EQUAL(num_elements * num_elements, (int)dists.size());
169 
170  dmat.Resize(num_elements, num_elements, 0.0);
171  for (int i=0;i < num_elements;i++) {
172  for (int j=0;j < num_elements;j++) {
173  dmat(i, j) = dists[i*num_elements + j];
174  }
175  }
176 }
177 
178 
179 // Traverse the tree and check if elements such that elems[elem] == false
180 // appear in the tree and others do not
181 static void s_TestTree(vector<bool>& elems, const TPhyTreeNode* node)
182 {
183  if (node->IsLeaf()) {
184 
185  int id = node->GetValue().GetId();
186  BOOST_REQUIRE(id < (int)elems.size());
187 
188  // each element appears in the tree exactly once
189  BOOST_REQUIRE(!elems[id]);
190  elems[id] = true;
191  }
192  else {
193 
195  for (; it != node->SubNodeEnd();it++) {
196  s_TestTree(elems, *it);
197  }
198  }
199 }
200 
201 
202 // Check if all cluster elements appear in the tree
204  const TPhyTreeNode* tree)
205 {
206  BOOST_REQUIRE(tree);
207  BOOST_REQUIRE(cluster.size() > 0);
208 
209  // find max element
210  int max_elem = cluster[0];
211  for (int i=1;i < (int)cluster.size();i++) {
212  if (cluster[i] > max_elem) {
213  max_elem = cluster[i];
214  }
215  }
216 
217  // s_TestTree fails if e such that elems[e] == true is found or e such
218  // that elems[e] == false is not found
219  vector<bool> elems(max_elem + 1, true);
220  ITERATE (CClusterer::TSingleCluster, elem, cluster) {
221  elems[*elem] = false;
222  }
223 
224  s_TestTree(elems, tree);
225 
226  // make sure all elements were found
227  ITERATE (vector<bool>, it, elems) {
228  BOOST_CHECK(*it);
229  }
230 }
231 
232 
233 /// Check clusters
234 /// @param dmat Distance matrix [in]
235 /// @param clusters Clusters to examine [in]
236 /// @param trees Cluster trees to examine [in]
237 /// @param ref_filename Name of filename containing reference clusters data [in]
238 static void s_TestClustersAndTrees(int num_elems,
239  CClusterer& clusterer,
240  const string& ref_filename = "")
241 {
242  BOOST_REQUIRE(clusterer.GetClusters().size() > 0);
243 
244  vector<bool> check_elems(num_elems, false);
245 
246  const CClusterer::TClusters& clusters = clusterer.GetClusters();
247  const vector<TPhyTreeNode*>& trees = clusterer.GetTrees();
248 
249  // check whether each element belongs to exactly one cluster
250  int num_elements = 0;
251  ITERATE (CClusterer::TClusters, cluster, clusters) {
252  ITERATE (CClusterer::TSingleCluster, elem, *cluster) {
253 
254  BOOST_REQUIRE(*elem < num_elems);
255 
256  // each element appear only once in all clusters
257  BOOST_REQUIRE(!check_elems[*elem]);
258  check_elems[*elem] = true;
259 
260  num_elements++;
261  }
262  }
263 
264  // make sure all elements were found in clusters
265  ITERATE (vector<bool>, it, check_elems) {
266  BOOST_CHECK(*it);
267  }
268 
269  // Check cluster trees
270  // each cluster must have its tree
271  BOOST_REQUIRE_EQUAL(clusters.size(), trees.size());
272 
273  // check each tree
274  for (size_t i=0;i < clusters.size();i++) {
275  s_TestClusterTree(clusters[i], trees[i]);
276  }
277 
278  // Compare clusters and their elements to reference
279  if (!ref_filename.empty()) {
280 
281  // read reference clusters from file
282  // format: number of clusters, cluster sizes, elements of cluster 0,
283  // elements of cluster1, ...
284  CNcbiIfstream ref_istr(ref_filename.c_str());
285  BOOST_REQUIRE(ref_istr);
286  vector<int> ref_clust_elems;
287  int ref_num_clusters = 0;
288  int ref_num_elems = 0;
289 
290  // ingnore comment lines
291  // comments are allowed in the beginning of the reference file
292  char* buff = new char[256];
293  while (ref_istr.peek() == (int)'#') {
294  ref_istr.getline(buff, 256);
295  }
296  delete [] buff;
297 
298  // read number of clusters
299  ref_istr >> ref_num_clusters;
300  // ... and compare with computed clusters
301  BOOST_REQUIRE_EQUAL((int)clusters.size(), ref_num_clusters);
302 
303  // read cluster sizes and compare with computed cluster sizes
304  int ind = 0;
305  for (int i=0;i < ref_num_clusters;i++) {
306 
307  BOOST_REQUIRE(!ref_istr.eof());
308 
309  int ref_size = 0;
310  ref_istr >> ref_size;
311  BOOST_REQUIRE_EQUAL((int)clusters[ind++].size(), ref_size);
312 
313  ref_num_elems += ref_size;
314  }
315 
316  // read cluster elements
317  bool zero_found = false;
318  for (int i=0;i < ref_num_elems;i++) {
319 
320  BOOST_REQUIRE(!ref_istr.eof());
321 
322  int ref_elem;
323  ref_istr >> ref_elem;
324 
325  ref_clust_elems.push_back(ref_elem);
326 
327 
328  // there can be only one zero
329  BOOST_REQUIRE((!zero_found && ref_elem >= 0)
330  || (zero_found && ref_elem > 0));
331 
332  if (ref_elem == 0) {
333  zero_found = true;
334  }
335  }
336 
337  // compare computed cluster elements with refernece
338  ind = 0;
339  ITERATE(CClusterer::TClusters, cluster, clusters) {
340  ITERATE(CClusterer::TSingleCluster, elem, *cluster) {
341 
342  BOOST_REQUIRE_EQUAL(*elem, ref_clust_elems[ind++]);
343  }
344  }
345  }
346 }
347 
348 
350 {
351  CClusterer clusterer;
352  clusterer.SetMakeTrees(true);
353 
354  const int num_elements = 5;
355  CRef<CLinks> links(new CLinks(num_elements));
356  clusterer.SetLinks(links);
357  clusterer.Run();
358 
359  s_TestClustersAndTrees(num_elements, clusterer);
360 }
361 
362 BOOST_AUTO_TEST_CASE(TestOneElement)
363 {
364  CClusterer clusterer;
365  clusterer.SetMakeTrees(true);
366 
367  // one element
368  const int num_elements = 1;
369 
370  CRef<CLinks> links(new CLinks(num_elements));
371  // one element, hence no links
372  clusterer.SetLinks(links);
373  clusterer.Run();
374 
375  s_TestClustersAndTrees(num_elements, clusterer);
376 }
377 
378 
379 BOOST_AUTO_TEST_CASE(TestTwoElementsOneCluster)
380 {
381  CClusterer clusterer;
382  clusterer.SetMakeTrees(true);
383 
384  // two elements
385  const int num_elements = 2;
386 
387  CRef<CLinks> links(new CLinks(num_elements));
388  // link exists, hence the elements will be joined
389  links->AddLink(0, 1, 0.0);
390  clusterer.SetLinks(links);
391  clusterer.Run();
392 
393  s_TestClustersAndTrees(num_elements, clusterer);
394 }
395 
396 
397 BOOST_AUTO_TEST_CASE(TestTwoElementsTwoClusters)
398 {
399  CClusterer clusterer;
400  clusterer.SetMakeTrees(true);
401 
402  // two elements
403  const int num_elements = 2;
404 
405  CRef<CLinks> links(new CLinks(num_elements));
406  // no link between elements, hence two clusters
407  clusterer.SetLinks(links);
408  clusterer.Run();
409 
410  s_TestClustersAndTrees(num_elements, clusterer);
411 }
412 
413 BOOST_AUTO_TEST_CASE(TestMoreElements)
414 {
415  // Check clusters and trees and compare clusters with reference files
416 
417  CClusterer clusterer;
418  clusterer.SetMakeTrees(true);
419 
421  s_ReadDistMatrix("data/dist_matrix.txt", dmat);
422 
423  BOOST_REQUIRE_EQUAL(dmat.GetCols(), dmat.GetRows());
424 
425  // two elements
426  const int num_elements = (int)dmat.GetCols();
427 
428  // maximum cluster diameter
429  const double max_distance = 0.8;
430 
431  CRef<CLinks> links(new CLinks(num_elements));
432  for (size_t i=0;i < dmat.GetCols() - 1;i++) {
433  for (size_t j=i+1;j < dmat.GetCols();j++) {
434  if (dmat(i, j) < max_distance) {
435  links->AddLink((int)i, (int)j, dmat(i, j));
436  }
437  }
438  }
439  clusterer.SetLinks(links);
440  clusterer.Run();
441 
442  s_TestClustersAndTrees(num_elements, clusterer, "data/ref_clusters.txt");
443 }
444 
445 // Test clustering using pre-computed clusters as another input
446 BOOST_AUTO_TEST_CASE(TestPrecomputedClusters)
447 {
448  // create two pre-computed clusters with elements (0, 1, 2), (3)
449  CClusterer::TClusters pre_clusters(2);
450  pre_clusters[0].AddElement(0);
451  pre_clusters[0].AddElement(1);
452  pre_clusters[0].AddElement(2);
453  pre_clusters[1].AddElement(3);
454 
455  // create links for clustering
456  CRef<CLinks> links(new CLinks(6));
457  links->AddLink(0, 4, 0.5);
458  links->AddLink(1, 4, 1.5);
459  links->AddLink(4, 2, 2.0);
460 
461  // set pre-computed clusters
462  CClusterer clusterer(links);
463  ITERATE (CClusterer::TClusters, it, pre_clusters) {
464  clusterer.SetClusters().push_back(*it);
465  }
466 
467  // run clustering
468  clusterer.Run();
469  CClusterer::TClusters& clusters = clusterer.SetClusters();
470  BOOST_REQUIRE_EQUAL(clusters.size(), 3u);
471 
472  // make sure that all elements are present once and assigned to correct
473  // clusters
474  vector<bool> presence(links->GetNumElements(), false);
475  NON_CONST_ITERATE(CClusterer::TClusters, clust, clusters) {
476  int elem = *clust->begin();
477  if (elem == 5 || elem == 3) {
478  BOOST_REQUIRE_EQUAL(clust->size(), 1u);
479  BOOST_REQUIRE(!presence[elem]);
480  presence[elem] = true;
481  }
482  else {
483  BOOST_REQUIRE_EQUAL(clust->size(), 4u);
484  ITERATE (CClusterer::TSingleCluster, it, *clust) {
485  BOOST_REQUIRE(!presence[*it]);
486  presence[*it] = true;
487  }
488  }
489  }
490 
491  ITERATE (vector<bool>, it, presence) {
492  BOOST_REQUIRE(*it);
493  }
494 }
495 
496 // Test incremental clustering: first cluster a set of links, then cluster
497 // new set of links using the clusters as a staring point
498 BOOST_AUTO_TEST_CASE(TestIncremental)
499 {
500  const int kNumElements = 10;
501 
502  // create links
503  CRef<CLinks> links(new CLinks(kNumElements));
504  links->AddLink(0, 1, 0.0);
505  links->AddLink(5, 6, 0.0);
506 
507  // run first clustering
508  unique_ptr<CClusterer> clusterer(new CClusterer(links));
509  clusterer->SetReportSingletons(false);
510  clusterer->Run();
511 
512  // save clusters from the first round
513  CClusterer::TClusters clusters;
514  clusters.swap(clusterer->SetClusters());
515 
516  // create new set of links
517  links.Reset(new CLinks(kNumElements));
518  links->AddLink(0, 2, 0.0);
519  links->AddLink(1, 2, 0.0);
520  links->AddLink(5, 8, 0.0);
521 
522  // reset clusterer, set new links, pre-computed clusters and run clustering
523  clusterer.reset(new CClusterer(links));
524  clusterer->SetClusters().swap(clusters);
525  clusterer->Run();
526 
527  CClusterer::TClusters& result = clusterer->SetClusters();
528 
529  // these should be sizes of resulting clusters, indexed by the smalles
530  // element of each cluster
531  vector<size_t> sizes(kNumElements, 0);
532  sizes[0] = 3;
533  sizes[3] = 1;
534  sizes[4] = 1;
535  sizes[5] = 2;
536  sizes[7] = 1;
537  sizes[8] = 1;
538  sizes[9] = 1;
539 
540  // the sets must correspond to clusters
541  vector< set<int> > reference(kNumElements);
542  reference[0].insert(0);
543  reference[0].insert(1);
544  reference[0].insert(2);
545 
546  reference[3].insert(3);
547  reference[4].insert(4);
548 
549  reference[5].insert(5);
550  reference[5].insert(6);
551 
552  reference[7].insert(7);
553  reference[8].insert(8);
554  reference[9].insert(9);
555 
556  // check the size and elements of the resulting clusters
558  sort(cluster->begin(), cluster->end());
559  int cl_index = *cluster->begin();
560  BOOST_REQUIRE_EQUAL(cluster->size(), sizes[cl_index]);
561  ITERATE (CClusterer::TSingleCluster, elem, *cluster) {
562  set<int>::iterator it = reference[cl_index].find(*elem);
563  BOOST_REQUIRE(it != reference[cl_index].end());
564  }
565  }
566 }
567 
568 
570 
571 #endif /* SKIP_DOXYGEN_PROCESSING */
BOOST_AUTO_TEST_SUITE_END() static int s_GetSegmentFlags(const CBioseq &bioseq)
Exceptions for CClusterer class.
Definition: clusterer.hpp:395
int FindCenterElement(const TDistMatrix &dmatrix) const
Find element that is closest to the center of the cluster.
Definition: clusterer.cpp:1184
int GetPrototype(void) const
Get cluster prototype.
Definition: clusterer.hpp:103
size_t size(void) const
Get cluster size.
Definition: clusterer.hpp:118
Interface for CClusterer class used for clustering any type of data based on distance matrix.
Definition: clusterer.hpp:53
void GetTrees(vector< TPhyTreeNode * > &trees) const
Get list of trees for clusters.
Definition: clusterer.cpp:1064
const TSingleCluster & GetSingleCluster(size_t index) const
Get list of elements of a specified cluster.
Definition: clusterer.cpp:130
void SetMakeTrees(bool trees)
Set make cluster tree/dendrogram option.
Definition: clusterer.hpp:230
void SetDistMatrix(const TDistMatrix &dmat)
Set new distance matrix.
Definition: clusterer.cpp:97
const TDistMatrix & GetDistMatrix(void) const
Get distance matrix.
Definition: clusterer.cpp:118
void SetLinks(CRef< CLinks > links)
Set distance links.
Definition: clusterer.cpp:113
void Run(void)
Cluster elements.
Definition: clusterer.cpp:1157
const TClusters & GetClusters(void) const
Get clusters.
Definition: clusterer.hpp:272
void ComputeClusters(double max_diam, EDistMethod dist_method=eCompleteLinkage, bool do_trees=true, double infinity=-1.0)
Compute clusters.
Definition: clusterer.cpp:272
vector< TSingleCluster > TClusters
Definition: clusterer.hpp:160
TClusters & SetClusters(void)
Set clusters.
Definition: clusterer.hpp:277
void PurgeDistMatrix(void)
Delete distance matrix.
Definition: clusterer.hpp:323
void GetClusterDistMatrix(int index, TDistMatrix &mat) const
Get distance matrix for elements of a selected cluster.
Definition: clusterer.cpp:1123
void Resize(size_t i, size_t j, T val=T())
resize this matrix, filling the empty cells with a known value
Definition: matrix.hpp:390
size_t GetRows() const
get the number of rows in this matrix
Definition: matrix.hpp:298
size_t GetCols() const
get the number of columns in this matrix
Definition: matrix.hpp:305
definition of a Culling tree
Definition: ncbi_tree.hpp:100
Definition: set.hpp:45
const_iterator find(const key_type &key) const
Definition: set.hpp:137
USING_SCOPE(cobalt)
static void s_ReadDistMatrix(const string &filename, CClusterer::TDistMatrix &dmat)
Read distance matrix from file.
BOOST_AUTO_TEST_CASE(TestSingleCluster)
static void s_TestClusterTree(const CClusterer::TSingleCluster &cluster, const TPhyTreeNode *tree)
static void s_TestTree(vector< bool > &elems, const TPhyTreeNode *node)
static void s_TestClustersAndTrees(int num_elems, CClusterer &clusterer, const string &ref_filename="")
Check clusters.
USING_NCBI_SCOPE
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
Definition: ncbistre.hpp:439
TNodeList_CI SubNodeBegin(void) const
Return first const iterator on subnode list.
Definition: ncbi_tree.hpp:160
TNodeList::const_iterator TNodeList_CI
Definition: ncbi_tree.hpp:110
bool IsLeaf() const
Report whether this is a leaf node.
Definition: ncbi_tree.hpp:296
TNodeList_CI SubNodeEnd(void) const
Return last const iterator on subnode list.
Definition: ncbi_tree.hpp:166
const TValue & GetValue(void) const
Return node's value.
Definition: ncbi_tree.hpp:184
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
int i
constexpr auto sort(_Init &&init)
const struct ncbi::grid::netcache::search::fields::SIZE size
BOOST_AUTO_TEST_SUITE(psiblast_iteration)
Utility stuff for more convenient using of Boost.Test library.
else result
Definition: token2.c:20
Modified on Fri May 03 15:52:43 2024 by modify_doxy.py rev. 669887