[avida-cvs] avida CVS commits: /current/source/main analyze.cc

dule123 avida-cvs at alife.org
Tue Nov 4 02:27:42 PST 2003


dule123		Mon Nov  3 18:27:42 2003 EDT

  Modified files:              
    /avida/current/source/main	analyze.cc 
  Log:
  
  Added two new modularity measures to AVERAGE_MODULARITY in analyze mode.
  
  1) 1 - average proportion of task overlap
  Rich came up with this one. It calculates what proportion of a task overlaps
  with each of the other tasks, and averages that over all tasks. It looks
  well behaved and is bounded between 0 and 1. The number actually reported is
  one minus the proportion of overlap, so 1 is totally modular and 0 is totally
  overlapping.
  
  2) task length
  Bob suggested this could be useful. It reports the average number of
  instructions from the first to the last instruction involved in a task,
  counting both of those instructions.
  
  The output file now has a whopping 45 columns and yes there is a legend 
  included ;-) 
  
  
  
-------------- next part --------------
Index: avida/current/source/main/analyze.cc
diff -u avida/current/source/main/analyze.cc:1.81 avida/current/source/main/analyze.cc:1.82
--- avida/current/source/main/analyze.cc:1.81	Thu Oct 30 13:42:28 2003
+++ avida/current/source/main/analyze.cc	Mon Nov  3 18:27:42 2003
@@ -2033,9 +2033,11 @@
     fp << "# 5: average number of tasks done per site" << endl;
     fp << "# 6: average number sites per task done" << endl;
     fp << "# 7: average number tasks per site per task" << endl;
-    fp << "# 8-16: average StDev in positions used for task 1-9" << endl;
-    fp << "# 17-25: average number of sites necessary for each of the tasks" << endl;
-    fp << "# 26-35: number of sites involved in 0-9 tasks" << endl;
+    fp << "# 8: average proportion of the non-overlaping region of a task" << endl;
+    fp << "# 9-17: average StDev in positions used for task 1-9" << endl;
+    fp << "# 18-26: average number of sites necessary for each of the tasks" << endl;
+    fp << "# 27-36: number of sites involved in 0-9 tasks" << endl;
+    fp << "# 37-45: average task length (distance from first to last inst used)" << endl;
     fp << endl;
     return;
 }        
@@ -2051,28 +2053,27 @@
     double  av_site_task = 0; 	// average number of sites per task
     double  av_task_site = 0;   // average number of tasks per site
     double  av_t_s_norm = 0;	// average number of tasks per site per task
+    double  av_task_overlap = 0; // average overlap between tasks
 
     // average StDev in positions used for a task
     tArray<double> std_task_position(num_cols);
-    
+    std_task_position.SetAll(0.0);
+
     // # of organisms actually doing a task
     tArray<double> org_task(num_cols);
+    org_task.SetAll(0.0);
 
     // av. # of sites necessary for each of the tasks
     tArray<double> av_num_inst(num_cols);
+    av_num_inst.SetAll(0.0);
     
     // number of sites involved in 0-9 tasks 
     tArray<double> av_inst_task(num_cols+1);
+    av_inst_task.SetAll(0.0);
 
-    for (int i = 0; i < num_cols; i++) {
-      av_num_inst[i] = 0;
-      org_task[i] = 0; 
-      std_task_position[i] = 0;
-    }
-
-    for (int i = 0; i < num_cols+1; i++) {
-      av_inst_task[i] = 0;
-    }
+    // av. # task length (distance from first to last site used)
+    tArray<double> av_task_length(num_cols);
+    av_task_length.SetAll(0.0);
 
     
   ///////////////////////////////////////////////////////
@@ -2111,11 +2112,11 @@
 
     // Create and initialize the modularity matrix
     tMatrix<int> mod_matrix(num_cols, max_line);
-    for (int i = 0; i < num_cols; i++) {
-        for (int j = 0; j < max_line; j++) {
-      	    mod_matrix(i,j) = 0;
-	}
-    } 
+    mod_matrix.SetAll(0);
+
+    // Create and initialize the task overalp matrix
+    tMatrix<int> task_overlap(num_cols, num_cols);
+    task_overlap.SetAll(0);
 
     // Create an initialize the counters for modularity
     tArray<int> num_task(max_line); // number of tasks instruction is used in
@@ -2123,22 +2124,19 @@
     tArray<int> sum(num_cols); 	    // helps with StDev calculations
     tArray<int> sumsq(num_cols);    // helps with StDev calculations
     tArray<int> inst_task(num_cols+1); // # of inst's involved in 0,1,2,3... tasks
+    tArray<int> task_length(num_cols);    // ditance between first and last inst involved in a task
 
-    for (int i = 0; i < num_cols; i++) {
-      num_inst[i] = 0;
-      sum[i] = 0;
-      sumsq[i] = 0; 		
-    }
-    for (int i = 0; i < num_cols+1; i++) {
-      inst_task[i] = 0;		
-    }
-    for (int i = 0; i < max_line; i++) {
-      num_task[i] = 0;		
-    }
-
-    int total_task = 0; // total number of tasks done
-    int total_inst = 0; // total number of instructions involved in tasks
-    int total_all = 0;  // sum of mod_matrix
+    num_task.SetAll(0);
+    num_inst.SetAll(0);
+    sum.SetAll(0);
+    sumsq.SetAll(0);
+    inst_task.SetAll(0);
+    task_length.SetAll(0);
+
+    int total_task = 0; 	// total number of tasks done
+    int total_inst = 0; 	// total number of instructions involved in tasks
+    int total_all = 0;  	// sum of mod_matrix
+    double sum_task_overlap = 0;// task overlap for for this geneome
 
     cInstSet map_inst_set(inst_set);
 
@@ -2190,10 +2188,9 @@
 	}
 	cur_col++;
       }
-
       // Reset the mod_genome back to the original sequence.
       mod_genome[line_num].SetOp(cur_inst);
-    }
+    } // end of genotype-phenotype mapping for a single organism
     
     for (int i = 0; i < num_cols; i++) {if (num_inst[i] != 0) total_task++;}
     for (int i = 0; i < max_line; i++) {if (num_task[i] != 0) total_inst++;}
@@ -2220,8 +2217,68 @@
 	}
     }	
 
-    // calculate the Standard Deviation in the mean position of the task
+    // calculate average task overlap
+    // first construct num_task x num_task matrix with number of sites overlapping
+    for (int i = 0; i < max_line; i++) {
+        for (int j = 0; j < num_cols; j++) {
+	    for (int k = j; k < num_cols; k++) {
+		if (mod_matrix(j,i)>0 && mod_matrix(k,i)>0) {
+			task_overlap(j,k)++;
+			if (j!=k) task_overlap(k,j)++;
+		}		
+	    }
+	}
+    }
 
+    // go though the task_overlap matrix, add and average everything up. 
+    if (total_task > 1) {
+       for (int i = 0; i < num_cols; i++) {
+	   double overlap_per_task = 0; 		
+	   for (int j = 0; j < num_cols; j++) {
+	    	if (i!=j) {overlap_per_task = overlap_per_task + task_overlap(i,j);}
+	   }
+	   sum_task_overlap = sum_task_overlap + overlap_per_task / (task_overlap(i,i) * (total_task-1)); 	
+	}
+    }
+
+    // now, divide that by number of tasks done and add to the grand sum, weigthed by num_cpus 
+    if (total_task !=0) av_task_overlap = av_task_overlap + num_cpus * (double) sum_task_overlap/total_task ;
+
+    // calculate the first/last postion of a task, the task "spread"
+    // starting from the top look for the fist command that matters for a task
+
+    for (int i = 0; i < num_cols; i++) { 
+	int j = 0; 
+	while (j < max_line) {
+	     if (mod_matrix(i,j) > 0 && task_length[i] == 0 ) {
+		task_length[i] = j;
+		break;
+	     }
+	     j++;
+	}
+     }
+
+    // starting frm the bottom look for the last command that matters for a task
+    // and substract it from the first to get the task length
+    // add one in order to account for both the beginning and the end instruction
+    for (int i = 0; i < num_cols; i++) { 
+	int j = max_line - 1; 
+	while (j > -1) {
+	     if (mod_matrix(i,j) > 0) {
+		task_length[i] = j - task_length[i] + 1;
+		break;
+	     }
+	     j--;
+	}
+    }
+   // add the task lengths to the average for the batch
+   // weigthed by the number of cpus for that genotype 
+   for (int i = 0; i < num_cols; i++) { 
+	av_task_length[i] = av_task_length[i] +  num_cpus * task_length[i];
+   }
+
+
+    // calculate the Standard Deviation in the mean position of the task
     for (int i = 0; i < num_cols; i++) { 
 	for (int j = 0; j < max_line; j++) { 
 		if (mod_matrix(i,j)>0) sum[i] = sum[i] + j;
@@ -2243,7 +2300,10 @@
     for (int i = 0; i < num_cols+1; i++) { av_inst_task[i] = av_inst_task[i] + inst_task[i] * num_cpus;}
 
    }
-  }
+  }  // this is the end of the loop going though all the organisms
+
+
+
 
  // make sure there are some organisms doing task in this batch
  // if not, return all zeros
@@ -2256,23 +2316,26 @@
     fp << (double) av_task_site/num_orgs << " ";	// 5: av. number of tasks per site
     fp << (double) av_site_task/num_orgs << " ";	// 6: av. number of sites per task
     fp << (double) av_t_s_norm/num_orgs << " ";		// 7: av. number of tasks per site per task
+    fp << (double) 1 - av_task_overlap/num_orgs << " ";	// 8: av. proportion of a task that DOESN'T overlap 
     for (int i = 0; i < num_cols; i++) {
-	if (org_task[i] > 0) fp << std_task_position[i]/org_task[i]  << " ";
+        if (org_task[i] > 0) fp << std_task_position[i]/org_task[i]  << " ";
         else fp << 0 << " ";
     }
-    for (int i = 0; i < num_cols; i++) { 
-	if (org_task[i] > 0) fp << (double) av_num_inst[i]/org_task[i]  << " ";
+    for (int i = 0; i < num_cols; i++) {
+        if (org_task[i] > 0) fp << (double) av_num_inst[i]/org_task[i]  << " ";
         else fp << 0 << " ";
     }
     for (int i = 0; i < num_cols+1; i++) { fp << (double) av_inst_task[i]/num_orgs  << " ";}
+    for (int i = 0; i < num_cols; i++) { fp << (double) av_task_length[i]/num_orgs  << " ";}
     fp << endl;
-    }
-
+    } 
+      
  else {
-    for (int i = 0; i < 7+3*num_cols+1; i++) {fp << "0 ";}
-    fp << endl; 
+    for (int i = 0; i < 8+4*num_cols+1; i++) {fp << "0 ";}
+    fp << endl;
  }
-}
+} 
+
 
 
 void cAnalyze::CommandMapMutations(cString cur_string)


More information about the Avida-cvs mailing list