[avida-cvs] avida CVS commits: /current/source/main analyze.cc
dule123
avida-cvs at alife.org
Tue Nov 4 02:27:42 PST 2003
dule123 Mon Nov 3 18:27:42 2003 EDT
Modified files:
/avida/current/source/main analyze.cc
Log:
Added two new modularity measures to AVERAGE_MODULARITY in the analyze mode.
1) 1 - average proportion of task overlap
Rich came up with this one. It calculates what proportion of a task overlaps
with each of the other tasks, and averages that over all tasks. It looks
well behaved and is bounded between 0 and 1. The number actually reported is one
minus the proportion of overlap, so 1 is totally modular and 0 is totally overlapping.
2) task length
Bob suggested this could be useful. It reports the average number of
instructions between the first and the last instruction involved in a task,
including those two.
The output file now has a whopping 45 columns and yes there is a legend
included ;-)
-------------- next part --------------
Index: avida/current/source/main/analyze.cc
diff -u avida/current/source/main/analyze.cc:1.81 avida/current/source/main/analyze.cc:1.82
--- avida/current/source/main/analyze.cc:1.81 Thu Oct 30 13:42:28 2003
+++ avida/current/source/main/analyze.cc Mon Nov 3 18:27:42 2003
@@ -2033,9 +2033,11 @@
fp << "# 5: average number of tasks done per site" << endl;
fp << "# 6: average number sites per task done" << endl;
fp << "# 7: average number tasks per site per task" << endl;
- fp << "# 8-16: average StDev in positions used for task 1-9" << endl;
- fp << "# 17-25: average number of sites necessary for each of the tasks" << endl;
- fp << "# 26-35: number of sites involved in 0-9 tasks" << endl;
+ fp << "# 8: average proportion of the non-overlaping region of a task" << endl;
+ fp << "# 9-17: average StDev in positions used for task 1-9" << endl;
+ fp << "# 18-26: average number of sites necessary for each of the tasks" << endl;
+ fp << "# 27-36: number of sites involved in 0-9 tasks" << endl;
+ fp << "# 37-45: average task length (distance from first to last inst used)" << endl;
fp << endl;
return;
}
@@ -2051,28 +2053,27 @@
double av_site_task = 0; // average number of sites per task
double av_task_site = 0; // average number of tasks per site
double av_t_s_norm = 0; // average number of tasks per site per task
+ double av_task_overlap = 0; // average overlap between tasks
// average StDev in positions used for a task
tArray<double> std_task_position(num_cols);
-
+ std_task_position.SetAll(0.0);
+
// # of organisms actually doing a task
tArray<double> org_task(num_cols);
+ org_task.SetAll(0.0);
// av. # of sites necessary for each of the tasks
tArray<double> av_num_inst(num_cols);
+ av_num_inst.SetAll(0.0);
// number of sites involved in 0-9 tasks
tArray<double> av_inst_task(num_cols+1);
+ av_inst_task.SetAll(0.0);
- for (int i = 0; i < num_cols; i++) {
- av_num_inst[i] = 0;
- org_task[i] = 0;
- std_task_position[i] = 0;
- }
-
- for (int i = 0; i < num_cols+1; i++) {
- av_inst_task[i] = 0;
- }
+ // av. # task length (distance from first to last site used)
+ tArray<double> av_task_length(num_cols);
+ av_task_length.SetAll(0.0);
///////////////////////////////////////////////////////
@@ -2111,11 +2112,11 @@
// Create and initialize the modularity matrix
tMatrix<int> mod_matrix(num_cols, max_line);
- for (int i = 0; i < num_cols; i++) {
- for (int j = 0; j < max_line; j++) {
- mod_matrix(i,j) = 0;
- }
- }
+ mod_matrix.SetAll(0);
+
+ // Create and initialize the task overalp matrix
+ tMatrix<int> task_overlap(num_cols, num_cols);
+ task_overlap.SetAll(0);
// Create an initialize the counters for modularity
tArray<int> num_task(max_line); // number of tasks instruction is used in
@@ -2123,22 +2124,19 @@
tArray<int> sum(num_cols); // helps with StDev calculations
tArray<int> sumsq(num_cols); // helps with StDev calculations
tArray<int> inst_task(num_cols+1); // # of inst's involved in 0,1,2,3... tasks
+ tArray<int> task_length(num_cols); // ditance between first and last inst involved in a task
- for (int i = 0; i < num_cols; i++) {
- num_inst[i] = 0;
- sum[i] = 0;
- sumsq[i] = 0;
- }
- for (int i = 0; i < num_cols+1; i++) {
- inst_task[i] = 0;
- }
- for (int i = 0; i < max_line; i++) {
- num_task[i] = 0;
- }
-
- int total_task = 0; // total number of tasks done
- int total_inst = 0; // total number of instructions involved in tasks
- int total_all = 0; // sum of mod_matrix
+ num_task.SetAll(0);
+ num_inst.SetAll(0);
+ sum.SetAll(0);
+ sumsq.SetAll(0);
+ inst_task.SetAll(0);
+ task_length.SetAll(0);
+
+ int total_task = 0; // total number of tasks done
+ int total_inst = 0; // total number of instructions involved in tasks
+ int total_all = 0; // sum of mod_matrix
+ double sum_task_overlap = 0;// task overlap for for this geneome
cInstSet map_inst_set(inst_set);
@@ -2190,10 +2188,9 @@
}
cur_col++;
}
-
// Reset the mod_genome back to the original sequence.
mod_genome[line_num].SetOp(cur_inst);
- }
+ } // end of genotype-phenotype mapping for a single organism
for (int i = 0; i < num_cols; i++) {if (num_inst[i] != 0) total_task++;}
for (int i = 0; i < max_line; i++) {if (num_task[i] != 0) total_inst++;}
@@ -2220,8 +2217,68 @@
}
}
- // calculate the Standard Deviation in the mean position of the task
+ // calculate average task overlap
+ // first construct num_task x num_task matrix with number of sites overlapping
+ for (int i = 0; i < max_line; i++) {
+ for (int j = 0; j < num_cols; j++) {
+ for (int k = j; k < num_cols; k++) {
+ if (mod_matrix(j,i)>0 && mod_matrix(k,i)>0) {
+ task_overlap(j,k)++;
+ if (j!=k) task_overlap(k,j)++;
+ }
+ }
+ }
+ }
+ // go though the task_overlap matrix, add and average everything up.
+ if (total_task > 1) {
+ for (int i = 0; i < num_cols; i++) {
+ double overlap_per_task = 0;
+ for (int j = 0; j < num_cols; j++) {
+ if (i!=j) {overlap_per_task = overlap_per_task + task_overlap(i,j);}
+ }
+ sum_task_overlap = sum_task_overlap + overlap_per_task / (task_overlap(i,i) * (total_task-1));
+ }
+ }
+
+ // now, divide that by number of tasks done and add to the grand sum, weigthed by num_cpus
+ if (total_task !=0) av_task_overlap = av_task_overlap + num_cpus * (double) sum_task_overlap/total_task ;
+
+ // calculate the first/last postion of a task, the task "spread"
+ // starting from the top look for the fist command that matters for a task
+
+ for (int i = 0; i < num_cols; i++) {
+ int j = 0;
+ while (j < max_line) {
+ if (mod_matrix(i,j) > 0 && task_length[i] == 0 ) {
+ task_length[i] = j;
+ break;
+ }
+ j++;
+ }
+ }
+
+ // starting frm the bottom look for the last command that matters for a task
+ // and substract it from the first to get the task length
+ // add one in order to account for both the beginning and the end instruction
+ for (int i = 0; i < num_cols; i++) {
+ int j = max_line - 1;
+ while (j > -1) {
+ if (mod_matrix(i,j) > 0) {
+ task_length[i] = j - task_length[i] + 1;
+ break;
+ }
+ j--;
+ }
+ }
+ // add the task lengths to the average for the batch
+ // weigthed by the number of cpus for that genotype
+ for (int i = 0; i < num_cols; i++) {
+ av_task_length[i] = av_task_length[i] + num_cpus * task_length[i];
+ }
+
+
+ // calculate the Standard Deviation in the mean position of the task
for (int i = 0; i < num_cols; i++) {
for (int j = 0; j < max_line; j++) {
if (mod_matrix(i,j)>0) sum[i] = sum[i] + j;
@@ -2243,7 +2300,10 @@
for (int i = 0; i < num_cols+1; i++) { av_inst_task[i] = av_inst_task[i] + inst_task[i] * num_cpus;}
}
- }
+ } // this is the end of the loop going though all the organisms
+
+
+
// make sure there are some organisms doing task in this batch
// if not, return all zeros
@@ -2256,23 +2316,26 @@
fp << (double) av_task_site/num_orgs << " "; // 5: av. number of tasks per site
fp << (double) av_site_task/num_orgs << " "; // 6: av. number of sites per task
fp << (double) av_t_s_norm/num_orgs << " "; // 7: av. number of tasks per site per task
+ fp << (double) 1 - av_task_overlap/num_orgs << " "; // 8: av. proportion of a task that DOESN'T overlap
for (int i = 0; i < num_cols; i++) {
- if (org_task[i] > 0) fp << std_task_position[i]/org_task[i] << " ";
+ if (org_task[i] > 0) fp << std_task_position[i]/org_task[i] << " ";
else fp << 0 << " ";
}
- for (int i = 0; i < num_cols; i++) {
- if (org_task[i] > 0) fp << (double) av_num_inst[i]/org_task[i] << " ";
+ for (int i = 0; i < num_cols; i++) {
+ if (org_task[i] > 0) fp << (double) av_num_inst[i]/org_task[i] << " ";
else fp << 0 << " ";
}
for (int i = 0; i < num_cols+1; i++) { fp << (double) av_inst_task[i]/num_orgs << " ";}
+ for (int i = 0; i < num_cols; i++) { fp << (double) av_task_length[i]/num_orgs << " ";}
fp << endl;
- }
-
+ }
+
else {
- for (int i = 0; i < 7+3*num_cols+1; i++) {fp << "0 ";}
- fp << endl;
+ for (int i = 0; i < 8+4*num_cols+1; i++) {fp << "0 ";}
+ fp << endl;
}
-}
+}
+
void cAnalyze::CommandMapMutations(cString cur_string)
More information about the Avida-cvs
mailing list