[avida-cvs] avida CVS commits: /current/source/main analyze.cc analyze.hh
dule123
avida-cvs at alife.org
Sat Oct 11 05:42:44 PDT 2003
dule123 Fri Oct 10 21:42:44 2003 EDT
Modified files:
/avida/current/source/main analyze.cc analyze.hh
Log:
After much tweaking, implemented a new analyze function, AVERAGE_MODULARITY
Usage:
AVERAGE_MODULARITY output_file task.0 task.1 task.2 task.3 task.4 task.5 task.6 task.7 task.8
To output the legend (see below), run "AVERAGE_MODULARITY output_file"
before doing the actual calculations.
I will add more info to the documentation file in the near future.
1: organism length
2: number of tasks done
3: number of sites used in tasks
4: proportion of sites used in tasks
5: average number of tasks done per site
6: average number sites per task done
7: average number tasks per site per task
8-16: average StDev in positions used for task 1-9
17-25: average number of sites necessary for each of the tasks
26-35: number of sites involved in 0-9 tasks
-------------- next part --------------
Index: avida/current/source/main/analyze.cc
diff -u avida/current/source/main/analyze.cc:1.75 avida/current/source/main/analyze.cc:1.76
--- avida/current/source/main/analyze.cc:1.75 Fri Aug 1 11:08:17 2003
+++ avida/current/source/main/analyze.cc Fri Oct 10 21:42:43 2003
@@ -1898,6 +1898,305 @@
}
}
+// AVERAGE_MODULARITY analyze command.
+//
+// cur_string holds the command arguments: the output filename followed by the
+// data columns (tasks) to test.  When no columns are given, only the column
+// legend is written to the output file and the function returns.
+//
+// Otherwise, every genotype in the current batch that has non-zero fitness and
+// performs at least one of the listed tasks is analyzed: each site is replaced
+// in turn by the "NULL" instruction, and a site is recorded as used by a task
+// when the column's Compare() against the original genotype returns -2 while
+// the knockout still has non-zero fitness.
+// NOTE(review): -2 presumably means "task lost in the knockout" -- confirm
+// against tDataEntryCommand::Compare.
+//
+// All per-genotype values are weighted by the genotype's number of CPUs, and a
+// single line of averages is appended to the output file (all zeros when no
+// genotype qualifies).
+void cAnalyze::CommandAverageModularity(cString cur_string)
+{
+  cout << "Average Modularity calculations" << endl;
+
+  // Load in the variables...
+  cString filename = cur_string.PopWord();
+
+  int print_mode = 0;   // 0=Normal, 1=Boolean results
+
+  // Collect any other format information needed...
+  tList< tDataEntryCommand<cAnalyzeGenotype> > output_list;
+  tListIterator< tDataEntryCommand<cAnalyzeGenotype> > output_it(output_list);
+
+  cStringList arg_list(cur_string);
+
+  cout << "Found " << arg_list.GetSize() << " args." << endl;
+
+  // Check for some command specific variables.
+  if (arg_list.PopString("0") != "") print_mode = 0;
+  if (arg_list.PopString("1") != "") print_mode = 1;
+
+  cout << "There are " << arg_list.GetSize() << " column args." << endl;
+
+  LoadGenotypeDataList(arg_list, output_list);
+
+  cout << "Args are loaded." << endl;
+
+  const int num_cols = output_list.GetSize();
+
+  // Give some information in verbose mode.
+  if (verbose == true) {
+    cout << "  outputing as ";
+    if (print_mode == 1) cout << "boolean ";
+    cout << "text files." << endl;
+    cout << "  Format: ";
+
+    output_it.Reset();
+    while (output_it.Next() != NULL) {
+      cout << output_it.Get()->GetName() << " ";
+    }
+    cout << endl;
+  }
+
+  ofstream & fp = data_file_manager.GetOFStream(filename);
+
+  // Printing the headers.
+  // Not done by default, since many dumps may be analyzed at the same time
+  // and their results would be put in the same file.
+  if (arg_list.GetSize() == 0) {
+    fp << "# Avida analyze modularity data" << endl;
+    fp << "# 1: organism length" << endl;
+    fp << "# 2: number of tasks done" << endl;
+    fp << "# 3: number of sites used in tasks" << endl;
+    fp << "# 4: proportion of sites used in tasks" << endl;
+    fp << "# 5: average number of tasks done per site" << endl;
+    fp << "# 6: average number sites per task done" << endl;
+    fp << "# 7: average number tasks per site per task" << endl;
+    fp << "# 8-16: average StDev in positions used for task 1-9" << endl;
+    fp << "# 17-25: average number of sites necessary for each of the tasks" << endl;
+    fp << "# 26-35: number of sites involved in 0-9 tasks" << endl;
+    fp << endl;
+    return;
+  }
+
+  // Initialize the accumulators used in the calculations.  Every accumulator
+  // is a sum weighted by num_cpus; it is turned into an average on output.
+
+  int num_orgs = 0;          // number of organisms analyzed in the dump
+
+  double av_length = 0;      // average organism length
+  double av_task = 0;        // average # of tasks done
+  double av_inst = 0;        // average # instructions used in tasks
+  double av_inst_len = 0;    // proportion of sites used for tasks
+  double av_site_task = 0;   // average number of sites per task
+  double av_task_site = 0;   // average number of tasks per site
+  double av_t_s_norm = 0;    // average number of tasks per site per task
+
+  // Heap arrays are used instead of variable-length arrays, which are not
+  // standard C++.  All four are released at the end of the function.
+  double * std_task_position = new double[num_cols]; // StDev in positions used for a task
+  double * org_task = new double[num_cols];          // # of organisms actually doing a task
+  double * av_num_inst = new double[num_cols];       // # of sites necessary for each task
+  double * av_inst_task = new double[num_cols+1];    // # of sites involved in 0,1,2... tasks
+
+  for (int i = 0; i < num_cols; i++) {
+    std_task_position[i] = 0;
+    org_task[i] = 0;
+    av_num_inst[i] = 0;
+  }
+  for (int i = 0; i < num_cols+1; i++) {
+    av_inst_task[i] = 0;
+  }
+
+  ///////////////////////////////////////////////////////
+  // Loop through all of the genotypes in this batch...
+  ///////////////////////////////////////////////////////
+
+  tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
+  cAnalyzeGenotype * genotype = NULL;
+
+  // Would like to test only the viable ones, but genotypes can be flagged
+  // non-viable and still reproduce and do tasks, so all are visited.
+  while ((genotype = batch_it.Next()) != NULL) {
+    const int num_cpus = genotype->GetNumCPUs();
+
+    if (verbose == true) cout << "  Mapping " << genotype->GetName() << endl;
+
+    // Calculate the stats for the genotype we're working with...
+    genotype->Recalculate();
+
+    // Check if the organism does any tasks.
+    int does_tasks = 0;
+    for (int i = 0; i < num_cols; i++) {
+      if (genotype->GetTaskCount(i) > 0) does_tasks = 1;
+    }
+
+    // Don't calculate the modularity if the organism doesn't reproduce
+    // (i.e. its fitness is 0) or does no tasks.
+    if (genotype->GetFitness() == 0 || does_tasks == 0) continue;
+
+    num_orgs = num_orgs + num_cpus;
+
+    const int max_line = genotype->GetLength();
+    const cGenome & base_genome = genotype->GetGenome();
+    cGenome mod_genome(base_genome);
+
+    // Create and initialize the modularity matrix:
+    // mod_matrix(task, site) is 1 when the site is needed for the task.
+    tMatrix<int> mod_matrix(num_cols, max_line);
+    for (int i = 0; i < num_cols; i++) {
+      for (int j = 0; j < max_line; j++) {
+        mod_matrix(i,j) = 0;
+      }
+    }
+
+    // Create and initialize the counters for modularity.
+    int * num_task = new int[max_line];     // number of tasks each site is used in
+    int * num_inst = new int[num_cols];     // number of sites involved in each task
+    int * inst_task = new int[num_cols+1];  // # of sites involved in 0,1,2,3... tasks
+
+    for (int i = 0; i < max_line; i++) num_task[i] = 0;
+    for (int i = 0; i < num_cols; i++) num_inst[i] = 0;
+    for (int i = 0; i < num_cols+1; i++) inst_task[i] = 0;
+
+    int total_task = 0;   // total number of tasks done
+    int total_inst = 0;   // total number of sites involved in tasks
+    int total_all = 0;    // sum of mod_matrix
+
+    cInstSet map_inst_set(inst_set);
+
+    // Locate instruction corresponding to "NULL" in the instruction library.
+    {
+      const cInstruction inst_lib_null_inst = map_inst_set.GetInstLib()->GetInst("NULL");
+      if (inst_lib_null_inst == map_inst_set.GetInstLib()->GetInstError()) {
+        cout << "<cAnalyze::CommandAverageModularity> got error:" << endl;
+        cout << " --- instruction \"NULL\" isn't in the instruction library;" << endl;
+        cout << " --- get somebody to map a function to \"NULL\" in the library." << endl;
+        cout << " --- (probably to class method \"cHardware-of-some-type::initInstLib\"" << endl;
+        cout << " --- in file named \"cpu/hardware-of-some-type.cc\".)" << endl;
+        cout << " --- bailing-out." << endl;
+        exit(1);
+      }
+      // Add mapping to located instruction.
+      map_inst_set.Add2(inst_lib_null_inst.GetOp());
+    }
+    const cInstruction null_inst = map_inst_set.GetInst("NULL");
+
+    // Loop through all the lines of code, testing the removal of each.
+    for (int line_num = 0; line_num < max_line; line_num++) {
+      const int cur_inst = base_genome[line_num].GetOp();
+
+      mod_genome[line_num] = null_inst;
+      cAnalyzeGenotype test_genotype(mod_genome, map_inst_set);
+      test_genotype.Recalculate();
+
+      // Compare every requested column of the knockout with the original.
+      output_it.Reset();
+      tDataEntryCommand<cAnalyzeGenotype> * data_command = NULL;
+      int cur_col = 0;
+      while ((data_command = output_it.Next()) != NULL) {
+        data_command->SetTarget(&test_genotype);
+        test_genotype.SetSpecialArgs(data_command->GetArgs());
+        const int compare = data_command->Compare(genotype);
+
+        // If knocking out an instruction stops the expression of a
+        // particular task, mark that in the modularity matrix and add it
+        // to the two counts.  Only do so if the test_genotype replicates,
+        // i.e. if its fitness is not zero.
+        if (compare == -2 && test_genotype.GetFitness() != 0) {
+          mod_matrix(cur_col,line_num) = 1;
+          num_inst[cur_col]++;
+          num_task[line_num]++;
+        }
+        cur_col++;
+      }
+
+      // Reset the mod_genome back to the original sequence.
+      mod_genome[line_num].SetOp(cur_inst);
+    }
+
+    for (int i = 0; i < num_cols; i++) {
+      if (num_inst[i] != 0) total_task++;
+      total_all = total_all + num_inst[i];
+    }
+    for (int i = 0; i < max_line; i++) {
+      if (num_task[i] != 0) total_inst++;
+    }
+
+    // Add the values to the av_ variables, used for calculating the averages.
+    // In order to weigh them by abundance, multiply everything by num_cpus.
+    av_length = av_length + max_line*num_cpus;
+    av_task = av_task + total_task*num_cpus;
+    av_inst = av_inst + total_inst*num_cpus;
+    av_inst_len = av_inst_len + (double) total_inst*num_cpus/max_line;
+
+    if (total_task != 0) av_site_task = av_site_task + num_cpus * (double) total_all/total_task;
+    if (total_inst != 0) av_task_site = av_task_site + num_cpus * (double) total_all/total_inst;
+    if (total_inst != 0 && total_task != 0) {
+      av_t_s_norm = av_t_s_norm + num_cpus * (double) total_all/(total_inst*total_task);
+    }
+
+    for (int i = 0; i < num_cols; i++) {
+      if (num_inst[i] > 0) {
+        av_num_inst[i] = av_num_inst[i] + num_inst[i] * num_cpus;
+        // Count the organisms doing the task, weighted by abundance like the
+        // sums this count is later divided into.
+        org_task[i] = org_task[i] + num_cpus;
+      }
+    }
+
+    // Calculate the sample standard deviation of the positions of the sites
+    // used for each task.  It is only defined for two or more sites.
+    for (int i = 0; i < num_cols; i++) {
+      if (num_inst[i] > 1) {
+        int pos_sum = 0;
+        for (int j = 0; j < max_line; j++) {
+          if (mod_matrix(i,j) > 0) pos_sum = pos_sum + j;
+        }
+        // Floating-point mean; integer division would truncate it.
+        const double av_pos = (double) pos_sum / num_inst[i];
+
+        // Sum the squared deviations over all sites of this task, starting
+        // from zero for every task.
+        double sq_dev = 0;
+        for (int j = 0; j < max_line; j++) {
+          if (mod_matrix(i,j) > 0) sq_dev = sq_dev + (av_pos - j)*(av_pos - j);
+        }
+        std_task_position[i] = std_task_position[i] + sqrt(sq_dev/(num_inst[i]-1))*num_cpus;
+      }
+    }
+
+    // Histogram of sites by the number of tasks they take part in.
+    for (int i = 0; i < max_line; i++) { inst_task[num_task[i]]++; }
+    for (int i = 0; i < num_cols+1; i++) {
+      av_inst_task[i] = av_inst_task[i] + inst_task[i] * num_cpus;
+    }
+
+    delete [] num_inst;
+    delete [] num_task;
+    delete [] inst_task;
+  }
+
+  // Make sure there are some organisms doing tasks in this batch;
+  // if not, output all zeros.
+  if (num_orgs != 0) {
+    fp << av_length/num_orgs << " ";      // 1: average length
+    fp << av_task/num_orgs << " ";        // 2: av. number of tasks done
+    fp << av_inst/num_orgs << " ";        // 3: av. number of sites used for tasks
+    fp << av_inst_len/num_orgs << " ";    // 4: proportion of sites used for tasks
+    fp << av_task_site/num_orgs << " ";   // 5: av. number of tasks per site
+    fp << av_site_task/num_orgs << " ";   // 6: av. number of sites per task
+    fp << av_t_s_norm/num_orgs << " ";    // 7: av. number of tasks per site per task
+    for (int i = 0; i < num_cols; i++) {
+      if (org_task[i] > 0) fp << std_task_position[i]/org_task[i] << " ";
+      else fp << 0 << " ";
+    }
+    for (int i = 0; i < num_cols; i++) {
+      if (org_task[i] > 0) fp << av_num_inst[i]/org_task[i] << " ";
+      else fp << 0 << " ";
+    }
+    for (int i = 0; i < num_cols+1; i++) { fp << av_inst_task[i]/num_orgs << " "; }
+    fp << endl;
+  }
+  else {
+    // Same number of columns as a normal line: 7 + num_cols + num_cols + (num_cols+1).
+    for (int i = 0; i < 7+3*num_cols+1; i++) { fp << "0 "; }
+    fp << endl;
+  }
+
+  delete [] std_task_position;
+  delete [] org_task;
+  delete [] av_num_inst;
+  delete [] av_inst_task;
+}
+
void cAnalyze::CommandMapMutations(cString cur_string)
{
@@ -3908,6 +4207,7 @@
AddLibraryDef("FITNESS_MATRIX", &cAnalyze::CommandFitnessMatrix);
AddLibraryDef("MAP", &cAnalyze::CommandMapTasks); // Deprecated...
AddLibraryDef("MAP_TASKS", &cAnalyze::CommandMapTasks);
+ AddLibraryDef("AVERAGE_MODULARITY", &cAnalyze::CommandAverageModularity);
AddLibraryDef("MAP_MUTATIONS", &cAnalyze::CommandMapMutations);
// Population comparison commands...
Index: avida/current/source/main/analyze.hh
diff -u avida/current/source/main/analyze.hh:1.44 avida/current/source/main/analyze.hh:1.45
--- avida/current/source/main/analyze.hh:1.44 Sun May 18 13:49:27 2003
+++ avida/current/source/main/analyze.hh Fri Oct 10 21:42:43 2003
@@ -448,6 +448,7 @@
void CommandLandscape(cString cur_string);
void CommandFitnessMatrix(cString cur_string);
void CommandMapTasks(cString cur_string);
+ void CommandAverageModularity(cString cur_string);
void CommandMapMutations(cString cur_string);
// Population Comparison Commands...
More information about the Avida-cvs
mailing list