[avida-cvs] avida CVS commits: /current/source/main analyze.cc analyze.hh

dule123 avida-cvs at alife.org
Sat Oct 11 05:42:44 PDT 2003


dule123		Fri Oct 10 21:42:44 2003 EDT

  Modified files:              
    /avida/current/source/main	analyze.cc analyze.hh 
  Log:
  
  After much tweaking, implemented a new analyze function, AVERAGE_MODULARITY
  
  Usage: 
  AVERAGE_MODULARITY output_file task.0 task.1 task.2 task.3 task.4 task.5 task.6 task.7 task.8
  
  To output only the legend (see below), run "AVERAGE_MODULARITY output_file"
  with no task arguments before running the actual calculations.
  
  I will add more info to the documentation file in the near future. 
  
  1: organism length
  2: number of tasks done
  3: number of sites used in tasks 
  4: proportion of sites used in tasks
  5: average number of tasks done per site
  6: average number of sites per task done
  7: average number of tasks per site per task
  8-16: average StDev in positions used for task 1-9
  17-25: average number of sites necessary for each of the tasks
  26-35: number of sites involved in 0-9 tasks
  
  
  
-------------- next part --------------
Index: avida/current/source/main/analyze.cc
diff -u avida/current/source/main/analyze.cc:1.75 avida/current/source/main/analyze.cc:1.76
--- avida/current/source/main/analyze.cc:1.75	Fri Aug  1 11:08:17 2003
+++ avida/current/source/main/analyze.cc	Fri Oct 10 21:42:43 2003
@@ -1898,6 +1898,305 @@
   }
 }
 
+// AVERAGE_MODULARITY analyze command.
+//
+// cur_string has the form "<output_file> [column args...]".  The first word
+// names the output file; the remaining words are handed to
+// LoadGenotypeDataList() to build the list of columns to test (e.g.
+// task.0 task.1 ...).
+//
+// With no column arguments only the column legend is written to the output
+// file.  Otherwise every genotype in the current batch that has non-zero
+// fitness and a positive count for at least one task is analyzed: each site is
+// replaced in turn by the "NULL" instruction, and a site is recorded as being
+// used by a column when the knock-out still has non-zero fitness and the
+// column's Compare() against the original genotype returns -2.  One line of
+// averages, weighted by each genotype's CPU count, is then appended to the
+// output file (a line of zeros if no genotype qualified).
+//
+// NOTE(review): GetTaskCount() is indexed with the column index, so the
+// columns are presumably expected to be the tasks in order -- confirm.
+void cAnalyze::CommandAverageModularity(cString cur_string)
+{
+  cout << "Average Modularity calculations" << endl;
+
+  // Load in the variables...
+  cString filename = cur_string.PopWord();
+
+  int print_mode = 0;   // 0=Normal, 1=Boolean results
+
+  // Collect any other format information needed...
+  tList< tDataEntryCommand<cAnalyzeGenotype> > output_list;
+  tListIterator< tDataEntryCommand<cAnalyzeGenotype> > output_it(output_list);
+
+  cStringList arg_list(cur_string);
+
+  cout << "Found " << arg_list.GetSize() << " args." << endl;
+
+  // Check for some command specific variables.
+  if (arg_list.PopString("0") != "") print_mode = 0;
+  if (arg_list.PopString("1") != "") print_mode = 1;
+
+  cout << "There are " << arg_list.GetSize() << " column args." << endl;
+
+  LoadGenotypeDataList(arg_list, output_list);
+
+  cout << "Args are loaded." << endl;
+
+  const int num_cols = output_list.GetSize();
+
+  // Give some information in verbose mode.
+  if (verbose == true) {
+    cout << "  outputing as ";
+    if (print_mode == 1) cout << "boolean ";
+    cout << "text files." << endl;
+    cout << "  Format: ";
+
+    output_it.Reset();
+    while (output_it.Next() != NULL) {
+      cout << output_it.Get()->GetName() << " ";
+    }
+    cout << endl;
+  }
+
+  ofstream & fp = data_file_manager.GetOFStream(filename);
+
+  // Print the legend only when no columns were requested.  It is not printed
+  // by default because many dumps may be analyzed in one run, with all the
+  // results going to the same file.
+  if (arg_list.GetSize() == 0) {
+    fp << "# Avida analyze modularity data" << endl;
+    fp << "# 1: organism length" << endl;
+    fp << "# 2: number of tasks done" << endl;
+    fp << "# 3: number of sites used in tasks" << endl;
+    fp << "# 4: proportion of sites used in tasks" << endl;
+    fp << "# 5: average number of tasks done per site" << endl;
+    fp << "# 6: average number sites per task done" << endl;
+    fp << "# 7: average number tasks per site per task" << endl;
+    fp << "# 8-16: average StDev in positions used for task 1-9" << endl;
+    fp << "# 17-25: average number of sites necessary for each of the tasks" << endl;
+    fp << "# 26-35: number of sites involved in 0-9 tasks" << endl;
+    fp << endl;
+    return;
+  }
+
+  // Batch-wide accumulators.  Every per-genotype value is multiplied by the
+  // genotype's CPU count before being added, so dividing by the matching
+  // organism count at the end gives an abundance-weighted average.
+
+  int num_orgs = 0;           // number of organisms analyzed in the dump
+
+  double av_length = 0;       // organism length
+  double av_task = 0;         // # of tasks done
+  double av_inst = 0;         // # of sites used in tasks
+  double av_inst_len = 0;     // proportion of sites used in tasks
+  double av_site_task = 0;    // # of sites per task
+  double av_task_site = 0;    // # of tasks per site
+  double av_t_s_norm = 0;     // # of tasks per site per task
+
+  // Heap arrays rather than variable-length arrays, which are not standard C++.
+  double * std_task_position = new double[num_cols]; // StDev of positions used for a task
+  double * org_task = new double[num_cols];          // # of organisms with sites for a task
+  double * av_num_inst = new double[num_cols];       // # of sites necessary for each task
+  double * av_inst_task = new double[num_cols + 1];  // # of sites involved in 0,1,2... tasks
+
+  for (int i = 0; i < num_cols; i++) {
+    std_task_position[i] = 0;
+    org_task[i] = 0;
+    av_num_inst[i] = 0;
+  }
+  for (int i = 0; i < num_cols + 1; i++) {
+    av_inst_task[i] = 0;
+  }
+
+  ///////////////////////////////////////////////////////
+  // Loop through all of the genotypes in this batch...
+  ///////////////////////////////////////////////////////
+
+  tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
+  cAnalyzeGenotype * genotype = NULL;
+
+  // Ideally only viable genotypes would be tested, but a genotype can be
+  // flagged non-viable and still reproduce and do tasks, so viability is not
+  // used as a filter here.
+  while ((genotype = batch_it.Next()) != NULL) {
+    const int num_cpus = genotype->GetNumCPUs();
+
+    if (verbose == true) cout << "  Mapping " << genotype->GetName() << endl;
+
+    // Calculate the stats for the genotype we're working with...
+    genotype->Recalculate();
+
+    // Check if the organism does any tasks.
+    bool does_tasks = false;
+    for (int i = 0; i < num_cols; i++) {
+      if (genotype->GetTaskCount(i) > 0) does_tasks = true;
+    }
+
+    // Don't calculate the modularity if the organism doesn't reproduce
+    // (i.e. its fitness is 0) or if it performs none of the tasks.
+    if (genotype->GetFitness() == 0 || !does_tasks) continue;
+
+    num_orgs += num_cpus;
+
+    const int max_line = genotype->GetLength();
+    const cGenome & base_genome = genotype->GetGenome();
+    cGenome mod_genome(base_genome);
+
+    // Modularity matrix: mod_matrix(task, site) is 1 when the site is used
+    // by the task, 0 otherwise.
+    tMatrix<int> mod_matrix(num_cols, max_line);
+    for (int i = 0; i < num_cols; i++) {
+      for (int j = 0; j < max_line; j++) {
+        mod_matrix(i, j) = 0;
+      }
+    }
+
+    // Per-genotype counters.
+    int * num_task = new int[max_line];      // number of tasks each site is used in
+    int * num_inst = new int[num_cols];      // number of sites involved in each task
+    int * inst_task = new int[num_cols + 1]; // # of sites involved in 0,1,2,3... tasks
+
+    for (int i = 0; i < max_line; i++) num_task[i] = 0;
+    for (int i = 0; i < num_cols; i++) num_inst[i] = 0;
+    for (int i = 0; i < num_cols + 1; i++) inst_task[i] = 0;
+
+    cInstSet map_inst_set(inst_set);
+
+    // Locate instruction corresponding to "NULL" in the instruction library.
+    {
+      const cInstruction inst_lib_null_inst = map_inst_set.GetInstLib()->GetInst("NULL");
+      if (inst_lib_null_inst == map_inst_set.GetInstLib()->GetInstError()) {
+        cout << "<cAnalyze::CommandAverageModularity> got error:" << endl;
+        cout << " --- instruction \"NULL\" isn't in the instruction library;" << endl;
+        cout << " --- get somebody to map a function to \"NULL\" in the library." << endl;
+        cout << " --- (probably to class method \"cHardware-of-some-type::initInstLib\"" << endl;
+        cout << " --- in file named \"cpu/hardware-of-some-type.cc\".)" << endl;
+        cout << " --- bailing-out." << endl;
+        exit(1);
+      }
+      // Add mapping to located instruction.
+      map_inst_set.Add2(inst_lib_null_inst.GetOp());
+    }
+    const cInstruction null_inst = map_inst_set.GetInst("NULL");
+
+    // Loop through all the lines of code, testing the removal of each.
+    for (int line_num = 0; line_num < max_line; line_num++) {
+      const int cur_inst = base_genome[line_num].GetOp();
+
+      mod_genome[line_num] = null_inst;
+      cAnalyzeGenotype test_genotype(mod_genome, map_inst_set);
+      test_genotype.Recalculate();
+
+      // Compare every requested column of the knock-out with the original.
+      output_it.Reset();
+      tDataEntryCommand<cAnalyzeGenotype> * data_command = NULL;
+      int cur_col = 0;
+      while ((data_command = output_it.Next()) != NULL) {
+        data_command->SetTarget(&test_genotype);
+        test_genotype.SetSpecialArgs(data_command->GetArgs());
+        const int compare = data_command->Compare(genotype);
+
+        // If knocking out an instruction stops the expression of a
+        // particular task, mark that in the modularity matrix and add it
+        // to the two counts.  Only knock-outs that still replicate (i.e.
+        // whose fitness is not zero) are considered.
+        if (compare == -2 && test_genotype.GetFitness() != 0) {
+          mod_matrix(cur_col, line_num) = 1;
+          num_inst[cur_col]++;
+          num_task[line_num]++;
+        }
+        cur_col++;
+      }
+
+      // Reset the mod_genome back to the original sequence.
+      mod_genome[line_num].SetOp(cur_inst);
+    }
+
+    int total_task = 0; // number of tasks that have at least one site
+    int total_inst = 0; // number of sites involved in at least one task
+    int total_all = 0;  // sum of mod_matrix
+
+    for (int i = 0; i < num_cols; i++) {
+      if (num_inst[i] != 0) total_task++;
+      total_all += num_inst[i];
+    }
+    for (int i = 0; i < max_line; i++) {
+      if (num_task[i] != 0) total_inst++;
+    }
+
+    // Add the values to the av_ variables used for calculating the averages.
+    // In order to weigh them by abundance, multiply everything by num_cpus.
+    av_length += max_line * num_cpus;
+    av_task += total_task * num_cpus;
+    av_inst += total_inst * num_cpus;
+    av_inst_len += static_cast<double>(total_inst) * num_cpus / max_line;
+
+    if (total_task != 0) {
+      av_site_task += num_cpus * static_cast<double>(total_all) / total_task;
+    }
+    if (total_inst != 0) {
+      av_task_site += num_cpus * static_cast<double>(total_all) / total_inst;
+    }
+    if (total_inst != 0 && total_task != 0) {
+      av_t_s_norm += num_cpus * static_cast<double>(total_all) / (total_inst * total_task);
+    }
+
+    for (int i = 0; i < num_cols; i++) {
+      if (num_inst[i] > 0) {
+        av_num_inst[i] += num_inst[i] * num_cpus;
+        // Count organisms, not genotypes: the sums divided by org_task are
+        // weighted by num_cpus, so the divisor has to be weighted too.
+        org_task[i] += num_cpus;
+      }
+    }
+
+    // Standard deviation of the positions of the sites used by each task.
+    // It needs at least two sites; tasks with fewer contribute nothing.
+    for (int i = 0; i < num_cols; i++) {
+      if (num_inst[i] <= 1) continue;
+
+      int position_sum = 0;
+      for (int j = 0; j < max_line; j++) {
+        if (mod_matrix(i, j) > 0) position_sum += j;
+      }
+      // Floating-point division, so the mean position is not truncated.
+      const double mean_position = static_cast<double>(position_sum) / num_inst[i];
+
+      // Accumulate the squared deviations over all sites of this task,
+      // starting from zero for every task.
+      double sq_dev_sum = 0;
+      for (int j = 0; j < max_line; j++) {
+        if (mod_matrix(i, j) > 0) {
+          sq_dev_sum += (mean_position - j) * (mean_position - j);
+        }
+      }
+      std_task_position[i] += sqrt(sq_dev_sum / (num_inst[i] - 1)) * num_cpus;
+    }
+
+    // Histogram of sites by the number of tasks they take part in.
+    for (int i = 0; i < max_line; i++) inst_task[num_task[i]]++;
+    for (int i = 0; i < num_cols + 1; i++) {
+      av_inst_task[i] += inst_task[i] * num_cpus;
+    }
+
+    delete [] num_inst;
+    delete [] num_task;
+    delete [] inst_task;
+  }
+
+  // Make sure there are some organisms doing tasks in this batch;
+  // if not, output all zeros.
+  if (num_orgs != 0) {
+    fp << av_length / num_orgs << " ";     // 1: average length
+    fp << av_task / num_orgs << " ";       // 2: av. number of tasks done
+    fp << av_inst / num_orgs << " ";       // 3: av. number of sites used for tasks
+    fp << av_inst_len / num_orgs << " ";   // 4: proportion of sites used for tasks
+    fp << av_task_site / num_orgs << " ";  // 5: av. number of tasks per site
+    fp << av_site_task / num_orgs << " ";  // 6: av. number of sites per task
+    fp << av_t_s_norm / num_orgs << " ";   // 7: av. number of tasks per site per task
+    for (int i = 0; i < num_cols; i++) {
+      if (org_task[i] > 0) fp << std_task_position[i] / org_task[i] << " ";
+      else fp << 0 << " ";
+    }
+    for (int i = 0; i < num_cols; i++) {
+      if (org_task[i] > 0) fp << av_num_inst[i] / org_task[i] << " ";
+      else fp << 0 << " ";
+    }
+    for (int i = 0; i < num_cols + 1; i++) {
+      fp << av_inst_task[i] / num_orgs << " ";
+    }
+    fp << endl;
+  }
+  else {
+    // Same number of columns as a regular data line.
+    for (int i = 0; i < 7 + 3 * num_cols + 1; i++) { fp << "0 "; }
+    fp << endl;
+  }
+
+  // Release the batch-wide accumulators.
+  delete [] std_task_position;
+  delete [] org_task;
+  delete [] av_num_inst;
+  delete [] av_inst_task;
+}
+
 
 void cAnalyze::CommandMapMutations(cString cur_string)
 {
@@ -3908,6 +4207,7 @@
   AddLibraryDef("FITNESS_MATRIX", &cAnalyze::CommandFitnessMatrix);
   AddLibraryDef("MAP", &cAnalyze::CommandMapTasks);  // Deprecated...
   AddLibraryDef("MAP_TASKS", &cAnalyze::CommandMapTasks);
+  AddLibraryDef("AVERAGE_MODULARITY", &cAnalyze::CommandAverageModularity);
   AddLibraryDef("MAP_MUTATIONS", &cAnalyze::CommandMapMutations);
 
   // Population comparison commands...
Index: avida/current/source/main/analyze.hh
diff -u avida/current/source/main/analyze.hh:1.44 avida/current/source/main/analyze.hh:1.45
--- avida/current/source/main/analyze.hh:1.44	Sun May 18 13:49:27 2003
+++ avida/current/source/main/analyze.hh	Fri Oct 10 21:42:43 2003
@@ -448,6 +448,7 @@
   void CommandLandscape(cString cur_string);
   void CommandFitnessMatrix(cString cur_string);
   void CommandMapTasks(cString cur_string);
+  void CommandAverageModularity(cString cur_string);
   void CommandMapMutations(cString cur_string);
 
   // Population Comparison Commands...


More information about the Avida-cvs mailing list