SF.net SVN: gmod:[25256] schema/trunk/chado/bin/load_ncbi_taxonomy.pl

classic Classic list List threaded Threaded
1 message Options
Reply | Threaded
Open this post in threaded view
|

SF.net SVN: gmod:[25256] schema/trunk/chado/bin/load_ncbi_taxonomy.pl

nm249
Revision: 25256
          http://gmod.svn.sourceforge.net/gmod/?rev=25256&view=rev
Author:   nm249
Date:     2012-03-06 18:54:12 +0000 (Tue, 06 Mar 2012)
Log Message:
-----------
Added a lookup for the organism by NCBI taxonomy ID, since species names may change. This loader also assumes that species names are unique, as they are in the NCBI taxonomy.

Modified Paths:
--------------
    schema/trunk/chado/bin/load_ncbi_taxonomy.pl

Modified: schema/trunk/chado/bin/load_ncbi_taxonomy.pl
===================================================================
--- schema/trunk/chado/bin/load_ncbi_taxonomy.pl 2012-02-28 21:26:20 UTC (rev 25255)
+++ schema/trunk/chado/bin/load_ncbi_taxonomy.pl 2012-03-06 18:54:12 UTC (rev 25256)
@@ -53,7 +53,7 @@
 http://www.ncbi.nlm.nih.gov/sites/entrez?db=Taxonomy 
 and search by taxid (e.g. txis4070[Subtree] )  
 
-=item -p
+=item -n
 
 phylotree name  [optional]
 
@@ -80,7 +80,7 @@
 
 driver. Override driver name in gmod_config
 
-=item -s
+=item -p
 
 password. Override password in gmod_config
 
@@ -129,7 +129,7 @@
 
 =head1 VERISON AND DATE
 
-Version 2.0, October 2009.
+Version 2.1, March 2012.
 
 =head1 TODO
 
@@ -147,19 +147,19 @@
 use Bio::GMOD::DB::Config;
 
 use Bio::Chado::Schema;
-
+use Try::Tiny;
 use Getopt::Std;
 
-our ($opt_H, $opt_D, $opt_v, $opt_t, $opt_i, $opt_p, $opt_g, $opt_u, $opt_s, $opt_d);
+our ($opt_H, $opt_D, $opt_v, $opt_t, $opt_i, $opt_p, $opt_g, $opt_u, $opt_n, $opt_d);
 
-getopts('H:D:i:p:g:u:s:d:tv');
+getopts('H:D:i:p:g:u:n:d:tv');
 
 my $dbhost = $opt_H;
 my $dbname = $opt_D;
 my $infile = $opt_i;
-my $phylotree_name= $opt_p || 'NCBI taxonomy tree';
+my $phylotree_name= $opt_n || 'NCBI taxonomy tree';
 my $user = $opt_u;
-my $pass = $opt_s;
+my $pass = $opt_p;
 my $driver = $opt_d;
 my $port;
 
@@ -196,7 +196,7 @@
 $dsn .= ";host=$dbhost";
 $dsn .= ";port=$port" if $port;
 
-$schema= Bio::Chado::Schema->connect( $dsn, $user, $pass, { AutoCommit=>0 });
+$schema= Bio::Chado::Schema->connect( $dsn, $user, $pass||'');
 $dbh=$schema->storage->dbh();
 
 
@@ -377,7 +377,7 @@
 my %phylonode=();
 my $node_count=0;
 
-eval {
+my $coderef = sub {
     my $root_id;
     my $organism_id = $maxval{'organism'};
   NODE: foreach my $id ( keys %node ) {
@@ -417,10 +417,13 @@
       if (!$genus || !$species) { die "NO GENUS OR SPECIES FOUND FOR tax_id $genbank_taxon_accession! Check your input file! \n" ; }
       $organism = $schema->resultset('Organism::Organism')->search(
   {
-      genus   => { 'ilike'=> $genus },
       species => {'ilike' => $species }
-  })->single();
-      if (!$organism) {  #create a new empty row object
+  })->single; # lookup is by species only . NCBI species should be unique!
+      if (!$organism) {  #maybe the organism is already loaded with the ncbi taxonomy id, but the species name has changed?
+  my $organism_dbxref = $dbxref->organism_dbxrefs->single;
+  $organism= $organism_dbxref->organism if $organism_dbxref;
+      }
+      if (!$organism) { #create a new empty row object
   $organism = $schema->resultset('Organism::Organism')->new({});
   $insert=1;
       } else { $update = 1; }
@@ -433,12 +436,12 @@
       if ($update) {
   $organism->update();
   message( "*Updating organism " . $organism->get_column('organism_id') . " (species=" . $organism->species . ")\n", 1);
-      }
+      }
       if ($insert) {
   $organism->insert();
-    message("New organism " . $organism->get_column('organism_id') . " (species=" . $organism->species . ")\n", 1);
+  message("New organism " . $organism->get_column('organism_id') . " (species=" . $organism->species . ")\n", 1);
       }
-       my $organism_id= $organism->get_column('organism_id');
+      my $organism_id= $organism->get_column('organism_id');
       
       ###########################################
       #store the organism synonyms
@@ -541,13 +544,16 @@
  $setright->execute($ctr++, $phylonode_id);
  message( "Setting right index= $ctr for phylonode id $phylonode_id\n\n",1);
     }
+    if ($opt_t) { die "TEST RUN! rolling back!\n"; }
 };
+try {
+    $schema->txn_do($coderef);
+    message( "Commiting!! \n");
+    message("Inserted $node_count phylonodes. \n",1 );
 
-if ($@ || $opt_t) {
-    $dbh->rollback();
+} catch {
+    message( "An error occured! Rolling back! \n $_ \n Resetting database sequences...\n", 1);
     
-    message( "Rolling back! \n $@\n Resetting database sequences...\n", 1);
-    
     #reset sequences
     foreach my $key ( keys %seq ) {
  my $value= $seq{$key};
@@ -555,14 +561,8 @@
  if ($maxvalue) { $dbh->do("SELECT setval ('$value', $maxvalue, true)") ;  }
  else {  $dbh->do("SELECT setval ('$value', 1, false)");  }
     }
-}else {    
-    message( "Commiting!! \n");
-    message("Inserted $node_count phylonodes. \n",1 );
-    
-    $dbh->commit();
-}
+};
 
-
 sub set_maxval {
     my $key=shift;
     my $id_column= $key . "_id";

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.


------------------------------------------------------------------------------
Keep Your Developer Skills Current with LearnDevNow!
The most comprehensive online learning library for Microsoft developers
is just $99.99! Visual Studio, SharePoint, SQL - plus HTML5, CSS3, MVC3,
Metro Style Apps, more. Free future releases when you subscribe now!
http://p.sf.net/sfu/learndevnow-d2d
_______________________________________________
Gmod-schema-cmts mailing list
[hidden email]
https://lists.sourceforge.net/lists/listinfo/gmod-schema-cmts