myexperiment-hackers
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[myexperiment-hackers] [2657] branches/biocat: added ability to merge biocatalogue imports


From: noreply
Subject: [myexperiment-hackers] [2657] branches/biocat: added ability to merge biocatalogue imports
Date: Sat, 30 Jul 2011 09:57:00 -0400 (EDT)

Revision
2657
Author
dgc
Date
2011-07-30 09:56:59 -0400 (Sat, 30 Jul 2011)

Log Message

added ability to merge biocatalogue imports

Modified Paths

- branches/biocat/Rakefile
- branches/biocat/lib/bio_catalogue_import.rb

Diff

Modified: branches/biocat/Rakefile (2656 => 2657)


--- branches/biocat/Rakefile	2011-07-25 14:48:42 UTC (rev 2656)
+++ branches/biocat/Rakefile	2011-07-30 13:56:59 UTC (rev 2657)
@@ -106,13 +106,6 @@
 
   conn = ActiveRecord::Base.connection
 
-  conn.execute('TRUNCATE service_categories')
-  conn.execute('TRUNCATE service_deployments')
-  conn.execute('TRUNCATE service_providers')
-  conn.execute('TRUNCATE service_tags')
-  conn.execute('TRUNCATE service_types')
-  conn.execute('TRUNCATE services')
-
   BioCatalogueImport.import_biocatalogue
 end
 

Modified: branches/biocat/lib/bio_catalogue_import.rb (2656 => 2657)


--- branches/biocat/lib/bio_catalogue_import.rb	2011-07-25 14:48:42 UTC (rev 2656)
+++ branches/biocat/lib/bio_catalogue_import.rb	2011-07-30 13:56:59 UTC (rev 2657)
@@ -6,10 +6,10 @@
 class BioCatalogueImport
 
   require 'xml/libxml'
+  require 'open-uri'
 
   @@biocat_base_uri        = 'http://www.biocatalogue.org/'
   @@biocat_ns              = { "bc" => "http://www.biocatalogue.org/2009/xml/rest" }
-  @@biocat_document_cache  = "tmp/biocatalogue.yml"
   @@biocat_wait_in_seconds = 10
 
   def self.fetch_uri(uri)
@@ -29,7 +29,7 @@
 
     puts "Fetching URI: #{rest_uri}"
 
-    @documents[uri] = rest_uri.read.to_s
+    @documents[uri] = open(rest_uri.to_s).read.to_s
     @documents[:retrieved_at][uri] = Time.now
 
     Kernel.sleep(@@biocat_wait_in_seconds)
@@ -67,106 +67,224 @@
 
     summary_element = service_element.find('bc:summary', @@biocat_ns)[0]
 
-    service = Service.create(
-        :retrieved_at       => uri_retrieved_at(index_uri),
+    # Service
 
-        :contributor        => @federation_source,
+    service_uri = get_link(service_element, '@xlink:href')
 
-        :uri                => get_link(service_element, '@xlink:href'),
-        :name               => get_text(service_element, 'bc:name/text()'),
-        :submitter_label    => get_attr(service_element, 'bc:originalSubmitter/@resourceName'),
-        :created            => get_text(service_element, 'dcterms:created/text()'),
-        :submitter_uri      => get_attr(service_element, 'bc:originalSubmitter/@xlink:href'),
+#return service_uri unless service_uri.ends_with?("/2")
+    service = Service.find_by_uri(service_uri)
 
-        :provider_uri         => get_link(summary_element, 'bc:provider/@xlink:href'),
-        :provider_label       => get_text(summary_element, 'bc:provider/bc:name/text()'),
-        :endpoint             => get_text(summary_element, 'bc:endpoint/text()'),
-        :wsdl                 => get_text(summary_element, 'bc:wsdl/text()'),
-        :city                 => get_text(summary_element, 'bc:location/bc:city/text()'),
-        :country              => get_text(summary_element, 'bc:location/bc:country/text()'),
-        :iso3166_country_code => get_text(summary_element, 'bc:location/bc:iso3166CountryCode/text()'),
-        :flag_url             => get_link(summary_element, 'bc:location/bc:flag/@xlink:href'),
-        :documentation_uri    => get_text(summary_element, 'bc:documentationUrl/text()'),
-        :description          => get_text(summary_element, 'dc:description/text()'),
-        
-        :monitor_label            => get_text(service_element, 'bc:latestMonitoringStatus/bc:label/text()'),
-        :monitor_message          => get_text(service_element, 'bc:latestMonitoringStatus/bc:message/text()'),
-        :monitor_symbol_url       => get_link(service_element, 'bc:latestMonitoringStatus/bc:symbol/@xlink:href'),
-        :monitor_small_symbol_url => get_link(service_element, 'bc:latestMonitoringStatus/bc:smallSymbol/@xlink:href'),
-        :monitor_last_checked     => get_text(service_element, 'bc:latestMonitoringStatus/bc:lastChecked/text()'))
+    service = Service.new if service.nil?
 
-    service.contribution.policy = create_default_policy(@federation_source)
-    service.contribution.policy.share_mode = 0 # Make public
-    service.contribution.policy.save
-    service.contribution.save
+    service.attributes = {
 
+      :contributor              => @federation_source,
+
+      :uri                      => service_uri,
+      :name                     => get_text(service_element, 'bc:name/text()'),
+      :submitter_label          => get_attr(service_element, 'bc:originalSubmitter/@resourceName'),
+      :created                  => get_text(service_element, 'dcterms:created/text()'),
+      :submitter_uri            => get_attr(service_element, 'bc:originalSubmitter/@xlink:href'),
+
+      :provider_uri             => get_link(summary_element, 'bc:provider/@xlink:href'),
+      :provider_label           => get_text(summary_element, 'bc:provider/bc:name/text()'),
+      :endpoint                 => get_text(summary_element, 'bc:endpoint/text()'),
+      :wsdl                     => get_text(summary_element, 'bc:wsdl/text()'),
+      :city                     => get_text(summary_element, 'bc:location/bc:city/text()'),
+      :country                  => get_text(summary_element, 'bc:location/bc:country/text()'),
+      :iso3166_country_code     => get_text(summary_element, 'bc:location/bc:iso3166CountryCode/text()'),
+      :flag_url                 => get_link(summary_element, 'bc:location/bc:flag/@xlink:href'),
+      :documentation_uri        => get_text(summary_element, 'bc:documentationUrl/text()'),
+      :description              => get_text(summary_element, 'dc:description/text()'),
+      
+      :monitor_label            => get_text(service_element, 'bc:latestMonitoringStatus/bc:label/text()'),
+      :monitor_message          => get_text(service_element, 'bc:latestMonitoringStatus/bc:message/text()'),
+      :monitor_symbol_url       => get_link(service_element, 'bc:latestMonitoringStatus/bc:symbol/@xlink:href'),
+      :monitor_small_symbol_url => get_link(service_element, 'bc:latestMonitoringStatus/bc:smallSymbol/@xlink:href'),
+      :monitor_last_checked     => get_text(service_element, 'bc:latestMonitoringStatus/bc:lastChecked/text()')
+    }
+
+    service.save if service.changed?
+
+    if service.contribution.nil?
+      service.contribution = Contribution.create(:contributor => @federation_source)
+      service.contribution.policy = create_default_policy(@federation_source)
+      service.contribution.policy.share_mode = 0 # Make public
+      service.contribution.policy.save
+      service.contribution.save
+    end
+
+    # Service categories
+
+    existing_service_categories = ServiceCategory.find_all_by_service_id(service.id)
+
+    current_service_category_uris = []
+
     summary_element.find('bc:category', @@biocat_ns).each do |category_element|
-      ServiceCategory.create(
-          :service       => service,
-          :retrieved_at  => uri_retrieved_at(index_uri),
-          :uri           => get_link(category_element, '@xlink:href'),
-          :label         => get_text(category_element, 'text()'))
+
+      service_category_uri = get_link(category_element, '@xlink:href')
+
+      service_category = ServiceCategory.find_by_service_id_and_uri(service.id, service_category_uri)
+
+      service_category = ServiceCategory.new if service_category.nil?
+
+      service_category.attributes = {
+        :service       => service,
+        :retrieved_at  => uri_retrieved_at(index_uri),
+        :uri           => service_category_uri,
+        :label         => get_text(category_element, 'text()')
+      }
+
+      service_category.save if service_category.changed?
+
+      current_service_category_uris << service_category_uri
     end
 
-    summary_element.find('bc:serviceType', @@biocat_ns).each do |category_element|
-      ServiceType.create(
-          :service      => service,
-          :retrieved_at => uri_retrieved_at(index_uri),
-          :label        => get_text(category_element, 'text()'))
+    existing_service_categories.each do |service_category|
+      next if current_service_category_uris.include?(service_category.uri)
+      service_category.destroy
     end
 
+    # Service technology types
+
+    existing_service_types = ServiceType.find_all_by_service_id(service.id)
+
+    current_types = []
+
+    service_element.find('bc:serviceTechnologyTypes/bc:type', @@biocat_ns).each do |type_element|
+
+      type_text = get_text(type_element, 'text()')
+
+      service_type = ServiceType.find_by_service_id_and_label(service.id, type_text)
+
+      service_type = ServiceType.new if service_type.nil?
+
+      service_type.attributes = {
+        :service       => service,
+        :retrieved_at  => uri_retrieved_at(index_uri),
+        :label         => type_text
+      }
+
+      service_type.save if service_type.changed?
+
+      current_types << type_text
+    end
+
+    existing_service_types.each do |service_type|
+      next if current_types.include?(service_type.label)
+      service_type.destroy
+    end
+
+    # Service tags
+
+    existing_service_tags = ServiceTag.find_all_by_service_id(service.id)
+
+    current_service_tag_uris = []
+
     summary_element.find('bc:tag', @@biocat_ns).each do |tag_element|
-      ServiceTag.create(
-          :service      => service,
-          :retrieved_at => uri_retrieved_at(index_uri),
-          :uri          => get_link(tag_element, '@xlink:href'),
-          :label        => get_text(tag_element, 'text()'))
+
+      service_tag_uri   = get_link(tag_element, '@xlink:href')
+      service_tag_label = get_text(tag_element, 'text()')
+
+      service_tag = ServiceTag.find_by_service_id_and_uri(service.id, service_tag_uri)
+
+      service_tag = ServiceTag.new if service_tag.nil?
+
+      service_tag.attributes = {
+        :service       => service,
+        :retrieved_at  => uri_retrieved_at(index_uri),
+        :uri           => service_tag_uri,
+        :label         => service_tag_label
+      }
+
+      service_tag.save if service_tag.changed?
+
+      current_service_tag_uris << service_tag_uri
     end
 
+    existing_service_tags.each do |service_tag|
+      next if current_service_tag_uris.include?(service_tag.uri)
+      service_tag.destroy
+    end
+
     # deployments and providers
 
-    service_element.find('/bc:service/bc:deployments/bc:serviceDeployment', @@biocat_ns).each do |deployment_element|
+    existing_service_providers = ServiceProvider.find(:all)
+    existing_service_deployments = ServiceDeployment.find_all_by_service_id(service.id)
+
+    current_service_deployments = []
+
+    service_element.find('bc:deployments/bc:serviceDeployment', @@biocat_ns).each do |deployment_element|
       
+      # provider
+
+      provider_uri   = get_link(deployment_element, 'bc:serviceProvider/@xlink:href')
       deployment_uri = get_link(deployment_element, '@xlink:href')
-      provider_uri   = get_link(deployment_element, 'bc:serviceProvider/@xlink:href')
 
-      next if ServiceDeployment.find_by_uri(deployment_uri)
 
-      if ServiceProvider.find_by_uri(provider_uri).nil?
-        ServiceProvider.create(
-            :uri          => provider_uri,
-            :retrieved_at => uri_retrieved_at(index_uri),
-            :name         => get_text(deployment_element, 'bc:serviceProvider/bc:name/text()'),
-            :description  => get_text(deployment_element, 'bc:serviceProvider/dc:description/text()'),
-            :created      => get_text(deployment_element, 'bc:serviceProvider/dcterms:created/text()'))
-      end
+      service_provider = ServiceProvider.find_by_uri(provider_uri)
 
-      provider = ServiceProvider.find_by_uri(provider_uri)
+      service_provider = ServiceProvider.new if service_provider.nil?
 
-      deployment = ServiceDeployment.create(
-          :service              => service,
-          :service_provider     => provider,
-          :retrieved_at         => uri_retrieved_at(index_uri),
-          :uri                  => get_link(deployment_element, '@xlink:href'),
-          :endpoint             => get_text(deployment_element, 'bc:endpoint/text()'),
-          :city                 => get_text(deployment_element, 'bc:location/bc:city/text()'),
-          :country              => get_text(deployment_element, 'bc:location/bc:country/text()'),
-          :iso3166_country_code => get_text(deployment_element, 'bc:location/bc:iso3166CountryCode/text()'),
-          :flag_url             => get_link(deployment_element, 'bc:location/bc:flag/@xlink:href'),
-          :submitter_label      => get_attr(deployment_element, 'bc:submitter/@resourceName'),
-          :submitter_uri        => get_attr(deployment_element, 'bc:submitter/@xlink:href'),
-          :created              => get_text(deployment_element, 'dcterms:created/text()'))
+      service_provider.attributes = {
+        :uri          => provider_uri,
+        :retrieved_at => uri_retrieved_at(index_uri),
+        :name         => get_text(deployment_element, 'bc:serviceProvider/bc:name/text()'),
+        :description  => get_text(deployment_element, 'bc:serviceProvider/dc:description/text()'),
+        :created      => get_text(deployment_element, 'bc:serviceProvider/dcterms:created/text()')
+      }
 
+      service_provider.save if service_provider.changed?
+
+      # deployment
+
+      service_deployment = ServiceDeployment.find_by_service_id_and_uri(service.id, deployment_uri)
+
+      service_deployment = ServiceDeployment.new if service_deployment.nil?
+
+      service_deployment.attributes = {
+        :service              => service,
+        :service_provider     => service_provider,
+        :retrieved_at         => uri_retrieved_at(index_uri),
+        :uri                  => get_link(deployment_element, '@xlink:href'),
+        :endpoint             => get_text(deployment_element, 'bc:endpoint/text()'),
+        :city                 => get_text(deployment_element, 'bc:location/bc:city/text()'),
+        :country              => get_text(deployment_element, 'bc:location/bc:country/text()'),
+        :iso3166_country_code => get_text(deployment_element, 'bc:location/bc:iso3166CountryCode/text()'),
+        :flag_url             => get_link(deployment_element, 'bc:location/bc:flag/@xlink:href'),
+        :submitter_label      => get_attr(deployment_element, 'bc:submitter/@resourceName'),
+        :submitter_uri        => get_attr(deployment_element, 'bc:submitter/@xlink:href'),
+        :created              => get_text(deployment_element, 'dcterms:created/text()')
+      }
+
+      service_deployment.save if service_deployment.changed?
+
+      current_service_deployments << deployment_uri
     end
+
+    existing_service_deployments.each do |service_deployment|
+      next if current_service_deployments.include?(service_deployment.uri)
+      service_deployment.destroy
+    end
+
+    # update the retrieved_at attribute
+
+    ActiveRecord::Base.record_timestamps = false
+    service.update_attribute(:retrieved_at, uri_retrieved_at(index_uri))
+    ActiveRecord::Base.record_timestamps = true
+
+    service_uri
   end
 
   def self.import_biocatalogue_services(uri)
 
+    current_service_uris = []
+
     while true
       doc = LibXML::XML::Parser.string(fetch_uri(uri)).parse.root
 
       doc.find("/bc:services/bc:results/bc:service", @@biocat_ns).each do |service_element|
-        import_service(service_element, uri)
+        current_service_uris << import_service(service_element, uri)
       end
 
       next_doc = doc.find("/bc:services/bc:related/bc:next/@xlink:href", @@biocat_ns)
@@ -176,8 +294,22 @@
       uri = next_doc[0].value
     end
 
+    Service.find(:all).each do |service|
+
+      next if current_service_uris.include?(service.uri)
+
+      service.destroy
+    end
+
+    # destroy unused service providers
+
+    current_service_providers = ServiceDeployment.find(:all).map do |sd| sd.service_provider end.uniq
+
+    (ServiceProvider.find(:all) - current_service_providers).each do |service_provider|
+      service_provider.destroy
+    end
+
     save_document_cache
-
   end
 
   def self.import_biocatalogue
@@ -186,9 +318,11 @@
       FederationSource.create(:name => "BioCatalogue")
     end
 
+    @@biocat_document_cache = ENV['FILE'] ? ENV['FILE'] : "tmp/biocatalogue.yml"
+
     @federation_source = FederationSource.find_by_name("BioCatalogue")
 
-    import_biocatalogue_services("http://www.biocatalogue.org/services?include=summary")
+    import_biocatalogue_services("http://www.biocatalogue.org/services?include=summary,deployments&sort_order=asc")
   end
 end
 

reply via email to

[Prev in Thread] Current Thread [Next in Thread]