<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8" />
<style type="text/css">
/*
 * Stylesheet for the commit notification page.
 *   #msg   - revision metadata, log message and list of modified paths
 *   #patch - one bordered box per changed file, holding its unified diff
 *
 * The rules are deliberately NOT wrapped in an SGML comment: the page is
 * XHTML, and an XML parser is allowed to drop commented-out content, which
 * would silently discard the whole stylesheet.
 */

/* --- #msg: commit metadata --------------------------------------------- */
#msg dl { border: 1px #006 solid; background: #369; padding: 6px; color: #fff; }
#msg dt { float: left; width: 6em; font-weight: bold; }
#msg dt:after { content:':';}
#msg dl, #msg dt, #msg ul, #msg li, #header, #footer { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt;  }
#msg dl a { font-weight: bold}
/* Link states share one specificity, so source order decides the winner.
   Keep the order link, visited, active: if :visited came last it would
   override :active and a visited link would never show its active colour. */
#msg dl a:link    { color:#fc3; }
#msg dl a:visited { color:#cc6; }
#msg dl a:active  { color:#ff0; }
h3 { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt; font-weight: bold; }
#msg pre { overflow: auto; background: #ffc; border: 1px #fc0 solid; padding: 6px; }
#msg ul, pre { overflow: auto; }
#header, #footer { color: #fff; background: #636; border: 1px #300 solid; padding: 6px; }

/* --- #patch: per-file diffs -------------------------------------------- */
#patch { width: 100%; }
#patch h4 {font-family: verdana,arial,helvetica,sans-serif;font-size:10pt;padding:8px;background:#369;color:#fff;margin:0;}
#patch .propset h4, #patch .binary h4 {margin:0;}
#patch pre {padding:0;line-height:1.2em;margin:0;}
#patch .diff {width:100%;background:#eee;padding: 0 0 10px 0;overflow:auto;}
#patch .propset .diff, #patch .binary .diff  {padding:10px 0;}
/* Every diff fragment (context, hunk header, file header) is a block-level
   row so that its background spans the full width of the diff box. */
#patch span {display:block;padding:0 10px;}
#patch .modfile, #patch .addfile, #patch .delfile, #patch .propset, #patch .binary, #patch .copfile {border:1px solid #ccc;margin:10px 0;}
/* Added lines are green, removed lines are red; the default underline and
   strike-through are turned off so the code itself stays readable. */
#patch ins {background:#dfd;text-decoration:none;display:block;padding:0 10px;}
#patch del {background:#fdd;text-decoration:none;display:block;padding:0 10px;}
/* Hunk headers (.lines) and file headers (.info); both selectors are scoped
   to #patch so the rule cannot leak onto unrelated .info elements. */
#patch .lines, #patch .info {color:#888;background:#fff;}
</style>
<title>[617] trunk/alexandria: - repaired mcu.rb</title>
</head>
<body>

<div id="msg">
  <!-- Commit metadata: revision number, committer and commit timestamp. -->
  <dl>
    <dt>Revision</dt>
    <dd>617</dd>
    <dt>Author</dt>
    <dd>laurusnobilis</dd>
    <dt>Date</dt>
    <dd>2007-02-23 16:56:39 -0500 (Fri, 23 Feb 2007)</dd>
  </dl>

  <!-- Log message as recorded with the commit; whitespace inside the pre element is significant. -->
  <h3>Log Message</h3>
  <pre>- repaired mcu.rb
- now all the providers (except proxis) return EAN
- minor changes</pre>

  <!-- Index of changed files; each link targets the anchor placed in front of that file's diff inside #patch. -->
  <h3>Modified Paths</h3>
  <ul>
    <li><a href="#trunkalexandrialibalexandriabook_providersadlibrisrb">trunk/alexandria/lib/alexandria/book_providers/adlibris.rb</a></li>
    <li><a href="#trunkalexandrialibalexandriabook_providersamazonrb">trunk/alexandria/lib/alexandria/book_providers/amazon.rb</a></li>
    <li><a href="#trunkalexandrialibalexandriabook_providersbol_itrb">trunk/alexandria/lib/alexandria/book_providers/bol_it.rb</a></li>
    <li><a href="#trunkalexandrialibalexandriabook_providerslsrb">trunk/alexandria/lib/alexandria/book_providers/ls.rb</a></li>
    <li><a href="#trunkalexandrialibalexandriabook_providersmcurb">trunk/alexandria/lib/alexandria/book_providers/mcu.rb</a></li>
    <li><a href="#trunkalexandrialibalexandriabook_providerswebster_itrb">trunk/alexandria/lib/alexandria/book_providers/webster_it.rb</a></li>
    <li><a href="#trunkalexandrialibalexandriabook_providersworldcatrb">trunk/alexandria/lib/alexandria/book_providers/worldcat.rb</a></li>
    <li><a href="#trunkalexandrialibalexandriabook_providersrb">trunk/alexandria/lib/alexandria/book_providers.rb</a></li>
    <li><a href="#trunkalexandrialibalexandrialibraryrb">trunk/alexandria/lib/alexandria/library.rb</a></li>
    <li><a href="#trunkalexandriateststest_providersrb">trunk/alexandria/tests/test_providers.rb</a></li>
  </ul>
</div>
<div id="patch">
<h3>Diff</h3>
<a id="trunkalexandrialibalexandriabook_providersadlibrisrb"></a>
<div class="modfile"><h4>Modified: trunk/alexandria/lib/alexandria/book_providers/adlibris.rb (616 => 617)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/alexandria/lib/alexandria/book_providers/adlibris.rb        2007-02-23 17:46:49 UTC (rev 616)
+++ trunk/alexandria/lib/alexandria/book_providers/adlibris.rb        2007-02-23 21:56:39 UTC (rev 617)
</span><span class="lines">@@ -141,7 +141,8 @@
</span><span class="cx">                         product[&quot;title&quot;] = CGI.unescape(md[1])
</span><span class="cx"> 
</span><span class="cx"> 
</span><del>-                        regx = /&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;text&quot;&gt;F&amp;#246;rfattare:&amp;nbsp;&lt;b&gt;([^&lt;]*)&lt;\/b&gt;&lt;\/td&gt;&lt;\/tr&gt;/
</del><ins>+#                        regx = /&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;text&quot;&gt;F&amp;#246;rfattare:&amp;nbsp;&lt;b&gt;([^&lt;]*)&lt;\/b&gt;&lt;\/td&gt;&lt;\/tr&gt;/
+                        regx = /&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;text&quot;&gt;F.rfattare:&amp;nbsp;&lt;b&gt;([^&lt;]*)&lt;\/b&gt;&lt;\/td&gt;&lt;\/tr&gt;/
</ins><span class="cx">                         product[&quot;authors&quot;] = []
</span><span class="cx">                         data.scan(regx) do |md| next unless md[0] != md[1]
</span><span class="cx">                             product[&quot;authors&quot;] &lt;&lt; translate_html_stuff(CGI.unescape(md[0]))
</span><span class="lines">@@ -156,8 +157,8 @@
</span><span class="cx">                         
</span><span class="cx">                         product[&quot;edition&quot;] = md[1] or nil
</span><span class="cx"> 
</span><del>-
-                        img_url = &quot;covers/&quot; + isbn[0 .. 0] + &quot;/&quot; + isbn[1 .. 2] + &quot;/&quot; + isbn + &quot;.jpg&quot;
</del><ins>+                        isbn10 = Library.canonicalise_isbn(isbn)
+                        img_url = &quot;covers/&quot; + isbn10[0 .. 0] + &quot;/&quot; + isbn10[1 .. 2] + &quot;/&quot; + isbn10 + &quot;.jpg&quot;
</ins><span class="cx">                         #puts img_url
</span><span class="cx">                         raise &quot;No image found&quot; unless md = data.match(img_url)
</span><span class="cx">                         product[&quot;cover&quot;] = BASE_URI + img_url
</span><span class="lines">@@ -165,7 +166,7 @@
</span><span class="cx">                         book = Book.new(
</span><span class="cx">                                 translate_html_stuff(product[&quot;title&quot;]),
</span><span class="cx">                                 product[&quot;authors&quot;],
</span><del>-                                isbn,
</del><ins>+                                Library.canonicalise_ean(isbn),
</ins><span class="cx">                                 translate_html_stuff(product[&quot;publisher&quot;]),
</span><span class="cx">                                 publish_year = 0,
</span><span class="cx">                                 translate_html_stuff(product[&quot;edition&quot;]))
</span></span></pre></div>
<a id="trunkalexandrialibalexandriabook_providersamazonrb"></a>
<div class="modfile"><h4>Modified: trunk/alexandria/lib/alexandria/book_providers/amazon.rb (616 => 617)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/alexandria/lib/alexandria/book_providers/amazon.rb        2007-02-23 17:46:49 UTC (rev 616)
+++ trunk/alexandria/lib/alexandria/book_providers/amazon.rb        2007-02-23 21:56:39 UTC (rev 617)
</span><span class="lines">@@ -28,7 +28,7 @@
</span><span class="cx">         CACHE_DIR = File.join(Alexandria::Library::DIR, '.amazon_cache')
</span><span class="cx">         
</span><span class="cx">         def initialize
</span><del>-            super(&quot;Amazon&quot;, &quot;Amazon (USA)&quot;)
</del><ins>+            super(&quot;Amazon&quot;, &quot;Amazon (Usa)&quot;)
</ins><span class="cx">             prefs.add(&quot;locale&quot;, _(&quot;Locale&quot;), &quot;us&quot;,
</span><span class="cx">                        Amazon::Search::LOCALES.keys)
</span><span class="cx">             prefs.add(&quot;dev_token&quot;, _(&quot;Development token&quot;), 
</span><span class="lines">@@ -118,10 +118,17 @@
</span><span class="cx">                 media = product.media.squeeze(' ')
</span><span class="cx">                 media = nil if media == 'Unknown Binding'
</span><span class="cx"> 
</span><ins>+                isbn = product.isbn.squeeze(' ')
+                if Library.valid_isbn?(isbn)
+                    isbn = Library.canonicalise_ean(isbn)
+                else
+                    isbn = nil # it may be an ASIN which is not an ISBN
+                end
+
</ins><span class="cx">                 book = Book.new(title,
</span><span class="cx">                                 (product.authors.map { |x| x.squeeze(' ') } \
</span><span class="cx">                                     rescue [  ]),
</span><del>-                                product.isbn.squeeze(' '),
</del><ins>+                                isbn,
</ins><span class="cx">                                 (product.manufacturer.squeeze(' ') \
</span><span class="cx">                                     rescue nil),
</span><span class="cx">                                 (Time.parse(product.release_date).year \
</span></span></pre></div>
<a id="trunkalexandrialibalexandriabook_providersbol_itrb"></a>
<div class="modfile"><h4>Modified: trunk/alexandria/lib/alexandria/book_providers/bol_it.rb (616 => 617)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/alexandria/lib/alexandria/book_providers/bol_it.rb        2007-02-23 17:46:49 UTC (rev 616)
+++ trunk/alexandria/lib/alexandria/book_providers/bol_it.rb        2007-02-23 21:56:39 UTC (rev 617)
</span><span class="lines">@@ -57,7 +57,7 @@
</span><span class="cx">             end
</span><span class="cx">             
</span><span class="cx"> ## warning: this provider uses pages like http://www.bol.it/libri/scheda/ea978888584104 with 12 numbers, without the checksum
</span><del>-            criterion = &quot;ea978&quot; + Library.canonicalise_isbn(criterion)[0 .. -2] + &quot;.html&quot; if type == SEARCH_BY_ISBN
</del><ins>+            criterion = &quot;ea&quot; + Library.canonicalise_ean(criterion)[0 .. -2] + &quot;.html&quot; if type == SEARCH_BY_ISBN
</ins><span class="cx">             req += CGI.escape(criterion)
</span><span class="cx">             p req if $DEBUG
</span><span class="cx">             data = transport.get(URI.parse(req))
</span><span class="lines">@@ -77,7 +77,7 @@
</span><span class="cx">         end
</span><span class="cx"> 
</span><span class="cx">         def url(book)
</span><del>-            BASE_URI + &quot;/#{LOCALE}/scheda/ea978&quot; + Library.canonicalise_isbn(book.isbn)[0 .. -2] + &quot;.html&quot;
</del><ins>+            BASE_URI + &quot;/#{LOCALE}/scheda/ea&quot; + Library.canonicalise_ean(book.isbn)[0 .. -2] + &quot;.html&quot;
</ins><span class="cx">         end
</span><span class="cx"> 
</span><span class="cx">         #######
</span></span></pre></div>
<a id="trunkalexandrialibalexandriabook_providerslsrb"></a>
<div class="modfile"><h4>Modified: trunk/alexandria/lib/alexandria/book_providers/ls.rb (616 => 617)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/alexandria/lib/alexandria/book_providers/ls.rb        2007-02-23 17:46:49 UTC (rev 616)
+++ trunk/alexandria/lib/alexandria/book_providers/ls.rb        2007-02-23 21:56:39 UTC (rev 617)
</span><span class="lines">@@ -91,7 +91,7 @@
</span><span class="cx">             end
</span><span class="cx"> 
</span><span class="cx">             raise &quot;No ISBN from Image&quot; unless md = /&lt;img src=&quot;capas\/([\dX]+)p?\.jpg&quot; alt=&quot;&quot; ?\/&gt;/.match(data)
</span><del>-            isbn = md[1].strip
</del><ins>+            isbn = Library.canonicalise_ean(md[1].strip)
</ins><span class="cx"> 
</span><span class="cx">             if md = /&lt;br[^&gt;]*&gt;Editora: ([^&lt;]+)&lt;br&gt;/.match(data)
</span><span class="cx">                 publisher = md[1].strip
</span></span></pre></div>
<a id="trunkalexandrialibalexandriabook_providersmcurb"></a>
<div class="modfile"><h4>Modified: trunk/alexandria/lib/alexandria/book_providers/mcu.rb (616 => 617)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/alexandria/lib/alexandria/book_providers/mcu.rb        2007-02-23 17:46:49 UTC (rev 616)
+++ trunk/alexandria/lib/alexandria/book_providers/mcu.rb        2007-02-23 21:56:39 UTC (rev 617)
</span><span class="lines">@@ -1,14 +1,5 @@
</span><del>-# This provider has switched from method &quot;get&quot; to method &quot;post&quot;.
-# It would be nice to fix it. WWW:mechanize can be used, this is already done with provider deastore.
-# http://mechanize.rubyforge.org/
-# http://www.ntecs.de/blog-old/Blog/WWW-Mechanize.rdoc
-# http://rubyforge.org/projects/mechanize/
-# See also
-# http://www.mcu.es/libro/CE/AgenciaISBN/BBDDLibros/Sobre.html
-# https://rubyforge.org/tracker/index.php?func=detail&amp;aid=2518&amp;group_id=205&amp;atid=863
-# https://rubyforge.org/tracker/index.php?func=detail&amp;aid=2533&amp;group_id=205&amp;atid=863
-
</del><span class="cx"> # Copyright (C) 2004 Javier Fernandez-Sanguino
</span><ins>+# Copyright (C) 2007 Javier Fernandez-Sanguino and Marco Costantini
</ins><span class="cx"> #
</span><span class="cx"> # Alexandria is free software; you can redistribute it and/or
</span><span class="cx"> # modify it under the terms of the GNU General Public License as
</span><span class="lines">@@ -28,6 +19,9 @@
</span><span class="cx"> require 'cgi'
</span><span class="cx"> require 'net/http'
</span><span class="cx"> 
</span><ins>+# http://www.mcu.es/libro/CE/AgenciaISBN/BBDDLibros/Sobre.html
+# http://www.mcu.es/comun/bases/isbn/ISBN.html
+
</ins><span class="cx"> module Alexandria
</span><span class="cx"> class BookProviders
</span><span class="cx">     class MCUProvider &lt; GenericProvider
</span><span class="lines">@@ -38,7 +32,8 @@
</span><span class="cx">             'es' =&gt; '1'
</span><span class="cx">         }
</span><span class="cx"> 
</span><del>-        BASE_URI = &quot;http://www.mcu.es/cgi-bin/BRSCGI3701?&quot;
</del><ins>+#        BASE_URI = &quot;http://www.mcu.es/cgi-bin/BRSCGI3701?&quot;
+        BASE_URI = &quot;http://www.mcu.es/cgi-brs/BasesHTML/isbn/BRSCGI?&quot;
</ins><span class="cx">         def initialize
</span><span class="cx">             super(&quot;MCU&quot;, _(&quot;Spanish Culture Ministry&quot;))
</span><span class="cx">             # No preferences
</span><span class="lines">@@ -46,21 +41,21 @@
</span><span class="cx">         
</span><span class="cx">         def search(criterion, type)
</span><span class="cx">             prefs.read
</span><del>-            criterion = criterion.convert(&quot;WINDOWS-1252&quot;, &quot;UTF-8&quot;)
</del><ins>+            criterion = criterion.convert(&quot;iso-8859-1&quot;, &quot;UTF-8&quot;)
</ins><span class="cx">             print &quot;Doing search with MCU #{criterion}, type: #{type}\n&quot; if $DEBUG # for DEBUGing
</span><del>-            req = BASE_URI + &quot;CMD=VERLST&amp;BASE=ISBN&amp;CONF=AEISPA.cnf&amp;OPDEF=AND&amp;DOCS=1-1000&amp;SEPARADOR=&amp;&quot;
</del><ins>+            req = BASE_URI + &quot;CMD=VERLST&amp;BASE=ISBN&amp;DOCS=1-15&amp;CONF=AEISPA.cnf&amp;OPDEF=AND&amp;DOCS=1-1000&amp;SEPARADOR=&amp;&quot;
</ins><span class="cx">             req += case type
</span><span class="cx">                 when SEARCH_BY_ISBN
</span><del>-                    &quot;WGEN-C=&amp;WISB-C=#{CGI::escape(criterion)}&amp;WAUT-C=&amp;WTIT-C=&amp;WMAT-C=&amp;WEDI-C=&amp;WFEP-C=&amp;%40T353-GE=&amp;%40T353-LE=&amp;WSER-C=&amp;WLUG-C=&amp;WDIS-C=DISPONIBLE&amp;WLEN-C=&amp;WCLA-C=&amp;WSOP-C=&quot;
</del><ins>+                    &quot;WGEN-C=&amp;WISB-C=#{CGI::escape(criterion)}&amp;WAUT-C=&amp;WTIT-C=&amp;WMAT-C=&amp;WEDI-C=&amp;WFEP-C=&amp;%40T353-GE=&amp;%40T353-LE=&amp;WSER-C=&amp;WLUG-C=&amp;WDIS-C=%28DISPONIBLE+or+AGOTADO%29&amp;WLEN-C=&amp;WCLA-C=&amp;WSOP-C=&quot;
</ins><span class="cx"> 
</span><span class="cx">                 when SEARCH_BY_TITLE
</span><del>-                    &quot;WGEN-C=&amp;WISB-C=&amp;WAUT-C=&amp;WTIT-C=#{CGI::escape(criterion)}&amp;WMAT-C=&amp;WEDI-C=&amp;WFEP-C=&amp;%40T353-GE=&amp;%40T353-LE=&amp;WSER-C=&amp;WLUG-C=&amp;WDIS-C=DISPONIBLE&amp;WLEN-C=&amp;WCLA-C=&amp;WSOP-C=&quot;
</del><ins>+                    &quot;WGEN-C=&amp;WISB-C=&amp;WAUT-C=&amp;WTIT-C=#{CGI::escape(criterion)}&amp;WMAT-C=&amp;WEDI-C=&amp;WFEP-C=&amp;%40T353-GE=&amp;%40T353-LE=&amp;WSER-C=&amp;WLUG-C=&amp;WDIS-C=%28DISPONIBLE+or+AGOTADO%29&amp;WLEN-C=&amp;WCLA-C=&amp;WSOP-C=&quot;
</ins><span class="cx"> 
</span><span class="cx">                 when SEARCH_BY_AUTHORS
</span><del>-                      &quot;WGEN-C=&amp;WISB-C=&amp;WAUT-C=#{CGI::escape(criterion)}&amp;WTIT-C=&amp;WMAT-C=&amp;WEDI-C=&amp;WFEP-C=&amp;%40T353-GE=&amp;%40T353-LE=&amp;WSER-C=&amp;WLUG-C=&amp;WDIS-C=DISPONIBLE&amp;WLEN-C=&amp;WCLA-C=&amp;WSOP-C=&quot;
</del><ins>+                      &quot;WGEN-C=&amp;WISB-C=&amp;WAUT-C=#{CGI::escape(criterion)}&amp;WTIT-C=&amp;WMAT-C=&amp;WEDI-C=&amp;WFEP-C=&amp;%40T353-GE=&amp;%40T353-LE=&amp;WSER-C=&amp;WLUG-C=&amp;WDIS-C=%28DISPONIBLE+or+AGOTADO%29&amp;WLEN-C=&amp;WCLA-C=&amp;WSOP-C=&quot;
</ins><span class="cx"> 
</span><span class="cx">                 when SEARCH_BY_KEYWORD
</span><del>-                        &quot;WGEN-C=#{CGI::escape(criterion)}&amp;WISB-C=&amp;WAUT-C=&amp;WTIT-C=&amp;WMAT-C=&amp;WEDI-C=&amp;WFEP-C=&amp;%40T353-GE=&amp;%40T353-LE=&amp;WSER-C=&amp;WLUG-C=&amp;WDIS-C=DISPONIBLE&amp;WLEN-C=&amp;WCLA-C=&amp;WSOP-C=&quot;
</del><ins>+                        &quot;WGEN-C=#{CGI::escape(criterion)}&amp;WISB-C=&amp;WAUT-C=&amp;WTIT-C=&amp;WMAT-C=&amp;WEDI-C=&amp;WFEP-C=&amp;%40T353-GE=&amp;%40T353-LE=&amp;WSER-C=&amp;WLUG-C=&amp;WDIS-C=%28DISPONIBLE+or+AGOTADO%29&amp;WLEN-C=&amp;WCLA-C=&amp;WSOP-C=&quot;
</ins><span class="cx"> 
</span><span class="cx">                 else
</span><span class="cx">                     raise InvalidSearchTypeError
</span><span class="lines">@@ -72,7 +67,7 @@
</span><span class="cx">                     print &quot;Reading line: #{line}&quot; if $DEBUG # for DEBUGing
</span><span class="cx">                 if (line =~ /CMD=VERDOC.*&amp;DOCN=([^&amp;]*)&amp;NDOC=([^&amp;]*)/) and (!products[$1]) and (book = parseBook($1,$2)) then
</span><span class="cx">                     products[$1] = book
</span><del>-                    puts $1
</del><ins>+                    puts $1 if $DEBUG # for DEBUGing
</ins><span class="cx">                 end
</span><span class="cx">             end
</span><span class="cx"> 
</span><span class="lines">@@ -81,7 +76,7 @@
</span><span class="cx">         end
</span><span class="cx">         
</span><span class="cx">         def url(book)
</span><del>-            &quot;http://www.mcu.es/cgi-bin/BRSCGI3701?CMD=VERLST&amp;BASE=ISBN&amp;CONF=AEISPA.cnf&amp;OPDEF=AND&amp;DOCS=1&amp;SEPARADOR=&amp;WGEN-C=&amp;WISB-C=&quot; + book.isbn + &quot;&amp;WAUT-C=&amp;WTIT-C=&amp;WMAT-C=&amp;WEDI-C=&amp;WFEP-C=&amp;%40T353-GE=&amp;%40T353-LE=&amp;WSER-C=&amp;WLUG-C=&amp;WDIS-C=DISPONIBLE&amp;WLEN-C=&amp;WCLA-C=&amp;WSOP-C=&quot;
</del><ins>+            &quot;http://www.mcu.es/cgi-brs/BasesHTML/isbn/BRSCGI?CMD=VERLST&amp;BASE=ISBN&amp;DOCS=1-15&amp;CONF=AEISPA.cnf&amp;OPDEF=AND&amp;DOCS=1&amp;SEPARADOR=&amp;WGEN-C=&amp;WISB-C=&quot; + book.isbn + &quot;&amp;WAUT-C=&amp;WTIT-C=&amp;WMAT-C=&amp;WEDI-C=&amp;WFEP-C=&amp;%40T353-GE=&amp;%40T353-LE=&amp;WSER-C=&amp;WLUG-C=&amp;WDIS-C=%28DISPONIBLE+or+AGOTADO%29&amp;WLEN-C=&amp;WCLA-C=&amp;WSOP-C=&quot;
</ins><span class="cx">         end
</span><span class="cx"> 
</span><span class="cx">         #######
</span><span class="lines">@@ -89,7 +84,7 @@
</span><span class="cx">         #######
</span><span class="cx"> 
</span><span class="cx">         def parseBook(docn,ndoc)
</span><del>-            detailspage='http://www.mcu.es/cgi-bin/BRSCGI3701?CMD=VERDOC&amp;CONF=AEISPA.cnf&amp;BASE=ISBN&amp;DOCN=' + docn + '&amp;NDOC=' + ndoc
</del><ins>+            detailspage='http://www.mcu.es/cgi-brs/BasesHTML/isbn/BRSCGI?CMD=VERDOC&amp;CONF=AEISPA.cnf&amp;BASE=ISBN&amp;DOCN=' + docn + '&amp;NDOC=' + ndoc
</ins><span class="cx">             print &quot;Looking at detailspage: #{detailspage}\n&quot; if $DEBUG # for DEBUGing
</span><span class="cx">             product = {}
</span><span class="cx">             product['authors'] = []
</span><span class="lines">@@ -121,7 +116,7 @@
</span><span class="cx">                     product['name'] = $1.strip
</span><span class="cx">                     print &quot;Name is #{product['name']}\n&quot; if $DEBUG # for DEBUGing
</span><span class="cx">                     robotstate = 0
</span><del>-                elsif robotstate == 3 and line =~ /^([0-9]+-[0-9]+-[0-9]+-[0-9X]).*/ 
</del><ins>+                elsif robotstate == 3 and line =~ /^([0-9]+-[0-9]+-[0-9]+-[0-9]+-[0-9]).*/ 
</ins><span class="cx">                     product['isbn'] = $1
</span><span class="cx">                     print &quot;ISBN is #{product['isbn']}\n&quot; if $DEBUG # for DEBUGing
</span><span class="cx">                     robotstate = 0
</span><span class="lines">@@ -129,19 +124,20 @@
</span><span class="cx">                     product['manufacturer'] = $1.strip
</span><span class="cx">                     print &quot;Manufacturer is #{product['manufacturer']}\n&quot; if $DEBUG # for DEBUGing
</span><span class="cx">                     robotstate = 0 
</span><del>-                elsif robotstate == 5 and line =~ /^([^&lt;]+)&lt;/
</del><ins>+#                elsif robotstate == 5 and line =~ /^([^&lt;]+)&lt;/
+                elsif robotstate == 5 and line =~ /&lt;span&gt;([^&lt;]+)&lt;/
</ins><span class="cx">                     product['media'] = $1.strip
</span><span class="cx">                     print &quot;Media is #{product['media']}\n&quot; if $DEBUG # for DEBUGing
</span><span class="cx">                     robotstate = 0 
</span><span class="cx">                 elsif line =~ /^.*&gt;Autor:\s*&lt;/
</span><span class="cx">                     robotstate = 1
</span><del>-                elsif line =~ /^.*&gt;T.tulo:\s*&lt;/
</del><ins>+                elsif line =~ /^.*&gt;T(.|&amp;iacute;)tulo:\s*&lt;/
</ins><span class="cx">                     robotstate = 2
</span><del>-                elsif line =~ /^.*&gt;ISBN:\s*&lt;/
</del><ins>+                elsif line =~ /^.*&gt;ISBN \(13\):\s*&lt;/
</ins><span class="cx">                     robotstate = 3
</span><del>-                elsif line =~ /^.*&gt;Publicaci.n:\s*&lt;/
</del><ins>+                elsif line =~ /^.*&gt;Publicaci(.|&amp;oacute;)n:\s*&lt;/
</ins><span class="cx">                     robotstate = 4
</span><del>-                elsif line =~ /^.*&gt;Encuadernaci.n:\s*&lt;/
</del><ins>+                elsif line =~ /^.*&gt;Encuadernaci(.|&amp;oacute;)n:\s*&lt;/
</ins><span class="cx">                     robotstate = 5 
</span><span class="cx">                 end
</span><span class="cx">             end
</span></span></pre></div>
<a id="trunkalexandrialibalexandriabook_providerswebster_itrb"></a>
<div class="modfile"><h4>Modified: trunk/alexandria/lib/alexandria/book_providers/webster_it.rb (616 => 617)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/alexandria/lib/alexandria/book_providers/webster_it.rb        2007-02-23 17:46:49 UTC (rev 616)
+++ trunk/alexandria/lib/alexandria/book_providers/webster_it.rb        2007-02-23 21:56:39 UTC (rev 617)
</span><span class="lines">@@ -98,13 +98,11 @@
</span><span class="cx">                     authors = this.scan(/&lt;a href=&quot;[^&gt;]+&quot;&gt;([^&lt;]+)&lt;\/a&gt;,?/)
</span><span class="cx">                     authors = authors.collect {|author| author[0]}
</span><span class="cx">                     #puts this
</span><del>-                    
</del><span class="cx"> #                 md[1].strip.split(', ').each { |a| authors &lt;&lt; CGI.unescape(a.strip) }
</span><span class="cx">             end
</span><span class="cx"> 
</span><span class="cx">             raise unless md = /&lt;li&gt;&lt;span class=&quot;product_label&quot;&gt;ISBN:&lt;\/span&gt; &lt;span class=&quot;product_text&quot;&gt;([^&lt;]+)/.match(data)
</span><del>-            isbn = &quot;978&quot; + md[1].strip[0..8]
-            isbn += String( Library.ean_checksum( Library.extract_numbers( isbn ) ) )
</del><ins>+            isbn = Library.canonicalise_ean( md[1].strip )
</ins><span class="cx"> 
</span><span class="cx">             raise unless md = /&lt;li&gt;&lt;span class=&quot;product_label&quot;&gt;Editore:&lt;\/span&gt; &lt;span class=&quot;product_text&quot;&gt;&lt;a href=&quot;[^&gt;]+&gt;([^&lt;]+)/.match(data)
</span><span class="cx">                 publisher = CGI.unescape(md[1].strip)
</span></span></pre></div>
<a id="trunkalexandrialibalexandriabook_providersworldcatrb"></a>
<div class="modfile"><h4>Modified: trunk/alexandria/lib/alexandria/book_providers/worldcat.rb (616 => 617)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/alexandria/lib/alexandria/book_providers/worldcat.rb        2007-02-23 17:46:49 UTC (rev 616)
+++ trunk/alexandria/lib/alexandria/book_providers/worldcat.rb        2007-02-23 21:56:39 UTC (rev 617)
</span><span class="lines">@@ -90,7 +90,7 @@
</span><span class="cx"> 
</span><span class="cx">                     authors = []
</span><span class="cx">                     md = data.scan(/title=&quot;Search for more by this author&quot;&gt;([^&lt;]+)/)
</span><del>-            raise &quot;No authors&quot; unless md.length &gt; 0
</del><ins>+#            raise &quot;No authors&quot; unless md.length &gt; 0
</ins><span class="cx">             md = md.collect {|match| match[0]} 
</span><span class="cx">             md.each {|match|
</span><span class="cx">                             CGI.unescape(match.strip)
</span></span></pre></div>
<a id="trunkalexandrialibalexandriabook_providersrb"></a>
<div class="modfile"><h4>Modified: trunk/alexandria/lib/alexandria/book_providers.rb (616 => 617)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/alexandria/lib/alexandria/book_providers.rb        2007-02-23 17:46:49 UTC (rev 616)
+++ trunk/alexandria/lib/alexandria/book_providers.rb        2007-02-23 21:56:39 UTC (rev 617)
</span><span class="lines">@@ -231,7 +231,7 @@
</span><span class="cx"> 
</span><span class="cx">         require 'alexandria/book_providers/bn'
</span><span class="cx">         require 'alexandria/book_providers/proxis'
</span><del>-        #require 'alexandria/book_providers/mcu'
</del><ins>+        require 'alexandria/book_providers/mcu'
</ins><span class="cx">         require 'alexandria/book_providers/thalia'
</span><span class="cx">         require 'alexandria/book_providers/ibs_it'
</span><span class="cx">         require 'alexandria/book_providers/renaud'
</span></span></pre></div>
<a id="trunkalexandrialibalexandrialibraryrb"></a>
<div class="modfile"><h4>Modified: trunk/alexandria/lib/alexandria/library.rb (616 => 617)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/alexandria/lib/alexandria/library.rb        2007-02-23 17:46:49 UTC (rev 616)
+++ trunk/alexandria/lib/alexandria/library.rb        2007-02-23 21:56:39 UTC (rev 617)
</span><span class="lines">@@ -228,9 +228,24 @@
</span><span class="cx">             self.extract_numbers(AMERICAN_UPC_LOOKUP[test_upc])
</span><span class="cx">         end
</span><span class="cx"> 
</span><ins>+        def self.canonicalise_ean(code)
+            if self.valid_ean?(code)
+                return code
+            elsif self.valid_isbn?(code)
+                code = &quot;978&quot; + code[0..8]
+                return code + String( self.ean_checksum( self.extract_numbers( code ) ) )
+            elsif self.valid_upc?(code)
+                raise &quot;fix function Alexandria::Library.canonicalise_ean&quot;
+            else
+                raise InvalidISBNError.new(code)
+            end
+        end
+
</ins><span class="cx">         def self.canonicalise_isbn(isbn)
</span><span class="cx">             numbers = self.extract_numbers(isbn)
</span><del>-
</del><ins>+if self.valid_ean?(isbn)  and numbers[0 .. 2] != [9,7,8]
+    return isbn
+end
</ins><span class="cx">             canonical = if self.valid_ean?(isbn)
</span><span class="cx">                 # Looks like an EAN number -- extract the intersting part and
</span><span class="cx">                 # calculate a checksum. It would be nice if we could validate
</span></span></pre></div>
<a id="trunkalexandriateststest_providersrb"></a>
<div class="modfile"><h4>Modified: trunk/alexandria/tests/test_providers.rb (616 => 617)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/alexandria/tests/test_providers.rb        2007-02-23 17:46:49 UTC (rev 616)
+++ trunk/alexandria/tests/test_providers.rb        2007-02-23 21:56:39 UTC (rev 617)
</span><span class="lines">@@ -31,7 +31,7 @@
</span><span class="cx">     
</span><span class="cx">     def test_amazon_isbn
</span><span class="cx">         __test_provider(Alexandria::BookProviders::AmazonProvider,
</span><del>-                        '0385504209')
</del><ins>+                        '9780385504201')
</ins><span class="cx">     end
</span><span class="cx">     
</span><span class="cx">     def test_amazon_title
</span><span class="lines">@@ -54,10 +54,10 @@
</span><span class="cx">                          '9780961328917')   # see #1433  
</span><span class="cx">     end
</span><span class="cx"> 
</span><del>-#     def test_mcu
-#         __test_provider(Alexandria::BookProviders::MCUProvider,
-#                         '8420636665') 
-#     end
</del><ins>+    def test_mcu
+        __test_provider(Alexandria::BookProviders::MCUProvider,
+                        '9788420636665') 
+    end
</ins><span class="cx"> 
</span><span class="cx">     def test_proxis
</span><span class="cx">         __test_provider(Alexandria::BookProviders::ProxisProvider,
</span><span class="lines">@@ -82,12 +82,12 @@
</span><span class="cx">     
</span><span class="cx">     def test_adlibris
</span><span class="cx">         __test_provider(Alexandria::BookProviders::AdlibrisProvider,
</span><del>-                        '9100109339') 
</del><ins>+                        '9789100109332') 
</ins><span class="cx">     end
</span><span class="cx">      
</span><span class="cx">     def test_siciliano
</span><span class="cx">         __test_provider(Alexandria::BookProviders::SicilianoProvider,
</span><del>-                        '8599170384') 
</del><ins>+                        '9788599170380') 
</ins><span class="cx">     end
</span><span class="cx"> 
</span><span class="cx">     def test_dea
</span></span></pre>
</div>
</div>

</body>
</html>