<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><meta http-equiv="content-type" content="text/html; charset=utf-8" /><style type="text/css"><!--
/* Commit metadata panel: the definition list holding revision, author and date. */
#msg dl { border: 1px #006 solid; background: #369; padding: 6px; color: #fff; }
/* Labels float left in a fixed-width column so the values line up beside them. */
#msg dt { float: left; width: 6em; font-weight: bold; }
/* Append a colon after every label via generated content. */
#msg dt:after { content:':';}
/* Shared typeface and size for the message lists and the header/footer blocks. */
#msg dl, #msg dt, #msg ul, #msg li, #header, #footer { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt; }
/*
 * Links inside the metadata panel.
 * The state rules are declared in link, visited, active order. All three
 * selectors have equal specificity, so the last matching rule wins; :active
 * has to follow :visited or its colour never shows on an already visited link.
 */
#msg dl a { font-weight: bold; }
#msg dl a:link { color:#fc3; }
#msg dl a:visited { color:#cc6; }
#msg dl a:active { color:#ff0; }
/* Section headings such as "Log Message", "Modified Paths" and "Diff". */
h3 { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt; font-weight: bold; }
/* Log message body: highlighted box that scrolls instead of overflowing the page. */
#msg pre, #msg p { overflow: auto; background: #ffc; border: 1px #fc0 solid; padding: 6px; }
/* Path lists scroll as well when an entry is wider than the viewport. */
#msg ul { overflow: auto; }
/* Colours and framing for the #header and #footer blocks. */
#header, #footer { color: #fff; background: #636; border: 1px #300 solid; padding: 6px; }
/* Diff container spans the full page width. */
#patch {
  width: 100%;
}

/* Per-file banner ("Modified: path (old => new)"). */
#patch h4 {
  font-family: verdana,arial,helvetica,sans-serif;
  font-size: 10pt;
  padding: 8px;
  background: #369;
  color: #fff;
  margin: 0;
}

/* Banners of property-change and binary-file entries. */
#patch .propset h4,
#patch .binary h4 {
  margin: 0;
}

/* Diff text: no extra spacing, so the per-line backgrounds sit flush. */
#patch pre {
  padding: 0;
  line-height: 1.2em;
  margin: 0;
}

/* Diff body: grey backdrop, scrolls when a line is too wide. */
#patch .diff {
  width: 100%;
  background: #eee;
  padding: 0 0 10px 0;
  overflow: auto;
}

#patch .propset .diff,
#patch .binary .diff {
  padding: 10px 0;
}

/* Every span, ins and del is rendered as its own full-width row. */
#patch span {
  display: block;
  padding: 0 10px;
}

/* Frame around each per-file entry, whatever kind of change it records. */
#patch .modfile,
#patch .addfile,
#patch .delfile,
#patch .propset,
#patch .binary,
#patch .copfile {
  border: 1px solid #ccc;
  margin: 10px 0;
}

/* Added lines: green background, default underline removed. */
#patch ins {
  background: #dfd;
  text-decoration: none;
  display: block;
  padding: 0 10px;
}

/* Removed lines: red background, default strike-through removed. */
#patch del {
  background: #fdd;
  text-decoration: none;
  display: block;
  padding: 0 10px;
}

/*
 * Hunk markers and file header rows.
 * NOTE(review): .info is not scoped to #patch, unlike every other selector
 * in this group. Confirm that this is intentional.
 */
#patch .lines,
.info {
  color: #888;
  background: #fff;
}
--></style>
<title>[561] blacklight_importer/src: Updates to support more permissive reading</title>
</head>
<body>
<div id="msg">
<dl>
<dt>Revision</dt> <dd>561</dd>
<dt>Author</dt> <dd>haschart</dd>
<dt>Date</dt> <dd>2008-05-14 17:43:20 -0400 (Wed, 14 May 2008)</dd>
</dl>
<h3>Log Message</h3>
<pre>Updates to support more permissive reading</pre>
<h3>Modified Paths</h3>
<ul>
<li><a href="#blacklight_importersrcMarcFilteredReaderjava">blacklight_importer/src/MarcFilteredReader.java</a></li>
<li><a href="#blacklight_importersrcMarcImporterjava">blacklight_importer/src/MarcImporter.java</a></li>
<li><a href="#blacklight_importersrcMarcPrinterjava">blacklight_importer/src/MarcPrinter.java</a></li>
</ul>
</div>
<div id="patch">
<h3>Diff</h3>
<a id="blacklight_importersrcMarcFilteredReaderjava"></a>
<div class="modfile"><h4>Modified: blacklight_importer/src/MarcFilteredReader.java (560 => 561)</h4>
<pre class="diff"><span>
<span class="info">--- blacklight_importer/src/MarcFilteredReader.java        2008-05-14 21:42:42 UTC (rev 560)
+++ blacklight_importer/src/MarcFilteredReader.java        2008-05-14 21:43:20 UTC (rev 561)
</span><span class="lines">@@ -1,5 +1,6 @@
</span><span class="cx">
</span><span class="cx">
</span><ins>+import org.marc4j.MarcException;
</ins><span class="cx"> import org.marc4j.MarcReader;
</span><span class="cx"> import org.marc4j.marc.Record;
</span><span class="cx"> import java.util.Set;
</span><span class="lines">@@ -57,9 +58,16 @@
</span><span class="cx"> while (currentRecord == null)
</span><span class="cx"> {
</span><span class="cx"> if (!reader.hasNext()) return(null);
</span><del>- Record rec = reader.next();
- if (includeRecordIfFieldPresent != null)
</del><ins>+ Record rec = null;
+ try {
+ rec = reader.next();
+ }
+ catch (MarcException me)
</ins><span class="cx"> {
</span><ins>+ System.err.println("Error reading Marc Record: "+ me.getMessage());
+ }
+ if (rec != null &amp;&amp; includeRecordIfFieldPresent != null)
+ {
</ins><span class="cx"> Set&lt;String&gt; fields = SolrIndexer.getFieldList(rec, includeRecordIfFieldPresent);
</span><span class="cx"> if (fields.size() != 0)
</span><span class="cx"> {
</span><span class="lines">@@ -69,9 +77,9 @@
</span><span class="cx"> }
</span><span class="cx"> }
</span><span class="cx"> }
</span><del>- if (includeRecordIfFieldMissing != null)
</del><ins>+ if (rec != null &amp;&amp; includeRecordIfFieldMissing != null)
</ins><span class="cx"> {
</span><del>- Set&lt;String&gt; fields = SolrIndexer.getFieldList(rec, includeRecordIfFieldPresent);
</del><ins>+ Set&lt;String&gt; fields = SolrIndexer.getFieldList(rec, includeRecordIfFieldMissing);
</ins><span class="cx"> if ((fields.size() == 0 &amp;&amp; includeRecordIfFieldDoesntContain == null) ||
</span><span class="cx"> (fields.size() != 0 &amp;&amp; includeRecordIfFieldDoesntContain != null &amp;&amp; !Utils.setItemContains(fields, includeRecordIfFieldDoesntContain)))
</span><span class="cx"> {
</span></span></pre></div>
<a id="blacklight_importersrcMarcImporterjava"></a>
<div class="modfile"><h4>Modified: blacklight_importer/src/MarcImporter.java (560 => 561)</h4>
<pre class="diff"><span>
<span class="info">--- blacklight_importer/src/MarcImporter.java        2008-05-14 21:42:42 UTC (rev 560)
+++ blacklight_importer/src/MarcImporter.java        2008-05-14 21:43:20 UTC (rev 561)
</span><span class="lines">@@ -38,6 +38,7 @@
</span><span class="cx"> import java.util.Properties;
</span><span class="cx">
</span><span class="cx"> import marcoverride.MarcDirStreamReader;
</span><ins>+import marcoverride.MarcPermissiveStreamReader;
</ins><span class="cx">
</span><span class="cx"> import org.apache.solr.core.SolrConfig;
</span><span class="cx"> import org.apache.solr.core.SolrCore;
</span><span class="lines">@@ -149,7 +150,8 @@
</span><span class="cx"> }
</span><span class="cx"> }
</span><span class="cx"> SolrHostURL = getProperty(props, "solr.hosturl");
</span><del>-
</del><ins>+
+ boolean permissiveReader = Boolean.parseBoolean(System.getProperty("marc.permissive"));
</ins><span class="cx"> verbose = Boolean.parseBoolean(getProperty(props, "marc.verbose"));
</span><span class="cx"> to_utf_8 = Boolean.parseBoolean(getProperty(props, "marc.to_utf_8"));
</span><span class="cx"> deleteRecordListFilename = getProperty(props, "marc.ids_to_delete");
</span><span class="lines">@@ -162,11 +164,11 @@
</span><span class="cx"> reader = null;
</span><span class="cx"> if (source.equals("FILE"))
</span><span class="cx"> {
</span><del>- reader = new MarcStreamReader(new FileInputStream(getProperty(props, "marc.path").trim()));
</del><ins>+ reader = new MarcPermissiveStreamReader(new FileInputStream(getProperty(props, "marc.path").trim()), permissiveReader);
</ins><span class="cx"> }
</span><span class="cx"> else if (source.equals("DIR"))
</span><span class="cx"> {
</span><del>- reader = new MarcDirStreamReader(getProperty(props, "marc.path").trim());
</del><ins>+ reader = new MarcDirStreamReader(getProperty(props, "marc.path").trim(), permissiveReader);
</ins><span class="cx"> }
</span><span class="cx"> else if (source.equals("Z3950"))
</span><span class="cx"> {
</span><span class="lines">@@ -262,6 +264,18 @@
</span><span class="cx"> System.out.println("Adding record " + recordCounter + ": " + record.getControlNumber());
</span><span class="cx"> addToIndex(record);
</span><span class="cx"> }
</span><ins>+ catch (org.apache.solr.common.SolrException e)
+ {
+ if (e.getMessage().contains("missing required fields"))
+ {
+ System.err.println("Warning : " + e.getMessage()+ "at record count = "+ recordCounter);
+ }
+ else
+ {
+ System.err.println("Error indexing");
+ e.printStackTrace();
+ }
+ }
</ins><span class="cx"> catch(Exception e)
</span><span class="cx"> {
</span><span class="cx"> // keep going?
</span><span class="lines">@@ -283,6 +297,7 @@
</span><span class="cx">
</span><span class="cx"> // finish up
</span><span class="cx"> addcmd.doc = builder.getDoc();
</span><ins>+
</ins><span class="cx"> if (verbose)
</span><span class="cx"> {
</span><span class="cx"> System.out.println(record.toString());
</span></span></pre></div>
<a id="blacklight_importersrcMarcPrinterjava"></a>
<div class="modfile"><h4>Modified: blacklight_importer/src/MarcPrinter.java (560 => 561)</h4>
<pre class="diff"><span>
<span class="info">--- blacklight_importer/src/MarcPrinter.java        2008-05-14 21:42:42 UTC (rev 560)
+++ blacklight_importer/src/MarcPrinter.java        2008-05-14 21:43:20 UTC (rev 561)
</span><span class="lines">@@ -16,8 +16,10 @@
</span><span class="cx"> import java.util.Set;
</span><span class="cx">
</span><span class="cx"> import marcoverride.MarcDirStreamReader;
</span><ins>+import marcoverride.MarcPermissiveStreamReader;
</ins><span class="cx">
</span><span class="cx"> import org.apache.solr.update.DeleteUpdateCommand;
</span><ins>+import org.marc4j.MarcException;
</ins><span class="cx"> import org.marc4j.MarcReader;
</span><span class="cx"> import org.marc4j.MarcStreamReader;
</span><span class="cx"> import org.marc4j.MarcStreamWriter;
</span><span class="lines">@@ -42,15 +44,16 @@
</span><span class="cx"> String fileStr = args[1];
</span><span class="cx"> File file = new File(fileStr);
</span><span class="cx"> MarcReader reader;
</span><ins>+ boolean permissiveReader = Boolean.parseBoolean(System.getProperty("marc.permissive"));
</ins><span class="cx">
</span><span class="cx"> if (file.isDirectory())
</span><span class="cx"> {
</span><del>- reader = new MarcDirStreamReader(file);
</del><ins>+ reader = new MarcDirStreamReader(file, permissiveReader);
</ins><span class="cx"> }
</span><span class="cx"> else
</span><span class="cx"> {
</span><span class="cx"> InputStream in = new FileInputStream(file);
</span><del>- reader = new MarcStreamReader(in);
</del><ins>+ reader = new MarcPermissiveStreamReader(in, permissiveReader);
</ins><span class="cx"> }
</span><span class="cx">
</span><span class="cx"> String marcIncludeIfPresent = System.getProperty("marc.include_if_present");
</span><span class="lines">@@ -101,20 +104,63 @@
</span><span class="cx"> map = indexer.findMap(fieldVals[3]);
</span><span class="cx"> }
</span><span class="cx">
</span><del>- if (mode.equals("translate"))
</del><ins>+ if (mode.equals("count"))
</ins><span class="cx"> {
</span><ins>+ int count = 0;
+ while (reader.hasNext())
+ {
+ try {
+ Record rec = reader.next();
+ count++;
+ if (count % 1000 == 0) System.err.println(count);
+ if (verbose) System.out.println(rec.toString());
+ }
+ catch (MarcException me)
+ {
+ System.err.println("Error reading Record "+ me.getMessage());
+ }
+ }
+ System.out.println("Total records= "+ count);
+ }
+ else if (mode.equals("translate"))
+ {
</ins><span class="cx"> to_utf_8 = true;
</span><span class="cx"> writer = new MarcStreamWriter(System.out, "UTF-8");
</span><span class="cx"> while (reader.hasNext())
</span><span class="cx"> {
</span><del>- Record rec = reader.next();
- writer.write(rec);
</del><ins>+ try {
+ Record rec = reader.next();
+ writer.write(rec);
+ }
+ catch (MarcException me)
+ {
+ System.err.println("Error reading Record "+ me.getMessage());
+ }
+
</ins><span class="cx"> }
</span><ins>+ writer.close();
</ins><span class="cx"> }
</span><span class="cx"> else if (mode.equals("print"))
</span><span class="cx"> {
</span><span class="cx"> while (reader.hasNext())
</span><span class="cx"> {
</span><ins>+ try {
+ Record rec = reader.next();
+// Leader ldr = rec.getLeader();
+// if (ldr.getBaseAddressOfData() != 0) continue;
+ if (verbose) System.out.println(rec.toString());
+ }
+ catch (MarcException me)
+ {
+ System.err.println("Error reading Marc Record: "+ me.getMessage());
+ }
+ }
+ }
+
+ else if (mode.equals("era"))
+ {
+ while (reader.hasNext())
+ {
</ins><span class="cx"> Record rec = reader.next();
</span><span class="cx"> if (verbose) System.out.println(rec.toString());
</span><span class="cx"> if (map != null)
</span></span></pre>
</div>
</div>
</body>
</html>