[jsword-svn] r1865 - in trunk/jsword/src: main/java/org/crosswire/jsword/book main/java/org/crosswire/jsword/book/study main/java/org/crosswire/jsword/index/lucene main/java/org/crosswire/jsword/index/lucene/analysis test/java/org/crosswire/jsword/index/lucene/analysis
dmsmith at www.crosswire.org
dmsmith at www.crosswire.org
Sat May 17 19:15:03 MST 2008
Author: dmsmith
Date: 2008-05-17 19:15:02 -0700 (Sat, 17 May 2008)
New Revision: 1865
Added:
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/AbstractBookAnalyzer.java
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/AbstractBookTokenFilter.java
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/KeyAnalyzer.java
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/KeyFilter.java
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/LuceneAnalyzer.java
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/StrongsNumberAnalyzer.java
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/StrongsNumberFilter.java
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/XRefAnalyzer.java
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/XRefFilter.java
Removed:
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/KeyAnalyzer.java
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/KeyFilter.java
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/LuceneAnalyzer.java
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/StrongsNumberAnalyzer.java
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/StrongsNumberFilter.java
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/XRefAnalyzer.java
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/XRefFilter.java
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/AbstractAnalyzer.java
Modified:
trunk/jsword/src/main/java/org/crosswire/jsword/book/OSISUtil.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/study/StrongsNumber.java
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/IndexMetadata.java
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/IndexMetadata.properties
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/LuceneIndex.java
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/AnalyzerFactory.java
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/ChineseLuceneAnalyzer.java
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/ConfigurableSnowballAnalyzer.java
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/CzechLuceneAnalyzer.java
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/EnglishLuceneAnalyzer.java
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/GermanLuceneAnalyzer.java
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/GreekLuceneAnalyzer.java
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/SimpleLuceneAnalyzer.java
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/ThaiLuceneAnalyzer.java
trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/AnalyzerFactoryTest.java
trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/ChineseLuceneAnalyzerTest.java
trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/ConfigurableSnowballAnalyzerTest.java
trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/EnglishLuceneAnalyzerTest.java
trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/GreekLuceneAnalyzerTest.java
trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/ThaiLuceneAnalyzerTest.java
Log:
Changes to indexing:
Refactored analyzers into analyzer package.
Stemming is now enabled
Strong's numbers now index H19a and the like
Made analyzers and filters take a book argument and not a language.
Optimized filters to reuse tokens.
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/OSISUtil.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/OSISUtil.java 2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/OSISUtil.java 2008-05-18 02:15:02 UTC (rev 1865)
@@ -115,7 +115,7 @@
public static final String HI_UNDERLINE = "underline"; //$NON-NLS-1$
/**
- * Constant for rendering uppercase text
+ * Constant for rendering upper case text
*/
public static final String HI_X_CAPS = "x-caps"; //$NON-NLS-1$
@@ -704,37 +704,19 @@
String attr = ele.getAttributeValue(OSISUtil.ATTRIBUTE_W_LEMMA);
if (attr != null)
{
- if (buffer.length() > 0)
+ Matcher matcher = strongsNumberPattern.matcher(attr);
+ while (matcher.find())
{
- buffer.append(' ');
+ String strongsNum = matcher.group(1);
+ if (buffer.length() > 0)
+ {
+ buffer.append(' ');
+ }
+ buffer.append(strongsNum);
}
-
- buffer.append(attr);
}
}
- String lemmas = buffer.toString();
-
- // Clear out the buffer for re-use
- int len = buffer.length();
- if (len > 0)
- {
- buffer.delete(0, len);
- }
-
- Matcher matcher = strongsNumberPattern.matcher(lemmas);
- while (matcher.find())
- {
- String strongType = matcher.group(1);
- String strongsNum = matcher.group(2);
- if (buffer.length() > 0)
- {
- buffer.append(' ');
- }
- buffer.append(strongType);
- buffer.append(strongsNum);
- }
-
return buffer.toString().trim();
}
@@ -1204,6 +1186,6 @@
}
}
- private static String strongsNumber = "strong:([GH])0*([0-9]+)"; //$NON-NLS-1$
+ private static String strongsNumber = "strong:([GgHh][0-9]+!?[A-Za-z]*)"; //$NON-NLS-1$
private static Pattern strongsNumberPattern = Pattern.compile(strongsNumber);
}
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/study/StrongsNumber.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/study/StrongsNumber.java 2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/study/StrongsNumber.java 2008-05-18 02:15:02 UTC (rev 1865)
@@ -69,13 +69,26 @@
*/
public StrongsNumber(char language, short strongsNumber) throws BookException
{
+ this(language, strongsNumber, null);
+ }
+
+ /**
+ * Build an immutable Strong's Number.
+ * If the language is not 'G' or 'H' or the number is invalid, a BookException is thrown.
+ * @param language
+ * @param strongsNumber
+ * @throws BookException
+ */
+ public StrongsNumber(char language, short strongsNumber, String part) throws BookException
+ {
this.language = language;
this.strongsNumber = strongsNumber;
+ this.part = part;
validate();
}
/**
- * Return the canonical form of a Strong's Number.
+ * Return the canonical form of a Strong's Number, without the part.
* @return the strongsNumber
*/
public String getStrongsNumber()
@@ -86,6 +99,72 @@
return buf.toString();
}
+ /**
+ * Return the canonical form of a Strong's Number, with the part, if any
+ * @return the strongsNumber
+ */
+ public String getFullStrongsNumber()
+ {
+ StringBuffer buf = new StringBuffer(5);
+ buf.append(language);
+ buf.append(ZERO_PAD.format(strongsNumber));
+ if (part != null)
+ {
+ buf.append(part);
+ }
+ return buf.toString();
+ }
+
+ /**
+ * @return true if the Strong's number is for Greek
+ */
+ public boolean isGreek()
+ {
+ return language == 'G';
+ }
+
+ /**
+ * @return true if the Strong's number is for Hebrew (language code 'H')
+ */
+ public boolean isHebrew()
+ {
+ return language == 'H';
+ }
+
+ /**
+ * @return true if this Strong's number is identified by a sub part
+ */
+ public boolean isPart()
+ {
+ return part != null;
+ }
+
+ /**
+ * Validates the number portion of this StrongsNumber.
+ * Hebrew Strong's numbers are in the range of: 1-8674
+ * Greek Strong's numbers are in the range of: 1-5624 (but not 1418, 2717, 3203-3302, 4452)
+ * @return true if the Strong's number is in range.
+ */
+ public boolean isValid()
+ {
+ if (language == 'H' && (strongsNumber < 1 || strongsNumber > 8674))
+ {
+ return false;
+ }
+
+ if (language == 'G'
+ && (strongsNumber < 1
+ || strongsNumber > 5624
+ || strongsNumber == 1418
+ || strongsNumber == 2717
+ || (strongsNumber >= 3203 && strongsNumber <= 3302)
+ || strongsNumber == 4452))
+ {
+ return false;
+ }
+ return true;
+ }
+
/* (non-Javadoc)
* @see java.lang.Object#hashCode()
*/
@@ -155,6 +234,9 @@
// Get the number after the G or H
strongsNumber = Short.parseShort(m.group(2));
+
+ // FYI: OSIS refers to what follows a ! as a grain
+ part = m.group(3);
}
private void validate() throws BookException
@@ -163,24 +245,6 @@
{
throw new BookException(UserMsg.STRONGS_ERROR_NUMBER, new Object[] { toString() });
}
-
- // Greek Strong's numbers are in the range of: 1-8674
- if (language == 'H' && (strongsNumber < 1 || strongsNumber > 8674))
- {
- throw new BookException(UserMsg.STRONGS_ERROR_NUMBER, new Object[] { toString() });
- }
-
- // Greek Strong's numbers are in the range of: 1-5624 (but not 1418, 2717, 3203-3302, 4452)
- if (language == 'G'
- && (strongsNumber < 0
- || strongsNumber > 5624
- || strongsNumber == 1418
- || strongsNumber == 2717
- || (strongsNumber >= 3203 || strongsNumber <= 3302)
- || strongsNumber == 4452))
- {
- throw new BookException(UserMsg.STRONGS_ERROR_NUMBER, new Object[] { toString() });
- }
}
/**
@@ -194,8 +258,13 @@
private short strongsNumber;
/**
- * The pattern of an acceptable strongs number.
+ * The part if any.
*/
- private static final Pattern STRONGS_PATTERN = Pattern.compile("([GgHh])([0-9]+)"); //$NON-NLS-1$
+ private String part;
+
+ /**
+ * The pattern of an acceptable Strong's number.
+ */
+ private static final Pattern STRONGS_PATTERN = Pattern.compile("([GgHh])0*([1-9][0-9]*)!?([A-Za-z]+)?"); //$NON-NLS-1$
private static final DecimalFormat ZERO_PAD = new DecimalFormat("0000"); //$NON-NLS-1$
}
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/IndexMetadata.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/IndexMetadata.java 2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/IndexMetadata.java 2008-05-18 02:15:02 UTC (rev 1865)
@@ -82,7 +82,7 @@
public static final String LATEST_INDEX_VERSION = "Latest.Index.Version"; //$NON-NLS-1$
public static final String LUCENE_VERSION = "Lucene.Version"; //$NON-NLS-1$
public static final float INDEX_VERSION_1_1 = 1.1f;
- public static final float INDEX_VERSION_1_2 = 1.1f;
+ public static final float INDEX_VERSION_1_2 = 1.2f;
private static final Logger log = Logger.getLogger(IndexMetadata.class);
private static IndexMetadata myInstance;
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/IndexMetadata.properties
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/IndexMetadata.properties 2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/IndexMetadata.properties 2008-05-18 02:15:02 UTC (rev 1865)
@@ -1,4 +1,4 @@
-Installed.Index.Version=1.1
+Installed.Index.Version=1.2
Latest.Index.Version=1.2
Lucene.Version=2.3
Deleted: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/KeyAnalyzer.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/KeyAnalyzer.java 2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/KeyAnalyzer.java 2008-05-18 02:15:02 UTC (rev 1865)
@@ -1,46 +0,0 @@
-/**
- * Distribution License:
- * JSword is free software; you can redistribute it and/or modify it under
- * the terms of the GNU Lesser General Public License, version 2.1 as published by
- * the Free Software Foundation. This program is distributed in the hope
- * that it will be useful, but WITHOUT ANY WARRANTY; without even the
- * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU Lesser General Public License for more details.
- *
- * The License is available on the internet at:
- * http://www.gnu.org/copyleft/lgpl.html
- * or by writing to:
- * Free Software Foundation, Inc.
- * 59 Temple Place - Suite 330
- * Boston, MA 02111-1307, USA
- *
- * Copyright: 2007
- * The copyright to this program is held by it's authors.
- *
- * ID: $Id$
- */
-package org.crosswire.jsword.index.lucene;
-
-import java.io.Reader;
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.KeywordTokenizer;
-import org.apache.lucene.analysis.TokenStream;
-
-/**
- * A specialized analyzer that normalizes Strong's Numbers.
- *
- * @see gnu.lgpl.License for license details.
- * The copyright to this program is held by it's authors.
- * @author DM Smith [dmsmith555 at yahoo dot com]
- */
-public class KeyAnalyzer extends Analyzer
-{
- /* (non-Javadoc)
- * @see org.apache.lucene.analysis.Analyzer#tokenStream(java.lang.String, java.io.Reader)
- */
- public TokenStream tokenStream(String fieldName, Reader reader)
- {
- return new KeyFilter(new KeywordTokenizer(reader));
- }
-}
Deleted: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/KeyFilter.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/KeyFilter.java 2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/KeyFilter.java 2008-05-18 02:15:02 UTC (rev 1865)
@@ -1,55 +0,0 @@
-/**
- * Distribution License:
- * JSword is free software; you can redistribute it and/or modify it under
- * the terms of the GNU Lesser General Public License, version 2.1 as published by
- * the Free Software Foundation. This program is distributed in the hope
- * that it will be useful, but WITHOUT ANY WARRANTY; without even the
- * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU Lesser General Public License for more details.
- *
- * The License is available on the internet at:
- * http://www.gnu.org/copyleft/lgpl.html
- * or by writing to:
- * Free Software Foundation, Inc.
- * 59 Temple Place - Suite 330
- * Boston, MA 02111-1307, USA
- *
- * Copyright: 2007
- * The copyright to this program is held by it's authors.
- *
- * ID: $Id$
- */
-package org.crosswire.jsword.index.lucene;
-
-import java.io.IOException;
-
-import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
-
-/**
- * A KeyFilter normalizes Key.
- *
- * @see gnu.lgpl.License for license details.
- * The copyright to this program is held by it's authors.
- * @author DM Smith [dmsmith555 at yahoo dot com]
- */
-public class KeyFilter extends TokenFilter
-{
- /**
- * Construct filtering <i>in</i>.
- */
- public KeyFilter(TokenStream in)
- {
- super(in);
- }
-
- /* (non-Javadoc)
- * @see org.apache.lucene.analysis.TokenStream#next()
- */
- public final Token next() throws IOException
- {
- // TODO(DMS): actually normalize
- return input.next();
- }
-}
Deleted: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/LuceneAnalyzer.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/LuceneAnalyzer.java 2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/LuceneAnalyzer.java 2008-05-18 02:15:02 UTC (rev 1865)
@@ -1,76 +0,0 @@
-/**
- * Distribution License:
- * JSword is free software; you can redistribute it and/or modify it under
- * the terms of the GNU Lesser General Public License, version 2.1 as published by
- * the Free Software Foundation. This program is distributed in the hope
- * that it will be useful, but WITHOUT ANY WARRANTY; without even the
- * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU Lesser General Public License for more details.
- *
- * The License is available on the internet at:
- * http://www.gnu.org/copyleft/lgpl.html
- * or by writing to:
- * Free Software Foundation, Inc.
- * 59 Temple Place - Suite 330
- * Boston, MA 02111-1307, USA
- *
- * Copyright: 2005
- * The copyright to this program is held by it's authors.
- *
- * ID: $Id:LuceneIndex.java 984 2006-01-23 14:18:33 -0500 (Mon, 23 Jan 2006) dmsmith $
- */
-package org.crosswire.jsword.index.lucene;
-
-import java.io.Reader;
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
-import org.apache.lucene.analysis.SimpleAnalyzer;
-import org.apache.lucene.analysis.TokenStream;
-import org.crosswire.jsword.index.lucene.analysis.AnalyzerFactory;
-
-/**
- * A specialized analyzer for Books that analyzes different fields differently.
- * Uses AnalyzerFactory for InstalledIndexVersion > 1.1
- * @see gnu.lgpl.License for license details.
- * The copyright to this program is held by it's authors.
- * @author DM Smith [dmsmith555 at yahoo dot com]
- */
-public class LuceneAnalyzer extends Analyzer
-{
-
- public LuceneAnalyzer()
- {
- this(AnalyzerFactory.DEFAULT_ID);
- }
-
- public LuceneAnalyzer(String naturalLanguageID)
- {
- // The default analysis
- analyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer());
-
- if (IndexMetadata.instance().getInstalledIndexVersion() > IndexMetadata.INDEX_VERSION_1_1)
- {
- // Content is analyzed using natural language analyzer
- // (stemming, stopword etc)
- Analyzer myNaturalLanguageAnalyzer = AnalyzerFactory.getInstance().createAnalyzer(naturalLanguageID);
- analyzer.addAnalyzer(LuceneIndex.FIELD_BODY, myNaturalLanguageAnalyzer);
- }
-
- // Keywords are normalized to osisIDs
- analyzer.addAnalyzer(LuceneIndex.FIELD_KEY, new KeyAnalyzer());
-
- // Strong's Numbers are normalized to a consistent representation
- analyzer.addAnalyzer(LuceneIndex.FIELD_STRONG, new StrongsNumberAnalyzer());
-
- // XRefs are normalized from ranges into a list of osisIDs
- analyzer.addAnalyzer(LuceneIndex.FIELD_XREF, new XRefAnalyzer());
- }
-
- public TokenStream tokenStream(String fieldName, Reader reader)
- {
- return analyzer.tokenStream(fieldName, reader);
- }
-
- private PerFieldAnalyzerWrapper analyzer;
-}
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/LuceneIndex.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/LuceneIndex.java 2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/LuceneIndex.java 2008-05-18 02:15:02 UTC (rev 1865)
@@ -56,6 +56,7 @@
import org.crosswire.jsword.book.OSISUtil;
import org.crosswire.jsword.index.AbstractIndex;
import org.crosswire.jsword.index.IndexStatus;
+import org.crosswire.jsword.index.lucene.analysis.LuceneAnalyzer;
import org.crosswire.jsword.index.search.SearchModifier;
import org.crosswire.jsword.passage.AbstractPassage;
import org.crosswire.jsword.passage.Key;
@@ -74,7 +75,40 @@
*/
public class LuceneIndex extends AbstractIndex implements Activatable
{
+ /* The following fields are named the same as Sword in the hopes of
+ * sharing indexes.
+ */
/**
+ * The Lucene field for the osisID
+ */
+ public static final String FIELD_KEY = "key"; //$NON-NLS-1$
+
+ /**
+ * The Lucene field for the text contents
+ */
+ public static final String FIELD_BODY = "content"; //$NON-NLS-1$
+
+ /**
+ * The Lucene field for the strong numbers
+ */
+ public static final String FIELD_STRONG = "strong"; //$NON-NLS-1$
+
+ /**
+ * The Lucene field for headings
+ */
+ public static final String FIELD_HEADING = "heading"; //$NON-NLS-1$
+
+ /**
+ * The Lucene field for cross references
+ */
+ public static final String FIELD_XREF = "xref"; //$NON-NLS-1$
+
+ /**
+ * The Lucene field for the notes
+ */
+ public static final String FIELD_NOTE = "note"; //$NON-NLS-1$
+
+ /**
* Read an existing index and use it.
* @throws BookException If we fail to read the index files
*/
@@ -119,8 +153,7 @@
IndexStatus finalStatus = IndexStatus.UNDONE;
- String bookLang = book.getLanguage().getName();
- Analyzer analyzer = new LuceneAnalyzer(bookLang);
+ Analyzer analyzer = new LuceneAnalyzer(book);
List errors = new ArrayList();
File tempPath = new File(path + '.' + IndexStatus.CREATING.toString());
@@ -207,8 +240,7 @@
{
try
{
- String bookLang = book.getLanguage().getName();
- Analyzer analyzer = new LuceneAnalyzer(bookLang);
+ Analyzer analyzer = new LuceneAnalyzer(book);
QueryParser parser = new QueryParser(LuceneIndex.FIELD_BODY, analyzer);
parser.setAllowLeadingWildcard(true);
@@ -479,40 +511,7 @@
*/
private static final Logger log = Logger.getLogger(LuceneIndex.class);
- /* The following fields are named the same as Sword in the hopes of
- * sharing indexes.
- */
/**
- * The Lucene field for the osisID
- */
- protected static final String FIELD_KEY = "key"; //$NON-NLS-1$
-
- /**
- * The Lucene field for the text contents
- */
- protected static final String FIELD_BODY = "content"; //$NON-NLS-1$
-
- /**
- * The Lucene field for the strong numbers
- */
- protected static final String FIELD_STRONG = "strong"; //$NON-NLS-1$
-
- /**
- * The Lucene field for headings
- */
- protected static final String FIELD_HEADING = "heading"; //$NON-NLS-1$
-
- /**
- * The Lucene field for cross references
- */
- protected static final String FIELD_XREF = "xref"; //$NON-NLS-1$
-
- /**
- * The Lucene field for the notes
- */
- protected static final String FIELD_NOTE = "note"; //$NON-NLS-1$
-
- /**
* The Book that we are indexing
*/
protected Book book;
Deleted: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/StrongsNumberAnalyzer.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/StrongsNumberAnalyzer.java 2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/StrongsNumberAnalyzer.java 2008-05-18 02:15:02 UTC (rev 1865)
@@ -1,46 +0,0 @@
-/**
- * Distribution License:
- * JSword is free software; you can redistribute it and/or modify it under
- * the terms of the GNU Lesser General Public License, version 2.1 as published by
- * the Free Software Foundation. This program is distributed in the hope
- * that it will be useful, but WITHOUT ANY WARRANTY; without even the
- * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU Lesser General Public License for more details.
- *
- * The License is available on the internet at:
- * http://www.gnu.org/copyleft/lgpl.html
- * or by writing to:
- * Free Software Foundation, Inc.
- * 59 Temple Place - Suite 330
- * Boston, MA 02111-1307, USA
- *
- * Copyright: 2007
- * The copyright to this program is held by it's authors.
- *
- * ID: $Id$
- */
-package org.crosswire.jsword.index.lucene;
-
-import java.io.Reader;
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
-
-/**
- * A specialized analyzer that normalizes JSword keys.
- *
- * @see gnu.lgpl.License for license details.
- * The copyright to this program is held by it's authors.
- * @author DM Smith [dmsmith555 at yahoo dot com]
- */
-public class StrongsNumberAnalyzer extends Analyzer
-{
- /* (non-Javadoc)
- * @see org.apache.lucene.analysis.Analyzer#tokenStream(java.lang.String, java.io.Reader)
- */
- public TokenStream tokenStream(String fieldName, Reader reader)
- {
- return new StrongsNumberFilter(new WhitespaceTokenizer(reader));
- }
-}
Deleted: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/StrongsNumberFilter.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/StrongsNumberFilter.java 2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/StrongsNumberFilter.java 2008-05-18 02:15:02 UTC (rev 1865)
@@ -1,76 +0,0 @@
-/**
- * Distribution License:
- * JSword is free software; you can redistribute it and/or modify it under
- * the terms of the GNU Lesser General Public License, version 2.1 as published by
- * the Free Software Foundation. This program is distributed in the hope
- * that it will be useful, but WITHOUT ANY WARRANTY; without even the
- * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU Lesser General Public License for more details.
- *
- * The License is available on the internet at:
- * http://www.gnu.org/copyleft/lgpl.html
- * or by writing to:
- * Free Software Foundation, Inc.
- * 59 Temple Place - Suite 330
- * Boston, MA 02111-1307, USA
- *
- * Copyright: 2007
- * The copyright to this program is held by it's authors.
- *
- * ID: $Id$
- */
-package org.crosswire.jsword.index.lucene;
-
-import java.io.IOException;
-
-import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
-import org.crosswire.jsword.book.BookException;
-import org.crosswire.jsword.book.DataPolice;
-import org.crosswire.jsword.book.study.StrongsNumber;
-
-/**
- * A StrongsNumberFilter normalizes Strong's Numbers.
- *
- * @see gnu.lgpl.License for license details.
- * The copyright to this program is held by it's authors.
- * @author DM Smith [dmsmith555 at yahoo dot com]
- */
-public class StrongsNumberFilter extends TokenFilter
-{
- /**
- * Construct filtering <i>in</i>.
- */
- public StrongsNumberFilter(TokenStream in)
- {
- super(in);
- }
-
- /* (non-Javadoc)
- * @see org.apache.lucene.analysis.TokenStream#next()
- */
- public final Token next() throws IOException
- {
- Token token = input.next();
- if (token == null)
- {
- return null;
- }
-
- try
- {
- String s = new StrongsNumber(token.termText()).getStrongsNumber();
- if (!s.equals(token.termText()))
- {
- token.setTermText(s);
- }
- }
- catch (BookException e)
- {
- DataPolice.report(e.getDetailedMessage());
- }
-
- return token;
- }
-}
Deleted: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/XRefAnalyzer.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/XRefAnalyzer.java 2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/XRefAnalyzer.java 2008-05-18 02:15:02 UTC (rev 1865)
@@ -1,46 +0,0 @@
-/**
- * Distribution License:
- * JSword is free software; you can redistribute it and/or modify it under
- * the terms of the GNU Lesser General Public License, version 2.1 as published by
- * the Free Software Foundation. This program is distributed in the hope
- * that it will be useful, but WITHOUT ANY WARRANTY; without even the
- * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU Lesser General Public License for more details.
- *
- * The License is available on the internet at:
- * http://www.gnu.org/copyleft/lgpl.html
- * or by writing to:
- * Free Software Foundation, Inc.
- * 59 Temple Place - Suite 330
- * Boston, MA 02111-1307, USA
- *
- * Copyright: 2007
- * The copyright to this program is held by it's authors.
- *
- * ID: $Id$
- */
-package org.crosswire.jsword.index.lucene;
-
-import java.io.Reader;
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
-
-/**
- * A specialized analyzer that normalizes Strong's Numbers.
- *
- * @see gnu.lgpl.License for license details.
- * The copyright to this program is held by it's authors.
- * @author DM Smith [dmsmith555 at yahoo dot com]
- */
-public class XRefAnalyzer extends Analyzer
-{
- /* (non-Javadoc)
- * @see org.apache.lucene.analysis.Analyzer#tokenStream(java.lang.String, java.io.Reader)
- */
- public TokenStream tokenStream(String fieldName, Reader reader)
- {
- return new KeyFilter(new WhitespaceTokenizer(reader));
- }
-}
Deleted: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/XRefFilter.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/XRefFilter.java 2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/XRefFilter.java 2008-05-18 02:15:02 UTC (rev 1865)
@@ -1,55 +0,0 @@
-/**
- * Distribution License:
- * JSword is free software; you can redistribute it and/or modify it under
- * the terms of the GNU Lesser General Public License, version 2.1 as published by
- * the Free Software Foundation. This program is distributed in the hope
- * that it will be useful, but WITHOUT ANY WARRANTY; without even the
- * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU Lesser General Public License for more details.
- *
- * The License is available on the internet at:
- * http://www.gnu.org/copyleft/lgpl.html
- * or by writing to:
- * Free Software Foundation, Inc.
- * 59 Temple Place - Suite 330
- * Boston, MA 02111-1307, USA
- *
- * Copyright: 2007
- * The copyright to this program is held by it's authors.
- *
- * ID: $Id$
- */
-package org.crosswire.jsword.index.lucene;
-
-import java.io.IOException;
-
-import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
-
-/**
- * A KeyFilter normalizes OSISrefs.
- *
- * @see gnu.lgpl.License for license details.
- * The copyright to this program is held by it's authors.
- * @author DM Smith [dmsmith555 at yahoo dot com]
- */
-public class XRefFilter extends TokenFilter
-{
- /**
- * Construct filtering <i>in</i>.
- */
- public XRefFilter(TokenStream in)
- {
- super(in);
- }
-
- /* (non-Javadoc)
- * @see org.apache.lucene.analysis.TokenStream#next()
- */
- public final Token next() throws IOException
- {
- // TODO(DMS): actually normalize
- return input.next();
- }
-}
Deleted: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/AbstractAnalyzer.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/AbstractAnalyzer.java 2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/AbstractAnalyzer.java 2008-05-18 02:15:02 UTC (rev 1865)
@@ -1,88 +0,0 @@
-/**
- * Distribution License:
- * JSword is free software; you can redistribute it and/or modify it under
- * the terms of the GNU Lesser General Public License, version 2.1 as published by
- * the Free Software Foundation. This program is distributed in the hope
- * that it will be useful, but WITHOUT ANY WARRANTY; without even the
- * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU Lesser General Public License for more details.
- *
- * The License is available on the internet at:
- * http://www.gnu.org/copyleft/lgpl.html
- * or by writing to:
- * Free Software Foundation, Inc.
- * 59 Temple Place - Suite 330
- * Boston, MA 02111-1307, USA
- *
- * Copyright: 2007
- * The copyright to this program is held by it's authors.
- *
- * ID: $Id: $
- */
-package org.crosswire.jsword.index.lucene.analysis;
-
-import java.util.Set;
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.StopFilter;
-
-/**
- * Base class for Analyzers.
- * Note: All analyzers configured in AnalyzerFactory.properties should be of this type
- *
- * @see gnu.lgpl.License for license details.<br>
- * The copyright to this program is held by it's authors.
- * @author sijo cherian [sijocherian at yahoo dot com]
- */
-public abstract class AbstractAnalyzer extends Analyzer
-{
-
- public AbstractAnalyzer()
- {
- doStopWords = false;
- doStemming = true;
- naturalLanguage = null;
- }
-
- public void setDoStopWords(boolean doIt)
- {
- doStopWords = doIt;
- }
-
- public boolean getDoStopWords()
- {
- return doStopWords;
- }
-
- public void setStopWords(String[] stopWords)
- {
- stopSet = StopFilter.makeStopSet(stopWords);
- }
-
- public void setDoStemming(boolean stemming)
- {
- doStemming = stemming;
- }
-
- public void setNaturalLanguage(String lang)
- {
- naturalLanguage = lang;
- }
-
- public String getNaturalLanguage()
- {
- return naturalLanguage;
- }
-
- protected Set stopSet;
-
- // for turning on/off stopword removal during analysis
- protected boolean doStopWords;
-
- // for turning on/off stemming
- protected boolean doStemming;
-
- // Natural language of text that is being analyzed (optional parameter)
- protected String naturalLanguage;
-
-}
Copied: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/AbstractBookAnalyzer.java (from rev 1853, trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/AbstractAnalyzer.java)
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/AbstractBookAnalyzer.java (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/AbstractBookAnalyzer.java 2008-05-18 02:15:02 UTC (rev 1865)
@@ -0,0 +1,117 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ * http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ * Free Software Foundation, Inc.
+ * 59 Temple Place - Suite 330
+ * Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2007
+ * The copyright to this program is held by it's authors.
+ *
+ * ID: $Id: $
+ */
+package org.crosswire.jsword.index.lucene.analysis;
+
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.StopFilter;
+import org.crosswire.jsword.book.Book;
+
+/**
+ * Base class for Analyzers.
+ * Note: All analyzers configured in AnalyzerFactory.properties should be of this type
+ *
+ * @see gnu.lgpl.License for license details.<br>
+ * The copyright to this program is held by it's authors.
+ * @author sijo cherian [sijocherian at yahoo dot com]
+ */
+public abstract class AbstractBookAnalyzer extends Analyzer
+{
+
+ public AbstractBookAnalyzer()
+ {
+ this(null);
+ }
+
+ public AbstractBookAnalyzer(Book book)
+ {
+ this.book = book;
+ doStopWords = false;
+ doStemming = true;
+ naturalLanguage = null;
+ }
+
+ /**
+ * The book for which analysis is being performed.
+ * @param newBook
+ */
+ public void setBook(Book newBook)
+ {
+ book = newBook;
+ }
+
+ /**
+ * @return the book for which analysis is being performed.
+ */
+ public Book getBook()
+ {
+ return book;
+ }
+
+ public void setDoStopWords(boolean doIt)
+ {
+ doStopWords = doIt;
+ }
+
+ public boolean getDoStopWords()
+ {
+ return doStopWords;
+ }
+
+ public void setStopWords(String[] stopWords)
+ {
+ stopSet = StopFilter.makeStopSet(stopWords);
+ }
+
+ public void setDoStemming(boolean stemming)
+ {
+ doStemming = stemming;
+ }
+
+ public void setNaturalLanguage(String lang)
+ {
+ naturalLanguage = lang;
+ }
+
+ public String getNaturalLanguage()
+ {
+ return naturalLanguage;
+ }
+
+ /**
+ * The book against which analysis is performed.
+ */
+ protected Book book;
+
+ protected Set stopSet;
+
+ // for turning on/off stop word removal during analysis
+ protected boolean doStopWords;
+
+ // for turning on/off stemming
+ protected boolean doStemming;
+
+ // Natural language of text that is being analyzed (optional parameter)
+ protected String naturalLanguage;
+
+}
Added: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/AbstractBookTokenFilter.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/AbstractBookTokenFilter.java (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/AbstractBookTokenFilter.java 2008-05-18 02:15:02 UTC (rev 1865)
@@ -0,0 +1,76 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ * http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ * Free Software Foundation, Inc.
+ * 59 Temple Place - Suite 330
+ * Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2008
+ * The copyright to this program is held by it's authors.
+ *
+ * ID: $Id: org.eclipse.jdt.ui.prefs 1178 2006-11-06 12:48:02Z dmsmith $
+ */
+package org.crosswire.jsword.index.lucene.analysis;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.crosswire.jsword.book.Book;
+
+/**
+ * An AbstractBookTokenFilter ties a Lucene TokenFilter to a Book.
+ *
+ * @see gnu.lgpl.License for license details.<br>
+ * The copyright to this program is held by it's authors.
+ * @author DM Smith [dmsmith555 at yahoo dot com]
+ */
+public class AbstractBookTokenFilter extends TokenFilter
+{
+
+ /**
+ * Create a TokenFilter not tied to a Book.
+ *
+ * @param input the token stream to filter
+ */
+ public AbstractBookTokenFilter(TokenStream input)
+ {
+ this(null, input);
+ }
+
+ /**
+ * Create a TokenFilter tied to a Book.
+ *
+ * @param input the token stream to filter
+ */
+ public AbstractBookTokenFilter(Book book, TokenStream input)
+ {
+ super(input);
+ this.book = book;
+ }
+
+ /**
+ * @return the book
+ */
+ public Book getBook()
+ {
+ return book;
+ }
+
+ /**
+ * @param book the book to set
+ */
+ public void setBook(Book book)
+ {
+ this.book = book;
+ }
+
+ private Book book;
+}
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/AnalyzerFactory.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/AnalyzerFactory.java 2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/AnalyzerFactory.java 2008-05-18 02:15:02 UTC (rev 1865)
@@ -27,6 +27,7 @@
import org.crosswire.common.util.ClassUtil;
import org.crosswire.common.util.Logger;
import org.crosswire.common.util.ResourceUtil;
+import org.crosswire.jsword.book.Book;
/**
* A factory creating the appropriate Analyzer for natural language analysis of text for Lucene
@@ -45,9 +46,10 @@
*/
public class AnalyzerFactory
{
- public AbstractAnalyzer createAnalyzer(String lang)
+ public AbstractBookAnalyzer createAnalyzer(Book book)
{
- AbstractAnalyzer newObject = null;
+ AbstractBookAnalyzer newObject = null;
+ String lang = book == null ? null : book.getLanguage().getName();
if (lang != null)
{
String adjustLang = lang;
@@ -67,7 +69,7 @@
{
Class impl = ClassUtil.forName(aClass);
- newObject = (AbstractAnalyzer) impl.newInstance();
+ newObject = (AbstractBookAnalyzer) impl.newInstance();
}
catch (ClassNotFoundException e)
{
@@ -90,6 +92,7 @@
}
// Configure the analyzer
+ newObject.setBook(book);
newObject.setDoStemming(getDefaultStemmingProperty());
newObject.setDoStopWords(getDefaultStopWordProperty());
newObject.setNaturalLanguage(lang);
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/ChineseLuceneAnalyzer.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/ChineseLuceneAnalyzer.java 2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/ChineseLuceneAnalyzer.java 2008-05-18 02:15:02 UTC (rev 1865)
@@ -38,7 +38,7 @@
* The copyright to this program is held by it's authors.
* @author Sijo Cherian [sijocherian at yahoo dot com]
*/
-public class ChineseLuceneAnalyzer extends AbstractAnalyzer
+public class ChineseLuceneAnalyzer extends AbstractBookAnalyzer
{
public ChineseLuceneAnalyzer()
{
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/ConfigurableSnowballAnalyzer.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/ConfigurableSnowballAnalyzer.java 2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/ConfigurableSnowballAnalyzer.java 2008-05-18 02:15:02 UTC (rev 1865)
@@ -64,7 +64,7 @@
* The copyright to this program is held by it's authors.
* @author sijo cherian [sijocherian at yahoo dot com]
*/
-public class ConfigurableSnowballAnalyzer extends AbstractAnalyzer
+public class ConfigurableSnowballAnalyzer extends AbstractBookAnalyzer
{
public ConfigurableSnowballAnalyzer()
{
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/CzechLuceneAnalyzer.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/CzechLuceneAnalyzer.java 2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/CzechLuceneAnalyzer.java 2008-05-18 02:15:02 UTC (rev 1865)
@@ -35,7 +35,7 @@
* The copyright to this program is held by it's authors.
* @author Sijo Cherian [sijocherian at yahoo dot com]
*/
-public class CzechLuceneAnalyzer extends AbstractAnalyzer
+public class CzechLuceneAnalyzer extends AbstractBookAnalyzer
{
public CzechLuceneAnalyzer()
{
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/EnglishLuceneAnalyzer.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/EnglishLuceneAnalyzer.java 2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/EnglishLuceneAnalyzer.java 2008-05-18 02:15:02 UTC (rev 1865)
@@ -39,7 +39,7 @@
* The copyright to this program is held by it's authors.
* @author sijo cherian [sijocherian at yahoo dot com]
*/
-public class EnglishLuceneAnalyzer extends AbstractAnalyzer
+public class EnglishLuceneAnalyzer extends AbstractBookAnalyzer
{
public EnglishLuceneAnalyzer()
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/GermanLuceneAnalyzer.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/GermanLuceneAnalyzer.java 2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/GermanLuceneAnalyzer.java 2008-05-18 02:15:02 UTC (rev 1865)
@@ -36,7 +36,7 @@
* The copyright to this program is held by it's authors.
* @author Sijo Cherian [sijocherian at yahoo dot com]
*/
-public class GermanLuceneAnalyzer extends AbstractAnalyzer
+public class GermanLuceneAnalyzer extends AbstractBookAnalyzer
{
public TokenStream tokenStream(String fieldName, Reader reader)
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/GreekLuceneAnalyzer.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/GreekLuceneAnalyzer.java 2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/GreekLuceneAnalyzer.java 2008-05-18 02:15:02 UTC (rev 1865)
@@ -35,7 +35,7 @@
* The copyright to this program is held by it's authors.
* @author Sijo Cherian [sijocherian at yahoo dot com]
*/
-public class GreekLuceneAnalyzer extends AbstractAnalyzer
+public class GreekLuceneAnalyzer extends AbstractBookAnalyzer
{
public GreekLuceneAnalyzer()
{
Copied: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/KeyAnalyzer.java (from rev 1853, trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/KeyAnalyzer.java)
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/KeyAnalyzer.java (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/KeyAnalyzer.java 2008-05-18 02:15:02 UTC (rev 1865)
@@ -0,0 +1,61 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ * http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ * Free Software Foundation, Inc.
+ * 59 Temple Place - Suite 330
+ * Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2007
+ * The copyright to this program is held by it's authors.
+ *
+ * ID: $Id$
+ */
+package org.crosswire.jsword.index.lucene.analysis;
+
+import java.io.Reader;
+
+import org.apache.lucene.analysis.KeywordTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+import org.crosswire.jsword.book.Book;
+
+/**
+ * A specialized analyzer that normalizes JSword keys.
+ *
+ * @see gnu.lgpl.License for license details.
+ * The copyright to this program is held by it's authors.
+ * @author DM Smith [dmsmith555 at yahoo dot com]
+ */
+public class KeyAnalyzer extends AbstractBookAnalyzer
+{
+ /**
+ * Construct a default KeyAnalyzer.
+ */
+ public KeyAnalyzer()
+ {
+ }
+
+ /**
+ * Construct a KeyAnalyzer tied to a book.
+ */
+ public KeyAnalyzer(Book book)
+ {
+ setBook(book);
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.lucene.analysis.Analyzer#tokenStream(java.lang.String, java.io.Reader)
+ */
+ public TokenStream tokenStream(String fieldName, Reader reader)
+ {
+ return new KeyFilter(getBook(), new KeywordTokenizer(reader));
+ }
+}
Copied: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/KeyFilter.java (from rev 1853, trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/KeyFilter.java)
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/KeyFilter.java (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/KeyFilter.java 2008-05-18 02:15:02 UTC (rev 1865)
@@ -0,0 +1,66 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ * http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ * Free Software Foundation, Inc.
+ * 59 Temple Place - Suite 330
+ * Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2007
+ * The copyright to this program is held by it's authors.
+ *
+ * ID: $Id$
+ */
+package org.crosswire.jsword.index.lucene.analysis;
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
+import org.crosswire.jsword.book.Book;
+
+/**
+ * A KeyFilter normalizes Key.
+ *
+ * @see gnu.lgpl.License for license details.
+ * The copyright to this program is held by it's authors.
+ * @author DM Smith [dmsmith555 at yahoo dot com]
+ */
+public class KeyFilter extends AbstractBookTokenFilter
+{
+ /**
+ * Construct a KeyFilter not tied to a Book.
+ * @param in the input TokenStream
+ */
+ public KeyFilter(TokenStream in)
+ {
+ this(null, in);
+ }
+
+ /**
+ * Construct a KeyFilter tied to a Book.
+ * @param book the book to which this TokenFilter is tied.
+ * @param in the input TokenStream
+ */
+ public KeyFilter(Book book, TokenStream in)
+ {
+ super(book, in);
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.lucene.analysis.TokenStream#next(org.apache.lucene.analysis.Token)
+ */
+ public final Token next(Token result) throws IOException
+ {
+ // TODO(DMS): actually normalize
+ return input.next(result);
+ }
+}
Copied: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/LuceneAnalyzer.java (from rev 1853, trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/LuceneAnalyzer.java)
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/LuceneAnalyzer.java (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/LuceneAnalyzer.java 2008-05-18 02:15:02 UTC (rev 1865)
@@ -0,0 +1,75 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ * http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ * Free Software Foundation, Inc.
+ * 59 Temple Place - Suite 330
+ * Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2005
+ * The copyright to this program is held by it's authors.
+ *
+ * ID: $Id:LuceneIndex.java 984 2006-01-23 14:18:33 -0500 (Mon, 23 Jan 2006) dmsmith $
+ */
+package org.crosswire.jsword.index.lucene.analysis;
+
+import java.io.Reader;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
+import org.apache.lucene.analysis.SimpleAnalyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.crosswire.jsword.book.Book;
+import org.crosswire.jsword.index.lucene.IndexMetadata;
+import org.crosswire.jsword.index.lucene.LuceneIndex;
+
+/**
+ * A specialized analyzer for Books that analyzes different fields differently.
+ * This is book specific since it is possible that each book has specialized search requirements.
+ *
+ * Uses AnalyzerFactory for InstalledIndexVersion > 1.1
+ * @see gnu.lgpl.License for license details.
+ * The copyright to this program is held by it's authors.
+ * @author DM Smith [dmsmith555 at yahoo dot com]
+ */
+public class LuceneAnalyzer extends Analyzer
+{
+
+ public LuceneAnalyzer(Book book)
+ {
+ // The default analysis
+ analyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer());
+
+ if (IndexMetadata.instance().getInstalledIndexVersion() > IndexMetadata.INDEX_VERSION_1_1)
+ {
+ // Content is analyzed using natural language analyzer
+ // (stemming, stopword etc)
+ Analyzer myNaturalLanguageAnalyzer = AnalyzerFactory.getInstance().createAnalyzer(book);
+ analyzer.addAnalyzer(LuceneIndex.FIELD_BODY, myNaturalLanguageAnalyzer);
+ }
+
+ // Keywords are normalized to osisIDs
+ analyzer.addAnalyzer(LuceneIndex.FIELD_KEY, new KeyAnalyzer());
+
+ // Strong's Numbers are normalized to a consistent representation
+ analyzer.addAnalyzer(LuceneIndex.FIELD_STRONG, new StrongsNumberAnalyzer());
+
+ // XRefs are normalized from ranges into a list of osisIDs
+ analyzer.addAnalyzer(LuceneIndex.FIELD_XREF, new XRefAnalyzer());
+ }
+
+ public TokenStream tokenStream(String fieldName, Reader reader)
+ {
+ return analyzer.tokenStream(fieldName, reader);
+ }
+
+ private PerFieldAnalyzerWrapper analyzer;
+}
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/SimpleLuceneAnalyzer.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/SimpleLuceneAnalyzer.java 2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/SimpleLuceneAnalyzer.java 2008-05-18 02:15:02 UTC (rev 1865)
@@ -43,7 +43,7 @@
* The copyright to this program is held by it's authors.
* @author Sijo Cherian [sijocherian at yahoo dot com]
*/
-public class SimpleLuceneAnalyzer extends AbstractAnalyzer
+public class SimpleLuceneAnalyzer extends AbstractBookAnalyzer
{
public SimpleLuceneAnalyzer()
Copied: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/StrongsNumberAnalyzer.java (from rev 1853, trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/StrongsNumberAnalyzer.java)
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/StrongsNumberAnalyzer.java (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/StrongsNumberAnalyzer.java 2008-05-18 02:15:02 UTC (rev 1865)
@@ -0,0 +1,61 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ * http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ * Free Software Foundation, Inc.
+ * 59 Temple Place - Suite 330
+ * Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2007
+ * The copyright to this program is held by it's authors.
+ *
+ * ID: $Id$
+ */
+package org.crosswire.jsword.index.lucene.analysis;
+
+import java.io.Reader;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.crosswire.jsword.book.Book;
+
+/**
+ * A specialized analyzer that normalizes Strong's Numbers.
+ *
+ * @see gnu.lgpl.License for license details.
+ * The copyright to this program is held by it's authors.
+ * @author DM Smith [dmsmith555 at yahoo dot com]
+ */
+public class StrongsNumberAnalyzer extends AbstractBookAnalyzer
+{
+ /**
+ * Construct a default StrongsNumberAnalyzer.
+ */
+ public StrongsNumberAnalyzer()
+ {
+ }
+
+ /**
+ * Construct a StrongsNumberAnalyzer tied to a book.
+ */
+ public StrongsNumberAnalyzer(Book book)
+ {
+ setBook(book);
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.lucene.analysis.Analyzer#tokenStream(java.lang.String, java.io.Reader)
+ */
+ public TokenStream tokenStream(String fieldName, Reader reader)
+ {
+ return new StrongsNumberFilter(getBook(), new WhitespaceTokenizer(reader));
+ }
+}
Copied: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/StrongsNumberFilter.java (from rev 1853, trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/StrongsNumberFilter.java)
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/StrongsNumberFilter.java (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/StrongsNumberFilter.java 2008-05-18 02:15:02 UTC (rev 1865)
@@ -0,0 +1,110 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ * http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ * Free Software Foundation, Inc.
+ * 59 Temple Place - Suite 330
+ * Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2007
+ * The copyright to this program is held by it's authors.
+ *
+ * ID: $Id$
+ */
+package org.crosswire.jsword.index.lucene.analysis;
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
+import org.crosswire.jsword.book.Book;
+import org.crosswire.jsword.book.BookException;
+import org.crosswire.jsword.book.DataPolice;
+import org.crosswire.jsword.book.study.StrongsNumber;
+
+/**
+ * A StrongsNumberFilter normalizes Strong's Numbers.
+ *
+ * @see gnu.lgpl.License for license details.
+ * The copyright to this program is held by it's authors.
+ * @author DM Smith [dmsmith555 at yahoo dot com]
+ */
+public class StrongsNumberFilter extends AbstractBookTokenFilter
+{
+
+ /**
+ * Construct filtering <i>in</i>.
+ */
+ public StrongsNumberFilter(TokenStream in)
+ {
+ this(null, in);
+ }
+
+ /**
+ * Construct filtering <i>in</i>.
+ */
+ public StrongsNumberFilter(Book book, TokenStream in)
+ {
+ super(book, in);
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.lucene.analysis.TokenStream#next(org.apache.lucene.analysis.Token)
+ */
+ public final Token next(Token result) throws IOException
+ {
+ // If the token is suffixed with '!a' or 'a', where 'a' is a sequence of 1 or more letters
+ // then create a token without the suffix and also for the whole.
+ Token token = result;
+ if (lastToken == null)
+ {
+ token = input.next(token);
+ if (token == null)
+ {
+ return null;
+ }
+
+ try
+ {
+ char[] buf = result.termBuffer();
+ String tokenText = new String(buf, 0, result.termLength());
+
+ number = new StrongsNumber(tokenText);
+ String s = number.getStrongsNumber();
+
+ if (!s.equals(tokenText))
+ {
+ result.setTermBuffer(s.toCharArray(), 0, s.length());
+ }
+
+ if (number.isPart())
+ {
+ lastToken = result;
+ }
+ }
+ catch (BookException e)
+ {
+ DataPolice.report(e.getDetailedMessage());
+ }
+ }
+ else
+ {
+ token = lastToken;
+ lastToken = null;
+ String s = number.getFullStrongsNumber();
+ result.setTermBuffer(s.toCharArray(), 0, s.length());
+ }
+ return token;
+ }
+
+ private Token lastToken;
+ private StrongsNumber number;
+}
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/ThaiLuceneAnalyzer.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/ThaiLuceneAnalyzer.java 2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/ThaiLuceneAnalyzer.java 2008-05-18 02:15:02 UTC (rev 1865)
@@ -36,7 +36,7 @@
* The copyright to this program is held by it's authors.
* @author sijo cherian [sijocherian at yahoo dot com]
*/
-public class ThaiLuceneAnalyzer extends AbstractAnalyzer
+public class ThaiLuceneAnalyzer extends AbstractBookAnalyzer
{
public TokenStream tokenStream(String fieldName, Reader reader)
Copied: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/XRefAnalyzer.java (from rev 1853, trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/XRefAnalyzer.java)
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/XRefAnalyzer.java (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/XRefAnalyzer.java 2008-05-18 02:15:02 UTC (rev 1865)
@@ -0,0 +1,61 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ * http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ * Free Software Foundation, Inc.
+ * 59 Temple Place - Suite 330
+ * Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2007
+ * The copyright to this program is held by it's authors.
+ *
+ * ID: $Id$
+ */
+package org.crosswire.jsword.index.lucene.analysis;
+
+import java.io.Reader;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.crosswire.jsword.book.Book;
+
+/**
+ * A specialized analyzer that normalizes cross references (XRefs).
+ *
+ * @see gnu.lgpl.License for license details.
+ * The copyright to this program is held by it's authors.
+ * @author DM Smith [dmsmith555 at yahoo dot com]
+ */
+public class XRefAnalyzer extends AbstractBookAnalyzer
+{
+ /**
+ * Construct a default XRefAnalyzer.
+ */
+ public XRefAnalyzer()
+ {
+ }
+
+ /**
+ * Construct an XRefAnalyzer tied to a book.
+ */
+ public XRefAnalyzer(Book book)
+ {
+ setBook(book);
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.lucene.analysis.Analyzer#tokenStream(java.lang.String, java.io.Reader)
+ */
+ public TokenStream tokenStream(String fieldName, Reader reader)
+ {
+ return new KeyFilter(getBook(), new WhitespaceTokenizer(reader));
+ }
+}
Copied: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/XRefFilter.java (from rev 1853, trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/XRefFilter.java)
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/XRefFilter.java (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/XRefFilter.java 2008-05-18 02:15:02 UTC (rev 1865)
@@ -0,0 +1,65 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ * http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ * Free Software Foundation, Inc.
+ * 59 Temple Place - Suite 330
+ * Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2007
+ * The copyright to this program is held by it's authors.
+ *
+ * ID: $Id$
+ */
+package org.crosswire.jsword.index.lucene.analysis;
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
+import org.crosswire.jsword.book.Book;
+
+/**
+ * An XRefFilter normalizes OSISrefs.
+ *
+ * @see gnu.lgpl.License for license details.
+ * The copyright to this program is held by it's authors.
+ * @author DM Smith [dmsmith555 at yahoo dot com]
+ */
+public class XRefFilter extends AbstractBookTokenFilter
+{
+ /**
+ * Construct filtering <i>in</i>.
+ */
+ public XRefFilter(TokenStream in)
+ {
+ this(null, in);
+ }
+
+ /**
+ * Construct an XRefFilter tied to a Book.
+ * @param book the book to which this TokenFilter is tied.
+ * @param in the input TokenStream
+ */
+ public XRefFilter(Book book, TokenStream in)
+ {
+ super(book, in);
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.lucene.analysis.TokenStream#next(org.apache.lucene.analysis.Token)
+ */
+ public final Token next(Token result) throws IOException
+ {
+ // TODO(DMS): actually normalize
+ return input.next(result);
+ }
+}
Modified: trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/AnalyzerFactoryTest.java
===================================================================
--- trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/AnalyzerFactoryTest.java 2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/AnalyzerFactoryTest.java 2008-05-18 02:15:02 UTC (rev 1865)
@@ -59,18 +59,16 @@
*/
public void testCreateAnalyzer()
{
- Analyzer myAnalyzer = AnalyzerFactory.getInstance().createAnalyzer(""); //$NON-NLS-1$
+ Analyzer myAnalyzer = AnalyzerFactory.getInstance().createAnalyzer(null);
assertTrue(myAnalyzer!=null);
myAnalyzer = AnalyzerFactory.getInstance().createAnalyzer(null);
assertTrue(myAnalyzer!=null);
- myAnalyzer = AnalyzerFactory.getInstance().createAnalyzer("Unknown"); //$NON-NLS-1$
- assertTrue(myAnalyzer!=null);
}
public void testEngStemming() throws ParseException
{
- AbstractAnalyzer myAnalyzer = new EnglishLuceneAnalyzer();
+ AbstractBookAnalyzer myAnalyzer = new EnglishLuceneAnalyzer();
QueryParser parser = new QueryParser(field, myAnalyzer);
@@ -105,7 +103,7 @@
System.out.println(query.toString());
}
-
+/*
public void testLatin1Language() throws ParseException {
Analyzer myAnalyzer = AnalyzerFactory.getInstance().createAnalyzer("Latin"); //$NON-NLS-1$
@@ -124,6 +122,6 @@
}
-
+*/
protected static final String field = "content"; //$NON-NLS-1$
}
Modified: trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/ChineseLuceneAnalyzerTest.java
===================================================================
--- trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/ChineseLuceneAnalyzerTest.java 2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/ChineseLuceneAnalyzerTest.java 2008-05-18 02:15:02 UTC (rev 1865)
@@ -62,6 +62,6 @@
}
protected static final String field = "content"; //$NON-NLS-1$
- private AbstractAnalyzer myAnalyzer;
+ private AbstractBookAnalyzer myAnalyzer;
private QueryParser parser;
}
Modified: trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/ConfigurableSnowballAnalyzerTest.java
===================================================================
--- trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/ConfigurableSnowballAnalyzerTest.java 2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/ConfigurableSnowballAnalyzerTest.java 2008-05-18 02:15:02 UTC (rev 1865)
@@ -141,6 +141,6 @@
}
protected static final String field = "content"; //$NON-NLS-1$
- private AbstractAnalyzer myAnalyzer;
+ private AbstractBookAnalyzer myAnalyzer;
private QueryParser parser;
}
Modified: trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/EnglishLuceneAnalyzerTest.java
===================================================================
--- trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/EnglishLuceneAnalyzerTest.java 2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/EnglishLuceneAnalyzerTest.java 2008-05-18 02:15:02 UTC (rev 1865)
@@ -101,6 +101,6 @@
protected static final String field = "content"; //$NON-NLS-1$
- private AbstractAnalyzer myAnalyzer;
+ private AbstractBookAnalyzer myAnalyzer;
private QueryParser parser;
}
Modified: trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/GreekLuceneAnalyzerTest.java
===================================================================
--- trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/GreekLuceneAnalyzerTest.java 2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/GreekLuceneAnalyzerTest.java 2008-05-18 02:15:02 UTC (rev 1865)
@@ -64,6 +64,6 @@
}
protected static final String field = "content"; //$NON-NLS-1$
- private AbstractAnalyzer myAnalyzer;
+ private AbstractBookAnalyzer myAnalyzer;
private QueryParser parser;
}
Modified: trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/ThaiLuceneAnalyzerTest.java
===================================================================
--- trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/ThaiLuceneAnalyzerTest.java 2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/ThaiLuceneAnalyzerTest.java 2008-05-18 02:15:02 UTC (rev 1865)
@@ -74,6 +74,6 @@
protected static final String field = "content"; //$NON-NLS-1$
- private AbstractAnalyzer myAnalyzer;
+ private AbstractBookAnalyzer myAnalyzer;
private QueryParser parser;
}
More information about the jsword-svn
mailing list