Search in sources :

Example 6 with StringMockResourceLoader

use of org.apache.lucene.analysis.util.StringMockResourceLoader in project lucene-solr by apache.

the class TestSynonymFilterFactory method testEmptySynonyms.

/**
 * Checks that analysis still works when the synonyms resource is completely empty:
 * the single input token must pass through the Synonym filter unchanged.
 */
public void testEmptySynonyms() throws Exception {
    TokenStream tokens = whitespaceMockTokenizer(new StringReader("GB"));
    // An empty string stands in for an empty synonyms file.
    StringMockResourceLoader emptyLoader = new StringMockResourceLoader("");
    TokenStream filtered = tokenFilterFactory("Synonym", Version.LATEST, emptyLoader, "synonyms", "synonyms.txt").create(tokens);
    String[] expected = { "GB" };
    assertTokenStreamContents(filtered, expected);
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) StringReader(java.io.StringReader) Reader(java.io.Reader) StringMockResourceLoader(org.apache.lucene.analysis.util.StringMockResourceLoader)

Example 7 with StringMockResourceLoader

use of org.apache.lucene.analysis.util.StringMockResourceLoader in project lucene-solr by apache.

the class TestKeywordMarkerFilterFactory method testKeywordsMixed.

/**
 * Combines a protected-words resource ("cats") with a keyword pattern ("birds|Dogs")
 * on the KeywordMarker filter, then runs PorterStem and checks the resulting tokens.
 * Only "dogs" is expected to come out changed (as "dog").
 */
public void testKeywordsMixed() throws Exception {
    TokenStream tokens = whitespaceMockTokenizer(new StringReader("dogs cats birds"));
    StringMockResourceLoader protectedWords = new StringMockResourceLoader("cats");
    TokenStream marked = tokenFilterFactory("KeywordMarker", Version.LATEST, protectedWords, "protected", "protwords.txt", "pattern", "birds|Dogs").create(tokens);
    TokenStream stemmed = tokenFilterFactory("PorterStem").create(marked);
    String[] expected = { "dog", "cats", "birds" };
    assertTokenStreamContents(stemmed, expected);
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) StringReader(java.io.StringReader) Reader(java.io.Reader) StringMockResourceLoader(org.apache.lucene.analysis.util.StringMockResourceLoader)

Example 8 with StringMockResourceLoader

use of org.apache.lucene.analysis.util.StringMockResourceLoader in project lucene-solr by apache.

the class TestKeywordMarkerFilterFactory method testKeywords.

/**
 * Runs "dogs cats" through KeywordMarker (protected words resource containing "cats")
 * followed by PorterStem, and expects "dog" and "cats" as output.
 */
public void testKeywords() throws Exception {
    TokenStream tokens = whitespaceMockTokenizer(new StringReader("dogs cats"));
    StringMockResourceLoader protectedWords = new StringMockResourceLoader("cats");
    TokenStream marked = tokenFilterFactory("KeywordMarker", Version.LATEST, protectedWords, "protected", "protwords.txt").create(tokens);
    TokenStream stemmed = tokenFilterFactory("PorterStem").create(marked);
    String[] expected = { "dog", "cats" };
    assertTokenStreamContents(stemmed, expected);
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) StringReader(java.io.StringReader) Reader(java.io.Reader) StringMockResourceLoader(org.apache.lucene.analysis.util.StringMockResourceLoader)

Example 9 with StringMockResourceLoader

use of org.apache.lucene.analysis.util.StringMockResourceLoader in project lucene-solr by apache.

the class TestKeywordMarkerFilterFactory method testKeywordsCaseInsensitiveMixed.

/**
 * Same setup as the mixed protected-words/pattern case, but with "ignoreCase" set to
 * "true". Mixed-case variants of "cats" and "birds" are expected to come through
 * PorterStem unchanged, while "dogs" becomes "dog".
 */
public void testKeywordsCaseInsensitiveMixed() throws Exception {
    TokenStream tokens = whitespaceMockTokenizer(new StringReader("dogs cats Cats Birds birds"));
    StringMockResourceLoader protectedWords = new StringMockResourceLoader("cats");
    TokenStream marked = tokenFilterFactory("KeywordMarker", Version.LATEST, protectedWords, "protected", "protwords.txt", "pattern", "birds", "ignoreCase", "true").create(tokens);
    TokenStream stemmed = tokenFilterFactory("PorterStem").create(marked);
    String[] expected = { "dog", "cats", "Cats", "Birds", "birds" };
    assertTokenStreamContents(stemmed, expected);
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) StringReader(java.io.StringReader) Reader(java.io.Reader) StringMockResourceLoader(org.apache.lucene.analysis.util.StringMockResourceLoader)

Example 10 with StringMockResourceLoader

use of org.apache.lucene.analysis.util.StringMockResourceLoader in project lucene-solr by apache.

the class TestStemmerOverrideFilterFactory method testKeywords.

/**
 * Feeds "testing dogs" through StemmerOverride (dictionary entry "dogs" -> "cat")
 * followed by PorterStem, and expects "test" and "cat" as output.
 */
public void testKeywords() throws Exception {
    TokenStream tokens = whitespaceMockTokenizer(new StringReader("testing dogs"));
    // The stem dictionary is a single tab-separated entry that maps "dogs" to "cat".
    StringMockResourceLoader stemDictionary = new StringMockResourceLoader("dogs\tcat");
    TokenStream overridden = tokenFilterFactory("StemmerOverride", Version.LATEST, stemDictionary, "dictionary", "stemdict.txt").create(tokens);
    TokenStream stemmed = tokenFilterFactory("PorterStem").create(overridden);
    String[] expected = { "test", "cat" };
    assertTokenStreamContents(stemmed, expected);
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) StringReader(java.io.StringReader) Reader(java.io.Reader) StringMockResourceLoader(org.apache.lucene.analysis.util.StringMockResourceLoader)

Aggregations

StringMockResourceLoader (org.apache.lucene.analysis.util.StringMockResourceLoader) 11, Reader (java.io.Reader) 9, StringReader (java.io.StringReader) 9, TokenStream (org.apache.lucene.analysis.TokenStream) 9, Collator (com.ibm.icu.text.Collator) 1, RuleBasedCollator (com.ibm.icu.text.RuleBasedCollator) 1, ULocale (com.ibm.icu.util.ULocale) 1, File (java.io.File) 1, FileOutputStream (java.io.FileOutputStream) 1, IOException (java.io.IOException) 1, InvocationTargetException (java.lang.reflect.InvocationTargetException) 1, HashMap (java.util.HashMap) 1, Map (java.util.Map) 1, AbstractAnalysisFactory (org.apache.lucene.analysis.util.AbstractAnalysisFactory) 1, FilesystemResourceLoader (org.apache.lucene.analysis.util.FilesystemResourceLoader) 1, ResourceLoader (org.apache.lucene.analysis.util.ResourceLoader) 1, ResourceLoaderAware (org.apache.lucene.analysis.util.ResourceLoaderAware) 1