Use of org.apache.lucene.analysis.CannedTokenStream in the lucene-solr project by Apache.
Example: the testCreationWithBlackList method of the TestTypeTokenFilterFactory class.
public void testCreationWithBlackList() throws Exception {
  // Smoke test: a Type filter factory configured with two stop-type files
  // (blacklist mode, the default) must be creatable without throwing.
  CannedTokenStream emptyStream = new CannedTokenStream();
  TokenFilterFactory typeFactory = tokenFilterFactory("Type", "types", "stoptypes-1.txt, stoptypes-2.txt");
  typeFactory.create(emptyStream);
}
Use of org.apache.lucene.analysis.CannedTokenStream in the lucene-solr project by Apache.
Example: the testCreationWithWhiteList method of the TestTypeTokenFilterFactory class.
public void testCreationWithWhiteList() throws Exception {
  // Smoke test: the same two stop-type files, but with useWhitelist=true so
  // only the listed types are kept; creation must still succeed.
  CannedTokenStream emptyStream = new CannedTokenStream();
  TokenFilterFactory typeFactory = tokenFilterFactory("Type", "types", "stoptypes-1.txt, stoptypes-2.txt", "useWhitelist", "true");
  typeFactory.create(emptyStream);
}
Use of org.apache.lucene.analysis.CannedTokenStream in the lucene-solr project by Apache.
Example: the testTwoTrailingHolesTriShingleWithTokenFiller method of the ShingleFilterTest class.
public void testTwoTrailingHolesTriShingleWithTokenFiller() throws IOException {
// Analyzing "purple wizard of the", where "of" and "the" have been removed as
// stopwords, leaving two trailing holes (position increments past the last
// real token). The canned stream starts with a position increment of 2 and a
// final offset of 20 to model those holes.
Token[] inputTokens = new Token[] { createToken("purple", 0, 6), createToken("wizard", 7, 13) };
// Scenario 1: an explicit "--" filler token stands in for each hole.
ShingleFilter filter = new ShingleFilter(new CannedTokenStream(2, 20, inputTokens), 2, 3);
filter.setFillerToken("--");
assertTokenStreamContents(filter, new String[] { "purple", "purple wizard", "purple wizard --", "wizard", "wizard --", "wizard -- --" }, new int[] { 0, 0, 0, 7, 7, 7 }, new int[] { 6, 13, 20, 13, 20, 20 }, new int[] { 1, 0, 0, 1, 0, 0 }, 20);
// Scenario 2: an empty filler token — holes contribute only the separator,
// so shingles end with trailing separator characters.
filter = new ShingleFilter(new CannedTokenStream(2, 20, inputTokens), 2, 3);
filter.setFillerToken("");
assertTokenStreamContents(filter, new String[] { "purple", "purple wizard", "purple wizard ", "wizard", "wizard ", "wizard " }, new int[] { 0, 0, 0, 7, 7, 7 }, new int[] { 6, 13, 20, 13, 20, 20 }, new int[] { 1, 0, 0, 1, 0, 0 }, 20);
// Scenario 3: a null filler token behaves the same as the empty string.
filter = new ShingleFilter(new CannedTokenStream(2, 20, inputTokens), 2, 3);
filter.setFillerToken(null);
assertTokenStreamContents(filter, new String[] { "purple", "purple wizard", "purple wizard ", "wizard", "wizard ", "wizard " }, new int[] { 0, 0, 0, 7, 7, 7 }, new int[] { 6, 13, 20, 13, 20, 20 }, new int[] { 1, 0, 0, 1, 0, 0 }, 20);
// Scenario 4: null filler AND null separator — tokens are concatenated
// directly and holes add nothing at all.
filter = new ShingleFilter(new CannedTokenStream(2, 20, inputTokens), 2, 3);
filter.setFillerToken(null);
filter.setTokenSeparator(null);
assertTokenStreamContents(filter, new String[] { "purple", "purplewizard", "purplewizard", "wizard", "wizard", "wizard" }, new int[] { 0, 0, 0, 7, 7, 7 }, new int[] { 6, 13, 20, 13, 20, 20 }, new int[] { 1, 0, 0, 1, 0, 0 }, 20);
}
Use of org.apache.lucene.analysis.CannedTokenStream in the lucene-solr project by Apache.
Example: the shingleFilterTest helper method of the ShingleFilterTest class.
protected void shingleFilterTest(String tokenSeparator, int minSize, int maxSize, Token[] tokensToShingle, Token[] tokensToCompare, int[] positionIncrements, String[] types, boolean outputUnigrams) throws IOException {
  // Wrap the canned tokens in a ShingleFilter configured per the arguments,
  // then delegate the token-by-token verification to the shared helper.
  final ShingleFilter shingles = new ShingleFilter(new CannedTokenStream(tokensToShingle), minSize, maxSize);
  shingles.setOutputUnigrams(outputUnigrams);
  shingles.setTokenSeparator(tokenSeparator);
  shingleFilterTestCommon(shingles, tokensToCompare, positionIncrements, types);
}
Use of org.apache.lucene.analysis.CannedTokenStream in the lucene-solr project by Apache.
Example: the checkTokens method of the TestPostingsOffsets class.
// TODO: more tests with other possibilities
// Indexes two canned token streams into the same field of one document with
// offsets, positions, and term vectors enabled, then closes the writer so the
// RandomIndexWriter/checkindex machinery can cross-check the postings.
// NOTE(review): `iwc` is a field declared elsewhere in this class — presumably
// an IndexWriterConfig set up per-test; verify against the enclosing class.
private void checkTokens(Token[] field1, Token[] field2) throws IOException {
Directory dir = newDirectory();
RandomIndexWriter riw = new RandomIndexWriter(random(), dir, iwc);
// Tracks whether indexing completed, so the finally block knows whether to
// close normally or suppress secondary exceptions from cleanup.
boolean success = false;
try {
FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
// store some term vectors for the checkindex cross-check
ft.setStoreTermVectors(true);
ft.setStoreTermVectorPositions(true);
ft.setStoreTermVectorOffsets(true);
Document doc = new Document();
// Two values for the same field name: the second stream is appended to the
// first within the document's "body" field.
doc.add(new Field("body", new CannedTokenStream(field1), ft));
doc.add(new Field("body", new CannedTokenStream(field2), ft));
riw.addDocument(doc);
riw.close();
success = true;
} finally {
if (success) {
// Writer already closed above; only the directory remains.
IOUtils.close(dir);
} else {
// Indexing failed: close both without masking the original exception.
IOUtils.closeWhileHandlingException(riw, dir);
}
}
}
Aggregations