Use of org.apache.lucene.analysis.en.EnglishAnalyzer in the elasticsearch project by elastic.
Class AnalysisRegistryTests, method testOverrideDefaultIndexAnalyzerIsUnsupported.
/**
 * Checks that building the registry with an analyzer registered under the name
 * "default_index" fails with an {@link IllegalArgumentException} whose message
 * reports that the setting is not supported.
 * The index is created with a random version between 5.0.0-alpha1 and the current version.
 */
public void testOverrideDefaultIndexAnalyzerIsUnsupported() {
    Version createdVersion = VersionUtils.randomVersionBetween(random(), Version.V_5_0_0_alpha1, Version.CURRENT);
    Settings indexSettings = Settings.builder()
        .put(IndexMetaData.SETTING_VERSION_CREATED, createdVersion)
        .build();
    AnalyzerProvider<?> defaultIndexProvider =
        new PreBuiltAnalyzerProvider("default_index", AnalyzerScope.INDEX, new EnglishAnalyzer());
    IllegalArgumentException thrown = expectThrows(
        IllegalArgumentException.class,
        () -> registry.build(
            IndexSettingsModule.newIndexSettings("index", indexSettings),
            singletonMap("default_index", defaultIndexProvider),
            emptyMap(),
            emptyMap(),
            emptyMap(),
            emptyMap()));
    String message = thrown.getMessage();
    assertTrue(message.contains("[index.analysis.analyzer.default_index] is not supported"));
}
Use of org.apache.lucene.analysis.en.EnglishAnalyzer in the lucene-solr project by apache.
Class KNearestNeighborClassifierTest, method testRankedClasses.
/**
 * This test is for the scenario where, in the first topK results from the MLT query, we have the same number of results per class,
 * but the results for one class are ranked better than the results of the second class.
 * So we would expect a greater score for the best ranked class.
 *
 * @throws Exception if any error happens
 */
@Test
public void testRankedClasses() throws Exception {
  Analyzer analyzer = new EnglishAnalyzer();
  // try-with-resources closes the reader even when an assertion fails; a failure inside close()
  // is attached as a suppressed exception instead of replacing the original test failure.
  try (LeafReader leafReader = getSampleIndex(analyzer)) {
    KNearestNeighborClassifier knnClassifier = new KNearestNeighborClassifier(leafReader, null, analyzer, null, 6, 1, 1, categoryFieldName, textFieldName);
    List<ClassificationResult<BytesRef>> classes = knnClassifier.getClasses(STRONG_TECHNOLOGY_INPUT);
    // the top entry must score strictly higher than the runner-up
    assertTrue(classes.get(0).getScore() > classes.get(1).getScore());
    checkCorrectClassification(knnClassifier, STRONG_TECHNOLOGY_INPUT, TECHNOLOGY_RESULT);
  }
}
Use of org.apache.lucene.analysis.en.EnglishAnalyzer in the lucene-solr project by apache.
Class KNearestNeighborClassifierTest, method testUnbalancedClasses.
/**
 * This test is for the scenario where, in the first topK results from the MLT query, we have fewer results
 * for the expected class than for the bad class,
 * but the results for the expected class have a better score than the results of the second class.
 * So we would expect a greater score for the best ranked class.
 *
 * @throws Exception if any error happens
 */
@Test
public void testUnbalancedClasses() throws Exception {
  Analyzer analyzer = new EnglishAnalyzer();
  // try-with-resources closes the reader even when an assertion fails; a failure inside close()
  // is attached as a suppressed exception instead of replacing the original test failure.
  try (LeafReader leafReader = getSampleIndex(analyzer)) {
    KNearestNeighborClassifier knnClassifier = new KNearestNeighborClassifier(leafReader, null, analyzer, null, 3, 1, 1, categoryFieldName, textFieldName);
    List<ClassificationResult<BytesRef>> classes = knnClassifier.getClasses(SUPER_STRONG_TECHNOLOGY_INPUT);
    // the top entry must score strictly higher than the runner-up
    assertTrue(classes.get(0).getScore() > classes.get(1).getScore());
    checkCorrectClassification(knnClassifier, SUPER_STRONG_TECHNOLOGY_INPUT, TECHNOLOGY_RESULT);
  }
}
Use of org.apache.lucene.analysis.en.EnglishAnalyzer in the lucene-solr project by apache.
Class DocumentClassificationTestBase, method init.
/**
 * Prepares the fixture before each test: creates an {@link EnglishAnalyzer}, registers it
 * for the text, title and author fields (in that order), and populates the index reader.
 *
 * @throws IOException declared by this method; presumably raised while populating the index
 */
@Before
public void init() throws IOException {
  analyzer = new EnglishAnalyzer();
  field2analyzer = new LinkedHashMap<>();
  // insertion order matters for a LinkedHashMap, so keep text, title, author
  String[] analyzedFields = {textFieldName, titleFieldName, authorFieldName};
  for (String fieldName : analyzedFields) {
    field2analyzer.put(fieldName, analyzer);
  }
  indexReader = populateDocumentClassificationIndex(analyzer);
}
Use of org.apache.lucene.analysis.en.EnglishAnalyzer in the Anserini project by castorini.
Class IndexerTest, method testCloneIndex.
/**
 * Clones the first leaf of the index at INDEX_PATH1 into a freshly created index at
 * INDEX_PATH2 (wrapping the leaf in MyFilterCodecReader), force-merges the clone into a
 * single segment, then reopens the clone and verifies its document count, segment count
 * and per-term document frequencies.
 *
 * All directories, readers and the writer are managed with try-with-resources so they are
 * released even when an assertion or I/O call fails part-way through.
 *
 * @throws Exception if building, cloning or reading the index fails
 */
@Test
public void testCloneIndex() throws Exception {
  // presumably creates the source index at INDEX_PATH1 -- defined outside this block
  buildTestIndex();
  System.out.println("Cloning index:");
  try (Directory dir1 = FSDirectory.open(Paths.get(INDEX_PATH1));
       IndexReader sourceReader = DirectoryReader.open(dir1);
       Directory dir2 = FSDirectory.open(Paths.get(INDEX_PATH2))) {
    IndexWriterConfig config = new IndexWriterConfig(new EnglishAnalyzer());
    // CREATE overwrites whatever index already exists at the target path
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    try (IndexWriter writer = new IndexWriter(dir2, config)) {
      LeafReader leafReader = sourceReader.leaves().get(0).reader();
      CodecReader codecReader = SlowCodecReaderWrapper.wrap(leafReader);
      writer.addIndexes(new MyFilterCodecReader(codecReader));
      writer.commit();
      writer.forceMerge(1);
    }
    // Open up the cloned index and verify it.
    try (IndexReader clonedReader = DirectoryReader.open(dir2)) {
      assertEquals(3, clonedReader.numDocs());
      assertEquals(1, clonedReader.leaves().size());
      System.out.println("Dumping out postings...");
      dumpPostings(clonedReader);
      assertEquals(2, clonedReader.docFreq(new Term("text", "here")));
      assertEquals(2, clonedReader.docFreq(new Term("text", "more")));
      assertEquals(1, clonedReader.docFreq(new Term("text", "some")));
      assertEquals(1, clonedReader.docFreq(new Term("text", "test")));
      assertEquals(2, clonedReader.docFreq(new Term("text", "text")));
    }
  }
}
Aggregations