Use of org.apache.lucene.search.PhraseQuery in project lucene-solr by apache: class TestSynonymGraphFilter, method testFlattenedGraph.
/** If we expand synonyms during indexing, it's a bit better than
* SynonymFilter is today, but still necessarily has false
* positive and negative PhraseQuery matches because we do not
* index posLength, so we lose information. */
public void testFlattenedGraph() throws Exception {
  SynonymMap.Builder b = new SynonymMap.Builder();
  add(b, "wtf", "what the fudge", true);
  Analyzer a = getFlattenAnalyzer(b, true);
  assertAnalyzesTo(a, "wtf happened",
      new String[] { "what", "wtf", "the", "fudge", "happened" }, // tokens
      new int[] { 0, 0, 0, 0, 4 },   // start offsets
      new int[] { 3, 3, 3, 3, 12 },  // end offsets
      null,                          // types
      new int[] { 1, 0, 1, 1, 1 },   // position increments
      new int[] { 1, 3, 1, 1, 1 },   // position lengths
      true);                         // offsets are correct
  Directory dir = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), dir, a);
  Document doc = new Document();
  doc.add(newTextField("field", "wtf happened", Field.Store.NO));
  w.addDocument(doc);
  IndexReader r = w.getReader();
  w.close();
  IndexSearcher s = newSearcher(r);
  // Good (this should not match, and doesn't):
  assertEquals(0, s.count(new PhraseQuery("field", "what", "happened")));
  // Bad (this should match, but doesn't):
  assertEquals(0, s.count(new PhraseQuery("field", "wtf", "happened")));
  // Good (this should match, and does):
  assertEquals(1, s.count(new PhraseQuery("field", "what", "the", "fudge", "happened")));
  // Bad (this should not match, but does):
  assertEquals(1, s.count(new PhraseQuery("field", "wtf", "the")));
  IOUtils.close(r, dir);
}
Use of org.apache.lucene.search.PhraseQuery in project lucene-solr by apache: class TestIndexWriter, method testStopwordsPosIncHole.
// LUCENE-3849
public void testStopwordsPosIncHole() throws Exception {
  Directory dir = newDirectory();
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new MockTokenizer();
      TokenStream stream = new MockTokenFilter(tokenizer, MockTokenFilter.ENGLISH_STOPSET);
      return new TokenStreamComponents(tokenizer, stream);
    }
  };
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, a);
  Document doc = new Document();
  // The stopword "a" is removed but leaves a position-increment hole,
  // so "test" (in the second field value) indexes at position 2:
  doc.add(new TextField("body", "just a", Field.Store.NO));
  doc.add(new TextField("body", "test of gaps", Field.Store.NO));
  iw.addDocument(doc);
  IndexReader ir = iw.getReader();
  iw.close();
  IndexSearcher is = newSearcher(ir);
  PhraseQuery.Builder builder = new PhraseQuery.Builder();
  builder.add(new Term("body", "just"), 0);
  builder.add(new Term("body", "test"), 2);
  PhraseQuery pq = builder.build();
  // body:"just ? test"
  assertEquals(1, is.search(pq, 5).totalHits);
  ir.close();
  dir.close();
}
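The builder pins "just" at position 0 and "test" at position 2, so the query matches exactly across the one-position hole the stopword filter leaves. A looser alternative is slop, which allows up to N position moves rather than requiring exact positions; a sketch against the same index as above:

// Slop 1 lets "test" drift one position from the exact phrase,
// so this also matches "just [hole] test":
PhraseQuery sloppy = new PhraseQuery(1, "body", "just", "test");
assertEquals(1, is.search(sloppy, 5).totalHits);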
Use of org.apache.lucene.search.PhraseQuery in project lucene-solr by apache: class TestAddIndexes, method testWithPendingDeletes2.
public void testWithPendingDeletes2() throws IOException {
  // main directory
  Directory dir = newDirectory();
  // auxiliary directory
  Directory aux = newDirectory();
  setUpDirs(dir, aux);
  IndexWriter writer = newWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setOpenMode(OpenMode.APPEND));
  // Adds 10 docs, then replaces them with another 10 docs, so 10 pending deletes:
  for (int i = 0; i < 20; i++) {
    Document doc = new Document();
    doc.add(newStringField("id", "" + (i % 10), Field.Store.NO));
    doc.add(newTextField("content", "bbb " + i, Field.Store.NO));
    doc.add(new IntPoint("doc", i));
    doc.add(new IntPoint("doc2d", i, i));
    doc.add(new NumericDocValuesField("dv", i));
    writer.updateDocument(new Term("id", "" + (i % 10)), doc);
  }
  writer.addIndexes(aux);
  // Deletes one of the 10 added docs, leaving 9:
  PhraseQuery q = new PhraseQuery("content", "bbb", "14");
  writer.deleteDocuments(q);
  writer.forceMerge(1);
  writer.commit();
  verifyNumDocs(dir, 1039);
  verifyTermDocs(dir, new Term("content", "aaa"), 1030);
  verifyTermDocs(dir, new Term("content", "bbb"), 9);
  writer.close();
  dir.close();
  aux.close();
}
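verifyNumDocs and verifyTermDocs are private helpers of TestAddIndexes, not shown here. The expected counts follow from the setup: 1030 "aaa" documents from setUpDirs plus 9 surviving "bbb" replacements (10 minus the 1 phrase-deleted doc) gives 1039. A plausible sketch of the helpers using only stock IndexReader API (the real ones may walk postings instead):

private void verifyNumDocs(Directory dir, int numDocs) throws IOException {
  IndexReader reader = DirectoryReader.open(dir);
  // After forceMerge(1) + commit, deletes are purged, so both counts agree:
  assertEquals(numDocs, reader.maxDoc());
  assertEquals(numDocs, reader.numDocs());
  reader.close();
}

private void verifyTermDocs(Directory dir, Term term, int numDocs) throws IOException {
  IndexReader reader = DirectoryReader.open(dir);
  // docFreq counts the documents containing the term:
  assertEquals(numDocs, reader.docFreq(term));
  reader.close();
}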
Use of org.apache.lucene.search.PhraseQuery in project lucene-solr by apache: class TestAddIndexes, method testWithPendingDeletes.
public void testWithPendingDeletes() throws IOException {
  // main directory
  Directory dir = newDirectory();
  // auxiliary directory
  Directory aux = newDirectory();
  setUpDirs(dir, aux);
  IndexWriter writer = newWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setOpenMode(OpenMode.APPEND));
  writer.addIndexes(aux);
  // Adds 10 docs, then replaces them with another 10 docs, so 10 pending deletes:
  for (int i = 0; i < 20; i++) {
    Document doc = new Document();
    doc.add(newStringField("id", "" + (i % 10), Field.Store.NO));
    doc.add(newTextField("content", "bbb " + i, Field.Store.NO));
    doc.add(new IntPoint("doc", i));
    doc.add(new IntPoint("doc2d", i, i));
    doc.add(new NumericDocValuesField("dv", i));
    writer.updateDocument(new Term("id", "" + (i % 10)), doc);
  }
  // Deletes one of the 10 added docs, leaving 9:
  PhraseQuery q = new PhraseQuery("content", "bbb", "14");
  writer.deleteDocuments(q);
  writer.forceMerge(1);
  writer.commit();
  verifyNumDocs(dir, 1039);
  verifyTermDocs(dir, new Term("content", "aaa"), 1030);
  verifyTermDocs(dir, new Term("content", "bbb"), 9);
  writer.close();
  dir.close();
  aux.close();
}
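Both tests also depend on setUpDirs, another private helper. The assertions imply it seeds the two directories with 1030 "aaa" documents combined; a hypothetical reconstruction assuming the split is 1000 in dir and 30 in aux, with addDocs(writer, n) as an assumed helper that adds n documents whose content field is "aaa":

// Hypothetical sketch; counts inferred from verifyTermDocs(dir, "aaa", 1030).
private void setUpDirs(Directory dir, Directory aux) throws IOException {
  IndexWriter writer = newWriter(dir,
      newIndexWriterConfig(new MockAnalyzer(random())).setOpenMode(OpenMode.CREATE));
  addDocs(writer, 1000); // main directory: 1000 "aaa" docs (assumed split)
  writer.close();
  writer = newWriter(aux,
      newIndexWriterConfig(new MockAnalyzer(random())).setOpenMode(OpenMode.CREATE));
  addDocs(writer, 30);   // auxiliary directory: 30 "aaa" docs (assumed split)
  writer.close();
}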
Use of org.apache.lucene.search.PhraseQuery in project lucene-solr by apache: class TestIndexWriterExceptions, method testAddDocsNonAbortingException.
public void testAddDocsNonAbortingException() throws Exception {
  final Directory dir = newDirectory();
  final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
  final int numDocs1 = random().nextInt(25);
  for (int docCount = 0; docCount < numDocs1; docCount++) {
    Document doc = new Document();
    doc.add(newTextField("content", "good content", Field.Store.NO));
    w.addDocument(doc);
  }
  final List<Document> docs = new ArrayList<>();
  for (int docCount = 0; docCount < 7; docCount++) {
    Document doc = new Document();
    docs.add(doc);
    doc.add(newStringField("id", docCount + "", Field.Store.NO));
    doc.add(newTextField("content", "silly content " + docCount, Field.Store.NO));
    if (docCount == 4) {
      Field f = newTextField("crash", "", Field.Store.NO);
      doc.add(f);
      MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
      tokenizer.setReader(new StringReader("crash me on the 4th token"));
      // Disable workflow checking, as we forcefully close() in exceptional cases:
      tokenizer.setEnableChecks(false);
      f.setTokenStream(new CrashingFilter("crash", tokenizer));
    }
  }
  IOException expected = expectThrows(IOException.class, () -> {
    w.addDocuments(docs);
  });
  assertEquals(CRASH_FAIL_MESSAGE, expected.getMessage());
  final int numDocs2 = random().nextInt(25);
  for (int docCount = 0; docCount < numDocs2; docCount++) {
    Document doc = new Document();
    doc.add(newTextField("content", "good content", Field.Store.NO));
    w.addDocument(doc);
  }
  final IndexReader r = w.getReader();
  w.close();
  final IndexSearcher s = newSearcher(r);
  // The failed addDocuments block must not be visible:
  PhraseQuery pq = new PhraseQuery("content", "silly", "good");
  assertEquals(0, s.search(pq, 1).totalHits);
  pq = new PhraseQuery("content", "good", "content");
  assertEquals(numDocs1 + numDocs2, s.search(pq, 1).totalHits);
  r.close();
  dir.close();
}
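CrashingFilter and CRASH_FAIL_MESSAGE are defined elsewhere in TestIndexWriterExceptions. Because addDocuments adds the seven-document block atomically, the IOException thrown mid-block leaves none of the "silly content" docs visible, which the two phrase queries above confirm. A sketch of what such a filter plausibly looks like, assuming it throws once the token count for the "crash" field reaches four:

// Sketch of a token filter that fails mid-stream, triggering a
// non-aborting exception; the real CrashingFilter may differ in detail.
private static class CrashingFilter extends TokenFilter {
  private final String fieldName;
  private int count;

  public CrashingFilter(String fieldName, TokenStream input) {
    super(input);
    this.fieldName = fieldName;
  }

  @Override
  public boolean incrementToken() throws IOException {
    if ("crash".equals(fieldName) && count++ == 4) {
      throw new IOException(CRASH_FAIL_MESSAGE);
    }
    return input.incrementToken();
  }

  @Override
  public void reset() throws IOException {
    super.reset();
    count = 0; // allow reuse across documents
  }
}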