Use of org.apache.lucene.search.TermQuery in project neo4j by neo4j.
Class SimpleUniquenessVerifier, method verify.
/**
 * Scans every leaf of the index reader for terms that occur in more than one document and
 * feeds each such term through a {@link DuplicateCheckingCollector}.
 *
 * <p>All fields except {@code LuceneDocumentStructure.NODE_ID_KEY} are inspected. For a term
 * whose document frequency is greater than one, the collector is reset and a {@link TermQuery}
 * for that term is executed against the searcher with the collector attached.
 *
 * @param accessor   passed to {@code DuplicateCheckingCollector.forProperties}
 * @param propKeyIds passed to {@code DuplicateCheckingCollector.forProperties}
 * @throws IndexEntryConflictException if an {@link IOException} raised during the scan carries
 *                                     an {@code IndexEntryConflictException} as its cause
 * @throws IOException                 for any other I/O failure during the scan
 */
@Override
public void verify(PropertyAccessor accessor, int[] propKeyIds) throws IndexEntryConflictException, IOException {
    try {
        DuplicateCheckingCollector duplicateCollector = DuplicateCheckingCollector.forProperties(accessor, propKeyIds);
        IndexSearcher indexSearcher = indexSearcher();
        for (LeafReaderContext leaf : indexSearcher.getIndexReader().leaves()) {
            Fields leafFields = leaf.reader().fields();
            for (String fieldName : leafFields) {
                // Everything but the NODE_ID_KEY field takes part in the check.
                if (!LuceneDocumentStructure.NODE_ID_KEY.equals(fieldName)) {
                    TermsEnum termsEnum = LuceneDocumentStructure.originalTerms(leafFields.terms(fieldName), fieldName);
                    for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
                        // Only terms present in more than one document need a closer look.
                        if (termsEnum.docFreq() > 1) {
                            duplicateCollector.reset();
                            indexSearcher.search(new TermQuery(new Term(fieldName, term)), duplicateCollector);
                        }
                    }
                }
            }
        }
    } catch (IOException ioException) {
        // Surface a wrapped conflict as the checked conflict exception; rethrow anything else untouched.
        Throwable wrapped = ioException.getCause();
        if (wrapped instanceof IndexEntryConflictException) {
            throw (IndexEntryConflictException) wrapped;
        }
        throw ioException;
    }
}
Use of org.apache.lucene.search.TermQuery in project elasticsearch by elastic.
Class NestedAggregatorTests, method testResetRootDocId.
/**
 * Indexes three root documents with nested children across two committed segments, runs a
 * nested aggregation over every root except {@code type#2}, and expects exactly 4 nested docs.
 *
 * <p>Merging is disabled via {@link NoMergePolicy} so the segment layout produced by the two
 * commits is kept.
 */
public void testResetRootDocId() throws Exception {
    Directory directory = newDirectory();
    IndexWriterConfig writerConfig = new IndexWriterConfig(null);
    writerConfig.setMergePolicy(NoMergePolicy.INSTANCE);
    RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, writerConfig);
    List<Document> block = new ArrayList<>();

    // First segment: one root document (type#1) preceded by its three nested sub docs.
    for (int i = 0; i < 3; i++) {
        Document nestedDoc = new Document();
        nestedDoc.add(new Field(UidFieldMapper.NAME, "type#1", UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
        nestedDoc.add(new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE));
        block.add(nestedDoc);
    }
    Document rootDoc = new Document();
    rootDoc.add(new Field(UidFieldMapper.NAME, "type#1", UidFieldMapper.Defaults.FIELD_TYPE));
    rootDoc.add(new Field(TypeFieldMapper.NAME, "test", TypeFieldMapper.Defaults.FIELD_TYPE));
    block.add(rootDoc);
    indexWriter.addDocuments(block);
    indexWriter.commit();
    block.clear();

    // Second segment, first block: root document type#2 with a single nested sub doc.
    Document nestedDoc = new Document();
    nestedDoc.add(new Field(UidFieldMapper.NAME, "type#2", UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
    nestedDoc.add(new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE));
    block.add(nestedDoc);
    rootDoc = new Document();
    rootDoc.add(new Field(UidFieldMapper.NAME, "type#2", UidFieldMapper.Defaults.FIELD_TYPE));
    rootDoc.add(new Field(TypeFieldMapper.NAME, "test", TypeFieldMapper.Defaults.FIELD_TYPE));
    block.add(rootDoc);
    indexWriter.addDocuments(block);
    block.clear();

    // Second segment, second block: root document type#3 with a single nested sub doc.
    nestedDoc = new Document();
    nestedDoc.add(new Field(UidFieldMapper.NAME, "type#3", UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
    nestedDoc.add(new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE));
    block.add(nestedDoc);
    rootDoc = new Document();
    rootDoc.add(new Field(UidFieldMapper.NAME, "type#3", UidFieldMapper.Defaults.FIELD_TYPE));
    rootDoc.add(new Field(TypeFieldMapper.NAME, "test", TypeFieldMapper.Defaults.FIELD_TYPE));
    block.add(rootDoc);
    indexWriter.addDocuments(block);
    indexWriter.commit();
    indexWriter.close();

    IndexService indexService = createIndex("test");
    DirectoryReader directoryReader = DirectoryReader.open(directory);
    directoryReader = ElasticsearchDirectoryReader.wrap(directoryReader, new ShardId(indexService.index(), 0));
    IndexSearcher searcher = new IndexSearcher(directoryReader);

    // Register the nested mapping so the aggregation can resolve "nested_field".
    CompressedXContent nestedMapping = new CompressedXContent(
            PutMappingRequest.buildFromSimplifiedDef("test", "nested_field", "type=nested").string());
    indexService.mapperService().merge("test", nestedMapping, MapperService.MergeReason.MAPPING_UPDATE, false);

    SearchContext context = createSearchContext(indexService);
    AggregatorFactories.Builder factoriesBuilder = AggregatorFactories.builder();
    factoriesBuilder.addAggregator(new NestedAggregationBuilder("test", "nested_field"));
    AggregatorFactories factories = factoriesBuilder.build(context, null);
    context.aggregations(new SearchContextAggregations(factories));
    Aggregator[] aggs = factories.createTopLevelAggregators();
    BucketCollector collector = BucketCollector.wrap(Arrays.asList(aggs));
    collector.preCollection();

    // Regular searches always exclude nested docs, hence the non-nested filter (a plain
    // MatchAllDocsQuery would otherwise be enough). Root doc type#2 is excluded on purpose:
    // root docs type#3 and type#1 share the same segment-local doc id, so the bug shows up
    // unless the root doc is reset whenever a new segment is processed.
    BooleanQuery.Builder rootsExceptType2 = new BooleanQuery.Builder();
    rootsExceptType2.add(Queries.newNonNestedFilter(), Occur.MUST);
    rootsExceptType2.add(new TermQuery(new Term(UidFieldMapper.NAME, "type#2")), Occur.MUST_NOT);
    searcher.search(new ConstantScoreQuery(rootsExceptType2.build()), collector);
    collector.postCollection();

    Nested nested = (Nested) aggs[0].buildAggregation(0);
    // A count of 6 would reveal the bug: without resetting currentRootDoc, the child docs of
    // the first segment are emitted as hits a second time.
    assertThat(nested.getDocCount(), equalTo(4L));

    directoryReader.close();
    directory.close();
}
Use of org.apache.lucene.search.TermQuery in project elasticsearch by elastic.
Class SumAggregatorTests, method testQueryFiltering.
/**
 * Indexes five documents with values 1..5 where odd values carry {@code match=yes} and even
 * values carry {@code match=no}, runs the test case with a term query on {@code match:yes},
 * and expects the aggregated value to be 9 (1 + 3 + 5).
 */
public void testQueryFiltering() throws IOException {
    testCase(new TermQuery(new Term("match", "yes")), writer -> {
        for (int value = 1; value <= 5; value++) {
            // Odd values match the query, even values do not.
            String matchFlag = (value % 2 == 1) ? "yes" : "no";
            writer.addDocument(Arrays.asList(new StringField("match", matchFlag, Field.Store.NO), new NumericDocValuesField(FIELD_NAME, value)));
        }
    }, sum -> assertEquals(9L, sum.getValue(), 0d));
}
Use of org.apache.lucene.search.TermQuery in project elasticsearch by elastic.
Class PlainHighlighterTests, method checkGeoQueryHighlighting.
/**
 * Checks that highlighting still works when a geo query is OR-ed with a term query.
 *
 * <p>The given query and a term query for {@code text:failure} are combined as two SHOULD
 * clauses. The best fragment is computed once for the combined query as built and once for
 * its rewritten form; in both cases the word "failure" must be wrapped in {@code <B>} tags.
 *
 * @param geoQuery the query combined with the term query; it only has to pass through the
 *                 highlighter without breaking it
 * @throws IOException                  if analysis or highlighting fails on I/O
 * @throws InvalidTokenOffsetsException if the highlighter encounters invalid token offsets
 */
public void checkGeoQueryHighlighting(Query geoQuery) throws IOException, InvalidTokenOffsetsException {
    final String fieldName = "text";
    final String content = "Arbitrary text field which should not cause a failure";
    final String expectedFragment = "Arbitrary text field which should not cause a <B>failure</B>";

    // Properly parameterized map instead of a raw Map, so the compiler checks key/value types.
    Map<String, Analyzer> analysers = new HashMap<>();
    analysers.put(fieldName, new StandardAnalyzer());
    FieldNameAnalyzer fieldNameAnalyzer = new FieldNameAnalyzer(analysers);

    Query termQuery = new TermQuery(new Term(fieldName, "failure"));
    Query boolQuery = new BooleanQuery.Builder()
            .add(new BooleanClause(geoQuery, BooleanClause.Occur.SHOULD))
            .add(new BooleanClause(termQuery, BooleanClause.Occur.SHOULD))
            .build();

    // First pass: highlight with the query exactly as built.
    org.apache.lucene.search.highlight.Highlighter highlighter =
            new org.apache.lucene.search.highlight.Highlighter(new CustomQueryScorer(boolQuery));
    String fragment = highlighter.getBestFragment(fieldNameAnalyzer.tokenStream(fieldName, content), content);
    assertThat(fragment, equalTo(expectedFragment));

    // Second pass: highlight with the rewritten query.
    Query rewritten = boolQuery.rewrite(null);
    highlighter = new org.apache.lucene.search.highlight.Highlighter(new CustomQueryScorer(rewritten));
    fragment = highlighter.getBestFragment(fieldNameAnalyzer.tokenStream(fieldName, content), content);
    assertThat(fragment, equalTo(expectedFragment));
}
Use of org.apache.lucene.search.TermQuery in project elasticsearch by elastic.
Class QueryPhaseTests, method countTestCase.
/**
 * Builds a random index of 100 to 200 documents and runs the per-query {@code countTestCase}
 * overload against several query shapes: match-all, term, their constant-score wrappers and
 * a boolean combination.
 *
 * <p>Each document randomly gets {@code foo=bar} and/or {@code foo=baz}. When
 * {@code withDeletions} is set, the first document and a rare random selection of others are
 * tagged with {@code delete=yes} and deleted before the reader is opened.
 *
 * @param withDeletions whether some of the indexed documents are deleted before searching
 */
private void countTestCase(boolean withDeletions) throws Exception {
    Directory directory = newDirectory();
    IndexWriterConfig writerConfig = newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE);
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory, writerConfig);

    final int docCount = scaledRandomIntBetween(100, 200);
    for (int docIndex = 0; docIndex < docCount; docIndex++) {
        Document doc = new Document();
        if (randomBoolean()) {
            doc.add(new StringField("foo", "bar", Store.NO));
        }
        if (randomBoolean()) {
            doc.add(new StringField("foo", "baz", Store.NO));
        }
        // The first document is always tagged, so at least one deletion happens when requested.
        if (withDeletions && (rarely() || docIndex == 0)) {
            doc.add(new StringField("delete", "yes", Store.NO));
        }
        writer.addDocument(doc);
    }
    if (withDeletions) {
        writer.deleteDocuments(new Term("delete", "yes"));
    }
    final IndexReader reader = writer.getReader();

    Query matchAll = new MatchAllDocsQuery();
    Query constantScoreMatchAll = new ConstantScoreQuery(matchAll);
    Query termQuery = new TermQuery(new Term("foo", "bar"));
    Query constantScoreTerm = new ConstantScoreQuery(termQuery);
    BooleanQuery.Builder boolBuilder = new BooleanQuery.Builder();
    boolBuilder.add(matchAll, Occur.SHOULD);
    boolBuilder.add(termQuery, Occur.MUST);
    BooleanQuery boolQuery = boolBuilder.build();

    // The meaning of the third argument is defined by the countTestCase(Query, IndexReader, boolean)
    // overload, which is not part of this method.
    countTestCase(matchAll, reader, false);
    countTestCase(constantScoreMatchAll, reader, false);
    countTestCase(termQuery, reader, withDeletions);
    countTestCase(constantScoreTerm, reader, withDeletions);
    countTestCase(boolQuery, reader, true);

    reader.close();
    writer.close();
    directory.close();
}
Aggregations