Search in sources :

Example 6 with MemoryIndex

use of org.apache.lucene.index.memory.MemoryIndex in project orientdb by orientechnologies.

the class OLuceneTxChangesMultiRid method isDeleted.

/**
 * Tells whether {@code document} is matched by at least one of the delete queries
 * recorded in {@code deleted} for the identity of {@code value}.
 *
 * <p>The document's fields are indexed once into a {@link MemoryIndex} using a
 * {@link KeywordAnalyzer}, and each recorded entry is turned into a query via
 * {@code engine.deleteQuery} and run against that single in-memory index.
 *
 * @param document the Lucene document to test
 * @param key      not used by this implementation
 * @param value    the record whose identity selects the recorded delete entries
 * @return {@code true} as soon as one delete query scores above zero against the document;
 *         {@code false} when nothing is recorded for the identity or no query matches
 */
public boolean isDeleted(Document document, Object key, OIdentifiable value) {
    List<String> strings = deleted.get(value.getIdentity().toString());
    if (strings == null || strings.isEmpty()) {
        // Nothing recorded for this identity: skip building the index altogether.
        return false;
    }
    // The indexed content does not depend on the query, so build the index a single time
    // (with a single analyzer instance) instead of resetting and refilling it per query.
    KeywordAnalyzer analyzer = new KeywordAnalyzer();
    MemoryIndex memoryIndex = new MemoryIndex();
    for (IndexableField field : document.getFields()) {
        memoryIndex.addField(field.name(), field.stringValue(), analyzer);
    }
    for (String string : strings) {
        Query q = engine.deleteQuery(string, value);
        // A positive score means the query matched the document; one hit is enough.
        if (memoryIndex.search(q) > 0.0f) {
            return true;
        }
    }
    return false;
}
Also used : IndexableField(org.apache.lucene.index.IndexableField) KeywordAnalyzer(org.apache.lucene.analysis.core.KeywordAnalyzer) MemoryIndex(org.apache.lucene.index.memory.MemoryIndex) Query(org.apache.lucene.search.Query)

Example 7 with MemoryIndex

use of org.apache.lucene.index.memory.MemoryIndex in project elasticsearch by elastic.

the class TermVectorsService method generateTermVectors.

/**
 * Builds term vectors on the fly for the requested fields by analyzing their values
 * into a throw-away {@link MemoryIndex} and reading the resulting fields back.
 *
 * <p>Values are taken from {@code getFields} first; for requested fields not found there,
 * raw values are extracted from {@code source} when it is non-null.
 *
 * @param indexShard       passed to {@code getAnalyzerAtField} to resolve each field's analyzer
 * @param source           document source map used as a fallback for values; may be {@code null}
 * @param getFields        already-fetched fields; only those named in {@code fields} are used
 * @param withOffsets      forwarded to the {@link MemoryIndex} constructor
 * @param perFieldAnalyzer optional per-field analyzer overrides, passed to {@code getAnalyzerAtField}
 * @param fields           names of the fields to generate term vectors for
 * @return the fields of the in-memory index holding the analyzed values
 * @throws IOException declared by the signature; propagated from the underlying index calls
 */
private static Fields generateTermVectors(IndexShard indexShard, Map<String, Object> source, Collection<GetField> getFields, boolean withOffsets, @Nullable Map<String, String> perFieldAnalyzer, Set<String> fields) throws IOException {
    final Map<String, Collection<Object>> valuesByField = new HashMap<>();

    // 1) Values that were already fetched.
    for (GetField fetched : getFields) {
        final String name = fetched.getName();
        if (!fields.contains(name)) {
            // some fields are returned even when not asked for, eg. _timestamp
            continue;
        }
        valuesByField.put(name, fetched.getValues());
    }

    // 2) Fall back to the source for requested fields that are still missing.
    if (source != null) {
        for (String name : fields) {
            if (valuesByField.containsKey(name)) {
                continue;
            }
            final List<Object> rawValues = XContentMapValues.extractRawValues(name, source);
            if (!rawValues.isEmpty()) {
                valuesByField.put(name, rawValues);
            }
        }
    }

    // 3) Analyze everything into an in-memory index.
    final MemoryIndex memoryIndex = new MemoryIndex(withOffsets);
    for (Map.Entry<String, Collection<Object>> fieldAndValues : valuesByField.entrySet()) {
        final String name = fieldAndValues.getKey();
        final Analyzer fieldAnalyzer = getAnalyzerAtField(indexShard, name, perFieldAnalyzer);
        if (!(fieldAndValues.getValue() instanceof List)) {
            // Non-list collections are indexed as one value using their string form.
            memoryIndex.addField(name, fieldAndValues.getValue().toString(), fieldAnalyzer);
            continue;
        }
        for (Object singleValue : fieldAndValues.getValue()) {
            memoryIndex.addField(name, singleValue.toString(), fieldAnalyzer);
        }
    }

    // 4) Read the vectors back from the index.
    final IndexSearcher memorySearcher = memoryIndex.createSearcher();
    return MultiFields.getFields(memorySearcher.getIndexReader());
}
Also used : GetField(org.elasticsearch.index.get.GetField) MemoryIndex(org.apache.lucene.index.memory.MemoryIndex) HashMap(java.util.HashMap) Collection(java.util.Collection) List(java.util.List) Analyzer(org.apache.lucene.analysis.Analyzer) Map(java.util.Map) TreeMap(java.util.TreeMap)

Example 8 with MemoryIndex

use of org.apache.lucene.index.memory.MemoryIndex in project elasticsearch by elastic.

the class CandidateQueryTests method testDuelSpecificQueries.

/**
 * Registers a fixed set of queries (common-terms, blended-term, span near/or/not and a
 * long range query), writes them through {@code indexWriter}, and then calls
 * {@code duelRun} with a {@link MemoryIndex} built from a single sample document.
 */
public void testDuelSpecificQueries() throws Exception {
    final List<ParseContext.Document> queryDocs = new ArrayList<>();

    // Common-terms query.
    final CommonTermsQuery commonTerms =
        new CommonTermsQuery(BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, 128);
    commonTerms.add(new Term("field", "quick"));
    commonTerms.add(new Term("field", "brown"));
    commonTerms.add(new Term("field", "fox"));
    addQuery(commonTerms, queryDocs);

    // Blended-term query over the same three terms.
    final Term[] blendedTerms = new Term[] {
        new Term("field", "quick"),
        new Term("field", "brown"),
        new Term("field", "fox")
    };
    final BlendedTermQuery blended = BlendedTermQuery.booleanBlendedQuery(blendedTerms, false);
    addQuery(blended, queryDocs);

    // Span queries: two ordered "near" queries, then an "or" and a "not" built on top of them.
    final SpanNearQuery quickBrownFox = new SpanNearQuery.Builder("field", true)
        .addClause(new SpanTermQuery(new Term("field", "quick")))
        .addClause(new SpanTermQuery(new Term("field", "brown")))
        .addClause(new SpanTermQuery(new Term("field", "fox")))
        .build();
    addQuery(quickBrownFox, queryDocs);
    final SpanNearQuery theLazyDoc = new SpanNearQuery.Builder("field", true)
        .addClause(new SpanTermQuery(new Term("field", "the")))
        .addClause(new SpanTermQuery(new Term("field", "lazy")))
        .addClause(new SpanTermQuery(new Term("field", "doc")))
        .build();
    final SpanOrQuery eitherSpan = new SpanOrQuery(quickBrownFox, theLazyDoc);
    addQuery(eitherSpan, queryDocs);
    final SpanNotQuery spanMinusItself = new SpanNotQuery(quickBrownFox, quickBrownFox);
    addQuery(spanMinusItself, queryDocs);

    // Range query with randomly drawn bounds.
    final long rangeStart = randomIntBetween(0, 256);
    final long rangeEnd = rangeStart + randomIntBetween(0, 32);
    addQuery(LongPoint.newRangeQuery("long_field", rangeStart, rangeEnd), queryDocs);

    // Write the registered queries and open a searcher over them.
    indexWriter.addDocuments(queryDocs);
    indexWriter.close();
    directoryReader = DirectoryReader.open(directory);
    final IndexSearcher shardSearcher = newSearcher(directoryReader);
    // Disable query cache, because ControlQuery cannot be cached...
    shardSearcher.setQueryCache(null);

    // Sample document: a text field plus a long value drawn from inside the range bounds.
    final Document sample = new Document();
    sample.add(new TextField("field", "the quick brown fox jumps over the lazy dog", Field.Store.NO));
    final long valueInRange = randomIntBetween((int) rangeStart, (int) rangeEnd);
    sample.add(new LongPoint("long_field", valueInRange));

    final MemoryIndex sampleIndex = MemoryIndex.fromDocument(sample, new WhitespaceAnalyzer());
    duelRun(queryStore, sampleIndex, shardSearcher);
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) WhitespaceAnalyzer(org.apache.lucene.analysis.core.WhitespaceAnalyzer) ArrayList(java.util.ArrayList) BlendedTermQuery(org.apache.lucene.queries.BlendedTermQuery) Term(org.apache.lucene.index.Term) LongPoint(org.apache.lucene.document.LongPoint) Document(org.apache.lucene.document.Document) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) CommonTermsQuery(org.apache.lucene.queries.CommonTermsQuery) SpanNotQuery(org.apache.lucene.search.spans.SpanNotQuery) MemoryIndex(org.apache.lucene.index.memory.MemoryIndex) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) TextField(org.apache.lucene.document.TextField) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery)

Example 9 with MemoryIndex

use of org.apache.lucene.index.memory.MemoryIndex in project orientdb by orientechnologies.

the class LuceneBooleanIndexTest method testMemoryIndex.

/**
 * Indexes one field of a document into a {@link MemoryIndex} with a
 * {@link StandardAnalyzer} and verifies that a query for a term contained in the
 * field's text scores above zero.
 *
 * @throws ParseException if the query string cannot be parsed
 */
@Test
public void testMemoryIndex() throws ParseException {
    // TODO To be used in evaluate Record
    MemoryIndex index = new MemoryIndex();
    Document doc = new Document();
    doc.add(new StringField("text", "my text", Field.Store.YES));
    StandardAnalyzer analyzer = new StandardAnalyzer();
    // Feed each field's string value through the analyzer into the in-memory index.
    for (IndexableField field : doc.getFields()) {
        index.addField(field.name(), field.stringValue(), analyzer);
    }
    QueryParser parser = new QueryParser("text", analyzer);
    float score = index.search(parser.parse("+text:my"));
    // Without a check the test could never fail; a score above zero means the query matched.
    if (!(score > 0.0f)) {
        throw new AssertionError("expected query '+text:my' to match the indexed document, but score was " + score);
    }
}
Also used : IndexableField(org.apache.lucene.index.IndexableField) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) MemoryIndex(org.apache.lucene.index.memory.MemoryIndex) StringField(org.apache.lucene.document.StringField) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) Document(org.apache.lucene.document.Document) ODocument(com.orientechnologies.orient.core.record.impl.ODocument) Test(org.junit.Test)

Example 10 with MemoryIndex

use of org.apache.lucene.index.memory.MemoryIndex in project lucene-solr by apache.

the class WeightedSpanTermExtractor method getLeafContext.

/**
 * Returns the leaf context of the internal reader, creating that reader on first use.
 *
 * <p>On first use the reader is either a {@code TermVectorLeafReader} over the token stream's
 * term vector (when the stream comes from a term vector that has both positions and offsets)
 * or the reader of a {@link MemoryIndex} filled from the token stream. In both cases it is
 * then wrapped in a {@code DelegatingLeafReader}.
 *
 * @return the context of the (possibly freshly created) internal reader
 * @throws IOException declared by the signature; propagated from the calls made while building the reader
 */
protected LeafReaderContext getLeafContext() throws IOException {
    // Already built on an earlier call: reuse it.
    if (internalReader != null) {
        return internalReader.getContext();
    }

    final boolean fromTermVector = tokenStream instanceof TokenStreamFromTermVector;
    // Caching is never applied to term-vector streams or to streams that are already caching.
    final boolean cacheIt = !fromTermVector && wrapToCaching && !(tokenStream instanceof CachingTokenFilter);

    // If it's from term vectors, simply wrap the underlying Terms in a reader
    if (fromTermVector) {
        final Terms vectorTerms = ((TokenStreamFromTermVector) tokenStream).getTermVectorTerms();
        if (vectorTerms.hasPositions() && vectorTerms.hasOffsets()) {
            internalReader = new TermVectorLeafReader(DelegatingLeafReader.FIELD_NAME, vectorTerms);
        }
    }

    // Use MemoryIndex (index/invert this tokenStream now)
    if (internalReader == null) {
        // offsets and payloads
        final MemoryIndex memoryIndex = new MemoryIndex(true, usePayloads);
        if (!cacheIt) {
            memoryIndex.addField(DelegatingLeafReader.FIELD_NAME, new OffsetLimitTokenFilter(tokenStream, maxDocCharsToAnalyze));
        } else {
            assert !cachedTokenStream;
            tokenStream = new CachingTokenFilter(new OffsetLimitTokenFilter(tokenStream, maxDocCharsToAnalyze));
            cachedTokenStream = true;
            memoryIndex.addField(DelegatingLeafReader.FIELD_NAME, tokenStream);
        }
        final IndexSearcher memorySearcher = memoryIndex.createSearcher();
        // MEM index has only atomic ctx
        final LeafReaderContext onlyLeaf = (LeafReaderContext) memorySearcher.getTopReaderContext();
        internalReader = onlyLeaf.reader();
    }

    // Now wrap it so we always use a common field.
    this.internalReader = new DelegatingLeafReader(internalReader);
    return internalReader.getContext();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) MemoryIndex(org.apache.lucene.index.memory.MemoryIndex) CachingTokenFilter(org.apache.lucene.analysis.CachingTokenFilter) Terms(org.apache.lucene.index.Terms)

Aggregations

MemoryIndex (org.apache.lucene.index.memory.MemoryIndex)10 IndexSearcher (org.apache.lucene.search.IndexSearcher)5 WhitespaceAnalyzer (org.apache.lucene.analysis.core.WhitespaceAnalyzer)4 IndexableField (org.apache.lucene.index.IndexableField)4 Term (org.apache.lucene.index.Term)4 ArrayList (java.util.ArrayList)3 Document (org.apache.lucene.document.Document)3 LongPoint (org.apache.lucene.document.LongPoint)3 StringField (org.apache.lucene.document.StringField)3 BooleanQuery (org.apache.lucene.search.BooleanQuery)3 Query (org.apache.lucene.search.Query)3 TermQuery (org.apache.lucene.search.TermQuery)3 SpanNearQuery (org.apache.lucene.search.spans.SpanNearQuery)3 SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery)3 List (java.util.List)2 Analyzer (org.apache.lucene.analysis.Analyzer)2 BlendedTermQuery (org.apache.lucene.queries.BlendedTermQuery)2 CommonTermsQuery (org.apache.lucene.queries.CommonTermsQuery)2 ConstantScoreQuery (org.apache.lucene.search.ConstantScoreQuery)2 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)2