Search in sources :

Example 51 with StoredField

use of org.apache.lucene.document.StoredField in project lucene-solr by apache.

the class TestSuggestField method testReturnedDocID.

/**
 * Checks that each suggestion returned for the "abc_" prefix carries the doc id of the document
 * it was indexed with: the numeric suffix of the suggestion key must match the stored
 * "int_field" value of the document the hit points at.
 */
@Test
public void testReturnedDocID() throws Exception {
    Analyzer analyzer = new MockAnalyzer(random());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));
    int docCount = Math.min(1000, atLeast(10));
    for (int id = 0; id < docCount; id++) {
        // every document gets a suggestion keyed by its id and stores that same id
        Document indexed = new Document();
        indexed.add(new SuggestField("suggest_field", "abc_" + id, docCount));
        indexed.add(new StoredField("int_field", id));
        writer.addDocument(indexed);
        // randomly interleave commits with the additions
        if (random().nextBoolean()) {
            writer.commit();
        }
    }

    DirectoryReader reader = writer.getReader();
    SuggestIndexSearcher searcher = new SuggestIndexSearcher(reader);
    Term prefixTerm = new Term("suggest_field", "abc_");
    PrefixCompletionQuery prefixQuery = new PrefixCompletionQuery(analyzer, prefixTerm);
    TopSuggestDocs hits = searcher.suggest(prefixQuery, docCount, false);
    assertEquals(docCount, hits.totalHits);

    for (SuggestScoreDoc hit : hits.scoreLookupDocs()) {
        String key = hit.key.toString();
        assertTrue(key.startsWith("abc_"));
        // the part after the 4-character "abc_" prefix is the id the suggestion was built from
        int idFromKey = Integer.parseInt(key.substring(4));
        Document stored = reader.document(hit.doc);
        assertEquals(stored.getField("int_field").numericValue().intValue(), idFromKey);
    }

    reader.close();
    writer.close();
}
Also used : DirectoryReader(org.apache.lucene.index.DirectoryReader) Term(org.apache.lucene.index.Term) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) IntPoint(org.apache.lucene.document.IntPoint) StoredField(org.apache.lucene.document.StoredField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) SuggestScoreDoc(org.apache.lucene.search.suggest.document.TopSuggestDocs.SuggestScoreDoc) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Test(org.junit.Test)

Example 52 with StoredField

use of org.apache.lucene.document.StoredField in project lucene-solr by apache.

the class TestSuggestField method testMultipleSegments.

/**
 * Indexes documents of which only a random subset carries a suggest field, committing most of
 * the time between additions, and verifies that a prefix query returns exactly the indexed
 * suggestions.
 */
@Test
public void testMultipleSegments() throws Exception {
    Analyzer analyzer = new MockAnalyzer(random());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));
    int docCount = Math.min(1000, atLeast(10));
    List<Entry> expected = new ArrayList<>();
    // only some documents get a suggestion, so that at least some segments have no suggest field
    for (int weight = docCount; weight > 0; weight--) {
        Document doc = new Document();
        boolean withSuggestion = random().nextInt(4) == 1;
        if (withSuggestion) {
            String key = "abc_" + weight;
            doc.add(new SuggestField("suggest_field", key, weight));
            expected.add(new Entry(key, weight));
        }
        doc.add(new StoredField("weight_fld", weight));
        writer.addDocument(doc);
        if (usually()) {
            writer.commit();
        }
    }

    DirectoryReader reader = writer.getReader();
    SuggestIndexSearcher searcher = new SuggestIndexSearcher(reader);
    Term prefixTerm = new Term("suggest_field", "abc_");
    PrefixCompletionQuery prefixQuery = new PrefixCompletionQuery(analyzer, prefixTerm);
    // request at least one hit even when no suggestion was indexed
    int topN = Math.max(1, expected.size());
    TopSuggestDocs hits = searcher.suggest(prefixQuery, topN, false);
    assertSuggestions(hits, expected.toArray(new Entry[0]));

    reader.close();
    writer.close();
}
Also used : DirectoryReader(org.apache.lucene.index.DirectoryReader) ArrayList(java.util.ArrayList) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) Term(org.apache.lucene.index.Term) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) IntPoint(org.apache.lucene.document.IntPoint) StoredField(org.apache.lucene.document.StoredField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Test(org.junit.Test)

Example 53 with StoredField

use of org.apache.lucene.document.StoredField in project lucene-solr by apache.

the class TestCompressingStoredFieldsFormat method testDeletePartiallyWrittenFilesIfAbort.

/**
 * Adds one valid document, then a document whose stored field reports a {@code null} string
 * value, and checks that the resulting {@link IllegalArgumentException} is recorded as the
 * writer's tragic exception and that the writer is no longer open afterwards.
 */
public void testDeletePartiallyWrittenFilesIfAbort() throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig config = newIndexWriterConfig(new MockAnalyzer(random()));
    config.setMaxBufferedDocs(RandomNumbers.randomIntBetween(random(), 2, 30));
    config.setCodec(CompressingCodec.randomInstance(random()));
    // compound files are switched off because this test checks file names
    config.setMergePolicy(newLogMergePolicy(false));
    config.setUseCompoundFile(false);

    // a plain IndexWriter is used (not RandomIndexWriter) so that CFS stays off
    IndexWriter writer = new IndexWriter(dir, config);

    final Document goodDoc = new Document();
    goodDoc.add(new IntPoint("id", 0));
    goodDoc.add(new StoredField("id", 0));
    writer.addDocument(goodDoc);
    writer.commit();

    // build a document whose stored field makes #writeField fail, which triggers an abort
    FieldType storedOnly = new FieldType();
    storedOnly.setStored(true);
    Field brokenField = new Field("invalid", storedOnly) {

        @Override
        public String stringValue() {
            // abort the segment!!  We should fix this.
            return null;
        }
    };
    final Document badDoc = new Document();
    badDoc.add(brokenField);

    try {
        writer.addDocument(badDoc);
        writer.commit();
    } catch (IllegalArgumentException expected) {
        // the failure must be the one the writer recorded as its tragedy
        assertEquals(expected, writer.getTragicException());
    }

    // the tragedy should have closed the writer
    assertFalse(writer.isOpen());
    dir.close();
}
Also used : IntPoint(org.apache.lucene.document.IntPoint) StoredField(org.apache.lucene.document.StoredField) Field(org.apache.lucene.document.Field) StoredField(org.apache.lucene.document.StoredField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexWriter(org.apache.lucene.index.IndexWriter) Document(org.apache.lucene.document.Document) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) FieldType(org.apache.lucene.document.FieldType)

Example 54 with StoredField

use of org.apache.lucene.document.StoredField in project Anserini by castorini.

the class ObjectTriplesLuceneDocumentGenerator method createDocument.

/**
 * Converts a group of triples into a Lucene {@link Document}.
 *
 * <p>The cleaned subject is added as a stored {@link StringField}. Each object value is
 * normalized and added under its cleaned predicate name: values of indexed predicates become a
 * {@link StringField} (URI values) or a {@link TextField} (all other values), both stored;
 * values of non-indexed predicates are added as a {@link StoredField} only.
 *
 * @param src the triples to convert; {@code src.clear()} is called on it before returning
 * @return the Lucene document built from {@code src}
 */
public Document createDocument(ObjectTriples src) {
    Document doc = new Document();

    // The subject is a StringField so that it can be searched
    doc.add(new StringField(FIELD_SUBJECT, cleanUri(src.getSubject()), Field.Store.YES));

    // One field per (predicate, object value) pair
    for (Map.Entry<String, List<String>> predicateValues : src.getPredicateValues().entrySet()) {
        String predicate = cleanUri(predicateValues.getKey());
        for (String rawValue : predicateValues.getValue()) {
            // the type is determined from the raw value, before normalization
            String objectType = getObjectType(rawValue);
            String normalized = normalizeObjectValue(rawValue);

            final Field field;
            if (!isIndexedPredicate(predicate)) {
                // Not an indexed predicate: keep the value as a stored field only
                field = new StoredField(predicate, normalized);
            } else if (objectType.equals(VALUE_TYPE_URI)) {
                // URIs of indexed predicates always go into a StringField
                field = new StringField(predicate, normalized, Field.Store.YES);
            } else {
                // Any other value of an indexed predicate goes into a stored TextField
                field = new TextField(predicate, normalized, Field.Store.YES);
            }
            doc.add(field);
        }
    }

    src.clear();
    return doc;
}
Also used : Field(org.apache.lucene.document.Field) StringField(org.apache.lucene.document.StringField) TextField(org.apache.lucene.document.TextField) StoredField(org.apache.lucene.document.StoredField) StoredField(org.apache.lucene.document.StoredField) StringField(org.apache.lucene.document.StringField) TextField(org.apache.lucene.document.TextField) List(java.util.List) Document(org.apache.lucene.document.Document) Map(java.util.Map)

Example 55 with StoredField

use of org.apache.lucene.document.StoredField in project Anserini by castorini.

the class LuceneDocumentGenerator method createDocument.

/**
 * Converts a source document into a Lucene {@link Document}.
 *
 * <p>The document text is {@code src.content()}, passed through {@code transform} when one is
 * set. The resulting document holds the id as a stored {@link StringField}, optionally the raw
 * content as a {@link StoredField} ({@code args.storeRawDocs}), and the text as a body field
 * whose storage, term vectors and index options are driven by {@code args}.
 *
 * @param src the source document to convert
 * @return the Lucene document, or {@code null} when extracting the text failed or the text is
 *     {@code null} or blank; in both cases the matching counter is incremented
 */
public Document createDocument(SourceDocument src) {
    String id = src.id();
    String contents;
    try {
        // If there's a transform, use it.
        contents = transform != null ? transform.apply(src.content()) : src.content();
    } catch (Exception e) {
        LOG.error("Error extracting document text, skipping document: " + id, e);
        counters.errors.incrementAndGet();
        return null;
    }
    // A null text is handled like an empty one: calling trim() on it here, outside the
    // try/catch above, would otherwise fail with a NullPointerException.
    if (contents == null || contents.trim().isEmpty()) {
        LOG.info("Empty document: " + id);
        counters.emptyDocuments.incrementAndGet();
        return null;
    }
    // make a new, empty document
    Document document = new Document();
    // document id
    document.add(new StringField(FIELD_ID, id, Field.Store.YES));
    if (args.storeRawDocs) {
        // the raw field holds the untransformed content
        document.add(new StoredField(FIELD_RAW, src.content()));
    }
    FieldType fieldType = new FieldType();
    fieldType.setStored(args.storeTransformedDocs);
    // Are we storing document vectors?
    if (args.storeDocvectors) {
        fieldType.setStoreTermVectors(true);
        fieldType.setStoreTermVectorPositions(true);
    }
    // Are we building a "positional" or "count" index?
    if (args.storePositions) {
        fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
    } else {
        fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
    }
    document.add(new Field(FIELD_BODY, contents, fieldType));
    return document;
}
Also used : Field(org.apache.lucene.document.Field) StringField(org.apache.lucene.document.StringField) StoredField(org.apache.lucene.document.StoredField) StoredField(org.apache.lucene.document.StoredField) StringField(org.apache.lucene.document.StringField) Document(org.apache.lucene.document.Document) SourceDocument(io.anserini.document.SourceDocument) FieldType(org.apache.lucene.document.FieldType)

Aggregations

StoredField (org.apache.lucene.document.StoredField)109 Document (org.apache.lucene.document.Document)97 Directory (org.apache.lucene.store.Directory)72 StringField (org.apache.lucene.document.StringField)43 Field (org.apache.lucene.document.Field)40 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)39 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)36 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)34 BytesRef (org.apache.lucene.util.BytesRef)34 TextField (org.apache.lucene.document.TextField)30 IndexReader (org.apache.lucene.index.IndexReader)29 IndexSearcher (org.apache.lucene.search.IndexSearcher)26 IntPoint (org.apache.lucene.document.IntPoint)24 SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField)23 TopDocs (org.apache.lucene.search.TopDocs)23 SortField (org.apache.lucene.search.SortField)22 SortedNumericDocValuesField (org.apache.lucene.document.SortedNumericDocValuesField)21 Sort (org.apache.lucene.search.Sort)21 BinaryDocValuesField (org.apache.lucene.document.BinaryDocValuesField)18 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)18