Search in sources:

Example 26 with StoredField

Use of org.apache.lucene.document.StoredField in the Apache lucene-solr project.

From class TestBlockJoin, method testRandom.

/**
 * Randomized end-to-end test for block joins: builds a block-join index and a
 * fully denormalized index over the same random parent/child data, then runs
 * random parent-to-child and child-to-parent join queries against the join
 * index and verifies the hits match equivalent flat queries on the
 * denormalized index.
 */
public void testRandom() throws Exception {
    // We build two indices at once: one normalized (which
    // ToParentBlockJoinQuery/Collector,
    // ToChildBlockJoinQuery can query) and the other w/
    // the same docs, just fully denormalized:
    final Directory dir = newDirectory();
    final Directory joinDir = newDirectory();
    final int maxNumChildrenPerParent = 20;
    final int numParentDocs = TestUtil.nextInt(random(), 100 * RANDOM_MULTIPLIER, 300 * RANDOM_MULTIPLIER);
    //final int numParentDocs = 30;
    // Values for parent fields:
    final String[][] parentFields = getRandomFields(numParentDocs / 2);
    // Values for child fields:
    final String[][] childFields = getRandomFields(numParentDocs);
    final boolean doDeletes = random().nextBoolean();
    final List<Integer> toDelete = new ArrayList<>();
    // TODO: parallel star join, nested join cases too!
    final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    final RandomIndexWriter joinW = new RandomIndexWriter(random(), joinDir);
    for (int parentDocID = 0; parentDocID < numParentDocs; parentDocID++) {
        Document parentDoc = new Document();
        Document parentJoinDoc = new Document();
        // Stored parentID lets us map hits back to the logical parent:
        Field id = new StoredField("parentID", parentDocID);
        parentDoc.add(id);
        parentJoinDoc.add(id);
        parentJoinDoc.add(newStringField("isParent", "x", Field.Store.NO));
        // Doc-values copy of parentID, used for sorting:
        id = new NumericDocValuesField("parentID", parentDocID);
        parentDoc.add(id);
        // NOTE(review): the "isParent" marker is added to parentJoinDoc a
        // second time here (already added above) — harmless for the term
        // queries in this test, but looks unintentional; confirm upstream.
        parentJoinDoc.add(id);
        parentJoinDoc.add(newStringField("isParent", "x", Field.Store.NO));
        for (int field = 0; field < parentFields.length; field++) {
            // Each parent field is present ~90% of the time:
            if (random().nextDouble() < 0.9) {
                String s = parentFields[field][random().nextInt(parentFields[field].length)];
                Field f = newStringField("parent" + field, s, Field.Store.NO);
                parentDoc.add(f);
                parentJoinDoc.add(f);
                f = new SortedDocValuesField("parent" + field, new BytesRef(s));
                parentDoc.add(f);
                parentJoinDoc.add(f);
            }
        }
        if (doDeletes) {
            // blockID tags the whole block so we can delete parent+children together:
            parentDoc.add(new IntPoint("blockID", parentDocID));
            parentJoinDoc.add(new IntPoint("blockID", parentDocID));
        }
        final List<Document> joinDocs = new ArrayList<>();
        if (VERBOSE) {
            StringBuilder sb = new StringBuilder();
            sb.append("parentID=").append(parentDoc.get("parentID"));
            for (int fieldID = 0; fieldID < parentFields.length; fieldID++) {
                String s = parentDoc.get("parent" + fieldID);
                if (s != null) {
                    sb.append(" parent" + fieldID + "=" + s);
                }
            }
            System.out.println("  " + sb.toString());
        }
        final int numChildDocs = TestUtil.nextInt(random(), 1, maxNumChildrenPerParent);
        for (int childDocID = 0; childDocID < numChildDocs; childDocID++) {
            // Denormalize: copy all parent fields into child doc:
            Document childDoc = TestUtil.cloneDocument(parentDoc);
            Document joinChildDoc = new Document();
            joinDocs.add(joinChildDoc);
            Field childID = new StoredField("childID", childDocID);
            childDoc.add(childID);
            joinChildDoc.add(childID);
            childID = new NumericDocValuesField("childID", childDocID);
            childDoc.add(childID);
            joinChildDoc.add(childID);
            for (int childFieldID = 0; childFieldID < childFields.length; childFieldID++) {
                // Each child field is present ~90% of the time:
                if (random().nextDouble() < 0.9) {
                    String s = childFields[childFieldID][random().nextInt(childFields[childFieldID].length)];
                    Field f = newStringField("child" + childFieldID, s, Field.Store.NO);
                    childDoc.add(f);
                    joinChildDoc.add(f);
                    f = new SortedDocValuesField("child" + childFieldID, new BytesRef(s));
                    childDoc.add(f);
                    joinChildDoc.add(f);
                }
            }
            if (VERBOSE) {
                StringBuilder sb = new StringBuilder();
                sb.append("childID=").append(joinChildDoc.get("childID"));
                for (int fieldID = 0; fieldID < childFields.length; fieldID++) {
                    String s = joinChildDoc.get("child" + fieldID);
                    if (s != null) {
                        sb.append(" child" + fieldID + "=" + s);
                    }
                }
                System.out.println("    " + sb.toString());
            }
            if (doDeletes) {
                joinChildDoc.add(new IntPoint("blockID", parentDocID));
            }
            // Denormalized index is flat; each child is an independent doc:
            w.addDocument(childDoc);
        }
        // Parent last:
        joinDocs.add(parentJoinDoc);
        // addDocuments keeps the block (children + parent) contiguous:
        joinW.addDocuments(joinDocs);
        // Randomly mark ~1/30 of the blocks for deletion:
        if (doDeletes && random().nextInt(30) == 7) {
            toDelete.add(parentDocID);
        }
    }
    if (!toDelete.isEmpty()) {
        // Delete whole blocks (parent + its children share the same blockID):
        Query query = IntPoint.newSetQuery("blockID", toDelete);
        w.deleteDocuments(query);
        joinW.deleteDocuments(query);
    }
    final IndexReader r = w.getReader();
    w.close();
    final IndexReader joinR = joinW.getReader();
    joinW.close();
    if (VERBOSE) {
        System.out.println("TEST: reader=" + r);
        System.out.println("TEST: joinReader=" + joinR);
        Bits liveDocs = MultiFields.getLiveDocs(joinR);
        for (int docIDX = 0; docIDX < joinR.maxDoc(); docIDX++) {
            System.out.println("  docID=" + docIDX + " doc=" + joinR.document(docIDX) + " deleted?=" + (liveDocs != null && liveDocs.get(docIDX) == false));
        }
        PostingsEnum parents = MultiFields.getTermDocsEnum(joinR, "isParent", new BytesRef("x"));
        System.out.println("parent docIDs:");
        while (parents.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
            System.out.println("  " + parents.docID());
        }
    }
    final IndexSearcher s = newSearcher(r, false);
    final IndexSearcher joinS = newSearcher(joinR);
    // Parents are identified by the isParent:x marker term:
    final BitSetProducer parentsFilter = new QueryBitSetProducer(new TermQuery(new Term("isParent", "x")));
    CheckJoinIndex.check(joinS.getIndexReader(), parentsFilter);
    final int iters = 200 * RANDOM_MULTIPLIER;
    for (int iter = 0; iter < iters; iter++) {
        if (VERBOSE) {
            System.out.println("TEST: iter=" + (1 + iter) + " of " + iters);
        }
        // Build a random child query: single term, random boolean, or
        // MUST + random-occur second clause:
        Query childQuery;
        if (random().nextInt(3) == 2) {
            final int childFieldID = random().nextInt(childFields.length);
            childQuery = new TermQuery(new Term("child" + childFieldID, childFields[childFieldID][random().nextInt(childFields[childFieldID].length)]));
        } else if (random().nextInt(3) == 2) {
            BooleanQuery.Builder bq = new BooleanQuery.Builder();
            final int numClauses = TestUtil.nextInt(random(), 2, 4);
            boolean didMust = false;
            for (int clauseIDX = 0; clauseIDX < numClauses; clauseIDX++) {
                Query clause;
                BooleanClause.Occur occur;
                if (!didMust && random().nextBoolean()) {
                    occur = random().nextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT;
                    clause = new TermQuery(randomChildTerm(childFields[0]));
                    didMust = true;
                } else {
                    occur = BooleanClause.Occur.SHOULD;
                    final int childFieldID = TestUtil.nextInt(random(), 1, childFields.length - 1);
                    clause = new TermQuery(new Term("child" + childFieldID, childFields[childFieldID][random().nextInt(childFields[childFieldID].length)]));
                }
                bq.add(clause, occur);
            }
            childQuery = bq.build();
        } else {
            BooleanQuery.Builder bq = new BooleanQuery.Builder();
            bq.add(new TermQuery(randomChildTerm(childFields[0])), BooleanClause.Occur.MUST);
            final int childFieldID = TestUtil.nextInt(random(), 1, childFields.length - 1);
            bq.add(new TermQuery(new Term("child" + childFieldID, childFields[childFieldID][random().nextInt(childFields[childFieldID].length)])), random().nextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT);
            childQuery = bq.build();
        }
        if (random().nextBoolean()) {
            childQuery = new RandomApproximationQuery(childQuery, random());
        }
        final ScoreMode agg = ScoreMode.values()[random().nextInt(ScoreMode.values().length)];
        final ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, agg);
        // To run against the block-join index:
        final Query parentJoinQuery;
        // Same query as parentJoinQuery, but to run against
        // the fully denormalized index (so we can compare
        // results):
        final Query parentQuery;
        if (random().nextBoolean()) {
            parentQuery = childQuery;
            parentJoinQuery = childJoinQuery;
        } else {
            // AND parent field w/ child field
            final BooleanQuery.Builder bq = new BooleanQuery.Builder();
            final Term parentTerm = randomParentTerm(parentFields[0]);
            // Randomize clause order to exercise both scorer orderings:
            if (random().nextBoolean()) {
                bq.add(childJoinQuery, BooleanClause.Occur.MUST);
                bq.add(new TermQuery(parentTerm), BooleanClause.Occur.MUST);
            } else {
                bq.add(new TermQuery(parentTerm), BooleanClause.Occur.MUST);
                bq.add(childJoinQuery, BooleanClause.Occur.MUST);
            }
            final BooleanQuery.Builder bq2 = new BooleanQuery.Builder();
            if (random().nextBoolean()) {
                bq2.add(childQuery, BooleanClause.Occur.MUST);
                bq2.add(new TermQuery(parentTerm), BooleanClause.Occur.MUST);
            } else {
                bq2.add(new TermQuery(parentTerm), BooleanClause.Occur.MUST);
                bq2.add(childQuery, BooleanClause.Occur.MUST);
            }
            parentJoinQuery = bq.build();
            parentQuery = bq2.build();
        }
        final Sort parentSort = getRandomSort("parent", parentFields.length);
        final Sort childSort = getRandomSort("child", childFields.length);
        if (VERBOSE) {
            System.out.println("\nTEST: query=" + parentQuery + " joinQuery=" + parentJoinQuery + " parentSort=" + parentSort + " childSort=" + childSort);
        }
        // Merge both sorts:
        final List<SortField> sortFields = new ArrayList<>(Arrays.asList(parentSort.getSort()));
        sortFields.addAll(Arrays.asList(childSort.getSort()));
        final Sort parentAndChildSort = new Sort(sortFields.toArray(new SortField[sortFields.size()]));
        // Run the flat query on the denormalized index, collecting all hits:
        final TopDocs results = s.search(parentQuery, r.numDocs(), parentAndChildSort);
        if (VERBOSE) {
            System.out.println("\nTEST: normal index gets " + results.totalHits + " hits; sort=" + parentAndChildSort);
            final ScoreDoc[] hits = results.scoreDocs;
            for (int hitIDX = 0; hitIDX < hits.length; hitIDX++) {
                final Document doc = s.doc(hits[hitIDX].doc);
                //System.out.println("  score=" + hits[hitIDX].score + " parentID=" + doc.get("parentID") + " childID=" + doc.get("childID") + " (docID=" + hits[hitIDX].doc + ")");
                System.out.println("  parentID=" + doc.get("parentID") + " childID=" + doc.get("childID") + " (docID=" + hits[hitIDX].doc + ")");
                FieldDoc fd = (FieldDoc) hits[hitIDX];
                if (fd.fields != null) {
                    System.out.print("    " + fd.fields.length + " sort values: ");
                    for (Object o : fd.fields) {
                        if (o instanceof BytesRef) {
                            System.out.print(((BytesRef) o).utf8ToString() + " ");
                        } else {
                            System.out.print(o + " ");
                        }
                    }
                    System.out.println();
                }
            }
        }
        // Run the join query, then fetch each matched parent's children:
        TopDocs joinedResults = joinS.search(parentJoinQuery, numParentDocs);
        SortedMap<Integer, TopDocs> joinResults = new TreeMap<>();
        for (ScoreDoc parentHit : joinedResults.scoreDocs) {
            ParentChildrenBlockJoinQuery childrenQuery = new ParentChildrenBlockJoinQuery(parentsFilter, childQuery, parentHit.doc);
            TopDocs childTopDocs = joinS.search(childrenQuery, maxNumChildrenPerParent, childSort);
            final Document parentDoc = joinS.doc(parentHit.doc);
            joinResults.put(Integer.valueOf(parentDoc.get("parentID")), childTopDocs);
        }
        final int hitsPerGroup = TestUtil.nextInt(random(), 1, 20);
        if (VERBOSE) {
            System.out.println("\nTEST: block join index gets " + (joinResults == null ? 0 : joinResults.size()) + " groups; hitsPerGroup=" + hitsPerGroup);
            if (joinResults != null) {
                for (Map.Entry<Integer, TopDocs> entry : joinResults.entrySet()) {
                    // NOTE(review): "docID=" below prints the parentID key, not
                    // an actual Lucene doc ID — misleading label in the debug output.
                    System.out.println("  group parentID=" + entry.getKey() + " (docID=" + entry.getKey() + ")");
                    for (ScoreDoc childHit : entry.getValue().scoreDocs) {
                        final Document doc = joinS.doc(childHit.doc);
                        //              System.out.println("    score=" + childHit.score + " childID=" + doc.get("childID") + " (docID=" + childHit.doc + ")");
                        System.out.println("    childID=" + doc.get("childID") + " child0=" + doc.get("child0") + " (docID=" + childHit.doc + ")");
                    }
                }
            }
        }
        if (results.totalHits == 0) {
            assertEquals(0, joinResults.size());
        } else {
            compareHits(r, joinR, results, joinResults);
            // Also sanity-check explain() output for the top join hits:
            TopDocs b = joinS.search(childJoinQuery, 10);
            for (ScoreDoc hit : b.scoreDocs) {
                Explanation explanation = joinS.explain(childJoinQuery, hit.doc);
                // hit.doc is the parent; the doc just before it is its last child
                // (parents are indexed after their children in the block):
                Document document = joinS.doc(hit.doc - 1);
                int childId = Integer.parseInt(document.get("childID"));
                //System.out.println("  hit docID=" + hit.doc + " childId=" + childId + " parentId=" + document.get("parentID"));
                assertTrue(explanation.isMatch());
                assertEquals(hit.score, explanation.getValue(), 0.0f);
                Matcher m = Pattern.compile("Score based on ([0-9]+) child docs in range from ([0-9]+) to ([0-9]+), best match:").matcher(explanation.getDescription());
                assertTrue("Block Join description not matches", m.matches());
                assertTrue("Matched children not positive", Integer.parseInt(m.group(1)) > 0);
                // childId is 0-based within the block, so the block's first
                // child doc ID is (last child docID) - childId:
                assertEquals("Wrong child range start", hit.doc - 1 - childId, Integer.parseInt(m.group(2)));
                assertEquals("Wrong child range end", hit.doc - 1, Integer.parseInt(m.group(3)));
                Explanation childWeightExplanation = explanation.getDetails()[0];
                if ("sum of:".equals(childWeightExplanation.getDescription())) {
                    childWeightExplanation = childWeightExplanation.getDetails()[0];
                }
                assertTrue("Wrong child weight description", childWeightExplanation.getDescription().startsWith("weight(child"));
            }
        }
        // Test joining in the opposite direction (parent to
        // child):
        // Get random query against parent documents:
        final Query parentQuery2;
        if (random().nextInt(3) == 2) {
            final int fieldID = random().nextInt(parentFields.length);
            parentQuery2 = new TermQuery(new Term("parent" + fieldID, parentFields[fieldID][random().nextInt(parentFields[fieldID].length)]));
        } else if (random().nextInt(3) == 2) {
            BooleanQuery.Builder bq = new BooleanQuery.Builder();
            final int numClauses = TestUtil.nextInt(random(), 2, 4);
            boolean didMust = false;
            for (int clauseIDX = 0; clauseIDX < numClauses; clauseIDX++) {
                Query clause;
                BooleanClause.Occur occur;
                if (!didMust && random().nextBoolean()) {
                    occur = random().nextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT;
                    clause = new TermQuery(randomParentTerm(parentFields[0]));
                    didMust = true;
                } else {
                    occur = BooleanClause.Occur.SHOULD;
                    final int fieldID = TestUtil.nextInt(random(), 1, parentFields.length - 1);
                    clause = new TermQuery(new Term("parent" + fieldID, parentFields[fieldID][random().nextInt(parentFields[fieldID].length)]));
                }
                bq.add(clause, occur);
            }
            parentQuery2 = bq.build();
        } else {
            BooleanQuery.Builder bq = new BooleanQuery.Builder();
            bq.add(new TermQuery(randomParentTerm(parentFields[0])), BooleanClause.Occur.MUST);
            final int fieldID = TestUtil.nextInt(random(), 1, parentFields.length - 1);
            bq.add(new TermQuery(new Term("parent" + fieldID, parentFields[fieldID][random().nextInt(parentFields[fieldID].length)])), random().nextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT);
            parentQuery2 = bq.build();
        }
        if (VERBOSE) {
            System.out.println("\nTEST: top down: parentQuery2=" + parentQuery2);
        }
        // Maps parent query to child docs:
        final ToChildBlockJoinQuery parentJoinQuery2 = new ToChildBlockJoinQuery(parentQuery2, parentsFilter);
        // To run against the block-join index:
        Query childJoinQuery2;
        // Same query as parentJoinQuery, but to run against
        // the fully denormalized index (so we can compare
        // results):
        Query childQuery2;
        if (random().nextBoolean()) {
            childQuery2 = parentQuery2;
            childJoinQuery2 = parentJoinQuery2;
        } else {
            final Term childTerm = randomChildTerm(childFields[0]);
            if (random().nextBoolean()) {
                // filtered case
                childJoinQuery2 = parentJoinQuery2;
                childJoinQuery2 = new BooleanQuery.Builder().add(childJoinQuery2, Occur.MUST).add(new TermQuery(childTerm), Occur.FILTER).build();
            } else {
                // AND child field w/ parent query:
                final BooleanQuery.Builder bq = new BooleanQuery.Builder();
                if (random().nextBoolean()) {
                    bq.add(parentJoinQuery2, BooleanClause.Occur.MUST);
                    bq.add(new TermQuery(childTerm), BooleanClause.Occur.MUST);
                } else {
                    bq.add(new TermQuery(childTerm), BooleanClause.Occur.MUST);
                    bq.add(parentJoinQuery2, BooleanClause.Occur.MUST);
                }
                childJoinQuery2 = bq.build();
            }
            if (random().nextBoolean()) {
                // filtered case
                childQuery2 = parentQuery2;
                childQuery2 = new BooleanQuery.Builder().add(childQuery2, Occur.MUST).add(new TermQuery(childTerm), Occur.FILTER).build();
            } else {
                final BooleanQuery.Builder bq2 = new BooleanQuery.Builder();
                if (random().nextBoolean()) {
                    bq2.add(parentQuery2, BooleanClause.Occur.MUST);
                    bq2.add(new TermQuery(childTerm), BooleanClause.Occur.MUST);
                } else {
                    bq2.add(new TermQuery(childTerm), BooleanClause.Occur.MUST);
                    bq2.add(parentQuery2, BooleanClause.Occur.MUST);
                }
                childQuery2 = bq2.build();
            }
        }
        final Sort childSort2 = getRandomSort("child", childFields.length);
        // Search denormalized index:
        if (VERBOSE) {
            System.out.println("TEST: run top down query=" + childQuery2 + " sort=" + childSort2);
        }
        final TopDocs results2 = s.search(childQuery2, r.numDocs(), childSort2);
        if (VERBOSE) {
            System.out.println("  " + results2.totalHits + " totalHits:");
            for (ScoreDoc sd : results2.scoreDocs) {
                final Document doc = s.doc(sd.doc);
                System.out.println("  childID=" + doc.get("childID") + " parentID=" + doc.get("parentID") + " docID=" + sd.doc);
            }
        }
        // Search join index:
        if (VERBOSE) {
            System.out.println("TEST: run top down join query=" + childJoinQuery2 + " sort=" + childSort2);
        }
        TopDocs joinResults2 = joinS.search(childJoinQuery2, joinR.numDocs(), childSort2);
        if (VERBOSE) {
            System.out.println("  " + joinResults2.totalHits + " totalHits:");
            for (ScoreDoc sd : joinResults2.scoreDocs) {
                final Document doc = joinS.doc(sd.doc);
                final Document parentDoc = getParentDoc(joinR, parentsFilter, sd.doc);
                System.out.println("  childID=" + doc.get("childID") + " parentID=" + parentDoc.get("parentID") + " docID=" + sd.doc);
            }
        }
        compareChildHits(r, joinR, results2, joinResults2);
    }
    r.close();
    joinR.close();
    dir.close();
    joinDir.close();
}
Also used : ArrayList(java.util.ArrayList) Document(org.apache.lucene.document.Document) StoredField(org.apache.lucene.document.StoredField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) PostingsEnum(org.apache.lucene.index.PostingsEnum) Term(org.apache.lucene.index.Term) IntPoint(org.apache.lucene.document.IntPoint) Bits(org.apache.lucene.util.Bits) Map(java.util.Map) TreeMap(java.util.TreeMap) SortedMap(java.util.SortedMap) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Matcher(java.util.regex.Matcher) Occur(org.apache.lucene.search.BooleanClause.Occur) StringField(org.apache.lucene.document.StringField) StoredField(org.apache.lucene.document.StoredField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) Field(org.apache.lucene.document.Field) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) TreeMap(java.util.TreeMap) IntPoint(org.apache.lucene.document.IntPoint) IndexReader(org.apache.lucene.index.IndexReader)

Example 27 with StoredField

Use of org.apache.lucene.document.StoredField in the Apache lucene-solr project.

From class TestIndexSorting, method testTieBreak.

/**
 * Verifies that an index sort on a string field reorders documents across a
 * commit boundary: docs written first ("bar2") must end up after docs written
 * later ("bar1"), with original insertion order preserved as the tie-break
 * within each group.
 */
public void testTieBreak() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig config = newIndexWriterConfig(new MockAnalyzer(random()));
    config.setIndexSort(new Sort(new SortField("foo", SortField.Type.STRING)));
    config.setMergePolicy(newLogMergePolicy());
    IndexWriter writer = new IndexWriter(dir, config);
    for (int id = 0; id < 1000; id++) {
        Document doc = new Document();
        doc.add(new StoredField("id", id));
        // First half gets "bar2" (sorts after "bar1"), so the index sort
        // must move the second half in front after merging:
        String value = id < 500 ? "bar2" : "bar1";
        doc.add(new SortedDocValuesField("foo", new BytesRef(value)));
        writer.addDocument(doc);
        // Commit mid-stream so the merge has multiple segments to reorder:
        if (id == 500) {
            writer.commit();
        }
    }
    writer.forceMerge(1);
    DirectoryReader reader = DirectoryReader.open(writer);
    for (int docID = 0; docID < 1000; docID++) {
        // After sorting, "bar1" docs (original ids 500..999) come first,
        // each group keeping its original relative order:
        int expectedID = docID < 500 ? 500 + docID : docID - 500;
        assertEquals(expectedID, reader.document(docID).getField("id").numericValue().intValue());
    }
    IOUtils.close(reader, writer, dir);
}
Also used : SortField(org.apache.lucene.search.SortField) SortedSetSortField(org.apache.lucene.search.SortedSetSortField) SortedNumericSortField(org.apache.lucene.search.SortedNumericSortField) Document(org.apache.lucene.document.Document) BinaryPoint(org.apache.lucene.document.BinaryPoint) IntPoint(org.apache.lucene.document.IntPoint) StoredField(org.apache.lucene.document.StoredField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) Sort(org.apache.lucene.search.Sort) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Example 28 with StoredField

Use of org.apache.lucene.document.StoredField in the Apache lucene-solr project.

From class TestTermVectorsWriter, method testTermVectorCorruption2.

// LUCENE-1168
public void testTermVectorCorruption2() throws IOException {
    Directory dir = newDirectory();
    for (int iter = 0; iter < 2; iter++) {
        IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setMaxBufferedDocs(2).setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH).setMergeScheduler(new SerialMergeScheduler()).setMergePolicy(new LogDocMergePolicy()));
        Document document = new Document();
        FieldType customType = new FieldType();
        customType.setStored(true);
        Field storedField = newField("stored", "stored", customType);
        document.add(storedField);
        writer.addDocument(document);
        writer.addDocument(document);
        document = new Document();
        document.add(storedField);
        FieldType customType2 = new FieldType(StringField.TYPE_NOT_STORED);
        customType2.setStoreTermVectors(true);
        customType2.setStoreTermVectorPositions(true);
        customType2.setStoreTermVectorOffsets(true);
        Field termVectorField = newField("termVector", "termVector", customType2);
        document.add(termVectorField);
        writer.addDocument(document);
        writer.forceMerge(1);
        writer.close();
        IndexReader reader = DirectoryReader.open(dir);
        assertNull(reader.getTermVectors(0));
        assertNull(reader.getTermVectors(1));
        assertNotNull(reader.getTermVectors(2));
        reader.close();
    }
    dir.close();
}
Also used : StringField(org.apache.lucene.document.StringField) StoredField(org.apache.lucene.document.StoredField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) Directory(org.apache.lucene.store.Directory) FieldType(org.apache.lucene.document.FieldType)

Example 29 with StoredField

Use of org.apache.lucene.document.StoredField in the Apache lucene-solr project.

From class TestTermVectorsWriter, method testTermVectorCorruption.

// LUCENE-1168
public void testTermVectorCorruption() throws IOException {
    Directory dir = newDirectory();
    for (int iter = 0; iter < 2; iter++) {
        IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setMaxBufferedDocs(2).setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH).setMergeScheduler(new SerialMergeScheduler()).setMergePolicy(new LogDocMergePolicy()));
        Document document = new Document();
        FieldType customType = new FieldType();
        customType.setStored(true);
        Field storedField = newField("stored", "stored", customType);
        document.add(storedField);
        writer.addDocument(document);
        writer.addDocument(document);
        document = new Document();
        document.add(storedField);
        FieldType customType2 = new FieldType(StringField.TYPE_NOT_STORED);
        customType2.setStoreTermVectors(true);
        customType2.setStoreTermVectorPositions(true);
        customType2.setStoreTermVectorOffsets(true);
        Field termVectorField = newField("termVector", "termVector", customType2);
        document.add(termVectorField);
        writer.addDocument(document);
        writer.forceMerge(1);
        writer.close();
        IndexReader reader = DirectoryReader.open(dir);
        for (int i = 0; i < reader.numDocs(); i++) {
            reader.document(i);
            reader.getTermVectors(i);
        }
        reader.close();
        writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setMaxBufferedDocs(2).setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH).setMergeScheduler(new SerialMergeScheduler()).setMergePolicy(new LogDocMergePolicy()));
        Directory[] indexDirs = { new MockDirectoryWrapper(random(), TestUtil.ramCopyOf(dir)) };
        writer.addIndexes(indexDirs);
        writer.forceMerge(1);
        writer.close();
    }
    dir.close();
}
Also used : MockDirectoryWrapper(org.apache.lucene.store.MockDirectoryWrapper) StringField(org.apache.lucene.document.StringField) StoredField(org.apache.lucene.document.StoredField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) Directory(org.apache.lucene.store.Directory) FieldType(org.apache.lucene.document.FieldType)

Example 30 with StoredField

Use of org.apache.lucene.document.StoredField in the Apache lucene-solr project.

From class TestSortRandom, method testRandomStringSort.

/**
 * Randomized end-to-end check of string sorting backed by SortedDocValues.
 *
 * <p>Indexes {@code NUM_DOCS} documents whose {@code "stringdv"} field holds a
 * random string (simple or full-Unicode), with roughly 10% of documents
 * deliberately missing the value. Then runs many search iterations with
 * randomized sort configurations (reverse or not, missing-last or not, with or
 * without a {@code FIELD_DOC} tie-breaker) and compares the returned sort
 * values against an independently computed expected ordering.
 *
 * @param type the string sort type under test; passed straight through to the
 *             {@link SortField} constructor
 * @throws Exception on any indexing or search failure (test method)
 *
 * NOTE(review): the method mixes the local {@code random} (seeded once from the
 * master test random) with direct calls to the per-test {@code random()} source
 * (the 10%-missing draw and the {@code sortMissingLast} draw). Presumably
 * harmless in a randomized test, but worth confirming it is intentional.
 */
private void testRandomStringSort(SortField.Type type) throws Exception {
    // Private Random derived from the test's master seed, so the draws below
    // are reproducible for a given test-run seed.
    Random random = new Random(random().nextLong());
    final int NUM_DOCS = atLeast(100);
    final Directory dir = newDirectory();
    final RandomIndexWriter writer = new RandomIndexWriter(random, dir);
    // When dups are disallowed, 'seen' is used below to reject repeat strings.
    final boolean allowDups = random.nextBoolean();
    final Set<String> seen = new HashSet<>();
    final int maxLength = TestUtil.nextInt(random, 5, 100);
    if (VERBOSE) {
        System.out.println("TEST: NUM_DOCS=" + NUM_DOCS + " maxLength=" + maxLength + " allowDups=" + allowDups);
    }
    int numDocs = 0;
    // docValues.get(i) is the string indexed for doc i, or null when doc i is
    // missing the value; it is handed to RandomQuery below so expected results
    // can be computed from the matched docs.
    final List<BytesRef> docValues = new ArrayList<>();
    // TODO: deletions
    while (numDocs < NUM_DOCS) {
        final Document doc = new Document();
        // 10% of the time, the document is missing the value:
        final BytesRef br;
        if (random().nextInt(10) != 7) {
            final String s;
            if (random.nextBoolean()) {
                s = TestUtil.randomSimpleString(random, maxLength);
            } else {
                s = TestUtil.randomUnicodeString(random, maxLength);
            }
            if (!allowDups) {
                if (seen.contains(s)) {
                    // Duplicate while dups are disallowed: retry this slot
                    // without incrementing numDocs.
                    continue;
                }
                seen.add(s);
            }
            if (VERBOSE) {
                System.out.println("  " + numDocs + ": s=" + s);
            }
            br = new BytesRef(s);
            doc.add(new SortedDocValuesField("stringdv", br));
            docValues.add(br);
        } else {
            br = null;
            if (VERBOSE) {
                System.out.println("  " + numDocs + ": <missing>");
            }
            docValues.add(null);
        }
        // Record the doc's ordinal both as doc values and stored, so hits can
        // be printed by id in the VERBOSE dump below.
        doc.add(new NumericDocValuesField("id", numDocs));
        doc.add(new StoredField("id", numDocs));
        writer.addDocument(doc);
        numDocs++;
        if (random.nextInt(40) == 17) {
            // force flush
            writer.getReader().close();
        }
    }
    final IndexReader r = writer.getReader();
    writer.close();
    if (VERBOSE) {
        System.out.println("  reader=" + r);
    }
    // Second argument is forwarded to the test framework's searcher factory —
    // NOTE(review): presumably disables a wrapping behavior; confirm against
    // LuceneTestCase.newSearcher.
    final IndexSearcher s = newSearcher(r, false);
    final int ITERS = atLeast(100);
    for (int iter = 0; iter < ITERS; iter++) {
        // Randomize this iteration's sort configuration.
        final boolean reverse = random.nextBoolean();
        final TopFieldDocs hits;
        final SortField sf;
        final boolean sortMissingLast;
        sf = new SortField("stringdv", type, reverse);
        sortMissingLast = random().nextBoolean();
        if (sortMissingLast) {
            sf.setMissingValue(SortField.STRING_LAST);
        }
        final Sort sort;
        if (random.nextBoolean()) {
            sort = new Sort(sf);
        } else {
            // Half the time, add doc id as an explicit tie-breaker.
            sort = new Sort(sf, SortField.FIELD_DOC);
        }
        // Sometimes request more hits than can possibly exist (maxDoc + 20).
        final int hitCount = TestUtil.nextInt(random, 1, r.maxDoc() + 20);
        // RandomQuery matches a random subset of docs; judging by its use
        // below, it records the docValues of the docs it matched in
        // f.matchValues — NOTE(review): confirm in RandomQuery itself.
        final RandomQuery f = new RandomQuery(random.nextLong(), random.nextFloat(), docValues);
        // The two trailing booleans are randomized flags of this search
        // overload — NOTE(review): presumably doDocScores/doMaxScore; confirm.
        hits = s.search(f, hitCount, sort, random.nextBoolean(), random.nextBoolean());
        if (VERBOSE) {
            System.out.println("\nTEST: iter=" + iter + " " + hits.totalHits + " hits; topN=" + hitCount + "; reverse=" + reverse + "; sortMissingLast=" + sortMissingLast + " sort=" + sort);
        }
        // Compute expected results: order the matched values, placing nulls
        // (missing values) according to sortMissingLast.
        Collections.sort(f.matchValues, new Comparator<BytesRef>() {

            @Override
            public int compare(BytesRef a, BytesRef b) {
                if (a == null) {
                    if (b == null) {
                        return 0;
                    }
                    if (sortMissingLast) {
                        // Missing sorts after every present value.
                        return 1;
                    } else {
                        return -1;
                    }
                } else if (b == null) {
                    if (sortMissingLast) {
                        return -1;
                    } else {
                        return 1;
                    }
                } else {
                    return a.compareTo(b);
                }
            }
        });
        if (reverse) {
            Collections.reverse(f.matchValues);
        }
        final List<BytesRef> expected = f.matchValues;
        if (VERBOSE) {
            System.out.println("  expected:");
            for (int idx = 0; idx < expected.size(); idx++) {
                BytesRef br = expected.get(idx);
                System.out.println("    " + idx + ": " + (br == null ? "<missing>" : br.utf8ToString()));
                if (idx == hitCount - 1) {
                    // Only the top hitCount entries are comparable to hits.
                    break;
                }
            }
        }
        if (VERBOSE) {
            System.out.println("  actual:");
            for (int hitIDX = 0; hitIDX < hits.scoreDocs.length; hitIDX++) {
                final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX];
                BytesRef br = (BytesRef) fd.fields[0];
                System.out.println("    " + hitIDX + ": " + (br == null ? "<missing>" : br.utf8ToString()) + " id=" + s.doc(fd.doc).get("id"));
            }
        }
        // Verify: each hit's first sort value must match the expected ordering
        // position for position.
        for (int hitIDX = 0; hitIDX < hits.scoreDocs.length; hitIDX++) {
            final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX];
            BytesRef br = expected.get(hitIDX);
            BytesRef br2 = (BytesRef) fd.fields[0];
            assertEquals(br, br2);
        }
    }
    r.close();
    dir.close();
}
Also used : ArrayList(java.util.ArrayList) Document(org.apache.lucene.document.Document) StoredField(org.apache.lucene.document.StoredField) Random(java.util.Random) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) HashSet(java.util.HashSet)

Aggregations

StoredField (org.apache.lucene.document.StoredField)140 Document (org.apache.lucene.document.Document)118 Directory (org.apache.lucene.store.Directory)77 StringField (org.apache.lucene.document.StringField)61 Field (org.apache.lucene.document.Field)52 BytesRef (org.apache.lucene.util.BytesRef)50 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)41 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)39 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)36 TextField (org.apache.lucene.document.TextField)34 IntPoint (org.apache.lucene.document.IntPoint)31 SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField)31 IndexReader (org.apache.lucene.index.IndexReader)29 IndexSearcher (org.apache.lucene.search.IndexSearcher)26 TopDocs (org.apache.lucene.search.TopDocs)24 SortedNumericDocValuesField (org.apache.lucene.document.SortedNumericDocValuesField)22 SortField (org.apache.lucene.search.SortField)22 FieldType (org.apache.lucene.document.FieldType)21 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)21 Sort (org.apache.lucene.search.Sort)21