Search in sources :

Example 41 with FieldType

use of org.apache.lucene.document.FieldType in project lucene-solr by apache.

the class BaseTestCheckIndex method testDeletedDocs.

public void testDeletedDocs(Directory dir) throws IOException {
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setMaxBufferedDocs(2));
    for (int i = 0; i < 19; i++) {
        Document doc = new Document();
        FieldType customType = new FieldType(TextField.TYPE_STORED);
        customType.setStoreTermVectors(true);
        customType.setStoreTermVectorPositions(true);
        customType.setStoreTermVectorOffsets(true);
        doc.add(newField("field", "aaa" + i, customType));
        writer.addDocument(doc);
    }
    writer.forceMerge(1);
    writer.commit();
    writer.deleteDocuments(new Term("field", "aaa5"));
    writer.close();
    ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
    CheckIndex checker = new CheckIndex(dir);
    checker.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8));
    if (VERBOSE)
        checker.setInfoStream(System.out);
    CheckIndex.Status indexStatus = checker.checkIndex();
    if (indexStatus.clean == false) {
        System.out.println("CheckIndex failed");
        System.out.println(bos.toString(IOUtils.UTF_8));
        fail();
    }
    final CheckIndex.Status.SegmentInfoStatus seg = indexStatus.segmentInfos.get(0);
    assertTrue(seg.openReaderPassed);
    assertNotNull(seg.diagnostics);
    assertNotNull(seg.fieldNormStatus);
    assertNull(seg.fieldNormStatus.error);
    assertEquals(1, seg.fieldNormStatus.totFields);
    assertNotNull(seg.termIndexStatus);
    assertNull(seg.termIndexStatus.error);
    assertEquals(18, seg.termIndexStatus.termCount);
    assertEquals(18, seg.termIndexStatus.totFreq);
    assertEquals(18, seg.termIndexStatus.totPos);
    assertNotNull(seg.storedFieldStatus);
    assertNull(seg.storedFieldStatus.error);
    assertEquals(18, seg.storedFieldStatus.docCount);
    assertEquals(18, seg.storedFieldStatus.totFields);
    assertNotNull(seg.termVectorStatus);
    assertNull(seg.termVectorStatus.error);
    assertEquals(18, seg.termVectorStatus.docCount);
    assertEquals(18, seg.termVectorStatus.totVectors);
    assertNotNull(seg.diagnostics.get("java.vm.version"));
    assertNotNull(seg.diagnostics.get("java.runtime.version"));
    assertTrue(seg.diagnostics.size() > 0);
    final List<String> onlySegments = new ArrayList<>();
    onlySegments.add("_0");
    assertTrue(checker.checkIndex(onlySegments).clean == true);
    checker.close();
}
Also used : PrintStream(java.io.PrintStream) ArrayList(java.util.ArrayList) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Document(org.apache.lucene.document.Document) FieldType(org.apache.lucene.document.FieldType) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer)

Example 42 with FieldType

use of org.apache.lucene.document.FieldType in project lucene-solr by apache.

the class DocHelper method createDocument.

public static Document createDocument(int n, String indexName, int numFields) {
    StringBuilder sb = new StringBuilder();
    FieldType customType = new FieldType(TextField.TYPE_STORED);
    customType.setStoreTermVectors(true);
    customType.setStoreTermVectorPositions(true);
    customType.setStoreTermVectorOffsets(true);
    FieldType customType1 = new FieldType(StringField.TYPE_STORED);
    customType1.setStoreTermVectors(true);
    customType1.setStoreTermVectorPositions(true);
    customType1.setStoreTermVectorOffsets(true);
    final Document doc = new Document();
    doc.add(new Field("id", Integer.toString(n), customType1));
    doc.add(new Field("indexname", indexName, customType1));
    sb.append("a");
    sb.append(n);
    doc.add(new Field("field1", sb.toString(), customType));
    sb.append(" b");
    sb.append(n);
    for (int i = 1; i < numFields; i++) {
        doc.add(new Field("field" + (i + 1), sb.toString(), customType));
    }
    return doc;
}
Also used : StringField(org.apache.lucene.document.StringField) StoredField(org.apache.lucene.document.StoredField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) Document(org.apache.lucene.document.Document) FieldType(org.apache.lucene.document.FieldType)

Example 43 with FieldType

use of org.apache.lucene.document.FieldType in project lucene-solr by apache.

the class TestGrouping method testBasic.

public void testBasic() throws Exception {
    String groupField = "author";
    FieldType customType = new FieldType();
    customType.setStored(true);
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
    // 0
    Document doc = new Document();
    addGroupField(doc, groupField, "author1");
    doc.add(new TextField("content", "random text", Field.Store.YES));
    doc.add(new Field("id", "1", customType));
    w.addDocument(doc);
    // 1
    doc = new Document();
    addGroupField(doc, groupField, "author1");
    doc.add(new TextField("content", "some more random text", Field.Store.YES));
    doc.add(new Field("id", "2", customType));
    w.addDocument(doc);
    // 2
    doc = new Document();
    addGroupField(doc, groupField, "author1");
    doc.add(new TextField("content", "some more random textual data", Field.Store.YES));
    doc.add(new Field("id", "3", customType));
    w.addDocument(doc);
    // 3
    doc = new Document();
    addGroupField(doc, groupField, "author2");
    doc.add(new TextField("content", "some random text", Field.Store.YES));
    doc.add(new Field("id", "4", customType));
    w.addDocument(doc);
    // 4
    doc = new Document();
    addGroupField(doc, groupField, "author3");
    doc.add(new TextField("content", "some more random text", Field.Store.YES));
    doc.add(new Field("id", "5", customType));
    w.addDocument(doc);
    // 5
    doc = new Document();
    addGroupField(doc, groupField, "author3");
    doc.add(new TextField("content", "random", Field.Store.YES));
    doc.add(new Field("id", "6", customType));
    w.addDocument(doc);
    // 6 -- no author field
    doc = new Document();
    doc.add(new TextField("content", "random word stuck in alot of other text", Field.Store.YES));
    doc.add(new Field("id", "6", customType));
    w.addDocument(doc);
    IndexSearcher indexSearcher = newSearcher(w.getReader());
    w.close();
    final Sort groupSort = Sort.RELEVANCE;
    final FirstPassGroupingCollector<?> c1 = createRandomFirstPassCollector(groupField, groupSort, 10);
    indexSearcher.search(new TermQuery(new Term("content", "random")), c1);
    final TopGroupsCollector<?> c2 = createSecondPassCollector(c1, groupSort, Sort.RELEVANCE, 0, 5, true, true, true);
    indexSearcher.search(new TermQuery(new Term("content", "random")), c2);
    final TopGroups<?> groups = c2.getTopGroups(0);
    assertFalse(Float.isNaN(groups.maxScore));
    assertEquals(7, groups.totalHitCount);
    assertEquals(7, groups.totalGroupedHitCount);
    assertEquals(4, groups.groups.length);
    // relevance order: 5, 0, 3, 4, 1, 2, 6
    // the later a document is added the higher this docId
    // value
    GroupDocs<?> group = groups.groups[0];
    compareGroupValue("author3", group);
    assertEquals(2, group.scoreDocs.length);
    assertEquals(5, group.scoreDocs[0].doc);
    assertEquals(4, group.scoreDocs[1].doc);
    assertTrue(group.scoreDocs[0].score > group.scoreDocs[1].score);
    group = groups.groups[1];
    compareGroupValue("author1", group);
    assertEquals(3, group.scoreDocs.length);
    assertEquals(0, group.scoreDocs[0].doc);
    assertEquals(1, group.scoreDocs[1].doc);
    assertEquals(2, group.scoreDocs[2].doc);
    assertTrue(group.scoreDocs[0].score >= group.scoreDocs[1].score);
    assertTrue(group.scoreDocs[1].score >= group.scoreDocs[2].score);
    group = groups.groups[2];
    compareGroupValue("author2", group);
    assertEquals(1, group.scoreDocs.length);
    assertEquals(3, group.scoreDocs[0].doc);
    group = groups.groups[3];
    compareGroupValue(null, group);
    assertEquals(1, group.scoreDocs.length);
    assertEquals(6, group.scoreDocs[0].doc);
    indexSearcher.getIndexReader().close();
    dir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) FieldType(org.apache.lucene.document.FieldType) StringField(org.apache.lucene.document.StringField) SortField(org.apache.lucene.search.SortField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) TextField(org.apache.lucene.document.TextField) Sort(org.apache.lucene.search.Sort) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Example 44 with FieldType

use of org.apache.lucene.document.FieldType in project lucene-solr by apache.

the class HighlighterPhraseTest method testSparsePhraseWithNoPositions.

public void testSparsePhraseWithNoPositions() throws IOException, InvalidTokenOffsetsException {
    final String TEXT = "the fox did not jump";
    final Directory directory = newDirectory();
    final IndexWriter indexWriter = new IndexWriter(directory, newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)));
    try {
        final Document document = new Document();
        FieldType customType = new FieldType(TextField.TYPE_STORED);
        customType.setStoreTermVectorOffsets(true);
        customType.setStoreTermVectors(true);
        document.add(new Field(FIELD, TEXT, customType));
        indexWriter.addDocument(document);
    } finally {
        indexWriter.close();
    }
    final IndexReader indexReader = DirectoryReader.open(directory);
    try {
        assertEquals(1, indexReader.numDocs());
        final IndexSearcher indexSearcher = newSearcher(indexReader);
        final PhraseQuery phraseQuery = new PhraseQuery(1, FIELD, "did", "jump");
        TopDocs hits = indexSearcher.search(phraseQuery, 1);
        assertEquals(1, hits.totalHits);
        final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new SimpleHTMLEncoder(), new QueryScorer(phraseQuery));
        final TokenStream tokenStream = TokenSources.getTermVectorTokenStreamOrNull(FIELD, indexReader.getTermVectors(0), -1);
        assertEquals("the fox <B>did</B> not <B>jump</B>", highlighter.getBestFragment(tokenStream, TEXT));
    } finally {
        indexReader.close();
        directory.close();
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TokenStream(org.apache.lucene.analysis.TokenStream) PhraseQuery(org.apache.lucene.search.PhraseQuery) Document(org.apache.lucene.document.Document) FieldType(org.apache.lucene.document.FieldType) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) Directory(org.apache.lucene.store.Directory)

Example 45 with FieldType

use of org.apache.lucene.document.FieldType in project lucene-solr by apache.

the class HighlighterPhraseTest method testSparsePhrase.

public void testSparsePhrase() throws IOException, InvalidTokenOffsetsException {
    final String TEXT = "the fox did not jump";
    final Directory directory = newDirectory();
    final IndexWriter indexWriter = new IndexWriter(directory, newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)));
    try {
        final Document document = new Document();
        FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
        customType.setStoreTermVectorOffsets(true);
        customType.setStoreTermVectorPositions(true);
        customType.setStoreTermVectors(true);
        document.add(new Field(FIELD, new TokenStreamSparse(), customType));
        indexWriter.addDocument(document);
    } finally {
        indexWriter.close();
    }
    final IndexReader indexReader = DirectoryReader.open(directory);
    try {
        assertEquals(1, indexReader.numDocs());
        final IndexSearcher indexSearcher = newSearcher(indexReader);
        final PhraseQuery phraseQuery = new PhraseQuery(FIELD, "did", "jump");
        TopDocs hits = indexSearcher.search(phraseQuery, 1);
        assertEquals(0, hits.totalHits);
        final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new SimpleHTMLEncoder(), new QueryScorer(phraseQuery));
        final TokenStream tokenStream = TokenSources.getTermVectorTokenStreamOrNull(FIELD, indexReader.getTermVectors(0), -1);
        assertEquals(highlighter.getBestFragment(new TokenStreamSparse(), TEXT), highlighter.getBestFragment(tokenStream, TEXT));
    } finally {
        indexReader.close();
        directory.close();
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TokenStream(org.apache.lucene.analysis.TokenStream) PhraseQuery(org.apache.lucene.search.PhraseQuery) Document(org.apache.lucene.document.Document) FieldType(org.apache.lucene.document.FieldType) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) Directory(org.apache.lucene.store.Directory)

Aggregations

FieldType (org.apache.lucene.document.FieldType)283 Document (org.apache.lucene.document.Document)244 Field (org.apache.lucene.document.Field)209 Directory (org.apache.lucene.store.Directory)175 TextField (org.apache.lucene.document.TextField)168 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)125 StringField (org.apache.lucene.document.StringField)88 StoredField (org.apache.lucene.document.StoredField)77 BytesRef (org.apache.lucene.util.BytesRef)56 IndexWriter (org.apache.lucene.index.IndexWriter)53 IndexReader (org.apache.lucene.index.IndexReader)49 IndexSearcher (org.apache.lucene.search.IndexSearcher)46 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)45 Term (org.apache.lucene.index.Term)41 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)39 RAMDirectory (org.apache.lucene.store.RAMDirectory)37 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)34 TermQuery (org.apache.lucene.search.TermQuery)34 Analyzer (org.apache.lucene.analysis.Analyzer)33 SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField)33