use of org.apache.lucene.document.FieldType in project lucene-solr by apache.
the class BaseTestCheckIndex method testDeletedDocs.
public void testDeletedDocs(Directory dir) throws IOException {
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setMaxBufferedDocs(2));
for (int i = 0; i < 19; i++) {
Document doc = new Document();
FieldType customType = new FieldType(TextField.TYPE_STORED);
customType.setStoreTermVectors(true);
customType.setStoreTermVectorPositions(true);
customType.setStoreTermVectorOffsets(true);
doc.add(newField("field", "aaa" + i, customType));
writer.addDocument(doc);
}
writer.forceMerge(1);
writer.commit();
writer.deleteDocuments(new Term("field", "aaa5"));
writer.close();
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
CheckIndex checker = new CheckIndex(dir);
checker.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8));
if (VERBOSE)
checker.setInfoStream(System.out);
CheckIndex.Status indexStatus = checker.checkIndex();
if (indexStatus.clean == false) {
System.out.println("CheckIndex failed");
System.out.println(bos.toString(IOUtils.UTF_8));
fail();
}
final CheckIndex.Status.SegmentInfoStatus seg = indexStatus.segmentInfos.get(0);
assertTrue(seg.openReaderPassed);
assertNotNull(seg.diagnostics);
assertNotNull(seg.fieldNormStatus);
assertNull(seg.fieldNormStatus.error);
assertEquals(1, seg.fieldNormStatus.totFields);
assertNotNull(seg.termIndexStatus);
assertNull(seg.termIndexStatus.error);
assertEquals(18, seg.termIndexStatus.termCount);
assertEquals(18, seg.termIndexStatus.totFreq);
assertEquals(18, seg.termIndexStatus.totPos);
assertNotNull(seg.storedFieldStatus);
assertNull(seg.storedFieldStatus.error);
assertEquals(18, seg.storedFieldStatus.docCount);
assertEquals(18, seg.storedFieldStatus.totFields);
assertNotNull(seg.termVectorStatus);
assertNull(seg.termVectorStatus.error);
assertEquals(18, seg.termVectorStatus.docCount);
assertEquals(18, seg.termVectorStatus.totVectors);
assertNotNull(seg.diagnostics.get("java.vm.version"));
assertNotNull(seg.diagnostics.get("java.runtime.version"));
assertTrue(seg.diagnostics.size() > 0);
final List<String> onlySegments = new ArrayList<>();
onlySegments.add("_0");
assertTrue(checker.checkIndex(onlySegments).clean == true);
checker.close();
}
use of org.apache.lucene.document.FieldType in project lucene-solr by apache.
the class DocHelper method createDocument.
public static Document createDocument(int n, String indexName, int numFields) {
StringBuilder sb = new StringBuilder();
FieldType customType = new FieldType(TextField.TYPE_STORED);
customType.setStoreTermVectors(true);
customType.setStoreTermVectorPositions(true);
customType.setStoreTermVectorOffsets(true);
FieldType customType1 = new FieldType(StringField.TYPE_STORED);
customType1.setStoreTermVectors(true);
customType1.setStoreTermVectorPositions(true);
customType1.setStoreTermVectorOffsets(true);
final Document doc = new Document();
doc.add(new Field("id", Integer.toString(n), customType1));
doc.add(new Field("indexname", indexName, customType1));
sb.append("a");
sb.append(n);
doc.add(new Field("field1", sb.toString(), customType));
sb.append(" b");
sb.append(n);
for (int i = 1; i < numFields; i++) {
doc.add(new Field("field" + (i + 1), sb.toString(), customType));
}
return doc;
}
use of org.apache.lucene.document.FieldType in project lucene-solr by apache.
the class TestGrouping method testBasic.
public void testBasic() throws Exception {
String groupField = "author";
FieldType customType = new FieldType();
customType.setStored(true);
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
// 0
Document doc = new Document();
addGroupField(doc, groupField, "author1");
doc.add(new TextField("content", "random text", Field.Store.YES));
doc.add(new Field("id", "1", customType));
w.addDocument(doc);
// 1
doc = new Document();
addGroupField(doc, groupField, "author1");
doc.add(new TextField("content", "some more random text", Field.Store.YES));
doc.add(new Field("id", "2", customType));
w.addDocument(doc);
// 2
doc = new Document();
addGroupField(doc, groupField, "author1");
doc.add(new TextField("content", "some more random textual data", Field.Store.YES));
doc.add(new Field("id", "3", customType));
w.addDocument(doc);
// 3
doc = new Document();
addGroupField(doc, groupField, "author2");
doc.add(new TextField("content", "some random text", Field.Store.YES));
doc.add(new Field("id", "4", customType));
w.addDocument(doc);
// 4
doc = new Document();
addGroupField(doc, groupField, "author3");
doc.add(new TextField("content", "some more random text", Field.Store.YES));
doc.add(new Field("id", "5", customType));
w.addDocument(doc);
// 5
doc = new Document();
addGroupField(doc, groupField, "author3");
doc.add(new TextField("content", "random", Field.Store.YES));
doc.add(new Field("id", "6", customType));
w.addDocument(doc);
// 6 -- no author field
doc = new Document();
doc.add(new TextField("content", "random word stuck in alot of other text", Field.Store.YES));
doc.add(new Field("id", "6", customType));
w.addDocument(doc);
IndexSearcher indexSearcher = newSearcher(w.getReader());
w.close();
final Sort groupSort = Sort.RELEVANCE;
final FirstPassGroupingCollector<?> c1 = createRandomFirstPassCollector(groupField, groupSort, 10);
indexSearcher.search(new TermQuery(new Term("content", "random")), c1);
final TopGroupsCollector<?> c2 = createSecondPassCollector(c1, groupSort, Sort.RELEVANCE, 0, 5, true, true, true);
indexSearcher.search(new TermQuery(new Term("content", "random")), c2);
final TopGroups<?> groups = c2.getTopGroups(0);
assertFalse(Float.isNaN(groups.maxScore));
assertEquals(7, groups.totalHitCount);
assertEquals(7, groups.totalGroupedHitCount);
assertEquals(4, groups.groups.length);
// relevance order: 5, 0, 3, 4, 1, 2, 6
// the later a document is added the higher this docId
// value
GroupDocs<?> group = groups.groups[0];
compareGroupValue("author3", group);
assertEquals(2, group.scoreDocs.length);
assertEquals(5, group.scoreDocs[0].doc);
assertEquals(4, group.scoreDocs[1].doc);
assertTrue(group.scoreDocs[0].score > group.scoreDocs[1].score);
group = groups.groups[1];
compareGroupValue("author1", group);
assertEquals(3, group.scoreDocs.length);
assertEquals(0, group.scoreDocs[0].doc);
assertEquals(1, group.scoreDocs[1].doc);
assertEquals(2, group.scoreDocs[2].doc);
assertTrue(group.scoreDocs[0].score >= group.scoreDocs[1].score);
assertTrue(group.scoreDocs[1].score >= group.scoreDocs[2].score);
group = groups.groups[2];
compareGroupValue("author2", group);
assertEquals(1, group.scoreDocs.length);
assertEquals(3, group.scoreDocs[0].doc);
group = groups.groups[3];
compareGroupValue(null, group);
assertEquals(1, group.scoreDocs.length);
assertEquals(6, group.scoreDocs[0].doc);
indexSearcher.getIndexReader().close();
dir.close();
}
use of org.apache.lucene.document.FieldType in project lucene-solr by apache.
the class HighlighterPhraseTest method testSparsePhraseWithNoPositions.
public void testSparsePhraseWithNoPositions() throws IOException, InvalidTokenOffsetsException {
final String TEXT = "the fox did not jump";
final Directory directory = newDirectory();
final IndexWriter indexWriter = new IndexWriter(directory, newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)));
try {
final Document document = new Document();
FieldType customType = new FieldType(TextField.TYPE_STORED);
customType.setStoreTermVectorOffsets(true);
customType.setStoreTermVectors(true);
document.add(new Field(FIELD, TEXT, customType));
indexWriter.addDocument(document);
} finally {
indexWriter.close();
}
final IndexReader indexReader = DirectoryReader.open(directory);
try {
assertEquals(1, indexReader.numDocs());
final IndexSearcher indexSearcher = newSearcher(indexReader);
final PhraseQuery phraseQuery = new PhraseQuery(1, FIELD, "did", "jump");
TopDocs hits = indexSearcher.search(phraseQuery, 1);
assertEquals(1, hits.totalHits);
final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new SimpleHTMLEncoder(), new QueryScorer(phraseQuery));
final TokenStream tokenStream = TokenSources.getTermVectorTokenStreamOrNull(FIELD, indexReader.getTermVectors(0), -1);
assertEquals("the fox <B>did</B> not <B>jump</B>", highlighter.getBestFragment(tokenStream, TEXT));
} finally {
indexReader.close();
directory.close();
}
}
use of org.apache.lucene.document.FieldType in project lucene-solr by apache.
the class HighlighterPhraseTest method testSparsePhrase.
public void testSparsePhrase() throws IOException, InvalidTokenOffsetsException {
final String TEXT = "the fox did not jump";
final Directory directory = newDirectory();
final IndexWriter indexWriter = new IndexWriter(directory, newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)));
try {
final Document document = new Document();
FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
customType.setStoreTermVectorOffsets(true);
customType.setStoreTermVectorPositions(true);
customType.setStoreTermVectors(true);
document.add(new Field(FIELD, new TokenStreamSparse(), customType));
indexWriter.addDocument(document);
} finally {
indexWriter.close();
}
final IndexReader indexReader = DirectoryReader.open(directory);
try {
assertEquals(1, indexReader.numDocs());
final IndexSearcher indexSearcher = newSearcher(indexReader);
final PhraseQuery phraseQuery = new PhraseQuery(FIELD, "did", "jump");
TopDocs hits = indexSearcher.search(phraseQuery, 1);
assertEquals(0, hits.totalHits);
final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new SimpleHTMLEncoder(), new QueryScorer(phraseQuery));
final TokenStream tokenStream = TokenSources.getTermVectorTokenStreamOrNull(FIELD, indexReader.getTermVectors(0), -1);
assertEquals(highlighter.getBestFragment(new TokenStreamSparse(), TEXT), highlighter.getBestFragment(tokenStream, TEXT));
} finally {
indexReader.close();
directory.close();
}
}
Aggregations