use of org.apache.lucene.document.TextField in project lucene-solr by apache.
the class TestClassicSimilarity method testNormEncodingBackwardCompatibility.
public void testNormEncodingBackwardCompatibility() throws IOException {
Similarity similarity = new ClassicSimilarity();
for (int indexCreatedVersionMajor : new int[] { Version.LUCENE_6_0_0.major, Version.LATEST.major }) {
for (int length : new int[] { 1, 4, 16 }) {
// these length values are encoded accurately on both cases
Directory dir = newDirectory();
// set the version on the directory
new SegmentInfos(indexCreatedVersionMajor).commit(dir);
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setSimilarity(similarity));
Document doc = new Document();
String value = IntStream.range(0, length).mapToObj(i -> "b").collect(Collectors.joining(" "));
doc.add(new TextField("foo", value, Store.NO));
w.addDocument(doc);
IndexReader reader = DirectoryReader.open(w);
IndexSearcher searcher = newSearcher(reader);
searcher.setSimilarity(similarity);
Explanation expl = searcher.explain(new TermQuery(new Term("foo", "b")), 0);
Explanation fieldNorm = findExplanation(expl, "fieldNorm");
assertNotNull(fieldNorm);
assertEquals(fieldNorm.toString(), 1 / Math.sqrt(length), fieldNorm.getValue(), 0f);
w.close();
reader.close();
dir.close();
}
}
}
use of org.apache.lucene.document.TextField in project lucene-solr by apache.
the class ExpressionAggregationFacetsExample method index.
/** Build the example index. */
private void index() throws IOException {
IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE));
// Writes facet ords to a separate directory from the main index
DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
Document doc = new Document();
doc.add(new TextField("c", "foo bar", Store.NO));
doc.add(new NumericDocValuesField("popularity", 5L));
doc.add(new FacetField("A", "B"));
indexWriter.addDocument(config.build(taxoWriter, doc));
doc = new Document();
doc.add(new TextField("c", "foo foo bar", Store.NO));
doc.add(new NumericDocValuesField("popularity", 3L));
doc.add(new FacetField("A", "C"));
indexWriter.addDocument(config.build(taxoWriter, doc));
indexWriter.close();
taxoWriter.close();
}
use of org.apache.lucene.document.TextField in project lucene-solr by apache.
the class TestGrouping method testBasic.
public void testBasic() throws Exception {
String groupField = "author";
FieldType customType = new FieldType();
customType.setStored(true);
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
// 0
Document doc = new Document();
addGroupField(doc, groupField, "author1");
doc.add(new TextField("content", "random text", Field.Store.YES));
doc.add(new Field("id", "1", customType));
w.addDocument(doc);
// 1
doc = new Document();
addGroupField(doc, groupField, "author1");
doc.add(new TextField("content", "some more random text", Field.Store.YES));
doc.add(new Field("id", "2", customType));
w.addDocument(doc);
// 2
doc = new Document();
addGroupField(doc, groupField, "author1");
doc.add(new TextField("content", "some more random textual data", Field.Store.YES));
doc.add(new Field("id", "3", customType));
w.addDocument(doc);
// 3
doc = new Document();
addGroupField(doc, groupField, "author2");
doc.add(new TextField("content", "some random text", Field.Store.YES));
doc.add(new Field("id", "4", customType));
w.addDocument(doc);
// 4
doc = new Document();
addGroupField(doc, groupField, "author3");
doc.add(new TextField("content", "some more random text", Field.Store.YES));
doc.add(new Field("id", "5", customType));
w.addDocument(doc);
// 5
doc = new Document();
addGroupField(doc, groupField, "author3");
doc.add(new TextField("content", "random", Field.Store.YES));
doc.add(new Field("id", "6", customType));
w.addDocument(doc);
// 6 -- no author field
doc = new Document();
doc.add(new TextField("content", "random word stuck in alot of other text", Field.Store.YES));
doc.add(new Field("id", "6", customType));
w.addDocument(doc);
IndexSearcher indexSearcher = newSearcher(w.getReader());
w.close();
final Sort groupSort = Sort.RELEVANCE;
final FirstPassGroupingCollector<?> c1 = createRandomFirstPassCollector(groupField, groupSort, 10);
indexSearcher.search(new TermQuery(new Term("content", "random")), c1);
final TopGroupsCollector<?> c2 = createSecondPassCollector(c1, groupSort, Sort.RELEVANCE, 0, 5, true, true, true);
indexSearcher.search(new TermQuery(new Term("content", "random")), c2);
final TopGroups<?> groups = c2.getTopGroups(0);
assertFalse(Float.isNaN(groups.maxScore));
assertEquals(7, groups.totalHitCount);
assertEquals(7, groups.totalGroupedHitCount);
assertEquals(4, groups.groups.length);
// relevance order: 5, 0, 3, 4, 1, 2, 6
// the later a document is added the higher this docId
// value
GroupDocs<?> group = groups.groups[0];
compareGroupValue("author3", group);
assertEquals(2, group.scoreDocs.length);
assertEquals(5, group.scoreDocs[0].doc);
assertEquals(4, group.scoreDocs[1].doc);
assertTrue(group.scoreDocs[0].score > group.scoreDocs[1].score);
group = groups.groups[1];
compareGroupValue("author1", group);
assertEquals(3, group.scoreDocs.length);
assertEquals(0, group.scoreDocs[0].doc);
assertEquals(1, group.scoreDocs[1].doc);
assertEquals(2, group.scoreDocs[2].doc);
assertTrue(group.scoreDocs[0].score >= group.scoreDocs[1].score);
assertTrue(group.scoreDocs[1].score >= group.scoreDocs[2].score);
group = groups.groups[2];
compareGroupValue("author2", group);
assertEquals(1, group.scoreDocs.length);
assertEquals(3, group.scoreDocs[0].doc);
group = groups.groups[3];
compareGroupValue(null, group);
assertEquals(1, group.scoreDocs.length);
assertEquals(6, group.scoreDocs[0].doc);
indexSearcher.getIndexReader().close();
dir.close();
}
use of org.apache.lucene.document.TextField in project lucene-solr by apache.
the class TestJoinUtil method testEquals_numericJoin.
public void testEquals_numericJoin() throws Exception {
final int numDocs = atLeast(random(), 50);
try (final Directory dir = newDirectory()) {
try (final RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()))) {
boolean multiValued = random().nextBoolean();
String joinField = multiValued ? "mvField" : "svField";
for (int id = 0; id < numDocs; id++) {
Document doc = new Document();
doc.add(new TextField("id", "" + id, Field.Store.NO));
doc.add(new TextField("name", "name" + (id % 7), Field.Store.NO));
if (multiValued) {
int numValues = 1 + random().nextInt(2);
for (int i = 0; i < numValues; i++) {
doc.add(new IntPoint(joinField, random().nextInt(13)));
doc.add(new SortedNumericDocValuesField(joinField, random().nextInt(13)));
}
} else {
doc.add(new IntPoint(joinField, random().nextInt(13)));
doc.add(new NumericDocValuesField(joinField, random().nextInt(13)));
}
w.addDocument(doc);
}
Set<ScoreMode> scoreModes = EnumSet.allOf(ScoreMode.class);
ScoreMode scoreMode1 = scoreModes.toArray(new ScoreMode[0])[random().nextInt(scoreModes.size())];
scoreModes.remove(scoreMode1);
ScoreMode scoreMode2 = scoreModes.toArray(new ScoreMode[0])[random().nextInt(scoreModes.size())];
final Query x;
try (IndexReader r = w.getReader()) {
IndexSearcher indexSearcher = new IndexSearcher(r);
x = JoinUtil.createJoinQuery(joinField, multiValued, joinField, Integer.class, new TermQuery(new Term("name", "name5")), indexSearcher, scoreMode1);
assertEquals("identical calls to createJoinQuery", x, JoinUtil.createJoinQuery(joinField, multiValued, joinField, Integer.class, new TermQuery(new Term("name", "name5")), indexSearcher, scoreMode1));
assertFalse("score mode (" + scoreMode1 + " != " + scoreMode2 + "), but queries are equal", x.equals(JoinUtil.createJoinQuery(joinField, multiValued, joinField, Integer.class, new TermQuery(new Term("name", "name5")), indexSearcher, scoreMode2)));
assertFalse("from fields (joinField != \"other_field\") but queries equals", x.equals(JoinUtil.createJoinQuery(joinField, multiValued, "other_field", Integer.class, new TermQuery(new Term("name", "name5")), indexSearcher, scoreMode1)));
assertFalse("from fields (\"other_field\" != joinField) but queries equals", x.equals(JoinUtil.createJoinQuery("other_field", multiValued, joinField, Integer.class, new TermQuery(new Term("name", "name5")), indexSearcher, scoreMode1)));
assertFalse("fromQuery (name:name5 != name:name6) but queries equals", x.equals(JoinUtil.createJoinQuery("other_field", multiValued, joinField, Integer.class, new TermQuery(new Term("name", "name6")), indexSearcher, scoreMode1)));
}
for (int i = 14; i < 26; i++) {
Document doc = new Document();
doc.add(new TextField("id", "new_id", Field.Store.NO));
doc.add(new TextField("name", "name5", Field.Store.NO));
if (multiValued) {
int numValues = 1 + random().nextInt(2);
for (int j = 0; j < numValues; j++) {
doc.add(new SortedNumericDocValuesField(joinField, i));
doc.add(new IntPoint(joinField, i));
}
} else {
doc.add(new NumericDocValuesField(joinField, i));
doc.add(new IntPoint(joinField, i));
}
w.addDocument(doc);
}
try (IndexReader r = w.getReader()) {
IndexSearcher indexSearcher = new IndexSearcher(r);
assertFalse("Query shouldn't be equal, because new join values have been indexed", x.equals(JoinUtil.createJoinQuery(joinField, multiValued, joinField, Integer.class, new TermQuery(new Term("name", "name5")), indexSearcher, scoreMode1)));
}
}
}
}
use of org.apache.lucene.document.TextField in project lucene-solr by apache.
the class TestJoinUtil method testSimpleOrdinalsJoin.
public void testSimpleOrdinalsJoin() throws Exception {
final String idField = "id";
final String productIdField = "productId";
// A field indicating to what type a document belongs, which is then used to distinques between documents during joining.
final String typeField = "type";
// A single sorted doc values field that holds the join values for all document types.
// Typically during indexing a schema will automatically create this field with the values
final String joinField = idField + productIdField;
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE));
// 0
Document doc = new Document();
doc.add(new TextField(idField, "1", Field.Store.NO));
doc.add(new TextField(typeField, "product", Field.Store.NO));
doc.add(new TextField("description", "random text", Field.Store.NO));
doc.add(new TextField("name", "name1", Field.Store.NO));
doc.add(new SortedDocValuesField(joinField, new BytesRef("1")));
w.addDocument(doc);
// 1
doc = new Document();
doc.add(new TextField(productIdField, "1", Field.Store.NO));
doc.add(new TextField(typeField, "price", Field.Store.NO));
doc.add(new TextField("price", "10.0", Field.Store.NO));
doc.add(new SortedDocValuesField(joinField, new BytesRef("1")));
w.addDocument(doc);
// 2
doc = new Document();
doc.add(new TextField(productIdField, "1", Field.Store.NO));
doc.add(new TextField(typeField, "price", Field.Store.NO));
doc.add(new TextField("price", "20.0", Field.Store.NO));
doc.add(new SortedDocValuesField(joinField, new BytesRef("1")));
w.addDocument(doc);
// 3
doc = new Document();
doc.add(new TextField(idField, "2", Field.Store.NO));
doc.add(new TextField(typeField, "product", Field.Store.NO));
doc.add(new TextField("description", "more random text", Field.Store.NO));
doc.add(new TextField("name", "name2", Field.Store.NO));
doc.add(new SortedDocValuesField(joinField, new BytesRef("2")));
w.addDocument(doc);
w.commit();
// 4
doc = new Document();
doc.add(new TextField(productIdField, "2", Field.Store.NO));
doc.add(new TextField(typeField, "price", Field.Store.NO));
doc.add(new TextField("price", "10.0", Field.Store.NO));
doc.add(new SortedDocValuesField(joinField, new BytesRef("2")));
w.addDocument(doc);
// 5
doc = new Document();
doc.add(new TextField(productIdField, "2", Field.Store.NO));
doc.add(new TextField(typeField, "price", Field.Store.NO));
doc.add(new TextField("price", "20.0", Field.Store.NO));
doc.add(new SortedDocValuesField(joinField, new BytesRef("2")));
w.addDocument(doc);
IndexSearcher indexSearcher = new IndexSearcher(w.getReader());
w.close();
IndexReader r = indexSearcher.getIndexReader();
SortedDocValues[] values = new SortedDocValues[r.leaves().size()];
for (int i = 0; i < values.length; i++) {
LeafReader leafReader = r.leaves().get(i).reader();
values[i] = DocValues.getSorted(leafReader, joinField);
}
MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build(null, values, PackedInts.DEFAULT);
Query toQuery = new TermQuery(new Term(typeField, "price"));
Query fromQuery = new TermQuery(new Term("name", "name2"));
// Search for product and return prices
Query joinQuery = JoinUtil.createJoinQuery(joinField, fromQuery, toQuery, indexSearcher, ScoreMode.None, ordinalMap);
TopDocs result = indexSearcher.search(joinQuery, 10);
assertEquals(2, result.totalHits);
assertEquals(4, result.scoreDocs[0].doc);
assertEquals(5, result.scoreDocs[1].doc);
fromQuery = new TermQuery(new Term("name", "name1"));
joinQuery = JoinUtil.createJoinQuery(joinField, fromQuery, toQuery, indexSearcher, ScoreMode.None, ordinalMap);
result = indexSearcher.search(joinQuery, 10);
assertEquals(2, result.totalHits);
assertEquals(1, result.scoreDocs[0].doc);
assertEquals(2, result.scoreDocs[1].doc);
// Search for prices and return products
fromQuery = new TermQuery(new Term("price", "20.0"));
toQuery = new TermQuery(new Term(typeField, "product"));
joinQuery = JoinUtil.createJoinQuery(joinField, fromQuery, toQuery, indexSearcher, ScoreMode.None, ordinalMap);
result = indexSearcher.search(joinQuery, 10);
assertEquals(2, result.totalHits);
assertEquals(0, result.scoreDocs[0].doc);
assertEquals(3, result.scoreDocs[1].doc);
indexSearcher.getIndexReader().close();
dir.close();
}
Aggregations