Use of org.apache.lucene.document.StoredField in the project lucene-solr by apache,
in the class TestTermAutomatonQuery, method testRandom.
// Randomized duel: builds a TermAutomatonQuery from a string-union automaton
// (where '*' becomes a "match any term" transition) and verifies it matches
// exactly the same documents as an "equivalent" BooleanQuery of
// MultiPhraseQuerys over the same random index.
public void testRandom() throws Exception {
int numDocs = atLeast(100);
Directory dir = newDirectory();
// Adds occasional random synonyms:
Analyzer analyzer = new Analyzer() {
@Override
public TokenStreamComponents createComponents(String fieldName) {
MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, true, 100);
tokenizer.setEnableChecks(true);
TokenFilter filt = new MockTokenFilter(tokenizer, MockTokenFilter.EMPTY_STOPSET);
filt = new RandomSynonymFilter(filt);
return new TokenStreamComponents(tokenizer, filt);
}
};
IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
// Index docs whose "field" is a random sequence of single-char tokens drawn
// from the alphabet {a, b, c}; "id" is stored so mismatches can be reported.
for (int i = 0; i < numDocs; i++) {
Document doc = new Document();
int numTokens = atLeast(10);
StringBuilder sb = new StringBuilder();
for (int j = 0; j < numTokens; j++) {
sb.append(' ');
// 97 is 'a'; tokens are 'a', 'b' or 'c':
sb.append((char) (97 + random().nextInt(3)));
}
String contents = sb.toString();
doc.add(newTextField("field", contents, Field.Store.NO));
doc.add(new StoredField("id", "" + i));
if (VERBOSE) {
System.out.println("  doc " + i + " -> " + contents);
}
w.addDocument(doc);
}
IndexReader r = w.getReader();
IndexSearcher s = newSearcher(r);
// Used to match ANY using MultiPhraseQuery:
Term[] allTerms = new Term[] { new Term("field", "a"), new Term("field", "b"), new Term("field", "c") };
int numIters = atLeast(1000);
for (int iter = 0; iter < numIters; iter++) {
// Build the (finite, no any transitions) TermAutomatonQuery and
// also the "equivalent" BooleanQuery and make sure they match the
// same docs:
BooleanQuery.Builder bq = new BooleanQuery.Builder();
int count = TestUtil.nextInt(random(), 1, 5);
Set<BytesRef> strings = new HashSet<>();
// Generate `count` random phrase strings over {a, b, c, *}; '*' is a
// wildcard position that matches any of the three terms:
for (int i = 0; i < count; i++) {
StringBuilder sb = new StringBuilder();
int numTokens = TestUtil.nextInt(random(), 1, 5);
for (int j = 0; j < numTokens; j++) {
// '*' is only allowed in interior positions, with 1-in-5 odds:
if (j > 0 && j < numTokens - 1 && random().nextInt(5) == 3) {
sb.append('*');
} else {
sb.append((char) (97 + random().nextInt(3)));
}
}
String string = sb.toString();
// Each string becomes one MultiPhraseQuery clause; '*' positions take
// all three terms:
MultiPhraseQuery.Builder mpqb = new MultiPhraseQuery.Builder();
for (int j = 0; j < string.length(); j++) {
if (string.charAt(j) == '*') {
mpqb.add(allTerms);
} else {
mpqb.add(new Term("field", "" + string.charAt(j)));
}
}
bq.add(mpqb.build(), BooleanClause.Occur.SHOULD);
strings.add(new BytesRef(string));
}
// makeStringUnion requires its input sorted:
List<BytesRef> stringsList = new ArrayList<>(strings);
Collections.sort(stringsList);
Automaton a = Automata.makeStringUnion(stringsList);
// Translate automaton to query:
TermAutomatonQuery q = new TermAutomatonQuery("field");
int numStates = a.getNumStates();
// Mirror the automaton's states (and accept flags) into the query:
for (int i = 0; i < numStates; i++) {
q.createState();
q.setAccept(i, a.isAccept(i));
}
// Mirror the automaton's transitions; '*' labels become any-transitions:
Transition t = new Transition();
for (int i = 0; i < numStates; i++) {
int transCount = a.initTransition(i, t);
for (int j = 0; j < transCount; j++) {
a.getNextTransition(t);
// A transition may cover a label range; expand it per label:
for (int label = t.min; label <= t.max; label++) {
if ((char) label == '*') {
q.addAnyTransition(t.source, t.dest);
} else {
q.addTransition(t.source, t.dest, "" + (char) label);
}
}
}
}
q.finish();
if (VERBOSE) {
System.out.println("TEST: iter=" + iter);
for (BytesRef string : stringsList) {
System.out.println("  string: " + string.utf8ToString());
}
System.out.println(q.toDot());
}
Query q1 = q;
Query q2 = bq.build();
// Sometimes wrap both sides in the same random FILTER clause, to also
// exercise the two-phase / filtered execution path:
if (random().nextInt(5) == 1) {
if (VERBOSE) {
System.out.println("  use random filter");
}
RandomQuery filter = new RandomQuery(random().nextLong(), random().nextFloat());
q1 = new BooleanQuery.Builder().add(q1, Occur.MUST).add(filter, Occur.FILTER).build();
q2 = new BooleanQuery.Builder().add(q2, Occur.MUST).add(filter, Occur.FILTER).build();
}
TopDocs hits1 = s.search(q1, numDocs);
TopDocs hits2 = s.search(q2, numDocs);
Set<String> hits1Docs = toDocIDs(s, hits1);
Set<String> hits2Docs = toDocIDs(s, hits2);
// On mismatch, print the symmetric difference of the two hit sets by id
// before rethrowing, to make failures debuggable:
try {
assertEquals(hits2.totalHits, hits1.totalHits);
assertEquals(hits2Docs, hits1Docs);
} catch (AssertionError ae) {
System.out.println("FAILED:");
for (String id : hits1Docs) {
if (hits2Docs.contains(id) == false) {
System.out.println(String.format(Locale.ROOT, "  id=%3s matched but should not have", id));
}
}
for (String id : hits2Docs) {
if (hits1Docs.contains(id) == false) {
System.out.println(String.format(Locale.ROOT, "  id=%3s did not match but should have", id));
}
}
throw ae;
}
}
IOUtils.close(w, r, dir, analyzer);
}
Use of org.apache.lucene.document.StoredField in the project lucene-solr by apache,
in the class TestFunctionQuerySort, method testSearchAfterWhenSortingByFunctionValues.
// Verifies that IndexSearcher.searchAfter works when sorting by FunctionValues:
// index values in decreasing order, sort ascending by a function over the
// field, then resume the search after an anchor hit.
public void testSearchAfterWhenSortingByFunctionValues() throws IOException {
Directory directory = newDirectory();
IndexWriterConfig config = newIndexWriterConfig(null);
// The test depends on docid order, so merges must preserve it.
config.setMergePolicy(newLogMergePolicy());
RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, config);
// One reusable document; field values are overwritten before each add.
Document document = new Document();
Field storedValue = new StoredField("value", 0);
Field docValue = new NumericDocValuesField("value", 0);
document.add(storedValue);
document.add(docValue);
// Save docs unsorted: values go in decreasing order (n, n-1, ..., 1).
final int NUM_VALS = 5;
for (int value = NUM_VALS; value > 0; value--) {
storedValue.setIntValue(value);
docValue.setLongValue(value);
indexWriter.addDocument(document);
}
// Open the index for searching.
IndexReader reader = indexWriter.getReader();
indexWriter.close();
IndexSearcher searcher = newSearcher(reader);
// Wrap the field source in SumFloatFunction so the single-field
// ValueSource sort optimization cannot kick in.
ValueSource source = new SumFloatFunction(new ValueSource[] { new IntFieldSource("value"), new DoubleConstValueSource(1.0D) });
// ...and make it a sort criterion.
SortField sortField = source.getSortField(false).rewrite(searcher);
Sort sort = new Sort(sortField);
// Fetch all hits sorted by our FunctionValues (ascending).
Query query = new MatchAllDocsQuery();
TopDocs topDocs = searcher.search(query, reader.maxDoc(), sort);
assertEquals(NUM_VALS, topDocs.scoreDocs.length);
// Sanity check: the hits come back as 1, 2, ..., NUM_VALS.
int expected = 0;
for (ScoreDoc hit : topDocs.scoreDocs) {
int valueFromDoc = Integer.parseInt(reader.document(hit.doc).get("value"));
assertEquals(++expected, valueFromDoc);
}
// Now resume via IS.searchAfter() using hit #2 as the anchor.
int afterIdx = 1;
FieldDoc afterHit = (FieldDoc) topDocs.scoreDocs[afterIdx];
topDocs = searcher.searchAfter(afterHit, query, reader.maxDoc(), sort);
// Everything past the anchor should remain: NUM_VALS - 2 hits.
assertEquals(NUM_VALS - (afterIdx + 1), topDocs.scoreDocs.length);
// Each remaining hit must sort at or after the anchor value, and must not
// be the anchor document itself.
int afterValue = ((Double) afterHit.fields[0]).intValue();
for (ScoreDoc hit : topDocs.scoreDocs) {
int value = Integer.parseInt(reader.document(hit.doc).get("value"));
assertTrue(afterValue <= value);
assertFalse(hit.doc == afterHit.doc);
}
reader.close();
directory.close();
}
Use of org.apache.lucene.document.StoredField in the project lucene-solr by apache,
in the class FunctionTestSetup, method addDoc.
// Adds one test document whose id, text and numeric fields are all derived
// from scoreAndID = i + 1, so function-scoring tests can predict every value.
private static void addDoc(RandomIndexWriter iw, int i) throws Exception {
Document doc = new Document();
int scoreAndID = i + 1;
// ID field: stored, untokenized, no norms — handy for debug purposes.
FieldType storedKeywordType = new FieldType(TextField.TYPE_STORED);
storedKeywordType.setTokenized(false);
storedKeywordType.setOmitNorms(true);
doc.add(newField(ID_FIELD, id2String(scoreAndID), storedKeywordType));
doc.add(new SortedDocValuesField(ID_FIELD, new BytesRef(id2String(scoreAndID))));
// Text field for regular search; not stored, no norms.
FieldType textType = new FieldType(TextField.TYPE_NOT_STORED);
textType.setOmitNorms(true);
doc.add(newField(TEXT_FIELD, "text of doc" + scoreAndID + textLine(i), textType));
// Single-valued int field for function scoring: stored value plus a
// matching numeric doc-values entry.
doc.add(new StoredField(INT_FIELD, scoreAndID));
doc.add(new NumericDocValuesField(INT_FIELD, scoreAndID));
// Single-valued float field for function scoring.
doc.add(new StoredField(FLOAT_FIELD, scoreAndID));
doc.add(new NumericDocValuesField(FLOAT_FIELD, Float.floatToRawIntBits(scoreAndID)));
// Multi-valued int fields: *_MV_MIN holds {n, n+1} so its min is n;
// *_MV_MAX holds {n, n-1} so its max is n.
doc.add(new StoredField(INT_FIELD_MV_MIN, scoreAndID));
doc.add(new StoredField(INT_FIELD_MV_MIN, scoreAndID + 1));
doc.add(new SortedNumericDocValuesField(INT_FIELD_MV_MIN, scoreAndID));
doc.add(new SortedNumericDocValuesField(INT_FIELD_MV_MIN, scoreAndID + 1));
doc.add(new StoredField(INT_FIELD_MV_MAX, scoreAndID));
doc.add(new StoredField(INT_FIELD_MV_MAX, scoreAndID - 1));
doc.add(new SortedNumericDocValuesField(INT_FIELD_MV_MAX, scoreAndID));
doc.add(new SortedNumericDocValuesField(INT_FIELD_MV_MAX, scoreAndID - 1));
// Multi-valued float fields follow the same {n, n+1} / {n, n-1} pattern,
// encoded via floatToSortableInt for the doc-values entries.
doc.add(new StoredField(FLOAT_FIELD_MV_MIN, scoreAndID));
doc.add(new StoredField(FLOAT_FIELD_MV_MIN, scoreAndID + 1));
doc.add(new SortedNumericDocValuesField(FLOAT_FIELD_MV_MIN, NumericUtils.floatToSortableInt(scoreAndID)));
doc.add(new SortedNumericDocValuesField(FLOAT_FIELD_MV_MIN, NumericUtils.floatToSortableInt(scoreAndID + 1)));
doc.add(new StoredField(FLOAT_FIELD_MV_MAX, scoreAndID));
doc.add(new StoredField(FLOAT_FIELD_MV_MAX, scoreAndID - 1));
doc.add(new SortedNumericDocValuesField(FLOAT_FIELD_MV_MAX, NumericUtils.floatToSortableInt(scoreAndID)));
doc.add(new SortedNumericDocValuesField(FLOAT_FIELD_MV_MAX, NumericUtils.floatToSortableInt(scoreAndID - 1)));
iw.addDocument(doc);
log("added: " + doc);
}
Use of org.apache.lucene.document.StoredField in the project lucene-solr by apache,
in the class BaseDocValuesFormatTestCase, method doTestSortedSetVsStoredFields.
// Duel test: indexes the same per-doc value sets both as stored fields (in
// sorted order) and as SortedSetDocValues (in shuffled order), then checks
// they agree — before and after forceMerge(1).
protected void doTestSortedSetVsStoredFields(int numDocs, int minLength, int maxLength, int maxValuesPerDoc, int maxUniqueValues) throws Exception {
Directory dir = newFSDirectory(createTempDir("dvduel"));
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
// Build a pool of at most maxUniqueValues random simple strings.
Set<String> valueSet = new HashSet<String>();
for (int i = 0; i < 10000 && valueSet.size() < maxUniqueValues; ++i) {
final int length = TestUtil.nextInt(random(), minLength, maxLength);
valueSet.add(TestUtil.randomSimpleString(random(), length));
}
String[] uniqueValues = valueSet.toArray(new String[0]);
// index some docs
for (int i = 0; i < numDocs; i++) {
Document doc = new Document();
Field idField = new StringField("id", Integer.toString(i), Field.Store.NO);
doc.add(idField);
int numValues = TestUtil.nextInt(random(), 0, maxValuesPerDoc);
// create a random set of strings; TreeSet dedups and sorts, matching the
// ord order SortedSetDocValues will return
Set<String> values = new TreeSet<>();
for (int v = 0; v < numValues; v++) {
values.add(RandomPicks.randomFrom(random(), uniqueValues));
}
// add ordered to the stored field
for (String v : values) {
doc.add(new StoredField("stored", v));
}
// add in any order to the dv field
ArrayList<String> unordered = new ArrayList<>(values);
Collections.shuffle(unordered, random());
for (String v : unordered) {
doc.add(new SortedSetDocValuesField("dv", new BytesRef(v)));
}
writer.addDocument(doc);
if (random().nextInt(31) == 0) {
writer.commit();
}
}
// delete some docs; guard numDocs < 10, where Random.nextInt(0) would throw
int numDeletions = numDocs >= 10 ? random().nextInt(numDocs / 10) : 0;
for (int i = 0; i < numDeletions; i++) {
int id = random().nextInt(numDocs);
writer.deleteDocuments(new Term("id", Integer.toString(id)));
}
// compare
DirectoryReader ir = writer.getReader();
TestUtil.checkReader(ir);
assertSortedSetMatchesStored(ir);
ir.close();
writer.forceMerge(1);
// compare again on the single merged segment
ir = writer.getReader();
TestUtil.checkReader(ir);
assertSortedSetMatchesStored(ir);
ir.close();
writer.close();
dir.close();
}

// Asserts that, per document, the "dv" SortedSetDocValues ords enumerate
// exactly the sorted "stored" field values. Null-safe when a segment has no
// "dv" field at all — the original post-merge pass dereferenced docValues
// before its null check and could NPE.
private static void assertSortedSetMatchesStored(DirectoryReader ir) throws Exception {
for (LeafReaderContext context : ir.leaves()) {
LeafReader r = context.reader();
SortedSetDocValues docValues = r.getSortedSetDocValues("dv");
for (int i = 0; i < r.maxDoc(); i++) {
String[] stringValues = r.document(i).getValues("stored");
// advance the sparse iterator to this doc if it is behind
if (docValues != null && docValues.docID() < i) {
docValues.nextDoc();
}
if (docValues != null && stringValues.length > 0) {
assertEquals(i, docValues.docID());
for (int j = 0; j < stringValues.length; j++) {
long ord = docValues.nextOrd();
assert ord != NO_MORE_ORDS;
BytesRef scratch = docValues.lookupOrd(ord);
assertEquals(stringValues[j], scratch.utf8ToString());
}
// all ords for this doc must have been consumed
assertEquals(NO_MORE_ORDS, docValues.nextOrd());
}
}
}
}
Use of org.apache.lucene.document.StoredField in the project lucene-solr by apache,
in the class BaseDocValuesFormatTestCase, method testEmptyBinaryValueOnPageSizes.
// LUCENE-5218: binary doc values of exactly 2^exp bytes (various page-size
// boundaries) followed by an empty value must round-trip correctly.
public void testEmptyBinaryValueOnPageSizes() throws Exception {
for (int exp = 0; exp < 20; exp++) {
// Beyond 2^14 bytes, only codecs that accept huge binary values apply.
if (exp > 14 && !codecAcceptsHugeBinaryValues("field")) {
break;
}
Directory directory = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
int valueLength = 1 << exp;
BytesRef bigValue = new BytesRef();
bigValue.bytes = new byte[valueLength];
bigValue.length = valueLength;
// Four docs carrying the big value...
for (int d = 0; d < 4; d++) {
Document bigDoc = new Document();
bigDoc.add(new BinaryDocValuesField("field", bigValue));
writer.addDocument(bigDoc);
}
// ...followed by one doc carrying the empty value.
Document emptyDoc = new Document();
emptyDoc.add(new StoredField("id", "5"));
emptyDoc.add(new BinaryDocValuesField("field", new BytesRef()));
writer.addDocument(emptyDoc);
IndexReader reader = writer.getReader();
writer.close();
// Every doc's value must come back either empty or exactly 2^exp bytes.
BinaryDocValues values = MultiDocValues.getBinaryValues(reader, "field");
for (int d = 0; d < 5; d++) {
assertEquals(d, values.nextDoc());
BytesRef result = values.binaryValue();
assertTrue(result.length == 0 || result.length == valueLength);
}
reader.close();
directory.close();
}
}
Aggregations