Example usage of org.apache.lucene.document.TextField in the Apache lucene-solr project: class BaseDocValuesFormatTestCase, method testDocValuesSimple.
/*
 * Simple test case demonstrating basic doc-values indexing and search API usage.
 */
public void testDocValuesSimple() throws IOException {
  Directory dir = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random());
  IndexWriterConfig conf = newIndexWriterConfig(analyzer);
  conf.setMergePolicy(newLogMergePolicy());
  IndexWriter writer = new IndexWriter(dir, conf);
  // Index five documents, each carrying both a numeric doc value and an
  // indexed text token under the same field name "docId".
  for (int docNum = 0; docNum < 5; docNum++) {
    Document doc = new Document();
    doc.add(new NumericDocValuesField("docId", docNum));
    doc.add(new TextField("docId", Integer.toString(docNum), Field.Store.NO));
    writer.addDocument(doc);
  }
  writer.commit();
  writer.forceMerge(1, true);
  writer.close();

  DirectoryReader reader = DirectoryReader.open(dir);
  assertEquals(1, reader.leaves().size());
  IndexSearcher searcher = new IndexSearcher(reader);

  // Match all five documents via an OR over their indexed "docId" terms.
  BooleanQuery.Builder builder = new BooleanQuery.Builder();
  for (int docNum = 0; docNum < 5; docNum++) {
    builder.add(new TermQuery(new Term("docId", Integer.toString(docNum))), BooleanClause.Occur.SHOULD);
  }
  TopDocs hits = searcher.search(builder.build(), 10);
  assertEquals(5, hits.totalHits);

  // After forceMerge(1, true) there is a single segment, so segment doc ids
  // line up with the values we indexed.
  ScoreDoc[] scoreDocs = hits.scoreDocs;
  NumericDocValues docValues = getOnlyLeafReader(reader).getNumericDocValues("docId");
  for (int i = 0; i < scoreDocs.length; i++) {
    assertEquals(i, scoreDocs[i].doc);
    assertEquals(i, docValues.advance(i));
    assertEquals(i, docValues.longValue());
  }
  reader.close();
  dir.close();
}
Example usage of org.apache.lucene.document.TextField in the Apache lucene-solr project: class BaseNormsFormatTestCase, method testUndeadNorms.
// TODO: test thread safety (e.g. across different fields) explicitly here
/*
 * LUCENE-6006: norms must not become "undead". Even after every document
 * that carried the normed field has been deleted and merged away, norms for
 * that field should still be readable (as 0) for the surviving documents.
 */
public void testUndeadNorms() throws Exception {
  Directory dir = applyCreatedVersionMajor(newDirectory());
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
  int numDocs = atLeast(500);
  List<Integer> idsToDelete = new ArrayList<>();
  for (int docNum = 0; docNum < numDocs; docNum++) {
    Document doc = new Document();
    doc.add(new StringField("id", Integer.toString(docNum), Field.Store.NO));
    // Roughly one in five docs gets the normed "content" field; exactly
    // those docs are deleted below.
    if (random().nextInt(5) == 1) {
      idsToDelete.add(docNum);
      doc.add(new TextField("content", "some content", Field.Store.NO));
    }
    writer.addDocument(doc);
  }
  for (Integer id : idsToDelete) {
    writer.deleteDocuments(new Term("id", String.valueOf(id)));
  }
  writer.forceMerge(1);
  IndexReader reader = writer.getReader();
  assertFalse(reader.hasDeletions());
  // Confusingly, norms should exist, and should all be 0, even though we
  // deleted all docs that had the field "content". They should not be undead:
  NumericDocValues norms = MultiDocValues.getNormValues(reader, "content");
  assertNotNull(norms);
  if (codecSupportsSparsity()) {
    // Sparse codecs omit norms for docs lacking the field entirely.
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, norms.nextDoc());
  } else {
    // Dense codecs surface a zero norm for every live document.
    for (int docId = 0; docId < reader.maxDoc(); docId++) {
      assertEquals(docId, norms.nextDoc());
      assertEquals(0, norms.longValue());
    }
  }
  reader.close();
  writer.close();
  dir.close();
}
Example usage of org.apache.lucene.document.TextField in the Apache lucene-solr project: class DistinctValuesCollectorTest, method testSimple.
public void testSimple() throws Exception {
  Random random = random();
  Directory dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random, dir,
      newIndexWriterConfig(new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));

  // Group "1": three docs with count values "1", "1", "2".
  Document document = new Document();
  addField(document, GROUP_FIELD, "1");
  addField(document, COUNT_FIELD, "1");
  document.add(new TextField("content", "random text", Field.Store.NO));
  document.add(new StringField("id", "1", Field.Store.NO));
  writer.addDocument(document);

  document = new Document();
  addField(document, GROUP_FIELD, "1");
  addField(document, COUNT_FIELD, "1");
  document.add(new TextField("content", "some more random text blob", Field.Store.NO));
  document.add(new StringField("id", "2", Field.Store.NO));
  writer.addDocument(document);

  document = new Document();
  addField(document, GROUP_FIELD, "1");
  addField(document, COUNT_FIELD, "2");
  document.add(new TextField("content", "some more random textual data", Field.Store.NO));
  document.add(new StringField("id", "3", Field.Store.NO));
  writer.addDocument(document);

  // Commit here to ensure a second segment.
  writer.commit();

  // Group "2": one doc with no count field.
  document = new Document();
  addField(document, GROUP_FIELD, "2");
  document.add(new TextField("content", "some random text", Field.Store.NO));
  document.add(new StringField("id", "4", Field.Store.NO));
  writer.addDocument(document);

  // Group "3": two docs, both with count "1".
  document = new Document();
  addField(document, GROUP_FIELD, "3");
  addField(document, COUNT_FIELD, "1");
  document.add(new TextField("content", "some more random text", Field.Store.NO));
  document.add(new StringField("id", "5", Field.Store.NO));
  writer.addDocument(document);

  document = new Document();
  addField(document, GROUP_FIELD, "3");
  addField(document, COUNT_FIELD, "1");
  document.add(new TextField("content", "random blob", Field.Store.NO));
  document.add(new StringField("id", "6", Field.Store.NO));
  writer.addDocument(document);

  // Ungrouped doc: no group ("author") field at all.
  document = new Document();
  document.add(new TextField("content", "random word stuck in alot of other text", Field.Store.YES));
  addField(document, COUNT_FIELD, "1");
  document.add(new StringField("id", "6", Field.Store.NO));
  writer.addDocument(document);

  IndexSearcher searcher = newSearcher(writer.getReader());
  writer.close();

  // Orders group counts with a null group value first, then by natural order.
  Comparator<DistinctValuesCollector.GroupCount<Comparable<Object>, Comparable<Object>>> byGroupValue =
      (left, right) -> {
        if (left.groupValue == null) {
          return right.groupValue == null ? 0 : -1;
        }
        if (right.groupValue == null) {
          return 1;
        }
        return left.groupValue.compareTo(right.groupValue);
      };

  // === Search for content:random
  FirstPassGroupingCollector<Comparable<Object>> firstPass =
      createRandomFirstPassCollector(new Sort(), GROUP_FIELD, 10);
  searcher.search(new TermQuery(new Term("content", "random")), firstPass);
  DistinctValuesCollector<Comparable<Object>, Comparable<Object>> distinctCounts =
      createDistinctCountCollector(firstPass, COUNT_FIELD);
  searcher.search(new TermQuery(new Term("content", "random")), distinctCounts);
  List<DistinctValuesCollector.GroupCount<Comparable<Object>, Comparable<Object>>> groups =
      distinctCounts.getGroups();
  Collections.sort(groups, byGroupValue);
  assertEquals(4, groups.size());

  compareNull(groups.get(0).groupValue);
  List<Comparable<?>> counts = new ArrayList<>(groups.get(0).uniqueValues);
  assertEquals(1, counts.size());
  compare("1", counts.get(0));

  compare("1", groups.get(1).groupValue);
  counts = new ArrayList<>(groups.get(1).uniqueValues);
  Collections.sort(counts, nullComparator);
  assertEquals(2, counts.size());
  compare("1", counts.get(0));
  compare("2", counts.get(1));

  compare("2", groups.get(2).groupValue);
  counts = new ArrayList<>(groups.get(2).uniqueValues);
  assertEquals(1, counts.size());
  compareNull(counts.get(0));

  compare("3", groups.get(3).groupValue);
  counts = new ArrayList<>(groups.get(3).uniqueValues);
  assertEquals(1, counts.size());
  compare("1", counts.get(0));

  // === Search for content:some
  firstPass = createRandomFirstPassCollector(new Sort(), GROUP_FIELD, 10);
  searcher.search(new TermQuery(new Term("content", "some")), firstPass);
  distinctCounts = createDistinctCountCollector(firstPass, COUNT_FIELD);
  searcher.search(new TermQuery(new Term("content", "some")), distinctCounts);
  groups = distinctCounts.getGroups();
  Collections.sort(groups, byGroupValue);
  assertEquals(3, groups.size());

  compare("1", groups.get(0).groupValue);
  counts = new ArrayList<>(groups.get(0).uniqueValues);
  assertEquals(2, counts.size());
  Collections.sort(counts, nullComparator);
  compare("1", counts.get(0));
  compare("2", counts.get(1));

  compare("2", groups.get(1).groupValue);
  counts = new ArrayList<>(groups.get(1).uniqueValues);
  assertEquals(1, counts.size());
  compareNull(counts.get(0));

  compare("3", groups.get(2).groupValue);
  counts = new ArrayList<>(groups.get(2).uniqueValues);
  assertEquals(1, counts.size());
  compare("1", counts.get(0));

  // === Search for content:blob
  firstPass = createRandomFirstPassCollector(new Sort(), GROUP_FIELD, 10);
  searcher.search(new TermQuery(new Term("content", "blob")), firstPass);
  distinctCounts = createDistinctCountCollector(firstPass, COUNT_FIELD);
  searcher.search(new TermQuery(new Term("content", "blob")), distinctCounts);
  groups = distinctCounts.getGroups();
  Collections.sort(groups, byGroupValue);
  assertEquals(2, groups.size());

  compare("1", groups.get(0).groupValue);
  counts = new ArrayList<>(groups.get(0).uniqueValues);
  // Because only one document in the group "1" matched "blob".
  assertEquals(1, counts.size());
  compare("1", counts.get(0));

  compare("3", groups.get(1).groupValue);
  counts = new ArrayList<>(groups.get(1).uniqueValues);
  assertEquals(1, counts.size());
  compare("1", counts.get(0));

  searcher.getIndexReader().close();
  dir.close();
}
Example usage of org.apache.lucene.document.TextField in the Apache lucene-solr project: class DistinctValuesCollectorTest, method createIndexContext.
/**
 * Builds a randomized index for the grouping/distinct-count tests and records, per
 * generated content term, the expected group value -> distinct count values mapping.
 *
 * @return an IndexContext holding the directory, an open reader, the expected
 *         per-term group counts, and the generated content terms
 * @throws Exception on index-writing failure
 */
private IndexContext createIndexContext() throws Exception {
  Random random = random();
  Directory dir = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random, dir, newIndexWriterConfig(new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
  int numDocs = 86 + random.nextInt(1087) * RANDOM_MULTIPLIER;
  String[] groupValues = new String[numDocs / 5];
  String[] countValues = new String[numDocs / 10];
  for (int i = 0; i < groupValues.length; i++) {
    groupValues[i] = generateRandomNonEmptyString();
  }
  for (int i = 0; i < countValues.length; i++) {
    countValues[i] = generateRandomNonEmptyString();
  }
  List<String> contentStrings = new ArrayList<>();
  Map<String, Map<String, Set<String>>> searchTermToGroupCounts = new HashMap<>();
  for (int i = 1; i <= numDocs; i++) {
    // Either field may be null, exercising the "missing field" paths.
    String groupValue = random.nextInt(23) == 14 ? null : groupValues[random.nextInt(groupValues.length)];
    String countValue = random.nextInt(21) == 13 ? null : countValues[random.nextInt(countValues.length)];
    String content = "random" + random.nextInt(numDocs / 20);
    Map<String, Set<String>> groupToCounts = searchTermToGroupCounts.get(content);
    if (groupToCounts == null) {
      // First time this content term appears: record it, and use a
      // LinkedHashMap because groups always sort DOCID asc.
      groupToCounts = new LinkedHashMap<>();
      searchTermToGroupCounts.put(content, groupToCounts);
      contentStrings.add(content);
    }
    // HashMap-family maps accept a null key, so a null groupValue is tracked too.
    groupToCounts.computeIfAbsent(groupValue, k -> new HashSet<>()).add(countValue);
    // Zero-pad the id so lexicographic (SortedDocValues) order matches numeric order.
    String id = String.format(Locale.ROOT, "%09d", i);
    Document doc = new Document();
    doc.add(new StringField("id", id, Field.Store.YES));
    doc.add(new SortedDocValuesField("id", new BytesRef(id)));
    if (groupValue != null) {
      addField(doc, GROUP_FIELD, groupValue);
    }
    if (countValue != null) {
      addField(doc, COUNT_FIELD, countValue);
    }
    doc.add(new TextField("content", content, Field.Store.YES));
    w.addDocument(doc);
  }
  DirectoryReader reader = w.getReader();
  if (VERBOSE) {
    // NOTE(review): "author"/"publisher" presumably match GROUP_FIELD/COUNT_FIELD — confirm.
    for (int docID = 0; docID < reader.maxDoc(); docID++) {
      Document doc = reader.document(docID);
      System.out.println("docID=" + docID + " id=" + doc.get("id") + " content=" + doc.get("content") + " author=" + doc.get("author") + " publisher=" + doc.get("publisher"));
    }
  }
  w.close();
  return new IndexContext(dir, reader, searchTermToGroupCounts, contentStrings.toArray(new String[0]));
}
Example usage of org.apache.lucene.document.TextField in the Apache lucene-solr project: class PayloadHelper, method setUp.
/**
 * Sets up a RAMDirectory and adds documents (using English.intToEnglish()) with two fields:
 * field and multiField, analyzing them with the PayloadAnalyzer.
 * @param similarity the Similarity implementation the searcher should use
 * @param numDocs how many documents to index
 * @return an IndexSearcher over the single merged segment
 */
// TODO: randomize
public IndexSearcher setUp(Random random, Similarity similarity, int numDocs) throws IOException {
  Directory directory = new MockDirectoryWrapper(random, new RAMDirectory());
  PayloadAnalyzer analyzer = new PayloadAnalyzer();
  // TODO randomize this
  IndexWriterConfig config = new IndexWriterConfig(analyzer).setSimilarity(similarity);
  IndexWriter writer = new IndexWriter(directory, config);
  // writer.infoStream = System.out;
  for (int docNum = 0; docNum < numDocs; docNum++) {
    String words = English.intToEnglish(docNum);
    Document doc = new Document();
    doc.add(new TextField(FIELD, words, Field.Store.YES));
    doc.add(new TextField(MULTI_FIELD, words + " " + words, Field.Store.YES));
    doc.add(new TextField(NO_PAYLOAD_FIELD, words, Field.Store.YES));
    writer.addDocument(doc);
  }
  writer.forceMerge(1);
  // Open a near-real-time reader from the still-open writer, then close the writer.
  reader = DirectoryReader.open(writer);
  writer.close();
  IndexSearcher searcher = LuceneTestCase.newSearcher(LuceneTestCase.getOnlyLeafReader(reader));
  searcher.setSimilarity(similarity);
  return searcher;
}
Aggregations