Search in sources :

Example 96 with TextField

use of org.apache.lucene.document.TextField in project lucene-solr by apache.

the class BaseDocValuesFormatTestCase method testDocValuesSimple.

/*
   * Minimal walkthrough of the doc-values API: index five documents that each carry a numeric
   * doc value and a same-named text field, search for all of them, then read every hit's value
   * back through NumericDocValues.
   */
public void testDocValuesSimple() throws IOException {
    final int docCount = 5;
    Directory dir = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriterConfig conf = newIndexWriterConfig(analyzer);
    conf.setMergePolicy(newLogMergePolicy());
    IndexWriter writer = new IndexWriter(dir, conf);
    for (int value = 0; value < docCount; value++) {
        Document doc = new Document();
        // The same field name is used for both the doc value and the searchable text.
        doc.add(new NumericDocValuesField("docId", value));
        doc.add(new TextField("docId", Integer.toString(value), Field.Store.NO));
        writer.addDocument(doc);
    }
    writer.commit();
    // Merge down to one segment so a single leaf reader covers every document.
    writer.forceMerge(1, true);
    writer.close();

    DirectoryReader reader = DirectoryReader.open(dir);
    assertEquals(1, reader.leaves().size());
    IndexSearcher searcher = new IndexSearcher(reader);

    // One optional clause per indexed value: "0" through "4".
    BooleanQuery.Builder query = new BooleanQuery.Builder();
    for (int value = 0; value < docCount; value++) {
        query.add(new TermQuery(new Term("docId", Integer.toString(value))), BooleanClause.Occur.SHOULD);
    }
    TopDocs search = searcher.search(query.build(), 10);
    assertEquals(docCount, search.totalHits);

    ScoreDoc[] hits = search.scoreDocs;
    NumericDocValues docValues = getOnlyLeafReader(reader).getNumericDocValues("docId");
    for (int rank = 0; rank < hits.length; rank++) {
        // Hit number rank is expected to be document rank, holding the value rank.
        assertEquals(rank, hits[rank].doc);
        assertEquals(rank, docValues.advance(rank));
        assertEquals(rank, docValues.longValue());
    }
    reader.close();
    dir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) TermQuery(org.apache.lucene.search.TermQuery) BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) Document(org.apache.lucene.document.Document) ScoreDoc(org.apache.lucene.search.ScoreDoc) TopDocs(org.apache.lucene.search.TopDocs) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) TextField(org.apache.lucene.document.TextField) Directory(org.apache.lucene.store.Directory)

Example 97 with TextField

use of org.apache.lucene.document.TextField in project lucene-solr by apache.

the class BaseNormsFormatTestCase method testUndeadNorms.

// TODO: test thread safety (e.g. across different fields) explicitly here
/*
   * LUCENE-6006: Tests undead norms.
   *                                 .....            
   *                             C C  /            
   *                            /<   /             
   *             ___ __________/_#__=o             
   *            /(- /(\_\________   \              
   *            \ ) \ )_      \o     \             
   *            /|\ /|\       |'     |             
   *                          |     _|             
   *                          /o   __\             
   *                         / '     |             
   *                        / /      |             
   *                       /_/\______|             
   *                      (   _(    <              
   *                       \    \    \             
   *                        \    \    |            
   *                         \____\____\           
   *                         ____\_\__\_\          
   *                       /`   /`     o\          
   *                       |___ |_______|
   *
   */
public void testUndeadNorms() throws Exception {
    Directory directory = applyCreatedVersionMajor(newDirectory());
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
    int docCount = atLeast(500);
    // Ids of the documents that receive the "content" field; all of them are deleted below.
    List<Integer> idsWithContent = new ArrayList<>();
    for (int id = 0; id < docCount; id++) {
        Document document = new Document();
        document.add(new StringField("id", String.valueOf(id), Field.Store.NO));
        boolean addContent = random().nextInt(5) == 1;
        if (addContent) {
            idsWithContent.add(id);
            document.add(new TextField("content", "some content", Field.Store.NO));
        }
        writer.addDocument(document);
    }
    for (int id : idsWithContent) {
        writer.deleteDocuments(new Term("id", String.valueOf(id)));
    }
    writer.forceMerge(1);
    IndexReader reader = writer.getReader();
    assertFalse(reader.hasDeletions());
    // Every document that had "content" is gone, yet the norms for that field must still be
    // retrievable; no surviving document may carry a non-zero ("undead") norm.
    NumericDocValues norms = MultiDocValues.getNormValues(reader, "content");
    assertNotNull(norms);
    if (!codecSupportsSparsity()) {
        // Dense case: the iterator visits every document in order and each norm reads as 0.
        final int maxDoc = reader.maxDoc();
        for (int docId = 0; docId < maxDoc; docId++) {
            assertEquals(docId, norms.nextDoc());
            assertEquals(0, norms.longValue());
        }
    } else {
        // Sparse case: the iterator is exhausted immediately.
        assertEquals(DocIdSetIterator.NO_MORE_DOCS, norms.nextDoc());
    }
    reader.close();
    writer.close();
    directory.close();
}
Also used : StringField(org.apache.lucene.document.StringField) ArrayList(java.util.ArrayList) TextField(org.apache.lucene.document.TextField) Document(org.apache.lucene.document.Document) Directory(org.apache.lucene.store.Directory)

Example 98 with TextField

use of org.apache.lucene.document.TextField in project lucene-solr by apache.

the class DistinctValuesCollectorTest method testSimple.

/**
 * Indexes seven hand-written documents and checks the distinct COUNT_FIELD values reported per
 * GROUP_FIELD group for three term queries on "content": "random", "some" and "blob".
 *
 * <p>Each query is run twice: first with a first-pass grouping collector, then with the
 * distinct-values collector created from it. The resulting groups are sorted with the null
 * group first before the assertions walk through them in order.
 *
 * @throws Exception if indexing or searching fails
 */
public void testSimple() throws Exception {
    Random random = random();
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random, dir, newIndexWriterConfig(new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
    // 0 (each numbered comment below labels the document built right after it)
    Document doc = new Document();
    addField(doc, GROUP_FIELD, "1");
    addField(doc, COUNT_FIELD, "1");
    doc.add(new TextField("content", "random text", Field.Store.NO));
    doc.add(new StringField("id", "1", Field.Store.NO));
    w.addDocument(doc);
    // 1
    doc = new Document();
    addField(doc, GROUP_FIELD, "1");
    addField(doc, COUNT_FIELD, "1");
    doc.add(new TextField("content", "some more random text blob", Field.Store.NO));
    doc.add(new StringField("id", "2", Field.Store.NO));
    w.addDocument(doc);
    // 2
    doc = new Document();
    addField(doc, GROUP_FIELD, "1");
    addField(doc, COUNT_FIELD, "2");
    doc.add(new TextField("content", "some more random textual data", Field.Store.NO));
    doc.add(new StringField("id", "3", Field.Store.NO));
    w.addDocument(doc);
    // To ensure a second segment
    w.commit();
    // 3 -- no count field
    doc = new Document();
    addField(doc, GROUP_FIELD, "2");
    doc.add(new TextField("content", "some random text", Field.Store.NO));
    doc.add(new StringField("id", "4", Field.Store.NO));
    w.addDocument(doc);
    // 4
    doc = new Document();
    addField(doc, GROUP_FIELD, "3");
    addField(doc, COUNT_FIELD, "1");
    doc.add(new TextField("content", "some more random text", Field.Store.NO));
    doc.add(new StringField("id", "5", Field.Store.NO));
    w.addDocument(doc);
    // 5
    doc = new Document();
    addField(doc, GROUP_FIELD, "3");
    addField(doc, COUNT_FIELD, "1");
    doc.add(new TextField("content", "random blob", Field.Store.NO));
    doc.add(new StringField("id", "6", Field.Store.NO));
    w.addDocument(doc);
    // 6 -- no author field
    // NOTE(review): this document reuses id "6" from the previous one. The id field is never
    // queried in this test, so it looks harmless -- confirm whether it is intentional.
    doc = new Document();
    doc.add(new TextField("content", "random word stuck in alot of other text", Field.Store.YES));
    addField(doc, COUNT_FIELD, "1");
    doc.add(new StringField("id", "6", Field.Store.NO));
    w.addDocument(doc);
    IndexSearcher indexSearcher = newSearcher(w.getReader());
    w.close();
    // Orders groups by group value, placing a null group value before any non-null one, so the
    // positional assertions below see a stable order.
    Comparator<DistinctValuesCollector.GroupCount<Comparable<Object>, Comparable<Object>>> cmp = (groupCount1, groupCount2) -> {
        if (groupCount1.groupValue == null) {
            if (groupCount2.groupValue == null) {
                return 0;
            }
            return -1;
        } else if (groupCount2.groupValue == null) {
            return 1;
        } else {
            return groupCount1.groupValue.compareTo(groupCount2.groupValue);
        }
    };
    // === Search for content:random
    // Expected groups after sorting: null, "1", "2", "3".
    FirstPassGroupingCollector<Comparable<Object>> firstCollector = createRandomFirstPassCollector(new Sort(), GROUP_FIELD, 10);
    indexSearcher.search(new TermQuery(new Term("content", "random")), firstCollector);
    DistinctValuesCollector<Comparable<Object>, Comparable<Object>> distinctValuesCollector = createDistinctCountCollector(firstCollector, COUNT_FIELD);
    indexSearcher.search(new TermQuery(new Term("content", "random")), distinctValuesCollector);
    List<DistinctValuesCollector.GroupCount<Comparable<Object>, Comparable<Object>>> gcs = distinctValuesCollector.getGroups();
    Collections.sort(gcs, cmp);
    assertEquals(4, gcs.size());
    // Null group: one distinct count value, "1".
    compareNull(gcs.get(0).groupValue);
    List<Comparable<?>> countValues = new ArrayList<Comparable<?>>(gcs.get(0).uniqueValues);
    assertEquals(1, countValues.size());
    compare("1", countValues.get(0));
    // Group "1": two distinct count values, sorted before comparing so the order is fixed.
    compare("1", gcs.get(1).groupValue);
    countValues = new ArrayList<Comparable<?>>(gcs.get(1).uniqueValues);
    Collections.sort(countValues, nullComparator);
    assertEquals(2, countValues.size());
    compare("1", countValues.get(0));
    compare("2", countValues.get(1));
    // Group "2": its only document has no count field, so the single distinct value is null.
    compare("2", gcs.get(2).groupValue);
    countValues = new ArrayList<Comparable<?>>(gcs.get(2).uniqueValues);
    assertEquals(1, countValues.size());
    compareNull(countValues.get(0));
    // Group "3": one distinct count value, "1".
    compare("3", gcs.get(3).groupValue);
    countValues = new ArrayList<Comparable<?>>(gcs.get(3).uniqueValues);
    assertEquals(1, countValues.size());
    compare("1", countValues.get(0));
    // === Search for content:some
    // Expected groups after sorting: "1", "2", "3" (no null group for this term).
    firstCollector = createRandomFirstPassCollector(new Sort(), GROUP_FIELD, 10);
    indexSearcher.search(new TermQuery(new Term("content", "some")), firstCollector);
    distinctValuesCollector = createDistinctCountCollector(firstCollector, COUNT_FIELD);
    indexSearcher.search(new TermQuery(new Term("content", "some")), distinctValuesCollector);
    gcs = distinctValuesCollector.getGroups();
    Collections.sort(gcs, cmp);
    assertEquals(3, gcs.size());
    compare("1", gcs.get(0).groupValue);
    countValues = new ArrayList<Comparable<?>>(gcs.get(0).uniqueValues);
    assertEquals(2, countValues.size());
    Collections.sort(countValues, nullComparator);
    compare("1", countValues.get(0));
    compare("2", countValues.get(1));
    compare("2", gcs.get(1).groupValue);
    countValues = new ArrayList<Comparable<?>>(gcs.get(1).uniqueValues);
    assertEquals(1, countValues.size());
    compareNull(countValues.get(0));
    compare("3", gcs.get(2).groupValue);
    countValues = new ArrayList<Comparable<?>>(gcs.get(2).uniqueValues);
    assertEquals(1, countValues.size());
    compare("1", countValues.get(0));
    // === Search for content:blob
    // Expected groups after sorting: "1" and "3".
    firstCollector = createRandomFirstPassCollector(new Sort(), GROUP_FIELD, 10);
    indexSearcher.search(new TermQuery(new Term("content", "blob")), firstCollector);
    distinctValuesCollector = createDistinctCountCollector(firstCollector, COUNT_FIELD);
    indexSearcher.search(new TermQuery(new Term("content", "blob")), distinctValuesCollector);
    gcs = distinctValuesCollector.getGroups();
    Collections.sort(gcs, cmp);
    assertEquals(2, gcs.size());
    compare("1", gcs.get(0).groupValue);
    countValues = new ArrayList<Comparable<?>>(gcs.get(0).uniqueValues);
    // B/c the only one document matched with blob inside the author 1 group
    assertEquals(1, countValues.size());
    compare("1", countValues.get(0));
    compare("3", gcs.get(1).groupValue);
    countValues = new ArrayList<Comparable<?>>(gcs.get(1).uniqueValues);
    assertEquals(1, countValues.size());
    compare("1", countValues.get(0));
    indexSearcher.getIndexReader().close();
    dir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) StringField(org.apache.lucene.document.StringField) Term(org.apache.lucene.index.Term) TestUtil(org.apache.lucene.util.TestUtil) HashMap(java.util.HashMap) Random(java.util.Random) ArrayList(java.util.ArrayList) MutableValue(org.apache.lucene.util.mutable.MutableValue) HashSet(java.util.HashSet) LinkedHashMap(java.util.LinkedHashMap) BytesRefFieldSource(org.apache.lucene.queries.function.valuesource.BytesRefFieldSource) Document(org.apache.lucene.document.Document) Locale(java.util.Locale) Map(java.util.Map) Directory(org.apache.lucene.store.Directory) SortField(org.apache.lucene.search.SortField) MutableValueStr(org.apache.lucene.util.mutable.MutableValueStr) Sort(org.apache.lucene.search.Sort) BytesRef(org.apache.lucene.util.BytesRef) Collection(java.util.Collection) DirectoryReader(org.apache.lucene.index.DirectoryReader) Set(java.util.Set) IOException(java.io.IOException) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) List(java.util.List) TermQuery(org.apache.lucene.search.TermQuery) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) Comparator(java.util.Comparator) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Collections(java.util.Collections) IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) ArrayList(java.util.ArrayList) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) Random(java.util.Random) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) StringField(org.apache.lucene.document.StringField) TextField(org.apache.lucene.document.TextField) Sort(org.apache.lucene.search.Sort) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Example 99 with TextField

use of org.apache.lucene.document.TextField in project lucene-solr by apache.

the class DistinctValuesCollectorTest method createIndexContext.

/**
 * Builds a randomly sized index together with the expected search results for it.
 *
 * <p>While indexing, the method records which count values were seen for each group value under
 * each "content" term, plus the content terms in order of first appearance, and hands both to
 * the returned IndexContext alongside the directory and an open reader.
 *
 * @return the context wrapping the directory, the reader and the expected results
 * @throws Exception if indexing fails
 */
private IndexContext createIndexContext() throws Exception {
    Random random = random();
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random, dir, newIndexWriterConfig(new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
    int numDocs = 86 + random.nextInt(1087) * RANDOM_MULTIPLIER;

    // Pools of candidate values that the documents draw from.
    String[] groupPool = new String[numDocs / 5];
    String[] countPool = new String[numDocs / 10];
    for (int slot = 0; slot < groupPool.length; slot++) {
        groupPool[slot] = generateRandomNonEmptyString();
    }
    for (int slot = 0; slot < countPool.length; slot++) {
        countPool[slot] = generateRandomNonEmptyString();
    }

    List<String> contentStrings = new ArrayList<>();
    Map<String, Map<String, Set<String>>> searchTermToGroupCounts = new HashMap<>();
    for (int docNumber = 1; docNumber <= numDocs; docNumber++) {
        // Occasionally leave the group and/or count value out of the document (null).
        String groupValue = random.nextInt(23) == 14 ? null : groupPool[random.nextInt(groupPool.length)];
        String countValue = random.nextInt(21) == 13 ? null : countPool[random.nextInt(countPool.length)];
        String content = "random" + random.nextInt(numDocs / 20);

        Map<String, Set<String>> countsByGroup = searchTermToGroupCounts.get(content);
        if (countsByGroup == null) {
            // Groups sort always DOCID asc, so the per-term map keeps insertion order.
            countsByGroup = new LinkedHashMap<>();
            searchTermToGroupCounts.put(content, countsByGroup);
            contentStrings.add(content);
        }
        countsByGroup.computeIfAbsent(groupValue, unused -> new HashSet<>()).add(countValue);

        String paddedId = String.format(Locale.ROOT, "%09d", docNumber);
        Document doc = new Document();
        doc.add(new StringField("id", paddedId, Field.Store.YES));
        doc.add(new SortedDocValuesField("id", new BytesRef(paddedId)));
        if (groupValue != null) {
            addField(doc, GROUP_FIELD, groupValue);
        }
        if (countValue != null) {
            addField(doc, COUNT_FIELD, countValue);
        }
        doc.add(new TextField("content", content, Field.Store.YES));
        w.addDocument(doc);
    }

    DirectoryReader reader = w.getReader();
    if (VERBOSE) {
        final int maxDoc = reader.maxDoc();
        for (int docID = 0; docID < maxDoc; docID++) {
            Document stored = reader.document(docID);
            System.out.println("docID=" + docID + " id=" + stored.get("id") + " content=" + stored.get("content") + " author=" + stored.get("author") + " publisher=" + stored.get("publisher"));
        }
    }
    w.close();
    String[] contentTerms = contentStrings.toArray(new String[contentStrings.size()]);
    return new IndexContext(dir, reader, searchTermToGroupCounts, contentTerms);
}
Also used : HashSet(java.util.HashSet) Set(java.util.Set) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) DirectoryReader(org.apache.lucene.index.DirectoryReader) ArrayList(java.util.ArrayList) Document(org.apache.lucene.document.Document) LinkedHashMap(java.util.LinkedHashMap) Random(java.util.Random) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) StringField(org.apache.lucene.document.StringField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) TextField(org.apache.lucene.document.TextField) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) HashSet(java.util.HashSet)

Example 100 with TextField

use of org.apache.lucene.document.TextField in project lucene-solr by apache.

the class PayloadHelper method setUp.

/**
   * Builds an index in a RAMDirectory (wrapped in a MockDirectoryWrapper) and returns a searcher over it.
   * Every document gets three stored text fields, FIELD, MULTI_FIELD and NO_PAYLOAD_FIELD, whose text comes
   * from English.intToEnglish() of the document number; MULTI_FIELD holds that text twice. Documents are
   * analyzed with the PayloadAnalyzer, and the index is merged down to a single segment.
   * The opened reader is assigned to {@code reader}, which is declared outside this method.
   * @param random The source of randomness handed to the MockDirectoryWrapper
   * @param similarity The Similarity class to use in the writer and in the Searcher
   * @param numDocs The num docs to add
   * @return An IndexSearcher
   */
// TODO: randomize
public IndexSearcher setUp(Random random, Similarity similarity, int numDocs) throws IOException {
    Directory directory = new MockDirectoryWrapper(random, new RAMDirectory());
    PayloadAnalyzer analyzer = new PayloadAnalyzer();
    // TODO randomize this
    IndexWriterConfig config = new IndexWriterConfig(analyzer).setSimilarity(similarity);
    IndexWriter writer = new IndexWriter(directory, config);
    for (int docNumber = 0; docNumber < numDocs; docNumber++) {
        Document document = new Document();
        document.add(new TextField(FIELD, English.intToEnglish(docNumber), Field.Store.YES));
        document.add(new TextField(MULTI_FIELD, English.intToEnglish(docNumber) + "  " + English.intToEnglish(docNumber), Field.Store.YES));
        document.add(new TextField(NO_PAYLOAD_FIELD, English.intToEnglish(docNumber), Field.Store.YES));
        writer.addDocument(document);
    }
    // Collapse to one segment so getOnlyLeafReader below finds exactly one leaf.
    writer.forceMerge(1);
    reader = DirectoryReader.open(writer);
    writer.close();
    IndexSearcher result = LuceneTestCase.newSearcher(LuceneTestCase.getOnlyLeafReader(reader));
    result.setSimilarity(similarity);
    return result;
}
Also used : MockDirectoryWrapper(org.apache.lucene.store.MockDirectoryWrapper) IndexSearcher(org.apache.lucene.search.IndexSearcher) IndexWriter(org.apache.lucene.index.IndexWriter) TextField(org.apache.lucene.document.TextField) Document(org.apache.lucene.document.Document) RAMDirectory(org.apache.lucene.store.RAMDirectory) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Aggregations

TextField (org.apache.lucene.document.TextField)192 Document (org.apache.lucene.document.Document)171 Directory (org.apache.lucene.store.Directory)99 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)61 Term (org.apache.lucene.index.Term)61 IndexWriter (org.apache.lucene.index.IndexWriter)58 IndexSearcher (org.apache.lucene.search.IndexSearcher)55 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)52 Field (org.apache.lucene.document.Field)50 StringField (org.apache.lucene.document.StringField)48 BytesRef (org.apache.lucene.util.BytesRef)48 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)44 IndexReader (org.apache.lucene.index.IndexReader)43 TermQuery (org.apache.lucene.search.TermQuery)41 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)31 SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField)30 TopDocs (org.apache.lucene.search.TopDocs)29 RAMDirectory (org.apache.lucene.store.RAMDirectory)29 FieldType (org.apache.lucene.document.FieldType)23 Query (org.apache.lucene.search.Query)23