Search in sources :

Example 46 with MockAnalyzer

use of org.apache.lucene.analysis.MockAnalyzer in project lucene-solr by apache.

The example below is from the class MissesTest, method testTermQuery.

/**
 * Verifies that a plain {@code TermQuery} drives highlighting correctly:
 * text containing the term gets the hit wrapped in the formatter's default
 * {@code <B>}/{@code </B>} tags, while text without the term produces no
 * best fragment at all ({@code null}).
 */
public void testTermQuery() throws IOException, InvalidTokenOffsetsException {
    try (Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) {
        final Query termQuery = new TermQuery(new Term("test", "foo"));
        final QueryScorer scorer = new QueryScorer(termQuery);
        final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), scorer);
        final String hit = highlighter.getBestFragment(analyzer, "test", "this is a foo bar example");
        assertEquals("this is a <B>foo</B> bar example", hit);
        final String miss = highlighter.getBestFragment(analyzer, "test", "this does not match");
        assertNull(miss);
    }
}
Also used : SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) TermQuery(org.apache.lucene.search.TermQuery) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Query(org.apache.lucene.search.Query) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) Term(org.apache.lucene.index.Term) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer)

Example 47 with MockAnalyzer

use of org.apache.lucene.analysis.MockAnalyzer in project lucene-solr by apache.

The example below is from the class HighlightCustomQueryTest, method highlightField.

/**
 * Highlights {@code text} for {@code fieldName} against {@code query} using
 * the custom {@code MyQueryScorer}; intended for use with
 * {@code testHighlightingWithDefaultField()}.
 *
 * @return the single best highlighted fragment, or the original text when
 *         no fragment was produced
 */
private String highlightField(Query query, String fieldName, String text) throws IOException, InvalidTokenOffsetsException {
    // MockAnalyzer (SIMPLE tokenizer + English stopset) feeds the highlighter.
    TokenStream stream = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET).tokenStream(fieldName, text);
    // SimpleHTMLFormatter's no-arg form wraps hits in "<B>" / "</B>".
    Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new MyQueryScorer(query, fieldName, FIELD_NAME));
    // A single unbounded fragment, so the whole text comes back highlighted.
    highlighter.setTextFragmenter(new SimpleFragmenter(Integer.MAX_VALUE));
    String best = highlighter.getBestFragments(stream, text, 1, "(FIELD TEXT TRUNCATED)");
    return best.isEmpty() ? text : best;
}
Also used : CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) SimpleFragmenter(org.apache.lucene.search.highlight.SimpleFragmenter) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) SimpleHTMLFormatter(org.apache.lucene.search.highlight.SimpleHTMLFormatter) Highlighter(org.apache.lucene.search.highlight.Highlighter)

Example 48 with MockAnalyzer

use of org.apache.lucene.analysis.MockAnalyzer in project lucene-solr by apache.

The example below is from the class SynonymTokenizer, method setUp.

/**
 * Builds the shared fixtures for the highlighter tests: two analyzers, two
 * directories, and an index over {@code texts} plus four numeric-only docs
 * and one child/parent document pair.
 */
@Override
public void setUp() throws Exception {
    super.setUp();
    //Not many use this setup:
    a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    dir = newDirectory();
    //Most tests use this setup:
    analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET);
    ramDir = newDirectory();
    // Randomly pick a term-vector-enabled field type to widen test coverage.
    fieldType = random().nextBoolean() ? FIELD_TYPE_TV : TextField.TYPE_STORED;
    IndexWriter writer = new IndexWriter(ramDir, newIndexWriterConfig(analyzer).setMergePolicy(newLogMergePolicy()));
    for (String text : texts) {
        writer.addDocument(doc(FIELD_NAME, text));
    }
    // a few tests need other docs...: one numeric doc per value, added in the
    // same order as before (the four copy-pasted stanzas collapsed to a loop).
    for (int value : new int[] { 1, 3, 5, 7 }) {
        Document numericDoc = new Document();
        // IntPoint indexes the value for point queries; StoredField makes it retrievable.
        numericDoc.add(new IntPoint(NUMERIC_FIELD_NAME, value));
        numericDoc.add(new StoredField(NUMERIC_FIELD_NAME, value));
        writer.addDocument(numericDoc);
    }
    Document childDoc = doc(FIELD_NAME, "child document");
    Document parentDoc = doc(FIELD_NAME, "parent document");
    // addDocuments indexes the pair together as a block (presumably for
    // block-join style tests — confirm against the tests that use these docs).
    writer.addDocuments(Arrays.asList(childDoc, parentDoc));
    writer.forceMerge(1);
    writer.close();
    reader = DirectoryReader.open(ramDir);
    //Misc:
    numHighlights = 0;
}
Also used : IntPoint(org.apache.lucene.document.IntPoint) StoredField(org.apache.lucene.document.StoredField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexWriter(org.apache.lucene.index.IndexWriter) Document(org.apache.lucene.document.Document)

Example 49 with MockAnalyzer

use of org.apache.lucene.analysis.MockAnalyzer in project lucene-solr by apache.

The example below is from the class SimplePrimaryNode, method initWriter.

/**
 * Creates the primary node's {@code IndexWriter} over a per-node test
 * directory, biasing the merge policy toward small, frequent merges so the
 * replication tests exercise merge warming aggressively.
 */
private static IndexWriter initWriter(int id, Random random, Path indexPath, boolean doCheckIndexOnClose) throws IOException {
    Directory dir = SimpleReplicaNode.getDirectory(random, id, indexPath, doCheckIndexOnClose);
    MockAnalyzer analyzer = new MockAnalyzer(random);
    analyzer.setMaxTokenLength(TestUtil.nextInt(random, 1, IndexWriter.MAX_TERM_LENGTH));
    IndexWriterConfig config = LuceneTestCase.newIndexWriterConfig(random, analyzer);
    // Force more frequent merging so we stress merge warming:
    MergePolicy policy = config.getMergePolicy();
    if (policy instanceof TieredMergePolicy) {
        TieredMergePolicy tiered = (TieredMergePolicy) policy;
        tiered.setSegmentsPerTier(3);
        tiered.setMaxMergeAtOnce(3);
    } else if (policy instanceof LogMergePolicy) {
        ((LogMergePolicy) policy).setMergeFactor(3);
    }
    IndexWriter writer = new IndexWriter(dir, config);
    TestUtil.reduceOpenFiles(writer);
    return writer;
}
Also used : TieredMergePolicy(org.apache.lucene.index.TieredMergePolicy) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexWriter(org.apache.lucene.index.IndexWriter) MergePolicy(org.apache.lucene.index.MergePolicy) LogMergePolicy(org.apache.lucene.index.LogMergePolicy) TieredMergePolicy(org.apache.lucene.index.TieredMergePolicy) LogMergePolicy(org.apache.lucene.index.LogMergePolicy) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 50 with MockAnalyzer

use of org.apache.lucene.analysis.MockAnalyzer in project lucene-solr by apache.

The example below is from the class TestIDVersionPostingsFormat, method testRandom.

// TODO make a similar test for BT, w/ varied IDs:
/**
 * Randomized round-trip test for the ID/version postings format: indexes
 * random unique IDs with either monotonically increasing or random 62-bit
 * versions, randomly updates/deletes some of them along the way, then
 * verifies that {@code PerThreadVersionPKLookup} finds exactly the live IDs
 * at exactly the recorded versions and rejects everything else.
 */
public void testRandom() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
    // Random term-dictionary block sizes; max is derived from min so the pair
    // stays valid (presumably the BlockTree-style min/max invariant — confirm
    // against IDVersionPostingsFormat's constructor checks).
    int minItemsInBlock = TestUtil.nextInt(random(), 2, 50);
    int maxItemsInBlock = 2 * (minItemsInBlock - 1) + random().nextInt(50);
    iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat(minItemsInBlock, maxItemsInBlock)));
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
    //IndexWriter w = new IndexWriter(dir, iwc);
    int numDocs = atLeast(1000);
    // Expected state: id -> last indexed version; entries removed on delete.
    Map<String, Long> idValues = new HashMap<String, Long>();
    int docUpto = 0;
    if (VERBOSE) {
        System.out.println("TEST: numDocs=" + numDocs);
    }
    IDSource ids = getRandomIDs();
    // Optional shared prefix exercises term-dictionary prefix compression paths.
    String idPrefix;
    if (random().nextBoolean()) {
        idPrefix = "";
    } else {
        idPrefix = TestUtil.randomSimpleString(random());
        if (VERBOSE) {
            System.out.println("TEST: use id prefix: " + idPrefix);
        }
    }
    boolean useMonotonicVersion = random().nextBoolean();
    if (VERBOSE) {
        System.out.println("TEST: useMonotonicVersion=" + useMonotonicVersion);
    }
    List<String> idsList = new ArrayList<>();
    long version = 0;
    while (docUpto < numDocs) {
        String idValue = idPrefix + ids.next();
        if (idValues.containsKey(idValue)) {
            // Duplicate id generated; retry with a fresh one.
            continue;
        }
        if (useMonotonicVersion) {
            version += TestUtil.nextInt(random(), 1, 10);
        } else {
            // Mask to 62 bits — presumably the format's maximum version value;
            // confirm against IDVersionPostingsFormat.
            version = random().nextLong() & 0x3fffffffffffffffL;
        }
        idValues.put(idValue, version);
        if (VERBOSE) {
            System.out.println("  " + idValue + " -> " + version);
        }
        Document doc = new Document();
        doc.add(makeIDField(idValue, version));
        w.addDocument(doc);
        idsList.add(idValue);
        // NOTE(review): size() > 0 is always true here since idValue was just
        // added; kept for clarity/safety.
        if (idsList.size() > 0 && random().nextInt(7) == 5) {
            // Randomly delete or update a previous ID
            idValue = idsList.get(random().nextInt(idsList.size()));
            if (random().nextBoolean()) {
                // Update path: re-index the id with a new version and record it.
                if (useMonotonicVersion) {
                    version += TestUtil.nextInt(random(), 1, 10);
                } else {
                    version = random().nextLong() & 0x3fffffffffffffffL;
                }
                doc = new Document();
                doc.add(makeIDField(idValue, version));
                if (VERBOSE) {
                    System.out.println("  update " + idValue + " -> " + version);
                }
                w.updateDocument(new Term("id", idValue), doc);
                idValues.put(idValue, version);
            } else {
                // Delete path: remove from both the index and the expected map.
                if (VERBOSE) {
                    System.out.println("  delete " + idValue);
                }
                w.deleteDocuments(new Term("id", idValue));
                idValues.remove(idValue);
            }
        }
        docUpto++;
    }
    IndexReader r = w.getReader();
    //IndexReader r = DirectoryReader.open(w);
    PerThreadVersionPKLookup lookup = new PerThreadVersionPKLookup(r, "id");
    List<Map.Entry<String, Long>> idValuesList = new ArrayList<>(idValues.entrySet());
    int iters = numDocs * 5;
    // Verification phase: mix known-live ids, freshly generated ids, and
    // random strings (the latter two are usually absent from the index).
    for (int iter = 0; iter < iters; iter++) {
        String idValue;
        if (random().nextBoolean()) {
            idValue = idValuesList.get(random().nextInt(idValuesList.size())).getKey();
        } else if (random().nextBoolean()) {
            idValue = ids.next();
        } else {
            idValue = idPrefix + TestUtil.randomSimpleString(random());
        }
        BytesRef idValueBytes = new BytesRef(idValue);
        Long expectedVersion = idValues.get(idValue);
        if (VERBOSE) {
            System.out.println("\nTEST: iter=" + iter + " id=" + idValue + " expectedVersion=" + expectedVersion);
        }
        if (expectedVersion == null) {
            // Deleted or never-indexed id: lookup must miss (-1).
            assertEquals("term should not have been found (doesn't exist)", -1, lookup.lookup(idValueBytes));
        } else {
            if (random().nextBoolean()) {
                if (VERBOSE) {
                    System.out.println("  lookup exact version (should be found)");
                }
                // Looking up at the exact recorded version must hit.
                assertTrue("term should have been found (version too old)", lookup.lookup(idValueBytes, expectedVersion.longValue()) != -1);
                assertEquals(expectedVersion.longValue(), lookup.getVersion());
            } else {
                if (VERBOSE) {
                    System.out.println("  lookup version+1 (should not be found)");
                }
                // Asking for a strictly newer version than indexed must miss.
                assertEquals("term should not have been found (version newer)", -1, lookup.lookup(idValueBytes, expectedVersion.longValue() + 1));
            }
        }
    }
    r.close();
    w.close();
    dir.close();
}
Also used : HashMap(java.util.HashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) ArrayList(java.util.ArrayList) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) AtomicLong(java.util.concurrent.atomic.AtomicLong) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Aggregations

MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)1164 Directory (org.apache.lucene.store.Directory)785 Document (org.apache.lucene.document.Document)775 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)265 Analyzer (org.apache.lucene.analysis.Analyzer)259 BytesRef (org.apache.lucene.util.BytesRef)252 StringField (org.apache.lucene.document.StringField)183 Term (org.apache.lucene.index.Term)183 RAMDirectory (org.apache.lucene.store.RAMDirectory)168 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)165 Field (org.apache.lucene.document.Field)164 TextField (org.apache.lucene.document.TextField)159 Test (org.junit.Test)142 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)136 IndexReader (org.apache.lucene.index.IndexReader)134 IndexWriter (org.apache.lucene.index.IndexWriter)133 TermQuery (org.apache.lucene.search.TermQuery)121 FieldType (org.apache.lucene.document.FieldType)119 SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField)116 IndexSearcher (org.apache.lucene.search.IndexSearcher)111