Use of org.apache.lucene.analysis.MockAnalyzer in project lucene-solr by apache.
The class MissesTest, method testTermQuery:
public void testTermQuery() throws IOException, InvalidTokenOffsetsException {
  // MockAnalyzer(random, tokenizer, lowerCase): whitespace tokenization, no lowercasing
  try (Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) {
    final Query query = new TermQuery(new Term("test", "foo"));
    final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(query));
    assertEquals("this is a <B>foo</B> bar example",
        highlighter.getBestFragment(analyzer, "test", "this is a foo bar example"));
    assertNull(highlighter.getBestFragment(analyzer, "test", "this does not match"));
  }
}
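MockAnalyzer lives in Lucene's test framework, so production code would use a real analyzer instead. Below is a minimal standalone sketch of the same highlighting flow, assuming WhitespaceAnalyzer from the lucene-analyzers-common module in place of MockAnalyzer; the class name HighlightSketch is hypothetical.

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;

public class HighlightSketch {
  public static void main(String[] args) throws IOException, InvalidTokenOffsetsException {
    try (Analyzer analyzer = new WhitespaceAnalyzer()) {
      Query query = new TermQuery(new Term("test", "foo"));
      // SimpleHTMLFormatter wraps matches in <B>...</B> by default
      Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(query));
      // Prints: this is a <B>foo</B> bar example
      System.out.println(highlighter.getBestFragment(analyzer, "test", "this is a foo bar example"));
      // Prints: null -- no term of the query occurs in the text
      System.out.println(highlighter.getBestFragment(analyzer, "test", "this does not match"));
    }
  }
}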
Use of org.apache.lucene.analysis.MockAnalyzer in project lucene-solr by apache.
The class HighlightCustomQueryTest, method highlightField:
/**
 * This method is intended for use with
 * <tt>testHighlightingWithDefaultField()</tt>
 */
private String highlightField(Query query, String fieldName, String text)
    throws IOException, InvalidTokenOffsetsException {
  // Simple (letter-run) tokenization, lowercased, with English stopwords removed
  TokenStream tokenStream = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET)
      .tokenStream(fieldName, text);
  // Assuming "<B>", "</B>" used to highlight
  SimpleHTMLFormatter formatter = new SimpleHTMLFormatter();
  MyQueryScorer scorer = new MyQueryScorer(query, fieldName, FIELD_NAME);
  Highlighter highlighter = new Highlighter(formatter, scorer);
  highlighter.setTextFragmenter(new SimpleFragmenter(Integer.MAX_VALUE));
  String rv = highlighter.getBestFragments(tokenStream, text, 1, "(FIELD TEXT TRUNCATED)");
  return rv.length() == 0 ? text : rv;
}
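Outside the test framework the same pattern works with any analyzer's tokenStream(field, text). A minimal sketch, assuming StandardAnalyzer and a plain QueryScorer in place of MockAnalyzer and the custom MyQueryScorer; the class and method names here are hypothetical.

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;

public class FieldHighlightSketch {

  static String highlightOrOriginal(Analyzer analyzer, Query query, String field, String text)
      throws IOException, InvalidTokenOffsetsException {
    TokenStream tokenStream = analyzer.tokenStream(field, text);
    Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(query, field));
    // Integer.MAX_VALUE keeps the whole text in a single fragment
    highlighter.setTextFragmenter(new SimpleFragmenter(Integer.MAX_VALUE));
    String best = highlighter.getBestFragments(tokenStream, text, 1, "(FIELD TEXT TRUNCATED)");
    // Fall back to the raw text when nothing was highlighted
    return best.isEmpty() ? text : best;
  }

  public static void main(String[] args) throws Exception {
    try (Analyzer analyzer = new StandardAnalyzer()) {
      Query query = new TermQuery(new Term("body", "foo"));
      // Prints: this is a <B>foo</B> bar example
      System.out.println(highlightOrOriginal(analyzer, query, "body", "this is a foo bar example"));
    }
  }
}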
Use of org.apache.lucene.analysis.MockAnalyzer in project lucene-solr by apache.
The class SynonymTokenizer, method setUp:
@Override
public void setUp() throws Exception {
  super.setUp();
  //Not many use this setup:
  a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
  dir = newDirectory();
  //Most tests use this setup:
  analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET);
  ramDir = newDirectory();
  fieldType = random().nextBoolean() ? FIELD_TYPE_TV : TextField.TYPE_STORED;
  IndexWriter writer = new IndexWriter(ramDir, newIndexWriterConfig(analyzer).setMergePolicy(newLogMergePolicy()));
  for (String text : texts) {
    writer.addDocument(doc(FIELD_NAME, text));
  }
  // a few tests need other docs...:
  Document doc = new Document();
  doc.add(new IntPoint(NUMERIC_FIELD_NAME, 1));
  doc.add(new StoredField(NUMERIC_FIELD_NAME, 1));
  writer.addDocument(doc);
  doc = new Document();
  doc.add(new IntPoint(NUMERIC_FIELD_NAME, 3));
  doc.add(new StoredField(NUMERIC_FIELD_NAME, 3));
  writer.addDocument(doc);
  doc = new Document();
  doc.add(new IntPoint(NUMERIC_FIELD_NAME, 5));
  doc.add(new StoredField(NUMERIC_FIELD_NAME, 5));
  writer.addDocument(doc);
  doc = new Document();
  doc.add(new IntPoint(NUMERIC_FIELD_NAME, 7));
  doc.add(new StoredField(NUMERIC_FIELD_NAME, 7));
  writer.addDocument(doc);
  Document childDoc = doc(FIELD_NAME, "child document");
  Document parentDoc = doc(FIELD_NAME, "parent document");
  writer.addDocuments(Arrays.asList(childDoc, parentDoc));
  writer.forceMerge(1);
  writer.close();
  reader = DirectoryReader.open(ramDir);
  //Misc:
  numHighlights = 0;
}
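The numeric documents above pair an IntPoint (indexed, queryable by point queries) with a StoredField (retrievable) under the same field name. A self-contained sketch of that pairing outside the test framework, assuming Lucene 8.x's ByteBuffersDirectory and StandardAnalyzer; NumericFieldSketch is a hypothetical name.

import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class NumericFieldSketch {
  public static void main(String[] args) throws IOException {
    Directory dir = new ByteBuffersDirectory();
    try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
      for (int value : new int[] {1, 3, 5, 7}) {
        Document doc = new Document();
        doc.add(new IntPoint("number", value));    // indexed for exact/range queries
        doc.add(new StoredField("number", value)); // stored so the value can be read back
        writer.addDocument(doc);
      }
    }
    try (DirectoryReader reader = DirectoryReader.open(dir)) {
      IndexSearcher searcher = new IndexSearcher(reader);
      // Range [2, 6] matches the docs with values 3 and 5
      long hits = searcher.count(IntPoint.newRangeQuery("number", 2, 6));
      System.out.println("hits=" + hits); // 2
    }
    dir.close();
  }
}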
Use of org.apache.lucene.analysis.MockAnalyzer in project lucene-solr by apache.
The class SimplePrimaryNode, method initWriter:
private static IndexWriter initWriter(int id, Random random, Path indexPath, boolean doCheckIndexOnClose) throws IOException {
  Directory dir = SimpleReplicaNode.getDirectory(random, id, indexPath, doCheckIndexOnClose);
  MockAnalyzer analyzer = new MockAnalyzer(random);
  analyzer.setMaxTokenLength(TestUtil.nextInt(random, 1, IndexWriter.MAX_TERM_LENGTH));
  IndexWriterConfig iwc = LuceneTestCase.newIndexWriterConfig(random, analyzer);
  MergePolicy mp = iwc.getMergePolicy();
  // Force more frequent merging so we stress merge warming:
  if (mp instanceof TieredMergePolicy) {
    TieredMergePolicy tmp = (TieredMergePolicy) mp;
    tmp.setSegmentsPerTier(3);
    tmp.setMaxMergeAtOnce(3);
  } else if (mp instanceof LogMergePolicy) {
    LogMergePolicy lmp = (LogMergePolicy) mp;
    lmp.setMergeFactor(3);
  }
  IndexWriter writer = new IndexWriter(dir, iwc);
  TestUtil.reduceOpenFiles(writer);
  return writer;
}
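The same merge-policy tuning applies to any IndexWriterConfig: lower thresholds make merges fire sooner, which is what the stress test wants. A sketch with a non-test analyzer, assuming the lucene-solr era API where TieredMergePolicy still exposes setMaxMergeAtOnce; MergeTuningSketch is hypothetical.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LogMergePolicy;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.TieredMergePolicy;

public class MergeTuningSketch {
  static IndexWriterConfig frequentMerges() {
    IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
    MergePolicy mp = iwc.getMergePolicy();
    // Lower thresholds trigger merges sooner, at the cost of more merge I/O
    if (mp instanceof TieredMergePolicy) {
      TieredMergePolicy tmp = (TieredMergePolicy) mp;
      tmp.setSegmentsPerTier(3);
      tmp.setMaxMergeAtOnce(3);
    } else if (mp instanceof LogMergePolicy) {
      ((LogMergePolicy) mp).setMergeFactor(3);
    }
    return iwc;
  }
}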
Use of org.apache.lucene.analysis.MockAnalyzer in project lucene-solr by apache.
The class TestIDVersionPostingsFormat, method testRandom:
// TODO make a similar test for BT, w/ varied IDs:
public void testRandom() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
  int minItemsInBlock = TestUtil.nextInt(random(), 2, 50);
  int maxItemsInBlock = 2 * (minItemsInBlock - 1) + random().nextInt(50);
  iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat(minItemsInBlock, maxItemsInBlock)));
  RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
  //IndexWriter w = new IndexWriter(dir, iwc);
  int numDocs = atLeast(1000);
  Map<String, Long> idValues = new HashMap<String, Long>();
  int docUpto = 0;
  if (VERBOSE) {
    System.out.println("TEST: numDocs=" + numDocs);
  }
  IDSource ids = getRandomIDs();
  String idPrefix;
  if (random().nextBoolean()) {
    idPrefix = "";
  } else {
    idPrefix = TestUtil.randomSimpleString(random());
    if (VERBOSE) {
      System.out.println("TEST: use id prefix: " + idPrefix);
    }
  }
  boolean useMonotonicVersion = random().nextBoolean();
  if (VERBOSE) {
    System.out.println("TEST: useMonotonicVersion=" + useMonotonicVersion);
  }
  List<String> idsList = new ArrayList<>();
  long version = 0;
  while (docUpto < numDocs) {
    String idValue = idPrefix + ids.next();
    if (idValues.containsKey(idValue)) {
      continue;
    }
    if (useMonotonicVersion) {
      version += TestUtil.nextInt(random(), 1, 10);
    } else {
      version = random().nextLong() & 0x3fffffffffffffffL;
    }
    idValues.put(idValue, version);
    if (VERBOSE) {
      System.out.println(" " + idValue + " -> " + version);
    }
    Document doc = new Document();
    doc.add(makeIDField(idValue, version));
    w.addDocument(doc);
    idsList.add(idValue);
    if (idsList.size() > 0 && random().nextInt(7) == 5) {
      // Randomly delete or update a previous ID
      idValue = idsList.get(random().nextInt(idsList.size()));
      if (random().nextBoolean()) {
        if (useMonotonicVersion) {
          version += TestUtil.nextInt(random(), 1, 10);
        } else {
          version = random().nextLong() & 0x3fffffffffffffffL;
        }
        doc = new Document();
        doc.add(makeIDField(idValue, version));
        if (VERBOSE) {
          System.out.println(" update " + idValue + " -> " + version);
        }
        w.updateDocument(new Term("id", idValue), doc);
        idValues.put(idValue, version);
      } else {
        if (VERBOSE) {
          System.out.println(" delete " + idValue);
        }
        w.deleteDocuments(new Term("id", idValue));
        idValues.remove(idValue);
      }
    }
    docUpto++;
  }
  IndexReader r = w.getReader();
  //IndexReader r = DirectoryReader.open(w);
  PerThreadVersionPKLookup lookup = new PerThreadVersionPKLookup(r, "id");
  List<Map.Entry<String, Long>> idValuesList = new ArrayList<>(idValues.entrySet());
  int iters = numDocs * 5;
  for (int iter = 0; iter < iters; iter++) {
    String idValue;
    if (random().nextBoolean()) {
      idValue = idValuesList.get(random().nextInt(idValuesList.size())).getKey();
    } else if (random().nextBoolean()) {
      idValue = ids.next();
    } else {
      idValue = idPrefix + TestUtil.randomSimpleString(random());
    }
    BytesRef idValueBytes = new BytesRef(idValue);
    Long expectedVersion = idValues.get(idValue);
    if (VERBOSE) {
      System.out.println("\nTEST: iter=" + iter + " id=" + idValue + " expectedVersion=" + expectedVersion);
    }
    if (expectedVersion == null) {
      assertEquals("term should not have been found (doesn't exist)", -1, lookup.lookup(idValueBytes));
    } else {
      if (random().nextBoolean()) {
        if (VERBOSE) {
          System.out.println(" lookup exact version (should be found)");
        }
        assertTrue("term should have been found (version too old)", lookup.lookup(idValueBytes, expectedVersion.longValue()) != -1);
        assertEquals(expectedVersion.longValue(), lookup.getVersion());
      } else {
        if (VERBOSE) {
          System.out.println(" lookup version+1 (should not be found)");
        }
        assertEquals("term should not have been found (version newer)", -1, lookup.lookup(idValueBytes, expectedVersion.longValue() + 1));
      }
    }
  }
  r.close();
  w.close();
  dir.close();
}
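The core primitives this test exercises, updateDocument (an atomic delete-then-add keyed by a term) and deleteDocuments, can be shown in a few lines without the ID/version machinery. A minimal sketch, assuming ByteBuffersDirectory and StandardAnalyzer; UpdateByIdSketch is hypothetical.

import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class UpdateByIdSketch {
  public static void main(String[] args) throws IOException {
    Directory dir = new ByteBuffersDirectory();
    IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));
    // StringField indexes the ID as a single, untokenized term, so it can key updates
    Document doc = new Document();
    doc.add(new StringField("id", "doc-1", Field.Store.YES));
    w.addDocument(doc);
    // updateDocument atomically deletes any doc matching the term, then adds the new one
    Document newer = new Document();
    newer.add(new StringField("id", "doc-1", Field.Store.YES));
    w.updateDocument(new Term("id", "doc-1"), newer);
    // deleteDocuments removes every doc whose "id" term matches
    w.deleteDocuments(new Term("id", "doc-1"));
    w.commit();
    try (DirectoryReader r = DirectoryReader.open(dir)) {
      System.out.println("numDocs=" + r.numDocs()); // 0
    }
    w.close();
    dir.close();
  }
}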