Example use of org.apache.lucene.document.TextField in the Apache lucene-solr project: class DistinctValuesCollectorTest, method createIndexContext.
// Builds a randomized test index and, alongside it, the expected
// content-term -> group -> distinct-count-values structure that the
// DistinctValuesCollector tests verify against.
private IndexContext createIndexContext() throws Exception {
Random random = random();
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random, dir, newIndexWriterConfig(new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
// At least 86 docs, scaled by the test framework's RANDOM_MULTIPLIER.
int numDocs = 86 + random.nextInt(1087) * RANDOM_MULTIPLIER;
// Pools of candidate group/count values; docs pick randomly from these.
String[] groupValues = new String[numDocs / 5];
String[] countValues = new String[numDocs / 10];
for (int i = 0; i < groupValues.length; i++) {
groupValues[i] = generateRandomNonEmptyString();
}
for (int i = 0; i < countValues.length; i++) {
countValues[i] = generateRandomNonEmptyString();
}
List<String> contentStrings = new ArrayList<>();
// Expected results: content term -> (group value -> set of distinct count values).
Map<String, Map<String, Set<String>>> searchTermToGroupCounts = new HashMap<>();
for (int i = 1; i <= numDocs; i++) {
// ~1 in 23 docs gets a null group value; ~1 in 21 gets a null count value.
String groupValue = random.nextInt(23) == 14 ? null : groupValues[random.nextInt(groupValues.length)];
String countValue = random.nextInt(21) == 13 ? null : countValues[random.nextInt(countValues.length)];
String content = "random" + random.nextInt(numDocs / 20);
Map<String, Set<String>> groupToCounts = searchTermToGroupCounts.get(content);
if (groupToCounts == null) {
// Groups sort always DOCID asc...
// LinkedHashMap keeps groups in first-seen (i.e. docid) order.
searchTermToGroupCounts.put(content, groupToCounts = new LinkedHashMap<>());
contentStrings.add(content);
}
Set<String> countsVals = groupToCounts.get(groupValue);
if (countsVals == null) {
groupToCounts.put(groupValue, countsVals = new HashSet<>());
}
countsVals.add(countValue);
Document doc = new Document();
// "id" is indexed both as a stored string field and as sorted doc values;
// zero-padding keeps the lexicographic order equal to the numeric order.
doc.add(new StringField("id", String.format(Locale.ROOT, "%09d", i), Field.Store.YES));
doc.add(new SortedDocValuesField("id", new BytesRef(String.format(Locale.ROOT, "%09d", i))));
if (groupValue != null) {
addField(doc, GROUP_FIELD, groupValue);
}
if (countValue != null) {
addField(doc, COUNT_FIELD, countValue);
}
doc.add(new TextField("content", content, Field.Store.YES));
w.addDocument(doc);
}
DirectoryReader reader = w.getReader();
if (VERBOSE) {
for (int docID = 0; docID < reader.maxDoc(); docID++) {
Document doc = reader.document(docID);
// NOTE(review): "author"/"publisher" are not added by this method, so these
// print null unless addField stores them under those names — confirm.
System.out.println("docID=" + docID + " id=" + doc.get("id") + " content=" + doc.get("content") + " author=" + doc.get("author") + " publisher=" + doc.get("publisher"));
}
}
w.close();
return new IndexContext(dir, reader, searchTermToGroupCounts, contentStrings.toArray(new String[contentStrings.size()]));
}
Example use of org.apache.lucene.document.TextField in the Apache lucene-solr project: class PayloadHelper, method setUp.
/**
 * Sets up a RAMDirectory and indexes {@code numDocs} documents (text produced by
 * {@code English.intToEnglish}) into three fields — FIELD, MULTI_FIELD (the English
 * text twice) and NO_PAYLOAD_FIELD — analyzed with the PayloadAnalyzer.
 *
 * @param similarity the Similarity to configure on both the writer and the searcher
 * @param numDocs    the number of documents to add
 * @return an IndexSearcher over the single force-merged segment
 */
// TODO: randomize
public IndexSearcher setUp(Random random, Similarity similarity, int numDocs) throws IOException {
    Directory directory = new MockDirectoryWrapper(random, new RAMDirectory());
    PayloadAnalyzer analyzer = new PayloadAnalyzer();
    // TODO randomize this
    IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer).setSimilarity(similarity));
    // writer.infoStream = System.out;
    for (int docNum = 0; docNum < numDocs; docNum++) {
        Document doc = new Document();
        doc.add(new TextField(FIELD, English.intToEnglish(docNum), Field.Store.YES));
        doc.add(new TextField(MULTI_FIELD, English.intToEnglish(docNum) + " " + English.intToEnglish(docNum), Field.Store.YES));
        doc.add(new TextField(NO_PAYLOAD_FIELD, English.intToEnglish(docNum), Field.Store.YES));
        writer.addDocument(doc);
    }
    // Collapse to one segment so the caller can grab the only leaf reader.
    writer.forceMerge(1);
    reader = DirectoryReader.open(writer);
    writer.close();
    IndexSearcher searcher = LuceneTestCase.newSearcher(LuceneTestCase.getOnlyLeafReader(reader));
    searcher.setSimilarity(similarity);
    return searcher;
}
Example use of org.apache.lucene.document.TextField in the Apache lucene-solr project: class TestPayloadSpans, method testShrinkToAfterShortestMatch2.
// Verifies that a SpanNearQuery ("a" immediately before "k", slop 0, in order)
// collects only the payloads of the positions inside the shrunk-to-shortest match.
public void testShrinkToAfterShortestMatch2() throws IOException {
Directory directory = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), directory, newIndexWriterConfig(new TestPayloadAnalyzer()));
Document doc = new Document();
doc.add(new TextField("content", new StringReader("a b a d k f a h i k a k")));
writer.addDocument(doc);
IndexReader reader = writer.getReader();
IndexSearcher is = newSearcher(getOnlyLeafReader(reader), false);
writer.close();
SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
SpanQuery[] sqs = { stq1, stq2 };
SpanNearQuery snq = new SpanNearQuery(sqs, 0, true);
VerifyingCollector collector = new VerifyingCollector();
// Request payload-level postings so spans.collect() can surface payloads.
Spans spans = snq.createWeight(is, false, 1f).getSpans(is.getIndexReader().leaves().get(0), SpanWeight.Postings.PAYLOADS);
TopDocs topDocs = is.search(snq, 1);
Set<String> payloadSet = new HashSet<>();
// NOTE(review): spans is exhausted during the first pass of the outer loop,
// so iterations beyond i=0 are no-ops; the outer loop is effectively redundant.
for (int i = 0; i < topDocs.scoreDocs.length; i++) {
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
collector.reset();
spans.collect(collector);
for (final BytesRef payload : collector.payloads) {
payloadSet.add(Term.toString(payload));
}
}
}
}
// Only the "a" at position 10 and the "k" at position 11 form a slop-0 match.
assertEquals(2, payloadSet.size());
assertTrue(payloadSet.contains("a:Noise:10"));
assertTrue(payloadSet.contains("k:Noise:11"));
reader.close();
directory.close();
}
Example use of org.apache.lucene.document.TextField in the Apache lucene-solr project: class IndexBasedSpellCheckerTest, method testAlternateLocation.
@Test
public void testAlternateLocation() throws Exception {
    String[] ALT_DOCS = new String[] { "jumpin jack flash", "Sargent Peppers Lonely Hearts Club Band", "Born to Run", "Thunder Road", "Londons Burning", "A Horse with No Name", "Sweet Caroline" };
    IndexBasedSpellChecker checker = new IndexBasedSpellChecker();
    NamedList spellchecker = new NamedList();
    spellchecker.add("classname", IndexBasedSpellChecker.class.getName());
    File tmpDir = createTempDir().toFile();
    File indexDir = new File(tmpDir, "spellingIdx");
    // create a standalone index at an alternate location the checker must read from
    File altIndexDir = new File(tmpDir, "alternateIdx" + System.currentTimeMillis());
    Directory dir = newFSDirectory(altIndexDir.toPath());
    IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(new WhitespaceAnalyzer()));
    for (String altDoc : ALT_DOCS) {
        Document doc = new Document();
        doc.add(new TextField("title", altDoc, Field.Store.YES));
        iw.addDocument(doc);
    }
    iw.forceMerge(1);
    iw.close();
    dir.close();
    // The spell-check index dir must exist before the checker builds into it;
    // previously the mkdirs() result was silently ignored.
    assertTrue("could not create " + indexDir, indexDir.mkdirs());
    spellchecker.add(AbstractLuceneSpellChecker.INDEX_DIR, indexDir.getAbsolutePath());
    spellchecker.add(AbstractLuceneSpellChecker.LOCATION, altIndexDir.getAbsolutePath());
    spellchecker.add(AbstractLuceneSpellChecker.FIELD, "title");
    spellchecker.add(AbstractLuceneSpellChecker.SPELLCHECKER_ARG_NAME, spellchecker);
    SolrCore core = h.getCore();
    String dictName = checker.init(spellchecker, core);
    assertEquals(SolrSpellChecker.DEFAULT_DICTIONARY_NAME, dictName);
    RefCounted<SolrIndexSearcher> holder = core.getSearcher();
    SolrIndexSearcher searcher = holder.get();
    try {
        checker.build(core, searcher);
        IndexReader reader = searcher.getIndexReader();
        Collection<Token> tokens = queryConverter.convert("flesh");
        SpellingOptions spellOpts = new SpellingOptions(tokens, reader, 1, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, true, 0.5f, null);
        SpellingResult result = checker.getSuggestions(spellOpts);
        assertNotNull("result is null and it shouldn't be", result);
        // should be lowercased, b/c we are using a lowercasing analyzer
        Map<String, Integer> suggestions = result.get(spellOpts.tokens.iterator().next());
        assertNotNull("flesh is null and it shouldn't be", suggestions);
        assertEquals("flesh Size: " + suggestions.size() + " is not: " + 1, 1, suggestions.size());
        Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next();
        assertEquals("flash", entry.getKey());
        assertEquals(1, (int) entry.getValue());
        // test something not in the spell checker
        spellOpts.tokens = queryConverter.convert("super");
        result = checker.getSuggestions(spellOpts);
        assertNotNull("result is null and it shouldn't be", result);
        suggestions = result.get(spellOpts.tokens.iterator().next());
        assertEquals("suggestions size should be 0", 0, suggestions.size());
        // a word already in the index yields no suggestion map at all
        spellOpts.tokens = queryConverter.convert("Caroline");
        result = checker.getSuggestions(spellOpts);
        assertNotNull("result is null and it shouldn't be", result);
        suggestions = result.get(spellOpts.tokens.iterator().next());
        assertNull("suggestions is not null and it should be", suggestions);
    } finally {
        holder.decref();
    }
}
Example use of org.apache.lucene.document.TextField in the Apache lucene-solr project: class QueryAutoStopWordAnalyzerTest, method setUp.
@Override
public void setUp() throws Exception {
    super.setUp();
    dir = new RAMDirectory();
    appAnalyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(appAnalyzer));
    // Index 200 docs cycling through the fixture value arrays so that the
    // "repetitiveField" terms recur often enough to qualify as stop words.
    final int numDocs = 200;
    for (int docNum = 0; docNum < numDocs; docNum++) {
        Document doc = new Document();
        doc.add(new TextField("variedField", variedFieldValues[docNum % variedFieldValues.length], Field.Store.YES));
        doc.add(new TextField("repetitiveField", repetitiveFieldValues[docNum % repetitiveFieldValues.length], Field.Store.YES));
        writer.addDocument(doc);
    }
    writer.close();
    reader = DirectoryReader.open(dir);
}
Aggregations