Use of org.apache.lucene.codecs.PostingsFormat in the lucene-solr project by Apache.
From the class TestSuggestField, the method iwcWithSuggestField:
static IndexWriterConfig iwcWithSuggestField(Analyzer analyzer, final Set<String> suggestFields) {
  IndexWriterConfig iwc = newIndexWriterConfig(random(), analyzer);
  iwc.setMergePolicy(newLogMergePolicy());
  Codec filterCodec = new Lucene70Codec() {
    PostingsFormat postingsFormat = new Completion50PostingsFormat();

    @Override
    public PostingsFormat getPostingsFormatForField(String field) {
      if (suggestFields.contains(field)) {
        return postingsFormat;
      }
      return super.getPostingsFormatForField(field);
    }
  };
  iwc.setCodec(filterCodec);
  return iwc;
}
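As a rough usage sketch (the field name, analyzer, and the SuggestField document below are illustrative assumptions, not taken from the test itself), the helper could be wired into an indexing test like this:

Analyzer analyzer = new MockAnalyzer(random());
Directory dir = newDirectory();
// Only the "suggest_field" field is routed to the completion postings format.
IndexWriterConfig iwc = iwcWithSuggestField(analyzer, Collections.singleton("suggest_field"));
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
Document document = new Document();
document.add(new SuggestField("suggest_field", "suggestion", 4));
writer.addDocument(document);
writer.close();
dir.close();

The important point is that only the fields named in the suggestFields set get the completion postings format; every other field keeps the codec's default.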
Use of org.apache.lucene.codecs.PostingsFormat in the lucene-solr project by Apache.
From the class SchemaCodecFactory, the method init:
@Override
public void init(NamedList args) {
  super.init(args);
  assert codec == null;
  String compressionModeStr = (String) args.get(COMPRESSION_MODE);
  Mode compressionMode;
  if (compressionModeStr != null) {
    try {
      compressionMode = Mode.valueOf(compressionModeStr.toUpperCase(Locale.ROOT));
    } catch (IllegalArgumentException e) {
      throw new SolrException(ErrorCode.SERVER_ERROR,
          "Invalid compressionMode: '" + compressionModeStr + "'. Value must be one of " + Arrays.toString(Mode.values()));
    }
    log.debug("Using compressionMode: " + compressionMode);
  } else {
    compressionMode = SOLR_DEFAULT_COMPRESSION_MODE;
    log.debug("Using default compressionMode: " + compressionMode);
  }
  codec = new Lucene70Codec(compressionMode) {

    @Override
    public PostingsFormat getPostingsFormatForField(String field) {
      final SchemaField schemaField = core.getLatestSchema().getFieldOrNull(field);
      if (schemaField != null) {
        String postingsFormatName = schemaField.getType().getPostingsFormat();
        if (postingsFormatName != null) {
          return PostingsFormat.forName(postingsFormatName);
        }
      }
      return super.getPostingsFormatForField(field);
    }

    @Override
    public DocValuesFormat getDocValuesFormatForField(String field) {
      final SchemaField schemaField = core.getLatestSchema().getFieldOrNull(field);
      if (schemaField != null) {
        String docValuesFormatName = schemaField.getType().getDocValuesFormat();
        if (docValuesFormatName != null) {
          return DocValuesFormat.forName(docValuesFormatName);
        }
      }
      return super.getDocValuesFormatForField(field);
    }
  };
}
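The two overrides above resolve formats by name through Lucene's SPI machinery. A minimal sketch of that lookup (the format names are illustrative; forName only succeeds for names registered under META-INF/services):

// Throws IllegalArgumentException if no format with the given name is registered.
PostingsFormat postings = PostingsFormat.forName("Lucene50");
DocValuesFormat docValues = DocValuesFormat.forName("Lucene70");

In the factory above those names come from the schema, so a field type can opt into a non-default postingsFormat or docValuesFormat on a per-field basis.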
Use of org.apache.lucene.codecs.PostingsFormat in the lucene-solr project by Apache.
From the class RandomCodec, the method add:
private final void add(Set<String> avoidCodecs, PostingsFormat... postings) {
  for (PostingsFormat p : postings) {
    if (!avoidCodecs.contains(p.getName())) {
      formats.add(p);
      formatNames.add(p.getName());
    }
  }
}
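A hedged sketch of how this helper might be invoked (the avoided name and the concrete formats are illustrative assumptions; the actual RandomCodec constructor assembles a much longer candidate list):

Set<String> avoidCodecs = new HashSet<>(Arrays.asList("Memory"));
// "Memory" is in the avoid set, so a format named "Memory" would be skipped,
// while the remaining formats are added to the candidate list.
add(avoidCodecs, TestUtil.getDefaultPostingsFormat(), new DirectPostingsFormat(), new MemoryPostingsFormat());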
Use of org.apache.lucene.codecs.PostingsFormat in the lucene-solr project by Apache.
From the class RandomCodec, the method getPostingsFormatForField:
@Override
public PostingsFormat getPostingsFormatForField(String name) {
  PostingsFormat codec = previousMappings.get(name);
  if (codec == null) {
    codec = formats.get(Math.abs(perFieldSeed ^ name.hashCode()) % formats.size());
    previousMappings.put(name, codec);
    // Safety:
    assert previousMappings.size() < 10000 : "test went insane";
  }
  return codec;
}
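Because the format is chosen by hashing the field name against a fixed per-test seed and the result is cached in previousMappings, repeated lookups for the same field are stable within a run. A small sketch of that invariant (the codec variable and field name are illustrative):

PostingsFormat first = codec.getPostingsFormatForField("title");
PostingsFormat second = codec.getPostingsFormatForField("title");
// Same field name within the same run: the cached mapping returns the same format instance.
assert first == second;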
Use of org.apache.lucene.codecs.PostingsFormat in the lucene-solr project by Apache.
From the class BasePostingsFormatTestCase, the method testInvertedWrite:
// LUCENE-5123: make sure we can visit postings twice
// during flush/merge
public void testInvertedWrite() throws Exception {
  Directory dir = newDirectory();
  MockAnalyzer analyzer = new MockAnalyzer(random());
  analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
  IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
  // Must be concurrent because thread(s) can be merging
  // while up to one thread flushes, and each of those
  // threads iterates over the map while the flushing
  // thread might be adding to it:
  final Map<String, TermFreqs> termFreqs = new ConcurrentHashMap<>();
  final AtomicLong sumDocFreq = new AtomicLong();
  final AtomicLong sumTotalTermFreq = new AtomicLong();
  // TODO: would be better to use / delegate to the current
  // Codec returned by getCodec()
  iwc.setCodec(new FilterCodec(getCodec().getName(), getCodec()) {
    @Override
    public PostingsFormat postingsFormat() {
      final PostingsFormat defaultPostingsFormat = delegate.postingsFormat();
      final Thread mainThread = Thread.currentThread();
      return new PostingsFormat(defaultPostingsFormat.getName()) {
        @Override
        public FieldsConsumer fieldsConsumer(final SegmentWriteState state) throws IOException {
          final FieldsConsumer fieldsConsumer = defaultPostingsFormat.fieldsConsumer(state);
          return new FieldsConsumer() {
            @Override
            public void write(Fields fields) throws IOException {
              fieldsConsumer.write(fields);
              boolean isMerge = state.context.context == IOContext.Context.MERGE;
              // in this test:
              assert isMerge || Thread.currentThread() == mainThread;
              // We iterate the provided TermsEnum
              // twice, so we exercise this new freedom
              // with the inverted API; if
              // addOnSecondPass is true, we add up
              // term stats on the 2nd iteration:
              boolean addOnSecondPass = random().nextBoolean();
              //System.out.println("write isMerge=" + isMerge + " 2ndPass=" + addOnSecondPass);
              // Gather our own stats:
              Terms terms = fields.terms("body");
              assert terms != null;
              TermsEnum termsEnum = terms.iterator();
              PostingsEnum docs = null;
              while (termsEnum.next() != null) {
                BytesRef term = termsEnum.term();
                // TODO: also sometimes ask for payloads/offsets?
                boolean noPositions = random().nextBoolean();
                if (noPositions) {
                  docs = termsEnum.postings(docs, PostingsEnum.FREQS);
                } else {
                  docs = termsEnum.postings(null, PostingsEnum.POSITIONS);
                }
                int docFreq = 0;
                long totalTermFreq = 0;
                while (docs.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
                  docFreq++;
                  totalTermFreq += docs.freq();
                  int limit = TestUtil.nextInt(random(), 1, docs.freq());
                  if (!noPositions) {
                    for (int i = 0; i < limit; i++) {
                      docs.nextPosition();
                    }
                  }
                }
                String termString = term.utf8ToString();
                // During merge we should only see terms
                // we had already seen during a
                // previous flush:
                assertTrue(isMerge == false || termFreqs.containsKey(termString));
                if (isMerge == false) {
                  if (addOnSecondPass == false) {
                    TermFreqs tf = termFreqs.get(termString);
                    if (tf == null) {
                      tf = new TermFreqs();
                      termFreqs.put(termString, tf);
                    }
                    tf.docFreq += docFreq;
                    tf.totalTermFreq += totalTermFreq;
                    sumDocFreq.addAndGet(docFreq);
                    sumTotalTermFreq.addAndGet(totalTermFreq);
                  } else if (termFreqs.containsKey(termString) == false) {
                    // Add placeholder (2nd pass will
                    // set its counts):
                    termFreqs.put(termString, new TermFreqs());
                  }
                }
              }
              // Also test seeking the TermsEnum:
              for (String term : termFreqs.keySet()) {
                if (termsEnum.seekExact(new BytesRef(term))) {
                  // TODO: also sometimes ask for payloads/offsets?
                  boolean noPositions = random().nextBoolean();
                  if (noPositions) {
                    docs = termsEnum.postings(docs, PostingsEnum.FREQS);
                  } else {
                    docs = termsEnum.postings(null, PostingsEnum.POSITIONS);
                  }
                  int docFreq = 0;
                  long totalTermFreq = 0;
                  while (docs.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
                    docFreq++;
                    totalTermFreq += docs.freq();
                    int limit = TestUtil.nextInt(random(), 1, docs.freq());
                    if (!noPositions) {
                      for (int i = 0; i < limit; i++) {
                        docs.nextPosition();
                      }
                    }
                  }
                  if (isMerge == false && addOnSecondPass) {
                    TermFreqs tf = termFreqs.get(term);
                    assert tf != null;
                    tf.docFreq += docFreq;
                    tf.totalTermFreq += totalTermFreq;
                    sumDocFreq.addAndGet(docFreq);
                    sumTotalTermFreq.addAndGet(totalTermFreq);
                  }
                  //System.out.println(" term=" + term + " docFreq=" + docFreq + " ttDF=" + termToDocFreq.get(term));
                  assertTrue(docFreq <= termFreqs.get(term).docFreq);
                  assertTrue(totalTermFreq <= termFreqs.get(term).totalTermFreq);
                }
              }
              // Also test seekCeil
              for (int iter = 0; iter < 10; iter++) {
                BytesRef term = new BytesRef(TestUtil.randomRealisticUnicodeString(random()));
                SeekStatus status = termsEnum.seekCeil(term);
                if (status == SeekStatus.NOT_FOUND) {
                  assertTrue(term.compareTo(termsEnum.term()) < 0);
                }
              }
            }

            @Override
            public void close() throws IOException {
              fieldsConsumer.close();
            }
          };
        }

        @Override
        public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
          return defaultPostingsFormat.fieldsProducer(state);
        }
      };
    }
  });
  RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
  LineFileDocs docs = new LineFileDocs(random());
  int bytesToIndex = atLeast(100) * 1024;
  int bytesIndexed = 0;
  while (bytesIndexed < bytesToIndex) {
    Document doc = docs.nextDoc();
    Document justBodyDoc = new Document();
    justBodyDoc.add(doc.getField("body"));
    w.addDocument(justBodyDoc);
    bytesIndexed += RamUsageTester.sizeOf(justBodyDoc);
  }
  IndexReader r = w.getReader();
  w.close();
  Terms terms = MultiFields.getTerms(r, "body");
  assertEquals(sumDocFreq.get(), terms.getSumDocFreq());
  assertEquals(sumTotalTermFreq.get(), terms.getSumTotalTermFreq());
  TermsEnum termsEnum = terms.iterator();
  long termCount = 0;
  boolean supportsOrds = true;
  while (termsEnum.next() != null) {
    BytesRef term = termsEnum.term();
    assertEquals(termFreqs.get(term.utf8ToString()).docFreq, termsEnum.docFreq());
    assertEquals(termFreqs.get(term.utf8ToString()).totalTermFreq, termsEnum.totalTermFreq());
    if (supportsOrds) {
      long ord;
      try {
        ord = termsEnum.ord();
      } catch (UnsupportedOperationException uoe) {
        supportsOrds = false;
        ord = -1;
      }
      if (ord != -1) {
        assertEquals(termCount, ord);
      }
    }
    termCount++;
  }
  assertEquals(termFreqs.size(), termCount);
  r.close();
  dir.close();
}
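Stripped of the bookkeeping, the pattern the test relies on is a FilterCodec whose postingsFormat() decorates the delegate's format. A minimal sketch (assuming the same getCodec() helper as above; a real test would wrap the returned FieldsConsumer as shown in the full method):

Codec wrapped = new FilterCodec(getCodec().getName(), getCodec()) {
  @Override
  public PostingsFormat postingsFormat() {
    // Delegate to the wrapped codec; the test above decorates the
    // FieldsConsumer produced by this format to observe every write.
    return delegate.postingsFormat();
  }
};
iwc.setCodec(wrapped);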