Example use of org.apache.lucene.util.BytesRefIterator in the elasticsearch project by elastic.
From the class AbstractBytesReferenceTestCase, method getSinglePageOrNull.
/**
 * Returns the content of {@code ref} as a single {@link BytesRef} when its
 * iterator yields exactly one element.
 *
 * @param ref the reference to inspect
 * @return an empty {@link BytesRef} when {@code ref} has no bytes; a clone of
 *         the first element when the iterator yields nothing after it;
 *         {@code null} when the iterator yields more than one element
 * @throws IOException if iterating the reference fails
 */
public static BytesRef getSinglePageOrNull(BytesReference ref) throws IOException {
    if (ref.length() <= 0) {
        return new BytesRef();
    }
    final BytesRefIterator pages = ref.iterator();
    // Clone before advancing: the second next() call happens only after the
    // first element has been captured.
    final BytesRef firstPage = pages.next().clone();
    return pages.next() == null ? firstPage : null;
}
Example use of org.apache.lucene.util.BytesRefIterator in the elasticsearch project by elastic.
From the class AbstractBytesReferenceTestCase, method testIteratorRandom.
/**
 * Builds a randomly sized reference, optionally slices it and/or re-wraps it
 * in a {@code BytesArray}, then checks that concatenating everything its
 * iterator returns reproduces the bytes reported by
 * {@code BytesReference.toBytes}.
 */
public void testIteratorRandom() throws IOException {
    final int length = randomIntBetween(10, PAGE_SIZE * randomIntBetween(2, 8));
    BytesReference pbr = newBytesReference(length);

    // Randomly narrow the reference to an arbitrary (possibly empty) slice.
    if (randomBoolean()) {
        final int sliceOffset = randomIntBetween(0, pbr.length());
        final int sliceLength = randomIntBetween(0, pbr.length() - sliceOffset);
        pbr = pbr.slice(sliceOffset, sliceLength);
    }

    // Randomly swap the implementation for an array-backed one.
    if (randomBoolean()) {
        pbr = new BytesArray(pbr.toBytesRef());
    }

    // Drain the iterator, appending every returned chunk.
    final BytesRefIterator chunks = pbr.iterator();
    final BytesRefBuilder collected = new BytesRefBuilder();
    for (BytesRef chunk = chunks.next(); chunk != null; chunk = chunks.next()) {
        collected.append(chunk);
    }

    final byte[] expected = BytesReference.toBytes(pbr);
    final byte[] actual = BytesRef.deepCopyOf(collected.toBytesRef()).bytes;
    assertArrayEquals(expected, actual);
}
Example use of org.apache.lucene.util.BytesRefIterator in the elasticsearch project by elastic.
From the class OrdinalsBuilder, method buildFromTerms.
/**
 * Walks every term of the given {@link TermsEnum} and associates each term's
 * ordinal with the documents that contain the term. The work happens lazily:
 * the caller must exhaust the returned {@link BytesRefIterator}, which
 * returns all values, the first of which is associated with the ordinal
 * {@code 1}, and so on.
 * <p>
 * If the {@link TermsEnum} contains prefix coded numerical values, wrap it
 * with either {@link #wrapNumeric32Bit(TermsEnum)} or
 * {@link #wrapNumeric64Bit(TermsEnum)} depending on its precision. Without
 * that wrapping the returned {@link BytesRefIterator} will contain partial
 * precision terms rather than only full-precision terms.
 * </p>
 */
public BytesRefIterator buildFromTerms(final TermsEnum termsEnum) throws IOException {
    return new BytesRefIterator() {
        // Passed back into postings() on every call.
        private PostingsEnum docsEnum = null;

        @Override
        public BytesRef next() throws IOException {
            final BytesRef term = termsEnum.next();
            if (term == null) {
                return null;
            }
            docsEnum = termsEnum.postings(docsEnum, PostingsEnum.NONE);
            // Advance to a fresh ordinal before recording this term's documents.
            nextOrdinal();
            for (int doc = docsEnum.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docsEnum.nextDoc()) {
                addDoc(doc);
            }
            return term;
        }
    };
}
Example use of org.apache.lucene.util.BytesRefIterator in the lucene-solr project by apache.
From the class SpellChecker, method indexDictionary.
/**
 * Indexes the data from the given {@link Dictionary}.
 * <p>
 * Entries whose string form is shorter than three characters are skipped, as
 * are entries for which a {@code seekExact} on the {@code F_WORD} terms of
 * the current spell index succeeds (i.e. words that are already indexed).
 * After writing, the searcher is re-opened on the spell index so later
 * lookups see the new words.
 *
 * @param dict Dictionary to index
 * @param config {@link IndexWriterConfig} to use
 * @param fullMerge whether or not the spellcheck index should be fully merged
 * @throws AlreadyClosedException if the Spellchecker is already closed
 * @throws IOException If there is a low-level I/O error.
 */
public final void indexDictionary(Dictionary dict, IndexWriterConfig config, boolean fullMerge) throws IOException {
    synchronized (modifyCurrentIndexLock) {
        ensureOpen();
        final Directory dir = this.spellIndex;
        // try-with-resources guarantees the writer is closed even when
        // indexing fails part-way; otherwise a failure would leave the
        // writer (and whatever it holds on the directory) open.
        try (IndexWriter writer = new IndexWriter(dir, config)) {
            final IndexSearcher indexSearcher = obtainSearcher();
            try {
                // Collect one TermsEnum per leaf so already-indexed words can be
                // detected. This runs inside the try so the searcher is released
                // even if enumerating the leaves throws.
                final List<TermsEnum> termsEnums = new ArrayList<>();
                // Use the searcher acquired above rather than the shared field.
                final IndexReader reader = indexSearcher.getIndexReader();
                if (reader.maxDoc() > 0) {
                    for (final LeafReaderContext ctx : reader.leaves()) {
                        Terms terms = ctx.reader().terms(F_WORD);
                        if (terms != null) {
                            termsEnums.add(terms.iterator());
                        }
                    }
                }
                final boolean isEmpty = termsEnums.isEmpty();

                BytesRefIterator iter = dict.getEntryIterator();
                BytesRef currentTerm;
                terms: while ((currentTerm = iter.next()) != null) {
                    String word = currentTerm.utf8ToString();
                    int len = word.length();
                    if (len < 3) {
                        // too short we bail but "too long" is fine...
                        continue;
                    }
                    if (!isEmpty) {
                        for (TermsEnum te : termsEnums) {
                            if (te.seekExact(currentTerm)) {
                                // word is already in the index: skip it
                                continue terms;
                            }
                        }
                    }
                    // ok index the word
                    Document doc = createDocument(word, getMin(len), getMax(len));
                    writer.addDocument(doc);
                }
            } finally {
                releaseSearcher(indexSearcher);
            }
            if (fullMerge) {
                writer.forceMerge(1);
            }
        } // writer is closed here
        // TODO: this isn't that great, maybe in the future SpellChecker should take
        // IWC in its ctor / keep its writer open?
        // also re-open the spell index to see our own changes when the next suggestion
        // is fetched:
        swapSearcher(dir);
    }
}
Example use of org.apache.lucene.util.BytesRefIterator in the lucene-solr project by apache.
From the class TestBytesRefArray, method testAppend.
/**
 * Appends random unicode strings to a {@code BytesRefArray} over two rounds
 * (randomly clearing the array before the second round) and verifies that
 * sequential access, random access and iteration all return the strings
 * that were appended.
 */
public void testAppend() throws IOException {
    Random random = random();
    BytesRefArray list = new BytesRefArray(Counter.newCounter());
    List<String> stringList = new ArrayList<>();
    for (int j = 0; j < 2; j++) {
        // On the second round, sometimes start over from an empty array.
        if (j > 0 && random.nextBoolean()) {
            list.clear();
            stringList.clear();
        }
        int entries = atLeast(500);
        BytesRefBuilder spare = new BytesRefBuilder();
        int initSize = list.size();
        for (int i = 0; i < entries; i++) {
            String randomRealisticUnicodeString = TestUtil.randomRealisticUnicodeString(random);
            spare.copyChars(randomRealisticUnicodeString);
            // append() is expected to return the index of the new entry
            assertEquals(i + initSize, list.append(spare.get()));
            stringList.add(randomRealisticUnicodeString);
        }
        // check sequential access
        for (int i = 0; i < entries; i++) {
            assertNotNull(list.get(spare, i));
            assertEquals("entry " + i + " doesn't match", stringList.get(i), spare.get().utf8ToString());
        }
        // check random
        for (int i = 0; i < entries; i++) {
            int e = random.nextInt(entries);
            assertNotNull(list.get(spare, e));
            // report the entry actually checked (e), not the loop counter
            assertEquals("entry " + e + " doesn't match", stringList.get(e), spare.get().utf8ToString());
        }
        // iterate twice to check that a fresh iterator restarts from the first entry
        for (int i = 0; i < 2; i++) {
            BytesRefIterator iterator = list.iterator();
            for (String string : stringList) {
                assertEquals(string, iterator.next().utf8ToString());
            }
        }
    }
}
Aggregations