Use of org.apache.lucene.util.BytesRef in project elasticsearch by elastic.
Class XMoreLikeThis, method addTermFrequencies.
/**
 * Accumulates the term frequencies found in a term vector into {@code termFreqMap}.
 * <p>
 * Every term is decoded from UTF-8 bytes into a string. Terms accepted by
 * {@code isNoiseWord} or {@code isSkipTerm} are ignored; for all other terms the
 * frequencies reported by the term's postings are summed and added to the term's
 * counter in the map (the counter is created the first time a term is seen).
 *
 * @param termFreqMap a Map of terms and their frequencies; updated in place
 * @param vector      List of terms and their frequencies for a doc/field
 * @param fieldName   Optional field name of the terms, handed to the skip-term check
 * @throws IOException propagated from iterating the terms or their postings
 */
private void addTermFrequencies(Map<String, Int> termFreqMap, Terms vector, @Nullable String fieldName) throws IOException {
    final TermsEnum iterator = vector.iterator();
    final CharsRefBuilder decoded = new CharsRefBuilder();
    for (BytesRef bytes = iterator.next(); bytes != null; bytes = iterator.next()) {
        decoded.copyUTF8Bytes(bytes);
        final String term = decoded.toString();
        // Noise words are checked first, then field-specific skip terms.
        if (isNoiseWord(term) || isSkipTerm(fieldName, term)) {
            continue;
        }

        // Sum the frequency over every posting of the current term.
        final PostingsEnum postings = iterator.postings(null);
        int total = 0;
        if (postings != null) {
            while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                total += postings.freq();
            }
        }

        // Fold the total into the map, creating the counter on first sight.
        Int counter = termFreqMap.get(term);
        if (counter != null) {
            counter.x += total;
        } else {
            counter = new Int();
            termFreqMap.put(term, counter);
            // Assign rather than add: the counter's initial value is not relied upon.
            counter.x = total;
        }
    }
}
Use of org.apache.lucene.util.BytesRef in project elasticsearch by elastic.
Class Versions, method loadPrimaryTerm.
/**
 * Looks up the primary term stored for the document matching the given uid term.
 * <p>
 * Segments are visited from the last leaf to the first. Within a segment the last
 * live document matching the uid is used, and the lookup stops at the first segment
 * that yields such a document.
 *
 * @param reader the index reader whose leaves are searched
 * @param term   the uid term; its field must be {@code UidFieldMapper.NAME}
 * @return the primary term read from the {@code SeqNoFieldMapper.PRIMARY_TERM_NAME}
 *         doc values of the matching document, or {@code 0} if none is found
 * @throws IOException propagated from the underlying reader
 */
public static long loadPrimaryTerm(IndexReader reader, Term term) throws IOException {
    assert term.field().equals(UidFieldMapper.NAME) : "can only load _primary_term by uid";
    final List<LeafReaderContext> segments = reader.leaves();
    if (segments.isEmpty()) {
        return 0;
    }
    // Walk the segments backwards, starting with the last one.
    for (int ord = segments.size() - 1; ord >= 0; ord--) {
        final LeafReader segment = segments.get(ord).reader();
        final Bits live = segment.getLiveDocs();
        final Fields fields = segment.fields();
        if (fields == null) {
            continue;
        }
        final Terms uids = fields.terms(UidFieldMapper.NAME);
        if (uids == null) {
            continue;
        }
        final TermsEnum uidEnum = uids.iterator();
        assert uidEnum != null;
        final NumericDocValues primaryTerms = segment.getNumericDocValues(SeqNoFieldMapper.PRIMARY_TERM_NAME);
        assert primaryTerms != null;
        final BytesRef uidBytes = term.bytes();
        if (uidEnum.seekExact(uidBytes) == false) {
            continue;
        }
        // there may be more than one matching docID, in the
        // case of nested docs, so we want the last one:
        final PostingsEnum postings = uidEnum.postings(null, 0);
        int lastLiveDoc = DocIdSetIterator.NO_MORE_DOCS;
        int doc;
        while ((doc = postings.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            // A null live-docs bitset is treated as "every doc is live".
            if (live == null || live.get(doc)) {
                lastLiveDoc = doc;
            }
        }
        if (lastLiveDoc != DocIdSetIterator.NO_MORE_DOCS) {
            return primaryTerms.get(lastLiveDoc);
        }
    }
    return 0;
}
Use of org.apache.lucene.util.BytesRef in project crate by crate.
Class MatchQueryBuilderTest, method testSimpleSingleMatchTwoTerms.
/**
 * A two-word query string against a single column ("col1") must yield a {@code BooleanQuery}.
 */
@Test
public void testSimpleSingleMatchTwoTerms() throws Exception {
    // Single target column, mapped to a null value.
    final Map<String, Object> columns = MapBuilder.<String, Object>newMapBuilder()
        .put("col1", null)
        .map();
    final MatchQueryBuilder matchBuilder =
        new MatchQueryBuilder(mockMapperService(), null, Collections.emptyMap());

    final BytesRef queryString = new BytesRef("foo bar");
    final Query result = matchBuilder.query(columns, queryString);

    assertThat(result, instanceOf(BooleanQuery.class));
}
Use of org.apache.lucene.util.BytesRef in project crate by crate.
Class MatchQueryBuilderTest, method testTwoFieldsSingleTerm.
/**
 * A single-word query string against two columns ("col1", "col2"), built through
 * {@code MultiMatchQueryBuilder}, must yield a {@code DisjunctionMaxQuery}.
 */
@Test
public void testTwoFieldsSingleTerm() throws Exception {
    final MatchQueryBuilder multiMatchBuilder =
        new io.crate.lucene.match.MultiMatchQueryBuilder(mockMapperService(), null, Collections.emptyMap());
    // Two target columns, both mapped to a null value.
    final Map<String, Object> columns = MapBuilder.<String, Object>newMapBuilder()
        .put("col1", null)
        .put("col2", null)
        .map();

    final BytesRef queryString = new BytesRef("foo");
    final Query result = multiMatchBuilder.query(columns, queryString);

    assertThat(result, instanceOf(DisjunctionMaxQuery.class));
}
Use of org.apache.lucene.util.BytesRef in project crate by crate.
Class MatchQueryBuilderTest, method testPhrasePrefix.
/**
 * A builder constructed with the "phrase_prefix" argument must yield a
 * {@code MultiPhrasePrefixQuery} for a single-word query string on one column ("col1").
 */
@Test
public void testPhrasePrefix() throws Exception {
    final BytesRef phrasePrefix = new BytesRef("phrase_prefix");
    final MatchQueryBuilder matchBuilder =
        new MatchQueryBuilder(mockMapperService(), phrasePrefix, Collections.emptyMap());
    // Single target column, mapped to a null value.
    final Map<String, Object> columns = MapBuilder.<String, Object>newMapBuilder()
        .put("col1", null)
        .map();

    final BytesRef queryString = new BytesRef("foo");
    final Query result = matchBuilder.query(columns, queryString);

    assertThat(result, instanceOf(MultiPhrasePrefixQuery.class));
}
Aggregations