Use of org.apache.lucene.util.CharsRefBuilder in the Apache lucene-solr project.
Class WordnetSynonymParser, method parseSynonym.
/**
 * Extracts the single-quoted term from one synonym line and passes it to {@code analyze}.
 *
 * <p>The term is the text between the first and the last {@code '} on the line; a doubled
 * quote ({@code ''}) inside it is collapsed to a single quote before analysis.
 *
 * @param line one line of the synonym file
 * @param reuse builder handed to {@code analyze}; a fresh one is created when {@code null}
 * @return whatever {@code analyze} returns for the extracted term
 * @throws IllegalArgumentException if {@code line} does not contain an opening and a
 *     closing single quote
 * @throws IOException propagated from {@code analyze}
 */
private CharsRef parseSynonym(String line, CharsRefBuilder reuse) throws IOException {
  if (reuse == null) {
    reuse = new CharsRefBuilder();
  }
  int openingQuote = line.indexOf('\'');
  int closingQuote = line.lastIndexOf('\'');
  // Without two distinct quotes substring() would fail with StringIndexOutOfBoundsException;
  // report the malformed line as an IllegalArgumentException instead.
  if (openingQuote < 0 || closingQuote <= openingQuote) {
    throw new IllegalArgumentException("Missing quoted term in synonym line: " + line);
  }
  String text = line.substring(openingQuote + 1, closingQuote).replace("''", "'");
  return analyze(text, reuse);
}
Use of org.apache.lucene.util.CharsRefBuilder in the Apache lucene-solr project.
Class WordnetSynonymParser, method parse.
/**
 * Reads synonym lines from {@code in}, groups consecutive lines that share the same synset id
 * (characters 2..10 of the line), and hands each completed group to {@code addInternal}.
 *
 * <p>The reader is always closed before this method returns.
 *
 * @param in source of the synonym lines; closed by this method
 * @throws IOException if reading from {@code in} fails
 * @throws ParseException if a line is too short to hold a synset id, or if processing a line
 *     raises an {@link IllegalArgumentException}; the message carries the reader's current
 *     line number and the original exception is attached as the cause
 */
@Override
public void parse(Reader in) throws IOException, ParseException {
  // Character range of the synset id within a line.
  final int idStart = 2;
  final int idEnd = 11;
  LineNumberReader br = new LineNumberReader(in);
  try {
    String line;
    String lastSynSetID = "";
    CharsRef[] synset = new CharsRef[8];
    int synsetSize = 0;
    while ((line = br.readLine()) != null) {
      // A blank or truncated line would make substring() throw StringIndexOutOfBoundsException,
      // which the handler below does not convert; fail with IllegalArgumentException so the
      // caller gets a ParseException that names the offending line number.
      if (line.length() < idEnd) {
        throw new IllegalArgumentException("Line too short to contain a synset id: " + line);
      }
      String synSetID = line.substring(idStart, idEnd);
      if (!synSetID.equals(lastSynSetID)) {
        // the id changed: flush the group collected so far and start a new one
        addInternal(synset, synsetSize);
        synsetSize = 0;
      }
      if (synset.length <= synsetSize + 1) {
        synset = Arrays.copyOf(synset, synset.length * 2);
      }
      // a fresh builder per entry, because the returned CharsRef is kept in the array
      synset[synsetSize] = parseSynonym(line, new CharsRefBuilder());
      synsetSize++;
      lastSynSetID = synSetID;
    }
    // final synset in the file
    addInternal(synset, synsetSize);
  } catch (IllegalArgumentException e) {
    ParseException ex = new ParseException("Invalid synonym rule at line " + br.getLineNumber(), 0);
    ex.initCause(e);
    throw ex;
  } finally {
    br.close();
  }
}
Use of org.apache.lucene.util.CharsRefBuilder in the Apache lucene-solr project.
Class SimpleFacets, method getGroupedCounts.
/**
 * Collects facet counts for {@code field} over the documents in {@code base}, using a
 * {@code TermGroupFacetCollector} keyed on the first field of the request's grouping
 * specification.
 *
 * @param searcher searcher used to run the collection and to look up the field type
 * @param base documents to facet over
 * @param field field whose values are counted
 * @param multiToken passed through to the collector factory
 * @param offset index of the first facet entry to return
 * @param limit maximum number of entries to return; a negative value means no limit
 * @param mincount passed through to {@code mergeSegmentResults}
 * @param missing when {@code true}, a trailing entry with a {@code null} name holding the
 *     total missing count is appended
 * @param sort sort mode; counts are ordered by count when it equals
 *     {@code FacetParams.FACET_SORT_COUNT} or {@code FacetParams.FACET_SORT_COUNT_LEGACY}
 * @param prefix optional term prefix handed to the collector; may be {@code null}
 * @param termFilter optional filter applied to each entry's indexed value; may be {@code null}
 * @return readable facet values mapped to their counts
 * @throws SolrException with {@code BAD_REQUEST} when no group field is available
 * @throws IOException propagated from the search or the collector
 */
public NamedList<Integer> getGroupedCounts(SolrIndexSearcher searcher, DocSet base, String field, boolean multiToken, int offset, int limit, int mincount, boolean missing, String sort, String prefix, Predicate<BytesRef> termFilter) throws IOException {
  GroupingSpecification spec = rb.getGroupingSpec();
  final String groupField;
  if (spec == null) {
    groupField = null;
  } else {
    groupField = spec.getFields()[0];
  }
  if (groupField == null) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Specify the group.field as parameter or local parameter");
  }

  BytesRef prefixBytes = null;
  if (prefix != null) {
    prefixBytes = new BytesRef(prefix);
  }
  final TermGroupFacetCollector collector = TermGroupFacetCollector.createTermGroupFacetCollector(groupField, field, multiToken, prefixBytes, 128);
  // When GroupedFacetCollector can handle numerics we can remove the wrapped collectors
  Collector wrapped = getInsanityWrapper(field, getInsanityWrapper(groupField, collector));
  searcher.search(base.getTopFilter(), wrapped);

  boolean orderByCount = sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY);
  // a negative limit means "unbounded" both for the merge and for the returned window
  final int mergeSize;
  final int windowSize;
  if (limit < 0) {
    mergeSize = Integer.MAX_VALUE;
    windowSize = Integer.MAX_VALUE;
  } else {
    mergeSize = offset + limit;
    windowSize = limit;
  }
  TermGroupFacetCollector.GroupedFacetResult merged = collector.mergeSegmentResults(mergeSize, mincount, orderByCount);

  CharsRefBuilder readable = new CharsRefBuilder();
  FieldType fieldType = searcher.getSchema().getFieldType(field);
  NamedList<Integer> counts = new NamedList<>();
  List<TermGroupFacetCollector.FacetEntry> window = merged.getFacetEntries(offset, windowSize);
  for (TermGroupFacetCollector.FacetEntry entry : window) {
    BytesRef indexedValue = entry.getValue();
    //:TODO:can we filter earlier than this to make it more efficient?
    if (termFilter == null || termFilter.test(indexedValue)) {
      fieldType.indexedToReadable(indexedValue, readable);
      counts.add(readable.toString(), entry.getCount());
    }
  }

  if (missing) {
    counts.add(null, merged.getTotalMissingCount());
  }
  return counts;
}
Use of org.apache.lucene.util.CharsRefBuilder in the Apache lucene-solr project.
Class DeleteUpdateCommand, method getId.
/**
 * Returns the id of this command, deriving it from {@code indexedId} on first use.
 *
 * <p>When {@code id} is unset but {@code indexedId} is available, the indexed form is
 * converted to its readable form through the schema's unique key field type and cached in
 * {@code id}. If the schema has no unique key field, {@code id} is left as it is.
 *
 * @return the id, or {@code null} when it is unset and could not be derived
 */
public String getId() {
  // nothing to derive: either already known, or no indexed form to convert
  if (id != null || indexedId == null) {
    return id;
  }
  SchemaField uniqueKey = req.getSchema().getUniqueKeyField();
  if (uniqueKey != null) {
    CharsRefBuilder readable = new CharsRefBuilder();
    uniqueKey.getType().indexedToReadable(indexedId, readable);
    id = readable.toString();
  }
  return id;
}
Use of org.apache.lucene.util.CharsRefBuilder in the Apache lucene-solr project.
Class TestLimitTokenPositionFilter, method testMaxPosition3WithSynomyms.
/**
 * Checks that a {@code LimitTokenPositionFilter} with a maximum position of 3, placed after a
 * {@code SynonymFilter}, emits only the tokens (original and synonym) up to position 3.
 * The check is run once with {@code consumeAll} enabled and once with it disabled.
 */
public void testMaxPosition3WithSynomyms() throws IOException {
  for (final boolean consumeAll : new boolean[] { true, false }) {
    MockTokenizer tokenizer = whitespaceMockTokenizer("one two three four five");
    // if we are consuming all tokens, we can use the checks, otherwise we can't
    tokenizer.setEnableChecks(consumeAll);

    SynonymMap.Builder mapBuilder = new SynonymMap.Builder(true);
    // single-word synonyms for "one"
    for (String synonym : new String[] { "first", "alpha", "beguine" }) {
      mapBuilder.add(new CharsRef("one"), new CharsRef(synonym), true);
    }
    // multi-word synonyms; the same builder is refilled by each join
    CharsRefBuilder joined = new CharsRefBuilder();
    SynonymMap.Builder.join(new String[] { "and", "indubitably", "single", "only" }, joined);
    mapBuilder.add(new CharsRef("one"), joined.get(), true);
    SynonymMap.Builder.join(new String[] { "dopple", "ganger" }, joined);
    mapBuilder.add(new CharsRef("two"), joined.get(), true);
    SynonymMap synonymMap = mapBuilder.build();

    TokenStream stream = new LimitTokenPositionFilter(new SynonymFilter(tokenizer, synonymMap, true), 3, consumeAll);

    // "only", the 4th word of multi-word synonym "and indubitably single only" is not emitted, since its position is greater than 3.
    String[] expectedTerms = { "one", "first", "alpha", "beguine", "and", "two", "indubitably", "dopple", "three", "single", "ganger" };
    int[] expectedPositionIncrements = { 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0 };
    assertTokenStreamContents(stream, expectedTerms, expectedPositionIncrements);
  }
}
Aggregations