Use of org.carrot2.text.util.MutableCharArray in the lucene-solr project by Apache.
Class DuplicatingTokenizerFactory, method getTokenizer.
/**
 * Creates a tokenizer that forwards all work to an {@link ExtendedWhitespaceTokenizer}
 * and, whenever a term buffer is filled, replaces the buffer's contents with that
 * text repeated twice.
 *
 * @param language not consulted by this implementation
 * @return a new anonymous {@link ITokenizer} wrapping its own whitespace tokenizer
 */
@Override
public ITokenizer getTokenizer(LanguageCode language) {
return new ITokenizer() {
// Each returned tokenizer owns its own underlying whitespace tokenizer.
private final ExtendedWhitespaceTokenizer whitespaceTokenizer = new ExtendedWhitespaceTokenizer();

@Override
public void reset(Reader input) {
whitespaceTokenizer.reset(input);
}

@Override
public short nextToken() throws IOException {
return whitespaceTokenizer.nextToken();
}

@Override
public void setTermBuffer(MutableCharArray buffer) {
// Let the wrapped tokenizer populate the buffer first...
whitespaceTokenizer.setTermBuffer(buffer);
// ...then overwrite it with the same text concatenated with itself.
final String tokenText = buffer.toString();
buffer.reset(tokenText + tokenText);
}
};
}
Use of org.carrot2.text.util.MutableCharArray in the lucene-solr project by Apache.
Class LexicalResourcesCheckClusteringAlgorithm, method process.
/**
 * Rebuilds {@code clusters} with one {@link Cluster} per comma-separated entry of
 * {@code wordsToCheck} that the Maltese lexical data reports as neither a common
 * word nor a stop label. When {@code wordsToCheck} is {@code null} the cluster
 * list is left empty.
 *
 * @throws ProcessingException declared by the overridden method; not thrown directly here
 */
@Override
public void process() throws ProcessingException {
// The result list is replaced on every call, even when there is nothing to check.
clusters = new ArrayList<>();
if (wordsToCheck != null) {
// Test with Maltese so that the English clustering performed in other tests
// is not affected by the test stopwords and stoplabels.
final ILexicalData malteseData = preprocessing.lexicalDataFactory.getLexicalData(LanguageCode.MALTESE);
final String[] candidates = wordsToCheck.split(",");
for (final String candidate : candidates) {
final boolean commonWord = malteseData.isCommonWord(new MutableCharArray(candidate));
// The stop-label lookup only runs for words that are not common words.
if (commonWord || malteseData.isStopLabel(candidate)) {
continue;
}
clusters.add(new Cluster(candidate));
}
}
}
Aggregations