use of org.apache.lucene.benchmark.byTask.utils.AnalyzerFactory in project lucene-solr by apache.
the class AnalyzerFactoryTask method setParams.
/**
* Sets the params.
* Analysis component factory names may optionally include the "Factory" suffix.
*
* @param params analysis pipeline specification: name, (optional) positionIncrementGap,
* (optional) offsetGap, 0+ CharFilterFactory's, 1 TokenizerFactory,
* and 0+ TokenFilterFactory's
*/
@Override
@SuppressWarnings("fallthrough")
public void setParams(String params) {
super.setParams(params);
ArgType expectedArgType = ArgType.ANALYZER_ARG;
final StreamTokenizer stok = new StreamTokenizer(new StringReader(params));
stok.commentChar('#');
stok.quoteChar('"');
stok.quoteChar('\'');
stok.eolIsSignificant(false);
stok.ordinaryChar('(');
stok.ordinaryChar(')');
stok.ordinaryChar(':');
stok.ordinaryChar(',');
try {
while (stok.nextToken() != StreamTokenizer.TT_EOF) {
switch(stok.ttype) {
case ',':
{
// Do nothing
break;
}
case StreamTokenizer.TT_WORD:
{
if (expectedArgType.equals(ArgType.ANALYZER_ARG)) {
final String argName = stok.sval;
if (!argName.equalsIgnoreCase("name") && !argName.equalsIgnoreCase("positionIncrementGap") && !argName.equalsIgnoreCase("offsetGap")) {
throw new RuntimeException("Line #" + lineno(stok) + ": Missing 'name' param to AnalyzerFactory: '" + params + "'");
}
stok.nextToken();
if (stok.ttype != ':') {
throw new RuntimeException("Line #" + lineno(stok) + ": Missing ':' after '" + argName + "' param to AnalyzerFactory");
}
stok.nextToken();
String argValue = stok.sval;
switch(stok.ttype) {
case StreamTokenizer.TT_NUMBER:
{
argValue = Double.toString(stok.nval);
// Drop the ".0" from numbers, for integer arguments
argValue = TRAILING_DOT_ZERO_PATTERN.matcher(argValue).replaceFirst("");
// Intentional fallthrough
}
case '"':
case '\'':
case StreamTokenizer.TT_WORD:
{
if (argName.equalsIgnoreCase("name")) {
factoryName = argValue;
expectedArgType = ArgType.ANALYZER_ARG_OR_CHARFILTER_OR_TOKENIZER;
} else {
int intArgValue = 0;
try {
intArgValue = Integer.parseInt(argValue);
} catch (NumberFormatException e) {
throw new RuntimeException("Line #" + lineno(stok) + ": Exception parsing " + argName + " value '" + argValue + "'", e);
}
if (argName.equalsIgnoreCase("positionIncrementGap")) {
positionIncrementGap = intArgValue;
} else if (argName.equalsIgnoreCase("offsetGap")) {
offsetGap = intArgValue;
}
}
break;
}
case StreamTokenizer.TT_EOF:
{
throw new RuntimeException("Unexpected EOF: " + stok.toString());
}
default:
{
throw new RuntimeException("Line #" + lineno(stok) + ": Unexpected token: " + stok.toString());
}
}
} else if (expectedArgType.equals(ArgType.ANALYZER_ARG_OR_CHARFILTER_OR_TOKENIZER)) {
final String argName = stok.sval;
if (argName.equalsIgnoreCase("positionIncrementGap") || argName.equalsIgnoreCase("offsetGap")) {
stok.nextToken();
if (stok.ttype != ':') {
throw new RuntimeException("Line #" + lineno(stok) + ": Missing ':' after '" + argName + "' param to AnalyzerFactory");
}
stok.nextToken();
int intArgValue = (int) stok.nval;
switch(stok.ttype) {
case '"':
case '\'':
case StreamTokenizer.TT_WORD:
{
intArgValue = 0;
try {
intArgValue = Integer.parseInt(stok.sval.trim());
} catch (NumberFormatException e) {
throw new RuntimeException("Line #" + lineno(stok) + ": Exception parsing " + argName + " value '" + stok.sval + "'", e);
}
// Intentional fall-through
}
case StreamTokenizer.TT_NUMBER:
{
if (argName.equalsIgnoreCase("positionIncrementGap")) {
positionIncrementGap = intArgValue;
} else if (argName.equalsIgnoreCase("offsetGap")) {
offsetGap = intArgValue;
}
break;
}
case StreamTokenizer.TT_EOF:
{
throw new RuntimeException("Unexpected EOF: " + stok.toString());
}
default:
{
throw new RuntimeException("Line #" + lineno(stok) + ": Unexpected token: " + stok.toString());
}
}
break;
}
try {
final Class<? extends CharFilterFactory> clazz;
clazz = lookupAnalysisClass(argName, CharFilterFactory.class);
createAnalysisPipelineComponent(stok, clazz);
} catch (IllegalArgumentException e) {
try {
final Class<? extends TokenizerFactory> clazz;
clazz = lookupAnalysisClass(argName, TokenizerFactory.class);
createAnalysisPipelineComponent(stok, clazz);
expectedArgType = ArgType.TOKENFILTER;
} catch (IllegalArgumentException e2) {
throw new RuntimeException("Line #" + lineno(stok) + ": Can't find class '" + argName + "' as CharFilterFactory or TokenizerFactory");
}
}
} else {
// expectedArgType = ArgType.TOKENFILTER
final String className = stok.sval;
final Class<? extends TokenFilterFactory> clazz;
try {
clazz = lookupAnalysisClass(className, TokenFilterFactory.class);
} catch (IllegalArgumentException e) {
throw new RuntimeException("Line #" + lineno(stok) + ": Can't find class '" + className + "' as TokenFilterFactory");
}
createAnalysisPipelineComponent(stok, clazz);
}
break;
}
default:
{
throw new RuntimeException("Line #" + lineno(stok) + ": Unexpected token: " + stok.toString());
}
}
}
} catch (RuntimeException e) {
if (e.getMessage().startsWith("Line #")) {
throw e;
} else {
throw new RuntimeException("Line #" + lineno(stok) + ": ", e);
}
} catch (Throwable t) {
throw new RuntimeException("Line #" + lineno(stok) + ": ", t);
}
final AnalyzerFactory analyzerFactory = new AnalyzerFactory(charFilterFactories, tokenizerFactory, tokenFilterFactories);
analyzerFactory.setPositionIncrementGap(positionIncrementGap);
analyzerFactory.setOffsetGap(offsetGap);
getRunData().getAnalyzerFactories().put(factoryName, analyzerFactory);
}
use of org.apache.lucene.benchmark.byTask.utils.AnalyzerFactory in project lucene-solr by apache.
the class NewAnalyzerTask method doLogic.
@Override
public int doLogic() throws IOException {
String analyzerName = null;
try {
if (current >= analyzerNames.size()) {
current = 0;
}
analyzerName = analyzerNames.get(current++);
Analyzer analyzer = null;
if (null == analyzerName || 0 == analyzerName.length()) {
analyzerName = "org.apache.lucene.analysis.standard.StandardAnalyzer";
}
// First, lookup analyzerName as a named analyzer factory
AnalyzerFactory factory = getRunData().getAnalyzerFactories().get(analyzerName);
if (null != factory) {
analyzer = factory.create();
} else {
if (analyzerName.contains(".")) {
if (analyzerName.startsWith("standard.")) {
analyzerName = "org.apache.lucene.analysis." + analyzerName;
}
analyzer = createAnalyzer(analyzerName);
} else {
// No package
try {
// Attempt to instantiate a core analyzer
String coreClassName = "org.apache.lucene.analysis.core." + analyzerName;
analyzer = createAnalyzer(coreClassName);
analyzerName = coreClassName;
} catch (ClassNotFoundException e) {
// If not a core analyzer, try the base analysis package
analyzerName = "org.apache.lucene.analysis." + analyzerName;
analyzer = createAnalyzer(analyzerName);
}
}
}
getRunData().setAnalyzer(analyzer);
} catch (Exception e) {
throw new RuntimeException("Error creating Analyzer: " + analyzerName, e);
}
return 1;
}
Aggregations