Use of org.apache.lucene.analysis.Analyzer in project jackrabbit-oak by apache: the class IndexWriterUtils, method getIndexWriterConfig.
public static IndexWriterConfig getIndexWriterConfig(IndexDefinition definition, boolean remoteDir) {
    // FIXME: Hack needed to make Lucene work in an OSGi environment
    Thread thread = Thread.currentThread();
    ClassLoader loader = thread.getContextClassLoader();
    thread.setContextClassLoader(IndexWriterConfig.class.getClassLoader());
    try {
        Analyzer definitionAnalyzer = definition.getAnalyzer();
        Map<String, Analyzer> analyzers = new HashMap<String, Analyzer>();
        // The spellcheck field is analyzed with shingles (word n-grams) of up to three tokens
        analyzers.put(FieldNames.SPELLCHECK, new ShingleAnalyzerWrapper(LuceneIndexConstants.ANALYZER, 3));
        if (!definition.isSuggestAnalyzed()) {
            analyzers.put(FieldNames.SUGGEST, SuggestHelper.getAnalyzer());
        }
        Analyzer analyzer = new PerFieldAnalyzerWrapper(definitionAnalyzer, analyzers);
        IndexWriterConfig config = new IndexWriterConfig(VERSION, analyzer);
        if (remoteDir) {
            // Run merges sequentially when writing to a remote directory
            config.setMergeScheduler(new SerialMergeScheduler());
        }
        if (definition.getCodec() != null) {
            config.setCodec(definition.getCodec());
        }
        return config;
    } finally {
        thread.setContextClassLoader(loader);
    }
}
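For context, a minimal usage sketch, not from the project: a hypothetical caller that obtains a configuration from the method above and writes one document. The method name writeOne, the field name "path", and the sample value are illustrative only; an IndexDefinition and a Lucene Directory are assumed to be supplied by the caller.

import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;

// Hypothetical caller, not part of jackrabbit-oak: writes a single document
// using the IndexWriterConfig produced by the method above.
static void writeOne(IndexDefinition definition, Directory directory) throws IOException {
    IndexWriterConfig config = IndexWriterUtils.getIndexWriterConfig(definition, false);
    try (IndexWriter writer = new IndexWriter(directory, config)) {
        Document doc = new Document();
        doc.add(new StringField("path", "/content/sample", Store.YES));
        writer.addDocument(doc);
    }
}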
Use of org.apache.lucene.analysis.Analyzer in project jena by apache: the class TextIndexLuceneAssembler, method open.
/*
<#index> a :TextIndexLucene ;
#text:directory "mem" ;
#text:directory "DIR" ;
text:directory <file:DIR> ;
text:entityMap <#endMap> ;
.
*/
@SuppressWarnings("resource")
@Override
public TextIndex open(Assembler a, Resource root, Mode mode) {
    try {
        if (!GraphUtils.exactlyOneProperty(root, pDirectory))
            throw new TextIndexException("No 'text:directory' property on " + root);

        // The directory is either the literal "mem", a literal file path, or a file: URI.
        Directory directory;
        RDFNode n = root.getProperty(pDirectory).getObject();
        if (n.isLiteral()) {
            String literalValue = n.asLiteral().getLexicalForm();
            if (literalValue.equals("mem")) {
                directory = new RAMDirectory();
            } else {
                File dir = new File(literalValue);
                directory = FSDirectory.open(dir.toPath());
            }
        } else {
            Resource x = n.asResource();
            String path = IRILib.IRIToFilename(x.getURI());
            File dir = new File(path);
            directory = FSDirectory.open(dir.toPath());
        }

        // Optional text:analyzer, assembled recursively when present.
        Analyzer analyzer = null;
        Statement analyzerStatement = root.getProperty(pAnalyzer);
        if (null != analyzerStatement) {
            RDFNode aNode = analyzerStatement.getObject();
            if (!aNode.isResource()) {
                throw new TextIndexException("Text analyzer property is not a resource : " + aNode);
            }
            Resource analyzerResource = (Resource) aNode;
            analyzer = (Analyzer) a.open(analyzerResource);
        }

        // Optional text:queryAnalyzer, used at query time instead of the indexing analyzer.
        Analyzer queryAnalyzer = null;
        Statement queryAnalyzerStatement = root.getProperty(pQueryAnalyzer);
        if (null != queryAnalyzerStatement) {
            RDFNode qaNode = queryAnalyzerStatement.getObject();
            if (!qaNode.isResource()) {
                throw new TextIndexException("Text query analyzer property is not a resource : " + qaNode);
            }
            Resource analyzerResource = (Resource) qaNode;
            queryAnalyzer = (Analyzer) a.open(analyzerResource);
        }

        // Optional text:queryParser, identified by the local name of the resource.
        String queryParser = null;
        Statement queryParserStatement = root.getProperty(pQueryParser);
        if (null != queryParserStatement) {
            RDFNode qpNode = queryParserStatement.getObject();
            if (!qpNode.isResource()) {
                throw new TextIndexException("Text query parser property is not a resource : " + qpNode);
            }
            Resource parserResource = (Resource) qpNode;
            queryParser = parserResource.getLocalName();
        }

        boolean isMultilingualSupport = false;
        Statement mlSupportStatement = root.getProperty(pMultilingualSupport);
        if (null != mlSupportStatement) {
            RDFNode mlsNode = mlSupportStatement.getObject();
            if (!mlsNode.isLiteral()) {
                throw new TextIndexException("text:multilingualSupport property must be a literal : " + mlsNode);
            }
            isMultilingualSupport = mlsNode.asLiteral().getBoolean();
        }

        boolean storeValues = false;
        Statement storeValuesStatement = root.getProperty(pStoreValues);
        if (null != storeValuesStatement) {
            RDFNode svNode = storeValuesStatement.getObject();
            if (!svNode.isLiteral()) {
                throw new TextIndexException("text:storeValues property must be a literal : " + svNode);
            }
            storeValues = svNode.asLiteral().getBoolean();
        }

        // text:entityMap is required; it defines which properties get indexed.
        Resource r = GraphUtils.getResourceValue(root, pEntityMap);
        EntityDefinition docDef = (EntityDefinition) a.open(r);
        TextIndexConfig config = new TextIndexConfig(docDef);
        config.setAnalyzer(analyzer);
        config.setQueryAnalyzer(queryAnalyzer);
        config.setQueryParser(queryParser);
        config.setMultilingualSupport(isMultilingualSupport);
        config.setValueStored(storeValues);
        return TextDatasetFactory.createLuceneIndex(directory, config);
    } catch (IOException e) {
        IO.exception(e);
        return null;
    }
}
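The assembler above is driven by RDF configuration; the same kind of index can also be built directly in code. A minimal sketch, assuming an in-memory directory and illustrative entity/default field names ("uri", "text"):

import org.apache.jena.query.text.EntityDefinition;
import org.apache.jena.query.text.TextDatasetFactory;
import org.apache.jena.query.text.TextIndex;
import org.apache.jena.query.text.TextIndexConfig;
import org.apache.lucene.store.RAMDirectory;

// Sketch only: "uri" and "text" are illustrative field names, not fixed by Jena.
static TextIndex buildIndex() {
    EntityDefinition docDef = new EntityDefinition("uri", "text");
    TextIndexConfig config = new TextIndexConfig(docDef);
    config.setValueStored(true);
    return TextDatasetFactory.createLuceneIndex(new RAMDirectory(), config);
}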
Use of org.apache.lucene.analysis.Analyzer in project jena by apache: the class Util, method getLocalizedAnalyzer.
public static Analyzer getLocalizedAnalyzer(String lang) {
    if (lang == null)
        return null;
    if (cache.containsKey(lang))
        return cache.get(lang);
    try {
        // Look up the analyzer class registered for this language tag and
        // instantiate it via its no-argument constructor.
        Class<?> analyzerClass = analyzersClasses.get(lang);
        if (analyzerClass == null)
            return null;
        Constructor<?> constructor = analyzerClass.getConstructor();
        Analyzer analyzer = (Analyzer) constructor.newInstance();
        cache.put(lang, analyzer);
        return analyzer;
    } catch (Exception e) {
        e.printStackTrace();
        return null;
    }
}
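A usage sketch, assuming the caller wants a default analyzer for unmapped language tags; StandardAnalyzer is the illustrative fallback here, not something this method prescribes:

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;

// Sketch: use the language-specific analyzer when one is registered for "fr",
// otherwise fall back to a general-purpose default.
static Analyzer analyzerForLang(String lang) {
    Analyzer analyzer = Util.getLocalizedAnalyzer(lang);
    return analyzer != null ? analyzer : new StandardAnalyzer();
}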
Use of org.apache.lucene.analysis.Analyzer in project jackrabbit-oak by apache: the class IndexDefinition, method createAnalyzer.
//~---------------------------------------------------< Analyzer >
private Analyzer createAnalyzer() {
    Analyzer result;
    Analyzer defaultAnalyzer = LuceneIndexConstants.ANALYZER;
    if (analyzers.containsKey(LuceneIndexConstants.ANL_DEFAULT)) {
        defaultAnalyzer = analyzers.get(LuceneIndexConstants.ANL_DEFAULT);
    }
    if (!evaluatePathRestrictions()) {
        result = defaultAnalyzer;
    } else {
        // Path restrictions need the ancestors field tokenized as a path
        // hierarchy; every other field keeps the default analyzer.
        Map<String, Analyzer> analyzerMap = ImmutableMap.<String, Analyzer>builder()
                .put(FieldNames.ANCESTORS,
                        new TokenizerChain(new PathHierarchyTokenizerFactory(Collections.<String, String>emptyMap())))
                .build();
        result = new PerFieldAnalyzerWrapper(defaultAnalyzer, analyzerMap);
    }
    // A negative maxFieldLength means no limit is applied
    if (maxFieldLength < 0) {
        return result;
    }
    return new LimitTokenCountAnalyzer(result, maxFieldLength);
}
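The same layering can be shown in isolation with plain Lucene. A minimal sketch, outside Oak: route one field to a keyword analyzer, leave everything else on a default, then cap the token count. Here ":ancestors" stands in for FieldNames.ANCESTORS, 10000 is an arbitrary cap, and a Lucene version with the no-arg StandardAnalyzer constructor is assumed.

import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.miscellaneous.LimitTokenCountAnalyzer;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.standard.StandardAnalyzer;

// Sketch of the per-field wrapping pattern used above.
static Analyzer buildAnalyzer() {
    Map<String, Analyzer> perField = new HashMap<>();
    perField.put(":ancestors", new KeywordAnalyzer()); // illustrative field name
    Analyzer base = new PerFieldAnalyzerWrapper(new StandardAnalyzer(), perField);
    return new LimitTokenCountAnalyzer(base, 10000);   // arbitrary token cap
}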
Use of org.apache.lucene.analysis.Analyzer in project jackrabbit-oak by apache: the class DefaultAnalyzersConfigurationTest, method setUp.
@Before
public void setUp() throws Exception {
    // Exact path lookups: the whole path is a single keyword token.
    this.exactPathAnalyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer source = new KeywordTokenizer(reader);
            return new TokenStreamComponents(source);
        }
    };
    // Parent path, indexing side: again one keyword token.
    this.parentPathIndexingAnalyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer source = new KeywordTokenizer(reader);
            return new TokenStreamComponents(source);
        }
    };
    // Parent path, query side: reverse the path, strip the last segment,
    // then reverse back, leaving the parent path.
    this.parentPathSearchingAnalyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer source = new KeywordTokenizer(reader);
            TokenStream filter = new ReverseStringFilter(Version.LUCENE_47, source);
            filter = new PatternReplaceFilter(filter, Pattern.compile("[^\\/]+\\/"), "", false);
            filter = new ReverseStringFilter(Version.LUCENE_47, filter);
            return new TokenStreamComponents(source, filter);
        }
    };
    // Direct children, indexing side: reduce each path to its parent, so a
    // node is indexed under the path it is a direct child of.
    this.directChildrenPathIndexingAnalyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer source = new KeywordTokenizer(reader);
            TokenStream filter = new ReverseStringFilter(Version.LUCENE_47, source);
            filter = new LengthFilter(Version.LUCENE_47, filter, 2, Integer.MAX_VALUE);
            filter = new PatternReplaceFilter(filter, Pattern.compile("([^\\/]+)(\\/)"), "$2", false);
            filter = new PatternReplaceFilter(filter, Pattern.compile("(\\/)(.+)"), "$2", false);
            filter = new ReverseStringFilter(Version.LUCENE_47, filter);
            return new TokenStreamComponents(source, filter);
        }
    };
    // Direct children, query side: the query path itself as one token.
    this.directChildrenPathSearchingAnalyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer source = new KeywordTokenizer(reader);
            return new TokenStreamComponents(source);
        }
    };
    // All descendants, indexing side: emit a token for each ancestor prefix
    // of the path, de-duplicating repeated tokens.
    this.allChildrenPathIndexingAnalyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer source = new PathHierarchyTokenizer(reader);
            TokenStream filter = new PatternCaptureGroupTokenFilter(source, false, Pattern.compile("((\\/).*)"));
            filter = new RemoveDuplicatesTokenFilter(filter);
            return new TokenStreamComponents(source, filter);
        }
    };
    // All descendants, query side: the query path as one keyword token.
    this.allChildrenPathSearchingAnalyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer source = new KeywordTokenizer(reader);
            return new TokenStreamComponents(source);
        }
    };
}
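To see what these analyzers actually emit, a small helper (not part of the test) can dump the token stream for a given path. The name printTokens is hypothetical; the reset/incrementToken/end sequence is the standard Lucene consumption pattern.

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

// Hypothetical helper: prints each token an analyzer produces for 'text'.
static void printTokens(Analyzer analyzer, String field, String text) throws IOException {
    try (TokenStream ts = analyzer.tokenStream(field, text)) {
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            System.out.println(term.toString());
        }
        ts.end();
    }
}

// e.g. printTokens(allChildrenPathIndexingAnalyzer, "path", "/a/b/c")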