Use of org.apache.lucene.analysis.Analyzer in the languagetool project (languagetool-org): the setUp method of the PatternRuleQueryBuilderTest class.
/**
 * Builds an in-memory Lucene index containing two example sentences (with
 * deliberate grammar errors) and opens a reader/searcher on it for the
 * pattern-rule query tests.
 */
@Override
public void setUp() throws Exception {
    super.setUp();
    language = new English();
    directory = new RAMDirectory();
    Analyzer analyzer = Indexer.getAnalyzer(language);
    IndexWriterConfig config = Indexer.getIndexWriterConfig(analyzer);
    // try-with-resources closes (and thereby commits) the writer before the
    // reader is opened below, so the reader sees both documents.
    try (IndexWriter writer = new IndexWriter(directory, config)) {
        addDocument(writer, "How do you thin about this wonderful idea?");
        addDocument(writer, "The are several grammar checkers for English, E.G. LanguageTool 123.");
    }
    reader = DirectoryReader.open(directory);
    searcher = newSearcher(reader);
}
Use of org.apache.lucene.analysis.Analyzer in the languagetool project (languagetool-org): the main method of the SentenceSourceIndexer class.
/**
 * Command-line entry point: indexes sentences from Wikipedia XML dumps and/or
 * Tatoeba files into a Lucene index.
 *
 * <p>Expected arguments (in order): dataFiles, indexDir, languageCode,
 * maxSentences, indexPosTags — see the usage message below.
 *
 * @param args the five command-line arguments described in the usage message
 * @throws Exception if indexing fails for any reason other than reaching the
 *                   configured sentence limit
 */
public static void main(String... args) throws Exception {
    if (args.length != 5) {
        // Placeholder name fixed to <dataFiles> so it matches the parameter
        // description printed on the next line.
        System.out.println("Usage: " + SentenceSourceIndexer.class.getSimpleName() + " <dataFiles> <indexDir> <languageCode> <maxSentences> <indexPosTags>");
        System.out.println("\t<dataFiles> comma-separated list of a Wikipedia XML dump (*.xml) and/or Tatoeba files (tatoeba-*)");
        System.out.println("\t<indexDir> directory where Lucene index will be written to, existing index content will be removed");
        System.out.println("\t<languageCode> short code like en for English, de for German etc");
        System.out.println("\t<maxSentences> maximum number of sentences to be indexed, use 0 for no limit");
        System.out.println("\t<indexPosTags> 1 to also index POS tags (i.e. analyze text by LT), 0 to index only the plain text");
        System.exit(1);
    }
    List<String> dumpFilesNames = Arrays.asList(args[0].split(","));
    File indexDir = new File(args[1]);
    String languageCode = args[2];
    int maxSentences = Integer.parseInt(args[3]);
    Language language = Languages.getLanguageForShortCode(languageCode);
    if (maxSentences == 0) {
        System.out.println("Going to index contents from " + dumpFilesNames);
    } else {
        System.out.println("Going to index up to " + maxSentences + " sentences from " + dumpFilesNames);
    }
    System.out.println("Output index dir: " + indexDir);
    long start = System.currentTimeMillis();
    Analyzer analyzer;
    String indexPos = args[4];
    if (indexPos.equals("1")) {
        // null means the indexer falls back to its LanguageToolAnalyzer
        // (which also produces POS tags).
        analyzer = null;
    } else if (indexPos.equals("0")) {
        // Plain-text indexing: StandardAnalyzer with an empty stop-word set.
        analyzer = new StandardAnalyzer(new CharArraySet(Collections.emptyList(), false));
    } else {
        throw new IllegalArgumentException("Unknown value '" + indexPos + "' for indexPosTags parameter, use 0 or 1");
    }
    try (FSDirectory fsDirectory = FSDirectory.open(indexDir.toPath());
         SentenceSourceIndexer indexer = new SentenceSourceIndexer(fsDirectory, language, maxSentences, analyzer)) {
        try {
            indexer.run(dumpFilesNames, language);
        } catch (DocumentLimitReachedException e) {
            // Reaching the limit is the expected way to stop, not an error.
            System.out.println("Sentence limit (" + e.getLimit() + ") reached, stopping indexing");
        } finally {
            indexer.writeMetaDocuments();
        }
        if (analyzer != null) {
            analyzer.close();
        }
    }
    long end = System.currentTimeMillis();
    float minutes = (end - start) / (float) (1000 * 60);
    System.out.printf("Indexing took %.2f minutes\n", minutes);
}
Use of org.apache.lucene.analysis.Analyzer in the neo4j-mobile-android project (neo4j-contrib): the getIndexType method of the IndexType class.
/**
 * Resolves the {@link IndexType} for an index from its configuration map.
 *
 * <p>If a 'type' key is present, the built-in alternatives "exact" and
 * "fulltext" are recognized (any other value yields {@code null}). If no
 * 'type' is given, a custom analyzer must be configured and is wrapped in a
 * {@code CustomType}.
 */
static IndexType getIndexType(IndexIdentifier identifier, Map<String, String> config) {
    String configuredType = config.get(LuceneIndexImplementation.KEY_TYPE);
    Similarity similarity = getCustomSimilarity(config);
    boolean lowerCase = parseBoolean(config.get(LuceneIndexImplementation.KEY_TO_LOWER_CASE), true);
    Analyzer configuredAnalyzer = getCustomAnalyzer(config);
    if (configuredType == null) {
        // No built-in type requested: a custom analyzer is mandatory.
        if (configuredAnalyzer == null) {
            throw new IllegalArgumentException("No 'type' was given (which can point out " + "built-in analyzers, such as 'exact' and 'fulltext')" + " and no 'analyzer' was given either (which can point out a custom " + Analyzer.class.getName() + " to use)");
        }
        return new CustomType(configuredAnalyzer, lowerCase, similarity);
    }
    if (configuredType.equals("exact")) {
        return EXACT;
    }
    if (configuredType.equals("fulltext")) {
        // Custom analyzer wins; otherwise pick a whitespace analyzer that
        // matches the configured case sensitivity.
        Analyzer chosen = configuredAnalyzer;
        if (chosen == null) {
            chosen = lowerCase ? LuceneDataSource.LOWER_CASE_WHITESPACE_ANALYZER : LuceneDataSource.WHITESPACE_ANALYZER;
        }
        return new CustomType(chosen, lowerCase, similarity);
    }
    // Unknown built-in type name: preserve the original behavior of
    // returning null and letting the caller deal with it.
    return null;
}
Use of org.apache.lucene.analysis.Analyzer in the orientdb project (orientechnologies): the buildAnalyzer method of the OLuceneAnalyzerFactory class.
/**
 * Instantiates the analyzer class named by {@code analyzerFQN}, passing the
 * given stop words to its {@code (CharArraySet)} constructor.
 *
 * @param analyzerFQN fully qualified class name of an Analyzer subclass
 * @param stopwords   stop words handed to the analyzer's constructor
 * @return the reflectively created analyzer, or a default
 *         {@link StandardAnalyzer} if construction fails unexpectedly
 * @throws OIndexException (wrapped) if the class or its (CharArraySet)
 *                         constructor cannot be found
 */
private Analyzer buildAnalyzer(String analyzerFQN, Collection<String> stopwords) {
    try {
        // Parameterized reflection types instead of raw Class/Constructor.
        final Class<?> analyzerClass = Class.forName(analyzerFQN);
        final Constructor<?> constructor = analyzerClass.getDeclaredConstructor(CharArraySet.class);
        return (Analyzer) constructor.newInstance(new CharArraySet(stopwords, true));
    } catch (ClassNotFoundException e) {
        throw OException.wrapException(new OIndexException("Analyzer: " + analyzerFQN + " not found"), e);
    } catch (NoSuchMethodException e) {
        throw OException.wrapException(new OIndexException("Couldn't instantiate analyzer: public constructor not found"), e);
    } catch (Exception e) {
        // Deliberate best-effort fallback: log the failure and use the
        // default analyzer rather than failing the whole index.
        OLogManager.instance().error(this, "Error on getting analyzer for Lucene index", e);
    }
    return new StandardAnalyzer();
}
Use of org.apache.lucene.analysis.Analyzer in the elasticsearch-suggest-plugin project (spinscale): the load method of the AbstractCacheLoaderSuggester class.
/**
 * Loads a suggester for the given field: resolves the query-time and
 * index-time analyzers from the field mapping, applies per-request analyzer
 * overrides from {@code fieldType}, and falls back to a plain
 * {@code StandardAnalyzer} when nothing can be resolved.
 *
 * @param fieldType field/type descriptor, optionally naming override analyzers
 * @return the suggester built from the resolved analyzers
 * @throws ElasticsearchException if a named override analyzer does not exist
 */
@Override
public T load(ShardSuggestService.FieldType fieldType) throws Exception {
    MapperService.SmartNameFieldMappers fieldMappers = mapperService.smartName(fieldType.field(), fieldType.types());
    Analyzer queryAnalyzer = null;
    Analyzer indexAnalyzer = null;
    if (fieldMappers != null) {
        // Reuse the lookup done above instead of repeating the smartName() call.
        FieldMapper fieldMapper = fieldMappers.mapper();
        queryAnalyzer = fieldMapper.searchAnalyzer();
        // Bug fix: this branch resolves the *query* analyzer override (and its
        // error message says "Query analyzer"), so the guard must check
        // fieldType.queryAnalyzer() — the original tested indexAnalyzer() here.
        if (Strings.hasLength(fieldType.queryAnalyzer())) {
            NamedAnalyzer namedAnalyzer = analysisService.analyzer(fieldType.queryAnalyzer());
            if (namedAnalyzer == null) {
                throw new ElasticsearchException("Query analyzer[" + fieldType.queryAnalyzer() + "] does not exist.");
            }
            queryAnalyzer = namedAnalyzer.analyzer();
        }
        // NOTE(review): the index-time default is also taken from
        // searchAnalyzer(); presumably this should be fieldMapper.indexAnalyzer()
        // — confirm against the FieldMapper API before changing.
        indexAnalyzer = fieldMapper.searchAnalyzer();
        if (Strings.hasLength(fieldType.indexAnalyzer())) {
            NamedAnalyzer namedAnalyzer = analysisService.analyzer(fieldType.indexAnalyzer());
            if (namedAnalyzer == null) {
                throw new ElasticsearchException("Index analyzer[" + fieldType.indexAnalyzer() + "] does not exist.");
            }
            indexAnalyzer = namedAnalyzer.analyzer();
        }
    }
    if (queryAnalyzer == null) {
        queryAnalyzer = new StandardAnalyzer(org.elasticsearch.Version.CURRENT.luceneVersion);
    }
    if (indexAnalyzer == null) {
        indexAnalyzer = new StandardAnalyzer(org.elasticsearch.Version.CURRENT.luceneVersion);
    }
    return getSuggester(indexAnalyzer, queryAnalyzer, fieldType);
}
Aggregations