use of org.apache.lucene.analysis.Analyzer in project ansj_seg by NLPchina.
the class IndexTest method indexTest.
@Test
public void indexTest() throws CorruptIndexException, LockObtainFailedException, IOException, ParseException {
    // Point the default dictionary entry at the library file used by this test.
    MyStaticValue.DIC.put(MyStaticValue.DIC_DEFAULT, "../../library/default.dic");

    // Word set handed to both the index-time and the query-time analyzer.
    HashSet<String> filterWords = new HashSet<String>();
    filterWords.add("的");
    Analyzer indexAnalyzer = new AnsjIndexAnalysis(filterWords, false);

    // Register a user-defined word before the sample text is indexed.
    UserDefineLibrary.insertWord("蛇药片", "n", 1000);
    String sampleText = "季德胜蛇药片 10片*6板 ";

    // Build the index in memory.
    IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_44, indexAnalyzer);
    Directory directory = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(directory, writerConfig);
    addContent(indexWriter, sampleText);
    indexWriter.commit();
    indexWriter.close();
    System.out.println("索引建立完毕");

    // Query the freshly built index with a quoted phrase.
    Analyzer queryAnalyzer = new AnsjAnalysis(filterWords, false);
    System.out.println("index ok to search!");
    search(queryAnalyzer, directory, "\"季德胜蛇药片\"");
}
use of org.apache.lucene.analysis.Analyzer in project OpenGrok by OpenGrok.
the class IndexDatabase method optimize.
/**
 * Optimize the index database.
 * <p>
 * Opens a writer on the index directory and force-merges it with a target of one
 * segment. If another run is already flagged as running, a warning is logged and
 * the method returns without touching the index. I/O failures are logged and not
 * propagated to the caller.
 */
public void optimize() {
    // Claim the "running" flag; bail out if an update/optimize is already in progress.
    synchronized (lock) {
        if (running) {
            LOGGER.warning("Optimize terminated... Someone else is updating / optimizing it!");
            return;
        }
        running = true;
    }
    IndexWriter wrt = null;
    try {
        LOGGER.info("Optimizing the index ... ");
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig conf = new IndexWriterConfig(analyzer);
        conf.setOpenMode(OpenMode.CREATE_OR_APPEND);
        wrt = new IndexWriter(indexDirectory, conf);
        // NOTE(review): the original author flagged this call as deprecated / no longer needed.
        wrt.forceMerge(1);
        LOGGER.info("done");
        // The merge succeeded: drop the on-disk dirty marker and clear the in-memory flag.
        synchronized (lock) {
            if (dirtyFile.exists() && !dirtyFile.delete()) {
                LOGGER.log(Level.FINE, "Failed to remove \"dirty-file\": {0}", dirtyFile.getAbsolutePath());
            }
            dirty = false;
        }
    } catch (IOException e) {
        // The exception is passed as the Throwable argument, so the message must not
        // contain a "{0}" placeholder: that overload performs no parameter substitution
        // and the placeholder would be logged literally.
        LOGGER.log(Level.SEVERE, "ERROR: optimizing index", e);
    } finally {
        if (wrt != null) {
            try {
                wrt.close();
            } catch (IOException e) {
                LOGGER.log(Level.WARNING, "An error occured while closing writer", e);
            }
        }
        // Always release the "running" flag, whether or not the merge succeeded.
        synchronized (lock) {
            running = false;
        }
    }
}
use of org.apache.lucene.analysis.Analyzer in project OpenGrok by OpenGrok.
the class IndexDatabase method update.
/**
 * Update the content of this index database.
 * <p>
 * For every configured directory (or the project path / source root when none is
 * configured) the existing index is opened, the source tree is walked via
 * {@code indexDown}, and the uid terms still left under the directory's prefix
 * afterwards are handed to {@code removeFile}. When the run was not interrupted
 * and the database is dirty, the index is optionally optimized and the
 * {@code timestamp} file in the data root is created or touched.
 *
 * @throws IOException if an error occurs, or if an indexer run is already in progress
 * @throws HistoryException if an error occurs when accessing the history
 */
public void update() throws IOException, HistoryException {
    // Claim the "running" flag; only one update/optimize may run at a time.
    synchronized (lock) {
        if (running) {
            throw new IOException("Indexer already running!");
        }
        running = true;
        interrupted = false;
    }

    // Set up ctags if a binary is configured.
    String ctgs = RuntimeEnvironment.getInstance().getCtags();
    if (ctgs != null) {
        ctags = new Ctags();
        ctags.setBinary(ctgs);
    }
    if (ctags == null) {
        LOGGER.severe("Unable to run ctags! searching definitions will not work!");
    } else {
        String filename = RuntimeEnvironment.getInstance().getCTagsExtraOptionsFile();
        if (filename != null) {
            ctags.setCTagsExtraOptionsFile(filename);
        }
    }

    try {
        Analyzer analyzer = AnalyzerGuru.getAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwc.setRAMBufferSizeMB(RuntimeEnvironment.getInstance().getRamBufferSize());
        writer = new IndexWriter(indexDirectory, iwc);
        // to make sure index exists on the disk
        writer.commit();

        // Default to the project path, or the whole source root when there is no project.
        if (directories.isEmpty()) {
            if (project == null) {
                directories.add("");
            } else {
                directories.add(project.getPath());
            }
        }

        for (String dir : directories) {
            File sourceRoot;
            if ("".equals(dir)) {
                sourceRoot = RuntimeEnvironment.getInstance().getSourceRootFile();
            } else {
                sourceRoot = new File(RuntimeEnvironment.getInstance().getSourceRootFile(), dir);
            }
            HistoryGuru.getInstance().ensureHistoryCacheExists(sourceRoot);
            String startuid = Util.path2uid(dir, "");

            // open existing index
            IndexReader reader = DirectoryReader.open(indexDirectory);
            try {
                // The term lookup happens inside this try so that the reader is closed
                // even when reading the fields/terms fails.
                int numDocs = reader.numDocs();
                if (numDocs > 0) {
                    Fields uFields = MultiFields.getFields(reader);
                    Terms terms = uFields.terms(QueryBuilder.U);
                    uidIter = terms.iterator();
                    //init uid
                    TermsEnum.SeekStatus stat = uidIter.seekCeil(new BytesRef(startuid));
                    if (stat == TermsEnum.SeekStatus.END) {
                        uidIter = null;
                        LOGGER.log(Level.WARNING, "Couldn't find a start term for {0}, empty u field?", startuid);
                    }
                }

                // The code below traverses the tree to get total count.
                int file_cnt = 0;
                if (RuntimeEnvironment.getInstance().isPrintProgress()) {
                    LOGGER.log(Level.INFO, "Counting files in {0} ...", dir);
                    file_cnt = indexDown(sourceRoot, dir, true, 0, 0);
                    LOGGER.log(Level.INFO, "Need to process: {0} files for {1}", new Object[] { file_cnt, dir });
                }
                indexDown(sourceRoot, dir, false, 0, file_cnt);

                // Hand every uid term still under this directory's prefix to removeFile().
                // NOTE(review): presumably these are entries whose files were not met by
                // indexDown -- confirm against indexDown/removeFile.
                while (uidIter != null && uidIter.term() != null && uidIter.term().utf8ToString().startsWith(startuid)) {
                    removeFile();
                    BytesRef next = uidIter.next();
                    if (next == null) {
                        uidIter = null;
                    }
                }
            } finally {
                reader.close();
            }
        }
    } finally {
        if (writer != null) {
            try {
                writer.prepareCommit();
                writer.commit();
                writer.close();
            } catch (IOException e) {
                LOGGER.log(Level.WARNING, "An error occured while closing writer", e);
            }
        }
        if (ctags != null) {
            try {
                ctags.close();
            } catch (IOException e) {
                LOGGER.log(Level.WARNING, "An error occured while closing ctags process", e);
            }
        }
        // Always release the "running" flag.
        synchronized (lock) {
            running = false;
        }
    }

    if (!isInterrupted() && isDirty()) {
        if (RuntimeEnvironment.getInstance().isOptimizeDatabase()) {
            optimize();
        }
        // Create or touch the timestamp file in the data root.
        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        File timestamp = new File(env.getDataRootFile(), "timestamp");
        String purpose = "used for timestamping the index database.";
        if (timestamp.exists()) {
            if (!timestamp.setLastModified(System.currentTimeMillis())) {
                LOGGER.log(Level.WARNING, "Failed to set last modified time on ''{0}'', {1}", new Object[] { timestamp.getAbsolutePath(), purpose });
            }
        } else {
            if (!timestamp.createNewFile()) {
                LOGGER.log(Level.WARNING, "Failed to create file ''{0}'', {1}", new Object[] { timestamp.getAbsolutePath(), purpose });
            }
        }
    }
}
use of org.apache.lucene.analysis.Analyzer in project Openfire by igniterealtime.
the class ChatSearchManager method loadAnalyzer.
/**
 * Resolves the analyzer used for indexing and stores it in {@code indexerAnalyzer}.
 * A custom analyzer class is used when one is configured; if it cannot be
 * instantiated, a {@code StandardAnalyzer} is used instead.
 */
private void loadAnalyzer() {
    // A workgroup-specific analyzer setting wins over the global configuration.
    String analyzerClass = workgroup.getProperties().getProperty("search.analyzer.className");
    String words;
    if (analyzerClass == null) {
        // No workgroup override: fall back to the global analyzer and its stop words.
        analyzerClass = getAnalyzerClass();
        words = JiveGlobals.getProperty("workgroup.search.analyzer.stopWordList");
    } else {
        words = workgroup.getProperties().getProperty("search.analyzer.stopWordList");
    }

    // Split the comma-separated stop word list, if one was configured.
    List<String> stopWords = new ArrayList<String>();
    if (words != null) {
        for (StringTokenizer tokens = new StringTokenizer(words, ","); tokens.hasMoreTokens();) {
            stopWords.add(tokens.nextToken().trim());
        }
    }

    Analyzer analyzer = null;
    try {
        analyzer = getAnalyzerInstance(analyzerClass, stopWords);
    } catch (Exception e) {
        Log.error("Error loading custom search analyzer: " + analyzerClass, e);
    }

    // No custom analyzer could be created: use the standard one, with stop words if any.
    if (analyzer == null) {
        analyzer = stopWords.isEmpty()
                ? new StandardAnalyzer()
                : new StandardAnalyzer(stopWords.toArray(new String[stopWords.size()]));
    }
    indexerAnalyzer = analyzer;
}
use of org.apache.lucene.analysis.Analyzer in project Openfire by igniterealtime.
the class ChatSearchManager method getAnalyzerInstance.
/**
 * Instantiates the analyzer class with the given name.
 * <p>
 * When stop words are supplied and the class has a public constructor taking a
 * {@code String[]}, that constructor is invoked with the stop words; otherwise
 * the no-argument constructor is used.
 *
 * @param analyzerClass fully qualified name of the analyzer class to load
 * @param stopWords stop words to pass to the analyzer; may be empty
 * @return the newly created analyzer
 * @throws Exception if the class cannot be loaded or instantiated
 */
private Analyzer getAnalyzerInstance(String analyzerClass, List<String> stopWords) throws Exception {
    // Load the class, falling back to this class's own class loader.
    Class<?> analyzerType;
    try {
        analyzerType = ClassUtils.forName(analyzerClass);
    } catch (ClassNotFoundException e) {
        analyzerType = getClass().getClassLoader().loadClass(analyzerClass);
    }

    if (!stopWords.isEmpty()) {
        Constructor<?> stopWordCtor;
        try {
            stopWordCtor = analyzerType.getConstructor(new Class<?>[] { String[].class });
        } catch (NoSuchMethodException e) {
            // no String[] parameter to the constructor
            stopWordCtor = null;
        }
        if (stopWordCtor != null) {
            // Wrap the array so it is passed as one String[] argument, not spread as varargs.
            Object[] ctorArgs = { stopWords.toArray(new String[stopWords.size()]) };
            return (Analyzer) stopWordCtor.newInstance(ctorArgs);
        }
    }

    // No stop words, or no matching constructor: use the default constructor.
    return (Analyzer) analyzerType.newInstance();
}
Aggregations