use of org.apache.lucene.analysis.standard.StandardAnalyzer in project OpenGrok by OpenGrok.
the class IndexDatabase method optimize.
/**
 * Optimize the index database.
 */
public void optimize() {
    synchronized (lock) {
        if (running) {
            LOGGER.warning("Optimize terminated... Someone else is updating / optimizing it!");
            return;
        }
        running = true;
    }
    IndexWriter wrt = null;
    try {
        LOGGER.info("Optimizing the index ... ");
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig conf = new IndexWriterConfig(analyzer);
        conf.setOpenMode(OpenMode.CREATE_OR_APPEND);
        wrt = new IndexWriter(indexDirectory, conf);
        // Merge the index down to a single segment; forceMerge(1) replaces the
        // deprecated IndexWriter.optimize() from older Lucene releases.
        wrt.forceMerge(1);
        LOGGER.info("done");
        synchronized (lock) {
            if (dirtyFile.exists() && !dirtyFile.delete()) {
                LOGGER.log(Level.FINE, "Failed to remove \"dirty-file\": {0}", dirtyFile.getAbsolutePath());
            }
            dirty = false;
        }
    } catch (IOException e) {
        // Pass the exception as the throwable argument so the stack trace is logged.
        LOGGER.log(Level.SEVERE, "ERROR: optimizing index", e);
    } finally {
        if (wrt != null) {
            try {
                wrt.close();
            } catch (IOException e) {
                LOGGER.log(Level.WARNING, "An error occurred while closing writer", e);
            }
        }
        synchronized (lock) {
            running = false;
        }
    }
}
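On current Lucene releases (5.x and later, matching the IndexWriterConfig API used above), IndexWriter implements Closeable, so the explicit finally block can be replaced with try-with-resources. A minimal sketch of the same merge step; the index path here is a placeholder, not taken from the OpenGrok code:

import java.nio.file.Paths;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.FSDirectory;

public class OptimizeSketch {
    public static void main(String[] args) throws Exception {
        IndexWriterConfig conf = new IndexWriterConfig(new StandardAnalyzer());
        conf.setOpenMode(OpenMode.CREATE_OR_APPEND);
        // "index-dir" is a placeholder path for this sketch.
        try (FSDirectory dir = FSDirectory.open(Paths.get("index-dir"));
             IndexWriter writer = new IndexWriter(dir, conf)) {
            writer.forceMerge(1);  // merge the index down to a single segment
        }  // writer.close() runs automatically, committing pending changes
    }
}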
use of org.apache.lucene.analysis.standard.StandardAnalyzer in project Openfire by igniterealtime.
the class ChatSearchManager method loadAnalyzer.
/**
 * Load the search analyzer. A custom analyzer class will be used if it is defined.
 */
private void loadAnalyzer() {
    Analyzer analyzer = null;
    String analyzerClass = null;
    String words = null;
    // First check if the workgroup should use a special Analyzer.
    analyzerClass = workgroup.getProperties().getProperty("search.analyzer.className");
    if (analyzerClass != null) {
        words = workgroup.getProperties().getProperty("search.analyzer.stopWordList");
    } else {
        // Use the global analyzer.
        analyzerClass = getAnalyzerClass();
        words = JiveGlobals.getProperty("workgroup.search.analyzer.stopWordList");
    }
    // Get the stop word list if there was one.
    List<String> stopWords = new ArrayList<String>();
    if (words != null) {
        StringTokenizer st = new StringTokenizer(words, ",");
        while (st.hasMoreTokens()) {
            stopWords.add(st.nextToken().trim());
        }
    }
    try {
        analyzer = getAnalyzerInstance(analyzerClass, stopWords);
    } catch (Exception e) {
        Log.error("Error loading custom search analyzer: " + analyzerClass, e);
    }
    // If the analyzer is null, fall back to the standard analyzer.
    if (analyzer == null && stopWords.size() > 0) {
        analyzer = new StandardAnalyzer(stopWords.toArray(new String[stopWords.size()]));
    } else if (analyzer == null) {
        analyzer = new StandardAnalyzer();
    }
    indexerAnalyzer = analyzer;
}
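Note that the String[] stop-word constructor used in the fallback branch comes from an older Lucene release; in Lucene 7 and later, StandardAnalyzer takes a CharArraySet instead. A rough equivalent of the fallback, with made-up sample stop words:

import java.util.Arrays;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.standard.StandardAnalyzer;

List<String> stopWords = Arrays.asList("the", "a", "of");  // sample values
Analyzer analyzer;
if (!stopWords.isEmpty()) {
    // Second argument: whether to ignore case when matching stop words.
    analyzer = new StandardAnalyzer(new CharArraySet(stopWords, true));
} else {
    analyzer = new StandardAnalyzer();  // version-default stop word set
}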
use of org.apache.lucene.analysis.standard.StandardAnalyzer in project Openfire by igniterealtime.
the class ArchiveIndexer method rebuildIndex.
/**
 * Rebuilds the search index with all archived conversation data. This method returns
 * a Future that represents the status of the index rebuild process (also available
 * via {@link #getIndexRebuildProgress()}). The integer value (0 through 100)
 * represents the percentage of work done. If message archiving is disabled,
 * this method will return <tt>null</tt>.
 *
 * @return a Future to indicate the status of rebuilding the index or <tt>null</tt> if
 *         rebuilding the index is not possible.
 */
public synchronized Future<Integer> rebuildIndex() {
    // Immediately return if the service has been stopped.
    if (stopped) {
        return null;
    }
    // Do nothing if archiving is disabled. Check this before setting the rebuild
    // flag, otherwise the flag would be left stuck at true.
    if (!conversationManager.isArchivingEnabled()) {
        return null;
    }
    // If a rebuild is already happening, return.
    if (rebuildInProgress) {
        return null;
    }
    rebuildInProgress = true;
    // Create a future to track the index rebuild progress.
    rebuildFuture = new RebuildFuture();
    // Create a runnable that will perform the actual rebuild work.
    Runnable rebuildTask = new Runnable() {
        public void run() {
            List<Long> conversationIDs = new ArrayList<Long>();
            Map<Long, Boolean> externalMetaData = new HashMap<Long, Boolean>();
            Connection con = null;
            PreparedStatement pstmt = null;
            ResultSet rs = null;
            try {
                con = DbConnectionManager.getConnection();
                pstmt = con.prepareStatement(ALL_CONVERSATIONS);
                rs = pstmt.executeQuery();
                while (rs.next()) {
                    long conversationID = rs.getLong(1);
                    conversationIDs.add(conversationID);
                    externalMetaData.put(conversationID, rs.getInt(2) == 1);
                }
            } catch (SQLException sqle) {
                Log.error(sqle.getMessage(), sqle);
            } finally {
                DbConnectionManager.closeConnection(rs, pstmt, con);
            }
            if (!conversationIDs.isEmpty()) {
                // Index the conversations.
                writerLock.lock();
                IndexModifier writer = null;
                try {
                    writer = new IndexModifier(directory, new StandardAnalyzer(), true);
                    long newestDate = indexConversations(conversationIDs, externalMetaData, writer, true);
                    writer.optimize();
                    // Done indexing, so store a last modified date.
                    if (newestDate != -1) {
                        lastModified = newestDate;
                        indexProperties.setProperty("lastModified", Long.toString(lastModified));
                    }
                } catch (IOException ioe) {
                    Log.error(ioe.getMessage(), ioe);
                } finally {
                    if (writer != null) {
                        try {
                            writer.close();
                        } catch (Exception e) {
                            Log.error(e.getMessage(), e);
                        }
                    }
                    writerLock.unlock();
                }
            }
            // Done rebuilding the index, so reset state.
            rebuildFuture = null;
            rebuildInProgress = false;
        }
    };
    taskEngine.submit(rebuildTask);
    return rebuildFuture;
}
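IndexModifier and its optimize() method date back to Lucene 2.x and have long since been removed. A hedged sketch of what the same rebuild pattern might look like on a modern IndexWriter (Lucene 5+); the conversationID field name is reused from the snippet, but the document contents are invented for illustration:

import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;

class RebuildSketch {
    static void rebuild(Directory directory) throws IOException {
        IndexWriterConfig conf = new IndexWriterConfig(new StandardAnalyzer());
        // OpenMode.CREATE plays the role of IndexModifier's "create = true"
        // flag: existing index contents are thrown away.
        conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        try (IndexWriter writer = new IndexWriter(directory, conf)) {
            // One invented sample document; the real code would build these
            // from the archived conversations.
            Document doc = new Document();
            doc.add(new StringField("conversationID", "42", Field.Store.YES));
            doc.add(new TextField("text", "sample message body", Field.Store.NO));
            writer.addDocument(doc);
            writer.forceMerge(1);  // rough stand-in for the old optimize()
        }
    }
}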
use of org.apache.lucene.analysis.standard.StandardAnalyzer in project Openfire by igniterealtime.
the class ArchiveIndexer method updateIndex.
/**
 * Updates the search index with all new conversation data since the last index update.
 */
public void updateIndex() {
    // Immediately return if the service has been stopped.
    if (stopped) {
        return;
    }
    // Do nothing if archiving is disabled.
    if (!conversationManager.isArchivingEnabled()) {
        return;
    }
    // If we're currently rebuilding the index, return.
    if (rebuildInProgress) {
        return;
    }
    writerLock.lock();
    IndexModifier writer = null;
    try {
        writer = new IndexModifier(directory, new StandardAnalyzer(), false);
        List<Long> conversationIDs = new ArrayList<Long>();
        Connection con = null;
        PreparedStatement pstmt = null;
        ResultSet rs = null;
        try {
            con = DbConnectionManager.getConnection();
            pstmt = con.prepareStatement(NEW_CONVERSATIONS);
            pstmt.setLong(1, lastModified);
            rs = pstmt.executeQuery();
            while (rs.next()) {
                conversationIDs.add(rs.getLong(1));
            }
        } catch (SQLException sqle) {
            Log.error(sqle.getMessage(), sqle);
        } finally {
            DbConnectionManager.closeConnection(rs, pstmt, con);
        }
        // Delete any conversations found; they may have already been indexed,
        // but were updated since then.
        for (long conversationID : conversationIDs) {
            writer.deleteDocuments(new Term("conversationID", Long.toString(conversationID)));
        }
        // Load meta-data for each conversation.
        Map<Long, Boolean> externalMetaData = new HashMap<Long, Boolean>();
        for (long conversationID : conversationIDs) {
            try {
                con = DbConnectionManager.getConnection();
                pstmt = con.prepareStatement(CONVERSATION_METADATA);
                pstmt.setLong(1, conversationID);
                rs = pstmt.executeQuery();
                while (rs.next()) {
                    externalMetaData.put(conversationID, rs.getInt(1) == 1);
                }
            } catch (SQLException sqle) {
                Log.error(sqle.getMessage(), sqle);
            } finally {
                DbConnectionManager.closeConnection(rs, pstmt, con);
            }
        }
        // Now index all the new conversations.
        long newestDate = indexConversations(conversationIDs, externalMetaData, writer, false);
        writer.optimize();
        // Done indexing, so store a last modified date.
        if (newestDate != -1) {
            lastModified = newestDate;
            indexProperties.setProperty("lastModified", Long.toString(lastModified));
        }
    } catch (IOException ioe) {
        Log.error(ioe.getMessage(), ioe);
    } finally {
        if (writer != null) {
            try {
                writer.close();
            } catch (Exception e) {
                Log.error(e.getMessage(), e);
            }
        }
        writerLock.unlock();
    }
}
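The delete-then-re-add pattern above is what IndexWriter.updateDocument() performs in a single atomic step on modern Lucene. A small sketch, with the conversationID field name taken from the snippet and everything else illustrative:

import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;

class ConversationReindexer {
    static void reindexConversation(IndexWriter writer, long conversationID) throws IOException {
        Document doc = new Document();
        doc.add(new StringField("conversationID", Long.toString(conversationID), Field.Store.YES));
        // ... the remaining conversation fields would be added here ...
        // Atomically deletes any document matching the term, then adds the new one.
        writer.updateDocument(new Term("conversationID", Long.toString(conversationID)), doc);
    }
}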
use of org.apache.lucene.analysis.standard.StandardAnalyzer in project languagetool by languagetool-org.
the class SentenceSourceIndexer method main.
public static void main(String... args) throws Exception {
    if (args.length != 5) {
        System.out.println("Usage: " + SentenceSourceIndexer.class.getSimpleName() + " <dataFiles...> <indexDir> <languageCode> <maxSentences> <indexPosTags>");
        System.out.println("\t<dataFiles> comma-separated list of a Wikipedia XML dump (*.xml) and/or Tatoeba files (tatoeba-*)");
        System.out.println("\t<indexDir> directory the Lucene index will be written to; existing index content will be removed");
        System.out.println("\t<languageCode> short code like en for English, de for German, etc.");
        System.out.println("\t<maxSentences> maximum number of sentences to be indexed, use 0 for no limit");
        System.out.println("\t<indexPosTags> 1 to also index POS tags (i.e. analyze text by LT), 0 to index only the plain text");
        System.exit(1);
    }
    List<String> dumpFilesNames = Arrays.asList(args[0].split(","));
    File indexDir = new File(args[1]);
    String languageCode = args[2];
    int maxSentences = Integer.parseInt(args[3]);
    Language language = Languages.getLanguageForShortCode(languageCode);
    if (maxSentences == 0) {
        System.out.println("Going to index contents from " + dumpFilesNames);
    } else {
        System.out.println("Going to index up to " + maxSentences + " sentences from " + dumpFilesNames);
    }
    System.out.println("Output index dir: " + indexDir);
    long start = System.currentTimeMillis();
    Analyzer analyzer;
    String indexPos = args[4];
    if (indexPos.equals("1")) {
        // A null analyzer makes the indexer fall back to the LanguageToolAnalyzer.
        analyzer = null;
    } else if (indexPos.equals("0")) {
        analyzer = new StandardAnalyzer(new CharArraySet(Collections.emptyList(), false));
    } else {
        throw new IllegalArgumentException("Unknown value '" + indexPos + "' for indexPosTags parameter, use 0 or 1");
    }
    try (FSDirectory fsDirectory = FSDirectory.open(indexDir.toPath());
         SentenceSourceIndexer indexer = new SentenceSourceIndexer(fsDirectory, language, maxSentences, analyzer)) {
        try {
            indexer.run(dumpFilesNames, language);
        } catch (DocumentLimitReachedException e) {
            System.out.println("Sentence limit (" + e.getLimit() + ") reached, stopping indexing");
        } finally {
            indexer.writeMetaDocuments();
        }
        if (analyzer != null) {
            analyzer.close();
        }
    }
    long end = System.currentTimeMillis();
    float minutes = (end - start) / (float) (1000 * 60);
    System.out.printf("Indexing took %.2f minutes\n", minutes);
}
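To see what the StandardAnalyzer with an empty stop-word set actually emits, the standard TokenStream consumption loop (reset, incrementToken, end) can be used. A self-contained sketch, assuming a recent Lucene where CharArraySet lives in org.apache.lucene.analysis; the sample sentence is made up:

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class TokenDemo {
    public static void main(String[] args) throws Exception {
        Analyzer analyzer = new StandardAnalyzer(CharArraySet.EMPTY_SET);
        // With an empty stop set, every word is kept: "The" is not dropped.
        try (TokenStream ts = analyzer.tokenStream("field", "The quick brown fox")) {
            CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
            ts.reset();
            while (ts.incrementToken()) {
                System.out.println(term.toString());  // the, quick, brown, fox
            }
            ts.end();
        }
        analyzer.close();
    }
}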