Usage example of org.apache.lucene.index.IndexWriter from the ansj_seg project (NLPchina): class IndexTest, method indexTest.
@Test
public void indexTest() throws CorruptIndexException, LockObtainFailedException, IOException, ParseException {
    // Point the segmenter's default dictionary at the test library location.
    MyStaticValue.DIC.put(MyStaticValue.DIC_DEFAULT, "../../library/default.dic");
    // Stop-word set shared by the indexing analyzer and the query analyzer.
    HashSet<String> hs = new HashSet<String>();
    hs.add("的");
    Analyzer analyzer = new AnsjIndexAnalysis(hs, false);
    String text = "季德胜蛇药片 10片*6板 ";
    // Register the domain term in the user dictionary so it is kept as one token.
    UserDefineLibrary.insertWord("蛇药片", "n", 1000);
    IndexWriterConfig ic = new IndexWriterConfig(Version.LUCENE_44, analyzer);
    // Build the index entirely in memory.
    Directory directory = new RAMDirectory();
    IndexWriter iwriter = new IndexWriter(directory, ic);
    try {
        addContent(iwriter, text);
        iwriter.commit();
    } finally {
        // Close even if indexing fails, so the directory lock is released.
        iwriter.close();
    }
    System.out.println("索引建立完毕");
    Analyzer queryAnalyzer = new AnsjAnalysis(hs, false);
    System.out.println("index ok to search!");
    // Phrase query: the quoted term must match the indexed token sequence.
    search(queryAnalyzer, directory, "\"季德胜蛇药片\"");
}
Usage example of org.apache.lucene.index.IndexWriter from the ansj_seg project (NLPchina): class NearTest, method createIndex.
/**
 * Builds a small on-disk index under {@code c:/index} containing two
 * analyzed documents, used as a fixture for the nearby-term tests.
 *
 * @throws Exception if the directory cannot be opened or a document
 *                   cannot be written to the index
 */
public static void createIndex() throws Exception {
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_32, new AnsjAnalysis());
    Directory directory = FSDirectory.open(new File("c:/index"));
    IndexWriter writer = new IndexWriter(directory, conf);
    try {
        String str = "文化人;文化人谈文化";
        for (String value : str.split(";")) {
            Document doc = new Document();
            // Store the raw value and index it with positions/offsets so
            // proximity (near) queries can run against the term vectors.
            Field field = new Field("test", value, Store.YES, Index.ANALYZED_NO_NORMS, TermVector.WITH_POSITIONS_OFFSETS);
            doc.add(field);
            writer.addDocument(doc);
        }
        // Commit once after all documents are added, not once per document.
        writer.commit();
    } finally {
        // Release the write lock and directory handle even on failure.
        writer.close();
        directory.close();
    }
}
Usage example of org.apache.lucene.index.IndexWriter from the OpenGrok project: class IndexDatabase, method optimize.
/**
 * Merge the index database down to a single segment, if no other
 * update/optimize run currently holds the runner slot.
 */
public void optimize() {
    // Claim the single-runner slot shared with update(); give up if busy.
    synchronized (lock) {
        if (running) {
            LOGGER.warning("Optimize terminated... Someone else is updating / optimizing it!");
            return;
        }
        running = true;
    }
    IndexWriter indexWriter = null;
    try {
        LOGGER.info("Optimizing the index ... ");
        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
        config.setOpenMode(OpenMode.CREATE_OR_APPEND);
        indexWriter = new IndexWriter(indexDirectory, config);
        // forceMerge(1) is the replacement for the deprecated optimize() call.
        indexWriter.forceMerge(1);
        LOGGER.info("done");
        // The index is no longer dirty; drop the marker file if present.
        synchronized (lock) {
            if (dirtyFile.exists() && !dirtyFile.delete()) {
                LOGGER.log(Level.FINE, "Failed to remove \"dirty-file\": {0}", dirtyFile.getAbsolutePath());
            }
            dirty = false;
        }
    } catch (IOException e) {
        LOGGER.log(Level.SEVERE, "ERROR: optimizing index: {0}", e);
    } finally {
        if (indexWriter != null) {
            try {
                indexWriter.close();
            } catch (IOException e) {
                LOGGER.log(Level.WARNING, "An error occured while closing writer", e);
            }
        }
        // Always release the runner slot, whatever happened above.
        synchronized (lock) {
            running = false;
        }
    }
}
Usage example of org.apache.lucene.index.IndexWriter from the OpenGrok project: class IndexDatabase, method update.
/**
 * Update the content of this index database: (re)index the configured
 * directories under the source root and purge index entries whose source
 * files have disappeared. Only one update/optimize run may be active at a
 * time; a second caller gets an IOException instead of waiting.
 *
 * @throws IOException if an error occurs, or if an indexer is already running
 * @throws HistoryException if an error occurs when accessing the history
 */
public void update() throws IOException, HistoryException {
    // Claim the single-runner slot shared with optimize().
    synchronized (lock) {
        if (running) {
            throw new IOException("Indexer already running!");
        }
        running = true;
        interrupted = false;
    }
    // Set up the ctags binary used to extract symbol definitions, if configured.
    String ctgs = RuntimeEnvironment.getInstance().getCtags();
    if (ctgs != null) {
        ctags = new Ctags();
        ctags.setBinary(ctgs);
    }
    // Without ctags the index is still built, only definition search is lost.
    if (ctags == null) {
        LOGGER.severe("Unable to run ctags! searching definitions will not work!");
    }
    if (ctags != null) {
        String filename = RuntimeEnvironment.getInstance().getCTagsExtraOptionsFile();
        if (filename != null) {
            ctags.setCTagsExtraOptionsFile(filename);
        }
    }
    try {
        Analyzer analyzer = AnalyzerGuru.getAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwc.setRAMBufferSizeMB(RuntimeEnvironment.getInstance().getRamBufferSize());
        writer = new IndexWriter(indexDirectory, iwc);
        // to make sure index exists on the disk
        writer.commit();
        // With no explicit directories, index the whole source root
        // (or just this project's subtree when a project is set).
        if (directories.isEmpty()) {
            if (project == null) {
                directories.add("");
            } else {
                directories.add(project.getPath());
            }
        }
        for (String dir : directories) {
            File sourceRoot;
            if ("".equals(dir)) {
                sourceRoot = RuntimeEnvironment.getInstance().getSourceRootFile();
            } else {
                sourceRoot = new File(RuntimeEnvironment.getInstance().getSourceRootFile(), dir);
            }
            HistoryGuru.getInstance().ensureHistoryCacheExists(sourceRoot);
            // Common prefix of the per-file uid ("u" field) terms for this directory.
            String startuid = Util.path2uid(dir, "");
            // open existing index
            IndexReader reader = DirectoryReader.open(indexDirectory);
            Terms terms = null;
            int numDocs = reader.numDocs();
            if (numDocs > 0) {
                //reader.getTermVectors(0);
                Fields uFields = MultiFields.getFields(reader);
                terms = uFields.terms(QueryBuilder.U);
            }
            try {
                if (numDocs > 0) {
                    // Position the uid iterator at the first term of this directory.
                    uidIter = terms.iterator();
                    //init uid
                    TermsEnum.SeekStatus stat = uidIter.seekCeil(new BytesRef(startuid));
                    if (stat == TermsEnum.SeekStatus.END) {
                        uidIter = null;
                        LOGGER.log(Level.WARNING, "Couldn't find a start term for {0}, empty u field?", startuid);
                    }
                }
                // The code below traverses the tree to get total count.
                int file_cnt = 0;
                if (RuntimeEnvironment.getInstance().isPrintProgress()) {
                    // First pass (countOnly=true) only counts, for progress reporting.
                    LOGGER.log(Level.INFO, "Counting files in {0} ...", dir);
                    file_cnt = indexDown(sourceRoot, dir, true, 0, 0);
                    LOGGER.log(Level.INFO, "Need to process: {0} files for {1}", new Object[] { file_cnt, dir });
                }
                // Second pass does the actual indexing; indexDown presumably
                // advances uidIter past files that still exist — confirm in its source.
                indexDown(sourceRoot, dir, false, 0, file_cnt);
                // Any uid still within this directory's prefix refers to a file
                // that no longer exists on disk, so remove it from the index.
                while (uidIter != null && uidIter.term() != null && uidIter.term().utf8ToString().startsWith(startuid)) {
                    removeFile();
                    BytesRef next = uidIter.next();
                    if (next == null) {
                        uidIter = null;
                    }
                }
            } finally {
                reader.close();
            }
        }
    } finally {
        // Commit and release the writer and the ctags process even on failure;
        // close errors are logged, not rethrown, so the original exception (if
        // any) is the one that propagates.
        if (writer != null) {
            try {
                writer.prepareCommit();
                writer.commit();
                writer.close();
            } catch (IOException e) {
                LOGGER.log(Level.WARNING, "An error occured while closing writer", e);
            }
        }
        if (ctags != null) {
            try {
                ctags.close();
            } catch (IOException e) {
                LOGGER.log(Level.WARNING, "An error occured while closing ctags process", e);
            }
        }
        // Release the runner slot.
        synchronized (lock) {
            running = false;
        }
    }
    // If content changed and we were not interrupted: optionally optimize, then
    // touch the timestamp file so consumers can tell when the index last changed.
    if (!isInterrupted() && isDirty()) {
        if (RuntimeEnvironment.getInstance().isOptimizeDatabase()) {
            optimize();
        }
        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        File timestamp = new File(env.getDataRootFile(), "timestamp");
        String purpose = "used for timestamping the index database.";
        if (timestamp.exists()) {
            if (!timestamp.setLastModified(System.currentTimeMillis())) {
                LOGGER.log(Level.WARNING, "Failed to set last modified time on ''{0}'', {1}", new Object[] { timestamp.getAbsolutePath(), purpose });
            }
        } else {
            if (!timestamp.createNewFile()) {
                LOGGER.log(Level.WARNING, "Failed to create file ''{0}'', {1}", new Object[] { timestamp.getAbsolutePath(), purpose });
            }
        }
    }
}
Usage example of org.apache.lucene.index.IndexWriter from the Openfire project (Ignite Realtime): class ChatSearchManager, method updateIndex.
/**
 * Updates the index file with new chats that took place since the last added chat to the
 * index. If the index file is missing or a chat was never added to the index file then
 * {@link #rebuildIndex} will be used instead.
 *
 * @param forceUpdate true if the index should be updated regardless of the execution frequency.
 * @throws IOException if the directory cannot be read/written to, or it does not exist, or
 *         there is a problem adding a document to the index.
 */
public synchronized void updateIndex(boolean forceUpdate) throws IOException {
    // Check that the index files exist
    File dir = new File(searchDirectory);
    boolean create = !dir.exists() || !dir.isDirectory();
    if (lastUpdated == null || create) {
        // Recreate the index since it was never created or the index files disappeared
        rebuildIndex();
        return;
    }
    // Honor the configured execution frequency (in minutes) unless forced.
    if (!forceUpdate && (System.currentTimeMillis() - lastExecution.getTime()) / 60000 <= getExecutionFrequency()) {
        return;
    }
    List<ChatInformation> chatsInformation = getChatsInformation(lastUpdated);
    if (!chatsInformation.isEmpty()) {
        // Reset the number of transcripts pending to be added to the index
        pendingTranscripts.set(0);
        Date lastDate = null;
        IndexWriter writer = getWriter(false);
        try {
            for (ChatInformation chat : chatsInformation) {
                addTranscriptToIndex(chat, writer);
                lastDate = chat.getCreationDate();
            }
            // Check if we need to optimize the index. The index is optimized once a day
            if ((System.currentTimeMillis() - lastOptimization.getTime()) / ONE_HOUR > getOptimizationFrequency()) {
                writer.optimize();
                // Update the optimized date
                lastOptimization = new Date();
            }
        } finally {
            // Always release the writer, even when indexing or optimizing fails,
            // so the index write lock is not leaked.
            writer.close();
        }
        closeSearcherReader();
        // Reset the filters cache
        cachedFilters.clear();
        // Update the last updated date
        lastUpdated = lastDate;
        // Save the last updated and optimized dates to the database
        saveDates();
    }
    // Update the last time the update process was executed
    lastExecution = new Date();
}
Aggregations