Usage of gate.creole.annic.apache.lucene.index.IndexWriter in the gate-core project by GateNLP.
Class LuceneIndexer, method add.
/**
 * Adds new documents to the existing index.
 * <P>
 * The index directory is taken from the INDEX_LOCATION_URL entry of the
 * indexer parameters and is resolved to a file in the same way as in
 * {@code createIndex}, so both methods address the same directory.
 *
 * @param corpusPersistenceID identifier of the corpus the documents belong
 *          to; handed unchanged to {@code getLuceneDocuments}
 * @param added the documents to index; may be {@code null}, in which case
 *          the index is opened and closed without adding anything
 * @throws IndexException if an I/O error occurs while opening, writing to
 *           or closing the index
 */
@Override
public void add(String corpusPersistenceID, List<gate.Document> added) throws IndexException {
  URL indexLocation = (URL) parameters.get(Constants.INDEX_LOCATION_URL);
  try {
    File indexDir;
    try {
      indexDir = new File(indexLocation.toURI());
    } catch (URISyntaxException use) {
      // Same fallback as createIndex: URL.getFile() may include a query part
      // and so may not point at the directory createIndex wrote to.
      indexDir = Files.fileFromURL(indexLocation);
    }
    String location = indexDir.getAbsolutePath();

    // third argument false, whereas createIndex passes true for a fresh index
    IndexWriter writer = new IndexWriter(location, new LuceneAnalyzer(), false);
    try {
      if (added != null) {
        for (gate.Document gateDoc : added) {
          String idToUse = gateDoc.getLRPersistenceId() == null ? gateDoc.getName() : gateDoc.getLRPersistenceId().toString();
          System.out.print("Indexing : " + idToUse + " ...");
          List<gate.creole.annic.apache.lucene.document.Document> docs = getLuceneDocuments(corpusPersistenceID, gateDoc, location);
          if (docs != null) {
            for (gate.creole.annic.apache.lucene.document.Document luceneDoc : docs) {
              // skip empty slots, as createIndex does
              if (luceneDoc != null) {
                writer.addDocument(luceneDoc);
              }
            }
          }
          System.out.println("Done");
        }
      }
    } finally {
      // make sure we close the writer, whatever happens
      writer.close();
    }
  } catch (java.io.IOException ioe) {
    throw new IndexException(ioe);
  }
}
Usage of gate.creole.annic.apache.lucene.index.IndexWriter in the gate-core project by GateNLP.
Class LuceneIndexer, method createIndex.
/**
 * Builds a fresh index at the configured location and indexes every
 * document of the corpus into it.
 *
 * @param indexParameters a map with the values required to create the
 *          index. For LuceneIndexManager the following entries are
 *          expected:
 *          <P>
 *          INDEX_LOCATION_URL - the URL where the index is to be created
 *          <P>
 *          BASE_TOKEN_ANNOTATION_TYPE
 *          <P>
 *          INDEX_UNIT_ANNOTATION_TYPE
 *          <P>
 *          FEATURES_TO_EXCLUDE
 *          <P>
 *          FEATURES_TO_INCLUDE
 *          <P>
 * @throws IndexException if an I/O error occurs while writing the index or
 *           the parameters
 */
@Override
public void createIndex(Map<String, Object> indexParameters) throws IndexException {
  // NOTE(review): the location is read from the 'parameters' field, not from
  // the argument; presumably checkIndexParameters populates it - confirm.
  checkIndexParameters(indexParameters);
  final URL indexLocation = (URL) parameters.get(Constants.INDEX_LOCATION_URL);
  try {
    File indexDir;
    try {
      indexDir = new File(indexLocation.toURI());
    } catch (URISyntaxException badUri) {
      indexDir = Files.fileFromURL(indexLocation);
    }

    // open a writer on the index directory (third argument true here,
    // false in the methods that work on an already existing index)
    final IndexWriter indexWriter = new IndexWriter(indexDir.getAbsolutePath(), new LuceneAnalyzer(), true);
    try {
      if (corpus != null) {
        // Index-based walk with a live size() check: documents may be handed
        // to Factory.deleteResource while the loop is still running.
        for (int docIndex = 0; docIndex < corpus.size(); docIndex++) {
          final gate.Document currentDoc = corpus.get(docIndex);

          final Object docPersistenceId = currentDoc.getLRPersistenceId();
          final String docLabel;
          if (docPersistenceId == null) {
            docLabel = currentDoc.getName();
          } else {
            docLabel = docPersistenceId.toString();
          }
          System.out.print("Indexing : " + docLabel + " ...");

          final Object corpusPersistenceId = corpus.getLRPersistenceId();
          final String corpusLabel;
          if (corpusPersistenceId == null) {
            corpusLabel = corpus.getName();
          } else {
            corpusLabel = corpusPersistenceId.toString();
          }

          final List<gate.creole.annic.apache.lucene.document.Document> converted =
              getLuceneDocuments(corpusLabel, currentDoc, indexLocation.toString());
          if (converted != null) {
            for (gate.creole.annic.apache.lucene.document.Document entry : converted) {
              if (entry == null) {
                continue;
              }
              indexWriter.addDocument(entry);
            }
          }

          // documents that carry a persistence id are released once indexed
          if (currentDoc.getLRPersistenceId() != null) {
            gate.Factory.deleteResource(currentDoc);
          }
          System.out.println("Done");
        }
      }
    } finally {
      // the writer is closed whether or not indexing succeeded
      indexWriter.close();
    }
    writeParametersToDisk();
  } catch (java.io.IOException ioFailure) {
    throw new IndexException(ioFailure);
  }
}
Usage of gate.creole.annic.apache.lucene.index.IndexWriter in the gate-core project by GateNLP.
Class LuceneIndexer, method optimizeIndex.
/**
 * Optimizes the existing index.
 * <P>
 * The index directory is taken from the INDEX_LOCATION_URL entry of the
 * indexer parameters and converted to an absolute file path, as
 * {@code add} and {@code createIndex} do, before the writer is opened.
 *
 * @throws IndexException if an I/O error occurs while opening, optimizing
 *           or closing the index
 */
@Override
public void optimizeIndex() throws IndexException {
  try {
    URL indexLocation = (URL) parameters.get(Constants.INDEX_LOCATION_URL);
    File indexDir;
    try {
      indexDir = new File(indexLocation.toURI());
    } catch (URISyntaxException use) {
      indexDir = Files.fileFromURL(indexLocation);
    }
    // Pass a filesystem path, not the URL's string form ("file:/..."),
    // which is not the index directory when read as a path.
    String location = indexDir.getAbsolutePath();
    IndexWriter writer = new IndexWriter(location, new gate.creole.annic.lucene.LuceneAnalyzer(), false);
    try {
      writer.optimize();
    } finally {
      // make sure we close the writer, whatever happens
      writer.close();
    }
  } catch (java.io.IOException ioe) {
    throw new IndexException(ioe);
  }
}
Aggregations