use of gate.creole.annic.IndexException in project gate-core by GateNLP.
the class LuceneDataStoreImpl method setIndexer.
/**
 * Sets the Indexer to be used for indexing the Datastore, asks it to create
 * the index and then writes the version file.
 * <P>
 * The version file receives the version number followed by a line separator,
 * and then the index directory's path relative to the storage directory.
 *
 * @param indexer the indexer to store on this datastore and to create the
 *          index with
 * @param indexParameters parameters handed to the indexer's createIndex
 *          call; the value stored under Constants.INDEX_LOCATION_URL is
 *          read and cast to a URL
 * @throws IndexException if the version file cannot be written, or if it is
 *           thrown by the indexer while creating the index
 */
@Override
public void setIndexer(Indexer indexer, Map<String, Object> indexParameters) throws IndexException {
  this.indexer = indexer;
  this.indexParameters = indexParameters;
  this.indexURL = (URL) this.indexParameters.get(Constants.INDEX_LOCATION_URL);
  this.indexer.createIndex(this.indexParameters);
  // dump the version file
  try {
    File versionFile = getVersionFile();
    OutputStreamWriter osw = new OutputStreamWriter(new FileOutputStream(versionFile));
    try {
      osw.write(versionNumber + Strings.getNl());
      String indexDirRelativePath = PersistenceManager.getRelativePath(storageDir.toURI().toURL(), indexURL);
      osw.write(indexDirRelativePath);
    } finally {
      // always release the file handle, even when a write or the relative
      // path computation fails part-way through
      osw.close();
    }
  } catch (IOException e) {
    throw new IndexException("couldn't write version file: " + e);
  }
}
use of gate.creole.annic.IndexException in project gate-core by GateNLP.
the class LuceneIndexer method createIndex.
/**
 * Builds a fresh index at the configured location and adds every document of
 * the corpus (if a corpus is set) to it.
 * <P>
 * For each corpus document the Lucene documents produced by
 * getLuceneDocuments are written to the index. Documents that have a
 * persistence id are passed to gate.Factory.deleteResource once they have
 * been indexed. After the writer is closed the parameters are written to
 * disk.
 *
 * @param indexParameters a map containing the values required to create an
 *          index. In the case of LuceneIndexManager the following values
 *          are required:
 *          <P>
 *          INDEX_LOCATION_URL - the URL where the index should be created
 *          <P>
 *          BASE_TOKEN_ANNOTATION_TYPE
 *          <P>
 *          INDEX_UNIT_ANNOTATION_TYPE
 *          <P>
 *          FEATURES_TO_EXCLUDE
 *          <P>
 *          FEATURES_TO_INCLUDE
 * @throws IndexException if an I/O error occurs while writing the index
 */
@Override
public void createIndex(Map<String, Object> indexParameters) throws IndexException {
  checkIndexParameters(indexParameters);
  // NOTE(review): the location is read from the 'parameters' field rather
  // than the argument; presumably checkIndexParameters stores it - confirm.
  URL indexLocation = (URL) parameters.get(Constants.INDEX_LOCATION_URL);
  try {
    // turn the index URL into a directory on the local file system
    File indexDir;
    try {
      indexDir = new File(indexLocation.toURI());
    } catch (URISyntaxException use) {
      indexDir = Files.fileFromURL(indexLocation);
    }

    // 'true' asks the writer to create the index
    IndexWriter writer = new IndexWriter(indexDir.getAbsolutePath(), new LuceneAnalyzer(), true);
    try {
      if (corpus != null) {
        // load the documents and add them one by one
        for (int docIndex = 0; docIndex < corpus.size(); docIndex++) {
          gate.Document gateDoc = corpus.get(docIndex);

          String idToUse;
          if (gateDoc.getLRPersistenceId() != null) {
            idToUse = gateDoc.getLRPersistenceId().toString();
          } else {
            idToUse = gateDoc.getName();
          }
          System.out.print("Indexing : " + idToUse + " ...");

          String corpusName;
          if (corpus.getLRPersistenceId() != null) {
            corpusName = corpus.getLRPersistenceId().toString();
          } else {
            corpusName = corpus.getName();
          }

          List<gate.creole.annic.apache.lucene.document.Document> luceneDocs = getLuceneDocuments(corpusName, gateDoc, indexLocation.toString());
          if (luceneDocs != null) {
            for (gate.creole.annic.apache.lucene.document.Document luceneDoc : luceneDocs) {
              if (luceneDoc != null) {
                writer.addDocument(luceneDoc);
              }
            }
          }

          // only documents that have a persistence id are released again
          if (gateDoc.getLRPersistenceId() != null) {
            gate.Factory.deleteResource(gateDoc);
          }
          System.out.println("Done");
        }
      }
    } finally {
      writer.close();
    }
    writeParametersToDisk();
  } catch (java.io.IOException ioe) {
    throw new IndexException(ioe);
  }
}
use of gate.creole.annic.IndexException in project gate-core by GateNLP.
the class LuceneIndexer method optimizeIndex.
/**
 * Optimizes the existing index.
 * <P>
 * Opens a writer on the index directory named by the INDEX_LOCATION_URL
 * parameter without re-creating the index, runs the optimize step and closes
 * the writer again.
 *
 * @throws IndexException if an I/O error occurs while opening, optimizing or
 *           closing the index
 */
@Override
public void optimizeIndex() throws IndexException {
  try {
    URL indexLocation = (URL) parameters.get(Constants.INDEX_LOCATION_URL);
    // Resolve the URL to a file system directory exactly as createIndex
    // does. The URL's string form (e.g. "file:/...") is not a directory
    // path and would not name the directory the index was created in.
    File indexDir;
    try {
      indexDir = new File(indexLocation.toURI());
    } catch (URISyntaxException use) {
      indexDir = Files.fileFromURL(indexLocation);
    }
    // 'false' opens the existing index instead of creating a new one
    IndexWriter writer = new IndexWriter(indexDir.getAbsolutePath(), new gate.creole.annic.lucene.LuceneAnalyzer(), false);
    try {
      writer.optimize();
    } finally {
      writer.close();
    }
  } catch (java.io.IOException ioe) {
    throw new IndexException(ioe);
  }
}
Aggregations