Use of gate.creole.ResourceInstantiationException in project gate-core by GateNLP.
From the class CorpusBenchmarkTool, method generateCorpus.
// setStartDirectory
/**
 * Processes every regular file found directly in {@code fileDir} and stores
 * the resulting documents in a newly created serial datastore.
 *
 * <p>When {@code outputDir} is {@code null} the datastore is created in
 * {@code PROCESSED_DIR_NAME} under {@code currDir}; otherwise an attempt is
 * made to remove {@code outputDir} first, because the datastore needs a clean
 * directory. Each file is loaded as a document using {@code documentEncoding},
 * passed to {@link #processDocument(Document)}, adopted by the datastore and
 * synced. The in-memory resources are released afterwards through
 * {@code SwingUtilities.invokeLater}.
 *
 * @param fileDir directory holding the input files; nothing is done when it
 *          is {@code null}
 * @param outputDir directory for the datastore, or {@code null} for the
 *          default location
 * @throws GateRuntimeException wrapping any malformed URL, persistence or
 *           resource instantiation failure
 */
protected void generateCorpus(File fileDir, File outputDir) {
  // 1. check if we have input files
  if (fileDir == null) {
    return;
  }
  // 2. create the output directory or clean it up if needed
  File outDir = outputDir;
  if (outputDir == null) {
    outDir = new File(currDir, PROCESSED_DIR_NAME);
  } else if (!Files.rmdir(outDir)) {
    // get rid of the directory, coz datastore wants it clean
    Out.prln("cannot delete old output directory: " + outDir);
  }
  outDir.mkdir();
  // 3. create the datastore and process each document
  try {
    SerialDataStore sds = new SerialDataStore(outDir.toURI().toURL().toString());
    sds.create();
    sds.open();
    boolean allStored = false;
    try {
      // listFiles() yields null when the path is not a directory or cannot be read
      File[] files = fileDir.listFiles();
      if (files == null) {
        Out.prln("cannot list input directory: " + fileDir);
      } else {
        for (File file : files) {
          if (!file.isFile()) {
            continue;
          }
          java.net.URL docUrl = file.toURI().toURL();
          Out.prln("Processing and storing document: " + docUrl + "<P>");
          FeatureMap params = Factory.newFeatureMap();
          params.put(Document.DOCUMENT_URL_PARAMETER_NAME, docUrl);
          params.put(Document.DOCUMENT_ENCODING_PARAMETER_NAME, documentEncoding);
          FeatureMap features = Factory.newFeatureMap();
          // create the document
          final Document doc = (Document) Factory.createResource("gate.corpora.DocumentImpl", params, features);
          doc.setName(file.getName());
          processDocument(doc);
          final LanguageResource lr = sds.adopt(doc);
          sds.sync(lr);
          // release both the transient and the persistent copy once stored
          javax.swing.SwingUtilities.invokeLater(new Runnable() {
            @Override
            public void run() {
              Factory.deleteResource(doc);
              Factory.deleteResource(lr);
            }
          });
        }
      }
      allStored = true;
    } finally {
      // close the datastore on every path, including failures inside the loop
      try {
        sds.close();
      } catch (PersistenceException closeEx) {
        if (allStored) {
          throw closeEx;
        } else {
          // an earlier exception is already propagating; do not hide it
          Out.prln("cannot close datastore: " + outDir);
        }
      }
    }
  } catch (java.net.MalformedURLException ex) {
    throw (GateRuntimeException) new GateRuntimeException("CorpusBenchmark: " + ex.getMessage()).initCause(ex);
  } catch (PersistenceException ex1) {
    throw (GateRuntimeException) new GateRuntimeException("CorpusBenchmark: " + ex1.getMessage()).initCause(ex1);
  } catch (ResourceInstantiationException ex2) {
    throw (GateRuntimeException) new GateRuntimeException("CorpusBenchmark: " + ex2.getMessage()).initCause(ex2);
  }
}
Use of gate.creole.ResourceInstantiationException in project gate-core by GateNLP.
From the class CorpusBenchmarkTool, method processDocument.
// evaluateMarkedClean
/**
 * Runs {@code application} over a single document.
 *
 * <p>If the application is a {@code CorpusController}, the document is placed
 * in a temporary corpus that is set on the controller for the run and deleted
 * afterwards, whether or not the run succeeds. Otherwise the document is
 * assigned to the {@code "document"} parameter of every processing resource
 * returned by {@code application.getPRs()} before the application is executed.
 *
 * @param doc the document to process
 * @throws RuntimeException wrapping a {@code ResourceInstantiationException}
 *           or {@code ExecutionException} raised while running
 */
protected void processDocument(Document doc) {
  try {
    if (application instanceof CorpusController) {
      Corpus tempCorpus = Factory.newCorpus("temp");
      try {
        tempCorpus.add(doc);
        ((CorpusController) application).setCorpus(tempCorpus);
        application.execute();
      } finally {
        // always release the temporary corpus, even when execution fails
        Factory.deleteResource(tempCorpus);
      }
    } else {
      Iterator<ProcessingResource> iter = application.getPRs().iterator();
      while (iter.hasNext()) {
        iter.next().setParameterValue("document", doc);
      }
      application.execute();
    }
  } catch (ResourceInstantiationException ex) {
    throw new RuntimeException("Error executing application: " + ex.getMessage(), ex);
  } catch (ExecutionException ex) {
    throw new RuntimeException("Error executing application: " + ex.getMessage(), ex);
  }
}
Use of gate.creole.ResourceInstantiationException in project gate-core by GateNLP.
From the class CorpusBenchmarkTool, method evaluateMarkedStored.
// evaluateCorpus
/**
 * Evaluates every document stored in the serial datastore at
 * {@code storedDir} against its human-annotated counterpart.
 *
 * <p>For each stored document a heading is printed (linking to the matching
 * {@code .err} file when {@code isMoreInfoMode} is set). The marked document
 * is then looked up either as an {@code .xml} file in {@code markedDir} or,
 * when {@code isMarkedDS} is set, in a serial datastore at {@code markedDir}
 * by comparing the end of its {@code gate.SourceURL} feature with the stored
 * document's name. The pair is handed to {@code evaluateDocuments}; the marked
 * document is {@code null} when none was found. Loaded documents are released
 * through {@code SwingUtilities.invokeLater}.
 *
 * @param markedDir directory (or datastore directory) with the marked documents
 * @param storedDir directory of the datastore holding the stored documents
 * @param errDir passed through to {@code evaluateDocuments}
 * @throws GateRuntimeException wrapping any malformed URL, persistence or
 *           resource instantiation failure on the stored datastore
 */
protected void evaluateMarkedStored(File markedDir, File storedDir, File errDir) {
  // no clean document is available in this mode; evaluateDocuments receives null
  final Document cleanDoc = null;
  // open the datastore and process each document
  try {
    DataStore sds = Factory.openDataStore("gate.persist.SerialDataStore", storedDir.toURI().toURL().toExternalForm());
    boolean allEvaluated = false;
    try {
      List<String> lrIDs = sds.getLrIds("gate.corpora.DocumentImpl");
      for (String docID : lrIDs) {
        // start each iteration without a marked document, so that one found
        // for an earlier stored document is never reused for this one
        Document markedDoc = null;
        // read the stored document
        FeatureMap features = Factory.newFeatureMap();
        features.put(DataStore.DATASTORE_FEATURE_NAME, sds);
        features.put(DataStore.LR_ID_FEATURE_NAME, docID);
        FeatureMap hparams = Factory.newFeatureMap();
        Document persDoc = (Document) Factory.createResource("gate.corpora.DocumentImpl", features, hparams);
        String persName = persDoc.getName();
        try {
          if (isMoreInfoMode) {
            String errName = replaceExtension(persName, ".err");
            Out.prln("<H2>" + "<a href=\"err/" + errName + "\">" + persName + "</a>" + "</H2>");
          } else {
            Out.prln("<H2>" + persName + "</H2>");
          }
          if (!this.isMarkedDS) {
            // try finding the marked document as file
            File markedDocFile = new File(markedDir, replaceExtension(persName, ".xml"));
            if (!markedDocFile.exists()) {
              Out.prln("Warning: Cannot find human-annotated document " + markedDocFile + " in " + markedDir);
            } else {
              FeatureMap params = Factory.newFeatureMap();
              params.put(Document.DOCUMENT_URL_PARAMETER_NAME, markedDocFile.toURI().toURL());
              params.put(Document.DOCUMENT_ENCODING_PARAMETER_NAME, documentEncoding);
              // create the document
              markedDoc = (Document) Factory.createResource("gate.corpora.DocumentImpl", params, hparams);
              markedDoc.setName(persName);
            }
          } else {
            try {
              // open marked from a DS
              // NOTE(review): this datastore is reopened for every stored
              // document and is not closed anywhere in this method.
              DataStore sds1 = Factory.openDataStore("gate.persist.SerialDataStore", markedDir.toURI().toURL().toExternalForm());
              List<String> lrIDs1 = sds1.getLrIds("gate.corpora.DocumentImpl");
              // search for the marked doc with the same name
              for (int k = 0; k < lrIDs1.size() && markedDoc == null; k++) {
                FeatureMap features1 = Factory.newFeatureMap();
                features1.put(DataStore.DATASTORE_FEATURE_NAME, sds1);
                features1.put(DataStore.LR_ID_FEATURE_NAME, lrIDs1.get(k));
                Document tempDoc = (Document) Factory.createResource("gate.corpora.DocumentImpl", features1, hparams);
                // a missing or non-string source URL simply does not match
                Object sourceUrl = tempDoc.getFeatures().get("gate.SourceURL");
                if (sourceUrl instanceof String && ((String) sourceUrl).endsWith(persName)) {
                  markedDoc = tempDoc;
                } else {
                  // not our document: release it instead of leaving it loaded
                  deleteResourceLater(tempDoc);
                }
              }
            } catch (java.net.MalformedURLException ex) {
              Out.prln("Error finding marked directory " + markedDir.getAbsolutePath());
            } catch (gate.persist.PersistenceException ex1) {
              Out.prln("Error opening marked as a datastore (-marked_ds specified)");
            } catch (gate.creole.ResourceInstantiationException ex2) {
              Out.prln("Error opening marked as a datastore (-marked_ds specified)");
            }
          }
          evaluateDocuments(persDoc, cleanDoc, markedDoc, errDir);
        } finally {
          // release whatever was loaded for this iteration
          deleteResourceLater(persDoc);
          deleteResourceLater(markedDoc);
        }
      }
      allEvaluated = true;
    } finally {
      // close the datastore on every path, including failures inside the loop
      try {
        sds.close();
      } catch (PersistenceException closeEx) {
        if (allEvaluated) {
          throw closeEx;
        } else {
          // an earlier exception is already propagating; do not hide it
          Out.prln("cannot close datastore: " + storedDir);
        }
      }
    }
  } catch (java.net.MalformedURLException ex) {
    throw (GateRuntimeException) new GateRuntimeException("CorpusBenchmark: " + ex.getMessage()).initCause(ex);
  } catch (PersistenceException ex1) {
    throw (GateRuntimeException) new GateRuntimeException("CorpusBenchmark: " + ex1.getMessage()).initCause(ex1);
  } catch (ResourceInstantiationException ex2) {
    throw (GateRuntimeException) new GateRuntimeException("CorpusBenchmark: " + ex2.getMessage()).initCause(ex2);
  }
}

/**
 * Replaces everything from the last {@code '.'} of {@code name} with
 * {@code newExtension}; the extension is appended when the name has no dot.
 */
private static String replaceExtension(String name, String newExtension) {
  int lastDot = name.lastIndexOf('.');
  String base = lastDot < 0 ? name : name.substring(0, lastDot);
  return base + newExtension;
}

/**
 * Schedules deletion of {@code doc} through {@code SwingUtilities.invokeLater};
 * does nothing for {@code null}.
 */
private static void deleteResourceLater(final Document doc) {
  if (doc == null) {
    return;
  }
  javax.swing.SwingUtilities.invokeLater(new Runnable() {
    @Override
    public void run() {
      Factory.deleteResource(doc);
    }
  });
}
Use of gate.creole.ResourceInstantiationException in project gate-core by GateNLP.
From the class SerialCorpusImpl, method get.
/**
 * Returns the document at {@code index}, loading it from the datastore the
 * first time it is requested.
 *
 * @param index position of the document in the corpus
 * @return the document, or {@code null} when {@code index} is not below the
 *         number of entries in {@code docDataList}
 * @throws GateRuntimeException if the document cannot be instantiated from
 *           the datastore
 */
@Override
public Document get(int index) {
  if (docDataList.size() <= index) {
    return null;
  }
  Document cached = documents.get(index);
  if (DEBUG) {
    Out.prln("SerialCorpusImpl: get(): index " + index + "result: " + cached);
  }
  if (cached != null) {
    return cached;
  }
  // nothing in memory for this slot yet, so fetch the document from the DS
  FeatureMap loadParams = Factory.newFeatureMap();
  loadParams.put(DataStore.DATASTORE_FEATURE_NAME, this.dataStore);
  try {
    loadParams.put(DataStore.LR_ID_FEATURE_NAME, docDataList.get(index).getPersistentID());
    Document loaded = (Document) Factory.createResource(docDataList.get(index).getClassType(), loadParams);
    if (DEBUG) {
      Out.prln("Loaded document :" + loaded.getName());
    }
    // remember the instantiated document so later calls reuse it
    documents.set(index, loaded);
    return loaded;
  } catch (ResourceInstantiationException ex) {
    Err.prln("Error reading document inside a serialised corpus.");
    throw new GateRuntimeException(ex);
  }
}
Aggregations