use of gate.Document in project gate-core by GateNLP.
the class NameBearerHandle method buildStaticPopupItems.
/**
 * Rebuilds {@code staticPopupItems}, the part of the popup menu that depends
 * only on the type of {@code target}.
 *
 * <p>The list is populated in a fixed order. {@code null} entries are added
 * between groups of items; presumably the consumer of the list renders them
 * as separators (NOTE(review): confirm against the code that reads
 * {@code staticPopupItems}).
 */
protected void buildStaticPopupItems() {
  // Fill a local alias; the field points at the same list from the start.
  final ArrayList<JComponent> items = new ArrayList<JComponent>();
  staticPopupItems = items;

  final boolean isController = target instanceof Controller;
  final boolean isPlainPr = target instanceof ProcessingResource && !isController;

  if (isPlainPr) {
    // processing resources that are not applications
    items.add(null);
    items.add(new XJMenuItem(new ReloadAction(), sListenerProxy));
    items.add(new XJMenuItem(new ApplicationWithPRAction(), sListenerProxy));
  } else if (target instanceof LanguageResource) {
    // language resources
    final LanguageResource lr = (LanguageResource) target;
    items.add(null);

    if (target instanceof Document) {
      items.add(new XJMenuItem(new CreateCorpusForDocAction(), sListenerProxy));
    }

    if (target instanceof gate.TextualDocument) {
      items.add(null);
      items.add(new DocumentExportMenu(this));
    } else if (target instanceof Corpus) {
      // the corpus actions rely on these two helper components
      corpusFiller = new CorpusFillerComponent();
      scfInputDialog = new SingleConcatenatedFileInputDialog();
      items.add(new XJMenuItem(new PopulateCorpusAction(), sListenerProxy));
      items.add(new XJMenuItem(new PopulateCorpusFromSingleConcatenatedFileAction(), sListenerProxy));
      items.add(null);
      items.add(new DocumentExportMenu(this));
    }

    // "save" is only offered for resources that already live in a datastore
    final boolean storedInDatastore = lr.getDataStore() != null;
    if (storedInDatastore) {
      items.add(new XJMenuItem(new SaveAction(), sListenerProxy));
    }

    // "save to" is offered for everything except annotation schemas
    final boolean isSchema = target instanceof AnnotationSchema;
    if (!isSchema) {
      items.add(new XJMenuItem(new SaveToAction(), sListenerProxy));
    }
  }

  if (isController) {
    // applications
    items.add(null);
    if (target instanceof SerialAnalyserController) {
      items.add(new XJMenuItem(new MakeConditionalAction(), sListenerProxy));
    }
    items.add(new XJMenuItem(new DumpToFileAction(), sListenerProxy));
    items.add(new XJMenuItem(new ExportApplicationAction(), sListenerProxy));
  }
}
use of gate.Document in project gate-core by GateNLP.
the class CorpusPersistence method extractDataFromSource.
/**
 * Populates this Persistence with the data that needs to be stored from the
 * original source object.
 *
 * <p>After delegating to the superclass, {@code docList} is filled with the
 * persistent representation of every document in the corpus when
 * {@code dsData} is {@code null}; otherwise {@code docList} is set to
 * {@code null}.
 *
 * @param source the object to extract data from; must be a {@link Corpus}
 * @throws UnsupportedOperationException if {@code source} is {@code null} or
 *         is not a {@link Corpus}
 * @throws PersistenceException if the superclass extraction or the
 *         conversion of a document to its persistent form fails
 */
@Override
public void extractDataFromSource(Object source) throws PersistenceException {
  // check input
  if (!(source instanceof Corpus)) {
    // A null source also ends up here; describe it without dereferencing it
    // so the caller gets the intended exception rather than an NPE.
    String actualType = (source == null) ? "null" : source.getClass().getName();
    throw new UnsupportedOperationException(getClass().getName() + " can only be used for " + Corpus.class.getName() + " objects!\n" + actualType + " is not a " + Corpus.class.getName());
  }
  Corpus corpus = (Corpus) source;
  super.extractDataFromSource(source);
  if (dsData == null) {
    // transient corpus; we still need to save the docs
    docList = new ArrayList<Serializable>();
    for (Document doc : corpus) {
      docList.add(PersistenceManager.getPersistentRepresentation(doc));
    }
  } else {
    // persistent corpus; it takes care of documents by itself
    docList = null;
  }
}
use of gate.Document in project gate-core by GateNLP.
the class CorpusBenchmarkTool method evaluateCorpus.
// generateCorpus
/**
 * Evaluates every document stored in the serial datastore at
 * {@code processedDir} against its clean and human-annotated counterparts.
 *
 * <p>When {@code isMarkedStored} or {@code isMarkedClean} is set (the latter
 * is switched on here if no processed directory exists), the work is
 * delegated to {@code evaluateMarkedStored} / {@code evaluateMarkedClean}.
 * Otherwise each stored document is loaded, paired with the file of the same
 * name in {@code fileDir} and with a marked document, and the three are
 * passed to {@code evaluateDocuments}. The marked document is the
 * {@code .xml} file in {@code markedDir}, or a datastore entry when
 * {@code isMarkedDS} is set.
 *
 * @param fileDir directory holding the original (clean) documents; nothing is
 *        done if it is {@code null} or does not exist
 * @param processedDir directory of the serial datastore with the processed
 *        documents
 * @param markedDir directory (or serial datastore, if {@code isMarkedDS})
 *        with the human-annotated documents; may be {@code null}
 * @param errorDir error directory used in more-info mode; when {@code null}
 *        a directory named {@code ERROR_DIR_NAME} under {@code currDir} is
 *        used
 * @throws GateRuntimeException wrapping any URL, persistence or resource
 *         instantiation failure
 */
protected void evaluateCorpus(File fileDir, File processedDir, File markedDir, File errorDir) {
  // 1. check if we have input files and the processed Dir
  if (fileDir == null || !fileDir.exists()) {
    return;
  }
  if (processedDir == null || !processedDir.exists()) {
    // if the user wants evaluation of marked and stored that's not possible
    if (isMarkedStored) {
      Out.prln("Cannot evaluate because no processed documents exist.");
      return;
    } else {
      isMarkedClean = true;
    }
  }
  // create the error directory or clean it up if needed
  File errDir = null;
  if (isMoreInfoMode) {
    errDir = errorDir;
    if (errDir == null) {
      errDir = new File(currDir, ERROR_DIR_NAME);
    } else {
      // get rid of the directory, because we want it clean
      if (!Files.rmdir(errDir)) {
        Out.prln("cannot delete old error directory: " + errDir);
      }
    }
    Out.prln("Create error directory: " + errDir + "<BR><BR>");
    errDir.mkdir();
  }
  // look for marked texts only if the directory exists
  boolean processMarked = markedDir != null && markedDir.exists();
  if (!processMarked && (isMarkedStored || isMarkedClean)) {
    Out.prln("Cannot evaluate because no human-annotated documents exist.");
    return;
  }
  if (isMarkedStored) {
    evaluateMarkedStored(markedDir, processedDir, errDir);
    return;
  } else if (isMarkedClean) {
    evaluateMarkedClean(markedDir, fileDir, errDir);
    return;
  }
  // open the datastore and process each document
  try {
    // open the data store
    DataStore sds = Factory.openDataStore("gate.persist.SerialDataStore", processedDir.toURI().toURL().toExternalForm());
    List<String> lrIDs = sds.getLrIds("gate.corpora.DocumentImpl");
    for (int i = 0; i < lrIDs.size(); i++) {
      String docID = lrIDs.get(i);
      // Fresh references for every stored document: a clean or marked
      // document from an earlier iteration must never be evaluated (or
      // deleted) again when the current one has no counterpart.
      Document persDoc = null;
      Document cleanDoc = null;
      Document markedDoc = null;
      DataStore markedStore = null;
      // read the stored document
      FeatureMap features = Factory.newFeatureMap();
      features.put(DataStore.DATASTORE_FEATURE_NAME, sds);
      features.put(DataStore.LR_ID_FEATURE_NAME, docID);
      FeatureMap hparams = Factory.newFeatureMap();
      persDoc = (Document) Factory.createResource("gate.corpora.DocumentImpl", features, hparams);
      String persName = persDoc.getName();
      // Name without its last extension; a name with no dot is used whole
      // instead of failing with an index error.
      int dotIdx = persName.lastIndexOf('.');
      String baseName = dotIdx < 0 ? persName : persName.substring(0, dotIdx);
      if (isMoreInfoMode) {
        String errName = baseName + ".err";
        Out.prln("<H2>" + "<a href=\"err/" + errName + "\">" + persName + "</a>" + "</H2>");
      } else {
        Out.prln("<H2>" + persName + "</H2>");
      }
      File cleanDocFile = new File(fileDir, persName);
      // try reading the original document from clean
      if (!cleanDocFile.exists()) {
        Out.prln("Warning: Cannot find original document " + persName + " in " + fileDir);
      } else {
        FeatureMap params = Factory.newFeatureMap();
        params.put(Document.DOCUMENT_URL_PARAMETER_NAME, cleanDocFile.toURI().toURL());
        params.put(Document.DOCUMENT_ENCODING_PARAMETER_NAME, documentEncoding);
        // create the document
        cleanDoc = (Document) Factory.createResource("gate.corpora.DocumentImpl", params, hparams);
        cleanDoc.setName(persName);
      }
      // try finding the marked document
      if (!isMarkedDS) {
        File markedDocFile = new File(markedDir, baseName + ".xml");
        if (!processMarked || !markedDocFile.exists()) {
          Out.prln("Warning: Cannot find human-annotated document " + markedDocFile + " in " + markedDir);
        } else {
          FeatureMap params = Factory.newFeatureMap();
          params.put(Document.DOCUMENT_URL_PARAMETER_NAME, markedDocFile.toURI().toURL());
          params.put(Document.DOCUMENT_ENCODING_PARAMETER_NAME, documentEncoding);
          // create the document
          markedDoc = (Document) Factory.createResource("gate.corpora.DocumentImpl", params, hparams);
          markedDoc.setName(persName);
        }
      } else if (!processMarked) {
        // mirror the file-based branch: no marked location means no marked
        // document, not a failure while dereferencing markedDir
        Out.prln("Warning: Cannot find human-annotated datastore " + markedDir);
      } else {
        // open marked from a DS
        markedStore = Factory.openDataStore("gate.persist.SerialDataStore", markedDir.toURI().toURL().toExternalForm());
        List<String> markedIds = markedStore.getLrIds("gate.corpora.DocumentImpl");
        // search for the marked doc with the same name
        for (int k = 0; k < markedIds.size() && markedDoc == null; k++) {
          // read the stored document
          FeatureMap markedFeatures = Factory.newFeatureMap();
          markedFeatures.put(DataStore.DATASTORE_FEATURE_NAME, markedStore);
          markedFeatures.put(DataStore.LR_ID_FEATURE_NAME, markedIds.get(k));
          Document tempDoc = (Document) Factory.createResource("gate.corpora.DocumentImpl", markedFeatures, hparams);
          // check whether this is our doc; a missing or non-String source
          // URL simply does not match
          Object sourceUrl = tempDoc.getFeatures().get("gate.SourceURL");
          if (sourceUrl instanceof String && ((String) sourceUrl).endsWith(persName)) {
            markedDoc = tempDoc;
          }
        }
      }
      evaluateDocuments(persDoc, cleanDoc, markedDoc, errDir);
      // release the documents of this iteration on the event dispatch thread
      for (final Document doc : new Document[] { persDoc, cleanDoc, markedDoc }) {
        if (doc != null) {
          javax.swing.SwingUtilities.invokeLater(new Runnable() {
            @Override
            public void run() {
              Factory.deleteResource(doc);
            }
          });
        }
      }
      // The marked datastore was opened for this document only; close it once
      // the evaluation is over so that one store is not left open per document.
      if (markedStore != null) {
        markedStore.close();
      }
    }
    // for loop through saved docs
    sds.close();
  } catch (java.net.MalformedURLException ex) {
    throw (GateRuntimeException) new GateRuntimeException("CorpusBenchmark: " + ex.getMessage()).initCause(ex);
  } catch (PersistenceException ex1) {
    throw (GateRuntimeException) new GateRuntimeException("CorpusBenchmark: " + ex1.getMessage()).initCause(ex1);
  } catch (ResourceInstantiationException ex2) {
    throw (GateRuntimeException) new GateRuntimeException("CorpusBenchmark: " + ex2.getMessage()).initCause(ex2);
  }
}
use of gate.Document in project gate-core by GateNLP.
the class CorpusBenchmarkTool method evaluateMarkedClean.
// evaluateMarkedStored
/**
 * Evaluates each regular file in {@code cleanDir} against its human-annotated
 * counterpart; no stored (processed) document is involved.
 *
 * <p>For every clean file a document is created and a marked document is
 * looked up. When {@code isMarkedDS} is not set, this is the file with the
 * same base name and an {@code .xml} extension in {@code markedDir}; otherwise
 * it is a document in the serial datastore at {@code markedDir} whose
 * {@code gate.SourceURL} feature ends with the clean document's name. Both are
 * passed to {@code evaluateDocuments}. Failures on a single file are reported
 * through {@code Out.prln} and do not stop the loop.
 *
 * @param markedDir directory (or serial datastore) with the human-annotated
 *        documents
 * @param cleanDir directory with the clean documents
 * @param errDir error directory handed on to {@code evaluateDocuments}
 */
protected void evaluateMarkedClean(File markedDir, File cleanDir, File errDir) {
  // there is never a stored document in this mode; kept as a typed null so
  // the evaluateDocuments call stays unchanged
  Document persDoc = null;
  File[] cleanDocs = cleanDir.listFiles();
  if (cleanDocs == null) {
    // listFiles() returns null when cleanDir is not a directory or cannot be read
    Out.prln("Cannot list clean documents in: " + cleanDir.getAbsolutePath());
    return;
  }
  for (int i = 0; i < cleanDocs.length; i++) {
    File cleanFile = cleanDocs[i];
    if (!cleanFile.isFile()) {
      continue;
    }
    // Fresh references per file: a marked document from an earlier iteration
    // must not be evaluated again against this file.
    Document cleanDoc = null;
    Document markedDoc = null;
    DataStore markedStore = null;
    try {
      // try reading the original document from clean
      FeatureMap params = Factory.newFeatureMap();
      try {
        params.put(Document.DOCUMENT_URL_PARAMETER_NAME, cleanFile.toURI().toURL());
      } catch (java.net.MalformedURLException ex) {
        Out.prln("Cannot create document from file: " + cleanFile.getAbsolutePath());
        continue;
      }
      params.put(Document.DOCUMENT_ENCODING_PARAMETER_NAME, documentEncoding);
      FeatureMap hparams = Factory.newFeatureMap();
      // create the document
      try {
        cleanDoc = (Document) Factory.createResource("gate.corpora.DocumentImpl", params, hparams, cleanFile.getName());
      } catch (gate.creole.ResourceInstantiationException ex) {
        Out.prln("Cannot create document from file: " + cleanFile.getAbsolutePath());
        continue;
      }
      String fileName = cleanFile.getName();
      if (isMoreInfoMode) {
        // swap the last extension for ".err"; a name with no dot is used whole
        int fileDot = fileName.lastIndexOf('.');
        String errName = (fileDot < 0 ? fileName : fileName.substring(0, fileDot)) + ".err";
        Out.prln("<H2>" + "<a href=\"err/" + errName + "\">" + fileName + "</a>" + "</H2>");
      } else {
        Out.prln("<H2>" + fileName + "</H2>");
      }
      // try finding the marked document
      String cleanName = cleanDoc.getName();
      if (!isMarkedDS) {
        int nameDot = cleanName.lastIndexOf('.');
        String markedName = (nameDot < 0 ? cleanName : cleanName.substring(0, nameDot)) + ".xml";
        File markedDocFile = new File(markedDir, markedName);
        if (!markedDocFile.exists()) {
          Out.prln("Warning: Cannot find human-annotated document " + markedDocFile + " in " + markedDir);
          continue;
        }
        params = Factory.newFeatureMap();
        try {
          params.put(Document.DOCUMENT_URL_PARAMETER_NAME, markedDocFile.toURI().toURL());
        } catch (java.net.MalformedURLException ex) {
          Out.prln("Cannot create document from file: " + markedDocFile.getAbsolutePath());
          continue;
        }
        params.put(Document.DOCUMENT_ENCODING_PARAMETER_NAME, documentEncoding);
        // create the document
        try {
          markedDoc = (Document) Factory.createResource("gate.corpora.DocumentImpl", params, hparams, cleanName);
        } catch (gate.creole.ResourceInstantiationException ex) {
          Out.prln("Cannot create document from file: " + markedDocFile.getAbsolutePath());
          continue;
        }
      } else {
        try {
          // open marked from a DS
          markedStore = Factory.openDataStore("gate.persist.SerialDataStore", markedDir.toURI().toURL().toExternalForm());
          List<String> markedIds = markedStore.getLrIds("gate.corpora.DocumentImpl");
          // search for the marked doc with the same name
          for (int k = 0; k < markedIds.size() && markedDoc == null; k++) {
            // read the stored document
            FeatureMap markedFeatures = Factory.newFeatureMap();
            markedFeatures.put(DataStore.DATASTORE_FEATURE_NAME, markedStore);
            markedFeatures.put(DataStore.LR_ID_FEATURE_NAME, markedIds.get(k));
            Document tempDoc = (Document) Factory.createResource("gate.corpora.DocumentImpl", markedFeatures, hparams);
            // check whether this is our doc; a missing or non-String source
            // URL simply does not match
            Object sourceUrl = tempDoc.getFeatures().get("gate.SourceURL");
            if (sourceUrl instanceof String && ((String) sourceUrl).endsWith(cleanName)) {
              markedDoc = tempDoc;
            }
          }
        } catch (java.net.MalformedURLException ex) {
          Out.prln("Error finding marked directory " + markedDir.getAbsolutePath());
        } catch (gate.persist.PersistenceException ex1) {
          Out.prln("Error opening marked as a datastore (-marked_ds specified)");
        } catch (gate.creole.ResourceInstantiationException ex2) {
          Out.prln("Error opening marked as a datastore (-marked_ds specified)");
        }
      }
      try {
        evaluateDocuments(persDoc, cleanDoc, markedDoc, errDir);
      } catch (gate.creole.ResourceInstantiationException ex) {
        ex.printStackTrace();
        Out.prln("Evaluate failed on document: " + cleanDoc.getName());
      }
    } finally {
      // Runs on every exit from the iteration, including the "continue"
      // paths above, so a clean document created before a skip is released.
      for (final Document doc : new Document[] { cleanDoc, markedDoc }) {
        if (doc != null) {
          javax.swing.SwingUtilities.invokeLater(new Runnable() {
            @Override
            public void run() {
              Factory.deleteResource(doc);
            }
          });
        }
      }
      // The marked datastore was opened for this file only and is closed
      // after the evaluation, since the marked document was loaded from it.
      if (markedStore != null) {
        try {
          markedStore.close();
        } catch (gate.persist.PersistenceException ex) {
          Out.prln("Error closing marked datastore " + markedDir.getAbsolutePath());
        }
      }
    }
  }
  // for loop through clean docs
}
use of gate.Document in project gate-core by GateNLP.
the class TestXml method testAnnotationConsistencyForSaveAsXml.
// testUnpackMarkup()
/**
 * Checks that annotations survive a round trip through GATE XML.
 *
 * <p>The test loads the {@code gateTestSaveAsXML.xml} test resource and adds
 * two {@code Test} annotations whose {@code matches} features refer to each
 * other. It then writes the document with {@code toXml()} to a temporary
 * file, loads that file back and verifies that:
 * <ul>
 * <li>the annotation ID generator of both documents is consistent with the
 * annotation IDs in use,</li>
 * <li>the annotations of the reloaded document are the same as the original
 * ones,</li>
 * <li>every annotation carrying a {@code matches} feature, and every
 * annotation it refers to, is still present and equal after the export and
 * import.</li>
 * </ul>
 *
 * @throws Exception if loading, saving or reloading the document fails
 */
public void testAnnotationConsistencyForSaveAsXml() throws Exception {
  // Load a document from the test repository
  String testDoc = gate.util.Files.getGateResourceAsString("gate.ac.uk/tests/xml/gateTestSaveAsXML.xml");
  Document origDoc = gate.Factory.newDocument(testDoc);
  // Verifies if the maximum annotation ID on the origDoc is less than the
  // Annotation ID generator of the document.
  verifyAnnotationIDGenerator(origDoc);
  // create a couple of annotations with features we can look at after a round trip to disc
  Integer ann1ID = origDoc.getAnnotations().add(0L, 10L, "Test", Factory.newFeatureMap());
  Integer ann2ID = origDoc.getAnnotations().add(15L, 20L, "Test", Factory.newFeatureMap());
  origDoc.getAnnotations().get(ann1ID).getFeatures().put("matches", Arrays.asList(ann2ID));
  origDoc.getAnnotations().get(ann2ID).getFeatures().put("matches", Arrays.asList(ann1ID));
  // Export the Gate document called origDoc as XML, into a temp file,
  // using the working encoding
  File xmlFile = Files.writeTempFile(origDoc.toXml(), workingEncoding);
  try {
    System.out.println("Saved to temp file :" + xmlFile.toURI().toURL());
    Document reloadedDoc = gate.Factory.newDocument(xmlFile.toURI().toURL(), workingEncoding);
    // Verifies if the maximum annotation ID on the reloadedDoc is less than
    // the Annotation ID generator of the document.
    verifyAnnotationIDGenerator(reloadedDoc);
    // Verify if the annotations are identical in the two docs.
    Map<Integer, Annotation> origAnnotMap = buildID2AnnotMap(origDoc);
    Map<Integer, Annotation> reloadedAnnMap = buildID2AnnotMap(reloadedDoc);
    // Verifies if the reloaded annotations are the same as the original ones
    verifyIDConsistency(origAnnotMap, reloadedAnnMap);
    // Build the original Matches map: ID -> List of IDs
    Map<Integer, List<Integer>> origMatchesMap = buildMatchesMap(origDoc);
    for (Map.Entry<Integer, List<Integer>> entry : origMatchesMap.entrySet()) {
      Integer id = entry.getKey();
      Annotation origAnnot = origAnnotMap.get(id);
      assertTrue("Couldn't find an original annot with ID=" + id, origAnnot != null);
      Annotation reloadedAnnot = reloadedAnnMap.get(id);
      assertTrue("Couldn't find a reloaded annot with ID=" + id, reloadedAnnot != null);
      compareAnnot(origAnnot, reloadedAnnot);
      // Iterate through the matches list and repeat the comparison
      for (Integer matchId : entry.getValue()) {
        Annotation origA = origAnnotMap.get(matchId);
        assertTrue("Couldn't find an original annot with ID=" + matchId, origA != null);
        Annotation reloadedA = reloadedAnnMap.get(matchId);
        assertTrue("Couldn't find a reloaded annot with ID=" + matchId, reloadedA != null);
        compareAnnot(origA, reloadedA);
      }
    }
  } finally {
    // Clean up the XML file even when a check above fails, so failing runs
    // do not leave temp files behind.
    xmlFile.delete();
  }
}
Aggregations