use of gate.FeatureMap in project gate-core by GateNLP.
the class ParameterList method getDefaults.
// getInitimeDefaults()
/**
 * Collects default values for a list of parameter disjunctions.
 * <p>
 * Each element of {@code parameters} is a list of mutually alternative
 * (disjunctive) parameters. The members of a disjunction are examined in
 * order and the first one whose calculated default is non-null is stored in
 * the result under the parameter's name; the remaining members of that
 * disjunction are not examined.
 * <p>
 * When the last member of a disjunction has no default and no member of that
 * disjunction was optional, the {@code fullyDefaulted} field is set to
 * {@code false}. This method never sets that field to {@code true}.
 *
 * @param parameters the disjunctions to compute defaults for
 * @return a feature map from parameter name to default value
 * @throws ParameterException declared for the default-value calculation
 * @see #isFullyDefaulted()
 */
public FeatureMap getDefaults(List<List<Parameter>> parameters) throws ParameterException {
  FeatureMap result = Factory.newFeatureMap();

  for (List<Parameter> disjunction : parameters) {
    // becomes true as soon as one member of this disjunction is optional
    boolean anyOptional = false;

    for (Iterator<Parameter> members = disjunction.iterator(); members.hasNext();) {
      Parameter candidate = members.next();
      if (DEBUG) {
        Out.prln("Examining " + candidate);
      }

      // short-circuit: isOptional() is no longer consulted once the flag is set
      anyOptional = anyOptional || candidate.isOptional();

      Object value = candidate.calculateDefaultValue();
      if (value != null) {
        // first usable default wins; move on to the next disjunction
        result.put(candidate.getName(), value);
        break;
      }

      // the disjunction is exhausted without a default and nothing in it was optional
      if (!anyOptional && !members.hasNext()) {
        fullyDefaulted = false;
      }
    }
  }

  return result;
}
use of gate.FeatureMap in project gate-core by GateNLP.
the class SerialDataStore method adopt.
// delete(lr)
/**
 * Adopt a resource for persistence.
 * <p>
 * A resource that has no datastore yet is adopted by this one. If it is a
 * {@link Corpus}, a persistent {@code gate.corpora.SerialCorpusImpl} is
 * created through the {@link Factory} with the original corpus passed as the
 * {@code transientSource} feature, and that new resource is the one adopted
 * and returned. A resource that already belongs to this datastore is returned
 * unchanged.
 *
 * @param lr the language resource to adopt
 * @return the adopted resource; for a corpus this is the newly created
 *         persistent corpus, otherwise {@code lr} itself
 * @throws PersistenceException if {@code lr} already belongs to a different
 *         datastore
 * @throws GateRuntimeException if the persistent corpus cannot be
 *         instantiated; the underlying exception is attached as the cause
 */
@Override
public LanguageResource adopt(LanguageResource lr) throws PersistenceException {
  // ignore security info
  // check the LR's current DS
  DataStore currentDS = lr.getDataStore();

  if (currentDS != null) {
    if (currentDS.equals(this)) {
      // adopted already here
      return lr;
    }
    // someone else's child
    throw new PersistenceException("Can't adopt a resource which is already in a different datastore");
  }

  // an orphan - do the adoption
  LanguageResource res = lr;
  if (lr instanceof Corpus) {
    FeatureMap features1 = Factory.newFeatureMap();
    features1.put("transientSource", lr);
    try {
      // here we create the persistent LR via Factory, so it's registered
      // in GATE
      res = (LanguageResource) Factory.createResource("gate.corpora.SerialCorpusImpl", features1);
      // Here the transient corpus is not deleted from the CRI, because
      // this might not always be the desired behaviour
      // since we chose that it is for the GUI, this functionality is
      // now moved to the 'Save to' action code in NameBearerHandle
    } catch (gate.creole.ResourceInstantiationException ex) {
      // keep the original exception (and its stack trace) as the cause
      // instead of reducing it to its message
      throw (GateRuntimeException) new GateRuntimeException(ex.getMessage()).initCause(ex);
    }
  }
  res.setDataStore(this);
  // let the world know
  fireResourceAdopted(new DatastoreEvent(this, DatastoreEvent.RESOURCE_ADOPTED, lr, null));
  return res;
}
use of gate.FeatureMap in project gate-core by GateNLP.
the class CorpusBenchmarkTool method evaluateCorpus.
// generateCorpus
/**
 * Evaluates the stored (processed) documents against their clean and
 * human-annotated versions.
 * <p>
 * If {@code processedDir} is missing and stored-vs-marked evaluation was not
 * requested, the {@code isMarkedClean} field is switched on. When
 * {@code isMarkedStored} or {@code isMarkedClean} is set the work is delegated
 * to {@code evaluateMarkedStored} / {@code evaluateMarkedClean}. Otherwise
 * every {@code gate.corpora.DocumentImpl} id in the serial datastore at
 * {@code processedDir} is loaded, paired with the file of the same name in
 * {@code fileDir} and with its marked version (an {@code .xml} file in
 * {@code markedDir}, or a document from a datastore at {@code markedDir} when
 * {@code isMarkedDS} is set), and passed to {@code evaluateDocuments}. Loaded
 * documents are released afterwards on the Swing event thread.
 *
 * @param fileDir directory with the clean documents; the method returns
 *        immediately if it is null or does not exist
 * @param processedDir location of the serial datastore holding the processed
 *        documents
 * @param markedDir directory (or datastore location) of the human-annotated
 *        documents
 * @param errorDir directory for error output in more-info mode; when null a
 *        directory named {@code ERROR_DIR_NAME} under {@code currDir} is used
 * @throws GateRuntimeException wrapping any {@code MalformedURLException},
 *         {@code PersistenceException} or
 *         {@code ResourceInstantiationException} raised while processing
 */
protected void evaluateCorpus(File fileDir, File processedDir, File markedDir, File errorDir) {
  // 1. check if we have input files and the processed Dir
  if (fileDir == null || !fileDir.exists()) {
    return;
  }
  if (processedDir == null || !processedDir.exists()) {
    // if the user wants evaluation of marked and stored that's not possible
    if (isMarkedStored) {
      Out.prln("Cannot evaluate because no processed documents exist.");
      return;
    } else {
      isMarkedClean = true;
    }
  }
  // create the error directory or clean it up if needed
  File errDir = null;
  if (isMoreInfoMode) {
    errDir = errorDir;
    if (errDir == null) {
      errDir = new File(currDir, ERROR_DIR_NAME);
    } else {
      // get rid of the directory, coz we wants it clean
      if (!Files.rmdir(errDir)) {
        Out.prln("cannot delete old error directory: " + errDir);
      }
    }
    Out.prln("Create error directory: " + errDir + "<BR><BR>");
    errDir.mkdir();
  }
  // looked for marked texts only if the directory exists
  boolean processMarked = markedDir != null && markedDir.exists();
  if (!processMarked && (isMarkedStored || isMarkedClean)) {
    Out.prln("Cannot evaluate because no human-annotated documents exist.");
    return;
  }
  if (isMarkedStored) {
    evaluateMarkedStored(markedDir, processedDir, errDir);
    return;
  } else if (isMarkedClean) {
    evaluateMarkedClean(markedDir, fileDir, errDir);
    return;
  }
  // open the datastore and process each document
  try {
    // open the data store
    DataStore sds = Factory.openDataStore("gate.persist.SerialDataStore", processedDir.toURI().toURL().toExternalForm());
    List<String> lrIDs = sds.getLrIds("gate.corpora.DocumentImpl");
    for (int i = 0; i < lrIDs.size(); i++) {
      String docID = lrIDs.get(i);
      // Per-document state: declared inside the loop so that a document
      // whose clean or marked version is missing is never evaluated against
      // the (already released) documents of the previous iteration.
      Document cleanDoc = null;
      Document markedDoc = null;
      // read the stored document
      FeatureMap features = Factory.newFeatureMap();
      features.put(DataStore.DATASTORE_FEATURE_NAME, sds);
      features.put(DataStore.LR_ID_FEATURE_NAME, docID);
      FeatureMap hparams = Factory.newFeatureMap();
      // Gate.setHiddenAttribute(hparams, true);
      Document persDoc = (Document) Factory.createResource("gate.corpora.DocumentImpl", features, hparams);
      String persName = persDoc.getName();
      // strip the extension if there is one; names without a '.' are used whole
      int dotPos = persName.lastIndexOf('.');
      String baseName = dotPos >= 0 ? persName.substring(0, dotPos) : persName;
      if (isMoreInfoMode) {
        String errName = baseName + ".err";
        Out.prln("<H2>" + "<a href=\"err/" + errName + "\">" + persName + "</a>" + "</H2>");
      } else {
        Out.prln("<H2>" + persName + "</H2>");
      }
      File cleanDocFile = new File(fileDir, persName);
      // try reading the original document from clean
      if (!cleanDocFile.exists()) {
        Out.prln("Warning: Cannot find original document " + persName + " in " + fileDir);
      } else {
        FeatureMap params = Factory.newFeatureMap();
        params.put(Document.DOCUMENT_URL_PARAMETER_NAME, cleanDocFile.toURI().toURL());
        params.put(Document.DOCUMENT_ENCODING_PARAMETER_NAME, documentEncoding);
        // create the document
        cleanDoc = (Document) Factory.createResource("gate.corpora.DocumentImpl", params, hparams);
        cleanDoc.setName(persName);
      }
      // try finding the marked document
      if (!isMarkedDS) {
        File markedDocFile = new File(markedDir, baseName + ".xml");
        if (!processMarked || !markedDocFile.exists()) {
          Out.prln("Warning: Cannot find human-annotated document " + markedDocFile + " in " + markedDir);
        } else {
          FeatureMap params = Factory.newFeatureMap();
          params.put(Document.DOCUMENT_URL_PARAMETER_NAME, markedDocFile.toURI().toURL());
          params.put(Document.DOCUMENT_ENCODING_PARAMETER_NAME, documentEncoding);
          // create the document
          markedDoc = (Document) Factory.createResource("gate.corpora.DocumentImpl", params, hparams);
          markedDoc.setName(persName);
        }
      } else {
        // open marked from a DS
        // NOTE(review): this datastore is reopened for every stored document
        // and is not closed in this method, and non-matching documents loaded
        // below are not released - confirm whether that is intended.
        DataStore sds1 = Factory.openDataStore("gate.persist.SerialDataStore", markedDir.toURI().toURL().toExternalForm());
        List<String> lrIDs1 = sds1.getLrIds("gate.corpora.DocumentImpl");
        boolean found = false;
        int k = 0;
        // search for the marked doc with the same name
        while (k < lrIDs1.size() && !found) {
          String docID1 = lrIDs1.get(k);
          // read the stored document
          FeatureMap features1 = Factory.newFeatureMap();
          features1.put(DataStore.DATASTORE_FEATURE_NAME, sds1);
          features1.put(DataStore.LR_ID_FEATURE_NAME, docID1);
          Document tempDoc = (Document) Factory.createResource("gate.corpora.DocumentImpl", features1, hparams);
          // check whether this is our doc; a document without a source URL
          // feature simply does not match
          Object sourceUrl = tempDoc.getFeatures().get("gate.SourceURL");
          if (sourceUrl != null && sourceUrl.toString().endsWith(persName)) {
            found = true;
            markedDoc = tempDoc;
          } else {
            k++;
          }
        }
      }
      evaluateDocuments(persDoc, cleanDoc, markedDoc, errDir);
      // release whatever was loaded for this document, on the Swing event thread
      for (final Document doc : new Document[] {persDoc, cleanDoc, markedDoc}) {
        if (doc != null) {
          javax.swing.SwingUtilities.invokeLater(new Runnable() {
            @Override
            public void run() {
              Factory.deleteResource(doc);
            }
          });
        }
      }
    }
    // for loop through saved docs
    sds.close();
  } catch (java.net.MalformedURLException ex) {
    throw (GateRuntimeException) new GateRuntimeException("CorpusBenchmark: " + ex.getMessage()).initCause(ex);
  } catch (PersistenceException ex1) {
    throw (GateRuntimeException) new GateRuntimeException("CorpusBenchmark: " + ex1.getMessage()).initCause(ex1);
  } catch (ResourceInstantiationException ex2) {
    throw (GateRuntimeException) new GateRuntimeException("CorpusBenchmark: " + ex2.getMessage()).initCause(ex2);
  }
}
use of gate.FeatureMap in project gate-core by GateNLP.
the class CorpusBenchmarkTool method evaluateMarkedClean.
// evaluateMarkedStored
/**
 * Evaluates every plain file in {@code cleanDir} against its human-annotated
 * version.
 * <p>
 * For each file a clean document is created, its marked counterpart is
 * looked up (an {@code .xml} file of the same base name in {@code markedDir},
 * or a document in the datastore at {@code markedDir} when
 * {@code isMarkedDS} is set) and both are passed to
 * {@code evaluateDocuments} with a null stored document. Files that cannot be
 * loaded, or whose marked file cannot be found or loaded, are reported via
 * {@code Out.prln} and skipped. Every document created for a file is released
 * on the Swing event thread once that file has been handled, including when
 * the file is skipped.
 *
 * @param markedDir directory (or datastore location) of the human-annotated
 *        documents
 * @param cleanDir directory holding the clean documents
 * @param errDir error directory handed through to {@code evaluateDocuments}
 */
protected void evaluateMarkedClean(File markedDir, File cleanDir, File errDir) {
  // there is no stored (processed) version of the documents in this mode
  Document persDoc = null;
  File[] cleanDocs = cleanDir.listFiles();
  if (cleanDocs == null) {
    // listFiles() yields null when the path is not a directory or cannot be read
    Out.prln("Cannot list clean documents in: " + cleanDir.getAbsolutePath());
    return;
  }
  for (int i = 0; i < cleanDocs.length; i++) {
    File cleanFile = cleanDocs[i];
    if (!cleanFile.isFile()) {
      continue;
    }
    // Per-file state: reset on every iteration so that a marked document
    // found for an earlier file is never reused for this one.
    Document cleanDoc = null;
    Document markedDoc = null;
    try {
      // try reading the original document from clean
      FeatureMap params = Factory.newFeatureMap();
      try {
        params.put(Document.DOCUMENT_URL_PARAMETER_NAME, cleanFile.toURI().toURL());
      } catch (java.net.MalformedURLException ex) {
        Out.prln("Cannot create document from file: " + cleanFile.getAbsolutePath());
        continue;
      }
      // params.put(Document.DOCUMENT_ENCODING_PARAMETER_NAME, "");
      params.put(Document.DOCUMENT_ENCODING_PARAMETER_NAME, documentEncoding);
      FeatureMap hparams = Factory.newFeatureMap();
      // create the document
      try {
        cleanDoc = (Document) Factory.createResource("gate.corpora.DocumentImpl", params, hparams, cleanFile.getName());
      } catch (gate.creole.ResourceInstantiationException ex) {
        Out.prln("Cannot create document from file: " + cleanFile.getAbsolutePath());
        continue;
      }
      String fileName = cleanFile.getName();
      if (isMoreInfoMode) {
        // strip the extension if there is one; names without a '.' are used whole
        int fileDot = fileName.lastIndexOf('.');
        String errName = (fileDot >= 0 ? fileName.substring(0, fileDot) : fileName) + ".err";
        Out.prln("<H2>" + "<a href=\"err/" + errName + "\">" + fileName + "</a>" + "</H2>");
      } else {
        Out.prln("<H2>" + fileName + "</H2>");
      }
      // try finding the marked document
      if (!isMarkedDS) {
        String cleanName = cleanDoc.getName();
        int nameDot = cleanName.lastIndexOf('.');
        String markedName = (nameDot >= 0 ? cleanName.substring(0, nameDot) : cleanName) + ".xml";
        File markedDocFile = new File(markedDir, markedName);
        if (!markedDocFile.exists()) {
          Out.prln("Warning: Cannot find human-annotated document " + markedDocFile + " in " + markedDir);
          continue;
        }
        params = Factory.newFeatureMap();
        try {
          params.put(Document.DOCUMENT_URL_PARAMETER_NAME, markedDocFile.toURI().toURL());
        } catch (java.net.MalformedURLException ex) {
          Out.prln("Cannot create document from file: " + markedDocFile.getAbsolutePath());
          continue;
        }
        // params.put(Document.DOCUMENT_ENCODING_PARAMETER_NAME, "");
        params.put(Document.DOCUMENT_ENCODING_PARAMETER_NAME, documentEncoding);
        // create the document
        try {
          markedDoc = (Document) Factory.createResource("gate.corpora.DocumentImpl", params, hparams, cleanDoc.getName());
        } catch (gate.creole.ResourceInstantiationException ex) {
          Out.prln("Cannot create document from file: " + markedDocFile.getAbsolutePath());
          continue;
        }
      } else {
        try {
          // open marked from a DS
          // NOTE(review): this datastore is reopened for every clean file and
          // is not closed in this method, and non-matching documents loaded
          // below are not released - confirm whether that is intended.
          DataStore sds1 = Factory.openDataStore("gate.persist.SerialDataStore", markedDir.toURI().toURL().toExternalForm());
          List<String> lrIDs1 = sds1.getLrIds("gate.corpora.DocumentImpl");
          boolean found = false;
          int k = 0;
          // search for the marked doc with the same name
          while (k < lrIDs1.size() && !found) {
            String docID1 = lrIDs1.get(k);
            // read the stored document
            FeatureMap features1 = Factory.newFeatureMap();
            features1.put(DataStore.DATASTORE_FEATURE_NAME, sds1);
            features1.put(DataStore.LR_ID_FEATURE_NAME, docID1);
            Document tempDoc = (Document) Factory.createResource("gate.corpora.DocumentImpl", features1, hparams);
            // check whether this is our doc; a document without a source URL
            // feature simply does not match
            Object sourceUrl = tempDoc.getFeatures().get("gate.SourceURL");
            if (sourceUrl != null && sourceUrl.toString().endsWith(cleanDoc.getName())) {
              found = true;
              markedDoc = tempDoc;
            } else {
              k++;
            }
          }
        } catch (java.net.MalformedURLException ex) {
          Out.prln("Error finding marked directory " + markedDir.getAbsolutePath());
        } catch (gate.persist.PersistenceException ex1) {
          Out.prln("Error opening marked as a datastore (-marked_ds specified)");
        } catch (gate.creole.ResourceInstantiationException ex2) {
          Out.prln("Error opening marked as a datastore (-marked_ds specified)");
        }
      }
      try {
        evaluateDocuments(persDoc, cleanDoc, markedDoc, errDir);
      } catch (gate.creole.ResourceInstantiationException ex) {
        ex.printStackTrace();
        Out.prln("Evaluate failed on document: " + cleanDoc.getName());
      }
    } finally {
      // Runs on the normal path and on every 'continue' above, so a clean
      // document whose marked version is unavailable is released as well.
      for (final Document doc : new Document[] {cleanDoc, markedDoc}) {
        if (doc != null) {
          javax.swing.SwingUtilities.invokeLater(new Runnable() {
            @Override
            public void run() {
              Factory.deleteResource(doc);
            }
          });
        }
      }
    }
  }
  // for loop through clean docs
}
use of gate.FeatureMap in project gate-core by GateNLP.
the class XmlDocumentHandler method startElement.
// endDocument();
/**
 * Called when the SAX parser encounters the beginning of an XML element.
 * <p>
 * Any buffered character data is handed to {@code charactersAction} first.
 * The element's attributes are then collected into a feature map and a
 * {@code CustomObject} describing the element, with start and end both set to
 * the current length of {@code tmpDocContent}, is pushed onto the stack.
 * <p>
 * Attributes in the {@code Gate.URI} namespace are treated specially:
 * {@code gateId} becomes the id of the new object, {@code annotMaxId} is
 * stored in the {@code customObjectsId} field, and {@code matches} is parsed
 * as a ';'-separated list of integers and stored as a feature. Other
 * attributes in that namespace are not copied into the feature map.
 *
 * @param uri - namespace uri
 * @param localName - local, unprefixed element name
 * @param qName - fully qualified, prefixed element name
 * @param atts - the attributes attached to the element
 * @throws SAXException declared by the handler signature; nothing in this
 *         method body throws it directly
 */
@Override
public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
  // flush character data collected before this element
  if (readCharacterStatus) {
    readCharacterStatus = false;
    char[] pendingChars = new String(contentBuffer).toCharArray();
    charactersAction(pendingChars, 0, contentBuffer.length());
  }

  // report progress every ELEMENTS_RATE elements
  elements++;
  if (elements % ELEMENTS_RATE == 0) {
    fireStatusChangedEvent("Processed elements : " + elements);
  }

  FeatureMap features = Factory.newFeatureMap();

  // With namespace deserialization enabled and a namespace present, the
  // element is named by its local name; the prefix and uri are recorded as
  // features when the qualified name actually carries a prefix.
  String elementName = qName;
  if (deserializeNamespaceInfo && uri != null && !uri.isEmpty()) {
    elementName = localName;
    StringTokenizer qNameParts = new StringTokenizer(qName, ":");
    if (qNameParts.countTokens() > 1) {
      String prefix = qNameParts.nextToken();
      features.put(namespaceURIFeature, uri);
      features.put(namespacePrefixFeature, prefix);
    }
  }

  Integer elementId = null;
  for (int idx = 0; idx < atts.getLength(); idx++) {
    String value = atts.getValue(idx);
    String attributeUri = atts.getURI(idx);

    // ordinary attribute: copy it under its qualified name
    if (attributeUri == null || !Gate.URI.equals(attributeUri)) {
      features.put(atts.getQName(idx), value);
      continue;
    }

    // attribute in the GATE namespace
    String name = atts.getLocalName(idx);
    if ("gateId".equals(name)) {
      elementId = Integer.parseInt(value);
    } else if ("annotMaxId".equals(name)) {
      customObjectsId = Integer.parseInt(value);
    } else if ("matches".equals(name)) {
      // ';'-separated ids become a list of Integers
      List<Integer> matchIds = new ArrayList<Integer>();
      StringTokenizer idTokens = new StringTokenizer(value, ";");
      while (idTokens.hasMoreTokens()) {
        matchIds.add(Integer.valueOf(idTokens.nextToken()));
      }
      features.put(name, matchIds);
    }
  }

  // the new object starts, and for now also ends, at the current content length
  Long offset = Long.valueOf(tmpDocContent.length());
  stack.push(new CustomObject(elementId, elementName, features, offset, offset));
}
Aggregations