use of gate.FeatureMap in project gate-core by GateNLP.
the class SerialCorpusImpl method get.
/**
 * Returns the document held at the given position of this corpus,
 * loading it from the datastore the first time it is asked for.
 * <p>
 * If the in-memory list already holds a document at that position it is
 * returned directly. Otherwise the persistent ID and class type recorded
 * for that position are used to instantiate the document through the
 * {@link Factory}; the loaded instance is stored back in the in-memory
 * list before being returned.
 * <p>
 * No lower-bound check is made here: a negative index is passed
 * straight through to the underlying list.
 *
 * @param index position of the requested document
 * @return the document at that position, or <code>null</code> when
 *         <code>index</code> is not smaller than the number of recorded
 *         document entries
 * @throws GateRuntimeException if the document cannot be instantiated
 *           from the datastore
 */
@Override
public Document get(int index) {
  // positions beyond the recorded entries yield null rather than an exception
  if (index >= docDataList.size()) {
    return null;
  }

  Document cached = documents.get(index);
  if (DEBUG) {
    Out.prln("SerialCorpusImpl: get(): index " + index + "result: " + cached);
  }
  if (cached != null) {
    return cached;
  }

  // nothing in memory for this slot yet, so the document must come from the DS
  FeatureMap loadParams = Factory.newFeatureMap();
  loadParams.put(DataStore.DATASTORE_FEATURE_NAME, this.dataStore);
  try {
    loadParams.put(DataStore.LR_ID_FEATURE_NAME, docDataList.get(index).getPersistentID());
    Document loaded = (Document) Factory.createResource(docDataList.get(index).getClassType(), loadParams);
    if (DEBUG) {
      Out.prln("Loaded document :" + loaded.getName());
    }
    // remember the instantiated version so later calls find it in memory
    documents.set(index, loaded);
    return loaded;
  } catch (ResourceInstantiationException ex) {
    Err.prln("Error reading document inside a serialised corpus.");
    throw new GateRuntimeException(ex);
  }
}
use of gate.FeatureMap in project gate-core by GateNLP.
the class DocumentStaxUtils method writeXcesAnnotations.
/**
 * Writes the given annotations to an XMLStreamWriter as an XCES
 * <code>cesAna</code> element. The writer is <i>not</i> closed here;
 * closing it, and emitting an XML declaration if one is wanted, is the
 * caller's job.
 * <p>
 * The annotations are copied and sorted with
 * <code>LONGEST_FIRST_OFFSET_COMPARATOR</code> before being written, so
 * the collection passed in is not reordered. Each annotation becomes a
 * <code>struct</code> element carrying <code>type</code>,
 * <code>from</code> and <code>to</code> attributes; each feature other
 * than <code>isEmptyAndSpan</code> becomes a nested <code>feat</code>
 * element.
 * <p>
 * Characters in feature values that are illegal in XML are replaced by
 * {@link #INVALID_CHARACTER_REPLACEMENT} (a space). Feature <i>names</i>
 * and annotation types are written unmodified - an illegal character in
 * one of these will cause the serialization to fail.
 *
 * @param annotations the annotations to save, typically an
 *          AnnotationSet
 * @param xsw the XMLStreamWriter to write to
 * @param includeId should we include the annotation IDs (as the "n"
 *          attribute on each <code>struct</code>)?
 * @throws XMLStreamException if writing to <code>xsw</code> fails
 */
public static void writeXcesAnnotations(Collection<Annotation> annotations, XMLStreamWriter xsw, boolean includeId) throws XMLStreamException {
  final String indent = " ";
  final String indentMore = indent + indent;

  // sort a private copy so the caller's collection is left as it was
  List<Annotation> sorted = new ArrayList<Annotation>(annotations);
  Collections.sort(sorted, LONGEST_FIRST_OFFSET_COMPARATOR);

  // root element, bound to the XCES namespace as the default namespace
  xsw.setDefaultNamespace(XCES_NAMESPACE);
  xsw.writeStartElement(XCES_NAMESPACE, "cesAna");
  xsw.writeDefaultNamespace(XCES_NAMESPACE);
  xsw.writeAttribute("version", XCES_VERSION);
  newLine(xsw);

  for (Annotation annot : sorted) {
    long from = annot.getStartNode().getOffset().longValue();
    long to = annot.getEndNode().getOffset().longValue();
    FeatureMap features = annot.getFeatures();
    boolean hasFeatures = features != null && features.size() != 0;

    xsw.writeCharacters(indent);
    // an annotation without features is written as a self-closing struct
    if (hasFeatures) {
      xsw.writeStartElement(XCES_NAMESPACE, "struct");
    } else {
      xsw.writeEmptyElement(XCES_NAMESPACE, "struct");
    }
    xsw.writeAttribute("type", annot.getType());
    xsw.writeAttribute("from", String.valueOf(from));
    xsw.writeAttribute("to", String.valueOf(to));
    if (includeId) {
      // the annotation ID travels as the "n" attribute
      xsw.writeAttribute("n", String.valueOf(annot.getId()));
    }
    newLine(xsw);

    if (!hasFeatures) {
      continue;
    }

    for (Map.Entry<Object, Object> feature : features.entrySet()) {
      Object featureName = feature.getKey();
      if ("isEmptyAndSpan".equals(featureName)) {
        // this feature is deliberately left out of the output
        continue;
      }
      xsw.writeCharacters(indentMore);
      xsw.writeEmptyElement(XCES_NAMESPACE, "feat");
      xsw.writeAttribute("name", String.valueOf(featureName));
      xsw.writeAttribute("value", replaceXMLIllegalCharactersInString(String.valueOf(feature.getValue())));
      newLine(xsw);
    }

    // close the struct opened for this annotation
    xsw.writeCharacters(indent);
    xsw.writeEndElement();
    newLine(xsw);
  }

  // close cesAna
  xsw.writeEndElement();
  newLine(xsw);
}
use of gate.FeatureMap in project gate-core by GateNLP.
the class DocumentStaxUtils method readRelationSet.
/**
 * Reads consecutive <code>Relation</code> elements from the stream and
 * adds each one to the given relation set.
 * <p>
 * Reading continues for as long as the next tag is a start element; every
 * such element must be named <code>Relation</code> and must carry
 * <code>Type</code>, <code>Id</code> and <code>Members</code> attributes.
 * <code>Id</code> must be an integer and <code>Members</code> a
 * <code>;</code>-separated list of integers. The first child must be a
 * <code>UserData</code> element containing text only; if that text is
 * non-empty it is decoded through an {@link ObjectWrapper} and stored as
 * the relation's user data. The relation's features are then read with
 * <code>readFeatureMap</code>.
 *
 * @param xsr the reader, positioned so that the next tag is either the
 *          first <code>Relation</code> start tag or the end tag of the
 *          enclosing element
 * @param relations the set that receives the relations read
 * @param allAnnotIds not consulted by this method
 * @throws XMLStreamException if the XML is not in the expected shape, a
 *           required attribute is missing, or <code>Id</code> /
 *           <code>Members</code> are not valid integers
 */
public static void readRelationSet(XMLStreamReader xsr, RelationSet relations, Set<Integer> allAnnotIds) throws XMLStreamException {
  while (xsr.nextTag() == XMLStreamConstants.START_ELEMENT) {
    xsr.require(XMLStreamConstants.START_ELEMENT, null, "Relation");
    String type = xsr.getAttributeValue(null, "Type");
    String idString = xsr.getAttributeValue(null, "Id");
    String memberString = xsr.getAttributeValue(null, "Members");
    if (memberString == null)
      throw new XMLStreamException("A relation must have members");
    if (type == null)
      throw new XMLStreamException("A relation must have a type");
    if (idString == null)
      throw new XMLStreamException("A relation must have an id");

    // Convert the numeric attributes while the reader is still on the
    // Relation start tag, so a malformed value is reported as a stream
    // error pointing at the offending element rather than escaping as an
    // unchecked NumberFormatException.
    int id;
    int[] members;
    try {
      id = Integer.parseInt(idString);
      String[] memberStrings = memberString.split(";");
      members = new int[memberStrings.length];
      for (int i = 0; i < members.length; ++i) {
        members[i] = Integer.parseInt(memberStrings[i]);
      }
    } catch (NumberFormatException nfe) {
      throw new XMLStreamException("Relation id and members must be integers (Id=\"" + idString + "\", Members=\"" + memberString + "\")", xsr.getLocation(), nfe);
    }

    xsr.nextTag();
    xsr.require(XMLStreamConstants.START_ELEMENT, null, "UserData");
    // get the string representation of the user data
    StringBuilder stringRep = new StringBuilder(1024);
    int eventType;
    while ((eventType = xsr.next()) != XMLStreamConstants.END_ELEMENT) {
      switch(eventType) {
        case XMLStreamConstants.CHARACTERS:
        case XMLStreamConstants.CDATA:
          stringRep.append(xsr.getTextCharacters(), xsr.getTextStart(), xsr.getTextLength());
          break;
        case XMLStreamConstants.START_ELEMENT:
          throw new XMLStreamException("Elements not allowed within " + "user data.", xsr.getLocation());
        default:
          // any other event type inside UserData is skipped
          break;
      }
    }
    xsr.require(XMLStreamConstants.END_ELEMENT, null, "UserData");

    FeatureMap features = readFeatureMap(xsr);
    Relation r = new SimpleRelation(id, type, members);
    r.setFeatures(features);
    // empty user data text leaves the relation's user data unset
    if (stringRep.length() > 0) {
      ObjectWrapper wrapper = new ObjectWrapper(stringRep.toString());
      r.setUserData(wrapper.getValue());
    }
    relations.add(r);
  }
}
use of gate.FeatureMap in project gate-core by GateNLP.
the class UimaDocumentFormat method unpackCasMarkup.
/**
 * Convert UIMA CAS markups to GATE markups.
 * <p>
 * Works on the "Original markups" annotation set of the document, which
 * must contain either a <code>CAS</code> annotation (XCAS) or an
 * <code>xmi:XMI</code> annotation (XMI CAS). The steps are:
 * <ol>
 * <li>for list/array typed annotations, the text they span is stored in
 * an <code>elements</code> feature;</li>
 * <li>the document content is replaced by the concatenated
 * <code>sofaString</code> features of the Sofa annotations;</li>
 * <li>Sofa, CAS root, <code>cas:NULL</code> and View annotations are
 * removed, remembering each View's member ids;</li>
 * <li>every remaining annotation is either re-created at its
 * <code>begin</code>/<code>end</code> offsets (and copied into a
 * <code>CasView<i>n</i></code> set for each view listing its id), or,
 * when it has no offsets, flattened into document features; the
 * original annotation is then removed.</li>
 * </ol>
 *
 * @param doc XML document already parsed
 * @throws DocumentFormatException if the document is neither XCAS nor
 *           XMI CAS, or if an annotation cannot be created from the
 *           offsets found in the markup
 */
private void unpackCasMarkup(Document doc) throws DocumentFormatException {
  // both names refer to the same set: annotations are rewritten in place
  AnnotationSet inputAS = doc.getAnnotations("Original markups");
  AnnotationSet outputAS = doc.getAnnotations("Original markups");

  // set format specific names
  String casPrefix;
  String idName;
  if (!inputAS.get("CAS").isEmpty()) {
    casPrefix = "uima.cas.";
    idName = "_id";
  } else if (!inputAS.get("xmi:XMI").isEmpty()) {
    casPrefix = "cas:";
    idName = "xmi:id";
  } else {
    throw new DocumentFormatException("The document \"" + doc.getName() + "\" is neither of XCAS nor XMICAS format.");
  }

  // get array/list contained elements annotations
  // The prefix is quoted because "uima.cas." contains '.', which would
  // otherwise match any character; the pattern is compiled once instead
  // of once per annotation.
  java.util.regex.Pattern containerType = java.util.regex.Pattern.compile(java.util.regex.Pattern.quote(casPrefix) + "[a-zA-Z]+(List|Array)");
  for (Annotation annotation : inputAS) {
    if (containerType.matcher(annotation.getType()).matches()) {
      try {
        String elements = doc.getContent().getContent(annotation.getStartNode().getOffset(), annotation.getEndNode().getOffset()).toString();
        // add contained values as a feature to the array annotation
        if (!elements.trim().equals("")) {
          annotation.getFeatures().put("elements", elements);
        }
      } catch (InvalidOffsetException e) {
        throw new DocumentFormatException(e);
      }
    }
  }

  // get document content from SOFA annotations
  Set<Annotation> sofaSet = inputAS.get(casPrefix + "Sofa");
  if (sofaSet.size() > 1) {
    Out.prln("More than one UIMA SOFA, annotation offsets won't be correct.");
  }
  StringBuilder documentContent = new StringBuilder();
  for (Annotation annotation : sofaSet) {
    String sofaString = (String) annotation.getFeatures().get("sofaString");
    // a Sofa without a sofaString contributes nothing; appending the null
    // reference would insert the four characters "null" into the content
    if (sofaString != null) {
      documentContent.append(sofaString);
    }
  }
  doc.setContent(new DocumentContentImpl(documentContent.toString()));

  // remove SOFA annotations
  inputAS.removeAll(sofaSet);
  // remove non document annotations
  inputAS.removeAll(inputAS.get("CAS"));
  inputAS.removeAll(inputAS.get("xmi:XMI"));
  inputAS.removeAll(inputAS.get("cas:NULL"));

  // get the views members, views will be added later as annotation sets
  List<List<String>> viewList = new ArrayList<List<String>>();
  for (Annotation view : inputAS.get(casPrefix + "View")) {
    String members = (String) view.getFeatures().get("members");
    if (members == null) {
      // a view without members still occupies a slot so that the
      // CasView numbering of the other views is unaffected
      viewList.add(new ArrayList<String>());
    } else {
      viewList.add(Arrays.asList(members.split("\\s+")));
    }
  }
  inputAS.removeAll(inputAS.get(casPrefix + "View"));

  // fill a map with the id as key and the entity name as value
  // this is specific to the Temis Luxid CAS format
  Map<String, String> entityMap = new HashMap<String, String>();
  for (Annotation entity : inputAS.get("com.temis.uima.Entity")) {
    FeatureMap features = entity.getFeatures();
    entityMap.put((String) features.get(idName), (String) features.get("value"));
  }

  try {
    // for each UIMA annotation; iterate over a copy because the set is
    // modified (add/remove) inside the loop
    for (Annotation annotation : new HashSet<Annotation>(inputAS)) {
      FeatureMap features = Factory.newFeatureMap();
      features.putAll(annotation.getFeatures());
      String start = (String) features.get("begin");
      String end = (String) features.get("end");
      String id = (String) features.get(idName);
      // UIMA feature
      features.remove("begin");
      // UIMA feature
      features.remove("end");
      // GATE feature
      features.remove("isEmptyAndSpan");
      // UIMA XCAS feature
      features.remove("_indexed");
      if (start == null || end == null) {
        // no offsets so add it as a GATE document feature
        features.remove(idName);
        for (Map.Entry<Object, Object> entry : features.entrySet()) {
          doc.getFeatures().put(annotation.getType() + '_' + id + '.' + entry.getKey(), entry.getValue());
        }
      } else {
        // offsets so add it as a GATE document annotation; an annotation
        // referring to a known entity takes that entity's value as type
        String entityReference = (String) features.get("_ref_entity");
        String type = entityMap.containsKey(entityReference) ? entityMap.get(entityReference) : annotation.getType();
        Integer gateId = outputAS.add(Long.valueOf(start), Long.valueOf(end), type, features);
        int viewCount = 0;
        for (List<String> viewMembers : viewList) {
          if (viewMembers.contains(id)) {
            // add the annotation to the annotation set
            doc.getAnnotations("CasView" + viewCount).add(outputAS.get(gateId));
          }
          viewCount++;
        }
      }
      // delete UIMA annotation
      inputAS.remove(annotation);
    }
  } catch (InvalidOffsetException e) {
    throw new DocumentFormatException("Couldn't create annotation.", e);
  } catch (NumberFormatException e) {
    // begin/end were present but not numeric: report it as a format
    // problem instead of letting the unchecked exception escape
    throw new DocumentFormatException("Couldn't create annotation.", e);
  }
}
use of gate.FeatureMap in project gate-core by GateNLP.
the class Parameter method calculateValueFromString.
// calculateDefaultValue()
/**
 * Calculate and return the value for this parameter starting from a String.
 * <p>
 * The parameter class (after any replacement found in
 * <code>substituteClasses</code>) decides how the string is read:
 * <ul>
 * <li>a concrete {@link Collection} class: the string is split on
 * <code>;</code> and each token is added either as a String (no item
 * class name configured) or as an instance of the item class built from
 * its single-String constructor;</li>
 * <li>a {@link FeatureMap}: the string must have the form
 * <code>name1=value1;name2=value2;...</code>;</li>
 * <li>an enum: the string is the constant name;</li>
 * <li><code>gate.creole.ResourceReference</code> and
 * <code>java.net.URL</code>: a non-empty string is resolved against the
 * plugin;</li>
 * <li><code>java.lang.Boolean/Long/Integer/Double/Float/String</code>:
 * converted with the type's <code>valueOf</code> (String is returned as
 * is); for the numeric types an empty string gives <code>null</code>;</li>
 * <li>any other <code>java.</code> type: built from its single-String
 * constructor, unless the type can itself hold a String, in which case
 * the result is <code>null</code>;</li>
 * <li>anything else: the first registered instantiation of the resource
 * type recorded in the CREOLE register, or <code>null</code> if the type
 * is unknown or has no instantiations.</li>
 * </ul>
 * A <code>null</code> string gives a <code>null</code> value for
 * collections, feature maps, enums, resource references, numeric types,
 * URLs and resource types.
 *
 * @param stringValue the textual form of the value, may be
 *          <code>null</code>
 * @return the value built from the string, possibly <code>null</code>
 * @throws ParameterException if the string cannot be converted to the
 *           parameter's type; where an underlying exception exists it is
 *           attached as the cause
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
public Object calculateValueFromString(String stringValue) throws ParameterException {
  Object value = null;
  // get the Class for the parameter via Class.forName or CREOLE register
  Class<?> paramClass = getParameterClass();
  if (substituteClasses.containsKey(paramClass)) {
    paramClass = substituteClasses.get(paramClass);
  }

  // Concrete collection types: build the collection from the
  // ";"-separated items of the string value.
  if (Collection.class.isAssignableFrom(paramClass) && !paramClass.isInterface()) {
    // a null string value means a null collection, as for the other
    // parameter kinds below (StringTokenizer would throw on null)
    if (stringValue == null) {
      return null;
    }
    // Create a collection object belonging to paramClass
    Collection<?> collection;
    try {
      collection = paramClass.asSubclass(Collection.class).getConstructor(new Class<?>[] {}).newInstance(new Object[] {});
    } catch (Exception ex) {
      throw new ParameterException("Could not construct an object of type " + typeName + " for param " + name + "\nProblem was: " + ex.toString(), ex);
    }
    // When an item class name is configured the tokens are converted to
    // that class; otherwise the string tokens themselves are the items.
    Class<?> itemClass = null;
    if (itemClassName != null) {
      try {
        itemClass = Gate.getClassLoader().loadClass(itemClassName);
      } catch (ClassNotFoundException e) {
        throw new ParameterException("Could not construct a class object for " + itemClassName + " for param " + name + ", with type name=" + typeName, e);
      }
    }
    StringTokenizer strTokenizer = new StringTokenizer(stringValue, ";");
    while (strTokenizer.hasMoreTokens()) {
      String itemStringValue = strTokenizer.nextToken();
      Object itemValue = itemStringValue;
      if (itemClass != null) {
        // construct an object belonging to itemClassName from the token
        try {
          itemValue = itemClass.getConstructor(new Class<?>[] { String.class }).newInstance(new Object[] { itemStringValue });
        } catch (Exception e) {
          throw new ParameterException("Could not create an object of " + itemClassName + " for param name " + name + ", with type name =" + typeName, e);
        }
      }
      ((Collection<Object>) collection).add(itemValue);
    }
    return collection;
  }

  if (FeatureMap.class.isAssignableFrom(paramClass)) {
    // a null string value means a null FeatureMap
    if (stringValue == null)
      return null;
    FeatureMap fm = null;
    // if the type is the FeatureMap interface itself
    // then just create a normal feature map using the factory
    if (paramClass.isInterface()) {
      fm = Factory.newFeatureMap();
    } else {
      try {
        fm = paramClass.asSubclass(FeatureMap.class).getConstructor(new Class<?>[] {}).newInstance(new Object[] {});
      } catch (InstantiationException | IllegalAccessException | IllegalArgumentException | InvocationTargetException | NoSuchMethodException | SecurityException e) {
        throw new ParameterException("Could not construct an object of type " + typeName + " for param " + name + "\nProblem was: " + e.toString(), e);
      }
    }
    // each token is a name=value pair; only the first '=' separates the
    // name from the value
    StringTokenizer strTokenizer = new StringTokenizer(stringValue, ";");
    while (strTokenizer.hasMoreTokens()) {
      String keyAndValue = strTokenizer.nextToken();
      int indexOfEquals = keyAndValue.indexOf('=');
      if (indexOfEquals == -1) {
        throw new ParameterException("Error parsing string \"" + stringValue + "\" for parameter " + name + " of type " + typeName + ". Value string must be of the form " + "name1=value1;name2=value2;...");
      }
      String featName = keyAndValue.substring(0, indexOfEquals);
      String featValue = keyAndValue.substring(indexOfEquals + 1);
      fm.put(featName, featValue);
    }
    return fm;
  }

  // Java 5.0 enum types
  if (paramClass.isEnum()) {
    if (stringValue == null) {
      value = null;
    } else {
      try {
        value = Enum.valueOf(paramClass.<Enum>asSubclass(Enum.class), stringValue);
      } catch (IllegalArgumentException e) {
        throw new ParameterException("Invalid enum constant name " + stringValue + " for type " + typeName, e);
      }
    }
  } else if (typeName.equals("gate.creole.ResourceReference")) {
    // null or empty string value: just leave value as null
    if (stringValue != null && !stringValue.equals("")) {
      try {
        value = new ResourceReference(plugin, stringValue);
      } catch (URISyntaxException e) {
        throw new ParameterException("Malformed ResourceReference parameter value: " + stringValue, e);
      }
    }
  } else if (typeName.startsWith("java.")) {
    // for the numeric and URL types a null or empty string leaves the
    // value as null
    boolean hasText = stringValue != null && !stringValue.equals("");
    try {
      if (typeName.equals("java.lang.Boolean")) {
        value = Boolean.valueOf(stringValue);
      } else if (typeName.equals("java.lang.Long")) {
        if (hasText) {
          value = Long.valueOf(stringValue);
        }
      } else if (typeName.equals("java.lang.Integer")) {
        if (hasText) {
          value = Integer.valueOf(stringValue);
        }
      } else if (typeName.equals("java.lang.String")) {
        value = stringValue;
      } else if (typeName.equals("java.lang.Double")) {
        if (hasText) {
          value = Double.valueOf(stringValue);
        }
      } else if (typeName.equals("java.lang.Float")) {
        if (hasText) {
          value = Float.valueOf(stringValue);
        }
      } else if (typeName.equals("java.net.URL")) {
        if (hasText) {
          try {
            // relative values are resolved against the plugin's base URL
            value = new URL(plugin.getBaseURL(), stringValue);
          } catch (MalformedURLException mue) {
            throw new ParameterException("Malformed URL parameter value: " + stringValue, mue);
          }
        }
      } else {
        // any other java.* type: try its single-String constructor, unless
        // the type can itself hold a String (value then stays null)
        try {
          if (!paramClass.isAssignableFrom(String.class)) {
            value = paramClass.getConstructor(new Class<?>[] { String.class }).newInstance(new Object[] { stringValue });
          }
        } catch (Exception e) {
          throw new ParameterException("Unsupported parameter type " + typeName, e);
        }
      }
    } catch (NumberFormatException nfe) {
      // raised by the numeric valueOf calls above on malformed input;
      // reported like the malformed enum/URL cases rather than escaping
      // as an unchecked exception
      throw new ParameterException("Invalid " + typeName + " parameter value: " + stringValue, nfe);
    }
  } else {
    // null string value means null target value
    if (stringValue != null) {
      // otherwise, if it's a GATE resource type pick the first registered instance
      if (resData == null)
        resData = Gate.getCreoleRegister().get(typeName);
      if (resData == null) {
        // unknown type
        return null;
      }
      List<Resource> instantiations = resData.getInstantiations();
      if (!instantiations.isEmpty())
        value = instantiations.get(0);
    }
  }
  return value;
}
Aggregations