Search in sources :

Example 51 with FeatureMap

use of gate.FeatureMap in project gate-core by GateNLP.

the class SerialCorpusImpl method get.

/**
 * Returns the document stored at the given position of this corpus.
 * <p>
 * Documents are kept in an in-memory list that may hold {@code null}
 * placeholders. When the slot for {@code index} is empty, the document is
 * instantiated through {@link Factory#createResource} using this corpus's
 * datastore and the persistent ID recorded for that position, and the slot
 * is then filled with the loaded instance.
 *
 * @param index position of the requested document
 * @return the document at {@code index}, or {@code null} when
 *         {@code index} is not smaller than the number of known documents
 * @throws GateRuntimeException if the document cannot be instantiated
 */
@Override
public Document get(int index) {
    if (index >= docDataList.size()) {
        return null;
    }
    Document cached = documents.get(index);
    if (DEBUG) {
        Out.prln("SerialCorpusImpl: get(): index " + index + "result: " + cached);
    }
    // already in memory: nothing to load
    if (cached != null) {
        return cached;
    }
    // the slot is empty, so the document has to be fetched from the DS
    FeatureMap loadParams = Factory.newFeatureMap();
    loadParams.put(DataStore.DATASTORE_FEATURE_NAME, this.dataStore);
    try {
        loadParams.put(DataStore.LR_ID_FEATURE_NAME, docDataList.get(index).getPersistentID());
        Document loaded = (Document) Factory.createResource(docDataList.get(index).getClassType(), loadParams);
        if (DEBUG) {
            Out.prln("Loaded document :" + loaded.getName());
        }
        // remember the instantiated version so later calls can reuse it
        documents.set(index, loaded);
        return loaded;
    } catch (ResourceInstantiationException ex) {
        Err.prln("Error reading document inside a serialised corpus.");
        throw new GateRuntimeException(ex);
    }
}
Also used : FeatureMap(gate.FeatureMap) GateRuntimeException(gate.util.GateRuntimeException) Document(gate.Document) ResourceInstantiationException(gate.creole.ResourceInstantiationException)

Example 52 with FeatureMap

use of gate.FeatureMap in project gate-core by GateNLP.

the class DocumentStaxUtils method writeXcesAnnotations.

/**
 * Serialises a collection of annotations as an XCES <code>cesAna</code>
 * element on the supplied XMLStreamWriter.
 * <p>
 * Only the <code>cesAna</code> element is produced: any XML declaration
 * is the caller's responsibility, and the writer is <i>not</i> closed
 * here. Annotations are written in the order defined by
 * {@code LONGEST_FIRST_OFFSET_COMPARATOR}, one <code>struct</code>
 * element each, with one nested <code>feat</code> element per feature
 * (the "isEmptyAndSpan" feature is skipped). Characters in feature
 * values that are illegal in XML are replaced by
 * {@link #INVALID_CHARACTER_REPLACEMENT} (a space). Feature <i>names</i>
 * and annotation types are written unmodified - an illegal character in
 * one of these will cause the serialization to fail.
 *
 * @param annotations the annotations to save, typically an
 *          AnnotationSet
 * @param xsw the XMLStreamWriter to write to
 * @param includeId whether to emit each annotation's ID as the "n"
 *          attribute of its <code>struct</code>
 * @throws XMLStreamException if the underlying writer reports an error
 */
public static void writeXcesAnnotations(Collection<Annotation> annotations, XMLStreamWriter xsw, boolean includeId) throws XMLStreamException {
    // work on a sorted copy so the caller's collection is left untouched
    List<Annotation> ordered = new ArrayList<Annotation>(annotations);
    Collections.sort(ordered, LONGEST_FIRST_OFFSET_COMPARATOR);

    // root element, bound to the XCES namespace as the default namespace
    xsw.setDefaultNamespace(XCES_NAMESPACE);
    xsw.writeStartElement(XCES_NAMESPACE, "cesAna");
    xsw.writeDefaultNamespace(XCES_NAMESPACE);
    xsw.writeAttribute("version", XCES_VERSION);
    newLine(xsw);

    final String structIndent = "   ";
    final String featIndent = structIndent + structIndent;

    for (Annotation annot : ordered) {
        long from = annot.getStartNode().getOffset().longValue();
        long to = annot.getEndNode().getOffset().longValue();
        FeatureMap features = annot.getFeatures();
        boolean hasFeatures = features != null && features.size() != 0;

        xsw.writeCharacters(structIndent);
        // a struct without features is self-closing, otherwise it wraps
        // its feat children
        if (hasFeatures) {
            xsw.writeStartElement(XCES_NAMESPACE, "struct");
        } else {
            xsw.writeEmptyElement(XCES_NAMESPACE, "struct");
        }
        xsw.writeAttribute("type", annot.getType());
        xsw.writeAttribute("from", Long.toString(from));
        xsw.writeAttribute("to", Long.toString(to));
        if (includeId) {
            // the annotation ID goes into the "n" attribute
            xsw.writeAttribute("n", String.valueOf(annot.getId()));
        }
        newLine(xsw);

        if (!hasFeatures) {
            continue;
        }
        for (Map.Entry<Object, Object> feature : features.entrySet()) {
            if ("isEmptyAndSpan".equals(feature.getKey())) {
                continue;
            }
            String featName = String.valueOf(feature.getKey());
            String featValue = replaceXMLIllegalCharactersInString(String.valueOf(feature.getValue()));
            xsw.writeCharacters(featIndent);
            xsw.writeEmptyElement(XCES_NAMESPACE, "feat");
            xsw.writeAttribute("name", featName);
            xsw.writeAttribute("value", featValue);
            newLine(xsw);
        }
        // close the struct opened above
        xsw.writeCharacters(structIndent);
        xsw.writeEndElement();
        newLine(xsw);
    }

    // close cesAna
    xsw.writeEndElement();
    newLine(xsw);
}
Also used : FeatureMap(gate.FeatureMap) ArrayList(java.util.ArrayList) HashMap(java.util.HashMap) Map(java.util.Map) FeatureMap(gate.FeatureMap) Annotation(gate.Annotation)

Example 53 with FeatureMap

use of gate.FeatureMap in project gate-core by GateNLP.

the class DocumentStaxUtils method readRelationSet.

/**
 * Reads consecutive <code>Relation</code> elements from the stream and adds
 * the relations they describe to the given relation set.
 * <p>
 * Reading continues for as long as the next tag is a start element; each
 * one must be a <code>Relation</code> carrying <code>Type</code>,
 * <code>Id</code> and <code>Members</code> attributes, where
 * <code>Members</code> is a ';'-separated list of integers. Its first child
 * must be a <code>UserData</code> element containing only text, which is
 * followed by whatever {@code readFeatureMap} consumes.
 *
 * @param xsr the reader to read from
 * @param relations the relation set that receives the relations read
 * @param allAnnotIds not used by this implementation
 * @throws XMLStreamException if the XML is not of the expected shape, a
 *           required attribute is missing, or the <code>Id</code> or
 *           <code>Members</code> attribute is not numeric
 */
public static void readRelationSet(XMLStreamReader xsr, RelationSet relations, Set<Integer> allAnnotIds) throws XMLStreamException {
    while (xsr.nextTag() == XMLStreamConstants.START_ELEMENT) {
        xsr.require(XMLStreamConstants.START_ELEMENT, null, "Relation");
        String type = xsr.getAttributeValue(null, "Type");
        String idString = xsr.getAttributeValue(null, "Id");
        String memberString = xsr.getAttributeValue(null, "Members");
        if (memberString == null)
            throw new XMLStreamException("A relation must have members");
        if (type == null)
            throw new XMLStreamException("A relation must have a type");
        if (idString == null)
            throw new XMLStreamException("A relation must have an id");
        // Parse the numeric attributes while the reader is still positioned
        // on the Relation start tag, so that a malformed value is reported
        // as an XMLStreamException pointing at the offending element rather
        // than escaping as an unchecked NumberFormatException.
        String[] memberStrings = memberString.split(";");
        int[] members = new int[memberStrings.length];
        try {
            for (int i = 0; i < members.length; ++i) {
                members[i] = Integer.parseInt(memberStrings[i]);
            }
        } catch (NumberFormatException nfe) {
            throw new XMLStreamException("Relation members must be integers separated by ';' but found \"" + memberString + "\"", xsr.getLocation(), nfe);
        }
        int id;
        try {
            id = Integer.parseInt(idString);
        } catch (NumberFormatException nfe) {
            throw new XMLStreamException("Relation id must be an integer but found \"" + idString + "\"", xsr.getLocation(), nfe);
        }
        xsr.nextTag();
        xsr.require(XMLStreamConstants.START_ELEMENT, null, "UserData");
        // get the string representation of the user data
        StringBuilder stringRep = new StringBuilder(1024);
        int eventType;
        while ((eventType = xsr.next()) != XMLStreamConstants.END_ELEMENT) {
            switch(eventType) {
                case XMLStreamConstants.CHARACTERS:
                case XMLStreamConstants.CDATA:
                    stringRep.append(xsr.getTextCharacters(), xsr.getTextStart(), xsr.getTextLength());
                    break;
                case XMLStreamConstants.START_ELEMENT:
                    throw new XMLStreamException("Elements not allowed within " + "user data.", xsr.getLocation());
                default:
                    // any other event type is skipped
            }
        }
        xsr.require(XMLStreamConstants.END_ELEMENT, null, "UserData");
        FeatureMap features = readFeatureMap(xsr);
        Relation r = new SimpleRelation(id, type, members);
        r.setFeatures(features);
        // user data is only set when the UserData element had text content
        if (stringRep.length() > 0) {
            ObjectWrapper wrapper = new ObjectWrapper(stringRep.toString());
            r.setUserData(wrapper.getValue());
        }
        relations.add(r);
    }
}
Also used : FeatureMap(gate.FeatureMap) Relation(gate.relations.Relation) SimpleRelation(gate.relations.SimpleRelation) XMLStreamException(javax.xml.stream.XMLStreamException) SimpleRelation(gate.relations.SimpleRelation)

Example 54 with FeatureMap

use of gate.FeatureMap in project gate-core by GateNLP.

the class UimaDocumentFormat method unpackCasMarkup.

/**
 * Convert UIMA CAS markups to GATE markups.
 * <p>
 * Works on the "Original markups" annotation set of the document. The CAS
 * flavour is detected from the presence of a "CAS" annotation (XCAS) or an
 * "xmi:XMI" annotation (XMI CAS). The document content is replaced by the
 * concatenated "sofaString" features of the Sofa annotations. Every
 * remaining annotation is then either re-created with the offsets from its
 * "begin"/"end" features, or, when it has no offsets, flattened into
 * document features named <code>type_id.featureName</code>. Annotations
 * listed as members of a view are also added to a "CasView<i>N</i>" set,
 * where N is the position of the view in iteration order.
 *
 * @param doc XML document already parsed
 * @throws DocumentFormatException if the document is neither XCAS nor
 *           XMI CAS, or if an annotation has invalid offsets
 */
private void unpackCasMarkup(Document doc) throws DocumentFormatException {
    // input and output are both requested under the same set name
    AnnotationSet inputAS = doc.getAnnotations("Original markups");
    AnnotationSet outputAS = doc.getAnnotations("Original markups");
    // set format specific names
    String casPrefix;
    String idName;
    if (!inputAS.get("CAS").isEmpty()) {
        casPrefix = "uima.cas.";
        idName = "_id";
    } else if (!inputAS.get("xmi:XMI").isEmpty()) {
        casPrefix = "cas:";
        idName = "xmi:id";
    } else {
        throw new DocumentFormatException("The document \"" + doc.getName() + "\" is neither of XCAS nor XMICAS format.");
    }
    // Matches the array/list types of the detected format. The prefix is
    // quoted because it contains regex metacharacters (the dots in
    // "uima.cas.") that must be matched literally, and the pattern is
    // compiled once here instead of once per annotation.
    java.util.regex.Pattern arrayOrListType = java.util.regex.Pattern.compile(java.util.regex.Pattern.quote(casPrefix) + "[a-zA-Z]+(List|Array)");
    // get array/list contained elements annotations
    for (Annotation annotation : inputAS) {
        if (arrayOrListType.matcher(annotation.getType()).matches()) {
            try {
                String elements = doc.getContent().getContent(annotation.getStartNode().getOffset(), annotation.getEndNode().getOffset()).toString();
                // add contained values as a feature to the array annotation
                if (!elements.trim().isEmpty()) {
                    annotation.getFeatures().put("elements", elements);
                }
            } catch (InvalidOffsetException e) {
                throw new DocumentFormatException(e);
            }
        }
    }
    // get document content from SOFA annotations
    Set<Annotation> sofaSet = inputAS.get(casPrefix + "Sofa");
    if (sofaSet.size() > 1) {
        Out.prln("More than one UIMA SOFA, annotation offsets won't be correct.");
    }
    StringBuilder documentContent = new StringBuilder();
    for (Annotation annotation : sofaSet) {
        documentContent.append((String) annotation.getFeatures().get("sofaString"));
    }
    doc.setContent(new DocumentContentImpl(documentContent.toString()));
    // remove SOFA annotations
    inputAS.removeAll(sofaSet);
    // remove non document annotations
    inputAS.removeAll(inputAS.get("CAS"));
    inputAS.removeAll(inputAS.get("xmi:XMI"));
    inputAS.removeAll(inputAS.get("cas:NULL"));
    // get the views members, views will be added later as annotation sets
    List<List<String>> viewList = new ArrayList<List<String>>();
    for (Annotation view : inputAS.get(casPrefix + "View")) {
        // "members" is a whitespace separated list of annotation ids
        viewList.add(Arrays.asList(((String) view.getFeatures().get("members")).split("\\s+")));
    }
    inputAS.removeAll(inputAS.get(casPrefix + "View"));
    // fill a map with the id as key and the entity name as value
    // this is specific to the Temis Luxid CAS format
    Map<String, String> entityMap = new HashMap<String, String>();
    for (Annotation entity : inputAS.get("com.temis.uima.Entity")) {
        FeatureMap features = entity.getFeatures();
        entityMap.put((String) features.get(idName), (String) features.get("value"));
    }
    try {
        // for each UIMA annotation; iterate over a copy because the loop
        // adds to and removes from the underlying annotation set
        for (Annotation annotation : new HashSet<Annotation>(inputAS)) {
            FeatureMap features = Factory.newFeatureMap();
            features.putAll(annotation.getFeatures());
            String start = (String) features.get("begin");
            String end = (String) features.get("end");
            String id = (String) features.get(idName);
            // UIMA feature
            features.remove("begin");
            // UIMA feature
            features.remove("end");
            // GATE feature
            features.remove("isEmptyAndSpan");
            // UIMA XCAS feature
            features.remove("_indexed");
            if (start == null || end == null) {
                // no offsets so add it as a GATE document feature
                features.remove(idName);
                for (Map.Entry<Object, Object> entry : features.entrySet()) {
                    doc.getFeatures().put(annotation.getType() + '_' + id + '.' + entry.getKey(), entry.getValue());
                }
            } else {
                // offsets so add it as a GATE document annotation; when the
                // annotation references a known entity, the entity's value
                // is used as the annotation type
                String entityReference = (String) features.get("_ref_entity");
                String type = entityMap.containsKey(entityReference) ? entityMap.get(entityReference) : annotation.getType();
                Integer gateId = outputAS.add(Long.valueOf(start), Long.valueOf(end), type, features);
                int viewCount = 0;
                for (List<String> viewMembers : viewList) {
                    if (viewMembers.contains(id)) {
                        // add the annotation to the annotation set
                        doc.getAnnotations("CasView" + viewCount).add(outputAS.get(gateId));
                    }
                    viewCount++;
                }
            }
            // delete UIMA annotation
            inputAS.remove(annotation);
        }
    } catch (InvalidOffsetException e) {
        throw new DocumentFormatException("Couldn't create annotation.", e);
    }
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) AnnotationSet(gate.AnnotationSet) InvalidOffsetException(gate.util.InvalidOffsetException) Annotation(gate.Annotation) DocumentFormatException(gate.util.DocumentFormatException) FeatureMap(gate.FeatureMap) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map) FeatureMap(gate.FeatureMap) HashSet(java.util.HashSet)

Example 55 with FeatureMap

use of gate.FeatureMap in project gate-core by GateNLP.

the class Parameter method calculateValueFromString.

// calculateDefaultValue()
/**
 * Calculate and return the value for this parameter starting from a String.
 * <p>
 * The conversion depends on the parameter's class (after applying any
 * entry in {@code substituteClasses}):
 * <ul>
 * <li>concrete {@link Collection} classes: the string is split on ';' and
 * each token is added either as a String or, when {@code itemClassName}
 * is set, as an instance of that class built from its single-String
 * constructor;</li>
 * <li>{@link FeatureMap}: the string must have the form
 * <code>name1=value1;name2=value2;...</code>; a null string gives a null
 * map;</li>
 * <li>enum types: the constant with the given name;</li>
 * <li><code>gate.creole.ResourceReference</code>, <code>java.net.URL</code>
 * and the <code>java.lang</code> wrapper types: parsed from the string,
 * with null or empty strings leaving the value null (except Boolean and
 * String, which are converted unconditionally);</li>
 * <li>other <code>java.*</code> types: built via a single-String
 * constructor;</li>
 * <li>anything else: the first instantiation registered for the type in
 * the CREOLE register, if any.</li>
 * </ul>
 *
 * @param stringValue the textual form of the value; may be null for
 *          non-collection parameter types
 * @return the converted value, possibly null
 * @throws ParameterException if the value cannot be constructed from the
 *           string; the underlying exception, when there is one, is
 *           attached as the cause
 * @throws NumberFormatException if a numeric parameter is given a
 *           non-numeric string
 * @throws NullPointerException if {@code stringValue} is null and the
 *           parameter is a concrete collection type
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
public Object calculateValueFromString(String stringValue) throws ParameterException {
    Object value = null;
    // resolve the parameter's class, swapping in the registered substitute
    // when there is one
    Class<?> paramClass = getParameterClass();
    if (substituteClasses.containsKey(paramClass)) {
        paramClass = substituteClasses.get(paramClass);
    }
    // concrete collection types: create an instance and fill it from the
    // ';'-separated tokens of the string
    if (Collection.class.isAssignableFrom(paramClass) && !paramClass.isInterface()) {
        // Create a collection object belonging to paramClass
        Collection<?> colection = null;
        try {
            colection = paramClass.asSubclass(Collection.class).getConstructor(new Class<?>[] {}).newInstance(new Object[] {});
        } catch (Exception ex) {
            throw new ParameterException("Could not construct an object of type " + typeName + " for param " + name + "\nProblem was: " + ex.toString(), ex);
        }
        if (itemClassName == null) {
            // no item class specified: add the string tokens themselves
            StringTokenizer strTokenizer = new StringTokenizer(stringValue, ";");
            while (strTokenizer.hasMoreTokens()) {
                String itemStringValue = strTokenizer.nextToken();
                ((Collection<String>) colection).add(itemStringValue);
            }
        } else {
            Class<?> itemClass = null;
            try {
                itemClass = Gate.getClassLoader().loadClass(itemClassName);
            } catch (ClassNotFoundException e) {
                throw new ParameterException("Could not construct a class object for " + itemClassName + " for param " + name + ", with type name=" + typeName, e);
            }
            // Read the tokens from the string and try to create items
            // belonging to the itemClassName
            StringTokenizer strTokenizer = new StringTokenizer(stringValue, ";");
            while (strTokenizer.hasMoreTokens()) {
                // Read a string item and construct an object belonging to
                // itemClassName through its single-String constructor
                String itemStringValue = strTokenizer.nextToken();
                Object itemValue = null;
                try {
                    itemValue = itemClass.getConstructor(new Class<?>[] { String.class }).newInstance(new Object[] { itemStringValue });
                } catch (Exception e) {
                    throw new ParameterException("Could not create an object of " + itemClassName + " for param name " + name + ", with type name =" + typeName, e);
                }
                // Add the item value object to the collection
                ((Collection<Object>) colection).add(itemValue);
            }
        }
        return colection;
    }
    if (FeatureMap.class.isAssignableFrom(paramClass)) {
        // a null string value means a null FeatureMap
        if (stringValue == null)
            return null;
        FeatureMap fm = null;
        // if the type is just the interface, create a normal feature map
        // using the factory, otherwise instantiate the concrete class
        if (paramClass.isInterface()) {
            fm = Factory.newFeatureMap();
        } else {
            try {
                fm = paramClass.asSubclass(FeatureMap.class).getConstructor(new Class<?>[] {}).newInstance(new Object[] {});
            } catch (InstantiationException | IllegalAccessException | IllegalArgumentException | InvocationTargetException | NoSuchMethodException | SecurityException e) {
                throw new ParameterException("Could not construct an object of type " + typeName + " for param " + name + "\nProblem was: " + e.toString(), e);
            }
        }
        // each ';'-separated token must be a name=value pair; the name is
        // everything before the first '='
        StringTokenizer strTokenizer = new StringTokenizer(stringValue, ";");
        while (strTokenizer.hasMoreTokens()) {
            String keyAndValue = strTokenizer.nextToken();
            int indexOfEquals = keyAndValue.indexOf('=');
            if (indexOfEquals == -1) {
                throw new ParameterException("Error parsing string \"" + stringValue + "\" for parameter " + name + " of type " + typeName + ". Value string must be of the form " + "name1=value1;name2=value2;...");
            }
            String featName = keyAndValue.substring(0, indexOfEquals);
            String featValue = keyAndValue.substring(indexOfEquals + 1);
            fm.put(featName, featValue);
        }
        return fm;
    }
    // Java 5.0 enum types
    if (paramClass.isEnum()) {
        if (stringValue == null) {
            value = null;
        } else {
            try {
                value = Enum.valueOf(paramClass.<Enum>asSubclass(Enum.class), stringValue);
            } catch (IllegalArgumentException e) {
                throw new ParameterException("Invalid enum constant name " + stringValue + " for type " + typeName, e);
            }
        }
    } else if (typeName.equals("gate.creole.ResourceReference")) {
        if (stringValue != null && !stringValue.equals("")) {
            try {
                value = new ResourceReference(plugin, stringValue);
            } catch (URISyntaxException e) {
                throw new ParameterException("Malformed ResourceReference parameter value: " + stringValue, e);
            }
        }
    } else if (typeName.startsWith("java.")) {
        // for the numeric and URL types, don't try to parse a null or
        // empty string value, but just leave value as null
        if (typeName.equals("java.lang.Boolean"))
            value = Boolean.valueOf(stringValue);
        else if (typeName.equals("java.lang.Long")) {
            if (stringValue != null && !stringValue.equals("")) {
                value = Long.valueOf(stringValue);
            }
        } else if (typeName.equals("java.lang.Integer")) {
            if (stringValue != null && !stringValue.equals("")) {
                value = Integer.valueOf(stringValue);
            }
        } else if (typeName.equals("java.lang.String"))
            value = stringValue;
        else if (typeName.equals("java.lang.Double")) {
            if (stringValue != null && !stringValue.equals("")) {
                value = Double.valueOf(stringValue);
            }
        } else if (typeName.equals("java.lang.Float")) {
            if (stringValue != null && !stringValue.equals("")) {
                value = Float.valueOf(stringValue);
            }
        } else if (typeName.equals("java.net.URL")) {
            try {
                if (stringValue != null && !stringValue.equals("")) {
                    // resolved against the base URL of the owning plugin
                    value = new URL(plugin.getBaseURL(), stringValue);
                }
            } catch (MalformedURLException mue) {
                throw new ParameterException("Malformed URL parameter value: " + stringValue, mue);
            }
        } else {
            // any other java.* type: try its single-String constructor;
            // value stays null when paramClass is String or one of its
            // supertypes
            try {
                if (!paramClass.isAssignableFrom(String.class)) {
                    value = paramClass.getConstructor(new Class<?>[] { String.class }).newInstance(new Object[] { stringValue });
                }
            } catch (Exception e) {
                throw new ParameterException("Unsupported parameter type " + typeName, e);
            }
        }
    } else {
        // null string value means null target value
        if (stringValue != null) {
            // otherwise, if it's a GATE resource type pick the first registered instance
            if (resData == null)
                resData = Gate.getCreoleRegister().get(typeName);
            if (resData == null) {
                // unknown type
                return null;
            }
            List<Resource> instantiations = resData.getInstantiations();
            if (!instantiations.isEmpty())
                value = instantiations.get(0);
        }
    }
    return value;
}
Also used : MalformedURLException(java.net.MalformedURLException) URISyntaxException(java.net.URISyntaxException) URL(java.net.URL) Resource(gate.Resource) MalformedURLException(java.net.MalformedURLException) URISyntaxException(java.net.URISyntaxException) InvocationTargetException(java.lang.reflect.InvocationTargetException) GateRuntimeException(gate.util.GateRuntimeException) InvocationTargetException(java.lang.reflect.InvocationTargetException) FeatureMap(gate.FeatureMap) StringTokenizer(java.util.StringTokenizer) Collection(java.util.Collection)

Aggregations

FeatureMap (gate.FeatureMap): 55, Document (gate.Document): 15, URL (java.net.URL): 14, ResourceInstantiationException (gate.creole.ResourceInstantiationException): 11, File (java.io.File): 10, Resource (gate.Resource): 8, GateRuntimeException (gate.util.GateRuntimeException): 7, ArrayList (java.util.ArrayList): 7, List (java.util.List): 7, PersistenceException (gate.persist.PersistenceException): 6, Annotation (gate.Annotation): 5, AnnotationSet (gate.AnnotationSet): 5, DataStore (gate.DataStore): 5, LanguageResource (gate.LanguageResource): 5, TestDocument (gate.corpora.TestDocument): 4, ResourceData (gate.creole.ResourceData): 4, SerialDataStore (gate.persist.SerialDataStore): 4, InvalidOffsetException (gate.util.InvalidOffsetException): 4, Corpus (gate.Corpus): 3, ProcessingResource (gate.ProcessingResource): 3