Search in sources :

Example 6 with FeatureMap

use of gate.FeatureMap in project gate-core by GateNLP.

the class HtmlDocumentHandler method handleStartTag.

/**
 * Called when the HTML parser encounters the beginning of a tag that is
 * paired with an end tag (i.e. it is not an empty tag).
 * <p>
 * The tag's attributes are copied into a new feature map, the temporary
 * document content is adjusted for readability, and a {@code CustomObject}
 * whose start and end offsets are both the current content length is pushed
 * onto the stack.
 *
 * @param t the tag that has just been opened
 * @param a the attributes of the tag
 * @param pos position reported by the parser; not used by this method
 */
@Override
public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos) {
    // Fire the status listener every time the number of processed elements
    // reaches a multiple of ELEMENTS_RATE
    if (0 == (++elements % ELEMENTS_RATE)) {
        fireStatusChangedEvent("Processed elements : " + elements);
    }
    // Record that a STYLE tag has been opened
    if (HTML.Tag.STYLE.equals(t)) {
        isInsideStyleTag = true;
    }
    // Construct a feature map from the attribute list: every attribute name
    // and value is stored in its string form
    FeatureMap fm = Factory.newFeatureMap();
    if (0 != a.getAttributeCount()) {
        Enumeration<?> attributeNames = a.getAttributeNames();
        while (attributeNames.hasMoreElements()) {
            Object attribute = attributeNames.nextElement();
            fm.put(attribute.toString(), (a.getAttribute(attribute)).toString());
        }
    }
    // Analyse the tag and add some newline and space characters to
    // tmpDocContent so that the final document has a readable form.
    customizeAppearanceOfDocumentWithStartTag(t);
    // If tmpDocContent ends with a NON-whitespace character, add a space
    // before calculating the START index of this tag. Otherwise the content of
    // two separate tags would be concatenated into a different, NEW word.
    int tmpDocContentSize = tmpDocContent.length();
    if (tmpDocContentSize != 0 && !Character.isWhitespace(tmpDocContent.charAt(tmpDocContentSize - 1))) {
        tmpDocContent.append(" ");
    }
    // Create the start index of the annotation. Long.valueOf replaces the
    // deprecated Long(long) constructor.
    Long startIndex = Long.valueOf(tmpDocContent.length());
    // Initially the start index is equal to the end index
    CustomObject obj = new CustomObject(t.toString(), fm, startIndex, startIndex);
    // Put it on the stack
    stack.push(obj);
}
Also used : FeatureMap(gate.FeatureMap)

Example 7 with FeatureMap

use of gate.FeatureMap in project gate-core by GateNLP.

the class HtmlDocumentHandler method handleSimpleTag.

// handleEndTag
/**
 * Called when the HTML parser encounters an empty tag.
 * <p>
 * The tag's attributes are copied into a new feature map and a
 * {@code CustomObject} whose start and end offsets are both the current
 * content length is added straight to the collector (it is not pushed onto
 * the stack because an empty tag has no matching end tag). The temporary
 * document content is adjusted for readability afterwards.
 *
 * @param t the empty tag
 * @param a the attributes of the tag
 * @param pos position reported by the parser; not used by this method
 */
@Override
public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) {
    // Fire the status listener every time the number of processed elements
    // reaches a multiple of ELEMENTS_RATE
    if ((++elements % ELEMENTS_RATE) == 0) {
        fireStatusChangedEvent("Processed elements : " + elements);
    }
    // Construct a feature map from the attribute list: every attribute name
    // and value is stored in its string form
    FeatureMap fm = Factory.newFeatureMap();
    if (0 != a.getAttributeCount()) {
        Enumeration<?> attributeNames = a.getAttributeNames();
        while (attributeNames.hasMoreElements()) {
            Object attribute = attributeNames.nextElement();
            fm.put(attribute.toString(), (a.getAttribute(attribute)).toString());
        }
    }
    // Create the start index of the annotation. Long.valueOf replaces the
    // deprecated Long(long) constructor.
    Long startIndex = Long.valueOf(tmpDocContent.length());
    // Initially the start index is equal to the end index
    CustomObject obj = new CustomObject(t.toString(), fm, startIndex, startIndex);
    // Add the object directly to the collector; it is not put on the stack
    // because this is an empty tag
    colector.add(obj);
    // Analyse the tag and add some newline and space characters to
    // tmpDocContent so that the final document has a readable form. This is
    // done after the start index has been captured.
    customizeAppearanceOfDocumentWithSimpleTag(t);
}
Also used : FeatureMap(gate.FeatureMap)

Example 8 with FeatureMap

use of gate.FeatureMap in project gate-core by GateNLP.

the class AnnotationSchema method workWithJDom.

// end fromXSchema
/**
 * Walks the JDOM tree of an XSchema document and builds this annotation
 * schema from it.
 * <p>
 * Every {@code element} child of the root (at most one is allowed) is handed
 * to {@code createAnnotationSchemaObject}. Every {@code include} child is then
 * resolved against {@code xmlFileUrl} and loaded as a separate
 * {@code gate.creole.AnnotationSchema} resource; the most recently loaded one
 * is kept in {@code lastIncluded}.
 *
 * @param jDom the JDOM structure containing the XSchema document. It must not
 * be <b>null</b>
 * @throws ResourceInstantiationException if the root declares more than one
 * {@code element} child, or if loading an included schema fails
 */
private void workWithJDom(org.jdom.Document jDom) throws ResourceInstantiationException {
    final org.jdom.Element root = jDom.getRootElement();
    final Namespace ns = root.getNamespace();

    // A schema file may define a single annotation type only
    final List<?> elementDefinitions = root.getChildren("element", ns);
    if (elementDefinitions.size() > 1) {
        throw new ResourceInstantiationException("Each Annotation must be defined in a separate XML Schema file");
    }
    for (Object elementDefinition : elementDefinitions) {
        createAnnotationSchemaObject((org.jdom.Element) elementDefinition, ns);
    }

    // Load every schema referenced through an <include> element
    final List<?> includes = root.getChildren("include", ns);
    for (Object include : includes) {
        org.jdom.Element includeElement = (org.jdom.Element) include;
        try {
            String schemaLocation = includeElement.getAttributeValue("schemaLocation");
            FeatureMap schemaParams = Factory.newFeatureMap();
            // the location is resolved relative to this schema's own URL
            schemaParams.put("xmlFileUrl", new URL(xmlFileUrl, schemaLocation));
            lastIncluded = (AnnotationSchema) Factory.createResource("gate.creole.AnnotationSchema", schemaParams);
        } catch (Exception e) {
            throw new ResourceInstantiationException(e);
        }
    }
}
Also used : FeatureMap(gate.FeatureMap) Namespace(org.jdom.Namespace) URL(java.net.URL) JDOMException(org.jdom.JDOMException)

Example 9 with FeatureMap

use of gate.FeatureMap in project gate-core by GateNLP.

the class CreoleXmlHandler method startElement.

// attributes2String()
/**
 * Called when the SAX parser encounters the beginning of an XML element.
 * <p>
 * Any pending character data is flushed first. The element name is then
 * compared, ignoring case, against the known creole.xml element names
 * (RESOURCE, AUTOINSTANCE, HIDDEN-AUTOINSTANCE, PARAM, PARAMETER, GUI, OR) and
 * the handler state is prepared accordingly. The element's attributes are
 * kept in {@code currentAttributes}.
 *
 * @param uri not used by this method
 * @param qName not used by this method
 * @param elementName the name of the element being opened
 * @param atts the attributes attached to the element
 * @throws SAXException declared by the overridden method
 * @throws GateRuntimeException if a PARAM element lacks a NAME or a VALUE
 * attribute
 */
@Override
public void startElement(String uri, String qName, String elementName, Attributes atts) throws SAXException {
    // flush buffered character data before handling the new element
    if (readCharacterStatus) {
        readCharacterStatus = false;
        charactersAction(new String(contentBuffer).toCharArray(), 0, contentBuffer.length());
    }
    if (DEBUG) {
        Out.pr("startElement: ");
        Out.println(elementName + " " + attributes2String(atts));
    }
    // Upper-case once with a fixed locale: the default-locale variant turns
    // 'i' into a dotted capital under e.g. a Turkish locale, so names such as
    // "autoinstance" would never match.
    final String tag = elementName.toUpperCase(java.util.Locale.ROOT);
    // create a new ResourceData when it's a RESOURCE element
    if (tag.equals("RESOURCE")) {
        resourceData = new ResourceData();
        resourceData.setFeatures(Factory.newFeatureMap());
        currentAutoinstances = new ArrayList<FeatureMap>();
    }
    // Record the attributes of this element. SAX only guarantees that the
    // Attributes instance is valid during this call, and the value is read
    // again later (e.g. when the element ends), so keep a copy.
    currentAttributes = (atts == null) ? null : new org.xml.sax.helpers.AttributesImpl(atts);
    // start collecting the parameters of an auto-instance
    if (tag.equals("AUTOINSTANCE")) {
        currentAutoinstanceParams = Factory.newFeatureMap();
    }
    // same for a hidden auto-instance, additionally flagged as hidden
    if (tag.equals("HIDDEN-AUTOINSTANCE")) {
        currentAutoinstanceParams = Factory.newFeatureMap();
        Gate.setHiddenAttribute(currentAutoinstanceParams, true);
    }
    // a PARAM carries a NAME and a VALUE that are added to the
    // auto-instance parameters
    if (tag.equals("PARAM")) {
        // create the parameter map if no (HIDDEN-)AUTOINSTANCE element has
        // created one yet
        if (currentAutoinstanceParams == null) {
            currentAutoinstanceParams = Factory.newFeatureMap();
        }
        // Take the param's name and value
        String paramName = currentAttributes.getValue("NAME");
        String paramStrValue = currentAttributes.getValue("VALUE");
        if (paramName == null) {
            throw new GateRuntimeException("Found in creole.xml a PARAM element" + " for resource " + resourceData.getClassName() + " without a NAME" + " attribute. Check the file and try again.");
        }
        if (paramStrValue == null) {
            throw new GateRuntimeException("Found in creole.xml a PARAM element" + " for resource " + resourceData.getClassName() + " without a VALUE" + " attribute. Check the file and try again.");
        }
        // Add the param name and its value to the auto-instance parameters
        currentAutoinstanceParams.put(paramName, paramStrValue);
    }
    // process attributes of parameter and GUI elements
    if (tag.equals("PARAMETER")) {
        if (DEBUG) {
            for (int i = 0, len = currentAttributes.getLength(); i < len; i++) {
                Out.prln(currentAttributes.getLocalName(i));
                Out.prln(currentAttributes.getValue(i));
            }
        }
        currentParam.comment = currentAttributes.getValue("COMMENT");
        currentParam.helpURL = currentAttributes.getValue("HELPURL");
        currentParam.defaultValueString = currentAttributes.getValue("DEFAULT");
        // parseBoolean yields false for a missing (null) attribute
        currentParam.optional = Boolean.parseBoolean(currentAttributes.getValue("OPTIONAL"));
        currentParam.name = currentAttributes.getValue("NAME");
        currentParam.runtime = Boolean.parseBoolean(currentAttributes.getValue("RUNTIME"));
        currentParam.itemClassName = currentAttributes.getValue("ITEM_CLASS_NAME");
        // read the ';'-separated suffixes and transform them to a Set of
        // Strings; the set stays null when the attribute is absent
        String suffixes = currentAttributes.getValue("SUFFIXES");
        Set<String> suffixSet = null;
        if (suffixes != null) {
            suffixSet = new HashSet<String>();
            StringTokenizer strTokenizer = new StringTokenizer(suffixes, ";");
            while (strTokenizer.hasMoreTokens()) {
                suffixSet.add(strTokenizer.nextToken());
            }
        }
        currentParam.suffixes = suffixSet;
    } else if (tag.equals("GUI")) {
        String typeValue = currentAttributes.getValue("TYPE");
        if (typeValue != null) {
            String guiType = typeValue.toUpperCase(java.util.Locale.ROOT);
            if (guiType.equals("LARGE")) {
                resourceData.setGuiType(ResourceData.LARGE_GUI);
            }
            if (guiType.equals("SMALL")) {
                resourceData.setGuiType(ResourceData.SMALL_GUI);
            }
        }
    }
    // When an OR group starts, move any parameters collected so far into the
    // parameter list (they're not disjunctive or a previous "/OR" would have
    // taken them already)
    if (tag.equals("OR")) {
        if (!currentParamDisjunction.isEmpty()) {
            currentParamList.addAll(currentFlattenedDisjunction());
            currentParamDisjunction.clear();
        }
    }
}
Also used : FeatureMap(gate.FeatureMap) StringTokenizer(java.util.StringTokenizer) GateRuntimeException(gate.util.GateRuntimeException)

Example 10 with FeatureMap

use of gate.FeatureMap in project gate-core by GateNLP.

the class CreoleXmlHandler method endElement.

// checkStack
/**
 * Called when the SAX parser encounters the end of an XML element.
 * This is where ResourceData objects get values set, and where
 * they are added to the CreoleRegister when we parsed their complete
 * metadata entries.
 * <p>
 * Any pending character data is flushed first. The element name is compared,
 * ignoring case, against the known creole.xml element names; an element that
 * is not recognised is stored as a feature of the current resource data,
 * keyed by its upper-cased name.
 *
 * @param uri not used by this method
 * @param qName not used by this method
 * @param elementName the name of the element being closed
 * @throws GateSaxException if the resource data is invalid, a URL cannot be
 * built, or a resource cannot be loaded, initialised or auto-instantiated
 * @throws SAXException declared by the overridden method
 * @throws GateRuntimeException if a PRIORITY attribute is not numeric or the
 * class named by RESOURCE_DISPLAYED cannot be loaded
 */
@Override
public void endElement(String uri, String qName, String elementName) throws GateSaxException, SAXException {
    // flush buffered character data before handling the end of the element
    if (readCharacterStatus) {
        readCharacterStatus = false;
        charactersAction(new String(contentBuffer).toCharArray(), 0, contentBuffer.length());
    }
    if (DEBUG) {
        Out.prln("endElement: " + elementName);
    }
    // Upper-case once with a fixed locale: the default-locale variant turns
    // 'i' into a dotted capital under e.g. a Turkish locale, so names such as
    // "interface" or "autoinstance" would never match.
    final String tag = elementName.toUpperCase(java.util.Locale.ROOT);
    // ////////////////////////////////////////////////////////////////
    if (tag.equals("RESOURCE")) {
        // check for validity of the resource data
        if (!resourceData.isValid()) {
            throw new GateSaxException("Invalid resource data: " + resourceData.getValidityMessage());
        }
        // TODO should this be a URI instead of the URL?
        try {
            resourceData.setXmlFileUrl(new URL(plugin.getBaseURL(), "creole.xml"));
        } catch (MalformedURLException e) {
            throw new GateSaxException("Couldn't load autoloading resource: " + resourceData.getName() + "; problem was: " + e, e);
        }
        // add the new resource data object to the creole register
        register.put(resourceData.getClassName(), resourceData);
        // if the resource is auto-loading, try and load its class
        if (resourceData.isAutoLoading()) {
            try {
                @SuppressWarnings("unused") Class<?> resClass = resourceData.getResourceClass();
            } catch (ClassNotFoundException e) {
                // keep the original exception as the cause
                throw new GateSaxException("Couldn't load autoloading resource: " + resourceData.getName() + "; problem was: " + e, e);
            }
        }
        // move any remaining parameters into the parameter list
        // (note that they're not disjunctive or the "/OR" would have got them)
        if (!currentParamDisjunction.isEmpty()) {
            currentParamList.addAll(currentFlattenedDisjunction());
            currentParamDisjunction.clear();
        }
        // add the parameter list to the resource (and reinitialise it)
        resourceData.setParameterList(currentParamList);
        currentParamList = new ParameterList();
        // final initialization of the ResourceData
        try {
            resourceData.init();
        } catch (Exception ex) {
            throw new GateSaxException("Couldn't initialize ResourceData for " + resourceData.getName(), ex);
        }
        if (DEBUG) {
            Out.println("added: " + resourceData);
        }
        // Iterate through autoinstances and try to instantiate them
        if (currentAutoinstances != null && !currentAutoinstances.isEmpty()) {
            ResourceData rd = Gate.getCreoleRegister().get(resourceData.getClassName());
            ParameterList existingParameters = null;
            if (rd.getReferenceCount() > 1) {
                // we aren't going to redefine a resource but we do need to use the
                // parameters from the new instance so we get the right base URL and
                // default values etc.
                existingParameters = rd.getParameterList();
                rd.setParameterList(resourceData.getParameterList());
            }
            try {
                Iterator<FeatureMap> iter = currentAutoinstances.iterator();
                while (iter.hasNext()) {
                    FeatureMap autoinstanceParams = iter.next();
                    iter.remove();
                    FeatureMap autoinstanceFeatures = null;
                    // if the parameters are flagged as hidden, create a feature
                    // map and move the hidden attribute there.
                    if (Gate.getHiddenAttribute(autoinstanceParams)) {
                        autoinstanceFeatures = Factory.newFeatureMap();
                        Gate.setHiddenAttribute(autoinstanceFeatures, true);
                        autoinstanceParams.remove(GateConstants.HIDDEN_FEATURE_KEY);
                    }
                    // Try to create the resource.
                    try {
                        Factory.createResource(resourceData.getClassName(), autoinstanceParams, autoinstanceFeatures);
                    } catch (ResourceInstantiationException e) {
                        // keep the original exception as the cause
                        throw new GateSaxException("Couldn't auto-instantiate resource: " + resourceData.getName() + "; problem was: " + e, e);
                    }
                }
            } finally {
                // if we swapped the parameter list of the already registered
                // resource then put the old one back before we break something
                if (existingParameters != null) {
                    rd.setParameterList(existingParameters);
                }
            }
        }
        currentAutoinstances = null;
    // End RESOURCE processing
    // ////////////////////////////////////////////////////////////////
    } else if (tag.equals("AUTOINSTANCE") || tag.equals("HIDDEN-AUTOINSTANCE")) {
        // Cache the auto-instance parameters until the RESOURCE element ends
        if (currentAutoinstanceParams != null) {
            currentAutoinstances.add(currentAutoinstanceParams);
        }
    // End AUTOINSTANCE processing
    // ////////////////////////////////////////////////////////////////
    } else if (tag.equals("PARAM")) {
        // nothing to do here; the branch keeps PARAM out of the final "else"
    // End PARAM processing
    // ////////////////////////////////////////////////////////////////
    } else if (tag.equals("NAME")) {
        checkStack("endElement", "NAME");
        resourceData.setName(contentStack.pop());
    // End NAME processing
    // ////////////////////////////////////////////////////////////////
    } else if (tag.equals("IVY")) {
        // content is discarded
        if (!contentStack.isEmpty()) {
            contentStack.pop();
        }
    // End IVY processing
    // ////////////////////////////////////////////////////////////////
    } else if (tag.equals("REQUIRES")) {
        // content is discarded
        if (!contentStack.isEmpty()) {
            contentStack.pop();
        }
    // End REQUIRES processing
    // ////////////////////////////////////////////////////////////////
    } else if (tag.equals("JAR")) {
        checkStack("endElement", "JAR");
        // add jar file name
        String jarFileName = contentStack.pop();
        if (resourceData != null) {
            resourceData.setJarFileName(jarFileName);
        }
        // build the jar file URL from the plugin base URL, inserting a "/"
        // only when the base does not already end with one
        String sourceUrlName = plugin.getBaseURL().toExternalForm();
        String separator = sourceUrlName.endsWith("/") ? "" : "/";
        String jarFileSpec = sourceUrlName + separator + jarFileName;
        try {
            URL jarFileUrl = new URL(jarFileSpec);
            if (resourceData != null) {
                resourceData.setJarFileUrl(jarFileUrl);
            }
        // We no longer need to add the jar URL to the class loader, as this
        // is done before the SAX parse
        } catch (MalformedURLException e) {
            // report the string that failed to parse (the URL object itself
            // was never created) and keep the cause
            throw new GateSaxException("bad URL " + jarFileSpec + ": " + e, e);
        }
    // End JAR processing
    // ////////////////////////////////////////////////////////////////
    } else if (tag.equals("CLASS")) {
        checkStack("endElement", "CLASS");
        resourceData.setClassName(contentStack.pop());
    // End CLASS processing
    // ////////////////////////////////////////////////////////////////
    } else if (tag.equals("COMMENT")) {
        checkStack("endElement", "COMMENT");
        resourceData.setComment(contentStack.pop());
    // End COMMENT processing
    // ////////////////////////////////////////////////////////////////
    } else if (tag.equals("HELPURL")) {
        checkStack("endElement", "HELPURL");
        resourceData.setHelpURL(contentStack.pop());
    // End HELPURL processing
    // ////////////////////////////////////////////////////////////////
    } else if (tag.equals("INTERFACE")) {
        checkStack("endElement", "INTERFACE");
        resourceData.setInterfaceName(contentStack.pop());
    // End INTERFACE processing
    // ////////////////////////////////////////////////////////////////
    } else if (tag.equals("ICON")) {
        checkStack("endElement", "ICON");
        resourceData.setIcon(contentStack.pop());
    // End ICON processing
    // ////////////////////////////////////////////////////////////////
    } else if (tag.equals("OR")) {
        // the parameters collected inside the OR group form one disjunction
        currentParamList.add(currentFlattenedDisjunction());
        currentParamDisjunction.clear();
    // End OR processing
    // ////////////////////////////////////////////////////////////////
    } else if (tag.equals("PARAMETER")) {
        checkStack("endElement", "PARAMETER");
        currentParam.typeName = contentStack.pop();
        String priorityStr = currentAttributes.getValue("PRIORITY");
        // if no priority specified, assume lowest (i.e. parameters with an
        // explicit priority come ahead of those without).
        Integer priority = Integer.MAX_VALUE;
        try {
            if (priorityStr != null) {
                priority = Integer.valueOf(priorityStr);
            }
        } catch (NumberFormatException nfe) {
            throw new GateRuntimeException("Found in creole.xml a PARAM element" + " for resource " + resourceData.getClassName() + " with a non-numeric" + " PRIORITY attribute. Check the file and try again.");
        }
        // parameters are grouped by priority
        List<Parameter> paramList = currentParamDisjunction.get(priority);
        if (paramList == null) {
            paramList = new ArrayList<Parameter>();
            currentParamDisjunction.put(priority, paramList);
        }
        paramList.add(currentParam);
        if (DEBUG) {
            Out.prln("added param: " + currentParam);
        }
        currentParam = new Parameter(plugin);
    // End PARAMETER processing
    // ////////////////////////////////////////////////////////////////
    } else if (tag.equals("AUTOLOAD")) {
        resourceData.setAutoLoading(true);
    // End AUTOLOAD processing
    // ////////////////////////////////////////////////////////////////
    } else if (tag.equals("PRIVATE")) {
        resourceData.setPrivate(true);
    // End PRIVATE processing
    // ////////////////////////////////////////////////////////////////
    } else if (tag.equals("TOOL")) {
        resourceData.setTool(true);
    // End TOOL processing
    // ////////////////////////////////////////////////////////////////
    } else if (tag.equals("MAIN_VIEWER")) {
        resourceData.setIsMainView(true);
    // End MAIN_VIEWER processing
    // ////////////////////////////////////////////////////////////////
    } else if (tag.equals("RESOURCE_DISPLAYED")) {
        checkStack("endElement", "RESOURCE_DISPLAYED");
        String resourceDisplayed = contentStack.pop();
        resourceData.setResourceDisplayed(resourceDisplayed);
        // make sure the named class can actually be loaded
        try {
            @SuppressWarnings("unused") Class<?> resourceDisplayedClass = Gate.getClassLoader().loadClass(resourceDisplayed);
        } catch (ClassNotFoundException ex) {
            throw new GateRuntimeException("Couldn't get resource class from the resource name :" + resourceDisplayed + " " + ex);
        }
    // End RESOURCE_DISPLAYED processing
    // ////////////////////////////////////////////////////////////////
    } else if (tag.equals("ANNOTATION_TYPE_DISPLAYED")) {
        checkStack("endElement", "ANNOTATION_TYPE_DISPLAYED");
        resourceData.setAnnotationTypeDisplayed(contentStack.pop());
    // End ANNOTATION_TYPE_DISPLAYED processing
    // ////////////////////////////////////////////////////////////////
    } else if (tag.equals("GUI")) {
        // nothing to do here; the branch keeps GUI out of the final "else"
    // End GUI processing
    // ////////////////////////////////////////////////////////////////
    } else if (tag.equals("CREOLE")) {
        // nothing to do here; the branch keeps CREOLE out of the final "else"
    // End CREOLE processing
    // ////////////////////////////////////////////////////////////////
    } else if (tag.equals("CREOLE-DIRECTORY")) {
        // nothing to do here; the branch keeps CREOLE-DIRECTORY out of the
        // final "else"
    // End CREOLE-DIRECTORY processing
    // ////////////////////////////////////////////////////////////////
    } else {
        // arbitrary elements get added as features of the resource data
        if (resourceData != null) {
            resourceData.getFeatures().put(tag, ((contentStack.isEmpty()) ? null : contentStack.pop()));
        }
    }
// ////////////////////////////////////////////////////////////////
}
Also used : MalformedURLException(java.net.MalformedURLException) GateSaxException(gate.util.GateSaxException) ArrayList(java.util.ArrayList) URL(java.net.URL) GateRuntimeException(gate.util.GateRuntimeException) MalformedURLException(java.net.MalformedURLException) GateSaxException(gate.util.GateSaxException) SAXParseException(org.xml.sax.SAXParseException) SAXException(org.xml.sax.SAXException) FeatureMap(gate.FeatureMap) GateRuntimeException(gate.util.GateRuntimeException) ArrayList(java.util.ArrayList) List(java.util.List)

Aggregations

FeatureMap (gate.FeatureMap)55 Document (gate.Document)15 URL (java.net.URL)14 ResourceInstantiationException (gate.creole.ResourceInstantiationException)11 File (java.io.File)10 Resource (gate.Resource)8 GateRuntimeException (gate.util.GateRuntimeException)7 ArrayList (java.util.ArrayList)7 List (java.util.List)7 PersistenceException (gate.persist.PersistenceException)6 Annotation (gate.Annotation)5 AnnotationSet (gate.AnnotationSet)5 DataStore (gate.DataStore)5 LanguageResource (gate.LanguageResource)5 TestDocument (gate.corpora.TestDocument)4 ResourceData (gate.creole.ResourceData)4 SerialDataStore (gate.persist.SerialDataStore)4 InvalidOffsetException (gate.util.InvalidOffsetException)4 Corpus (gate.Corpus)3 ProcessingResource (gate.ProcessingResource)3