Search in sources :

Example 1 with ArrayListNoNulls

use of com.joliciel.talismane.utils.ArrayListNoNulls in project talismane by joliciel-informatique.

The method getAnnotator of the class RawTextAnnotatorFactory.

/**
 * Builds the raw text annotator described by a tab-delimited descriptor.
 * <p>
 * The first field is the annotator name. When it is {@code RegexMarkerFilter}
 * (legacy name) or the simple name of {@link RawTextRegexAnnotator}, the
 * remaining fields are, in order:
 * <ol>
 * <li>a comma-separated list of {@link RawTextMarkType} constant names;</li>
 * <li>a string handed unchanged to the {@link RawTextRegexAnnotator}
 * constructor (presumably the regular expression);</li>
 * <li>optionally, an integer handed to the same constructor (presumably the
 * capture group index); 0 is used when this field is absent;</li>
 * <li>only when the mark types contain {@code REPLACE}: the replacement
 * text; otherwise, only when they contain {@code TAG}: the attribute to
 * set, written {@code name=value}, or just {@code name} for an empty
 * value.</li>
 * </ol>
 * Any other name must be the simple class name of
 * {@code DuplicateWhiteSpaceFilter}, {@code NewlineEndOfSentenceMarker} or
 * {@code NewlineSpaceMarker}, which is then instantiated by reflection with
 * {@code blockSize} as its only constructor argument.
 *
 * @param descriptor
 *          the tab-delimited annotator description, as laid out above
 * @param blockSize
 *          passed through to the constructor of the annotator being built
 * @return the configured annotator, never null
 * @throws TalismaneException
 *           if a descriptor is incorrectly configured: wrong number of
 *           fields, unknown annotator name, or failure to instantiate the
 *           annotator class
 */
public RawTextAnnotator getAnnotator(String descriptor, int blockSize) throws TalismaneException {
    RawTextAnnotator filter = null;
    String[] parts = descriptor.split("\t");
    String filterName = parts[0];
    // add equality to RegexMarkerFilter for historical reasons
    if (filterName.equals("RegexMarkerFilter") || filterName.equals(RawTextRegexAnnotator.class.getSimpleName())) {
        // Without this guard a descriptor holding only the annotator name
        // would fail on parts[1] with an ArrayIndexOutOfBoundsException
        // instead of the documented TalismaneException.
        if (parts.length < 2) {
            throw new TalismaneException("Wrong number of arguments for " + RawTextRegexAnnotator.class.getSimpleName() + ". Expected at least 3, but was " + parts.length);
        }
        List<RawTextMarkType> filterTypes = new ArrayListNoNulls<RawTextMarkType>();
        for (String filterTypeString : parts[1].split(",")) {
            filterTypes.add(RawTextMarkType.valueOf(filterTypeString));
        }
        // REPLACE takes precedence over TAG: only one trailing field is read.
        boolean needsReplacement = filterTypes.contains(RawTextMarkType.REPLACE);
        boolean needsTag = !needsReplacement && filterTypes.contains(RawTextMarkType.TAG);
        int minParams = (needsReplacement || needsTag) ? 4 : 3;

        // The integer field is optional. When present it sits at index 3 and
        // pushes the replacement/tag field to index 4; when absent the
        // replacement/tag field is at index 3.
        int groupIndex = 0;
        int extraIndex = 3;
        if (parts.length == minParams + 1) {
            groupIndex = Integer.parseInt(parts[3]);
            extraIndex = 4;
        } else if (parts.length != minParams) {
            throw new TalismaneException("Wrong number of arguments for " + RawTextRegexAnnotator.class.getSimpleName() + ". Expected " + minParams + " or " + (minParams + 1) + ", but was " + parts.length);
        }

        filter = new RawTextRegexAnnotator(filterTypes, parts[2], groupIndex, blockSize);
        if (needsReplacement) {
            filter.setReplacement(parts[extraIndex]);
        }
        if (needsTag) {
            String tag = parts[extraIndex];
            int equalsPos = tag.indexOf('=');
            if (equalsPos >= 0) {
                // Split on the first '=' only, so the value may itself contain '='.
                filter.setAttribute(new StringAttribute(tag.substring(0, equalsPos), tag.substring(equalsPos + 1)));
            } else {
                filter.setAttribute(new StringAttribute(tag, ""));
            }
        }
    } else {
        List<Class<? extends RawTextAnnotator>> classes = new ArrayListNoNulls<Class<? extends RawTextAnnotator>>();
        classes.add(DuplicateWhiteSpaceFilter.class);
        classes.add(NewlineEndOfSentenceMarker.class);
        classes.add(NewlineSpaceMarker.class);
        for (Class<? extends RawTextAnnotator> clazz : classes) {
            if (filterName.equals(clazz.getSimpleName())) {
                try {
                    // NOTE(review): Integer.class only matches a constructor declared
                    // with a boxed Integer parameter; if these annotators declare
                    // a primitive int parameter this lookup would need
                    // Integer.TYPE - confirm against the annotator classes.
                    Constructor<? extends RawTextAnnotator> constructor = clazz.getConstructor(Integer.class);
                    filter = constructor.newInstance(blockSize);
                } catch (ReflectiveOperationException e) {
                    throw new TalismaneException("Problem building class: " + filterName, e);
                }
            }
        }
        if (filter == null)
            throw new TalismaneException("Unknown text filter class: " + filterName);
    }
    return filter;
}
Also used : TalismaneException(com.joliciel.talismane.TalismaneException) StringAttribute(com.joliciel.talismane.tokeniser.StringAttribute) ArrayListNoNulls(com.joliciel.talismane.utils.ArrayListNoNulls)

Aggregations

TalismaneException (com.joliciel.talismane.TalismaneException)1 StringAttribute (com.joliciel.talismane.tokeniser.StringAttribute)1 ArrayListNoNulls (com.joliciel.talismane.utils.ArrayListNoNulls)1