Search in sources :

Example 1 with Event

use of opennlp.model.Event in project talismane by joliciel-informatique.

The next method of the class OpenNLPEventStream.

/**
 * Converts the next corpus event, if there is one, into an OpenNLP {@link Event}.
 * The feature results of the corpus event are turned into parallel context and
 * weight arrays via <code>OpenNLPDecisionMaker.prepareData</code>.
 *
 * @return the converted event, or <code>null</code> when the corpus event stream
 *         reports that it has no further events
 * @throws IOException
 *           declared by the signature; nothing visible in this method throws it
 *           directly
 * @throws RuntimeException
 *           wrapping any {@link TalismaneException} raised inside the method (the
 *           exception is logged before being rethrown)
 */
@Override
public Event next() throws IOException {
    try {
        // Nothing left in the corpus: callers receive null rather than an exception.
        if (!this.corpusEventStream.hasNext()) {
            return null;
        }
        ClassificationEvent source = this.corpusEventStream.next();

        // prepareData fills both lists in place.
        List<String> featureNames = new ArrayList<String>();
        List<Float> featureWeights = new ArrayList<Float>();
        OpenNLPDecisionMaker.prepareData(source.getFeatureResults(), featureNames, featureWeights);

        String[] contexts = featureNames.toArray(new String[featureNames.size()]);
        // Float -> float needs a manual copy; there is no primitive toArray.
        float[] weights = new float[featureWeights.size()];
        for (int pos = 0; pos < weights.length; pos++) {
            weights[pos] = featureWeights.get(pos);
        }
        return new Event(source.getClassification(), contexts, weights);
    } catch (TalismaneException e) {
        LOG.error(e.getMessage(), e);
        throw new RuntimeException(e);
    }
}
Also used : TalismaneException(com.joliciel.talismane.TalismaneException) ArrayList(java.util.ArrayList) Event(opennlp.model.Event) ClassificationEvent(com.joliciel.talismane.machineLearning.ClassificationEvent)

Example 2 with Event

use of opennlp.model.Event in project talismane by joliciel-informatique.

The computeEventCounts method of the class TwoPassDataIndexer.

/**
 * Streams every event from <code>eventStream</code> to <code>eventStore</code>
 * (one line per event, as produced by <code>toLine</code>) and counts the
 * predicates seen along the way. Events are not kept in memory.
 * <p>
 * Which predicates qualify is decided by the <code>update</code> helper, which is
 * handed the <code>cutoff</code>; presumably it retains those occurring at least
 * <code>cutoff</code> times (confirm against <code>update</code>). Each retained
 * predicate is added to <code>predicatesInOut</code> with a unique integer index,
 * and its count is stored at that index in the <code>predCounts</code> field.
 * <p>
 * <code>eventStore</code> is always closed before this method returns, whether it
 * completes normally or fails.
 *
 * @param eventStream
 *          the source of events; consumed until <code>hasNext()</code> is false
 * @param eventStore
 *          a writer to which the events are written for later processing; closed
 *          by this method
 * @param predicatesInOut
 *          map receiving one <code>predicate -&gt; index</code> entry per
 *          retained predicate
 * @param cutoff
 *          threshold passed through to <code>update</code>
 * @return the number of events read from <code>eventStream</code>
 * @throws IOException
 *           if reading an event, writing to or closing <code>eventStore</code>
 *           fails
 */
private int computeEventCounts(EventStream eventStream, Writer eventStore, Map<String, Integer> predicatesInOut, int cutoff) throws IOException {
    Map<String, Integer> counter = new HashMap<String, Integer>();
    Set<String> predicateSet = new HashSet<String>();
    int eventCount = 0;
    boolean completed = false;
    try {
        while (eventStream.hasNext()) {
            Event ev = eventStream.next();
            eventCount++;
            eventStore.write(this.toLine(ev));
            update(ev.getContext(), predicateSet, counter, cutoff);
        }
        predCounts = new int[predicateSet.size()];
        int index = 0;
        for (String predicate : predicateSet) {
            predCounts[index] = counter.get(predicate);
            predicatesInOut.put(predicate, index);
            index++;
        }
        completed = true;
    } finally {
        if (completed) {
            // Normal path: a failure to close must be reported to the caller.
            eventStore.close();
        } else {
            // Failure path: still release the writer, but let the original
            // exception propagate rather than masking it with a close failure.
            try {
                eventStore.close();
            } catch (IOException ignored) {
                // intentionally ignored: the primary exception is already propagating
            }
        }
    }
    return eventCount;
}
Also used : Set(java.util.Set) HashSet(java.util.HashSet) HashMap(java.util.HashMap) Iterator(java.util.Iterator) ComparableEvent(opennlp.model.ComparableEvent) Event(opennlp.model.Event)

Example 3 with Event

use of opennlp.model.Event in project talismane by joliciel-informatique.

The index method of the class TwoPassRealValueDataIndexer.

/**
 * Converts every event in <code>es</code> into a {@link ComparableEvent} that
 * carries the event's real values, keeping only the context predicates present in
 * <code>predicateIndex</code>. Events with no such predicate are dropped (and
 * logged at debug level), although their outcome still receives an id.
 * <p>
 * As a side effect, the <code>outcomeLabels</code> and <code>predLabels</code>
 * fields are rebuilt from the outcome ids assigned here and from
 * <code>predicateIndex</code>.
 *
 * @param numEvents
 *          initial capacity for the result list
 * @param es
 *          the events to index; consumed until <code>hasNext()</code> is false
 * @param predicateIndex
 *          maps each retained predicate to its integer index
 * @return the list of {@link ComparableEvent}s, in stream order
 * @throws IOException
 *           if reading from <code>es</code> fails
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
@Override
protected List index(int numEvents, EventStream es, Map<String, Integer> predicateIndex) throws IOException {
    Map<String, Integer> outcomeIds = new HashMap<String, Integer>();
    List<ComparableEvent> comparableEvents = new ArrayList<ComparableEvent>(numEvents);
    // Scratch buffer reused for every event; emptied at the top of each iteration.
    List<Integer> activePredicates = new ArrayList<Integer>();

    while (es.hasNext()) {
        activePredicates.clear();
        Event current = es.next();
        String[] predicates = current.getContext();

        // Outcome ids are handed out in order of first appearance: 0, 1, 2, ...
        String outcome = current.getOutcome();
        Integer knownId = outcomeIds.get(outcome);
        final int outcomeId;
        if (knownId == null) {
            outcomeId = outcomeIds.size();
            outcomeIds.put(outcome, outcomeId);
        } else {
            outcomeId = knownId;
        }

        for (String predicate : predicates) {
            if (predicateIndex.containsKey(predicate)) {
                activePredicates.add(predicateIndex.get(predicate));
            }
        }

        // drop events with no active features
        if (activePredicates.isEmpty()) {
            LOG.debug("Dropped event " + current.getOutcome() + ":" + Arrays.asList(current.getContext()));
            continue;
        }

        int[] predicateIds = new int[activePredicates.size()];
        int slot = 0;
        for (Integer id : activePredicates) {
            predicateIds[slot++] = id;
        }
        comparableEvents.add(new ComparableEvent(outcomeId, predicateIds, current.getValues()));
    }

    outcomeLabels = toIndexedStringArray(outcomeIds);
    predLabels = toIndexedStringArray(predicateIndex);
    return comparableEvents;
}
Also used : ComparableEvent(opennlp.model.ComparableEvent) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Event(opennlp.model.Event) List(java.util.List)

Example 4 with Event

use of opennlp.model.Event in project talismane by joliciel-informatique.

The next method of the class RealValueFileEventStream2.

/**
 * Parses the current <code>line</code> into an {@link Event}.
 * <p>
 * The first whitespace-separated token is the outcome, where
 * <code>&amp;null;</code> stands for the empty string and
 * <code>&amp;space;</code> for a single space. The remaining tokens are the
 * context. If the line contains an <code>=</code> anywhere, every context token
 * must have the form <code>name=value</code>, split on its last <code>=</code>,
 * with the value parsed as a float. Otherwise the tokens are used verbatim and
 * the event carries no values.
 *
 * @return the event described by the current line
 * @throws RuntimeException
 *           if the line contains an <code>=</code> but some context token does not
 */
@Override
public Event next() {
    StringTokenizer tokens = new StringTokenizer(line);
    String outcome = tokens.nextToken();
    if ("&null;".equals(outcome)) {
        outcome = "";
    } else if ("&space;".equals(outcome)) {
        outcome = " ";
    }

    // Everything after the outcome is context.
    int featureCount = tokens.countTokens();
    String[] context = new String[featureCount];

    // Assaf update: read real values from file
    if (!line.contains("=")) {
        // Plain features: no value array at all.
        for (int pos = 0; pos < featureCount; pos++) {
            context[pos] = tokens.nextToken();
        }
        return new Event(outcome, context);
    }

    float[] values = new float[featureCount];
    for (int pos = 0; pos < featureCount; pos++) {
        String token = tokens.nextToken();
        // Split on the LAST '=' so feature names may themselves contain '='.
        int separator = token.lastIndexOf('=');
        if (separator < 0) {
            LOG.error("Missing value");
            LOG.error("Line: " + line);
            LOG.error("Token: " + token);
            throw new RuntimeException("Missing value, on token \"" + token + "\"");
        }
        context[pos] = token.substring(0, separator);
        values[pos] = Float.parseFloat(token.substring(separator + 1));
    }
    return new Event(outcome, context, values);
}
Also used : StringTokenizer(java.util.StringTokenizer) Event(opennlp.model.Event)

Example 5 with Event

use of opennlp.model.Event in project talismane by joliciel-informatique.

The index method of the class TwoPassDataIndexer.

/**
 * Converts every event in <code>es</code> into a {@link ComparableEvent}, keeping
 * only the context predicates present in <code>predicateIndex</code>. Events with
 * no such predicate are dropped (and logged at debug level), although their
 * outcome still receives an id.
 * <p>
 * As a side effect, the <code>outcomeLabels</code> and <code>predLabels</code>
 * fields are rebuilt from the outcome ids assigned here and from
 * <code>predicateIndex</code>.
 *
 * @param numEvents
 *          initial capacity for the result list
 * @param es
 *          the events to index; consumed until <code>hasNext()</code> is false
 * @param predicateIndex
 *          maps each retained predicate to its integer index
 * @return the list of {@link ComparableEvent}s, in stream order
 * @throws IOException
 *           if reading from <code>es</code> fails
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
protected List index(int numEvents, EventStream es, Map<String, Integer> predicateIndex) throws IOException {
    Map<String, Integer> outcomeIds = new HashMap<String, Integer>();
    List<ComparableEvent> comparableEvents = new ArrayList<ComparableEvent>(numEvents);
    // Scratch buffer reused for every event; emptied at the top of each iteration.
    List<Integer> activePredicates = new ArrayList<Integer>();

    while (es.hasNext()) {
        activePredicates.clear();
        Event current = es.next();
        String[] predicates = current.getContext();

        // Outcome ids are handed out in order of first appearance: 0, 1, 2, ...
        String outcome = current.getOutcome();
        Integer knownId = outcomeIds.get(outcome);
        final int outcomeId;
        if (knownId == null) {
            outcomeId = outcomeIds.size();
            outcomeIds.put(outcome, outcomeId);
        } else {
            outcomeId = knownId;
        }

        for (String predicate : predicates) {
            if (predicateIndex.containsKey(predicate)) {
                activePredicates.add(predicateIndex.get(predicate));
            }
        }

        // drop events with no active features
        if (activePredicates.isEmpty()) {
            LOG.debug("Dropped event " + current.getOutcome() + ":" + Arrays.asList(current.getContext()));
            continue;
        }

        int[] predicateIds = new int[activePredicates.size()];
        int slot = 0;
        for (Integer id : activePredicates) {
            predicateIds[slot++] = id;
        }
        comparableEvents.add(new ComparableEvent(outcomeId, predicateIds));
    }

    outcomeLabels = toIndexedStringArray(outcomeIds);
    predLabels = toIndexedStringArray(predicateIndex);
    return comparableEvents;
}
Also used : ComparableEvent(opennlp.model.ComparableEvent) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Event(opennlp.model.Event) List(java.util.List)

Aggregations

Event (opennlp.model.Event)5 ArrayList (java.util.ArrayList)3 HashMap (java.util.HashMap)3 ComparableEvent (opennlp.model.ComparableEvent)3 List (java.util.List)2 TalismaneException (com.joliciel.talismane.TalismaneException)1 ClassificationEvent (com.joliciel.talismane.machineLearning.ClassificationEvent)1 HashSet (java.util.HashSet)1 Iterator (java.util.Iterator)1 Set (java.util.Set)1 StringTokenizer (java.util.StringTokenizer)1