Search in sources:

Example 1 with TRECNugget

use of info.ephyra.trec.TRECNugget in project lucida by claritylab.

The method apply of the class NuggetEvaluationFilter.

/**
 * Assesses the given results against the nuggets of the current target and
 * appends a report to the log file and a summary to the concise log file.
 * <p>
 * For every result, each remaining nugget is checked with
 * <code>covers</code>. A nugget whose uncovered part is at most half of its
 * <code>size</code> is recorded on the result (via
 * <code>addCoveredNuggetID</code>) and counted as vital or okay the first
 * time it is seen. If the uncovered part is at most a quarter of its
 * <code>size</code>, the nugget is additionally removed from the list of
 * remaining nuggets. The vital/okay counts reached at each 1000
 * non-whitespace-character cutoff are stored in <code>lastVital</code> and
 * <code>lastOk</code>.
 * <p>
 * Logging is best effort: a log file that cannot be opened or written is
 * reported on standard output and never interrupts the assessment. Both log
 * files are closed before this method returns, even if the assessment fails.
 * 
 * @param results array of <code>Result</code> objects
 * @return the same array of <code>Result</code> objects that was passed in
 */
public Result[] apply(Result[] results) {
    if ((results.length == 0) || (targetId == null))
        return results;
    this.lastTarget = results[0].getQuery().getOriginalQueryString();
    String header = "===== Assessing target " + targetId + " (" + results[0].getQuery().getOriginalQueryString() + ") =====";
    int nonWhiteLength = 0;
    int notifyLength = 1000;
    BufferedWriter br = null;
    try {
        br = new BufferedWriter(new FileWriter(this.fileName, true));
    } catch (Exception e) {
        reportLogFailure("opening the log file", e);
    }
    writeLogLine(br, header);
    BufferedWriter cbr = null;
    try {
        cbr = new BufferedWriter(new FileWriter(this.conciseFileName, true));
    } catch (Exception e) {
        reportLogFailure("opening the concise log file", e);
    }
    writeLogLine(cbr, header);
    try {
        float maxScore = results[0].getScore();
        boolean maxCutWritten = false;
        // insertion-ordered, so the summary lists nuggets in the order they were first covered
        HashSet<TRECNugget> covered = new LinkedHashSet<TRECNugget>();
        HashMap<TRECNugget, Integer> coveredWhen = new HashMap<TRECNugget, Integer>();
        int vital = 0;
        int ok = 0;
        for (int i = 0; i < 7; i++) {
            lastVital[i] = 0;
            lastOk[i] = 0;
        }
        for (int r = 0; r < results.length; r++) {
            Result res = results[r];
            boolean resWritten = false;
            String[] tok = res.getAnswer().split("\\s++");
            for (int t = 0; t < tok.length; t++) nonWhiteLength += tok[t].length();
            String resultLine = "Result " + r + " (" + res.getScore() + ") is: " + res.getAnswer();
            //	write all snippets for the first 7000 characters
            if ((br != null) && (nonWhiteLength < 7000))
                resWritten = writeLogLine(br, resultLine);
            // NOTE(review): at most one cutoff is handled per result, even if a long
            // answer crosses several 1000-character marks at once - confirm this is intended
            if (nonWhiteLength > notifyLength) {
                int index = ((notifyLength - 1) / 1000);
                if (index < 7) {
                    lastVital[index] = vital;
                    lastOk[index] = ok;
                }
                writeLogLine(br, "===== " + notifyLength + " non-white char cutoff ===== ");
                notifyLength += 1000;
            }
            // the marker is retried on a later result if writing it failed
            if ((br != null) && !maxCutWritten && ((res.getScore() * 2) < maxScore))
                maxCutWritten = writeLogLine(br, "===== half score cutoff ===== ");
            int n = 0;
            while (n < nuggets.size()) {
                TRECNugget nug = nuggets.get(n);
                String[] uncovered = covers(res.getAnswer(), nug.nugget);
                if ((uncovered.length * 2) <= nug.size) {
                    if (br != null) {
                        // results beyond the 7000 character limit are logged only once they cover a nugget
                        boolean logging = true;
                        if (!resWritten) {
                            resWritten = writeLogLine(br, resultLine);
                            logging = resWritten;
                        }
                        if (logging)
                            logging = writeLogLine(br, "  Nugget covered (" + nug.nuggetID + "," + nug.nuggetType + "): " + nug.nugget);
                        if (logging && (uncovered.length != 0)) {
                            StringBuilder line = new StringBuilder("      Uncovered:");
                            for (String u : uncovered) line.append(" ").append(u);
                            writeLogLine(br, line.toString());
                        }
                    }
                    res.addCoveredNuggetID(nug.nuggetID);
                    covered.add(nug);
                    // a nugget stays in the list (and is checked against later results)
                    // unless at most a quarter of it is uncovered
                    if ((uncovered.length * 4) <= nug.size)
                        nuggets.remove(n);
                    else
                        n++;
                    if (!coveredWhen.containsKey(nug)) {
                        if ("vital".equals(nug.nuggetType))
                            vital++;
                        else
                            ok++;
                        coveredWhen.put(nug, Integer.valueOf(nonWhiteLength));
                    }
                } else {
                    n++;
                }
            }
            if (resWritten)
                writeLogLine(br, "");
        }
        writeNuggetSummary(br, covered, coveredWhen);
        writeNuggetSummary(cbr, covered, coveredWhen);
    } finally {
        closeLog(br);
        closeLog(cbr);
    }
    return results;
}

/**
 * Writes one line followed by a line separator to a log writer.
 * 
 * @param writer the log writer, may be <code>null</code> if the log could not be opened
 * @param line the text to write
 * @return <code>true</code> iff the line was written
 */
private static boolean writeLogLine(BufferedWriter writer, String line) {
    if (writer == null)
        return false;
    try {
        writer.write(line);
        writer.newLine();
        return true;
    } catch (Exception e) {
        reportLogFailure("writing to a log file", e);
        return false;
    }
}

/**
 * Writes the list of covered nuggets, followed by the nuggets still in the
 * list of remaining nuggets, and two blank lines to a log writer.
 * 
 * @param writer the log writer, may be <code>null</code> if the log could not be opened
 * @param covered the nuggets covered by at least one result
 * @param coveredWhen for each covered nugget, the non-whitespace length at which it was first covered
 */
private void writeNuggetSummary(BufferedWriter writer, HashSet<TRECNugget> covered, HashMap<TRECNugget, Integer> coveredWhen) {
    if (writer == null)
        return;
    try {
        for (TRECNugget nug : covered) {
            Integer when = coveredWhen.get(nug);
            writer.write("  (probably) covered (" + nug.nuggetID + "," + nug.nuggetType + ")" + ((when == null) ? "" : (" first at " + when)) + ": " + nug.nugget);
            writer.newLine();
        }
        for (TRECNugget nug : nuggets) {
            writer.write("  Not (securely) covered (" + nug.nuggetID + "," + nug.nuggetType + "): " + nug.nugget);
            writer.newLine();
        }
        writer.newLine();
        writer.newLine();
    } catch (Exception e) {
        reportLogFailure("writing the nugget summary", e);
    }
}

/**
 * Closes a log writer (which also flushes it), reporting but not propagating
 * any failure.
 * 
 * @param writer the log writer, may be <code>null</code> if the log could not be opened
 */
private static void closeLog(BufferedWriter writer) {
    if (writer == null)
        return;
    try {
        writer.close();
    } catch (Exception e) {
        reportLogFailure("closing a log file", e);
    }
}

/**
 * Reports a logging problem on standard output.
 * 
 * @param action what was being attempted when the problem occurred
 * @param e the exception that was caught
 */
private static void reportLogFailure(String action, Exception e) {
    System.out.println(e.getClass().getName() + " (" + e.getMessage() + ") while " + action);
}
Also used : LinkedHashSet(java.util.LinkedHashSet) TRECNugget(info.ephyra.trec.TRECNugget) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) FileWriter(java.io.FileWriter) ArrayList(java.util.ArrayList) BufferedWriter(java.io.BufferedWriter) Result(info.ephyra.search.Result)

Example 2 with TRECNugget

use of info.ephyra.trec.TRECNugget in project lucida by claritylab.

The method setTargetID of the class NuggetEvaluationFilter.

/**
 * Sets the ID of the next target globally, so that upcoming results can be
 * checked against the nuggets of that target. On the first call the nuggets
 * are loaded. The ID is also passed on to every registered filter instance,
 * and the number of vital nuggets of the target is recounted.
 * 
 * @param tid the ID of the next target
 */
@SuppressWarnings("unchecked")
public static synchronized void setTargetID(String tid) {
    System.out.println("NuggetEvaluationFilter: global target ID set to " + tid);

    // load the nuggets lazily, the first time a target is set
    if (nuggetsByTargetID == null) {
        nuggetsByTargetID = new HashMap<String, ArrayList<TRECNugget>>();
        loadNuggets();
    }

    // pass the new target on to all filter instances
    Iterator<NuggetEvaluationFilter> filters = instanceSet.values().iterator();
    while (filters.hasNext()) {
        NuggetEvaluationFilter filter = filters.next();
        filter.setTargetId(tid);
    }
    targetID = tid;

    // look up the nuggets of the target; there may be none for an unknown ID
    ArrayList<TRECNugget> targetNuggets;
    if (tid == null) {
        targetNuggets = new ArrayList<TRECNugget>();
    } else {
        targetNuggets = nuggetsByTargetID.get(tid);
    }

    // recount the vital nuggets of the target
    numVital = 0;
    if (targetNuggets == null) {
        return;
    }
    for (int i = 0; i < targetNuggets.size(); i++) {
        TRECNugget candidate = targetNuggets.get(i);
        if ("vital".equals(candidate.nuggetType)) {
            numVital++;
        }
    }
}
Also used : TRECNugget(info.ephyra.trec.TRECNugget) ArrayList(java.util.ArrayList)

Example 3 with TRECNugget

use of info.ephyra.trec.TRECNugget in project lucida by claritylab.

The method loadNuggets of the class NuggetEvaluationFilter.

/**
 * Loads the nuggets from the answer file and groups them by target ID in
 * <code>nuggetsByTargetID</code>.
 * <p>
 * Empty lines and lines starting with "Qid" are ignored. Every other line is
 * split into five fields at whitespace or dots; lines with fewer fields are
 * skipped with a notice instead of aborting the whole load. Consecutive
 * lines with the same target ID end up in the same list. Problems reading
 * the file are reported on standard output, and the file is closed in any
 * case.
 */
private static void loadNuggets() {
    BufferedReader br = null;
    try {
        br = new BufferedReader(new FileReader("./res/testdata/trec/trec15answers_other"));
        String targetID = null;
        ArrayList<TRECNugget> nuggets = new ArrayList<TRECNugget>();
        String line;
        // readLine() returning null is the end-of-file signal
        while ((line = br.readLine()) != null) {
            if ((line.length() == 0) || line.startsWith("Qid"))
                continue;
            String[] parts = line.split("((\\s++)|\\.)", 5);
            if (parts.length < 5) {
                // e.g. a whitespace-only line; skip it rather than lose the nuggets read so far
                System.out.println("Skipping malformed nugget line: '" + line + "'");
                continue;
            }
            TRECNugget nugget = new TRECNugget(parts[0], parts[1], parts[2], parts[3], parts[4]);
            // a new target ID starts a new list; store the finished one first
            if (!nugget.targetID.equals(targetID)) {
                if (targetID != null)
                    nuggetsByTargetID.put(targetID, nuggets);
                targetID = nugget.targetID;
                nuggets = new ArrayList<TRECNugget>();
            }
            nuggets.add(nugget);
        }
        // store the list of the last target in the file
        if (targetID != null)
            nuggetsByTargetID.put(targetID, nuggets);
    } catch (Exception e) {
        System.out.println(e.getClass().getName() + " (" + e.getMessage() + ") while loading nuggets");
        e.printStackTrace(System.out);
    } finally {
        if (br != null)
            try {
                br.close();
            } catch (Exception e) {
                System.out.println(e.getClass().getName() + " (" + e.getMessage() + ") while closing nugget file");
            }
    }
}
Also used : TRECNugget(info.ephyra.trec.TRECNugget) BufferedReader(java.io.BufferedReader) ArrayList(java.util.ArrayList) FileReader(java.io.FileReader)

Aggregations

TRECNugget (info.ephyra.trec.TRECNugget)3 ArrayList (java.util.ArrayList)3 Result (info.ephyra.search.Result)1 BufferedReader (java.io.BufferedReader)1 BufferedWriter (java.io.BufferedWriter)1 FileReader (java.io.FileReader)1 FileWriter (java.io.FileWriter)1 HashMap (java.util.HashMap)1 LinkedHashMap (java.util.LinkedHashMap)1 LinkedHashSet (java.util.LinkedHashSet)1