Search in sources :

Example 1 with JsonValue

use of javax.json.JsonValue in project CoreNLP by stanfordnlp.

the class ScorePhrases method learnNewPhrasesPrivate.

/**
 * Learns new candidate phrases for {@code label} from the patterns selected in this iteration.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Either gathers statistics without applying patterns (when
 *       {@code constVars.doNotApplyPatterns} is set) or applies
 *       {@code patternsLearnedThisIter} via {@code applyPats}.</li>
 *   <li>If {@code computeProcDataFreq} is true, fills {@code Data.processedDataFreq} from
 *       {@code Data.rawFreq} using the normalization configured on the phrase scorer.</li>
 *   <li>Selects words according to {@code constVars.wordScoring}: {@code WEIGHTEDNORM} scores
 *       all candidate phrases and picks the top ones (optionally writing a JSON justification
 *       file under {@code constVars.outDir}); {@code BPB} picks the single phrase whose best
 *       pattern has the highest weight.</li>
 * </ol>
 *
 * @param label label the phrases are learned for; also used in the output directory name
 * @param patternsForEachToken forwarded to {@code statsWithoutApplyingPatterns}
 * @param patternsLearnedThisIter patterns selected in this iteration and their weights
 * @param allSelectedPatterns forwarded to the phrase scorer
 * @param alreadyIdentifiedWords forwarded to the phrase scorer; excluded from the BPB candidates
 * @param matchedTokensByPat forwarded to {@code applyPats}
 * @param scoreForAllWordsThisIteration cleared and refilled with all phrase scores
 *        (WEIGHTEDNORM only)
 * @param terms phrase-by-pattern counter that is populated by this method
 * @param wordsPatExtracted phrase-by-pattern counter passed to the pattern application step and
 *        read afterwards
 * @param patternsAndWords4Label pattern-by-phrase counter read to build the justification output
 * @param identifier sub-directory name used below {@code constVars.outDir}
 * @param ignoreWords extra phrases to ignore when choosing top words; may be null or empty
 * @param computeProcDataFreq whether to (re)compute {@code Data.processedDataFreq}
 * @return the selected phrases; for BPB a counter holding the single best phrase, or an empty
 *         counter when there is no candidate
 * @throws IOException if reading or writing the justification file fails
 * @throws ClassNotFoundException declared for the helpers this method calls
 * @throws RuntimeException if the normalization or the word scoring mode is not recognized
 */
private Counter<CandidatePhrase> learnNewPhrasesPrivate(String label, PatternsForEachToken patternsForEachToken, Counter<E> patternsLearnedThisIter, Counter<E> allSelectedPatterns, Set<CandidatePhrase> alreadyIdentifiedWords, CollectionValuedMap<E, Triple<String, Integer, Integer>> matchedTokensByPat, Counter<CandidatePhrase> scoreForAllWordsThisIteration, TwoDimensionalCounter<CandidatePhrase, E> terms, TwoDimensionalCounter<CandidatePhrase, E> wordsPatExtracted, TwoDimensionalCounter<E, CandidatePhrase> patternsAndWords4Label, String identifier, Set<CandidatePhrase> ignoreWords, boolean computeProcDataFreq) throws IOException, ClassNotFoundException {
    Set<CandidatePhrase> alreadyLabeledWords = new HashSet<>();
    if (constVars.doNotApplyPatterns) {
        // if want to get the stats by the lossy way of just counting without
        // applying the patterns
        ConstantsAndVariables.DataSentsIterator sentsIter = new ConstantsAndVariables.DataSentsIterator(constVars.batchProcessSents);
        while (sentsIter.hasNext()) {
            Pair<Map<String, DataInstance>, File> sentsf = sentsIter.next();
            this.statsWithoutApplyingPatterns(sentsf.first(), patternsForEachToken, patternsLearnedThisIter, wordsPatExtracted);
        }
    } else {
        if (patternsLearnedThisIter.size() > 0) {
            this.applyPats(patternsLearnedThisIter, label, wordsPatExtracted, matchedTokensByPat, alreadyLabeledWords);
        }
    }
    if (computeProcDataFreq) {
        if (!phraseScorer.wordFreqNorm.equals(Normalization.NONE)) {
            Redwood.log(Redwood.DBG, "computing processed freq");
            for (Entry<CandidatePhrase, Double> fq : Data.rawFreq.entrySet()) {
                Double in = fq.getValue();
                if (phraseScorer.wordFreqNorm.equals(Normalization.SQRT)) {
                    in = Math.sqrt(in);
                } else if (phraseScorer.wordFreqNorm.equals(Normalization.LOG)) {
                    in = 1 + Math.log(in);
                } else {
                    throw new RuntimeException("can't understand the normalization");
                }
                // Report the raw frequency here: "in" already holds the processed value at this point.
                assert !in.isNaN() : "Why is processed freq nan when rawfreq is " + fq.getValue();
                Data.processedDataFreq.setCount(fq.getKey(), in);
            }
        } else {
            Data.processedDataFreq = Data.rawFreq;
        }
    }
    if (constVars.wordScoring.equals(WordScoring.WEIGHTEDNORM)) {
        // Keep only phrases that are neither known words of other semantic classes (by surface
        // form or lemma) nor already labeled.
        for (CandidatePhrase en : wordsPatExtracted.firstKeySet()) {
            if (!constVars.getOtherSemanticClassesWords().contains(en) && (en.getPhraseLemma() == null || !constVars.getOtherSemanticClassesWords().contains(CandidatePhrase.createOrGet(en.getPhraseLemma()))) && !alreadyLabeledWords.contains(en)) {
                terms.addAll(en, wordsPatExtracted.getCounter(en));
            }
        }
        removeKeys(terms, constVars.getStopWords());
        Counter<CandidatePhrase> phraseScores = phraseScorer.scorePhrases(label, terms, wordsPatExtracted, allSelectedPatterns, alreadyIdentifiedWords, false);
        // NOTE(review): this and the println below look like leftover debugging output for one
        // hard-coded phrase; kept as-is because removing them changes what is written to stdout.
        System.out.println("count for word U.S. is " + phraseScores.getCount(CandidatePhrase.createOrGet("U.S.")));
        Set<CandidatePhrase> ignoreWordsAll;
        if (ignoreWords != null && !ignoreWords.isEmpty()) {
            ignoreWordsAll = CollectionUtils.unionAsSet(ignoreWords, constVars.getOtherSemanticClassesWords());
        } else {
            ignoreWordsAll = new HashSet<>(constVars.getOtherSemanticClassesWords());
        }
        ignoreWordsAll.addAll(constVars.getSeedLabelDictionary().get(label));
        ignoreWordsAll.addAll(constVars.getLearnedWords(label).keySet());
        System.out.println("ignoreWordsAll contains word U.S. is " + ignoreWordsAll.contains(CandidatePhrase.createOrGet("U.S.")));
        Counter<CandidatePhrase> finalwords = chooseTopWords(phraseScores, terms, phraseScores, ignoreWordsAll, constVars.thresholdWordExtract);
        phraseScorer.printReasonForChoosing(finalwords);
        scoreForAllWordsThisIteration.clear();
        Counters.addInPlace(scoreForAllWordsThisIteration, phraseScores);
        Redwood.log(ConstantsAndVariables.minimaldebug, "\n\n## Selected Words for " + label + " : " + Counters.toSortedString(finalwords, finalwords.size(), "%1$s:%2$.2f", "\t"));
        if (constVars.goldEntities != null) {
            Map<String, Boolean> goldEntities4Label = constVars.goldEntities.get(label);
            if (goldEntities4Label != null) {
                StringBuilder s = new StringBuilder();
                for (CandidatePhrase x : finalwords.keySet()) {
                    s.append(x.getPhrase() + (goldEntities4Label.containsKey(x.getPhrase()) ? ":" + goldEntities4Label.get(x.getPhrase()) : ":UKNOWN") + "\n");
                }
                Redwood.log(ConstantsAndVariables.minimaldebug, "\n\n## Gold labels for selected words for label " + label + " : " + s.toString());
            } else {
                Redwood.log(Redwood.DBG, "No gold entities provided for label " + label);
            }
        }
        if (constVars.outDir != null && !constVars.outDir.isEmpty()) {
            String outputdir = constVars.outDir + "/" + identifier + "/" + label;
            IOUtils.ensureDir(new File(outputdir));
            // For every selected word, count the phrases that share an extracting pattern with it.
            TwoDimensionalCounter<CandidatePhrase, CandidatePhrase> reasonForWords = new TwoDimensionalCounter<>();
            for (CandidatePhrase word : finalwords.keySet()) {
                for (E l : wordsPatExtracted.getCounter(word).keySet()) {
                    for (CandidatePhrase w2 : patternsAndWords4Label.getCounter(l)) {
                        reasonForWords.incrementCount(word, w2);
                    }
                }
            }
            Redwood.log(ConstantsAndVariables.minimaldebug, "Saving output in " + outputdir);
            String filename = outputdir + "/words.json";
            // the json object is an array corresponding to each iteration - of list
            // of objects,
            // each of which is a bean of entity and reasons
            JsonArrayBuilder obj = Json.createArrayBuilder();
            if (writtenInJustification.containsKey(label) && writtenInJustification.get(label)) {
                // Carry over the iterations already written for this label. try-with-resources
                // closes the stream and the reader even when parsing the existing file fails.
                try (BufferedInputStream jsonIn = new BufferedInputStream(new FileInputStream(filename));
                    JsonReader jsonReader = Json.createReader(jsonIn)) {
                    JsonArray objarr = jsonReader.readArray();
                    for (JsonValue o : objarr) {
                        obj.add(o);
                    }
                }
            }
            JsonArrayBuilder objThisIter = Json.createArrayBuilder();
            for (CandidatePhrase w : reasonForWords.firstKeySet()) {
                JsonObjectBuilder objinner = Json.createObjectBuilder();
                JsonArrayBuilder l = Json.createArrayBuilder();
                for (CandidatePhrase w2 : reasonForWords.getCounter(w).keySet()) {
                    l.add(w2.getPhrase());
                }
                JsonArrayBuilder pats = Json.createArrayBuilder();
                for (E p : wordsPatExtracted.getCounter(w)) {
                    pats.add(p.toStringSimple());
                }
                objinner.add("reasonwords", l);
                objinner.add("patterns", pats);
                objinner.add("score", finalwords.getCount(w));
                objinner.add("entity", w.getPhrase());
                objThisIter.add(objinner.build());
            }
            obj.add(objThisIter);
            // Redwood.log(ConstantsAndVariables.minimaldebug, channelNameLogger,
            // "Writing justification at " + filename);
            IOUtils.writeStringToFile(StringUtils.normalize(StringUtils.toAscii(obj.build().toString())), filename, "ASCII");
            // Remember that the file now exists so the next iteration appends to it.
            writtenInJustification.put(label, true);
        }
        if (constVars.justify) {
            Redwood.log(Redwood.DBG, "\nJustification for phrases:\n");
            for (CandidatePhrase word : finalwords.keySet()) {
                Redwood.log(Redwood.DBG, "Phrase " + word + " extracted because of patterns: \t" + Counters.toSortedString(wordsPatExtracted.getCounter(word), wordsPatExtracted.getCounter(word).size(), "%1$s:%2$f", "\n"));
            }
        }
        return finalwords;
    } else if (constVars.wordScoring.equals(WordScoring.BPB)) {
        Counters.addInPlace(terms, wordsPatExtracted);
        // For each phrase, record the highest weight among its extracting patterns and which
        // pattern that was.
        Counter<CandidatePhrase> maxPatWeightTerms = new ClassicCounter<>();
        Map<CandidatePhrase, E> wordMaxPat = new HashMap<>();
        for (Entry<CandidatePhrase, ClassicCounter<E>> en : terms.entrySet()) {
            Counter<E> weights = new ClassicCounter<>();
            for (E k : en.getValue().keySet()) {
                weights.setCount(k, patternsLearnedThisIter.getCount(k));
            }
            maxPatWeightTerms.setCount(en.getKey(), Counters.max(weights));
            wordMaxPat.put(en.getKey(), Counters.argmax(weights));
        }
        Counters.removeKeys(maxPatWeightTerms, alreadyIdentifiedWords);
        double maxvalue = Counters.max(maxPatWeightTerms);
        // The small epsilon keeps every phrase tied at the maximum weight.
        Set<CandidatePhrase> words = Counters.keysAbove(maxPatWeightTerms, maxvalue - 1e-10);
        CandidatePhrase bestw = null;
        if (words.size() > 1) {
            // Break ties by the count of the phrase with its best pattern.
            double max = Double.NEGATIVE_INFINITY;
            for (CandidatePhrase w : words) {
                if (terms.getCount(w, wordMaxPat.get(w)) > max) {
                    max = terms.getCount(w, wordMaxPat.get(w));
                    bestw = w;
                }
            }
        } else if (words.size() == 1) {
            bestw = words.iterator().next();
        } else {
            return new ClassicCounter<>();
        }
        Redwood.log(ConstantsAndVariables.minimaldebug, "Selected Words: " + bestw);
        return Counters.asCounter(Arrays.asList(bestw));
    } else {
        throw new RuntimeException("wordscoring " + constVars.wordScoring + " not identified");
    }
}
Also used : Entry(java.util.Map.Entry) Counter(edu.stanford.nlp.stats.Counter) ClassicCounter(edu.stanford.nlp.stats.ClassicCounter) TwoDimensionalCounter(edu.stanford.nlp.stats.TwoDimensionalCounter) BufferedInputStream(java.io.BufferedInputStream) JsonReader(javax.json.JsonReader) JsonArrayBuilder(javax.json.JsonArrayBuilder) JsonObjectBuilder(javax.json.JsonObjectBuilder) JsonValue(javax.json.JsonValue) TwoDimensionalCounter(edu.stanford.nlp.stats.TwoDimensionalCounter) FileInputStream(java.io.FileInputStream) JsonArray(javax.json.JsonArray) File(java.io.File)

Example 2 with JsonValue

use of javax.json.JsonValue in project torodb by torodb.

the class BaseArrayToArrayConverter method fromJsonValue.

/**
 * Builds a {@link KvArray} from a JSON array by converting each element with the converter
 * that {@code valueToArrayConverterProvider} returns for that element.
 *
 * @param value the JSON array to convert
 * @return a {@link ListKvArray} holding the converted elements in the array's order
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
@Override
public KvArray fromJsonValue(JsonArray value) {
    final List<KvValue<?>> converted = new ArrayList<>(value.size());
    value.forEach(element -> {
        // The converter is deliberately used as a raw type: its concrete type parameters
        // depend on the runtime kind of the element.
        ArrayConverter elementConverter = valueToArrayConverterProvider.fromJsonValue(element);
        converted.add(elementConverter.fromJsonValue(element));
    });
    return new ListKvArray(converted);
}
Also used : ListKvArray(com.torodb.kvdocument.values.heap.ListKvArray) ArrayList(java.util.ArrayList) JsonValue(javax.json.JsonValue) KvValue(com.torodb.kvdocument.values.KvValue)

Example 3 with JsonValue

use of javax.json.JsonValue in project torodb by torodb.

the class BaseArrayValueToJsonConverter method toValue.

/**
 * Turns a JSON array into a {@link KvArray}: each element is converted with the converter
 * that {@code valueToArrayConverterProvider} selects for it.
 *
 * @param value the JSON array to convert
 * @return a {@link ListKvArray} with one converted value per element, in the same order
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
@Override
public KvArray toValue(JsonArray value) {
    final List<KvValue<?>> kvValues = new ArrayList<>(value.size());
    for (int index = 0; index < value.size(); index++) {
        final JsonValue element = value.get(index);
        // Raw type on purpose: the converter's type parameters vary with the element kind.
        final ArrayConverter elementConverter = valueToArrayConverterProvider.fromJsonValue(element);
        kvValues.add(elementConverter.fromJsonValue(element));
    }
    return new ListKvArray(kvValues);
}
Also used : ListKvArray(com.torodb.kvdocument.values.heap.ListKvArray) ArrayList(java.util.ArrayList) JsonValue(javax.json.JsonValue) ArrayConverter(com.torodb.backend.converters.array.ArrayConverter) KvValue(com.torodb.kvdocument.values.KvValue)

Example 4 with JsonValue

use of javax.json.JsonValue in project azure-iot-sdk-java by Azure.

the class Tools method getNumberValueFromJsonObject.

/**
 * Reads the number stored under {@code key} in {@code jsonObject} as a {@code long}.
 *
 * <p>Zero is returned when the object or the key is missing or empty, when the stored value
 * is JSON null, or when {@code getJsonNumber} yields {@code null}.
 *
 * @param jsonObject The JsonObject object to get the value from
 * @param key The name of the key
 * @return The numeric value, or zero in the cases described above
 */
public static long getNumberValueFromJsonObject(JsonObject jsonObject, String key) {
    // Codes_SRS_SERVICE_SDK_JAVA_TOOLS_12_018: [The function shall return zero if any of the input is null]
    final boolean inputMissing = jsonObject == null || jsonObject == JsonObject.NULL || key == null || key.isEmpty();
    if (inputMissing) {
        return 0;
    }
    // Codes_SRS_SERVICE_SDK_JAVA_TOOLS_12_019: [The function shall get the JsonValue of the key and return zero if it is null]
    if (jsonObject.get(key) == JsonValue.NULL) {
        return 0;
    }
    // Codes_SRS_SERVICE_SDK_JAVA_TOOLS_12_020: [The function shall get the JsonNumber from the JsonValue and return zero if it is null]
    final JsonNumber number = jsonObject.getJsonNumber(key);
    // Codes_SRS_SERVICE_SDK_JAVA_TOOLS_12_021: [The function shall return the long value from the JsonNumber if the JsonNumber is not null]
    return number == null ? 0 : number.longValue();
}
Also used : JsonNumber(javax.json.JsonNumber) JsonValue(javax.json.JsonValue)

Example 5 with JsonValue

use of javax.json.JsonValue in project sling by apache.

the class OrderedJsonReader method writeChildren.

/**
 * Writes the children of {@code obj}. Objects without the {@code PN_ORDEREDCHILDREN} key are
 * delegated to the superclass. Otherwise every entry not listed in {@code ignoredNames} is
 * offered to {@code handleSecurity}, and the {@code PN_ORDEREDCHILDREN} array is expanded
 * into one node per element, created in array order.
 *
 * @param obj the JSON object whose entries are processed
 * @param contentCreator passed through to the security, property and node creation calls
 * @throws RepositoryException propagated from the content creation calls
 * @throws JsonException if the ordered-children value is not an array, an element is not an
 *         object, or an element has no non-blank {@code PN_ORDEREDCHILDNAME}
 */
@Override
protected void writeChildren(JsonObject obj, ContentCreator contentCreator) throws RepositoryException {
    if (!obj.containsKey(PN_ORDEREDCHILDREN)) {
        super.writeChildren(obj, contentCreator);
        return;
    }
    for (Map.Entry<String, JsonValue> entry : obj.entrySet()) {
        final String name = entry.getKey();
        // skip well known objects
        if (ignoredNames.contains(name)) {
            continue;
        }
        final Object value = entry.getValue();
        if (handleSecurity(name, value, contentCreator)) {
            // NOTE(review): a property is created only when handleSecurity returns true, and
            // other non-ordered-children entries are skipped - confirm this is intended.
            this.createProperty(name, value, contentCreator);
            continue;
        }
        if (!name.equals(PN_ORDEREDCHILDREN)) {
            continue;
        }
        if (!(value instanceof JsonArray)) {
            throw new JsonException(PN_ORDEREDCHILDREN + " value must be a JSON array");
        }
        final JsonArray orderedChildren = (JsonArray) value;
        for (int position = 0; position < orderedChildren.size(); position++) {
            final Object item = orderedChildren.get(position);
            if (!(item instanceof JsonObject)) {
                throw new JsonException(PN_ORDEREDCHILDREN + " array must only have JSONObject items");
            }
            final JsonObject rawChild = (JsonObject) item;
            final String childName = rawChild.getString(PN_ORDEREDCHILDNAME, null);
            if (!StringUtils.isNotBlank(childName)) {
                throw new JsonException(PN_ORDEREDCHILDREN + " children must have a name whose key is " + PN_ORDEREDCHILDNAME);
            }
            // Copy the child without its name entry; the name is passed to createNode separately.
            final JsonObjectBuilder strippedChild = Json.createObjectBuilder();
            for (Map.Entry<String, JsonValue> childEntry : rawChild.entrySet()) {
                if (!PN_ORDEREDCHILDNAME.equals(childEntry.getKey())) {
                    strippedChild.add(childEntry.getKey(), childEntry.getValue());
                }
            }
            this.createNode(childName, strippedChild.build(), contentCreator);
        }
    }
}
Also used : JsonArray(javax.json.JsonArray) JsonException(javax.json.JsonException) JsonValue(javax.json.JsonValue) JsonObject(javax.json.JsonObject) JsonObject(javax.json.JsonObject) JsonObjectBuilder(javax.json.JsonObjectBuilder) Map(java.util.Map)

Aggregations

JsonValue (javax.json.JsonValue): 14; JsonString (javax.json.JsonString): 9; JsonObject (javax.json.JsonObject): 8; JsonArray (javax.json.JsonArray): 7; Map (java.util.Map): 5; HashMap (java.util.HashMap): 3; JsonException (javax.json.JsonException): 3; JsonObjectBuilder (javax.json.JsonObjectBuilder): 3; JsonReader (javax.json.JsonReader): 3; KvValue (com.torodb.kvdocument.values.KvValue): 2; ListKvArray (com.torodb.kvdocument.values.heap.ListKvArray): 2; File (java.io.File): 2; StringReader (java.io.StringReader): 2; ArrayList (java.util.ArrayList): 2; LinkedHashMap (java.util.LinkedHashMap): 2; ArrayConverter (com.torodb.backend.converters.array.ArrayConverter): 1; TableRef (com.torodb.core.TableRef): 1; ClassicCounter (edu.stanford.nlp.stats.ClassicCounter): 1; Counter (edu.stanford.nlp.stats.Counter): 1; TwoDimensionalCounter (edu.stanford.nlp.stats.TwoDimensionalCounter): 1