Use of javax.json.JsonValue in project CoreNLP by stanfordnlp:
the class ScorePhrases, method learnNewPhrasesPrivate.
/**
 * Extracts candidate phrases for {@code label} with the patterns learned in this iteration,
 * scores them according to {@code constVars.wordScoring} and returns the selected phrases.
 *
 * <p>Steps performed, in order:
 * <ol>
 *   <li>Fill {@code wordsPatExtracted} either by applying the patterns or, when
 *       {@code constVars.doNotApplyPatterns} is set, by the cheaper counting pass over every
 *       sentence batch.</li>
 *   <li>If {@code computeProcDataFreq} is set, rebuild {@code Data.processedDataFreq} from
 *       {@code Data.rawFreq} using the scorer's frequency normalization.</li>
 *   <li>{@code WEIGHTEDNORM}: score all extracted phrases, pick the top ones, copy the scores
 *       into {@code scoreForAllWordsThisIteration} and optionally write a JSON justification
 *       file ({@code words.json}) under {@code constVars.outDir/identifier/label}.</li>
 *   <li>{@code BPB}: pick the single phrase whose best extracting pattern has the highest
 *       weight in {@code patternsLearnedThisIter}.</li>
 * </ol>
 *
 * @param label the label being learned
 * @param patternsForEachToken only used by the counting pass ({@code doNotApplyPatterns})
 * @param patternsLearnedThisIter patterns (with weights) learned in this iteration
 * @param allSelectedPatterns passed through to the phrase scorer
 * @param alreadyIdentifiedWords passed to the phrase scorer; removed from the candidates in BPB mode
 * @param matchedTokensByPat passed through to {@code applyPats}
 * @param scoreForAllWordsThisIteration cleared and refilled with all phrase scores (WEIGHTEDNORM only)
 * @param terms phrase-to-pattern counts; candidate phrases are added to it here
 * @param wordsPatExtracted phrase-to-pattern counts of what the patterns extracted
 * @param patternsAndWords4Label pattern-to-phrase counts used to build the "reasonwords" justification
 * @param identifier sub-directory name used for the justification output
 * @param ignoreWords extra phrases that must not be selected; may be {@code null} or empty
 * @param computeProcDataFreq whether {@code Data.processedDataFreq} should be recomputed
 * @return the selected phrases with their scores; in BPB mode a counter holding the single best
 *         phrase, or an empty counter when there is no candidate
 * @throws IOException if reading or writing the justification file fails
 * @throws ClassNotFoundException declared by the signature; presumably raised by the sentence
 *         iterator or pattern application
 * @throws RuntimeException if the normalization or the word scoring mode is not recognized
 */
private Counter<CandidatePhrase> learnNewPhrasesPrivate(String label, PatternsForEachToken patternsForEachToken, Counter<E> patternsLearnedThisIter, Counter<E> allSelectedPatterns, Set<CandidatePhrase> alreadyIdentifiedWords, CollectionValuedMap<E, Triple<String, Integer, Integer>> matchedTokensByPat, Counter<CandidatePhrase> scoreForAllWordsThisIteration, TwoDimensionalCounter<CandidatePhrase, E> terms, TwoDimensionalCounter<CandidatePhrase, E> wordsPatExtracted, TwoDimensionalCounter<E, CandidatePhrase> patternsAndWords4Label, String identifier, Set<CandidatePhrase> ignoreWords, boolean computeProcDataFreq) throws IOException, ClassNotFoundException {
    Set<CandidatePhrase> alreadyLabeledWords = new HashSet<>();
    if (constVars.doNotApplyPatterns) {
        // if want to get the stats by the lossy way of just counting without
        // applying the patterns
        ConstantsAndVariables.DataSentsIterator sentsIter = new ConstantsAndVariables.DataSentsIterator(constVars.batchProcessSents);
        while (sentsIter.hasNext()) {
            Pair<Map<String, DataInstance>, File> sentsf = sentsIter.next();
            this.statsWithoutApplyingPatterns(sentsf.first(), patternsForEachToken, patternsLearnedThisIter, wordsPatExtracted);
        }
    } else {
        if (patternsLearnedThisIter.size() > 0) {
            this.applyPats(patternsLearnedThisIter, label, wordsPatExtracted, matchedTokensByPat, alreadyLabeledWords);
        }
    }
    if (computeProcDataFreq) {
        if (!phraseScorer.wordFreqNorm.equals(Normalization.NONE)) {
            Redwood.log(Redwood.DBG, "computing processed freq");
            for (Entry<CandidatePhrase, Double> fq : Data.rawFreq.entrySet()) {
                // Keep the raw value so that the assertion message reports the input, not the result.
                final Double raw = fq.getValue();
                Double in = raw;
                if (phraseScorer.wordFreqNorm.equals(Normalization.SQRT)) {
                    in = Math.sqrt(in);
                } else if (phraseScorer.wordFreqNorm.equals(Normalization.LOG)) {
                    in = 1 + Math.log(in);
                } else {
                    throw new RuntimeException("can't understand the normalization");
                }
                assert !in.isNaN() : "Why is processed freq nan when rawfreq is " + raw;
                Data.processedDataFreq.setCount(fq.getKey(), in);
            }
        } else {
            // No normalization requested: the processed counter is the raw counter itself (same object).
            Data.processedDataFreq = Data.rawFreq;
        }
    }
    if (constVars.wordScoring.equals(WordScoring.WEIGHTEDNORM)) {
        // Candidates are the extracted phrases that are neither known words of another semantic
        // class (by surface form or lemma) nor already labeled.
        for (CandidatePhrase en : wordsPatExtracted.firstKeySet()) {
            if (!constVars.getOtherSemanticClassesWords().contains(en) && (en.getPhraseLemma() == null || !constVars.getOtherSemanticClassesWords().contains(CandidatePhrase.createOrGet(en.getPhraseLemma()))) && !alreadyLabeledWords.contains(en)) {
                terms.addAll(en, wordsPatExtracted.getCounter(en));
            }
        }
        removeKeys(terms, constVars.getStopWords());
        Counter<CandidatePhrase> phraseScores = phraseScorer.scorePhrases(label, terms, wordsPatExtracted, allSelectedPatterns, alreadyIdentifiedWords, false);
        // NOTE(review): this and the println below look like leftover debugging for one hard-coded
        // phrase; kept unchanged because removing them alters console output.
        System.out.println("count for word U.S. is " + phraseScores.getCount(CandidatePhrase.createOrGet("U.S.")));
        Set<CandidatePhrase> ignoreWordsAll;
        if (ignoreWords != null && !ignoreWords.isEmpty()) {
            ignoreWordsAll = CollectionUtils.unionAsSet(ignoreWords, constVars.getOtherSemanticClassesWords());
        } else {
            ignoreWordsAll = new HashSet<>(constVars.getOtherSemanticClassesWords());
        }
        // Seed words and words learned earlier for this label must not be selected again.
        ignoreWordsAll.addAll(constVars.getSeedLabelDictionary().get(label));
        ignoreWordsAll.addAll(constVars.getLearnedWords(label).keySet());
        System.out.println("ignoreWordsAll contains word U.S. is " + ignoreWordsAll.contains(CandidatePhrase.createOrGet("U.S.")));
        Counter<CandidatePhrase> finalwords = chooseTopWords(phraseScores, terms, phraseScores, ignoreWordsAll, constVars.thresholdWordExtract);
        phraseScorer.printReasonForChoosing(finalwords);
        scoreForAllWordsThisIteration.clear();
        Counters.addInPlace(scoreForAllWordsThisIteration, phraseScores);
        Redwood.log(ConstantsAndVariables.minimaldebug, "\n\n## Selected Words for " + label + " : " + Counters.toSortedString(finalwords, finalwords.size(), "%1$s:%2$.2f", "\t"));
        if (constVars.goldEntities != null) {
            Map<String, Boolean> goldEntities4Label = constVars.goldEntities.get(label);
            if (goldEntities4Label != null) {
                // One "phrase:goldValue" line per selected phrase.
                StringBuilder s = new StringBuilder();
                for (CandidatePhrase x : finalwords.keySet()) {
                    s.append(x.getPhrase() + (goldEntities4Label.containsKey(x.getPhrase()) ? ":" + goldEntities4Label.get(x.getPhrase()) : ":UKNOWN") + "\n");
                }
                Redwood.log(ConstantsAndVariables.minimaldebug, "\n\n## Gold labels for selected words for label " + label + " : " + s.toString());
            } else {
                Redwood.log(Redwood.DBG, "No gold entities provided for label " + label);
            }
        }
        if (constVars.outDir != null && !constVars.outDir.isEmpty()) {
            String outputdir = constVars.outDir + "/" + identifier + "/" + label;
            IOUtils.ensureDir(new File(outputdir));
            // For every selected word, count the words that share an extracting pattern with it.
            TwoDimensionalCounter<CandidatePhrase, CandidatePhrase> reasonForWords = new TwoDimensionalCounter<>();
            for (CandidatePhrase word : finalwords.keySet()) {
                for (E l : wordsPatExtracted.getCounter(word).keySet()) {
                    for (CandidatePhrase w2 : patternsAndWords4Label.getCounter(l)) {
                        reasonForWords.incrementCount(word, w2);
                    }
                }
            }
            Redwood.log(ConstantsAndVariables.minimaldebug, "Saving output in " + outputdir);
            String filename = outputdir + "/words.json";
            // the json object is an array corresponding to each iteration - of list
            // of objects,
            // each of which is a bean of entity and reasons
            JsonArrayBuilder obj = Json.createArrayBuilder();
            if (writtenInJustification.containsKey(label) && writtenInJustification.get(label)) {
                // Carry over the iterations already on disk. try-with-resources closes the reader
                // (and the underlying stream) even when parsing fails.
                try (JsonReader jsonReader = Json.createReader(new BufferedInputStream(new FileInputStream(filename)))) {
                    JsonArray objarr = jsonReader.readArray();
                    for (JsonValue o : objarr) {
                        obj.add(o);
                    }
                }
            }
            JsonArrayBuilder objThisIter = Json.createArrayBuilder();
            for (CandidatePhrase w : reasonForWords.firstKeySet()) {
                JsonObjectBuilder objinner = Json.createObjectBuilder();
                JsonArrayBuilder l = Json.createArrayBuilder();
                for (CandidatePhrase w2 : reasonForWords.getCounter(w).keySet()) {
                    l.add(w2.getPhrase());
                }
                JsonArrayBuilder pats = Json.createArrayBuilder();
                for (E p : wordsPatExtracted.getCounter(w)) {
                    pats.add(p.toStringSimple());
                }
                objinner.add("reasonwords", l);
                objinner.add("patterns", pats);
                objinner.add("score", finalwords.getCount(w));
                objinner.add("entity", w.getPhrase());
                objThisIter.add(objinner.build());
            }
            obj.add(objThisIter);
            // Redwood.log(ConstantsAndVariables.minimaldebug, channelNameLogger,
            // "Writing justification at " + filename);
            IOUtils.writeStringToFile(StringUtils.normalize(StringUtils.toAscii(obj.build().toString())), filename, "ASCII");
            // Remember that the file now exists so the next iteration appends to it.
            writtenInJustification.put(label, true);
        }
        if (constVars.justify) {
            Redwood.log(Redwood.DBG, "\nJustification for phrases:\n");
            for (CandidatePhrase word : finalwords.keySet()) {
                Redwood.log(Redwood.DBG, "Phrase " + word + " extracted because of patterns: \t" + Counters.toSortedString(wordsPatExtracted.getCounter(word), wordsPatExtracted.getCounter(word).size(), "%1$s:%2$f", "\n"));
            }
        }
        return finalwords;
    } else if (constVars.wordScoring.equals(WordScoring.BPB)) {
        Counters.addInPlace(terms, wordsPatExtracted);
        // For every phrase: the weight of its best extracting pattern, and which pattern that is.
        Counter<CandidatePhrase> maxPatWeightTerms = new ClassicCounter<>();
        Map<CandidatePhrase, E> wordMaxPat = new HashMap<>();
        for (Entry<CandidatePhrase, ClassicCounter<E>> en : terms.entrySet()) {
            Counter<E> weights = new ClassicCounter<>();
            for (E k : en.getValue().keySet()) {
                weights.setCount(k, patternsLearnedThisIter.getCount(k));
            }
            maxPatWeightTerms.setCount(en.getKey(), Counters.max(weights));
            wordMaxPat.put(en.getKey(), Counters.argmax(weights));
        }
        Counters.removeKeys(maxPatWeightTerms, alreadyIdentifiedWords);
        double maxvalue = Counters.max(maxPatWeightTerms);
        // All phrases tied (within 1e-10) for the highest pattern weight.
        Set<CandidatePhrase> words = Counters.keysAbove(maxPatWeightTerms, maxvalue - 1e-10);
        CandidatePhrase bestw = null;
        if (words.size() > 1) {
            // Break the tie by how often the best pattern extracted the phrase.
            double max = Double.NEGATIVE_INFINITY;
            for (CandidatePhrase w : words) {
                if (terms.getCount(w, wordMaxPat.get(w)) > max) {
                    max = terms.getCount(w, wordMaxPat.get(w));
                    bestw = w;
                }
            }
        } else if (words.size() == 1) {
            bestw = words.iterator().next();
        } else {
            return new ClassicCounter<>();
        }
        Redwood.log(ConstantsAndVariables.minimaldebug, "Selected Words: " + bestw);
        return Counters.asCounter(Arrays.asList(bestw));
    } else {
        throw new RuntimeException("wordscoring " + constVars.wordScoring + " not identified");
    }
}
Use of javax.json.JsonValue in project torodb by torodb:
the class BaseArrayToArrayConverter, method fromJsonValue.
/**
 * Converts every element of the given JSON array and wraps the results in a {@code ListKvArray}.
 *
 * <p>The converter for each element is looked up through
 * {@code valueToArrayConverterProvider}, using the element itself as the key.
 *
 * @param value the JSON array to convert
 * @return a {@code ListKvArray} holding the converted elements in their original order
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
@Override
public KvArray fromJsonValue(JsonArray value) {
    final int elementCount = value.size();
    final List<KvValue<?>> converted = new ArrayList<>(elementCount);
    for (int index = 0; index < elementCount; index++) {
        final JsonValue element = value.get(index);
        // Raw type on purpose: the concrete converter type depends on the element.
        final ArrayConverter elementConverter = valueToArrayConverterProvider.fromJsonValue(element);
        converted.add(elementConverter.fromJsonValue(element));
    }
    return new ListKvArray(converted);
}
Use of javax.json.JsonValue in project torodb by torodb:
the class BaseArrayValueToJsonConverter, method toValue.
/**
 * Builds a {@code ListKvArray} from the given JSON array by converting its elements one by one.
 *
 * <p>For each element, a converter is obtained from {@code valueToArrayConverterProvider}
 * and applied to that same element.
 *
 * @param value the JSON array to convert
 * @return a {@code ListKvArray} with one converted value per JSON element, in order
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
@Override
public KvArray toValue(JsonArray value) {
    final int total = value.size();
    final List<KvValue<?>> results = new ArrayList<>(total);
    for (int pos = 0; pos < total; pos++) {
        final JsonValue item = value.get(pos);
        // Raw type on purpose: the provider decides the converter per element.
        final ArrayConverter itemConverter = valueToArrayConverterProvider.fromJsonValue(item);
        results.add(itemConverter.fromJsonValue(item));
    }
    return new ListKvArray(results);
}
Use of javax.json.JsonValue in project azure-iot-sdk-java by Azure:
the class Tools, method getNumberValueFromJsonObject.
/**
 * Reads the value stored under {@code key} in {@code jsonObject} as a {@code long}.
 *
 * <p>Zero is returned when the object is {@code null} or the JSON null value, when the key is
 * {@code null} or empty, when the stored value is JSON null, or when no number is stored
 * under the key.
 *
 * @param jsonObject The JsonObject object to get the value from
 * @param key The name of the key
 * @return The numeric value, or zero in the cases listed above
 */
public static long getNumberValueFromJsonObject(JsonObject jsonObject, String key) {
    // Codes_SRS_SERVICE_SDK_JAVA_TOOLS_12_018: [The function shall return zero if any of the input is null]
    if (jsonObject == null || jsonObject == JsonObject.NULL || key == null || key.isEmpty()) {
        return 0;
    }

    // Codes_SRS_SERVICE_SDK_JAVA_TOOLS_12_019: [The function shall get the JsonValue of the key and return zero if it is null]
    final JsonValue rawValue = jsonObject.get(key);
    if (rawValue == JsonValue.NULL) {
        return 0;
    }

    // Codes_SRS_SERVICE_SDK_JAVA_TOOLS_12_020: [The function shall get the JsonNumber from the JsonValue and return zero if it is null]
    final JsonNumber number = jsonObject.getJsonNumber(key);

    // Codes_SRS_SERVICE_SDK_JAVA_TOOLS_12_021: [The function shall return the long value from the JsonNumber if the JsonNumber is not null]
    return (number == null) ? 0 : number.longValue();
}
Use of javax.json.JsonValue in project sling by apache:
the class OrderedJsonReader, method writeChildren.
/**
 * Writes the content described by {@code obj}, creating child nodes in the order given by the
 * {@code PN_ORDEREDCHILDREN} array when that key is present.
 *
 * <p>Without the key the work is delegated to the superclass. With the key, every entry that
 * is not in {@code ignoredNames} is processed as follows:
 * <ul>
 *   <li>entries accepted by {@code handleSecurity} are left to that method;</li>
 *   <li>the {@code PN_ORDEREDCHILDREN} entry must be a JSON array of JSON objects, each with a
 *       non-blank {@code PN_ORDEREDCHILDNAME}; a node is created per item, in array order,
 *       from the item with the name key removed;</li>
 *   <li>every other entry is written with {@code createProperty}.</li>
 * </ul>
 *
 * <p>Fix: the {@code createProperty} call used to sit in the {@code else} of the
 * {@code handleSecurity} check. Entries already handled as security were therefore also
 * written as properties, and all remaining entries beside the ordered-children key were
 * silently skipped.
 *
 * @param obj the JSON object whose entries are written
 * @param contentCreator receives the created nodes and properties
 * @throws RepositoryException propagated from the node/property creation calls
 * @throws JsonException if the ordered-children value is not an array, an item is not a JSON
 *         object, or an item has no usable name
 */
@Override
protected void writeChildren(JsonObject obj, ContentCreator contentCreator) throws RepositoryException {
    if (!obj.containsKey(PN_ORDEREDCHILDREN)) {
        super.writeChildren(obj, contentCreator);
        return;
    }

    for (Map.Entry<String, JsonValue> entry : obj.entrySet()) {
        final String n = entry.getKey();
        // skip well known objects
        if (ignoredNames.contains(n)) {
            continue;
        }

        final Object o = entry.getValue();
        if (handleSecurity(n, o, contentCreator)) {
            // Presumably a true result means the entry was consumed as security
            // configuration; it must not be written a second time as a property.
            continue;
        }

        if (!n.equals(PN_ORDEREDCHILDREN)) {
            this.createProperty(n, o, contentCreator);
            continue;
        }

        if (!(o instanceof JsonArray)) {
            throw new JsonException(PN_ORDEREDCHILDREN + " value must be a JSON array");
        }

        final JsonArray children = (JsonArray) o;
        for (int childIndex = 0; childIndex < children.size(); childIndex++) {
            final Object oc = children.get(childIndex);
            if (!(oc instanceof JsonObject)) {
                throw new JsonException(PN_ORDEREDCHILDREN + " array must only have JSONObject items");
            }

            final JsonObject child = (JsonObject) oc;
            final String childName = child.getString(PN_ORDEREDCHILDNAME, null);
            if (!StringUtils.isNotBlank(childName)) {
                throw new JsonException(PN_ORDEREDCHILDREN + " children must have a name whose key is " + PN_ORDEREDCHILDNAME);
            }

            // Rebuild the child without its name key; the name is passed to createNode separately.
            final JsonObjectBuilder builder = Json.createObjectBuilder();
            for (Map.Entry<String, JsonValue> e : child.entrySet()) {
                if (!PN_ORDEREDCHILDNAME.equals(e.getKey())) {
                    builder.add(e.getKey(), e.getValue());
                }
            }
            this.createNode(childName, builder.build(), contentCreator);
        }
    }
}
Aggregations