use of edu.stanford.nlp.trees.LabeledScoredTreeFactory in project CoreNLP by stanfordnlp.
the class ManipulateTopBracket method main.
/**
 * Command-line entry point. Loads the treebank files named on the command line and prints
 * every tree, either stripping the language's start-symbol node from the top of each tree
 * (option "b") or wrapping each tree in a start-symbol node when it does not already have one.
 * The number of processed trees is reported on stderr.
 *
 * @param args command-line arguments, parsed with {@code argDefs()}; the unnamed
 *             (positional) arguments are the whitespace-separated treebank paths
 */
public static void main(String[] args) {
  if (args.length < minArgs) {
    System.out.println(usage());
    System.exit(-1);
  }
  Properties options = StringUtils.argsToProperties(args, argDefs());
  Language language = PropertiesUtils.get(options, "l", Language.English, Language.class);
  TreebankLangParserParams tlpp = language.params;
  // The encoding was previously read from "l", the same key that selects the language, so
  // passing a language silently set the charset name to e.g. "English".
  // NOTE(review): "e" is assumed to be the encoding flag declared in argDefs() -- confirm.
  String encoding = options.getProperty("e", "UTF-8");
  boolean removeBracket = PropertiesUtils.getBool(options, "b", false);
  tlpp.setInputEncoding(encoding);
  tlpp.setOutputEncoding(encoding);
  DiskTreebank tb = tlpp.diskTreebank();

  // "".split("\\s+") yields a one-element array holding "", so the emptiness test has to be
  // made on the string itself; trimming also avoids a leading empty token.
  String fileList = options.getProperty("", "").trim();
  if (fileList.isEmpty()) {
    log.info(usage());
    System.exit(-1);
  }
  for (String filename : fileList.split("\\s+")) {
    tb.loadPath(filename);
  }

  PrintWriter pwo = tlpp.pw();
  String startSymbol = tlpp.treebankLanguagePack().startSymbol();
  TreeFactory tf = new LabeledScoredTreeFactory();
  int nTrees = 0;
  for (Tree t : tb) {
    if (removeBracket) {
      if (t.value().equals(startSymbol)) {
        t = t.firstChild();
      }
    } else if (!t.value().equals(startSymbol)) {
      // Add a bracket if it isn't already there
      t = tf.newTreeNode(startSymbol, Collections.singletonList(t));
    }
    pwo.println(t.toString());
    nTrees++;
  }
  pwo.close();
  System.err.printf("Processed %d trees.%n", nTrees);
}
use of edu.stanford.nlp.trees.LabeledScoredTreeFactory in project CoreNLP by stanfordnlp.
the class MetaClass method cast.
/**
 * Cast a String representation of an object into that object.
 * E.g. "5.4" will be cast to a Double; "[1,2,3]" will be cast
 * to an Integer[].
 *
 * The raw class of {@code type} is tested against an ordered chain of supported types and
 * the first match wins, so the more specific stream classes (ObjectOutputStream,
 * PrintStream, ...) are tested before OutputStream / InputStream. Classes not covered by
 * the chain are tried through a public {@code fromString(String)} method, then as a Tree,
 * then as a Collection.
 *
 * NOTE: Date parses from a Long
 *
 * @param <E> The type of the object returned (same as type)
 * @param value The string representation of the object
 * @param type The type (usually class) to be returned (same as E)
 * @return An object corresponding to the String value passed, or null when the value
 *         cannot be converted in the Date, Calendar and Class cases, when value is null in
 *         the array and enum cases, or when no conversion applies to the class at all
 * @throws IllegalArgumentException if type is neither a Class nor a ParameterizedType, or
 *         if value names no constant of the requested enum
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
public static <E> E cast(String value, Type type) {
  // --Get Type
  Class<?> clazz;
  if (type instanceof Class) {
    clazz = (Class<?>) type;
  } else if (type instanceof ParameterizedType) {
    ParameterizedType pt = (ParameterizedType) type;
    clazz = (Class<?>) pt.getRawType();
  } else {
    throw new IllegalArgumentException("Cannot cast to type (unhandled type): " + type);
  }
  // --Cast
  if (String.class.isAssignableFrom(clazz)) {
    // (case: String)
    return (E) value;
  } else if (Boolean.class.isAssignableFrom(clazz) || boolean.class.isAssignableFrom(clazz)) {
    // (case: boolean) "1" is accepted as true in addition to what parseBoolean accepts
    if ("1".equals(value)) {
      return (E) Boolean.TRUE;
    }
    return (E) Boolean.valueOf(Boolean.parseBoolean(value));
  } else if (Integer.class.isAssignableFrom(clazz) || int.class.isAssignableFrom(clazz)) {
    // (case: integer) fall back to truncating a decimal representation such as "5.4"
    try {
      return (E) Integer.valueOf(Integer.parseInt(value));
    } catch (NumberFormatException e) {
      return (E) Integer.valueOf((int) Double.parseDouble(value));
    }
  } else if (BigInteger.class.isAssignableFrom(clazz)) {
    // (case: biginteger)
    if (value == null) {
      return (E) BigInteger.ZERO;
    }
    return (E) new BigInteger(value);
  } else if (Long.class.isAssignableFrom(clazz) || long.class.isAssignableFrom(clazz)) {
    // (case: long) fall back to truncating a decimal representation
    try {
      return (E) Long.valueOf(Long.parseLong(value));
    } catch (NumberFormatException e) {
      return (E) Long.valueOf((long) Double.parseDouble(value));
    }
  } else if (Float.class.isAssignableFrom(clazz) || float.class.isAssignableFrom(clazz)) {
    // (case: float)
    if (value == null) {
      return (E) Float.valueOf(Float.NaN);
    }
    return (E) Float.valueOf(Float.parseFloat(value));
  } else if (Double.class.isAssignableFrom(clazz) || double.class.isAssignableFrom(clazz)) {
    // (case: double)
    if (value == null) {
      return (E) Double.valueOf(Double.NaN);
    }
    return (E) Double.valueOf(Double.parseDouble(value));
  } else if (BigDecimal.class.isAssignableFrom(clazz)) {
    // (case: bigdecimal)
    if (value == null) {
      return (E) BigDecimal.ZERO;
    }
    return (E) new BigDecimal(value);
  } else if (Short.class.isAssignableFrom(clazz) || short.class.isAssignableFrom(clazz)) {
    // (case: short) fall back to truncating a decimal representation
    try {
      return (E) Short.valueOf(Short.parseShort(value));
    } catch (NumberFormatException e) {
      return (E) Short.valueOf((short) Double.parseDouble(value));
    }
  } else if (Byte.class.isAssignableFrom(clazz) || byte.class.isAssignableFrom(clazz)) {
    // (case: byte) fall back to truncating a decimal representation
    try {
      return (E) Byte.valueOf(Byte.parseByte(value));
    } catch (NumberFormatException e) {
      return (E) Byte.valueOf((byte) Double.parseDouble(value));
    }
  } else if (Character.class.isAssignableFrom(clazz) || char.class.isAssignableFrom(clazz)) {
    // (case: char) the value is the numeric code of the character, not the character itself
    return (E) Character.valueOf((char) Integer.parseInt(value));
  } else if (Lazy.class.isAssignableFrom(clazz)) {
    // (case: Lazy) the conversion is deferred to the supplier handed to Lazy.of
    final String v = value;
    return (E) Lazy.of(() -> MetaClass.castWithoutKnowingType(v));
  } else if (Optional.class.isAssignableFrom(clazz)) {
    // (case: Optional) null, "null", "empty" and "none" (any case) map to the empty Optional
    if (value == null) {
      return (E) Optional.empty();
    }
    // Locale.ROOT keeps the comparison independent of the default locale, as in the enum case
    String lowered = value.toLowerCase(Locale.ROOT);
    if ("null".equals(lowered) || "empty".equals(lowered) || "none".equals(lowered)) {
      return (E) Optional.empty();
    }
    return (E) Optional.of(value);
  } else if (java.util.Date.class.isAssignableFrom(clazz)) {
    // (case: date) the value is a long timestamp
    try {
      return (E) new Date(Long.parseLong(value));
    } catch (NumberFormatException e) {
      return null;
    }
  } else if (java.util.Calendar.class.isAssignableFrom(clazz)) {
    // (case: calendar) the value is a long timestamp
    try {
      Date d = new Date(Long.parseLong(value));
      GregorianCalendar cal = new GregorianCalendar();
      cal.setTime(d);
      return (E) cal;
    } catch (NumberFormatException e) {
      return null;
    }
  } else if (FileWriter.class.isAssignableFrom(clazz)) {
    // (case: file writer)
    try {
      return (E) new FileWriter(new File(value));
    } catch (IOException e) {
      throw new RuntimeIOException(e);
    }
  } else if (BufferedReader.class.isAssignableFrom(clazz)) {
    // (case: buffered reader)
    try {
      return (E) IOUtils.readerFromString(value);
    } catch (IOException e) {
      throw new RuntimeIOException(e);
    }
  } else if (FileReader.class.isAssignableFrom(clazz)) {
    // (case: file reader)
    try {
      return (E) new FileReader(new File(value));
    } catch (IOException e) {
      throw new RuntimeIOException(e);
    }
  } else if (File.class.isAssignableFrom(clazz)) {
    // (case: file)
    return (E) new File(value);
  } else if (Class.class.isAssignableFrom(clazz)) {
    // (case: class) the value is a class name
    try {
      return (E) Class.forName(value);
    } catch (ClassNotFoundException e) {
      return null;
    }
  } else if (clazz.isArray()) {
    // (case: array)
    if (value == null) {
      return null;
    }
    Class<?> subType = clazz.getComponentType();
    String[] strings = StringUtils.decodeArray(value);
    // Fill through java.lang.reflect.Array so that primitive component types work as well:
    // an int[] is not an Object[], and Array.set unwraps the boxed element for us.
    Object array = Array.newInstance(subType, strings.length);
    for (int i = 0; i < strings.length; i++) {
      Array.set(array, i, cast(strings[i], subType));
    }
    return (E) array;
  } else if (Map.class.isAssignableFrom(clazz)) {
    // (case: map)
    return (E) StringUtils.decodeMap(value);
  } else if (clazz.isEnum()) {
    // (case: enumeration)
    Class c = (Class) clazz;
    if (value == null) {
      return null;
    }
    // Strip one optional double quote from each end. The emptiness checks keep "" and a
    // lone quote from indexing out of bounds.
    if (!value.isEmpty() && value.charAt(0) == '"') {
      value = value.substring(1);
    }
    if (!value.isEmpty() && value.charAt(value.length() - 1) == '"') {
      value = value.substring(0, value.length() - 1);
    }
    if (value.isEmpty()) {
      // Same exception type an unmatched constant name ends up with below
      throw new IllegalArgumentException("Cannot cast an empty string to enum " + clazz.getName());
    }
    // Try the name as given, then lower case, then upper case, then with only the case of
    // the first character flipped.
    try {
      return (E) Enum.valueOf(c, value);
    } catch (Exception e) {
      try {
        return (E) Enum.valueOf(c, value.toLowerCase(Locale.ROOT));
      } catch (Exception e2) {
        try {
          return (E) Enum.valueOf(c, value.toUpperCase(Locale.ROOT));
        } catch (Exception e3) {
          return (E) Enum.valueOf(c, (Character.isUpperCase(value.charAt(0)) ? Character.toLowerCase(value.charAt(0)) : Character.toUpperCase(value.charAt(0))) + value.substring(1));
        }
      }
    }
  } else if (ObjectOutputStream.class.isAssignableFrom(clazz)) {
    // (case: object output stream)
    try {
      return (E) new ObjectOutputStream((OutputStream) cast(value, OutputStream.class));
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  } else if (ObjectInputStream.class.isAssignableFrom(clazz)) {
    // (case: object input stream)
    try {
      return (E) new ObjectInputStream((InputStream) cast(value, InputStream.class));
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  } else if (PrintStream.class.isAssignableFrom(clazz)) {
    // (case: print stream) stdout / stderr by name, otherwise a file path
    if (value.equalsIgnoreCase("stdout") || value.equalsIgnoreCase("out")) {
      return (E) System.out;
    }
    if (value.equalsIgnoreCase("stderr") || value.equalsIgnoreCase("err")) {
      return (E) System.err;
    }
    try {
      return (E) new PrintStream(new FileOutputStream(value));
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  } else if (PrintWriter.class.isAssignableFrom(clazz)) {
    // (case: print writer) stdout / stderr by name, otherwise a file path
    if (value.equalsIgnoreCase("stdout") || value.equalsIgnoreCase("out")) {
      return (E) new PrintWriter(System.out);
    }
    if (value.equalsIgnoreCase("stderr") || value.equalsIgnoreCase("err")) {
      return (E) new PrintWriter(System.err);
    }
    try {
      return (E) IOUtils.getPrintWriter(value);
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  } else if (OutputStream.class.isAssignableFrom(clazz)) {
    // (case: output stream) stdout / stderr by name, otherwise a file that is created if missing
    if (value.equalsIgnoreCase("stdout") || value.equalsIgnoreCase("out")) {
      return (E) System.out;
    }
    if (value.equalsIgnoreCase("stderr") || value.equalsIgnoreCase("err")) {
      return (E) System.err;
    }
    File toWriteTo = cast(value, File.class);
    try {
      if (toWriteTo == null || (!toWriteTo.exists() && !toWriteTo.createNewFile())) {
        throw new IllegalStateException("Could not create output stream (cannot write file): " + value);
      }
      return (E) IOUtils.getFileOutputStream(value);
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  } else if (InputStream.class.isAssignableFrom(clazz)) {
    // (case: input stream) stdin by name, otherwise resolved by IOUtils
    if (value.equalsIgnoreCase("stdin") || value.equalsIgnoreCase("in")) {
      return (E) System.in;
    }
    try {
      return (E) IOUtils.getInputStreamFromURLOrClasspathOrFileSystem(value);
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  } else {
    try {
      // (case: can parse from string)
      Method decode = clazz.getMethod("fromString", String.class);
      return (E) decode.invoke(MetaClass.create(clazz), value);
    } catch (NoSuchMethodException | InvocationTargetException | IllegalAccessException | ClassCastException e) {
      // Silent errors for misc failures: fall through to the guesses below
    }
    // Pass 2: Guess what the object could be
    if (Tree.class.isAssignableFrom(clazz)) {
      // (case: reading a tree)
      try {
        return (E) new PennTreeReader(new StringReader(value), new LabeledScoredTreeFactory(CoreLabel.factory())).readTree();
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    } else if (Collection.class.isAssignableFrom(clazz)) {
      // (case: reading a collection)
      Collection rtn;
      if (Modifier.isAbstract(clazz.getModifiers())) {
        rtn = abstractToConcreteCollectionMap.get(clazz).createInstance();
      } else {
        rtn = MetaClass.create(clazz).createInstance();
      }
      // NOTE(review): arrays are handled earlier in the chain, so clazz is never an array
      // class here and getComponentType() returns null; elements therefore always go
      // through castWithoutKnowingType. The element type of a ParameterizedType is not used.
      Class<?> subType = clazz.getComponentType();
      String[] strings = StringUtils.decodeArray(value);
      for (String string : strings) {
        if (subType == null) {
          rtn.add(castWithoutKnowingType(string));
        } else {
          rtn.add(cast(string, subType));
        }
      }
      return (E) rtn;
    } else {
      // We could not cast this object
      return null;
    }
  }
}
use of edu.stanford.nlp.trees.LabeledScoredTreeFactory in project CoreNLP by stanfordnlp.
the class DependencyIndexITest method testPositions.
/**
 * Runs checkTree on the same sentence obtained four ways: read with a StringLabel-based
 * factory, built by Tree.valueOf, read with a CoreLabel-based factory, and produced by the
 * lexicalized parser.
 */
@Test
public void testPositions() {
  // The bracketed form shared by the three trees that are built from text
  final String bracketed = "(S (NP (NNP Mary)) (VP (VBD had) (NP (DT a) (JJ little) (NN lamb))) (. .))";
  try {
    // One: a tree loaded from a reader, using StringLabelFactory
    PennTreeReader stringLabelReader = new PennTreeReader(new StringReader(bracketed), new LabeledScoredTreeFactory(new StringLabelFactory()));
    Tree fromStringLabels = stringLabelReader.readTree();
    checkTree(fromStringLabels);

    // Two: a tree created using Tree.valueOf()
    Tree fromValueOf = Tree.valueOf(bracketed);
    checkTree(fromValueOf);

    // Three: a tree loaded from a reader, using CoreLabelFactory
    PennTreeReader coreLabelReader = new PennTreeReader(new StringReader(bracketed), new LabeledScoredTreeFactory(CoreLabel.factory()));
    Tree fromCoreLabels = coreLabelReader.readTree();
    checkTree(fromCoreLabels);

    // Four: a tree generated by the parser; its leaves are indexed before checking
    LexicalizedParser lexParser = LexicalizedParser.loadModel();
    Tree parsed = lexParser.parse("Mary had a little lamb .");
    parsed.indexLeaves();
    checkTree(parsed);
  } catch (IOException e) {
    // reading from an in-memory string should never fail
    fail("IOException shouldn't happen.");
  }
}
use of edu.stanford.nlp.trees.LabeledScoredTreeFactory in project CoreNLP by stanfordnlp.
the class CustomAnnotationSerializer method read.
/**
 * Reads one serialized document from the stream. The expected layout is: the coref chains
 * (new format), one line holding the old-format coref graph (possibly empty), then for
 * every sentence a parse-tree line, three dependency graphs, and token lines terminated by
 * an empty line.
 *
 * @param is the stream to read from; wrapped in a GZIPInputStream when compression is
 *           enabled and the stream is not one already
 * @return the reconstructed annotation paired with the (possibly wrapped) input stream
 * @throws IOException if reading from the stream fails
 * @throws RuntimeIOException if the stream ends before the coref-graph line, or if that
 *         line does not hold a multiple of four fields
 */
@Override
public Pair<Annotation, InputStream> read(InputStream is) throws IOException {
  if (compress && !(is instanceof GZIPInputStream)) {
    is = new GZIPInputStream(is);
  }
  BufferedReader reader = new BufferedReader(new InputStreamReader(is));
  Annotation doc = new Annotation("");
  String line;

  // read the coref graph (new format)
  Map<Integer, CorefChain> chains = loadCorefChains(reader);
  if (chains != null) {
    doc.set(CorefCoreAnnotations.CorefChainAnnotation.class, chains);
  }

  // read the coref graph (old format)
  line = reader.readLine();
  if (line == null) {
    // A truncated stream used to surface as a bare NullPointerException from trim()
    throw new RuntimeIOException("ERROR: Unexpected end of stream while reading the serialized coref graph");
  }
  line = line.trim();
  if (line.length() > 0) {
    // each link is four integers: two for the source tuple, two for the destination tuple
    String[] bits = line.split(" ");
    if (bits.length % 4 != 0) {
      throw new RuntimeIOException("ERROR: Incorrect format for the serialized coref graph: " + line);
    }
    List<Pair<IntTuple, IntTuple>> corefGraph = new ArrayList<>();
    for (int i = 0; i < bits.length; i += 4) {
      IntTuple src = new IntTuple(2);
      IntTuple dst = new IntTuple(2);
      src.set(0, Integer.parseInt(bits[i]));
      src.set(1, Integer.parseInt(bits[i + 1]));
      dst.set(0, Integer.parseInt(bits[i + 2]));
      dst.set(1, Integer.parseInt(bits[i + 3]));
      corefGraph.add(new Pair<>(src, dst));
    }
    doc.set(CorefCoreAnnotations.CorefGraphAnnotation.class, corefGraph);
  }

  // read individual sentences
  List<CoreMap> sentences = new ArrayList<>();
  while ((line = reader.readLine()) != null) {
    CoreMap sentence = new Annotation("");

    // first line is the parse tree. construct it with CoreLabels in Tree nodes
    Tree tree = new PennTreeReader(new StringReader(line), new LabeledScoredTreeFactory(CoreLabel.factory())).readTree();
    sentence.set(TreeCoreAnnotations.TreeAnnotation.class, tree);

    // read the dependency graphs, in the order collapsed, uncollapsed, CC-processed
    IntermediateSemanticGraph intermCollapsedDeps = loadDependencyGraph(reader);
    IntermediateSemanticGraph intermUncollapsedDeps = loadDependencyGraph(reader);
    IntermediateSemanticGraph intermCcDeps = loadDependencyGraph(reader);

    // the remaining lines until empty line are tokens
    List<CoreLabel> tokens = new ArrayList<>();
    while ((line = reader.readLine()) != null) {
      if (line.length() == 0) {
        break;
      }
      CoreLabel token = loadToken(line, haveExplicitAntecedent);
      tokens.add(token);
    }
    sentence.set(CoreAnnotations.TokensAnnotation.class, tokens);

    // convert the intermediate graphs to actual SemanticGraphs, which needs the tokens
    SemanticGraph collapsedDeps = intermCollapsedDeps.convertIntermediateGraph(tokens);
    sentence.set(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class, collapsedDeps);
    SemanticGraph uncollapsedDeps = intermUncollapsedDeps.convertIntermediateGraph(tokens);
    sentence.set(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class, uncollapsedDeps);
    SemanticGraph ccDeps = intermCcDeps.convertIntermediateGraph(tokens);
    sentence.set(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class, ccDeps);

    sentences.add(sentence);
  }
  doc.set(CoreAnnotations.SentencesAnnotation.class, sentences);
  return Pair.makePair(doc, is);
}
use of edu.stanford.nlp.trees.LabeledScoredTreeFactory in project CoreNLP by stanfordnlp.
the class SpanishTreeNormalizerITest method setUp.
/**
 * Creates a fresh tree factory and a fresh Spanish tree normalizer (all three constructor
 * flags enabled) before each test.
 */
@Before
public void setUp() {
  this.tf = new LabeledScoredTreeFactory();
  this.tn = new SpanishTreeNormalizer(true, true, true);
}
Aggregations