Search in sources:

Example 1 with LabeledScoredTreeFactory

use of edu.stanford.nlp.trees.LabeledScoredTreeFactory in project CoreNLP by stanfordnlp.

the class ManipulateTopBracket method main.

/**
 * Command-line entry point. Loads the treebank files named on the command line and prints
 * every tree, either wrapped in the language's start symbol (default) or with that top
 * bracket removed (option "b").
 *
 * Options read here: "l" selects the {@link Language} (default English), "b" switches to
 * bracket removal, and the remaining (unnamed) arguments are the paths to load.
 *
 * @param args command-line arguments; usage is printed and the JVM exits with -1 when there
 *             are fewer than {@code minArgs} of them or when no file path is given
 */
public static void main(String[] args) {
    if (args.length < minArgs) {
        System.out.println(usage());
        System.exit(-1);
    }
    Properties options = StringUtils.argsToProperties(args, argDefs());
    Language language = PropertiesUtils.get(options, "l", Language.English, Language.class);
    TreebankLangParserParams tlpp = language.params;
    // The encoding used to be read from "l", which is the language option above, so passing
    // "-l <language>" silently turned the language name into the encoding.
    // NOTE(review): assumes argDefs() registers "e" as the encoding option -- confirm.
    String encoding = options.getProperty("e", "UTF-8");
    boolean removeBracket = PropertiesUtils.getBool(options, "b", false);
    tlpp.setInputEncoding(encoding);
    tlpp.setOutputEncoding(encoding);

    // "".split("\\s+") yields a one-element array holding the empty string, so an array
    // length check can never detect "no files given"; test the raw string instead.
    String fileList = options.getProperty("", "").trim();
    if (fileList.isEmpty()) {
        log.info(usage());
        System.exit(-1);
    }
    DiskTreebank tb = tlpp.diskTreebank();
    for (String filename : fileList.split("\\s+")) {
        tb.loadPath(filename);
    }

    String startSymbol = tlpp.treebankLanguagePack().startSymbol();
    TreeFactory tf = new LabeledScoredTreeFactory();
    int nTrees = 0;
    // try-with-resources so the writer is flushed and closed even if a tree fails to load.
    try (PrintWriter pwo = tlpp.pw()) {
        for (Tree t : tb) {
            if (removeBracket) {
                if (t.value().equals(startSymbol)) {
                    t = t.firstChild();
                }
            } else if (!t.value().equals(startSymbol)) {
                // Add a bracket if it isn't already there
                t = tf.newTreeNode(startSymbol, Collections.singletonList(t));
            }
            pwo.println(t.toString());
            nTrees++;
        }
    }
    System.err.printf("Processed %d trees.%n", nTrees);
}
Also used : DiskTreebank(edu.stanford.nlp.trees.DiskTreebank) Language(edu.stanford.nlp.international.Language) LabeledScoredTreeFactory(edu.stanford.nlp.trees.LabeledScoredTreeFactory) TreeFactory(edu.stanford.nlp.trees.TreeFactory) Tree(edu.stanford.nlp.trees.Tree) TreebankLangParserParams(edu.stanford.nlp.parser.lexparser.TreebankLangParserParams) Properties(java.util.Properties) LabeledScoredTreeFactory(edu.stanford.nlp.trees.LabeledScoredTreeFactory) PrintWriter(java.io.PrintWriter)

Example 2 with LabeledScoredTreeFactory

use of edu.stanford.nlp.trees.LabeledScoredTreeFactory in project CoreNLP by stanfordnlp.

the class MetaClass method cast.

/**
 * Cast a String representation of an object into that object.
 * E.g. "5.4" will be cast to a Double; "[1,2,3]" will be cast
 * to an Integer[].
 *
 * The target class is tested against a fixed sequence of known types (String, boxed and
 * primitive numbers, Lazy, Optional, Date/Calendar, file and stream types, Class, arrays,
 * Map, enums). If none matches, a public {@code fromString(String)} method on the class is
 * tried, and finally Tree and Collection targets are handled. The order of the tests matters
 * because several of the tested types are subclasses of later ones.
 *
 * NOTE: Date parses from a Long
 *
 * @param <E> The type of the object returned (same as type)
 * @param value The string representation of the object
 * @param type The type (usually class) to be returned (same as E)
 * @return An object corresponding to the String value passed, or null when the value
 *         could not be converted in one of the branches that fail quietly (unparsable
 *         Date/Calendar, unknown class name, null array/enum value, unsupported class)
 * @throws IllegalArgumentException if {@code type} is neither a Class nor a
 *         ParameterizedType, or if an enum value is empty or matches no constant
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
public static <E> E cast(String value, Type type) {
    // --Get Type
    Class<?> clazz;
    if (type instanceof Class) {
        clazz = (Class<?>) type;
    } else if (type instanceof ParameterizedType) {
        ParameterizedType pt = (ParameterizedType) type;
        clazz = (Class<?>) pt.getRawType();
    } else {
        throw new IllegalArgumentException("Cannot cast to type (unhandled type): " + type);
    }
    // --Cast
    if (String.class.isAssignableFrom(clazz)) {
        // (case: String)
        return (E) value;
    } else if (Boolean.class.isAssignableFrom(clazz) || boolean.class.isAssignableFrom(clazz)) {
        // (case: boolean) "1" is accepted as true in addition to what parseBoolean accepts
        if ("1".equals(value)) {
            return (E) Boolean.TRUE;
        }
        return (E) Boolean.valueOf(Boolean.parseBoolean(value));
    } else if (Integer.class.isAssignableFrom(clazz) || int.class.isAssignableFrom(clazz)) {
        // (case: integer) falls back to truncating a decimal representation such as "5.4"
        try {
            return (E) Integer.valueOf(Integer.parseInt(value));
        } catch (NumberFormatException e) {
            return (E) Integer.valueOf((int) Double.parseDouble(value));
        }
    } else if (BigInteger.class.isAssignableFrom(clazz)) {
        // (case: biginteger)
        if (value == null) {
            return (E) BigInteger.ZERO;
        }
        return (E) new BigInteger(value);
    } else if (Long.class.isAssignableFrom(clazz) || long.class.isAssignableFrom(clazz)) {
        // (case: long) falls back to truncating a decimal representation
        try {
            return (E) Long.valueOf(Long.parseLong(value));
        } catch (NumberFormatException e) {
            return (E) Long.valueOf((long) Double.parseDouble(value));
        }
    } else if (Float.class.isAssignableFrom(clazz) || float.class.isAssignableFrom(clazz)) {
        // (case: float)
        if (value == null) {
            return (E) Float.valueOf(Float.NaN);
        }
        return (E) Float.valueOf(Float.parseFloat(value));
    } else if (Double.class.isAssignableFrom(clazz) || double.class.isAssignableFrom(clazz)) {
        // (case: double)
        if (value == null) {
            return (E) Double.valueOf(Double.NaN);
        }
        return (E) Double.valueOf(Double.parseDouble(value));
    } else if (BigDecimal.class.isAssignableFrom(clazz)) {
        // (case: bigdecimal)
        if (value == null) {
            return (E) BigDecimal.ZERO;
        }
        return (E) new BigDecimal(value);
    } else if (Short.class.isAssignableFrom(clazz) || short.class.isAssignableFrom(clazz)) {
        // (case: short) falls back to truncating a decimal representation
        try {
            return (E) Short.valueOf(Short.parseShort(value));
        } catch (NumberFormatException e) {
            return (E) Short.valueOf((short) Double.parseDouble(value));
        }
    } else if (Byte.class.isAssignableFrom(clazz) || byte.class.isAssignableFrom(clazz)) {
        // (case: byte) falls back to truncating a decimal representation
        try {
            return (E) Byte.valueOf(Byte.parseByte(value));
        } catch (NumberFormatException e) {
            return (E) Byte.valueOf((byte) Double.parseDouble(value));
        }
    } else if (Character.class.isAssignableFrom(clazz) || char.class.isAssignableFrom(clazz)) {
        // (case: char) the string holds the numeric code of the character, not the character itself
        return (E) Character.valueOf((char) Integer.parseInt(value));
    } else if (Lazy.class.isAssignableFrom(clazz)) {
        // (case: Lazy) copy into a final local because value is reassigned further down
        final String v = value;
        return (E) Lazy.of(() -> MetaClass.castWithoutKnowingType(v));
    } else if (Optional.class.isAssignableFrom(clazz)) {
        // (case: Optional) "null", "empty" and "none" (any case) mean absent; the payload stays a String
        return (E) ((value == null || "null".equals(value.toLowerCase()) || "empty".equals(value.toLowerCase()) || "none".equals(value.toLowerCase())) ? Optional.empty() : Optional.of(value));
    } else if (java.util.Date.class.isAssignableFrom(clazz)) {
        // (case: date) value is a long passed to the Date constructor
        try {
            return (E) new Date(Long.parseLong(value));
        } catch (NumberFormatException e) {
            return null;
        }
    } else if (java.util.Calendar.class.isAssignableFrom(clazz)) {
        // (case: calendar) same long representation as the date case
        try {
            Date d = new Date(Long.parseLong(value));
            GregorianCalendar cal = new GregorianCalendar();
            cal.setTime(d);
            return (E) cal;
        } catch (NumberFormatException e) {
            return null;
        }
    } else if (FileWriter.class.isAssignableFrom(clazz)) {
        // (case: file writer) value is the path to write to
        try {
            return (E) new FileWriter(new File(value));
        } catch (IOException e) {
            throw new RuntimeIOException(e);
        }
    } else if (BufferedReader.class.isAssignableFrom(clazz)) {
        // (case: buffered reader)
        try {
            return (E) IOUtils.readerFromString(value);
        } catch (IOException e) {
            throw new RuntimeIOException(e);
        }
    } else if (FileReader.class.isAssignableFrom(clazz)) {
        // (case: file reader) value is the path to read from
        try {
            return (E) new FileReader(new File(value));
        } catch (IOException e) {
            throw new RuntimeIOException(e);
        }
    } else if (File.class.isAssignableFrom(clazz)) {
        // (case: file)
        return (E) new File(value);
    } else if (Class.class.isAssignableFrom(clazz)) {
        // (case: class) value is a class name; unknown names yield null
        try {
            return (E) Class.forName(value);
        } catch (ClassNotFoundException e) {
            return null;
        }
    } else if (clazz.isArray()) {
        if (value == null) {
            return null;
        }
        Class<?> subType = clazz.getComponentType();
        // (case: array) each decoded element is cast recursively to the component type
        String[] strings = StringUtils.decodeArray(value);
        Object[] array = (Object[]) Array.newInstance(clazz.getComponentType(), strings.length);
        for (int i = 0; i < strings.length; i++) {
            array[i] = cast(strings[i], subType);
        }
        return (E) array;
    } else if (Map.class.isAssignableFrom(clazz)) {
        // (case: map)
        return (E) StringUtils.decodeMap(value);
    } else if (clazz.isEnum()) {
        // (case: enumeration)
        Class c = (Class) clazz;
        if (value == null) {
            return null;
        }
        // Strip one optional leading and one optional trailing double quote. startsWith/endsWith
        // are safe on the empty string, unlike the charAt calls they replace, which threw
        // StringIndexOutOfBoundsException for "", "\"" and "\"\"".
        if (value.startsWith("\"")) {
            value = value.substring(1);
        }
        if (value.endsWith("\"")) {
            value = value.substring(0, value.length() - 1);
        }
        if (value.isEmpty()) {
            // The last fallback below reads value.charAt(0), so an empty name must stop here.
            throw new IllegalArgumentException("Cannot cast an empty string to enum " + clazz.getName());
        }
        // Try the name as given, then lower case, upper case, and finally with only the
        // case of the first character flipped; the last attempt is allowed to throw.
        try {
            return (E) Enum.valueOf(c, value);
        } catch (Exception e) {
            try {
                return (E) Enum.valueOf(c, value.toLowerCase(Locale.ROOT));
            } catch (Exception e2) {
                try {
                    return (E) Enum.valueOf(c, value.toUpperCase(Locale.ROOT));
                } catch (Exception e3) {
                    return (E) Enum.valueOf(c, (Character.isUpperCase(value.charAt(0)) ? Character.toLowerCase(value.charAt(0)) : Character.toUpperCase(value.charAt(0))) + value.substring(1));
                }
            }
        }
    } else if (ObjectOutputStream.class.isAssignableFrom(clazz)) {
        // (case: object output stream) wraps whatever the OutputStream case below produces;
        // must be tested before OutputStream because it is a subclass of it
        try {
            return (E) new ObjectOutputStream((OutputStream) cast(value, OutputStream.class));
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    } else if (ObjectInputStream.class.isAssignableFrom(clazz)) {
        // (case: object input stream) wraps whatever the InputStream case below produces
        try {
            return (E) new ObjectInputStream((InputStream) cast(value, InputStream.class));
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    } else if (PrintStream.class.isAssignableFrom(clazz)) {
        // (case: print stream) tested before OutputStream because PrintStream is a subclass of it
        if (value.equalsIgnoreCase("stdout") || value.equalsIgnoreCase("out")) {
            return (E) System.out;
        }
        if (value.equalsIgnoreCase("stderr") || value.equalsIgnoreCase("err")) {
            return (E) System.err;
        }
        try {
            return (E) new PrintStream(new FileOutputStream(value));
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    } else if (PrintWriter.class.isAssignableFrom(clazz)) {
        // (case: print writer)
        if (value.equalsIgnoreCase("stdout") || value.equalsIgnoreCase("out")) {
            return (E) new PrintWriter(System.out);
        }
        if (value.equalsIgnoreCase("stderr") || value.equalsIgnoreCase("err")) {
            return (E) new PrintWriter(System.err);
        }
        try {
            return (E) IOUtils.getPrintWriter(value);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    } else if (OutputStream.class.isAssignableFrom(clazz)) {
        // (case: output stream)
        if (value.equalsIgnoreCase("stdout") || value.equalsIgnoreCase("out")) {
            return (E) System.out;
        }
        if (value.equalsIgnoreCase("stderr") || value.equalsIgnoreCase("err")) {
            return (E) System.err;
        }
        File toWriteTo = cast(value, File.class);
        try {
            // Create the file up front so an unwritable location is reported clearly
            if (toWriteTo == null || (!toWriteTo.exists() && !toWriteTo.createNewFile())) {
                throw new IllegalStateException("Could not create output stream (cannot write file): " + value);
            }
            return (E) IOUtils.getFileOutputStream(value);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    } else if (InputStream.class.isAssignableFrom(clazz)) {
        // (case: input stream)
        if (value.equalsIgnoreCase("stdin") || value.equalsIgnoreCase("in")) {
            return (E) System.in;
        }
        try {
            return (E) IOUtils.getInputStreamFromURLOrClasspathOrFileSystem(value);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    } else {
        try {
            // (case: can parse from string)
            Method decode = clazz.getMethod("fromString", String.class);
            return (E) decode.invoke(MetaClass.create(clazz), value);
        } catch (NoSuchMethodException | InvocationTargetException | IllegalAccessException | ClassCastException e) {
        // Silent errors for misc failures
        }
        // Pass 2: Guess what the object could be
        if (Tree.class.isAssignableFrom(clazz)) {
            // (case: reading a tree) value is a bracketed tree string; nodes get CoreLabel labels
            try {
                return (E) new PennTreeReader(new StringReader(value), new LabeledScoredTreeFactory(CoreLabel.factory())).readTree();
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        } else if (Collection.class.isAssignableFrom(clazz)) {
            // (case: reading a collection)
            Collection rtn;
            if (Modifier.isAbstract(clazz.getModifiers())) {
                rtn = abstractToConcreteCollectionMap.get(clazz).createInstance();
            } else {
                rtn = MetaClass.create(clazz).createInstance();
            }
            // NOTE(review): getComponentType() returns null for every non-array class, and arrays
            // were handled earlier, so subType is always null here and elements are always cast
            // without a known type. The generic element type of a ParameterizedType is not consulted.
            Class<?> subType = clazz.getComponentType();
            String[] strings = StringUtils.decodeArray(value);
            for (String string : strings) {
                if (subType == null) {
                    rtn.add(castWithoutKnowingType(string));
                } else {
                    rtn.add(cast(string, subType));
                }
            }
            return (E) rtn;
        } else {
            // We could not cast this object
            return null;
        }
    }
}
Also used : LabeledScoredTreeFactory(edu.stanford.nlp.trees.LabeledScoredTreeFactory) java.util(java.util) BigInteger(java.math.BigInteger) RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) BigDecimal(java.math.BigDecimal) RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) BigInteger(java.math.BigInteger) PennTreeReader(edu.stanford.nlp.trees.PennTreeReader)

Example 3 with LabeledScoredTreeFactory

use of edu.stanford.nlp.trees.LabeledScoredTreeFactory in project CoreNLP by stanfordnlp.

the class DependencyIndexITest method testPositions.

/**
 * Runs checkTree on the same sentence obtained four different ways: read with
 * StringLabelFactory labels, built by Tree.valueOf, read with CoreLabel labels, and
 * produced by the lexicalized parser (after indexing its leaves).
 */
@Test
public void testPositions() {
    final String penn = "(S (NP (NNP Mary)) (VP (VBD had) (NP (DT a) (JJ little) (NN lamb))) (. .))";
    try {
        // 1. Tree loaded from a reader, labels produced by a StringLabelFactory.
        LabeledScoredTreeFactory stringLabelTrees = new LabeledScoredTreeFactory(new StringLabelFactory());
        Tree fromStringLabels = new PennTreeReader(new StringReader(penn), stringLabelTrees).readTree();
        checkTree(fromStringLabels);

        // 2. Tree created directly with Tree.valueOf().
        Tree fromValueOf = Tree.valueOf(penn);
        checkTree(fromValueOf);

        // 3. Tree loaded from a reader, labels produced by the CoreLabel factory.
        LabeledScoredTreeFactory coreLabelTrees = new LabeledScoredTreeFactory(CoreLabel.factory());
        Tree fromCoreLabels = new PennTreeReader(new StringReader(penn), coreLabelTrees).readTree();
        checkTree(fromCoreLabels);

        // 4. Tree generated by the parser; its leaves are indexed before checking.
        Tree parsed = LexicalizedParser.loadModel().parse("Mary had a little lamb .");
        parsed.indexLeaves();
        checkTree(parsed);
    } catch (IOException e) {
        // Reading from an in-memory string is not expected to fail.
        fail("IOException shouldn't happen.");
    }
}
Also used : PennTreeReader(edu.stanford.nlp.trees.PennTreeReader) StringLabelFactory(edu.stanford.nlp.ling.StringLabelFactory) LexicalizedParser(edu.stanford.nlp.parser.lexparser.LexicalizedParser) StringReader(java.io.StringReader) Tree(edu.stanford.nlp.trees.Tree) IOException(java.io.IOException) LabeledScoredTreeFactory(edu.stanford.nlp.trees.LabeledScoredTreeFactory) Test(org.junit.Test)

Example 4 with LabeledScoredTreeFactory

use of edu.stanford.nlp.trees.LabeledScoredTreeFactory in project CoreNLP by stanfordnlp.

the class CustomAnnotationSerializer method read.

/**
 * Reads one serialized document from the stream and rebuilds it as an {@link Annotation}.
 *
 * The stream is consumed in this order: coref chains (new format), one line holding the
 * coref graph in the old format (blank when absent), then one record per sentence made of
 * a parse tree line, three dependency graphs, and token lines terminated by an empty line.
 *
 * @param is the stream to read; wrapped in a {@link GZIPInputStream} when compression is
 *           enabled and the stream is not one already
 * @return the rebuilt document paired with the (possibly wrapped) input stream, which is
 *         deliberately left open for the caller
 * @throws IOException if reading fails or the stream ends before the coref graph line
 * @throws RuntimeIOException if the old-format coref graph line is malformed
 */
@Override
public Pair<Annotation, InputStream> read(InputStream is) throws IOException {
    if (compress && !(is instanceof GZIPInputStream))
        is = new GZIPInputStream(is);
    // NOTE(review): no charset is given, so the platform default is used -- confirm this
    // matches whatever the corresponding writer uses.
    BufferedReader reader = new BufferedReader(new InputStreamReader(is));
    Annotation doc = new Annotation("");
    String line;
    // read the coref graph (new format)
    Map<Integer, CorefChain> chains = loadCorefChains(reader);
    if (chains != null)
        doc.set(CorefCoreAnnotations.CorefChainAnnotation.class, chains);
    // read the coref graph (old format)
    line = reader.readLine();
    if (line == null) {
        // readLine() returns null at end of stream; report the truncation explicitly
        // instead of failing with a NullPointerException on trim().
        throw new IOException("Unexpected end of stream while reading the serialized coref graph");
    }
    line = line.trim();
    if (line.length() > 0) {
        // The line is a flat list of integers, four per edge: two for the source, two for the target
        String[] bits = line.split(" ");
        if (bits.length % 4 != 0) {
            throw new RuntimeIOException("ERROR: Incorrect format for the serialized coref graph: " + line);
        }
        List<Pair<IntTuple, IntTuple>> corefGraph = new ArrayList<>();
        for (int i = 0; i < bits.length; i += 4) {
            IntTuple src = new IntTuple(2);
            IntTuple dst = new IntTuple(2);
            src.set(0, Integer.parseInt(bits[i]));
            src.set(1, Integer.parseInt(bits[i + 1]));
            dst.set(0, Integer.parseInt(bits[i + 2]));
            dst.set(1, Integer.parseInt(bits[i + 3]));
            corefGraph.add(new Pair<>(src, dst));
        }
        doc.set(CorefCoreAnnotations.CorefGraphAnnotation.class, corefGraph);
    }
    // read individual sentences
    List<CoreMap> sentences = new ArrayList<>();
    while ((line = reader.readLine()) != null) {
        CoreMap sentence = new Annotation("");
        // first line is the parse tree. construct it with CoreLabels in Tree nodes
        Tree tree = new PennTreeReader(new StringReader(line), new LabeledScoredTreeFactory(CoreLabel.factory())).readTree();
        sentence.set(TreeCoreAnnotations.TreeAnnotation.class, tree);
        // read the dependency graphs, in the order collapsed, uncollapsed (basic), CC-processed
        IntermediateSemanticGraph intermCollapsedDeps = loadDependencyGraph(reader);
        IntermediateSemanticGraph intermUncollapsedDeps = loadDependencyGraph(reader);
        IntermediateSemanticGraph intermCcDeps = loadDependencyGraph(reader);
        // the remaining lines until empty line (or end of stream) are tokens
        List<CoreLabel> tokens = new ArrayList<>();
        while ((line = reader.readLine()) != null) {
            if (line.length() == 0)
                break;
            CoreLabel token = loadToken(line, haveExplicitAntecedent);
            tokens.add(token);
        }
        sentence.set(CoreAnnotations.TokensAnnotation.class, tokens);
        // convert the intermediate graphs to actual SemanticGraphs now that the tokens are known
        SemanticGraph collapsedDeps = intermCollapsedDeps.convertIntermediateGraph(tokens);
        sentence.set(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class, collapsedDeps);
        SemanticGraph uncollapsedDeps = intermUncollapsedDeps.convertIntermediateGraph(tokens);
        sentence.set(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class, uncollapsedDeps);
        SemanticGraph ccDeps = intermCcDeps.convertIntermediateGraph(tokens);
        sentence.set(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class, ccDeps);
        sentences.add(sentence);
    }
    doc.set(CoreAnnotations.SentencesAnnotation.class, sentences);
    return Pair.makePair(doc, is);
}
Also used : CorefCoreAnnotations(edu.stanford.nlp.coref.CorefCoreAnnotations) GZIPInputStream(java.util.zip.GZIPInputStream) CorefChain(edu.stanford.nlp.coref.data.CorefChain) Tree(edu.stanford.nlp.trees.Tree) LabeledScoredTreeFactory(edu.stanford.nlp.trees.LabeledScoredTreeFactory) RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreLabel(edu.stanford.nlp.ling.CoreLabel) PennTreeReader(edu.stanford.nlp.trees.PennTreeReader) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) CorefCoreAnnotations(edu.stanford.nlp.coref.CorefCoreAnnotations) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph)

Example 5 with LabeledScoredTreeFactory

use of edu.stanford.nlp.trees.LabeledScoredTreeFactory in project CoreNLP by stanfordnlp.

the class SpanishTreeNormalizerITest method setUp.

/**
 * Test fixture: builds a new Spanish tree normalizer (all three constructor flags enabled)
 * and a new labeled, scored tree factory before each test method.
 */
@Before
public void setUp() {
    tn = new SpanishTreeNormalizer(true, true, true);
    tf = new LabeledScoredTreeFactory();
}
Also used : LabeledScoredTreeFactory(edu.stanford.nlp.trees.LabeledScoredTreeFactory) Before(org.junit.Before)

Aggregations

LabeledScoredTreeFactory (edu.stanford.nlp.trees.LabeledScoredTreeFactory): 9; Tree (edu.stanford.nlp.trees.Tree): 7; PennTreeReader (edu.stanford.nlp.trees.PennTreeReader): 4; TreeFactory (edu.stanford.nlp.trees.TreeFactory): 3; RuntimeIOException (edu.stanford.nlp.io.RuntimeIOException): 2; StringLabelFactory (edu.stanford.nlp.ling.StringLabelFactory): 2; StringReader (java.io.StringReader): 2; CorefCoreAnnotations (edu.stanford.nlp.coref.CorefCoreAnnotations): 1; CorefChain (edu.stanford.nlp.coref.data.CorefChain): 1; Language (edu.stanford.nlp.international.Language): 1; CategoryWordTagFactory (edu.stanford.nlp.ling.CategoryWordTagFactory): 1; CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations): 1; CoreLabel (edu.stanford.nlp.ling.CoreLabel): 1; HasTag (edu.stanford.nlp.ling.HasTag): 1; HasWord (edu.stanford.nlp.ling.HasWord): 1; LexicalizedParser (edu.stanford.nlp.parser.lexparser.LexicalizedParser): 1; TreebankLangParserParams (edu.stanford.nlp.parser.lexparser.TreebankLangParserParams): 1; SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph): 1; SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations): 1; DiskTreebank (edu.stanford.nlp.trees.DiskTreebank): 1