Use of edu.illinois.cs.cogcomp.transliteration.MultiExample in project cogcomp-nlp by CogComp.
Class Utils, method convertMulti.
/**
 * Flattens a list of multi-target examples into single-token training pairs.
 *
 * <p>Each {@link MultiExample} is expanded via {@code toExampleList()}. For every resulting
 * example, the source word and the transliterated word are both split on a single space
 * character and paired up position by position. An example whose two sides split into a
 * different number of tokens is reported through the logger and contributes nothing.
 *
 * @param lme the multi-target examples to expand
 * @return a new list holding one {@link Example} per aligned (source token, target token) pair
 */
public static List<Example> convertMulti(List<MultiExample> lme) {
    final List<Example> pairs = new ArrayList<>();
    for (MultiExample multi : lme) {
        for (Example candidate : multi.toExampleList()) {
            final String[] targetTokens = candidate.getTransliteratedWord().split(" ");
            final String[] sourceTokens = candidate.sourceWord.split(" ");
            if (targetTokens.length == sourceTokens.length) {
                // Token counts agree, so the i-th source token aligns with the i-th target token.
                int position = 0;
                for (String sourceToken : sourceTokens) {
                    pairs.add(new Example(sourceToken, targetTokens[position]));
                    position++;
                }
            } else {
                // No positional alignment is possible; report and drop this example.
                logger.error("Mismatched length: " + candidate.sourceWord);
            }
        }
    }
    return pairs;
}
Use of edu.illinois.cs.cogcomp.transliteration.MultiExample in project cogcomp-nlp by CogComp.
Class Utils, method readNEWSData.
/**
 * Reads transliteration examples from an XML file in the NEWS2015 dataset layout.
 *
 * <p>Every {@code Name} element produces at most one {@link MultiExample}. The text of its
 * {@code SourceName} child, lower-cased with {@code Locale.ROOT}, becomes the source word,
 * and the text of each {@code TargetName} child that appears after it is added as a
 * transliteration. A {@code TargetName} that precedes any {@code SourceName} is ignored, and
 * a {@code Name} element without a {@code SourceName} child is skipped entirely, so the
 * returned list never contains {@code null}.
 *
 * <p>The parser is configured with secure processing enabled and external general/parameter
 * entities disabled, so entities declared in the input file cannot pull in external resources.
 *
 * @param fname path of the XML file to read
 * @return the examples found in the file, in document order
 * @throws ParserConfigurationException if the XML parser cannot be created or does not
 *         support the hardening features requested here
 * @throws IOException if the file cannot be read
 * @throws SAXException if the file cannot be parsed as XML
 */
public static List<MultiExample> readNEWSData(String fname) throws ParserConfigurationException, IOException, SAXException {
    File file = new File(fname);

    DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
    // Harden against XXE: the file content must not be able to trigger external entity resolution.
    dbf.setFeature(javax.xml.XMLConstants.FEATURE_SECURE_PROCESSING, true);
    dbf.setFeature("http://xml.org/sax/features/external-general-entities", false);
    dbf.setFeature("http://xml.org/sax/features/external-parameter-entities", false);

    DocumentBuilder db = dbf.newDocumentBuilder();
    Document document = db.parse(file);
    NodeList nl = document.getElementsByTagName("Name");

    List<MultiExample> examples = new ArrayList<>();
    for (int i = 0; i < nl.getLength(); i++) {
        NodeList sourceandtargets = nl.item(i).getChildNodes();
        MultiExample me = null;
        for (int j = 0; j < sourceandtargets.getLength(); j++) {
            Node st = sourceandtargets.item(j);
            String nodeName = st.getNodeName();
            if ("SourceName".equals(nodeName)) {
                // Locale.ROOT keeps lower-casing independent of the machine's default locale.
                me = new MultiExample(st.getTextContent().toLowerCase(java.util.Locale.ROOT), new ArrayList<String>());
            } else if ("TargetName".equals(nodeName) && me != null) {
                me.addTransliteratedWord(st.getTextContent());
            }
        }
        // A Name element with no SourceName child leaves me unset; do not hand callers a null entry.
        if (me != null) {
            examples.add(me);
        }
    }
    return examples;
}
Aggregations