Use of edu.illinois.cs.cogcomp.transliteration.SPModel in project cogcomp-nlp by CogComp.
Class Utils, method main.
/**
 * Trains an {@code SPModel} on examples read from the wikidata file for one language and
 * uses it to transliterate a list of names.
 *
 * <p>Each input line is expected to be {@code name<TAB>label}. The name is split on spaces,
 * each token is lower-cased and passed to the model, and the top candidate for every token
 * is joined with spaces. Lines for which no token produced a candidate are omitted from the
 * output. The output keeps the original label: {@code transliteration<TAB>label}.
 *
 * <p>NOTE: the input and output paths are hard-coded to a developer machine.
 *
 * @param args unused
 * @throws Exception if reading the data, training, or writing the results fails
 */
public static void main(String[] args) throws Exception {
    // romanization();
    // Language groups by script; only referenced by the commented-out experiment below.
    String[] arabic_names = { "Urdu", "Arabic", "Egyptian_Arabic", "Mazandarani", "Pashto", "Persian", "Western_Punjabi" };
    String[] devanagari_names = { "Newar", "Hindi", "Marathi", "Nepali", "Sanskrit" };
    String[] cyrillic_names = { "Chuvash", "Bashkir", "Bulgarian", "Chechen", "Kirghiz", "Macedonian", "Russian", "Ukrainian" };
    // for(String name : arabic_names){
    // System.out.println(name + " : " + WAVE("models/probs-"+name+"-Urdu.txt"));
    // getSize(name);
    // }
    String lang = "Arabic";
    String wikidata = "Data/wikidata." + lang;
    List<String> allnames = LineIO.read("/Users/stephen/Dropbox/papers/NAACL2016/data/all-names2.txt");
    List<Example> training = readWikiData(wikidata);
    // Cap the training set at 2000 examples; Math.min avoids an
    // IndexOutOfBoundsException when fewer examples are available.
    training = training.subList(0, Math.min(2000, training.size()));
    SPModel m = new SPModel(training);
    m.Train(5);

    // Quick sanity check of the trained model.
    TopList<Double, String> res = m.Generate("stephen");
    System.out.println(res);

    List<String> outlines = new ArrayList<>();
    int i = 0;
    for (String nameAndLabel : allnames) {
        // Progress indicator.
        if (i % 100 == 0) {
            System.out.println(i);
        }
        i++;

        String[] s = nameAndLabel.split("\t");
        if (s.length < 2) {
            // A line without a tab has no label; skip it instead of crashing on s[1].
            System.err.println("Skipping malformed line " + i + ": " + nameAndLabel);
            continue;
        }
        String name = s[0];
        String label = s[1];

        StringBuilder line = new StringBuilder();
        for (String tok : name.split(" ")) {
            res = m.Generate(tok.toLowerCase());
            // Tokens with no candidate are dropped from the output line.
            if (res.size() > 0) {
                String topcand = res.getFirst().getSecond();
                line.append(topcand).append(" ");
            }
        }

        String transliterated = line.toString().trim();
        if (transliterated.length() > 0) {
            outlines.add(transliterated + "\t" + label);
        }
    }
    LineIO.write("/Users/stephen/Dropbox/papers/NAACL2016/data/all-names-" + lang + "2.txt", outlines);
    // Transliterator t = Transliterator.getInstance("Any-am_FONIPA");
    //
    // String result = t.transform("Stephen");
    // System.out.println(result);
    //
    // Enumeration<String> tids = t.getAvailableIDs();
    //
    // while(tids.hasMoreElements()){
    // String e = tids.nextElement();
    // System.out.println(e);
    // }
}
Use of edu.illinois.cs.cogcomp.transliteration.SPModel in project cogcomp-nlp by CogComp.
Class TransliterationAnnotator, method initialize.
/**
 * Loads the transliteration model for this annotator's language.
 *
 * <p>The model directory is obtained from the datastore, and the model file is expected at
 * {@code transliteration-models-oct-2017/probs-<language code>.txt} inside it. When the file
 * exists, an {@code SPModel} is created from it and limited to a single candidate.
 *
 * <p>If the model file is missing, or the datastore lookup or model loading throws, the
 * problem is logged and {@code model} is not assigned by this method.
 *
 * @param rm resource manager supplied by the framework; not read by this method
 */
@Override
public void initialize(ResourceManager rm) {
    try {
        Datastore dsNoCredentials = new Datastore(new ResourceConfigurator().getDefaultConfig());
        File f = dsNoCredentials.getDirectory("org.cogcomp.transliteration", "transliteration-models", 1.3, false);
        String modelPath = f.getAbsolutePath() + File.separator + "transliteration-models-oct-2017" + File.separator + "probs-" + lang.getCode() + ".txt";
        if (new File(modelPath).exists()) {
            logger.info("Loading transliteration models for language: " + lang + " from " + modelPath);
            model = new SPModel(modelPath);
            // Only the single best transliteration is requested from the model.
            model.setMaxCandidates(1);
        } else {
            logger.error("Model for language: " + lang + " don't exist: " + modelPath);
        }
    } catch (IOException | InvalidEndpointException | DatastoreException | InvalidPortException e) {
        // Route the failure through the logger (with the stack trace attached) rather than
        // printing it to stderr, so it lands next to the other messages from this method.
        logger.error("Failed to load transliteration model for language: " + lang, e);
    }
}
Aggregations