use of babel.util.NexusParser in project Babel by rbouckaert.
the class CharsetlabelsToCharsets method run.
/**
 * Reads the NEXUS file given by {@code nexusInput}, turns runs of consecutive character
 * state labels that share the same sanitised name into charset definitions, and writes
 * the original file content followed by an {@code assumptions} block with those charsets.
 *
 * <p>Output goes to the file given by {@code outputInput} when one is specified, and to
 * standard output otherwise. Charset ranges are 1-based and inclusive.
 *
 * @throws IllegalArgumentException if no NEXUS file is specified, or the parsed file
 *         contains no character state labels
 * @throws Exception for any failure while parsing the input or writing the output
 */
@Override
public void run() throws Exception {
    if (nexusInput.get() == null || nexusInput.get().getName().equals("[[none]]")) {
        throw new IllegalArgumentException("A valid nexus file must be specified");
    }
    NexusParser parser = new NexusParser();
    parser.parseFile(nexusInput.get());
    String[] labels = parser.charstatelabels;
    if (labels == null || labels.length == 0) {
        throw new IllegalArgumentException("Charsetlabels in nexus file must be specified (but could not find any)");
    }

    // process: emit one charset per run of identical (sanitised) labels
    StringBuilder buf = new StringBuilder();
    int start = 0;
    String prevName = sanitise(labels[0]);
    // Starts at 1 because the last run is always written after the loop.
    int charsetCount = 1;
    for (int i = 0; i < labels.length; i++) {
        String name = sanitise(labels[i]);
        if (!name.equals(prevName)) {
            // i is the 0-based index of the first label of the new run, which equals
            // the 1-based index of the last label of the previous run.
            buf.append("charset " + prevName + " = " + (start + 1) + "-" + i + ";\n");
            start = i;
            prevName = name;
            charsetCount++;
        }
    }
    buf.append("charset " + prevName + " = " + (start + 1) + "-" + labels.length + ";\n");

    // Load the original content BEFORE opening the output stream: opening a PrintStream
    // on a file truncates it, which would wipe the input if both refer to the same file.
    String old = BeautiDoc.load(nexusInput.get());

    // output
    boolean toFile = outputInput.get() != null && !outputInput.get().getName().equals("[[none]]");
    PrintStream out = System.out;
    if (toFile) {
        Log.warning("Writing to file " + outputInput.get().getName());
        out = new PrintStream(outputInput.get());
    }
    try {
        out.println(old);
        out.println("begin assumptions;");
        out.print(buf.toString());
        out.println("end;");
    } finally {
        // Only close streams this method opened; System.out must stay usable.
        if (toFile) {
            out.close();
        }
    }
    Log.warning(charsetCount + " charsets");
    Log.warning("Done!");
}
use of babel.util.NexusParser in project Babel by rbouckaert.
the class DolloAnnotator method getAlignment.
/**
 * Loads the alignment from the NEXUS file given by {@code nexusFileInput}, optionally
 * restricted by the filter given by {@code filterInput}.
 *
 * @return the alignment (wrapped in a {@code FilteredAlignment} when a filter is set),
 *         or {@code null} when no NEXUS file was specified
 * @throws IOException declared for failures while reading the NEXUS file
 * @throws IllegalArgumentException if the file holds no alignment, or the alignment's
 *         data type description is not {@code "binary"}
 */
private Alignment getAlignment() throws IOException {
    // The "[[none]]" placeholder check must look at the NEXUS input itself; testing the
    // output file's name here would skip or attempt parsing based on an unrelated option
    // and would dereference a possibly unset output.
    if (nexusFileInput.get() == null || nexusFileInput.get().getName().equals("[[none]]")) {
        return null;
    }

    NexusParser nexus = new NexusParser();
    nexus.parseFile(nexusFileInput.get());
    Alignment data = nexus.m_alignment;
    if (data == null || !data.getDataType().getTypeDescription().equals("binary")) {
        throw new IllegalArgumentException("Expected a binary alignment in the NEXUS file");
    }
    if (filterInput.get() != null) {
        FilteredAlignment filtered = new FilteredAlignment();
        filtered.initByName("data", data, "filter", filterInput.get());
        data = filtered;
    }
    return data;
}
use of babel.util.NexusParser in project Babel by rbouckaert.
the class CognateStats method run.
/**
 * Prints cognate statistics for the alignment in the NEXUS file given by
 * {@code nexusFileInput}: the singleton columns (state 1 in exactly one taxon), a
 * histogram of how many columns have state 1 in N taxa, and the number of duplicate
 * column pairs within each filtered alignment found by the parser.
 *
 * <p>Section headings go to {@code Log.warning}, the figures to {@code Log.info}.
 *
 * @throws IOException if the file holds no alignment, or the number of character state
 *         labels differs from the number of sites
 * @throws Exception for any other failure while parsing
 */
@Override
public void run() throws Exception {
    Log.warning.println("Languages (language, # columns, # states):");

    final NexusParser nexus = new NexusParser();
    nexus.parseFile(nexusFileInput.get());
    final Alignment alignment = nexus.m_alignment;
    if (alignment == null) {
        throw new IOException("No alignment found in nexus file");
    }

    final String[] labels = nexus.charstatelabels;
    final boolean hasLabels = labels != null;
    if (hasLabels && labels.length != alignment.getSiteCount()) {
        throw new IOException("Found " + labels.length + " cognate labels, but the alignment contains " + alignment.getSiteCount() + " sites.");
    }

    Log.warning.println(" ");
    Log.warning.println("Singletons (language, column):");
    final List<String> languages = alignment.getTaxaNames();
    // histogram[n] = number of columns in which exactly n taxa carry state 1
    final int[] histogram = new int[languages.size() + 1];
    for (int site = 0; site < alignment.getSiteCount(); site++) {
        final int[] column = alignment.getPattern(alignment.getPatternIndex(site));
        int present = 0;
        int lastPresentTaxon = -1;
        int taxon = 0;
        for (final int state : column) {
            if (state == 1) {
                present += 1;
                lastPresentTaxon = taxon;
            }
            taxon += 1;
        }
        if (present == 1) {
            // Fall back to the 0-based site index when the file has no labels.
            final String columnName = hasLabels ? labels[site] : String.valueOf(site);
            Log.info.println(languages.get(lastPresentTaxon) + " " + columnName);
        }
        histogram[present] += 1;
    }
    Log.info.println(histogram[1] + " singletons in total.\n");

    Log.warning.println(" \nCognate distribution (number of cognates contained in N languages, where N is the first column):");
    int languageCount = 0;
    for (final int columnsWithCount : histogram) {
        Log.info.println(languageCount + " " + columnsWithCount);
        languageCount++;
    }

    Log.warning.println(" \nDuplicates:");
    int duplicatePairs = 0;
    for (final Alignment part : nexus.filteredAlignments) {
        final int sites = part.getSiteCount();
        for (int first = 0; first < sites; first++) {
            final int[] left = part.getPattern(part.getPatternIndex(first));
            // Compare only against later columns so each pair is counted once.
            for (int second = first + 1; second < sites; second++) {
                final int[] right = part.getPattern(part.getPatternIndex(second));
                if (!equals(left, right)) {
                    continue;
                }
                duplicatePairs += 1;
            }
        }
    }
    Log.info.println(duplicatePairs + " duplicates in total.\n");
}
use of babel.util.NexusParser in project Babel by rbouckaert.
the class NexusToCharsets method run.
/**
 * Reads the alignment from the NEXUS file given by {@code nexusInput}, groups adjacent
 * columns into charsets named {@code concept0}, {@code concept1}, ... and writes the
 * data followed by an {@code assumptions} block holding those charsets.
 *
 * <p>A new charset starts wherever {@code matches} reports that a column does not match
 * its predecessor. When {@code sortInput} is set, columns are first sorted by their
 * missing-data ('?') layout and a fresh DATA block is written from the sorted columns;
 * otherwise the original file content is echoed. When {@code stripZeroColumnsInput} is
 * set, charsets are written as comma-separated column lists (only columns for which
 * {@code has1} holds, plus the first column) rather than as ranges.
 *
 * <p>Output goes to the file given by {@code outputInput} when one is specified, and to
 * standard output otherwise.
 *
 * @throws IllegalArgumentException if no NEXUS file is specified, or the file holds no
 *         alignment
 * @throws Exception for any failure while parsing the input or writing the output
 */
@Override
public void run() throws Exception {
    if (nexusInput.get() == null || nexusInput.get().getName().equals("[[none]]")) {
        throw new IllegalArgumentException("A valid nexus file must be specified");
    }
    NexusParser parser = new NexusParser();
    parser.parseFile(nexusInput.get());
    Alignment data = parser.m_alignment;
    if (data == null) {
        // Fail with a clear message instead of a NullPointerException further down.
        throw new IllegalArgumentException("No alignment found in nexus file");
    }
    List<String> taxa = data.getTaxaNames();
    String seq = data.getSequenceAsString(taxa.get(0));
    // seqs is indexed [site][taxon] so that whole columns can be sorted and compared.
    char[][] seqs = new char[seq.length()][data.getTaxonCount()];
    for (int i = 0; i < taxa.size(); i++) {
        seq = data.getSequenceAsString(taxa.get(i));
        for (int j = 0; j < seq.length(); j++) {
            seqs[j][i] = seq.charAt(j);
        }
    }
    boolean sort = sortInput.get();
    if (sort) {
        // Order columns by where '?' occurs: at the first taxon where exactly one of the
        // two columns is '?', the column with the '?' sorts first. Columns with the same
        // '?' layout compare as equal.
        Arrays.sort(seqs, (o1, o2) -> {
            for (int i = 0; i < o1.length; i++) {
                if (o1[i] == '?' && o2[i] != '?') {
                    return -1;
                } else if (o2[i] == '?' && o1[i] != '?') {
                    return 1;
                }
            }
            return 0;
        });
    }
    int n = seqs.length;

    // process
    StringBuilder buf = new StringBuilder();
    int start = 0;
    // k is the 0-based index of the charset currently being built.
    int k = 0;
    boolean strip = stripZeroColumnsInput.get();
    if (strip) {
        // NOTE(review): column 1 is always listed, without consulting has1 — confirm intended.
        buf.append("charset concept" + k + " = 1,");
    }
    for (int i = 1; i < n; i++) {
        if (!matches(seqs, i, i - 1)) {
            if (!strip) {
                buf.append("charset concept" + k + " = " + (start + 1) + "-" + i + ";\n");
            } else {
                // Drop the trailing ',' of the list built so far and close the charset.
                // NOTE(review): if no column was listed for this charset, the character
                // removed is the space after '=' instead — verify that case cannot occur.
                buf.deleteCharAt(buf.length() - 1);
                buf.append(";\n");
                buf.append("charset concept" + (k + 1) + " = ");
            }
            start = i;
            k++;
        }
        if (strip && has1(seqs, i)) {
            buf.append((i + 1) + ",");
        }
    }
    if (!strip) {
        buf.append("charset concept" + k + " = " + (start + 1) + "-" + n + ";\n");
    } else {
        buf.deleteCharAt(buf.length() - 1);
        buf.append(";\n");
    }

    // Load the original content BEFORE opening the output stream: opening a PrintStream
    // on a file truncates it, which would wipe the input if both refer to the same file.
    String old = sort ? null : BeautiDoc.load(nexusInput.get());

    // output
    boolean toFile = outputInput.get() != null && !outputInput.get().getName().equals("[[none]]");
    PrintStream out = System.out;
    if (toFile) {
        Log.warning("Writing to file " + outputInput.get().getName());
        out = new PrintStream(outputInput.get());
    }
    try {
        if (sort) {
            // The column order changed, so the original file can no longer be echoed;
            // write a new DATA block from the sorted columns instead.
            out.print("#NEXUS\n");
            out.print("BEGIN DATA;\n");
            out.print("DIMENSIONS NTAX=" + taxa.size() + " NCHAR=" + n + ";\n");
            out.print("FORMAT DATATYPE=STANDARD MISSING=? GAP=- SYMBOLS=\"01\";\n");
            out.print("MATRIX\n");
            for (int i = 0; i < taxa.size(); i++) {
                out.print(taxa.get(i) + " ");
                for (int j = 0; j < n; j++) {
                    out.print(seqs[j][i]);
                }
                out.println();
            }
            out.println(";\nEND;\n");
        } else {
            out.println(old);
        }
        out.println("begin assumptions;");
        out.print(buf.toString());
        out.println("end;");
    } finally {
        // Only close streams this method opened; System.out must stay usable.
        if (toFile) {
            out.close();
        }
    }
    // Charsets are numbered concept0..conceptk, so k + 1 were written.
    Log.warning((k + 1) + " charsets");
    Log.warning("Done!");
}
use of babel.util.NexusParser in project Babel by rbouckaert.
the class FrechetMeanCentroid method MCCTree.
/**
 * Runs {@code TreeAnnotator} on the tree file given by {@code treeFileInput}, using the
 * burn-in percentage from {@code burnInPercentageInput}, writes the result to the file
 * given by {@code outputInput}, then parses that file and returns its first tree.
 *
 * @return the first tree parsed from the annotated output file, or {@code null} when an
 *         {@code IOException} occurred (its stack trace is printed)
 */
private Tree MCCTree() {
    try {
        final String annotatedPath = outputInput.get().getAbsolutePath();
        final String[] annotatorArgs = {
            "-b",
            "" + burnInPercentageInput.get(),
            treeFileInput.get().getPath(),
            annotatedPath
        };
        TreeAnnotator.main(annotatorArgs);

        // Read back what the annotator just wrote.
        final NexusParser nexus = new NexusParser();
        nexus.parseFile(outputInput.get());
        return nexus.trees.get(0);
    } catch (IOException ioFailure) {
        // TODO Auto-generated catch block
        ioFailure.printStackTrace();
        return null;
    }
}
Aggregations