use of beast.util.ClusterTree in project beast2 by CompEvol.
the class StarBeastStartState method fullInit.
private void fullInit() {
// Build gene trees from alignments
final Function muInput = this.muInput.get();
final double mu = (muInput != null) ? muInput.getArrayValue() : 1;
final Tree stree = speciesTreeInput.get();
final TaxonSet species = stree.m_taxonset.get();
final List<String> speciesNames = species.asStringList();
final int speciesCount = speciesNames.size();
final List<Tree> geneTrees = genes.get();
// final List<Alignment> alignments = genes.get();
// final List<Tree> geneTrees = new ArrayList<>(alignments.size());
double maxNsites = 0;
// for( final Alignment alignment : alignments) {
for (final Tree gtree : geneTrees) {
// final Tree gtree = new Tree();
final Alignment alignment = gtree.m_taxonset.get().alignmentInput.get();
final ClusterTree ctree = new ClusterTree();
ctree.initByName("initial", gtree, "clusterType", "upgma", "taxa", alignment);
gtree.scale(1 / mu);
maxNsites = max(maxNsites, alignment.getSiteCount());
}
final Map<String, Integer> geneTips2Species = new HashMap<>();
final List<Taxon> taxonSets = species.taxonsetInput.get();
for (int k = 0; k < speciesNames.size(); ++k) {
final Taxon nx = taxonSets.get(k);
final List<Taxon> taxa = ((TaxonSet) nx).taxonsetInput.get();
for (final Taxon n : taxa) {
geneTips2Species.put(n.getID(), k);
}
}
final double[] dg = new double[(speciesCount * (speciesCount - 1)) / 2];
final double[][] genesDmins = new double[geneTrees.size()][];
for (int ng = 0; ng < geneTrees.size(); ++ng) {
final Tree g = geneTrees.get(ng);
final double[] dmin = firstMeetings(g, geneTips2Species, speciesCount);
genesDmins[ng] = dmin;
for (int i = 0; i < dmin.length; ++i) {
dg[i] += dmin[i];
if (dmin[i] == Double.MAX_VALUE) {
// this happens when a gene tree has no taxa for some species-tree taxon.
// TODO: ensure that if this happens, there will always be an "infinite"
// distance between species-taxon 0 and the species-taxon with missing lineages,
// so i < speciesCount - 1.
// What if lineages for species-taxon 0 are missing? Then all entries will be 'infinite'.
String id = (i < speciesCount - 1 ? stree.getExternalNodes().get(i + 1).getID() : "unknown taxon");
if (i == 0) {
// test that all entries are 'infinite', which implies taxon 0 has lineages missing
boolean b = true;
for (int k = 1; b && k < speciesCount - 1; k++) {
b = (dmin[k] == Double.MAX_VALUE);
}
if (b) {
// if all entries have 'infinite' distances, it is probably the first taxon that is at fault
id = stree.getExternalNodes().get(0).getID();
}
}
throw new RuntimeException("Gene tree " + g.getID() + " has no lineages for species taxon " + id + " ");
}
}
}
for (int i = 0; i < dg.length; ++i) {
double d = dg[i] / geneTrees.size();
if (d == 0) {
d = (0.5 / maxNsites) * (1 / mu);
} else {
// heights to distances
d *= 2;
}
dg[i] = d;
}
final ClusterTree ctree = new ClusterTree();
final Distance distance = new Distance() {
@Override
public double pairwiseDistance(final int s1, final int s2) {
final int i = getDMindex(speciesCount, s1, s2);
return dg[i];
}
};
ctree.initByName("initial", stree, "taxonset", species, "clusterType", "upgma", "distance", distance);
final Map<String, Integer> sptips2SpeciesIndex = new HashMap<>();
for (int i = 0; i < speciesNames.size(); ++i) {
sptips2SpeciesIndex.put(speciesNames.get(i), i);
}
final double[] spmin = firstMeetings(stree, sptips2SpeciesIndex, speciesCount);
for (int ng = 0; ng < geneTrees.size(); ++ng) {
final double[] dmin = genesDmins[ng];
boolean compatible = true;
for (int i = 0; i < spmin.length; ++i) {
if (dmin[i] <= spmin[i]) {
compatible = false;
break;
}
}
if (!compatible) {
final Tree gtree = geneTrees.get(ng);
final TaxonSet gtreeTaxa = gtree.m_taxonset.get();
final Alignment alignment = gtreeTaxa.alignmentInput.get();
final List<String> taxaNames = alignment.getTaxaNames();
final int taxonCount = taxaNames.size();
// speedup
final Map<Integer, Integer> g2s = new HashMap<>();
for (int i = 0; i < taxonCount; ++i) {
g2s.put(i, geneTips2Species.get(taxaNames.get(i)));
}
final JukesCantorDistance jc = new JukesCantorDistance();
jc.setPatterns(alignment);
final Distance gdistance = new Distance() {
@Override
public double pairwiseDistance(final int t1, final int t2) {
final int s1 = g2s.get(t1);
final int s2 = g2s.get(t2);
double d = jc.pairwiseDistance(t1, t2) / mu;
if (s1 != s2) {
final int i = getDMindex(speciesCount, s1, s2);
final double minDist = 2 * spmin[i];
if (d <= minDist) {
d = minDist * 1.001;
}
}
return d;
}
};
final ClusterTree gtreec = new ClusterTree();
gtreec.initByName("initial", gtree, "taxonset", gtreeTaxa, "clusterType", "upgma", "distance", gdistance);
}
}
{
final RealParameter lambda = birthRate.get();
if (lambda != null) {
final double rh = stree.getRoot().getHeight();
double l = 0;
for (int i = 2; i < speciesCount + 1; ++i) {
l += 1. / i;
}
setParameterValue(lambda, (1 / rh) * l);
}
double totBranches = 0;
final Node[] streeNodeas = stree.getNodesAsArray();
for (final Node n : streeNodeas) {
if (!n.isRoot()) {
totBranches += n.getLength();
}
}
totBranches /= 2 * (streeNodeas.length - 1);
final RealParameter popm = popMean.get();
if (popm != null) {
setParameterValue(popm, totBranches);
}
final SpeciesTreePrior speciesTreePrior = speciesTreePriorInput.get();
if (speciesTreePrior != null) {
final RealParameter popb = speciesTreePrior.popSizesBottomInput.get();
if (popb != null) {
for (int i = 0; i < popb.getDimension(); ++i) {
setParameterValue(popb, i, 2 * totBranches);
}
}
final RealParameter popt = speciesTreePrior.popSizesTopInput.get();
if (popt != null) {
for (int i = 0; i < popt.getDimension(); ++i) {
setParameterValue(popt, i, totBranches);
}
}
}
}
}
Aggregations