Use of beast.evolution.alignment.distance.JukesCantorDistance in project beast2 by CompEvol.
Method fullInit of the class StarBeastStartState.
/**
 * Builds a complete starting state: gene trees, species tree and, where the
 * corresponding inputs are present, the birth rate and population-size parameters.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>every gene tree is initialised as a UPGMA cluster tree of its alignment and
 *       rescaled by {@code 1 / mu};</li>
 *   <li>the per-species-pair values returned by {@code firstMeetings} are averaged
 *       over all gene trees and turned into a species distance matrix;</li>
 *   <li>the species tree is initialised as a UPGMA cluster tree of that matrix;</li>
 *   <li>any gene tree having a pair value not strictly above the species tree's
 *       value is rebuilt from Jukes-Cantor distances, with between-species distances
 *       pushed just above twice the species-tree value;</li>
 *   <li>birth rate, population mean and the species tree prior's bottom/top
 *       population sizes are set when available.</li>
 * </ol>
 *
 * @throws RuntimeException if a gene tree has no lineages for some species taxon
 */
private void fullInit() {
    // Scaling factor applied to all gene-tree distances; 1 when no input is given.
    final Function muFunction = this.muInput.get();
    final double mu;
    if (muFunction == null) {
        mu = 1;
    } else {
        mu = muFunction.getArrayValue();
    }

    final Tree speciesTree = speciesTreeInput.get();
    final TaxonSet speciesTaxa = speciesTree.m_taxonset.get();
    final List<String> speciesNames = speciesTaxa.asStringList();
    final int speciesCount = speciesNames.size();
    final List<Tree> geneTrees = genes.get();
    final int geneCount = geneTrees.size();

    // Build gene trees from alignments, tracking the longest alignment seen.
    double longestAlignment = 0;
    for (final Tree geneTree : geneTrees) {
        final Alignment data = geneTree.m_taxonset.get().alignmentInput.get();
        final ClusterTree upgma = new ClusterTree();
        upgma.initByName("initial", geneTree, "clusterType", "upgma", "taxa", data);
        geneTree.scale(1 / mu);
        longestAlignment = max(longestAlignment, data.getSiteCount());
    }

    // Map every gene-tree tip id to the index of the species it belongs to.
    final Map<String, Integer> tipToSpecies = new HashMap<>();
    final List<Taxon> speciesSets = speciesTaxa.taxonsetInput.get();
    for (int sp = 0; sp < speciesCount; ++sp) {
        final TaxonSet members = (TaxonSet) speciesSets.get(sp);
        for (final Taxon tip : members.taxonsetInput.get()) {
            tipToSpecies.put(tip.getID(), sp);
        }
    }

    // Accumulate, per species pair, the first-meeting values of all gene trees.
    final double[] pairDistance = new double[(speciesCount * (speciesCount - 1)) / 2];
    final double[][] meetingsPerGene = new double[geneCount][];
    for (int g = 0; g < geneCount; ++g) {
        final Tree geneTree = geneTrees.get(g);
        final double[] meetings = firstMeetings(geneTree, tipToSpecies, speciesCount);
        meetingsPerGene[g] = meetings;
        for (int i = 0; i < meetings.length; ++i) {
            pairDistance[i] += meetings[i];
            if (meetings[i] != Double.MAX_VALUE) {
                continue;
            }
            // An 'infinite' entry means the gene tree has no taxa for some species-tree taxon.
            // TODO: ensure that if this happens, there will always be an "infinite"
            // distance between species-taxon 0 and the species-taxon with missing lineages,
            // so i < speciesCount - 1.
            // What if lineages for species-taxon 0 are missing? Then all entries will be 'infinite'.
            String culprit;
            if (i < speciesCount - 1) {
                culprit = speciesTree.getExternalNodes().get(i + 1).getID();
            } else {
                culprit = "unknown taxon";
            }
            if (i == 0) {
                // If all leading entries are 'infinite', taxon 0 is probably the one at fault.
                boolean allInfinite = true;
                for (int k = 1; k < speciesCount - 1; k++) {
                    if (meetings[k] != Double.MAX_VALUE) {
                        allInfinite = false;
                        break;
                    }
                }
                if (allInfinite) {
                    culprit = speciesTree.getExternalNodes().get(0).getID();
                }
            }
            throw new RuntimeException("Gene tree " + geneTree.getID() + " has no lineages for species taxon " + culprit + " ");
        }
    }

    // Average over genes; a zero mean gets a small positive value, otherwise
    // heights are doubled to obtain distances.
    for (int i = 0; i < pairDistance.length; ++i) {
        final double mean = pairDistance[i] / geneCount;
        pairDistance[i] = (mean == 0) ? (0.5 / longestAlignment) * (1 / mu) : mean * 2;
    }

    // Species tree: UPGMA on the averaged species-pair distances.
    final ClusterTree speciesClusterer = new ClusterTree();
    final Distance speciesDistance = new Distance() {
        @Override
        public double pairwiseDistance(final int s1, final int s2) {
            return pairDistance[getDMindex(speciesCount, s1, s2)];
        }
    };
    speciesClusterer.initByName("initial", speciesTree, "taxonset", speciesTaxa, "clusterType", "upgma", "distance", speciesDistance);

    // First-meeting values of the species tree itself, indexed like the gene-tree ones.
    final Map<String, Integer> speciesNameToIndex = new HashMap<>();
    for (int sp = 0; sp < speciesCount; ++sp) {
        speciesNameToIndex.put(speciesNames.get(sp), sp);
    }
    final double[] speciesMeetings = firstMeetings(speciesTree, speciesNameToIndex, speciesCount);

    // Rebuild every gene tree that does not sit strictly above the species tree.
    for (int g = 0; g < geneCount; ++g) {
        final double[] meetings = meetingsPerGene[g];
        boolean incompatible = false;
        for (int i = 0; i < speciesMeetings.length && !incompatible; ++i) {
            incompatible = meetings[i] <= speciesMeetings[i];
        }
        if (!incompatible) {
            continue;
        }

        final Tree geneTree = geneTrees.get(g);
        final TaxonSet geneTaxa = geneTree.m_taxonset.get();
        final Alignment data = geneTaxa.alignmentInput.get();
        final List<String> tipNames = data.getTaxaNames();
        final int tipCount = tipNames.size();

        // speedup: tip index -> species index lookup
        final Map<Integer, Integer> tipIndexToSpecies = new HashMap<>();
        for (int t = 0; t < tipCount; ++t) {
            tipIndexToSpecies.put(t, tipToSpecies.get(tipNames.get(t)));
        }

        final JukesCantorDistance jc = new JukesCantorDistance();
        jc.setPatterns(data);
        final Distance constrainedDistance = new Distance() {
            @Override
            public double pairwiseDistance(final int t1, final int t2) {
                final int s1 = tipIndexToSpecies.get(t1);
                final int s2 = tipIndexToSpecies.get(t2);
                final double raw = jc.pairwiseDistance(t1, t2) / mu;
                if (s1 == s2) {
                    return raw;
                }
                // Tips of different species must be further apart than twice the
                // species-tree value for that species pair.
                final double floor = 2 * speciesMeetings[getDMindex(speciesCount, s1, s2)];
                return (raw <= floor) ? floor * 1.001 : raw;
            }
        };
        final ClusterTree geneClusterer = new ClusterTree();
        geneClusterer.initByName("initial", geneTree, "taxonset", geneTaxa, "clusterType", "upgma", "distance", constrainedDistance);
    }

    // Birth rate: sum of 1/i for i = 2..speciesCount, divided by the species tree root height.
    final RealParameter birthRateParameter = birthRate.get();
    if (birthRateParameter != null) {
        final double rootHeight = speciesTree.getRoot().getHeight();
        double harmonic = 0;
        for (int i = 2; i < speciesCount + 1; ++i) {
            harmonic += 1. / i;
        }
        setParameterValue(birthRateParameter, (1 / rootHeight) * harmonic);
    }

    // Population sizes are derived from the total non-root branch length of the species tree.
    final Node[] speciesNodes = speciesTree.getNodesAsArray();
    double branchSum = 0;
    for (final Node node : speciesNodes) {
        if (node.isRoot()) {
            continue;
        }
        branchSum += node.getLength();
    }
    final double popScale = branchSum / (2 * (speciesNodes.length - 1));

    final RealParameter popMeanParameter = popMean.get();
    if (popMeanParameter != null) {
        setParameterValue(popMeanParameter, popScale);
    }

    final SpeciesTreePrior prior = speciesTreePriorInput.get();
    if (prior == null) {
        return;
    }
    final RealParameter bottomSizes = prior.popSizesBottomInput.get();
    if (bottomSizes != null) {
        for (int i = 0; i < bottomSizes.getDimension(); ++i) {
            setParameterValue(bottomSizes, i, 2 * popScale);
        }
    }
    final RealParameter topSizes = prior.popSizesTopInput.get();
    if (topSizes != null) {
        for (int i = 0; i < topSizes.getDimension(); ++i) {
            setParameterValue(topSizes, i, popScale);
        }
    }
}
Use of beast.evolution.alignment.distance.JukesCantorDistance in project beast2 by CompEvol.
Method initAndValidate of the class ClusterTree.
/**
 * Initialises this tree by clustering the taxa.
 *
 * <p>Taxon names come from the alignment input when present, otherwise from the
 * taxon set input. When the system property {@code beast.resume} is true and this
 * tree (or its initial tree) is flagged as estimated, only a placeholder caterpillar
 * tree is built and the method returns early. Otherwise a cluster tree is built using
 * the configured distance (a {@link JukesCantorDistance} when none is given) and
 * link type, traits are processed, tip heights are adjusted or zeroed, and all
 * internal node heights are divided by the clock rate.
 *
 * @throws RuntimeException if neither an alignment nor a taxon set is specified
 */
@Override
public void initAndValidate() {
    final RealParameter clockRate = clockRateInput.get();

    // Taxon names: taken from the taxon set only when no alignment is supplied.
    if (dataInput.get() == null) {
        if (m_taxonset.get() == null) {
            throw new RuntimeException("At least one of taxa and taxonset input needs to be specified");
        }
        taxaNames = m_taxonset.get().asStringList();
    } else {
        taxaNames = dataInput.get().getTaxaNames();
    }

    final boolean resuming = Boolean.valueOf(System.getProperty("beast.resume"));
    if (resuming && (isEstimatedInput.get() || (m_initial.get() != null && m_initial.get().isEstimatedInput.get()))) {
        // The tree is read from file anyway, so skip clustering to save some time
        // and make a caterpillar instead.
        final int tipCount = taxaNames.size();
        Node spine = newNode();
        spine.setNr(0);
        spine.setID(taxaNames.get(0));
        spine.setHeight(0);
        for (int tip = 1; tip < tipCount; tip++) {
            final Node leaf = newNode();
            leaf.setNr(tip);
            leaf.setID(taxaNames.get(tip));
            leaf.setHeight(0);

            final Node join = newNode();
            join.setNr(tipCount + tip - 1);
            join.setHeight(tip);

            spine.setParent(join);
            join.setLeft(spine);
            leaf.setParent(join);
            join.setRight(leaf);
            spine = join;
        }
        root = spine;
        leafNodeCount = tipCount;
        nodeCount = leafNodeCount * 2 - 1;
        internalNodeCount = leafNodeCount - 1;
        super.initAndValidate();
        return;
    }

    // Distance measure, defaulting to Jukes-Cantor.
    distance = distanceInput.get();
    if (distance == null) {
        distance = new JukesCantorDistance();
    }
    if (distance instanceof Distance.Base) {
        // Distance requires an alignment? The data input is passed on as is,
        // even when it is null.
        ((Distance.Base) distance).setPatterns(dataInput.get());
    }

    // Link type: upgma is handled as average linkage.
    linkType = clusterTypeInput.get();
    if (linkType == Type.upgma) {
        linkType = Type.average;
    }
    if (linkType == Type.neighborjoining || linkType == Type.neighborjoining2) {
        distanceIsBranchLength = true;
    }

    final Node clusterRoot = buildClusterer();
    setRoot(clusterRoot);
    clusterRoot.labelInternalNodes((getNodeCount() + 1) / 2);
    super.initAndValidate();

    if (linkType == Type.neighborjoining2) {
        // set tip dates to zero
        final Node[] allNodes = getNodesAsArray();
        for (int tip = 0; tip < getLeafNodeCount(); tip++) {
            allNodes[tip].setHeight(0);
        }
        super.initAndValidate();
    }

    // Traits come from the initial tree when there is one, otherwise from this tree.
    processTraits(m_initial.get() != null ? m_initial.get().m_traitList.get() : m_traitList.get());

    if (timeTraitSet == null) {
        // all nodes should be at zero height if no date-trait is available
        for (int tip = 0; tip < getLeafNodeCount(); tip++) {
            getNode(tip).setHeight(0);
        }
    } else {
        adjustTreeNodeHeights(clusterRoot);
    }

    // divide all node heights by clock rate to convert from substitutions to time.
    for (final Node internal : getInternalNodes()) {
        internal.setHeight(internal.getHeight() / clockRate.getValue());
    }

    initStateNodes();
}
Aggregations