Search in sources :

Example 1 with Distance

use of beast.evolution.alignment.distance.Distance in project beast2 by CompEvol.

the class StarBeastStartState method fullInit.

private void fullInit() {
    // Build gene trees from  alignments
    final Function muInput = this.muInput.get();
    final double mu = (muInput != null) ? muInput.getArrayValue() : 1;
    final Tree stree = speciesTreeInput.get();
    final TaxonSet species = stree.m_taxonset.get();
    final List<String> speciesNames = species.asStringList();
    final int speciesCount = speciesNames.size();
    final List<Tree> geneTrees = genes.get();
    // final List<Alignment> alignments = genes.get();
    // final List<Tree> geneTrees = new ArrayList<>(alignments.size());
    double maxNsites = 0;
    // for( final Alignment alignment : alignments)  {
    for (final Tree gtree : geneTrees) {
        // final Tree gtree = new Tree();
        final Alignment alignment = gtree.m_taxonset.get().alignmentInput.get();
        final ClusterTree ctree = new ClusterTree();
        ctree.initByName("initial", gtree, "clusterType", "upgma", "taxa", alignment);
        gtree.scale(1 / mu);
        maxNsites = max(maxNsites, alignment.getSiteCount());
    }
    final Map<String, Integer> geneTips2Species = new HashMap<>();
    final List<Taxon> taxonSets = species.taxonsetInput.get();
    for (int k = 0; k < speciesNames.size(); ++k) {
        final Taxon nx = taxonSets.get(k);
        final List<Taxon> taxa = ((TaxonSet) nx).taxonsetInput.get();
        for (final Taxon n : taxa) {
            geneTips2Species.put(n.getID(), k);
        }
    }
    final double[] dg = new double[(speciesCount * (speciesCount - 1)) / 2];
    final double[][] genesDmins = new double[geneTrees.size()][];
    for (int ng = 0; ng < geneTrees.size(); ++ng) {
        final Tree g = geneTrees.get(ng);
        final double[] dmin = firstMeetings(g, geneTips2Species, speciesCount);
        genesDmins[ng] = dmin;
        for (int i = 0; i < dmin.length; ++i) {
            dg[i] += dmin[i];
            if (dmin[i] == Double.MAX_VALUE) {
                // this happens when a gene tree has no taxa for some species-tree taxon.
                // TODO: ensure that if this happens, there will always be an "infinite"
                // distance between species-taxon 0 and the species-taxon with missing lineages,
                // so i < speciesCount - 1.
                // What if lineages for species-taxon 0 are missing? Then all entries will be 'infinite'.
                String id = (i < speciesCount - 1 ? stree.getExternalNodes().get(i + 1).getID() : "unknown taxon");
                if (i == 0) {
                    // test that all entries are 'infinite', which implies taxon 0 has lineages missing
                    boolean b = true;
                    for (int k = 1; b && k < speciesCount - 1; k++) {
                        b = (dmin[k] == Double.MAX_VALUE);
                    }
                    if (b) {
                        // if all entries have 'infinite' distances, it is probably the first taxon that is at fault
                        id = stree.getExternalNodes().get(0).getID();
                    }
                }
                throw new RuntimeException("Gene tree " + g.getID() + " has no lineages for species taxon " + id + " ");
            }
        }
    }
    for (int i = 0; i < dg.length; ++i) {
        double d = dg[i] / geneTrees.size();
        if (d == 0) {
            d = (0.5 / maxNsites) * (1 / mu);
        } else {
            // heights to distances
            d *= 2;
        }
        dg[i] = d;
    }
    final ClusterTree ctree = new ClusterTree();
    final Distance distance = new Distance() {

        @Override
        public double pairwiseDistance(final int s1, final int s2) {
            final int i = getDMindex(speciesCount, s1, s2);
            return dg[i];
        }
    };
    ctree.initByName("initial", stree, "taxonset", species, "clusterType", "upgma", "distance", distance);
    final Map<String, Integer> sptips2SpeciesIndex = new HashMap<>();
    for (int i = 0; i < speciesNames.size(); ++i) {
        sptips2SpeciesIndex.put(speciesNames.get(i), i);
    }
    final double[] spmin = firstMeetings(stree, sptips2SpeciesIndex, speciesCount);
    for (int ng = 0; ng < geneTrees.size(); ++ng) {
        final double[] dmin = genesDmins[ng];
        boolean compatible = true;
        for (int i = 0; i < spmin.length; ++i) {
            if (dmin[i] <= spmin[i]) {
                compatible = false;
                break;
            }
        }
        if (!compatible) {
            final Tree gtree = geneTrees.get(ng);
            final TaxonSet gtreeTaxa = gtree.m_taxonset.get();
            final Alignment alignment = gtreeTaxa.alignmentInput.get();
            final List<String> taxaNames = alignment.getTaxaNames();
            final int taxonCount = taxaNames.size();
            // speedup
            final Map<Integer, Integer> g2s = new HashMap<>();
            for (int i = 0; i < taxonCount; ++i) {
                g2s.put(i, geneTips2Species.get(taxaNames.get(i)));
            }
            final JukesCantorDistance jc = new JukesCantorDistance();
            jc.setPatterns(alignment);
            final Distance gdistance = new Distance() {

                @Override
                public double pairwiseDistance(final int t1, final int t2) {
                    final int s1 = g2s.get(t1);
                    final int s2 = g2s.get(t2);
                    double d = jc.pairwiseDistance(t1, t2) / mu;
                    if (s1 != s2) {
                        final int i = getDMindex(speciesCount, s1, s2);
                        final double minDist = 2 * spmin[i];
                        if (d <= minDist) {
                            d = minDist * 1.001;
                        }
                    }
                    return d;
                }
            };
            final ClusterTree gtreec = new ClusterTree();
            gtreec.initByName("initial", gtree, "taxonset", gtreeTaxa, "clusterType", "upgma", "distance", gdistance);
        }
    }
    {
        final RealParameter lambda = birthRate.get();
        if (lambda != null) {
            final double rh = stree.getRoot().getHeight();
            double l = 0;
            for (int i = 2; i < speciesCount + 1; ++i) {
                l += 1. / i;
            }
            setParameterValue(lambda, (1 / rh) * l);
        }
        double totBranches = 0;
        final Node[] streeNodeas = stree.getNodesAsArray();
        for (final Node n : streeNodeas) {
            if (!n.isRoot()) {
                totBranches += n.getLength();
            }
        }
        totBranches /= 2 * (streeNodeas.length - 1);
        final RealParameter popm = popMean.get();
        if (popm != null) {
            setParameterValue(popm, totBranches);
        }
        final SpeciesTreePrior speciesTreePrior = speciesTreePriorInput.get();
        if (speciesTreePrior != null) {
            final RealParameter popb = speciesTreePrior.popSizesBottomInput.get();
            if (popb != null) {
                for (int i = 0; i < popb.getDimension(); ++i) {
                    setParameterValue(popb, i, 2 * totBranches);
                }
            }
            final RealParameter popt = speciesTreePrior.popSizesTopInput.get();
            if (popt != null) {
                for (int i = 0; i < popt.getDimension(); ++i) {
                    setParameterValue(popt, i, totBranches);
                }
            }
        }
    }
}
Also used : JukesCantorDistance(beast.evolution.alignment.distance.JukesCantorDistance) HashMap(java.util.HashMap) ClusterTree(beast.util.ClusterTree) Node(beast.evolution.tree.Node) Alignment(beast.evolution.alignment.Alignment) RandomTree(beast.evolution.tree.RandomTree) Tree(beast.evolution.tree.Tree) ClusterTree(beast.util.ClusterTree) Distance(beast.evolution.alignment.distance.Distance) JukesCantorDistance(beast.evolution.alignment.distance.JukesCantorDistance) Taxon(beast.evolution.alignment.Taxon) RealParameter(beast.core.parameter.RealParameter) TaxonSet(beast.evolution.alignment.TaxonSet)

Aggregations

RealParameter (beast.core.parameter.RealParameter)1 Alignment (beast.evolution.alignment.Alignment)1 Taxon (beast.evolution.alignment.Taxon)1 TaxonSet (beast.evolution.alignment.TaxonSet)1 Distance (beast.evolution.alignment.distance.Distance)1 JukesCantorDistance (beast.evolution.alignment.distance.JukesCantorDistance)1 Node (beast.evolution.tree.Node)1 RandomTree (beast.evolution.tree.RandomTree)1 Tree (beast.evolution.tree.Tree)1 ClusterTree (beast.util.ClusterTree)1 HashMap (java.util.HashMap)1