Search in sources :

Example 11 with ClusterTree

use of beast.util.ClusterTree in project beast2 by CompEvol.

the class StarBeastStartState method fullInit.

private void fullInit() {
    // Build gene trees from  alignments
    final Function muInput = this.muInput.get();
    final double mu = (muInput != null) ? muInput.getArrayValue() : 1;
    final Tree stree = speciesTreeInput.get();
    final TaxonSet species = stree.m_taxonset.get();
    final List<String> speciesNames = species.asStringList();
    final int speciesCount = speciesNames.size();
    final List<Tree> geneTrees = genes.get();
    // final List<Alignment> alignments = genes.get();
    // final List<Tree> geneTrees = new ArrayList<>(alignments.size());
    double maxNsites = 0;
    // for( final Alignment alignment : alignments)  {
    for (final Tree gtree : geneTrees) {
        // final Tree gtree = new Tree();
        final Alignment alignment = gtree.m_taxonset.get().alignmentInput.get();
        final ClusterTree ctree = new ClusterTree();
        ctree.initByName("initial", gtree, "clusterType", "upgma", "taxa", alignment);
        gtree.scale(1 / mu);
        maxNsites = max(maxNsites, alignment.getSiteCount());
    }
    final Map<String, Integer> geneTips2Species = new HashMap<>();
    final List<Taxon> taxonSets = species.taxonsetInput.get();
    for (int k = 0; k < speciesNames.size(); ++k) {
        final Taxon nx = taxonSets.get(k);
        final List<Taxon> taxa = ((TaxonSet) nx).taxonsetInput.get();
        for (final Taxon n : taxa) {
            geneTips2Species.put(n.getID(), k);
        }
    }
    final double[] dg = new double[(speciesCount * (speciesCount - 1)) / 2];
    final double[][] genesDmins = new double[geneTrees.size()][];
    for (int ng = 0; ng < geneTrees.size(); ++ng) {
        final Tree g = geneTrees.get(ng);
        final double[] dmin = firstMeetings(g, geneTips2Species, speciesCount);
        genesDmins[ng] = dmin;
        for (int i = 0; i < dmin.length; ++i) {
            dg[i] += dmin[i];
            if (dmin[i] == Double.MAX_VALUE) {
                // this happens when a gene tree has no taxa for some species-tree taxon.
                // TODO: ensure that if this happens, there will always be an "infinite"
                // distance between species-taxon 0 and the species-taxon with missing lineages,
                // so i < speciesCount - 1.
                // What if lineages for species-taxon 0 are missing? Then all entries will be 'infinite'.
                String id = (i < speciesCount - 1 ? stree.getExternalNodes().get(i + 1).getID() : "unknown taxon");
                if (i == 0) {
                    // test that all entries are 'infinite', which implies taxon 0 has lineages missing
                    boolean b = true;
                    for (int k = 1; b && k < speciesCount - 1; k++) {
                        b = (dmin[k] == Double.MAX_VALUE);
                    }
                    if (b) {
                        // if all entries have 'infinite' distances, it is probably the first taxon that is at fault
                        id = stree.getExternalNodes().get(0).getID();
                    }
                }
                throw new RuntimeException("Gene tree " + g.getID() + " has no lineages for species taxon " + id + " ");
            }
        }
    }
    for (int i = 0; i < dg.length; ++i) {
        double d = dg[i] / geneTrees.size();
        if (d == 0) {
            d = (0.5 / maxNsites) * (1 / mu);
        } else {
            // heights to distances
            d *= 2;
        }
        dg[i] = d;
    }
    final ClusterTree ctree = new ClusterTree();
    final Distance distance = new Distance() {

        @Override
        public double pairwiseDistance(final int s1, final int s2) {
            final int i = getDMindex(speciesCount, s1, s2);
            return dg[i];
        }
    };
    ctree.initByName("initial", stree, "taxonset", species, "clusterType", "upgma", "distance", distance);
    final Map<String, Integer> sptips2SpeciesIndex = new HashMap<>();
    for (int i = 0; i < speciesNames.size(); ++i) {
        sptips2SpeciesIndex.put(speciesNames.get(i), i);
    }
    final double[] spmin = firstMeetings(stree, sptips2SpeciesIndex, speciesCount);
    for (int ng = 0; ng < geneTrees.size(); ++ng) {
        final double[] dmin = genesDmins[ng];
        boolean compatible = true;
        for (int i = 0; i < spmin.length; ++i) {
            if (dmin[i] <= spmin[i]) {
                compatible = false;
                break;
            }
        }
        if (!compatible) {
            final Tree gtree = geneTrees.get(ng);
            final TaxonSet gtreeTaxa = gtree.m_taxonset.get();
            final Alignment alignment = gtreeTaxa.alignmentInput.get();
            final List<String> taxaNames = alignment.getTaxaNames();
            final int taxonCount = taxaNames.size();
            // speedup
            final Map<Integer, Integer> g2s = new HashMap<>();
            for (int i = 0; i < taxonCount; ++i) {
                g2s.put(i, geneTips2Species.get(taxaNames.get(i)));
            }
            final JukesCantorDistance jc = new JukesCantorDistance();
            jc.setPatterns(alignment);
            final Distance gdistance = new Distance() {

                @Override
                public double pairwiseDistance(final int t1, final int t2) {
                    final int s1 = g2s.get(t1);
                    final int s2 = g2s.get(t2);
                    double d = jc.pairwiseDistance(t1, t2) / mu;
                    if (s1 != s2) {
                        final int i = getDMindex(speciesCount, s1, s2);
                        final double minDist = 2 * spmin[i];
                        if (d <= minDist) {
                            d = minDist * 1.001;
                        }
                    }
                    return d;
                }
            };
            final ClusterTree gtreec = new ClusterTree();
            gtreec.initByName("initial", gtree, "taxonset", gtreeTaxa, "clusterType", "upgma", "distance", gdistance);
        }
    }
    {
        final RealParameter lambda = birthRate.get();
        if (lambda != null) {
            final double rh = stree.getRoot().getHeight();
            double l = 0;
            for (int i = 2; i < speciesCount + 1; ++i) {
                l += 1. / i;
            }
            setParameterValue(lambda, (1 / rh) * l);
        }
        double totBranches = 0;
        final Node[] streeNodeas = stree.getNodesAsArray();
        for (final Node n : streeNodeas) {
            if (!n.isRoot()) {
                totBranches += n.getLength();
            }
        }
        totBranches /= 2 * (streeNodeas.length - 1);
        final RealParameter popm = popMean.get();
        if (popm != null) {
            setParameterValue(popm, totBranches);
        }
        final SpeciesTreePrior speciesTreePrior = speciesTreePriorInput.get();
        if (speciesTreePrior != null) {
            final RealParameter popb = speciesTreePrior.popSizesBottomInput.get();
            if (popb != null) {
                for (int i = 0; i < popb.getDimension(); ++i) {
                    setParameterValue(popb, i, 2 * totBranches);
                }
            }
            final RealParameter popt = speciesTreePrior.popSizesTopInput.get();
            if (popt != null) {
                for (int i = 0; i < popt.getDimension(); ++i) {
                    setParameterValue(popt, i, totBranches);
                }
            }
        }
    }
}
Also used : JukesCantorDistance(beast.evolution.alignment.distance.JukesCantorDistance) HashMap(java.util.HashMap) ClusterTree(beast.util.ClusterTree) Node(beast.evolution.tree.Node) Alignment(beast.evolution.alignment.Alignment) RandomTree(beast.evolution.tree.RandomTree) Tree(beast.evolution.tree.Tree) ClusterTree(beast.util.ClusterTree) Distance(beast.evolution.alignment.distance.Distance) JukesCantorDistance(beast.evolution.alignment.distance.JukesCantorDistance) Taxon(beast.evolution.alignment.Taxon) RealParameter(beast.core.parameter.RealParameter) TaxonSet(beast.evolution.alignment.TaxonSet)

Aggregations

ClusterTree (beast.util.ClusterTree)11 Test (org.junit.Test)9 Alignment (beast.evolution.alignment.Alignment)8 Node (beast.evolution.tree.Node)7 SiteModel (beast.evolution.sitemodel.SiteModel)5 JukesCantor (beast.evolution.substitutionmodel.JukesCantor)5 Conversion (bacter.Conversion)3 ConversionGraph (bacter.ConversionGraph)3 Locus (bacter.Locus)3 RealParameter (beast.core.parameter.RealParameter)2 Sequence (beast.evolution.alignment.Sequence)2 TaxonSet (beast.evolution.alignment.TaxonSet)2 Tree (beast.evolution.tree.Tree)2 ArrayList (java.util.ArrayList)2 Taxon (beast.evolution.alignment.Taxon)1 Distance (beast.evolution.alignment.distance.Distance)1 JukesCantorDistance (beast.evolution.alignment.distance.JukesCantorDistance)1 RandomTree (beast.evolution.tree.RandomTree)1 ConstantPopulation (beast.evolution.tree.coalescent.ConstantPopulation)1 ByteArrayInputStream (java.io.ByteArrayInputStream)1