Search in sources :

Example 1 with Sequence

use of dr.evolution.sequence.Sequence in project beast-mcmc by beast-dev.

the class Consensus method getConsensusSequence.

public final Sequence getConsensusSequence() {
    StringBuffer buffer = new StringBuffer();
    for (int i = 0; i < consensus.length; i++) {
        buffer.append(dataType.getChar(getState(i)));
    }
    Sequence sequence = new Sequence(new Taxon(name), buffer.toString());
    sequence.setDataType(dataType);
    return sequence;
}
Also used : Taxon(dr.evolution.util.Taxon) Sequence(dr.evolution.sequence.Sequence)

Example 2 with Sequence

use of dr.evolution.sequence.Sequence in project beast-mcmc by beast-dev.

the class Defects method main.

public static void main(String[] args) throws java.io.IOException {
    String alignmentFileName = null;
    if (args != null && args.length > 0) {
        alignmentFileName = args[0];
    }
    Alignment alignment = readNexusFile(alignmentFileName);
    Defects defects = new Defects(alignment);
    int siteCount = alignment.getSiteCount() / 3;
    int sequenceCount = alignment.getSequenceCount();
    int totalSites = siteCount * sequenceCount;
    int totalReads = defects.getTotalCodonsMinusGaps();
    int defectiveSites = defects.getDefectiveSiteCount();
    int defectiveSequences = defects.getDefectiveSequenceCount();
    int totalDefects = defects.getDefectCount();
    int totalStops = defects.getStopCodonCount();
    double siteP = (double) defectiveSites / (double) siteCount;
    double sequenceP = (double) defectiveSequences / (double) sequenceCount;
    double totalP = (double) totalDefects / (double) totalReads;
    double totalSP = (double) totalStops / (double) totalReads;
    System.out.println("Matrix size=" + totalSites);
    System.out.println("Non-gap codons=" + totalReads);
    System.out.println(defectiveSequences + "/" + sequenceCount + "(" + sequenceP + ") defective sequences.");
    System.out.println(defectiveSites + "/" + siteCount + "(" + siteP + ") defective sites.");
    System.out.println(totalDefects + "/" + totalReads + "(" + totalP + ") defects.");
    System.out.println(totalStops + "/" + totalReads + "(" + totalSP + ") stop codons.");
    double mean = (double) totalDefects / (double) sequenceCount;
    System.out.println(mean + " defects per sequence");
    double meanS = (double) totalStops / (double) sequenceCount;
    System.out.println(meanS + " stops per sequence");
    int maxDefects = defects.getMaxDefectiveSites();
    int maxStops = defects.getMaxStopSites();
    System.out.println("defective sequences:");
    // }
    for (int d = 1; d <= maxDefects; d++) {
        Set<Integer> seqs = defects.getSequences(d);
        for (Integer seq : seqs) {
            String name = alignment.getTaxonId(seq);
            System.out.println(d + "  " + name);
        }
    }
    System.out.println("Defects\tSequences\texpected");
    double probTerm;
    // probability of 0
    probTerm = Math.exp(-mean);
    for (int i = 0; i < 10; i++) {
        System.out.println(i + "\t" + defects.getSequenceCount(i) + "\t" + probTerm * sequenceCount);
        // compute probability of n from prob. of n-1
        probTerm *= mean / (i + 1);
    }
    System.out.println("Stops\tSequences\texpected");
    // probability of 0
    probTerm = Math.exp(-meanS);
    for (int i = 0; i < 10; i++) {
        System.out.println(i + "\t" + defects.getStopSequenceCount(i) + "\t" + probTerm * sequenceCount);
        // compute probability of n from prob. of n-1
        probTerm *= meanS / (i + 1);
    }
    System.out.println("stop-codon sequences:");
    for (int d = 1; d <= maxStops; d++) {
        Set<Integer> seqs = defects.getSequencesByStopCount(d);
        for (Integer index : seqs) {
            String name = alignment.getTaxonId(index);
            System.out.println(d + "  " + name);
        }
    }
    Consensus con = new Consensus("mode", alignment, true);
    Sequence consensus = con.getConsensusSequence();
    SimpleAlignment a = new SimpleAlignment();
    a.addSequence(new Sequence(consensus));
    ConvertAlignment ca = new ConvertAlignment(Codons.UNIVERSAL, a);
    double[] pstop = new double[ca.getSiteCount()];
    int[] counts = new int[10];
    for (double kappa = 2.0; kappa <= 2.0; kappa += 1.0) {
        for (int i = 0; i < ca.getSiteCount(); i++) {
            int state = ca.getState(0, i);
            if (Codons.UNIVERSAL.isStopCodon(state)) {
                throw new RuntimeException("Consensus has a stop codon in it at position " + i + "!");
            }
            int[] triplet = Codons.UNIVERSAL.getTripletStates(state);
            int[][] tvmutants = generateTransversionMutants(triplet);
            int[][] timutants = generateTransitionMutants(triplet);
            int tvStops = 0;
            for (int j = 0; j < 6; j++) {
                if (Codons.UNIVERSAL.isStopCodon(Codons.UNIVERSAL.getState(tvmutants[j][0], tvmutants[j][1], tvmutants[j][2]))) {
                    tvStops += 1;
                }
            }
            int tiStops = 0;
            for (int j = 0; j < 3; j++) {
                if (Codons.UNIVERSAL.isStopCodon(Codons.UNIVERSAL.getState(timutants[j][0], timutants[j][1], timutants[j][2]))) {
                    tiStops += 1;
                }
            }
            pstop[i] = (tvStops + kappa * tiStops) / (6.0 + kappa * 3.0);
            counts[tvStops + tiStops] += 1;
        }
        System.out.println("kappa = " + kappa + " pstop=" + dr.stats.DiscreteStatistics.mean(pstop));
    }
    System.out.println("stop-mutations\tcodons");
    for (int i = 0; i < 10; i++) {
        System.out.println(i + "\t" + counts[i]);
    }
    /*int count = 0;
        int reps = 100000;
        for (int i = 0; i < reps; i++) {
            SimpleAlignment simpleAlignment = new SimpleAlignment();
            simpleAlignment.addSequence(new Sequence(consensus));

            //pick a random position
            int pos = random.nextInt(simpleAlignment.getSiteCount());
            int state = simpleAlignment.getState(0,pos);
            int newState = random.nextInt(4);
            while (newState == state) {
                newState = random.nextInt(4);
            }
            simpleAlignment.setState(0,pos,newState);
            defects = new Defects(simpleAlignment);
            count += defects.getDefectCount();
        }

        double p = (double)count/(double)reps;
        */
    double p = dr.stats.DiscreteStatistics.mean(pstop);
    double rate = totalSP * (1.0 - p) / p;
    System.out.println("Total inferred point-mutation rate = " + rate);
    System.out.println("Total inferred point-mutations = " + (totalStops * (1 - p) / p));
    System.out.println("Proportion of point-mutations that produce premature stop codons = " + p);
}
Also used : Sequence(dr.evolution.sequence.Sequence)

Example 3 with Sequence

use of dr.evolution.sequence.Sequence in project beast-mcmc by beast-dev.

the class SimpleAlignment method getUncertainSitePattern.

@Override
public double[][] getUncertainSitePattern(int siteIndex) {
    if (areUncertain()) {
        double[][] pattern = new double[getSequenceCount()][];
        for (int i = 0; i < getSequenceCount(); ++i) {
            Sequence seq = getSequence(i);
            if (siteIndex > seq.getLength()) {
                pattern[i] = new double[dataType.getStateCount()];
                Arrays.fill(pattern[i], 1.0);
            } else {
                if (seq instanceof UncertainSequence) {
                    pattern[i] = ((UncertainSequence) seq).getUncertainPattern(siteIndex);
                } else {
                    pattern[i] = new double[dataType.getStateCount()];
                    int[] states = dataType.getStates(seq.getState(siteIndex));
                    for (int state : states) {
                        pattern[i][state] = 1.0;
                    }
                }
            }
        }
        return pattern;
    } else {
        throw new UnsupportedOperationException("getUncertainSitePattern not implemented yet");
    }
}
Also used : UncertainSequence(dr.evolution.sequence.UncertainSequence) Sequence(dr.evolution.sequence.Sequence) UncertainSequence(dr.evolution.sequence.UncertainSequence)

Example 4 with Sequence

use of dr.evolution.sequence.Sequence in project beast-mcmc by beast-dev.

the class NexusImporter method readSequenceData.

/**
 * Reads sequences in a 'DATA' or 'CHARACTERS' block.
 */
private void readSequenceData(Sequences sequences, TaxonList taxonList) throws ImportException, IOException {
    int n, i;
    String firstSequence = null;
    if (isInterleaved) {
        boolean firstLoop = true;
        int readCount = 0;
        while (readCount < siteCount) {
            n = -1;
            for (i = 0; i < taxonCount; i++) {
                String token = readToken().trim();
                Sequence sequence;
                if (firstLoop) {
                    sequence = new Sequence();
                    sequence.setDataType(dataType);
                    sequences.addSequence(sequence);
                    Taxon taxon;
                    if (taxonList != null) {
                        int index = taxonList.getTaxonIndex(token.trim());
                        if (index == -1) {
                            // ...perhaps it is a numerical taxon reference?
                            throw new UnknownTaxonException(token);
                        } else {
                            taxon = taxonList.getTaxon(index);
                        }
                    } else {
                        taxon = new Taxon(token.trim());
                    }
                    sequence.setTaxon(taxon);
                } else {
                    sequence = sequences.getSequence(i);
                    Taxon taxon = sequence.getTaxon();
                    if (!taxon.getId().equals(token)) {
                        throw new UnknownTaxonException("Unknown taxon label: expecting '" + taxon.getId() + "', found '" + token + "'");
                    }
                }
                StringBuffer buffer = new StringBuffer();
                readSequenceLine(buffer, dataType, ";", gapCharacters, missingCharacters, matchCharacters, firstSequence);
                String seqString = buffer.toString();
                sequence.appendSequenceString(seqString);
                if (i == 0) {
                    firstSequence = seqString;
                }
                if (getLastDelimiter() == ';') {
                    if (i < taxonCount - 1) {
                        throw new TooFewTaxaException();
                    }
                    if (readCount + n < siteCount) {
                        throw new ShortSequenceException(sequence.getTaxon().getId());
                    }
                }
                if (n == -1) {
                    n = seqString.length();
                }
                if (n != seqString.length()) {
                    throw new ShortSequenceException(sequence.getTaxon().getId());
                }
            }
            firstLoop = false;
            readCount += n;
        }
        if (getLastDelimiter() != ';') {
            throw new BadFormatException("Expecting ';' after sequences data");
        }
    } else {
        for (i = 0; i < taxonCount; i++) {
            String token = readToken().trim();
            Sequence sequence = new Sequence();
            sequence.setDataType(dataType);
            sequences.addSequence(sequence);
            Taxon taxon;
            if (taxonList != null) {
                int index = taxonList.getTaxonIndex(token);
                if (index == -1) {
                    // ...perhaps it is a numerical taxon reference?
                    throw new UnknownTaxonException(token);
                } else {
                    taxon = taxonList.getTaxon(index);
                }
            } else {
                taxon = new Taxon(token);
            }
            sequence.setTaxon(taxon);
            StringBuffer buffer = new StringBuffer();
            readSequence(buffer, dataType, ";", siteCount, gapCharacters, missingCharacters, matchCharacters, firstSequence);
            String seqString = buffer.toString();
            if (seqString.length() != siteCount) {
                throw new ShortSequenceException(sequence.getTaxon().getId());
            }
            sequence.appendSequenceString(seqString);
            if (i == 0) {
                firstSequence = seqString;
            }
            if (getLastDelimiter() == ';' && i < taxonCount - 1) {
                throw new TooFewTaxaException();
            }
        }
        if (getLastDelimiter() != ';') {
            throw new BadFormatException("Expecting ';' after sequences data, has '" + (char) getLastDelimiter() + "' in line " + getLineNumber());
        }
    }
}
Also used : Sequence(dr.evolution.sequence.Sequence)

Example 5 with Sequence

use of dr.evolution.sequence.Sequence in project beast-mcmc by beast-dev.

the class FastaImporter method importAlignment.

/**
 * importAlignment.
 */
public Alignment importAlignment() throws IOException, ImportException {
    SimpleAlignment alignment = null;
    try {
        // find fasta line start
        while (read() != FASTA_FIRST_CHAR) {
        }
        do {
            final String name = readLine().trim();
            StringBuffer seq = new StringBuffer();
            readSequence(seq, dataType, "" + FASTA_FIRST_CHAR, Integer.MAX_VALUE, "-", "?", "", "");
            if (alignment == null) {
                alignment = new SimpleAlignment();
            }
            alignment.addSequence(new Sequence(new Taxon(name.toString()), seq.toString()));
        } while (getLastDelimiter() == FASTA_FIRST_CHAR);
    } catch (EOFException e) {
    // catch end of file the ugly way.
    }
    return alignment;
}
Also used : SimpleAlignment(dr.evolution.alignment.SimpleAlignment) Taxon(dr.evolution.util.Taxon) EOFException(java.io.EOFException) Sequence(dr.evolution.sequence.Sequence)

Aggregations

Sequence (dr.evolution.sequence.Sequence)33 SimpleAlignment (dr.evolution.alignment.SimpleAlignment)16 Taxon (dr.evolution.util.Taxon)12 DataType (dr.evolution.datatype.DataType)7 BranchRateModel (dr.evomodel.branchratemodel.BranchRateModel)6 UncertainSequence (dr.evolution.sequence.UncertainSequence)5 GammaSiteRateModel (dr.evomodel.siteratemodel.GammaSiteRateModel)5 Parameter (dr.inference.model.Parameter)5 Tree (dr.evolution.tree.Tree)4 Date (dr.evolution.util.Date)4 HomogeneousBranchModel (dr.evomodel.branchmodel.HomogeneousBranchModel)4 DefaultBranchRateModel (dr.evomodel.branchratemodel.DefaultBranchRateModel)4 FrequencyModel (dr.evomodel.substmodel.FrequencyModel)4 DefaultTreeModel (dr.evomodel.tree.DefaultTreeModel)4 ArrayList (java.util.ArrayList)4 Partition (dr.app.beagle.tools.Partition)3 NewickImporter (dr.evolution.io.NewickImporter)3 HKY (dr.evomodel.substmodel.nucleotide.HKY)3 TreeModel (dr.evomodel.tree.TreeModel)3 BeagleSequenceSimulator (dr.app.beagle.tools.BeagleSequenceSimulator)2