Search in sources :

Example 6 with DataType

use of beast.evolution.datatype.DataType in project beast2 by CompEvol.

the class SequenceSimulator method intArray2Sequence.

/**
 * Turns an integer-coded sequence into a {@link Sequence} labelled with the
 * taxon that belongs to the given node.
 *
 * @param seq  integer representation of the sequence
 * @param node node whose number selects the entry in the alignment's
 *             sequence list from which the taxon label is taken
 * @return a new Sequence built from the taxon label and the decoded string
 */
Sequence intArray2Sequence(int[] seq, Node node) {
    // Let the alignment's data type turn the state codes into text.
    final String encoded = m_data.get().getDataType().state2string(seq);
    // The node number is used as an index into the alignment's sequences.
    final Sequence source = m_data.get().sequenceInput.get().get(node.getNr());
    return new Sequence(source.taxonInput.get(), encoded);
}
Also used : DataType(beast.evolution.datatype.DataType) Sequence(beast.evolution.alignment.Sequence)

Example 7 with DataType

use of beast.evolution.datatype.DataType in project beast2 by CompEvol.

the class SimulatedAlignment method intArray2Sequence.

/**
 * Turns an integer-coded sequence into a {@link Sequence} labelled with the
 * taxon name that belongs to the given node.
 *
 * @param seq  integer representation of the sequence
 * @param node node whose number selects the entry in the alignment's list
 *             of taxon names
 * @return a new Sequence built from the taxon name and the decoded string
 */
Sequence intArray2Sequence(int[] seq, Node node) {
    // Let the alignment's data type turn the state codes into text.
    final String encoded = m_data.get().getDataType().state2string(seq);
    // The node number is used as an index into the alignment's taxon names.
    return new Sequence(m_data.get().getTaxaNames().get(node.getNr()), encoded);
}
Also used : DataType(beast.evolution.datatype.DataType) Sequence(beast.evolution.alignment.Sequence)

Example 8 with DataType

use of beast.evolution.datatype.DataType in project beast2 by CompEvol.

the class FilteredAlignment method calcPatterns.

/**
 * Rebuilds the pattern tables of this filtered alignment from the per-taxon
 * {@code counts} at the positions listed in {@code filter}.
 * <p>
 * In order, the method:
 * <ol>
 * <li>transposes the filtered counts into a {@code [site][taxon]} array,
 *     optionally re-coding each state from the base alignment's data type
 *     into {@code m_dataType};</li>
 * <li>appends one artificial constant site per entry of
 *     {@code constantSiteWeightsInput}, if that input is set;</li>
 * <li>sorts the sites and collapses equal neighbours into patterns with
 *     weights;</li>
 * <li>zeroes the weight of constant patterns when
 *     {@code stripInvariantSitesInput} is true;</li>
 * <li>adjusts the weight of constant patterns by the configured constant
 *     site weights;</li>
 * <li>fills {@code patternWeight}, {@code sitePatterns} and
 *     {@code patternIndex};</li>
 * <li>updates {@code maxStateCount}, logs a summary and sets
 *     {@code counts} to null.</li>
 * </ol>
 *
 * @throws RuntimeException if {@code siteWeights} is not null
 */
@Override
protected void calcPatterns() {
    int nrOfTaxa = counts.size();
    int nrOfSites = filter.length;
    DataType baseType = alignmentInput.get().m_dataType;
    // convert data to transposed int array: data[site][taxon]
    int[][] data = new int[nrOfSites][nrOfTaxa];
    for (int i = 0; i < nrOfTaxa; i++) {
        List<Integer> sites = counts.get(i);
        for (int j = 0; j < nrOfSites; j++) {
            data[j][i] = sites.get(filter[j]);
            if (convertDataType) {
                try {
                    // re-code via the textual code of the base data type;
                    // only the first state returned by string2state is kept
                    String code = baseType.getCode(data[j][i]);
                    data[j][i] = m_dataType.string2state(code).get(0);
                } catch (Exception e) {
                    // failure is only printed; data[j][i] keeps its unconverted value
                    e.printStackTrace();
                }
            }
        }
    }
    // add constant sites, if specified
    if (constantSiteWeightsInput.get() != null) {
        int dim = constantSiteWeightsInput.get().getDimension();
        // add constant patterns: extra site i has value i for every taxon
        int[][] data2 = new int[nrOfSites + dim][];
        System.arraycopy(data, 0, data2, 0, nrOfSites);
        for (int i = 0; i < dim; i++) {
            data2[nrOfSites + i] = new int[nrOfTaxa];
            for (int j = 0; j < nrOfTaxa; j++) {
                data2[nrOfSites + i][j] = i;
            }
        }
        data = data2;
        // nrOfSites temporarily includes the artificial constant sites
        nrOfSites += dim;
    }
    // sort data
    SiteComparator comparator = new SiteComparator();
    Arrays.sort(data, comparator);
    // count patterns in sorted data; unique sites are compacted to the
    // front of data, and weights[k] counts the sites equal to pattern k
    int[] weights = new int[nrOfSites];
    int nrOfPatterns = 1;
    if (nrOfSites > 0) {
        weights[0] = 1;
        for (int i = 1; i < nrOfSites; i++) {
            if (comparator.compare(data[i - 1], data[i]) != 0) {
                nrOfPatterns++;
                data[nrOfPatterns - 1] = data[i];
            }
            weights[nrOfPatterns - 1]++;
        }
    } else {
        nrOfPatterns = 0;
    }
    // adjust weight of invariant sites, if stripInvariantSitesInput is specified
    if (stripInvariantSitesInput.get()) {
        // don't add patterns that are invariant, e.g. all gaps
        Log.info.print("Stripping invariant sites");
        int removedSites = 0;
        for (int i = 0; i < nrOfPatterns; i++) {
            boolean isContant = true;
            for (int j = 1; j < nrOfTaxa; j++) {
                if (data[i][j] != data[i][0]) {
                    isContant = false;
                    break;
                }
            }
            // every constant pattern gets weight zero here; unlike the
            // constant-weight adjustment below, no range check on the state is made
            if (isContant) {
                Log.warning.print(" <" + data[i][0] + "> ");
                removedSites += weights[i];
                weights[i] = 0;
            }
        }
        Log.warning.println(" removed " + removedSites + " sites ");
    }
    // adjust weight of constant sites, if specified
    if (constantSiteWeightsInput.get() != null) {
        Integer[] constantWeights = constantSiteWeightsInput.get().getValues();
        for (int i = 0; i < nrOfPatterns; i++) {
            boolean isContant = true;
            for (int j = 1; j < nrOfTaxa; j++) {
                if (data[i][j] != data[i][0]) {
                    isContant = false;
                    break;
                }
            }
            // only constant patterns whose state is a valid index into
            // constantWeights are adjusted
            if (isContant && data[i][0] >= 0 && data[i][0] < constantWeights.length) {
                // take weights in data in account as well
                // by adding constant patterns, we added a weight of 1, which now gets corrected
                // but if filtered by stripping constant sites, that weight is already set to zero
                weights[i] = (stripInvariantSitesInput.get() ? 0 : weights[i] - 1) + constantWeights[data[i][0]];
            }
        }
        // need to decrease the site count again so that patternIndex below
        // only covers the filtered sites, not the artificial constant ones
        nrOfSites -= constantWeights.length;
    }
    // reserve memory for patterns
    patternWeight = new int[nrOfPatterns];
    sitePatterns = new int[nrOfPatterns][nrOfTaxa];
    for (int i = 0; i < nrOfPatterns; i++) {
        patternWeight[i] = weights[i];
        // the row is replaced by a reference to the compacted data row
        sitePatterns[i] = data[i];
    }
    // find patterns for the sites
    patternIndex = new int[nrOfSites];
    for (int i = 0; i < nrOfSites; i++) {
        // rebuild the (optionally converted) column for site i
        int[] sites = new int[nrOfTaxa];
        for (int j = 0; j < nrOfTaxa; j++) {
            sites[j] = counts.get(j).get(filter[i]);
            if (convertDataType) {
                try {
                    sites[j] = m_dataType.string2state(baseType.getCode(sites[j])).get(0);
                } catch (Exception e) {
                    // failure is only printed; sites[j] keeps its unconverted value
                    e.printStackTrace();
                }
            }
        }
        // sitePatterns is in comparator order because data was sorted with it above
        // NOTE(review): binarySearch returns a negative value when no match
        // is found; that value would be stored as-is -- confirm this cannot happen
        patternIndex[i] = Arrays.binarySearch(sitePatterns, sites, comparator);
    }
    if (siteWeights != null) {
        // TODO: fill in weights with siteweights.
        throw new RuntimeException("Cannot handle site weights in FilteredAlignment. Remove \"weights\" from data input.");
    }
    // determine maximum state count
    // Usually, the state count is equal for all sites,
    // though for SnAP analysis, this is typically not the case.
    maxStateCount = 0;
    for (int stateCount1 : stateCounts) {
        maxStateCount = Math.max(maxStateCount, stateCount1);
    }
    if (convertDataType) {
        // the converted data type may have more states than recorded in stateCounts
        maxStateCount = Math.max(maxStateCount, m_dataType.getStateCount());
    }
    // report some statistics
    Log.info.println("Filter " + filterInput.get());
    Log.info.println(getTaxonCount() + " taxa");
    if (constantSiteWeightsInput.get() != null) {
        Integer[] constantWeights = constantSiteWeightsInput.get().getValues();
        // total number of constant sites represented by the configured weights
        int sum = 0;
        for (int i : constantWeights) {
            sum += i;
        }
        Log.info.println(getSiteCount() + " sites + " + sum + " constant sites");
    } else {
        Log.info.println(getSiteCount() + " sites");
    }
    Log.info.println(getPatternCount() + " patterns");
    // counts are not valid any more -- better set to null in case
    // someone gets bitten by this.
    this.counts = null;
}
Also used : DataType(beast.evolution.datatype.DataType)

Aggregations

DataType (beast.evolution.datatype.DataType)8 Alignment (beast.evolution.alignment.Alignment)3 Sequence (beast.evolution.alignment.Sequence)3 InvocationTargetException (java.lang.reflect.InvocationTargetException)1 ArrayList (java.util.ArrayList)1 Test (org.junit.Test)1