Search in sources :

Example 16 with StructureDataInterface

use of org.rcsb.mmtf.api.StructureDataInterface in project mmtf-spark by sbl-sdsc.

the class InteractionAnalysisSimple method main.

/**
 * Extracts interactions involving "ZN" groups from a filtered set of PDB
 * structures and prints the schema, a sample of rows, the total count, and
 * the most frequent values of the "residue2" column.
 *
 * @param args no input arguments
 * @throws IOException if MmtfReader fails
 */
public static void main(String[] args) throws IOException {
    String path = MmtfReader.getMmtfFullPath();
    long start = System.nanoTime();
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(InteractionAnalysisSimple.class.getSimpleName());
    JavaSparkContext sc = new JavaSparkContext(conf);
    // make sure the Spark context is released even if one of the steps below fails
    try {
        // read PDB in MMTF format
        JavaPairRDD<String, StructureDataInterface> pdb = MmtfReader.readSequenceFile(path, sc);
        // use only representative structures
        int sequenceIdentity = 40;
        double resolution = 2.5;
        pdb = pdb.filter(new Pisces(sequenceIdentity, resolution));
        // NOTE(review): the second argument (3) is presumably a distance cutoff - confirm against GroupInteractionExtractor
        GroupInteractionExtractor finder = new GroupInteractionExtractor("ZN", 3);
        // cached because the dataset is traversed several times below
        Dataset<Row> interactions = finder.getDataset(pdb).cache();
        // print the schema and the first 20 rows
        interactions.printSchema();
        interactions.show(20);
        System.out.println("# interactions: " + interactions.count());
        // show the top 10 interacting groups
        interactions.groupBy(col("residue2")).count().sort(col("count").desc()).show(10);
        long end = System.nanoTime();
        System.out.println("Time:     " + (end - start) / 1E9 + "sec.");
    } finally {
        sc.close();
    }
}
Also used : StructureDataInterface(org.rcsb.mmtf.api.StructureDataInterface) Pisces(edu.sdsc.mmtf.spark.webfilters.Pisces) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) Row(org.apache.spark.sql.Row) SparkConf(org.apache.spark.SparkConf) GroupInteractionExtractor(edu.sdsc.mmtf.spark.datasets.GroupInteractionExtractor)

Example 17 with StructureDataInterface

use of org.rcsb.mmtf.api.StructureDataInterface in project mmtf-spark by sbl-sdsc.

the class ExperimentalMethods method call.

/**
 * Checks whether a structure was determined with at least one of the
 * experimental methods this filter was configured with.
 *
 * @param t tuple whose second element is the structure to test
 * @return true if the structure's experimental methods and the configured
 *         methods have at least one entry in common
 */
@Override
public Boolean call(Tuple2<String, StructureDataInterface> t) throws Exception {
    String[] reported = t._2.getExperimentalMethods();
    // copy into a mutable set so the intersection can be computed in place
    Set<String> shared = new HashSet<>(Arrays.asList(reported));
    shared.retainAll(experimentalMethods);
    boolean noneShared = shared.isEmpty();
    return !noneShared;
}
Also used : StructureDataInterface(org.rcsb.mmtf.api.StructureDataInterface) HashSet(java.util.HashSet)

Example 18 with StructureDataInterface

use of org.rcsb.mmtf.api.StructureDataInterface in project mmtf-spark by sbl-sdsc.

the class SecondaryStructure method call.

/**
 * Tests the polymer chains of the first model against the configured ranges
 * for the helix, sheet, and coil fractions.
 *
 * For each polymer chain the fractions are computed relative to the number of
 * groups whose Q3 code is helix, extended, or coil; groups with any other code
 * are left out of the denominator. In non-exclusive mode the first matching
 * polymer chain yields true. In exclusive mode the first non-matching polymer
 * chain yields false.
 *
 * @param t tuple whose second element is the structure to test
 * @return true if the structure passes the filter as described above
 */
@Override
public Boolean call(Tuple2<String, StructureDataInterface> t) throws Exception {
    StructureDataInterface structure = t._2;
    // only the chains of the first model are examined
    int chainCount = structure.getChainsPerModel()[0];
    int[] dsspCodes = structure.getSecStructList();
    int[] groupsPerChain = structure.getGroupsPerChain();

    boolean sawPolymer = false;
    boolean anyChainMatched = false;
    // position in the structure-wide group arrays; advanced for every chain
    int groupPos = 0;

    for (int chain = 0; chain < chainCount; chain++) {
        boolean isPolymer = EncoderUtils.getTypeFromChainId(structure, chain).equals("polymer");
        int groupsInChain = groupsPerChain[chain];

        int helixCount = 0;
        int sheetCount = 0;
        int coilCount = 0;
        int unclassified = 0;
        for (int g = 0; g < groupsInChain; g++, groupPos++) {
            if (!isPolymer) {
                // nothing to count, but groupPos must still move past this chain
                continue;
            }
            switch (DsspSecondaryStructure.getQ3Code(dsspCodes[groupPos])) {
                case ALPHA_HELIX:
                    helixCount++;
                    break;
                case EXTENDED:
                    sheetCount++;
                    break;
                case COIL:
                    coilCount++;
                    break;
                default:
                    unclassified++;
                    break;
            }
        }

        if (!isPolymer) {
            continue;
        }
        sawPolymer = true;

        // a zero denominator yields NaN, which fails every range test below
        int classified = groupsInChain - unclassified;
        double helix = (double) helixCount / classified;
        double sheet = (double) sheetCount / classified;
        double coil = (double) coilCount / classified;

        boolean helixInRange = helix >= helixFractionMin && helix <= helixFractionMax;
        boolean sheetInRange = sheet >= sheetFractionMin && sheet <= sheetFractionMax;
        boolean coilInRange = coil >= coilFractionMin && coil <= coilFractionMax;

        if (helixInRange && sheetInRange && coilInRange) {
            if (!exclusive) {
                return true;
            }
            anyChainMatched = true;
        } else if (exclusive) {
            return false;
        }
    }
    return anyChainMatched && sawPolymer;
}
Also used : StructureDataInterface(org.rcsb.mmtf.api.StructureDataInterface)

Example 19 with StructureDataInterface

use of org.rcsb.mmtf.api.StructureDataInterface in project mmtf-spark by sbl-sdsc.

the class ContainsGroup method call.

/**
 * Checks whether a structure contains at least one of the groups this filter
 * was configured with.
 *
 * @param t tuple whose second element is the structure to test
 * @return true if any group name of the structure is contained in the
 *         configured group query; false otherwise, including for a structure
 *         without any groups
 */
@Override
public Boolean call(Tuple2<String, StructureDataInterface> t) throws Exception {
    StructureDataInterface structure = t._2;
    // find the highest group type index in use; starting at -1 means a
    // structure without groups yields zero unique groups instead of one
    int maxGroupTypeIndex = -1;
    for (int index : structure.getGroupTypeIndices()) {
        maxGroupTypeIndex = Math.max(maxGroupTypeIndex, index);
    }
    // need to add 1 since the group indices array is zero-based
    int uniqueGroups = maxGroupTypeIndex + 1;
    // add all groups to the set
    Set<String> groupNames = new HashSet<String>(uniqueGroups);
    for (int i = 0; i < uniqueGroups; i++) {
        groupNames.add(structure.getGroupName(i));
    }
    // check if any of the groups are present in the structure
    groupNames.retainAll(groupQuery);
    return !groupNames.isEmpty();
}
Also used : StructureDataInterface(org.rcsb.mmtf.api.StructureDataInterface) HashSet(java.util.HashSet)

Example 20 with StructureDataInterface

use of org.rcsb.mmtf.api.StructureDataInterface in project mmtf-spark by sbl-sdsc.

the class StructureToPolymerChains method call.

/**
 * Splits a structure into its polymer chains.
 *
 * Every chain of the first model whose entity type is "polymer" is copied
 * into a new single-model, single-chain structure, including its groups,
 * atoms, intra-group bonds, and those inter-group bonds whose two atoms both
 * belong to the chain. Non-polymer chains are skipped.
 *
 * @param t tuple whose second element is the structure to split
 * @return iterator over (key, chain structure) tuples, where the key is the
 *         structure id, a ".", and either the chain name or the chain id
 *         (depending on useChainIdInsteadOfChainName)
 */
@Override
public Iterator<Tuple2<String, StructureDataInterface>> call(Tuple2<String, StructureDataInterface> t) throws Exception {
    StructureDataInterface structure = t._2;
    // precalculate indices
    // only the chains of the first model are processed
    int numChains = structure.getChainsPerModel()[0];
    int[] chainToEntityIndex = getChainToEntityIndex(structure);
    int[] atomsPerChain = new int[numChains];
    int[] bondsPerChain = new int[numChains];
    // presumably fills both arrays with per-chain totals; helper is not visible here
    getNumAtomsAndBonds(structure, atomsPerChain, bondsPerChain);
    List<Tuple2<String, StructureDataInterface>> chainList = new ArrayList<>();
    // entity sequences already emitted for this structure (used only when excludeDuplicates is set)
    Set<String> seqSet = new HashSet<>();
    // atomCounter and groupCounter index the structure-wide arrays and advance for every chain, polymer or not
    for (int i = 0, atomCounter = 0, groupCounter = 0; i < numChains; i++) {
        AdapterToStructureData polymerChain = new AdapterToStructureData();
        int entityToChainIndex = chainToEntityIndex[i];
        boolean polymer = structure.getEntityType(entityToChainIndex).equals("polymer");
        int polymerAtomCount = 0;
        // maps an atom's index in the original structure to its index in the new chain structure
        Map<Integer, Integer> atomMap = new HashMap<>();
        if (polymer) {
            // to avoid information loss, add the chain name, chain id, and entity id;
            // this is required by some queries
            String structureId = structure.getStructureId() + "." + structure.getChainNames()[i] + "." + structure.getChainIds()[i] + "." + (entityToChainIndex + 1);
            // set header
            polymerChain.initStructure(bondsPerChain[i], atomsPerChain[i], structure.getGroupsPerChain()[i], 1, 1, structureId);
            DecoderUtils.addXtalographicInfo(structure, polymerChain);
            DecoderUtils.addHeaderInfo(structure, polymerChain);
            // set model info (only one model: 0)
            polymerChain.setModelInfo(0, 1);
            // set entity and chain info
            polymerChain.setEntityInfo(new int[] { 0 }, structure.getEntitySequence(entityToChainIndex), structure.getEntityDescription(entityToChainIndex), structure.getEntityType(entityToChainIndex));
            polymerChain.setChainInfo(structure.getChainIds()[i], structure.getChainNames()[i], structure.getGroupsPerChain()[i]);
        }
        for (int j = 0; j < structure.getGroupsPerChain()[i]; j++, groupCounter++) {
            // index of this group's type, used for the per-group-type lookups below
            int groupIndex = structure.getGroupTypeIndices()[groupCounter];
            if (polymer) {
                // set group info
                polymerChain.setGroupInfo(structure.getGroupName(groupIndex), structure.getGroupIds()[groupCounter], structure.getInsCodes()[groupCounter], structure.getGroupChemCompType(groupIndex), structure.getNumAtomsInGroup(groupIndex), structure.getGroupBondOrders(groupIndex).length, structure.getGroupSingleLetterCode(groupIndex), structure.getGroupSequenceIndices()[groupCounter], structure.getSecStructList()[groupCounter]);
            }
            // this loop also runs for non-polymer chains so that atomCounter stays aligned with the per-atom arrays
            for (int k = 0; k < structure.getNumAtomsInGroup(groupIndex); k++, atomCounter++) {
                if (polymer) {
                    // set atom info
                    // remember where this atom ends up in the new chain, for the inter-group bonds below
                    atomMap.put(atomCounter, polymerAtomCount);
                    polymerAtomCount++;
                    polymerChain.setAtomInfo(structure.getGroupAtomNames(groupIndex)[k], structure.getAtomIds()[atomCounter], structure.getAltLocIds()[atomCounter], structure.getxCoords()[atomCounter], structure.getyCoords()[atomCounter], structure.getzCoords()[atomCounter], structure.getOccupancies()[atomCounter], structure.getbFactors()[atomCounter], structure.getGroupElementNames(groupIndex)[k], structure.getGroupAtomCharges(groupIndex)[k]);
                }
            }
            if (polymer) {
                // add intra-group bond info
                // bond indices are stored as consecutive pairs: entries 2*l and 2*l+1 belong to bond l
                for (int l = 0; l < structure.getGroupBondOrders(groupIndex).length; l++) {
                    int bondIndOne = structure.getGroupBondIndices(groupIndex)[l * 2];
                    int bondIndTwo = structure.getGroupBondIndices(groupIndex)[l * 2 + 1];
                    int bondOrder = structure.getGroupBondOrders(groupIndex)[l];
                    polymerChain.setGroupBond(bondIndOne, bondIndTwo, bondOrder);
                }
            }
        }
        if (polymer) {
            // Add inter-group bond info
            // scans all inter-group bonds of the structure and keeps those with both atoms in this chain
            for (int ii = 0; ii < structure.getInterGroupBondOrders().length; ii++) {
                int bondIndOne = structure.getInterGroupBondIndices()[ii * 2];
                int bondIndTwo = structure.getInterGroupBondIndices()[ii * 2 + 1];
                int bondOrder = structure.getInterGroupBondOrders()[ii];
                // a null lookup means the atom is not part of this chain
                Integer indexOne = atomMap.get(bondIndOne);
                if (indexOne != null) {
                    Integer indexTwo = atomMap.get(bondIndTwo);
                    if (indexTwo != null) {
                        polymerChain.setInterGroupBond(indexOne, indexTwo, bondOrder);
                    }
                }
            }
            polymerChain.finalizeStructure();
            // the key uses the chain name unless the chain id was requested
            String chId = structure.getChainNames()[i];
            if (useChainIdInsteadOfChainName) {
                chId = structure.getChainIds()[i];
            }
            if (excludeDuplicates) {
                // drop this chain if a chain with the same entity sequence was already emitted;
                // note that the check happens after the chain has been fully built
                if (seqSet.contains(structure.getEntitySequence(chainToEntityIndex[i]))) {
                    continue;
                }
                seqSet.add(structure.getEntitySequence(chainToEntityIndex[i]));
            }
            chainList.add(new Tuple2<String, StructureDataInterface>(structure.getStructureId() + "." + chId, polymerChain));
        }
    }
    return chainList.iterator();
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) StructureDataInterface(org.rcsb.mmtf.api.StructureDataInterface) AdapterToStructureData(org.rcsb.mmtf.encoder.AdapterToStructureData) Tuple2(scala.Tuple2) HashSet(java.util.HashSet)

Aggregations

StructureDataInterface (org.rcsb.mmtf.api.StructureDataInterface)102 JavaSparkContext (org.apache.spark.api.java.JavaSparkContext)60 SparkConf (org.apache.spark.SparkConf)58 Row (org.apache.spark.sql.Row)27 StructureToPolymerChains (edu.sdsc.mmtf.spark.mappers.StructureToPolymerChains)22 Test (org.junit.Test)20 Pisces (edu.sdsc.mmtf.spark.webfilters.Pisces)19 ArrayList (java.util.ArrayList)12 ProteinSequenceEncoder (edu.sdsc.mmtf.spark.ml.ProteinSequenceEncoder)10 ColumnarStructure (edu.sdsc.mmtf.spark.utils.ColumnarStructure)10 Tuple2 (scala.Tuple2)9 Path (java.nio.file.Path)7 HashSet (java.util.HashSet)7 AdapterToStructureData (org.rcsb.mmtf.encoder.AdapterToStructureData)7 JavaPairRDD (org.apache.spark.api.java.JavaPairRDD)6 ContainsLProteinChain (edu.sdsc.mmtf.spark.filters.ContainsLProteinChain)5 List (java.util.List)5 Resolution (edu.sdsc.mmtf.spark.filters.Resolution)4 MmtfReader (edu.sdsc.mmtf.spark.io.MmtfReader)4 File (java.io.File)4