use of org.rcsb.mmtf.api.StructureDataInterface in project mmtf-spark by sbl-sdsc.
the class InteractionAnalysisSimple method main.
/**
 * Reads the PDB archive in MMTF format, keeps a representative subset of
 * structures, extracts the interactions of "ZN" groups and prints summary
 * statistics about them to standard output.
 *
 * @param args no input arguments
 * @throws IOException if MmtfReader fails
 */
public static void main(String[] args) throws IOException {
    String path = MmtfReader.getMmtfFullPath();
    long start = System.nanoTime();

    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(InteractionAnalysisSimple.class.getSimpleName());
    JavaSparkContext sc = new JavaSparkContext(conf);
    try {
        // read PDB in MMTF format
        JavaPairRDD<String, StructureDataInterface> pdb = MmtfReader.readSequenceFile(path, sc);

        // use only representative structures
        // NOTE(review): presumably percent sequence identity and resolution in Angstroms -- confirm against Pisces
        int sequenceIdentity = 40;
        double resolution = 2.5;
        pdb = pdb.filter(new Pisces(sequenceIdentity, resolution));

        // NOTE(review): second argument is presumably the interaction distance cutoff -- confirm against GroupInteractionExtractor
        GroupInteractionExtractor finder = new GroupInteractionExtractor("ZN", 3);
        // cached because the dataset is evaluated several times below
        Dataset<Row> interactions = finder.getDataset(pdb).cache();

        // print the schema and the first 20 interactions
        interactions.printSchema();
        interactions.show(20);
        System.out.println("# interactions: " + interactions.count());

        // show the top 10 interacting groups
        interactions.groupBy(col("residue2")).count().sort(col("count").desc()).show(10);

        long end = System.nanoTime();
        System.out.println("Time: " + (end - start) / 1E9 + "sec.");
    } finally {
        // release the Spark context even when one of the steps above fails
        sc.close();
    }
}
use of org.rcsb.mmtf.api.StructureDataInterface in project mmtf-spark by sbl-sdsc.
the class ExperimentalMethods method call.
/**
 * Tests whether the structure reports at least one of the experimental
 * methods this filter was configured with.
 *
 * @param t tuple whose second element is the structure to test
 * @return true if the structure's experimental methods and the configured
 *         methods have at least one entry in common
 */
@Override
public Boolean call(Tuple2<String, StructureDataInterface> t) throws Exception {
    String[] reported = t._2.getExperimentalMethods();

    // intersect the reported methods with the configured ones
    Set<String> overlap = new HashSet<>(Arrays.asList(reported));
    overlap.retainAll(experimentalMethods);

    return overlap.size() > 0;
}
use of org.rcsb.mmtf.api.StructureDataInterface in project mmtf-spark by sbl-sdsc.
the class SecondaryStructure method call.
/**
 * Tests the polymer chains of the first model against the configured
 * helix, sheet and coil fraction ranges.
 *
 * <p>Fractions are computed per chain over the groups whose Q3 code is
 * helix, sheet or coil; groups with any other code are left out of the
 * denominator. In non-exclusive mode the first matching polymer chain
 * accepts the structure; in exclusive mode the first non-matching polymer
 * chain rejects it. Non-polymer chains are ignored.
 *
 * @param t tuple whose second element is the structure to test
 * @return true if the structure passes the filter as described above
 */
@Override
public Boolean call(Tuple2<String, StructureDataInterface> t) throws Exception {
    StructureDataInterface structure = t._2;

    // only the chains of the first model are inspected
    int chainCount = structure.getChainsPerModel()[0];
    int[] dsspCodes = structure.getSecStructList();

    boolean sawPolymer = false;
    boolean anyChainMatched = false;
    // position of the current chain's first group within dsspCodes
    int groupOffset = 0;

    for (int chain = 0; chain < chainCount; chain++) {
        String chainType = EncoderUtils.getTypeFromChainId(structure, chain);
        boolean isPolymer = chainType.equals("polymer");
        int groupCount = structure.getGroupsPerChain()[chain];

        if (!isPolymer) {
            // skip the chain but keep the group offset aligned
            groupOffset += groupCount;
            continue;
        }
        sawPolymer = true;

        int helixCount = 0;
        int sheetCount = 0;
        int coilCount = 0;
        int unclassified = 0;
        int chainEnd = groupOffset + groupCount;
        for (int g = groupOffset; g < chainEnd; g++) {
            switch (DsspSecondaryStructure.getQ3Code(dsspCodes[g])) {
                case ALPHA_HELIX:
                    helixCount++;
                    break;
                case EXTENDED:
                    sheetCount++;
                    break;
                case COIL:
                    coilCount++;
                    break;
                default:
                    unclassified++;
                    break;
            }
        }
        groupOffset = chainEnd;

        // fractions relative to the classified groups only; a zero
        // denominator yields NaN, which fails every range test below
        int classified = groupCount - unclassified;
        double helix = (double) helixCount / classified;
        double sheet = (double) sheetCount / classified;
        double coil = (double) coilCount / classified;

        boolean withinLimits = helix >= helixFractionMin && helix <= helixFractionMax
                && sheet >= sheetFractionMin && sheet <= sheetFractionMax
                && coil >= coilFractionMin && coil <= coilFractionMax;

        if (withinLimits) {
            if (!exclusive) {
                // one matching chain is sufficient
                return true;
            }
            anyChainMatched = true;
        } else if (exclusive) {
            // every polymer chain has to match
            return false;
        }
    }

    return anyChainMatched && sawPolymer;
}
use of org.rcsb.mmtf.api.StructureDataInterface in project mmtf-spark by sbl-sdsc.
the class ContainsGroup method call.
/**
 * Tests whether the structure contains at least one of the queried groups.
 *
 * <p>The names of all group types referenced by the structure (group type
 * indices 0 up to the largest index in use) are compared against the
 * query. A structure without any groups never matches.
 *
 * @param t tuple whose second element is the structure to test
 * @return true if the name of any group type of the structure is
 *         contained in the group query
 */
@Override
public Boolean call(Tuple2<String, StructureDataInterface> t) throws Exception {
    StructureDataInterface structure = t._2;

    // largest group type index in use; stays at -1 when the structure has
    // no groups, so that no group name is looked up for an empty structure
    int maxGroupTypeIndex = -1;
    for (int index : structure.getGroupTypeIndices()) {
        maxGroupTypeIndex = Math.max(maxGroupTypeIndex, index);
    }

    // stop at the first group type whose name is part of the query
    for (int i = 0; i <= maxGroupTypeIndex; i++) {
        if (groupQuery.contains(structure.getGroupName(i))) {
            return true;
        }
    }
    return false;
}
use of org.rcsb.mmtf.api.StructureDataInterface in project mmtf-spark by sbl-sdsc.
the class StructureToPolymerChains method call.
/**
 * Splits a structure into its polymer chains and returns each of them as a
 * separate single-chain, single-model structure.
 *
 * <p>Only the chains of the first model are processed. Chains whose entity
 * type is not "polymer" produce no output, but their groups and atoms are
 * still stepped over so that the running group and atom counters stay
 * aligned with the structure's flat arrays. The key of each returned tuple
 * is the structure id followed by "." and the chain name, or the chain id
 * when useChainIdInsteadOfChainName is set. When excludeDuplicates is set,
 * a chain is dropped if an earlier chain of the same structure had the
 * same entity sequence.
 *
 * @param t tuple whose second element is the structure to split
 * @return iterator over (key, polymer chain structure) tuples
 * @throws Exception if building a chain structure fails
 */
@Override
public Iterator<Tuple2<String, StructureDataInterface>> call(Tuple2<String, StructureDataInterface> t) throws Exception {
StructureDataInterface structure = t._2;
// precalculate indices
int numChains = structure.getChainsPerModel()[0];
int[] chainToEntityIndex = getChainToEntityIndex(structure);
int[] atomsPerChain = new int[numChains];
int[] bondsPerChain = new int[numChains];
// fills atomsPerChain and bondsPerChain in place
getNumAtomsAndBonds(structure, atomsPerChain, bondsPerChain);
List<Tuple2<String, StructureDataInterface>> chainList = new ArrayList<>();
// entity sequences already emitted; only consulted when excludeDuplicates is set
Set<String> seqSet = new HashSet<>();
// atomCounter and groupCounter run over the whole structure, not per chain
for (int i = 0, atomCounter = 0, groupCounter = 0; i < numChains; i++) {
AdapterToStructureData polymerChain = new AdapterToStructureData();
int entityToChainIndex = chainToEntityIndex[i];
boolean polymer = structure.getEntityType(entityToChainIndex).equals("polymer");
int polymerAtomCount = 0;
// maps an atom index of the full structure to its index within this chain
Map<Integer, Integer> atomMap = new HashMap<>();
if (polymer) {
// to avoid information loss, add the chain name, chain id and entity id
// to the structure id; this is required by some queries
String structureId = structure.getStructureId() + "." + structure.getChainNames()[i] + "." + structure.getChainIds()[i] + "." + (entityToChainIndex + 1);
// set header
polymerChain.initStructure(bondsPerChain[i], atomsPerChain[i], structure.getGroupsPerChain()[i], 1, 1, structureId);
DecoderUtils.addXtalographicInfo(structure, polymerChain);
DecoderUtils.addHeaderInfo(structure, polymerChain);
// set model info (only one model: 0)
polymerChain.setModelInfo(0, 1);
// set entity and chain info
polymerChain.setEntityInfo(new int[] { 0 }, structure.getEntitySequence(entityToChainIndex), structure.getEntityDescription(entityToChainIndex), structure.getEntityType(entityToChainIndex));
polymerChain.setChainInfo(structure.getChainIds()[i], structure.getChainNames()[i], structure.getGroupsPerChain()[i]);
}
// walk all groups of the chain; the counters advance for non-polymer chains as well
for (int j = 0; j < structure.getGroupsPerChain()[i]; j++, groupCounter++) {
int groupIndex = structure.getGroupTypeIndices()[groupCounter];
if (polymer) {
// set group info
polymerChain.setGroupInfo(structure.getGroupName(groupIndex), structure.getGroupIds()[groupCounter], structure.getInsCodes()[groupCounter], structure.getGroupChemCompType(groupIndex), structure.getNumAtomsInGroup(groupIndex), structure.getGroupBondOrders(groupIndex).length, structure.getGroupSingleLetterCode(groupIndex), structure.getGroupSequenceIndices()[groupCounter], structure.getSecStructList()[groupCounter]);
}
for (int k = 0; k < structure.getNumAtomsInGroup(groupIndex); k++, atomCounter++) {
if (polymer) {
// set atom info
// remember where this atom ends up in the new chain for the inter-group bonds below
atomMap.put(atomCounter, polymerAtomCount);
polymerAtomCount++;
polymerChain.setAtomInfo(structure.getGroupAtomNames(groupIndex)[k], structure.getAtomIds()[atomCounter], structure.getAltLocIds()[atomCounter], structure.getxCoords()[atomCounter], structure.getyCoords()[atomCounter], structure.getzCoords()[atomCounter], structure.getOccupancies()[atomCounter], structure.getbFactors()[atomCounter], structure.getGroupElementNames(groupIndex)[k], structure.getGroupAtomCharges(groupIndex)[k]);
}
}
if (polymer) {
// add intra-group bond info
// bond indices are stored as consecutive pairs: entries 2*l and 2*l+1 belong to bond l
for (int l = 0; l < structure.getGroupBondOrders(groupIndex).length; l++) {
int bondIndOne = structure.getGroupBondIndices(groupIndex)[l * 2];
int bondIndTwo = structure.getGroupBondIndices(groupIndex)[l * 2 + 1];
int bondOrder = structure.getGroupBondOrders(groupIndex)[l];
polymerChain.setGroupBond(bondIndOne, bondIndTwo, bondOrder);
}
}
}
if (polymer) {
// Add inter-group bond info
// all inter-group bonds of the structure are scanned; a bond is kept only
// if both of its atoms were mapped into this chain
for (int ii = 0; ii < structure.getInterGroupBondOrders().length; ii++) {
int bondIndOne = structure.getInterGroupBondIndices()[ii * 2];
int bondIndTwo = structure.getInterGroupBondIndices()[ii * 2 + 1];
int bondOrder = structure.getInterGroupBondOrders()[ii];
Integer indexOne = atomMap.get(bondIndOne);
if (indexOne != null) {
Integer indexTwo = atomMap.get(bondIndTwo);
if (indexTwo != null) {
polymerChain.setInterGroupBond(indexOne, indexTwo, bondOrder);
}
}
}
polymerChain.finalizeStructure();
// the key uses the chain name unless the chain id was requested
String chId = structure.getChainNames()[i];
if (useChainIdInsteadOfChainName) {
chId = structure.getChainIds()[i];
}
if (excludeDuplicates) {
// NOTE(review): the duplicate check runs only after the chain has been fully built
if (seqSet.contains(structure.getEntitySequence(chainToEntityIndex[i]))) {
continue;
}
seqSet.add(structure.getEntitySequence(chainToEntityIndex[i]));
}
chainList.add(new Tuple2<String, StructureDataInterface>(structure.getStructureId() + "." + chId, polymerChain));
}
}
return chainList.iterator();
}
Aggregations