use of org.rcsb.mmtf.api.StructureDataInterface in project mm-dev by sbl-sdsc.
the class DemoAllVsAll_cluster method main.
/**
 * Demo: all-vs-all structural alignment of a random sample of PDB polymer chains.
 *
 * <p>Reads the reduced MMTF sequence file, splits structures into polymer chains,
 * keeps the chains passing the Pisces filter (20% sequence identity, resolution
 * better than 1.6 A), draws a random sample, aligns every pair with FatCatRigid
 * and prints the resulting table together with timing information.
 *
 * <p>The {@link SparkConf} is created without master or application name, so
 * those are presumably supplied externally (e.g. by spark-submit).
 *
 * @param args optional single argument: the sampling fraction (default 0.01)
 * @throws IOException declared for callers; see the invoked reader methods
 */
public static void main(String[] args) throws IOException {
    String path = MmtfReader.getMmtfReducedPath();

    long start = System.nanoTime();

    SparkConf conf = new SparkConf();
    JavaSparkContext sc = new JavaSparkContext(conf);
    try {
        // Default: 1% random sample of the Pisces non-redundant set.
        double fraction = 0.01;
        // optional command line argument overrides the sampling fraction
        if (args.length == 1) {
            fraction = Double.parseDouble(args[0]);
        }
        long seed = 123;

        JavaPairRDD<String, StructureDataInterface> pdb = MmtfReader.readSequenceFile(path, sc)
                .flatMapToPair(new StructureToPolymerChains())
                .filter(new Pisces(20, 1.6))
                .sample(false, fraction, seed);
        System.out.println(pdb.count());

        // run the structural alignment
        String algorithmName = FatCatRigid.algorithmName;
        Dataset<Row> alignments = StructureAligner.getAllVsAllAlignments(pdb, algorithmName).cache();

        // show results; keep the count as a long and clamp it for show(int)
        // so a very large result cannot wrap around to a negative row count
        long count = alignments.count();
        alignments.show((int) Math.min(count, (long) Integer.MAX_VALUE));
        System.out.println("Pairs: " + count);

        long end = System.nanoTime();
        // a small sample may yield no alignments at all: avoid dividing by zero
        if (count > 0) {
            System.out.println("Time per alignment: " + TimeUnit.NANOSECONDS.toMillis((end - start) / count) + " msec.");
        }
        System.out.println("Time: " + TimeUnit.NANOSECONDS.toSeconds(end - start) + " sec.");
    } finally {
        // release the Spark context even when reading or aligning fails
        sc.close();
    }
}
use of org.rcsb.mmtf.api.StructureDataInterface in project mm-dev by sbl-sdsc.
the class DemoQueryVsAll method main.
/**
 * Demo: aligns the polymer chains of one query structure (PDB ID 2W47) against
 * a sample of PDB polymer chains and prints the alignments sorted by the
 * "tm" column in descending order, followed by timing information.
 *
 * <p>Runs on a local Spark master using all available cores.
 *
 * @param args not used
 * @throws IOException declared for callers; see the invoked reader methods
 */
public static void main(String[] args) throws IOException {
    String path = MmtfReader.getMmtfReducedPath();

    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(DemoQueryVsAll.class.getSimpleName());
    JavaSparkContext sc = new JavaSparkContext(conf);
    try {
        long start = System.nanoTime();

        // download query structure
        List<String> queryId = Arrays.asList("2W47");
        JavaPairRDD<String, StructureDataInterface> query = MmtfReader.downloadFullMmtfFiles(queryId, sc)
                .flatMapToPair(new StructureToPolymerChains());

        // Read the reduced set with a read fraction of 1.0, keep the Pisces
        // non-redundant chains (20% sequence identity, resolution better than
        // 1.6 A), then take an 8% random sample of those chains.
        double fraction = 1.0;
        long seed = 123;
        JavaPairRDD<String, StructureDataInterface> target = MmtfReader.readSequenceFile(path, fraction, seed, sc)
                .flatMapToPair(new StructureToPolymerChains())
                .filter(new Pisces(20, 1.6))
                .sample(false, 0.08, seed);

        // Alternative algorithm names that can be substituted here:
        //   CeMain.algorithmName, CeCPMain.algorithmName,
        //   FatCatFlexible.algorithmName, ExhaustiveAligner.alignmentAlgorithm
        String alignmentAlgorithm = FatCatRigid.algorithmName;

        // calculate alignments
        Dataset<Row> alignments = StructureAligner.getQueryVsAllAlignments(query, target, alignmentAlgorithm).cache();

        // show results; keep the count as a long and clamp it for show(int)
        // so a very large result cannot wrap around to a negative row count
        long count = alignments.count();
        alignments.sort(col("tm").desc()).show((int) Math.min(count, (long) Integer.MAX_VALUE));
        System.out.println("Pairs: " + count);

        long end = System.nanoTime();
        // the sample may yield no alignments at all: avoid dividing by zero
        if (count > 0) {
            System.out.println("Time per alignment: " + TimeUnit.NANOSECONDS.toMillis((end - start) / count) + " msec.");
        }
        System.out.println("Time: " + TimeUnit.NANOSECONDS.toSeconds(end - start) + " sec.");
    } finally {
        // release the Spark context even when downloading or aligning fails
        sc.close();
    }
}
use of org.rcsb.mmtf.api.StructureDataInterface in project mm-dev by sbl-sdsc.
the class StructureAligner method getAllVsAllAlignments.
/**
 * Aligns every protein chain in {@code targets} against every other chain with
 * the named alignment algorithm and returns the results as a dataset.
 * Each input structure is expected to hold exactly one protein chain.
 *
 * <p>The C-alpha coordinates of all chains are collected to the driver and
 * broadcast to the workers, so the complete chain set must fit in memory.
 *
 * @param targets structures containing single protein chains, keyed by chain name
 * @param alignmentAlgorithm name of the algorithm
 * @return dataset with alignment metrics
 */
public static Dataset<Row> getAllVsAllAlignments(JavaPairRDD<String, StructureDataInterface> targets, String alignmentAlgorithm) {
    SparkSession session = SparkSession.builder().getOrCreate();
    JavaSparkContext sc = new JavaSparkContext(session.sparkContext());

    // Reduce each structure to its C-alpha trace and pull the
    // (chain name, coordinates) tuples back to the driver.
    JavaPairRDD<String, Point3d[]> coordinates =
            targets.mapValues(structure -> new ColumnarStructureX(structure, true).getcAlphaCoordinates());
    List<Tuple2<String, Point3d[]>> chains = coordinates.collect();

    // One index pair per chain combination, e.g. (0,1), (0,2), ..., (1,2), (1,3), ...
    JavaRDD<Tuple2<Integer, Integer>> pairs = getPairs(sc, chains.size());

    // The mapper receives the chain list as a broadcast variable so each
    // worker holds a single copy of it.
    StructuralAlignmentMapper mapper = new StructuralAlignmentMapper(sc.broadcast(chains), alignmentAlgorithm);

    // A pair may produce zero or more alignment rows, hence flatMap.
    JavaRDD<Row> rows = pairs.flatMap(mapper);

    // Attach the schema and hand the rows back as a dataset.
    return session.createDataFrame(rows, getSchema());
}
use of org.rcsb.mmtf.api.StructureDataInterface in project mm-dev by sbl-sdsc.
the class MergeMmtf method addEntityInfo.
/**
 * Registers the entities of all input structures with the merged structure.
 *
 * <p>The chain indices of each entity are shifted so that the chains of one
 * structure are numbered after the highest chain index used by the structures
 * processed before it.
 *
 * @param structures structures whose entity records are copied
 * @param complex merged structure that receives the entity records
 */
private static void addEntityInfo(StructureDataInterface[] structures, AdapterToStructureData complex) {
    // highest shifted chain index handed out so far
    int highestChainIndex = 0;
    // amount added to the chain indices of the structure being processed
    int chainShift = 0;

    for (StructureDataInterface structure : structures) {
        for (int entity = 0; entity < structure.getNumEntities(); entity++) {
            int[] original = structure.getEntityChainIndexList(entity);

            // Build the shifted indices in a fresh array so the source
            // structure's own index list is left untouched.
            int[] shifted = new int[original.length];
            for (int k = 0; k < original.length; k++) {
                shifted[k] = original[k] + chainShift;
                if (shifted[k] > highestChainIndex) {
                    highestChainIndex = shifted[k];
                }
            }

            complex.setEntityInfo(
                    shifted,
                    structure.getEntitySequence(entity),
                    structure.getEntityDescription(entity),
                    structure.getEntityType(entity));
        }
        // the next structure's chains start right after the highest index used
        chainShift = highestChainIndex + 1;
    }
}
use of org.rcsb.mmtf.api.StructureDataInterface in project mm-dev by sbl-sdsc.
the class MergeMmtf method initStructure.
/**
 * Initializes the merged structure from the combined statistics of the input
 * structures.
 *
 * <p>Bond, atom, group and chain counts are summed; resolution, R-free and
 * R-work are each the maximum over all inputs (or -1 if no input reports a
 * larger value); the title is the concatenation of all input titles. The
 * merged structure is declared as a single model with the experimental method
 * "THEORETICAL MODEL" and fixed placeholder dates.
 *
 * @param structureId identifier assigned to the merged structure
 * @param structures structures being merged
 * @param complex merged structure to initialize
 */
private static void initStructure(String structureId, StructureDataInterface[] structures, AdapterToStructureData complex) {
    int nBonds = 0;
    int nAtoms = 0;
    int nGroups = 0;
    int nChains = 0;
    int nModels = 1;
    float resolution = -1;
    float rFree = -1;
    float rWork = -1;
    StringBuilder title = new StringBuilder();

    for (StructureDataInterface s : structures) {
        nBonds += s.getNumBonds();
        nAtoms += s.getNumAtoms();
        nGroups += s.getNumGroups();
        nChains += s.getNumChains();
        resolution = Math.max(resolution, s.getResolution());
        rFree = Math.max(rFree, s.getRfree());
        // compare against the running rWork (the original compared against
        // rFree, which leaked R-free values into the reported R-work)
        rWork = Math.max(rWork, s.getRwork());
        title.append(s.getTitle());
    }

    String[] experimentalMethods = { "THEORETICAL MODEL" };
    complex.setMmtfProducer("mmtf-spark");
    complex.initStructure(nBonds, nAtoms, nGroups, nChains, nModels, structureId);
    complex.setHeaderInfo(rFree, rWork, resolution, title.toString(), "20180101", "20180101", experimentalMethods);
    // all chains are placed in the single model with index 0
    complex.setModelInfo(0, nChains);
}
Aggregations