Use of edu.sdsc.mmtf.spark.utils.ColumnarStructureX in project mm-dev by sbl-sdsc.
Class StructureAligner, method getAllVsAllAlignments.
/**
 * Calculates all vs. all structural alignments of protein chains using the
 * specified alignment algorithm. The input structures must contain single
 * protein chains.
 *
 * <p>The C-alpha coordinates of every chain are collected into a list on the
 * driver, broadcast, and then aligned pairwise by a
 * {@code StructuralAlignmentMapper}.
 *
 * @param targets structures containing single protein chains
 * @param alignmentAlgorithm name of the algorithm
 * @return dataset with alignment metrics
 */
public static Dataset<Row> getAllVsAllAlignments(JavaPairRDD<String, StructureDataInterface> targets, String alignmentAlgorithm) {
    SparkSession session = SparkSession.builder().getOrCreate();
    // spark context should not be closed here: it wraps the session's own context
    @SuppressWarnings("resource")
    JavaSparkContext sc = new JavaSparkContext(session.sparkContext());

    // create a list of chainName / C Alpha coordinates
    List<Tuple2<String, Point3d[]>> chains = targets
            .mapValues(s -> new ColumnarStructureX(s, true).getcAlphaCoordinates())
            .collect();

    // create an RDD of all pair indices (0,1), (0,2), ..., (1,2), (1,3), ...
    JavaRDD<Tuple2<Integer, Integer>> pairs = getPairs(sc, chains.size());

    // calculate structural alignments for all pairs.
    // broadcast (copy) chains to all worker nodes for efficient processing.
    // for each pair there can be zero or more solutions, therefore we flatmap the pairs.
    JavaRDD<Row> rows = pairs.flatMap(new StructuralAlignmentMapper(sc.broadcast(chains), alignmentAlgorithm));

    // convert rows to a dataset
    return session.createDataFrame(rows, getSchema());
}
Use of edu.sdsc.mmtf.spark.utils.ColumnarStructureX in project mm-dev by sbl-sdsc.
Class StructureAligner, method getQueryVsAllAlignments.
/**
 * Calculates structural alignments between a set of query protein chains and
 * a set of target protein chains using the specified alignment algorithm.
 * Every query chain is aligned against every target chain. All input
 * structures must contain single protein chains.
 *
 * <p>The C-alpha coordinates of the query chains and then the target chains
 * are collected into one list on the driver (queries first), broadcast, and
 * then aligned pairwise by a {@code StructuralAlignmentMapper}.
 *
 * @param queries structures containing single protein chains to be used as queries
 * @param targets structures containing single protein chains
 * @param alignmentAlgorithm name of the algorithm
 * @return dataset with alignment metrics
 */
public static Dataset<Row> getQueryVsAllAlignments(JavaPairRDD<String, StructureDataInterface> queries, JavaPairRDD<String, StructureDataInterface> targets, String alignmentAlgorithm) {
    SparkSession session = SparkSession.builder().getOrCreate();
    // spark context should not be closed here
    @SuppressWarnings("resource")
    JavaSparkContext sc = new JavaSparkContext(session.sparkContext());

    List<Tuple2<String, Point3d[]>> chains = new ArrayList<>();

    // create a list of chainName / C Alpha coordinates for query chains
    chains.addAll(queries.mapValues(s -> new ColumnarStructureX(s, true).getcAlphaCoordinates()).collect());
    // queries occupy indices [0, querySize); targets follow from querySize onwards
    int querySize = chains.size();

    // create a list of chainName / C Alpha coordinates for target chains
    chains.addAll(targets.mapValues(s -> new ColumnarStructureX(s, true).getcAlphaCoordinates()).collect());
    int targetSize = chains.size() - querySize;

    // create an RDD with indices for all query - target pairs (q, t).
    // the list holds one entry per (q, t) combination, so presize it with the
    // product (computed as long and clamped to avoid int overflow).
    long pairCount = (long) querySize * targetSize;
    List<Tuple2<Integer, Integer>> pairList = new ArrayList<>((int) Math.min(pairCount, Integer.MAX_VALUE));
    for (int q = 0; q < querySize; q++) {
        for (int t = querySize; t < chains.size(); t++) {
            pairList.add(new Tuple2<>(q, t));
        }
    }
    JavaRDD<Tuple2<Integer, Integer>> pairs = sc.parallelize(pairList, NUM_TASKS * sc.defaultParallelism());

    // calculate structural alignments for all pairs.
    // the chains are broadcast (copied) to all worker nodes for efficient processing
    JavaRDD<Row> rows = pairs.flatMap(new StructuralAlignmentMapper(sc.broadcast(chains), alignmentAlgorithm));

    // convert rows to a dataset
    return session.createDataFrame(rows, getSchema());
}
Aggregations