use of edu.sdsc.mmtf.spark.filters.ContainsGroup in project mm-dev by sbl-sdsc.
the class TestRosettaMmtf method main.
/**
* Test: Read MMTF-Hadoop Sequence file.
*
* @param args args[0] <path-to-mmtf-hadoop-sequence-file>
*
* @throws FileNotFoundException
*/
public static void main(String[] args) throws FileNotFoundException {
    // instantiate Spark
    // TODO set to local[1] !!!!
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("TestRosettaMmtf");
    JavaSparkContext sc = new JavaSparkContext(conf);
    long start = System.nanoTime();
    // read an MMTF-Hadoop Sequence file from the path given in args[0]
    JavaPairRDD<String, StructureDataInterface> structures = MmtfReader.readSequenceFile(args[0], sc);
    // total: 639 structures
    // structures = structures.filter(new ContainsDnaChain()); // ?
    // structures = structures.filter(new ContainsLProteinChain()); // 639?
    // structures = structures.filter(new ContainsGroup("ZN")); // 0
    // structures = structures.filter(new ContainsGroup("ATP")); //
    // debug: print structure data
    // structures.foreach(t -> TraverseStructureHierarchy.demo(t._2));
    // structures.foreach(t -> System.out.println(t._1));
    System.out.println(structures.map(t -> t._2.getNumEntities()).reduce((a, b) -> a + b));
    System.out.println("Number of structures read: " + structures.count());
    long end = System.nanoTime();
    System.out.println("Time: " + (end - start) / 1E9 + " sec.");
    // close Spark
    sc.close();
}
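The commented-out filters above hint at how the same RDD can be narrowed further before counting. The following is a minimal, self-contained sketch of that pattern; the class name and the particular combination of ContainsLProteinChain with ContainsGroup("ATP") are illustrative assumptions, not part of the original test.

import java.io.FileNotFoundException;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.rcsb.mmtf.api.StructureDataInterface;

import edu.sdsc.mmtf.spark.filters.ContainsGroup;
import edu.sdsc.mmtf.spark.filters.ContainsLProteinChain;
import edu.sdsc.mmtf.spark.io.MmtfReader;

// hypothetical example class, not taken from mm-dev
public class ListAtpProteinStructures {

    public static void main(String[] args) throws FileNotFoundException {
        SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("ListAtpProteinStructures");
        JavaSparkContext sc = new JavaSparkContext(conf);

        // read the MMTF-Hadoop Sequence file passed as args[0]
        JavaPairRDD<String, StructureDataInterface> structures = MmtfReader.readSequenceFile(args[0], sc);

        // keep only entries with an L-protein chain that also contain an ATP group
        structures = structures
                .filter(new ContainsLProteinChain())
                .filter(new ContainsGroup("ATP"));

        // the keys of the pair RDD are the structure identifiers
        structures.keys().collect().forEach(System.out::println);

        sc.close();
    }
}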
use of edu.sdsc.mmtf.spark.filters.ContainsGroup in project mmtf-spark by sbl-sdsc.
the class FilterByGroups method main.
public static void main(String[] args) throws FileNotFoundException {
    String path = MmtfReader.getMmtfReducedPath();
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(FilterByGroups.class.getSimpleName());
    JavaSparkContext sc = new JavaSparkContext(conf);
    // find all structures that contain both ATP and MG
    long count = MmtfReader.readSequenceFile(path, sc).filter(new ContainsGroup("ATP")).filter(new ContainsGroup("MG")).count();
    System.out.println("Structures with ATP + MG: " + count);
    sc.close();
}
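Because each filter call returns a new JavaPairRDD, chaining two ContainsGroup filters acts as a logical AND: only structures containing both groups survive. If the matching entries themselves are needed rather than a count, the keys can be collected instead; the variant below is a sketch of that idea and is not part of FilterByGroups itself (the class name is hypothetical).

import java.io.FileNotFoundException;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

import edu.sdsc.mmtf.spark.filters.ContainsGroup;
import edu.sdsc.mmtf.spark.io.MmtfReader;

// hypothetical example class, not taken from mmtf-spark
public class ListStructuresWithAtpAndMg {

    public static void main(String[] args) throws FileNotFoundException {
        String path = MmtfReader.getMmtfReducedPath();
        SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("ListStructuresWithAtpAndMg");
        JavaSparkContext sc = new JavaSparkContext(conf);

        // chained filters: a structure must contain both ATP and MG to pass
        List<String> ids = MmtfReader.readSequenceFile(path, sc)
                .filter(new ContainsGroup("ATP"))
                .filter(new ContainsGroup("MG"))
                .keys()
                .collect();

        System.out.println("Structures with ATP + MG: " + ids.size());
        ids.forEach(System.out::println);

        sc.close();
    }
}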
use of edu.sdsc.mmtf.spark.filters.ContainsGroup in project mm-dev by sbl-sdsc.
the class TestSwissModelMmtf method main.
/**
* Test: Read MMTF-Hadoop Sequence file.
*
* @param args args[0] <path-to-mmtf-hadoop-sequence-file>
*
* @throws FileNotFoundException
*/
public static void main(String[] args) throws FileNotFoundException {
    // instantiate Spark
    // TODO set to local[1] !!!!
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("TestSwissModelMmtf");
    JavaSparkContext sc = new JavaSparkContext(conf);
    long start = System.nanoTime();
    // read an MMTF-Hadoop Sequence file from the path given in args[0]
    JavaPairRDD<String, StructureDataInterface> structures = MmtfReader.readSequenceFile(args[0], sc);
    // total: 6022 structures
    // structures = structures.filter(new ContainsDnaChain()); // 3 ?
    // structures = structures.filter(new ContainsLProteinChain()); // 6022 ?
    // structures = structures.filter(new ContainsGroup("ZN")); // 228
    structures = structures.filter(new ContainsGroup("ATP")); // 228
    // debug: print structure data
    // structures.foreach(t -> TraverseStructureHierarchy.demo(t._2));
    // structures.foreach(t -> System.out.println(t._1));
    // System.out.println(structures.map(t -> t._2.getNumGroups()).reduce((a, b) -> a + b));
    System.out.println("Number of structures read: " + structures.count());
    long end = System.nanoTime();
    System.out.println("Time: " + (end - start) / 1E9 + " sec.");
    // close Spark
    sc.close();
}
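The commented-out line above sums getNumGroups() over the whole input; the sketch below applies the same aggregation to the ATP-filtered subset. The class name is an illustrative assumption, and the example otherwise reuses only calls already shown in these snippets.

import java.io.FileNotFoundException;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.rcsb.mmtf.api.StructureDataInterface;

import edu.sdsc.mmtf.spark.filters.ContainsGroup;
import edu.sdsc.mmtf.spark.io.MmtfReader;

// hypothetical example class, not taken from mm-dev
public class CountGroupsInAtpStructures {

    public static void main(String[] args) throws FileNotFoundException {
        SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("CountGroupsInAtpStructures");
        JavaSparkContext sc = new JavaSparkContext(conf);

        JavaPairRDD<String, StructureDataInterface> structures = MmtfReader.readSequenceFile(args[0], sc);

        // restrict to structures that contain at least one ATP group
        structures = structures.filter(new ContainsGroup("ATP"));

        // sum getNumGroups() over the filtered set
        int totalGroups = structures.map(t -> t._2.getNumGroups()).reduce((a, b) -> a + b);

        System.out.println("ATP-containing structures: " + structures.count());
        System.out.println("Total groups in these structures: " + totalGroups);

        sc.close();
    }
}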