Use of edu.sdsc.mmtf.spark.filters.ExperimentalMethods in project mmtf-spark by sbl-sdsc.
The class WriteMmtfCustom, method main.
/**
 * @param args command-line arguments (not used)
 * @throws FileNotFoundException if the input file cannot be found
 */
public static void main(String[] args) throws FileNotFoundException {
    String path = MmtfReader.getMmtfFullPath();

    long start = System.nanoTime();

    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(WriteMmtfCustom.class.getSimpleName());
    JavaSparkContext sc = new JavaSparkContext(conf);

    // read a 20% random sample of the PDB
    double fraction = 0.2;
    long seed = 123;
    JavaPairRDD<String, StructureDataInterface> pdb = MmtfReader.readSequenceFile(path, fraction, seed, sc);

    // retain high-resolution X-ray structures
    pdb = pdb
            .filter(new ExperimentalMethods(ExperimentalMethods.X_RAY_DIFFRACTION))
            .filter(new Resolution(0, 2.0))
            .filter(new Rfree(0, 0.2));

    // coalesce into 8 partitions to avoid creating many small files
    pdb = pdb.coalesce(8);

    // save this subset as an MMTF Hadoop sequence file
    MmtfWriter.writeSequenceFile(path + "_xray", sc, pdb);

    System.out.println("# structures in custom set: " + pdb.count());

    long end = System.nanoTime();
    System.out.println("Time: " + (end - start) / 1E9 + " sec.");

    sc.close();
}
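For reference, a minimal follow-up sketch (not part of the project) showing how the custom set written above could be read back and verified. It assumes the same imports and Spark context setup as the snippet above and reuses the two-argument readSequenceFile(path, sc) overload shown in the FilterByExperimentalMethods example below; the application name "ReadCustomSetSketch" is hypothetical.

public static void main(String[] args) throws FileNotFoundException {
    // path of the custom set written by WriteMmtfCustom above
    String path = MmtfReader.getMmtfFullPath() + "_xray";

    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("ReadCustomSetSketch");
    JavaSparkContext sc = new JavaSparkContext(conf);

    // read the custom MMTF Hadoop sequence file back in
    JavaPairRDD<String, StructureDataInterface> customSet = MmtfReader.readSequenceFile(path, sc);

    // count the entries to verify the write
    System.out.println("# structures read back: " + customSet.count());

    sc.close();
}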
Use of edu.sdsc.mmtf.spark.filters.ExperimentalMethods in project mmtf-spark by sbl-sdsc.
The class MapToTuple4, method main.
public static void main(String[] args) throws FileNotFoundException {
    String path = MmtfReader.getMmtfReducedPath();

    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(MapToTuple4.class.getSimpleName());
    JavaSparkContext sc = new JavaSparkContext(conf);

    // sample a small fraction of the PDB
    double fraction = 0.001;
    long seed = 123;

    // read the PDB, keep X-ray structures only, and map each entry to a Tuple4
    // of PDB ID, resolution, R-free, and R-work
    MmtfReader.readSequenceFile(path, fraction, seed, sc)
            .filter(new ExperimentalMethods(ExperimentalMethods.X_RAY_DIFFRACTION))
            .map(t -> new Tuple4<String, Float, Float, Float>(t._1, t._2.getResolution(), t._2.getRfree(), t._2.getRwork()))
            .foreach(t -> System.out.println(t));

    sc.close();
}
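A small variation on the example above, as a sketch rather than project code: the same Tuple4 mapping can be followed by a plain Spark filter on the tuple values, for instance to keep only entries with R-free below 0.2, and take(5) to bring a few results to the driver. The 0.2 cutoff and the application name "MapToTuple4Sketch" are illustrative assumptions.

public static void main(String[] args) throws FileNotFoundException {
    String path = MmtfReader.getMmtfReducedPath();

    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("MapToTuple4Sketch");
    JavaSparkContext sc = new JavaSparkContext(conf);

    double fraction = 0.001;
    long seed = 123;

    // map X-ray structures to (PDB ID, resolution, R-free, R-work) tuples,
    // keep entries with R-free below 0.2, and print a small sample on the driver
    MmtfReader.readSequenceFile(path, fraction, seed, sc)
            .filter(new ExperimentalMethods(ExperimentalMethods.X_RAY_DIFFRACTION))
            .map(t -> new Tuple4<String, Float, Float, Float>(t._1, t._2.getResolution(), t._2.getRfree(), t._2.getRwork()))
            .filter(t -> t._3() < 0.2f) // t._3() is R-free
            .take(5)
            .forEach(t -> System.out.println(t));

    sc.close();
}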
Use of edu.sdsc.mmtf.spark.filters.ExperimentalMethods in project mmtf-spark by sbl-sdsc.
The class FilterByExperimentalMethods, method main.
public static void main(String[] args) throws FileNotFoundException {
    String path = MmtfReader.getMmtfReducedPath();

    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(FilterByExperimentalMethods.class.getSimpleName());
    JavaSparkContext sc = new JavaSparkContext(conf);

    // read an MMTF Hadoop sequence file and keep only entries that pass both
    // ExperimentalMethods filters (chained filters act as a logical AND)
    MmtfReader.readSequenceFile(path, sc)
            .filter(new ExperimentalMethods(ExperimentalMethods.NEUTRON_DIFFRACTION))
            .filter(new ExperimentalMethods(ExperimentalMethods.X_RAY_DIFFRACTION))
            .keys()
            .foreach(key -> System.out.println(key)); // print the keys (PDB IDs) using a lambda expression

    sc.close();
}
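As a final sketch (not part of the project), the same filter chain can end in count() instead of printing each key, which is convenient when only the size of the matching subset is of interest; the application name "CountByExperimentalMethods" is hypothetical, and collect() on the keys would likewise bring the PDB IDs to the driver.

public static void main(String[] args) throws FileNotFoundException {
    String path = MmtfReader.getMmtfReducedPath();

    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("CountByExperimentalMethods");
    JavaSparkContext sc = new JavaSparkContext(conf);

    // count the entries that pass both experimental-method filters
    long count = MmtfReader.readSequenceFile(path, sc)
            .filter(new ExperimentalMethods(ExperimentalMethods.NEUTRON_DIFFRACTION))
            .filter(new ExperimentalMethods(ExperimentalMethods.X_RAY_DIFFRACTION))
            .count();

    System.out.println("# matching structures: " + count);

    sc.close();
}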