Use of edu.sdsc.mmtf.spark.filters.Rfree in project mmtf-spark by sbl-sdsc.
From the class WriteMmtfCustom, method main.
/**
 * Writes a filtered subset of an MMTF Hadoop Sequence file to a new Sequence file.
 *
 * <p>Steps performed:
 * <ol>
 *   <li>reads a random sample (fraction 0.2, seed 123) from the location returned by
 *       {@code MmtfReader.getMmtfFullPath()};</li>
 *   <li>keeps entries that pass the {@code ExperimentalMethods.X_RAY_DIFFRACTION},
 *       {@code Resolution(0, 2.0)} and {@code Rfree(0, 0.2)} filters;</li>
 *   <li>coalesces the result into 8 partitions;</li>
 *   <li>writes it to the input path with the suffix {@code "_xray"};</li>
 *   <li>prints the number of retained structures and the elapsed wall-clock time.</li>
 * </ol>
 *
 * @param args command-line arguments; not read by this method
 * @throws FileNotFoundException declared because of the reader/writer calls; presumably raised
 *         when the MMTF location cannot be resolved (verify against {@code MmtfReader})
 */
public static void main(String[] args) throws FileNotFoundException {
    String path = MmtfReader.getMmtfFullPath();

    long start = System.nanoTime();

    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(WriteMmtfCustom.class.getSimpleName());

    // try-with-resources guarantees the Spark context is closed even when a
    // read, filter, or write step throws; previously close() was skipped on failure.
    try (JavaSparkContext sc = new JavaSparkContext(conf)) {
        // read a 20% random sample of the PDB
        double fraction = 0.2;
        long seed = 123;
        JavaPairRDD<String, StructureDataInterface> pdb = MmtfReader.readSequenceFile(path, fraction, seed, sc);

        // retain high resolution X-ray structures
        pdb = pdb
                .filter(new ExperimentalMethods(ExperimentalMethods.X_RAY_DIFFRACTION))
                .filter(new Resolution(0, 2.0))
                .filter(new Rfree(0, 0.2));

        // coalesce this into 8 partitions to avoid creating many small files
        pdb = pdb.coalesce(8);

        // save this subset in a Hadoop Sequence file
        MmtfWriter.writeSequenceFile(path + "_xray", sc, pdb);

        // NOTE(review): pdb is not cached, so count() presumably re-evaluates the
        // read/filter pipeline after the write above -- consider caching if this is costly.
        System.out.println("# structures in custom set: " + pdb.count());

        long end = System.nanoTime();
        System.out.println("Time: " + (end - start) / 1E9 + "sec.");
    }
}
Aggregations