use of org.apache.spark.mllib.regression.LabeledPoint in project deeplearning4j by deeplearning4j.
the class MLLibUtil method fromContinuousLabeledPoint.
/**
 * Convert an RDD of labeled points to an RDD of DataSets with a continuous (regression) label.
 * Note that this collects all points to the driver before re-parallelizing them.
 * @param sc the Java Spark context
 * @param data the labeled points to convert
 * @return a JavaRDD<DataSet> with a continuous label
 */
@Deprecated
public static JavaRDD<DataSet> fromContinuousLabeledPoint(JavaSparkContext sc, JavaRDD<LabeledPoint> data) {
    List<LabeledPoint> labeledPoints = data.collect();
    List<DataSet> dataSets = new ArrayList<>();
    for (LabeledPoint labeledPoint : labeledPoints) {
        dataSets.add(convertToDataset(labeledPoint));
    }
    return sc.parallelize(dataSets);
}
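The collect-then-parallelize pattern above is why the method is deprecated: every point passes through the driver. A minimal sketch of a distributed alternative, assuming the conversion body matches what convertToDataset does for the continuous-label case (an illustration, not DL4J's actual internals; needs org.nd4j.linalg.dataset.DataSet and org.nd4j.linalg.factory.Nd4j on top of the imports above):
public static JavaRDD<DataSet> fromContinuousLabeledPointMapped(JavaRDD<LabeledPoint> data) {
    // map on the executors; nothing is collected to the driver
    return data.map(lp -> new DataSet(
                    Nd4j.create(lp.features().toArray()),      // features as a 1 x n row vector
                    Nd4j.create(new double[] {lp.label()})));  // single continuous label
}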
use of org.apache.spark.mllib.regression.LabeledPoint in project deeplearning4j by deeplearning4j.
the class TestSparkMultiLayerParameterAveraging method testFromSvmLight.
@Test
public void testFromSvmLight() throws Exception {
    // Load the iris data in SVMLight format and densify the sparse feature vectors
    JavaRDD<LabeledPoint> data = MLUtils
                    .loadLibSVMFile(sc.sc(), new ClassPathResource("svmLight/iris_svmLight_0.txt")
                                    .getTempFileFromArchive().getAbsolutePath())
                    .toJavaRDD().map(new Function<LabeledPoint, LabeledPoint>() {
                        @Override
                        public LabeledPoint call(LabeledPoint v1) throws Exception {
                            return new LabeledPoint(v1.label(), Vectors.dense(v1.features().toArray()));
                        }
                    });
    DataSet d = new IrisDataSetIterator(150, 150).next();
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(123)
                    .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT).iterations(100)
                    .miniBatch(true).maxNumLineSearchIterations(10).list()
                    .layer(0, new RBM.Builder(RBM.HiddenUnit.RECTIFIED, RBM.VisibleUnit.GAUSSIAN)
                                    .nIn(4).nOut(100).weightInit(WeightInit.XAVIER).activation(Activation.RELU)
                                    .lossFunction(LossFunctions.LossFunction.RMSE_XENT).build())
                    .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                                    .nIn(100).nOut(3).activation(Activation.SOFTMAX).weightInit(WeightInit.XAVIER).build())
                    .backprop(false).build();
    MultiLayerNetwork network = new MultiLayerNetwork(conf);
    network.init();
    System.out.println("Initializing network");
    SparkDl4jMultiLayer master = new SparkDl4jMultiLayer(sc, getBasicConf(),
                    new ParameterAveragingTrainingMaster(true, numExecutors(), 1, 5, 1, 0));
    // Fit directly on the RDD of LabeledPoints, then evaluate against the full iris set
    MultiLayerNetwork network2 = master.fitLabeledPoint(data);
    Evaluation evaluation = new Evaluation();
    evaluation.eval(d.getLabels(), network2.output(d.getFeatureMatrix()));
    System.out.println(evaluation.stats());
}
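fitLabeledPoint converts the MLlib points to DL4J DataSets internally before training. A hedged sketch of the same steps done explicitly (the MLLibUtil call and its numPossibleLabels argument are assumptions about this era's API, not a quote of SparkDl4jMultiLayer's internals):
// convert with an explicit class count, then fit on the resulting DataSets
int numClasses = 3; // iris has three label classes
JavaRDD<DataSet> trainData = MLLibUtil.fromLabeledPoint(sc, data, numClasses);
MultiLayerNetwork trained = master.fit(trainData);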
use of org.apache.spark.mllib.regression.LabeledPoint in project deeplearning4j by deeplearning4j.
the class MLLIbUtilTest method testMlLibTest.
@Test
public void testMlLibTest() {
    DataSet dataSet = new IrisDataSetIterator(150, 150).next();
    List<DataSet> list = dataSet.asList();
    JavaRDD<DataSet> data = sc.parallelize(list);
    JavaRDD<LabeledPoint> mllLibData = MLLibUtil.fromDataSet(sc, data);
}
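As written, the test only verifies that the conversion runs without throwing. A hedged extension that actually inspects the result (assumes JUnit's assertEquals is in scope; the round-trip property checked here is an illustration, not part of the original test):
// the first converted LabeledPoint should keep the feature count of the first DataSet
LabeledPoint first = mllLibData.first();
assertEquals(list.get(0).getFeatureMatrix().columns(), first.features().size());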
use of org.apache.spark.mllib.regression.LabeledPoint in project java_study by aloyschen.
the class GbdtAndLr method load_gbdt_data.
/**
 * Reads the positive/negative sample training data; the result is later split into training and test sets.
 * @param data the preprocessed sample data, one row per sample
 * @return the training samples as LabeledPoints, after preprocessing and label/feature handling
 */
private JavaRDD<LabeledPoint> load_gbdt_data(Dataset<Row> data) {
    JavaRDD<LabeledPoint> numerical_labelpoint;
    if (data.rdd().isEmpty()) {
        System.exit(0);
    }
    JavaRDD<Row> numerical_row = data.toJavaRDD();
    numerical_labelpoint = numerical_row.map(row -> {
        // 12 continuous features in total are handed to the GBDT
        double[] features = new double[12];
        // copy each feature column of the row into the dense vector
        for (int i = 6; i < 18; i++) {
            org.apache.spark.mllib.linalg.DenseVector den = null;
            if (row.get(i) instanceof org.apache.spark.ml.linalg.Vector) {
                // densify before the cast so sparse ml vectors cannot throw ClassCastException
                den = (DenseVector) Vectors.fromML(((org.apache.spark.ml.linalg.Vector) row.get(i)).toDense());
                features[i - 6] = den.toArray()[0];
            } else {
                features[i - 6] = Double.parseDouble(row.get(i).toString());
            }
        }
        DenseVector denseVector = new DenseVector(features);
        return new LabeledPoint(Double.parseDouble(row.get(1).toString()), denseVector);
    });
    // // Save the preprocessed data to a local file
    // List<LabeledPoint> data_save = numerical_labelpoint.collect();
    // try {
    //     FileWriter fw = new FileWriter("./pre_data.txt");
    //     BufferedWriter bufferedWriter = new BufferedWriter(fw);
    //     for (LabeledPoint row_data : data_save) {
    //         double[] features = row_data.features().toArray();
    //         for (double element : features) {
    //             bufferedWriter.write(String.valueOf(element));
    //             bufferedWriter.write(";");
    //         }
    //         bufferedWriter.write("\n");
    //     }
    //     bufferedWriter.close();
    // } catch (Exception e) {
    //     e.printStackTrace();
    // }
    System.out.println("Samples count: " + numerical_labelpoint.count());
    return numerical_labelpoint;
}
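The split into training and test sets mentioned above happens on the returned RDD. A minimal sketch of that step (the 80/20 weights and the seed are illustrative assumptions, not from the source):
JavaRDD<LabeledPoint> gbdt_data = load_gbdt_data(data);
// randomSplit weights are normalized to sum to 1; the seed fixes the split for reproducibility
JavaRDD<LabeledPoint>[] splits = gbdt_data.randomSplit(new double[] {0.8, 0.2}, 11L);
JavaRDD<LabeledPoint> train_data = splits[0];
JavaRDD<LabeledPoint> test_data = splits[1];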
use of org.apache.spark.mllib.regression.LabeledPoint in project incubator-sdap-mudrod by apache.
the class SparkSVM method main.
public static void main(String[] args) {
    MudrodEngine me = new MudrodEngine();
    JavaSparkContext jsc = me.startSparkDriver().sc;
    String path = SparkSVM.class.getClassLoader().getResource("inputDataForSVM_spark.txt").toString();
    JavaRDD<LabeledPoint> data = MLUtils.loadLibSVMFile(jsc.sc(), path).toJavaRDD();
    // Run the training algorithm to build the model
    int numIterations = 100;
    final SVMModel model = SVMWithSGD.train(data.rdd(), numIterations);
    // Save the trained model
    model.save(jsc.sc(), SparkSVM.class.getClassLoader().getResource("javaSVMWithSGDModel").toString());
    jsc.sc().stop();
}
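A hedged sketch of loading the saved model back and scoring a single point; it would have to run before jsc.sc().stop(), and the load path here is an assumption mirroring the save path above:
SVMModel sameModel = SVMModel.load(jsc.sc(),
                SparkSVM.class.getClassLoader().getResource("javaSVMWithSGDModel").toString());
// predict returns the class label under the model's default threshold
double label = sameModel.predict(data.first().features());
System.out.println("Predicted label for the first point: " + label);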