Use of org.tribuo.provenance.DataSourceProvenance in project tribuo by oracle.
From the class LabelledDataGenerator, method sparseTrainTest.
/**
 * Builds a small train/test pair of sparse classification datasets.
 * <p>
 * Each example carries four named features, but the names vary from one
 * example to the next. The training set holds twelve examples, three for
 * each of the 4 classes {Foo,Bar,Baz,Quux}; the test set holds one example
 * per class and uses some feature names (AA, BB, CC, DD, EE) which never
 * occur in the training data.
 * @param negate Multiplier applied to selected feature values, supply -1.0 to negate them.
 * @return A pair holding the training dataset followed by the test dataset.
 */
public static Pair<Dataset<Label>, Dataset<Label>> sparseTrainTest(double negate) {
    DataSourceProvenance trainProvenance = new SimpleDataSourceProvenance("TrainingData", OffsetDateTime.now(), labelFactory);
    MutableDataset<Label> train = new MutableDataset<>(trainProvenance, labelFactory);

    // Class Foo
    train.add(new ArrayExample<>(new Label("Foo"),
            new String[] { "A", "B", "C", "D" },
            new double[] { 1.0, 0.5, 1.0, negate * 1.0 }));
    train.add(new ArrayExample<>(new Label("Foo"),
            new String[] { "B", "D", "F", "H" },
            new double[] { 1.5, 0.35, 1.3, negate * 1.2 }));
    train.add(new ArrayExample<>(new Label("Foo"),
            new String[] { "A", "J", "D", "M" },
            new double[] { 1.2, 0.45, 1.5, negate * 1.0 }));

    // Class Bar
    train.add(new ArrayExample<>(new Label("Bar"),
            new String[] { "C", "E", "F", "H" },
            new double[] { negate * 1.1, 0.55, negate * 1.5, 0.5 }));
    train.add(new ArrayExample<>(new Label("Bar"),
            new String[] { "E", "G", "F", "I" },
            new double[] { negate * 1.5, 0.25, negate * 1.0, 0.125 }));
    train.add(new ArrayExample<>(new Label("Bar"),
            new String[] { "J", "K", "C", "E" },
            new double[] { negate * 1.0, 0.5, negate * 1.123, 0.123 }));

    // Class Baz
    train.add(new ArrayExample<>(new Label("Baz"),
            new String[] { "E", "A", "K", "J" },
            new double[] { 1.5, 5.0, 0.5, 4.5 }));
    train.add(new ArrayExample<>(new Label("Baz"),
            new String[] { "B", "C", "E", "H" },
            new double[] { 1.234, 5.1235, 0.1235, 6.0 }));
    train.add(new ArrayExample<>(new Label("Baz"),
            new String[] { "A", "M", "I", "J" },
            new double[] { 1.734, 4.5, 0.5123, 5.5 }));

    // Class Quux
    train.add(new ArrayExample<>(new Label("Quux"),
            new String[] { "Z", "A", "B", "C" },
            new double[] { negate * 1.0, 0.25, 5.0, 10.0 }));
    train.add(new ArrayExample<>(new Label("Quux"),
            new String[] { "K", "V", "E", "D" },
            new double[] { negate * 1.4, 0.55, 5.65, 12.0 }));
    train.add(new ArrayExample<>(new Label("Quux"),
            new String[] { "B", "G", "E", "A" },
            new double[] { negate * 1.9, 0.25, 5.9, 15.0 }));

    // The test provenance is created only after the training data is populated.
    DataSourceProvenance testProvenance = new SimpleDataSourceProvenance("TestingData", OffsetDateTime.now(), labelFactory);
    MutableDataset<Label> test = new MutableDataset<>(testProvenance, labelFactory);

    // One test example per class; AA, BB, CC, DD and EE do not appear in the training data.
    test.add(new ArrayExample<>(new Label("Foo"),
            new String[] { "AA", "B", "C", "D" },
            new double[] { 2.0, 0.45, 3.5, negate * 2.0 }));
    test.add(new ArrayExample<>(new Label("Bar"),
            new String[] { "B", "BB", "F", "E" },
            new double[] { negate * 2.0, 0.55, negate * 2.5, 2.5 }));
    test.add(new ArrayExample<>(new Label("Baz"),
            new String[] { "B", "E", "G", "H" },
            new double[] { 1.75, 5.0, 1.0, 6.5 }));
    test.add(new ArrayExample<>(new Label("Quux"),
            new String[] { "B", "CC", "DD", "EE" },
            new double[] { negate * 1.5, 0.25, 5.0, 20.0 }));

    return new Pair<>(train, test);
}
Use of org.tribuo.provenance.DataSourceProvenance in project tribuo by oracle.
From the class MultiLabelDataGenerator, method generateTestData.
/**
 * Builds a tiny fixed dataset for checking multi-label trainers.
 * <p>
 * There are four examples, each with the three features A-MONKEY, B-PUZZLE
 * and C-TREE. The first example has every feature set to 1.0 and its output
 * is generated from the string "MONKEY,PUZZLE,TREE"; each of the remaining
 * examples has a single feature set to 1.0 (the others 0.0) and its output
 * is generated from the matching single name.
 * @return Simple 3 class test data.
 */
public static Dataset<MultiLabel> generateTestData() {
    DataSourceProvenance provenance = new SimpleDataSourceProvenance("TestingData", OffsetDateTime.now(), factory);
    MutableDataset<MultiLabel> dataset = new MutableDataset<>(provenance, factory);

    final String[] featureNames = { "A-MONKEY", "B-PUZZLE", "C-TREE" };
    // Row i of featureValues belongs to the output string outputs[i].
    final String[] outputs = { "MONKEY,PUZZLE,TREE", "MONKEY", "PUZZLE", "TREE" };
    final double[][] featureValues = {
        { 1.0, 1.0, 1.0 },
        { 1.0, 0.0, 0.0 },
        { 0.0, 1.0, 0.0 },
        { 0.0, 0.0, 1.0 }
    };

    for (int row = 0; row < outputs.length; row++) {
        ArrayExample<MultiLabel> example = new ArrayExample<>(factory.generateOutput(outputs[row]));
        // Features are added one at a time, in name order, including the zero-valued ones.
        for (int col = 0; col < featureNames.length; col++) {
            example.add(new Feature(featureNames[col], featureValues[row][col]));
        }
        dataset.add(example);
    }

    return dataset;
}
Use of org.tribuo.provenance.DataSourceProvenance in project tribuo by oracle.
From the class LabelledDataGenerator, method denseTrainTest.
/**
 * Builds a small train/test pair of dense classification datasets.
 * <p>
 * Every example has the same 4 features, {A,B,C,D}, and there are 4 classes,
 * {Foo,Bar,Baz,Quux}. The training set holds three examples per class and
 * the test set holds one example per class.
 * @param negate Multiplier applied to selected feature values, supply -1.0 to make them negative.
 * @return A pair holding the training dataset followed by the test dataset.
 */
public static Pair<Dataset<Label>, Dataset<Label>> denseTrainTest(double negate) {
    DataSourceProvenance trainProvenance = new SimpleDataSourceProvenance("TrainingData", OffsetDateTime.now(), labelFactory);
    MutableDataset<Label> train = new MutableDataset<>(trainProvenance, labelFactory);

    final String[] featureNames = { "A", "B", "C", "D" };

    // Row i of trainValues belongs to the class trainLabels[i].
    final String[] trainLabels = {
        "Foo", "Foo", "Foo",
        "Bar", "Bar", "Bar",
        "Baz", "Baz", "Baz",
        "Quux", "Quux", "Quux"
    };
    final double[][] trainValues = {
        { 1.0, 0.5, 1.0, negate * 1.0 },
        { 1.5, 0.35, 1.3, negate * 1.2 },
        { 1.2, 0.45, 1.5, negate * 1.0 },
        { negate * 1.1, 0.55, negate * 1.5, 0.5 },
        { negate * 1.5, 0.25, negate * 1.0, 0.125 },
        { negate * 1.0, 0.5, negate * 1.123, 0.123 },
        { 1.5, 5.0, 0.5, 4.5 },
        { 1.234, 5.1235, 0.1235, 6.0 },
        { 1.734, 4.5, 0.5123, 5.5 },
        { negate * 1.0, 0.25, 5.0, 10.0 },
        { negate * 1.4, 0.55, 5.65, 12.0 },
        { negate * 1.9, 0.25, 5.9, 15.0 }
    };
    for (int i = 0; i < trainLabels.length; i++) {
        // Each example is handed its own copy of the feature name array.
        train.add(new ArrayExample<>(new Label(trainLabels[i]), featureNames.clone(), trainValues[i]));
    }

    // The test provenance is created only after the training data is populated.
    DataSourceProvenance testProvenance = new SimpleDataSourceProvenance("TestingData", OffsetDateTime.now(), labelFactory);
    MutableDataset<Label> test = new MutableDataset<>(testProvenance, labelFactory);

    final String[] testLabels = { "Foo", "Bar", "Baz", "Quux" };
    final double[][] testValues = {
        { 2.0, 0.45, 3.5, negate * 2.0 },
        { negate * 2.0, 0.55, negate * 2.5, 2.5 },
        { 1.75, 5.0, 1.0, 6.5 },
        { negate * 1.5, 0.25, 5.0, 20.0 }
    };
    for (int i = 0; i < testLabels.length; i++) {
        test.add(new ArrayExample<>(new Label(testLabels[i]), featureNames.clone(), testValues[i]));
    }

    return new Pair<>(train, test);
}
Use of org.tribuo.provenance.DataSourceProvenance in project tribuo by oracle.
From the class LabelledDataGenerator, method binarySparseTrainTest.
/**
 * Builds a small train/test pair of sparse binary classification datasets.
 * <p>
 * Each example carries four named features, but the names vary from one
 * example to the next. The training set holds twelve examples, six for each
 * of the binary labels {Foo,Bar}; the test set holds two examples per label
 * and uses some feature names (AA, BB, CC, DD, EE) which never occur in the
 * training data.
 * @param negate Multiplier applied to selected feature values, supply -1.0 to negate them.
 * @return A pair holding the training dataset followed by the test dataset.
 */
public static Pair<Dataset<Label>, Dataset<Label>> binarySparseTrainTest(double negate) {
    DataSourceProvenance trainProvenance = new SimpleDataSourceProvenance("TrainingData", OffsetDateTime.now(), labelFactory);
    MutableDataset<Label> train = new MutableDataset<>(trainProvenance, labelFactory);

    // First group of Foo examples
    train.add(new ArrayExample<>(new Label("Foo"),
            new String[] { "A", "B", "C", "D" },
            new double[] { 1.0, 0.5, 1.0, negate * 1.0 }));
    train.add(new ArrayExample<>(new Label("Foo"),
            new String[] { "B", "D", "F", "H" },
            new double[] { 1.5, 0.35, 1.3, negate * 1.2 }));
    train.add(new ArrayExample<>(new Label("Foo"),
            new String[] { "A", "J", "D", "M" },
            new double[] { 1.2, 0.45, 1.5, negate * 1.0 }));

    // First group of Bar examples
    train.add(new ArrayExample<>(new Label("Bar"),
            new String[] { "C", "E", "F", "H" },
            new double[] { negate * 1.1, 0.55, negate * 1.5, 0.5 }));
    train.add(new ArrayExample<>(new Label("Bar"),
            new String[] { "E", "G", "F", "I" },
            new double[] { negate * 1.5, 0.25, negate * 1.0, 0.125 }));
    train.add(new ArrayExample<>(new Label("Bar"),
            new String[] { "J", "K", "C", "E" },
            new double[] { negate * 1.0, 0.5, negate * 1.123, 0.123 }));

    // Second group of Foo examples
    train.add(new ArrayExample<>(new Label("Foo"),
            new String[] { "E", "A", "K", "J" },
            new double[] { 1.5, 5.0, 0.5, 4.5 }));
    train.add(new ArrayExample<>(new Label("Foo"),
            new String[] { "B", "C", "E", "H" },
            new double[] { 1.234, 5.1235, 0.1235, 6.0 }));
    train.add(new ArrayExample<>(new Label("Foo"),
            new String[] { "A", "M", "I", "J" },
            new double[] { 1.734, 4.5, 0.5123, 5.5 }));

    // Second group of Bar examples
    train.add(new ArrayExample<>(new Label("Bar"),
            new String[] { "Z", "A", "B", "C" },
            new double[] { negate * 1.0, 0.25, 5.0, 10.0 }));
    train.add(new ArrayExample<>(new Label("Bar"),
            new String[] { "K", "V", "E", "D" },
            new double[] { negate * 1.4, 0.55, 5.65, 12.0 }));
    train.add(new ArrayExample<>(new Label("Bar"),
            new String[] { "B", "G", "E", "A" },
            new double[] { negate * 1.9, 0.25, 5.9, 15.0 }));

    // The test provenance is created only after the training data is populated.
    DataSourceProvenance testProvenance = new SimpleDataSourceProvenance("TestingData", OffsetDateTime.now(), labelFactory);
    MutableDataset<Label> test = new MutableDataset<>(testProvenance, labelFactory);

    // AA, BB, CC, DD and EE do not appear in the training data.
    test.add(new ArrayExample<>(new Label("Foo"),
            new String[] { "AA", "B", "C", "D" },
            new double[] { 2.0, 0.45, 3.5, negate * 2.0 }));
    test.add(new ArrayExample<>(new Label("Bar"),
            new String[] { "B", "BB", "F", "E" },
            new double[] { negate * 2.0, 0.55, negate * 2.5, 2.5 }));
    test.add(new ArrayExample<>(new Label("Foo"),
            new String[] { "B", "E", "G", "H" },
            new double[] { 1.75, 5.0, 1.0, 6.5 }));
    test.add(new ArrayExample<>(new Label("Bar"),
            new String[] { "B", "CC", "DD", "EE" },
            new double[] { negate * 1.5, 0.25, 5.0, 20.0 }));

    return new Pair<>(train, test);
}
Use of org.tribuo.provenance.DataSourceProvenance in project tribuo by oracle.
From the class RegressionDataGenerator, method denseTrainTest.
/**
 * Builds a small train/test pair of dense single-dimension regression datasets.
 * <p>
 * Every example has the same 4 features, {A,B,C,D}, and a single regression
 * target named by SINGLE_DIM_NAME. The training set holds twelve examples
 * and the test set holds four.
 * @param negate Multiplier applied to selected feature values, supply -1.0 to negate them.
 * @return A pair holding the training dataset followed by the test dataset.
 */
public static Pair<Dataset<Regressor>, Dataset<Regressor>> denseTrainTest(double negate) {
    DataSourceProvenance trainProvenance = new SimpleDataSourceProvenance("TrainingData", OffsetDateTime.now(), REGRESSION_FACTORY);
    MutableDataset<Regressor> train = new MutableDataset<>(trainProvenance, REGRESSION_FACTORY);

    // The same name array instance is passed to every example in both datasets.
    final String[] featureNames = { "A", "B", "C", "D" };

    // Row i of trainValues is paired with the regression target trainTargets[i].
    final double[] trainTargets = {
        5.0, 5.8, 8.0,
        10.0, 10.0, 10.0,
        20, 20, 20,
        50, 50, 50
    };
    final double[][] trainValues = {
        { 1.0, 0.5, 1.0, negate * 1.0 },
        { 1.5, 0.35, 1.3, negate * 1.2 },
        { 1.2, 0.45, 1.5, negate * 1.0 },
        { negate * 1.1, 0.55, negate * 1.5, 0.5 },
        { negate * 1.5, 0.25, negate * 1.0, 0.125 },
        { negate * 1.0, 0.5, negate * 1.123, 0.123 },
        { 1.5, 5.0, 0.5, 4.5 },
        { 1.234, 5.1235, 0.1235, 6.0 },
        { 1.734, 4.5, 0.5123, 5.5 },
        { negate * 1.0, 0.25, 5.0, 10.0 },
        { negate * 1.4, 0.55, 5.65, 12.0 },
        { negate * 1.9, 0.25, 5.9, 15.0 }
    };
    for (int i = 0; i < trainTargets.length; i++) {
        train.add(new ArrayExample<>(new Regressor(SINGLE_DIM_NAME, trainTargets[i]), featureNames, trainValues[i]));
    }

    // The test provenance is created only after the training data is populated.
    DataSourceProvenance testProvenance = new SimpleDataSourceProvenance("TestingData", OffsetDateTime.now(), REGRESSION_FACTORY);
    MutableDataset<Regressor> test = new MutableDataset<>(testProvenance, REGRESSION_FACTORY);

    final double[] testTargets = { 5.1, 10.0, 20, 50 };
    final double[][] testValues = {
        { 2.0, 0.45, 3.5, negate * 2.0 },
        { negate * 2.0, 0.55, negate * 2.5, 2.5 },
        { 1.75, 5.0, 1.0, 6.5 },
        { negate * 1.5, 0.25, 5.0, 20.0 }
    };
    for (int i = 0; i < testTargets.length; i++) {
        test.add(new ArrayExample<>(new Regressor(SINGLE_DIM_NAME, testTargets[i]), featureNames, testValues[i]));
    }

    return new Pair<>(train, test);
}
Aggregations