use of org.locationtech.geowave.core.store.api.DataTypeAdapter in project geowave by locationtech.
the class RasterTileResizeSparkRunner method run.
/**
 * Re-tiles the configured input coverage and writes the result to the output store.
 *
 * <p>Steps: resolve the input coverage's adapter and an index from the input store, register a
 * new {@code RasterDataAdapter} (output coverage name and output tile size) with the output
 * store, load the raw raster RDD from the input store, then map, group and merge the tiles
 * before writing them out.
 *
 * @throws IOException if no input datastore was supplied (declared callees may also raise it)
 * @throws IllegalArgumentException if the input coverage's adapter cannot be resolved, or if no
 *         index can be found in the input store
 */
public void run() throws IOException {
  initContext();
  // Validate inputs
  if (inputStoreOptions == null) {
    LOGGER.error("You must supply an input datastore!");
    throw new IOException("You must supply an input datastore!");
  }
  final String inputCoverageName = rasterResizeOptions.getInputCoverageName();
  final String missingAdapterMessage = "Adapter for coverage '" + inputCoverageName + "' does not exist in namespace '" + inputStoreOptions.getGeoWaveNamespace() + "'";

  final InternalAdapterStore internalAdapterStore = inputStoreOptions.createInternalAdapterStore();
  // Hold the id boxed first: if the lookup yields null for an unknown coverage name, assigning
  // it straight to a primitive would fail with a bare NullPointerException.
  final Short resolvedAdapterId = internalAdapterStore.getAdapterId(inputCoverageName);
  if (resolvedAdapterId == null) {
    throw new IllegalArgumentException(missingAdapterMessage);
  }
  final short internalAdapterId = resolvedAdapterId;

  // Check the intermediate lookup result before dereferencing it; otherwise a missing adapter
  // would fail on the chained call and the descriptive error below would never be reached.
  final org.locationtech.geowave.core.store.adapter.InternalDataAdapter<?> internalAdapter = inputStoreOptions.createAdapterStore().getAdapter(internalAdapterId);
  final DataTypeAdapter<?> adapter = internalAdapter == null ? null : internalAdapter.getAdapter();
  if (adapter == null) {
    throw new IllegalArgumentException(missingAdapterMessage);
  }

  // Prefer the explicitly requested index; otherwise fall back to the first one in the store.
  Index index = null;
  final IndexStore indexStore = inputStoreOptions.createIndexStore();
  if (rasterResizeOptions.getIndexName() != null) {
    index = indexStore.getIndex(rasterResizeOptions.getIndexName());
  }
  if (index == null) {
    try (CloseableIterator<Index> indices = indexStore.getIndices()) {
      // Guard with hasNext(): calling next() on an empty iterator throws
      // NoSuchElementException instead of reaching the descriptive error below.
      if (indices.hasNext()) {
        index = indices.next();
      }
    }
    if (index == null) {
      throw new IllegalArgumentException("Index does not exist in namespace '" + inputStoreOptions.getGeoWaveNamespace() + "'");
    }
  }

  // Derive the output adapter from the input one and register it with the output store.
  final RasterDataAdapter newAdapter = new RasterDataAdapter((RasterDataAdapter) adapter, rasterResizeOptions.getOutputCoverageName(), rasterResizeOptions.getOutputTileSize());
  final DataStore store = outputStoreOptions.createDataStore();
  store.addType(newAdapter, index);
  final short newInternalAdapterId = outputStoreOptions.createInternalAdapterStore().addTypeName(newAdapter.getTypeName());

  // The min/max split hints are handed directly to the loader.
  final JavaPairRDD<GeoWaveInputKey, GridCoverage> inputRDD = GeoWaveRDDLoader.loadRawRasterRDD(jsc.sc(), inputStoreOptions, index.getName(), rasterResizeOptions.getMinSplits(), rasterResizeOptions.getMaxSplits());
  LOGGER.debug("Writing results to output store...");
  RDDUtils.writeRasterToGeoWave(jsc.sc(), index, outputStoreOptions, newAdapter, inputRDD.flatMapToPair(new RasterResizeMappingFunction(internalAdapterId, newInternalAdapterId, newAdapter, index)).groupByKey().map(new MergeRasterFunction(internalAdapterId, newInternalAdapterId, newAdapter, index)));
  LOGGER.debug("Results successfully written!");
}
use of org.locationtech.geowave.core.store.api.DataTypeAdapter in project geowave by locationtech.
the class RDDUtils method writeToGeoWave.
/**
 * Translate a set of objects in a JavaRDD to a provided type and push to GeoWave.
 *
 * <p>Configures {@code GeoWaveOutputFormat} with the given store, index and adapter, pairs every
 * feature with a {@code GeoWaveOutputKey} built from the adapter's type name and the index name,
 * and saves the resulting pair RDD as a new-API Hadoop dataset.
 *
 * @param sc the Spark context; its Hadoop configuration is copied, not modified
 * @param index the index the features are written to
 * @param outputStoreOptions the store that receives the features
 * @param adapter the adapter whose type name keys every written feature
 * @param inputRDD the features to write
 * @throws IOException if the Hadoop job cannot be created
 */
private static void writeToGeoWave(final SparkContext sc, final Index index, final DataStorePluginOptions outputStoreOptions, final DataTypeAdapter adapter, final JavaRDD<SimpleFeature> inputRDD) throws IOException {
  // setup the configuration and the output format
  final Configuration conf = new org.apache.hadoop.conf.Configuration(sc.hadoopConfiguration());
  GeoWaveOutputFormat.setStoreOptions(conf, outputStoreOptions);
  GeoWaveOutputFormat.addIndex(conf, index);
  GeoWaveOutputFormat.addDataAdapter(conf, adapter);
  // create the job via the factory method; the Job(Configuration) constructor is deprecated
  final Job job = Job.getInstance(conf);
  job.setOutputKeyClass(GeoWaveOutputKey.class);
  job.setOutputValueClass(SimpleFeature.class);
  job.setOutputFormatClass(GeoWaveOutputFormat.class);
  // broadcast string names so the mapping lambda captures only the broadcast handles
  final ClassTag<String> stringTag = scala.reflect.ClassTag$.MODULE$.apply(String.class);
  final Broadcast<String> typeName = sc.broadcast(adapter.getTypeName(), stringTag);
  final Broadcast<String> indexName = sc.broadcast(index.getName(), stringTag);
  // map to a pair containing the output key and the output value
  inputRDD.mapToPair(feat -> new Tuple2<>(new GeoWaveOutputKey(typeName.value(), indexName.value()), feat)).saveAsNewAPIHadoopDataset(job.getConfiguration());
}
use of org.locationtech.geowave.core.store.api.DataTypeAdapter in project geowave by locationtech.
the class AbstractLocalFileIngestDriver method runOperation.
/**
 * Runs a local-file ingest over {@code inputPath} using every supported ingest plugin.
 *
 * @param inputPath the path handed to {@code processInput}
 * @param configFile the configuration file handed to {@code processInput}
 * @return {@code true} when processing finished without an {@link IOException}; {@code false}
 *         when one was caught and logged
 */
public boolean runOperation(final String inputPath, final File configFile) {
  // Gather the supported plugins together with every adapter they contribute.
  final Map<String, LocalFileIngestPlugin<?>> supportedPlugins = new HashMap<>();
  final List<DataTypeAdapter<?>> pluginAdapters = new ArrayList<>();
  for (final Entry<String, LocalFileIngestPlugin<?>> candidate : getIngestPlugins().entrySet()) {
    final String pluginName = candidate.getKey();
    final LocalFileIngestPlugin<?> plugin = candidate.getValue();
    if (isSupported(pluginName, plugin)) {
      supportedPlugins.put(pluginName, plugin);
      pluginAdapters.addAll(Arrays.asList(plugin.getDataAdapters()));
    }
  }

  final DataStore targetStore = getDataStore();
  boolean completed = true;
  try (LocalIngestRunData ingestRunData = new LocalIngestRunData(pluginAdapters, targetStore, getVisibilityHandler())) {
    startExecutor();
    processInput(inputPath, configFile, supportedPlugins, ingestRunData);
    // Shut the executor down here, inside the try body, and not only in the finally block:
    // try-with-resources closes the run data (which caches the index writers) before finally
    // runs, so the ingest threads must be finished before that close happens. Otherwise the
    // writers would be closed while file entries are still being processed.
    shutdownExecutor();
  } catch (final IOException e) {
    LOGGER.error("Unexpected I/O exception when reading input files", e);
    completed = false;
  } finally {
    // Also covers the paths where the try body exited early.
    shutdownExecutor();
  }
  return completed;
}
use of org.locationtech.geowave.core.store.api.DataTypeAdapter in project geowave by locationtech.
the class BasicMapReduceIT method testIngestOsmGpxMultipleIndices.
/**
 * Ingests the OSM GPX data set into multiple indices via map-reduce, captures the expected
 * results (per type, for the first two types combined, and for the full data set), then exports
 * and re-ingests the data and re-runs the queries as test jobs against those expectations.
 *
 * @throws Exception if any of the ingest, export or query steps fails
 */
@Test
public void testIngestOsmGpxMultipleIndices() throws Exception {
  TestUtils.deleteAll(dataStorePluginOptions);
  // ingest the data set into multiple indices and then try several query
  // methods, by adapter and by index
  MapReduceTestUtils.testMapReduceIngest(dataStorePluginOptions, DimensionalityType.SPATIAL_AND_SPATIAL_TEMPORAL, OSM_GPX_INPUT_DIR);
  final DataTypeAdapter<SimpleFeature>[] adapters = new GpxIngestPlugin().getDataAdapters(null);
  final org.locationtech.geowave.core.store.api.DataStore geowaveStore = dataStorePluginOptions.createDataStore();
  // expected results keyed by the adapter's type name
  final Map<String, ExpectedResults> typeNameToResultsMap = new HashMap<>();
  for (final DataTypeAdapter<SimpleFeature> adapter : adapters) {
    typeNameToResultsMap.put(adapter.getTypeName(), TestUtils.getExpectedResults(geowaveStore.query(QueryBuilder.newBuilder().addTypeName(adapter.getTypeName()).build())));
  }
  final ExpectedResults firstTwoAdaptersResults = TestUtils.getExpectedResults(geowaveStore.query(QueryBuilder.newBuilder().addTypeName(adapters[0].getTypeName()).addTypeName(adapters[1].getTypeName()).build()));
  final ExpectedResults fullDataSetResults = TestUtils.getExpectedResults(geowaveStore.query(QueryBuilder.newBuilder().build()));
  // just for sanity verify its greater than 0 (ie. that data was actually
  // ingested in the first place)
  Assert.assertTrue("There is no data ingested from OSM GPX test files", fullDataSetResults.count > 0);
  // now that we have expected results, run map-reduce export and
  // re-ingest it
  testMapReduceExportAndReingest(DimensionalityType.SPATIAL_AND_SPATIAL_TEMPORAL);
  // first try each adapter individually
  for (final DataTypeAdapter<SimpleFeature> adapter : adapters) {
    final ExpectedResults expResults = typeNameToResultsMap.get(adapter.getTypeName());
    if (expResults.count > 0) {
      // progress message, not a failure: log at INFO rather than ERROR
      LOGGER.info("Running test for adapter " + adapter.getTypeName());
      runTestJob(expResults, null, new DataTypeAdapter[] { adapter }, null);
    }
  }
  // then try the first 2 adapters, and may as well try with both indices
  // set (should be the default behavior anyways)
  runTestJob(firstTwoAdaptersResults, null, new DataTypeAdapter[] { adapters[0], adapters[1] }, null);
  // now try all adapters and the spatial temporal index, the result
  // should be the full data set
  runTestJob(fullDataSetResults, null, adapters, TestUtils.DEFAULT_SPATIAL_TEMPORAL_INDEX);
  // and finally run with nothing set, should be the full data set
  runTestJob(fullDataSetResults, null, null, null);
}
use of org.locationtech.geowave.core.store.api.DataTypeAdapter in project geowave by locationtech.
the class AbstractGeoWaveBasicVectorIT method testStats.
/**
 * Recomputes statistics from the raw input files and compares them with what the data store
 * holds for every persisted adapter.
 *
 * <p>For each adapter the test checks the number of data-type plus field statistics, every
 * expected statistic value (binned and unbinned), the bounding box (queried with and without an
 * explicit field name), and finally that the bounding box statistic can be removed individually.
 *
 * @param inputFiles the files the expected statistics are calculated from
 * @param multithreaded NOTE(review): not read anywhere in this method body; the historical
 *        comment said that in the multithreaded case only min/max and count are tested because
 *        stats are ingested in a different order - confirm whether that still applies
 * @param crs the coordinate reference system handed to each {@code StatisticsCache}
 * @param indices the indices whose names are passed to the ingest plugin
 */
@SuppressWarnings("unchecked")
protected void testStats(final URL[] inputFiles, final boolean multithreaded, final CoordinateReferenceSystem crs, final Index... indices) {
  final LocalFileIngestPlugin<SimpleFeature> ingestPlugin = new GeoToolsVectorDataStoreIngestPlugin(Filter.INCLUDE);
  final Map<String, StatisticsCache> expectedByTypeName = new HashMap<>();
  final String[] indexNames = Arrays.stream(indices).map(Index::getName).toArray(String[]::new);

  // Pass 1: replay every input file and accumulate the expected statistics per type name.
  for (final URL inputFile : inputFiles) {
    LOGGER.warn("Calculating stats from file '" + inputFile.getPath() + "' - this may take several minutes...");
    try (final CloseableIterator<GeoWaveData<SimpleFeature>> entries = ingestPlugin.toGeoWaveData(inputFile, indexNames)) {
      final TransientAdapterStore adapterCache = new MemoryAdapterStore(ingestPlugin.getDataAdapters());
      while (entries.hasNext()) {
        final GeoWaveData<SimpleFeature> entry = entries.next();
        final DataTypeAdapter<SimpleFeature> entryAdapter = entry.getAdapter(adapterCache);
        // only adapters that provide default statistics take part in the comparison
        if (!(entryAdapter instanceof DefaultStatisticsProvider)) {
          continue;
        }
        expectedByTypeName.computeIfAbsent(entryAdapter.getTypeName(), name -> new StatisticsCache(entryAdapter, crs)).entryIngested(entry.getValue());
      }
    }
  }

  // Pass 2: compare the accumulated expectations with the statistics held by the store.
  final DataStatisticsStore statsStore = getDataStorePluginOptions().createDataStatisticsStore();
  final PersistentAdapterStore adapterStore = getDataStorePluginOptions().createAdapterStore();
  for (final InternalDataAdapter<?> internalAdapter : adapterStore.getAdapters()) {
    final FeatureDataAdapter featureAdapter = (FeatureDataAdapter) internalAdapter.getAdapter();
    final String typeName = featureAdapter.getTypeName();
    final StatisticsCache expected = expectedByTypeName.get(typeName);
    Assert.assertNotNull(expected);
    final Set<Entry<Statistic<?>, Map<ByteArray, StatisticValue<?>>>> expectedStats = expected.statsCache.entrySet();

    // Count data-type statistics and field statistics together.
    int storedStatCount = 0;
    try (CloseableIterator<? extends Statistic<? extends StatisticValue<?>>> typeStats = statsStore.getDataTypeStatistics(featureAdapter, null, null)) {
      while (typeStats.hasNext()) {
        typeStats.next();
        ++storedStatCount;
      }
    }
    try (CloseableIterator<? extends Statistic<? extends StatisticValue<?>>> fieldStats = statsStore.getFieldStatistics(featureAdapter, null, null, null)) {
      while (fieldStats.hasNext()) {
        fieldStats.next();
        ++storedStatCount;
      }
    }
    Assert.assertEquals("The number of stats for data adapter '" + typeName + "' do not match count expected", expectedStats.size(), storedStatCount);

    // Compare each expected value, per bin, with the value held by the store.
    for (final Entry<Statistic<?>, Map<ByteArray, StatisticValue<?>>> expectedStat : expectedStats) {
      final Statistic<StatisticValue<Object>> statistic = (Statistic<StatisticValue<Object>>) expectedStat.getKey();
      for (final Entry<ByteArray, StatisticValue<?>> expectedBin : expectedStat.getValue().entrySet()) {
        final ByteArray bin = expectedBin.getKey();
        final StatisticValue<Object> actual;
        if (bin.equals(StatisticValue.NO_BIN)) {
          actual = statsStore.getStatisticValue(statistic);
        } else {
          actual = statsStore.getStatisticValue(statistic, bin);
        }
        assertEquals(expectedBin.getValue().getValue(), actual.getValue());
      }
    }

    // The bounding box is calculated more manually, so it is validated separately:
    // first with the geometry field named explicitly...
    final String geometryField = featureAdapter.getFeatureType().getGeometryDescriptor().getLocalName();
    final StatisticQuery<BoundingBoxValue, Envelope> byFieldQuery = StatisticQueryBuilder.newBuilder(BoundingBoxStatistic.STATS_TYPE).fieldName(geometryField).typeName(typeName).build();
    final BoundingBoxValue byFieldBBox = getDataStorePluginOptions().createDataStore().aggregateStatistics(byFieldQuery);
    validateBBox(byFieldBBox.getValue(), expected);
    // ...then without a field name, which should work because there is only one geometry field.
    final StatisticQuery<BoundingBoxValue, Envelope> byTypeQuery = StatisticQueryBuilder.newBuilder(BoundingBoxStatistic.STATS_TYPE).typeName(typeName).build();
    final BoundingBoxValue byTypeBBox = getDataStorePluginOptions().createDataStore().aggregateStatistics(byTypeQuery);
    validateBBox(byTypeBBox.getValue(), expected);

    // Finally remove the bounding box statistic on its own and confirm it is gone.
    final StatisticId<BoundingBoxValue> bboxStatId = FieldStatistic.generateStatisticId(typeName, BoundingBoxStatistic.STATS_TYPE, geometryField, Statistic.INTERNAL_TAG);
    Assert.assertTrue("Unable to remove individual stat", statsStore.removeStatistic(statsStore.getStatisticById(bboxStatId)));
    Assert.assertNull("Individual stat was not successfully removed", statsStore.getStatisticById(bboxStatId));
  }
}
Aggregations