Usage of org.locationtech.geowave.core.store.statistics.index.IndexMetaDataSetStatistic.IndexMetaDataSetValue in the locationtech/geowave project: the testInternalStatistics method of the GeoWaveStatisticsIT class.
/**
 * Verifies the internally-maintained index statistics after ingest: the per-strategy index
 * metadata entries, the duplicate count, the differing-visibility count, and the visibility
 * count map.
 */
@Test
public void testInternalStatistics() throws IllegalArgumentException, IllegalAccessException, NoSuchFieldException, SecurityException {
  final PersistentAdapterStore adapterStore = dataStore.createAdapterStore();
  final DataStatisticsStore statsStore = dataStore.createDataStatisticsStore();
  final InternalAdapterStore internalAdapterStore = dataStore.createInternalAdapterStore();
  final Index index = SimpleIngest.createSpatialIndex();
  final Collection<Short> adapterIds =
      Collections.singletonList(internalAdapterStore.getAdapterId(SimpleIngest.FEATURE_NAME));
  final IndexMetaDataSetValue ims =
      InternalStatisticsHelper.getIndexMetadata(index, adapterIds, adapterStore, statsStore);
  // the spatial index metadata carries one entry per underlying strategy: a tiered SFC
  // strategy and an XZ-hierarchical strategy
  assertEquals(2, ims.getValue().size());
  assertTrue(ims.getValue().get(0) instanceof TieredSFCIndexStrategy.TierIndexMetaData);
  // the tiered strategy should be empty so it should look like the original empty metadata
  assertEquals(
      SimpleIngest.createSpatialIndex().getIndexStrategy().createMetaData().get(0).toString(),
      ((TieredSFCIndexStrategy.TierIndexMetaData) ims.getValue().get(0)).toString());
  assertTrue(
      ims.getValue().get(1) instanceof XZHierarchicalIndexStrategy.XZHierarchicalIndexMetaData);
  // all 20 ingested features should register on the point curve, none on the XZ curve
  assertEquals(20, readPrivateIntField(ims.getValue().get(1), "pointCurveCount"));
  assertEquals(0, readPrivateIntField(ims.getValue().get(1), "xzCurveCount"));
  // duplicate count should be empty
  assertEquals(
      0L,
      InternalStatisticsHelper.getDuplicateCounts(
          index,
          adapterIds,
          adapterStore,
          statsStore).getValue().longValue());
  // differing visibility count should be empty
  assertEquals(
      0L,
      InternalStatisticsHelper.getDifferingVisibilityCounts(
          index,
          adapterIds,
          adapterStore,
          statsStore).getValue().longValue());
  // visibility count should have 20 empty visibilities
  final Map<ByteArray, Long> visMap =
      InternalStatisticsHelper.getVisibilityCounts(
          index,
          adapterIds,
          adapterStore,
          statsStore).getValue();
  assertEquals(1, visMap.size());
  assertEquals(20L, visMap.get(new ByteArray("")).longValue());
}

/**
 * Reads a private {@code int} field of {@code XZHierarchicalIndexMetaData} via reflection.
 * Used instead of opening up accessors in production code just for this test.
 *
 * @param instance the metadata instance to read from
 * @param fieldName the declared field name (e.g. "pointCurveCount")
 * @return the field's current int value
 * @throws NoSuchFieldException if the field does not exist on the metadata class
 * @throws IllegalAccessException if the field cannot be made accessible
 */
private static int readPrivateIntField(final Object instance, final String fieldName)
    throws NoSuchFieldException, IllegalAccessException {
  final Field field =
      XZHierarchicalIndexStrategy.XZHierarchicalIndexMetaData.class.getDeclaredField(fieldName);
  field.setAccessible(true);
  return field.getInt(instance);
}
Usage of org.locationtech.geowave.core.store.statistics.index.IndexMetaDataSetStatistic.IndexMetaDataSetValue in the locationtech/geowave project: the getSplits method of the SplitsProvider class.
/**
 * Read the metadata table to get tablets and match up ranges to them.
 *
 * <p>For every index/adapter pairing produced for the query, this resolves any
 * adapter-and-index dependent constraints, fetches index metadata statistics, and builds a
 * sorted set of intermediate splits. The set is then repeatedly split (to reach
 * {@code minSplits}) or merged (to stay within {@code maxSplits}) before each intermediate
 * split is converted into a final {@link InputSplit}.
 *
 * @param operations data store operations used to enumerate row ranges
 * @param commonOptions supplies authorizations and hints (e.g. target resolution per dimension)
 * @param typeOptions restricts which type names are considered
 * @param indexOptions restricts which index name is considered
 * @param constraints the query constraints; may be adapter/index dependent
 * @param adapterStore transient adapter store for type lookups
 * @param statsStore statistics store consulted for index metadata and range histograms
 * @param internalAdapterStore maps between type names and internal (short) adapter ids
 * @param indexStore index store used to resolve indices
 * @param adapterIndexMappingStore provides the adapter-to-index field mappings
 * @param context the Hadoop job context (its configuration may be updated with rewritten
 *        constraints)
 * @param minSplits minimum number of splits to produce, or null for no minimum
 * @param maxSplits maximum number of splits to produce, or null/non-positive for no maximum
 * @return one InputSplit per remaining intermediate split
 * @throws IOException if reading ranges or statistics fails
 * @throws InterruptedException if the operation is interrupted
 */
public List<InputSplit> getSplits(final DataStoreOperations operations, final CommonQueryOptions commonOptions, final DataTypeQueryOptions<?> typeOptions, final IndexQueryOptions indexOptions, final QueryConstraints constraints, final TransientAdapterStore adapterStore, final DataStatisticsStore statsStore, final InternalAdapterStore internalAdapterStore, final IndexStore indexStore, final AdapterIndexMappingStore adapterIndexMappingStore, final JobContext context, final Integer minSplits, final Integer maxSplits) throws IOException, InterruptedException {
// cache of row-range histograms keyed by (index, ByteArray) — presumably the partition key;
// TODO(review) confirm against RowRangeHistogramValue usage in populateIntermediateSplits
final Map<Pair<Index, ByteArray>, RowRangeHistogramValue> statsCache = new HashMap<>();
final List<InputSplit> retVal = new ArrayList<>();
// TreeSet keeps intermediate splits ordered so pollFirst()/pollLast() select the
// smallest/largest split for merging/splitting below
final TreeSet<IntermediateSplitInfo> splits = new TreeSet<>();
final Map<String, List<Short>> indexIdToAdaptersMap = new HashMap<>();
for (final Pair<Index, List<Short>> indexAdapterIdPair : BaseDataStoreUtils.getAdaptersWithMinimalSetOfIndices(typeOptions.getTypeNames(), indexOptions.getIndexName(), adapterStore, internalAdapterStore, adapterIndexMappingStore, indexStore, constraints)) {
QueryConstraints indexAdapterConstraints;
if (constraints instanceof AdapterAndIndexBasedQueryConstraints) {
final List<Short> adapters = indexAdapterIdPair.getRight();
DataTypeAdapter<?> adapter = null;
// adapter-dependent constraints can only be resolved for a single
// types/adapters
if (adapters.size() == 1) {
final String typeName = internalAdapterStore.getTypeName(adapters.get(0));
if (typeName != null) {
adapter = adapterStore.getAdapter(typeName);
}
}
if (adapter == null) {
// fall back to the raw constraints when no single adapter can be resolved
indexAdapterConstraints = constraints;
LOGGER.info("Unable to find type matching an adapter dependent query");
} else {
indexAdapterConstraints = ((AdapterAndIndexBasedQueryConstraints) constraints).createQueryConstraints(adapter.asInternalAdapter(adapters.get(0)), indexAdapterIdPair.getLeft(), adapterIndexMappingStore.getMapping(adapters.get(0), indexAdapterIdPair.getLeft().getName()));
if (indexAdapterConstraints == null) {
// the adapter produced no constraints for this index; skip the pairing entirely
continue;
}
// make sure we pass along the new constraints to the record
// reader - for spark on YARN (not locally though), job
// configuration is immutable so while picking up the
// appropriate constraint from the configuration is more
// efficient, also do a check for
// AdapterAndIndexBasedQueryConstraints within the Record Reader
// itself
GeoWaveInputFormat.setQueryConstraints(context.getConfiguration(), indexAdapterConstraints);
}
} else {
indexAdapterConstraints = constraints;
}
indexIdToAdaptersMap.put(indexAdapterIdPair.getKey().getName(), indexAdapterIdPair.getValue());
IndexMetaData[] indexMetadata = null;
if (indexAdapterConstraints != null) {
// index metadata statistics can refine range computation for hierarchical indices
final IndexMetaDataSetValue statValue = InternalStatisticsHelper.getIndexMetadata(indexAdapterIdPair.getLeft(), indexAdapterIdPair.getRight(), new AdapterStoreWrapper(adapterStore, internalAdapterStore), statsStore, commonOptions.getAuthorizations());
if (statValue != null) {
indexMetadata = statValue.toArray();
}
}
populateIntermediateSplits(splits, operations, indexAdapterIdPair.getLeft(), indexAdapterIdPair.getValue(), statsCache, adapterStore, internalAdapterStore, statsStore, maxSplits, indexAdapterConstraints, (double[]) commonOptions.getHints().get(DataStoreUtils.TARGET_RESOLUTION_PER_DIMENSION_FOR_HIERARCHICAL_INDEX), indexMetadata, commonOptions.getAuthorizations());
}
// this is an incremental algorithm, it may be better use the target
// split count to drive it (ie. to get 3 splits this will split 1
// large
// range into two down the middle and then split one of those ranges
// down the middle to get 3, rather than splitting one range into
// thirds)
final List<IntermediateSplitInfo> unsplittable = new ArrayList<>();
if (!statsCache.isEmpty() && !splits.isEmpty() && (minSplits != null) && (splits.size() < minSplits)) {
// set the ranges to at least min splits
do {
// remove the highest range, split it into 2 and add both
// back,
// increasing the size by 1
final IntermediateSplitInfo highestSplit = splits.pollLast();
final IntermediateSplitInfo otherSplit = highestSplit.split(statsCache);
// working our way up the split set.
if (otherSplit == null) {
// split() could not divide this range further; set it aside so we do not
// retry it, and keep splitting the next-largest ranges
unsplittable.add(highestSplit);
} else {
splits.add(highestSplit);
splits.add(otherSplit);
}
} while ((splits.size() != 0) && ((splits.size() + unsplittable.size()) < minSplits));
// Add all unsplittable splits back to splits array
splits.addAll(unsplittable);
if (splits.size() < minSplits) {
LOGGER.warn("Truly unable to meet split count. Actual Count: " + splits.size());
}
} else if (((maxSplits != null) && (maxSplits > 0)) && (splits.size() > maxSplits)) {
// merge splits to fit within max splits
do {
// this is the naive approach, remove the lowest two ranges
// and merge them, decreasing the size by 1
// TODO Ideally merge takes into account locations (as well
// as possibly the index as a secondary criteria) to limit
// the number of locations/indices
final IntermediateSplitInfo lowestSplit = splits.pollFirst();
final IntermediateSplitInfo nextLowestSplit = splits.pollFirst();
lowestSplit.merge(nextLowestSplit);
splits.add(lowestSplit);
} while (splits.size() > maxSplits);
}
for (final IntermediateSplitInfo split : splits) {
retVal.add(split.toFinalSplit(statsStore, adapterStore, internalAdapterStore, indexIdToAdaptersMap, commonOptions.getAuthorizations()));
}
return retVal;
}
Aggregations