Example usage of io.prestosql.MockSplit in the hetu-core project by openlookeng: class TestSplitKey, method testEquals.
@Test
public void testEquals() {
    // Keys built over distinct underlying splits must never compare equal.
    SplitKey firstKey = new SplitKey(split1, catalogName.toString(), schema, table);
    SplitKey secondKey = new SplitKey(split2, catalogName.toString(), schema, table);
    assertNotEquals(firstKey, secondKey, "split key should not be equal");

    SplitKey thirdKey = new SplitKey(split3, catalogName.toString(), schema, table);
    assertNotEquals(secondKey, thirdKey, "split key should not be equal");

    // Scenario where file is updated but the split path/start/end are the same
    MockSplit refreshedConnectorSplit = new MockSplit("hdfs://hacluster/AppData/BIProd/DWD/EVT/bogus_table/000000_0", 0, 10, System.currentTimeMillis());
    Split refreshedSplit = new Split(catalogName, refreshedConnectorSplit, Lifespan.taskWide());
    SplitKey refreshedKey = new SplitKey(refreshedSplit, catalogName.toString(), schema, table);
    // Should not be treated the same as the data may now be outdated
    assertNotEquals(firstKey, refreshedKey);
}
Example usage of io.prestosql.MockSplit in the hetu-core project by openlookeng: class TestSplitKey, method setup.
@BeforeTest
public void setup() {
    // Capture the current time once so every mock split shares the same
    // lastModified timestamp (the original created split1 with a separate
    // System.currentTimeMillis() call, which was inconsistent with the
    // other two). Use primitive long to avoid needless boxing.
    long lastModified = System.currentTimeMillis();
    // Three splits over distinct file paths / byte ranges so SplitKey
    // equality semantics can be exercised in the tests.
    mockConnectorSplit1 = new MockSplit("hdfs://hacluster/AppData/BIProd/DWD/EVT/bogus_table/a=20/000000_0", 0, 10, lastModified);
    mockConnectorSplit2 = new MockSplit("hdfs://hacluster/AppData/BIProd/DWD/EVT/bogus_table/a=21/000000_1", 0, 10, lastModified);
    mockConnectorSplit3 = new MockSplit("hdfs://hacluster/AppData/BIProd/DWD/EVT/bogus_table/b=22/000000_1", 11, 50, lastModified);
    split1 = new Split(catalogName, mockConnectorSplit1, Lifespan.taskWide());
    split2 = new Split(catalogName, mockConnectorSplit2, Lifespan.taskWide());
    split3 = new Split(catalogName, mockConnectorSplit3, Lifespan.taskWide());
}
Example usage of io.prestosql.MockSplit in the hetu-core project by openlookeng: class TestNodeScheduler, method testSplitCacheAwareScheduling.
@Test
public void testSplitCacheAwareScheduling() {
    setUpNodes();
    PropertyService.setProperty(HetuConstant.SPLIT_CACHE_MAP_ENABLED, true);
    SplitCacheMap splitCacheMap = SplitCacheMap.getInstance();
    QualifiedName tableQN = QualifiedName.of(CONNECTOR_ID.toString(), TEST_SCHEMA, TEST_TABLE);

    // Two splits under partition a=23 (cache-eligible) and one under b=33 (not eligible).
    MockSplit mock = new MockSplit("hdfs://hacluster/user/hive/warehouse/test_schema.db/test_table/a=23/000000_0", 0, 10, System.currentTimeMillis(), true);
    MockSplit mock2 = new MockSplit("hdfs://hacluster/user/hive/warehouse/test_schema.db/test_table/b=33/000000_0", 0, 10, System.currentTimeMillis(), false);
    MockSplit mock3 = new MockSplit("hdfs://hacluster/user/hive/warehouse/test_schema.db/test_table/a=23/000001_0", 0, 10, System.currentTimeMillis(), true);

    Split split = new Split(CONNECTOR_ID, mock, Lifespan.taskWide());
    Split split2 = new Split(CONNECTOR_ID, mock2, Lifespan.taskWide());
    Split split3 = new Split(CONNECTOR_ID, mock3, Lifespan.taskWide());
    Set<Split> splits = ImmutableSet.of(split, split2, split3);

    assertFalse(splitCacheMap.cacheExists(tableQN));

    // FIX: each info map must be derived from its OWN split — the original
    // read split.getConnectorSplit().getInfo() for all three (copy-paste bug).
    // Also use wildcard generics instead of raw Map.
    Map<?, ?> splitInfoMap = (Map<?, ?>) split.getConnectorSplit().getInfo();
    SplitKey splitKey = new SplitKey(split, split.getCatalogName().getCatalogName(), TEST_SCHEMA, splitInfoMap.get("table").toString());
    assertFalse(splitCacheMap.getCachedNodeId(splitKey).isPresent());

    Map<?, ?> split2InfoMap = (Map<?, ?>) split2.getConnectorSplit().getInfo();
    SplitKey split2Key = new SplitKey(split2, split2.getCatalogName().getCatalogName(), TEST_SCHEMA, split2InfoMap.get("table").toString());

    Map<?, ?> split3InfoMap = (Map<?, ?>) split3.getConnectorSplit().getInfo();
    SplitKey split3Key = new SplitKey(split3, split3.getCatalogName().getCatalogName(), TEST_SCHEMA, split3InfoMap.get("table").toString());

    TestNetworkTopology topology = new TestNetworkTopology();
    NetworkLocationCache locationCache = new NetworkLocationCache(topology);
    // contents of taskMap indicate the node-task map for the current stage
    NodeSchedulerConfig nodeSchedulerConfig = new NodeSchedulerConfig().setMaxSplitsPerNode(20).setIncludeCoordinator(false).setMaxPendingSplitsPerTask(10);
    NodeScheduler nodeScheduler = new NodeScheduler(locationCache, topology, nodeManager, nodeSchedulerConfig, nodeTaskMap);
    NodeSelector selector = nodeScheduler.createNodeSelector(CONNECTOR_ID, false, null);
    assertTrue(selector instanceof SplitCacheAwareNodeSelector);

    Multimap<InternalNode, Split> assignment1 = selector.computeAssignments(splits, ImmutableList.copyOf(taskMap.values()), Optional.empty()).getAssignments();
    assertEquals(3, assignment1.size());
    // No cache predicates defined, thus the split to worker mapping will not be saved
    assertFalse(splitCacheMap.getCachedNodeId(splitKey).isPresent());

    // Add cache predicate: a = 23
    ColumnMetadata columnMetadataA = new ColumnMetadata("a", BIGINT);
    TupleDomain<ColumnMetadata> tupleDomainA = TupleDomain.withColumnDomains(ImmutableMap.of(columnMetadataA, Domain.singleValue(BIGINT, 23L)));
    splitCacheMap.addCache(tableQN, tupleDomainA, "a = 23");
    assertFalse(splitCacheMap.getCachedNodeId(splitKey).isPresent());

    Multimap<InternalNode, Split> assignment2 = selector.computeAssignments(splits, ImmutableList.copyOf(taskMap.values()), Optional.empty()).getAssignments();
    // Split will be assigned by default node selector and the mapping cached
    assertTrue(assignment2.containsValue(split));
    assertTrue(assignment2.containsValue(split2));
    assertTrue(assignment2.containsValue(split3));
    // split2 is not cache-eligible (b=33 does not match the predicate), so no node is cached for it
    assertFalse(splitCacheMap.getCachedNodeId(split2Key).isPresent());

    Multimap<String, Split> nodeIdToSplits = ArrayListMultimap.create();
    assignment2.forEach((node, spl) -> nodeIdToSplits.put(node.getNodeIdentifier(), spl));
    assertTrue(nodeIdToSplits.get(splitCacheMap.getCachedNodeId(splitKey).get()).contains(split));
    assertTrue(nodeIdToSplits.get(splitCacheMap.getCachedNodeId(split3Key).get()).contains(split3));

    // Schedule split again and the same assignments should be returned
    Multimap<InternalNode, Split> assignment3 = selector.computeAssignments(splits, ImmutableList.copyOf(taskMap.values()), Optional.empty()).getAssignments();
    // Split will be assigned by default node selector and the mapping cached
    assertTrue(assignment3.containsValue(split));
    assertTrue(assignment3.containsValue(split2));
    assertTrue(assignment3.containsValue(split3));
    assertFalse(splitCacheMap.getCachedNodeId(split2Key).isPresent());

    Multimap<String, Split> nodeIdToSplits3 = ArrayListMultimap.create();
    assignment3.forEach((node, spl) -> nodeIdToSplits3.put(node.getNodeIdentifier(), spl));
    // FIX: verify the SECOND round of assignments — the original asserted
    // against nodeIdToSplits (round one) again, leaving nodeIdToSplits3 unused.
    assertTrue(nodeIdToSplits3.get(splitCacheMap.getCachedNodeId(splitKey).get()).contains(split));
    assertTrue(nodeIdToSplits3.get(splitCacheMap.getCachedNodeId(split3Key).get()).contains(split3));
}
Aggregations