Skip to content

Commit

Permalink
HDDS-11989. Enable SCM Ratis in tests related to DeletedBlockLog (apa…
Browse files Browse the repository at this point in the history
  • Loading branch information
chungen0126 authored Jan 7, 2025
1 parent 6b8b844 commit 8a774a5
Show file tree
Hide file tree
Showing 5 changed files with 72 additions and 34 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -321,17 +321,7 @@ public void testBlockDeletionTransactions() throws Exception {
// After some time, all the TXs should have been processed, and by then
// the containerBlocks of all known containers will be
// empty again.
GenericTestUtils.waitFor(() -> {
try {
if (SCMHAUtils.isSCMHAEnabled(cluster.getConf())) {
cluster.getStorageContainerManager().getScmHAManager()
.asSCMHADBTransactionBuffer().flush();
}
return delLog.getNumOfValidTransactions() == 0;
} catch (IOException e) {
return false;
}
}, 1000, 22000);
OzoneTestUtils.waitBlockDeleted(cluster.getStorageContainerManager());
assertTrue(verifyBlocksWithTxnTable(cluster, conf, containerBlocks));
// Continue the work: add some TXs with known container names,
// but unknown block IDs.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -161,4 +161,37 @@ public static void closeContainer(StorageContainerManager scm,
container.getState() == HddsProtos.LifeCycleState.CLOSED,
200, 30000);
}

/**
 * Flushes the SCM HA DB transaction buffer and waits until the deleted block
 * log reports at least one valid transaction.
 *
 * <p>Each polling attempt performs the flush first and then checks the
 * transaction count. The values {@code 100} and {@code 3000} are handed to
 * {@code GenericTestUtils.waitFor} as its check interval and overall wait,
 * both in milliseconds.
 *
 * @param scm the SCM whose transaction buffer is flushed and whose deleted
 *            block log is inspected
 * @throws InterruptedException if raised by {@code GenericTestUtils.waitFor}
 * @throws TimeoutException if raised by {@code GenericTestUtils.waitFor}
 */
public static void flushAndWaitForDeletedBlockLog(StorageContainerManager scm)
    throws InterruptedException, TimeoutException {
  GenericTestUtils.waitFor(() -> {
    try {
      scm.getScmHAManager().asSCMHADBTransactionBuffer().flush();
      return scm.getScmBlockManager().getDeletedBlockLog().getNumOfValidTransactions() > 0;
    } catch (IOException ignored) {
      // A failed attempt is treated as "condition not met"; the next poll
      // simply tries again.
      return false;
    }
  }, 100, 3000);
}

/**
 * Waits until the deleted block log reports zero valid transactions.
 *
 * <p>The values {@code 1000} and {@code 60000} are handed to
 * {@code GenericTestUtils.waitFor} as its check interval and overall wait,
 * both in milliseconds.
 *
 * @param scm the SCM whose deleted block log is inspected
 * @throws InterruptedException if raised by {@code GenericTestUtils.waitFor}
 * @throws TimeoutException if raised by {@code GenericTestUtils.waitFor}
 */
public static void waitBlockDeleted(StorageContainerManager scm)
    throws InterruptedException, TimeoutException {
  GenericTestUtils.waitFor(() -> {
    try {
      return scm.getScmBlockManager().getDeletedBlockLog().getNumOfValidTransactions() == 0;
    } catch (IOException ignored) {
      // A failed read is treated as "condition not met"; the next poll
      // simply tries again.
      return false;
    }
  }, 1000, 60000);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
import org.apache.hadoop.ozone.HddsDatanodeService;
import org.apache.hadoop.ozone.MiniOzoneCluster;
import org.apache.hadoop.ozone.OzoneConfigKeys;
import org.apache.hadoop.ozone.OzoneTestUtils;
import org.apache.hadoop.ozone.RatisTestHelper;
import org.apache.hadoop.ozone.client.ObjectStore;
import org.apache.hadoop.ozone.client.OzoneClient;
Expand All @@ -65,6 +66,7 @@
import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_CONTAINER_REPORT_INTERVAL;
import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_HEARTBEAT_INTERVAL;
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.THREE;
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY;
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_PIPELINE_CREATION_INTERVAL;
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_PIPELINE_DESTROY_TIMEOUT;
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL;
Expand All @@ -73,6 +75,7 @@
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertSame;
import static org.junit.jupiter.api.Assertions.assertThrows;

import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assumptions;
import org.junit.jupiter.api.BeforeAll;
Expand Down Expand Up @@ -103,6 +106,7 @@ public static void init() throws Exception {

conf = new OzoneConfiguration();

conf.setBoolean(OZONE_SCM_HA_ENABLE_KEY, true);
conf.setTimeDuration(HDDS_HEARTBEAT_INTERVAL, 100,
TimeUnit.MILLISECONDS);
conf.setTimeDuration(HDDS_CONTAINER_REPORT_INTERVAL, 200,
Expand Down Expand Up @@ -281,6 +285,7 @@ void testDeleteKeyWithInAdequateDN() throws Exception {
//cluster.getOzoneManager().deleteKey(keyArgs);
client.getObjectStore().getVolume(volumeName).getBucket(bucketName).
deleteKey("ratis");
OzoneTestUtils.flushAndWaitForDeletedBlockLog(cluster.getStorageContainerManager());
// make sure the chunk was never deleted on the leader even though
// deleteBlock handler is invoked

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@
import org.apache.hadoop.hdds.scm.block.ScmBlockDeletingServiceMetrics;
import org.apache.hadoop.hdds.scm.container.ContainerID;
import org.apache.hadoop.hdds.scm.container.ContainerInfo;
import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException;
import org.apache.hadoop.hdds.scm.container.ContainerReplica;
import org.apache.hadoop.hdds.scm.container.ContainerStateManager;
import org.apache.hadoop.hdds.scm.container.replication.ReplicationManager;
Expand Down Expand Up @@ -95,6 +94,7 @@
import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_CONTAINER_REPORT_INTERVAL;
import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_HEARTBEAT_INTERVAL;
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_EXPIRED_CONTAINER_REPLICA_OP_SCRUB_INTERVAL;
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY;
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_PIPELINE_OWNER_CONTAINER_COUNT;
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL;
import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_INTERVAL;
Expand Down Expand Up @@ -133,6 +133,7 @@ public void init() throws Exception {
GenericTestUtils.setLogLevel(SCMBlockDeletingService.LOG, Level.DEBUG);
GenericTestUtils.setLogLevel(ReplicationManager.LOG, Level.DEBUG);

conf.setBoolean(OZONE_SCM_HA_ENABLE_KEY, true);
conf.set("ozone.replication.allowed-configs",
"^(RATIS/THREE)|(EC/2-1-256k)$");
conf.setTimeDuration(OZONE_BLOCK_DELETING_SERVICE_INTERVAL, 100,
Expand Down Expand Up @@ -239,6 +240,7 @@ public void testBlockDeletion(ReplicationConfig repConfig) throws Exception {
// verify key blocks were created in DN.
GenericTestUtils.waitFor(() -> {
try {
scm.getScmHAManager().asSCMHADBTransactionBuffer().flush();
verifyBlocksCreated(omKeyLocationInfoGroupList);
return true;
} catch (Throwable t) {
Expand Down Expand Up @@ -283,6 +285,7 @@ public void testBlockDeletion(ReplicationConfig repConfig) throws Exception {
// The blocks should be deleted in the DN.
GenericTestUtils.waitFor(() -> {
try {
scm.getScmHAManager().asSCMHADBTransactionBuffer().flush();
verifyBlocksDeleted(omKeyLocationInfoGroupList);
return true;
} catch (Throwable t) {
Expand All @@ -299,6 +302,7 @@ public void testBlockDeletion(ReplicationConfig repConfig) throws Exception {
// Verify transactions committed
GenericTestUtils.waitFor(() -> {
try {
scm.getScmHAManager().asSCMHADBTransactionBuffer().flush();
verifyTransactionsCommitted();
return true;
} catch (Throwable t) {
Expand Down Expand Up @@ -380,10 +384,16 @@ public void testContainerStatisticsAfterDelete() throws Exception {

writeClient.deleteKey(keyArgs);
// Wait for blocks to be deleted and container reports to be processed
GenericTestUtils.waitFor(() ->
scm.getContainerManager().getContainers().stream()
.allMatch(c -> c.getUsedBytes() == 0 &&
c.getNumberOfKeys() == 0), 500, 20000);
GenericTestUtils.waitFor(() -> {
try {
scm.getScmHAManager().asSCMHADBTransactionBuffer().flush();
} catch (IOException e) {
throw new RuntimeException(e);
}
return scm.getContainerManager().getContainers().stream()
.allMatch(c -> c.getUsedBytes() == 0 &&
c.getNumberOfKeys() == 0);
}, 500, 20000);
Thread.sleep(5000);
// Verify that pending block delete num are as expected with resent cmds
cluster.getHddsDatanodes().forEach(dn -> {
Expand Down Expand Up @@ -425,6 +435,7 @@ public void testContainerStatisticsAfterDelete() throws Exception {
assertEquals(HddsProtos.LifeCycleState.DELETED,
container.getState());
try {
scm.getScmHAManager().asSCMHADBTransactionBuffer().flush();
assertEquals(HddsProtos.LifeCycleState.DELETED,
scm.getScmMetadataStore().getContainerTable()
.get(container.containerID()).getState());
Expand Down Expand Up @@ -516,14 +527,14 @@ public void testContainerStateAfterDNRestart() throws Exception {

GenericTestUtils.waitFor(() -> {
try {
scm.getScmHAManager().asSCMHADBTransactionBuffer().flush();
return scm.getContainerManager().getContainerReplicas(
containerId).stream().
allMatch(replica -> replica.isEmpty());
} catch (ContainerNotFoundException e) {
} catch (IOException e) {
throw new RuntimeException(e);
}
},
100, 10 * 1000);
}, 100, 10 * 1000);

// Container state should be empty now as key got deleted
assertTrue(getContainerFromDN(
Expand All @@ -546,6 +557,7 @@ public void testContainerStateAfterDNRestart() throws Exception {
assertEquals(HddsProtos.LifeCycleState.DELETED,
container.getState());
try {
scm.getScmHAManager().asSCMHADBTransactionBuffer().flush();
assertEquals(HddsProtos.LifeCycleState.DELETED,
scm.getScmMetadataStore().getContainerTable()
.get(container.containerID()).getState());
Expand All @@ -560,7 +572,6 @@ public void testContainerStateAfterDNRestart() throws Exception {
}
return true;
}, 500, 30000);
LOG.info(metrics.toString());
}

/**
Expand Down Expand Up @@ -646,14 +657,14 @@ public void testContainerDeleteWithInvalidKeyCount()
// Ensure isEmpty are true for all replica after delete key
GenericTestUtils.waitFor(() -> {
try {
scm.getScmHAManager().asSCMHADBTransactionBuffer().flush();
return scm.getContainerManager().getContainerReplicas(
containerId).stream()
.allMatch(replica -> replica.isEmpty());
} catch (ContainerNotFoundException e) {
} catch (IOException e) {
throw new RuntimeException(e);
}
},
500, 5 * 2000);
}, 500, 5 * 2000);

// Update container replica by making invalid keyCount in one replica
ContainerReplica replicaOne = ContainerReplica.newBuilder()
Expand Down Expand Up @@ -683,6 +694,7 @@ public void testContainerDeleteWithInvalidKeyCount()
assertEquals(HddsProtos.LifeCycleState.DELETED,
container.getState());
try {
scm.getScmHAManager().asSCMHADBTransactionBuffer().flush();
assertEquals(HddsProtos.LifeCycleState.DELETED,
scm.getScmMetadataStore().getContainerTable()
.get(container.containerID()).getState());
Expand Down Expand Up @@ -812,17 +824,7 @@ public void testBlockDeleteCommandParallelProcess() throws Exception {
}

// Wait for block delete command sent from OM
GenericTestUtils.waitFor(() -> {
try {
if (scm.getScmBlockManager().getDeletedBlockLog()
.getNumOfValidTransactions() > 0) {
return true;
}
} catch (IOException e) {
}
return false;
}, 100, 5000);

OzoneTestUtils.flushAndWaitForDeletedBlockLog(scm);
long start = System.currentTimeMillis();
// Wait for all blocks been deleted.
GenericTestUtils.waitFor(() -> {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.ONE;
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_RATIS_VOLUME_FREE_SPACE_MIN;
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE;
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY;
import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_INTERVAL;
import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.jupiter.api.Assertions.assertEquals;
Expand All @@ -97,6 +98,7 @@ public class TestDeleteContainerHandler {
@BeforeAll
public static void setup() throws Exception {
conf = new OzoneConfiguration();
conf.setBoolean(OZONE_SCM_HA_ENABLE_KEY, true);
conf.set(OZONE_SCM_CONTAINER_SIZE, "1GB");
conf.setStorageSize(OZONE_DATANODE_RATIS_VOLUME_FREE_SPACE_MIN,
0, StorageUnit.MB);
Expand Down Expand Up @@ -196,6 +198,8 @@ public void testDeleteNonEmptyContainerOnDirEmptyCheckTrue()
// Delete key, which will make isEmpty flag to true in containerData
objectStore.getVolume(volumeName)
.getBucket(bucketName).deleteKey(keyName);
OzoneTestUtils.flushAndWaitForDeletedBlockLog(cluster.getStorageContainerManager());
OzoneTestUtils.waitBlockDeleted(cluster.getStorageContainerManager());

// Ensure isEmpty flag is true when key is deleted and container is empty
GenericTestUtils.waitFor(() -> getContainerfromDN(
Expand Down Expand Up @@ -313,6 +317,8 @@ public void testDeleteNonEmptyContainerOnDirEmptyCheckFalse()
// Delete key, which will make isEmpty flag to true in containerData
objectStore.getVolume(volumeName)
.getBucket(bucketName).deleteKey(keyName);
OzoneTestUtils.flushAndWaitForDeletedBlockLog(cluster.getStorageContainerManager());
OzoneTestUtils.waitBlockDeleted(cluster.getStorageContainerManager());

// Ensure isEmpty flag is true when key is deleted and container is empty
GenericTestUtils.waitFor(() -> getContainerfromDN(
Expand Down Expand Up @@ -652,6 +658,8 @@ public void testDeleteContainerRequestHandlerOnClosedContainer()
// Delete key, which will make isEmpty flag to true in containerData
objectStore.getVolume(volumeName)
.getBucket(bucketName).deleteKey(keyName);
OzoneTestUtils.flushAndWaitForDeletedBlockLog(cluster.getStorageContainerManager());
OzoneTestUtils.waitBlockDeleted(cluster.getStorageContainerManager());

// Ensure isEmpty flag is true when key is deleted
GenericTestUtils.waitFor(() -> getContainerfromDN(
Expand Down

0 comments on commit 8a774a5

Please sign in to comment.