From 5561f21ef62b34679421a7267bae84707ffd3e1c Mon Sep 17 00:00:00 2001 From: Tsz-Wo Nicholas Sze Date: Wed, 7 Aug 2024 15:24:10 -0700 Subject: [PATCH 01/50] HDDS-11174. [hsync] Change XceiverClientRatis.watchForCommit to async. (#6941) Co-authored-by: Siyao Meng <50227127+smengcl@users.noreply.github.com> --- .../hadoop/hdds/scm/XceiverClientGrpc.java | 9 ---- .../hadoop/hdds/scm/XceiverClientRatis.java | 53 ++++++++----------- .../hdds/scm/client/HddsClientUtils.java | 3 +- .../scm/storage/AbstractCommitWatcher.java | 17 +++--- .../TestBlockOutputStreamCorrectness.java | 4 +- .../hadoop/hdds/scm/XceiverClientSpi.java | 8 +-- .../ozone/client/MockXceiverClientSpi.java | 5 -- .../ozone/client/rpc/TestCommitInRatis.java | 4 +- .../ozone/client/rpc/TestWatchForCommit.java | 9 ++-- .../ozone/freon/DatanodeChunkGenerator.java | 3 +- 10 files changed, 46 insertions(+), 69 deletions(-) diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientGrpc.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientGrpc.java index 5cd41edd38d0..c02306f8af8b 100644 --- a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientGrpc.java +++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientGrpc.java @@ -55,7 +55,6 @@ import org.apache.hadoop.ozone.ClientVersion; import org.apache.hadoop.ozone.OzoneConfigKeys; import org.apache.hadoop.ozone.OzoneConsts; -import java.util.concurrent.TimeoutException; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; @@ -649,14 +648,6 @@ private void reconnect(DatanodeDetails dn) } } - @Override - public XceiverClientReply watchForCommit(long index) - throws InterruptedException, ExecutionException, TimeoutException, - IOException { - // there is no notion of watch for commit index in standalone pipeline - return null; - } - @Override public long getReplicatedMinCommitIndex() { return 0; diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientRatis.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientRatis.java index b67f4a56ec3b..2794ca9c61b0 100644 --- a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientRatis.java +++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientRatis.java @@ -29,8 +29,6 @@ import java.util.concurrent.CompletionException; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicReference; import java.util.stream.Stream; @@ -66,6 +64,7 @@ import org.apache.ratis.rpc.RpcType; import org.apache.ratis.rpc.SupportedRpcType; import org.apache.ratis.thirdparty.com.google.protobuf.InvalidProtocolBufferException; +import org.apache.ratis.util.JavaUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -295,46 +294,39 @@ private XceiverClientReply newWatchReply( } @Override - public XceiverClientReply watchForCommit(long index) - throws InterruptedException, ExecutionException, TimeoutException, - IOException { + public CompletableFuture watchForCommit(long index) { final long replicatedMin = getReplicatedMinCommitIndex(); if (replicatedMin >= index) { - return newWatchReply(index, "replicatedMin", replicatedMin); + return CompletableFuture.completedFuture(newWatchReply(index, "replicatedMin", replicatedMin)); } - try { - CompletableFuture 
replyFuture = getClient().async().watch(index, watchType); - final RaftClientReply reply = replyFuture.get(); + final CompletableFuture replyFuture = new CompletableFuture<>(); + getClient().async().watch(index, watchType).thenAccept(reply -> { final long updated = updateCommitInfosMap(reply, watchType); - Preconditions.checkState(updated >= index, "Returned index " + updated + " is smaller than expected " + index); - return newWatchReply(index, watchType, updated); - } catch (Exception e) { + Preconditions.checkState(updated >= index, "Returned index " + updated + " < expected " + index); + replyFuture.complete(newWatchReply(index, watchType, updated)); + }).exceptionally(e -> { LOG.warn("{} way commit failed on pipeline {}", watchType, pipeline, e); - Throwable t = - HddsClientUtils.containsException(e, GroupMismatchException.class); - if (t != null) { - throw e; - } - if (watchType == ReplicationLevel.ALL_COMMITTED) { - Throwable nre = - HddsClientUtils.containsException(e, NotReplicatedException.class); - Collection commitInfoProtoList; + final boolean isGroupMismatch = HddsClientUtils.containsException(e, GroupMismatchException.class) != null; + if (!isGroupMismatch && watchType == ReplicationLevel.ALL_COMMITTED) { + final Throwable nre = HddsClientUtils.containsException(e, NotReplicatedException.class); if (nre instanceof NotReplicatedException) { // If NotReplicatedException is thrown from the Datanode leader // we can save one watch request round trip by using the CommitInfoProto // in the NotReplicatedException - commitInfoProtoList = ((NotReplicatedException) nre).getCommitInfos(); + final Collection commitInfoProtoList = ((NotReplicatedException) nre).getCommitInfos(); + replyFuture.complete(handleFailedAllCommit(index, commitInfoProtoList)); } else { - final RaftClientReply reply = getClient().async() - .watch(index, RaftProtos.ReplicationLevel.MAJORITY_COMMITTED) - .get(); - commitInfoProtoList = reply.getCommitInfos(); + getClient().async().watch(index, ReplicationLevel.MAJORITY_COMMITTED) + .thenApply(reply -> handleFailedAllCommit(index, reply.getCommitInfos())) + .whenComplete(JavaUtils.asBiConsumer(replyFuture)); } - return handleFailedAllCommit(index, commitInfoProtoList); + } else { + replyFuture.completeExceptionally(e); } - throw e; - } + return null; + }); + return replyFuture; } private XceiverClientReply handleFailedAllCommit(long index, Collection commitInfoProtoList) { @@ -374,8 +366,7 @@ public XceiverClientReply sendCommandAsync( CompletableFuture containerCommandResponse = raftClientReply.whenComplete((reply, e) -> { if (LOG.isDebugEnabled()) { - LOG.debug("received reply {} for request: cmdType={} containerID={}" - + " pipelineID={} traceID={} exception: {}", reply, + LOG.debug("received reply {} for request: cmdType={}, containerID={}, pipelineID={}, traceID={}", reply, request.getCmdType(), request.getContainerID(), request.getPipelineID(), request.getTraceID(), e); } diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/client/HddsClientUtils.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/client/HddsClientUtils.java index 6c5f9a0a9891..f5a7c0ad5509 100644 --- a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/client/HddsClientUtils.java +++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/client/HddsClientUtils.java @@ -254,9 +254,8 @@ public static Throwable checkForException(Exception e) { // This will return the underlying expected exception if it exists // in an exception trace. 
Otherwise, returns null. - public static Throwable containsException(Exception e, + public static Throwable containsException(Throwable t, Class expectedExceptionClass) { - Throwable t = e; while (t != null) { if (expectedExceptionClass.isInstance(t)) { return t; diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/AbstractCommitWatcher.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/AbstractCommitWatcher.java index fb489d0d0c6c..61bc73420e65 100644 --- a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/AbstractCommitWatcher.java +++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/AbstractCommitWatcher.java @@ -37,7 +37,6 @@ import java.util.concurrent.ConcurrentMap; import java.util.concurrent.ConcurrentSkipListMap; import java.util.concurrent.ExecutionException; -import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicLong; /** @@ -127,19 +126,17 @@ XceiverClientReply watchOnLastIndex() throws IOException { * @return minimum commit index replicated to all nodes * @throws IOException IOException in case watch gets timed out */ - XceiverClientReply watchForCommit(long commitIndex) - throws IOException { + CompletableFuture watchForCommitAsync(long commitIndex) { final MemoizedSupplier> supplier = JavaUtils.memoize(CompletableFuture::new); final CompletableFuture f = replies.compute(commitIndex, (key, value) -> value != null ? value : supplier.get()); if (!supplier.isInitialized()) { // future already exists - return f.join(); + return f; } - try { - final XceiverClientReply reply = client.watchForCommit(commitIndex); + return client.watchForCommit(commitIndex).thenApply(reply -> { f.complete(reply); final CompletableFuture removed = replies.remove(commitIndex); Preconditions.checkState(removed == f); @@ -147,11 +144,17 @@ XceiverClientReply watchForCommit(long commitIndex) final long index = reply != null ? 
reply.getLogIndex() : 0; adjustBuffers(index); return reply; + }); + } + + XceiverClientReply watchForCommit(long commitIndex) throws IOException { + try { + return watchForCommitAsync(commitIndex).get(); } catch (InterruptedException e) { // Re-interrupt the thread while catching InterruptedException Thread.currentThread().interrupt(); throw getIOExceptionForWatchForCommit(commitIndex, e); - } catch (TimeoutException | ExecutionException e) { + } catch (ExecutionException e) { throw getIOExceptionForWatchForCommit(commitIndex, e); } } diff --git a/hadoop-hdds/client/src/test/java/org/apache/hadoop/hdds/scm/storage/TestBlockOutputStreamCorrectness.java b/hadoop-hdds/client/src/test/java/org/apache/hadoop/hdds/scm/storage/TestBlockOutputStreamCorrectness.java index df55b5bf57ae..df4d1cb3f8aa 100644 --- a/hadoop-hdds/client/src/test/java/org/apache/hadoop/hdds/scm/storage/TestBlockOutputStreamCorrectness.java +++ b/hadoop-hdds/client/src/test/java/org/apache/hadoop/hdds/scm/storage/TestBlockOutputStreamCorrectness.java @@ -276,7 +276,7 @@ public ReplicationType getPipelineType() { } @Override - public XceiverClientReply watchForCommit(long index) { + public CompletableFuture watchForCommit(long index) { final ContainerCommandResponseProto.Builder builder = ContainerCommandResponseProto.newBuilder() .setCmdType(Type.WriteChunk) @@ -284,7 +284,7 @@ public XceiverClientReply watchForCommit(long index) { final XceiverClientReply xceiverClientReply = new XceiverClientReply( CompletableFuture.completedFuture(builder.build())); xceiverClientReply.setLogIndex(index); - return xceiverClientReply; + return CompletableFuture.completedFuture(xceiverClientReply); } @Override diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientSpi.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientSpi.java index f6529e84bdc1..9ac32c469ca4 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientSpi.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientSpi.java @@ -22,8 +22,8 @@ import java.io.IOException; import java.util.List; import java.util.Map; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; -import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicInteger; import org.apache.hadoop.hdds.HddsUtils; @@ -176,9 +176,9 @@ public static IOException getIOExceptionForSendCommand( * @return reply containing the min commit index replicated to all or majority * servers in case of a failure */ - public abstract XceiverClientReply watchForCommit(long index) - throws InterruptedException, ExecutionException, TimeoutException, - IOException; + public CompletableFuture watchForCommit(long index) { + return CompletableFuture.completedFuture(null); + } /** * returns the min commit index replicated to all servers. 
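Not part of the patch itself: a minimal usage sketch of the new asynchronous watchForCommit contract shown above, assuming an already-connected XceiverClientSpi. The helper class and method names (WatchForCommitUsageSketch, watchBlocking, watchAsync) are illustrative only, and package locations are inferred from the file paths in this diff.

import java.io.IOException;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;

import org.apache.hadoop.hdds.scm.XceiverClientReply;
import org.apache.hadoop.hdds.scm.XceiverClientSpi;

/** Illustrative sketch only; not shipped with this patch. */
final class WatchForCommitUsageSketch {
  private WatchForCommitUsageSketch() { }

  /** Blocking bridge that mirrors the pre-patch synchronous behaviour. */
  static XceiverClientReply watchBlocking(XceiverClientSpi client, long index)
      throws IOException {
    try {
      return client.watchForCommit(index).get();
    } catch (InterruptedException e) {
      // Re-interrupt the thread, as AbstractCommitWatcher does above.
      Thread.currentThread().interrupt();
      throw new IOException("Interrupted while watching commit index " + index, e);
    } catch (ExecutionException e) {
      throw new IOException("watchForCommit failed for index " + index, e.getCause());
    }
  }

  /** Fully asynchronous style: chain follow-up work on the returned future. */
  static CompletableFuture<Long> watchAsync(XceiverClientSpi client, long index) {
    return client.watchForCommit(index)
        // The default XceiverClientSpi implementation completes with null,
        // so guard before reading the replicated log index.
        .thenApply(reply -> reply != null ? reply.getLogIndex() : 0L);
  }
}

The blocking bridge is essentially what AbstractCommitWatcher.watchForCommit does in this patch; new callers are expected to prefer the asynchronous form.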
diff --git a/hadoop-ozone/client/src/test/java/org/apache/hadoop/ozone/client/MockXceiverClientSpi.java b/hadoop-ozone/client/src/test/java/org/apache/hadoop/ozone/client/MockXceiverClientSpi.java index 0d82f0f8bbb2..b14582c8ea97 100644 --- a/hadoop-ozone/client/src/test/java/org/apache/hadoop/ozone/client/MockXceiverClientSpi.java +++ b/hadoop-ozone/client/src/test/java/org/apache/hadoop/ozone/client/MockXceiverClientSpi.java @@ -187,11 +187,6 @@ public ReplicationType getPipelineType() { return pipeline.getType(); } - @Override - public XceiverClientReply watchForCommit(long index) { - return null; - } - @Override public long getReplicatedMinCommitIndex() { return 0; diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestCommitInRatis.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestCommitInRatis.java index f7fbbf37c52b..4ff671df6163 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestCommitInRatis.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestCommitInRatis.java @@ -163,7 +163,7 @@ public void test2WayCommitForRetryfailure(RaftProtos.ReplicationLevel watchType) reply.getResponse().get(); assertEquals(3, ratisClient.getCommitInfoMap().size()); // wait for the container to be created on all the nodes - xceiverClient.watchForCommit(reply.getLogIndex()); + xceiverClient.watchForCommit(reply.getLogIndex()).get(); for (HddsDatanodeService dn : cluster.getHddsDatanodes()) { // shutdown the ratis follower if (RatisTestHelper.isRatisFollower(dn, pipeline)) { @@ -175,7 +175,7 @@ public void test2WayCommitForRetryfailure(RaftProtos.ReplicationLevel watchType) .getCloseContainer(pipeline, container1.getContainerInfo().getContainerID())); reply.getResponse().get(); - xceiverClient.watchForCommit(reply.getLogIndex()); + xceiverClient.watchForCommit(reply.getLogIndex()).get(); if (watchType == RaftProtos.ReplicationLevel.ALL_COMMITTED) { // commitInfo Map will be reduced to 2 here diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestWatchForCommit.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestWatchForCommit.java index f42969e67f08..bec14b23b0f0 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestWatchForCommit.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestWatchForCommit.java @@ -291,7 +291,7 @@ public void testWatchForCommitForRetryfailure(RaftProtos.ReplicationLevel watchT // as well as there is no logIndex generate in Ratis. // The basic idea here is just to test if its throws an exception. 
ExecutionException e = assertThrows(ExecutionException.class, - () -> xceiverClient.watchForCommit(index + RandomUtils.nextInt(0, 100) + 10)); + () -> xceiverClient.watchForCommit(index + RandomUtils.nextInt(0, 100) + 10).get()); // since the timeout value is quite long, the watch request will either // fail with NotReplicated exceptio, RetryFailureException or // RuntimeException @@ -348,7 +348,7 @@ public void test2WayCommitForTimeoutException(RaftProtos.ReplicationLevel watchT .getCloseContainer(pipeline, container1.getContainerInfo().getContainerID())); reply.getResponse().get(); - xceiverClient.watchForCommit(reply.getLogIndex()); + xceiverClient.watchForCommit(reply.getLogIndex()).get(); // commitInfo Map will be reduced to 2 here if (watchType == RaftProtos.ReplicationLevel.ALL_COMMITTED) { @@ -392,9 +392,8 @@ public void testWatchForCommitForGroupMismatchException() throws Exception { // just watch for a log index which in not updated in the commitInfo Map // as well as there is no logIndex generate in Ratis. // The basic idea here is just to test if its throws an exception. - Exception e = - assertThrows(Exception.class, - () -> xceiverClient.watchForCommit(reply.getLogIndex() + RandomUtils.nextInt(0, 100) + 10)); + final Exception e = assertThrows(Exception.class, + () -> xceiverClient.watchForCommit(reply.getLogIndex() + RandomUtils.nextInt(0, 100) + 10).get()); assertInstanceOf(GroupMismatchException.class, HddsClientUtils.checkForException(e)); } finally { clientManager.releaseClient(xceiverClient, false); diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/DatanodeChunkGenerator.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/DatanodeChunkGenerator.java index 6362f32d0499..23988106d47f 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/DatanodeChunkGenerator.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/DatanodeChunkGenerator.java @@ -246,8 +246,7 @@ private void sendWriteChunkRequest(DatanodeBlockID blockId, if (async) { XceiverClientReply xceiverClientReply = xceiverClientSpi.sendCommandAsync(request); - xceiverClientSpi - .watchForCommit(xceiverClientReply.getLogIndex()); + xceiverClientSpi.watchForCommit(xceiverClientReply.getLogIndex()).get(); } else { xceiverClientSpi.sendCommand(request); From daa04de053f6c44eb0df7a171c7ca04659caae02 Mon Sep 17 00:00:00 2001 From: Abhishek Pal <43001336+devabhishekpal@users.noreply.github.com> Date: Thu, 8 Aug 2024 11:03:41 +0530 Subject: [PATCH 02/50] HDDS-11154. 
Improve Overview Page UI (#7017) --- .../webapps/recon/ozone-recon-web/api/db.json | 5 +- .../recon/ozone-recon-web/src/app.less | 7 + .../webapps/recon/ozone-recon-web/src/app.tsx | 12 +- .../src/v2/components/eChart/eChart.tsx | 89 +++ .../errorBoundary/errorBoundary.tsx | 52 ++ .../overviewCard/overviewCardWrapper.tsx | 79 +++ .../overviewCard/overviewSimpleCard.tsx | 147 +++++ .../overviewCard/overviewStorageCard.tsx | 241 ++++++++ .../overviewCard/overviewSummaryCard.tsx | 108 ++++ .../v2/components/storageBar/storageBar.tsx | 93 +++ .../src/v2/pages/overview/overview.less | 26 + .../src/v2/pages/overview/overview.tsx | 542 ++++++++++++++++++ .../ozone-recon-web/src/v2/routes-v2.tsx | 26 + .../src/v2/types/overview.types.ts | 66 +++ 14 files changed, 1487 insertions(+), 6 deletions(-) create mode 100644 hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/eChart/eChart.tsx create mode 100644 hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/errorBoundary/errorBoundary.tsx create mode 100644 hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/overviewCard/overviewCardWrapper.tsx create mode 100644 hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/overviewCard/overviewSimpleCard.tsx create mode 100644 hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/overviewCard/overviewStorageCard.tsx create mode 100644 hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/overviewCard/overviewSummaryCard.tsx create mode 100644 hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/storageBar/storageBar.tsx create mode 100644 hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/pages/overview/overview.less create mode 100644 hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/pages/overview/overview.tsx create mode 100644 hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/routes-v2.tsx create mode 100644 hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/types/overview.types.ts diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/api/db.json b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/api/db.json index 5416ca7f00cc..8cfb23ad685b 100644 --- a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/api/db.json +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/api/db.json @@ -5,8 +5,9 @@ "healthyDatanodes": 24, "storageReport": { "capacity": 202114732032, - "used": 16384, - "remaining": 182447632384 + "used": 4667099648, + "remaining": 182447632384, + "committed": 12000222315 }, "containers": 3230, "missingContainers": 1002, diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/app.less b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/app.less index 3d2cbc814e48..1895cabc184e 100644 --- a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/app.less +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/app.less @@ -44,6 +44,13 @@ font-weight: 500; } +.page-header-v2 { + padding: 10px 20px; + font-size: 20px; + font-weight: 500; + background-color: #FFFFFF; +} + .content-div { padding: 24px; background-color: #FFF; diff --git 
a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/app.tsx b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/app.tsx index 3fec211e7ef0..c52fe9efa922 100644 --- a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/app.tsx +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/app.tsx @@ -23,6 +23,7 @@ import NavBar from './components/navBar/navBar'; import Breadcrumbs from './components/breadcrumbs/breadcrumbs'; import { HashRouter as Router, Switch, Route, Redirect } from 'react-router-dom'; import { routes } from '@/routes'; +import { routesV2 } from '@/v2/routes-v2'; import { MakeRouteWithSubRoutes } from '@/makeRouteWithSubRoutes'; import classNames from 'classnames'; @@ -51,7 +52,7 @@ class App extends React.Component, IAppState> { }; render() { - const { collapsed } = this.state; + const { collapsed, enableNewUI } = this.state; const layoutClass = classNames('content-layout', { 'sidebar-collapsed': collapsed }); @@ -73,13 +74,16 @@ class App extends React.Component, IAppState> { }} /> - + - { - routes.map( + {(enableNewUI) + ? routesV2.map( + (route, index) => + ) + : routes.map( (route, index) => ) } diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/eChart/eChart.tsx b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/eChart/eChart.tsx new file mode 100644 index 000000000000..8be22fcc9f65 --- /dev/null +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/eChart/eChart.tsx @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import React, { useRef, useEffect } from "react"; +import { init, getInstanceByDom } from 'echarts'; +import type { CSSProperties } from "react"; +import type { EChartsOption, ECharts, SetOptionOpts } from 'echarts'; + +export interface EChartProps { + option: EChartsOption; + style?: CSSProperties; + settings?: SetOptionOpts; + loading?: boolean; + theme?: 'light'; + onClick?: () => any | void; +} + +const EChart = ({ + option, + style, + settings, + loading, + theme, + onClick +}: EChartProps): JSX.Element => { + const chartRef = useRef(null); + useEffect(() => { + // Initialize chart + let chart: ECharts | undefined; + if (chartRef.current !== null) { + chart = init(chartRef.current, theme); + if (onClick) { + chart.on('click', onClick); + } + } + + // Add chart resize listener + // ResizeObserver is leading to a bit janky UX + function resizeChart() { + chart?.resize(); + } + window.addEventListener("resize", resizeChart); + + // Return cleanup function + return () => { + chart?.dispose(); + window.removeEventListener("resize", resizeChart); + }; + }, [theme]); + + useEffect(() => { + // Update chart + if (chartRef.current !== null) { + const chart = getInstanceByDom(chartRef.current); + chart!.setOption(option, settings); + if (onClick) { + chart!.on('click', onClick); + } + } + }, [option, settings, theme]); // Whenever theme changes we need to add option and setting due to it being deleted in cleanup function + + useEffect(() => { + // Update chart + if (chartRef.current !== null) { + const chart = getInstanceByDom(chartRef.current); + // eslint-disable-next-line @typescript-eslint/no-unused-expressions + loading === true ? chart!.showLoading() : chart!.hideLoading(); + } + }, [loading, theme]); // If we switch theme we should put chart in loading mode, and also if loading changes i.e completes then hide loader + + return
; +} + +export default EChart; \ No newline at end of file diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/errorBoundary/errorBoundary.tsx b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/errorBoundary/errorBoundary.tsx new file mode 100644 index 000000000000..a7f7c9f45a8b --- /dev/null +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/errorBoundary/errorBoundary.tsx @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import React from "react"; + +type ErrorProps = { + fallback: string | React.ReactNode; + children: React.ReactNode; +} + +type ErrorState = { + hasError: boolean; +} + +class ErrorBoundary extends React.Component{ + constructor(props: ErrorProps) { + super(props); + this.state = { hasError: false } + } + + static getDerivedStateFromError(error: Error) { + return { hasError: true } + } + + componentDidCatch(error: Error, errorInfo: React.ErrorInfo): void { + console.error(error, errorInfo) + } + + render(): React.ReactNode { + if (this.state.hasError) { + return this.props.fallback; + } + return this.props.children; + } +} + +export default ErrorBoundary; \ No newline at end of file diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/overviewCard/overviewCardWrapper.tsx b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/overviewCard/overviewCardWrapper.tsx new file mode 100644 index 000000000000..5e0998e6f64e --- /dev/null +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/overviewCard/overviewCardWrapper.tsx @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import React from 'react'; +import { Link } from 'react-router-dom'; + +// ------------- Types -------------- // +type OverviewCardWrapperProps = { + linkToUrl: string; + title: string; + children: React.ReactElement; +} + +// ------------- Component -------------- // +const OverviewCardWrapper: React.FC = ({ + linkToUrl = '/', + title = '', + children = <> +}) => { + + const setCurrentActiveTab = (title: string): { active: string } => { + if (title === 'Open Keys Summary') { + return { + active: '2' + } + } + else if (title === 'Pending Deleted Keys Summary') { + return { + active: '3' + } + } + else if (title === 'OM Service') { + return { + active: '4' + } + } + return { + active: '1' + } + }; + + if (linkToUrl === '/Om') { + return ( + + {children} + + ); + } + else if (linkToUrl) { + return ( + + {children} + + ); + } + else { + return children; + } +} + +export default OverviewCardWrapper; \ No newline at end of file diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/overviewCard/overviewSimpleCard.tsx b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/overviewCard/overviewSimpleCard.tsx new file mode 100644 index 000000000000..183ae73bc4fd --- /dev/null +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/overviewCard/overviewSimpleCard.tsx @@ -0,0 +1,147 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import React from 'react'; +import { Card, Col, Row } from 'antd'; +import { Link } from 'react-router-dom'; +import { + ClusterOutlined, + ContainerOutlined, + DatabaseOutlined, + DeleteOutlined, + DeploymentUnitOutlined, + FileTextOutlined, + FolderOpenOutlined, + InboxOutlined, + QuestionCircleOutlined +} from '@ant-design/icons'; + + +// ------------- Types -------------- // +type IconOptions = { + [key: string]: React.ReactElement +} + +type OverviewCardProps = { + icon: string; + data: number | React.ReactElement; + title: string; + hoverable?: boolean; + loading?: boolean; + linkToUrl?: string; +} + +// ------------- Styles -------------- // +const defaultIconStyle: React.CSSProperties = { + fontSize: '50px', + float: 'right' +}; +const iconStyle: React.CSSProperties = { + fontSize: '20px', + paddingRight: '4px', + float: 'inline-start' +}; +const cardHeadStyle: React.CSSProperties = { fontSize: '14px' }; +const cardBodyStyle: React.CSSProperties = { + padding: '16px', + justifyTracks: 'space-between' +}; +const dataColStyle: React.CSSProperties = { fontSize: '24px' }; +const titleLinkStyle: React.CSSProperties = { fontWeight: 400 } + +// Since AntD no longer supports string icon component +// we are using a utility function to map the strings to +// the appropriate Icon to render +const IconSelector = ({ + iconType, style +}: { + iconType: string; + style: React.CSSProperties +}) => { + const Icons: IconOptions = { + 'cluster': , + 'deployment-unit': , + 'database': , + 'container': , + 'inbox': , + 'folder-open': , + 'file-text': , + 'delete': + }; + + const selectIcon = (iconType: string): React.ReactElement => { + // Setting the default Icon as a question mark in case no match found + let ico = + + const found = Object.entries(Icons).find( + ([k]) => k.toLowerCase() === iconType.toLowerCase() + ); + + if (found) { + [, ico] = found; + } + return ico; + } + return selectIcon(iconType); +} + + +// ------------- Component -------------- // +const OverviewSimpleCard: React.FC = ({ + icon = '', + data = 0, + title = '', + hoverable = false, + loading = false, + linkToUrl = '' +}) => { + + const titleElement = (linkToUrl) + ? ( +
+ {title} + + View More + +
) + : title + + return ( + + + + + + + {data} + + + + ); +} + +export default OverviewSimpleCard; \ No newline at end of file diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/overviewCard/overviewStorageCard.tsx b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/overviewCard/overviewStorageCard.tsx new file mode 100644 index 000000000000..d41f5dbcfb53 --- /dev/null +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/overviewCard/overviewStorageCard.tsx @@ -0,0 +1,241 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import React, { useMemo } from 'react'; +import filesize from 'filesize'; +import { Card, Row, Col, Table, Tag } from 'antd'; + +import EChart from '@/v2/components/eChart/eChart'; +import OverviewCardWrapper from '@/v2/components/overviewCard/overviewCardWrapper'; + +import { StorageReport } from '@/v2/types/overview.types'; + +// ------------- Types -------------- // +type OverviewStorageCardProps = { + loading?: boolean; + storageReport: StorageReport; +} + +const size = filesize.partial({ round: 1 }); + +function getUsagePercentages( + { used, remaining, capacity, committed }: StorageReport): ({ + ozoneUsedPercentage: number, + nonOzoneUsedPercentage: number, + committedPercentage: number, + usagePercentage: number + }) { + return { + ozoneUsedPercentage: Math.floor(used / capacity * 100), + nonOzoneUsedPercentage: Math.floor((capacity - remaining - used) / capacity * 100), + committedPercentage: Math.floor(committed / capacity * 100), + usagePercentage: Math.floor((capacity - remaining) / capacity * 100) + } +} + +// ------------- Styles -------------- // +const cardHeadStyle: React.CSSProperties = { fontSize: '14px' }; +const cardBodyStyle: React.CSSProperties = { padding: '16px' }; +const cardStyle: React.CSSProperties = { + boxSizing: 'border-box', + height: '100%' +} +const eChartStyle: React.CSSProperties = { + width: '280px', + height: '200px' +} + + +// ------------- Component -------------- // +const OverviewStorageCard: React.FC = ({ + loading = false, + storageReport = { + capacity: 0, + used: 0, + remaining: 0, + committed: 0 + } +}) => { + + const { + ozoneUsedPercentage, + nonOzoneUsedPercentage, + committedPercentage, + usagePercentage + } = useMemo(() => + getUsagePercentages(storageReport), + [ + storageReport.capacity, + storageReport.committed, + storageReport.remaining, + storageReport.used, + ] + ) + + let capacityData = [{ + value: ozoneUsedPercentage, + itemStyle: { + color: '#52C41A' + } + }, { + value: nonOzoneUsedPercentage, + itemStyle: { + color: '#1890FF' + } + }, { + value: committedPercentage, + itemStyle: { + color: '#FF595E' + } + }] + // Remove all zero 
values + // because guage chart shows a dot if value is zero + capacityData = capacityData.filter((val) => val.value > 0) + + const eChartOptions = { + title: { + left: 'center', + bottom: 'bottom', + text: `${size(storageReport.capacity - storageReport.remaining)} / ${size(storageReport.capacity)}`, + textStyle: { + fontWeight: 'normal', + fontFamily: 'Roboto' + } + }, + series: [ + { + type: 'gauge', + startAngle: 90, + endAngle: -270, + radius: '70%', + center: ['50%', '45%'], + bottom: '50%', + pointer: { + show: false + }, + progress: { + show: true, + overlap: true, + roundCap: true, + clip: true + }, + splitLine: { + show: false + }, + axisTick: { + show: false + }, + axisLabel: { + show: false, + distance: 50 + }, + detail: { + rich: { + value: { + fontSize: 24, + fontWeight: 400, + fontFamily: 'Roboto', + color: '#1B232A' + }, + percent: { + fontSize: 20, + fontWeight: 400, + color: '#1B232A' + } + }, + formatter: `{value|${usagePercentage}}{percent|%}`, + offsetCenter: [0, 0] + }, + data: capacityData + } + ] + } + + const cardChildren = ( + + + + + + + Ozone Used, + size: size(storageReport.used) + }, + { + key: 'non-ozone-used', + usage: Non Ozone Used, + size: size(storageReport.capacity - storageReport.remaining - storageReport.used) + }, + { + key: 'remaining', + usage: + Remaining + , + size: size(storageReport.remaining) + }, + { + key: 'pre-allocated', + usage: Container Pre-allocated, + size: size(storageReport.committed) + } + ]} /> + + + + ) + + return ( + + ) +} + +export default OverviewStorageCard; \ No newline at end of file diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/overviewCard/overviewSummaryCard.tsx b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/overviewCard/overviewSummaryCard.tsx new file mode 100644 index 000000000000..42c28676dd7a --- /dev/null +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/overviewCard/overviewSummaryCard.tsx @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import React from 'react'; +import { Card, Row, Table } from 'antd'; + +import { ColumnType } from 'antd/es/table'; +import { Link } from 'react-router-dom'; + +// ------------- Types -------------- // +type TableData = { + key: React.Key; + name: string; + value: string; + action?: React.ReactElement | string; +} + +type OverviewTableCardProps = { + title: string; + columns: ColumnType[]; + tableData: TableData[]; + hoverable?: boolean; + loading?: boolean; + data?: string | React.ReactElement; + linkToUrl?: string; + showHeader?: boolean; +} + +// ------------- Styles -------------- // +const cardStyle: React.CSSProperties = { + height: '100%' +} +const cardHeadStyle: React.CSSProperties = { + fontSize: '14px' +} +const cardBodyStyle: React.CSSProperties = { + padding: '16px', + justifyTracks: 'space-between' +} + + +// ------------- Component -------------- // +const OverviewSummaryCard: React.FC = ({ + data = '', + title = '', + hoverable = false, + loading = false, + columns = [], + tableData = [], + linkToUrl = '', + showHeader = false +}) => { + + const titleElement = (linkToUrl) + ? ( +
+ {title} + View Insights +
) + : title + + return ( + + { + (data) && + + {data} + + } +
+ + ) +} + +export default OverviewSummaryCard; \ No newline at end of file diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/storageBar/storageBar.tsx b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/storageBar/storageBar.tsx new file mode 100644 index 000000000000..591b0088b04b --- /dev/null +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/storageBar/storageBar.tsx @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import React from 'react'; +import { Progress } from 'antd'; +import filesize from 'filesize'; +import Icon from '@ant-design/icons'; +import { withRouter } from 'react-router-dom'; +import Tooltip from 'antd/lib/tooltip'; + +import { FilledIcon } from '@/utils/themeIcons'; +import { getCapacityPercent } from '@/utils/common'; +import type { StorageReport } from '@/v2/types/overview.types'; + +const size = filesize.partial({ + standard: 'iec', + round: 1 +}); + +type StorageReportProps = { + showMeta: boolean; +} & StorageReport + + +const StorageBar = (props: StorageReportProps = { + capacity: 0, + used: 0, + remaining: 0, + committed: 0, + showMeta: true, +}) => { + const { capacity, used, remaining, committed, showMeta } = props; + + const nonOzoneUsed = capacity - remaining - used; + const totalUsed = capacity - remaining; + const tooltip = ( + <> +
+ + Ozone Used ({size(used)}) +
+
+ + Non Ozone Used ({size(nonOzoneUsed)}) +
+
+ + Remaining ({size(remaining)}) +
+
+ + Container Pre-allocated ({size(committed)}) +
+ + ); + const metaElement = (showMeta) ? ( +
+ {size(used + nonOzoneUsed)} / {size(capacity)} +
+ ) : <>; + + + return ( +
+ + {metaElement} + + +
+ ); +} + + +export default StorageBar; diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/pages/overview/overview.less b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/pages/overview/overview.less new file mode 100644 index 000000000000..24fc453f4ba5 --- /dev/null +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/pages/overview/overview.less @@ -0,0 +1,26 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +.card-title-div { + display: flex; + justify-content: space-between; +} + +.echart-col { + justify-items: center; +} diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/pages/overview/overview.tsx b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/pages/overview/overview.tsx new file mode 100644 index 000000000000..f511cb3a1618 --- /dev/null +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/pages/overview/overview.tsx @@ -0,0 +1,542 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import React, { useEffect, useState } from 'react'; +import moment from 'moment'; +import filesize from 'filesize'; +import axios, { CanceledError } from 'axios'; +import { Row, Col, Button } from 'antd'; +import { + CheckCircleFilled, + WarningFilled +} from '@ant-design/icons'; +import { Link } from 'react-router-dom'; + +import AutoReloadPanel from '@/components/autoReloadPanel/autoReloadPanel'; +import OverviewSummaryCard from '@/v2/components/overviewCard/overviewSummaryCard'; +import OverviewStorageCard from '@/v2/components/overviewCard/overviewStorageCard'; +import OverviewSimpleCard from '@/v2/components/overviewCard/overviewSimpleCard'; + +import { AutoReloadHelper } from '@/utils/autoReloadHelper'; +import { showDataFetchError } from '@/utils/common'; +import { AxiosGetHelper, cancelRequests, PromiseAllSettledGetHelper } from '@/utils/axiosRequestHelper'; + +import { ClusterStateResponse, OverviewState, StorageReport } from '@/v2/types/overview.types'; + +import './overview.less'; + + +const size = filesize.partial({ round: 1 }); + +const getHealthIcon = (value: string): React.ReactElement => { + const values = value.split('/'); + if (values.length == 2 && values[0] < values[1]) { + return ( + <> +
+ + Unhealthy +
+ + ) + } + return ( +
+ + Healthy +
+ ) +} + +const checkResponseError = (responses: Awaited>[]) => { + const responseError = responses.filter( + (resp) => resp.status === 'rejected' + ); + + if (responseError.length !== 0) { + responseError.forEach((err) => { + if (err.reason.toString().includes("CanceledError")) { + throw new CanceledError('canceled', "ERR_CANCELED"); + } + else { + const reqMethod = err.reason.config.method; + const reqURL = err.reason.config.url + showDataFetchError( + `Failed to ${reqMethod} URL ${reqURL}\n${err.reason.toString()}` + ); + } + }) + } +} + +const getSummaryTableValue = ( + value: number | string | undefined, + colType: 'value' | undefined = undefined +): string => { + if (!value) return 'N/A'; + if (colType === 'value') String(value as string) + return size(value as number) +} + +const Overview: React.FC<{}> = () => { + + let cancelOverviewSignal: AbortController; + let cancelOMDBSyncSignal: AbortController; + + const [state, setState] = useState({ + loading: false, + datanodes: '', + pipelines: 0, + containers: 0, + volumes: 0, + buckets: 0, + keys: 0, + missingContainersCount: 0, + lastRefreshed: 0, + lastUpdatedOMDBDelta: 0, + lastUpdatedOMDBFull: 0, + omStatus: '', + openContainers: 0, + deletedContainers: 0, + openSummarytotalUnrepSize: 0, + openSummarytotalRepSize: 0, + openSummarytotalOpenKeys: 0, + deletePendingSummarytotalUnrepSize: 0, + deletePendingSummarytotalRepSize: 0, + deletePendingSummarytotalDeletedKeys: 0, + scmServiceId: '', + omServiceId: '' + }) + const [storageReport, setStorageReport] = useState({ + capacity: 0, + used: 0, + remaining: 0, + committed: 0 + }) + + // Component mounted, fetch initial data + useEffect(() => { + loadOverviewPageData(); + autoReloadHelper.startPolling(); + return (() => { + // Component will Un-mount + autoReloadHelper.stopPolling(); + cancelRequests([ + cancelOMDBSyncSignal, + cancelOverviewSignal + ]); + }) + }, []) + + const loadOverviewPageData = () => { + setState({ + ...state, + loading: true + }); + + // Cancel any previous pending requests + cancelRequests([ + cancelOMDBSyncSignal, + cancelOverviewSignal + ]); + + const { requests, controller } = PromiseAllSettledGetHelper([ + '/api/v1/clusterState', + '/api/v1/task/status', + '/api/v1/keys/open/summary', + '/api/v1/keys/deletePending/summary' + ], cancelOverviewSignal); + cancelOverviewSignal = controller; + + requests.then(axios.spread(( + clusterStateResponse: Awaited>, + taskstatusResponse: Awaited>, + openResponse: Awaited>, + deletePendingResponse: Awaited> + ) => { + + checkResponseError([ + clusterStateResponse, + taskstatusResponse, + openResponse, + deletePendingResponse + ]); + + const clusterState: ClusterStateResponse = clusterStateResponse.value?.data ?? { + missingContainers: 'N/A', + totalDatanodes: 'N/A', + healthyDatanodes: 'N/A', + pipelines: 'N/A', + storageReport: { + capacity: 0, + used: 0, + remaining: 0, + committed: 0 + }, + containers: 'N/A', + volumes: 'N/A', + buckets: 'N/A', + keys: 'N/A', + openContainers: 'N/A', + deletedContainers: 'N/A', + keysPendingDeletion: 'N/A', + scmServiceId: 'N/A', + omServiceId: 'N/A', + }; + const taskStatus = taskstatusResponse.value?.data ?? 
[{ + taskName: 'N/A', + lastUpdatedTimestamp: 0, + lastUpdatedSeqNumber: 0 + }]; + const missingContainersCount = clusterState.missingContainers; + const omDBDeltaObject = taskStatus && taskStatus.find((item: any) => item.taskName === 'OmDeltaRequest'); + const omDBFullObject = taskStatus && taskStatus.find((item: any) => item.taskName === 'OmSnapshotRequest'); + + setState({ + ...state, + loading: false, + datanodes: `${clusterState.healthyDatanodes}/${clusterState.totalDatanodes}`, + pipelines: clusterState.pipelines, + containers: clusterState.containers, + volumes: clusterState.volumes, + buckets: clusterState.buckets, + keys: clusterState.keys, + missingContainersCount: missingContainersCount, + openContainers: clusterState.openContainers, + deletedContainers: clusterState.deletedContainers, + lastRefreshed: Number(moment()), + lastUpdatedOMDBDelta: omDBDeltaObject?.lastUpdatedTimestamp, + lastUpdatedOMDBFull: omDBFullObject?.lastUpdatedTimestamp, + openSummarytotalUnrepSize: openResponse?.value?.data?.totalUnreplicatedDataSize, + openSummarytotalRepSize: openResponse?.value?.data?.totalReplicatedDataSize, + openSummarytotalOpenKeys: openResponse?.value?.data?.totalOpenKeys, + deletePendingSummarytotalUnrepSize: deletePendingResponse?.value?.data?.totalUnreplicatedDataSize, + deletePendingSummarytotalRepSize: deletePendingResponse?.value?.data?.totalReplicatedDataSize, + deletePendingSummarytotalDeletedKeys: deletePendingResponse?.value?.data?.totalDeletedKeys, + scmServiceId: clusterState?.scmServiceId, + omServiceId: clusterState?.omServiceId + }); + setStorageReport({ + ...storageReport, + ...clusterState.storageReport + }); + })).catch((error: Error) => { + setState({ + ...state, + loading: false + }); + showDataFetchError(error.toString()); + }); + } + + let autoReloadHelper: AutoReloadHelper = new AutoReloadHelper(loadOverviewPageData); + + const syncOmData = () => { + setState({ + ...state, + loading: true + }); + + const { request, controller } = AxiosGetHelper( + '/api/v1/triggerdbsync/om', + cancelOMDBSyncSignal, + 'OM-DB Sync request cancelled because data was updated' + ); + cancelOMDBSyncSignal = controller; + + request.then(omStatusResponse => { + const omStatus = omStatusResponse.data; + setState({ + ...state, + loading: false, + omStatus: omStatus + }); + }).catch((error: Error) => { + setState({ + ...state, + loading: false + }); + showDataFetchError(error.toString()); + }); + }; + + const { + loading, datanodes, pipelines, + containers, volumes, buckets, + openSummarytotalUnrepSize, + openSummarytotalRepSize, + openSummarytotalOpenKeys, + deletePendingSummarytotalUnrepSize, + deletePendingSummarytotalRepSize, + deletePendingSummarytotalDeletedKeys, + keys, missingContainersCount, + lastRefreshed, lastUpdatedOMDBDelta, + lastUpdatedOMDBFull, + omStatus, openContainers, + deletedContainers, scmServiceId, omServiceId + } = state; + + const healthCardIndicators = ( + <> + + Datanodes + {getHealthIcon(datanodes)} + + + Containers + {getHealthIcon(`${(containers - missingContainersCount)}/${containers}`)} + + + ) + + const datanodesLink = ( + + ) + + const containersLink = (missingContainersCount > 0) + ? ( + + ) : ( + + ) + + return ( + <> +
+ Overview + +
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ); +} + +export default Overview; \ No newline at end of file diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/routes-v2.tsx b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/routes-v2.tsx new file mode 100644 index 000000000000..4cdd700d502d --- /dev/null +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/routes-v2.tsx @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import Overview from '@/v2/pages/overview/overview'; + +export const routesV2: IRoute[] = [ + { + path: '/Overview', + component: Overview + } +]; diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/types/overview.types.ts b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/types/overview.types.ts new file mode 100644 index 000000000000..f8390fd43468 --- /dev/null +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/types/overview.types.ts @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +export type ClusterStateResponse = { + missingContainers: number; + totalDatanodes: number; + healthyDatanodes: number; + pipelines: number; + storageReport: StorageReport; + containers: number; + volumes: number; + buckets: number; + keys: number; + openContainers: number; + deletedContainers: number; + keysPendingDeletion: number; + scmServiceId: string; + omServiceId: string; +} + +export type OverviewState = { + loading: boolean; + datanodes: string; + pipelines: number; + containers: number; + volumes: number; + buckets: number; + keys: number; + missingContainersCount: number; + lastRefreshed: number; + lastUpdatedOMDBDelta: number; + lastUpdatedOMDBFull: number; + omStatus: string; + openContainers: number; + deletedContainers: number; + openSummarytotalUnrepSize: number; + openSummarytotalRepSize: number; + openSummarytotalOpenKeys: number; + deletePendingSummarytotalUnrepSize: number; + deletePendingSummarytotalRepSize: number; + deletePendingSummarytotalDeletedKeys: number; + scmServiceId: string; + omServiceId: string; +} + +export type StorageReport = { + capacity: number; + used: number; + remaining: number; + committed: number; +} From a4be83f5237288e12bdfd3efa640f5d55f87b2c6 Mon Sep 17 00:00:00 2001 From: Arafat2198 Date: Thu, 8 Aug 2024 12:26:09 +0530 Subject: [PATCH 03/50] HDDS-11267. Skip duplicate deletion of block in datanode (#7031) --- .../background/BlockDeletingTask.java | 21 +++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/statemachine/background/BlockDeletingTask.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/statemachine/background/BlockDeletingTask.java index 60e5a583551e..af500b2b6b8d 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/statemachine/background/BlockDeletingTask.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/statemachine/background/BlockDeletingTask.java @@ -22,6 +22,8 @@ import java.time.Duration; import java.time.Instant; +import java.util.HashSet; +import java.util.Set; import java.util.LinkedList; import java.util.Objects; import java.util.ArrayList; @@ -424,14 +426,27 @@ private DeleteTransactionStats deleteTransactions( List delBlocks, Handler handler, Table blockDataTable, Container container) throws IOException { + int blocksProcessed = 0; int blocksDeleted = 0; long bytesReleased = 0; List deletedBlocksTxs = new ArrayList<>(); Instant startTime = Instant.now(); + // Track deleted blocks to avoid duplicate deletion + Set deletedBlockSet = new HashSet<>(); + for (DeletedBlocksTransaction entry : delBlocks) { for (Long blkLong : entry.getLocalIDList()) { + // Increment blocksProcessed for every block processed + blocksProcessed++; + + // Check if the block has already been deleted + if (deletedBlockSet.contains(blkLong)) { + LOG.debug("Skipping duplicate deletion for block {}", blkLong); + continue; + } + String blk = containerData.getBlockKey(blkLong); BlockData blkInfo = blockDataTable.get(blk); LOG.debug("Deleting block {}", blkLong); @@ -442,8 +457,6 @@ private DeleteTransactionStats deleteTransactions( LOG.error("Failed to delete files for unreferenced block {} of" + " container {}", blkLong, container.getContainerData().getContainerID(), e); - } finally { - blocksProcessed++; } continue; } @@ -453,14 +466,14 @@ private DeleteTransactionStats deleteTransactions( 
handler.deleteBlock(container, blkInfo); blocksDeleted++; deleted = true; + // Track this block as deleted + deletedBlockSet.add(blkLong); } catch (IOException e) { // TODO: if deletion of certain block retries exceed the certain // number of times, service should skip deleting it, // otherwise invalid numPendingDeletionBlocks could accumulate // beyond the limit and the following deletion will stop. LOG.error("Failed to delete files for block {}", blkLong, e); - } finally { - blocksProcessed++; } if (deleted) { From 9b6c142b85875355fa5ab942d42a0ffc6790daf4 Mon Sep 17 00:00:00 2001 From: "Doroszlai, Attila" <6454655+adoroszlai@users.noreply.github.com> Date: Thu, 8 Aug 2024 10:55:01 +0200 Subject: [PATCH 04/50] HDDS-10874. Create non-caching XceiverClientFactory implementation (#7044) --- .../hadoop/hdds/scm/XceiverClientCreator.java | 117 ++++++++++++++++++ .../hadoop/hdds/scm/XceiverClientFactory.java | 41 +++++- .../hadoop/hdds/scm/XceiverClientManager.java | 117 ++---------------- .../ozone/freon/TestDNRPCLoadGenerator.java | 13 +- .../ozone/freon/DNRPCLoadGenerator.java | 7 +- .../ozone/freon/DatanodeBlockPutter.java | 7 +- .../ozone/freon/DatanodeChunkGenerator.java | 7 +- .../ozone/freon/DatanodeChunkValidator.java | 7 +- 8 files changed, 186 insertions(+), 130 deletions(-) create mode 100644 hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientCreator.java diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientCreator.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientCreator.java new file mode 100644 index 000000000000..cd46bc49a1cb --- /dev/null +++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientCreator.java @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
 + * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdds.scm; + +import com.google.common.base.Preconditions; +import org.apache.hadoop.hdds.conf.ConfigurationSource; +import org.apache.hadoop.hdds.scm.client.ClientTrustManager; +import org.apache.hadoop.hdds.scm.pipeline.Pipeline; +import org.apache.hadoop.hdds.utils.IOUtils; +import org.apache.hadoop.ozone.OzoneConfigKeys; +import org.apache.hadoop.ozone.OzoneSecurityUtil; + +import java.io.IOException; + +/** + * Factory for XceiverClientSpi implementations. Client instances are not cached. + */ +public class XceiverClientCreator implements XceiverClientFactory { + private final ConfigurationSource conf; + private final boolean topologyAwareRead; + private final ClientTrustManager trustManager; + private final boolean securityEnabled; + + public XceiverClientCreator(ConfigurationSource conf) { + this(conf, null); + } + + public XceiverClientCreator(ConfigurationSource conf, ClientTrustManager trustManager) { + this.conf = conf; + this.securityEnabled = OzoneSecurityUtil.isSecurityEnabled(conf); + topologyAwareRead = conf.getBoolean( + OzoneConfigKeys.OZONE_NETWORK_TOPOLOGY_AWARE_READ_KEY, + OzoneConfigKeys.OZONE_NETWORK_TOPOLOGY_AWARE_READ_DEFAULT); + this.trustManager = trustManager; + if (securityEnabled) { + Preconditions.checkNotNull(trustManager); + } + } + + public boolean isSecurityEnabled() { + return securityEnabled; + } + + protected XceiverClientSpi newClient(Pipeline pipeline) throws IOException { + XceiverClientSpi client; + switch (pipeline.getType()) { + case RATIS: + client = XceiverClientRatis.newXceiverClientRatis(pipeline, conf, trustManager); + break; + case STAND_ALONE: + client = new XceiverClientGrpc(pipeline, conf, trustManager); + break; + case EC: + client = new ECXceiverClientGrpc(pipeline, conf, trustManager); + break; + case CHAINED: + default: + throw new IOException("not implemented " + pipeline.getType()); + } + try { + client.connect(); + } catch (Exception e) { + throw new IOException(e); + } + return client; + } + + @Override + public XceiverClientSpi acquireClient(Pipeline pipeline) throws IOException { + return acquireClient(pipeline, false); + } + + @Override + public void releaseClient(XceiverClientSpi xceiverClient, boolean invalidateClient) { + releaseClient(xceiverClient, invalidateClient, false); + } + + @Override + public XceiverClientSpi acquireClientForReadData(Pipeline pipeline) throws IOException { + return acquireClient(pipeline); + } + + @Override + public void releaseClientForReadData(XceiverClientSpi xceiverClient, boolean invalidateClient) { + releaseClient(xceiverClient, invalidateClient, topologyAwareRead); + } + + @Override + public XceiverClientSpi acquireClient(Pipeline pipeline, boolean topologyAware) throws IOException { + return newClient(pipeline); + } + + @Override + public void releaseClient(XceiverClientSpi xceiverClient, boolean invalidateClient, boolean topologyAware) { + IOUtils.closeQuietly(xceiverClient); + } + + @Override + public void close() throws Exception { + // clients are not tracked, closing each client is the responsibility of users of this class + } +} diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientFactory.java 
b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientFactory.java index 36c134b87a4d..b7276d645b44 100644 --- a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientFactory.java +++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientFactory.java @@ -26,16 +26,53 @@ */ public interface XceiverClientFactory extends AutoCloseable { + /** + * Acquires a XceiverClientSpi connected to a container capable of + * storing the specified key. It does not consider the topology + * of the datanodes in the pipeline (e.g. closest datanode to the + * client) + * + * @param pipeline the container pipeline for the client connection + * @return XceiverClientSpi connected to a container + * @throws IOException if a XceiverClientSpi cannot be acquired + */ XceiverClientSpi acquireClient(Pipeline pipeline) throws IOException; - void releaseClient(XceiverClientSpi xceiverClient, boolean invalidateClient); + /** + * Releases a XceiverClientSpi after use. + * + * @param client client to release + * @param invalidateClient if true, invalidates the client in cache + */ + void releaseClient(XceiverClientSpi client, boolean invalidateClient); + /** + * Acquires a XceiverClientSpi connected to a container for read. + * + * @param pipeline the container pipeline for the client connection + * @return XceiverClientSpi connected to a container + * @throws IOException if a XceiverClientSpi cannot be acquired + */ XceiverClientSpi acquireClientForReadData(Pipeline pipeline) throws IOException; - void releaseClientForReadData(XceiverClientSpi xceiverClient, + /** + * Releases a read XceiverClientSpi after use. + * + * @param client client to release + * @param invalidateClient if true, invalidates the client in cache + */ + void releaseClientForReadData(XceiverClientSpi client, boolean invalidateClient); + /** + * Acquires a XceiverClientSpi connected to a container capable of + * storing the specified key. + * + * @param pipeline the container pipeline for the client connection + * @return XceiverClientSpi connected to a container + * @throws IOException if a XceiverClientSpi cannot be acquired + */ XceiverClientSpi acquireClient(Pipeline pipeline, boolean topologyAware) throws IOException; diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientManager.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientManager.java index 2190391d18f6..285a47ec574a 100644 --- a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientManager.java +++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientManager.java @@ -19,7 +19,6 @@ package org.apache.hadoop.hdds.scm; import java.io.IOException; -import java.util.concurrent.Callable; import java.util.concurrent.TimeUnit; import org.apache.hadoop.hdds.conf.Config; @@ -30,8 +29,6 @@ import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.scm.client.ClientTrustManager; import org.apache.hadoop.hdds.scm.pipeline.Pipeline; -import org.apache.hadoop.ozone.OzoneConfigKeys; -import org.apache.hadoop.ozone.OzoneSecurityUtil; import org.apache.hadoop.security.UserGroupInformation; import com.google.common.annotations.VisibleForTesting; @@ -61,18 +58,14 @@ * without reestablishing connection. But the connection will be closed if * not being used for a period of time. 
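Because the XceiverClientCreator introduced earlier in this patch does not cache connections, every acquired client must be released by the caller; close() on the factory itself tracks nothing. A minimal usage sketch, assuming an OzoneConfiguration and a Pipeline obtained from SCM (for example via StorageContainerLocationProtocol#listPipelines()) are already available, with error handling elided:

import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.scm.XceiverClientCreator;
import org.apache.hadoop.hdds.scm.XceiverClientFactory;
import org.apache.hadoop.hdds.scm.XceiverClientSpi;
import org.apache.hadoop.hdds.scm.pipeline.Pipeline;

public final class NonCachingClientUsageSketch {

  // Sketch only: 'pipeline' would normally come from SCM.
  static void talkToPipeline(OzoneConfiguration conf, Pipeline pipeline) throws Exception {
    try (XceiverClientFactory factory = new XceiverClientCreator(conf)) {
      XceiverClientSpi client = factory.acquireClient(pipeline);
      try {
        // ... issue container commands through 'client' here ...
      } finally {
        // With the non-caching factory, releasing simply closes the underlying connection.
        factory.releaseClient(client, false);
      }
    }
  }
}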
*/ -public class XceiverClientManager implements XceiverClientFactory { +public class XceiverClientManager extends XceiverClientCreator { private static final Logger LOG = LoggerFactory.getLogger(XceiverClientManager.class); - //TODO : change this to SCM configuration class - private final ConfigurationSource conf; private final Cache clientCache; private final CacheMetrics cacheMetrics; - private ClientTrustManager trustManager; private static XceiverClientMetrics metrics; - private boolean isSecurityEnabled; - private final boolean topologyAwareRead; + /** * Creates a new XceiverClientManager for non secured ozone cluster. * For security enabled ozone cluster, client should use the other constructor @@ -87,15 +80,10 @@ public XceiverClientManager(ConfigurationSource conf) throws IOException { public XceiverClientManager(ConfigurationSource conf, ScmClientConfig clientConf, ClientTrustManager trustManager) throws IOException { + super(conf, trustManager); Preconditions.checkNotNull(clientConf); Preconditions.checkNotNull(conf); long staleThresholdMs = clientConf.getStaleThreshold(MILLISECONDS); - this.conf = conf; - this.isSecurityEnabled = OzoneSecurityUtil.isSecurityEnabled(conf); - if (isSecurityEnabled) { - Preconditions.checkNotNull(trustManager); - this.trustManager = trustManager; - } this.clientCache = CacheBuilder.newBuilder() .recordStats() @@ -114,9 +102,6 @@ public void onRemoval( } } }).build(); - topologyAwareRead = conf.getBoolean( - OzoneConfigKeys.OZONE_NETWORK_TOPOLOGY_AWARE_READ_KEY, - OzoneConfigKeys.OZONE_NETWORK_TOPOLOGY_AWARE_READ_DEFAULT); cacheMetrics = CacheMetrics.create(clientCache, this); } @@ -127,50 +112,10 @@ public Cache getClientCache() { } /** - * Acquires a XceiverClientSpi connected to a container capable of - * storing the specified key. It does not consider the topology - * of the datanodes in the pipeline (e.g. closest datanode to the - * client) - * - * If there is already a cached XceiverClientSpi, simply return - * the cached otherwise create a new one. - * - * @param pipeline the container pipeline for the client connection - * @return XceiverClientSpi connected to a container - * @throws IOException if a XceiverClientSpi cannot be acquired - */ - @Override - public XceiverClientSpi acquireClient(Pipeline pipeline) - throws IOException { - return acquireClient(pipeline, false); - } - - /** - * Acquires a XceiverClientSpi connected to a container for read. - * - * If there is already a cached XceiverClientSpi, simply return - * the cached otherwise create a new one. - * - * @param pipeline the container pipeline for the client connection - * @return XceiverClientSpi connected to a container - * @throws IOException if a XceiverClientSpi cannot be acquired - */ - @Override - public XceiverClientSpi acquireClientForReadData(Pipeline pipeline) - throws IOException { - return acquireClient(pipeline, topologyAwareRead); - } - - /** - * Acquires a XceiverClientSpi connected to a container capable of - * storing the specified key. + * {@inheritDoc} * * If there is already a cached XceiverClientSpi, simply return * the cached otherwise create a new one. - * - * @param pipeline the container pipeline for the client connection - * @return XceiverClientSpi connected to a container - * @throws IOException if a XceiverClientSpi cannot be acquired */ @Override public XceiverClientSpi acquireClient(Pipeline pipeline, @@ -187,29 +132,6 @@ public XceiverClientSpi acquireClient(Pipeline pipeline, } } - /** - * Releases a XceiverClientSpi after use. 
- * - * @param client client to release - * @param invalidateClient if true, invalidates the client in cache - */ - @Override - public void releaseClient(XceiverClientSpi client, boolean invalidateClient) { - releaseClient(client, invalidateClient, false); - } - - /** - * Releases a read XceiverClientSpi after use. - * - * @param client client to release - * @param invalidateClient if true, invalidates the client in cache - */ - @Override - public void releaseClientForReadData(XceiverClientSpi client, - boolean invalidateClient) { - releaseClient(client, invalidateClient, topologyAwareRead); - } - @Override public void releaseClient(XceiverClientSpi client, boolean invalidateClient, boolean topologyAware) { @@ -227,39 +149,16 @@ public void releaseClient(XceiverClientSpi client, boolean invalidateClient, } } - private XceiverClientSpi getClient(Pipeline pipeline, boolean topologyAware) + protected XceiverClientSpi getClient(Pipeline pipeline, boolean topologyAware) throws IOException { - HddsProtos.ReplicationType type = pipeline.getType(); try { // create different client different pipeline node based on // network topology String key = getPipelineCacheKey(pipeline, topologyAware); - return clientCache.get(key, new Callable() { - @Override - public XceiverClientSpi call() throws Exception { - XceiverClientSpi client = null; - switch (type) { - case RATIS: - client = XceiverClientRatis.newXceiverClientRatis(pipeline, conf, - trustManager); - break; - case STAND_ALONE: - client = new XceiverClientGrpc(pipeline, conf, trustManager); - break; - case EC: - client = new ECXceiverClientGrpc(pipeline, conf, trustManager); - break; - case CHAINED: - default: - throw new IOException("not implemented " + pipeline.getType()); - } - client.connect(); - return client; - } - }); + return clientCache.get(key, () -> newClient(pipeline)); } catch (Exception e) { throw new IOException( - "Exception getting XceiverClient: " + e.toString(), e); + "Exception getting XceiverClient: " + e, e); } } @@ -293,7 +192,7 @@ private String getPipelineCacheKey(Pipeline pipeline, } } - if (isSecurityEnabled) { + if (isSecurityEnabled()) { // Append user short name to key to prevent a different user // from using same instance of xceiverClient. 
try { diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/freon/TestDNRPCLoadGenerator.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/freon/TestDNRPCLoadGenerator.java index f209783c7453..33d59f101ebc 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/freon/TestDNRPCLoadGenerator.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/freon/TestDNRPCLoadGenerator.java @@ -22,7 +22,8 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.ratis.conf.RatisClientConfig; -import org.apache.hadoop.hdds.scm.XceiverClientManager; +import org.apache.hadoop.hdds.scm.XceiverClientCreator; +import org.apache.hadoop.hdds.scm.XceiverClientFactory; import org.apache.hadoop.hdds.scm.XceiverClientSpi; import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline; import org.apache.hadoop.hdds.scm.protocolPB.StorageContainerLocationProtocolClientSideTranslatorPB; @@ -79,11 +80,11 @@ private static void startCluster(OzoneConfiguration conf) throws Exception { storageContainerLocationClient.allocateContainer( SCMTestUtils.getReplicationType(conf), HddsProtos.ReplicationFactor.ONE, OzoneConsts.OZONE); - XceiverClientManager xceiverClientManager = new XceiverClientManager(conf); - XceiverClientSpi client = xceiverClientManager - .acquireClient(container.getPipeline()); - ContainerProtocolCalls.createContainer(client, - container.getContainerInfo().getContainerID(), null); + try (XceiverClientFactory factory = new XceiverClientCreator(conf); + XceiverClientSpi client = factory.acquireClient(container.getPipeline())) { + ContainerProtocolCalls.createContainer(client, + container.getContainerInfo().getContainerID(), null); + } } static void shutdownCluster() { diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/DNRPCLoadGenerator.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/DNRPCLoadGenerator.java index 3b4d25cddaf7..f83b2a1a4a9b 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/DNRPCLoadGenerator.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/DNRPCLoadGenerator.java @@ -24,8 +24,8 @@ import org.apache.hadoop.hdds.client.StandaloneReplicationConfig; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.hdds.scm.XceiverClientCreator; import org.apache.hadoop.hdds.scm.XceiverClientFactory; -import org.apache.hadoop.hdds.scm.XceiverClientManager; import org.apache.hadoop.hdds.scm.XceiverClientSpi; import org.apache.hadoop.hdds.scm.cli.ContainerOperationClient; import org.apache.hadoop.hdds.scm.client.ClientTrustManager; @@ -152,11 +152,10 @@ public Void call() throws Exception { XceiverClientFactory xceiverClientManager; if (OzoneSecurityUtil.isSecurityEnabled(configuration)) { CACertificateProvider caCerts = () -> HAUtils.buildCAX509List(null, configuration); - xceiverClientManager = new XceiverClientManager(configuration, - configuration.getObject(XceiverClientManager.ScmClientConfig.class), + xceiverClientManager = new XceiverClientCreator(configuration, new ClientTrustManager(caCerts, null)); } else { - xceiverClientManager = new XceiverClientManager(configuration); + xceiverClientManager = new XceiverClientCreator(configuration); } clients = new ArrayList<>(numClients); for (int i = 0; i < numClients; 
i++) { diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/DatanodeBlockPutter.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/DatanodeBlockPutter.java index f6a5c5965018..3e613d2d2c54 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/DatanodeBlockPutter.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/DatanodeBlockPutter.java @@ -29,7 +29,8 @@ import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandRequestProto; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.PutBlockRequestProto; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Type; -import org.apache.hadoop.hdds.scm.XceiverClientManager; +import org.apache.hadoop.hdds.scm.XceiverClientCreator; +import org.apache.hadoop.hdds.scm.XceiverClientFactory; import org.apache.hadoop.hdds.scm.XceiverClientSpi; import org.apache.hadoop.hdds.scm.pipeline.Pipeline; import org.apache.hadoop.hdds.scm.protocol.StorageContainerLocationProtocol; @@ -99,8 +100,8 @@ public Void call() throws Exception { Pipeline pipeline = findPipelineForTest(pipelineId, scmLocationClient, LOG); - try (XceiverClientManager xceiverClientManager = - new XceiverClientManager(ozoneConf)) { + try (XceiverClientFactory xceiverClientManager = + new XceiverClientCreator(ozoneConf)) { client = xceiverClientManager.acquireClient(pipeline); timer = getMetrics().timer("put-block"); diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/DatanodeChunkGenerator.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/DatanodeChunkGenerator.java index 23988106d47f..7f0f5bb9e57b 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/DatanodeChunkGenerator.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/DatanodeChunkGenerator.java @@ -36,7 +36,8 @@ import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.DatanodeBlockID; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Type; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.WriteChunkRequestProto; -import org.apache.hadoop.hdds.scm.XceiverClientManager; +import org.apache.hadoop.hdds.scm.XceiverClientCreator; +import org.apache.hadoop.hdds.scm.XceiverClientFactory; import org.apache.hadoop.hdds.scm.XceiverClientReply; import org.apache.hadoop.hdds.scm.XceiverClientSpi; import org.apache.hadoop.hdds.scm.pipeline.Pipeline; @@ -117,8 +118,8 @@ public Void call() throws Exception { try (StorageContainerLocationProtocol scmLocationClient = createStorageContainerLocationClient(ozoneConf); - XceiverClientManager xceiverClientManager = - new XceiverClientManager(ozoneConf)) { + XceiverClientFactory xceiverClientManager = + new XceiverClientCreator(ozoneConf)) { List pipelinesFromSCM = scmLocationClient.listPipelines(); Pipeline firstPipeline; init(); diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/DatanodeChunkValidator.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/DatanodeChunkValidator.java index 2bbf8b6d5b24..0b1e34efe784 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/DatanodeChunkValidator.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/DatanodeChunkValidator.java @@ -24,7 +24,8 @@ import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import 
org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandRequestProto; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandResponseProto; -import org.apache.hadoop.hdds.scm.XceiverClientManager; +import org.apache.hadoop.hdds.scm.XceiverClientCreator; +import org.apache.hadoop.hdds.scm.XceiverClientFactory; import org.apache.hadoop.hdds.scm.XceiverClientSpi; import org.apache.hadoop.hdds.scm.pipeline.Pipeline; import org.apache.hadoop.hdds.scm.protocol.StorageContainerLocationProtocol; @@ -91,8 +92,8 @@ public Void call() throws Exception { createStorageContainerLocationClient(ozoneConf)) { Pipeline pipeline = findPipelineForTest(pipelineId, scmClient, LOG); - try (XceiverClientManager xceiverClientManager = - new XceiverClientManager(ozoneConf)) { + try (XceiverClientFactory xceiverClientManager = + new XceiverClientCreator(ozoneConf)) { xceiverClient = xceiverClientManager.acquireClientForReadData(pipeline); checksumProtobuf = ContainerProtos.ChecksumData.newBuilder() From ba6e796a2d847cecb2de6f09f4440c471fd9ac21 Mon Sep 17 00:00:00 2001 From: Istvan Fajth Date: Fri, 9 Aug 2024 19:39:31 +0200 Subject: [PATCH 05/50] HDDS-11028. Replace PKCS10CertificationRequest usage in CertificateClient (#6842) --- .../certificate/client/CertificateClient.java | 11 ++-- .../utils/CertificateSignRequest.java | 59 ++++++++++--------- .../client/DNCertificateClient.java | 15 ++--- .../client/DefaultCertificateClient.java | 42 ++++--------- .../client/SCMCertificateClient.java | 33 +++++------ .../authority/TestDefaultCAServer.java | 20 +++++-- .../authority/TestDefaultProfile.java | 44 +++++--------- .../client/CertificateClientTestImpl.java | 19 +++--- .../client/TestDefaultCertificateClient.java | 15 +---- .../utils/TestCertificateSignRequest.java | 31 ++++------ .../scm/security/RootCARotationManager.java | 2 +- .../hadoop/ozone/TestSecureOzoneCluster.java | 3 +- .../ozone/security/OMCertificateClient.java | 15 ++--- .../security/ReconCertificateClient.java | 15 ++--- 14 files changed, 144 insertions(+), 180 deletions(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/CertificateClient.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/CertificateClient.java index 0c23a846563a..79db6985e76f 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/CertificateClient.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/CertificateClient.java @@ -22,6 +22,7 @@ import org.apache.hadoop.hdds.security.exception.OzoneSecurityException; import org.apache.hadoop.hdds.security.ssl.ReloadingX509KeyManager; import org.apache.hadoop.hdds.security.ssl.ReloadingX509TrustManager; +import org.apache.hadoop.hdds.security.exception.SCMSecurityException; import org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateSignRequest; import org.apache.hadoop.hdds.security.x509.exception.CertificateException; @@ -156,13 +157,13 @@ boolean verifySignature(byte[] data, byte[] signature, X509Certificate cert) throws CertificateException; /** - * Returns a CSR builder that can be used to create a Certificate sigining - * request. + * Returns a CertificateSignRequest Builder object, that can be used to configure the sign request + * which we use to get a signed certificate from our CA server implementation. 
* - * @return CertificateSignRequest.Builder + * @return CertificateSignRequest.Builder a {@link CertificateSignRequest} + * based on which the certificate may be issued to this client. */ - CertificateSignRequest.Builder getCSRBuilder() - throws CertificateException; + CertificateSignRequest.Builder configureCSRBuilder() throws SCMSecurityException; default void assertValidKeysAndCertificate() throws OzoneSecurityException { try { diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/utils/CertificateSignRequest.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/utils/CertificateSignRequest.java index c1cc67121545..1f04e868a851 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/utils/CertificateSignRequest.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/utils/CertificateSignRequest.java @@ -157,34 +157,43 @@ public static ASN1Set getPkcs9ExtRequest(PKCS10CertificationRequest csr) throw new CertificateException("No PKCS#9 extension found in CSR"); } - private PKCS10CertificationRequest generateCSR() throws - OperatorCreationException { - X500Name dnName = getDistinguishedName(subject, scmID, clusterID); - PKCS10CertificationRequestBuilder p10Builder = - new JcaPKCS10CertificationRequestBuilder(dnName, keyPair.getPublic()); - - ContentSigner contentSigner = - new JcaContentSignerBuilder(config.getSignatureAlgo()) - .setProvider(config.getProvider()) - .build(keyPair.getPrivate()); - - if (extensions != null) { - p10Builder.addAttribute( - PKCSObjectIdentifiers.pkcs_9_at_extensionRequest, extensions); - } - return p10Builder.build(contentSigner); - } - public static String getEncodedString(PKCS10CertificationRequest request) - throws IOException { - PemObject pemObject = - new PemObject("CERTIFICATE REQUEST", request.getEncoded()); + /** + * Encodes this CertificateSignRequest to a String representation, that can be transferred over the wire to + * the CA server for signing. + * + * @return the Certificate Sign Request encoded to a String + * @throws IOException if an error occurs during encoding. + */ + public String toEncodedFormat() throws IOException { StringWriter str = new StringWriter(); try (JcaPEMWriter pemWriter = new JcaPEMWriter(str)) { + PemObject pemObject = new PemObject("CERTIFICATE REQUEST", generateCSR().getEncoded()); pemWriter.writeObject(pemObject); } return str.toString(); } + //TODO: this should be private once the server side of removing PKCS10CertReq class is done. + public PKCS10CertificationRequest generateCSR() throws IOException { + X500Name dnName = getDistinguishedName(subject, scmID, clusterID); + PKCS10CertificationRequestBuilder p10Builder = + new JcaPKCS10CertificationRequestBuilder(dnName, keyPair.getPublic()); + + try { + ContentSigner contentSigner = + new JcaContentSignerBuilder(config.getSignatureAlgo()) + .setProvider(config.getProvider()) + .build(keyPair.getPrivate()); + + if (extensions != null) { + p10Builder.addAttribute( + PKCSObjectIdentifiers.pkcs_9_at_extensionRequest, extensions); + } + return p10Builder.build(contentSigner); + } catch (OperatorCreationException e) { + throw new IOException(e); + } + } /** * Gets a CertificateRequest Object from PEM encoded CSR. 
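Since build() now returns a CertificateSignRequest rather than a Bouncy Castle PKCS10CertificationRequest, client code obtains the PEM payload for the SCM security protocol through toEncodedFormat(). A small standalone sketch of that flow, assuming only an Ozone configuration with a scratch metadata directory; the subject and IDs are illustrative values, and a real client would use configureCSRBuilder() to pre-populate the builder:

import java.security.KeyPair;
import org.apache.hadoop.hdds.HddsConfigKeys;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.security.SecurityConfig;
import org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateSignRequest;
import org.apache.hadoop.hdds.security.x509.keys.HDDSKeyGenerator;

public final class CsrEncodingSketch {
  public static void main(String[] args) throws Exception {
    OzoneConfiguration conf = new OzoneConfiguration();
    conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, "/tmp/ozone-csr-sketch"); // scratch dir for SecurityConfig
    SecurityConfig securityConfig = new SecurityConfig(conf);

    KeyPair keyPair = new HDDSKeyGenerator(securityConfig).generateKey();

    // build() now hands back the request object itself instead of a PKCS10CertificationRequest.
    CertificateSignRequest csr = new CertificateSignRequest.Builder()
        .setSubject("example-host")     // illustrative subject
        .setScmID("scm-1")              // illustrative SCM id
        .setClusterID("cluster-1")      // illustrative cluster id
        .setKey(keyPair)
        .setConfiguration(securityConfig)
        .build();

    // PEM-encoded form that is sent to SCM for signing.
    String pemEncodedCsr = csr.toEncodedFormat();
    System.out.println(pemEncodedCsr);
  }
}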
@@ -413,7 +422,7 @@ private Extensions createExtensions() throws IOException { extensions.toArray(new Extension[extensions.size()])); } - public PKCS10CertificationRequest build() throws SCMSecurityException { + public CertificateSignRequest build() throws SCMSecurityException { Preconditions.checkNotNull(key, "KeyPair cannot be null"); Preconditions.checkArgument(StringUtils.isNotBlank(subject), "Subject " + "cannot be blank"); @@ -421,15 +430,11 @@ public PKCS10CertificationRequest build() throws SCMSecurityException { try { CertificateSignRequest csr = new CertificateSignRequest(subject, scmID, clusterID, key, config, createExtensions()); - return csr.generateCSR(); + return csr; } catch (IOException ioe) { throw new CertificateException(String.format("Unable to create " + "extension for certificate sign request for %s.", getDistinguishedName(subject, scmID, clusterID)), ioe.getCause()); - } catch (OperatorCreationException ex) { - throw new CertificateException(String.format("Unable to create " + - "certificate sign request for %s.", - getDistinguishedName(subject, scmID, clusterID)), ex.getCause()); } } } diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DNCertificateClient.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DNCertificateClient.java index e9f7c4465ddb..7ce895760ad9 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DNCertificateClient.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DNCertificateClient.java @@ -23,10 +23,10 @@ import org.apache.hadoop.hdds.protocol.proto.SCMSecurityProtocolProtos.SCMGetCertResponseProto; import org.apache.hadoop.hdds.protocolPB.SCMSecurityProtocolClientSideTranslatorPB; import org.apache.hadoop.hdds.security.SecurityConfig; +import org.apache.hadoop.hdds.security.exception.SCMSecurityException; import org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateSignRequest; import org.apache.hadoop.hdds.security.x509.exception.CertificateException; import org.apache.hadoop.security.UserGroupInformation; -import org.bouncycastle.pkcs.PKCS10CertificationRequest; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -35,7 +35,6 @@ import java.security.KeyPair; import java.util.function.Consumer; -import static org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateSignRequest.getEncodedString; import static org.apache.hadoop.hdds.security.x509.exception.CertificateException.ErrorCode.CSR_ERROR; /** @@ -70,9 +69,9 @@ public DNCertificateClient( * @return CertificateSignRequest.Builder */ @Override - public CertificateSignRequest.Builder getCSRBuilder() - throws CertificateException { - CertificateSignRequest.Builder builder = super.getCSRBuilder(); + public CertificateSignRequest.Builder configureCSRBuilder() + throws SCMSecurityException { + CertificateSignRequest.Builder builder = super.configureCSRBuilder(); try { String hostname = InetAddress.getLocalHost().getCanonicalHostName(); @@ -93,10 +92,8 @@ public CertificateSignRequest.Builder getCSRBuilder() } @Override - public SCMGetCertResponseProto getCertificateSignResponse( - PKCS10CertificationRequest csr) throws IOException { - return getScmSecureClient().getDataNodeCertificateChain( - dn.getProtoBufMessage(), getEncodedString(csr)); + public SCMGetCertResponseProto sign(CertificateSignRequest csr) throws IOException { + return 
getScmSecureClient().getDataNodeCertificateChain(dn.getProtoBufMessage(), csr.toEncodedFormat()); } @Override diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DefaultCertificateClient.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DefaultCertificateClient.java index 2fb258e1a29e..70a475982bd4 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DefaultCertificateClient.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DefaultCertificateClient.java @@ -76,6 +76,7 @@ import org.apache.hadoop.hdds.security.SecurityConfig; import org.apache.hadoop.hdds.security.ssl.ReloadingX509KeyManager; import org.apache.hadoop.hdds.security.ssl.ReloadingX509TrustManager; +import org.apache.hadoop.hdds.security.exception.SCMSecurityException; import org.apache.hadoop.hdds.security.x509.certificate.authority.CAType; import org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateCodec; import org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateSignRequest; @@ -99,7 +100,6 @@ import static org.apache.hadoop.hdds.security.x509.exception.CertificateException.ErrorCode.RENEW_ERROR; import static org.apache.hadoop.hdds.security.x509.exception.CertificateException.ErrorCode.ROLLBACK_ERROR; -import org.bouncycastle.pkcs.PKCS10CertificationRequest; import org.slf4j.Logger; /** @@ -567,15 +567,12 @@ private boolean verifySignature(byte[] data, byte[] signature, * @return CertificateSignRequest.Builder */ @Override - public CertificateSignRequest.Builder getCSRBuilder() - throws CertificateException { - CertificateSignRequest.Builder builder = - new CertificateSignRequest.Builder() - .setConfiguration(securityConfig) - .addInetAddresses() - .setDigitalEncryption(true) - .setDigitalSignature(true); - return builder; + public CertificateSignRequest.Builder configureCSRBuilder() throws SCMSecurityException { + return new CertificateSignRequest.Builder() + .setConfiguration(securityConfig) + .addInetAddresses() + .setDigitalEncryption(true) + .setDigitalSignature(true); } /** @@ -805,7 +802,8 @@ protected void recoverStateIfNeeded(InitResponse state) throws IOException { getLogger().info("Initialization successful, case:{}.", state); break; case GETCERT: - String certId = signAndStoreCertificate(getCSRBuilder().build()); + Path certLocation = securityConfig.getCertificateLocation(getComponentName()); + String certId = signAndStoreCertificate(configureCSRBuilder().build(), certLocation, false); if (certIdSaveCallback != null) { certIdSaveCallback.accept(certId); } else { @@ -1152,7 +1150,7 @@ public String renewAndStoreKeyAndCertificate(boolean force) // Get certificate signed String newCertSerialId; try { - CertificateSignRequest.Builder csrBuilder = getCSRBuilder(); + CertificateSignRequest.Builder csrBuilder = configureCSRBuilder(); csrBuilder.setKey(newKeyPair); newCertSerialId = signAndStoreCertificate(csrBuilder.build(), Paths.get(newCertPath), true); @@ -1320,20 +1318,12 @@ private synchronized String updateCertSerialId(String newCertSerialId) { return certSerialId; } - protected String signAndStoreCertificate( - PKCS10CertificationRequest request, Path certificatePath) - throws CertificateException { - return signAndStoreCertificate(request, certificatePath, false); - } - - protected abstract SCMGetCertResponseProto getCertificateSignResponse( - PKCS10CertificationRequest request) 
throws IOException; + protected abstract SCMGetCertResponseProto sign(CertificateSignRequest request) throws IOException; - protected String signAndStoreCertificate( - PKCS10CertificationRequest request, Path certificatePath, boolean renew) + protected String signAndStoreCertificate(CertificateSignRequest csr, Path certificatePath, boolean renew) throws CertificateException { try { - SCMGetCertResponseProto response = getCertificateSignResponse(request); + SCMGetCertResponseProto response = sign(csr); // Persist certificates. if (response.hasX509CACertificate()) { @@ -1371,12 +1361,6 @@ private void getAndStoreAllRootCAs(CertificateCodec certCodec, boolean renew) } } - public String signAndStoreCertificate( - PKCS10CertificationRequest request) throws CertificateException { - return updateCertSerialId(signAndStoreCertificate(request, - securityConfig.getCertificateLocation(getComponentName()))); - } - public SCMSecurityProtocolClientSideTranslatorPB getScmSecureClient() { return scmSecurityClient; } diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/SCMCertificateClient.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/SCMCertificateClient.java index 8bad4f18ade5..ae0c0f0db84e 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/SCMCertificateClient.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/SCMCertificateClient.java @@ -24,6 +24,7 @@ import org.apache.hadoop.hdds.protocolPB.SCMSecurityProtocolClientSideTranslatorPB; import org.apache.hadoop.hdds.security.SecurityConfig; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.security.exception.SCMSecurityException; import org.apache.hadoop.hdds.security.x509.certificate.authority.CAType; import org.apache.hadoop.hdds.security.x509.certificate.authority.CertificateServer; import org.apache.hadoop.hdds.security.x509.certificate.authority.CertificateStore; @@ -32,10 +33,8 @@ import org.apache.hadoop.hdds.security.x509.certificate.authority.profile.PKIProfile; import org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateCodec; import org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateSignRequest; -import org.apache.hadoop.hdds.security.x509.exception.CertificateException; import org.apache.hadoop.ozone.OzoneConsts; import org.apache.hadoop.ozone.OzoneSecurityUtil; -import org.bouncycastle.pkcs.PKCS10CertificationRequest; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -56,7 +55,6 @@ import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeType.SCM; import static org.apache.hadoop.hdds.security.x509.certificate.authority.CertificateApprover.ApprovalType.KERBEROS_TRUSTED; -import static org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateSignRequest.getEncodedString; import static org.apache.hadoop.ozone.OzoneConsts.SCM_ROOT_CA_COMPONENT_NAME; import static org.apache.hadoop.ozone.OzoneConsts.SCM_ROOT_CA_PREFIX; import static org.apache.hadoop.ozone.OzoneConsts.SCM_SUB_CA_PREFIX; @@ -137,14 +135,14 @@ public SCMCertificateClient( * * @return CertificateSignRequest.Builder */ - public CertificateSignRequest.Builder getCSRBuilder() - throws CertificateException { + public CertificateSignRequest.Builder configureCSRBuilder() + throws SCMSecurityException { String subject = SCM_SUB_CA_PREFIX + scmHostname; LOG.info("Creating csr for 
SCM->hostName:{},scmId:{},clusterId:{}," + "subject:{}", scmHostname, scmId, cId, subject); - return super.getCSRBuilder() + return super.configureCSRBuilder() .setSubject(subject) .setScmID(scmId) .setClusterID(cId) @@ -164,15 +162,13 @@ public Logger getLogger() { } @Override - protected SCMGetCertResponseProto getCertificateSignResponse( - PKCS10CertificationRequest request) { - throw new UnsupportedOperationException("getCertSignResponse of " + - " SCMCertificateClient is not supported currently"); + protected SCMGetCertResponseProto sign(CertificateSignRequest request) { + throw new UnsupportedOperationException("Invalid call to SCMCertificateClient#sign(CertificateSignRequest. " + + "SCM certificate client uses a different mechanism to sign the SCMs' certificate."); } @Override - public String signAndStoreCertificate(PKCS10CertificationRequest request, - Path certPath, boolean renew) throws CertificateException { + public String signAndStoreCertificate(CertificateSignRequest csr, Path certPath, boolean renew) { try { HddsProtos.ScmNodeDetailsProto scmNodeDetailsProto = HddsProtos.ScmNodeDetailsProto.newBuilder() @@ -182,8 +178,7 @@ public String signAndStoreCertificate(PKCS10CertificationRequest request, // Get SCM sub CA cert. SCMGetCertResponseProto response = - getScmSecureClient().getSCMCertChain(scmNodeDetailsProto, - getEncodedString(request), true); + getScmSecureClient().getSCMCertChain(scmNodeDetailsProto, csr.toEncodedFormat(), true); CertificateCodec certCodec = new CertificateCodec( getSecurityConfig(), certPath); @@ -310,7 +305,7 @@ protected void recoverStateIfNeeded(InitResponse state) throws IOException { private void getRootCASignedSCMCert() { try { // Generate CSR. - PKCS10CertificationRequest csr = getCSRBuilder().build(); + CertificateSignRequest csr = configureCSRBuilder().build(); HddsProtos.ScmNodeDetailsProto scmNodeDetailsProto = HddsProtos.ScmNodeDetailsProto.newBuilder() .setClusterId(cId) @@ -319,7 +314,7 @@ private void getRootCASignedSCMCert() { // Get SCM sub CA cert. SCMGetCertResponseProto response = getScmSecureClient(). - getSCMCertChain(scmNodeDetailsProto, getEncodedString(csr), false); + getSCMCertChain(scmNodeDetailsProto, csr.toEncodedFormat(), false); String pemEncodedCert = response.getX509Certificate(); // Store SCM sub CA and root CA certificate. @@ -357,9 +352,11 @@ private void getPrimarySCMSelfSignedCert() { String pemEncodedRootCert = CertificateCodec.getPEMEncodedString(rootCACertificatePath); - PKCS10CertificationRequest csr = getCSRBuilder().build(); + CertificateSignRequest csr = configureCSRBuilder().build(); String subCaSerialId = BigInteger.ONE.add(BigInteger.ONE).toString(); - CertPath scmSubCACertPath = rootCAServer.requestCertificate(csr, KERBEROS_TRUSTED, SCM, subCaSerialId).get(); + //TODO: do not use generateCSR() here once the server side change is also done. 
+ CertPath scmSubCACertPath = + rootCAServer.requestCertificate(csr.generateCSR(), KERBEROS_TRUSTED, SCM, subCaSerialId).get(); String pemEncodedCert = CertificateCodec.getPEMEncodedString(scmSubCACertPath); storeCertificate(pemEncodedRootCert, CAType.SUBORDINATE); diff --git a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/authority/TestDefaultCAServer.java b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/authority/TestDefaultCAServer.java index 56f84b9e3be9..e029006a6af0 100644 --- a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/authority/TestDefaultCAServer.java +++ b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/authority/TestDefaultCAServer.java @@ -153,6 +153,7 @@ public void testRequestCertificate() throws Exception { String clusterId = RandomStringUtils.randomAlphabetic(4); KeyPair keyPair = new HDDSKeyGenerator(securityConfig).generateKey(); + //TODO: generateCSR! PKCS10CertificationRequest csr = new CertificateSignRequest.Builder() .addDnsName("hadoop.apache.org") .addIpAddress("8.8.8.8") @@ -163,7 +164,8 @@ public void testRequestCertificate() throws Exception { .setSubject("Ozone Cluster") .setConfiguration(securityConfig) .setKey(keyPair) - .build(); + .build() + .generateCSR(); CertificateServer testCA = new DefaultCAServer("testCA", clusterId, scmId, caStore, @@ -204,6 +206,7 @@ public void testRequestCertificate() throws Exception { public void testRequestCertificateWithInvalidSubject() throws Exception { KeyPair keyPair = new HDDSKeyGenerator(securityConfig).generateKey(); + //TODO: generateCSR! PKCS10CertificationRequest csr = new CertificateSignRequest.Builder() .addDnsName("hadoop.apache.org") .addIpAddress("8.8.8.8") @@ -211,7 +214,8 @@ public void testRequestCertificateWithInvalidSubject() throws Exception { .setSubject("Ozone Cluster") .setConfiguration(securityConfig) .setKey(keyPair) - .build(); + .build() + .generateCSR(); CertificateServer testCA = new DefaultCAServer("testCA", RandomStringUtils.randomAlphabetic(4), @@ -232,6 +236,7 @@ public void testRequestCertificateWithInvalidSubject() throws Exception { public void testRequestCertificateWithInvalidSubjectFailure() throws Exception { KeyPair keyPair = new HDDSKeyGenerator(securityConfig).generateKey(); + //TODO: generateCSR! PKCS10CertificationRequest csr = new CertificateSignRequest.Builder() .addDnsName("hadoop.apache.org") .addIpAddress("8.8.8.8") @@ -241,7 +246,8 @@ public void testRequestCertificateWithInvalidSubjectFailure() throws Exception { .setSubject("Ozone Cluster") .setConfiguration(securityConfig) .setKey(keyPair) - .build(); + .build() + .generateCSR(); CertificateServer testCA = new DefaultCAServer("testCA", RandomStringUtils.randomAlphabetic(4), @@ -344,6 +350,7 @@ public void testInitWithCertChain(@TempDir Path tempDir) throws Exception { LocalDate beginDate = LocalDate.now().atStartOfDay().toLocalDate(); LocalDate endDate = LocalDate.from(LocalDate.now().atStartOfDay().plusDays(10)); + //TODO: generateCSR! 
PKCS10CertificationRequest csr = new CertificateSignRequest.Builder() .addDnsName("hadoop.apache.org") .addIpAddress("8.8.8.8") @@ -354,7 +361,8 @@ public void testInitWithCertChain(@TempDir Path tempDir) throws Exception { .setSubject("Ozone Cluster") .setConfiguration(securityConfig) .setKey(keyPair) - .build(); + .build() + .generateCSR(); X509Certificate externalCert = generateExternalCert(keyPair); X509Certificate signedCert = approver.sign(securityConfig, keyPair.getPrivate(), externalCert, @@ -405,6 +413,7 @@ clusterId, scmId, caStore, new DefaultProfile(), // Generate cert KeyPair keyPair = new HDDSKeyGenerator(securityConfig).generateKey(); + //TODO: generateCSR! PKCS10CertificationRequest csr = new CertificateSignRequest.Builder() .addDnsName("hadoop.apache.org") .addIpAddress("8.8.8.8") @@ -412,7 +421,8 @@ clusterId, scmId, caStore, new DefaultProfile(), .setSubject("testCA") .setConfiguration(securityConfig) .setKey(keyPair) - .build(); + .build() + .generateCSR(); Future holder = rootCA.requestCertificate(csr, CertificateApprover.ApprovalType.TESTING_AUTOMATIC, SCM, diff --git a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/authority/TestDefaultProfile.java b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/authority/TestDefaultProfile.java index 1204f9052136..2f4dd681ea35 100644 --- a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/authority/TestDefaultProfile.java +++ b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/authority/TestDefaultProfile.java @@ -41,7 +41,6 @@ import org.bouncycastle.operator.jcajce.JcaContentSignerBuilder; import org.bouncycastle.pkcs.PKCS10CertificationRequest; import org.bouncycastle.pkcs.PKCS10CertificationRequestBuilder; -import org.bouncycastle.pkcs.PKCSException; import org.bouncycastle.pkcs.jcajce.JcaPKCS10CertificationRequestBuilder; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -50,8 +49,6 @@ import java.io.IOException; import java.nio.file.Path; import java.security.KeyPair; -import java.security.NoSuchAlgorithmException; -import java.security.NoSuchProviderException; import static org.apache.hadoop.hdds.HddsConfigKeys.OZONE_METADATA_DIRS; import static org.junit.jupiter.api.Assertions.assertFalse; @@ -94,14 +91,10 @@ public void testisSupportedGeneralName() { /** * Test valid keys are validated correctly. - * - * @throws SCMSecurityException - on Error. - * @throws PKCSException - on Error. - * @throws OperatorCreationException - on Error. */ @Test - public void testVerifyCertificate() throws SCMSecurityException, - PKCSException, OperatorCreationException { + public void testVerifyCertificate() throws Exception { + //TODO: generateCSR! PKCS10CertificationRequest csr = new CertificateSignRequest.Builder() .addDnsName("hadoop.apache.org") .addIpAddress("8.8.8.8") @@ -112,7 +105,8 @@ public void testVerifyCertificate() throws SCMSecurityException, .setSubject("Ozone Cluster") .setConfiguration(securityConfig) .setKey(keyPair) - .build(); + .build() + .generateCSR(); assertTrue(approver.verifyPkcs10Request(csr)); } @@ -121,20 +115,13 @@ public void testVerifyCertificate() throws SCMSecurityException, /** * Test invalid keys fail in the validation. - * - * @throws SCMSecurityException - on Error. - * @throws PKCSException - on Error. - * @throws OperatorCreationException - on Error. - * @throws NoSuchProviderException - on Error. 
- * @throws NoSuchAlgorithmException - on Error. */ @Test - public void testVerifyCertificateInvalidKeys() throws SCMSecurityException, - PKCSException, OperatorCreationException, - NoSuchProviderException, NoSuchAlgorithmException { + public void testVerifyCertificateInvalidKeys() throws Exception { KeyPair newKeyPair = new HDDSKeyGenerator(securityConfig).generateKey(); KeyPair wrongKey = new KeyPair(keyPair.getPublic(), newKeyPair.getPrivate()); + //TODO: generateCSR! PKCS10CertificationRequest csr = new CertificateSignRequest.Builder() .addDnsName("hadoop.apache.org") .addIpAddress("8.8.8.8") @@ -144,7 +131,8 @@ public void testVerifyCertificateInvalidKeys() throws SCMSecurityException, .setSubject("Ozone Cluster") .setConfiguration(securityConfig) .setKey(wrongKey) - .build(); + .build() + .generateCSR(); // Signature verification should fail here, since the public/private key // does not match. assertFalse(approver.verifyPkcs10Request(csr)); @@ -152,13 +140,10 @@ public void testVerifyCertificateInvalidKeys() throws SCMSecurityException, /** * Tests that normal valid extensions work with the default profile. - * - * @throws SCMSecurityException - on Error. - * @throws PKCSException - on Error. - * @throws OperatorCreationException - on Error. */ @Test - public void testExtensions() throws SCMSecurityException { + public void testExtensions() throws Exception { + //TODO: generateCSR! PKCS10CertificationRequest csr = new CertificateSignRequest.Builder() .addDnsName("hadoop.apache.org") .addIpAddress("192.10.234.6") @@ -168,7 +153,8 @@ public void testExtensions() throws SCMSecurityException { .setSubject("Ozone Cluster") .setConfiguration(securityConfig) .setKey(keyPair) - .build(); + .build() + .generateCSR(); assertTrue(approver.verfiyExtensions(csr)); } @@ -180,7 +166,8 @@ public void testExtensions() throws SCMSecurityException { */ @Test - public void testInvalidExtensionsWithCA() throws SCMSecurityException { + public void testInvalidExtensionsWithCA() throws Exception { + //TODO: generateCSR! 
PKCS10CertificationRequest csr = new CertificateSignRequest.Builder() .addDnsName("hadoop.apache.org") .addIpAddress("192.10.234.6") @@ -190,7 +177,8 @@ public void testInvalidExtensionsWithCA() throws SCMSecurityException { .setSubject("Ozone Cluster") .setConfiguration(securityConfig) .setKey(keyPair) - .build(); + .build() + .generateCSR(); assertFalse(approver.verfiyExtensions(csr)); } diff --git a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/client/CertificateClientTestImpl.java b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/client/CertificateClientTestImpl.java index 00058500f597..fa784b755384 100644 --- a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/client/CertificateClientTestImpl.java +++ b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/client/CertificateClientTestImpl.java @@ -48,6 +48,7 @@ import com.google.common.util.concurrent.ThreadFactoryBuilder; import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.security.exception.SCMSecurityException; import org.apache.hadoop.hdds.security.SecurityConfig; import org.apache.hadoop.hdds.security.ssl.ReloadingX509KeyManager; import org.apache.hadoop.hdds.security.ssl.ReloadingX509TrustManager; @@ -135,13 +136,14 @@ public CertificateClientTestImpl(OzoneConfiguration conf, boolean autoRenew) start = LocalDateTime.now(); String certDuration = conf.get(HDDS_X509_DEFAULT_DURATION, HDDS_X509_DEFAULT_DURATION_DEFAULT); + //TODO: generateCSR should not be called... x509Certificate = approver.sign(securityConfig, rootKeyPair.getPrivate(), - rootCert, - Date.from(start.atZone(ZoneId.systemDefault()).toInstant()), - Date.from(start.plus(Duration.parse(certDuration)) - .atZone(ZoneId.systemDefault()).toInstant()), - csrBuilder.build(), "scm1", "cluster1", - String.valueOf(System.nanoTime())); + rootCert, + Date.from(start.atZone(ZoneId.systemDefault()).toInstant()), + Date.from(start.plus(Duration.parse(certDuration)) + .atZone(ZoneId.systemDefault()).toInstant()), + csrBuilder.build().generateCSR(), "scm1", "cluster1", + String.valueOf(System.nanoTime())); certificateMap.put(x509Certificate.getSerialNumber().toString(), x509Certificate); @@ -227,7 +229,7 @@ public boolean verifySignature(byte[] data, byte[] signature, } @Override - public CertificateSignRequest.Builder getCSRBuilder() { + public CertificateSignRequest.Builder configureCSRBuilder() throws SCMSecurityException { return new CertificateSignRequest.Builder(); } @@ -298,9 +300,10 @@ public void renewKey() throws Exception { Duration certDuration = securityConfig.getDefaultCertDuration(); Date start = new Date(); + //TODO: get rid of generateCSR call here, once the server side changes happened. 
X509Certificate newX509Certificate = approver.sign(securityConfig, rootKeyPair.getPrivate(), rootCert, start, - new Date(start.getTime() + certDuration.toMillis()), csrBuilder.build(), "scm1", "cluster1", + new Date(start.getTime() + certDuration.toMillis()), csrBuilder.build().generateCSR(), "scm1", "cluster1", String.valueOf(System.nanoTime()) ); diff --git a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/client/TestDefaultCertificateClient.java b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/client/TestDefaultCertificateClient.java index 59c623a53ddf..f6827352f00b 100644 --- a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/client/TestDefaultCertificateClient.java +++ b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/client/TestDefaultCertificateClient.java @@ -25,9 +25,9 @@ import org.apache.hadoop.hdds.protocolPB.SCMSecurityProtocolClientSideTranslatorPB; import org.apache.hadoop.hdds.security.x509.certificate.authority.CAType; import org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateCodec; +import org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateSignRequest; import org.apache.hadoop.hdds.security.x509.exception.CertificateException; import org.apache.hadoop.hdds.security.x509.keys.KeyCodec; -import org.bouncycastle.pkcs.PKCS10CertificationRequest; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -560,21 +560,12 @@ public void testCloseCertificateClient(@TempDir File metaDir) ) { @Override - protected String signAndStoreCertificate( - PKCS10CertificationRequest request, Path certificatePath) { - return ""; - } - - @Override - protected SCMGetCertResponseProto getCertificateSignResponse( - PKCS10CertificationRequest request) { + protected SCMGetCertResponseProto sign(CertificateSignRequest request) { return null; } @Override - protected String signAndStoreCertificate( - PKCS10CertificationRequest request, Path certificatePath, - boolean renew) { + protected String signAndStoreCertificate(CertificateSignRequest request, Path certificatePath, boolean renew) { return null; } }; diff --git a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/utils/TestCertificateSignRequest.java b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/utils/TestCertificateSignRequest.java index 1d32712fc281..598e74cb36c1 100644 --- a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/utils/TestCertificateSignRequest.java +++ b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/utils/TestCertificateSignRequest.java @@ -19,7 +19,6 @@ package org.apache.hadoop.hdds.security.x509.certificate.utils; import org.apache.hadoop.hdds.conf.OzoneConfiguration; -import org.apache.hadoop.hdds.security.exception.SCMSecurityException; import org.apache.hadoop.hdds.security.SecurityConfig; import org.apache.hadoop.hdds.security.x509.keys.HDDSKeyGenerator; import org.bouncycastle.asn1.ASN1Encodable; @@ -33,10 +32,8 @@ import org.bouncycastle.asn1.x509.GeneralNames; import org.bouncycastle.asn1.x509.SubjectPublicKeyInfo; import org.bouncycastle.operator.ContentVerifierProvider; -import org.bouncycastle.operator.OperatorCreationException; import org.bouncycastle.operator.jcajce.JcaContentVerifierProviderBuilder; import 
org.bouncycastle.pkcs.PKCS10CertificationRequest; -import org.bouncycastle.pkcs.PKCSException; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; @@ -44,8 +41,6 @@ import java.io.IOException; import java.nio.file.Path; import java.security.KeyPair; -import java.security.NoSuchAlgorithmException; -import java.security.NoSuchProviderException; import java.util.Iterator; import java.util.UUID; @@ -73,9 +68,7 @@ public void init(@TempDir Path tempDir) throws IOException { } @Test - public void testGenerateCSR() throws NoSuchProviderException, - NoSuchAlgorithmException, SCMSecurityException, - OperatorCreationException, PKCSException { + public void testGenerateCSR() throws Exception { String clusterID = UUID.randomUUID().toString(); String scmID = UUID.randomUUID().toString(); String subject = "DN001"; @@ -90,7 +83,8 @@ public void testGenerateCSR() throws NoSuchProviderException, .setClusterID(clusterID) .setKey(keyPair) .setConfiguration(securityConfig); - PKCS10CertificationRequest csr = builder.build(); + //TODO: generateCSR! + PKCS10CertificationRequest csr = builder.build().generateCSR(); // Check the Subject Name is in the expected format. String dnName = String.format(getDistinguishedNameFormat(), @@ -124,9 +118,7 @@ public void testGenerateCSR() throws NoSuchProviderException, } @Test - public void testGenerateCSRwithSan() throws NoSuchProviderException, - NoSuchAlgorithmException, SCMSecurityException, - OperatorCreationException, PKCSException { + public void testGenerateCSRwithSan() throws Exception { String clusterID = UUID.randomUUID().toString(); String scmID = UUID.randomUUID().toString(); String subject = "DN001"; @@ -149,7 +141,8 @@ public void testGenerateCSRwithSan() throws NoSuchProviderException, builder.addDnsName("dn1.abc.com"); - PKCS10CertificationRequest csr = builder.build(); + //TODO: generateCSR! + PKCS10CertificationRequest csr = builder.build().generateCSR(); // Check the Subject Name is in the expected format. String dnName = String.format(getDistinguishedNameFormat(), @@ -181,8 +174,7 @@ public void testGenerateCSRwithSan() throws NoSuchProviderException, } @Test - public void testGenerateCSRWithInvalidParams() throws NoSuchProviderException, - NoSuchAlgorithmException, SCMSecurityException { + public void testGenerateCSRWithInvalidParams() throws Exception { String clusterID = UUID.randomUUID().toString(); String scmID = UUID.randomUUID().toString(); String subject = "DN001"; @@ -225,7 +217,8 @@ public void testGenerateCSRWithInvalidParams() throws NoSuchProviderException, builder.build(); }); - PKCS10CertificationRequest csr = builder.build(); + //TODO: generateCSR! + PKCS10CertificationRequest csr = builder.build().generateCSR(); // Check the Subject Name is in the expected format. String dnName = String.format(getDistinguishedNameFormat(), @@ -244,8 +237,7 @@ public void testGenerateCSRWithInvalidParams() throws NoSuchProviderException, } @Test - public void testCsrSerialization() throws NoSuchProviderException, - NoSuchAlgorithmException, SCMSecurityException, IOException { + public void testCsrSerialization() throws Exception { String clusterID = UUID.randomUUID().toString(); String scmID = UUID.randomUUID().toString(); String subject = "DN001"; @@ -261,7 +253,8 @@ public void testCsrSerialization() throws NoSuchProviderException, .setKey(keyPair) .setConfiguration(securityConfig); - PKCS10CertificationRequest csr = builder.build(); + //TODO: generateCSR! 
+ PKCS10CertificationRequest csr = builder.build().generateCSR(); byte[] csrBytes = csr.getEncoded(); // Verify de-serialized CSR matches with the original CSR diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationManager.java index c88abb5b8d08..d38a904d09c0 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationManager.java @@ -584,7 +584,7 @@ public void run() { String newCertSerialId = ""; try { CertificateSignRequest.Builder csrBuilder = - scmCertClient.getCSRBuilder(); + scmCertClient.configureCSRBuilder(); csrBuilder.setKey(newKeyPair); newCertSerialId = scmCertClient.signAndStoreCertificate( csrBuilder.build(), diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestSecureOzoneCluster.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestSecureOzoneCluster.java index 2b29701cf792..4f41d516153f 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestSecureOzoneCluster.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestSecureOzoneCluster.java @@ -1449,11 +1449,12 @@ private static X509Certificate signX509Cert( addIpAndDnsDataToBuilder(csrBuilder); LocalDateTime start = LocalDateTime.now(); Duration certDuration = conf.getDefaultCertDuration(); + //TODO: generateCSR! return approver.sign(conf, rootKeyPair.getPrivate(), rootCert, Date.from(start.atZone(ZoneId.systemDefault()).toInstant()), Date.from(start.plus(certDuration) .atZone(ZoneId.systemDefault()).toInstant()), - csrBuilder.build(), "test", clusterId, + csrBuilder.build().generateCSR(), "test", clusterId, String.valueOf(System.nanoTime())); } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/security/OMCertificateClient.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/security/OMCertificateClient.java index a6bfed699f72..0a28fc37f3cb 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/security/OMCertificateClient.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/security/OMCertificateClient.java @@ -25,12 +25,12 @@ import org.apache.hadoop.hdds.protocol.proto.SCMSecurityProtocolProtos.SCMGetCertResponseProto; import org.apache.hadoop.hdds.protocolPB.SCMSecurityProtocolClientSideTranslatorPB; import org.apache.hadoop.hdds.security.SecurityConfig; +import org.apache.hadoop.hdds.security.exception.SCMSecurityException; import org.apache.hadoop.hdds.security.x509.certificate.client.DefaultCertificateClient; import org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateSignRequest; import org.apache.hadoop.hdds.security.x509.exception.CertificateException; import org.apache.hadoop.ozone.om.OMStorage; import org.apache.hadoop.security.UserGroupInformation; -import org.bouncycastle.pkcs.PKCS10CertificationRequest; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -38,7 +38,6 @@ import java.security.KeyPair; import java.util.function.Consumer; -import static org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateSignRequest.getEncodedString; /** * Certificate client for OzoneManager. 
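The hunks above and below replace the raw BouncyCastle `PKCS10CertificationRequest` plumbing with the `CertificateSignRequest` wrapper: `configureCSRBuilder()` supersedes `getCSRBuilder()`, and the server-facing `sign(CertificateSignRequest)` supersedes `getCertificateSignResponse(PKCS10CertificationRequest)`. A rough caller-side sketch of the resulting flow, using only names that appear in this patch; the `certClient` and `keyPair` variables are hypothetical placeholders and exception handling is omitted:

```java
// Illustrative fragment only, not an actual call site from this patch.
CertificateSignRequest.Builder builder = certClient.configureCSRBuilder();
builder.setKey(keyPair);
CertificateSignRequest csr = builder.build();

// Server-facing calls now ship the encoded CSR (see the sign(...) overrides below).
String encodedCsr = csr.toEncodedFormat();

// Code that still needs a raw PKCS#10 object derives it explicitly;
// the TODO comments above mark those call sites as temporary.
PKCS10CertificationRequest pkcs10 = csr.generateCSR();
```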
@@ -81,9 +80,9 @@ public OMCertificateClient( * @return CertificateSignRequest.Builder */ @Override - public CertificateSignRequest.Builder getCSRBuilder() - throws CertificateException { - CertificateSignRequest.Builder builder = super.getCSRBuilder(); + public CertificateSignRequest.Builder configureCSRBuilder() + throws SCMSecurityException { + CertificateSignRequest.Builder builder = super.configureCSRBuilder(); String hostname = omInfo.getHostName(); String subject; @@ -118,10 +117,8 @@ public CertificateSignRequest.Builder getCSRBuilder() } @Override - protected SCMGetCertResponseProto getCertificateSignResponse( - PKCS10CertificationRequest request) throws IOException { - return getScmSecureClient().getOMCertChain( - omInfo, getEncodedString(request)); + protected SCMGetCertResponseProto sign(CertificateSignRequest request) throws IOException { + return getScmSecureClient().getOMCertChain(omInfo, request.toEncodedFormat()); } @Override diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/security/ReconCertificateClient.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/security/ReconCertificateClient.java index 92b540ecd13a..95fdfabbb881 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/security/ReconCertificateClient.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/security/ReconCertificateClient.java @@ -21,12 +21,12 @@ import org.apache.hadoop.hdds.protocol.proto.SCMSecurityProtocolProtos.SCMGetCertResponseProto; import org.apache.hadoop.hdds.protocolPB.SCMSecurityProtocolClientSideTranslatorPB; import org.apache.hadoop.hdds.security.SecurityConfig; +import org.apache.hadoop.hdds.security.exception.SCMSecurityException; import org.apache.hadoop.hdds.security.x509.certificate.client.DefaultCertificateClient; import org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateSignRequest; import org.apache.hadoop.hdds.security.x509.exception.CertificateException; import org.apache.hadoop.ozone.recon.scm.ReconStorageConfig; import org.apache.hadoop.security.UserGroupInformation; -import org.bouncycastle.pkcs.PKCS10CertificationRequest; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -35,7 +35,6 @@ import java.security.KeyPair; import java.util.function.Consumer; -import static org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateSignRequest.getEncodedString; import static org.apache.hadoop.hdds.security.x509.exception.CertificateException.ErrorCode.CSR_ERROR; /** @@ -62,11 +61,11 @@ public ReconCertificateClient( } @Override - public CertificateSignRequest.Builder getCSRBuilder() - throws CertificateException { + public CertificateSignRequest.Builder configureCSRBuilder() + throws SCMSecurityException { LOG.info("Creating CSR for Recon."); try { - CertificateSignRequest.Builder builder = super.getCSRBuilder(); + CertificateSignRequest.Builder builder = super.configureCSRBuilder(); String hostname = InetAddress.getLocalHost().getCanonicalHostName(); String subject = UserGroupInformation.getCurrentUser() .getShortUserName() + "@" + hostname; @@ -85,8 +84,7 @@ public CertificateSignRequest.Builder getCSRBuilder() } @Override - protected SCMGetCertResponseProto getCertificateSignResponse( - PKCS10CertificationRequest request) throws IOException { + protected SCMGetCertResponseProto sign(CertificateSignRequest request) throws IOException { SCMGetCertResponseProto response; HddsProtos.NodeDetailsProto.Builder reconDetailsProtoBuilder = 
HddsProtos.NodeDetailsProto.newBuilder() @@ -95,8 +93,7 @@ protected SCMGetCertResponseProto getCertificateSignResponse( .setUuid(reconID) .setNodeType(HddsProtos.NodeType.RECON); // TODO: For SCM CA we should fetch certificate from multiple SCMs. - response = getScmSecureClient().getCertificateChain( - reconDetailsProtoBuilder.build(), getEncodedString(request)); + response = getScmSecureClient().getCertificateChain(reconDetailsProtoBuilder.build(), request.toEncodedFormat()); return response; } From 387fd6bb92f8bf7fb8f83f275b36f948242c6894 Mon Sep 17 00:00:00 2001 From: "Doroszlai, Attila" <6454655+adoroszlai@users.noreply.github.com> Date: Fri, 9 Aug 2024 21:13:13 +0200 Subject: [PATCH 06/50] HDDS-11105. Generate Robot report in container (#6951) --- hadoop-ozone/dev-support/checks/acceptance.sh | 5 -- hadoop-ozone/dist/src/main/compose/testlib.sh | 59 ++++++++++++++----- 2 files changed, 43 insertions(+), 21 deletions(-) diff --git a/hadoop-ozone/dev-support/checks/acceptance.sh b/hadoop-ozone/dev-support/checks/acceptance.sh index 1e16b277aff8..3425f66605ef 100755 --- a/hadoop-ozone/dev-support/checks/acceptance.sh +++ b/hadoop-ozone/dev-support/checks/acceptance.sh @@ -52,11 +52,6 @@ if [[ "${OZONE_ACCEPTANCE_SUITE}" == "s3a" ]]; then download_hadoop_aws "${HADOOP_AWS_DIR}" fi -if [[ "${OZONE_ACCEPTANCE_TEST_TYPE}" == "robot" ]]; then - install_virtualenv - install_robot -fi - export OZONE_ACCEPTANCE_SUITE OZONE_ACCEPTANCE_TEST_TYPE cd "$DIST_DIR/compose" || exit 1 diff --git a/hadoop-ozone/dist/src/main/compose/testlib.sh b/hadoop-ozone/dist/src/main/compose/testlib.sh index 44b4f303807f..1e9cc85781a6 100755 --- a/hadoop-ozone/dist/src/main/compose/testlib.sh +++ b/hadoop-ozone/dist/src/main/compose/testlib.sh @@ -37,8 +37,6 @@ create_results_dir() { #delete previous results [[ "${OZONE_KEEP_RESULTS:-}" == "true" ]] || rm -rf "$RESULT_DIR" mkdir -p "$RESULT_DIR" - #Should be writeable from the docker containers where user is different. - chmod ogu+w "$RESULT_DIR" } ## @description find all the test*.sh scripts in the immediate child dirs @@ -390,22 +388,44 @@ cleanup_docker_images() { fi } +## @description Run Robot Framework report generator (rebot) in ozone-runner container. +## @param input directory where source Robot XML files are +## @param output directory where report should be placed +## @param rebot options and arguments +run_rebot() { + local input_dir="$(realpath "$1")" + local output_dir="$(realpath "$2")" + + shift 2 + + local tempdir="$(mktemp -d --suffix rebot -p "${output_dir}")" + #Should be writeable from the docker containers where user is different. + chmod a+wx "${tempdir}" + if docker run --rm -v "${input_dir}":/rebot-input -v "${tempdir}":/rebot-output -w /rebot-input \ + $(get_runner_image_spec) \ + bash -c "rebot --nostatusrc -d /rebot-output $@"; then + mv -v "${tempdir}"/* "${output_dir}"/ + fi + rmdir "${tempdir}" +} + ## @description Generate robot framework reports based on the saved results. 
generate_report(){ local title="${1:-${COMPOSE_ENV_NAME}}" local dir="${2:-${RESULT_DIR}}" local xunitdir="${3:-}" - if command -v rebot > /dev/null 2>&1; then - #Generate the combined output and return with the right exit code (note: robot = execute test, rebot = generate output) - if [ -z "${xunitdir}" ]; then - rebot --reporttitle "${title}" -N "${title}" -d "${dir}" "${dir}/*.xml" - else - rebot --reporttitle "${title}" -N "${title}" --xunit ${xunitdir}/TEST-ozone.xml -d "${dir}" "${dir}/*.xml" - fi - else - echo "Robot framework is not installed, the reports cannot be generated (sudo pip install robotframework)." - exit 1 + if [[ -n "$(find "${dir}" -mindepth 1 -maxdepth 1 -name "*.xml")" ]]; then + xunit_args="" + if [[ -n "${xunitdir}" ]] && [[ -e "${xunitdir}" ]]; then + xunit_args="--xunit TEST-ozone.xml" + fi + + run_rebot "$dir" "$dir" "--reporttitle '${title}' -N '${title}' ${xunit_args} *.xml" + + if [[ -n "${xunit_args}" ]]; then + mv -v "${dir}"/TEST-ozone.xml "${xunitdir}"/ || rm -f "${dir}"/TEST-ozone.xml + fi fi } @@ -429,8 +449,8 @@ copy_results() { target_dir="${target_dir}/${test_script_name}" fi - if command -v rebot > /dev/null 2>&1 && [[ -n "$(find "${result_dir}" -name "*.xml")" ]]; then - rebot --nostatusrc -N "${test_name}" -l NONE -r NONE -o "${all_result_dir}/${test_name}.xml" "${result_dir}"/*.xml \ + if [[ -n "$(find "${result_dir}" -mindepth 1 -maxdepth 1 -name "*.xml")" ]]; then + run_rebot "${result_dir}" "${all_result_dir}" "-N '${test_name}' -l NONE -r NONE -o '${test_name}.xml' *.xml" \ && rm -fv "${result_dir}"/*.xml "${result_dir}"/log.html "${result_dir}"/report.html fi @@ -505,14 +525,21 @@ prepare_for_binary_image() { ## @description Define variables required for using `ozone-runner` docker image ## (no binaries included) ## @param `ozone-runner` image version (optional) -prepare_for_runner_image() { +get_runner_image_spec() { local default_version=${docker.ozone-runner.version} # set at build-time from Maven property local runner_version=${OZONE_RUNNER_VERSION:-${default_version}} # may be specified by user running the test local runner_image=${OZONE_RUNNER_IMAGE:-apache/ozone-runner} # may be specified by user running the test local v=${1:-${runner_version}} # prefer explicit argument + echo "${runner_image}:${v}" +} + +## @description Define variables required for using `ozone-runner` docker image +## (no binaries included) +## @param `ozone-runner` image version (optional) +prepare_for_runner_image() { export OZONE_DIR=/opt/hadoop - export OZONE_IMAGE="${runner_image}:${v}" + export OZONE_IMAGE="$(get_runner_image_spec "$@")" } ## @description Executing the Ozone Debug CLI related robot tests From 2359a0564cbc8b722b2a8424e6d16b915ca91248 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 10 Aug 2024 11:05:31 +0200 Subject: [PATCH 07/50] HDDS-11294. Bump download-maven-plugin to 1.9.0 (#7053) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index c0f1dc843f84..e7f9fa87bd68 100644 --- a/pom.xml +++ b/pom.xml @@ -125,7 +125,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs 1.12.0 1.6 1.5 - 1.7.1 + 1.9.0 ${project.build.directory}/test-dir ${test.build.dir} From 8b52d0c285c6385018d276b08c16049fff736c60 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 10 Aug 2024 12:31:20 +0200 Subject: [PATCH 08/50] HDDS-11296. 
Bump commons-compress to 1.27.0 (#7054) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index e7f9fa87bd68..beed528a59cc 100644 --- a/pom.xml +++ b/pom.xml @@ -112,7 +112,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs 1.8.0 1.17.0 3.2.2 - 1.26.2 + 1.27.0 2.10.1 1.5.6-3 1.4.0 From 462fa71c199dc95c1cf1dadd4e217cc3e33a6497 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 10 Aug 2024 16:52:05 +0200 Subject: [PATCH 09/50] HDDS-11299. Bump error_prone_annotations to 2.29.2 (#7055) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index beed528a59cc..25364c27ed3b 100644 --- a/pom.xml +++ b/pom.xml @@ -203,7 +203,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs 2.12.7 0.21.0 - 2.28.0 + 2.29.2 32.1.3-jre 6.0.0 2.10.1 From b5bec1cd74b590e910832fa31e510cd3c8a8b25e Mon Sep 17 00:00:00 2001 From: Adnan Khan <118618204+Adnan2199@users.noreply.github.com> Date: Sun, 11 Aug 2024 21:33:21 +0530 Subject: [PATCH 10/50] HDDS-11301. Add Missing Utilization Endpoint in ReconApi.md for Ozone Recon Documentation. (#7056) --- .../docs/content/interface/ReconApi.md | 29 +++++++++++++++++-- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/hadoop-hdds/docs/content/interface/ReconApi.md b/hadoop-hdds/docs/content/interface/ReconApi.md index 485c3b0fd42e..b5b885b403e9 100644 --- a/hadoop-hdds/docs/content/interface/ReconApi.md +++ b/hadoop-hdds/docs/content/interface/ReconApi.md @@ -23,8 +23,9 @@ summary: Recon server supports HTTP endpoints to help troubleshoot and monitor O limitations under the License. --> -The Recon API v1 is a set of HTTP endpoints that help you understand the current -state of an Ozone cluster and to troubleshoot if needed. +The Recon API v1 offers a collection of HTTP endpoints designed to provide insights into the current state of an Ozone cluster, +facilitating monitoring, management, and troubleshooting. These endpoints allow administrators to access critical cluster +metadata, container status, key management, and more. Endpoints that are marked as *admin only* can only be accessed by Kerberos users specified in the **ozone.administrators** or **ozone.recon.administrators** @@ -37,7 +38,7 @@ ozone.security.enabled| *true* ozone.security.http.kerberos.enabled| *true* ozone.acl.enabled| *true* -Checkout an interactive version of the API powered by Swagger [here]({{< relref "./SwaggerReconApi.md" >}}) +Access an interactive version of the API, complete with detailed descriptions and example requests, powered by Swagger [here]({{< relref "./SwaggerReconApi.md" >}}) ## Containers (admin only) @@ -1062,6 +1063,28 @@ response object being the upper cap for file size range. "count": 2 }] ``` + +### GET /api/v1/utilization/containerCount + +**Parameters** + +* containerSize (optional) + + Filters the results based on the given container size. The smallest container size being tracked for count is 512 MB (512000000 bytes). + +**Returns** + +Returns the container counts within different container size ranges, with `containerSize` representing the size range and `count` representing the number of containers within that range. 
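As a quick way to exercise the endpoint, a plain JDK HTTP client call such as the sketch below can be used. The host and port are assumptions (Recon's HTTP server, typically port 9888) and security is ignored, so adjust both for a real cluster; the response is a JSON array like the sample that follows.

```java
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class ContainerCountCheck {
  public static void main(String[] args) throws Exception {
    // Assumed address of the Recon web server; replace with the real host:port.
    URI endpoint = URI.create("http://localhost:9888/api/v1/utilization/containerCount");
    HttpRequest request = HttpRequest.newBuilder(endpoint).GET().build();
    HttpResponse<String> response = HttpClient.newHttpClient()
        .send(request, HttpResponse.BodyHandlers.ofString());
    // Each element of the body is a {containerSize, count} bucket, as documented here.
    System.out.println(response.statusCode());
    System.out.println(response.body());
  }
}
```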
+ +```json + [{ + "containerSize": 2147483648, + "count": 9 + }, { + "containerSize": 1073741824, + "count": 3 + }] +``` ## Metrics From ad7e8b32a749defeb1de6171334952157c6b4d9b Mon Sep 17 00:00:00 2001 From: jyotirmoy-gh <69982926+jyotirmoy-gh@users.noreply.github.com> Date: Mon, 12 Aug 2024 00:59:04 +0530 Subject: [PATCH 11/50] HDDS-11288. Add tests for quota level boundary conditions (#7049) --- .../smoketest/basic/ozone-shell-lib.robot | 29 ++++++++++++++-- .../hadoop/ozone/shell/TestOzoneShellHA.java | 33 +++++++++++++++++++ 2 files changed, 60 insertions(+), 2 deletions(-) diff --git a/hadoop-ozone/dist/src/main/smoketest/basic/ozone-shell-lib.robot b/hadoop-ozone/dist/src/main/smoketest/basic/ozone-shell-lib.robot index 83f0a1b69e26..719cdaf83f39 100644 --- a/hadoop-ozone/dist/src/main/smoketest/basic/ozone-shell-lib.robot +++ b/hadoop-ozone/dist/src/main/smoketest/basic/ozone-shell-lib.robot @@ -92,18 +92,43 @@ Test ozone shell Test ozone shell errors [arguments] ${protocol} ${server} ${volume} - ${result} = Execute and checkrc ozone sh volume create ${protocol}${server}/${volume} --space-quota invalid 255 - Should contain ${result} invalid + ${result} = Execute and checkrc ozone sh volume create ${protocol}${server}/${volume} --space-quota 1.5GB 255 + Should contain ${result} 1.5GB is invalid + ${result} = Execute and checkrc ozone sh volume create ${protocol}${server}/${volume} --namespace-quota 1.5 255 + Should contain ${result} 1.5 is invalid Execute and checkrc ozone sh volume create ${protocol}${server}/${volume} 0 ${result} = Execute and checkrc ozone sh bucket create ${protocol}${server}/${volume}/bucket_1 255 Should contain ${result} INVALID_BUCKET_NAME + ${result} = Execute and checkrc ozone sh bucket create ${protocol}${server}/${volume}/bucket1 --space-quota 1.5GB 255 + Should contain ${result} 1.5GB is invalid + ${result} = Execute and checkrc ozone sh bucket create ${protocol}${server}/${volume}/bucket1 --namespace-quota 1.5 255 + Should contain ${result} 1.5 is invalid ${result} = Execute and checkrc ozone sh bucket create ${protocol}${server}/${volume}/bucket1 --layout Invalid 2 Should contain ${result} Usage Execute and checkrc ozone sh bucket create ${protocol}${server}/${volume}/bucket1 0 ${result} = Execute and checkrc ozone sh key info ${protocol}${server}/${volume}/bucket1/non-existing 255 Should contain ${result} KEY_NOT_FOUND ${result} = Execute and checkrc ozone sh key put ${protocol}${server}/${volume}/bucket1/key1 unexisting --type invalid 2 + ${result} = Execute and checkrc ozone sh bucket setquota ${volume}/bucket1 --space-quota 1.5 255 + Should contain ${result} 1.5 is invalid + ${result} = Execute and checkrc ozone sh bucket setquota ${volume}/bucket1 --namespace-quota 1.5 255 + Should contain ${result} 1.5 is invalid + ${result} = Execute and checkrc ozone sh volume setquota ${volume} --space-quota 1.5 255 + Should contain ${result} 1.5 is invalid + ${result} = Execute and checkrc ozone sh volume setquota ${volume} --namespace-quota 1.5 255 + Should contain ${result} 1.5 is invalid + Execute and checkrc ozone sh bucket setquota ${volume}/bucket1 --space-quota 2KB 0 + ${result} = Execute and checkrc ozone sh key put ${volume}/bucket1/key1 /opt/hadoop/NOTICE.txt 255 + Should contain ${result} QUOTA_EXCEEDED + ${result} = Execute and checkrc ozone sh volume setquota ${volume} --space-quota 1KB 255 + Should contain ${result} QUOTA_EXCEEDED + Execute and checkrc ozone sh bucket clrquota ${volume}/bucket1 --space-quota 0 + ${result} = Execute and checkrc 
ozone sh volume setquota ${volume} --space-quota 1GB 255 + Should contain ${result} QUOTA_ERROR Execute and checkrc ozone sh bucket delete ${protocol}${server}/${volume}/bucket1 0 + Execute and checkrc ozone sh volume setquota ${volume} --space-quota 1GB 0 + ${result} = Execute and checkrc ozone sh bucket create ${protocol}${server}/${volume}/bucket1 255 + Should contain ${result} QUOTA_ERROR Execute and checkrc ozone sh volume delete ${protocol}${server}/${volume} 0 diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestOzoneShellHA.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestOzoneShellHA.java index 5a46d571c6b7..89f068cdedfa 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestOzoneShellHA.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestOzoneShellHA.java @@ -1568,6 +1568,39 @@ public void testShQuota() throws Exception { .contains("Missing required parameter"); out.reset(); + // Test incompatible volume-bucket quota + args = new String[]{"volume", "create", "vol6"}; + execute(ozoneShell, args); + out.reset(); + + args = new String[]{"bucket", "create", "vol6/buck6"}; + execute(ozoneShell, args); + out.reset(); + + args = new String[]{"volume", "setquota", "vol6", "--space-quota", "1000B"}; + executeWithError(ozoneShell, args, "Can not set volume space quota " + + "on volume as some of buckets in this volume have no quota set"); + out.reset(); + + args = new String[]{"bucket", "setquota", "vol6/buck6", "--space-quota", "1000B"}; + execute(ozoneShell, args); + out.reset(); + + args = new String[]{"volume", "setquota", "vol6", "--space-quota", "2000B"}; + execute(ozoneShell, args); + out.reset(); + + args = new String[]{"bucket", "create", "vol6/buck62"}; + executeWithError(ozoneShell, args, "Bucket space quota in this " + + "volume should be set as volume space quota is already set."); + out.reset(); + + args = new String[]{"bucket", "create", "vol6/buck62", "--space-quota", "2000B"}; + executeWithError(ozoneShell, args, "Total buckets quota in this volume " + + "should not be greater than volume quota : the total space quota is set to:3000. " + + "But the volume space quota is:2000"); + out.reset(); + // Test set bucket spaceQuota or nameSpaceQuota to normal value. String[] bucketArgs8 = new String[]{"bucket", "setquota", "vol4/buck4", "--space-quota", "1000B"}; From e63cb91b12d531fe265d0e9301896f867730910e Mon Sep 17 00:00:00 2001 From: jianghuazhu <740087514@qq.com> Date: Mon, 12 Aug 2024 12:04:38 +0800 Subject: [PATCH 12/50] HDDS-11252. 
Statistics of some storage space indicators (#7005) --- .../hadoop/hdds/scm/node/SCMNodeManager.java | 78 ++++++++++++++++--- .../resources/webapps/scm/scm-overview.html | 26 +++++++ .../src/main/resources/webapps/scm/scm.js | 14 ++++ 3 files changed, 107 insertions(+), 11 deletions(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java index 3339b27f2cec..05a686288525 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java @@ -1155,29 +1155,37 @@ public static String calculateStorageCapacity( } } - double ua = capacityByte; + return convertUnit(capacityByte); + } + + /** + * Convert byte value to other units, such as KB, MB, GB, TB. + * @param value Original value, in byte. + * @return + */ + private static String convertUnit(double value) { StringBuilder unit = new StringBuilder("B"); - if (ua > 1024) { - ua = ua / 1024; + if (value > 1024) { + value = value / 1024; unit.replace(0, 1, "KB"); } - if (ua > 1024) { - ua = ua / 1024; + if (value > 1024) { + value = value / 1024; unit.replace(0, 2, "MB"); } - if (ua > 1024) { - ua = ua / 1024; + if (value > 1024) { + value = value / 1024; unit.replace(0, 2, "GB"); } - if (ua > 1024) { - ua = ua / 1024; + if (value > 1024) { + value = value / 1024; unit.replace(0, 2, "TB"); } DecimalFormat decimalFormat = new DecimalFormat("#0.0"); decimalFormat.setRoundingMode(RoundingMode.HALF_UP); - String capacity = decimalFormat.format(ua); - return capacity + unit.toString(); + String newValue = decimalFormat.format(value); + return newValue + unit.toString(); } /** @@ -1225,6 +1233,8 @@ public Map getNodeStatistics() { nodeUsageStatistics(nodeStatistics); // Statistics node states nodeStateStatistics(nodeStatistics); + // Statistics node space + nodeSpaceStatistics(nodeStatistics); // todo: Statistics of other instances return nodeStatistics; } @@ -1280,6 +1290,38 @@ private void nodeStateStatistics(Map nodeStatics) { nodeStatics.put(StateStatistics.VOLUME_FAILURES.getLabel(), String.valueOf(volumeFailuresNodeCount)); } + private void nodeSpaceStatistics(Map nodeStatics) { + if (nodeStateManager.getAllNodes().size() < 1) { + return; + } + long capacityByte = 0; + long scmUsedByte = 0; + long remainingByte = 0; + for (DatanodeInfo dni : nodeStateManager.getAllNodes()) { + List storageReports = dni.getStorageReports(); + if (storageReports != null && !storageReports.isEmpty()) { + for (StorageReportProto storageReport : storageReports) { + capacityByte += storageReport.getCapacity(); + scmUsedByte += storageReport.getScmUsed(); + remainingByte += storageReport.getRemaining(); + } + } + } + + long nonScmUsedByte = capacityByte - scmUsedByte - remainingByte; + if (nonScmUsedByte < 0) { + nonScmUsedByte = 0; + } + String capacity = convertUnit(capacityByte); + String scmUsed = convertUnit(scmUsedByte); + String remaining = convertUnit(remainingByte); + String nonScmUsed = convertUnit(nonScmUsedByte); + nodeStatics.put(SpaceStatistics.CAPACITY.getLabel(), capacity); + nodeStatics.put(SpaceStatistics.SCM_USED.getLabel(), scmUsed); + nodeStatics.put(SpaceStatistics.REMAINING.getLabel(), remaining); + nodeStatics.put(SpaceStatistics.NON_SCM_USED.getLabel(), nonScmUsed); + } + /** * Based on the current time and the last heartbeat, calculate the time difference * and get a string of the 
relative value. E.g. "2s ago", "1m 2s ago", etc. @@ -1376,6 +1418,20 @@ public String getLabel() { } } + private enum SpaceStatistics { + CAPACITY("Capacity"), + SCM_USED("Scmused"), + NON_SCM_USED("NonScmused"), + REMAINING("Remaining"); + private String label; + public String getLabel() { + return label; + } + SpaceStatistics(String label) { + this.label = label; + } + } + /** * Returns the min of no healthy volumes reported out of the set * of datanodes constituting the pipeline. diff --git a/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm-overview.html b/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm-overview.html index 5a4f2ff633c8..3f825d4e25f7 100644 --- a/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm-overview.html +++ b/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm-overview.html @@ -84,6 +84,32 @@

     <h2>State Statistics</h2>
 
+    <h2>Space Statistics</h2>
+    <table>
+        <tbody>
+        <tr>
+            <th>Datanode Space</th>
+            <th>Size</th>
+        </tr>
+        <tr>
+            <td>Capacity</td>
+            <td>{{statistics.nodes.space.capacity}}</td>
+        </tr>
+        <tr>
+            <td>ScmUsed</td>
+            <td>{{statistics.nodes.space.scmused}}</td>
+        </tr>
+        <tr>
+            <td>Remaining</td>
+            <td>{{statistics.nodes.space.remaining}}</td>
+        </tr>
+        <tr>
+            <td>Non ScmUsed</td>
+            <td>{{statistics.nodes.space.nonscmused}}</td>
+        </tr>
+        </tbody>
+    </table>
 
     <h2>Node Status</h2>
 
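The values shown in the new Space Statistics table come from the `convertUnit` logic added to `SCMNodeManager` above. Since that method is private, the following standalone copy is only a sketch for checking the formatting behaviour (one decimal place, HALF_UP rounding, units up to TB); the class name and sample inputs are made up.

```java
import java.math.RoundingMode;
import java.text.DecimalFormat;

public class ConvertUnitDemo {
  // Mirrors the private SCMNodeManager#convertUnit added in this patch, for illustration only.
  static String convertUnit(double value) {
    StringBuilder unit = new StringBuilder("B");
    if (value > 1024) { value /= 1024; unit.replace(0, 1, "KB"); }
    if (value > 1024) { value /= 1024; unit.replace(0, 2, "MB"); }
    if (value > 1024) { value /= 1024; unit.replace(0, 2, "GB"); }
    if (value > 1024) { value /= 1024; unit.replace(0, 2, "TB"); }
    DecimalFormat decimalFormat = new DecimalFormat("#0.0");
    decimalFormat.setRoundingMode(RoundingMode.HALF_UP);
    return decimalFormat.format(value) + unit;
  }

  public static void main(String[] args) {
    System.out.println(convertUnit(2147483648L)); // "2.0GB" in an English default locale
    System.out.println(convertUnit(1536));        // "1.5KB"
    System.out.println(convertUnit(512));         // "512.0B"
  }
}
```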
diff --git a/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm.js b/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm.js index 41dc25cb650f..6fac68495307 100644 --- a/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm.js +++ b/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm.js @@ -46,6 +46,12 @@ decommissioning : "N/A", enteringmaintenance : "N/A", volumefailures : "N/A" + }, + space : { + capacity : "N/A", + scmused : "N/A", + remaining : "N/A", + nonscmused : "N/A" } } } @@ -118,6 +124,14 @@ $scope.statistics.nodes.state.enteringmaintenance = value; } else if(key == "VolumeFailures") { $scope.statistics.nodes.state.volumefailures = value; + } else if(key == "Capacity") { + $scope.statistics.nodes.space.capacity = value; + } else if(key == "Scmused") { + $scope.statistics.nodes.space.scmused = value; + } else if(key == "Remaining") { + $scope.statistics.nodes.space.remaining = value; + } else if(key == "NonScmused") { + $scope.statistics.nodes.space.nonscmused = value; } }); }); From 609b17864297ee9594c2346f697c5ae1f171a72d Mon Sep 17 00:00:00 2001 From: "Doroszlai, Attila" <6454655+adoroszlai@users.noreply.github.com> Date: Mon, 12 Aug 2024 08:50:42 +0200 Subject: [PATCH 13/50] HDDS-11271. Add direct dependencies in hadoop-hdds (#7029) --- hadoop-hdds/client/pom.xml | 57 ++++++++ hadoop-hdds/common/pom.xml | 52 ++++++++ hadoop-hdds/container-service/pom.xml | 116 ++++++++++++++++ hadoop-hdds/erasurecode/pom.xml | 15 +++ hadoop-hdds/framework/pom.xml | 126 ++++++++++++++++++ hadoop-hdds/interface-server/pom.xml | 4 + hadoop-hdds/managed-rocksdb/pom.xml | 20 +++ hadoop-hdds/rocks-native/pom.xml | 23 ++++ hadoop-hdds/rocksdb-checkpoint-differ/pom.xml | 31 +++++ hadoop-hdds/server-scm/pom.xml | 110 +++++++++++++++ hadoop-hdds/tools/pom.xml | 61 +++++++++ pom.xml | 48 +++++++ 12 files changed, 663 insertions(+) diff --git a/hadoop-hdds/client/pom.xml b/hadoop-hdds/client/pom.xml index 5cd4ead18fbf..bf728403cb41 100644 --- a/hadoop-hdds/client/pom.xml +++ b/hadoop-hdds/client/pom.xml @@ -37,10 +37,67 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.ozone hdds-common + + org.apache.ozone + hdds-config + org.apache.ozone hdds-erasurecode + + org.apache.ozone + hdds-interface-client + + + + org.apache.commons + commons-lang3 + + + org.apache.ratis + ratis-client + + + org.apache.ratis + ratis-common + + + org.apache.ratis + ratis-grpc + + + org.apache.ratis + ratis-proto + + + org.apache.ratis + ratis-thirdparty-misc + + + + org.slf4j + slf4j-api + + + + com.google.guava + guava + + + + io.opentracing + opentracing-api + + + io.opentracing + opentracing-util + + + + jakarta.annotation + jakarta.annotation-api + diff --git a/hadoop-hdds/common/pom.xml b/hadoop-hdds/common/pom.xml index aeec60f97906..29cb513bb6fc 100644 --- a/hadoop-hdds/common/pom.xml +++ b/hadoop-hdds/common/pom.xml @@ -64,6 +64,10 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.commons commons-lang3 + + commons-collections + commons-collections + commons-io commons-io @@ -72,6 +76,14 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> com.fasterxml.jackson.core jackson-annotations + + com.fasterxml.jackson.core + jackson-core + + + com.fasterxml.jackson.core + jackson-databind + com.fasterxml.jackson.datatype jackson-datatype-jsr310 @@ -93,6 +105,10 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> jakarta.annotation jakarta.annotation-api + + jakarta.xml.bind + jakarta.xml.bind-api + io.dropwizard.metrics @@ -114,6 +130,10 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> 
+ + org.apache.ratis + ratis-common + ratis-netty org.apache.ratis @@ -122,6 +142,18 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> ratis-grpc org.apache.ratis + + org.apache.ratis + ratis-proto + + + org.apache.ratis + ratis-client + + + org.apache.ratis + ratis-thirdparty-misc + com.google.errorprone error_prone_annotations @@ -134,6 +166,14 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> bcpkix-jdk18on ${bouncycastle.version} + + org.bouncycastle + bcprov-jdk18on + + + org.bouncycastle + bcutil-jdk18on + commons-validator commons-validator @@ -142,10 +182,18 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> io.jaegertracing jaeger-client + + io.jaegertracing + jaeger-core + org.jetbrains.kotlin kotlin-stdlib + + io.opentracing + opentracing-api + io.opentracing opentracing-util @@ -158,6 +206,10 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.reflections reflections + + org.slf4j + slf4j-api + org.apache.ozone hdds-interface-client diff --git a/hadoop-hdds/container-service/pom.xml b/hadoop-hdds/container-service/pom.xml index 7a341bd66a8f..d73bea95895e 100644 --- a/hadoop-hdds/container-service/pom.xml +++ b/hadoop-hdds/container-service/pom.xml @@ -34,10 +34,35 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.ozone hdds-common + + org.apache.ozone + hdds-config + + + org.apache.ozone + hdds-interface-client + + + org.apache.ozone + hdds-interface-server + + + org.apache.ozone + hdds-managed-rocksdb + + org.apache.commons commons-compress + + org.apache.logging.log4j + log4j-api + + + commons-io + commons-io + com.github.luben zstd-jni @@ -54,6 +79,10 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> commons-codec commons-codec + + commons-collections + commons-collections + io.dropwizard.metrics metrics-core @@ -70,6 +99,15 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> provided + + org.apache.ratis + ratis-server + + + + jakarta.annotation + jakarta.annotation-api + jakarta.xml.bind jakarta.xml.bind-api @@ -78,10 +116,22 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.glassfish.jaxb jaxb-runtime + + info.picocli + picocli + io.netty netty-transport + + io.netty + netty-buffer + + + io.netty + netty-common + io.netty netty-codec @@ -90,10 +140,76 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> io.netty netty-handler + + io.opentracing + opentracing-api + + + io.opentracing + opentracing-util + + + org.apache.commons + commons-lang3 + org.apache.commons commons-text + + org.apache.ratis + ratis-client + + + org.apache.ratis + ratis-common + + + org.apache.ratis + ratis-grpc + + + org.apache.ratis + ratis-netty + + + org.apache.ratis + ratis-proto + + + org.apache.ratis + ratis-server-api + + + org.apache.ratis + ratis-thirdparty-misc + + + + org.rocksdb + rocksdbjni + + + org.slf4j + slf4j-api + + + + com.fasterxml.jackson.core + jackson-annotations + + + com.fasterxml.jackson.core + jackson-databind + + + com.google.guava + guava + + + com.google.protobuf + protobuf-java + diff --git a/hadoop-hdds/erasurecode/pom.xml b/hadoop-hdds/erasurecode/pom.xml index cd0afbca7f80..201336d5ed3a 100644 --- a/hadoop-hdds/erasurecode/pom.xml +++ b/hadoop-hdds/erasurecode/pom.xml @@ -38,6 +38,16 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> hdds-common + + org.slf4j + slf4j-api + + + + com.google.guava + guava + + org.apache.ozone @@ -49,6 +59,11 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> hdds-test-utils test + + org.apache.ozone + hdds-config + test + diff --git a/hadoop-hdds/framework/pom.xml b/hadoop-hdds/framework/pom.xml index 
5eb3d2071b70..70cce849aec1 100644 --- a/hadoop-hdds/framework/pom.xml +++ b/hadoop-hdds/framework/pom.xml @@ -33,6 +33,14 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> + + org.apache.ozone + hdds-config + + + org.apache.ozone + hdds-interface-client + org.apache.ozone hdds-interface-server @@ -57,6 +65,10 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> ch.qos.reload4j reload4j + + org.slf4j + slf4j-api + org.slf4j slf4j-reload4j @@ -69,11 +81,32 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.commons commons-configuration2 + + org.apache.commons + commons-lang3 + + + commons-codec + commons-codec + commons-fileupload commons-fileupload ${commons-fileupload.version} + + commons-io + commons-io + + + commons-logging + commons-logging + + + commons-validator + commons-validator + + org.apache.logging.log4j log4j-api @@ -88,6 +121,18 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> disruptor runtime + + org.bouncycastle + bcpkix-jdk18on + + + org.bouncycastle + bcprov-jdk18on + + + org.eclipse.jetty + jetty-http + org.eclipse.jetty jetty-util @@ -104,11 +149,20 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.eclipse.jetty jetty-webapp + + org.glassfish.jersey.core + jersey-server + org.glassfish.jersey.containers jersey-container-servlet-core + org.rocksdb + rocksdbjni + + + ratis-server org.apache.ratis @@ -122,6 +176,18 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> + + org.apache.ratis + ratis-common + + + org.apache.ratis + ratis-metrics-api + + + org.apache.ratis + ratis-server-api + ratis-metrics-dropwizard3 org.apache.ratis @@ -132,6 +198,23 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> + + org.apache.ratis + ratis-thirdparty-misc + + + + io.dropwizard.metrics + metrics-core + + + io.opentracing + opentracing-api + + + io.prometheus + simpleclient + io.prometheus simpleclient_dropwizard @@ -140,14 +223,39 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> io.prometheus simpleclient_common + + com.fasterxml.jackson.core + jackson-annotations + + + com.fasterxml.jackson.core + jackson-databind + com.fasterxml.jackson.datatype jackson-datatype-jsr310 + + com.github.jnr + jnr-constants + com.github.jnr jnr-posix + + com.google.code.gson + gson + + + com.google.guava + guava + + + com.google.protobuf + protobuf-java + + org.apache.hadoop hadoop-hdfs-client @@ -165,6 +273,19 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> ${hdds.version} + + jakarta.annotation + jakarta.annotation-api + + + jakarta.ws.rs + jakarta.ws.rs-api + + + javax.servlet + javax.servlet-api + + org.apache.ozone @@ -182,6 +303,11 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> hdds-test-utils test + + org.eclipse.jetty + jetty-util-ajax + test + diff --git a/hadoop-hdds/interface-server/pom.xml b/hadoop-hdds/interface-server/pom.xml index 6c689171c8d3..df65c1e2b2af 100644 --- a/hadoop-hdds/interface-server/pom.xml +++ b/hadoop-hdds/interface-server/pom.xml @@ -34,6 +34,10 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> + + org.apache.ratis + ratis-thirdparty-misc + com.google.protobuf protobuf-java diff --git a/hadoop-hdds/managed-rocksdb/pom.xml b/hadoop-hdds/managed-rocksdb/pom.xml index b95fcc3479d6..125783222e54 100644 --- a/hadoop-hdds/managed-rocksdb/pom.xml +++ b/hadoop-hdds/managed-rocksdb/pom.xml @@ -36,10 +36,30 @@ org.apache.ozone hdds-common + + + org.apache.ratis + ratis-common + + org.rocksdb rocksdbjni + + org.slf4j + slf4j-api + + + + com.google.guava + guava + + + + jakarta.annotation + jakarta.annotation-api + diff --git a/hadoop-hdds/rocks-native/pom.xml 
b/hadoop-hdds/rocks-native/pom.xml index 509586806c81..5fc9949514bd 100644 --- a/hadoop-hdds/rocks-native/pom.xml +++ b/hadoop-hdds/rocks-native/pom.xml @@ -25,14 +25,37 @@ hdds-rocks-native + + org.apache.ozone + hdds-common + org.apache.ozone hdds-managed-rocksdb + + + org.apache.commons + commons-lang3 + + org.eclipse.jetty jetty-io + + org.rocksdb + rocksdbjni + + + org.slf4j + slf4j-api + + + + com.google.guava + guava + diff --git a/hadoop-hdds/rocksdb-checkpoint-differ/pom.xml b/hadoop-hdds/rocksdb-checkpoint-differ/pom.xml index 22f87556054c..3e535c5f5f26 100644 --- a/hadoop-hdds/rocksdb-checkpoint-differ/pom.xml +++ b/hadoop-hdds/rocksdb-checkpoint-differ/pom.xml @@ -41,6 +41,14 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.ozone hdds-common + + org.apache.ozone + hdds-config + + + org.apache.ozone + hdds-interface-client + org.apache.ozone hdds-managed-rocksdb @@ -49,6 +57,7 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.ozone hdds-rocks-native + com.google.guava guava @@ -57,6 +66,10 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.commons commons-lang3 + + org.apache.ratis + ratis-common + org.jgrapht jgrapht-core @@ -65,11 +78,29 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.jgrapht jgrapht-ext + + org.slf4j + slf4j-api + org.slf4j slf4j-reload4j + + com.github.vlsi.mxgraph + jgraphx + + + com.google.protobuf + protobuf-java + + + + commons-collections + commons-collections + + org.apache.ozone diff --git a/hadoop-hdds/server-scm/pom.xml b/hadoop-hdds/server-scm/pom.xml index f1a998f27cfb..32408e8904b2 100644 --- a/hadoop-hdds/server-scm/pom.xml +++ b/hadoop-hdds/server-scm/pom.xml @@ -41,6 +41,10 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.ozone hdds-common + + org.apache.ozone + hdds-config + org.apache.ozone @@ -55,6 +59,18 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.ozone hdds-hadoop-dependency-server + + org.apache.ozone + hdds-interface-admin + + + org.apache.ozone + hdds-interface-client + + + org.apache.ozone + hdds-interface-server + org.apache.ozone hdds-server-framework @@ -65,6 +81,44 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> hdds-docs provided + + + org.apache.ratis + ratis-client + + + org.apache.ratis + ratis-common + + + org.apache.ratis + ratis-grpc + + + org.apache.ratis + ratis-netty + + + org.apache.ratis + ratis-proto + + + org.apache.ratis + ratis-server-api + + + org.apache.ratis + ratis-server + + + org.apache.ratis + ratis-thirdparty-misc + + + + org.bouncycastle + bcpkix-jdk18on + org.bouncycastle bcprov-jdk18on @@ -74,6 +128,14 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> metrics-core + + org.apache.commons + commons-compress + + + org.apache.commons + commons-lang3 + org.apache.commons commons-text @@ -90,6 +152,54 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> + + org.eclipse.jetty + jetty-webapp + + + org.slf4j + slf4j-api + + + + com.fasterxml.jackson.core + jackson-annotations + + + com.fasterxml.jackson.core + jackson-core + + + com.fasterxml.jackson.core + jackson-databind + + + com.google.guava + guava + + + + info.picocli + picocli + + + jakarta.annotation + jakarta.annotation-api + + + javax.servlet + javax.servlet-api + + + + commons-collections + commons-collections + + + commons-io + commons-io + + org.apache.commons diff --git a/hadoop-hdds/tools/pom.xml b/hadoop-hdds/tools/pom.xml index 5e7238f5f3eb..daf6f3d40f42 100644 --- a/hadoop-hdds/tools/pom.xml +++ b/hadoop-hdds/tools/pom.xml @@ -37,6 +37,22 @@ 
https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.ozone hdds-common + + org.apache.ozone + hdds-config + + + org.apache.ozone + hdds-container-service + + + org.apache.ozone + hdds-interface-admin + + + org.apache.ozone + hdds-interface-client + org.apache.ozone hdds-server-framework @@ -45,6 +61,15 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.ozone hdds-client + + + org.apache.commons + commons-lang3 + + + org.apache.ratis + ratis-common + ratis-tools org.apache.ratis @@ -86,6 +111,37 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> hdds-server-scm + + com.fasterxml.jackson.core + jackson-annotations + + + com.fasterxml.jackson.core + jackson-core + + + com.fasterxml.jackson.core + jackson-databind + + + com.fasterxml.jackson.datatype + jackson-datatype-jsr310 + + + com.google.guava + guava + + + + info.picocli + picocli + + + + commons-io + commons-io + + org.apache.ozone @@ -109,6 +165,11 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> hdds-test-utils test + + commons-codec + commons-codec + test + diff --git a/pom.xml b/pom.xml index 25364c27ed3b..32e849670ac2 100644 --- a/pom.xml +++ b/pom.xml @@ -145,6 +145,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs 1.2.2 2.3.3 2.3.9 + 0.10.4 3.1.19 0.1.55 2.0 @@ -200,6 +201,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs 4.2.2 2.6.1 2.1.1 + 2.1.6 2.12.7 0.21.0 @@ -291,6 +293,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs 1.15.0 2.5.0 1.4.0 + 3.9.8.1 5.3.37 3.11.10 @@ -480,6 +483,11 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs commons-validator ${commons-validator.version} + + com.github.jnr + jnr-constants + ${jnr-constants.version} + com.github.jnr jnr-posix @@ -666,6 +674,11 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs ${bonecp.version} + + org.apache.ratis + ratis-thirdparty-misc + ${ratis.thirdparty.version} + org.apache.ratis ratis-proto-shaded @@ -686,6 +699,11 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs org.apache.ratis ${ratis.version} + + org.apache.ratis + ratis-metrics-api + ${ratis.version} + ratis-metrics-dropwizard3 org.apache.ratis @@ -701,6 +719,11 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs org.apache.ratis ${ratis.version} + + org.apache.ratis + ratis-proto + ${ratis.version} + org.apache.ratis ratis-tools @@ -1009,6 +1032,11 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs + + io.jaegertracing + jaeger-core + ${jaeger.version} + org.jetbrains.kotlin kotlin-bom @@ -1016,6 +1044,11 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs pom import + + io.opentracing + opentracing-api + ${opentracing.version} + io.opentracing opentracing-util @@ -1025,6 +1058,11 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs io.prometheus simpleclient_dropwizard ${prometheus.version} + + + io.prometheus + simpleclient + ${prometheus.version} io.prometheus @@ -1170,6 +1208,11 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs jakarta.annotation-api ${jakarta.annotation.version} + + jakarta.ws.rs + jakarta.ws.rs-api + ${jakarta.ws.rs-api.version} + joda-time joda-time @@ -1215,6 +1258,11 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs hadoop-shaded-guava ${hadoop-shaded-guava.version} + 
+ com.github.vlsi.mxgraph + jgraphx + ${jgraphx.version} + com.nimbusds nimbus-jose-jwt From 91d6fc65e861ff15fccd87a33d74fb6b26c08765 Mon Sep 17 00:00:00 2001 From: hao guo Date: Tue, 13 Aug 2024 03:03:39 +0800 Subject: [PATCH 14/50] HDDS-11281. Optimize the lock of ReadChunk (#7034) --- .../keyvalue/helpers/ChunkUtils.java | 95 ++++++++----------- .../keyvalue/helpers/TestChunkUtils.java | 77 ++++++++++----- 2 files changed, 93 insertions(+), 79 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/ChunkUtils.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/ChunkUtils.java index c3f6ac921206..0fac45571c78 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/ChunkUtils.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/ChunkUtils.java @@ -39,10 +39,10 @@ import java.util.EnumSet; import java.util.List; import java.util.Set; -import java.util.concurrent.ConcurrentHashMap; -import java.util.function.Supplier; +import java.util.concurrent.locks.ReadWriteLock; import java.util.function.ToLongFunction; +import com.google.common.util.concurrent.Striped; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException; import org.apache.hadoop.ozone.OzoneConsts; @@ -65,6 +65,7 @@ import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.UNSUPPORTED_REQUEST; import static org.apache.hadoop.ozone.container.common.utils.StorageVolumeUtil.onFailure; +import org.apache.ratis.util.AutoCloseableLock; import org.apache.ratis.util.function.CheckedFunction; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -74,8 +75,6 @@ */ public final class ChunkUtils { - private static final Set LOCKS = ConcurrentHashMap.newKeySet(); - private static final Logger LOG = LoggerFactory.getLogger(ChunkUtils.class); @@ -91,10 +90,29 @@ public final class ChunkUtils { StandardOpenOption.READ )); public static final FileAttribute[] NO_ATTRIBUTES = {}; + public static final int DEFAULT_FILE_LOCK_STRIPED_SIZE = 2048; + private static Striped fileStripedLock = + Striped.readWriteLock(DEFAULT_FILE_LOCK_STRIPED_SIZE); /** Never constructed. 
**/ private ChunkUtils() { + } + + @VisibleForTesting + public static void setStripedLock(Striped stripedLock) { + fileStripedLock = stripedLock; + } + + private static ReadWriteLock getFileLock(Path filePath) { + return fileStripedLock.get(filePath); + } + private static AutoCloseableLock getFileReadLock(Path filePath) { + return AutoCloseableLock.acquire(getFileLock(filePath).readLock()); + } + + private static AutoCloseableLock getFileWriteLock(Path filePath) { + return AutoCloseableLock.acquire(getFileLock(filePath).writeLock()); } /** @@ -156,24 +174,19 @@ private static void writeData(ChunkBuffer data, String filename, private static long writeDataToFile(File file, ChunkBuffer data, long offset, boolean sync) { final Path path = file.toPath(); - try { - return processFileExclusively(path, () -> { - FileChannel channel = null; - try { - channel = open(path, WRITE_OPTIONS, NO_ATTRIBUTES); - - try (FileLock ignored = channel.lock()) { - return writeDataToChannel(channel, data, offset); - } - } catch (IOException e) { - throw new UncheckedIOException(e); - } finally { - closeFile(channel, sync); + try (AutoCloseableLock ignoredLock = getFileWriteLock(path)) { + FileChannel channel = null; + try { + channel = open(path, WRITE_OPTIONS, NO_ATTRIBUTES); + + try (FileLock ignored = channel.lock()) { + return writeDataToChannel(channel, data, offset); } - }); - } catch (InterruptedException e) { - throw new UncheckedIOException(new InterruptedIOException( - "Interrupted while waiting to write file " + path)); + } catch (IOException e) { + throw new UncheckedIOException(e); + } finally { + closeFile(channel, sync); + } } } @@ -211,20 +224,11 @@ private static void readData(File file, long offset, long len, final long startTime = Time.monotonicNow(); final long bytesRead; - try { - bytesRead = processFileExclusively(path, () -> { - try (FileChannel channel = open(path, READ_OPTIONS, NO_ATTRIBUTES); - FileLock ignored = channel.lock(offset, len, true)) { - return readMethod.apply(channel); - } catch (IOException e) { - onFailure(volume); - throw new UncheckedIOException(e); - } - }); - } catch (UncheckedIOException e) { + try (AutoCloseableLock ignoredLock = getFileReadLock(path); + FileChannel channel = open(path, READ_OPTIONS, NO_ATTRIBUTES)) { + bytesRead = readMethod.apply(channel); + } catch (IOException e) { onFailure(volume); - throw wrapInStorageContainerException(e.getCause()); - } catch (InterruptedException e) { throw wrapInStorageContainerException(e); } @@ -391,29 +395,6 @@ public static void verifyChunkFileExists(File file) } } - @VisibleForTesting - static T processFileExclusively(Path path, Supplier op) - throws InterruptedException { - long period = 1; - for (;;) { - if (LOCKS.add(path)) { - break; - } else { - Thread.sleep(period); - // exponentially backoff until the sleep time is over 1 second. 
- if (period < 1000) { - period *= 2; - } - } - } - - try { - return op.get(); - } finally { - LOCKS.remove(path); - } - } - private static void closeFile(FileChannel file, boolean sync) { if (file != null) { try { diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/helpers/TestChunkUtils.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/helpers/TestChunkUtils.java index 1a1158a210f5..5cd264af9989 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/helpers/TestChunkUtils.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/helpers/TestChunkUtils.java @@ -27,9 +27,11 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.StandardOpenOption; +import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Random; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutorService; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.ThreadPoolExecutor; @@ -37,6 +39,7 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; +import com.google.common.util.concurrent.Striped; import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException; import org.apache.hadoop.ozone.common.ChunkBuffer; import org.apache.hadoop.ozone.container.common.helpers.ChunkInfo; @@ -81,6 +84,7 @@ static ChunkBuffer readData(File file, long off, long len) @Test void concurrentReadOfSameFile() throws Exception { + int threads = 10; String s = "Hello World"; byte[] array = s.getBytes(UTF_8); ChunkBuffer data = ChunkBuffer.wrap(ByteBuffer.wrap(array)); @@ -89,7 +93,6 @@ void concurrentReadOfSameFile() throws Exception { int offset = 0; File file = tempFile.toFile(); ChunkUtils.writeData(file, data, offset, len, null, true); - int threads = 10; ExecutorService executor = new ThreadPoolExecutor(threads, threads, 0, TimeUnit.SECONDS, new LinkedBlockingQueue<>()); AtomicInteger processed = new AtomicInteger(); @@ -127,38 +130,68 @@ void concurrentReadOfSameFile() throws Exception { } @Test - void concurrentProcessing() throws Exception { - final int perThreadWait = 1000; - final int maxTotalWait = 5000; - int threads = 20; + void concurrentReadWriteOfSameFile() { + final int threads = 10; + ChunkUtils.setStripedLock(Striped.readWriteLock(threads)); + final byte[] array = "Hello World".getBytes(UTF_8); - ExecutorService executor = new ThreadPoolExecutor(threads, threads, + Path tempFile = tempDir.toPath().resolve("concurrent_read_write"); + File file = tempFile.toFile(); + AtomicInteger success = new AtomicInteger(0); + AtomicInteger fail = new AtomicInteger(0); + + ExecutorService executor = new ThreadPoolExecutor(10, 10, 0, TimeUnit.SECONDS, new LinkedBlockingQueue<>()); - AtomicInteger processed = new AtomicInteger(); + + List> futures = new ArrayList<>(); + for (int i = 0; i < threads; i++) { - Path path = tempDir.toPath().resolve(String.valueOf(i)); - executor.execute(() -> { + final int threadNumber = i; + final ChunkBuffer data = ChunkBuffer.wrap(ByteBuffer.wrap(array)); + final int len = data.limit(); + final int offset = i * len; + + CompletableFuture future = CompletableFuture.runAsync(() -> { try { - ChunkUtils.processFileExclusively(path, () -> { - try { - Thread.sleep(perThreadWait); - } catch (InterruptedException e) { - e.printStackTrace(); + 
ChunkUtils.writeData(file, data, offset, len, null, true); + success.getAndIncrement(); + } catch (StorageContainerException e) { + throw new RuntimeException(e); + } + }, executor).whenCompleteAsync((v, e) -> { + if (e == null) { + try { + final ChunkBuffer chunk = readData(file, offset, len); + // There should be only one element in readBuffers + final List buffers = chunk.asByteBufferList(); + assertEquals(1, buffers.size()); + final ByteBuffer readBuffer = buffers.get(0); + + LOG.info("Read data ({}): {}", threadNumber, + new String(readBuffer.array(), UTF_8)); + if (!Arrays.equals(array, readBuffer.array())) { + fail.getAndIncrement(); } - processed.incrementAndGet(); - return null; - }); - } catch (InterruptedException e) { - e.printStackTrace(); + assertEquals(len, readBuffer.remaining()); + } catch (Exception ee) { + LOG.error("Failed to read data ({})", threadNumber, ee); + fail.getAndIncrement(); + } + } else { + fail.getAndIncrement(); } - }); + }, executor); + futures.add(future); } try { - GenericTestUtils.waitFor(() -> processed.get() == threads, - 100, maxTotalWait); + for (CompletableFuture future : futures) { + future.join(); + } } finally { executor.shutdownNow(); } + assertEquals(success.get(), threads); + assertEquals(fail.get(), 0); } @Test From 8e701bf40e44d236ca50736216b0cdc39970b5ec Mon Sep 17 00:00:00 2001 From: Sammi Chen Date: Tue, 13 Aug 2024 16:45:30 +0800 Subject: [PATCH 15/50] HDDS-10749. Shutdown datanode when RatisServer is down (#6587) --- .../hadoop/ozone/HddsDatanodeService.java | 6 ++- .../statemachine/DatanodeStateMachine.java | 9 ++-- .../server/ratis/ContainerStateMachine.java | 53 ++++++++++++++++++- .../server/ratis/XceiverServerRatis.java | 12 +++-- .../container/ozoneimpl/OzoneContainer.java | 7 +-- .../container/common/ContainerTestUtils.java | 2 +- .../server/ratis/TestCSMMetrics.java | 2 +- .../ozoneimpl/TestOzoneContainerWithTLS.java | 2 +- .../ozoneimpl/TestSecureOzoneContainer.java | 2 +- .../container/server/TestContainerServer.java | 2 +- .../server/TestSecureContainerServer.java | 2 +- 11 files changed, 81 insertions(+), 18 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsDatanodeService.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsDatanodeService.java index b2ed8691a6fe..6b32b74dc7c4 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsDatanodeService.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsDatanodeService.java @@ -288,7 +288,7 @@ public void start() { .register(REPLICATION_STREAMS_LIMIT_KEY, this::reconfigReplicationStreamsLimit); - datanodeStateMachine = new DatanodeStateMachine(datanodeDetails, conf, + datanodeStateMachine = new DatanodeStateMachine(this, datanodeDetails, conf, dnCertClient, secretKeyClient, this::terminateDatanode, reconfigurationHandler); try { @@ -620,6 +620,10 @@ public void saveNewCertId(String newCertId) { } } + public boolean isStopped() { + return isStopped.get(); + } + /** * Check ozone admin privilege, throws exception if not admin. 
*/ diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java index 7b3202b4a458..a460e30ede39 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java @@ -44,6 +44,7 @@ import org.apache.hadoop.hdds.upgrade.HDDSLayoutVersionManager; import org.apache.hadoop.hdds.utils.IOUtils; import org.apache.hadoop.hdds.utils.NettyMetrics; +import org.apache.hadoop.ozone.HddsDatanodeService; import org.apache.hadoop.ozone.HddsDatanodeStopService; import org.apache.hadoop.ozone.container.common.DatanodeLayoutStorage; import org.apache.hadoop.ozone.container.common.report.ReportManager; @@ -137,7 +138,9 @@ public class DatanodeStateMachine implements Closeable { * @param certClient - Datanode Certificate client, required if security is * enabled */ - public DatanodeStateMachine(DatanodeDetails datanodeDetails, + @SuppressWarnings("checkstyle:ParameterNumber") + public DatanodeStateMachine(HddsDatanodeService hddsDatanodeService, + DatanodeDetails datanodeDetails, ConfigurationSource conf, CertificateClient certClient, SecretKeyClient secretKeyClient, @@ -177,7 +180,7 @@ public DatanodeStateMachine(DatanodeDetails datanodeDetails, // HDDS-3116 for more details. constructionLock.writeLock().lock(); try { - container = new OzoneContainer(this.datanodeDetails, + container = new OzoneContainer(hddsDatanodeService, this.datanodeDetails, conf, context, certClient, secretKeyClient); } finally { constructionLock.writeLock().unlock(); @@ -273,7 +276,7 @@ public DatanodeStateMachine(DatanodeDetails datanodeDetails, @VisibleForTesting public DatanodeStateMachine(DatanodeDetails datanodeDetails, ConfigurationSource conf) throws IOException { - this(datanodeDetails, conf, null, null, null, + this(null, datanodeDetails, conf, null, null, null, new ReconfigurationHandler("DN", (OzoneConfiguration) conf, op -> { })); } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java index 90fca79550b7..28b9e151ff36 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java @@ -26,6 +26,7 @@ import java.util.Arrays; import java.util.Collection; import java.util.HashMap; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Objects; @@ -60,6 +61,7 @@ import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException; import org.apache.hadoop.hdds.utils.Cache; import org.apache.hadoop.hdds.utils.ResourceCache; +import org.apache.hadoop.ozone.HddsDatanodeService; import org.apache.hadoop.ozone.OzoneConfigKeys; import org.apache.hadoop.ozone.common.utils.BufferUtils; import org.apache.hadoop.ozone.container.common.interfaces.ContainerDispatcher; @@ -70,6 +72,7 @@ import com.google.common.annotations.VisibleForTesting; import 
com.google.common.base.Preconditions; +import org.apache.ratis.proto.RaftProtos; import org.apache.ratis.proto.RaftProtos.StateMachineEntryProto; import org.apache.ratis.proto.RaftProtos.LogEntryProto; import org.apache.ratis.proto.RaftProtos.RaftPeerRole; @@ -94,6 +97,7 @@ import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; import org.apache.ratis.thirdparty.com.google.protobuf.InvalidProtocolBufferException; import org.apache.ratis.thirdparty.com.google.protobuf.TextFormat; +import org.apache.ratis.util.LifeCycle; import org.apache.ratis.util.TaskQueue; import org.apache.ratis.util.function.CheckedSupplier; import org.apache.ratis.util.JavaUtils; @@ -198,19 +202,23 @@ long getStartTime() { private final Semaphore applyTransactionSemaphore; private final boolean waitOnBothFollowers; + private final HddsDatanodeService datanodeService; + private static Semaphore semaphore = new Semaphore(1); + /** * CSM metrics. */ private final CSMMetrics metrics; @SuppressWarnings("parameternumber") - public ContainerStateMachine(RaftGroupId gid, + public ContainerStateMachine(HddsDatanodeService hddsDatanodeService, RaftGroupId gid, ContainerDispatcher dispatcher, ContainerController containerController, List chunkExecutors, XceiverServerRatis ratisServer, ConfigurationSource conf, String threadNamePrefix) { + this.datanodeService = hddsDatanodeService; this.gid = gid; this.dispatcher = dispatcher; this.containerController = containerController; @@ -904,6 +912,49 @@ public void notifyTermIndexUpdated(long term, long index) { removeStateMachineDataIfNeeded(index); } + @Override + public void notifyServerShutdown(RaftProtos.RoleInfoProto roleInfo, boolean allServer) { + // if datanodeService is stopped , it indicates this `close` originates + // from `HddsDatanodeService.stop()`, otherwise, it indicates this `close` originates from ratis. + if (allServer) { + if (datanodeService != null && !datanodeService.isStopped()) { + LOG.info("{} is closed by ratis", gid); + if (semaphore.tryAcquire()) { + // run with a different thread, so this raft group can be closed + Runnable runnable = () -> { + try { + int closed = 0, total = 0; + try { + Thread.sleep(5000); // sleep 5s + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + Iterator iterator = ratisServer.getServer().getGroupIds().iterator(); + while (iterator.hasNext()) { + RaftGroupId id = iterator.next(); + RaftServer.Division division = ratisServer.getServer().getDivision(id); + if (division.getRaftServer().getLifeCycleState() == LifeCycle.State.CLOSED) { + closed++; + } + total++; + } + LOG.error("Container statemachine is closed by ratis, terminating HddsDatanodeService. 
" + + "closed({})/total({})", closed, total); + datanodeService.terminateDatanode(); + } catch (IOException e) { + LOG.warn("Failed to get division for raft groups", e); + LOG.error("Container statemachine is closed by ratis, terminating HddsDatanodeService"); + datanodeService.terminateDatanode(); + } + }; + CompletableFuture.runAsync(runnable); + } + } else { + LOG.info("{} is closed by HddsDatanodeService", gid); + } + } + } + private CompletableFuture applyTransaction( ContainerCommandRequestProto request, DispatcherContext context, Consumer exceptionHandler) { diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java index dc0c4b067603..2ae372320e0a 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java @@ -59,6 +59,7 @@ import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient; import org.apache.hadoop.hdds.tracing.TracingUtil; import org.apache.hadoop.hdds.utils.HddsServerUtil; +import org.apache.hadoop.ozone.HddsDatanodeService; import org.apache.hadoop.ozone.OzoneConfigKeys; import org.apache.hadoop.ozone.OzoneConsts; import org.apache.hadoop.ozone.container.common.impl.ContainerData; @@ -177,13 +178,15 @@ private static long nextCallId() { private final boolean shouldDeleteRatisLogDirectory; private final boolean streamEnable; private final DatanodeRatisServerConfig ratisServerConfig; + private final HddsDatanodeService datanodeService; - private XceiverServerRatis(DatanodeDetails dd, + private XceiverServerRatis(HddsDatanodeService hddsDatanodeService, DatanodeDetails dd, ContainerDispatcher dispatcher, ContainerController containerController, StateContext context, ConfigurationSource conf, Parameters parameters) throws IOException { this.conf = conf; Objects.requireNonNull(dd, "DatanodeDetails == null"); + datanodeService = hddsDatanodeService; datanodeDetails = dd; ratisServerConfig = conf.getObject(DatanodeRatisServerConfig.class); assignPorts(); @@ -241,7 +244,7 @@ private int determinePort(String key, int defaultValue) { } private ContainerStateMachine getStateMachine(RaftGroupId gid) { - return new ContainerStateMachine(gid, dispatcher, containerController, + return new ContainerStateMachine(datanodeService, gid, dispatcher, containerController, chunkExecutors, this, conf, datanodeDetails.threadNamePrefix()); } @@ -521,14 +524,14 @@ private void setPendingRequestsLimits(RaftProperties properties) { .valueOf(pendingRequestsMegaBytesLimit, TraditionalBinaryPrefix.MEGA)); } - public static XceiverServerRatis newXceiverServerRatis( + public static XceiverServerRatis newXceiverServerRatis(HddsDatanodeService hddsDatanodeService, DatanodeDetails datanodeDetails, ConfigurationSource ozoneConf, ContainerDispatcher dispatcher, ContainerController containerController, CertificateClient caClient, StateContext context) throws IOException { Parameters parameters = createTlsParameters( new SecurityConfig(ozoneConf), caClient); - return new XceiverServerRatis(datanodeDetails, dispatcher, + return new XceiverServerRatis(hddsDatanodeService, datanodeDetails, dispatcher, containerController, context, ozoneConf, parameters); } 
@@ -591,6 +594,7 @@ private int getRealPort(InetSocketAddress address, Port.Name name) { public void stop() { if (isStarted) { try { + LOG.info("Stopping {} {}", getClass().getSimpleName(), server.getId()); // shutdown server before the executors as while shutting down, // some of the tasks would be executed using the executors. server.close(); diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java index 4fa211a92c5a..b3809fff4b11 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java @@ -34,6 +34,7 @@ import org.apache.hadoop.hdds.security.token.TokenVerifier; import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient; import org.apache.hadoop.hdds.utils.HddsServerUtil; +import org.apache.hadoop.ozone.HddsDatanodeService; import org.apache.hadoop.ozone.container.common.helpers.ContainerMetrics; import org.apache.hadoop.ozone.container.common.impl.BlockDeletingService; import org.apache.hadoop.ozone.container.common.impl.ContainerSet; @@ -138,7 +139,7 @@ enum InitializingStatus { * @throws DiskOutOfSpaceException * @throws IOException */ - public OzoneContainer( + public OzoneContainer(HddsDatanodeService hddsDatanodeService, DatanodeDetails datanodeDetails, ConfigurationSource conf, StateContext context, CertificateClient certClient, SecretKeyVerifierClient secretKeyClient) throws IOException { @@ -205,7 +206,7 @@ public OzoneContainer( */ controller = new ContainerController(containerSet, handlers); - writeChannel = XceiverServerRatis.newXceiverServerRatis( + writeChannel = XceiverServerRatis.newXceiverServerRatis(hddsDatanodeService, datanodeDetails, config, hddsDispatcher, controller, certClient, context); @@ -277,7 +278,7 @@ public OzoneContainer( public OzoneContainer( DatanodeDetails datanodeDetails, ConfigurationSource conf, StateContext context) throws IOException { - this(datanodeDetails, conf, context, null, null); + this(null, datanodeDetails, conf, context, null, null); } public GrpcTlsConfig getTlsClientConfig() { diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/ContainerTestUtils.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/ContainerTestUtils.java index c63f82025e09..53ba8b685782 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/ContainerTestUtils.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/ContainerTestUtils.java @@ -347,7 +347,7 @@ public static XceiverServerRatis newXceiverServerRatis( conf.setInt(OzoneConfigKeys.HDDS_CONTAINER_RATIS_IPC_PORT, dn.getPort(DatanodeDetails.Port.Name.RATIS).getValue()); - return XceiverServerRatis.newXceiverServerRatis(dn, conf, + return XceiverServerRatis.newXceiverServerRatis(null, dn, conf, getNoopContainerDispatcher(), getEmptyContainerController(), null, null); } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/TestCSMMetrics.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/TestCSMMetrics.java index 0fd31bb4b728..e68831b494f2 100644 --- 
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/TestCSMMetrics.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/TestCSMMetrics.java @@ -189,7 +189,7 @@ static XceiverServerRatis newXceiverServerRatis( conf.set(OzoneConfigKeys.HDDS_CONTAINER_RATIS_DATANODE_STORAGE_DIR, dir); final ContainerDispatcher dispatcher = new TestContainerDispatcher(); - return XceiverServerRatis.newXceiverServerRatis(dn, conf, dispatcher, + return XceiverServerRatis.newXceiverServerRatis(null, dn, conf, dispatcher, new ContainerController(new ContainerSet(1000), Maps.newHashMap()), null, null); } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOzoneContainerWithTLS.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOzoneContainerWithTLS.java index a1e8e1781f51..50cb8f6b9be9 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOzoneContainerWithTLS.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOzoneContainerWithTLS.java @@ -311,7 +311,7 @@ private OzoneContainer createAndStartOzoneContainerInstance() { try { StateContext stateContext = ContainerTestUtils.getMockContext(dn, conf); container = new OzoneContainer( - dn, conf, stateContext, caClient, keyClient); + null, dn, conf, stateContext, caClient, keyClient); MutableVolumeSet volumeSet = container.getVolumeSet(); StorageVolumeUtil.getHddsVolumesList(volumeSet.getVolumesList()) .forEach(hddsVolume -> hddsVolume.setDbParentDir(tempFolder.toFile())); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestSecureOzoneContainer.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestSecureOzoneContainer.java index 5585696dfc31..92d716f7a406 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestSecureOzoneContainer.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestSecureOzoneContainer.java @@ -137,7 +137,7 @@ void testCreateOzoneContainer(boolean requireToken, boolean hasToken, conf.setBoolean(OzoneConfigKeys.HDDS_CONTAINER_IPC_RANDOM_PORT, false); DatanodeDetails dn = MockDatanodeDetails.randomDatanodeDetails(); - container = new OzoneContainer(dn, conf, ContainerTestUtils + container = new OzoneContainer(null, dn, conf, ContainerTestUtils .getMockContext(dn, conf), caClient, secretKeyClient); MutableVolumeSet volumeSet = container.getVolumeSet(); StorageVolumeUtil.getHddsVolumesList(volumeSet.getVolumesList()) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/server/TestContainerServer.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/server/TestContainerServer.java index 630c4d314959..8db7b1374721 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/server/TestContainerServer.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/server/TestContainerServer.java @@ -132,7 +132,7 @@ static XceiverServerRatis newXceiverServerRatis( conf.set(OzoneConfigKeys.HDDS_CONTAINER_RATIS_DATANODE_STORAGE_DIR, dir); final ContainerDispatcher dispatcher = new TestContainerDispatcher(); - return 
XceiverServerRatis.newXceiverServerRatis(dn, conf, dispatcher, + return XceiverServerRatis.newXceiverServerRatis(null, dn, conf, dispatcher, new ContainerController(new ContainerSet(1000), Maps.newHashMap()), caClient, null); } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/server/TestSecureContainerServer.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/server/TestSecureContainerServer.java index 8044685bb747..0bdf61b3bd54 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/server/TestSecureContainerServer.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/server/TestSecureContainerServer.java @@ -218,7 +218,7 @@ XceiverServerRatis newXceiverServerRatis( conf.set(OzoneConfigKeys.HDDS_CONTAINER_RATIS_DATANODE_STORAGE_DIR, dir); final ContainerDispatcher dispatcher = createDispatcher(dn, UUID.randomUUID(), conf); - return XceiverServerRatis.newXceiverServerRatis(dn, conf, dispatcher, + return XceiverServerRatis.newXceiverServerRatis(null, dn, conf, dispatcher, new ContainerController(new ContainerSet(1000), Maps.newHashMap()), caClient, null); } From 9963fa66b0fe9932f76838a32d0fdaca08c33fb1 Mon Sep 17 00:00:00 2001 From: "Doroszlai, Attila" Date: Tue, 13 Aug 2024 11:51:31 +0200 Subject: [PATCH 16/50] HDDS-10749. (addendum) Shutdown datanode when RatisServer is down (#6587) --- .../hadoop/ozone/container/metrics/TestContainerMetrics.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/metrics/TestContainerMetrics.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/metrics/TestContainerMetrics.java index 068cb01a9671..e6264cd3e114 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/metrics/TestContainerMetrics.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/metrics/TestContainerMetrics.java @@ -253,7 +253,7 @@ private XceiverServerSpi newXceiverServerRatis(DatanodeDetails dn, MutableVolume CONF.set(OzoneConfigKeys.HDDS_CONTAINER_RATIS_DATANODE_STORAGE_DIR, dir); final ContainerDispatcher dispatcher = createDispatcher(dn, volumeSet); - return XceiverServerRatis.newXceiverServerRatis(dn, CONF, dispatcher, + return XceiverServerRatis.newXceiverServerRatis(null, dn, CONF, dispatcher, new ContainerController(new ContainerSet(1000), Maps.newHashMap()), null, null); } From 31ec0263805f9cbc28f5633e4c0b71d1d1be1ea7 Mon Sep 17 00:00:00 2001 From: Sumit Agrawal Date: Wed, 14 Aug 2024 10:05:07 +0530 Subject: [PATCH 17/50] HDDS-11306. 
OM support system audit log (#7061) --- .../hadoop/ozone/audit/AuditLoggerType.java | 3 +- .../src/shell/conf/om-audit-log4j2.properties | 30 ++++++++++++++++-- .../hadoop/ozone/audit/OMSystemAction.java | 31 +++++++++++++++++++ .../apache/hadoop/ozone/om/OzoneManager.java | 19 ++++++++++++ 4 files changed, 80 insertions(+), 3 deletions(-) create mode 100644 hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/audit/OMSystemAction.java diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/ozone/audit/AuditLoggerType.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/ozone/audit/AuditLoggerType.java index d37d22118daa..8dec4f4fc3d3 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/ozone/audit/AuditLoggerType.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/ozone/audit/AuditLoggerType.java @@ -24,7 +24,8 @@ public enum AuditLoggerType { DNLOGGER("DNAudit"), OMLOGGER("OMAudit"), SCMLOGGER("SCMAudit"), - S3GLOGGER("S3GAudit"); + S3GLOGGER("S3GAudit"), + OMSYSTEMLOGGER("OMSystemAudit"); private String type; diff --git a/hadoop-ozone/dist/src/shell/conf/om-audit-log4j2.properties b/hadoop-ozone/dist/src/shell/conf/om-audit-log4j2.properties index b9b11bb62895..40d02bae2c39 100644 --- a/hadoop-ozone/dist/src/shell/conf/om-audit-log4j2.properties +++ b/hadoop-ozone/dist/src/shell/conf/om-audit-log4j2.properties @@ -60,7 +60,7 @@ filter.write.onMismatch=NEUTRAL #appender.console.layout.pattern=%d{DEFAULT} | %-5level | %c{1} | %msg | %throwable{3} %n # Comment this line when using both console and rolling appenders -appenders=rolling +appenders=rolling,sysrolling # Rolling File Appender with size & time thresholds. # Rolling is triggered when either threshold is breached. @@ -88,13 +88,39 @@ appender.rolling.strategy.delete.ifFileName.glob=om-audit-*.log.gz appender.rolling.strategy.delete.ifLastModified.type=IfLastModified appender.rolling.strategy.delete.ifLastModified.age=30d -loggers=audit +appender.sysrolling.type=RollingFile +appender.sysrolling.name=SysRollingFile +appender.sysrolling.fileName =${sys:hadoop.log.dir}/om-sys-audit-${hostName}.log +appender.sysrolling.filePattern=${sys:hadoop.log.dir}/om-sys-audit-${hostName}-%d{yyyy-MM-dd-HH-mm-ss}-%i.log.gz +appender.sysrolling.layout.type=PatternLayout +appender.sysrolling.layout.pattern=%d{DEFAULT} | %-5level | %c{1} | %msg | %throwable{3} %n +appender.sysrolling.policies.type=Policies +appender.sysrolling.policies.time.type=TimeBasedTriggeringPolicy +appender.sysrolling.policies.time.interval=86400 +appender.sysrolling.policies.size.type=SizeBasedTriggeringPolicy +appender.sysrolling.policies.size.size=64MB +appender.sysrolling.strategy.type=DefaultRolloverStrategy +appender.sysrolling.strategy.delete.type=Delete +appender.sysrolling.strategy.delete.basePath=${sys:hadoop.log.dir} +appender.sysrolling.strategy.delete.maxDepth=1 +appender.sysrolling.strategy.delete.ifFileName.type=IfFileName +appender.sysrolling.strategy.delete.ifFileName.glob=om-sys-audit-*.log.gz +appender.sysrolling.strategy.delete.ifLastModified.type=IfLastModified +appender.sysrolling.strategy.delete.ifLastModified.age=30d + +loggers=audit,sysaudit logger.audit.type=AsyncLogger logger.audit.name=OMAudit logger.audit.level=INFO logger.audit.appenderRefs=rolling logger.audit.appenderRef.file.ref=RollingFile +logger.sysaudit.type=AsyncLogger +logger.sysaudit.name=OMSystemAudit +logger.sysaudit.level=INFO +logger.sysaudit.appenderRefs=sysrolling +logger.sysaudit.appenderRef.file.ref=SysRollingFile + 
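# Illustrative only, not part of the change above: with the OMSystemAudit
# logger and the SysRollingFile appender in place, the system audit could
# additionally be mirrored to the console while debugging startup, assuming
# the commented-out STDOUT console appender earlier in this file is enabled
# and added to the appenders list.
# logger.sysaudit.appenderRefs=sysrolling,console
# logger.sysaudit.appenderRef.file.ref=SysRollingFile
# logger.sysaudit.appenderRef.console.ref=STDOUT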
rootLogger.level=INFO #rootLogger.appenderRefs=stdout #rootLogger.appenderRef.stdout.ref=STDOUT diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/audit/OMSystemAction.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/audit/OMSystemAction.java new file mode 100644 index 000000000000..9f5b6ccebcf1 --- /dev/null +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/audit/OMSystemAction.java @@ -0,0 +1,31 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS,WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package org.apache.hadoop.ozone.audit; + +/** + * Enum to define Audit Action types for system audit in OzoneManager. This will in addition to OMAction + * as present for request. + */ +public enum OMSystemAction implements AuditAction { + STARTUP; + + @Override + public String getAction() { + return this.toString(); + } +} diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java index 03896a042ec5..9ceb9bb96d12 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java @@ -91,6 +91,7 @@ import org.apache.hadoop.hdds.utils.db.Table.KeyValue; import org.apache.hadoop.hdds.utils.db.TableIterator; import org.apache.hadoop.ozone.OzoneManagerVersion; +import org.apache.hadoop.ozone.audit.OMSystemAction; import org.apache.hadoop.ozone.om.helpers.LeaseKeyInfo; import org.apache.hadoop.ozone.om.helpers.ListOpenFilesResult; import org.apache.hadoop.ozone.om.helpers.SnapshotDiffJob; @@ -351,6 +352,9 @@ public final class OzoneManager extends ServiceRuntimeInfoImpl private static final AuditLogger AUDIT = new AuditLogger( AuditLoggerType.OMLOGGER); + private static final AuditLogger SYSTEMAUDIT = new AuditLogger( + AuditLoggerType.OMSYSTEMLOGGER); + private static final String OM_DAEMON = "om"; // This is set for read requests when OMRequest has S3Authentication set, @@ -1644,11 +1648,15 @@ public OMPerformanceMetrics getPerfMetrics() { * Start service. */ public void start() throws IOException { + Map auditMap = new HashMap(); + auditMap.put("OmState", omState.name()); if (omState == State.BOOTSTRAPPING) { if (isBootstrapping) { + auditMap.put("Bootstrap", "normal"); // Check that all OM configs have been updated with the new OM info. checkConfigBeforeBootstrap(); } else if (isForcedBootstrapping) { + auditMap.put("Bootstrap", "force"); LOG.warn("Skipped checking whether existing OM configs have been " + "updated with this OM information as force bootstrap is called."); } @@ -1736,12 +1744,17 @@ public void start() throws IOException { } omState = State.RUNNING; + auditMap.put("NewOmState", omState.name()); + SYSTEMAUDIT.logWriteSuccess(buildAuditMessageForSuccess(OMSystemAction.STARTUP, auditMap)); } /** * Restarts the service. This method re-initializes the rpc server. 
*/ public void restart() throws IOException { + Map auditMap = new HashMap(); + auditMap.put("OmState", omState.name()); + auditMap.put("Trigger", "restart"); setInstanceVariablesFromConf(); LOG.info(buildRpcServerStartMessage("OzoneManager RPC server", @@ -1808,6 +1821,8 @@ public void restart() throws IOException { startJVMPauseMonitor(); setStartTime(); omState = State.RUNNING; + auditMap.put("NewOmState", omState.name()); + SYSTEMAUDIT.logWriteSuccess(buildAuditMessageForSuccess(OMSystemAction.STARTUP, auditMap)); } /** @@ -3037,6 +3052,10 @@ public AuditLogger getAuditLogger() { return AUDIT; } + public AuditLogger getSystemAuditLogger() { + return SYSTEMAUDIT; + } + @Override public AuditMessage buildAuditMessageForSuccess(AuditAction op, Map auditMap) { From 181eda4bef16d2ca5eba637994021efd3bbb3e99 Mon Sep 17 00:00:00 2001 From: "Doroszlai, Attila" <6454655+adoroszlai@users.noreply.github.com> Date: Wed, 14 Aug 2024 08:11:17 +0200 Subject: [PATCH 18/50] HDDS-11305. Avoid accidental usage of commons-lang v2 (#7069) --- .../java/org/apache/hadoop/ozone/om/TestScmClient.java | 2 +- .../hadoop/ozone/om/service/TestKeyDeletingService.java | 2 +- pom.xml | 7 +++++++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestScmClient.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestScmClient.java index 228f668d01a8..08d9b9bc2cb5 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestScmClient.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestScmClient.java @@ -43,7 +43,7 @@ import static com.google.common.collect.Sets.newHashSet; import static java.util.Arrays.asList; -import static org.apache.commons.lang.RandomStringUtils.randomAlphabetic; +import static org.apache.commons.lang3.RandomStringUtils.randomAlphabetic; import static org.apache.hadoop.hdds.client.ReplicationConfig.fromTypeAndFactor; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestKeyDeletingService.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestKeyDeletingService.java index cf538f581c78..8163592cfc6d 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestKeyDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestKeyDeletingService.java @@ -34,7 +34,7 @@ import java.util.concurrent.atomic.AtomicLong; import com.google.common.collect.ImmutableMap; -import org.apache.commons.lang.RandomStringUtils; +import org.apache.commons.lang3.RandomStringUtils; import org.apache.hadoop.hdds.client.RatisReplicationConfig; import org.apache.hadoop.hdds.utils.db.Table; import org.apache.hadoop.hdds.utils.db.TableIterator; diff --git a/pom.xml b/pom.xml index 32e849670ac2..1db3952fe004 100644 --- a/pom.xml +++ b/pom.xml @@ -1576,6 +1576,13 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs org.jetbrains.annotations.Nullable + + true + Use commons-lang v3 + + org.apache.commons.lang.** + + From 380bda1d9119ba0baf86128f72e657a2ca5eab3c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 14 Aug 2024 12:30:43 +0200 Subject: [PATCH 19/50] HDDS-11297. 
Bump slf4j to 2.0.15 (#7052) --- hadoop-ozone/dist/src/shell/ozone/ozone | 1 + pom.xml | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/hadoop-ozone/dist/src/shell/ozone/ozone b/hadoop-ozone/dist/src/shell/ozone/ozone index 99fce8c2d44e..22ceed9ed3c6 100755 --- a/hadoop-ozone/dist/src/shell/ozone/ozone +++ b/hadoop-ozone/dist/src/shell/ozone/ozone @@ -250,6 +250,7 @@ function ozone_suppress_shell_log && [[ -z "${OZONE_ORIGINAL_ROOT_LOGGER}" ]]; then OZONE_LOGLEVEL=OFF OZONE_ROOT_LOGGER="${OZONE_LOGLEVEL},console" + OZONE_OPTS="${OZONE_OPTS} -Dslf4j.internal.verbosity=ERROR" fi } diff --git a/pom.xml b/pom.xml index 1db3952fe004..a6b7613c786b 100644 --- a/pom.xml +++ b/pom.xml @@ -174,7 +174,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs 4.4.16 - 2.0.13 + 2.0.15 2.23.1 3.4.4 1.2.25 From 8a7c23d9a4af3a2dd720c2c42178304c34f20c86 Mon Sep 17 00:00:00 2001 From: tanvipenumudy <46785609+tanvipenumudy@users.noreply.github.com> Date: Thu, 15 Aug 2024 02:39:21 +0530 Subject: [PATCH 20/50] HDDS-11302. Ozone wrapper configurations to increase ipc.server.read.threadpool.size for SCM and Datanode (#7058) Co-authored-by: tanvipenumudy --- .../apache/hadoop/hdds/HddsConfigKeys.java | 3 ++ .../apache/hadoop/hdds/scm/ScmConfigKeys.java | 12 +++++ .../src/main/resources/ozone-default.xml | 50 +++++++++++++++++++ .../HddsDatanodeClientProtocolServer.java | 9 +++- .../scm/server/SCMBlockProtocolServer.java | 7 ++- .../scm/server/SCMClientProtocolServer.java | 7 ++- .../scm/server/SCMDatanodeProtocolServer.java | 7 ++- .../scm/server/SCMSecurityProtocolServer.java | 5 +- .../scm/server/StorageContainerManager.java | 4 +- 9 files changed, 97 insertions(+), 7 deletions(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java index abdd25f0680c..87707f75dc45 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java @@ -367,6 +367,9 @@ private HddsConfigKeys() { public static final String HDDS_DATANODE_HANDLER_COUNT_KEY = "hdds.datanode.handler.count"; public static final int HDDS_DATANODE_HANDLER_COUNT_DEFAULT = 10; + public static final String HDDS_DATANODE_READ_THREADPOOL_KEY = + "hdds.datanode.read.threadpool"; + public static final int HDDS_DATANODE_READ_THREADPOOL_DEFAULT = 10; public static final String HDDS_DATANODE_HTTP_BIND_HOST_DEFAULT = "0.0.0.0"; public static final int HDDS_DATANODE_HTTP_BIND_PORT_DEFAULT = 9882; public static final int HDDS_DATANODE_HTTPS_BIND_PORT_DEFAULT = 9883; diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java index 593764ee09cf..36d4dbd45a2e 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java @@ -228,15 +228,27 @@ public final class ScmConfigKeys { "ozone.scm.handler.count.key"; public static final String OZONE_SCM_CLIENT_HANDLER_COUNT_KEY = "ozone.scm.client.handler.count.key"; + public static final String OZONE_SCM_CLIENT_READ_THREADPOOL_KEY = + "ozone.scm.client.read.threadpool"; + public static final int OZONE_SCM_CLIENT_READ_THREADPOOL_DEFAULT = 10; public static final String OZONE_SCM_BLOCK_HANDLER_COUNT_KEY = "ozone.scm.block.handler.count.key"; + 
public static final String OZONE_SCM_BLOCK_READ_THREADPOOL_KEY = + "ozone.scm.block.read.threadpool"; + public static final int OZONE_SCM_BLOCK_READ_THREADPOOL_DEFAULT = 10; public static final String OZONE_SCM_DATANODE_HANDLER_COUNT_KEY = "ozone.scm.datanode.handler.count.key"; + public static final String OZONE_SCM_DATANODE_READ_THREADPOOL_KEY = + "ozone.scm.datanode.read.threadpool"; + public static final int OZONE_SCM_DATANODE_READ_THREADPOOL_DEFAULT = 10; public static final int OZONE_SCM_HANDLER_COUNT_DEFAULT = 100; public static final String OZONE_SCM_SECURITY_HANDLER_COUNT_KEY = "ozone.scm.security.handler.count.key"; public static final int OZONE_SCM_SECURITY_HANDLER_COUNT_DEFAULT = 2; + public static final String OZONE_SCM_SECURITY_READ_THREADPOOL_KEY = + "ozone.scm.security.read.threadpool"; + public static final int OZONE_SCM_SECURITY_READ_THREADPOOL_DEFAULT = 1; public static final String OZONE_SCM_DEADNODE_INTERVAL = "ozone.scm.dead.node.interval"; diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index 186b5aaded9f..ad06f2f6e334 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -1140,6 +1140,36 @@ The default value is 100. + + ozone.scm.client.read.threadpool + 10 + OZONE, MANAGEMENT, PERFORMANCE + + The number of threads in RPC server reading from the socket used by Client to access SCM. + This config overrides Hadoop configuration "ipc.server.read.threadpool.size" for SCMClientProtocolServer. + The default value is 10. + + + + ozone.scm.block.read.threadpool + 10 + OZONE, MANAGEMENT, PERFORMANCE + + The number of threads in RPC server reading from the socket when accessing blocks. + This config overrides Hadoop configuration "ipc.server.read.threadpool.size" for SCMBlockProtocolServer. + The default value is 10. + + + + ozone.scm.datanode.read.threadpool + 10 + OZONE, MANAGEMENT, PERFORMANCE + + The number of threads in RPC server reading from the socket used by DataNode to access SCM. + This config overrides Hadoop configuration "ipc.server.read.threadpool.size" for SCMDatanodeProtocolServer. + The default value is 10. + + hdds.heartbeat.interval 30s @@ -2493,6 +2523,16 @@ OZONE, HDDS, SECURITY Threads configured for SCMSecurityProtocolServer. + + ozone.scm.security.read.threadpool + 1 + OZONE, HDDS, SECURITY, PERFORMANCE + + The number of threads in RPC server reading from the socket when performing security related operations with SCM. + This config overrides Hadoop configuration "ipc.server.read.threadpool.size" for SCMSecurityProtocolServer. + The default value is 1. + + ozone.scm.security.service.address @@ -2935,6 +2975,16 @@ service endpoints. + + hdds.datanode.read.threadpool + 10 + OZONE, HDDS, PERFORMANCE + + The number of threads in RPC server reading from the socket for Datanode client service endpoints. + This config overrides Hadoop configuration "ipc.server.read.threadpool.size" for HddsDatanodeClientProtocolServer. + The default value is 10. 
+ + ozone.client.failover.max.attempts 500 diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsDatanodeClientProtocolServer.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsDatanodeClientProtocolServer.java index 8b0b3a7ca239..b0308f79ed5c 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsDatanodeClientProtocolServer.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsDatanodeClientProtocolServer.java @@ -42,6 +42,8 @@ import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DATANODE_CLIENT_ADDRESS_KEY; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DATANODE_HANDLER_COUNT_DEFAULT; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DATANODE_HANDLER_COUNT_KEY; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DATANODE_READ_THREADPOOL_KEY; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DATANODE_READ_THREADPOOL_DEFAULT; import static org.apache.hadoop.hdds.HddsUtils.preserveThreadName; import static org.apache.hadoop.hdds.protocol.DatanodeDetails.Port.Name.CLIENT_RPC; @@ -106,6 +108,8 @@ private RPC.Server getRpcServer(OzoneConfiguration configuration, final int handlerCount = conf.getInt(HDDS_DATANODE_HANDLER_COUNT_KEY, HDDS_DATANODE_HANDLER_COUNT_DEFAULT); + final int readThreads = conf.getInt(HDDS_DATANODE_READ_THREADPOOL_KEY, + HDDS_DATANODE_READ_THREADPOOL_DEFAULT); ReconfigureProtocolServerSideTranslatorPB reconfigureServerProtocol = new ReconfigureProtocolServerSideTranslatorPB(reconfigurationHandler); BlockingService reconfigureService = ReconfigureProtocolProtos @@ -113,7 +117,7 @@ private RPC.Server getRpcServer(OzoneConfiguration configuration, reconfigureServerProtocol); return preserveThreadName(() -> startRpcServer(configuration, rpcAddress, - ReconfigureProtocolDatanodePB.class, reconfigureService, handlerCount)); + ReconfigureProtocolDatanodePB.class, reconfigureService, handlerCount, readThreads)); } /** @@ -130,7 +134,7 @@ private RPC.Server getRpcServer(OzoneConfiguration configuration, private RPC.Server startRpcServer( Configuration configuration, InetSocketAddress addr, Class protocol, BlockingService instance, - int handlerCount) + int handlerCount, int readThreads) throws IOException { return new RPC.Builder(configuration) .setProtocol(protocol) @@ -138,6 +142,7 @@ private RPC.Server startRpcServer( .setBindAddress(addr.getHostString()) .setPort(addr.getPort()) .setNumHandlers(handlerCount) + .setNumReaders(readThreads) .setVerbose(false) .setSecretManager(null) .build(); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMBlockProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMBlockProtocolServer.java index a035751796e5..7ed23b110c78 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMBlockProtocolServer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMBlockProtocolServer.java @@ -73,6 +73,8 @@ import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_HANDLER_COUNT_DEFAULT; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_HANDLER_COUNT_KEY; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_BLOCK_HANDLER_COUNT_KEY; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_BLOCK_READ_THREADPOOL_KEY; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_BLOCK_READ_THREADPOOL_DEFAULT; import static 
org.apache.hadoop.hdds.scm.exceptions.SCMException.ResultCodes.IO_EXCEPTION; import static org.apache.hadoop.hdds.scm.net.NetConstants.NODE_COST_DEFAULT; import static org.apache.hadoop.hdds.scm.net.NetConstants.ROOT; @@ -117,6 +119,8 @@ public SCMBlockProtocolServer(OzoneConfiguration conf, final int handlerCount = conf.getInt(OZONE_SCM_BLOCK_HANDLER_COUNT_KEY, OZONE_SCM_HANDLER_COUNT_KEY, OZONE_SCM_HANDLER_COUNT_DEFAULT, LOG::info); + final int readThreads = conf.getInt(OZONE_SCM_BLOCK_READ_THREADPOOL_KEY, + OZONE_SCM_BLOCK_READ_THREADPOOL_DEFAULT); RPC.setProtocolEngine(conf, ScmBlockLocationProtocolPB.class, ProtobufRpcEngine.class); @@ -142,7 +146,8 @@ public SCMBlockProtocolServer(OzoneConfiguration conf, scmBlockAddress, ScmBlockLocationProtocolPB.class, blockProtoPbService, - handlerCount); + handlerCount, + readThreads); blockRpcAddress = updateRPCListenAddress( conf, scm.getScmNodeDetails().getBlockProtocolServerAddressKey(), diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java index 828b452d3000..40d153a6bb41 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java @@ -120,6 +120,8 @@ import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_CLIENT_HANDLER_COUNT_KEY; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_HANDLER_COUNT_DEFAULT; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_HANDLER_COUNT_KEY; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_CLIENT_READ_THREADPOOL_KEY; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_CLIENT_READ_THREADPOOL_DEFAULT; import static org.apache.hadoop.hdds.scm.ScmUtils.checkIfCertSignRequestAllowed; import static org.apache.hadoop.hdds.scm.ha.HASecurityUtils.createSCMRatisTLSConfig; import static org.apache.hadoop.hdds.scm.server.StorageContainerManager.startRpcServer; @@ -150,6 +152,8 @@ public SCMClientProtocolServer(OzoneConfiguration conf, final int handlerCount = conf.getInt(OZONE_SCM_CLIENT_HANDLER_COUNT_KEY, OZONE_SCM_HANDLER_COUNT_KEY, OZONE_SCM_HANDLER_COUNT_DEFAULT, LOG::info); + final int readThreads = conf.getInt(OZONE_SCM_CLIENT_READ_THREADPOOL_KEY, + OZONE_SCM_CLIENT_READ_THREADPOOL_DEFAULT); RPC.setProtocolEngine(conf, StorageContainerLocationProtocolPB.class, ProtobufRpcEngine.class); @@ -173,7 +177,8 @@ public SCMClientProtocolServer(OzoneConfiguration conf, scmAddress, StorageContainerLocationProtocolPB.class, storageProtoPbService, - handlerCount); + handlerCount, + readThreads); // Add reconfigureProtocolService. 
ReconfigureProtocolServerSideTranslatorPB reconfigureServerProtocol diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMDatanodeProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMDatanodeProtocolServer.java index 3d864d4ea212..0cc6ab7ab9db 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMDatanodeProtocolServer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMDatanodeProtocolServer.java @@ -102,6 +102,8 @@ import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_HANDLER_COUNT_DEFAULT; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_HANDLER_COUNT_KEY; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_DATANODE_HANDLER_COUNT_KEY; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_DATANODE_READ_THREADPOOL_KEY; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_DATANODE_READ_THREADPOOL_DEFAULT; import static org.apache.hadoop.hdds.scm.events.SCMEvents.CONTAINER_REPORT; import static org.apache.hadoop.hdds.scm.events.SCMEvents.PIPELINE_REPORT; import static org.apache.hadoop.hdds.scm.server.StorageContainerManager.startRpcServer; @@ -161,6 +163,8 @@ public SCMDatanodeProtocolServer(final OzoneConfiguration conf, final int handlerCount = conf.getInt(OZONE_SCM_DATANODE_HANDLER_COUNT_KEY, OZONE_SCM_HANDLER_COUNT_KEY, OZONE_SCM_HANDLER_COUNT_DEFAULT, LOG::info); + final int readThreads = conf.getInt(OZONE_SCM_DATANODE_READ_THREADPOOL_KEY, + OZONE_SCM_DATANODE_READ_THREADPOOL_DEFAULT); RPC.setProtocolEngine(conf, getProtocolClass(), ProtobufRpcEngine.class); @@ -176,7 +180,8 @@ public SCMDatanodeProtocolServer(final OzoneConfiguration conf, datanodeRpcAddr, getProtocolClass(), dnProtoPbService, - handlerCount); + handlerCount, + readThreads); datanodeRpcAddress = updateRPCListenAddress( conf, getDatanodeAddressKey(), datanodeRpcAddr, diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMSecurityProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMSecurityProtocolServer.java index 6fea1f106f05..88b3c8877460 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMSecurityProtocolServer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMSecurityProtocolServer.java @@ -129,6 +129,8 @@ public class SCMSecurityProtocolServer implements SCMSecurityProtocol, final int handlerCount = conf.getInt(ScmConfigKeys.OZONE_SCM_SECURITY_HANDLER_COUNT_KEY, ScmConfigKeys.OZONE_SCM_SECURITY_HANDLER_COUNT_DEFAULT); + final int readThreads = conf.getInt(ScmConfigKeys.OZONE_SCM_SECURITY_READ_THREADPOOL_KEY, + ScmConfigKeys.OZONE_SCM_SECURITY_READ_THREADPOOL_DEFAULT); rpcAddress = HddsServerUtil .getScmSecurityInetAddress(conf); // SCM security service RPC service. 
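For context on what the new read.threadpool keys control: a Hadoop RPC server uses handler threads to execute the RPC calls and a separate pool of socket reader threads to pull requests off the connections, and before this change only the global ipc.server.read.threadpool.size sized that reader pool. Below is a hedged, minimal sketch of how the two knobs relate, using the SCM client server keys from the patch; the class and its printout are illustrative and not part of Ozone.

import org.apache.hadoop.conf.Configuration;

public final class ReaderPoolConfigSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Handler threads execute the RPC calls once they are queued.
    int handlers = conf.getInt("ozone.scm.client.handler.count.key", 100);
    // Reader threads only parse requests off the sockets; this per-server key
    // defaults to 10 instead of deferring to ipc.server.read.threadpool.size.
    int readers = conf.getInt("ozone.scm.client.read.threadpool", 10);
    System.out.println("SCM client RPC: handlers=" + handlers
        + ", socket readers=" + readers);
    // In the servers above these values feed RPC.Builder#setNumHandlers and
    // RPC.Builder#setNumReaders respectively.
  }
}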
@@ -157,7 +159,8 @@ public class SCMSecurityProtocolServer implements SCMSecurityProtocol, rpcAddress, SCMSecurityProtocolPB.class, secureProtoPbService, - handlerCount); + handlerCount, + readThreads); HddsServerUtil.addPBProtocol(conf, SecretKeyProtocolDatanodePB.class, secretKeyService, rpcServer); HddsServerUtil.addPBProtocol(conf, SecretKeyProtocolOmPB.class, diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java index 5c0248f162d1..aaf6bbfc9c42 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java @@ -1103,7 +1103,8 @@ public static RPC.Server startRpcServer( InetSocketAddress addr, Class protocol, BlockingService instance, - int handlerCount) + int handlerCount, + int readThreads) throws IOException { RPC.Server rpcServer = preserveThreadName(() -> new RPC.Builder(conf) @@ -1112,6 +1113,7 @@ public static RPC.Server startRpcServer( .setBindAddress(addr.getHostString()) .setPort(addr.getPort()) .setNumHandlers(handlerCount) + .setNumReaders(readThreads) .setVerbose(false) .setSecretManager(null) .build()); From 6310efdb21863bb50beff19cd2a4bab15c2a2b78 Mon Sep 17 00:00:00 2001 From: Wei-Chiu Chuang Date: Wed, 14 Aug 2024 14:59:15 -0700 Subject: [PATCH 21/50] HDDS-11292. [hsync] Move HBASE_SUPPORT layout upgrade test into its own test. (#7066) --- .../org/apache/hadoop/fs/ozone/TestHSync.java | 81 ------ .../hadoop/fs/ozone/TestHSyncUpgrade.java | 240 ++++++++++++++++++ 2 files changed, 240 insertions(+), 81 deletions(-) create mode 100644 hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestHSyncUpgrade.java diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestHSync.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestHSync.java index 466490290b7c..98d7388310b3 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestHSync.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestHSync.java @@ -90,10 +90,7 @@ import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo; -import org.apache.hadoop.ozone.om.protocol.OzoneManagerProtocol; import org.apache.hadoop.ozone.om.service.OpenKeyCleanupService; -import org.apache.hadoop.ozone.om.upgrade.OMLayoutFeature; -import org.apache.hadoop.ozone.upgrade.UpgradeFinalizer; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.Time; import org.apache.ozone.test.GenericTestUtils; @@ -121,8 +118,6 @@ import static org.apache.hadoop.ozone.OzoneConsts.OZONE_URI_SCHEME; import static org.apache.hadoop.ozone.TestDataUtil.cleanupDeletedTable; import static org.apache.hadoop.ozone.TestDataUtil.cleanupOpenKeyTable; -import static org.apache.hadoop.ozone.admin.scm.FinalizeUpgradeCommandUtil.isDone; -import static org.apache.hadoop.ozone.admin.scm.FinalizeUpgradeCommandUtil.isStarting; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_DEFAULT_BUCKET_LAYOUT; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_ADDRESS_KEY; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_RATIS_ENABLE_KEY; @@ -130,9 +125,6 @@ import static 
org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_OPEN_KEY_CLEANUP_SERVICE_INTERVAL; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_OPEN_KEY_EXPIRE_THRESHOLD; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_LEASE_HARD_LIMIT; -import static org.apache.hadoop.ozone.om.OmUpgradeConfig.ConfigStrings.OZONE_OM_INIT_DEFAULT_LAYOUT_VERSION; -import static org.apache.hadoop.ozone.om.exceptions.OMException.ResultCodes.NOT_SUPPORTED_OPERATION_PRIOR_FINALIZATION; -import static org.apache.ozone.test.LambdaTestUtils.await; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertArrayEquals; @@ -170,9 +162,6 @@ public class TestHSync { private static OpenKeyCleanupService openKeyCleanupService; - private static final int POLL_INTERVAL_MILLIS = 500; - private static final int POLL_MAX_WAIT_MILLIS = 120_000; - @BeforeAll public static void init() throws Exception { final BucketLayout layout = BUCKET_LAYOUT; @@ -193,7 +182,6 @@ public static void init() throws Exception { CONF.setTimeDuration(OZONE_OM_LEASE_HARD_LIMIT, EXPIRE_THRESHOLD_MS, TimeUnit.MILLISECONDS); CONF.set(OzoneConfigKeys.OZONE_OM_LEASE_SOFT_LIMIT, "0s"); - CONF.setInt(OZONE_OM_INIT_DEFAULT_LAYOUT_VERSION, OMLayoutFeature.QUOTA.layoutVersion()); ClientConfigForTesting.newBuilder(StorageUnit.BYTES) .setBlockSize(BLOCK_SIZE) @@ -226,9 +214,6 @@ public static void init() throws Exception { openKeyCleanupService = (OpenKeyCleanupService) cluster.getOzoneManager().getKeyManager().getOpenKeyCleanupService(); openKeyCleanupService.suspend(); - - preFinalizationChecks(); - finalizeOMUpgrade(); } @AfterAll @@ -239,72 +224,6 @@ public static void teardown() { } } - private static void preFinalizationChecks() throws IOException { - final String rootPath = String.format("%s://%s/", - OZONE_OFS_URI_SCHEME, CONF.get(OZONE_OM_ADDRESS_KEY)); - CONF.set(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY, rootPath); - - final String dir = OZONE_ROOT + bucket.getVolumeName() - + OZONE_URI_DELIMITER + bucket.getName(); - - final Path file = new Path(dir, "pre-finalization"); - try (RootedOzoneFileSystem fs = (RootedOzoneFileSystem)FileSystem.get(CONF)) { - try (FSDataOutputStream outputStream = fs.create(file, true)) { - OMException omException = assertThrows(OMException.class, outputStream::hsync); - assertFinalizationExceptionForHsyncLeaseRecovery(omException); - } - final OzoneManagerProtocol omClient = client.getObjectStore() - .getClientProxy().getOzoneManagerClient(); - OMException omException = assertThrows(OMException.class, - () -> omClient.listOpenFiles("", 100, "")); - assertFinalizationException(omException); - - omException = assertThrows(OMException.class, - () -> fs.recoverLease(file)); - assertFinalizationException(omException); - - fs.delete(file, false); - } - } - - private static void assertFinalizationExceptionForHsyncLeaseRecovery(OMException omException) { - assertEquals(NOT_SUPPORTED_OPERATION_PRIOR_FINALIZATION, - omException.getResult()); - assertThat(omException.getMessage()) - .contains("Cluster does not have the HBase support feature finalized yet"); - } - - private static void assertFinalizationException(OMException omException) { - assertEquals(NOT_SUPPORTED_OPERATION_PRIOR_FINALIZATION, - omException.getResult()); - assertThat(omException.getMessage()) - .contains("cannot be invoked before finalization."); - } - - /** - * Trigger OM upgrade finalization from the client and block until 
completion - * (status FINALIZATION_DONE). - */ - private static void finalizeOMUpgrade() throws Exception { - // Trigger OM upgrade finalization. Ref: FinalizeUpgradeSubCommand#call - final OzoneManagerProtocol omClient = client.getObjectStore() - .getClientProxy().getOzoneManagerClient(); - final String upgradeClientID = "Test-Upgrade-Client-" + UUID.randomUUID(); - UpgradeFinalizer.StatusAndMessages finalizationResponse = - omClient.finalizeUpgrade(upgradeClientID); - - // The status should transition as soon as the client call above returns - assertTrue(isStarting(finalizationResponse.status())); - // Wait for the finalization to be marked as done. - // 10s timeout should be plenty. - await(POLL_MAX_WAIT_MILLIS, POLL_INTERVAL_MILLIS, () -> { - final UpgradeFinalizer.StatusAndMessages progress = - omClient.queryUpgradeFinalizationProgress( - upgradeClientID, false, false); - return isDone(progress.status()); - }); - } - @Test // Making this the first test to be run to avoid db key composition headaches @Order(1) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestHSyncUpgrade.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestHSyncUpgrade.java new file mode 100644 index 000000000000..917ce57fe7d8 --- /dev/null +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestHSyncUpgrade.java @@ -0,0 +1,240 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.ozone; + +import org.apache.hadoop.fs.CommonConfigurationKeysPublic; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.conf.StorageUnit; +import org.apache.hadoop.hdds.scm.storage.BlockInputStream; +import org.apache.hadoop.hdds.scm.storage.BlockOutputStream; +import org.apache.hadoop.hdds.scm.storage.BufferPool; +import org.apache.hadoop.hdds.utils.IOUtils; +import org.apache.hadoop.ozone.ClientConfigForTesting; +import org.apache.hadoop.ozone.MiniOzoneCluster; +import org.apache.hadoop.ozone.OzoneConfigKeys; +import org.apache.hadoop.ozone.TestDataUtil; +import org.apache.hadoop.ozone.client.OzoneBucket; +import org.apache.hadoop.ozone.client.OzoneClient; +import org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler; +import org.apache.hadoop.ozone.container.keyvalue.impl.BlockManagerImpl; +import org.apache.hadoop.ozone.container.metadata.AbstractDatanodeStore; +import org.apache.hadoop.ozone.om.exceptions.OMException; +import org.apache.hadoop.ozone.om.helpers.BucketLayout; +import org.apache.hadoop.ozone.om.protocol.OzoneManagerProtocol; +import org.apache.hadoop.ozone.om.service.OpenKeyCleanupService; +import org.apache.hadoop.ozone.om.upgrade.OMLayoutFeature; +import org.apache.hadoop.ozone.upgrade.UpgradeFinalizer; +import org.apache.ozone.test.GenericTestUtils; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.MethodOrderer.OrderAnnotation; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestMethodOrder; +import org.junit.jupiter.api.Timeout; +import org.slf4j.event.Level; + +import java.io.IOException; +import java.util.UUID; +import java.util.concurrent.TimeUnit; + +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_RATIS_PIPELINE_LIMIT; +import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_INTERVAL; +import static org.apache.hadoop.ozone.OzoneConsts.OZONE_OFS_URI_SCHEME; +import static org.apache.hadoop.ozone.OzoneConsts.OZONE_ROOT; +import static org.apache.hadoop.ozone.OzoneConsts.OZONE_URI_DELIMITER; +import static org.apache.hadoop.ozone.admin.scm.FinalizeUpgradeCommandUtil.isDone; +import static org.apache.hadoop.ozone.admin.scm.FinalizeUpgradeCommandUtil.isStarting; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_DEFAULT_BUCKET_LAYOUT; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_DIR_DELETING_SERVICE_INTERVAL; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_ADDRESS_KEY; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_LEASE_HARD_LIMIT; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_OPEN_KEY_CLEANUP_SERVICE_INTERVAL; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_OPEN_KEY_EXPIRE_THRESHOLD; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_RATIS_ENABLE_KEY; +import static org.apache.hadoop.ozone.om.OmUpgradeConfig.ConfigStrings.OZONE_OM_INIT_DEFAULT_LAYOUT_VERSION; +import static 
org.apache.hadoop.ozone.om.exceptions.OMException.ResultCodes.NOT_SUPPORTED_OPERATION_PRIOR_FINALIZATION; +import static org.apache.ozone.test.LambdaTestUtils.await; +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Test HSync upgrade. + */ +@Timeout(value = 300) +@TestMethodOrder(OrderAnnotation.class) +public class TestHSyncUpgrade { + private MiniOzoneCluster cluster; + private OzoneBucket bucket; + + private final OzoneConfiguration conf = new OzoneConfiguration(); + private OzoneClient client; + private static final BucketLayout BUCKET_LAYOUT = BucketLayout.FILE_SYSTEM_OPTIMIZED; + + private static final int CHUNK_SIZE = 4 << 12; + private static final int FLUSH_SIZE = 3 * CHUNK_SIZE; + private static final int MAX_FLUSH_SIZE = 2 * FLUSH_SIZE; + private static final int BLOCK_SIZE = 2 * MAX_FLUSH_SIZE; + private static final int SERVICE_INTERVAL = 100; + private static final int EXPIRE_THRESHOLD_MS = 140; + + private static final int POLL_INTERVAL_MILLIS = 500; + private static final int POLL_MAX_WAIT_MILLIS = 120_000; + + @BeforeEach + public void init() throws Exception { + final BucketLayout layout = BUCKET_LAYOUT; + + conf.setBoolean(OZONE_OM_RATIS_ENABLE_KEY, false); + conf.set(OZONE_DEFAULT_BUCKET_LAYOUT, layout.name()); + conf.setBoolean(OzoneConfigKeys.OZONE_FS_HSYNC_ENABLED, true); + conf.setInt(OZONE_SCM_RATIS_PIPELINE_LIMIT, 10); + // Reduce KeyDeletingService interval + conf.setTimeDuration(OZONE_BLOCK_DELETING_SERVICE_INTERVAL, 100, TimeUnit.MILLISECONDS); + conf.setTimeDuration(OZONE_DIR_DELETING_SERVICE_INTERVAL, 100, TimeUnit.MILLISECONDS); + conf.setBoolean("ozone.client.incremental.chunk.list", true); + conf.setBoolean("ozone.client.stream.putblock.piggybacking", true); + conf.setTimeDuration(OZONE_OM_OPEN_KEY_CLEANUP_SERVICE_INTERVAL, + SERVICE_INTERVAL, TimeUnit.MILLISECONDS); + conf.setTimeDuration(OZONE_OM_OPEN_KEY_EXPIRE_THRESHOLD, + EXPIRE_THRESHOLD_MS, TimeUnit.MILLISECONDS); + conf.setTimeDuration(OZONE_OM_LEASE_HARD_LIMIT, + EXPIRE_THRESHOLD_MS, TimeUnit.MILLISECONDS); + conf.set(OzoneConfigKeys.OZONE_OM_LEASE_SOFT_LIMIT, "0s"); + conf.setInt(OZONE_OM_INIT_DEFAULT_LAYOUT_VERSION, OMLayoutFeature.MULTITENANCY_SCHEMA.layoutVersion()); + + ClientConfigForTesting.newBuilder(StorageUnit.BYTES) + .setBlockSize(BLOCK_SIZE) + .setChunkSize(CHUNK_SIZE) + .setStreamBufferFlushSize(FLUSH_SIZE) + .setStreamBufferMaxSize(MAX_FLUSH_SIZE) + .setDataStreamBufferFlushSize(MAX_FLUSH_SIZE) + .setDataStreamMinPacketSize(CHUNK_SIZE) + .setDataStreamWindowSize(5 * CHUNK_SIZE) + .applyTo(conf); + + cluster = MiniOzoneCluster.newBuilder(conf) + .setNumDatanodes(5) + .build(); + cluster.waitForClusterToBeReady(); + client = cluster.newClient(); + + // create a volume and a bucket to be used by OzoneFileSystem + bucket = TestDataUtil.createVolumeAndBucket(client, layout); + + // Enable DEBUG level logging for relevant classes + GenericTestUtils.setLogLevel(BlockManagerImpl.LOG, Level.DEBUG); + GenericTestUtils.setLogLevel(AbstractDatanodeStore.LOG, Level.DEBUG); + GenericTestUtils.setLogLevel(BlockOutputStream.LOG, Level.DEBUG); + GenericTestUtils.setLogLevel(BlockInputStream.LOG, Level.DEBUG); + GenericTestUtils.setLogLevel(KeyValueHandler.LOG, Level.DEBUG); + + GenericTestUtils.setLogLevel(BufferPool.LOG, Level.DEBUG); + + OpenKeyCleanupService openKeyCleanupService = + 
(OpenKeyCleanupService) cluster.getOzoneManager().getKeyManager() + .getOpenKeyCleanupService(); + openKeyCleanupService.suspend(); + } + + @AfterEach + public void teardown() { + IOUtils.closeQuietly(client); + if (cluster != null) { + cluster.shutdown(); + } + } + + @Test + public void upgrade() throws Exception { + preFinalizationChecks(); + finalizeOMUpgrade(); + } + + private void preFinalizationChecks() throws IOException { + final String rootPath = String.format("%s://%s/", + OZONE_OFS_URI_SCHEME, conf.get(OZONE_OM_ADDRESS_KEY)); + conf.set(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY, rootPath); + + final String dir = OZONE_ROOT + bucket.getVolumeName() + + OZONE_URI_DELIMITER + bucket.getName(); + + final Path file = new Path(dir, "pre-finalization"); + try (RootedOzoneFileSystem fs = (RootedOzoneFileSystem)FileSystem.get(conf)) { + try (FSDataOutputStream outputStream = fs.create(file, true)) { + OMException omException = assertThrows(OMException.class, outputStream::hsync); + assertFinalizationExceptionForHsync(omException); + } + final OzoneManagerProtocol omClient = client.getObjectStore() + .getClientProxy().getOzoneManagerClient(); + OMException omException = assertThrows(OMException.class, + () -> omClient.listOpenFiles("", 100, "")); + assertFinalizationException(omException); + + omException = assertThrows(OMException.class, + () -> fs.recoverLease(file)); + assertFinalizationException(omException); + + fs.delete(file, false); + } + } + + private void assertFinalizationExceptionForHsync(OMException omException) { + assertEquals(NOT_SUPPORTED_OPERATION_PRIOR_FINALIZATION, + omException.getResult()); + assertThat(omException.getMessage()) + .contains("Cluster does not have the hsync support feature finalized yet"); + } + + private void assertFinalizationException(OMException omException) { + assertEquals(NOT_SUPPORTED_OPERATION_PRIOR_FINALIZATION, + omException.getResult()); + assertThat(omException.getMessage()) + .contains("cannot be invoked before finalization."); + } + + /** + * Trigger OM upgrade finalization from the client and block until completion + * (status FINALIZATION_DONE). + */ + private void finalizeOMUpgrade() throws Exception { + // Trigger OM upgrade finalization. Ref: FinalizeUpgradeSubCommand#call + final OzoneManagerProtocol omClient = client.getObjectStore() + .getClientProxy().getOzoneManagerClient(); + final String upgradeClientID = "Test-Upgrade-Client-" + UUID.randomUUID(); + UpgradeFinalizer.StatusAndMessages finalizationResponse = + omClient.finalizeUpgrade(upgradeClientID); + + // The status should transition as soon as the client call above returns + assertTrue(isStarting(finalizationResponse.status())); + // Wait for the finalization to be marked as done. + // 10s timeout should be plenty. + await(POLL_MAX_WAIT_MILLIS, POLL_INTERVAL_MILLIS, () -> { + final UpgradeFinalizer.StatusAndMessages progress = + omClient.queryUpgradeFinalizationProgress( + upgradeClientID, false, false); + return isDone(progress.status()); + }); + } + +} From eb5e25eaef5f1cac944a12552a87e1f24f7d1da7 Mon Sep 17 00:00:00 2001 From: hao guo Date: Thu, 15 Aug 2024 11:20:57 +0800 Subject: [PATCH 22/50] HDDS-8784. trigger compaction outside of volume check. 
(#6611) --- .../statemachine/DatanodeConfiguration.java | 43 +++++++++++++++++++ .../container/common/volume/HddsVolume.java | 26 +++++++---- .../common/volume/VolumeInfoMetrics.java | 8 ++++ .../container/ozoneimpl/OzoneContainer.java | 36 +++++++++++++--- .../keyvalue/TestKeyValueContainer.java | 3 +- 5 files changed, 101 insertions(+), 15 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java index a8b0d8cfa4bc..22dff7505cee 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java @@ -110,6 +110,11 @@ public class DatanodeConfiguration extends ReconfigurableConfig { public static final Boolean OZONE_DATANODE_CHECK_EMPTY_CONTAINER_DIR_ON_DELETE_DEFAULT = false; + private static final long + AUTO_COMPACTION_SMALL_SST_FILE_INTERVAL_MINUTES_DEFAULT = 120; + private static final int + AUTO_COMPACTION_SMALL_SST_FILE_THREADS_DEFAULT = 1; + /** * Number of threads per volume that Datanode will use for chunk read. */ @@ -536,6 +541,24 @@ public void setWaitOnAllFollowers(boolean val) { ) private int autoCompactionSmallSstFileNum = 512; + @Config(key = "rocksdb.auto-compaction-small-sst-file.interval.minutes", + defaultValue = "120", + type = ConfigType.LONG, + tags = { DATANODE }, + description = "Auto compact small SST files interval in minutes." + ) + private long autoCompactionSmallSstFileIntervalMinutes = + AUTO_COMPACTION_SMALL_SST_FILE_INTERVAL_MINUTES_DEFAULT; + + @Config(key = "rocksdb.auto-compaction-small-sst-file.threads", + defaultValue = "1", + type = ConfigType.INT, + tags = { DATANODE }, + description = "Auto compact small SST files threads." + ) + private int autoCompactionSmallSstFileThreads = + AUTO_COMPACTION_SMALL_SST_FILE_THREADS_DEFAULT; + /** * Whether to check container directory or not to determine * container is empty. 
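For context, a minimal sketch (not part of this patch) of how the two auto-compaction settings added in the hunk above can be read back through the accessors introduced in the next hunk. The fully qualified key names assume DatanodeConfiguration's usual "hdds.datanode." prefix; the values are illustrative only.

    import org.apache.hadoop.hdds.conf.OzoneConfiguration;
    import org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration;

    public final class AutoCompactionConfigSketch {
      public static void main(String[] args) {
        OzoneConfiguration conf = new OzoneConfiguration();
        // Assumed full key names: "hdds.datanode." prefix + the keys declared in the hunk above.
        conf.setLong("hdds.datanode.rocksdb.auto-compaction-small-sst-file.interval.minutes", 60L);
        conf.setInt("hdds.datanode.rocksdb.auto-compaction-small-sst-file.threads", 2);

        DatanodeConfiguration dnConf = conf.getObject(DatanodeConfiguration.class);
        // Defaults from this patch are 120 minutes and 1 thread when the keys are unset.
        System.out.println(dnConf.getAutoCompactionSmallSstFileIntervalMinutes());
        System.out.println(dnConf.getAutoCompactionSmallSstFileThreads());
      }
    }

OzoneContainer, later in this patch, consumes the same two values to schedule compactDb() at a fixed delay on a dedicated executor.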
@@ -910,4 +933,24 @@ public int getAutoCompactionSmallSstFileNum() { public void setAutoCompactionSmallSstFileNum(int num) { this.autoCompactionSmallSstFileNum = num; } + + public long getAutoCompactionSmallSstFileIntervalMinutes() { + return autoCompactionSmallSstFileIntervalMinutes; + } + + public void setAutoCompactionSmallSstFileIntervalMinutes( + long autoCompactionSmallSstFileIntervalMinutes) { + this.autoCompactionSmallSstFileIntervalMinutes = + autoCompactionSmallSstFileIntervalMinutes; + } + + public int getAutoCompactionSmallSstFileThreads() { + return autoCompactionSmallSstFileThreads; + } + + public void setAutoCompactionSmallSstFileThreads( + int autoCompactionSmallSstFileThreads) { + this.autoCompactionSmallSstFileThreads = + autoCompactionSmallSstFileThreads; + } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java index d4cdaf2cfe41..b22b9148bb1d 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java @@ -38,6 +38,7 @@ import org.apache.hadoop.ozone.container.common.utils.StorageVolumeUtil; import org.apache.hadoop.ozone.container.upgrade.VersionedDatanodeFeatures; import org.apache.hadoop.ozone.container.upgrade.VersionedDatanodeFeatures.SchemaV3; +import org.apache.hadoop.util.Time; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -276,15 +277,6 @@ public synchronized VolumeCheckResult check(@Nullable Boolean unused) return VolumeCheckResult.FAILED; } - // TODO HDDS-8784 trigger compaction outside of volume check. Then the - // exception can be removed. 
- if (df.autoCompactionSmallSstFile()) { - // Calculate number of files per level and size per level - RawDB rawDB = DatanodeStoreCache.getInstance().getDB( - dbFile.getAbsolutePath(), getConf()); - rawDB.getStore().compactionIfNeeded(); - } - return VolumeCheckResult.HEALTHY; } @@ -470,4 +462,20 @@ private void closeDbStore() { LOG.info("SchemaV3 db is stopped at {} for volume {}", containerDBPath, getStorageID()); } + + public void compactDb() { + File dbFile = new File(getDbParentDir(), CONTAINER_DB_NAME); + String dbFilePath = dbFile.getAbsolutePath(); + try { + // Calculate number of files per level and size per level + RawDB rawDB = + DatanodeStoreCache.getInstance().getDB(dbFilePath, getConf()); + long start = Time.monotonicNowNanos(); + rawDB.getStore().compactionIfNeeded(); + volumeInfoMetrics.dbCompactTimesNanoSecondsIncr( + Time.monotonicNowNanos() - start); + } catch (Exception e) { + LOG.warn("compact rocksdb error in {}", dbFilePath, e); + } + } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeInfoMetrics.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeInfoMetrics.java index e59cab0d539f..68140600db95 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeInfoMetrics.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeInfoMetrics.java @@ -22,8 +22,10 @@ import org.apache.hadoop.metrics2.annotation.Metric; import org.apache.hadoop.metrics2.annotation.Metrics; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; +import org.apache.hadoop.metrics2.lib.MutableRate; import org.apache.hadoop.ozone.OzoneConsts; + /** * This class is used to track Volume Info stats for each HDDS Volume. */ @@ -33,6 +35,8 @@ public class VolumeInfoMetrics { private String metricsSourceName = VolumeInfoMetrics.class.getSimpleName(); private final HddsVolume volume; + @Metric("Returns the RocksDB compact times of the Volume") + private MutableRate dbCompactLatency; /** * @param identifier Typically, path to volume root. E.g. 
/data/hdds @@ -145,4 +149,8 @@ public long getCommitted() { return volume.getCommittedBytes(); } + public void dbCompactTimesNanoSecondsIncr(long time) { + dbCompactLatency.add(time); + } + } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java index b3809fff4b11..5cdeaaa57870 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java @@ -74,6 +74,9 @@ import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ThreadFactory; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicReference; @@ -122,7 +125,7 @@ public class OzoneContainer { private final ReplicationServer replicationServer; private DatanodeDetails datanodeDetails; private StateContext context; - + private ScheduledExecutorService dbCompactionExecutorService; private final ContainerMetrics metrics; @@ -158,9 +161,22 @@ public OzoneContainer(HddsDatanodeService hddsDatanodeService, dbVolumeSet = HddsServerUtil.getDatanodeDbDirs(conf).isEmpty() ? null : new MutableVolumeSet(datanodeDetails.getUuidString(), conf, context, VolumeType.DB_VOLUME, volumeChecker); + final DatanodeConfiguration dnConf = + conf.getObject(DatanodeConfiguration.class); if (SchemaV3.isFinalizedAndEnabled(config)) { HddsVolumeUtil.loadAllHddsVolumeDbStore( volumeSet, dbVolumeSet, false, LOG); + if (dnConf.autoCompactionSmallSstFile()) { + this.dbCompactionExecutorService = Executors.newScheduledThreadPool( + dnConf.getAutoCompactionSmallSstFileThreads(), + new ThreadFactoryBuilder().setNameFormat( + datanodeDetails.threadNamePrefix() + + "RocksDBCompactionThread-%d").build()); + this.dbCompactionExecutorService.scheduleWithFixedDelay(this::compactDb, + dnConf.getAutoCompactionSmallSstFileIntervalMinutes(), + dnConf.getAutoCompactionSmallSstFileIntervalMinutes(), + TimeUnit.MINUTES); + } } long recoveringContainerTimeout = config.getTimeDuration( @@ -221,8 +237,7 @@ public OzoneContainer(HddsDatanodeService hddsDatanodeService, readChannel = new XceiverServerGrpc( datanodeDetails, config, hddsDispatcher, certClient); - Duration blockDeletingSvcInterval = conf.getObject( - DatanodeConfiguration.class).getBlockDeletionInterval(); + Duration blockDeletingSvcInterval = dnConf.getBlockDeletionInterval(); long blockDeletingServiceTimeout = config .getTimeDuration(OZONE_BLOCK_DELETING_SERVICE_TIMEOUT, @@ -239,8 +254,8 @@ public OzoneContainer(HddsDatanodeService hddsDatanodeService, datanodeDetails.threadNamePrefix(), context.getParent().getReconfigurationHandler()); - Duration recoveringContainerScrubbingSvcInterval = conf.getObject( - DatanodeConfiguration.class).getRecoveringContainerScrubInterval(); + Duration recoveringContainerScrubbingSvcInterval = + dnConf.getRecoveringContainerScrubInterval(); long recoveringContainerScrubbingServiceTimeout = config .getTimeDuration(OZONE_RECOVERING_CONTAINER_SCRUBBING_SERVICE_TIMEOUT, @@ -492,6 +507,9 @@ public void stop() { if (dbVolumeSet != null) { dbVolumeSet.shutdown(); } + if (dbCompactionExecutorService != null) { + dbCompactionExecutorService.shutdown(); + } 
blockDeletingService.shutdown(); recoveringContainerScrubbingService.shutdown(); ContainerMetrics.remove(); @@ -591,4 +609,12 @@ public ReplicationServer getReplicationServer() { return replicationServer; } + public void compactDb() { + for (StorageVolume volume : volumeSet.getVolumesList()) { + HddsVolume hddsVolume = (HddsVolume) volume; + CompletableFuture.runAsync(hddsVolume::compactDb, + dbCompactionExecutorService); + } + } + } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainer.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainer.java index 15d0faefdf91..e1a3de30ddf3 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainer.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainer.java @@ -137,6 +137,7 @@ private void init(ContainerTestVersionInfo versionInfo) throws Exception { CodecBuffer.enableLeakDetection(); DatanodeConfiguration dc = CONF.getObject(DatanodeConfiguration.class); + dc.setAutoCompactionSmallSstFile(true); dc.setAutoCompactionSmallSstFileNum(100); dc.setRocksdbDeleteObsoleteFilesPeriod(5000); CONF.setFromObject(dc); @@ -896,7 +897,7 @@ void testAutoCompactionSmallSstFile( CONF).getStore(); List fileMetaDataList1 = ((RDBStore)(dnStore.getStore())).getDb().getLiveFilesMetaData(); - hddsVolume.check(true); + hddsVolume.compactDb(); // Sleep a while to wait for compaction to complete Thread.sleep(7000); List fileMetaDataList2 = From 593337bad94bb28b1495539fd0d9cd31d8a3361c Mon Sep 17 00:00:00 2001 From: Chung En Lee Date: Thu, 15 Aug 2024 17:26:04 +0800 Subject: [PATCH 23/50] HDDS-11282. 
Combine test setup for datanode upgrade unit tests (#7063) --- .../ozone/container/common/ScmTestMock.java | 8 +- .../TestDatanodeUpgradeToHBaseSupport.java | 242 ++--------- .../TestDatanodeUpgradeToSchemaV3.java | 382 +++++------------- .../upgrade/TestDatanodeUpgradeToScmHA.java | 331 +++++---------- .../container/upgrade/UpgradeTestHelper.java | 271 +++++++++++++ 5 files changed, 495 insertions(+), 739 deletions(-) create mode 100644 hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/upgrade/UpgradeTestHelper.java diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/ScmTestMock.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/ScmTestMock.java index 5cb698482a70..c333ba1b6988 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/ScmTestMock.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/ScmTestMock.java @@ -74,9 +74,9 @@ public ScmTestMock() { scmId = UUID.randomUUID().toString(); } - public ScmTestMock(String clusterId, String scmId) { + public ScmTestMock(String clusterId) { this.clusterId = clusterId; - this.scmId = scmId; + this.scmId = UUID.randomUUID().toString(); } // Map of datanode to containers @@ -368,6 +368,10 @@ public void setClusterId(String clusterId) { this.clusterId = clusterId; } + public String getScmId() { + return scmId; + } + public void setScmId(String scmId) { this.scmId = scmId; } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/upgrade/TestDatanodeUpgradeToHBaseSupport.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/upgrade/TestDatanodeUpgradeToHBaseSupport.java index 37bea49b342a..55396446d503 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/upgrade/TestDatanodeUpgradeToHBaseSupport.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/upgrade/TestDatanodeUpgradeToHBaseSupport.java @@ -20,44 +20,28 @@ import org.apache.hadoop.hdds.HddsConfigKeys; import org.apache.hadoop.hdds.conf.OzoneConfiguration; -import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.hdds.scm.ScmConfigKeys; -import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.pipeline.MockPipeline; import org.apache.hadoop.hdds.scm.pipeline.Pipeline; import org.apache.hadoop.hdds.upgrade.HDDSLayoutFeature; import org.apache.hadoop.ipc.RPC; -import org.apache.hadoop.ozone.container.ContainerTestHelper; -import org.apache.hadoop.ozone.container.common.ContainerTestUtils; -import org.apache.hadoop.ozone.container.common.DatanodeLayoutStorage; import org.apache.hadoop.ozone.container.common.SCMTestUtils; import org.apache.hadoop.ozone.container.common.ScmTestMock; import org.apache.hadoop.ozone.container.common.interfaces.Container; +import org.apache.hadoop.ozone.container.common.interfaces.ContainerDispatcher; import org.apache.hadoop.ozone.container.common.statemachine.DatanodeStateMachine; -import org.apache.hadoop.ozone.container.common.statemachine.EndpointStateMachine; -import org.apache.hadoop.ozone.container.common.states.endpoint.VersionEndpointTask; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; -import java.io.File; -import 
java.io.IOException; import java.net.InetSocketAddress; -import java.nio.file.Files; import java.nio.file.Path; -import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; -import java.util.List; -import java.util.Random; -import java.util.UUID; import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State.OPEN; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; /** * Tests upgrading a single datanode from HADOOP_PRC_PORTS_IN_DATANODEDETAILS to HBASE_SUPPORT. @@ -67,22 +51,19 @@ public class TestDatanodeUpgradeToHBaseSupport { private Path tempFolder; private DatanodeStateMachine dsm; + private ContainerDispatcher dispatcher; private OzoneConfiguration conf; private static final String CLUSTER_ID = "clusterID"; private RPC.Server scmRpcServer; private InetSocketAddress address; - private Random random; - private void initTests() throws Exception { conf = new OzoneConfiguration(); setup(); } private void setup() throws Exception { - random = new Random(); - address = SCMTestUtils.getReuseableAddress(); conf.setSocketAddr(ScmConfigKeys.OZONE_SCM_NAMES, address); conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, @@ -107,26 +88,30 @@ public void teardown() throws Exception { public void testIncrementalChunkListBeforeAndAfterUpgrade() throws Exception { initTests(); // start DN and SCM - startScmServer(); - addHddsVolume(); - startPreFinalizedDatanode(); - final Pipeline pipeline = getPipeline(); + scmRpcServer = SCMTestUtils.startScmRpcServer(conf, + new ScmTestMock(CLUSTER_ID), address, 10); + UpgradeTestHelper.addHddsVolume(conf, tempFolder); + dsm = UpgradeTestHelper.startPreFinalizedDatanode(conf, tempFolder, dsm, address, + HDDSLayoutFeature.HADOOP_PRC_PORTS_IN_DATANODEDETAILS.layoutVersion()); + dispatcher = dsm.getContainer().getDispatcher(); + final Pipeline pipeline = MockPipeline.createPipeline( + Collections.singletonList(dsm.getDatanodeDetails())); // Add data to read. - final long containerID = addContainer(pipeline); + final long containerID = UpgradeTestHelper.addContainer(dispatcher, pipeline); // incremental chunk list should be rejected before finalizing. - putBlock(containerID, pipeline, true, ContainerProtos.Result.UNSUPPORTED_REQUEST); + UpgradeTestHelper.putBlock(dispatcher, containerID, pipeline, true, ContainerProtos.Result.UNSUPPORTED_REQUEST); Container container = dsm.getContainer().getContainerSet().getContainer(containerID); assertEquals(OPEN, container.getContainerData().getState()); // close container to allow upgrade. - closeContainer(containerID, pipeline); + UpgradeTestHelper.closeContainer(dispatcher, containerID, pipeline); dsm.finalizeUpgrade(); assertTrue(dsm.getLayoutVersionManager().isAllowed(HDDSLayoutFeature.HBASE_SUPPORT)); // open a new container after finalization - final long containerID2 = addContainer(pipeline); + final long containerID2 = UpgradeTestHelper.addContainer(dispatcher, pipeline); // incremental chunk list should work after finalizing. 
- putBlock(containerID2, pipeline, true); + UpgradeTestHelper.putBlock(dispatcher, containerID2, pipeline, true); Container container2 = dsm.getContainer().getContainerSet().getContainer(containerID2); assertEquals(OPEN, container2.getContainerData().getState()); } @@ -138,195 +123,36 @@ public void testIncrementalChunkListBeforeAndAfterUpgrade() throws Exception { public void testBlockFinalizationBeforeAndAfterUpgrade() throws Exception { initTests(); // start DN and SCM - startScmServer(); - addHddsVolume(); - startPreFinalizedDatanode(); - final Pipeline pipeline = getPipeline(); + scmRpcServer = SCMTestUtils.startScmRpcServer(conf, + new ScmTestMock(CLUSTER_ID), address, 10); + UpgradeTestHelper.addHddsVolume(conf, tempFolder); + dsm = UpgradeTestHelper.startPreFinalizedDatanode(conf, tempFolder, dsm, address, + HDDSLayoutFeature.HADOOP_PRC_PORTS_IN_DATANODEDETAILS.layoutVersion()); + dispatcher = dsm.getContainer().getDispatcher(); + final Pipeline pipeline = MockPipeline.createPipeline( + Collections.singletonList(dsm.getDatanodeDetails())); // Add data to read. - final long containerID = addContainer(pipeline); - ContainerProtos.WriteChunkRequestProto writeChunk = putBlock(containerID, pipeline, false); - finalizeBlock(containerID, writeChunk.getBlockID().getLocalID(), ContainerProtos.Result.UNSUPPORTED_REQUEST); + final long containerID = UpgradeTestHelper.addContainer(dispatcher, pipeline); + ContainerProtos.WriteChunkRequestProto writeChunk = + UpgradeTestHelper.putBlock(dispatcher, containerID, pipeline, false); + UpgradeTestHelper.finalizeBlock( + dispatcher, containerID, writeChunk.getBlockID().getLocalID(), ContainerProtos.Result.UNSUPPORTED_REQUEST); Container container = dsm.getContainer().getContainerSet().getContainer(containerID); assertEquals(OPEN, container.getContainerData().getState()); // close container to allow upgrade. - closeContainer(containerID, pipeline); + UpgradeTestHelper.closeContainer(dispatcher, containerID, pipeline); dsm.finalizeUpgrade(); assertTrue(dsm.getLayoutVersionManager().isAllowed(HDDSLayoutFeature.HBASE_SUPPORT)); - final long containerID2 = addContainer(pipeline); - ContainerProtos.WriteChunkRequestProto writeChunk2 = putBlock(containerID2, pipeline, false); + final long containerID2 = UpgradeTestHelper.addContainer(dispatcher, pipeline); + ContainerProtos.WriteChunkRequestProto writeChunk2 = + UpgradeTestHelper.putBlock(dispatcher, containerID2, pipeline, false); // Make sure we can read after finalizing too. - finalizeBlock(containerID2, writeChunk2.getBlockID().getLocalID(), ContainerProtos.Result.SUCCESS); + UpgradeTestHelper.finalizeBlock( + dispatcher, containerID2, writeChunk2.getBlockID().getLocalID(), ContainerProtos.Result.SUCCESS); Container container2 = dsm.getContainer().getContainerSet().getContainer(containerID2); assertEquals(OPEN, container2.getContainerData().getState()); } - /** - * Starts the datanode with the fore layout version, and calls the version - * endpoint task to get cluster ID and SCM ID. - * - * The daemon for the datanode state machine is not started in this test. - * This greatly speeds up execution time. - * It means we do not have heartbeat functionality or pre-finalize - * upgrade actions, but neither of those things are needed for these tests. - */ - public void startPreFinalizedDatanode() throws Exception { - // Set layout version. 
- conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, tempFolder.toString()); - DatanodeLayoutStorage layoutStorage = new DatanodeLayoutStorage(conf, - UUID.randomUUID().toString(), - HDDSLayoutFeature.HADOOP_PRC_PORTS_IN_DATANODEDETAILS.layoutVersion()); - layoutStorage.initialize(); - - // Build and start the datanode. - DatanodeDetails dd = ContainerTestUtils.createDatanodeDetails(); - DatanodeStateMachine newDsm = new DatanodeStateMachine(dd, conf); - int actualMlv = newDsm.getLayoutVersionManager().getMetadataLayoutVersion(); - assertEquals( - HDDSLayoutFeature.HADOOP_PRC_PORTS_IN_DATANODEDETAILS.layoutVersion(), - actualMlv); - if (dsm != null) { - dsm.close(); - } - dsm = newDsm; - - callVersionEndpointTask(); - } - - /** - * Get the cluster ID and SCM ID from SCM to the datanode. - */ - public void callVersionEndpointTask() throws Exception { - try (EndpointStateMachine esm = ContainerTestUtils.createEndpoint(conf, - address, 1000)) { - VersionEndpointTask vet = new VersionEndpointTask(esm, conf, - dsm.getContainer()); - esm.setState(EndpointStateMachine.EndPointStates.GETVERSION); - vet.call(); - } - } - - public String startScmServer() throws IOException { - String scmID = UUID.randomUUID().toString(); - ScmTestMock scmServerImpl = new ScmTestMock(CLUSTER_ID, scmID); - scmRpcServer = SCMTestUtils.startScmRpcServer(conf, - scmServerImpl, address, 10); - return scmID; - } - - /// CONTAINER OPERATIONS /// - public void readChunk(ContainerProtos.WriteChunkRequestProto writeChunk, - Pipeline pipeline) throws Exception { - ContainerProtos.ContainerCommandRequestProto readChunkRequest = - ContainerTestHelper.getReadChunkRequest(pipeline, writeChunk); - - dispatchRequest(readChunkRequest); - } - - public ContainerProtos.WriteChunkRequestProto putBlock(long containerID, - Pipeline pipeline, boolean incremental) throws Exception { - return putBlock(containerID, pipeline, incremental, ContainerProtos.Result.SUCCESS); - } - - public ContainerProtos.WriteChunkRequestProto putBlock(long containerID, - Pipeline pipeline, boolean incremental, ContainerProtos.Result expectedResult) throws Exception { - ContainerProtos.ContainerCommandRequestProto writeChunkRequest = - getWriteChunk(containerID, pipeline); - dispatchRequest(writeChunkRequest); - - ContainerProtos.ContainerCommandRequestProto putBlockRequest = - ContainerTestHelper.getPutBlockRequest(pipeline, - writeChunkRequest.getWriteChunk(), incremental); - dispatchRequest(putBlockRequest, expectedResult); - - return writeChunkRequest.getWriteChunk(); - } - - public ContainerProtos.ContainerCommandRequestProto getWriteChunk( - long containerID, Pipeline pipeline) throws Exception { - return ContainerTestHelper.getWriteChunkRequest(pipeline, - ContainerTestHelper.getTestBlockID(containerID), 100); - } - - public Pipeline getPipeline() { - return MockPipeline.createPipeline( - Collections.singletonList(dsm.getDatanodeDetails())); - } - - public long addContainer(Pipeline pipeline) - throws Exception { - long containerID = random.nextInt(Integer.MAX_VALUE); - ContainerProtos.ContainerCommandRequestProto createContainerRequest = - ContainerTestHelper.getCreateContainerRequest(containerID, pipeline); - dispatchRequest(createContainerRequest); - - return containerID; - } - - public void deleteContainer(long containerID, Pipeline pipeline) - throws Exception { - ContainerProtos.ContainerCommandRequestProto deleteContainerRequest = - ContainerTestHelper.getDeleteContainer(pipeline, containerID, true); - dispatchRequest(deleteContainerRequest); - } - - public 
void closeContainer(long containerID, Pipeline pipeline) - throws Exception { - closeContainer(containerID, pipeline, ContainerProtos.Result.SUCCESS); - } - - public void closeContainer(long containerID, Pipeline pipeline, - ContainerProtos.Result expectedResult) throws Exception { - ContainerProtos.ContainerCommandRequestProto closeContainerRequest = - ContainerTestHelper.getCloseContainer(pipeline, containerID); - dispatchRequest(closeContainerRequest, expectedResult); - } - - public void finalizeBlock(long containerID, long localID, ContainerProtos.Result expectedResult) { - ContainerInfo container = mock(ContainerInfo.class); - when(container.getContainerID()).thenReturn(containerID); - - ContainerProtos.ContainerCommandRequestProto finalizeBlockRequest = - ContainerTestHelper.getFinalizeBlockRequest(localID, container, UUID.randomUUID().toString()); - - dispatchRequest(finalizeBlockRequest, expectedResult); - } - - public void dispatchRequest( - ContainerProtos.ContainerCommandRequestProto request) { - dispatchRequest(request, ContainerProtos.Result.SUCCESS); - } - - public void dispatchRequest( - ContainerProtos.ContainerCommandRequestProto request, - ContainerProtos.Result expectedResult) { - ContainerProtos.ContainerCommandResponseProto response = - dsm.getContainer().getDispatcher().dispatch(request, null); - assertEquals(expectedResult, response.getResult()); - } - - /// VOLUME OPERATIONS /// - - /** - * Append a datanode volume to the existing volumes in the configuration. - * @return The root directory for the new volume. - */ - public File addHddsVolume() throws IOException { - - File vol = Files.createDirectory(tempFolder.resolve(UUID.randomUUID() - .toString())).toFile(); - String[] existingVolumes = - conf.getStrings(ScmConfigKeys.HDDS_DATANODE_DIR_KEY); - List allVolumes = new ArrayList<>(); - if (existingVolumes != null) { - allVolumes.addAll(Arrays.asList(existingVolumes)); - } - - allVolumes.add(vol.getAbsolutePath()); - conf.setStrings(ScmConfigKeys.HDDS_DATANODE_DIR_KEY, - allVolumes.toArray(new String[0])); - - return vol; - } } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/upgrade/TestDatanodeUpgradeToSchemaV3.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/upgrade/TestDatanodeUpgradeToSchemaV3.java index 23b7da263465..fc599f7f9130 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/upgrade/TestDatanodeUpgradeToSchemaV3.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/upgrade/TestDatanodeUpgradeToSchemaV3.java @@ -20,7 +20,6 @@ import org.apache.hadoop.hdds.HddsConfigKeys; import org.apache.hadoop.hdds.conf.OzoneConfiguration; -import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.pipeline.MockPipeline; @@ -29,21 +28,17 @@ import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ozone.OzoneConfigKeys; import org.apache.hadoop.ozone.OzoneConsts; -import org.apache.hadoop.ozone.container.ContainerTestHelper; import org.apache.hadoop.ozone.container.common.ContainerTestUtils; import org.apache.hadoop.ozone.container.common.DatanodeLayoutStorage; import org.apache.hadoop.ozone.container.common.SCMTestUtils; import org.apache.hadoop.ozone.container.common.ScmTestMock; +import org.apache.hadoop.ozone.container.common.interfaces.ContainerDispatcher; import 
org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration; import org.apache.hadoop.ozone.container.common.statemachine.DatanodeStateMachine; -import org.apache.hadoop.ozone.container.common.statemachine.EndpointStateMachine; -import org.apache.hadoop.ozone.container.common.states.endpoint.VersionEndpointTask; -import org.apache.hadoop.ozone.container.common.utils.HddsVolumeUtil; import org.apache.hadoop.ozone.container.common.volume.DbVolume; import org.apache.hadoop.ozone.container.common.volume.HddsVolume; import org.apache.hadoop.ozone.container.common.volume.StorageVolume; import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainer; -import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.io.TempDir; import org.junit.jupiter.params.ParameterizedTest; @@ -52,15 +47,10 @@ import java.io.File; import java.io.IOException; import java.net.InetSocketAddress; -import java.nio.file.Files; import java.nio.file.Path; -import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; import java.util.HashMap; -import java.util.List; import java.util.Map; -import java.util.Random; import java.util.UUID; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @@ -91,8 +81,6 @@ public class TestDatanodeUpgradeToSchemaV3 { private RPC.Server scmRpcServer; private InetSocketAddress address; - private Random random; - private void initTests(Boolean enable) throws Exception { boolean schemaV3Enabled = enable; conf = new OzoneConfiguration(); @@ -106,8 +94,6 @@ private void initTests(Boolean enable) throws Exception { } private void setup() throws Exception { - random = new Random(); - address = SCMTestUtils.getReuseableAddress(); conf.setSocketAddr(ScmConfigKeys.OZONE_SCM_NAMES, address); conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, @@ -135,10 +121,12 @@ public void teardown() throws Exception { public void testDBOnHddsVolume(boolean schemaV3Enabled) throws Exception { initTests(schemaV3Enabled); // start DN and SCM - startScmServer(); - addHddsVolume(); + scmRpcServer = SCMTestUtils.startScmRpcServer(conf, + new ScmTestMock(CLUSTER_ID), address, 10); + UpgradeTestHelper.addHddsVolume(conf, tempFolder); - startPreFinalizedDatanode(); + dsm = UpgradeTestHelper.startPreFinalizedDatanode(conf, tempFolder, dsm, address, + HDDSLayoutFeature.ERASURE_CODED_STORAGE_SUPPORT.layoutVersion()); HddsVolume dataVolume = (HddsVolume) dsm.getContainer().getVolumeSet() .getVolumesList().get(0); assertNull(dataVolume.getDbVolume()); @@ -170,11 +158,13 @@ public void testDBOnHddsVolume(boolean schemaV3Enabled) throws Exception { public void testDBOnDbVolume(boolean schemaV3Enabled) throws Exception { initTests(schemaV3Enabled); // start DN and SCM - startScmServer(); - addHddsVolume(); - addDbVolume(); + scmRpcServer = SCMTestUtils.startScmRpcServer(conf, + new ScmTestMock(CLUSTER_ID), address, 10); + UpgradeTestHelper.addHddsVolume(conf, tempFolder); + UpgradeTestHelper.addDbVolume(conf, tempFolder); - startPreFinalizedDatanode(); + dsm = UpgradeTestHelper.startPreFinalizedDatanode(conf, tempFolder, dsm, address, + HDDSLayoutFeature.ERASURE_CODED_STORAGE_SUPPORT.layoutVersion()); HddsVolume dataVolume = (HddsVolume) dsm.getContainer().getVolumeSet() .getVolumesList().get(0); assertNull(dataVolume.getDbParentDir()); @@ -209,9 +199,10 @@ public void testDBCreatedInFinalize(boolean schemaV3Enabled) throws Exception { initTests(schemaV3Enabled); // start DN and SCM - 
startScmServer(); + scmRpcServer = SCMTestUtils.startScmRpcServer(conf, + new ScmTestMock(CLUSTER_ID), address, 10); // add one HddsVolume - addHddsVolume(); + UpgradeTestHelper.addHddsVolume(conf, tempFolder); // Set layout version. DatanodeLayoutStorage layoutStorage = new DatanodeLayoutStorage(conf, @@ -232,7 +223,7 @@ public void testDBCreatedInFinalize(boolean schemaV3Enabled) assertNull(dataVolume.getDbParentDir()); // Restart DN and finalize upgrade - restartDatanode( + dsm = UpgradeTestHelper.restartDatanode(conf, dsm, false, tempFolder, address, HDDSLayoutFeature.ERASURE_CODED_STORAGE_SUPPORT.layoutVersion(), true); dsm.finalizeUpgrade(); @@ -255,13 +246,15 @@ public void testDBCreatedInFinalize(boolean schemaV3Enabled) public void testFinalizeTwice(boolean schemaV3Enabled) throws Exception { initTests(schemaV3Enabled); // start DN and SCM - startScmServer(); + scmRpcServer = SCMTestUtils.startScmRpcServer(conf, + new ScmTestMock(CLUSTER_ID), address, 10); // add one HddsVolume and two DbVolume - addHddsVolume(); - addDbVolume(); - addDbVolume(); + UpgradeTestHelper.addHddsVolume(conf, tempFolder); + UpgradeTestHelper.addDbVolume(conf, tempFolder); + UpgradeTestHelper.addDbVolume(conf, tempFolder); - startPreFinalizedDatanode(); + dsm = UpgradeTestHelper.startPreFinalizedDatanode(conf, tempFolder, dsm, address, + HDDSLayoutFeature.ERASURE_CODED_STORAGE_SUPPORT.layoutVersion()); dsm.finalizeUpgrade(); DbVolume dbVolume = ((HddsVolume) dsm.getContainer().getVolumeSet() @@ -283,15 +276,18 @@ public void testAddHddsVolumeAfterFinalize(boolean schemaV3Enabled) throws Exception { initTests(schemaV3Enabled); // start DN and SCM - startScmServer(); - addHddsVolume(); + scmRpcServer = SCMTestUtils.startScmRpcServer(conf, + new ScmTestMock(CLUSTER_ID), address, 10); + UpgradeTestHelper.addHddsVolume(conf, tempFolder); - startPreFinalizedDatanode(); + dsm = UpgradeTestHelper.startPreFinalizedDatanode(conf, tempFolder, dsm, address, + HDDSLayoutFeature.ERASURE_CODED_STORAGE_SUPPORT.layoutVersion()); dsm.finalizeUpgrade(); // Add a new HddsVolume. It should have DB created after DN restart. 
- addHddsVolume(); - restartDatanode(HDDSLayoutFeature.DATANODE_SCHEMA_V3.layoutVersion(), + UpgradeTestHelper.addHddsVolume(conf, tempFolder); + dsm = UpgradeTestHelper.restartDatanode(conf, dsm, false, tempFolder, address, + HDDSLayoutFeature.DATANODE_SCHEMA_V3.layoutVersion(), false); for (StorageVolume vol: dsm.getContainer().getVolumeSet().getVolumesList()) { @@ -314,10 +310,12 @@ public void testAddHddsVolumeAfterFinalize(boolean schemaV3Enabled) public void testAddDbVolumeAfterFinalize(boolean schemaV3Enabled) throws Exception { initTests(schemaV3Enabled); - startScmServer(); - addHddsVolume(); + scmRpcServer = SCMTestUtils.startScmRpcServer(conf, + new ScmTestMock(CLUSTER_ID), address, 10); + UpgradeTestHelper.addHddsVolume(conf, tempFolder); - startPreFinalizedDatanode(); + dsm = UpgradeTestHelper.startPreFinalizedDatanode(conf, tempFolder, dsm, address, + HDDSLayoutFeature.ERASURE_CODED_STORAGE_SUPPORT.layoutVersion()); HddsVolume hddsVolume = (HddsVolume) dsm.getContainer().getVolumeSet() .getVolumesList().get(0); assertNull(hddsVolume.getDbParentDir()); @@ -328,8 +326,9 @@ public void testAddDbVolumeAfterFinalize(boolean schemaV3Enabled) hddsVolume.getStorageDir().getAbsolutePath())); // Add a new DbVolume - addDbVolume(); - restartDatanode(HDDSLayoutFeature.DATANODE_SCHEMA_V3.layoutVersion(), + UpgradeTestHelper.addDbVolume(conf, tempFolder); + dsm = UpgradeTestHelper.restartDatanode(conf, dsm, false, tempFolder, address, + HDDSLayoutFeature.DATANODE_SCHEMA_V3.layoutVersion(), false); // HddsVolume should still use the rocksDB under it's volume @@ -354,15 +353,18 @@ public void testAddDbAndHddsVolumeAfterFinalize(boolean schemaV3Enabled) throws Exception { initTests(schemaV3Enabled); // start DN and SCM - startScmServer(); - addHddsVolume(); + scmRpcServer = SCMTestUtils.startScmRpcServer(conf, + new ScmTestMock(CLUSTER_ID), address, 10); + UpgradeTestHelper.addHddsVolume(conf, tempFolder); - startPreFinalizedDatanode(); + dsm = UpgradeTestHelper.startPreFinalizedDatanode(conf, tempFolder, dsm, address, + HDDSLayoutFeature.ERASURE_CODED_STORAGE_SUPPORT.layoutVersion()); dsm.finalizeUpgrade(); - addDbVolume(); - File newDataVolume = addHddsVolume(); - restartDatanode(HDDSLayoutFeature.DATANODE_SCHEMA_V3.layoutVersion(), + UpgradeTestHelper.addDbVolume(conf, tempFolder); + File newDataVolume = UpgradeTestHelper.addHddsVolume(conf, tempFolder); + dsm = UpgradeTestHelper.restartDatanode(conf, dsm, false, tempFolder, address, + HDDSLayoutFeature.DATANODE_SCHEMA_V3.layoutVersion(), false); DbVolume dbVolume = (DbVolume) dsm.getContainer().getDbVolumeSet() @@ -419,18 +421,22 @@ public void testWriteWithV3Disabled(boolean schemaV3Enabled) public void testWrite(boolean enable, String expectedVersion) throws Exception { // start DN and SCM - startScmServer(); - addHddsVolume(); + scmRpcServer = SCMTestUtils.startScmRpcServer(conf, + new ScmTestMock(CLUSTER_ID), address, 10); + UpgradeTestHelper.addHddsVolume(conf, tempFolder); // Disable Schema V3 conf.setBoolean(DatanodeConfiguration.CONTAINER_SCHEMA_V3_ENABLED, false); - startPreFinalizedDatanode(); + dsm = UpgradeTestHelper.startPreFinalizedDatanode(conf, tempFolder, dsm, address, + HDDSLayoutFeature.ERASURE_CODED_STORAGE_SUPPORT.layoutVersion()); + ContainerDispatcher dispatcher = dsm.getContainer().getDispatcher(); dsm.finalizeUpgrade(); - final Pipeline pipeline = getPipeline(); + final Pipeline pipeline = MockPipeline.createPipeline( + Collections.singletonList(dsm.getDatanodeDetails())); // Create a container to write data. 
- final long containerID1 = addContainer(pipeline); - putBlock(containerID1, pipeline); - closeContainer(containerID1, pipeline); + final long containerID1 = UpgradeTestHelper.addContainer(dispatcher, pipeline); + UpgradeTestHelper.putBlock(dispatcher, containerID1, pipeline); + UpgradeTestHelper.closeContainer(dispatcher, containerID1, pipeline); KeyValueContainer container = (KeyValueContainer) dsm.getContainer().getContainerSet().getContainer(containerID1); // When SchemaV3 is disabled, new data should be saved as SchemaV2. @@ -440,13 +446,15 @@ public void testWrite(boolean enable, String expectedVersion) // Set SchemaV3 enable status conf.setBoolean(DatanodeConfiguration.CONTAINER_SCHEMA_V3_ENABLED, enable); - restartDatanode(HDDSLayoutFeature.DATANODE_SCHEMA_V3.layoutVersion(), + dsm = UpgradeTestHelper.restartDatanode(conf, dsm, false, tempFolder, address, + HDDSLayoutFeature.DATANODE_SCHEMA_V3.layoutVersion(), false); + dispatcher = dsm.getContainer().getDispatcher(); // Write new data - final long containerID2 = addContainer(pipeline); - putBlock(containerID2, pipeline); - closeContainer(containerID2, pipeline); + final long containerID2 = UpgradeTestHelper.addContainer(dispatcher, pipeline); + UpgradeTestHelper.putBlock(dispatcher, containerID2, pipeline); + UpgradeTestHelper.closeContainer(dispatcher, containerID2, pipeline); container = (KeyValueContainer) dsm.getContainer().getContainerSet().getContainer(containerID2); // If SchemaV3 is enabled, new data should be saved as SchemaV3 @@ -464,16 +472,20 @@ public void testReadsDuringFinalize(boolean schemaV3Enabled) throws Exception { initTests(schemaV3Enabled); // start DN and SCM - startScmServer(); - addHddsVolume(); - startPreFinalizedDatanode(); - final Pipeline pipeline = getPipeline(); + scmRpcServer = SCMTestUtils.startScmRpcServer(conf, + new ScmTestMock(CLUSTER_ID), address, 10); + UpgradeTestHelper.addHddsVolume(conf, tempFolder); + dsm = UpgradeTestHelper.startPreFinalizedDatanode(conf, tempFolder, dsm, address, + HDDSLayoutFeature.ERASURE_CODED_STORAGE_SUPPORT.layoutVersion()); + ContainerDispatcher dispatcher = dsm.getContainer().getDispatcher(); + final Pipeline pipeline = MockPipeline.createPipeline( + Collections.singletonList(dsm.getDatanodeDetails())); // Add data to read. - final long containerID = addContainer(pipeline); - ContainerProtos.WriteChunkRequestProto writeChunk = putBlock(containerID, - pipeline); - closeContainer(containerID, pipeline); + final long containerID = UpgradeTestHelper.addContainer(dispatcher, pipeline); + ContainerProtos.WriteChunkRequestProto writeChunk = + UpgradeTestHelper.putBlock(dispatcher, containerID, pipeline); + UpgradeTestHelper.closeContainer(dispatcher, containerID, pipeline); // Create thread to keep reading during finalization. ExecutorService executor = Executors.newFixedThreadPool(1); @@ -481,10 +493,10 @@ public void testReadsDuringFinalize(boolean schemaV3Enabled) // Layout version check should be thread safe. while (!dsm.getLayoutVersionManager() .isAllowed(HDDSLayoutFeature.DATANODE_SCHEMA_V3)) { - readChunk(writeChunk, pipeline); + UpgradeTestHelper.readChunk(dispatcher, writeChunk, pipeline); } // Make sure we can read after finalizing too. 
- readChunk(writeChunk, pipeline); + UpgradeTestHelper.readChunk(dispatcher, writeChunk, pipeline); return null; }); @@ -502,8 +514,9 @@ public void testReadsDuringFinalize(boolean schemaV3Enabled) public void testFinalizeFailure(boolean schemaV3Enabled) throws Exception { initTests(schemaV3Enabled); // start DN and SCM - startScmServer(); - addHddsVolume(); + scmRpcServer = SCMTestUtils.startScmRpcServer(conf, + new ScmTestMock(CLUSTER_ID), address, 10); + UpgradeTestHelper.addHddsVolume(conf, tempFolder); // Let HddsVolume be formatted to mimic the real cluster upgrade // Set layout version. DatanodeLayoutStorage layoutStorage = new DatanodeLayoutStorage(conf, @@ -523,15 +536,17 @@ public void testFinalizeFailure(boolean schemaV3Enabled) throws Exception { assertNull(dataVolume.getDbParentDir()); // Restart DN - restartDatanode( + dsm = UpgradeTestHelper.restartDatanode(conf, dsm, false, tempFolder, address, HDDSLayoutFeature.ERASURE_CODED_STORAGE_SUPPORT.layoutVersion(), true); + ContainerDispatcher dispatcher = dsm.getContainer().getDispatcher(); // Write some data. - final Pipeline pipeline = getPipeline(); - final long containerID = addContainer(pipeline); - ContainerProtos.WriteChunkRequestProto writeChunk = putBlock(containerID, - pipeline); - closeContainer(containerID, pipeline); + final Pipeline pipeline = MockPipeline.createPipeline( + Collections.singletonList(dsm.getDatanodeDetails())); + final long containerID = UpgradeTestHelper.addContainer(dispatcher, pipeline); + ContainerProtos.WriteChunkRequestProto writeChunk = + UpgradeTestHelper.putBlock(dispatcher, containerID, pipeline); + UpgradeTestHelper.closeContainer(dispatcher, containerID, pipeline); KeyValueContainer container = (KeyValueContainer) dsm.getContainer().getContainerSet().getContainer(containerID); assertEquals(OzoneConsts.SCHEMA_V2, @@ -558,227 +573,18 @@ public void testFinalizeFailure(boolean schemaV3Enabled) throws Exception { dsm.getContainer().getContainerSet().getContainer(containerID); assertEquals(OzoneConsts.SCHEMA_V2, container.getContainerData().getSchemaVersion()); - readChunk(writeChunk, pipeline); + UpgradeTestHelper.readChunk(dispatcher, writeChunk, pipeline); // SchemaV3 is not finalized, so still ERASURE_CODED_STORAGE_SUPPORT - restartDatanode( + dsm = UpgradeTestHelper.restartDatanode(conf, dsm, false, tempFolder, address, HDDSLayoutFeature.ERASURE_CODED_STORAGE_SUPPORT.layoutVersion(), true); + dispatcher = dsm.getContainer().getDispatcher(); // Old data is readable after DN restart container = (KeyValueContainer) dsm.getContainer().getContainerSet().getContainer(containerID); assertEquals(OzoneConsts.SCHEMA_V2, container.getContainerData().getSchemaVersion()); - readChunk(writeChunk, pipeline); - } - - public void checkContainerPathID(long containerID, String expectedID) { - KeyValueContainerData data = - (KeyValueContainerData) dsm.getContainer().getContainerSet() - .getContainer(containerID).getContainerData(); - assertThat(data.getChunksPath()).contains(expectedID); - assertThat(data.getMetadataPath()).contains(expectedID); - } - - public List getHddsSubdirs(File volume) { - File[] subdirsArray = getHddsRoot(volume).listFiles(File::isDirectory); - assertNotNull(subdirsArray); - return Arrays.asList(subdirsArray); - } - - public File getHddsRoot(File volume) { - return new File(HddsVolumeUtil.getHddsRoot(volume.getAbsolutePath())); - } - - /** - * Starts the datanode with the fore layout version, and calls the version - * endpoint task to get cluster ID and SCM ID. 
- * - * The daemon for the datanode state machine is not started in this test. - * This greatly speeds up execution time. - * It means we do not have heartbeat functionality or pre-finalize - * upgrade actions, but neither of those things are needed for these tests. - */ - public void startPreFinalizedDatanode() throws Exception { - // Set layout version. - conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, tempFolder.toString()); - DatanodeLayoutStorage layoutStorage = new DatanodeLayoutStorage(conf, - UUID.randomUUID().toString(), - HDDSLayoutFeature.ERASURE_CODED_STORAGE_SUPPORT.layoutVersion()); - layoutStorage.initialize(); - - // Build and start the datanode. - DatanodeDetails dd = ContainerTestUtils.createDatanodeDetails(); - DatanodeStateMachine newDsm = new DatanodeStateMachine(dd, conf); - int actualMlv = newDsm.getLayoutVersionManager().getMetadataLayoutVersion(); - assertEquals( - HDDSLayoutFeature.ERASURE_CODED_STORAGE_SUPPORT.layoutVersion(), - actualMlv); - if (dsm != null) { - dsm.close(); - } - dsm = newDsm; - - callVersionEndpointTask(); - } - - public void restartDatanode(int expectedMlv, boolean exactMatch) - throws Exception { - // Stop existing datanode. - DatanodeDetails dd = dsm.getDatanodeDetails(); - dsm.close(); - - // Start new datanode with the same configuration. - dsm = new DatanodeStateMachine(dd, conf); - int mlv = dsm.getLayoutVersionManager().getMetadataLayoutVersion(); - if (exactMatch) { - assertEquals(expectedMlv, mlv); - } else { - assertThat(expectedMlv).isLessThanOrEqualTo(mlv); - } - - callVersionEndpointTask(); - } - - /** - * Get the cluster ID and SCM ID from SCM to the datanode. - */ - public void callVersionEndpointTask() throws Exception { - try (EndpointStateMachine esm = ContainerTestUtils.createEndpoint(conf, - address, 1000)) { - VersionEndpointTask vet = new VersionEndpointTask(esm, conf, - dsm.getContainer()); - esm.setState(EndpointStateMachine.EndPointStates.GETVERSION); - vet.call(); - } - } - - public String startScmServer() throws IOException { - String scmID = UUID.randomUUID().toString(); - ScmTestMock scmServerImpl = new ScmTestMock(CLUSTER_ID, scmID); - scmRpcServer = SCMTestUtils.startScmRpcServer(conf, - scmServerImpl, address, 10); - return scmID; - } - - /// CONTAINER OPERATIONS /// - public void readChunk(ContainerProtos.WriteChunkRequestProto writeChunk, - Pipeline pipeline) throws Exception { - ContainerProtos.ContainerCommandRequestProto readChunkRequest = - ContainerTestHelper.getReadChunkRequest(pipeline, writeChunk); - - dispatchRequest(readChunkRequest); - } - - public ContainerProtos.WriteChunkRequestProto putBlock(long containerID, - Pipeline pipeline) throws Exception { - ContainerProtos.ContainerCommandRequestProto writeChunkRequest = - getWriteChunk(containerID, pipeline); - dispatchRequest(writeChunkRequest); - - ContainerProtos.ContainerCommandRequestProto putBlockRequest = - ContainerTestHelper.getPutBlockRequest(pipeline, - writeChunkRequest.getWriteChunk()); - dispatchRequest(putBlockRequest); - - return writeChunkRequest.getWriteChunk(); - } - - public ContainerProtos.ContainerCommandRequestProto getWriteChunk( - long containerID, Pipeline pipeline) throws Exception { - return ContainerTestHelper.getWriteChunkRequest(pipeline, - ContainerTestHelper.getTestBlockID(containerID), 100); - } - - public Pipeline getPipeline() { - return MockPipeline.createPipeline( - Collections.singletonList(dsm.getDatanodeDetails())); - } - - public long addContainer(Pipeline pipeline) - throws Exception { - long containerID = 
random.nextInt(Integer.MAX_VALUE); - ContainerProtos.ContainerCommandRequestProto createContainerRequest = - ContainerTestHelper.getCreateContainerRequest(containerID, pipeline); - dispatchRequest(createContainerRequest); - - return containerID; - } - - public void deleteContainer(long containerID, Pipeline pipeline) - throws Exception { - ContainerProtos.ContainerCommandRequestProto deleteContainerRequest = - ContainerTestHelper.getDeleteContainer(pipeline, containerID, true); - dispatchRequest(deleteContainerRequest); - } - - public void closeContainer(long containerID, Pipeline pipeline) - throws Exception { - closeContainer(containerID, pipeline, ContainerProtos.Result.SUCCESS); - } - - public void closeContainer(long containerID, Pipeline pipeline, - ContainerProtos.Result expectedResult) throws Exception { - ContainerProtos.ContainerCommandRequestProto closeContainerRequest = - ContainerTestHelper.getCloseContainer(pipeline, containerID); - dispatchRequest(closeContainerRequest, expectedResult); - } - - public void dispatchRequest( - ContainerProtos.ContainerCommandRequestProto request) { - dispatchRequest(request, ContainerProtos.Result.SUCCESS); - } - - public void dispatchRequest( - ContainerProtos.ContainerCommandRequestProto request, - ContainerProtos.Result expectedResult) { - ContainerProtos.ContainerCommandResponseProto response = - dsm.getContainer().getDispatcher().dispatch(request, null); - assertEquals(expectedResult, response.getResult()); - } - - /// VOLUME OPERATIONS /// - - /** - * Append a datanode volume to the existing volumes in the configuration. - * @return The root directory for the new volume. - */ - public File addHddsVolume() throws IOException { - - File vol = Files.createDirectory(tempFolder.resolve(UUID.randomUUID() - .toString())).toFile(); - String[] existingVolumes = - conf.getStrings(ScmConfigKeys.HDDS_DATANODE_DIR_KEY); - List allVolumes = new ArrayList<>(); - if (existingVolumes != null) { - allVolumes.addAll(Arrays.asList(existingVolumes)); - } - - allVolumes.add(vol.getAbsolutePath()); - conf.setStrings(ScmConfigKeys.HDDS_DATANODE_DIR_KEY, - allVolumes.toArray(new String[0])); - - return vol; - } - - /** - * Append a db volume to the existing volumes in the configuration. - * @return The root directory for the new volume. 
- */ - public File addDbVolume() throws Exception { - File vol = Files.createDirectory(tempFolder.resolve(UUID.randomUUID() - .toString())).toFile(); - String[] existingVolumes = - conf.getStrings(OzoneConfigKeys.HDDS_DATANODE_CONTAINER_DB_DIR); - List allVolumes = new ArrayList<>(); - if (existingVolumes != null) { - allVolumes.addAll(Arrays.asList(existingVolumes)); - } - - allVolumes.add(vol.getAbsolutePath()); - conf.setStrings(OzoneConfigKeys.HDDS_DATANODE_CONTAINER_DB_DIR, - allVolumes.toArray(new String[0])); - - return vol; + UpgradeTestHelper.readChunk(dispatcher, writeChunk, pipeline); } } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/upgrade/TestDatanodeUpgradeToScmHA.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/upgrade/TestDatanodeUpgradeToScmHA.java index 59b88bcbea46..d4a27e74cdaa 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/upgrade/TestDatanodeUpgradeToScmHA.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/upgrade/TestDatanodeUpgradeToScmHA.java @@ -18,9 +18,7 @@ package org.apache.hadoop.ozone.container.upgrade; -import org.apache.hadoop.hdds.HddsConfigKeys; import org.apache.hadoop.hdds.conf.OzoneConfiguration; -import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State; import org.apache.hadoop.hdds.scm.ScmConfigKeys; @@ -28,17 +26,12 @@ import org.apache.hadoop.hdds.scm.pipeline.Pipeline; import org.apache.hadoop.hdds.upgrade.HDDSLayoutFeature; import org.apache.hadoop.ipc.RPC; -import org.apache.hadoop.ozone.container.ContainerTestHelper; -import org.apache.hadoop.ozone.container.common.ContainerTestUtils; -import org.apache.hadoop.ozone.container.common.DatanodeLayoutStorage; import org.apache.hadoop.ozone.container.common.SCMTestUtils; import org.apache.hadoop.ozone.container.common.ScmTestMock; import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils; +import org.apache.hadoop.ozone.container.common.interfaces.ContainerDispatcher; import org.apache.hadoop.ozone.container.common.statemachine.DatanodeStateMachine; -import org.apache.hadoop.ozone.container.common.statemachine.EndpointStateMachine; -import org.apache.hadoop.ozone.container.common.states.endpoint.VersionEndpointTask; import org.apache.hadoop.ozone.container.common.utils.HddsVolumeUtil; -import org.apache.hadoop.ozone.container.common.utils.StorageVolumeUtil; import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; import org.apache.hadoop.ozone.container.replication.ContainerImporter; import org.apache.hadoop.ozone.container.replication.ContainerReplicationSource; @@ -55,11 +48,9 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.StandardCopyOption; -import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.List; -import java.util.Random; import java.util.UUID; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @@ -81,6 +72,7 @@ public class TestDatanodeUpgradeToScmHA { private Path tempFolder; private DatanodeStateMachine dsm; + private ContainerDispatcher dispatcher; private OzoneConfiguration conf; private static final String CLUSTER_ID = "clusterID"; private boolean scmHAAlreadyEnabled; @@ -89,8 +81,6 @@ public class TestDatanodeUpgradeToScmHA 
{ private InetSocketAddress address; private ScmTestMock scmServerImpl; - private Random random; - private void setScmHAEnabled(boolean enableSCMHA) throws Exception { this.scmHAAlreadyEnabled = enableSCMHA; @@ -100,8 +90,6 @@ private void setScmHAEnabled(boolean enableSCMHA) } private void setup() throws Exception { - random = new Random(); - address = SCMTestUtils.getReuseableAddress(); conf.setSocketAddr(ScmConfigKeys.OZONE_SCM_NAMES, address); } @@ -124,15 +112,18 @@ public void testReadsDuringFinalization(boolean enableSCMHA) setScmHAEnabled(enableSCMHA); // start DN and SCM startScmServer(); - addVolume(); - startPreFinalizedDatanode(); - final Pipeline pipeline = getPipeline(); + UpgradeTestHelper.addHddsVolume(conf, tempFolder); + dsm = UpgradeTestHelper.startPreFinalizedDatanode(conf, tempFolder, dsm, address, + HDDSLayoutFeature.INITIAL_VERSION.layoutVersion()); + dispatcher = dsm.getContainer().getDispatcher(); + final Pipeline pipeline = MockPipeline.createPipeline( + Collections.singletonList(dsm.getDatanodeDetails())); // Add data to read. - final long containerID = addContainer(pipeline); - ContainerProtos.WriteChunkRequestProto writeChunk = putBlock(containerID, - pipeline); - closeContainer(containerID, pipeline); + final long containerID = UpgradeTestHelper.addContainer(dispatcher, pipeline); + ContainerProtos.WriteChunkRequestProto writeChunk = + UpgradeTestHelper.putBlock(dispatcher, containerID, pipeline); + UpgradeTestHelper.closeContainer(dispatcher, containerID, pipeline); // Create thread to keep reading during finalization. ExecutorService executor = Executors.newFixedThreadPool(1); @@ -140,10 +131,10 @@ public void testReadsDuringFinalization(boolean enableSCMHA) // Layout version check should be thread safe. while (!dsm.getLayoutVersionManager() .isAllowed(HDDSLayoutFeature.SCM_HA)) { - readChunk(writeChunk, pipeline); + UpgradeTestHelper.readChunk(dispatcher, writeChunk, pipeline); } // Make sure we can read after finalizing too. - readChunk(writeChunk, pipeline); + UpgradeTestHelper.readChunk(dispatcher, writeChunk, pipeline); return null; }); @@ -159,40 +150,45 @@ public void testImportContainer(boolean enableSCMHA) throws Exception { setScmHAEnabled(enableSCMHA); // start DN and SCM startScmServer(); - addVolume(); - startPreFinalizedDatanode(); - final Pipeline pipeline = getPipeline(); + UpgradeTestHelper.addHddsVolume(conf, tempFolder); + dsm = UpgradeTestHelper.startPreFinalizedDatanode(conf, tempFolder, dsm, address, + HDDSLayoutFeature.INITIAL_VERSION.layoutVersion()); + dispatcher = dsm.getContainer().getDispatcher(); + final Pipeline pipeline = MockPipeline.createPipeline( + Collections.singletonList(dsm.getDatanodeDetails())); // Pre-export a container to continuously import and delete. - final long exportContainerID = addContainer(pipeline); + final long exportContainerID = UpgradeTestHelper.addContainer(dispatcher, pipeline); ContainerProtos.WriteChunkRequestProto exportWriteChunk = - putBlock(exportContainerID, pipeline); - closeContainer(exportContainerID, pipeline); + UpgradeTestHelper.putBlock(dispatcher, exportContainerID, pipeline); + UpgradeTestHelper.closeContainer(dispatcher, exportContainerID, pipeline); File exportedContainerFile = exportContainer(exportContainerID); - deleteContainer(exportContainerID, pipeline); + UpgradeTestHelper.deleteContainer(dispatcher, exportContainerID, pipeline); // Export another container to import while pre-finalized and read // finalized. 
- final long exportContainerID2 = addContainer(pipeline); + final long exportContainerID2 = UpgradeTestHelper.addContainer(dispatcher, pipeline); ContainerProtos.WriteChunkRequestProto exportWriteChunk2 = - putBlock(exportContainerID2, pipeline); - closeContainer(exportContainerID2, pipeline); + UpgradeTestHelper.putBlock(dispatcher, exportContainerID2, pipeline); + UpgradeTestHelper.closeContainer(dispatcher, exportContainerID2, pipeline); File exportedContainerFile2 = exportContainer(exportContainerID2); - deleteContainer(exportContainerID2, pipeline); + UpgradeTestHelper.deleteContainer(dispatcher, exportContainerID2, pipeline); // Make sure we can import and read a container pre-finalized. importContainer(exportContainerID2, exportedContainerFile2); - readChunk(exportWriteChunk2, pipeline); + UpgradeTestHelper.readChunk(dispatcher, exportWriteChunk2, pipeline); // Now SCM and enough other DNs finalize to enable SCM HA. This DN is // restarted with SCM HA config and gets a different SCM ID. conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, true); changeScmID(); - restartDatanode(HDDSLayoutFeature.INITIAL_VERSION.layoutVersion(), true); + dsm = UpgradeTestHelper.restartDatanode(conf, dsm, true, tempFolder, address, + HDDSLayoutFeature.INITIAL_VERSION.layoutVersion(), true); + dispatcher = dsm.getContainer().getDispatcher(); // Make sure the existing container can be read. - readChunk(exportWriteChunk2, pipeline); + UpgradeTestHelper.readChunk(dispatcher, exportWriteChunk2, pipeline); // Create thread to keep importing containers during the upgrade. // Since the datanode's MLV is behind SCM's, container creation is not @@ -204,12 +200,12 @@ public void testImportContainer(boolean enableSCMHA) throws Exception { while (!dsm.getLayoutVersionManager() .isAllowed(HDDSLayoutFeature.SCM_HA)) { importContainer(exportContainerID, exportedContainerFile); - readChunk(exportWriteChunk, pipeline); - deleteContainer(exportContainerID, pipeline); + UpgradeTestHelper.readChunk(dispatcher, exportWriteChunk, pipeline); + UpgradeTestHelper.deleteContainer(dispatcher, exportContainerID, pipeline); } // Make sure we can import after finalizing too. importContainer(exportContainerID, exportedContainerFile); - readChunk(exportWriteChunk, pipeline); + UpgradeTestHelper.readChunk(dispatcher, exportWriteChunk, pipeline); return null; }); @@ -220,7 +216,7 @@ public void testImportContainer(boolean enableSCMHA) throws Exception { // Make sure we can read the container that was imported while // pre-finalized after finalizing. 
- readChunk(exportWriteChunk2, pipeline); + UpgradeTestHelper.readChunk(dispatcher, exportWriteChunk2, pipeline); } @ParameterizedTest(name = "{index}: scmHAAlreadyEnabled={0}") @@ -230,10 +226,14 @@ public void testFailedVolumeDuringFinalization(boolean enableSCMHA) setScmHAEnabled(enableSCMHA); /// SETUP /// - String originalScmID = startScmServer(); - File volume = addVolume(); - startPreFinalizedDatanode(); - final Pipeline pipeline = getPipeline(); + startScmServer(); + String originalScmID = scmServerImpl.getScmId(); + File volume = UpgradeTestHelper.addHddsVolume(conf, tempFolder); + dsm = UpgradeTestHelper.startPreFinalizedDatanode(conf, tempFolder, dsm, address, + HDDSLayoutFeature.INITIAL_VERSION.layoutVersion()); + dispatcher = dsm.getContainer().getDispatcher(); + final Pipeline pipeline = MockPipeline.createPipeline( + Collections.singletonList(dsm.getDatanodeDetails())); /// PRE-FINALIZED: Write and Read from formatted volume /// @@ -243,10 +243,10 @@ public void testFailedVolumeDuringFinalization(boolean enableSCMHA) dsm.getContainer().getVolumeSet().getFailedVolumesList().size()); // Add container with data, make sure it can be read and written. - final long containerID = addContainer(pipeline); - ContainerProtos.WriteChunkRequestProto writeChunk = putBlock(containerID, - pipeline); - readChunk(writeChunk, pipeline); + final long containerID = UpgradeTestHelper.addContainer(dispatcher, pipeline); + ContainerProtos.WriteChunkRequestProto writeChunk = + UpgradeTestHelper.putBlock(dispatcher, containerID, pipeline); + UpgradeTestHelper.readChunk(dispatcher, writeChunk, pipeline); checkPreFinalizedVolumePathID(volume, originalScmID, CLUSTER_ID); checkContainerPathID(containerID, originalScmID, CLUSTER_ID); @@ -256,7 +256,7 @@ public void testFailedVolumeDuringFinalization(boolean enableSCMHA) failVolume(volume); // Since volume is failed, container should be marked unhealthy. // Finalization should proceed anyways. - closeContainer(containerID, pipeline, + UpgradeTestHelper.closeContainer(dispatcher, containerID, pipeline, ContainerProtos.Result.CONTAINER_FILES_CREATE_ERROR); State containerState = dsm.getContainer().getContainerSet() .getContainer(containerID).getContainerState(); @@ -286,11 +286,13 @@ public void testFailedVolumeDuringFinalization(boolean enableSCMHA) // imported to it. // This should log a warning about reading from an unhealthy container // but otherwise proceed successfully. - readChunk(writeChunk, pipeline); + UpgradeTestHelper.readChunk(dispatcher, writeChunk, pipeline); /// FINALIZED: Restart datanode to upgrade the failed volume /// - restartDatanode(HDDSLayoutFeature.SCM_HA.layoutVersion(), false); + dsm = UpgradeTestHelper.restartDatanode(conf, dsm, true, tempFolder, address, + HDDSLayoutFeature.SCM_HA.layoutVersion(), false); + dispatcher = dsm.getContainer().getDispatcher(); assertEquals(1, dsm.getContainer().getVolumeSet().getVolumesList().size()); @@ -301,12 +303,12 @@ public void testFailedVolumeDuringFinalization(boolean enableSCMHA) checkContainerPathID(containerID, originalScmID, CLUSTER_ID); // Read container from before upgrade. The upgrade required it to be closed. - readChunk(writeChunk, pipeline); + UpgradeTestHelper.readChunk(dispatcher, writeChunk, pipeline); // Write and read container after upgrade. 
- long newContainerID = addContainer(pipeline); + long newContainerID = UpgradeTestHelper.addContainer(dispatcher, pipeline); ContainerProtos.WriteChunkRequestProto newWriteChunk = - putBlock(newContainerID, pipeline); - readChunk(newWriteChunk, pipeline); + UpgradeTestHelper.putBlock(dispatcher, newContainerID, pipeline); + UpgradeTestHelper.readChunk(dispatcher, newWriteChunk, pipeline); // The new container should use cluster ID in its path. // The volume it is placed on is up to the implementation. checkContainerPathID(newContainerID, CLUSTER_ID); @@ -318,10 +320,14 @@ public void testFormattingNewVolumes(boolean enableSCMHA) throws Exception { setScmHAEnabled(enableSCMHA); /// SETUP /// - String originalScmID = startScmServer(); - File preFinVolume1 = addVolume(); - startPreFinalizedDatanode(); - final Pipeline pipeline = getPipeline(); + startScmServer(); + String originalScmID = scmServerImpl.getScmId(); + File preFinVolume1 = UpgradeTestHelper.addHddsVolume(conf, tempFolder); + dsm = UpgradeTestHelper.startPreFinalizedDatanode(conf, tempFolder, dsm, address, + HDDSLayoutFeature.INITIAL_VERSION.layoutVersion()); + dispatcher = dsm.getContainer().getDispatcher(); + final Pipeline pipeline = MockPipeline.createPipeline( + Collections.singletonList(dsm.getDatanodeDetails())); /// PRE-FINALIZED: Write and Read from formatted volume /// @@ -331,10 +337,10 @@ public void testFormattingNewVolumes(boolean enableSCMHA) throws Exception { dsm.getContainer().getVolumeSet().getFailedVolumesList().size()); // Add container with data, make sure it can be read and written. - final long containerID = addContainer(pipeline); - ContainerProtos.WriteChunkRequestProto writeChunk = putBlock(containerID, - pipeline); - readChunk(writeChunk, pipeline); + final long containerID = UpgradeTestHelper.addContainer(dispatcher, pipeline); + ContainerProtos.WriteChunkRequestProto writeChunk = + UpgradeTestHelper.putBlock(dispatcher, containerID, pipeline); + UpgradeTestHelper.readChunk(dispatcher, writeChunk, pipeline); checkPreFinalizedVolumePathID(preFinVolume1, originalScmID, CLUSTER_ID); checkContainerPathID(containerID, originalScmID, CLUSTER_ID); @@ -346,9 +352,11 @@ public void testFormattingNewVolumes(boolean enableSCMHA) throws Exception { conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, true); changeScmID(); // A new volume is added that must be formatted. - File preFinVolume2 = addVolume(); + File preFinVolume2 = UpgradeTestHelper.addHddsVolume(conf, tempFolder); - restartDatanode(HDDSLayoutFeature.INITIAL_VERSION.layoutVersion(), true); + dsm = UpgradeTestHelper.restartDatanode(conf, dsm, true, tempFolder, address, + HDDSLayoutFeature.INITIAL_VERSION.layoutVersion(), true); + dispatcher = dsm.getContainer().getDispatcher(); assertEquals(2, dsm.getContainer().getVolumeSet().getVolumesList().size()); @@ -356,7 +364,7 @@ public void testFormattingNewVolumes(boolean enableSCMHA) throws Exception { dsm.getContainer().getVolumeSet().getFailedVolumesList().size()); // Because DN mlv would be behind SCM mlv, only reads are allowed. - readChunk(writeChunk, pipeline); + UpgradeTestHelper.readChunk(dispatcher, writeChunk, pipeline); // On restart, there should have been no changes to the paths already used. 
checkPreFinalizedVolumePathID(preFinVolume1, originalScmID, CLUSTER_ID); @@ -369,7 +377,7 @@ public void testFormattingNewVolumes(boolean enableSCMHA) throws Exception { /// FINALIZE /// - closeContainer(containerID, pipeline); + UpgradeTestHelper.closeContainer(dispatcher, containerID, pipeline); dsm.finalizeUpgrade(); LambdaTestUtils.await(2000, 500, () -> dsm.getLayoutVersionManager() @@ -379,11 +387,13 @@ public void testFormattingNewVolumes(boolean enableSCMHA) throws Exception { // Add a new volume that should be formatted with cluster ID only, since // DN has finalized. - File finVolume = addVolume(); + File finVolume = UpgradeTestHelper.addHddsVolume(conf, tempFolder); // Yet another SCM ID is received this time, but it should not matter. changeScmID(); - restartDatanode(HDDSLayoutFeature.SCM_HA.layoutVersion(), false); + dsm = UpgradeTestHelper.restartDatanode(conf, dsm, true, tempFolder, address, + HDDSLayoutFeature.SCM_HA.layoutVersion(), false); + dispatcher = dsm.getContainer().getDispatcher(); assertEquals(3, dsm.getContainer().getVolumeSet().getVolumesList().size()); @@ -400,12 +410,12 @@ public void testFormattingNewVolumes(boolean enableSCMHA) throws Exception { /// FINALIZED: Read old data and write + read new data /// // Read container from before upgrade. The upgrade required it to be closed. - readChunk(writeChunk, pipeline); + UpgradeTestHelper.readChunk(dispatcher, writeChunk, pipeline); // Write and read container after upgrade. - long newContainerID = addContainer(pipeline); + long newContainerID = UpgradeTestHelper.addContainer(dispatcher, pipeline); ContainerProtos.WriteChunkRequestProto newWriteChunk = - putBlock(newContainerID, pipeline); - readChunk(newWriteChunk, pipeline); + UpgradeTestHelper.putBlock(dispatcher, newContainerID, pipeline); + UpgradeTestHelper.readChunk(dispatcher, newWriteChunk, pipeline); // The new container should use cluster ID in its path. // The volume it is placed on is up to the implementation. checkContainerPathID(newContainerID, CLUSTER_ID); @@ -496,82 +506,18 @@ public File getHddsRoot(File volume) { /// CLUSTER OPERATIONS /// - /** - * Starts the datanode with the first layout version, and calls the version - * endpoint task to get cluster ID and SCM ID. - * - * The daemon for the datanode state machine is not started in this test. - * This greatly speeds up execution time. - * It means we do not have heartbeat functionality or pre-finalize - * upgrade actions, but neither of those things are needed for these tests. - */ - public void startPreFinalizedDatanode() throws Exception { - // Set layout version. - conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, - tempFolder.toString()); - DatanodeLayoutStorage layoutStorage = new DatanodeLayoutStorage(conf, - UUID.randomUUID().toString(), - HDDSLayoutFeature.INITIAL_VERSION.layoutVersion()); - layoutStorage.initialize(); - - // Build and start the datanode. - DatanodeDetails dd = ContainerTestUtils.createDatanodeDetails(); - DatanodeStateMachine newDsm = new DatanodeStateMachine(dd, conf); - int actualMlv = newDsm.getLayoutVersionManager().getMetadataLayoutVersion(); - assertEquals(HDDSLayoutFeature.INITIAL_VERSION.layoutVersion(), - actualMlv); - dsm = newDsm; - - callVersionEndpointTask(); - } - - public void restartDatanode(int expectedMlv, boolean exactMatch) - throws Exception { - // Stop existing datanode. - DatanodeDetails dd = dsm.getDatanodeDetails(); - dsm.close(); - - // Start new datanode with the same configuration. 
- dsm = new DatanodeStateMachine(dd, conf); - StorageVolumeUtil.getHddsVolumesList(dsm.getContainer().getVolumeSet().getVolumesList()) - .forEach(hddsVolume -> hddsVolume.setDbParentDir(tempFolder.toFile())); - int mlv = dsm.getLayoutVersionManager().getMetadataLayoutVersion(); - if (exactMatch) { - assertEquals(expectedMlv, mlv); - } else { - assertThat(expectedMlv).isLessThanOrEqualTo(mlv); - } - - callVersionEndpointTask(); - } - - /** - * Get the cluster ID and SCM ID from SCM to the datanode. - */ - public void callVersionEndpointTask() throws Exception { - try (EndpointStateMachine esm = ContainerTestUtils.createEndpoint(conf, - address, 1000)) { - VersionEndpointTask vet = new VersionEndpointTask(esm, conf, - dsm.getContainer()); - esm.setState(EndpointStateMachine.EndPointStates.GETVERSION); - vet.call(); - } - } - - public String startScmServer() throws Exception { - String scmID = UUID.randomUUID().toString(); - scmServerImpl = new ScmTestMock(CLUSTER_ID, scmID); + private void startScmServer() throws Exception { + scmServerImpl = new ScmTestMock(CLUSTER_ID); scmRpcServer = SCMTestUtils.startScmRpcServer(conf, scmServerImpl, address, 10); - return scmID; } /** * Updates the SCM ID on the SCM server. Datanode will not be aware of this - * until {@link this#callVersionEndpointTask} is called. + * until {@link UpgradeTestHelper#callVersionEndpointTask} is called. * @return the new scm ID. */ - public String changeScmID() { + private String changeScmID() { String scmID = UUID.randomUUID().toString(); scmServerImpl.setScmId(scmID); return scmID; @@ -579,72 +525,10 @@ public String changeScmID() { /// CONTAINER OPERATIONS /// - public void readChunk(ContainerProtos.WriteChunkRequestProto writeChunk, - Pipeline pipeline) throws Exception { - ContainerProtos.ContainerCommandRequestProto readChunkRequest = - ContainerTestHelper.getReadChunkRequest(pipeline, writeChunk); - - dispatchRequest(readChunkRequest); - } - - public ContainerProtos.WriteChunkRequestProto putBlock(long containerID, - Pipeline pipeline) throws Exception { - ContainerProtos.ContainerCommandRequestProto writeChunkRequest = - getWriteChunk(containerID, pipeline); - dispatchRequest(writeChunkRequest); - - ContainerProtos.ContainerCommandRequestProto putBlockRequest = - ContainerTestHelper.getPutBlockRequest(pipeline, - writeChunkRequest.getWriteChunk()); - dispatchRequest(putBlockRequest); - - return writeChunkRequest.getWriteChunk(); - } - - public ContainerProtos.ContainerCommandRequestProto getWriteChunk( - long containerID, Pipeline pipeline) throws Exception { - return ContainerTestHelper.getWriteChunkRequest(pipeline, - ContainerTestHelper.getTestBlockID(containerID), 100); - } - - public Pipeline getPipeline() { - return MockPipeline.createPipeline( - Collections.singletonList(dsm.getDatanodeDetails())); - } - - public long addContainer(Pipeline pipeline) - throws Exception { - long containerID = random.nextInt(Integer.MAX_VALUE); - ContainerProtos.ContainerCommandRequestProto createContainerRequest = - ContainerTestHelper.getCreateContainerRequest(containerID, pipeline); - dispatchRequest(createContainerRequest); - - return containerID; - } - - public void deleteContainer(long containerID, Pipeline pipeline) - throws Exception { - ContainerProtos.ContainerCommandRequestProto deleteContainerRequest = - ContainerTestHelper.getDeleteContainer(pipeline, containerID, true); - dispatchRequest(deleteContainerRequest); - } - - public void closeContainer(long containerID, Pipeline pipeline) - throws Exception { - 
closeContainer(containerID, pipeline, ContainerProtos.Result.SUCCESS); - } - - public void closeContainer(long containerID, Pipeline pipeline, - ContainerProtos.Result expectedResult) throws Exception { - ContainerProtos.ContainerCommandRequestProto closeContainerRequest = - ContainerTestHelper.getCloseContainer(pipeline, containerID); - dispatchRequest(closeContainerRequest, expectedResult); - } - /** * Exports the specified container to a temporary file and returns the file. */ - public File exportContainer(long containerId) throws Exception { + private File exportContainer(long containerId) throws Exception { final ContainerReplicationSource replicationSource = new OnDemandContainerReplicationSource( dsm.getContainer().getController()); @@ -663,7 +547,7 @@ public File exportContainer(long containerId) throws Exception { * Imports the container found in {@code source} to the datanode with the ID * {@code containerID}. */ - public void importContainer(long containerID, File source) throws Exception { + private void importContainer(long containerID, File source) throws Exception { ContainerImporter replicator = new ContainerImporter(dsm.getConf(), dsm.getContainer().getContainerSet(), @@ -679,43 +563,8 @@ public void importContainer(long containerID, File source) throws Exception { NO_COMPRESSION); } - public void dispatchRequest( - ContainerProtos.ContainerCommandRequestProto request) { - dispatchRequest(request, ContainerProtos.Result.SUCCESS); - } - - public void dispatchRequest( - ContainerProtos.ContainerCommandRequestProto request, - ContainerProtos.Result expectedResult) { - ContainerProtos.ContainerCommandResponseProto response = - dsm.getContainer().getDispatcher().dispatch(request, null); - assertEquals(expectedResult, response.getResult()); - } - /// VOLUME OPERATIONS /// - /** - * Append a datanode volume to the existing volumes in the configuration. - * @return The root directory for the new volume. - */ - public File addVolume() throws Exception { - - File vol = Files.createDirectory( - tempFolder.resolve(UUID.randomUUID().toString())).toFile(); - String[] existingVolumes = - conf.getStrings(ScmConfigKeys.HDDS_DATANODE_DIR_KEY); - List allVolumes = new ArrayList<>(); - if (existingVolumes != null) { - allVolumes.addAll(Arrays.asList(existingVolumes)); - } - - allVolumes.add(vol.getAbsolutePath()); - conf.setStrings(ScmConfigKeys.HDDS_DATANODE_DIR_KEY, - allVolumes.toArray(new String[0])); - - return vol; - } - /** * Renames the specified volume directory so it will appear as failed to * the datanode. diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/upgrade/UpgradeTestHelper.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/upgrade/UpgradeTestHelper.java new file mode 100644 index 000000000000..28b9163f3cf0 --- /dev/null +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/upgrade/UpgradeTestHelper.java @@ -0,0 +1,271 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.container.upgrade; + +import org.apache.hadoop.hdds.HddsConfigKeys; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.hdds.scm.ScmConfigKeys; +import org.apache.hadoop.hdds.scm.container.ContainerInfo; +import org.apache.hadoop.hdds.scm.pipeline.Pipeline; +import org.apache.hadoop.ozone.OzoneConfigKeys; +import org.apache.hadoop.ozone.container.ContainerTestHelper; +import org.apache.hadoop.ozone.container.common.ContainerTestUtils; +import org.apache.hadoop.ozone.container.common.DatanodeLayoutStorage; +import org.apache.hadoop.ozone.container.common.interfaces.ContainerDispatcher; +import org.apache.hadoop.ozone.container.common.statemachine.DatanodeStateMachine; +import org.apache.hadoop.ozone.container.common.statemachine.EndpointStateMachine; +import org.apache.hadoop.ozone.container.common.states.endpoint.VersionEndpointTask; +import org.apache.hadoop.ozone.container.common.utils.StorageVolumeUtil; +import org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer; + +import java.io.File; +import java.io.IOException; +import java.net.InetSocketAddress; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Random; +import java.util.UUID; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +/** + * Helpers for upgrade tests. + */ +public final class UpgradeTestHelper { + private UpgradeTestHelper() { + } + private static final Random RANDOM = new Random(); + + /** + * Starts the datanode with the fore layout version, and calls the version + * endpoint task to get cluster ID and SCM ID. + * + * The daemon for the datanode state machine is not started in this test. + * This greatly speeds up execution time. + * It means we do not have heartbeat functionality or pre-finalize + * upgrade actions, but neither of those things are needed for these tests. + */ + public static DatanodeStateMachine startPreFinalizedDatanode( + OzoneConfiguration conf, Path tempFolder, + DatanodeStateMachine dsm, InetSocketAddress address, + int metadataLayoutVersion) + throws Exception { + // Set layout version. + conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, tempFolder.toString()); + DatanodeLayoutStorage layoutStorage = new DatanodeLayoutStorage(conf, + UUID.randomUUID().toString(), + metadataLayoutVersion); + layoutStorage.initialize(); + if (dsm != null) { + dsm.close(); + } + + // Build and start the datanode. 
+ DatanodeDetails dd = ContainerTestUtils.createDatanodeDetails(); + dsm = new DatanodeStateMachine(dd, conf); + int actualMlv = dsm.getLayoutVersionManager().getMetadataLayoutVersion(); + assertEquals( + metadataLayoutVersion, + actualMlv); + + + callVersionEndpointTask(conf, dsm.getContainer(), address); + return dsm; + } + + public static DatanodeStateMachine restartDatanode( + OzoneConfiguration conf, DatanodeStateMachine dsm, boolean shouldSetDbParentDir, + Path tempFolder, InetSocketAddress address, int expectedMlv, boolean exactMatch) + throws Exception { + // Stop existing datanode. + DatanodeDetails dd = dsm.getDatanodeDetails(); + dsm.close(); + + // Start new datanode with the same configuration. + dsm = new DatanodeStateMachine(dd, conf); + if (shouldSetDbParentDir) { + StorageVolumeUtil.getHddsVolumesList(dsm.getContainer().getVolumeSet().getVolumesList()) + .forEach(hddsVolume -> hddsVolume.setDbParentDir(tempFolder.toFile())); + } + int mlv = dsm.getLayoutVersionManager().getMetadataLayoutVersion(); + if (exactMatch) { + assertEquals(expectedMlv, mlv); + } else { + assertThat(expectedMlv).isLessThanOrEqualTo(mlv); + } + + callVersionEndpointTask(conf, dsm.getContainer(), address); + return dsm; + } + + /** + * Get the cluster ID and SCM ID from SCM to the datanode. + */ + public static void callVersionEndpointTask( + OzoneConfiguration conf, OzoneContainer container, InetSocketAddress address) + throws Exception { + try (EndpointStateMachine esm = ContainerTestUtils.createEndpoint(conf, + address, 1000)) { + VersionEndpointTask vet = new VersionEndpointTask(esm, conf, + container); + esm.setState(EndpointStateMachine.EndPointStates.GETVERSION); + vet.call(); + } + } + + /** + * Append a datanode volume to the existing volumes in the configuration. + * @return The root directory for the new volume. + */ + public static File addHddsVolume(OzoneConfiguration conf, Path tempFolder) throws IOException { + + File vol = Files.createDirectory(tempFolder.resolve(UUID.randomUUID() + .toString())).toFile(); + String[] existingVolumes = + conf.getStrings(ScmConfigKeys.HDDS_DATANODE_DIR_KEY); + List allVolumes = new ArrayList<>(); + if (existingVolumes != null) { + allVolumes.addAll(Arrays.asList(existingVolumes)); + } + + allVolumes.add(vol.getAbsolutePath()); + conf.setStrings(ScmConfigKeys.HDDS_DATANODE_DIR_KEY, + allVolumes.toArray(new String[0])); + + return vol; + } + + /** + * Append a db volume to the existing volumes in the configuration. + * @return The root directory for the new volume. 
+ */ + public static File addDbVolume(OzoneConfiguration conf, Path tempFolder) throws Exception { + File vol = Files.createDirectory(tempFolder.resolve(UUID.randomUUID() + .toString())).toFile(); + String[] existingVolumes = + conf.getStrings(OzoneConfigKeys.HDDS_DATANODE_CONTAINER_DB_DIR); + List allVolumes = new ArrayList<>(); + if (existingVolumes != null) { + allVolumes.addAll(Arrays.asList(existingVolumes)); + } + + allVolumes.add(vol.getAbsolutePath()); + conf.setStrings(OzoneConfigKeys.HDDS_DATANODE_CONTAINER_DB_DIR, + allVolumes.toArray(new String[0])); + + return vol; + } + + + public static void dispatchRequest( + ContainerDispatcher dispatcher, + ContainerProtos.ContainerCommandRequestProto request) { + dispatchRequest(dispatcher, request, ContainerProtos.Result.SUCCESS); + } + + public static void dispatchRequest( + ContainerDispatcher dispatcher, ContainerProtos.ContainerCommandRequestProto request, + ContainerProtos.Result expectedResult) { + ContainerProtos.ContainerCommandResponseProto response = + dispatcher.dispatch(request, null); + assertEquals(expectedResult, response.getResult()); + } + + public static void readChunk( + ContainerDispatcher dispatcher, ContainerProtos.WriteChunkRequestProto writeChunk, + Pipeline pipeline) throws Exception { + ContainerProtos.ContainerCommandRequestProto readChunkRequest = + ContainerTestHelper.getReadChunkRequest(pipeline, writeChunk); + dispatchRequest(dispatcher, readChunkRequest); + } + + public static ContainerProtos.WriteChunkRequestProto putBlock( + ContainerDispatcher dispatcher, long containerID, Pipeline pipeline, + boolean incremental) throws Exception { + return putBlock(dispatcher, containerID, pipeline, incremental, ContainerProtos.Result.SUCCESS); + } + + public static ContainerProtos.WriteChunkRequestProto putBlock( + ContainerDispatcher dispatcher, long containerID, Pipeline pipeline) throws Exception { + return putBlock(dispatcher, containerID, pipeline, false, ContainerProtos.Result.SUCCESS); + } + + public static ContainerProtos.WriteChunkRequestProto putBlock( + ContainerDispatcher dispatcher, long containerID, Pipeline pipeline, + boolean incremental, ContainerProtos.Result expectedResult) throws Exception { + ContainerProtos.ContainerCommandRequestProto writeChunkRequest = + ContainerTestHelper.getWriteChunkRequest(pipeline, + ContainerTestHelper.getTestBlockID(containerID), 100); + dispatchRequest(dispatcher, writeChunkRequest); + + ContainerProtos.ContainerCommandRequestProto putBlockRequest = + ContainerTestHelper.getPutBlockRequest(pipeline, + writeChunkRequest.getWriteChunk(), incremental); + dispatchRequest(dispatcher, putBlockRequest, expectedResult); + return writeChunkRequest.getWriteChunk(); + } + + public static long addContainer(ContainerDispatcher dispatcher, Pipeline pipeline) + throws Exception { + long containerID = RANDOM.nextInt(Integer.MAX_VALUE); + ContainerProtos.ContainerCommandRequestProto createContainerRequest = + ContainerTestHelper.getCreateContainerRequest(containerID, pipeline); + dispatchRequest(dispatcher, createContainerRequest); + + return containerID; + } + + public static void deleteContainer(ContainerDispatcher dispatcher, long containerID, Pipeline pipeline) + throws Exception { + ContainerProtos.ContainerCommandRequestProto deleteContainerRequest = + ContainerTestHelper.getDeleteContainer(pipeline, containerID, true); + dispatchRequest(dispatcher, deleteContainerRequest); + } + + public static void closeContainer(ContainerDispatcher dispatcher, long containerID, Pipeline 
pipeline) + throws Exception { + closeContainer(dispatcher, containerID, pipeline, ContainerProtos.Result.SUCCESS); + } + + public static void closeContainer( + ContainerDispatcher dispatcher, long containerID, Pipeline pipeline, + ContainerProtos.Result expectedResult) throws Exception { + ContainerProtos.ContainerCommandRequestProto closeContainerRequest = + ContainerTestHelper.getCloseContainer(pipeline, containerID); + dispatchRequest(dispatcher, closeContainerRequest, expectedResult); + } + + public static void finalizeBlock( + ContainerDispatcher dispatcher, long containerID, long localID, ContainerProtos.Result expectedResult) { + ContainerInfo container = mock(ContainerInfo.class); + when(container.getContainerID()).thenReturn(containerID); + + ContainerProtos.ContainerCommandRequestProto finalizeBlockRequest = + ContainerTestHelper.getFinalizeBlockRequest(localID, container, UUID.randomUUID().toString()); + + UpgradeTestHelper.dispatchRequest(dispatcher, finalizeBlockRequest, expectedResult); + } +} From ad7b8db90bda9fa0f83f4c096e958bf310315565 Mon Sep 17 00:00:00 2001 From: Hemant Kumar Date: Thu, 15 Aug 2024 16:26:03 -0700 Subject: [PATCH 24/50] HDDS-11137. Removed locks from SnapshotPurge and SnapshotSetProperty APIs. (#7018) --- .../snapshot/OMSnapshotPurgeRequest.java | 122 +++++------------- .../OMSnapshotSetPropertyRequest.java | 32 +---- .../snapshot/OMSnapshotPurgeResponse.java | 2 +- 3 files changed, 33 insertions(+), 123 deletions(-) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotPurgeRequest.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotPurgeRequest.java index 29c7628e3cca..2a9cfa6baf0d 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotPurgeRequest.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotPurgeRequest.java @@ -19,10 +19,7 @@ package org.apache.hadoop.ozone.om.request.snapshot; -import org.apache.commons.lang3.tuple.Triple; -import org.apache.hadoop.ozone.om.OMMetadataManager; import org.apache.hadoop.ozone.om.OMMetrics; -import org.apache.hadoop.ozone.om.exceptions.OMException; import org.apache.ratis.server.protocol.TermIndex; import org.apache.hadoop.hdds.utils.db.cache.CacheKey; import org.apache.hadoop.hdds.utils.db.cache.CacheValue; @@ -44,15 +41,11 @@ import java.io.IOException; import java.util.HashMap; -import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.NoSuchElementException; -import java.util.Set; import java.util.UUID; -import static org.apache.hadoop.ozone.om.lock.OzoneManagerLock.Resource.SNAPSHOT_LOCK; - /** * Handles OMSnapshotPurge Request. * This is an OM internal request. Does not need @RequireSnapshotFeatureState. @@ -92,62 +85,34 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, TermIn new HashMap<>(); // Each snapshot purge operation does three things: - // 1. Update the snapshot chain, - // 2. Update the deep clean flag for the next active snapshot (So that it can be + // 1. Update the deep clean flag for the next active snapshot (So that it can be // deep cleaned by the KeyDeletingService in the next run), + // 2. Update the snapshot chain, // 3. Finally, purge the snapshot. 
-    // All of these steps have to be performed only when it acquires all the necessary
-    // locks (lock on the snapshot to be purged, lock on the next active snapshot, and
-    // lock on the next path and global previous snapshots). Ideally, there is no need
-    // for locks for snapshot purge and can rely on OMStateMachine because OMStateMachine
-    // is going to process each request sequentially.
-    //
-    // But there is a problem with that. After filtering unnecessary SST files for a snapshot,
-    // SstFilteringService updates that snapshot's SstFilter flag. SstFilteringService cannot
-    // use SetSnapshotProperty API because it runs on each OM independently and One OM does
-    // not know if the snapshot has been filtered on the other OM in HA environment.
-    //
-    // If locks are not taken snapshot purge and SstFilteringService will cause a race condition
-    // and override one's update with another.
+    // There is no need to take locks for snapshot purge as of now. We can simply rely on OMStateMachine
+    // because it executes transactions sequentially.
     for (String snapTableKey : snapshotDbKeys) {
-      // To acquire all the locks, a set is maintained which is keyed by snapshotTableKey.
-      // snapshotTableKey is nothing but /volumeName/bucketName/snapshotName.
-      // Once all the locks are acquired, it performs the three steps mentioned above and
-      // release all the locks after that.
-      Set> lockSet = new HashSet<>(4, 1);
-      try {
-        if (omMetadataManager.getSnapshotInfoTable().get(snapTableKey) == null) {
-          // Snapshot may have been purged in the previous iteration of SnapshotDeletingService.
-          LOG.warn("The snapshot {} is not longer in snapshot table, It maybe removed in the previous " +
-              "Snapshot purge request.", snapTableKey);
-          continue;
-        }
-
-        acquireLock(lockSet, snapTableKey, omMetadataManager);
-        SnapshotInfo fromSnapshot = omMetadataManager.getSnapshotInfoTable().get(snapTableKey);
-
-        SnapshotInfo nextSnapshot =
-            SnapshotUtils.getNextActiveSnapshot(fromSnapshot, snapshotChainManager, omSnapshotManager);
-
-        if (nextSnapshot != null) {
-          acquireLock(lockSet, nextSnapshot.getTableKey(), omMetadataManager);
-        }
-
-        // Update the chain first so that it has all the necessary locks before updating deep clean.
-        updateSnapshotChainAndCache(lockSet, omMetadataManager, fromSnapshot, trxnLogIndex,
-            updatedPathPreviousAndGlobalSnapshots);
-        updateSnapshotInfoAndCache(nextSnapshot, omMetadataManager, trxnLogIndex, updatedSnapInfos);
-        // Remove and close snapshot's RocksDB instance from SnapshotCache.
-        omSnapshotManager.invalidateCacheEntry(fromSnapshot.getSnapshotId());
-        // Update SnapshotInfoTable cache.
-        omMetadataManager.getSnapshotInfoTable()
-            .addCacheEntry(new CacheKey<>(fromSnapshot.getTableKey()), CacheValue.get(trxnLogIndex));
-      } finally {
-        for (Triple lockKey: lockSet) {
-          omMetadataManager.getLock()
-              .releaseWriteLock(SNAPSHOT_LOCK, lockKey.getLeft(), lockKey.getMiddle(), lockKey.getRight());
-        }
+      SnapshotInfo fromSnapshot = omMetadataManager.getSnapshotInfoTable().get(snapTableKey);
+      if (fromSnapshot == null) {
+        // Snapshot may have been purged in the previous iteration of SnapshotDeletingService.
+ LOG.warn("The snapshot {} is not longer in snapshot table, It maybe removed in the previous " + + "Snapshot purge request.", snapTableKey); + continue; } + + SnapshotInfo nextSnapshot = + SnapshotUtils.getNextActiveSnapshot(fromSnapshot, snapshotChainManager, omSnapshotManager); + + // Step 1: Update the deep clean flag for the next active snapshot + updateSnapshotInfoAndCache(nextSnapshot, omMetadataManager, trxnLogIndex, updatedSnapInfos); + // Step 2: Update the snapshot chain. + updateSnapshotChainAndCache(omMetadataManager, fromSnapshot, trxnLogIndex, + updatedPathPreviousAndGlobalSnapshots); + // Remove and close snapshot's RocksDB instance from SnapshotCache. + omSnapshotManager.invalidateCacheEntry(fromSnapshot.getSnapshotId()); + // Step 3: Purge the snapshot from SnapshotInfoTable cache. + omMetadataManager.getSnapshotInfoTable() + .addCacheEntry(new CacheKey<>(fromSnapshot.getTableKey()), CacheValue.get(trxnLogIndex)); } omClientResponse = new OMSnapshotPurgeResponse(omResponse.build(), @@ -168,41 +133,19 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, TermIn return omClientResponse; } - private void acquireLock(Set> lockSet, String snapshotTableKey, - OMMetadataManager omMetadataManager) throws IOException { - SnapshotInfo snapshotInfo = omMetadataManager.getSnapshotInfoTable().get(snapshotTableKey); - - // It should not be the case that lock is required for non-existing snapshot. - if (snapshotInfo == null) { - LOG.error("Snapshot: '{}' doesn't not exist in snapshot table.", snapshotTableKey); - throw new OMException("Snapshot: '{" + snapshotTableKey + "}' doesn't not exist in snapshot table.", - OMException.ResultCodes.FILE_NOT_FOUND); - } - Triple lockKey = Triple.of(snapshotInfo.getVolumeName(), snapshotInfo.getBucketName(), - snapshotInfo.getName()); - if (!lockSet.contains(lockKey)) { - mergeOmLockDetails(omMetadataManager.getLock() - .acquireWriteLock(SNAPSHOT_LOCK, lockKey.getLeft(), lockKey.getMiddle(), lockKey.getRight())); - lockSet.add(lockKey); - } - } - private void updateSnapshotInfoAndCache(SnapshotInfo snapInfo, OmMetadataManagerImpl omMetadataManager, long trxnLogIndex, - Map updatedSnapInfos) throws IOException { + Map updatedSnapInfos) throws IOException { if (snapInfo != null) { - // Fetch the latest value again after acquiring lock. - SnapshotInfo updatedSnapshotInfo = omMetadataManager.getSnapshotInfoTable().get(snapInfo.getTableKey()); - // Setting next snapshot deep clean to false, Since the // current snapshot is deleted. We can potentially // reclaim more keys in the next snapshot. - updatedSnapshotInfo.setDeepClean(false); + snapInfo.setDeepClean(false); // Update table cache first - omMetadataManager.getSnapshotInfoTable().addCacheEntry(new CacheKey<>(updatedSnapshotInfo.getTableKey()), - CacheValue.get(trxnLogIndex, updatedSnapshotInfo)); - updatedSnapInfos.put(updatedSnapshotInfo.getTableKey(), updatedSnapshotInfo); + omMetadataManager.getSnapshotInfoTable().addCacheEntry(new CacheKey<>(snapInfo.getTableKey()), + CacheValue.get(trxnLogIndex, snapInfo)); + updatedSnapInfos.put(snapInfo.getTableKey(), snapInfo); } } @@ -213,7 +156,6 @@ private void updateSnapshotInfoAndCache(SnapshotInfo snapInfo, * update in DB. 
*/ private void updateSnapshotChainAndCache( - Set> lockSet, OmMetadataManagerImpl metadataManager, SnapshotInfo snapInfo, long trxnLogIndex, @@ -247,18 +189,12 @@ private void updateSnapshotChainAndCache( snapInfo.getSnapshotPath(), snapInfo.getSnapshotId()); nextPathSnapshotKey = snapshotChainManager .getTableKey(nextPathSnapshotId); - - // Acquire lock from the snapshot - acquireLock(lockSet, nextPathSnapshotKey, metadataManager); } String nextGlobalSnapshotKey = null; if (hasNextGlobalSnapshot) { UUID nextGlobalSnapshotId = snapshotChainManager.nextGlobalSnapshot(snapInfo.getSnapshotId()); nextGlobalSnapshotKey = snapshotChainManager.getTableKey(nextGlobalSnapshotId); - - // Acquire lock from the snapshot - acquireLock(lockSet, nextGlobalSnapshotKey, metadataManager); } SnapshotInfo nextPathSnapInfo = diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotSetPropertyRequest.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotSetPropertyRequest.java index c4ca3dc99e3c..53047fd8026b 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotSetPropertyRequest.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotSetPropertyRequest.java @@ -38,7 +38,6 @@ import java.io.IOException; import static org.apache.hadoop.ozone.om.exceptions.OMException.ResultCodes.FILE_NOT_FOUND; -import static org.apache.hadoop.ozone.om.lock.OzoneManagerLock.Resource.SNAPSHOT_LOCK; /** * Updates the exclusive size of the snapshot. @@ -55,7 +54,7 @@ public OMSnapshotSetPropertyRequest(OMRequest omRequest) { public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, TermIndex termIndex) { OMMetrics omMetrics = ozoneManager.getMetrics(); - OMClientResponse omClientResponse = null; + OMClientResponse omClientResponse; OMMetadataManager metadataManager = ozoneManager.getMetadataManager(); OzoneManagerProtocolProtos.OMResponse.Builder omResponse = @@ -63,33 +62,16 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, TermIn OzoneManagerProtocolProtos.SetSnapshotPropertyRequest setSnapshotPropertyRequest = getOmRequest() .getSetSnapshotPropertyRequest(); - SnapshotInfo updatedSnapInfo = null; String snapshotKey = setSnapshotPropertyRequest.getSnapshotKey(); - boolean acquiredSnapshotLock = false; - String volumeName = null; - String bucketName = null; - String snapshotName = null; try { - SnapshotInfo snapshotInfo = metadataManager.getSnapshotInfoTable().get(snapshotKey); - if (snapshotInfo == null) { + SnapshotInfo updatedSnapInfo = metadataManager.getSnapshotInfoTable().get(snapshotKey); + if (updatedSnapInfo == null) { LOG.error("Snapshot: '{}' doesn't not exist in snapshot table.", snapshotKey); throw new OMException("Snapshot: '{" + snapshotKey + "}' doesn't not exist in snapshot table.", FILE_NOT_FOUND); } - volumeName = snapshotInfo.getVolumeName(); - bucketName = snapshotInfo.getBucketName(); - snapshotName = snapshotInfo.getName(); - - mergeOmLockDetails(metadataManager.getLock() - .acquireWriteLock(SNAPSHOT_LOCK, volumeName, bucketName, snapshotName)); - - acquiredSnapshotLock = getOmLockDetails().isLockAcquired(); - - updatedSnapInfo = metadataManager.getSnapshotInfoTable() - .get(snapshotKey); - if (setSnapshotPropertyRequest.hasDeepCleanedDeletedDir()) { updatedSnapInfo.setDeepCleanedDeletedDir(setSnapshotPropertyRequest @@ -126,14 +108,6 @@ public OMClientResponse 
validateAndUpdateCache(OzoneManager ozoneManager, TermIn createErrorOMResponse(omResponse, ex)); omMetrics.incNumSnapshotSetPropertyFails(); LOG.error("Failed to execute snapshotSetPropertyRequest: {{}}.", setSnapshotPropertyRequest, ex); - } finally { - if (acquiredSnapshotLock) { - mergeOmLockDetails(metadataManager.getLock() - .releaseWriteLock(SNAPSHOT_LOCK, volumeName, bucketName, snapshotName)); - } - if (omClientResponse != null) { - omClientResponse.setOmLockDetails(getOmLockDetails()); - } } return omClientResponse; diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/snapshot/OMSnapshotPurgeResponse.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/snapshot/OMSnapshotPurgeResponse.java index 45b0c5e05909..ea9e68cc9ad9 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/snapshot/OMSnapshotPurgeResponse.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/snapshot/OMSnapshotPurgeResponse.java @@ -81,9 +81,9 @@ protected void addToDBBatch(OMMetadataManager omMetadataManager, OmMetadataManagerImpl metadataManager = (OmMetadataManagerImpl) omMetadataManager; + updateSnapInfo(metadataManager, batchOperation, updatedSnapInfos); updateSnapInfo(metadataManager, batchOperation, updatedPreviousAndGlobalSnapInfos); - updateSnapInfo(metadataManager, batchOperation, updatedSnapInfos); for (String dbKey: snapshotDbKeys) { // Skip the cache here because snapshot is purged from cache in OMSnapshotPurgeRequest. SnapshotInfo snapshotInfo = omMetadataManager From 97babb30474581577c75b1240b5063cf91f7b535 Mon Sep 17 00:00:00 2001 From: Siyao Meng <50227127+smengcl@users.noreply.github.com> Date: Thu, 15 Aug 2024 22:48:40 -0700 Subject: [PATCH 25/50] HDDS-11184. [hsync] Add a client config to limit write concurrency on the same key (#7073) --- .../hadoop/hdds/scm/OzoneClientConfig.java | 16 +++ .../ozone/client/io/KeyOutputStream.java | 111 +++++++++------ .../client/io/KeyOutputStreamSemaphore.java | 71 ++++++++++ .../ozone/client/io/TestKeyOutputStream.java | 126 ++++++++++++++++++ .../org/apache/hadoop/fs/ozone/TestHSync.java | 2 + 5 files changed, 286 insertions(+), 40 deletions(-) create mode 100644 hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/KeyOutputStreamSemaphore.java create mode 100644 hadoop-ozone/client/src/test/java/org/apache/hadoop/ozone/client/io/TestKeyOutputStream.java diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/OzoneClientConfig.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/OzoneClientConfig.java index b130f48776c1..bbc52bfe4f9d 100644 --- a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/OzoneClientConfig.java +++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/OzoneClientConfig.java @@ -264,6 +264,14 @@ public enum ChecksumCombineMode { tags = ConfigTag.CLIENT) private boolean enablePutblockPiggybacking = false; + @Config(key = "key.write.concurrency", + defaultValue = "1", + description = "Maximum concurrent writes allowed on each key. " + + "Defaults to 1 which matches the behavior before HDDS-9844. 
" + + "For unlimited write concurrency, set this to -1 or any negative integer value.", + tags = ConfigTag.CLIENT) + private int maxConcurrentWritePerKey = 1; + @PostConstruct public void validate() { Preconditions.checkState(streamBufferSize > 0); @@ -485,4 +493,12 @@ public void setIncrementalChunkList(boolean enable) { public boolean getIncrementalChunkList() { return this.incrementalChunkList; } + + public void setMaxConcurrentWritePerKey(int maxConcurrentWritePerKey) { + this.maxConcurrentWritePerKey = maxConcurrentWritePerKey; + } + + public int getMaxConcurrentWritePerKey() { + return this.maxConcurrentWritePerKey; + } } diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/KeyOutputStream.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/KeyOutputStream.java index 59c0fa134aa4..549607c59ad1 100644 --- a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/KeyOutputStream.java +++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/KeyOutputStream.java @@ -110,6 +110,13 @@ enum StreamAction { private ContainerClientMetrics clientMetrics; private OzoneManagerVersion ozoneManagerVersion; + private final int maxConcurrentWritePerKey; + private final KeyOutputStreamSemaphore keyOutputStreamSemaphore; + + KeyOutputStreamSemaphore getRequestSemaphore() { + return keyOutputStreamSemaphore; + } + public KeyOutputStream(ReplicationConfig replicationConfig, BlockOutputStreamEntryPool blockOutputStreamEntryPool) { this.replication = replicationConfig; closed = false; @@ -120,6 +127,10 @@ public KeyOutputStream(ReplicationConfig replicationConfig, BlockOutputStreamEnt retryCount = 0; offset = 0; this.blockOutputStreamEntryPool = blockOutputStreamEntryPool; + // Force write concurrency to 1 per key when using this constructor. + // At the moment, this constructor is only used by ECKeyOutputStream. 
+ this.maxConcurrentWritePerKey = 1; + this.keyOutputStreamSemaphore = new KeyOutputStreamSemaphore(maxConcurrentWritePerKey); } protected BlockOutputStreamEntryPool getBlockOutputStreamEntryPool() { @@ -150,6 +161,8 @@ public KeyOutputStream(Builder b) { this.replication = b.replicationConfig; this.blockOutputStreamEntryPool = new BlockOutputStreamEntryPool(b); final OzoneClientConfig config = b.getClientConfig(); + this.maxConcurrentWritePerKey = config.getMaxConcurrentWritePerKey(); + this.keyOutputStreamSemaphore = new KeyOutputStreamSemaphore(maxConcurrentWritePerKey); this.retryPolicyMap = HddsClientUtils.getRetryPolicyByException( config.getMaxRetryCount(), config.getRetryInterval()); this.retryCount = 0; @@ -179,7 +192,7 @@ public synchronized void addPreallocateBlocks(OmKeyLocationInfoGroup version, lo } @Override - public synchronized void write(int b) throws IOException { + public void write(int b) throws IOException { byte[] buf = new byte[1]; buf[0] = (byte) b; write(buf, 0, 1); @@ -200,24 +213,31 @@ public synchronized void write(int b) throws IOException { @Override public void write(byte[] b, int off, int len) throws IOException { - checkNotClosed(); - if (b == null) { - throw new NullPointerException(); - } - if ((off < 0) || (off > b.length) || (len < 0) || ((off + len) > b.length) - || ((off + len) < 0)) { - throw new IndexOutOfBoundsException(); - } - if (len == 0) { - return; - } - synchronized (this) { - handleWrite(b, off, len, false); - writeOffset += len; + try { + getRequestSemaphore().acquire(); + checkNotClosed(); + if (b == null) { + throw new NullPointerException(); + } + if ((off < 0) || (off > b.length) || (len < 0) || ((off + len) > b.length) + || ((off + len) < 0)) { + throw new IndexOutOfBoundsException(); + } + if (len == 0) { + return; + } + + synchronized (this) { + handleWrite(b, off, len, false); + writeOffset += len; + } + } finally { + getRequestSemaphore().release(); } } - private void handleWrite(byte[] b, int off, long len, boolean retry) + @VisibleForTesting + void handleWrite(byte[] b, int off, long len, boolean retry) throws IOException { while (len > 0) { try { @@ -441,40 +461,51 @@ protected boolean checkIfContainerToExclude(Throwable t) { @Override public void flush() throws IOException { - checkNotClosed(); - handleFlushOrClose(StreamAction.FLUSH); + try { + getRequestSemaphore().acquire(); + checkNotClosed(); + handleFlushOrClose(StreamAction.FLUSH); + } finally { + getRequestSemaphore().release(); + } } @Override public void hflush() throws IOException { + // Note: Semaphore acquired and released inside hsync(). hsync(); } @Override public void hsync() throws IOException { - if (replication.getReplicationType() != ReplicationType.RATIS) { - throw new UnsupportedOperationException( - "Replication type is not " + ReplicationType.RATIS); - } - if (replication.getRequiredNodes() <= 1) { - throw new UnsupportedOperationException("The replication factor = " - + replication.getRequiredNodes() + " <= 1"); - } - if (ozoneManagerVersion.compareTo(OzoneManagerVersion.HBASE_SUPPORT) < 0) { - throw new UnsupportedOperationException("Hsync API requires OM version " - + OzoneManagerVersion.HBASE_SUPPORT + " or later. 
Current OM version " - + ozoneManagerVersion); - } - checkNotClosed(); - final long hsyncPos = writeOffset; - - handleFlushOrClose(StreamAction.HSYNC); + try { + getRequestSemaphore().acquire(); - synchronized (this) { - Preconditions.checkState(offset >= hsyncPos, - "offset = %s < hsyncPos = %s", offset, hsyncPos); - MetricUtil.captureLatencyNs(clientMetrics::addHsyncLatency, - () -> blockOutputStreamEntryPool.hsyncKey(hsyncPos)); + if (replication.getReplicationType() != ReplicationType.RATIS) { + throw new UnsupportedOperationException( + "Replication type is not " + ReplicationType.RATIS); + } + if (replication.getRequiredNodes() <= 1) { + throw new UnsupportedOperationException("The replication factor = " + + replication.getRequiredNodes() + " <= 1"); + } + if (ozoneManagerVersion.compareTo(OzoneManagerVersion.HBASE_SUPPORT) < 0) { + throw new UnsupportedOperationException("Hsync API requires OM version " + + OzoneManagerVersion.HBASE_SUPPORT + " or later. Current OM version " + + ozoneManagerVersion); + } + checkNotClosed(); + final long hsyncPos = writeOffset; + handleFlushOrClose(StreamAction.HSYNC); + + synchronized (this) { + Preconditions.checkState(offset >= hsyncPos, + "offset = %s < hsyncPos = %s", offset, hsyncPos); + MetricUtil.captureLatencyNs(clientMetrics::addHsyncLatency, + () -> blockOutputStreamEntryPool.hsyncKey(hsyncPos)); + } + } finally { + getRequestSemaphore().release(); } } diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/KeyOutputStreamSemaphore.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/KeyOutputStreamSemaphore.java new file mode 100644 index 000000000000..36031a9cf4df --- /dev/null +++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/KeyOutputStreamSemaphore.java @@ -0,0 +1,71 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package org.apache.hadoop.ozone.client.io; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.InterruptedIOException; +import java.util.concurrent.Semaphore; + +/** + * Helper class that streamlines request semaphore usage in KeyOutputStream. + */ +public class KeyOutputStreamSemaphore { + + public static final Logger LOG = LoggerFactory.getLogger(KeyOutputStreamSemaphore.class); + private final Semaphore requestSemaphore; + + KeyOutputStreamSemaphore(int maxConcurrentWritePerKey) { + LOG.info("Initializing semaphore with maxConcurrentWritePerKey = {}", maxConcurrentWritePerKey); + if (maxConcurrentWritePerKey > 0) { + requestSemaphore = new Semaphore(maxConcurrentWritePerKey); + } else if (maxConcurrentWritePerKey == 0) { + throw new IllegalArgumentException("Invalid config. ozone.client.key.write.concurrency cannot be set to 0"); + } else { + requestSemaphore = null; + } + } + + public int getQueueLength() { + return requestSemaphore != null ? requestSemaphore.getQueueLength() : 0; + } + + public void acquire() throws IOException { + if (requestSemaphore != null) { + try { + LOG.debug("Acquiring semaphore"); + requestSemaphore.acquire(); + LOG.debug("Acquired semaphore"); + } catch (InterruptedException e) { + final String errMsg = "Write aborted. Interrupted waiting for KeyOutputStream semaphore: " + e.getMessage(); + LOG.error(errMsg); + Thread.currentThread().interrupt(); + throw new InterruptedIOException(errMsg); + } + } + } + + public void release() { + if (requestSemaphore != null) { + LOG.debug("Releasing semaphore"); + requestSemaphore.release(); + LOG.debug("Released semaphore"); + } + } +} diff --git a/hadoop-ozone/client/src/test/java/org/apache/hadoop/ozone/client/io/TestKeyOutputStream.java b/hadoop-ozone/client/src/test/java/org/apache/hadoop/ozone/client/io/TestKeyOutputStream.java new file mode 100644 index 000000000000..6b6abceff36a --- /dev/null +++ b/hadoop-ozone/client/src/test/java/org/apache/hadoop/ozone/client/io/TestKeyOutputStream.java @@ -0,0 +1,126 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package org.apache.hadoop.ozone.client.io; + +import org.apache.ozone.test.GenericTestUtils; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.slf4j.event.Level; + +import java.io.IOException; +import java.util.Map.Entry; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.atomic.AtomicInteger; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyBoolean; +import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +/** + * Tests KeyOutputStream. + * This is a unit test meant to verify specific behaviors of KeyOutputStream. + */ +public class TestKeyOutputStream { + + @BeforeAll + static void init() { + GenericTestUtils.setLogLevel(KeyOutputStreamSemaphore.LOG, Level.DEBUG); + } + + @Test + void testConcurrentWriteLimitOne() throws Exception { + // Verify the semaphore is working to limit the number of concurrent writes allowed. + KeyOutputStreamSemaphore sema1 = new KeyOutputStreamSemaphore(1); + KeyOutputStream keyOutputStream = mock(KeyOutputStream.class); + when(keyOutputStream.getRequestSemaphore()).thenReturn(sema1); + + final AtomicInteger countWrite = new AtomicInteger(0); + // mock write() + doAnswer(invocation -> { + countWrite.getAndIncrement(); + return invocation.callRealMethod(); + }).when(keyOutputStream).write(any(), anyInt(), anyInt()); + + final ConcurrentHashMap mapNotifiers = new ConcurrentHashMap<>(); + + final AtomicInteger countHandleWrite = new AtomicInteger(0); + // mock handleWrite() + doAnswer(invocation -> { + final long tid = Thread.currentThread().getId(); + System.out.println("handleWrite() called from tid " + tid); + final CountDownLatch latch = mapNotifiers.compute(tid, (k, v) -> + v != null ? 
v : new CountDownLatch(1)); + countHandleWrite.getAndIncrement(); + // doing some "work" + latch.await(); + return null; + }).when(keyOutputStream).handleWrite(any(), anyInt(), anyLong(), anyBoolean()); + + final Runnable writeRunnable = () -> { + try { + keyOutputStream.write(new byte[4], 0, 4); + } catch (IOException e) { + throw new RuntimeException(e); + } + }; + + final Thread thread1 = new Thread(writeRunnable); + thread1.start(); + + final Thread thread2 = new Thread(writeRunnable); + thread2.start(); + + // Wait for both threads to enter write() + GenericTestUtils.waitFor(() -> countWrite.get() == 2, 100, 3000); + // One thread should enter handleWrite() + GenericTestUtils.waitFor(() -> countHandleWrite.get() == 1, 100, 3000); + // The other thread is waiting on the semaphore + GenericTestUtils.waitFor(() -> sema1.getQueueLength() == 1, 100, 3000); + + // handleWrite is triggered only once because of the semaphore and the synchronized block + verify(keyOutputStream, times(1)).handleWrite(any(), anyInt(), anyLong(), anyBoolean()); + + // Now, allow the current thread to finish handleWrite + // There is only one thread in handleWrite() so mapNotifiers should have only one entry. + assertEquals(1, mapNotifiers.size()); + Entry entry = mapNotifiers.entrySet().stream().findFirst().get(); + mapNotifiers.remove(entry.getKey()); + entry.getValue().countDown(); + + // Wait for the other thread to proceed + GenericTestUtils.waitFor(() -> countHandleWrite.get() == 2, 100, 3000); + verify(keyOutputStream, times(2)).handleWrite(any(), anyInt(), anyLong(), anyBoolean()); + + // Allow the other thread to finish handleWrite + entry = mapNotifiers.entrySet().stream().findFirst().get(); + mapNotifiers.remove(entry.getKey()); + entry.getValue().countDown(); + + // Let threads finish + thread2.join(); + thread1.join(); + } +} diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestHSync.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestHSync.java index 98d7388310b3..91792da76f2f 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestHSync.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestHSync.java @@ -175,6 +175,8 @@ public static void init() throws Exception { CONF.setTimeDuration(OZONE_DIR_DELETING_SERVICE_INTERVAL, 100, TimeUnit.MILLISECONDS); CONF.setBoolean("ozone.client.incremental.chunk.list", true); CONF.setBoolean("ozone.client.stream.putblock.piggybacking", true); + // Unlimited key write concurrency + CONF.setInt("ozone.client.key.write.concurrency", -1); CONF.setTimeDuration(OZONE_OM_OPEN_KEY_CLEANUP_SERVICE_INTERVAL, SERVICE_INTERVAL, TimeUnit.MILLISECONDS); CONF.setTimeDuration(OZONE_OM_OPEN_KEY_EXPIRE_THRESHOLD, From 042c73e84abe6e8b2dcec084aed96398db8ef85f Mon Sep 17 00:00:00 2001 From: "Doroszlai, Attila" <6454655+adoroszlai@users.noreply.github.com> Date: Fri, 16 Aug 2024 15:33:05 +0200 Subject: [PATCH 26/50] HDDS-11279. 
Add direct dependencies in hadoop-ozone (#7067) --- hadoop-ozone/client/pom.xml | 54 ++++++++ hadoop-ozone/common/pom.xml | 84 +++++++++++++ hadoop-ozone/csi/pom.xml | 39 ++++++ hadoop-ozone/httpfsgateway/pom.xml | 28 +++++ hadoop-ozone/insight/pom.xml | 36 ++++++ hadoop-ozone/integration-test/pom.xml | 5 + hadoop-ozone/interface-client/pom.xml | 20 +++ hadoop-ozone/interface-storage/pom.xml | 21 ++++ hadoop-ozone/ozone-manager/pom.xml | 168 ++++++++++++++++++++++++- hadoop-ozone/ozonefs-common/pom.xml | 51 ++++++++ hadoop-ozone/ozonefs/pom.xml | 34 +++++ hadoop-ozone/pom.xml | 20 +++ hadoop-ozone/recon-codegen/pom.xml | 18 +++ hadoop-ozone/recon/pom.xml | 150 ++++++++++++++++++++++ hadoop-ozone/s3-secret-store/pom.xml | 8 ++ hadoop-ozone/s3gateway/pom.xml | 105 ++++++++++++++++ hadoop-ozone/tools/pom.xml | 141 +++++++++++++++++++++ pom.xml | 45 ++++++- 18 files changed, 1025 insertions(+), 2 deletions(-) diff --git a/hadoop-ozone/client/pom.xml b/hadoop-ozone/client/pom.xml index dac609caa46d..545faba51ac1 100644 --- a/hadoop-ozone/client/pom.xml +++ b/hadoop-ozone/client/pom.xml @@ -31,19 +31,73 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> + + org.apache.ozone + hdds-common + + + org.apache.ozone + hdds-config + + + org.apache.ozone + hdds-client + org.apache.ozone hdds-erasurecode + + org.apache.ozone + hdds-interface-client + org.apache.ozone ozone-common + + org.apache.ozone + ozone-interface-client + + + org.apache.commons + commons-lang3 + + + org.apache.ratis + ratis-common + + + org.apache.ratis + ratis-thirdparty-misc + + + org.slf4j + slf4j-api + + + + com.fasterxml.jackson.core + jackson-annotations + com.github.stephenc.jcip jcip-annotations + + com.google.guava + guava + + + + commons-collections + commons-collections + + + jakarta.annotation + jakarta.annotation-api + diff --git a/hadoop-ozone/common/pom.xml b/hadoop-ozone/common/pom.xml index 15d961e09491..bd16a0a5dfe5 100644 --- a/hadoop-ozone/common/pom.xml +++ b/hadoop-ozone/common/pom.xml @@ -61,21 +61,105 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.ozone hdds-client + + org.apache.ozone + hdds-config + + + org.apache.ozone + hdds-interface-client + org.apache.ozone ozone-interface-client + + + org.apache.commons + commons-lang3 + org.apache.hadoop hadoop-hdfs-client + + org.apache.httpcomponents + httpclient + + + org.apache.httpcomponents + httpcore + + + org.apache.ratis + ratis-common + + + org.apache.ratis + ratis-proto + + + org.apache.ratis + ratis-thirdparty-misc + + + org.slf4j + slf4j-api + + + com.fasterxml.jackson.core + jackson-annotations + + + com.fasterxml.jackson.core + jackson-databind + com.github.stephenc.jcip jcip-annotations + + com.google.guava + guava + + + com.google.protobuf + protobuf-java + + + io.grpc + grpc-api + + + com.google.code.findbugs + jsr305 + + + + + io.grpc + grpc-stub + + + io.netty + netty-common + + + io.netty + netty-handler + + + jakarta.annotation + jakarta.annotation-api + + + io.grpc + grpc-inprocess + test + io.grpc grpc-testing diff --git a/hadoop-ozone/csi/pom.xml b/hadoop-ozone/csi/pom.xml index 04c153f3988c..a0565d7e8909 100644 --- a/hadoop-ozone/csi/pom.xml +++ b/hadoop-ozone/csi/pom.xml @@ -33,6 +33,10 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> + + org.apache.ozone + hdds-common + com.google.protobuf protobuf-java-util @@ -70,6 +74,15 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.ozone hdds-server-framework + + org.apache.ozone + ozone-common + + + + commons-io + commons-io + com.google.code.findbugs jsr305 @@ -90,6 +103,14 
@@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> io.grpc grpc-netty + + io.netty + netty-transport + + + io.netty + netty-transport-classes-epoll + io.netty netty-transport-native-epoll @@ -111,10 +132,24 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> ch.qos.reload4j reload4j + + org.slf4j + slf4j-api + org.slf4j slf4j-reload4j + + io.grpc + grpc-api + + + com.google.code.findbugs + jsr305 + + + io.grpc grpc-protobuf @@ -133,6 +168,10 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> io.grpc grpc-stub + + info.picocli + picocli + org.apache.ozone ozone-client diff --git a/hadoop-ozone/httpfsgateway/pom.xml b/hadoop-ozone/httpfsgateway/pom.xml index 5be2a4be83f6..7664643b153e 100644 --- a/hadoop-ozone/httpfsgateway/pom.xml +++ b/hadoop-ozone/httpfsgateway/pom.xml @@ -40,6 +40,14 @@ + + org.apache.ozone + hdds-common + + + org.apache.ozone + hdds-config + org.apache.ozone hdds-server-framework @@ -86,6 +94,10 @@ commons-codec runtime + + org.apache.commons + commons-lang3 + ch.qos.reload4j reload4j @@ -138,6 +150,18 @@ + + com.fasterxml.jackson.core + jackson-databind + + + com.google.guava + guava + + + jakarta.ws.rs + jakarta.ws.rs-api + jakarta.xml.bind jakarta.xml.bind-api @@ -146,6 +170,10 @@ org.glassfish.hk2 hk2-api + + org.glassfish.jersey.core + jersey-server + org.glassfish.jersey.inject jersey-hk2 diff --git a/hadoop-ozone/insight/pom.xml b/hadoop-ozone/insight/pom.xml index ae0bc5cd3aab..bcfb1660244d 100644 --- a/hadoop-ozone/insight/pom.xml +++ b/hadoop-ozone/insight/pom.xml @@ -31,6 +31,26 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> + + org.apache.ozone + hdds-common + + + org.apache.ozone + hdds-config + + + org.apache.ozone + hdds-container-service + + + org.apache.ozone + hdds-interface-admin + + + org.apache.ozone + hdds-interface-client + org.apache.ozone ozone-manager @@ -51,6 +71,10 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.ozone ozone-filesystem + + org.apache.ozone + ozone-interface-client + org.apache.ozone hdds-server-framework @@ -59,6 +83,18 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.ozone hdds-tools + + org.apache.httpcomponents + httpclient + + + org.apache.httpcomponents + httpcore + + + info.picocli + picocli + jakarta.xml.bind jakarta.xml.bind-api diff --git a/hadoop-ozone/integration-test/pom.xml b/hadoop-ozone/integration-test/pom.xml index e1ba4af83db7..f66f64d2874f 100644 --- a/hadoop-ozone/integration-test/pom.xml +++ b/hadoop-ozone/integration-test/pom.xml @@ -238,6 +238,11 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> + + org.apache.ratis + ratis-server + + org.hamcrest hamcrest diff --git a/hadoop-ozone/interface-client/pom.xml b/hadoop-ozone/interface-client/pom.xml index c196dca045da..18d9584fbc85 100644 --- a/hadoop-ozone/interface-client/pom.xml +++ b/hadoop-ozone/interface-client/pom.xml @@ -33,6 +33,10 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> + + com.google.guava + guava + com.google.protobuf protobuf-java @@ -41,6 +45,22 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.ozone hdds-interface-client + + + org.apache.hadoop.thirdparty + hadoop-shaded-protobuf_3_7 + + + + io.grpc + grpc-api + + + com.google.code.findbugs + jsr305 + + + io.grpc grpc-protobuf diff --git a/hadoop-ozone/interface-storage/pom.xml b/hadoop-ozone/interface-storage/pom.xml index ca7f14f1fa50..ab1cc275ac1d 100644 --- a/hadoop-ozone/interface-storage/pom.xml +++ b/hadoop-ozone/interface-storage/pom.xml @@ -31,11 +31,32 @@ + + org.apache.ozone + hdds-common + + + org.apache.ozone + hdds-interface-client + 
org.apache.ozone ozone-common + + org.apache.ozone + rocksdb-checkpoint-differ + + + org.apache.ratis + ratis-common + + + + com.google.guava + guava + com.google.protobuf protobuf-java diff --git a/hadoop-ozone/ozone-manager/pom.xml b/hadoop-ozone/ozone-manager/pom.xml index 5f6bc54c5af9..ae427727def5 100644 --- a/hadoop-ozone/ozone-manager/pom.xml +++ b/hadoop-ozone/ozone-manager/pom.xml @@ -32,6 +32,35 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> + + org.apache.ozone + hdds-annotation-processing + test + + + org.apache.ozone + hdds-client + + + org.apache.ozone + hdds-common + + + org.apache.ozone + hdds-config + + + org.apache.ozone + hdds-interface-server + + + org.apache.ozone + hdds-managed-rocksdb + + + org.apache.ozone + ozone-interface-client + org.aspectj @@ -85,25 +114,132 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> rocksdb-checkpoint-differ + + org.apache.commons + commons-compress + + + org.apache.commons + commons-lang3 + + + org.apache.kerby + kerby-util + + + org.apache.logging.log4j + log4j-api + + + org.apache.ratis + ratis-common + + + org.apache.ratis + ratis-grpc + + + org.apache.ratis + ratis-netty + + + org.apache.ratis + ratis-proto + + + org.apache.ratis + ratis-server-api + + + org.apache.ratis + ratis-server + + + org.apache.ratis + ratis-thirdparty-misc + + org.bouncycastle bcprov-jdk18on + + io.grpc + grpc-api + + + com.google.code.findbugs + jsr305 + + + + + io.grpc + grpc-netty + + + io.grpc + grpc-stub + + + io.netty + netty-common + + + io.netty + netty-handler + io.netty netty-tcnative-boringssl-static runtime + + io.netty + netty-transport + org.reflections reflections + + com.fasterxml.jackson.core + jackson-annotations + + + com.fasterxml.jackson.core + jackson-databind + + + com.google.guava + guava + + + com.google.protobuf + protobuf-java + com.sun.jersey jersey-client + + info.picocli + picocli + + + jakarta.annotation + jakarta.annotation-api + + + jakarta.xml.bind + jakarta.xml.bind-api + + + javax.servlet + javax.servlet-api + @@ -134,7 +270,7 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.ranger - ranger-intg + ranger-plugins-common ${ranger.version} compile diff --git a/hadoop-ozone/ozonefs-common/pom.xml b/hadoop-ozone/ozonefs-common/pom.xml index 99ab7ba21bf8..6132f9bc125a 100644 --- a/hadoop-ozone/ozonefs-common/pom.xml +++ b/hadoop-ozone/ozonefs-common/pom.xml @@ -43,10 +43,22 @@ + + org.apache.ozone + hdds-client + + + org.apache.ozone + hdds-config + org.apache.ozone hdds-hadoop-dependency-client + + org.apache.ozone + hdds-interface-client + org.apache.ozone ozone-client @@ -60,6 +72,45 @@ ozone-common + + commons-collections + commons-collections + + + org.apache.commons + commons-lang3 + + + org.apache.httpcomponents + httpclient + + + org.apache.ratis + ratis-common + + + org.slf4j + slf4j-api + + + + com.google.guava + guava + + + + io.opentracing + opentracing-api + + + io.opentracing + opentracing-util + + + jakarta.annotation + jakarta.annotation-api + + org.apache.ozone diff --git a/hadoop-ozone/ozonefs/pom.xml b/hadoop-ozone/ozonefs/pom.xml index 33bbb893cfd7..176f21b98603 100644 --- a/hadoop-ozone/ozonefs/pom.xml +++ b/hadoop-ozone/ozonefs/pom.xml @@ -73,15 +73,49 @@ + + org.apache.ozone + hdds-config + + + org.apache.ozone + hdds-common + org.apache.ozone ozone-client + + org.apache.ozone + ozone-common + org.apache.ozone ozone-filesystem-common + + org.apache.ratis + ratis-common + + + + com.google.guava + guava + + + io.opentracing + opentracing-api + + + io.opentracing + opentracing-util + + + 
org.slf4j + slf4j-api + + org.apache.ozone diff --git a/hadoop-ozone/pom.xml b/hadoop-ozone/pom.xml index 059db6b95130..e262895664fc 100644 --- a/hadoop-ozone/pom.xml +++ b/hadoop-ozone/pom.xml @@ -138,6 +138,11 @@ ozone-filesystem-hadoop2 ${ozone.version} + + org.apache.ozone + hdds-annotation-processing + ${hdds.version} + org.apache.ozone hdds-config @@ -148,11 +153,26 @@ hdds-erasurecode ${hdds.version} + + org.apache.ozone + hdds-interface-admin + ${hdds.version} + org.apache.ozone hdds-interface-client ${hdds.version} + + org.apache.ozone + hdds-interface-server + ${hdds.version} + + + org.apache.ozone + hdds-managed-rocksdb + ${hdds.version} + org.apache.ozone ozone-s3-secret-store diff --git a/hadoop-ozone/recon-codegen/pom.xml b/hadoop-ozone/recon-codegen/pom.xml index 26a70c459748..bb7756a9de37 100644 --- a/hadoop-ozone/recon-codegen/pom.xml +++ b/hadoop-ozone/recon-codegen/pom.xml @@ -28,18 +28,36 @@ + + org.apache.ozone + hdds-config + org.apache.ozone ozone-common + + + commons-io + commons-io + org.apache.derby derby + + + org.slf4j + slf4j-api + org.springframework spring-jdbc + + org.springframework + spring-tx + org.jooq jooq-codegen diff --git a/hadoop-ozone/recon/pom.xml b/hadoop-ozone/recon/pom.xml index 0e75e0850b0a..a24252c1ed62 100644 --- a/hadoop-ozone/recon/pom.xml +++ b/hadoop-ozone/recon/pom.xml @@ -238,10 +238,46 @@ + + org.apache.ozone + hdds-common + + + org.apache.ozone + hdds-config + + + org.apache.ozone + hdds-container-service + + + org.apache.ozone + hdds-interface-client + + + org.apache.ozone + hdds-interface-server + + + org.apache.ozone + hdds-managed-rocksdb + + + org.apache.ozone + hdds-server-framework + org.apache.ozone ozone-common + + org.apache.ozone + ozone-interface-client + + + org.apache.ozone + ozone-interface-storage + org.apache.ozone ozone-reconcodegen @@ -272,6 +308,48 @@ org.apache.ozone hdds-server-scm + + + commons-collections + commons-collections + + + org.apache.commons + commons-compress + + + commons-io + commons-io + + + org.apache.commons + commons-lang3 + + + org.apache.ratis + ratis-common + + + org.apache.ratis + ratis-proto + + + + aopalliance + aopalliance + + + com.fasterxml.jackson.core + jackson-annotations + + + com.fasterxml.jackson.core + jackson-databind + + + com.google.guava + guava + com.google.inject guice @@ -284,10 +362,18 @@ com.google.inject.extensions guice-servlet + + com.google.protobuf + protobuf-java + org.glassfish.jersey.containers jersey-container-servlet + + info.picocli + picocli + org.glassfish.jersey.containers jersey-container-servlet-core @@ -296,6 +382,14 @@ org.glassfish.hk2 guice-bridge + + org.glassfish.hk2.external + jakarta.inject + + + org.glassfish.jersey.core + jersey-common + org.glassfish.jersey.core jersey-server @@ -332,18 +426,74 @@ org.apache.derby derby + + org.eclipse.jetty + jetty-servlet + + + org.eclipse.jetty + jetty-util + + + org.glassfish.hk2 + hk2-api + + + org.reflections + reflections + + + org.rocksdb + rocksdbjni + org.xerial sqlite-jdbc + + org.slf4j + slf4j-api + + + org.springframework + spring-core + org.springframework spring-jdbc + + org.springframework + spring-tx + jakarta.activation jakarta.activation-api + + jakarta.annotation + jakarta.annotation-api + + + jakarta.validation + jakarta.validation-api + + + jakarta.ws.rs + jakarta.ws.rs-api + + + jakarta.xml.bind + jakarta.xml.bind-api + + + javax.inject + javax.inject + + + javax.servlet + javax.servlet-api + org.javassist javassist diff --git a/hadoop-ozone/s3-secret-store/pom.xml 
b/hadoop-ozone/s3-secret-store/pom.xml index df34dd8cdc88..b2da4c9e3c87 100644 --- a/hadoop-ozone/s3-secret-store/pom.xml +++ b/hadoop-ozone/s3-secret-store/pom.xml @@ -47,6 +47,14 @@ com.bettercloud vault-java-driver + + jakarta.annotation + jakarta.annotation-api + + + org.slf4j + slf4j-api + diff --git a/hadoop-ozone/s3gateway/pom.xml b/hadoop-ozone/s3gateway/pom.xml index 01fe0a257bdb..c26171d98ac4 100644 --- a/hadoop-ozone/s3gateway/pom.xml +++ b/hadoop-ozone/s3gateway/pom.xml @@ -31,11 +31,31 @@ + + org.apache.ozone + hdds-client + + + org.apache.ozone + hdds-common + + + org.apache.ozone + hdds-config + + + org.apache.ozone + hdds-interface-server + org.apache.ozone ozone-common compile + + org.apache.ozone + ozone-interface-client + org.javassist javassist @@ -44,14 +64,55 @@ org.apache.ozone hdds-server-framework + + commons-codec + commons-codec + + + commons-io + commons-io + + + org.apache.httpcomponents + httpclient + + + org.apache.httpcomponents + httpcore + + + org.apache.kerby + kerby-util + + + org.apache.ratis + ratis-common + + org.jboss.weld.servlet weld-servlet-shaded + + org.eclipse.jetty + jetty-servlet + + + org.eclipse.jetty + jetty-webapp + org.glassfish.jersey.containers jersey-container-servlet-core + + org.glassfish.jersey.core + jersey-common + + + org.glassfish.jersey.core + jersey-server + org.glassfish.jersey.ext.cdi jersey-cdi1x @@ -68,6 +129,14 @@ org.glassfish.hk2 hk2-api + + org.slf4j + slf4j-api + + + com.fasterxml.jackson.core + jackson-databind + com.fasterxml.jackson.dataformat jackson-dataformat-xml @@ -76,10 +145,30 @@ com.fasterxml.jackson.module jackson-module-jaxb-annotations + + com.google.guava + guava + + + javax.annotation + javax.annotation-api + javax.enterprise cdi-api + + javax.servlet + javax.servlet-api + + + jakarta.annotation + jakarta.annotation-api + + + jakarta.ws.rs + jakarta.ws.rs-api + jakarta.xml.bind jakarta.xml.bind-api @@ -92,6 +181,10 @@ jakarta.activation jakarta.activation-api + + info.picocli + picocli + io.grpc grpc-netty @@ -118,6 +211,18 @@ io.netty netty-transport + + io.opentracing + opentracing-api + + + io.opentracing + opentracing-noop + + + io.opentracing + opentracing-util + org.apache.ozone hdds-hadoop-dependency-server diff --git a/hadoop-ozone/tools/pom.xml b/hadoop-ozone/tools/pom.xml index 98a6fef13e17..04c1c8602cb0 100644 --- a/hadoop-ozone/tools/pom.xml +++ b/hadoop-ozone/tools/pom.xml @@ -29,6 +29,42 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> jar + + org.apache.ozone + hdds-client + + + org.apache.ozone + hdds-common + + + org.apache.ozone + hdds-config + + + org.apache.ozone + hdds-container-service + + + org.apache.ozone + hdds-interface-admin + + + org.apache.ozone + hdds-interface-client + + + org.apache.ozone + hdds-interface-server + + + org.apache.ozone + hdds-managed-rocksdb + + + org.apache.ozone + hdds-server-scm + org.apache.ozone ozone-manager @@ -45,10 +81,22 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.ozone ozone-client + + org.apache.ozone + ozone-filesystem-common + org.apache.ozone ozone-filesystem + + org.apache.ozone + ozone-interface-client + + + org.apache.ozone + ozone-interface-storage + org.apache.ozone hdds-server-framework @@ -68,6 +116,55 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> hdds-hadoop-dependency-server + + commons-codec + commons-codec + + + commons-io + commons-io + + + org.apache.commons + commons-lang3 + + + org.apache.httpcomponents + httpclient + + + org.apache.httpcomponents + httpcore + + + org.apache.ratis + ratis-client + + + 
org.apache.ratis + ratis-common + + + org.apache.ratis + ratis-proto + + + org.apache.ratis + ratis-server-api + + + org.apache.ratis + ratis-thirdparty-misc + + + org.apache.ratis + ratis-tools + + + + info.picocli + picocli + jakarta.xml.bind jakarta.xml.bind-api @@ -76,6 +173,10 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.glassfish.jaxb jaxb-runtime + + jakarta.annotation + jakarta.annotation-api + jakarta.activation jakarta.activation-api @@ -84,6 +185,14 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> io.dropwizard.metrics metrics-core + + io.opentracing + opentracing-api + + + io.opentracing + opentracing-util + com.amazonaws aws-java-sdk-core @@ -92,10 +201,42 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> com.amazonaws aws-java-sdk-s3 + + com.fasterxml.jackson.core + jackson-annotations + + + com.fasterxml.jackson.core + jackson-core + + + com.fasterxml.jackson.core + jackson-databind + + + com.fasterxml.jackson.datatype + jackson-datatype-jsr310 + + + com.google.guava + guava + + + org.jooq + jooq + org.kohsuke.metainf-services metainf-services + + org.rocksdb + rocksdbjni + + + org.slf4j + slf4j-api + diff --git a/pom.xml b/pom.xml index a6b7613c786b..62303669e318 100644 --- a/pom.xml +++ b/pom.xml @@ -201,9 +201,12 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs 4.2.2 2.6.1 2.1.1 + 2.0.2 2.1.6 + 1 2.12.7 + 1.0 0.21.0 2.29.2 32.1.3-jre @@ -412,6 +415,11 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs ${hadoop.version} test-jar + + aopalliance + aopalliance + ${aopalliance.version} + com.google.guava guava @@ -468,6 +476,11 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs kerb-util ${kerby.version} + + org.apache.kerby + kerby-util + ${kerby.version} + commons-codec commons-codec @@ -513,6 +526,11 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs cdi-api ${cdi-api.version} + + javax.inject + javax.inject + ${javax.inject.version} + javax.servlet javax.servlet-api @@ -601,6 +619,11 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs jersey-container-servlet-core ${jersey2.version} + + org.glassfish.jersey.core + jersey-common + ${jersey2.version} + org.glassfish.jersey.core jersey-server @@ -937,7 +960,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs org.springframework - spring-jdbc + spring-core ${spring.version} @@ -946,6 +969,16 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs + + org.springframework + spring-jdbc + ${spring.version} + + + org.springframework + spring-tx + ${spring.version} + com.fasterxml.woodstox woodstox-core @@ -1049,6 +1082,11 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs opentracing-api ${opentracing.version} + + io.opentracing + opentracing-noop + ${opentracing.version} + io.opentracing opentracing-util @@ -1208,6 +1246,11 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs jakarta.annotation-api ${jakarta.annotation.version} + + jakarta.validation + jakarta.validation-api + ${jakarta.validation.version} + jakarta.ws.rs jakarta.ws.rs-api From 6216e50b2a112f5c7fd2f54c1cd08c45f9d146f7 Mon Sep 17 00:00:00 2001 From: Chung En Lee Date: Sat, 17 Aug 2024 01:49:41 +0800 Subject: [PATCH 27/50] HDDS-9941. 
Do not use heap buffer in KeyValueContainerCheck.verifyChecksum (#6882) --- .../keyvalue/KeyValueContainerCheck.java | 11 +++++-- .../keyvalue/helpers/TestChunkUtils.java | 31 ++++++++++--------- 2 files changed, 25 insertions(+), 17 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerCheck.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerCheck.java index 70539111fb99..c235109f2cbc 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerCheck.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerCheck.java @@ -47,6 +47,7 @@ import java.nio.ByteBuffer; import java.nio.channels.FileChannel; +import org.apache.hadoop.util.DirectBufferPool; import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -70,6 +71,7 @@ public class KeyValueContainerCheck { private String metadataPath; private HddsVolume volume; private KeyValueContainer container; + private static final DirectBufferPool BUFFER_POOL = new DirectBufferPool(); public KeyValueContainerCheck(String metadataPath, ConfigurationSource conf, long containerID, HddsVolume volume, KeyValueContainer container) { @@ -364,8 +366,12 @@ chunkFile, new IOException("Missing chunk file " + } } else if (chunk.getChecksumData().getType() != ContainerProtos.ChecksumType.NONE) { - ScanResult result = verifyChecksum(block, chunk, chunkFile, layout, + int bytesPerChecksum = chunk.getChecksumData().getBytesPerChecksum(); + ByteBuffer buffer = BUFFER_POOL.getBuffer(bytesPerChecksum); + ScanResult result = verifyChecksum(block, chunk, chunkFile, layout, buffer, throttler, canceler); + buffer.clear(); + BUFFER_POOL.returnBuffer(buffer); if (!result.isHealthy()) { return result; } @@ -377,7 +383,7 @@ chunkFile, new IOException("Missing chunk file " + private static ScanResult verifyChecksum(BlockData block, ContainerProtos.ChunkInfo chunk, File chunkFile, - ContainerLayoutVersion layout, + ContainerLayoutVersion layout, ByteBuffer buffer, DataTransferThrottler throttler, Canceler canceler) { ChecksumData checksumData = ChecksumData.getFromProtoBuf(chunk.getChecksumData()); @@ -385,7 +391,6 @@ private static ScanResult verifyChecksum(BlockData block, int bytesPerChecksum = checksumData.getBytesPerChecksum(); Checksum cal = new Checksum(checksumData.getChecksumType(), bytesPerChecksum); - ByteBuffer buffer = ByteBuffer.allocate(bytesPerChecksum); long bytesRead = 0; try (FileChannel channel = FileChannel.open(chunkFile.toPath(), ChunkUtils.READ_OPTIONS, ChunkUtils.NO_ATTRIBUTES)) { diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/helpers/TestChunkUtils.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/helpers/TestChunkUtils.java index 5cd264af9989..d04f3a5167f9 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/helpers/TestChunkUtils.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/helpers/TestChunkUtils.java @@ -22,7 +22,6 @@ import java.io.IOException; import java.io.OutputStream; import java.nio.ByteBuffer; -import java.nio.MappedByteBuffer; import java.nio.channels.FileChannel; import java.nio.file.Files; import java.nio.file.Path; @@ -107,12 
+106,15 @@ void concurrentReadOfSameFile() throws Exception { assertEquals(1, buffers.size()); final ByteBuffer readBuffer = buffers.get(0); + int remaining = readBuffer.remaining(); + byte[] readArray = new byte[remaining]; + readBuffer.get(readArray); LOG.info("Read data ({}): {}", threadNumber, - new String(readBuffer.array(), UTF_8)); - if (!Arrays.equals(array, readBuffer.array())) { + new String(readArray, UTF_8)); + if (!Arrays.equals(array, readArray)) { failed.set(true); } - assertEquals(len, readBuffer.remaining()); + assertEquals(len, remaining); } catch (Exception e) { LOG.error("Failed to read data ({})", threadNumber, e); failed.set(true); @@ -210,9 +212,11 @@ void serialRead() throws IOException { final List buffers = chunk.asByteBufferList(); assertEquals(1, buffers.size()); final ByteBuffer readBuffer = buffers.get(0); - - assertArrayEquals(array, readBuffer.array()); - assertEquals(len, readBuffer.remaining()); + int remain = readBuffer.remaining(); + byte[] readArray = new byte[remain]; + readBuffer.get(readArray); + assertArrayEquals(array, readArray); + assertEquals(len, remain); } @Test @@ -266,29 +270,29 @@ void testReadData() throws Exception { // large file final int large = 10 << 20; // 10MB assertThat(large).isGreaterThan(MAPPED_BUFFER_THRESHOLD); - runTestReadFile(large, dir, true); + runTestReadFile(large, dir); // small file final int small = 30 << 10; // 30KB assertThat(small).isLessThanOrEqualTo(MAPPED_BUFFER_THRESHOLD); - runTestReadFile(small, dir, false); + runTestReadFile(small, dir); // boundary case - runTestReadFile(MAPPED_BUFFER_THRESHOLD, dir, false); + runTestReadFile(MAPPED_BUFFER_THRESHOLD, dir); // empty file - runTestReadFile(0, dir, false); + runTestReadFile(0, dir); for (int i = 0; i < 10; i++) { final int length = RANDOM.nextInt(2 * MAPPED_BUFFER_THRESHOLD) + 1; - runTestReadFile(length, dir, length > MAPPED_BUFFER_THRESHOLD); + runTestReadFile(length, dir); } } finally { FileUtils.deleteDirectory(dir); } } - void runTestReadFile(int length, File dir, boolean isMapped) + void runTestReadFile(int length, File dir) throws Exception { final File file; for (int i = length; ; i++) { @@ -328,7 +332,6 @@ void runTestReadFile(int length, File dir, boolean isMapped) RANDOM.setSeed(seed); for (ByteBuffer b : buffers) { - assertEquals(isMapped, b instanceof MappedByteBuffer); RANDOM.nextBytes(array); assertEquals(ByteBuffer.wrap(array, 0, b.remaining()), b); } From 37273d1271e55d1b6648bf766e009153d25c0501 Mon Sep 17 00:00:00 2001 From: Tsz-Wo Nicholas Sze Date: Fri, 16 Aug 2024 14:52:56 -0700 Subject: [PATCH 28/50] HDDS-11320. Update OM, SCM, Datanode conf for RATIS-2135. 
(#7080) --- .../hadoop/hdds/conf/RatisConfUtils.java | 44 ++++++++++++++ .../hadoop/hdds/conf/TestRatisConfUtils.java | 57 +++++++++++++++++++ .../server/ratis/XceiverServerRatis.java | 21 ++++--- .../apache/hadoop/hdds/scm/ha/RatisUtil.java | 14 +++-- .../om/ratis/OzoneManagerRatisServer.java | 14 ++--- 5 files changed, 123 insertions(+), 27 deletions(-) create mode 100644 hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/conf/RatisConfUtils.java create mode 100644 hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/conf/TestRatisConfUtils.java diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/conf/RatisConfUtils.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/conf/RatisConfUtils.java new file mode 100644 index 000000000000..3b247273abdf --- /dev/null +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/conf/RatisConfUtils.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdds.conf; + +import org.apache.ratis.conf.RaftProperties; +import org.apache.ratis.grpc.GrpcConfigKeys; +import org.apache.ratis.server.RaftServerConfigKeys; +import org.apache.ratis.util.Preconditions; +import org.apache.ratis.util.SizeInBytes; + +/** + * Utilities for Ratis configurations. + */ +public class RatisConfUtils { + /** For {@link GrpcConfigKeys}. */ + public static class Grpc { + /** For setting {@link GrpcConfigKeys#setMessageSizeMax(RaftProperties, SizeInBytes)}. */ + public static void setMessageSizeMax(RaftProperties properties, int max) { + Preconditions.assertTrue(max > 0, () -> "max = " + max + " <= 0"); + + final long logAppenderBufferByteLimit = RaftServerConfigKeys.Log.Appender.bufferByteLimit(properties).getSize(); + Preconditions.assertTrue(max >= logAppenderBufferByteLimit, + () -> "max = " + max + " < logAppenderBufferByteLimit = " + logAppenderBufferByteLimit); + + // Need an 1MB gap; see RATIS-2135 + GrpcConfigKeys.setMessageSizeMax(properties, SizeInBytes.valueOf(max + SizeInBytes.ONE_MB.getSize())); + } + } +} diff --git a/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/conf/TestRatisConfUtils.java b/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/conf/TestRatisConfUtils.java new file mode 100644 index 000000000000..50bf524f0258 --- /dev/null +++ b/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/conf/TestRatisConfUtils.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdds.conf; + +import org.apache.ratis.conf.RaftProperties; +import org.apache.ratis.grpc.GrpcConfigKeys; +import org.apache.ratis.server.RaftServerConfigKeys; +import org.apache.ratis.util.SizeInBytes; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Test {@link RatisConfUtils}. + */ +public class TestRatisConfUtils { + private static final Logger LOG = LoggerFactory.getLogger(TestRatisConfUtils.class); + + @Test + void testGrpcSetMessageSizeMax() { + final RaftProperties properties = new RaftProperties(); + + final int logAppenderBufferByteLimit = 1000; + + // setMessageSizeMax without setBufferByteLimit + Assertions.assertThrows(IllegalStateException.class, + () -> RatisConfUtils.Grpc.setMessageSizeMax(properties, logAppenderBufferByteLimit)); + + RaftServerConfigKeys.Log.Appender.setBufferByteLimit(properties, SizeInBytes.valueOf(logAppenderBufferByteLimit)); + + // setMessageSizeMax with a value smaller than logAppenderBufferByteLimit + Assertions.assertThrows(IllegalStateException.class, + () -> RatisConfUtils.Grpc.setMessageSizeMax(properties, logAppenderBufferByteLimit - 1)); + + // setMessageSizeMax with the correct logAppenderBufferByteLimit + RatisConfUtils.Grpc.setMessageSizeMax(properties, logAppenderBufferByteLimit); + + final SizeInBytes max = GrpcConfigKeys.messageSizeMax(properties, LOG::info); + Assertions.assertEquals(SizeInBytes.ONE_MB.getSize(), max.getSize() - logAppenderBufferByteLimit); + } +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java index 2ae372320e0a..7899cdcc0e67 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java @@ -43,6 +43,7 @@ import org.apache.hadoop.hdds.HddsUtils; import org.apache.hadoop.hdds.conf.ConfigurationSource; import org.apache.hadoop.hdds.conf.DatanodeRatisServerConfig; +import org.apache.hadoop.hdds.conf.RatisConfUtils; import org.apache.hadoop.hdds.conf.StorageUnit; import org.apache.hadoop.hdds.HddsConfigKeys; import org.apache.hadoop.hdds.protocol.DatanodeDetails; @@ -279,11 +280,14 @@ public RaftProperties newRaftProperties() { final RpcType rpc = setRpcType(properties); // set raft segment size - setRaftSegmentAndWriteBufferSize(properties); + final int logAppenderBufferByteLimit = setRaftSegmentAndWriteBufferSize(properties); + + // set grpc message size max + final int max = Math.max(OzoneConsts.OZONE_SCM_CHUNK_MAX_SIZE, logAppenderBufferByteLimit); + RatisConfUtils.Grpc.setMessageSizeMax(properties, max); // set raft segment pre-allocated size - final long raftSegmentPreallocatedSize = - setRaftSegmentPreallocatedSize(properties); + setRaftSegmentPreallocatedSize(properties); // setup ratis stream if datastream is enabled if 
(streamEnable) { @@ -314,11 +318,6 @@ public RaftProperties newRaftProperties() { RaftServerConfigKeys.setStorageDir(properties, storageDirs); - // For grpc set the maximum message size - GrpcConfigKeys.setMessageSizeMax(properties, - SizeInBytes.valueOf(OzoneConsts.OZONE_SCM_CHUNK_MAX_SIZE - + raftSegmentPreallocatedSize)); - // Set the ratis port number if (rpc == SupportedRpcType.GRPC) { GrpcConfigKeys.Admin.setPort(properties, adminPort); @@ -407,17 +406,16 @@ private void setTimeoutForRetryCache(RaftProperties properties) { .setExpiryTime(properties, retryCacheTimeout); } - private long setRaftSegmentPreallocatedSize(RaftProperties properties) { + private void setRaftSegmentPreallocatedSize(RaftProperties properties) { final long raftSegmentPreallocatedSize = (long) conf.getStorageSize( OzoneConfigKeys.HDDS_CONTAINER_RATIS_SEGMENT_PREALLOCATED_SIZE_KEY, OzoneConfigKeys.HDDS_CONTAINER_RATIS_SEGMENT_PREALLOCATED_SIZE_DEFAULT, StorageUnit.BYTES); RaftServerConfigKeys.Log.setPreallocatedSize(properties, SizeInBytes.valueOf(raftSegmentPreallocatedSize)); - return raftSegmentPreallocatedSize; } - private void setRaftSegmentAndWriteBufferSize(RaftProperties properties) { + private int setRaftSegmentAndWriteBufferSize(RaftProperties properties) { final int logAppenderQueueNumElements = conf.getInt( HDDS_CONTAINER_RATIS_LOG_APPENDER_QUEUE_NUM_ELEMENTS, HDDS_CONTAINER_RATIS_LOG_APPENDER_QUEUE_NUM_ELEMENTS_DEFAULT); @@ -446,6 +444,7 @@ private void setRaftSegmentAndWriteBufferSize(RaftProperties properties) { SizeInBytes.valueOf(raftSegmentSize)); RaftServerConfigKeys.Log.setWriteBufferSize(properties, SizeInBytes.valueOf(raftSegmentBufferSize)); + return logAppenderQueueByteLimit; } private void setStateMachineDataConfigurations(RaftProperties properties) { diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/RatisUtil.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/RatisUtil.java index ccef5aab24ee..229ba6afe0de 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/RatisUtil.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/RatisUtil.java @@ -20,6 +20,7 @@ import com.google.common.base.Preconditions; import com.google.protobuf.ServiceException; import org.apache.hadoop.hdds.conf.ConfigurationSource; +import org.apache.hadoop.hdds.conf.RatisConfUtils; import org.apache.hadoop.hdds.conf.StorageUnit; import org.apache.hadoop.hdds.ratis.RatisHelper; import org.apache.hadoop.hdds.ratis.ServerNotLeaderException; @@ -69,8 +70,9 @@ public static RaftProperties newRaftProperties( // TODO: Check the default values. 
final RaftProperties properties = new RaftProperties(); setRaftStorageDir(properties, conf); - setRaftRpcProperties(properties, conf); - setRaftLogProperties(properties, conf); + + final int logAppenderBufferByteLimit = setRaftLogProperties(properties, conf); + setRaftRpcProperties(properties, conf, logAppenderBufferByteLimit); setRaftRetryCacheProperties(properties, conf); setRaftSnapshotProperties(properties, conf); setRaftLeadElectionProperties(properties, conf); @@ -100,15 +102,14 @@ public static void setRaftStorageDir(final RaftProperties properties, * @param ozoneConf ConfigurationSource */ private static void setRaftRpcProperties(final RaftProperties properties, - ConfigurationSource ozoneConf) { + ConfigurationSource ozoneConf, int logAppenderBufferByteLimit) { RatisHelper.setRpcType(properties, RpcType.valueOf(ozoneConf.get(ScmConfigKeys.OZONE_SCM_HA_RATIS_RPC_TYPE, ScmConfigKeys.OZONE_SCM_HA_RATIS_RPC_TYPE_DEFAULT))); GrpcConfigKeys.Server.setPort(properties, ozoneConf .getInt(ScmConfigKeys.OZONE_SCM_RATIS_PORT_KEY, ScmConfigKeys.OZONE_SCM_RATIS_PORT_DEFAULT)); - GrpcConfigKeys.setMessageSizeMax(properties, - SizeInBytes.valueOf("32m")); + RatisConfUtils.Grpc.setMessageSizeMax(properties, logAppenderBufferByteLimit); long ratisRequestTimeout = ozoneConf.getTimeDuration( ScmConfigKeys.OZONE_SCM_HA_RATIS_REQUEST_TIMEOUT, ScmConfigKeys.OZONE_SCM_HA_RATIS_REQUEST_TIMEOUT_DEFAULT, @@ -161,7 +162,7 @@ private static void setRaftLeadElectionProperties( * @param properties RaftProperties instance which will be updated * @param ozoneConf ConfigurationSource */ - private static void setRaftLogProperties(final RaftProperties properties, + private static int setRaftLogProperties(final RaftProperties properties, final ConfigurationSource ozoneConf) { Log.setSegmentSizeMax(properties, SizeInBytes.valueOf((long) ozoneConf.getStorageSize( @@ -195,6 +196,7 @@ private static void setRaftLogProperties(final RaftProperties properties, ozoneConf.getInt(ScmConfigKeys.OZONE_SCM_HA_RAFT_LOG_PURGE_GAP, ScmConfigKeys.OZONE_SCM_HA_RAFT_LOG_PURGE_GAP_DEFAULT)); Log.setSegmentCacheNumMax(properties, 2); + return logAppenderQueueByteLimit; } /** diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java index aa9612ef8058..78d6ed89d2d1 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java @@ -44,6 +44,7 @@ import org.apache.hadoop.hdds.HddsUtils; import org.apache.hadoop.hdds.conf.ConfigurationSource; +import org.apache.hadoop.hdds.conf.RatisConfUtils; import org.apache.hadoop.hdds.conf.StorageUnit; import org.apache.hadoop.hdds.ratis.RatisHelper; import org.apache.hadoop.hdds.security.SecurityConfig; @@ -624,17 +625,16 @@ public static RaftProperties newRaftProperties(ConfigurationSource conf, // Set Ratis storage directory RaftServerConfigKeys.setStorageDir(properties, Collections.singletonList(new File(ratisStorageDir))); - final int logAppenderQueueByteLimit = (int) conf.getStorageSize( + final int logAppenderBufferByteLimit = (int) conf.getStorageSize( OMConfigKeys.OZONE_OM_RATIS_LOG_APPENDER_QUEUE_BYTE_LIMIT, OMConfigKeys.OZONE_OM_RATIS_LOG_APPENDER_QUEUE_BYTE_LIMIT_DEFAULT, StorageUnit.BYTES); + setRaftLogProperties(properties, logAppenderBufferByteLimit, conf); // For grpc 
config - setGrpcConfig(properties, logAppenderQueueByteLimit); + RatisConfUtils.Grpc.setMessageSizeMax(properties, logAppenderBufferByteLimit); setRaftLeaderElectionProperties(properties, conf); - setRaftLogProperties(properties, logAppenderQueueByteLimit, conf); - setRaftRpcProperties(properties, conf); setRaftRetryCacheProperties(properties, conf); @@ -693,12 +693,6 @@ private static void setRaftLogProperties(RaftProperties properties, RaftServerConfigKeys.Log.setSegmentCacheNumMax(properties, 2); } - private static void setGrpcConfig(RaftProperties properties, int logAppenderQueueByteLimit) { - // For grpc set the maximum message size - // TODO: calculate the optimal max message size - GrpcConfigKeys.setMessageSizeMax(properties, SizeInBytes.valueOf(logAppenderQueueByteLimit)); - } - private static void setRaftRpcProperties(RaftProperties properties, ConfigurationSource conf) { // Set the server request timeout TimeUnit serverRequestTimeoutUnit = OMConfigKeys.OZONE_OM_RATIS_SERVER_REQUEST_TIMEOUT_DEFAULT.getUnit(); From bc790de06259ee12e42aaa9ca33b468e1ada2d11 Mon Sep 17 00:00:00 2001 From: len548 <63490262+len548@users.noreply.github.com> Date: Fri, 16 Aug 2024 23:54:21 +0200 Subject: [PATCH 29/50] HDDS-11319. Reduce XceiverClientRatis info log to debug (#7082) --- .../java/org/apache/hadoop/hdds/scm/XceiverClientRatis.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientRatis.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientRatis.java index 2794ca9c61b0..eb0ed0a885cb 100644 --- a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientRatis.java +++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientRatis.java @@ -137,7 +137,7 @@ private XceiverClientRatis(Pipeline pipeline, RpcType rpcType, throw new IllegalArgumentException(watchType + " is not supported. " + "Currently only ALL_COMMITTED or MAJORITY_COMMITTED are supported"); } - LOG.info("WatchType {}. Majority {}, ", this.watchType, this.majority); + LOG.debug("WatchType {}. Majority {}, ", this.watchType, this.majority); if (LOG.isTraceEnabled()) { LOG.trace("new XceiverClientRatis for pipeline " + pipeline.getId(), new Throwable("TRACE")); From 7106fac34186bddf8b8b08a3e1566d99d2252d46 Mon Sep 17 00:00:00 2001 From: Tsz-Wo Nicholas Sze Date: Fri, 16 Aug 2024 15:53:35 -0700 Subject: [PATCH 30/50] HDDS-11310. Remove EndpointStateMachine.EndPointStates.value. 
(#7070) --- .../statemachine/EndpointStateMachine.java | 72 ++++--------------- ...atanodeProtocolClientSideTranslatorPB.java | 4 +- 2 files changed, 14 insertions(+), 62 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/EndpointStateMachine.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/EndpointStateMachine.java index 0b9005422ef9..a6c3b11de926 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/EndpointStateMachine.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/EndpointStateMachine.java @@ -26,7 +26,6 @@ import org.slf4j.LoggerFactory; import java.io.Closeable; -import java.io.IOException; import java.net.InetSocketAddress; import java.time.ZonedDateTime; import java.util.concurrent.ExecutorService; @@ -51,7 +50,7 @@ public class EndpointStateMachine private final InetSocketAddress address; private final Lock lock; private final ConfigurationSource conf; - private EndPointStates state; + private EndPointStates state = EndPointStates.FIRST; private VersionResponse version; private ZonedDateTime lastSuccessfulHeartbeat; private boolean isPassive; @@ -72,7 +71,6 @@ public EndpointStateMachine(InetSocketAddress address, this.endPoint = endPoint; this.missedCount = new AtomicLong(0); this.address = address; - state = EndPointStates.getInitState(); lock = new ReentrantLock(); this.conf = conf; executorService = Executors.newSingleThreadExecutor( @@ -153,11 +151,9 @@ public ExecutorService getExecutorService() { /** * Closes the connection. - * - * @throws IOException */ @Override - public void close() throws IOException { + public void close() { if (endPoint != null) { endPoint.close(); } @@ -232,9 +228,8 @@ public void logIfNeeded(Exception ex) { String serverName = "SCM"; if (isPassive) { - // Recon connection failures can be logged 10 times lower than regular - // SCM. - missCounter = this.getMissedCount() % (10 * getLogWarnInterval(conf)); + // Recon connection failures can be logged 10 times lower than regular SCM. + missCounter = this.getMissedCount() % (10L * getLogWarnInterval(conf)); serverName = "Recon"; } @@ -272,50 +267,16 @@ public void setPassive(boolean passive) { *
* This is a sorted list of states that EndPoint will traverse. *
- * GetNextState will move this enum from getInitState to getLastState. + * {@link #getNextState()} will move from {@link #FIRST} to {@link #LAST}. */ public enum EndPointStates { - GETVERSION(1), - REGISTER(2), - HEARTBEAT(3), - SHUTDOWN(4); // if you add value after this please edit getLastState too. - private final int value; - - /** - * Constructs endPointStates. - * - * @param value state. - */ - EndPointStates(int value) { - this.value = value; - } - - /** - * Returns the first State. - * - * @return First State. - */ - public static EndPointStates getInitState() { - return GETVERSION; - } - - /** - * The last state of endpoint states. - * - * @return last state. - */ - public static EndPointStates getLastState() { - return SHUTDOWN; - } + GETVERSION, + REGISTER, + HEARTBEAT, + SHUTDOWN; - /** - * returns the numeric value associated with the endPoint. - * - * @return int. - */ - public int getValue() { - return value; - } + private static final EndPointStates FIRST = values()[0]; + private static final EndPointStates LAST = values()[values().length - 1]; /** * Returns the next logical state that endPoint should move to. @@ -324,15 +285,8 @@ public int getValue() { * @return NextState. */ public EndPointStates getNextState() { - if (this.getValue() < getLastState().getValue()) { - int stateValue = this.getValue() + 1; - for (EndPointStates iter : values()) { - if (stateValue == iter.getValue()) { - return iter; - } - } - } - return getLastState(); + final int n = this.ordinal(); + return n >= LAST.ordinal() ? LAST : values()[n + 1]; } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocolPB/StorageContainerDatanodeProtocolClientSideTranslatorPB.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocolPB/StorageContainerDatanodeProtocolClientSideTranslatorPB.java index 3e9ed9493eb0..eeb99b5a3db2 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocolPB/StorageContainerDatanodeProtocolClientSideTranslatorPB.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocolPB/StorageContainerDatanodeProtocolClientSideTranslatorPB.java @@ -84,11 +84,9 @@ public StorageContainerDatanodeProtocolClientSideTranslatorPB( * fail require careful attention. It is strongly advised to relinquish the * underlying resources and to internally mark the {@code Closeable} * as closed, prior to throwing the {@code IOException}. - * - * @throws IOException if an I/O error occurs */ @Override - public void close() throws IOException { + public void close() { RPC.stopProxy(rpcProxy); } From 2cc31b95e64aa482814c76c209d024988d6a3593 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 17 Aug 2024 14:10:01 +0200 Subject: [PATCH 31/50] HDDS-11335. Bump exec-maven-plugin to 3.4.1 (#7087) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 62303669e318..ad61d99b3d3c 100644 --- a/pom.xml +++ b/pom.xml @@ -272,7 +272,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs 3.7.1 4.2.2 0.45.0 - 3.3.0 + 3.4.1 2.4.0 1.0-beta-1 1.0-M1 From 99af44f8eb13525039f6d393ce1d78d1d4cdc068 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 17 Aug 2024 15:23:26 +0200 Subject: [PATCH 32/50] HDDS-11336. 
Bump slf4j to 2.0.16 (#7086) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index ad61d99b3d3c..88b897874188 100644 --- a/pom.xml +++ b/pom.xml @@ -174,7 +174,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs 4.4.16 - 2.0.15 + 2.0.16 2.23.1 3.4.4 1.2.25 From d639baa3b68ee81151acfcda5330771eb66d5c57 Mon Sep 17 00:00:00 2001 From: "Doroszlai, Attila" Date: Sat, 17 Aug 2024 17:43:02 +0200 Subject: [PATCH 33/50] HDDS-11325. Mark testWriteMoreThanMaxFlushSize as flaky --- .../apache/hadoop/ozone/client/rpc/TestBlockOutputStream.java | 2 ++ .../ozone/client/rpc/TestBlockOutputStreamWithFailures.java | 1 + 2 files changed, 3 insertions(+) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestBlockOutputStream.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestBlockOutputStream.java index 2a6b2246b9c7..7f184f4c41a6 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestBlockOutputStream.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestBlockOutputStream.java @@ -48,6 +48,7 @@ import org.apache.hadoop.ozone.client.io.OzoneOutputStream; import org.apache.hadoop.ozone.container.TestHelper; +import org.apache.ozone.test.tag.Flaky; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.TestInstance; @@ -661,6 +662,7 @@ void testWriteExactlyMaxFlushSize(boolean flushDelay, boolean enablePiggybacking @ParameterizedTest @MethodSource("clientParameters") + @Flaky("HDDS-11325") void testWriteMoreThanMaxFlushSize(boolean flushDelay, boolean enablePiggybacking) throws Exception { OzoneClientConfig config = newClientConfig(cluster.getConf(), flushDelay, enablePiggybacking); try (OzoneClient client = newClient(cluster.getConf(), config)) { diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestBlockOutputStreamWithFailures.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestBlockOutputStreamWithFailures.java index 8d69da3ef3e1..f823add57bdc 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestBlockOutputStreamWithFailures.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestBlockOutputStreamWithFailures.java @@ -93,6 +93,7 @@ private static Stream clientParameters() { @ParameterizedTest @MethodSource("clientParameters") + @Flaky("HDDS-11325") void testContainerClose(boolean flushDelay, boolean enablePiggybacking) throws Exception { OzoneClientConfig config = newClientConfig(cluster.getConf(), flushDelay, enablePiggybacking); try (OzoneClient client = newClient(cluster.getConf(), config)) { From dd8daa46eca865bb9267f1c3a1df6df6582a9ccd Mon Sep 17 00:00:00 2001 From: Siyao Meng <50227127+smengcl@users.noreply.github.com> Date: Sat, 17 Aug 2024 10:24:10 -0700 Subject: [PATCH 34/50] HDDS-11327. 
[hsync] Revert config default ozone.fs.hsync.enabled to false (#7079) --- .../src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java | 2 +- hadoop-hdds/common/src/main/resources/ozone-default.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java index 38ebc80b27e6..a1547a536b9c 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java @@ -126,7 +126,7 @@ public final class OzoneConfigKeys { public static final String OZONE_FS_HSYNC_ENABLED = "ozone.fs.hsync.enabled"; public static final boolean OZONE_FS_HSYNC_ENABLED_DEFAULT - = true; + = false; /** * hsync lease soft limit. diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index ad06f2f6e334..ee060bf7ebe4 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -4213,7 +4213,7 @@ ozone.fs.hsync.enabled - true + false OZONE, CLIENT Enable hsync/hflush. By default they are disabled. From 56e877902384da905387eb51a5642d4d47bead70 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 17 Aug 2024 21:03:22 +0200 Subject: [PATCH 35/50] HDDS-11337. Bump Spring Framework to 5.3.39 (#7084) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 88b897874188..683e1bc2f2ee 100644 --- a/pom.xml +++ b/pom.xml @@ -298,7 +298,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs 1.4.0 3.9.8.1 - 5.3.37 + 5.3.39 3.11.10 From a267c28492c43b347207f1a58cd3c22c7c7e9ece Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 17 Aug 2024 22:10:12 +0200 Subject: [PATCH 36/50] HDDS-11338. Bump zstd-jni to 1.5.6-4 (#7085) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 683e1bc2f2ee..b5a6323bed9e 100644 --- a/pom.xml +++ b/pom.xml @@ -114,7 +114,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs 3.2.2 1.27.0 2.10.1 - 1.5.6-3 + 1.5.6-4 1.4.0 2.16.1 3.14.0 From 7f24f2d3e41c4bffb94bd93b4b1adcf075da082c Mon Sep 17 00:00:00 2001 From: "Doroszlai, Attila" Date: Sun, 18 Aug 2024 12:03:33 +0200 Subject: [PATCH 37/50] HDDS-11323. 
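With the default flipped back to false, hsync/hflush has to be enabled explicitly before clients can rely on it. A hedged sketch of how a caller might check the flag, assuming the usual OzoneConfiguration/Configuration API; the key and default constants are the ones shown in the diff above, while the class name HsyncFlagCheck is illustrative only.

import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.ozone.OzoneConfigKeys;

public class HsyncFlagCheck {
  public static void main(String[] args) {
    OzoneConfiguration conf = new OzoneConfiguration();
    // false by default after this change; set ozone.fs.hsync.enabled=true to opt in.
    boolean hsyncEnabled = conf.getBoolean(
        OzoneConfigKeys.OZONE_FS_HSYNC_ENABLED,
        OzoneConfigKeys.OZONE_FS_HSYNC_ENABLED_DEFAULT);
    System.out.println("hsync/hflush enabled: " + hsyncEnabled);
  }
}
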
Mark TestLeaseRecovery as flaky --- .../test/java/org/apache/hadoop/fs/ozone/TestLeaseRecovery.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestLeaseRecovery.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestLeaseRecovery.java index 6ec233fc35bb..a4a9bcff4704 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestLeaseRecovery.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestLeaseRecovery.java @@ -45,6 +45,7 @@ import org.apache.hadoop.ozone.om.helpers.BucketLayout; import org.apache.hadoop.utils.FaultInjectorImpl; import org.apache.ozone.test.GenericTestUtils; +import org.apache.ozone.test.tag.Flaky; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -82,6 +83,7 @@ * Test cases for recoverLease() API. */ @Timeout(300) +@Flaky("HDDS-11323") public class TestLeaseRecovery { private MiniOzoneCluster cluster; From bb80a14f35a1cce2b0940987b37d236a41b7bc6f Mon Sep 17 00:00:00 2001 From: Smita <112169209+smitajoshi12@users.noreply.github.com> Date: Mon, 19 Aug 2024 21:30:00 +0530 Subject: [PATCH 38/50] HDDS-11246. [Recon] Use optional chaining instead of explicit undefined check for Objects in Container and Pipeline Module. (#7037) --- .../src/views/missingContainers/missingContainers.tsx | 8 ++++---- .../ozone-recon-web/src/views/pipelines/pipelines.tsx | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/views/missingContainers/missingContainers.tsx b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/views/missingContainers/missingContainers.tsx index 1f583a554948..fff4a05a5036 100644 --- a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/views/missingContainers/missingContainers.tsx +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/views/missingContainers/missingContainers.tsx @@ -398,22 +398,22 @@ export class MissingContainers extends React.Component, I 0) ? ` (${missingDataSource.length})` : ''}`}> + tab={`Missing (${missingDataSource?.length ?? 0})`}> {generateTable(missingDataSource)} 0) ? ` (${underReplicatedDataSource.length})` : ''}`}> + tab={`Under-Replicated (${underReplicatedDataSource?.length ?? 0})`}> {generateTable(underReplicatedDataSource)} 0) ? ` (${overReplicatedDataSource.length})` : ''}`}> + tab={`Over-Replicated (${overReplicatedDataSource?.length ?? 0})`}> {generateTable(overReplicatedDataSource)} 0) ? ` (${misReplicatedDataSource.length})` : ''}`}> + tab={`Mis-Replicated (${misReplicatedDataSource?.length ?? 0})`}> {generateTable(misReplicatedDataSource)} diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/views/pipelines/pipelines.tsx b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/views/pipelines/pipelines.tsx index e0167bd11bd1..c53be4f7a205 100644 --- a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/views/pipelines/pipelines.tsx +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/views/pipelines/pipelines.tsx @@ -116,8 +116,8 @@ const COLUMNS = [ render: (datanodes: string[]) =>

{datanodes && datanodes.map(datanode =>
-              triggerNode}>
-                {datanode && datanode.hostName}
+              triggerNode}>
+                {datanode?.hostName ?? 'N/A'}
)} From 88b88fffb1816db0c686060aa786c71ce7098138 Mon Sep 17 00:00:00 2001 From: Sammi Chen Date: Tue, 20 Aug 2024 01:43:52 +0800 Subject: [PATCH 39/50] HDDS-11324. Negative value preOpLatencyMs in DN audit log (#7093) --- .../container/common/impl/HddsDispatcher.java | 65 +++++++++++-------- .../hadoop/ozone/audit/AuditLogger.java | 10 ++- 2 files changed, 47 insertions(+), 28 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/HddsDispatcher.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/HddsDispatcher.java index cac4df73cc71..c5855b38b74e 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/HddsDispatcher.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/HddsDispatcher.java @@ -96,7 +96,9 @@ public class HddsDispatcher implements ContainerDispatcher, Auditor { private static final String AUDIT_PARAM_FORCE_DELETE = "forceDelete"; private static final String AUDIT_PARAM_START_CONTAINER_ID = "startContainerID"; private static final String AUDIT_PARAM_BLOCK_DATA = "blockData"; - private static final String AUDIT_PARAM_BLOCK_DATA_SIZE = "blockDataSize"; + private static final String AUDIT_PARAM_BLOCK_DATA_OFFSET = "offset"; + private static final String AUDIT_PARAM_BLOCK_DATA_SIZE = "size"; + private static final String AUDIT_PARAM_BLOCK_DATA_STAGE = "stage"; private static final String AUDIT_PARAM_COUNT = "count"; private static final String AUDIT_PARAM_START_LOCAL_ID = "startLocalID"; private static final String AUDIT_PARAM_PREV_CHUNKNAME = "prevChunkName"; @@ -112,7 +114,7 @@ public class HddsDispatcher implements ContainerDispatcher, Auditor { private String clusterId; private ContainerMetrics metrics; private final TokenVerifier tokenVerifier; - private long slowOpThresholdMs; + private long slowOpThresholdNs; private VolumeUsage.MinFreeSpaceCalculator freeSpaceCalculator; /** @@ -134,7 +136,7 @@ public HddsDispatcher(ConfigurationSource config, ContainerSet contSet, HddsConfigKeys.HDDS_CONTAINER_CLOSE_THRESHOLD_DEFAULT); this.tokenVerifier = tokenVerifier != null ? 
tokenVerifier : new NoopTokenVerifier(); - this.slowOpThresholdMs = getSlowOpThresholdMs(conf); + this.slowOpThresholdNs = getSlowOpThresholdMs(conf) * 1000000; protocolMetrics = new ProtocolMessageMetrics<>( @@ -279,7 +281,7 @@ private ContainerCommandResponseProto dispatchRequest( "ContainerID " + containerID + " has been lost and cannot be recreated on this DataNode", ContainerProtos.Result.CONTAINER_MISSING); - audit(action, eventType, msg, AuditEventStatus.FAILURE, sce); + audit(action, eventType, msg, dispatcherContext, AuditEventStatus.FAILURE, sce); return ContainerUtils.logAndReturnError(LOG, sce, msg); } @@ -306,7 +308,7 @@ private ContainerCommandResponseProto dispatchRequest( StorageContainerException sce = new StorageContainerException( "ContainerID " + containerID + " creation failed", responseProto.getResult()); - audit(action, eventType, msg, AuditEventStatus.FAILURE, sce); + audit(action, eventType, msg, dispatcherContext, AuditEventStatus.FAILURE, sce); return ContainerUtils.logAndReturnError(LOG, sce, msg); } Preconditions.checkArgument(isWriteStage && container2BCSIDMap != null @@ -325,13 +327,13 @@ private ContainerCommandResponseProto dispatchRequest( StorageContainerException sce = new StorageContainerException( "ContainerID " + containerID + " does not exist", ContainerProtos.Result.CONTAINER_NOT_FOUND); - audit(action, eventType, msg, AuditEventStatus.FAILURE, sce); + audit(action, eventType, msg, dispatcherContext, AuditEventStatus.FAILURE, sce); return ContainerUtils.logAndReturnError(LOG, sce, msg); } containerType = getContainerType(container); } else { if (!msg.hasCreateContainer()) { - audit(action, eventType, msg, AuditEventStatus.FAILURE, + audit(action, eventType, msg, dispatcherContext, AuditEventStatus.FAILURE, new Exception("MALFORMED_REQUEST")); return malformedRequest(msg); } @@ -348,10 +350,10 @@ private ContainerCommandResponseProto dispatchRequest( "ContainerType " + containerType, ContainerProtos.Result.CONTAINER_INTERNAL_ERROR); // log failure - audit(action, eventType, msg, AuditEventStatus.FAILURE, ex); + audit(action, eventType, msg, dispatcherContext, AuditEventStatus.FAILURE, ex); return ContainerUtils.logAndReturnError(LOG, ex, msg); } - perf.appendPreOpLatencyMs(Time.monotonicNow() - startTime); + perf.appendPreOpLatencyNano(Time.monotonicNowNanos() - startTime); responseProto = handler.handle(msg, container, dispatcherContext); long opLatencyNs = Time.monotonicNowNanos() - startTime; if (responseProto != null) { @@ -417,7 +419,7 @@ private ContainerCommandResponseProto dispatchRequest( } if (result == Result.SUCCESS) { updateBCSID(container, dispatcherContext, cmdType); - audit(action, eventType, msg, AuditEventStatus.SUCCESS, null); + audit(action, eventType, msg, dispatcherContext, AuditEventStatus.SUCCESS, null); } else { //TODO HDDS-7096: // This is a too general place for on demand scanning. @@ -425,16 +427,16 @@ private ContainerCommandResponseProto dispatchRequest( // and move this general scan to where it is more appropriate. // Add integration tests to test the full functionality. 
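The negative preOpLatencyMs named in the commit subject appears to stem from mixing clock units, a nanosecond start timestamp combined with a millisecond reading, which the patch addresses by keeping the whole measurement in nanoseconds and converting only when the value is emitted. A minimal sketch of that pattern, with an arbitrary 500 ms threshold and illustrative names of my own choosing, not the dispatcher's actual fields:

import java.util.concurrent.TimeUnit;

public class LatencyUnitsDemo {
  // Threshold is configured in ms but compared in ns, so convert it once up front.
  private static final long SLOW_OP_THRESHOLD_NS = TimeUnit.MILLISECONDS.toNanos(500);

  public static void main(String[] args) throws InterruptedException {
    final long startNs = System.nanoTime(); // one monotonic clock for the whole request
    Thread.sleep(5);                        // stand-in for the pre-op work
    final long preOpNs = System.nanoTime() - startNs;

    // Convert only at the reporting boundary; the measurement itself stays in ns.
    System.out.println("preOpLatencyMs=" + TimeUnit.NANOSECONDS.toMillis(preOpNs));
    System.out.println("slow=" + (preOpNs >= SLOW_OP_THRESHOLD_NS));
  }
}
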
OnDemandContainerDataScanner.scanContainer(container); - audit(action, eventType, msg, AuditEventStatus.FAILURE, + audit(action, eventType, msg, dispatcherContext, AuditEventStatus.FAILURE, new Exception(responseProto.getMessage())); } - perf.appendOpLatencyMs(opLatencyNs); - performanceAudit(action, msg, perf, opLatencyNs); + perf.appendOpLatencyNanos(opLatencyNs); + performanceAudit(action, msg, dispatcherContext, perf, opLatencyNs); return responseProto; } else { // log failure - audit(action, eventType, msg, AuditEventStatus.FAILURE, + audit(action, eventType, msg, dispatcherContext, AuditEventStatus.FAILURE, new Exception("UNSUPPORTED_REQUEST")); return unsupportedRequest(msg); } @@ -547,7 +549,7 @@ public void validateContainerCommand( StorageContainerException ex = new StorageContainerException( "Invalid ContainerType " + containerType, ContainerProtos.Result.CONTAINER_INTERNAL_ERROR); - audit(action, eventType, msg, AuditEventStatus.FAILURE, ex); + audit(action, eventType, msg, null, AuditEventStatus.FAILURE, ex); throw ex; } @@ -567,12 +569,12 @@ public void validateContainerCommand( // if the container is not open/recovering, no updates can happen. Just // throw an exception ContainerNotOpenException cex = new ContainerNotOpenException(log); - audit(action, eventType, msg, AuditEventStatus.FAILURE, cex); + audit(action, eventType, msg, null, AuditEventStatus.FAILURE, cex); throw cex; } } else if (HddsUtils.isReadOnly(msg) && containerState == State.INVALID) { InvalidContainerStateException iex = new InvalidContainerStateException(log); - audit(action, eventType, msg, AuditEventStatus.FAILURE, iex); + audit(action, eventType, msg, null, AuditEventStatus.FAILURE, iex); throw iex; } } @@ -678,14 +680,14 @@ private EventType getEventType(ContainerCommandRequestProto msg) { } private void audit(AuditAction action, EventType eventType, - ContainerCommandRequestProto msg, AuditEventStatus result, - Throwable exception) { + ContainerCommandRequestProto msg, DispatcherContext dispatcherContext, + AuditEventStatus result, Throwable exception) { Map params; AuditMessage amsg; switch (result) { case SUCCESS: if (isAllowed(action.getAction())) { - params = getAuditParams(msg); + params = getAuditParams(msg, dispatcherContext); if (eventType == EventType.READ && AUDIT.getLogger().isInfoEnabled(AuditMarker.READ.getMarker())) { amsg = buildAuditMessageForSuccess(action, params); @@ -699,7 +701,7 @@ private void audit(AuditAction action, EventType eventType, break; case FAILURE: - params = getAuditParams(msg); + params = getAuditParams(msg, dispatcherContext); if (eventType == EventType.READ && AUDIT.getLogger().isErrorEnabled(AuditMarker.READ.getMarker())) { amsg = buildAuditMessageForFailure(action, params, exception); @@ -719,9 +721,9 @@ private void audit(AuditAction action, EventType eventType, } private void performanceAudit(AuditAction action, ContainerCommandRequestProto msg, - PerformanceStringBuilder performance, long opLatencyMs) { - if (isOperationSlow(opLatencyMs)) { - Map params = getAuditParams(msg); + DispatcherContext dispatcherContext, PerformanceStringBuilder performance, long opLatencyNs) { + if (isOperationSlow(opLatencyNs)) { + Map params = getAuditParams(msg, dispatcherContext); AuditMessage auditMessage = buildAuditMessageForPerformance(action, params, performance); AUDIT.logPerformance(auditMessage); @@ -837,7 +839,7 @@ private static DNAction getAuditAction(Type cmdType) { } private static Map getAuditParams( - ContainerCommandRequestProto msg) { + 
ContainerCommandRequestProto msg, DispatcherContext dispatcherContext) { Map auditParams = new TreeMap<>(); Type cmdType = msg.getCmdType(); String containerID = String.valueOf(msg.getContainerID()); @@ -904,6 +906,8 @@ private static Map getAuditParams( case ReadChunk: auditParams.put(AUDIT_PARAM_BLOCK_DATA, BlockID.getFromProtobuf(msg.getReadChunk().getBlockID()).toString()); + auditParams.put(AUDIT_PARAM_BLOCK_DATA_OFFSET, + String.valueOf(msg.getReadChunk().getChunkData().getOffset())); auditParams.put(AUDIT_PARAM_BLOCK_DATA_SIZE, String.valueOf(msg.getReadChunk().getChunkData().getLen())); return auditParams; @@ -918,8 +922,13 @@ private static Map getAuditParams( auditParams.put(AUDIT_PARAM_BLOCK_DATA, BlockID.getFromProtobuf(msg.getWriteChunk().getBlockID()) .toString()); + auditParams.put(AUDIT_PARAM_BLOCK_DATA_OFFSET, + String.valueOf(msg.getWriteChunk().getChunkData().getOffset())); auditParams.put(AUDIT_PARAM_BLOCK_DATA_SIZE, String.valueOf(msg.getWriteChunk().getChunkData().getLen())); + if (dispatcherContext != null && dispatcherContext.getStage() != null) { + auditParams.put(AUDIT_PARAM_BLOCK_DATA_STAGE, dispatcherContext.getStage().toString()); + } return auditParams; case ListChunk: @@ -936,6 +945,8 @@ private static Map getAuditParams( auditParams.put(AUDIT_PARAM_BLOCK_DATA, BlockData.getFromProtoBuf(msg.getPutSmallFile() .getBlock().getBlockData()).toString()); + auditParams.put(AUDIT_PARAM_BLOCK_DATA_OFFSET, + String.valueOf(msg.getPutSmallFile().getChunkInfo().getOffset())); auditParams.put(AUDIT_PARAM_BLOCK_DATA_SIZE, String.valueOf(msg.getPutSmallFile().getChunkInfo().getLen())); } catch (IOException ex) { @@ -975,7 +986,7 @@ private static Map getAuditParams( } - private boolean isOperationSlow(long opLatencyMs) { - return opLatencyMs >= slowOpThresholdMs; + private boolean isOperationSlow(long opLatencyNs) { + return opLatencyNs >= slowOpThresholdNs; } } diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/ozone/audit/AuditLogger.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/ozone/audit/AuditLogger.java index 042887e4e533..5e8996df1727 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/ozone/audit/AuditLogger.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/ozone/audit/AuditLogger.java @@ -168,12 +168,20 @@ public void appendOpLatencyNanos(long nanos) { /** * Appends pre-operation operation latency in milliseconds. - * @param millis Latency in nanoseconds. + * @param millis Latency in milliseconds. */ public void appendPreOpLatencyMs(long millis) { append("preOpLatencyMs", millis); } + /** + * Appends pre-operation operation latency in milliseconds. + * @param nanos Latency in nanoseconds. + */ + public void appendPreOpLatencyNano(long nanos) { + append("preOpLatencyMs", TimeUnit.NANOSECONDS.toMillis(nanos)); + } + /** * Appends whole operation latency in milliseconds. * @param millis Latency in milliseconds. From ebad3500332fc2639615b36dde2680bdb3c5d5ca Mon Sep 17 00:00:00 2001 From: Abhishek Pal <43001336+devabhishekpal@users.noreply.github.com> Date: Tue, 20 Aug 2024 10:50:22 +0530 Subject: [PATCH 40/50] HDDS-11155. 
Improve Volumes page UI (#7048) --- .../webapps/recon/ozone-recon-web/src/app.tsx | 12 +- .../ozone-recon-web/src/utils/common.tsx | 5 +- .../src/v2/components/aclDrawer/aclDrawer.tsx | 119 ++++++ .../src/v2/components/eChart/eChart.tsx | 2 +- .../src/v2/components/loader/loader.tsx | 40 ++ .../src/v2/components/search/search.tsx | 70 ++++ .../src/v2/components/select/columnTag.tsx | 67 ++++ .../src/v2/components/select/multiSelect.tsx | 104 ++++++ .../src/v2/components/select/singleSelect.tsx | 87 +++++ .../src/v2/constants/acl.constants.tsx | 37 ++ .../src/v2/constants/select.constants.tsx | 62 +++ .../src/v2/hooks/debounce.hook.tsx | 35 ++ .../src/v2/pages/volumes/volumes.less | 41 ++ .../src/v2/pages/volumes/volumes.tsx | 353 ++++++++++++++++++ .../ozone-recon-web/src/v2/routes-v2.tsx | 10 +- .../ozone-recon-web/src/v2/types/acl.types.ts | 47 +++ .../src/v2/types/bucket.types.ts | 55 +++ .../src/v2/types/volume.types.ts | 44 +++ 18 files changed, 1180 insertions(+), 10 deletions(-) create mode 100644 hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/aclDrawer/aclDrawer.tsx create mode 100644 hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/loader/loader.tsx create mode 100644 hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/search/search.tsx create mode 100644 hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/select/columnTag.tsx create mode 100644 hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/select/multiSelect.tsx create mode 100644 hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/select/singleSelect.tsx create mode 100644 hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/constants/acl.constants.tsx create mode 100644 hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/constants/select.constants.tsx create mode 100644 hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/hooks/debounce.hook.tsx create mode 100644 hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/pages/volumes/volumes.less create mode 100644 hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/pages/volumes/volumes.tsx create mode 100644 hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/types/acl.types.ts create mode 100644 hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/types/bucket.types.ts create mode 100644 hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/types/volume.types.ts diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/app.tsx b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/app.tsx index c52fe9efa922..0ad6aa3f174c 100644 --- a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/app.tsx +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/app.tsx @@ -16,7 +16,7 @@ * limitations under the License. 
*/ -import React from 'react'; +import React, { Suspense } from 'react'; import { Switch as AntDSwitch, Layout } from 'antd'; import NavBar from './components/navBar/navBar'; @@ -27,6 +27,8 @@ import { routesV2 } from '@/v2/routes-v2'; import { MakeRouteWithSubRoutes } from '@/makeRouteWithSubRoutes'; import classNames from 'classnames'; +import Loader from '@/v2/components/loader/loader'; + import './app.less'; const { @@ -80,9 +82,11 @@ class App extends React.Component, IAppState> { {(enableNewUI) - ? routesV2.map( - (route, index) => - ) + ? }> + {routesV2.map( + (route, index) => + )} + : routes.map( (route, index) => ) diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/utils/common.tsx b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/utils/common.tsx index 6886fd189f6c..f641b8797d9e 100644 --- a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/utils/common.tsx +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/utils/common.tsx @@ -44,9 +44,8 @@ const showInfoNotification = (title: string, description: string) => { export const showDataFetchError = (error: string) => { let title = 'Error while fetching data'; - if (error.includes('CanceledError')) { - error = 'Previous request cancelled because context changed' - } + + if (error.includes('CanceledError')) return; if (error.includes('metadata')) { title = 'Metadata Initialization:'; showInfoNotification(title, error); diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/aclDrawer/aclDrawer.tsx b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/aclDrawer/aclDrawer.tsx new file mode 100644 index 000000000000..af0931c17fac --- /dev/null +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/aclDrawer/aclDrawer.tsx @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import React, { useEffect, useState } from 'react'; +import { Table, Drawer, Tag } from 'antd'; + +import { AclRightsColorMap, AclIdColorMap } from '@/v2/constants/acl.constants'; +import { Acl, ACLIdentity, ACLIdentityTypeList } from '@/v2/types/acl.types'; +import { ColumnType } from 'antd/es/table'; + +// ------------- Types -------------- // +type AclDrawerProps = { + visible: boolean; + acls: Acl[] | undefined; + entityName: string; + entityType: string; + onClose: () => void; +} + + +// ------------- Component -------------- // +const AclPanel: React.FC = ({ + visible, + acls, + entityType, + entityName, + onClose +}) => { + const [isVisible, setIsVisible] = useState(false); + + useEffect(() => { + setIsVisible(visible); + }, [visible]); + + const renderAclList = (_: string, acl: Acl) => { + return acl.aclList.map(aclRight => ( + + {aclRight} + + )) + } + + const renderAclIdentityType = (acl: string) => { + return ( + + {acl} + + ) + } + + const COLUMNS: ColumnType[] = [ + { + title: 'Name', + dataIndex: 'name', + key: 'name', + sorter: (a: Acl, b: Acl) => a.name.localeCompare(b.name), + }, + { + title: 'ACL Type', + dataIndex: 'type', + key: 'type', + filterMultiple: true, + filters: ACLIdentityTypeList.map(state => ({ text: state, value: state })), + onFilter: (value: ACLIdentity, record: Acl) => (record.type === value), + sorter: (a: Acl, b: Acl) => a.type.localeCompare(b.type), + render: renderAclIdentityType + }, + { + title: 'ACL Scope', + dataIndex: 'scope', + key: 'scope', + }, + { + title: 'ACLs', + dataIndex: 'aclList', + key: 'acls', + render: renderAclList + } + ]; + + return ( +
+ + +
+
+
+ ); +}; + +export default AclPanel; \ No newline at end of file diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/eChart/eChart.tsx b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/eChart/eChart.tsx index 8be22fcc9f65..79fa07603386 100644 --- a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/eChart/eChart.tsx +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/eChart/eChart.tsx @@ -83,7 +83,7 @@ const EChart = ({ } }, [loading, theme]); // If we switch theme we should put chart in loading mode, and also if loading changes i.e completes then hide loader - return
; + return
; } export default EChart; \ No newline at end of file diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/loader/loader.tsx b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/loader/loader.tsx new file mode 100644 index 000000000000..b05eaa5f0a59 --- /dev/null +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/loader/loader.tsx @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import React from "react" +import { Spin } from "antd" +import { LoadingOutlined } from "@ant-design/icons" + +// ------------- Constants -------------- // +const loaderStyle: React.CSSProperties = { + height: '100%', + width: '100%', + textAlign: 'center', + paddingTop: '25%' +} + +// ------------- Component -------------- // +const Loader: React.FC = () => { + return ( +
+ }/> +
+ ) +} + +export default Loader; diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/search/search.tsx b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/search/search.tsx new file mode 100644 index 000000000000..21d4341787ed --- /dev/null +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/search/search.tsx @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import React from 'react'; +import { Input, Select } from 'antd'; + +import { Option } from '@/v2/components/select/singleSelect'; + +// ------------- Types -------------- // +type SearchProps = { + searchColumn?: string; + searchInput: string; + searchOptions?: Option[]; + onSearchChange: ( + arg0: React.ChangeEvent + ) => void; + onChange: ( + value: string, + //OptionType, OptionGroupData and OptionData are not + //currently exported by AntD hence set to any + option: any + ) => void; +} + +// ------------- Component -------------- // +const Search: React.FC = ({ + searchColumn, + searchInput = '', + searchOptions = [], + onSearchChange = () => {}, + onChange = () => {} // Assign default value as a void function +}) => { + + const selectFilter = searchColumn + ? ( + ) +} + +export default Search; diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/select/columnTag.tsx b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/select/columnTag.tsx new file mode 100644 index 000000000000..f367504286f2 --- /dev/null +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/select/columnTag.tsx @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import React from "react"; +import { Tag } from "antd"; +import { createPortal } from "react-dom"; + + +// ------------- Types -------------- // +/** + * Due to design decisions we are currently not using the Tags + * Until we reach a concensus on a better way to display the filter + * Keeping the code in case we require it in the future + */ +export type TagProps = { + label: string; + closable: boolean; + tagRef: React.RefObject; + onClose: (arg0: string) => void; +} + +// ------------- Component -------------- // +const ColumnTag: React.FC = ({ + label = '', + closable = true, + tagRef = null, + onClose = () => {} // Assign default value as void funciton +}) => { + const onPreventMouseDown = (event: React.MouseEvent) => { + // By default when clickin on the tags the text will get selected + // which might interfere with user experience as people would want to close tags + // but accidentally select tag text. Hence we prevent this behaviour. + event.preventDefault(); + event.stopPropagation(); + }; + + if (!tagRef?.current) return null; + + return createPortal( + (onClose(label))} + style={{marginRight: 3}}> + {label} + , + tagRef.current + ); +} + +export default ColumnTag; diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/select/multiSelect.tsx b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/select/multiSelect.tsx new file mode 100644 index 000000000000..7a6b494aaeb6 --- /dev/null +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/select/multiSelect.tsx @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import React from "react"; +import { + default as ReactSelect, + Props as ReactSelectProps, + components, + OptionProps, + ValueType +} from 'react-select'; + +import { selectStyles } from "@/v2/constants/select.constants"; + + +// ------------- Types -------------- // +export type Option = { + label: string; + value: string; +} + +interface MultiSelectProps extends ReactSelectProps { + options: Option[]; + selected: Option[]; + placeholder: string; + fixedColumn: string; + columnLength: number; + onChange: (arg0: ValueType) => void; + onTagClose: (arg0: string) => void; +} + +// ------------- Component -------------- // +const MultiSelect: React.FC = ({ + options = [], + selected = [], + maxSelected = 5, + placeholder = 'Columns', + fixedColumn, + columnLength, + tagRef, + onTagClose = () => { }, // Assign default value as a void function + onChange = () => { }, // Assign default value as a void function + ...props +}) => { + + const Option: React.FC> = (props) => { + return ( +
+ + null} /> + + +
+ ) + } + + return ( + ) => { + if (selected?.length === options.length) return onChange!(options); + return onChange!(selected); + }} + styles={selectStyles} /> + ) +} + +export default MultiSelect; diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/select/singleSelect.tsx b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/select/singleSelect.tsx new file mode 100644 index 000000000000..41ab03f5982c --- /dev/null +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/select/singleSelect.tsx @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import React from "react"; +import Select, { + Props as ReactSelectProps, + components, + ValueType, + ValueContainerProps, + StylesConfig +} from 'react-select'; + +import { selectStyles } from "@/v2/constants/select.constants"; + + +// ------------- Types -------------- // +export type Option = { + label: string; + value: string; +} + +interface SingleSelectProps extends ReactSelectProps { + options: Option[]; + placeholder: string; + onChange: (arg0: ValueType) => void; +} + +// ------------- Component -------------- // +const SingleSelect: React.FC = ({ + options = [], + placeholder = 'Limit', + onChange = () => { }, // Assign default value as a void function + ...props // Desctructure other select props +}) => { + + + const ValueContainer = ({ children, ...props }: ValueContainerProps) => { + const selectedLimit = props.getValue() as Option[]; + return ( + + {React.Children.map(children, (child) => ( + ((child as React.ReactElement> + | React.ReactPortal)?.type as React.JSXElementConstructor)).name === "DummyInput" + ? child + : null + )} + Limit: {selectedLimit[0]?.label ?? ''} + + ); + }; + + return ( +