diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/GlobalStateIdContext.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/GlobalStateIdContext.java index 7d613594efd64..85397a273c62d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/GlobalStateIdContext.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/GlobalStateIdContext.java @@ -30,6 +30,7 @@ import org.apache.hadoop.hdfs.server.namenode.ha.ObserverReadProxyProvider; import org.apache.hadoop.hdfs.server.namenode.ha.ReadOnly; import org.apache.hadoop.ipc.AlignmentContext; +import org.apache.hadoop.ipc.ObserverRetryOnActiveException; import org.apache.hadoop.ipc.RetriableException; import org.apache.hadoop.ipc.StandbyException; import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos.RpcRequestHeaderProto; @@ -156,9 +157,9 @@ public long receiveRequestState(RpcRequestHeaderProto header, ESTIMATED_TRANSACTIONS_PER_SECOND * TimeUnit.MILLISECONDS.toSeconds(clientWaitTime) * ESTIMATED_SERVER_TIME_MULTIPLIER) { - throw new RetriableException( - "Observer Node is too far behind: serverStateId = " - + serverStateId + " clientStateId = " + clientStateId); + throw new ObserverRetryOnActiveException("Retrying to Active NameNode, Observer Node is too" + + " far behind: serverStateId = " + serverStateId + + " clientStateId = " + clientStateId); } return clientStateId; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestObserverNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestObserverNode.java index 55d17d3bb27c9..b27beb585dc87 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestObserverNode.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestObserverNode.java @@ -518,6 +518,31 @@ public void testObserverRetryActiveException() throws Exception { assertTrue(thrownRetryException); } + /** + * Test that, when the server stateId is too far behind the + * client stateId, the request should be retried directly to + * Active NameNode, instead of constantly trying again. + */ + @Test + public void testObserverRetryActiveExceptionWhenStateIdTooStale() throws Exception { + dfs.mkdir(testPath, FsPermission.getDefault()); + assertSentTo(0); + + // Set a large stateId on the client, so the server stateId is too far behind + // the client stateId and the request is retried on the active. + long realStateId = HATestUtil.setACStateId(dfs, 1000000); + FileStatus fileStatus = dfs.getFileStatus(testPath); + assertNotNull(fileStatus); + assertSentTo(0); + + // Restore the real stateId on the client; the request is processed by the observer. + HATestUtil.setACStateId(dfs, realStateId); + FileStatus fileStatus2= dfs.getFileStatus(testPath); + assertNotNull(fileStatus2); + assertSentTo(2); + + } + /** * Test that for open call, if access time update is required, * the open call should go to active, instead of observer.