You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
使用 flink1.14.4 flink-doris-connector-1.14 1.1.1 版本写入doris 2.1.6,当一个be节点长时间失联时,flink job失败且无法从checkpoint恢复。
核心报错如下:
2024-10-12 14:13:48,049 ERROR org.apache.doris.flink.sink.committer.DorisCommitter [] - commit transaction failed:
org.apache.http.conn.HttpHostConnectException: Connect to 10.126.72.64:8040 [/10.126.72.64] failed: Connection refused (Connection
refused)
at org.apache.http.impl.conn.DefaultHttpClientConnectionOperator.connect(DefaultHttpClientConnectionOperator.java:156) ~[
blob_p-61f0320fe7cc111cc844c6575ef7cc48af8a4b1f-139883e44a5481fda2030ca9306e6ec8:?]
at org.apache.http.impl.conn.PoolingHttpClientConnectionManager.connect(PoolingHttpClientConnectionManager.java:374) ~[bl
ob_p-61f0320fe7cc111cc844c6575ef7cc48af8a4b1f-139883e44a5481fda2030ca9306e6ec8:?]
at org.apache.http.impl.execchain.MainClientExec.establishRoute(MainClientExec.java:393) ~[blob_p-61f0320fe7cc111cc844c65
75ef7cc48af8a4b1f-139883e44a5481fda2030ca9306e6ec8:?]
at org.apache.http.impl.execchain.MainClientExec.execute(MainClientExec.java:236) ~[blob_p-61f0320fe7cc111cc844c6575ef7cc
48af8a4b1f-139883e44a5481fda2030ca9306e6ec8:?]
at org.apache.http.impl.execchain.ProtocolExec.execute(ProtocolExec.java:186) ~[blob_p-61f0320fe7cc111cc844c6575ef7cc48af
8a4b1f-139883e44a5481fda2030ca9306e6ec8:?]
at org.apache.http.impl.execchain.RetryExec.execute(RetryExec.java:89) ~[blob_p-61f0320fe7cc111cc844c6575ef7cc48af8a4b1f-
139883e44a5481fda2030ca9306e6ec8:?]
at org.apache.http.impl.execchain.RedirectExec.execute(RedirectExec.java:110) ~[blob_p-61f0320fe7cc111cc844c6575ef7cc48af
8a4b1f-139883e44a5481fda2030ca9306e6ec8:?]
at org.apache.http.impl.client.InternalHttpClient.doExecute(InternalHttpClient.java:185) ~[blob_p-61f0320fe7cc111cc844c65
75ef7cc48af8a4b1f-139883e44a5481fda2030ca9306e6ec8:?]
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:83) ~[blob_p-61f0320fe7cc111cc844c657
5ef7cc48af8a4b1f-139883e44a5481fda2030ca9306e6ec8:?]
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:108) ~[blob_p-61f0320fe7cc111cc844c65
75ef7cc48af8a4b1f-139883e44a5481fda2030ca9306e6ec8:?]
at org.apache.doris.flink.sink.committer.DorisCommitter.commitTransaction(DorisCommitter.java:91) ~[blob_p-61f0320fe7cc11
1cc844c6575ef7cc48af8a4b1f-139883e44a5481fda2030ca9306e6ec8:?]
at org.apache.doris.flink.sink.committer.DorisCommitter.commit(DorisCommitter.java:71) ~[blob_p-61f0320fe7cc111cc844c6575
ef7cc48af8a4b1f-139883e44a5481fda2030ca9306e6ec8:?]
at org.apache.flink.streaming.runtime.operators.sink.StreamingCommitterHandler.commit(StreamingCommitterHandler.java:54)
~[flink-dist_2.11-1.14.4.jar:1.14.4]
at org.apache.flink.streaming.runtime.operators.sink.AbstractStreamingCommitterHandler.retry(AbstractStreamingCommitterHa
ndler.java:99) ~[flink-dist_2.11-1.14.4.jar:1.14.4]
at org.apache.flink.streaming.runtime.operators.sink.AbstractCommitterHandler.retry(AbstractCommitterHandler.java:66) ~[f
link-dist_2.11-1.14.4.jar:1.14.4]
at org.apache.flink.streaming.runtime.operators.sink.CommitRetrier.retry(CommitRetrier.java:80) ~[flink-dist_2.11-1.14.4.
jar:1.14.4]
at org.apache.flink.streaming.runtime.operators.sink.CommitRetrier.lambda$retryAt$0(CommitRetrier.java:63) ~[flink-dist_2
.11-1.14.4.jar:1.14.4]
at org.apache.flink.streaming.runtime.tasks.StreamTask.invokeProcessingTimeCallback(StreamTask.java:1693) ~[flink-dist_2.
11-1.14.4.jar:1.14.4]
at org.apache.flink.streaming.runtime.tasks.StreamTask.lambda$null$22(StreamTask.java:1684) ~[flink-dist_2.11-1.14.4.jar:
1.14.4]
at org.apache.flink.streaming.runtime.tasks.StreamTaskActionExecutor$1.runThrowing(StreamTaskActionExecutor.java:50) ~[fl
ink-dist_2.11-1.14.4.jar:1.14.4]
at org.apache.flink.streaming.runtime.tasks.mailbox.Mail.run(Mail.java:90) ~[flink-dist_2.11-1.14.4.jar:1.14.4]
at org.apache.flink.streaming.runtime.tasks.mailbox.MailboxProcessor.processMailsWhenDefaultActionUnavailable(MailboxProc
essor.java:338) ~[flink-dist_2.11-1.14.4.jar:1.14.4]
at org.apache.flink.streaming.runtime.tasks.mailbox.MailboxProcessor.processMail(MailboxProcessor.java:324) ~[flink-dist_
2.11-1.14.4.jar:1.14.4]
at org.apache.flink.streaming.runtime.tasks.mailbox.MailboxProcessor.runMailboxLoop(MailboxProcessor.java:201) ~[flink-di
st_2.11-1.14.4.jar:1.14.4]
at org.apache.flink.streaming.runtime.tasks.StreamTask.runMailboxLoop(StreamTask.java:809) ~[flink-dist_2.11-1.14.4.jar:1
.14.4]
at org.apache.flink.streaming.runtime.tasks.StreamTask.invoke(StreamTask.java:761) ~[flink-dist_2.11-1.14.4.jar:1.14.4]
at org.apache.flink.runtime.taskmanager.Task.runWithSystemExitMonitoring(Task.java:958) [flink-dist_2.11-1.14.4.jar:1.14.
4]
at org.apache.flink.runtime.taskmanager.Task.restoreAndInvoke(Task.java:937) [flink-dist_2.11-1.14.4.jar:1.14.4]
at org.apache.flink.runtime.taskmanager.Task.doRun(Task.java:766) [flink-dist_2.11-1.14.4.jar:1.14.4]
at org.apache.flink.runtime.taskmanager.Task.run(Task.java:575) [flink-dist_2.11-1.14.4.jar:1.14.4]
at java.lang.Thread.run(Thread.java:750) [?:1.8.0_322]
Caused by: java.net.ConnectException: Connection refused (Connection refused)
at java.net.PlainSocketImpl.socketConnect(Native Method) ~[?:1.8.0_322]
at java.net.AbstractPlainSocketImpl.doConnect(AbstractPlainSocketImpl.java:350) ~[?:1.8.0_322]
at java.net.AbstractPlainSocketImpl.connectToAddress(AbstractPlainSocketImpl.java:206) ~[?:1.8.0_322]
at java.net.AbstractPlainSocketImpl.connect(AbstractPlainSocketImpl.java:188) ~[?:1.8.0_322]
at java.net.SocksSocketImpl.connect(SocksSocketImpl.java:392) ~[?:1.8.0_322]
at java.net.Socket.connect(Socket.java:607) ~[?:1.8.0_322]
at org.apache.http.conn.socket.PlainConnectionSocketFactory.connectSocket(PlainConnectionSocketFactory.java:75) ~[blob_p-
61f0320fe7cc111cc844c6575ef7cc48af8a4b1f-139883e44a5481fda2030ca9306e6ec8:?]
at org.apache.http.impl.conn.DefaultHttpClientConnectionOperator.connect(DefaultHttpClientConnectionOperator.java:142) ~[
blob_p-61f0320fe7cc111cc844c6575ef7cc48af8a4b1f-139883e44a5481fda2030ca9306e6ec8:?]
I encountered a similar issue where, when setting sink.enable.batch-mode=true and restarting the Doris cluster, the program was unable to resume writing.
driver version:24.0.0
doris version : 2.1.X
I encountered a similar issue where, when setting sink.enable.batch-mode=true and restarting the Doris cluster, the program was unable to resume writing. driver version:24.0.0 doris version : 2.1.X
What is the specific error? When sink.enable.batch-mode=true is set, the DorisCommitter code path should not be reached at all.
Search before asking
Version
flink1.14.4 flink-doris-connector-1.14 1.1.1 版本写入doris 2.1.6
What's Wrong?
使用 flink1.14.4 flink-doris-connector-1.14 1.1.1 版本写入doris 2.1.6,当一个be节点长时间失联时,flink job失败且无法从checkpoint恢复。
核心报错如下:
2024-10-12 14:13:48,049 ERROR org.apache.doris.flink.sink.committer.DorisCommitter [] - commit transaction failed:
org.apache.http.conn.HttpHostConnectException: Connect to 10.126.72.64:8040 [/10.126.72.64] failed: Connection refused (Connection
refused)
at org.apache.http.impl.conn.DefaultHttpClientConnectionOperator.connect(DefaultHttpClientConnectionOperator.java:156) ~[
blob_p-61f0320fe7cc111cc844c6575ef7cc48af8a4b1f-139883e44a5481fda2030ca9306e6ec8:?]
at org.apache.http.impl.conn.PoolingHttpClientConnectionManager.connect(PoolingHttpClientConnectionManager.java:374) ~[bl
ob_p-61f0320fe7cc111cc844c6575ef7cc48af8a4b1f-139883e44a5481fda2030ca9306e6ec8:?]
at org.apache.http.impl.execchain.MainClientExec.establishRoute(MainClientExec.java:393) ~[blob_p-61f0320fe7cc111cc844c65
75ef7cc48af8a4b1f-139883e44a5481fda2030ca9306e6ec8:?]
at org.apache.http.impl.execchain.MainClientExec.execute(MainClientExec.java:236) ~[blob_p-61f0320fe7cc111cc844c6575ef7cc
48af8a4b1f-139883e44a5481fda2030ca9306e6ec8:?]
at org.apache.http.impl.execchain.ProtocolExec.execute(ProtocolExec.java:186) ~[blob_p-61f0320fe7cc111cc844c6575ef7cc48af
8a4b1f-139883e44a5481fda2030ca9306e6ec8:?]
at org.apache.http.impl.execchain.RetryExec.execute(RetryExec.java:89) ~[blob_p-61f0320fe7cc111cc844c6575ef7cc48af8a4b1f-
139883e44a5481fda2030ca9306e6ec8:?]
at org.apache.http.impl.execchain.RedirectExec.execute(RedirectExec.java:110) ~[blob_p-61f0320fe7cc111cc844c6575ef7cc48af
8a4b1f-139883e44a5481fda2030ca9306e6ec8:?]
at org.apache.http.impl.client.InternalHttpClient.doExecute(InternalHttpClient.java:185) ~[blob_p-61f0320fe7cc111cc844c65
75ef7cc48af8a4b1f-139883e44a5481fda2030ca9306e6ec8:?]
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:83) ~[blob_p-61f0320fe7cc111cc844c657
5ef7cc48af8a4b1f-139883e44a5481fda2030ca9306e6ec8:?]
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:108) ~[blob_p-61f0320fe7cc111cc844c65
75ef7cc48af8a4b1f-139883e44a5481fda2030ca9306e6ec8:?]
at org.apache.doris.flink.sink.committer.DorisCommitter.commitTransaction(DorisCommitter.java:91) ~[blob_p-61f0320fe7cc11
1cc844c6575ef7cc48af8a4b1f-139883e44a5481fda2030ca9306e6ec8:?]
at org.apache.doris.flink.sink.committer.DorisCommitter.commit(DorisCommitter.java:71) ~[blob_p-61f0320fe7cc111cc844c6575
ef7cc48af8a4b1f-139883e44a5481fda2030ca9306e6ec8:?]
at org.apache.flink.streaming.runtime.operators.sink.StreamingCommitterHandler.commit(StreamingCommitterHandler.java:54)
~[flink-dist_2.11-1.14.4.jar:1.14.4]
at org.apache.flink.streaming.runtime.operators.sink.AbstractStreamingCommitterHandler.retry(AbstractStreamingCommitterHa
ndler.java:99) ~[flink-dist_2.11-1.14.4.jar:1.14.4]
at org.apache.flink.streaming.runtime.operators.sink.AbstractCommitterHandler.retry(AbstractCommitterHandler.java:66) ~[f
link-dist_2.11-1.14.4.jar:1.14.4]
at org.apache.flink.streaming.runtime.operators.sink.CommitRetrier.retry(CommitRetrier.java:80) ~[flink-dist_2.11-1.14.4.
jar:1.14.4]
at org.apache.flink.streaming.runtime.operators.sink.CommitRetrier.lambda$retryAt$0(CommitRetrier.java:63) ~[flink-dist_2
.11-1.14.4.jar:1.14.4]
at org.apache.flink.streaming.runtime.tasks.StreamTask.invokeProcessingTimeCallback(StreamTask.java:1693) ~[flink-dist_2.
11-1.14.4.jar:1.14.4]
at org.apache.flink.streaming.runtime.tasks.StreamTask.lambda$null$22(StreamTask.java:1684) ~[flink-dist_2.11-1.14.4.jar:
1.14.4]
at org.apache.flink.streaming.runtime.tasks.StreamTaskActionExecutor$1.runThrowing(StreamTaskActionExecutor.java:50) ~[fl
ink-dist_2.11-1.14.4.jar:1.14.4]
at org.apache.flink.streaming.runtime.tasks.mailbox.Mail.run(Mail.java:90) ~[flink-dist_2.11-1.14.4.jar:1.14.4]
at org.apache.flink.streaming.runtime.tasks.mailbox.MailboxProcessor.processMailsWhenDefaultActionUnavailable(MailboxProc
essor.java:338) ~[flink-dist_2.11-1.14.4.jar:1.14.4]
at org.apache.flink.streaming.runtime.tasks.mailbox.MailboxProcessor.processMail(MailboxProcessor.java:324) ~[flink-dist_
2.11-1.14.4.jar:1.14.4]
at org.apache.flink.streaming.runtime.tasks.mailbox.MailboxProcessor.runMailboxLoop(MailboxProcessor.java:201) ~[flink-di
st_2.11-1.14.4.jar:1.14.4]
at org.apache.flink.streaming.runtime.tasks.StreamTask.runMailboxLoop(StreamTask.java:809) ~[flink-dist_2.11-1.14.4.jar:1
.14.4]
at org.apache.flink.streaming.runtime.tasks.StreamTask.invoke(StreamTask.java:761) ~[flink-dist_2.11-1.14.4.jar:1.14.4]
at org.apache.flink.runtime.taskmanager.Task.runWithSystemExitMonitoring(Task.java:958) [flink-dist_2.11-1.14.4.jar:1.14.
4]
at org.apache.flink.runtime.taskmanager.Task.restoreAndInvoke(Task.java:937) [flink-dist_2.11-1.14.4.jar:1.14.4]
at org.apache.flink.runtime.taskmanager.Task.doRun(Task.java:766) [flink-dist_2.11-1.14.4.jar:1.14.4]
at org.apache.flink.runtime.taskmanager.Task.run(Task.java:575) [flink-dist_2.11-1.14.4.jar:1.14.4]
at java.lang.Thread.run(Thread.java:750) [?:1.8.0_322]
Caused by: java.net.ConnectException: Connection refused (Connection refused)
at java.net.PlainSocketImpl.socketConnect(Native Method) ~[?:1.8.0_322]
at java.net.AbstractPlainSocketImpl.doConnect(AbstractPlainSocketImpl.java:350) ~[?:1.8.0_322]
at java.net.AbstractPlainSocketImpl.connectToAddress(AbstractPlainSocketImpl.java:206) ~[?:1.8.0_322]
at java.net.AbstractPlainSocketImpl.connect(AbstractPlainSocketImpl.java:188) ~[?:1.8.0_322]
at java.net.SocksSocketImpl.connect(SocksSocketImpl.java:392) ~[?:1.8.0_322]
at java.net.Socket.connect(Socket.java:607) ~[?:1.8.0_322]
at org.apache.http.conn.socket.PlainConnectionSocketFactory.connectSocket(PlainConnectionSocketFactory.java:75) ~[blob_p-
61f0320fe7cc111cc844c6575ef7cc48af8a4b1f-139883e44a5481fda2030ca9306e6ec8:?]
at org.apache.http.impl.conn.DefaultHttpClientConnectionOperator.connect(DefaultHttpClientConnectionOperator.java:142) ~[
blob_p-61f0320fe7cc111cc844c6575ef7cc48af8a4b1f-139883e44a5481fda2030ca9306e6ec8:?]
看起来是从checkpoint恢复时,读取的host还是故障节点ip,所以访问连接不上
What You Expected?
我们没法升级flink1.14这个环境,请问 flink-doris-connector-1.14 1.1.1版本这个问题可以修复下吗?系统对高可用要求较高,be节点出现磁盘故障时无法在短时间内恢复,那么这个问题就肯定会复现。
How to Reproduce?
No response
Anything Else?
No response
Are you willing to submit PR?
Code of Conduct
The text was updated successfully, but these errors were encountered: