-
Notifications
You must be signed in to change notification settings - Fork 28.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-47475][CORE][K8S] Support spark.kubernetes.jars.avoidDownloadSchemes
for K8s Cluster Mode
#45715
[SPARK-47475][CORE][K8S] Support spark.kubernetes.jars.avoidDownloadSchemes
for K8s Cluster Mode
#45715
Changes from 1 commit
f99898f
5a58357
1a5ea3a
62655fe
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -401,16 +401,24 @@ private[spark] class SparkSubmit extends Logging { | |
// SPARK-33782 : This downloads all the files , jars , archiveFiles and pyfiles to current | ||
// working directory | ||
// SPARK-43540: add current working directory into driver classpath | ||
// SPARK-47475: make download to driver optional so executors may fetch resource from remote | ||
// url directly to avoid overwhelming driver network when resource is big and executor count | ||
// is high | ||
val workingDirectory = "." | ||
childClasspath += workingDirectory | ||
def downloadResourcesToCurrentDirectory(uris: String, isArchive: Boolean = false): | ||
|
||
def downloadResourcesToCurrentDirectory(uris: String, | ||
isArchive: Boolean = false, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could you take a look at
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. For this case, the following is the rule.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ya, There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks for the tips @dongjoon-hyun! There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Updated. |
||
avoidDownload: String => Boolean = _ => false): | ||
String = { | ||
val resolvedUris = Utils.stringToSeq(uris).map(Utils.resolveURI) | ||
val (avoidDownloads, toDownloads) = | ||
resolvedUris.partition(uri => avoidDownload(uri.getScheme)) | ||
val localResources = downloadFileList( | ||
resolvedUris.map( | ||
toDownloads.map( | ||
Utils.getUriBuilder(_).fragment(null).build().toString).mkString(","), | ||
targetDir, sparkConf, hadoopConf) | ||
Utils.stringToSeq(localResources).map(Utils.resolveURI).zip(resolvedUris).map { | ||
(Utils.stringToSeq(localResources).map(Utils.resolveURI).zip(toDownloads).map { | ||
case (localResources, resolvedUri) => | ||
val source = new File(localResources.getPath).getCanonicalFile | ||
val dest = new File( | ||
|
@@ -427,14 +435,19 @@ private[spark] class SparkSubmit extends Logging { | |
// Keep the URIs of local files with the given fragments. | ||
Utils.getUriBuilder( | ||
localResources).fragment(resolvedUri.getFragment).build().toString | ||
}.mkString(",") | ||
} ++ avoidDownloads.map(_.toString)).mkString(",") | ||
} | ||
|
||
val avoidJarDownloadSchemes = sparkConf.get(KUBERNETES_AVOID_JAR_DOWNLOAD_SCHEMES) | ||
|
||
def avoidJarDownload(scheme: String): Boolean = | ||
avoidJarDownloadSchemes.contains("*") || avoidJarDownloadSchemes.contains(scheme) | ||
|
||
val filesLocalFiles = Option(args.files).map { | ||
downloadResourcesToCurrentDirectory(_) | ||
}.orNull | ||
val jarsLocalJars = Option(args.jars).map { | ||
downloadResourcesToCurrentDirectory(_) | ||
val updatedJars = Option(args.jars).map { | ||
downloadResourcesToCurrentDirectory(_, avoidDownload = avoidJarDownload) | ||
}.orNull | ||
val archiveLocalFiles = Option(args.archives).map { | ||
downloadResourcesToCurrentDirectory(_, true) | ||
|
@@ -445,7 +458,7 @@ private[spark] class SparkSubmit extends Logging { | |
args.files = filesLocalFiles | ||
args.archives = archiveLocalFiles | ||
args.pyFiles = pyLocalFiles | ||
args.jars = jarsLocalJars | ||
args.jars = updatedJars | ||
} | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1458,6 +1458,18 @@ package object config { | |
.doubleConf | ||
.createWithDefault(1.5) | ||
|
||
private[spark] val KUBERNETES_AVOID_JAR_DOWNLOAD_SCHEMES = | ||
ConfigBuilder("spark.kubernetes.jars.avoidDownloadSchemes") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. BTW, does the AS-IS PR title match this?
|
||
.doc("Comma-separated list of schemes for which jars will NOT be downloaded to the " + | ||
"driver local disk prior to be distributed to executors, only for kubernetes deployment. " + | ||
"For use in cases when the jars are big and executor counts are high, " + | ||
"concurrent download causes network saturation and timeouts. " + | ||
"Wildcard '*' is denoted to not downloading jars for any the schemes.") | ||
.version("4.0.0") | ||
.stringConf | ||
.toSequence | ||
.createWithDefault(Nil) | ||
|
||
private[spark] val FORCE_DOWNLOAD_SCHEMES = | ||
ConfigBuilder("spark.yarn.dist.forceDownloadSchemes") | ||
.doc("Comma-separated list of schemes for which resources will be downloaded to the " + | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We don't need to add a new empty line here.