From a90a0e98f5be965452261388050debe5a7396dac Mon Sep 17 00:00:00 2001
From: jvican
Date: Thu, 9 Nov 2017 16:42:55 +0100
Subject: [PATCH] Make classpath hashing more lightweight

And make it parallel!

This patch adds a cache keyed on filesystem metadata so that jar hashes are
reused across compiler iterations whenever a jar's last modified time (and
size) has not changed. This matters because, until now, running `compile` on
multi-module builds with gigantic classpaths carried a significant overhead:
the previous algorithm recomputed hashes for all jars transitively across all
of these projects.

This patch is conservative; several things are still wrong with the status quo
of classpath hashing. The most important one is that Zinc calls `hashCode` on
a SHA-1 checksum, which collapses it into a 32-bit `Int` and defeats the point
of computing a strong checksum. The second is that we do not need a SHA-1
checksum for the kind of checks we want to do; https://github.com/sbt/zinc/pull/371
explains why. The third limitation is that file hashes are represented
internally as `int`s, which cannot hold the full checksum. My previous PR also
tackles this problem; it will be solved in the long term.

Therefore, this pull request only tackles two things:

* Caching of classpath entry hashes.
* Parallelizing this IO-bound task.

Results, on my local machine:

- Sequential hashing of the first 500 jars in my ivy cache: 1330ms.
- Parallel hashing of the first 500 jars in my ivy cache: 770ms.
- Second parallel hashing of the first 500 jars in my ivy cache: 1ms.

Fixes #433.
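For reference, the approach boils down to the sketch below. This is an
illustrative, self-contained snippet rather than the code in this patch;
`MetadataCachedHashes`, `cachedHash` and `hashAll` are made-up names. A hash is
cached per file, keyed on (last modified time, size), so an unchanged jar is
never re-read, and the whole classpath is mapped in parallel because the work
is IO-bound:

    // Illustrative sketch, not the patch code: per-file hashes keyed on
    // filesystem metadata, plus parallel hashing of the whole classpath.
    import java.io.File
    import java.nio.file.Files
    import java.nio.file.attribute.{ BasicFileAttributes, FileTime }
    import java.util.concurrent.ConcurrentHashMap

    object MetadataCachedHashes {
      private val cache = new ConcurrentHashMap[File, ((FileTime, Long), Int)]()

      // Recomputes `hash(file)` only when (lastModifiedTime, size) changed
      // since the last call. Assumes the file exists.
      def cachedHash(file: File)(hash: File => Int): Int = {
        val attrs = Files.readAttributes(file.toPath, classOf[BasicFileAttributes])
        val key = (attrs.lastModifiedTime(), attrs.size())
        Option(cache.get(file)) match {
          case Some((`key`, cached)) => cached // metadata unchanged: reuse the cached hash
          case _ =>
            val computed = hash(file) // unseen or changed: hash and remember the metadata
            cache.put(file, (key, computed))
            computed
        }
      }

      // Hashes all entries in parallel (Scala 2.12; parallel collections are a
      // separate module on 2.13+).
      def hashAll(files: Seq[File])(hash: File => Int): Array[Int] = {
        import scala.collection.parallel._
        files.toParArray.map(f => cachedHash(f)(hash)).toArray
      }
    }

The real implementation lives in `ClasspathCache.hashClasspath` below and
stores `FileHash` values instead of raw `Int`s.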
---
 .../internal/inc/MixedAnalyzingCompiler.scala |  8 +--
 .../internal/inc/caching/ClasspathCache.scala | 43 ++++++++++++++
 .../sbt/inc/cached/CachedHashingSpec.scala    | 58 +++++++++++++++++++
 3 files changed, 105 insertions(+), 4 deletions(-)
 create mode 100644 zinc/src/main/scala/sbt/internal/inc/caching/ClasspathCache.scala
 create mode 100644 zinc/src/test/scala/sbt/inc/cached/CachedHashingSpec.scala

diff --git a/zinc/src/main/scala/sbt/internal/inc/MixedAnalyzingCompiler.scala b/zinc/src/main/scala/sbt/internal/inc/MixedAnalyzingCompiler.scala
index df0952f9c5..c6423a5a71 100644
--- a/zinc/src/main/scala/sbt/internal/inc/MixedAnalyzingCompiler.scala
+++ b/zinc/src/main/scala/sbt/internal/inc/MixedAnalyzingCompiler.scala
@@ -11,6 +11,7 @@ package inc
 
 import java.io.File
 import java.lang.ref.{ Reference, SoftReference }
+import java.nio.file.Files
 import java.util.Optional
 
 import inc.javac.AnalyzingJavaCompiler
@@ -20,6 +21,7 @@ import xsbti.compile._
 import sbt.io.IO
 import sbt.util.{ InterfaceUtil, Logger }
 import sbt.internal.inc.JavaInterfaceUtil.EnrichOption
+import sbt.internal.inc.caching.ClasspathCache
 import xsbti.compile.ClassFileManager
 
 /** An instance of an analyzing compiler that can run both javac + scalac. */
@@ -181,13 +183,11 @@ object MixedAnalyzingCompiler {
       incrementalCompilerOptions: IncOptions,
       extra: List[(String, String)]
   ): CompileConfiguration = {
-    val classpathHash = classpath map { x =>
-      FileHash.of(x, Stamper.forHash(x).hashCode)
-    }
+    val classpathHash = ClasspathCache.hashClasspath(classpath)
     val compileSetup = MiniSetup.of(
       output,
       MiniOptions.of(
-        classpathHash.toArray,
+        classpathHash,
         options.toArray,
         javacOptions.toArray
       ),
diff --git a/zinc/src/main/scala/sbt/internal/inc/caching/ClasspathCache.scala b/zinc/src/main/scala/sbt/internal/inc/caching/ClasspathCache.scala
new file mode 100644
index 0000000000..cda89f31e0
--- /dev/null
+++ b/zinc/src/main/scala/sbt/internal/inc/caching/ClasspathCache.scala
@@ -0,0 +1,43 @@
+package sbt.internal.inc.caching
+
+import java.io.File
+import java.nio.file.Files
+import java.util.concurrent.ConcurrentHashMap
+import java.nio.file.attribute.{ BasicFileAttributes, FileTime }
+
+import xsbti.compile.FileHash
+import sbt.internal.inc.{ EmptyStamp, Stamper }
+
+object ClasspathCache {
+  // For more safety, store both the time and size
+  private type JarMetadata = (FileTime, Long)
+  private[this] val cacheMetadataJar = new ConcurrentHashMap[File, (JarMetadata, FileHash)]()
+  private[this] final val emptyStampCode = EmptyStamp.hashCode()
+  private def emptyFileHash(file: File) = FileHash.of(file, emptyStampCode)
+  private def genFileHash(file: File, metadata: JarMetadata): FileHash = {
+    val newHash = FileHash.of(file, Stamper.forHash(file).hashCode())
+    cacheMetadataJar.put(file, (metadata, newHash))
+    newHash
+  }
+
+  def hashClasspath(classpath: Seq[File]): Array[FileHash] = {
+    // #433: Cache jars with their metadata to avoid recomputing hashes transitively in other projects
+    def fromCacheOrHash(file: File): FileHash = {
+      if (!file.exists()) emptyFileHash(file)
+      else {
+        // `readAttributes` needs to be guarded by `file.exists()`, otherwise it fails
+        val attrs = Files.readAttributes(file.toPath, classOf[BasicFileAttributes])
+        if (attrs.isDirectory) emptyFileHash(file)
+        else {
+          val currentMetadata = (attrs.lastModifiedTime(), attrs.size())
+          Option(cacheMetadataJar.get(file)) match {
+            case Some((metadata, hashHit)) if metadata == currentMetadata => hashHit
+            case _ => genFileHash(file, currentMetadata) // cache miss or stale metadata: recompute
+          }
+        }
+      }
+    }
+    import scala.collection.parallel._ // needed for `toParArray`
+    classpath.toParArray.map(fromCacheOrHash).toArray
+  }
+}
diff --git a/zinc/src/test/scala/sbt/inc/cached/CachedHashingSpec.scala b/zinc/src/test/scala/sbt/inc/cached/CachedHashingSpec.scala
new file mode 100644
index 0000000000..16db222b8a
--- /dev/null
+++ b/zinc/src/test/scala/sbt/inc/cached/CachedHashingSpec.scala
@@ -0,0 +1,58 @@
+package sbt.inc.cached
+
+import java.nio.file.Paths
+
+import sbt.inc.{ BaseCompilerSpec, SourceFiles }
+import sbt.internal.inc.{ Analysis, CompileOutput, MixedAnalyzingCompiler }
+import sbt.io.IO
+
+class CachedHashingSpec extends BaseCompilerSpec {
+  def timeMs[R](block: => R): Long = {
+    val t0 = System.nanoTime()
+    block // force the call-by-name block
+    val t1 = System.nanoTime()
+    (t1 - t0) / 1000000
+  }
+
+  "zinc" should "cache jar generation" in {
+    IO.withTemporaryDirectory { tempDir =>
+      val classes = Seq(SourceFiles.Good)
+      val sources0 = Map(Paths.get("src") -> classes.map(path => Paths.get(path)))
+      val projectSetup = ProjectSetup(tempDir.toPath(), sources0, Nil)
+      val compiler = projectSetup.createCompiler()
+
+      import compiler.in.{ setup, options, compilers, previousResult }
+      import sbt.internal.inc.JavaInterfaceUtil._
+      import sbt.io.syntax.{ file, fileToRichFile, singleFileFinder }
+
+      val javac = compilers.javaTools.javac
+      val scalac = compilers.scalac
+      val giganticClasspath = file(sys.props("user.home"))./(".ivy2").**("*.jar").get.take(500)
+
+      def genConfig = MixedAnalyzingCompiler.makeConfig(
+        scalac,
+        javac,
+        options.sources,
+        giganticClasspath,
+        CompileOutput(options.classesDirectory),
+        setup.cache,
+        setup.progress.toOption,
+        options.scalacOptions,
+        options.javacOptions,
+        Analysis.empty,
+        previousResult.setup.toOption,
+        setup.perClasspathEntryLookup,
+        setup.reporter,
+        options.order,
+        setup.skip,
+        setup.incrementalCompilerOptions,
+        setup.extra.toList.map(_.toScalaTuple)
+      )
+
+      val hashingTime = timeMs(genConfig)
+      val cachedHashingTime = timeMs(genConfig)
+      assert(cachedHashingTime < (hashingTime * 0.20),
+             s"Jar caching didn't work: $cachedHashingTime ms is not less than 20% of $hashingTime ms.")
+    }
+  }
+}
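Usage note (an illustrative snippet, not part of the patch; the jar paths are
made up): callers only need the new `ClasspathCache.hashClasspath` entry point,
and repeated calls re-hash only the entries whose metadata changed.

    import java.io.File

    import sbt.internal.inc.caching.ClasspathCache
    import xsbti.compile.FileHash

    val classpath: Seq[File] = Seq(new File("lib/a.jar"), new File("lib/b.jar"))

    // First call hashes every existing jar in parallel.
    val first: Array[FileHash] = ClasspathCache.hashClasspath(classpath)
    // As long as the jars' last modified time and size are untouched,
    // a second call is served from the in-memory metadata cache.
    val second: Array[FileHash] = ClasspathCache.hashClasspath(classpath)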