From 6dd03352012351988cb965e6eb5ff7a5b275ef38 Mon Sep 17 00:00:00 2001 From: Jack Koenig Date: Fri, 20 Aug 2021 17:08:40 -0700 Subject: [PATCH] Add BufferedCustomFileEmission (#2334) Uses virtual method .getBytesBuffered: Iterable[Array[Byte]] to optimize file emission. (cherry picked from commit dc2fbad9d6431cb52a7ad26937a100b288b86206) --- .../firrtl/options/StageAnnotations.scala | 26 +++++++++++++++++ .../phases/WriteOutputAnnotations.scala | 25 ++++++++++++++--- .../phases/WriteOutputAnnotationsSpec.scala | 28 +++++++++++++++++++ 3 files changed, 75 insertions(+), 4 deletions(-) diff --git a/src/main/scala/firrtl/options/StageAnnotations.scala b/src/main/scala/firrtl/options/StageAnnotations.scala index 2b12dd4e6d..584f2842ad 100644 --- a/src/main/scala/firrtl/options/StageAnnotations.scala +++ b/src/main/scala/firrtl/options/StageAnnotations.scala @@ -71,7 +71,33 @@ trait CustomFileEmission { this: Annotation => val name = view[StageOptions](annotations).getBuildFileName(baseFileName(annotations), suffix) new File(name) } +} + +/** A buffered version of [[CustomFileEmission]] + * + * This is especially useful for serializing large data structures. When emitting Strings, it makes + * it much easier to avoid materializing the entire serialized String in memory. It also helps + * avoid materializing intermediate datastructures in memory. Finally, it reduces iteration + * overhead and helps optimize I/O performance. + * + * It may seem strange to use `Array[Byte]` in an otherwise immutable API, but for maximal + * performance it is best to use the JVM primitive that file I/O uses. These Arrays should only + * used immutably even though the Java API technically does allow mutating them. + */ +trait BufferedCustomFileEmission extends CustomFileEmission { this: Annotation => + + /** A buffered version of [[getBytes]] for more efficient serialization + * + * If you only need to serialize an `Iterable[String]`, you can use the `String.getBytes` method. + * It's also helpful to create a `view` which will do the `.map` lazily instead of eagerly, + * improving GC performance. + * {{{ + * def getBytesBuffered: Iterable[Array[Byte]] = myListString.view.map(_.getBytes) + * }}} + */ + def getBytesBuffered: Iterable[Array[Byte]] + final def getBytes: Iterable[Byte] = getBytesBuffered.flatten } /** Holds the name of the target directory diff --git a/src/main/scala/firrtl/options/phases/WriteOutputAnnotations.scala b/src/main/scala/firrtl/options/phases/WriteOutputAnnotations.scala index 0e26a5f714..2cf4c92f10 100644 --- a/src/main/scala/firrtl/options/phases/WriteOutputAnnotations.scala +++ b/src/main/scala/firrtl/options/phases/WriteOutputAnnotations.scala @@ -4,7 +4,16 @@ package firrtl.options.phases import firrtl.AnnotationSeq import firrtl.annotations.{Annotation, DeletedAnnotation, JsonProtocol} -import firrtl.options.{CustomFileEmission, Dependency, Phase, PhaseException, StageOptions, Unserializable, Viewer} +import firrtl.options.{ + BufferedCustomFileEmission, + CustomFileEmission, + Dependency, + Phase, + PhaseException, + StageOptions, + Unserializable, + Viewer +} import java.io.{BufferedOutputStream, File, FileOutputStream, PrintWriter} @@ -38,9 +47,17 @@ class WriteOutputAnnotations extends Phase { filesWritten.get(canonical) match { case None => val w = new BufferedOutputStream(new FileOutputStream(filename)) - a.getBytes match { - case arr: mutable.WrappedArray[Byte] => w.write(arr.array.asInstanceOf[Array[Byte]]) - case other => other.foreach(w.write(_)) + a match { + // Further optimized emission + case buf: BufferedCustomFileEmission => + val it = buf.getBytesBuffered + it.foreach(bytearr => w.write(bytearr)) + // Regular emission + case _ => + a.getBytes match { + case arr: mutable.WrappedArray[Byte] => w.write(arr.array.asInstanceOf[Array[Byte]]) + case other => other.foreach(w.write(_)) + } } w.close() filesWritten(canonical) = a diff --git a/src/test/scala/firrtlTests/options/phases/WriteOutputAnnotationsSpec.scala b/src/test/scala/firrtlTests/options/phases/WriteOutputAnnotationsSpec.scala index e9b8454090..edc56425f7 100644 --- a/src/test/scala/firrtlTests/options/phases/WriteOutputAnnotationsSpec.scala +++ b/src/test/scala/firrtlTests/options/phases/WriteOutputAnnotationsSpec.scala @@ -7,6 +7,7 @@ import java.io.File import firrtl.AnnotationSeq import firrtl.annotations.{DeletedAnnotation, NoTargetAnnotation} import firrtl.options.{ + BufferedCustomFileEmission, CustomFileEmission, InputAnnotationFileAnnotation, OutputAnnotationFileAnnotation, @@ -171,6 +172,25 @@ class WriteOutputAnnotationsSpec extends AnyFlatSpec with Matchers with firrtl.t result should equal(data) } + it should "write BufferedCustomFileEmission annotations" in new Fixture { + val file = new File("write-CustomFileEmission-annotations.anno.json") + val data = List("hi", "bye", "yo") + val annotations = Seq( + TargetDirAnnotation(dir), + OutputAnnotationFileAnnotation(file.toString), + WriteOutputAnnotationsSpec.Buffered(data) + ) + val serializedFileName = view[StageOptions](annotations).getBuildFileName("Buffered", Some(".Emission")) + val out = phase.transform(annotations) + + info(s"file '$serializedFileName' exists") + new File(serializedFileName) should (exist) + + info(s"file '$serializedFileName' is correct") + val result = scala.io.Source.fromFile(serializedFileName).mkString + result should equal(data.mkString) + } + it should "error if multiple annotations try to write to the same file" in new Fixture { val file = new File("write-CustomFileEmission-annotations-error.anno.json") val annotations = Seq( @@ -215,4 +235,12 @@ private object WriteOutputAnnotationsSpec { case class Replacement(file: String) extends NoTargetAnnotation + case class Buffered(content: List[String]) extends NoTargetAnnotation with BufferedCustomFileEmission { + + override protected def baseFileName(a: AnnotationSeq): String = "Buffered" + + override protected def suffix: Option[String] = Some(".Emission") + + override def getBytesBuffered: Iterable[Array[Byte]] = content.view.map(_.getBytes) + } }