Commit 1d6b5d1: Remove duplication in the code creating optimizer variables
juliabeliaeva committed Apr 13, 2022 (parent f50974b)
Showing 9 changed files with 35 additions and 156 deletions.
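In short: every optimizer previously carried a private helper (createAdaDeltaSlot, createAdamSlot, createFtrlSlot, and so on) that hand-built a Fill-based initializer and passed it to the open Optimizer.createSlot. This commit moves the initializer and assign-op construction into a single createSlot(slotName, variable, tf, graph) in the base class and deletes the per-optimizer helpers together with the createName hook. Every call site collapses to the same pattern; in the AdaDelta hunk below, for example,

    val (accumSlot, accumUpdateSlot) = createAdaDeltaSlot(graph, tf, variable.asOutput())

becomes

    val output = variable.asOutput()
    val accumSlot = createSlot(ACCUMULATOR, output, tf, graph)
    val accumUpdateSlot = createSlot(ACCUMULATOR_UPDATE, output, tf, graph)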
org/jetbrains/kotlinx/dl/api/core/optimizer/AdaDelta.kt
@@ -6,10 +6,8 @@
 package org.jetbrains.kotlinx.dl.api.core.optimizer
 
 import org.jetbrains.kotlinx.dl.api.core.KGraph
-import org.jetbrains.kotlinx.dl.api.core.util.defaultInitializerOpName
 import org.jetbrains.kotlinx.dl.api.core.util.getDType
 import org.tensorflow.Operand
-import org.tensorflow.Output
 import org.tensorflow.op.Ops
 import org.tensorflow.op.core.Constant
 import org.tensorflow.op.core.Gradients
@@ -74,7 +72,9 @@ public class AdaDelta(
         epsilonConstant = tf.constant(epsilon, getDType())
 
         for ((i, variable) in weights.withIndex()) {
-            val (accumSlot, accumUpdateSlot) = createAdaDeltaSlot(graph, tf, variable.asOutput())
+            val output = variable.asOutput()
+            val accumSlot = createSlot(ACCUMULATOR, output, tf, graph)
+            val accumUpdateSlot = createSlot(ACCUMULATOR_UPDATE, output, tf, graph)
 
             targets.add(
                 tf.train.applyAdadelta(
@@ -91,19 +91,6 @@
         return targets
     }
 
-    private fun createAdaDeltaSlot(graph: KGraph, tf: Ops, v: Output<Float>): Pair<Variable<Float>, Variable<Float>> {
-        val accumInitializerName = defaultInitializerOpName(createName(v, ACCUMULATOR))
-        val accumulatorInitializer = tf.withName(accumInitializerName)
-            .fill(tf.shape(v), tf.dtypes.cast(tf.constant(0.0f), getDType()))
-        val accumulator = createSlot(graph, tf, v.asOutput(), ACCUMULATOR, accumulatorInitializer)
-
-        val accumUpdateInitializerName = defaultInitializerOpName(createName(v, ACCUMULATOR_UPDATE))
-        val updateInitializer: Operand<Float> = tf.withName(accumUpdateInitializerName)
-            .fill(tf.shape(v), tf.dtypes.cast(tf.constant(0.0f), getDType()))
-        val accumulatorUpdate = createSlot(graph, tf, v.asOutput(), ACCUMULATOR_UPDATE, updateInitializer)
-        return accumulator to accumulatorUpdate
-    }
-
     override val optimizerName: String get() = "Adadelta"
 
     override val isRunningOnGPU: Boolean get() = true
org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGrad.kt
@@ -6,10 +6,8 @@
 package org.jetbrains.kotlinx.dl.api.core.optimizer
 
 import org.jetbrains.kotlinx.dl.api.core.KGraph
-import org.jetbrains.kotlinx.dl.api.core.util.defaultInitializerOpName
 import org.jetbrains.kotlinx.dl.api.core.util.getDType
 import org.tensorflow.Operand
-import org.tensorflow.Output
 import org.tensorflow.op.Ops
 import org.tensorflow.op.core.Constant
 import org.tensorflow.op.core.Gradients
@@ -64,7 +62,7 @@ public class AdaGrad(
         learningRateConst = tf.constant(learningRate, getDType())
 
         for ((i, variable) in weights.withIndex()) {
-            val slot = createAdaGradSlot(graph, tf, variable.asOutput())
+            val slot = createSlot(ACCUMULATOR, variable.asOutput(), tf, graph)
 
             targets.add(
                 tf.train.applyAdagrad(
@@ -80,14 +78,6 @@
         return targets
    }
 
-    private fun createAdaGradSlot(graph: KGraph, tf: Ops, v: Output<Float>): Variable<Float> {
-        val accumInitializerName = defaultInitializerOpName(createName(v, ACCUMULATOR))
-
-        val initializer: Operand<Float> = tf.withName(accumInitializerName)
-            .fill(tf.shape(v), tf.constant(initialAccumulatorValue))
-        return createSlot(graph, tf, v.asOutput(), ACCUMULATOR, initializer)
-    }
-
     override val optimizerName: String get() = "Adagrad"
 
     override val isRunningOnGPU: Boolean get() = true
org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGradDA.kt
@@ -11,7 +11,6 @@ import org.jetbrains.kotlinx.dl.api.core.util.defaultInitializerOpName
 import org.jetbrains.kotlinx.dl.api.core.util.defaultOptimizerVariableName
 import org.jetbrains.kotlinx.dl.api.core.util.getDType
 import org.tensorflow.Operand
-import org.tensorflow.Output
 import org.tensorflow.Shape
 import org.tensorflow.op.Ops
 import org.tensorflow.op.core.Assign
@@ -83,7 +82,9 @@
         graph.addOptimizerVariableInitializer(globalStepInit)
 
         for ((i, variable) in weights.withIndex()) {
-            val (gradSlot, gradSquaredSlot) = createAdaGradDASlot(graph, tf, variable.asOutput())
+            val output = variable.asOutput()
+            val gradSlot = createSlot(ACCUMULATOR, output, tf, graph)
+            val gradSquaredSlot = createSlot(SQUARED_ACCUMULATOR, output, tf, graph)
             targets.add(
                 tf.train.applyAdagradDa(
                     variable,
@@ -105,20 +106,6 @@
         return targets
    }
 
-    private fun createAdaGradDASlot(graph: KGraph, tf: Ops, v: Output<Float>): Pair<Variable<Float>, Variable<Float>> {
-        val accumulatorInitializerName = defaultInitializerOpName(createName(v, ACCUMULATOR))
-        val accumInitializer: Operand<Float> = tf.withName(accumulatorInitializerName)
-            .fill(tf.shape(v), tf.constant(0.0f))
-        val accumulator = createSlot(graph, tf, v.asOutput(), ACCUMULATOR, accumInitializer)
-
-        val squareAccumInitializerName = defaultInitializerOpName(createName(v, SQUARED_ACCUMULATOR))
-        val sqInitializer: Operand<Float> = tf.withName(squareAccumInitializerName)
-            .fill(tf.shape(v), tf.constant(initialAccumulatorValue))
-
-        val squaredAccumulator = createSlot(graph, tf, v.asOutput(), SQUARED_ACCUMULATOR, sqInitializer)
-        return accumulator to squaredAccumulator
-    }
-
     override val optimizerName: String get() = "AdaGradDA"
 
     override val isRunningOnGPU: Boolean get() = true
org/jetbrains/kotlinx/dl/api/core/optimizer/Adam.kt
@@ -11,7 +11,6 @@ import org.jetbrains.kotlinx.dl.api.core.util.defaultInitializerOpName
 import org.jetbrains.kotlinx.dl.api.core.util.defaultOptimizerVariableName
 import org.jetbrains.kotlinx.dl.api.core.util.getDType
 import org.tensorflow.Operand
-import org.tensorflow.Output
 import org.tensorflow.Shape
 import org.tensorflow.op.Ops
 import org.tensorflow.op.core.Assign
@@ -99,7 +98,9 @@
         graph.addOptimizerVariableInitializer(betaTwoPowerInit)
 
         for ((i, variable) in weights.withIndex()) {
-            val (firstMomentSlot, secondMomentSlot) = createAdamSlot(graph, tf, variable.asOutput())
+            val output = variable.asOutput()
+            val firstMomentSlot = createSlot(FIRST_MOMENT, output, tf, graph)
+            val secondMomentSlot = createSlot(SECOND_MOMENT, output, tf, graph)
             targets.add(
                 tf.train.applyAdam(
                     variable,
@@ -133,19 +134,6 @@
         return targets
    }
 
-    private fun createAdamSlot(graph: KGraph, tf: Ops, v: Output<Float>): Pair<Variable<Float>, Variable<Float>> {
-        val firstMomentInitializerName = defaultInitializerOpName(createName(v, FIRST_MOMENT))
-        val firstMomentInitializer =
-            tf.withName(firstMomentInitializerName).fill(tf.shape(v), tf.constant(0.0f, getDType()))
-        val firstMoment = createSlot(graph, tf, v.asOutput(), FIRST_MOMENT, firstMomentInitializer)
-
-        val secondMomentInitializerName = defaultInitializerOpName(createName(v, SECOND_MOMENT))
-        val secondMomentInitializer =
-            tf.withName(secondMomentInitializerName).fill(tf.shape(v), tf.constant(0.0f, getDType()))
-        val secondMoment = createSlot(graph, tf, v.asOutput(), SECOND_MOMENT, secondMomentInitializer)
-        return firstMoment to secondMoment
-    }
-
     override val optimizerName: String get() = "Adam"
 
     override val isRunningOnGPU: Boolean get() = true
org/jetbrains/kotlinx/dl/api/core/optimizer/Adamax.kt
@@ -11,7 +11,6 @@ import org.jetbrains.kotlinx.dl.api.core.util.defaultInitializerOpName
 import org.jetbrains.kotlinx.dl.api.core.util.defaultOptimizerVariableName
 import org.jetbrains.kotlinx.dl.api.core.util.getDType
 import org.tensorflow.Operand
-import org.tensorflow.Output
 import org.tensorflow.Shape
 import org.tensorflow.op.Ops
 import org.tensorflow.op.Scope
@@ -91,7 +90,9 @@
         val scope = Scope(graph.tfGraph)
 
         for ((i, variable) in weights.withIndex()) {
-            val (firstMomentSlot, secondMomentSlot) = createAdamaxSlot(graph, tf, variable.asOutput())
+            val output = variable.asOutput()
+            val firstMomentSlot = createSlot(FIRST_MOMENT, output, tf, graph)
+            val secondMomentSlot = createSlot(SECOND_MOMENT, output, tf, graph)
             targets.add(
                 ApplyAdaMax.create(
                     scope,
@@ -117,20 +118,6 @@
         return targets
    }
 
-    private fun createAdamaxSlot(graph: KGraph, tf: Ops, v: Output<Float>): Pair<Variable<Float>, Variable<Float>> {
-        val firstMomentInitializerName = defaultInitializerOpName(createName(v, FIRST_MOMENT))
-        val firstMomentInitializer =
-            tf.withName(firstMomentInitializerName).fill(tf.shape(v), tf.constant(0.0f, getDType()))
-        val firstMoment = createSlot(graph, tf, v.asOutput(), FIRST_MOMENT, firstMomentInitializer)
-
-        val secondMomentInitializerName = defaultInitializerOpName(createName(v, SECOND_MOMENT))
-        val secondMomentInitializer = tf.withName(secondMomentInitializerName)
-            .fill(tf.shape(v), tf.constant(0.0f, getDType()))
-        val secondMoment = createSlot(graph, tf, v.asOutput(), SECOND_MOMENT, secondMomentInitializer)
-
-        return firstMoment to secondMoment
-    }
-
     override val optimizerName: String get() = "Adamax"
 
     override val isRunningOnGPU: Boolean get() = false
org/jetbrains/kotlinx/dl/api/core/optimizer/Ftrl.kt
@@ -6,10 +6,8 @@
 package org.jetbrains.kotlinx.dl.api.core.optimizer
 
 import org.jetbrains.kotlinx.dl.api.core.KGraph
-import org.jetbrains.kotlinx.dl.api.core.util.defaultInitializerOpName
 import org.jetbrains.kotlinx.dl.api.core.util.getDType
 import org.tensorflow.Operand
-import org.tensorflow.Output
 import org.tensorflow.op.Ops
 import org.tensorflow.op.core.Constant
 import org.tensorflow.op.core.Gradients
@@ -91,7 +89,9 @@
         learningRatePowerConst = tf.constant(learningRatePower, getDType())
 
         for ((i, variable) in weights.withIndex()) {
-            val (accumSlot, linearSlot) = createFtrlSlot(graph, tf, variable.asOutput())
+            val output = variable.asOutput()
+            val accumSlot = createSlot(ACCUMULATOR, output, tf, graph)
+            val linearSlot = createSlot(LINEAR_ACCUMULATOR, output, tf, graph)
 
             val options = ApplyFtrl.useLocking(true)
 
@@ -114,20 +114,6 @@
         return targets
    }
 
-    private fun createFtrlSlot(graph: KGraph, tf: Ops, v: Output<Float>): Pair<Variable<Float>, Variable<Float>> {
-        val accumInitializerName = defaultInitializerOpName(createName(v, ACCUMULATOR))
-        val accumInitializer = tf.withName(accumInitializerName)
-            .fill(tf.shape(v), tf.constant(initialAccumulatorValue))
-        val accumulator = createSlot(graph, tf, v.asOutput(), ACCUMULATOR, accumInitializer)
-
-        val linearAccumInitializerName = defaultInitializerOpName(createName(v, LINEAR_ACCUMULATOR))
-        val linearAccumInitializer = tf.withName(linearAccumInitializerName)
-            .fill(tf.shape(v), tf.constant(0.0f))
-        val linearAccumulator = createSlot(graph, tf, v.asOutput(), LINEAR_ACCUMULATOR, linearAccumInitializer)
-
-        return accumulator to linearAccumulator
-    }
-
     override val optimizerName: String get() = "Ftrl"
 
     override val isRunningOnGPU: Boolean get() = false
org/jetbrains/kotlinx/dl/api/core/optimizer/Momentum.kt
@@ -6,9 +6,7 @@
 package org.jetbrains.kotlinx.dl.api.core.optimizer
 
 import org.jetbrains.kotlinx.dl.api.core.KGraph
-import org.jetbrains.kotlinx.dl.api.core.util.defaultInitializerOpName
 import org.tensorflow.Operand
-import org.tensorflow.Output
 import org.tensorflow.op.Ops
 import org.tensorflow.op.core.Constant
 import org.tensorflow.op.core.Gradients
@@ -50,7 +48,7 @@
         momentumConst = tf.constant(momentum)
 
         for ((i, variable) in weights.withIndex()) {
-            val slot = createMomentumSlot(graph, tf, variable.asOutput())
+            val slot = createSlot(MOMENTUM, variable.asOutput(), tf, graph)
 
             targets.add(
                 tf.train.applyMomentum(
@@ -67,13 +65,6 @@
         return targets
    }
 
-    private fun createMomentumSlot(graph: KGraph, tf: Ops, v: Output<Float>): Variable<Float> {
-        val momentumInitializerName = defaultInitializerOpName(createName(v, MOMENTUM))
-        val initializer: Operand<Float> = tf.withName(momentumInitializerName)
-            .fill(tf.shape(v), tf.constant(0.0f))
-        return createSlot(graph, tf, v.asOutput(), MOMENTUM, initializer)
-    }
-
     override val optimizerName: String get() = "Momentum"
 
     override val isRunningOnGPU: Boolean get() = true
org/jetbrains/kotlinx/dl/api/core/optimizer/Optimizer.kt
@@ -7,12 +7,12 @@ package org.jetbrains.kotlinx.dl.api.core.optimizer
 
 import org.jetbrains.kotlinx.dl.api.core.KGraph
 import org.jetbrains.kotlinx.dl.api.core.util.defaultAssignOpName
+import org.jetbrains.kotlinx.dl.api.core.util.defaultInitializerOpName
 import org.jetbrains.kotlinx.dl.api.core.util.defaultOptimizerVariableName
 import org.jetbrains.kotlinx.dl.api.core.util.getDType
 import org.tensorflow.Operand
 import org.tensorflow.Output
 import org.tensorflow.op.Ops
-import org.tensorflow.op.core.Assign
 import org.tensorflow.op.core.Gradients
 import org.tensorflow.op.core.Variable
 
@@ -74,38 +74,28 @@ public abstract class Optimizer(public val clipGradient: ClipGradientAction) {
      * Creates a slot in the graph for the specified variable with the specified name. Adds the slot's
      * initializer to the graph's initializers.
      *
-     * @param [graph] KGraph to be updated.
-     * @param [tf] TensorFlow graph API for building operations.
-     * @param [variable] The variable to create the slot for.
      * @param [slotName] The name of the slot.
-     * @param [initializer] The initializer for the slot.
+     * @param [variable] The variable to create the slot for.
+     * @param [tf] TensorFlow graph API for building operations.
+     * @param [graph] KGraph to be updated.
      */
-    protected open fun createSlot(
-        graph: KGraph,
-        tf: Ops,
-        variable: Output<Float>,
-        slotName: String,
-        initializer: Operand<Float>
-    ): Variable<Float> {
-        val createName: String = createName(variable, slotName)
-        val slot: Variable<Float> = tf.withName(createName).variable(variable.shape(), getDType())
+    protected fun createSlot(slotName: String, variable: Output<Float>, tf: Ops, graph: KGraph): Variable<Float> {
+        val slotVariableName = defaultOptimizerVariableName(variable.op().name() + "-" + slotName)
+        val slot = tf.withName(slotVariableName).variable(variable.shape(), getDType())
 
+        val initializerOpName = defaultInitializerOpName(slotVariableName)
+        val initializerOp = tf.withName(initializerOpName)
+            .fill(tf.shape(variable), tf.dtypes.cast(tf.constant(0.0f), getDType()))
+
-        val assignName = defaultAssignOpName(createName(variable, slotName))
-        val slotInit: Assign<Float> = tf.withName(assignName).assign(slot, initializer)
+        val assignOpName = defaultAssignOpName(slotVariableName)
+        val assignOp = tf.withName(assignOpName).assign(slot, initializerOp)
 
-        graph.addOptimizerVariableInitializer(slotInit)
+        graph.addOptimizerVariableInitializer(assignOp)
         graph.addOptimizerVariable(slot)
 
         return slot
    }
 
-    /**
-     * Creates name for [variable] used in slot with name [slotName].
-     */
-    internal open fun createName(variable: Output<Float>, slotName: String): String {
-        return defaultOptimizerVariableName(variable.op().name() + "-" + slotName)
-    }
-
     /** True, if optimizer is implemented for GPU. */
     internal abstract val isRunningOnGPU: Boolean
 }
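For readers outside the codebase, the consolidated helper above can be read as the following self-contained sketch. It creates three graph ops per slot: the slot variable, a zero Fill shaped like the tracked weight, and an Assign that the graph later runs to initialize the slot. The naming utilities (defaultOptimizerVariableName, defaultInitializerOpName, defaultAssignOpName) are inlined here as plain string templates, so the op names are illustrative only; the TensorFlow Java calls are the same ones used in the diff.

    import org.tensorflow.Operand
    import org.tensorflow.Output
    import org.tensorflow.op.Ops
    import org.tensorflow.op.core.Variable

    // Approximation of Optimizer.createSlot with KotlinDL's naming helpers
    // replaced by string templates (illustrative names, not the real ones).
    fun createZeroSlot(tf: Ops, variable: Output<Float>, slotName: String): Variable<Float> {
        val name = variable.op().name() + "-" + slotName
        // 1. The slot variable, shaped and typed like the weight it shadows.
        val slot: Variable<Float> = tf.withName(name).variable(variable.shape(), Float::class.javaObjectType)
        // 2. A zero-filled tensor with the weight's (possibly dynamic) shape.
        val zeros: Operand<Float> = tf.withName("Init_$name").fill(tf.shape(variable), tf.constant(0.0f))
        // 3. An assign op that initializes the slot; KotlinDL registers it on the KGraph.
        tf.withName("Assign_$name").assign(slot, zeros)
        return slot
    }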
org/jetbrains/kotlinx/dl/api/core/optimizer/RMSProp.kt
@@ -6,10 +6,8 @@
 package org.jetbrains.kotlinx.dl.api.core.optimizer
 
 import org.jetbrains.kotlinx.dl.api.core.KGraph
-import org.jetbrains.kotlinx.dl.api.core.util.defaultInitializerOpName
 import org.jetbrains.kotlinx.dl.api.core.util.getDType
 import org.tensorflow.Operand
-import org.tensorflow.Output
 import org.tensorflow.op.Ops
 import org.tensorflow.op.core.Constant
 import org.tensorflow.op.core.Gradients
@@ -65,12 +63,12 @@
         epsilonConstant = tf.constant(epsilon, getDType())
 
         for ((i, variable) in weights.withIndex()) {
-            val slots = createRMSPropSlot(graph, tf, variable.asOutput())
-            val rmsSlot: Variable<Float> = slots[0]
-            val momentumSlot: Variable<Float> = slots[1]
+            val output = variable.asOutput()
+            val rmsSlot = createSlot(RMS, output, tf, graph)
+            val momentumSlot = createSlot(MOMENTUM, output, tf, graph)
 
             if (centered) {
-                val mgSlot: Variable<Float> = slots[2]
+                val mgSlot = createSlot(MG, output, tf, graph)
                 targets.add(
                     tf.train.applyCenteredRmsProp(
                         variable,
@@ -104,31 +102,6 @@
         return targets
    }
 
-    private fun createRMSPropSlot(graph: KGraph, tf: Ops, v: Output<Float>): List<Variable<Float>> {
-        val rmsInitializerName = defaultInitializerOpName(createName(v, RMS))
-
-        val rmsInitializer: Operand<Float> = tf.withName(rmsInitializerName)
-            .fill(tf.shape(v), tf.dtypes.cast(tf.constant(1.0f), getDType()))
-        val rms = createSlot(graph, tf, v.asOutput(), RMS, rmsInitializer)
-
-        val momentumInitializerName = defaultInitializerOpName(createName(v, MOMENTUM))
-        val momentumInitializer: Operand<Float> = tf.withName(momentumInitializerName)
-            .fill(tf.shape(v), tf.dtypes.cast(tf.constant(0.0f), getDType()))
-        val momentum = createSlot(graph, tf, v.asOutput(), MOMENTUM, momentumInitializer)
-
-        if (centered) {
-            val mgInitializerName = defaultInitializerOpName(createName(v, MG))
-            val mgInitializer: Operand<Float> = tf.withName(mgInitializerName)
-                .fill(
-                    tf.shape(v),
-                    tf.constant(0.0f)
-                )
-            val mg = createSlot(graph, tf, v.asOutput(), MG, mgInitializer)
-            return listOf(rms, momentum, mg)
-        }
-        return listOf(rms, momentum)
-    }
-
     override val optimizerName: String get() = "RMSProp"
 
     override val isRunningOnGPU: Boolean get() = true
