Skip to content

Commit

Permalink
Lower the default learning rate for albert
Browse files Browse the repository at this point in the history
While porting #1767, I noticed that the default learning rate for our
classifier does not work well with ALBERT pretrained checkpoints.
Let's lower it for this model.
  • Loading branch information
mattdangerw committed Aug 19, 2024
1 parent 180c7ec commit 558ac3f
Showing 1 changed file with 17 additions and 0 deletions.
17 changes: 17 additions & 0 deletions keras_nlp/src/models/albert/albert_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,23 @@ def __init__(
self.activation = keras.activations.get(activation)
self.dropout = dropout

def compile(
    self,
    optimizer="auto",
    loss="auto",
    *,
    weighted_metrics="auto",
    **kwargs,
):
    """Configure the classifier for training.

    Overrides the base `compile` so that the `"auto"` optimizer resolves
    to Adam with a learning rate of 1e-5 — ALBERT pretrained checkpoints
    do not fine-tune well with the higher default used elsewhere.
    All other arguments are forwarded unchanged to `super().compile`.
    """
    # Resolve the "auto" sentinel to the model-specific default optimizer.
    resolved_optimizer = (
        keras.optimizers.Adam(1e-5) if optimizer == "auto" else optimizer
    )
    super().compile(
        optimizer=resolved_optimizer,
        loss=loss,
        weighted_metrics=weighted_metrics,
        **kwargs,
    )

def get_config(self):
config = super().get_config()
config.update(
Expand Down

0 comments on commit 558ac3f

Please sign in to comment.