NVIDIA-Merlin · radekosmulski · Apr 28, 2023 · Apr 18, 2023 · Apr 18, 2023 · Apr 18, 2023
diff --git a/tests/unit/torch/test_torchscript_with_topk.py b/tests/unit/torch/test_torchscript_with_topk.py
@@ -0,0 +1,32 @@
+import torch
+
+import transformers4rec.torch as tr
+from transformers4rec.config import transformer as tconf
+
+
+def test_torchscript_with_topk(torch_yoochoose_like, yoochoose_schema):
+    """Trace a top-k XLNet next-item model and compare it with the eager model."""
+    top_k = 10
+    seq_len, dim = 20, 64
+
+    # Causal-masked sequence features feeding a two-layer XLNet body.
+    features = tr.TabularSequenceFeatures.from_schema(
+        yoochoose_schema, max_sequence_length=seq_len, d_output=dim, masking="causal"
+    )
+    task = tr.NextItemPredictionTask(weight_tying=True)
+    xlnet = tconf.XLNetConfig.build(d_model=dim, n_head=8, n_layer=2, total_seq_length=seq_len)
+    model = xlnet.to_torch_model(features, task)
+
+    # One eager forward pass before enabling top-k and switching to eval mode.
+    model(torch_yoochoose_like, training=False)
+    model.top_k = top_k
+    model.eval()
+
+    traced_model = torch.jit.trace(model, torch_yoochoose_like, strict=False)
+    assert isinstance(traced_model, torch.jit.TopLevelTracedModule)
+
+    # Compare element [0] of both outputs, then check the traced output width.
+    eager_first = model(torch_yoochoose_like)[0]
+    traced_first = traced_model(torch_yoochoose_like)[0]
+    assert torch.allclose(eager_first, traced_first, rtol=1e-02)
+    assert traced_model(torch_yoochoose_like)[0].shape[1] == top_k
diff --git a/transformers4rec/torch/model/base.py b/transformers4rec/torch/model/base.py
@@ -371,10 +371,13 @@ def forward(
         testing: bool = False,
         targets: Union[torch.Tensor, TabularData] = None,
         call_body: bool = False,
+        top_k: Optional[int] = -1,
         **kwargs,
     ) -> Union[torch.Tensor, TabularData]:
         outputs = {}
 
+        from transformers4rec.torch.model.prediction_task import NextItemPredictionTask
+
         if call_body:
             body_outputs = self.body(body_outputs, training=training, testing=testing, **kwargs)
 
@@ -400,9 +403,20 @@ def forward(
             outputs = {"loss": loss, "labels": labels, "predictions": predictions}
         else:
             for name, task in self.prediction_task_dict.items():
-                outputs[name] = task(
-                    body_outputs, targets=targets, training=training, testing=testing, **kwargs
-                )
+                if isinstance(task, NextItemPredictionTask):
+                    outputs[name] = task(
+                        body_outputs,
+                        targets=targets,
+                        training=training,
+                        testing=testing,
+                        top_k=top_k,
+                        **kwargs,
+                    )
+
+                else:
+                    outputs[name] = task(
+                        body_outputs, targets=targets, training=training, testing=testing, **kwargs
+                    )
 
         return outputs
 
@@ -483,6 +497,7 @@ def __init__(
         optimizer: Type[torch.optim.Optimizer] = torch.optim.Adam,
         name: str = None,
         max_sequence_length: Optional[int] = None,
+        top_k: Optional[int] = -1,
     ):
         """Model class that can aggregate one or multiple heads.
         Parameters
@@ -497,9 +512,12 @@ def __init__(
             Optimizer-class to use during fitting
         name: str, optional
             Name of the model.
-        max_sequence_length : int, optional
+        max_sequence_length: int, optional
             The maximum sequence length supported by the model.
             Used to truncate sequence inputs longer than this value.
+        top_k: int, optional
+            The number of items to return at the inference step once the model is deployed.
+            Default is -1, which will return all items.
         """
         if head_weights:
             if not isinstance(head_weights, list):
@@ -517,6 +535,7 @@ def __init__(
         self.head_reduction = head_reduction
         self.optimizer = optimizer
         self.max_sequence_length = max_sequence_length
+        self.top_k = top_k
 
     def forward(self, inputs: TabularData, targets=None, training=False, testing=False, **kwargs):
         # Convert inputs to float32 which is the default type, expected by PyTorch
@@ -565,6 +584,7 @@ def forward(self, inputs: TabularData, targets=None, training=False, testing=Fal
                         targets=targets,
                         training=training,
                         testing=testing,
+                        top_k=self.top_k,
                         **kwargs,
                     )
                 )
@@ -756,6 +776,8 @@ def input_schema(self):
 
     @property
     def output_schema(self):
+        from merlin.schema import Tags
+
         from .prediction_task import BinaryClassificationTask, RegressionTask
 
         # if the model has one head with one task, the output is a tensor
@@ -781,8 +803,28 @@ def output_schema(self):
                 properties = {
                     "int_domain": int_domain,
                 }
-                col_schema = ColumnSchema(name, dtype=np.float32, properties=properties, dims=dims)
-                output_cols.append(col_schema)
+                # when top_k is set for inference, emit two output columns: scores and ids
+                if self.top_k > 0:
+                    # the categorical item-id dtype must match between input and output schemas
+                    col_name = self.input_schema.select_by_tag(Tags.ITEM_ID).column_names[0]
+                    col_dtype = (
+                        self.input_schema.select_by_tag(Tags.ITEM_ID)
+                        .column_schemas[col_name]
+                        .dtype.name
+                    )
+                    col_schema_scores = ColumnSchema(
+                        "item_id_scores", dtype=np.float32, properties=properties, dims=dims
+                    )
+                    col_schema_ids = ColumnSchema(
+                        "item_ids", dtype=np.dtype(col_dtype), properties=properties, dims=dims
+                    )
+                    output_cols.append(col_schema_scores)
+                    output_cols.append(col_schema_ids)
+                else:
+                    col_schema = ColumnSchema(
+                        name, dtype=np.float32, properties=properties, dims=dims
+                    )
+                    output_cols.append(col_schema)
 
         return Core_Schema(output_cols)
 

diff --git a/transformers4rec/torch/model/prediction_task.py b/transformers4rec/torch/model/prediction_task.py
@@ -303,7 +303,9 @@ def build(self, body, input_size, device=None, inputs=None, task_block=None, pre
             body, input_size, device=device, inputs=inputs, task_block=task_block, pre=pre
         )
 
-    def forward(self, inputs: torch.Tensor, targets=None, training=False, testing=False, **kwargs):
+    def forward(
+        self, inputs: torch.Tensor, targets=None, training=False, testing=False, top_k=-1, **kwargs
+    ):
         if isinstance(inputs, (tuple, list)):
             inputs = inputs[0]
         x = inputs.float()
@@ -342,7 +344,11 @@ def forward(self, inputs: torch.Tensor, targets=None, training=False, testing=Fa
             # Compute predictions probs
             x, _ = self.pre(x)  # type: ignore
 
-            return x
+            if top_k == -1:
+                return x
+            else:
+                preds_sorted_item_scores, preds_sorted_item_ids = torch.topk(x, k=top_k, dim=-1)
+                return preds_sorted_item_scores, preds_sorted_item_ids
 
     def remove_pad_3d(self, inp_tensor, non_pad_mask):
         # inp_tensor: (n_batch x seqlen x emb_dim)

diff --git a/transformers4rec/torch/trainer.py b/transformers4rec/torch/trainer.py
@@ -529,6 +529,12 @@ def evaluation_loop(
                     else nested_concat(labels_host, labels, padding_index=0)
                 )
             if preds is not None and self.args.predict_top_k > 0:
+                if self.model.top_k != -1:
+                    raise ValueError(
+                        "you cannot set top_k argument in the model class and the, "
+                        "predict_top_k  in the trainer at the same time. Please ensure setting "
+                        "only predict_top_k"
+                    )
                 # get outputs of next-item scores
                 if isinstance(preds, dict):
                     assert any(