Skip to content

Commit

Permalink
Merge pull request #2 from a-r-r-o-w/latte-2
Browse files Browse the repository at this point in the history
update _toctree.yml for docs and fix example
  • Loading branch information
maxin-cn authored Jul 11, 2024
2 parents 2ea37c9 + 521ed5c commit 7988119
Show file tree
Hide file tree
Showing 6 changed files with 25 additions and 13 deletions.
2 changes: 2 additions & 0 deletions docs/source/en/_toctree.yml
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,8 @@
title: DiTTransformer2DModel
- local: api/models/hunyuan_transformer2d
title: HunyuanDiT2DModel
- local: api/models/latte_transformer3d
title: LatteTransformer3DModel
- local: api/models/lumina_nextdit2d
title: LuminaNextDiT2DModel
- local: api/models/transformer_temporal
Expand Down
1 change: 0 additions & 1 deletion docs/source/en/api/models/latte_transformer3d.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,3 @@ A Diffusion Transformer model for 3D data from [Latte](https://github.com/Vchite
## LatteTransformer3DModel

[[autodoc]] LatteTransformer3DModel

19 changes: 15 additions & 4 deletions src/diffusers/models/embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,21 @@ def get_timestep_embedding(
"""
This matches the implementation in Denoising Diffusion Probabilistic Models: Create sinusoidal timestep embeddings.
:param timesteps: a 1-D Tensor of N indices, one per batch element.
These may be fractional.
:param embedding_dim: the dimension of the output. :param max_period: controls the minimum frequency of the
embeddings. :return: an [N x dim] Tensor of positional embeddings.
Args:
timesteps (torch.Tensor):
a 1-D Tensor of N indices, one per batch element. These may be fractional.
embedding_dim (int):
the dimension of the output.
flip_sin_to_cos (bool):
Whether the embedding order should be `cos, sin` (if True) or `sin, cos` (if False)
downscale_freq_shift (float):
Controls the frequency delta between adjacent embedding dimensions.
scale (float):
Scaling factor applied to the embeddings.
max_period (int):
Controls the minimum frequency of the embeddings (i.e. the longest sinusoid period).
Returns:
torch.Tensor: an [N x dim] Tensor of positional embeddings.
"""
assert len(timesteps.shape) == 1, "Timesteps should be a 1d-array"

Expand Down
6 changes: 3 additions & 3 deletions src/diffusers/pipelines/latte/pipeline_latte.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,12 @@
>>> from diffusers.utils import export_to_gif
>>> # You can replace the checkpoint id with "maxin-cn/Latte-1" too.
>>> pipe = LattePipeline.from_pretrained("maxin-cn/Latte-1", torch_dtype=torch.float16)
>>> pipe = LattePipeline.from_pretrained("maxin-cn/Latte-1", torch_dtype=torch.float16).to("cuda")
>>> # Enable memory optimizations.
>>> pipe.enable_model_cpu_offload()
>>> prompt = "A small cactus with a happy face in the Sahara desert."
>>> videos = pipe(prompt).frames
>>> videos = pipe(prompt).frames[0]
>>> export_to_gif(videos, "latte.gif")
```
"""
Expand Down Expand Up @@ -576,7 +576,7 @@ def prepare_latents(
# scale the initial noise by the standard deviation required by the scheduler
latents = latents * self.scheduler.init_noise_sigma
return latents

@property
def guidance_scale(self):
return self._guidance_scale
Expand Down
8 changes: 4 additions & 4 deletions src/diffusers/utils/dummy_torch_and_transformers_objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -677,7 +677,7 @@ def from_pretrained(cls, *args, **kwargs):
requires_backends(cls, ["torch", "transformers"])


class LDMTextToImagePipeline(metaclass=DummyObject):
class LattePipeline(metaclass=DummyObject):
_backends = ["torch", "transformers"]

def __init__(self, *args, **kwargs):
Expand All @@ -692,7 +692,7 @@ def from_pretrained(cls, *args, **kwargs):
requires_backends(cls, ["torch", "transformers"])


class LEditsPPPipelineStableDiffusion(metaclass=DummyObject):
class LDMTextToImagePipeline(metaclass=DummyObject):
_backends = ["torch", "transformers"]

def __init__(self, *args, **kwargs):
Expand All @@ -707,7 +707,7 @@ def from_pretrained(cls, *args, **kwargs):
requires_backends(cls, ["torch", "transformers"])


class LEditsPPPipelineStableDiffusionXL(metaclass=DummyObject):
class LEditsPPPipelineStableDiffusion(metaclass=DummyObject):
_backends = ["torch", "transformers"]

def __init__(self, *args, **kwargs):
Expand All @@ -722,7 +722,7 @@ def from_pretrained(cls, *args, **kwargs):
requires_backends(cls, ["torch", "transformers"])


class LattePipeline(metaclass=DummyObject):
class LEditsPPPipelineStableDiffusionXL(metaclass=DummyObject):
_backends = ["torch", "transformers"]

def __init__(self, *args, **kwargs):
Expand Down
2 changes: 1 addition & 1 deletion tests/pipelines/latte/test_latte.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
# limitations under the License.

import gc
import inspect
import tempfile
import unittest

Expand All @@ -38,7 +39,6 @@
from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS
from ..test_pipelines_common import PipelineTesterMixin, to_np

import inspect

enable_full_determinism()

Expand Down

0 comments on commit 7988119

Please sign in to comment.