
Commit 1c1f732

./format.sh

Signed-off-by: Harry Mellor <[email protected]>
hmellor committed Jan 8, 2025
1 parent c7d6971 commit 1c1f732
Showing 5 changed files with 17 additions and 15 deletions.
3 changes: 2 additions & 1 deletion docs/source/generate_examples.py

```diff
@@ -132,7 +132,8 @@ def generate(self) -> str:
                                           ROOT_DIR)

         content = f"Source <gh-file:{self.path.relative_to(ROOT_DIR)}>.\n\n"
-        include = "include" if self.main_file.suffix == ".md" else "literalinclude"
+        include = "include" if self.main_file.suffix == ".md" else \
+            "literalinclude"
         if include == "literalinclude":
             content += f"# {self.title}\n\n"
         content += f":::{{{include}}} {make_relative(self.main_file)}\n"
```
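
The ternary above decides how each generated docs page embeds its example: a Markdown source is merged in with MyST's {include} directive, while anything else (e.g. a .py script) is rendered verbatim via {literalinclude}. A minimal sketch of that selection, using a hypothetical example file that is not taken from the vLLM tree:

```python
from pathlib import Path

# Hypothetical example file; a .md source would select "include" instead.
main_file = Path("examples/offline_inference/demo.py")
include = "include" if main_file.suffix == ".md" else "literalinclude"

content = f":::{{{include}}} {main_file}\n"
print(content, end="")  # :::{literalinclude} examples/offline_inference/demo.py
```
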
10 changes: 5 additions & 5 deletions tools/profiler/visualize_layerwise_profile.py

```diff
@@ -534,11 +534,11 @@ def make_plot_title_suffix(profile_json: dict) -> str:
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()

-    parser.add_argument(
-        "--json-trace",
-        type=str,
-        required=True,
-        help="json trace file output by examples/offline_inference/offline_profile.py")
+    parser.add_argument("--json-trace",
+                        type=str,
+                        required=True,
+                        help="json trace file output by \
+                            examples/offline_inference/offline_profile.py")
     parser.add_argument("--output-directory",
                         type=str,
                         required=False,
```
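
The reformatted help string relies on a backslash continuation inside the literal: Python joins the next source line without inserting a newline, though any leading indentation on the continuation line becomes part of the string. That is harmless here because argparse's default HelpFormatter collapses runs of whitespace when rendering help. A minimal demonstration of the mechanics:

```python
# Backslash continuation inside a string literal: the next line is joined
# with no newline character; its leading spaces (if any) stay in the string.
help_text = "json trace file output by \
examples/offline_inference/offline_profile.py"

assert "\n" not in help_text
print(help_text)
```
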
3 changes: 2 additions & 1 deletion vllm/model_executor/model_loader/loader.py

```diff
@@ -529,7 +529,8 @@ class ShardedStateLoader(BaseModelLoader):
     Model loader that directly loads each worker's model state dict, which
     enables a fast load path for large tensor-parallel models where each worker
     only needs to read its own shard rather than the entire checkpoint. See
-    `examples/offline_inference/save_sharded_state.py` for creating a sharded checkpoint.
+    `examples/offline_inference/save_sharded_state.py` for creating a sharded
+    checkpoint.
     """

     DEFAULT_PATTERN = "model-rank-{rank}-part-{part}.safetensors"
```
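
For context, the DEFAULT_PATTERN at the end of the hunk is what names each shard on disk. An illustration with a hypothetical layout of two tensor-parallel ranks and two parts per rank:

```python
# Shard filenames produced by the pattern above; the 2x2 layout is
# hypothetical, chosen only to show how the placeholders expand.
DEFAULT_PATTERN = "model-rank-{rank}-part-{part}.safetensors"

for rank in range(2):
    for part in range(2):
        print(DEFAULT_PATTERN.format(rank=rank, part=part))
# model-rank-0-part-0.safetensors
# model-rank-0-part-1.safetensors
# model-rank-1-part-0.safetensors
# model-rank-1-part-1.safetensors
```
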
13 changes: 6 additions & 7 deletions vllm/model_executor/model_loader/tensorizer.py

```diff
@@ -363,13 +363,12 @@ def deserialize(self):
 def tensorizer_weights_iterator(
         tensorizer_args: "TensorizerArgs"
 ) -> Generator[Tuple[str, torch.Tensor], None, None]:
-    logger.warning(
-        "Deserializing HuggingFace models is not optimized for "
-        "loading on vLLM, as tensorizer is forced to load to CPU. "
-        "Consider deserializing a vLLM model instead for faster "
-        "load times. See the "
-        "examples/other/tensorize_vllm_model.py example script "
-        "for serializing vLLM models.")
+    logger.warning("Deserializing HuggingFace models is not optimized for "
+                   "loading on vLLM, as tensorizer is forced to load to CPU. "
+                   "Consider deserializing a vLLM model instead for faster "
+                   "load times. See the "
+                   "examples/other/tensorize_vllm_model.py example script "
+                   "for serializing vLLM models.")

     deserializer_args = tensorizer_args.deserializer_params
     stream_params = tensorizer_args.stream_params
```
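
The new call packs the message as adjacent string literals, which Python concatenates at compile time; each fragment therefore needs its trailing space so words do not run together across fragments. A tiny check of that behavior:

```python
# Adjacent literals concatenate at compile time; the trailing space in the
# first fragment is what separates "for" from "loading" in the final text.
msg = ("Deserializing HuggingFace models is not optimized for "
       "loading on vLLM, as tensorizer is forced to load to CPU.")

assert "for loading" in msg
```
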
3 changes: 2 additions & 1 deletion vllm/model_executor/model_loader/weight_utils.py

```diff
@@ -503,7 +503,8 @@ def kv_cache_scales_loader(
     KV cache scaling factors. The serialization should represent a dictionary
     whose keys are the TP ranks and values are another dictionary mapping layers
     to their KV cache scaling factors.
-    Keep this function in sync with the output of examples/other/fp8/extract_scales.py
+    Keep this function in sync with the output of
+    examples/other/fp8/extract_scales.py
     """
     try:
         with open(filename) as f:
```
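
As a sketch of the serialization this docstring describes: the outer keys are TP ranks and the inner dictionaries map layers to KV cache scaling factors. The numbers below are invented and only the bare mapping is shown; examples/other/fp8/extract_scales.py remains the authoritative definition of the full file format:

```python
import json

# Hypothetical rank -> {layer: scale} mapping; the values are made up.
payload = {
    "0": {"0": 0.0412, "1": 0.0387},
    "1": {"0": 0.0409, "1": 0.0391},
}
print(json.dumps(payload, indent=2))
```
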
