diff --git a/flake.lock b/flake.lock index 44802e18f30..d304e3ccc7a 100644 --- a/flake.lock +++ b/flake.lock @@ -978,16 +978,15 @@ "nixpkgs": "nixpkgs_6" }, "locked": { - "lastModified": 1736179589, - "narHash": "sha256-/zZCSieBJncVXqOFbvbSov76g2eWAxVxEJNNA6SmQKc=", + "lastModified": 1736436388, + "narHash": "sha256-CIyxVPpM9RrSwthNT/4DQ10YPk/uwzP7AeE83kBNsrE=", "owner": "huggingface", "repo": "text-generation-inference-nix", - "rev": "fc7ff53b2cd5c984ad1434f20c271e3b7600d1c4", + "rev": "5103c3fb1f9ad1fd33b6e09ff05e957884b112d5", "type": "github" }, "original": { "owner": "huggingface", - "ref": "flashinfer-v0.2", "repo": "text-generation-inference-nix", "type": "github" } diff --git a/flake.nix b/flake.nix index a302db3eac8..83cedfa620f 100644 --- a/flake.nix +++ b/flake.nix @@ -5,7 +5,7 @@ inputs.nixpkgs.follows = "tgi-nix/nixpkgs"; }; nix-filter.url = "github:numtide/nix-filter"; - tgi-nix.url = "github:huggingface/text-generation-inference-nix/flashinfer-v0.2"; + tgi-nix.url = "github:huggingface/text-generation-inference-nix"; nixpkgs.follows = "tgi-nix/nixpkgs"; flake-utils.url = "github:numtide/flake-utils"; rust-overlay = { diff --git a/server/poetry.lock b/server/poetry.lock index 7cf440dd3e1..69133015ef0 100644 --- a/server/poetry.lock +++ b/server/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.5 and should not be changed by hand. [[package]] name = "accelerate" @@ -1289,12 +1289,12 @@ files = [ [[package]] name = "marlin-kernels" -version = "0.3.6" +version = "0.3.7" description = "Marlin quantization kernels" optional = true python-versions = ">=3.7" files = [ - {file = "marlin_kernels-0.3.6+cu123torch2.4-cp310-cp310-linux_x86_64.whl", hash = "sha256:afedaa9a15e8991442bc8c81f62833fbf5c1556ae9d7a5a9e13b747ce97beef9"}, + {file = "marlin_kernels-0.3.7+cu123torch2.4-cp310-cp310-linux_x86_64.whl", hash = "sha256:bb416d14623dc0ad0eeb2835446c37a41f994555f1baec8701de6d4c1fc17ec8"}, ] [package.dependencies] @@ -1302,16 +1302,16 @@ torch = "*" [package.source] type = "url" -url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.6/marlin_kernels-0.3.6+cu123torch2.4-cp310-cp310-linux_x86_64.whl" +url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.7/marlin_kernels-0.3.7+cu123torch2.4-cp310-cp310-linux_x86_64.whl" [[package]] name = "marlin-kernels" -version = "0.3.6" +version = "0.3.7" description = "Marlin quantization kernels" optional = true python-versions = ">=3.7" files = [ - {file = "marlin_kernels-0.3.6+cu123torch2.4-cp311-cp311-linux_x86_64.whl", hash = "sha256:c0c05621d5e87144415d8a6e439072bd844d5f3cb55e4c4c69eabdc4c94610f4"}, + {file = "marlin_kernels-0.3.7+cu123torch2.4-cp311-cp311-linux_x86_64.whl", hash = "sha256:a89bb61d718002d4432158641bce95c6fd68f9ee1a7d5402dd283903397f3185"}, ] [package.dependencies] @@ -1319,16 +1319,16 @@ torch = "*" [package.source] type = "url" -url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.6/marlin_kernels-0.3.6+cu123torch2.4-cp311-cp311-linux_x86_64.whl" +url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.7/marlin_kernels-0.3.7+cu123torch2.4-cp311-cp311-linux_x86_64.whl" [[package]] name = "marlin-kernels" -version = "0.3.6" +version = "0.3.7" description = "Marlin quantization kernels" optional = true python-versions = ">=3.7" files = [ - {file = "marlin_kernels-0.3.6+cu123torch2.4-cp312-cp312-linux_x86_64.whl", hash = "sha256:3be4662c8d25a3cdb1793dafe0e2e76dd600913a69a468e2c68d1fed4e149255"}, + {file = "marlin_kernels-0.3.7+cu123torch2.4-cp312-cp312-linux_x86_64.whl", hash = "sha256:ed938d196fc5e9cce9fc44cd2b889d5adc5ca7475c8a23858f1474d29e38bdbf"}, ] [package.dependencies] @@ -1336,16 +1336,16 @@ torch = "*" [package.source] type = "url" -url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.6/marlin_kernels-0.3.6+cu123torch2.4-cp312-cp312-linux_x86_64.whl" +url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.7/marlin_kernels-0.3.7+cu123torch2.4-cp312-cp312-linux_x86_64.whl" [[package]] name = "marlin-kernels" -version = "0.3.6" +version = "0.3.7" description = "Marlin quantization kernels" optional = true python-versions = ">=3.7" files = [ - {file = "marlin_kernels-0.3.6+cu123torch2.4-cp39-cp39-linux_x86_64.whl", hash = "sha256:89eac9d46bc084a256b538afda6053683eb7e505db0e0d4f6dbeca32368caac6"}, + {file = "marlin_kernels-0.3.7+cu123torch2.4-cp39-cp39-linux_x86_64.whl", hash = "sha256:113c54f68565ad476ca12366b4de92131fa3e9ddb16cbe8ad63272972a15ac28"}, ] [package.dependencies] @@ -1353,7 +1353,7 @@ torch = "*" [package.source] type = "url" -url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.6/marlin_kernels-0.3.6+cu123torch2.4-cp39-cp39-linux_x86_64.whl" +url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.7/marlin_kernels-0.3.7+cu123torch2.4-cp39-cp39-linux_x86_64.whl" [[package]] name = "mdurl" @@ -4097,4 +4097,4 @@ torch = ["torch"] [metadata] lock-version = "2.0" python-versions = ">=3.9,<3.13" -content-hash = "c7fdcff2b752cd3beb3995c1ecd15f0f4d9b4e117048b06ab991c6d0e0c86ff3" +content-hash = "25f96d5dea777bfa7a959f863e35d2e05e1a6172d0dd45193dbe25ac2f32cc25" diff --git a/server/pyproject.toml b/server/pyproject.toml index 0d56e9c7375..bc74a05ab0b 100644 --- a/server/pyproject.toml +++ b/server/pyproject.toml @@ -48,10 +48,10 @@ attention-kernels = [ { url = "https://github.com/danieldk/attention-kernels/releases/download/v0.1.1/attention_kernels-0.1.1+cu123torch2.4-cp312-cp312-linux_x86_64.whl", python = "~3.12", optional = true }, ] marlin-kernels = [ - { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.6/marlin_kernels-0.3.6+cu123torch2.4-cp39-cp39-linux_x86_64.whl", python = "~3.9", optional = true }, - { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.6/marlin_kernels-0.3.6+cu123torch2.4-cp310-cp310-linux_x86_64.whl", python = "~3.10", optional = true }, - { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.6/marlin_kernels-0.3.6+cu123torch2.4-cp311-cp311-linux_x86_64.whl", python = "~3.11", optional = true }, - { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.6/marlin_kernels-0.3.6+cu123torch2.4-cp312-cp312-linux_x86_64.whl", python = "~3.12", optional = true }, + { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.7/marlin_kernels-0.3.7+cu123torch2.4-cp39-cp39-linux_x86_64.whl", python = "~3.9", optional = true }, + { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.7/marlin_kernels-0.3.7+cu123torch2.4-cp310-cp310-linux_x86_64.whl", python = "~3.10", optional = true }, + { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.7/marlin_kernels-0.3.7+cu123torch2.4-cp311-cp311-linux_x86_64.whl", python = "~3.11", optional = true }, + { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.7/marlin_kernels-0.3.7+cu123torch2.4-cp312-cp312-linux_x86_64.whl", python = "~3.12", optional = true }, ] moe-kernels = [ { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.4-cp39-cp39-linux_x86_64.whl", python = "~3.9", optional = true },