Support recent latest Torch versions

#3
by danieldk HF Staff - opened
This view is limited to 50 files because it contains too many changes. See the raw diff here.
Files changed (50) hide show
  1. build.toml +5 -1
  2. build/torch210-cxx11-cu126-x86_64-linux/__init__.py +48 -0
  3. build/{torch28-cxx11-cu126-x86_64-linux/quantization → torch210-cxx11-cu126-x86_64-linux}/_ops.py +3 -3
  4. build/{torch28-cxx11-cu126-x86_64-linux/quantization/_quantization_eabe7c2.abi3.so → torch210-cxx11-cu126-x86_64-linux/_quantization_e384bb2.abi3.so} +2 -2
  5. build/{torch28-cxx11-cu126-x86_64-linux/quantization → torch210-cxx11-cu126-x86_64-linux}/compressed_tensors.py +0 -0
  6. build/{torch28-cxx11-cu126-x86_64-linux/quantization → torch210-cxx11-cu126-x86_64-linux}/cutlass.py +0 -0
  7. build/{torch28-cxx11-cu126-x86_64-linux/quantization → torch210-cxx11-cu126-x86_64-linux}/marlin.py +0 -0
  8. build/torch210-cxx11-cu126-x86_64-linux/metadata.json +3 -0
  9. build/{torch28-cxx11-cu126-x86_64-linux/quantization → torch210-cxx11-cu126-x86_64-linux}/platforms.py +0 -0
  10. build/torch210-cxx11-cu126-x86_64-linux/quantization/__init__.py +26 -0
  11. build/{torch28-cxx11-cu126-x86_64-linux/quantization → torch210-cxx11-cu126-x86_64-linux}/scalar_type.py +0 -0
  12. build/{torch28-cxx11-cu126-x86_64-linux/quantization → torch210-cxx11-cu126-x86_64-linux}/utils/__init__.py +0 -0
  13. build/{torch28-cxx11-cu126-x86_64-linux/quantization → torch210-cxx11-cu126-x86_64-linux}/utils/marlin_utils.py +0 -0
  14. build/{torch28-cxx11-cu126-x86_64-linux/quantization → torch210-cxx11-cu126-x86_64-linux}/utils/marlin_utils_fp4.py +0 -0
  15. build/{torch28-cxx11-cu126-x86_64-linux/quantization → torch210-cxx11-cu126-x86_64-linux}/utils/marlin_utils_fp8.py +0 -0
  16. build/{torch28-cxx11-cu126-x86_64-linux/quantization → torch210-cxx11-cu126-x86_64-linux}/utils/marlin_utils_test.py +0 -0
  17. build/{torch28-cxx11-cu126-x86_64-linux/quantization → torch210-cxx11-cu126-x86_64-linux}/utils/marlin_utils_test_24.py +0 -0
  18. build/{torch28-cxx11-cu126-x86_64-linux/quantization → torch210-cxx11-cu126-x86_64-linux}/utils/marlin_utils_test_qqq.py +0 -0
  19. build/{torch28-cxx11-cu126-x86_64-linux/quantization → torch210-cxx11-cu126-x86_64-linux}/utils/quant_utils.py +0 -0
  20. build/torch210-cxx11-cu128-x86_64-linux/__init__.py +48 -0
  21. build/{torch28-cxx11-cu128-x86_64-linux/quantization → torch210-cxx11-cu128-x86_64-linux}/_ops.py +3 -3
  22. build/{torch28-cxx11-cu128-x86_64-linux/quantization/_quantization_eabe7c2.abi3.so → torch210-cxx11-cu128-x86_64-linux/_quantization_e384bb2.abi3.so} +2 -2
  23. build/{torch28-cxx11-cu128-x86_64-linux/quantization → torch210-cxx11-cu128-x86_64-linux}/compressed_tensors.py +0 -0
  24. build/{torch28-cxx11-cu128-x86_64-linux/quantization → torch210-cxx11-cu128-x86_64-linux}/cutlass.py +0 -0
  25. build/{torch28-cxx11-cu128-x86_64-linux/quantization → torch210-cxx11-cu128-x86_64-linux}/marlin.py +0 -0
  26. build/torch210-cxx11-cu128-x86_64-linux/metadata.json +3 -0
  27. build/{torch28-cxx11-cu128-x86_64-linux/quantization → torch210-cxx11-cu128-x86_64-linux}/platforms.py +0 -0
  28. build/torch210-cxx11-cu128-x86_64-linux/quantization/__init__.py +26 -0
  29. build/{torch28-cxx11-cu128-x86_64-linux/quantization → torch210-cxx11-cu128-x86_64-linux}/scalar_type.py +0 -0
  30. build/{torch28-cxx11-cu128-x86_64-linux/quantization → torch210-cxx11-cu128-x86_64-linux}/utils/__init__.py +0 -0
  31. build/{torch28-cxx11-cu128-x86_64-linux/quantization → torch210-cxx11-cu128-x86_64-linux}/utils/marlin_utils.py +0 -0
  32. build/{torch28-cxx11-cu128-x86_64-linux/quantization → torch210-cxx11-cu128-x86_64-linux}/utils/marlin_utils_fp4.py +0 -0
  33. build/{torch28-cxx11-cu128-x86_64-linux/quantization → torch210-cxx11-cu128-x86_64-linux}/utils/marlin_utils_fp8.py +0 -0
  34. build/{torch28-cxx11-cu128-x86_64-linux/quantization → torch210-cxx11-cu128-x86_64-linux}/utils/marlin_utils_test.py +0 -0
  35. build/{torch28-cxx11-cu128-x86_64-linux/quantization → torch210-cxx11-cu128-x86_64-linux}/utils/marlin_utils_test_24.py +0 -0
  36. build/{torch28-cxx11-cu128-x86_64-linux/quantization → torch210-cxx11-cu128-x86_64-linux}/utils/marlin_utils_test_qqq.py +0 -0
  37. build/{torch28-cxx11-cu128-x86_64-linux/quantization → torch210-cxx11-cu128-x86_64-linux}/utils/quant_utils.py +0 -0
  38. build/torch210-cxx11-cu130-x86_64-linux/__init__.py +48 -0
  39. build/{torch28-cxx11-cu129-x86_64-linux/quantization → torch210-cxx11-cu130-x86_64-linux}/_ops.py +3 -3
  40. build/{torch28-cxx11-cu129-x86_64-linux/quantization/_quantization_eabe7c2.abi3.so → torch210-cxx11-cu130-x86_64-linux/_quantization_e384bb2.abi3.so} +2 -2
  41. build/{torch28-cxx11-cu129-x86_64-linux/quantization → torch210-cxx11-cu130-x86_64-linux}/compressed_tensors.py +0 -0
  42. build/{torch28-cxx11-cu129-x86_64-linux/quantization → torch210-cxx11-cu130-x86_64-linux}/cutlass.py +0 -0
  43. build/{torch28-cxx11-cu129-x86_64-linux/quantization → torch210-cxx11-cu130-x86_64-linux}/marlin.py +0 -0
  44. build/torch210-cxx11-cu130-x86_64-linux/metadata.json +3 -0
  45. build/{torch28-cxx11-cu129-x86_64-linux/quantization → torch210-cxx11-cu130-x86_64-linux}/platforms.py +0 -0
  46. build/torch210-cxx11-cu130-x86_64-linux/quantization/__init__.py +26 -0
  47. build/{torch28-cxx11-cu129-x86_64-linux/quantization → torch210-cxx11-cu130-x86_64-linux}/scalar_type.py +0 -0
  48. build/{torch28-cxx11-cu129-x86_64-linux/quantization → torch210-cxx11-cu130-x86_64-linux}/utils/__init__.py +0 -0
  49. build/{torch28-cxx11-cu129-x86_64-linux/quantization → torch210-cxx11-cu130-x86_64-linux}/utils/marlin_utils.py +0 -0
  50. build/{torch28-cxx11-cu129-x86_64-linux/quantization → torch210-cxx11-cu130-x86_64-linux}/utils/marlin_utils_fp4.py +0 -0
build.toml CHANGED
@@ -1,6 +1,6 @@
1
  [general]
2
  name = "quantization"
3
- universal = false
4
 
5
  [torch]
6
  include = ["."]
@@ -22,6 +22,7 @@ cuda-capabilities = [
22
  "10.1",
23
  "12.0",
24
  ]
 
25
  depends = ["torch"]
26
  include = ["."]
27
  src = [
@@ -95,6 +96,7 @@ depends = ["torch"]
95
  include = ["."]
96
  src = [
97
  "compressed_tensors/int8_quant_kernels.cu",
 
98
  "dispatch_utils.h",
99
  "vectorization_utils.cuh",
100
  ]
@@ -119,6 +121,7 @@ include = ["."]
119
  src = [
120
  "fp8/common.cu",
121
  "fp8/common.cuh",
 
122
  "dispatch_utils.h",
123
  "utils.cuh",
124
  "vectorization.cuh",
@@ -229,6 +232,7 @@ cuda-capabilities = [
229
  "10.1",
230
  "12.0",
231
  ]
 
232
  depends = ["torch"]
233
  include = ["."]
234
  src = [
 
1
  [general]
2
  name = "quantization"
3
+ backends = ["cuda"]
4
 
5
  [torch]
6
  include = ["."]
 
22
  "10.1",
23
  "12.0",
24
  ]
25
+ cuda-flags = ["-static-global-template-stub=false"]
26
  depends = ["torch"]
27
  include = ["."]
28
  src = [
 
96
  include = ["."]
97
  src = [
98
  "compressed_tensors/int8_quant_kernels.cu",
99
+ "cub_helpers.h",
100
  "dispatch_utils.h",
101
  "vectorization_utils.cuh",
102
  ]
 
121
  src = [
122
  "fp8/common.cu",
123
  "fp8/common.cuh",
124
+ "cub_helpers.h",
125
  "dispatch_utils.h",
126
  "utils.cuh",
127
  "vectorization.cuh",
 
232
  "10.1",
233
  "12.0",
234
  ]
235
+ cuda-flags = ["-static-global-template-stub=false"]
236
  depends = ["torch"]
237
  include = ["."]
238
  src = [
build/torch210-cxx11-cu126-x86_64-linux/__init__.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .compressed_tensors import scaled_fp8_quant, scaled_int8_quant
2
+ from .cutlass import (
3
+ cutlass_scaled_mm_supports_block_fp8,
4
+ cutlass_scaled_mm_supports_fp8,
5
+ cutlass_scaled_mm,
6
+ cutlass_scaled_mm_azp,
7
+ )
8
+ from .marlin import (
9
+ awq_marlin_repack,
10
+ gptq_marlin_gemm,
11
+ gptq_marlin_repack,
12
+ gptq_marlin_24_gemm,
13
+ marlin_qqq_gemm,
14
+ marlin_gemm,
15
+ )
16
+ from .scalar_type import (
17
+ ScalarType,
18
+ scalar_types,
19
+ )
20
+ from ._ops import ops
21
+
22
+ from .utils import marlin_utils
23
+ from .utils import marlin_utils_fp4
24
+ from .utils import marlin_utils_fp8
25
+ from .utils import quant_utils
26
+
27
+
28
+ __all__ = [
29
+ "ScalarType",
30
+ "awq_marlin_repack",
31
+ "cutlass_scaled_mm",
32
+ "cutlass_scaled_mm_azp",
33
+ "cutlass_scaled_mm_supports_block_fp8",
34
+ "cutlass_scaled_mm_supports_fp8",
35
+ "gptq_marlin_24_gemm",
36
+ "gptq_marlin_gemm",
37
+ "gptq_marlin_repack",
38
+ "marlin_gemm",
39
+ "marlin_qqq_gemm",
40
+ "marlin_utils",
41
+ "marlin_utils_fp4",
42
+ "marlin_utils_fp8",
43
+ "ops",
44
+ "quant_utils",
45
+ "scalar_types",
46
+ "scaled_fp8_quant",
47
+ "scaled_int8_quant",
48
+ ]
build/{torch28-cxx11-cu126-x86_64-linux/quantization β†’ torch210-cxx11-cu126-x86_64-linux}/_ops.py RENAMED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _quantization_eabe7c2
3
- ops = torch.ops._quantization_eabe7c2
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_quantization_eabe7c2::{op_name}"
 
1
  import torch
2
+ from . import _quantization_e384bb2
3
+ ops = torch.ops._quantization_e384bb2
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_quantization_e384bb2::{op_name}"
build/{torch28-cxx11-cu126-x86_64-linux/quantization/_quantization_eabe7c2.abi3.so β†’ torch210-cxx11-cu126-x86_64-linux/_quantization_e384bb2.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:72083bbb272942332e2586f6b0068dd7f4347981193db1e682e90f54b0b01c87
3
- size 160171952
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85e946af129d0f8ad6cd758d0270a5ff336a6cba021626d72b81e40529b55fc9
3
+ size 196115272
build/{torch28-cxx11-cu126-x86_64-linux/quantization β†’ torch210-cxx11-cu126-x86_64-linux}/compressed_tensors.py RENAMED
File without changes
build/{torch28-cxx11-cu126-x86_64-linux/quantization β†’ torch210-cxx11-cu126-x86_64-linux}/cutlass.py RENAMED
File without changes
build/{torch28-cxx11-cu126-x86_64-linux/quantization β†’ torch210-cxx11-cu126-x86_64-linux}/marlin.py RENAMED
File without changes
build/torch210-cxx11-cu126-x86_64-linux/metadata.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "python-depends": []
3
+ }
build/{torch28-cxx11-cu126-x86_64-linux/quantization β†’ torch210-cxx11-cu126-x86_64-linux}/platforms.py RENAMED
File without changes
build/torch210-cxx11-cu126-x86_64-linux/quantization/__init__.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import ctypes
2
+ import sys
3
+
4
+ import importlib
5
+ from pathlib import Path
6
+ from types import ModuleType
7
+
8
+ def _import_from_path(file_path: Path) -> ModuleType:
9
+ # We cannot use the module name as-is, after adding it to `sys.modules`,
10
+ # it would also be used for other imports. So, we make a module name that
11
+ # depends on the path for it to be unique using the hex-encoded hash of
12
+ # the path.
13
+ path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
14
+ module_name = path_hash
15
+ spec = importlib.util.spec_from_file_location(module_name, file_path)
16
+ if spec is None:
17
+ raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
18
+ module = importlib.util.module_from_spec(spec)
19
+ if module is None:
20
+ raise ImportError(f"Cannot load module {module_name} from spec")
21
+ sys.modules[module_name] = module
22
+ spec.loader.exec_module(module) # type: ignore
23
+ return module
24
+
25
+
26
+ globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
build/{torch28-cxx11-cu126-x86_64-linux/quantization β†’ torch210-cxx11-cu126-x86_64-linux}/scalar_type.py RENAMED
File without changes
build/{torch28-cxx11-cu126-x86_64-linux/quantization β†’ torch210-cxx11-cu126-x86_64-linux}/utils/__init__.py RENAMED
File without changes
build/{torch28-cxx11-cu126-x86_64-linux/quantization β†’ torch210-cxx11-cu126-x86_64-linux}/utils/marlin_utils.py RENAMED
File without changes
build/{torch28-cxx11-cu126-x86_64-linux/quantization β†’ torch210-cxx11-cu126-x86_64-linux}/utils/marlin_utils_fp4.py RENAMED
File without changes
build/{torch28-cxx11-cu126-x86_64-linux/quantization β†’ torch210-cxx11-cu126-x86_64-linux}/utils/marlin_utils_fp8.py RENAMED
File without changes
build/{torch28-cxx11-cu126-x86_64-linux/quantization β†’ torch210-cxx11-cu126-x86_64-linux}/utils/marlin_utils_test.py RENAMED
File without changes
build/{torch28-cxx11-cu126-x86_64-linux/quantization β†’ torch210-cxx11-cu126-x86_64-linux}/utils/marlin_utils_test_24.py RENAMED
File without changes
build/{torch28-cxx11-cu126-x86_64-linux/quantization β†’ torch210-cxx11-cu126-x86_64-linux}/utils/marlin_utils_test_qqq.py RENAMED
File without changes
build/{torch28-cxx11-cu126-x86_64-linux/quantization β†’ torch210-cxx11-cu126-x86_64-linux}/utils/quant_utils.py RENAMED
File without changes
build/torch210-cxx11-cu128-x86_64-linux/__init__.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .compressed_tensors import scaled_fp8_quant, scaled_int8_quant
2
+ from .cutlass import (
3
+ cutlass_scaled_mm_supports_block_fp8,
4
+ cutlass_scaled_mm_supports_fp8,
5
+ cutlass_scaled_mm,
6
+ cutlass_scaled_mm_azp,
7
+ )
8
+ from .marlin import (
9
+ awq_marlin_repack,
10
+ gptq_marlin_gemm,
11
+ gptq_marlin_repack,
12
+ gptq_marlin_24_gemm,
13
+ marlin_qqq_gemm,
14
+ marlin_gemm,
15
+ )
16
+ from .scalar_type import (
17
+ ScalarType,
18
+ scalar_types,
19
+ )
20
+ from ._ops import ops
21
+
22
+ from .utils import marlin_utils
23
+ from .utils import marlin_utils_fp4
24
+ from .utils import marlin_utils_fp8
25
+ from .utils import quant_utils
26
+
27
+
28
+ __all__ = [
29
+ "ScalarType",
30
+ "awq_marlin_repack",
31
+ "cutlass_scaled_mm",
32
+ "cutlass_scaled_mm_azp",
33
+ "cutlass_scaled_mm_supports_block_fp8",
34
+ "cutlass_scaled_mm_supports_fp8",
35
+ "gptq_marlin_24_gemm",
36
+ "gptq_marlin_gemm",
37
+ "gptq_marlin_repack",
38
+ "marlin_gemm",
39
+ "marlin_qqq_gemm",
40
+ "marlin_utils",
41
+ "marlin_utils_fp4",
42
+ "marlin_utils_fp8",
43
+ "ops",
44
+ "quant_utils",
45
+ "scalar_types",
46
+ "scaled_fp8_quant",
47
+ "scaled_int8_quant",
48
+ ]
build/{torch28-cxx11-cu128-x86_64-linux/quantization β†’ torch210-cxx11-cu128-x86_64-linux}/_ops.py RENAMED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _quantization_eabe7c2
3
- ops = torch.ops._quantization_eabe7c2
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_quantization_eabe7c2::{op_name}"
 
1
  import torch
2
+ from . import _quantization_e384bb2
3
+ ops = torch.ops._quantization_e384bb2
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_quantization_e384bb2::{op_name}"
build/{torch28-cxx11-cu128-x86_64-linux/quantization/_quantization_eabe7c2.abi3.so β†’ torch210-cxx11-cu128-x86_64-linux/_quantization_e384bb2.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dec4454faaae3f515aac5e2ff702d910c660ea7b23d88d1ef643abddd71c21c2
3
- size 296976072
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e996222b90650b5074935088ed5cf27816148197c6dd75023d56a0c7b7f67d1
3
+ size 332877416
build/{torch28-cxx11-cu128-x86_64-linux/quantization β†’ torch210-cxx11-cu128-x86_64-linux}/compressed_tensors.py RENAMED
File without changes
build/{torch28-cxx11-cu128-x86_64-linux/quantization β†’ torch210-cxx11-cu128-x86_64-linux}/cutlass.py RENAMED
File without changes
build/{torch28-cxx11-cu128-x86_64-linux/quantization β†’ torch210-cxx11-cu128-x86_64-linux}/marlin.py RENAMED
File without changes
build/torch210-cxx11-cu128-x86_64-linux/metadata.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "python-depends": []
3
+ }
build/{torch28-cxx11-cu128-x86_64-linux/quantization β†’ torch210-cxx11-cu128-x86_64-linux}/platforms.py RENAMED
File without changes
build/torch210-cxx11-cu128-x86_64-linux/quantization/__init__.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import ctypes
2
+ import sys
3
+
4
+ import importlib
5
+ from pathlib import Path
6
+ from types import ModuleType
7
+
8
+ def _import_from_path(file_path: Path) -> ModuleType:
9
+ # We cannot use the module name as-is, after adding it to `sys.modules`,
10
+ # it would also be used for other imports. So, we make a module name that
11
+ # depends on the path for it to be unique using the hex-encoded hash of
12
+ # the path.
13
+ path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
14
+ module_name = path_hash
15
+ spec = importlib.util.spec_from_file_location(module_name, file_path)
16
+ if spec is None:
17
+ raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
18
+ module = importlib.util.module_from_spec(spec)
19
+ if module is None:
20
+ raise ImportError(f"Cannot load module {module_name} from spec")
21
+ sys.modules[module_name] = module
22
+ spec.loader.exec_module(module) # type: ignore
23
+ return module
24
+
25
+
26
+ globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
build/{torch28-cxx11-cu128-x86_64-linux/quantization β†’ torch210-cxx11-cu128-x86_64-linux}/scalar_type.py RENAMED
File without changes
build/{torch28-cxx11-cu128-x86_64-linux/quantization β†’ torch210-cxx11-cu128-x86_64-linux}/utils/__init__.py RENAMED
File without changes
build/{torch28-cxx11-cu128-x86_64-linux/quantization β†’ torch210-cxx11-cu128-x86_64-linux}/utils/marlin_utils.py RENAMED
File without changes
build/{torch28-cxx11-cu128-x86_64-linux/quantization β†’ torch210-cxx11-cu128-x86_64-linux}/utils/marlin_utils_fp4.py RENAMED
File without changes
build/{torch28-cxx11-cu128-x86_64-linux/quantization β†’ torch210-cxx11-cu128-x86_64-linux}/utils/marlin_utils_fp8.py RENAMED
File without changes
build/{torch28-cxx11-cu128-x86_64-linux/quantization β†’ torch210-cxx11-cu128-x86_64-linux}/utils/marlin_utils_test.py RENAMED
File without changes
build/{torch28-cxx11-cu128-x86_64-linux/quantization β†’ torch210-cxx11-cu128-x86_64-linux}/utils/marlin_utils_test_24.py RENAMED
File without changes
build/{torch28-cxx11-cu128-x86_64-linux/quantization β†’ torch210-cxx11-cu128-x86_64-linux}/utils/marlin_utils_test_qqq.py RENAMED
File without changes
build/{torch28-cxx11-cu128-x86_64-linux/quantization β†’ torch210-cxx11-cu128-x86_64-linux}/utils/quant_utils.py RENAMED
File without changes
build/torch210-cxx11-cu130-x86_64-linux/__init__.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .compressed_tensors import scaled_fp8_quant, scaled_int8_quant
2
+ from .cutlass import (
3
+ cutlass_scaled_mm_supports_block_fp8,
4
+ cutlass_scaled_mm_supports_fp8,
5
+ cutlass_scaled_mm,
6
+ cutlass_scaled_mm_azp,
7
+ )
8
+ from .marlin import (
9
+ awq_marlin_repack,
10
+ gptq_marlin_gemm,
11
+ gptq_marlin_repack,
12
+ gptq_marlin_24_gemm,
13
+ marlin_qqq_gemm,
14
+ marlin_gemm,
15
+ )
16
+ from .scalar_type import (
17
+ ScalarType,
18
+ scalar_types,
19
+ )
20
+ from ._ops import ops
21
+
22
+ from .utils import marlin_utils
23
+ from .utils import marlin_utils_fp4
24
+ from .utils import marlin_utils_fp8
25
+ from .utils import quant_utils
26
+
27
+
28
+ __all__ = [
29
+ "ScalarType",
30
+ "awq_marlin_repack",
31
+ "cutlass_scaled_mm",
32
+ "cutlass_scaled_mm_azp",
33
+ "cutlass_scaled_mm_supports_block_fp8",
34
+ "cutlass_scaled_mm_supports_fp8",
35
+ "gptq_marlin_24_gemm",
36
+ "gptq_marlin_gemm",
37
+ "gptq_marlin_repack",
38
+ "marlin_gemm",
39
+ "marlin_qqq_gemm",
40
+ "marlin_utils",
41
+ "marlin_utils_fp4",
42
+ "marlin_utils_fp8",
43
+ "ops",
44
+ "quant_utils",
45
+ "scalar_types",
46
+ "scaled_fp8_quant",
47
+ "scaled_int8_quant",
48
+ ]
build/{torch28-cxx11-cu129-x86_64-linux/quantization β†’ torch210-cxx11-cu130-x86_64-linux}/_ops.py RENAMED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _quantization_eabe7c2
3
- ops = torch.ops._quantization_eabe7c2
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_quantization_eabe7c2::{op_name}"
 
1
  import torch
2
+ from . import _quantization_e384bb2
3
+ ops = torch.ops._quantization_e384bb2
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_quantization_e384bb2::{op_name}"
build/{torch28-cxx11-cu129-x86_64-linux/quantization/_quantization_eabe7c2.abi3.so β†’ torch210-cxx11-cu130-x86_64-linux/_quantization_e384bb2.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d6cf80d6102e5a72eac6a15260a296193fefaa2d878ed5ca7458e5e54707a85a
3
- size 304924864
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f94359faa54a316eb410e331755a19c96c17e87d74ffae035ca8187d62f58891
3
+ size 289949016
build/{torch28-cxx11-cu129-x86_64-linux/quantization β†’ torch210-cxx11-cu130-x86_64-linux}/compressed_tensors.py RENAMED
File without changes
build/{torch28-cxx11-cu129-x86_64-linux/quantization β†’ torch210-cxx11-cu130-x86_64-linux}/cutlass.py RENAMED
File without changes
build/{torch28-cxx11-cu129-x86_64-linux/quantization β†’ torch210-cxx11-cu130-x86_64-linux}/marlin.py RENAMED
File without changes
build/torch210-cxx11-cu130-x86_64-linux/metadata.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "python-depends": []
3
+ }
build/{torch28-cxx11-cu129-x86_64-linux/quantization β†’ torch210-cxx11-cu130-x86_64-linux}/platforms.py RENAMED
File without changes
build/torch210-cxx11-cu130-x86_64-linux/quantization/__init__.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import ctypes
2
+ import sys
3
+
4
+ import importlib
5
+ from pathlib import Path
6
+ from types import ModuleType
7
+
8
+ def _import_from_path(file_path: Path) -> ModuleType:
9
+ # We cannot use the module name as-is, after adding it to `sys.modules`,
10
+ # it would also be used for other imports. So, we make a module name that
11
+ # depends on the path for it to be unique using the hex-encoded hash of
12
+ # the path.
13
+ path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
14
+ module_name = path_hash
15
+ spec = importlib.util.spec_from_file_location(module_name, file_path)
16
+ if spec is None:
17
+ raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
18
+ module = importlib.util.module_from_spec(spec)
19
+ if module is None:
20
+ raise ImportError(f"Cannot load module {module_name} from spec")
21
+ sys.modules[module_name] = module
22
+ spec.loader.exec_module(module) # type: ignore
23
+ return module
24
+
25
+
26
+ globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
build/{torch28-cxx11-cu129-x86_64-linux/quantization β†’ torch210-cxx11-cu130-x86_64-linux}/scalar_type.py RENAMED
File without changes
build/{torch28-cxx11-cu129-x86_64-linux/quantization β†’ torch210-cxx11-cu130-x86_64-linux}/utils/__init__.py RENAMED
File without changes
build/{torch28-cxx11-cu129-x86_64-linux/quantization β†’ torch210-cxx11-cu130-x86_64-linux}/utils/marlin_utils.py RENAMED
File without changes
build/{torch28-cxx11-cu129-x86_64-linux/quantization β†’ torch210-cxx11-cu130-x86_64-linux}/utils/marlin_utils_fp4.py RENAMED
File without changes