[Official Tutorial] ChatGLM2-6B Deployment and Fine-tuning

Microsoft Windows [Version 10.0.19045.3086]
(c) Microsoft Corporation. All rights reserved.
C:\Users\Administrator>conda env list
# conda environments:
#
base                     D:\Develop\anaconda3
                         H:\OpenAI\ChatGLM2-6B\ENV
C:\Users\Administrator>cd /d H:\OpenAI\ChatGLM2-6B
H:\OpenAI\ChatGLM2-6B>conda activate H:\OpenAI\ChatGLM2-6B\ENV
(H:\OpenAI\ChatGLM2-6B\ENV) H:\OpenAI\ChatGLM2-6B>python cli_demo.py
You are using a model of type chatglm to instantiate a model of type . This is not supported for all configurations of models and can yield errors.
Loading checkpoint shards: 100%|█████████████████████████████████████████████| 7/7 [37:52<00:00, 324.60s/it]
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ H:\OpenAI\ChatGLM2-6B\cli_demo.py:8 in <module> │
│ │
│ 5 import readline │
│ 6 │
│ 7 tokenizer = AutoTokenizer.from_pretrained("THUDM\chatglm2-6b", trust_remote_code=True) │
│ ❱ 8 model = AutoModel.from_pretrained("THUDM\chatglm2-6b", trust_remote_code=True).quantize( │
│ 9 model = model.eval() │
│ 10 │
│ 11 os_name = platform.system() │
│ │
│ C:\Users\Administrator/.cache\huggingface\modules\transformers_modules\chatglm2-6b\modeling_chat │
│ glm.py:1109 in quantize │
│ │
│ 1106 │ │ │
│ 1107 │ │ self.config.quantization_bit = bits │
│ 1108 │ │ │
│ ❱ 1109 │ │ self.transformer.encoder = quantize(self.transformer.encoder, bits, empty_init=e │
│ 1110 │ │ │ │ │ │ │ │ │ │ │ **kwargs) │
│ 1111 │ │ return self │
│ 1112 │
│ │
│ C:\Users\Administrator/.cache\huggingface\modules\transformers_modules\chatglm2-6b\quantization. │
│ py:155 in quantize │
│ │
│ 152 def quantize(model, weight_bit_width, empty_init=False, device=None): │
│ 153 │ """Replace fp16 linear with quantized linear""" │
│ 154 │ for layer in model.layers: │
│ ❱ 155 │ │ layer.self_attention.query_key_value = QuantizedLinear( │
│ 156 │ │ │ weight_bit_width=weight_bit_width, │
│ 157 │ │ │ weight=layer.self_attention.query_key_value.weight.to(torch.cuda.current_dev │
│ 158 │ │ │ bias=layer.self_attention.query_key_value.bias, │
│ │
│ C:\Users\Administrator/.cache\huggingface\modules\transformers_modules\chatglm2-6b\quantization. │
│ py:139 in __init__ │
│ │
│ 136 │ │ │ self.weight_scale = weight.abs().max(dim=-1).values / ((2 ** (weight_bit_wid │
│ 137 │ │ │ self.weight = torch.round(weight / self.weight_scale[:, None]).to(torch.int8 │
│ 138 │ │ │ if weight_bit_width == 4: │
│ ❱ 139 │ │ │ │ self.weight = compress_int4_weight(self.weight) │
│ 140 │ │ │
│ 141 │ │ self.weight = Parameter(self.weight.to(device), requires_grad=False) │
│ 142 │ │ self.weight_scale = Parameter(self.weight_scale.to(device), requires_grad=False) │
│ │
│ C:\Users\Administrator/.cache\huggingface\modules\transformers_modules\chatglm2-6b\quantization. │
│ py:78 in compress_int4_weight │
│ │
│ 75 │ │ gridDim = (n, 1, 1) │
│ 76 │ │ blockDim = (min(round_up(m, 32), 1024), 1, 1) │
│ 77 │ │ │
│ ❱ 78 │ │ kernels.int4WeightCompression( │
│ 79 │ │ │ gridDim, │
│ 80 │ │ │ blockDim, │
│ 81 │ │ │ 0, │
│ │
│ H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\cpm_kernels\kernels\base.py:48 in __call__ │
│ │
│ 45 │ │ │ sharedMemBytes : int, stream : cudart.cudaStream_t, params : List[Any] ) -> │
│ 46 │ │ assert len(gridDim) == 3 │
│ 47 │ │ assert len(blockDim) == 3 │
│ ❱ 48 │ │ func = self._prepare_func() │
│ 49 │ │ │
│ 50 │ │ cuda.cuLaunchKernel(func, │
│ 51 │ │ │ gridDim[0], gridDim[1], gridDim[2], │
│ │
│ H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\cpm_kernels\kernels\base.py:40 in _prepare_func │
│ │
│ 37 │ │ cudart.cudaSetDevice(curr_device) # ensure cudart context │
│ 38 │ │ if curr_device not in self._funcs: │
│ 39 │ │ │ self._funcs[curr_device] = cuda.cuModuleGetFunction( │
│ ❱ 40 │ │ │ │ self._module.get_module(), self._func_name │
│ 41 │ │ │ ) │
│ 42 │ │ return self._funcs[curr_device] │
│ 43 │
│ │
│ H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\cpm_kernels\kernels\base.py:24 in get_module │
│ │
│ 21 │ │ curr_device = cudart.cudaGetDevice() │
│ 22 │ │ if curr_device not in self._module: │
│ 23 │ │ │ Device(curr_device).use() # force initialize context │
│ ❱ 24 │ │ │ self._module[curr_device] = cuda.cuModuleLoadData(self._code) │
│ 25 │ │ return self._module[curr_device] │
│ 26 │
│ 27 │
│ │
│ H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\cpm_kernels\library\base.py:94 in wrapper │
│ │
│ 91 │ │ │ def decorator(f): │
│ 92 │ │ │ │ @wraps(f) │
│ 93 │ │ │ │ def wrapper(*args, **kwargs): │
│ ❱ 94 │ │ │ │ │ return f(*args, **kwargs) │
│ 95 │ │ │ │ return wrapper │
│ 96 │ │ │ return decorator │
│ 97 │
│ │
│ H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\cpm_kernels\library\cuda.py:233 in cuModuleLoadData │
│ │
│ 230 @cuda.bind("cuModuleLoadData", [ctypes.POINTER(CUmodule), ctypes.c_void_p], CUresult) │
│ 231 def cuModuleLoadData(data : bytes) -> CUmodule: │
│ 232 │ module = CUmodule() │
│ ❱ 233 │ checkCUStatus(cuda.cuModuleLoadData(ctypes.byref(module), data)) │
│ 234 │ return module │
│ 235 │
│ 236 @cuda.bind("cuModuleUnload", [CUmodule], CUresult) │
│ │
│ H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\cpm_kernels\library\cuda.py:216 in checkCUStatus │
│ │
│ 213 │
│ 214 def checkCUStatus(error : int) -> None: │
│ 215 │ if error != CUDA_SUCCESS: │
│ ❱ 216 │ │ raise RuntimeError("CUDA Error: %s" % cuGetErrorString(error)) │
│ 217 │
│ 218 @cuda.bind("cuDriverGetVersion", [ctypes.POINTER(ctypes.c_int)], CUresult) │
│ 219 def cuDriverGetVersion() -> int: │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
RuntimeError: CUDA Error: no kernel image is available for execution on the device
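
The run dies inside cpm_kernels while .quantize(4) is loading its precompiled CUDA kernels. "no kernel image is available for execution on the device" usually means the GPU's compute capability is not among the architectures those kernel binaries were built for, so the quantization path simply cannot run on this card. A minimal sketch of one way around it, assuming the card has roughly 13 GB of free VRAM (the fp16 footprint given on the model card), is to drop the .quantize(4) call so the custom kernels are never launched:

from transformers import AutoModel, AutoTokenizer

# "THUDM\chatglm2-6b" only loaded above because it happened to resolve to a
# local folder; the forward-slash spelling is the safe form either way.
MODEL = "THUDM/chatglm2-6b"

tokenizer = AutoTokenizer.from_pretrained(MODEL, trust_remote_code=True)
# fp16 on the GPU with no .quantize(4): cpm_kernels' custom CUDA kernels are
# never compiled or launched, so this path sidesteps the error above.
model = AutoModel.from_pretrained(MODEL, trust_remote_code=True).half().cuda()
model = model.eval()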
(H:\OpenAI\ChatGLM2-6B\ENV) H:\OpenAI\ChatGLM2-6B>python cli_demo.py
Traceback (most recent call last):
File "H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\transformers\configuration_utils.py", line 629, in _get_config_dict
resolved_config_file = cached_file(
File "H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\transformers\utils\hub.py", line 417, in cached_file
resolved_file = hf_hub_download(
File "H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\huggingface_hub\utils\_validators.py", line 110, in _inner_fn
validate_repo_id(arg_value)
File "H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\huggingface_hub\utils\_validators.py", line 164, in validate_repo_id
raise HFValidationError(
huggingface_hub.utils._validators.HFValidationError: Repo id must use alphanumeric chars or '-', '_', '.', '--' and '..' are forbidden, '-' and '.' cannot start or end the name, max length is 96: 'THUDM\chatglm2-6b-int4'.
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "H:\OpenAI\ChatGLM2-6B\cli_demo.py", line 8, in <module>
model = AutoModel.from_pretrained("THUDM\chatglm2-6b-int4", trust_remote_code=True).quantize(4).cuda()
File "H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\transformers\models\auto\auto_factory.py", line 456, in from_pretrained
config, kwargs = AutoConfig.from_pretrained(
File "H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\transformers\models\auto\configuration_auto.py", line 944, in from_pretrained
config_dict, unused_kwargs = PretrainedConfig.get_config_dict(pretrained_model_name_or_path, **kwargs)
File "H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\transformers\configuration_utils.py", line 574, in get_config_dict
config_dict, kwargs = cls._get_config_dict(pretrained_model_name_or_path, **kwargs)
File "H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\transformers\configuration_utils.py", line 650, in _get_config_dict
raise EnvironmentError(
OSError: Can't load the configuration of 'THUDM\chatglm2-6b-int4'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure 'THUDM\chatglm2-6b-int4' is the correct path to a directory containing a config.json file
(H:\OpenAI\ChatGLM2-6B\ENV) H:\OpenAI\ChatGLM2-6B>
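
This second failure is unrelated to CUDA. cli_demo.py was edited to point at the pre-quantized checkpoint, but with a backslash in the name: "THUDM\chatglm2-6b-int4" is not a local directory here (unlike "THUDM\chatglm2-6b" above), so transformers falls through to treating the string as a Hugging Face Hub repo id, and repo ids only ever use a forward slash, which is exactly what the HFValidationError is complaining about. A sketch of the fix, assuming the goal is the ready-made int4 checkpoint:

from transformers import AutoModel, AutoTokenizer

REPO = "THUDM/chatglm2-6b-int4"  # forward slash: a Hub repo id, not a Windows path

tokenizer = AutoTokenizer.from_pretrained(REPO, trust_remote_code=True)
# The -int4 checkpoint ships already quantized, so no .quantize(4) is needed
# before moving it to the GPU.
model = AutoModel.from_pretrained(REPO, trust_remote_code=True).cuda()
model = model.eval()

One caveat: the pre-quantized model still calls into cpm_kernels at inference time, so on a GPU that triggered the "no kernel image" error above, the fp16 route from the previous sketch is the safer fallback. A local checkout also works; a Windows path with backslashes is fine once the directory actually exists on disk (the path below is only an example):

model = AutoModel.from_pretrained(r"H:\OpenAI\models\chatglm2-6b-int4", trust_remote_code=True).cuda()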
(H:\OpenAI\ChatGLM2-6B\ENV) C:\Users\Administrator>python
Python 3.9.6 (default, Aug 18 2021, 15:44:49) [MSC v.1916 64 bit (AMD64)] :: Anaconda, Inc. on win32
Type "help", "copyright", "credits" or "license" for more information.
>>> import torch
>>> x=torch.rand(4,6)
>>> print(x)
tensor([[0.4784, 0.6858, 0.0851, 0.2220, 0.0233, 0.4879],
[0.2230, 0.8423, 0.2485, 0.6101, 0.2816, 0.4068],
[0.5379, 0.6412, 0.2049, 0.8542, 0.3221, 0.2888],
[0.4783, 0.4648, 0.6325, 0.8418, 0.9524, 0.1783]])
>>> torch.cuda.is_available()
True
>>> torch.cuda.current_device()
0
>>> torch.cuda.device_count()
1
>>> torch.__version__
'2.0.1'
>>> torch.backends.cudnn.is_acceptable(torch.cuda.FloatTensor(1))
True
>>> a=torch.Tensor([1,2])
>>> a=a.cuda()
>>> a
tensor([1., 2.], device='cuda:0')
>>>
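
The interactive check above confirms that the CUDA stack itself is healthy: torch sees the GPU, tensors move to cuda:0, and cuDNN accepts them. That narrows the original "no kernel image" error down to an architecture mismatch between this GPU and the kernels cpm_kernels ships, rather than a driver or install problem. A short diagnostic sketch using standard PyTorch APIs (what counts as "supported" ultimately depends on the installed cpm_kernels build):

import torch

print(torch.cuda.get_device_name(0))        # GPU model
print(torch.cuda.get_device_capability(0))  # (major, minor) compute capability
print(torch.cuda.get_arch_list())           # sm_* targets this torch build supports
print(torch.version.cuda)                   # CUDA version the wheel was built against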