[Official Tutorial] ChatGLM2-6B Deployment and Fine-tuning

Microsoft Windows [Version 10.0.19045.3086]
(c) Microsoft Corporation. All rights reserved.
C:\Users\Administrator>conda env list
# conda environments:
#
base                     D:\Develop\anaconda3
                         H:\OpenAI\ChatGLM2-6B\ENV
C:\Users\Administrator>cd /d H:\OpenAI\ChatGLM2-6B
H:\OpenAI\ChatGLM2-6B>conda activate H:\OpenAI\ChatGLM2-6B\ENV
(H:\OpenAI\ChatGLM2-6B\ENV) H:\OpenAI\ChatGLM2-6B>python cli_demo.py
You are using a model of type chatglm to instantiate a model of type . This is not supported for all configurations of models and can yield errors.
Loading checkpoint shards: 100%|█████████████████████████████████████████████| 7/7 [37:52<00:00, 324.60s/it]
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ H:\OpenAI\ChatGLM2-6B\cli_demo.py:8 in <module> │
│ │
│ 5 import readline │
│ 6 │
│ 7 tokenizer = AutoTokenizer.from_pretrained("THUDM\chatglm2-6b", trust_remote_code=True) │
│ ❱ 8 model = AutoModel.from_pretrained("THUDM\chatglm2-6b", trust_remote_code=True).quantize( │
│ 9 model = model.eval() │
│ 10 │
│ 11 os_name = platform.system() │
│ │
│ C:\Users\Administrator/.cache\huggingface\modules\transformers_modules\chatglm2-6b\modeling_chat │
│ glm.py:1109 in quantize │
│ │
│ 1106 │ │ │
│ 1107 │ │ self.config.quantization_bit = bits │
│ 1108 │ │ │
│ ❱ 1109 │ │ self.transformer.encoder = quantize(self.transformer.encoder, bits, empty_init=e │
│ 1110 │ │ │ │ │ │ │ │ │ │ │ **kwargs) │
│ 1111 │ │ return self │
│ 1112 │
│ │
│ C:\Users\Administrator/.cache\huggingface\modules\transformers_modules\chatglm2-6b\quantization. │
│ py:155 in quantize │
│ │
│ 152 def quantize(model, weight_bit_width, empty_init=False, device=None): │
│ 153 │ """Replace fp16 linear with quantized linear""" │
│ 154 │ for layer in model.layers: │
│ ❱ 155 │ │ layer.self_attention.query_key_value = QuantizedLinear( │
│ 156 │ │ │ weight_bit_width=weight_bit_width, │
│ 157 │ │ │ weight=layer.self_attention.query_key_value.weight.to(torch.cuda.current_dev │
│ 158 │ │ │ bias=layer.self_attention.query_key_value.bias, │
│ │
│ C:\Users\Administrator/.cache\huggingface\modules\transformers_modules\chatglm2-6b\quantization. │
│ py:139 in __init__ │
│ │
│ 136 │ │ │ self.weight_scale = weight.abs().max(dim=-1).values / ((2 ** (weight_bit_wid │
│ 137 │ │ │ self.weight = torch.round(weight / self.weight_scale[:, None]).to(torch.int8 │
│ 138 │ │ │ if weight_bit_width == 4: │
│ ❱ 139 │ │ │ │ self.weight = compress_int4_weight(self.weight) │
│ 140 │ │ │
│ 141 │ │ self.weight = Parameter(self.weight.to(device), requires_grad=False) │
│ 142 │ │ self.weight_scale = Parameter(self.weight_scale.to(device), requires_grad=False) │
│ │
│ C:\Users\Administrator/.cache\huggingface\modules\transformers_modules\chatglm2-6b\quantization. │
│ py:78 in compress_int4_weight │
│ │
│ 75 │ │ gridDim = (n, 1, 1) │
│ 76 │ │ blockDim = (min(round_up(m, 32), 1024), 1, 1) │
│ 77 │ │ │
│ ❱ 78 │ │ kernels.int4WeightCompression( │
│ 79 │ │ │ gridDim, │
│ 80 │ │ │ blockDim, │
│ 81 │ │ │ 0, │
│ │
│ H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\cpm_kernels\kernels\base.py:48 in __call__ │
│ │
│ 45 │ │ │ sharedMemBytes : int, stream : cudart.cudaStream_t, params : List[Any] ) -> │
│ 46 │ │ assert len(gridDim) == 3 │
│ 47 │ │ assert len(blockDim) == 3 │
│ ❱ 48 │ │ func = self._prepare_func() │
│ 49 │ │ │
│ 50 │ │ cuda.cuLaunchKernel(func, │
│ 51 │ │ │ gridDim[0], gridDim[1], gridDim[2], │
│ │
│ H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\cpm_kernels\kernels\base.py:40 in _prepare_func │
│ │
│ 37 │ │ cudart.cudaSetDevice(curr_device) # ensure cudart context │
│ 38 │ │ if curr_device not in self._funcs: │
│ 39 │ │ │ self._funcs[curr_device] = cuda.cuModuleGetFunction( │
│ ❱ 40 │ │ │ │ self._module.get_module(), self._func_name │
│ 41 │ │ │ ) │
│ 42 │ │ return self._funcs[curr_device] │
│ 43 │
│ │
│ H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\cpm_kernels\kernels\base.py:24 in get_module │
│ │
│ 21 │ │ curr_device = cudart.cudaGetDevice() │
│ 22 │ │ if curr_device not in self._module: │
│ 23 │ │ │ Device(curr_device).use() # force initialize context │
│ ❱ 24 │ │ │ self._module[curr_device] = cuda.cuModuleLoadData(self._code) │
│ 25 │ │ return self._module[curr_device] │
│ 26 │
│ 27 │
│ │
│ H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\cpm_kernels\library\base.py:94 in wrapper │
│ │
│ 91 │ │ │ def decorator(f): │
│ 92 │ │ │ │ @wraps(f) │
│ 93 │ │ │ │ def wrapper(*args, **kwargs): │
│ ❱ 94 │ │ │ │ │ return f(*args, **kwargs) │
│ 95 │ │ │ │ return wrapper │
│ 96 │ │ │ return decorator │
│ 97 │
│ │
│ H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\cpm_kernels\library\cuda.py:233 in cuModuleLoadData │
│ │
│ 230 @cuda.bind("cuModuleLoadData", [ctypes.POINTER(CUmodule), ctypes.c_void_p], CUresult) │
│ 231 def cuModuleLoadData(data : bytes) -> CUmodule: │
│ 232 │ module = CUmodule() │
│ ❱ 233 │ checkCUStatus(cuda.cuModuleLoadData(ctypes.byref(module), data)) │
│ 234 │ return module │
│ 235 │
│ 236 @cuda.bind("cuModuleUnload", [CUmodule], CUresult) │
│ │
│ H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\cpm_kernels\library\cuda.py:216 in checkCUStatus │
│ │
│ 213 │
│ 214 def checkCUStatus(error : int) -> None: │
│ 215 │ if error != CUDA_SUCCESS: │
│ ❱ 216 │ │ raise RuntimeError("CUDA Error: %s" % cuGetErrorString(error)) │
│ 217 │
│ 218 @cuda.bind("cuDriverGetVersion", [ctypes.POINTER(ctypes.c_int)], CUresult) │
│ 219 def cuDriverGetVersion() -> int: │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
RuntimeError: CUDA Error: no kernel image is available for execution on the device
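
The run dies inside cpm_kernels while .quantize(4) is loading its precompiled CUDA kernels. "no kernel image is available for execution on the device" usually means the GPU's compute capability is not among the architectures those kernel binaries were built for, so the quantization path simply cannot run on this card. A minimal sketch of one way around it, assuming the card has roughly 13 GB of free VRAM (the fp16 footprint given on the model card), is to drop the .quantize(4) call so the custom kernels are never launched:

from transformers import AutoModel, AutoTokenizer

# "THUDM\chatglm2-6b" only loaded above because it happened to resolve to a
# local folder; the forward-slash spelling is the safe form either way.
MODEL = "THUDM/chatglm2-6b"

tokenizer = AutoTokenizer.from_pretrained(MODEL, trust_remote_code=True)
# fp16 on the GPU with no .quantize(4): cpm_kernels' custom CUDA kernels are
# never compiled or launched, so this path sidesteps the error above.
model = AutoModel.from_pretrained(MODEL, trust_remote_code=True).half().cuda()
model = model.eval()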
(H:\OpenAI\ChatGLM2-6B\ENV) H:\OpenAI\ChatGLM2-6B>python cli_demo.py
Traceback (most recent call last):
File "H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\transformers\configuration_utils.py", line 629, in _get_config_dict
resolved_config_file = cached_file(
File "H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\transformers\utils\hub.py", line 417, in cached_file
resolved_file = hf_hub_download(
File "H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\huggingface_hub\utils\_validators.py", line 110, in _inner_fn
validate_repo_id(arg_value)
File "H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\huggingface_hub\utils\_validators.py", line 164, in validate_repo_id
raise HFValidationError(
huggingface_hub.utils._validators.HFValidationError: Repo id must use alphanumeric chars or '-', '_', '.', '--' and '..' are forbidden, '-' and '.' cannot start or end the name, max length is 96: 'THUDM\chatglm2-6b-int4'.
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "H:\OpenAI\ChatGLM2-6B\cli_demo.py", line 8, in <module>
model = AutoModel.from_pretrained("THUDM\chatglm2-6b-int4", trust_remote_code=True).quantize(4).cuda()
File "H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\transformers\models\auto\auto_factory.py", line 456, in from_pretrained
config, kwargs = AutoConfig.from_pretrained(
File "H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\transformers\models\auto\configuration_auto.py", line 944, in from_pretrained
config_dict, unused_kwargs = PretrainedConfig.get_config_dict(pretrained_model_name_or_path, **kwargs)
File "H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\transformers\configuration_utils.py", line 574, in get_config_dict
config_dict, kwargs = cls._get_config_dict(pretrained_model_name_or_path, **kwargs)
File "H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\transformers\configuration_utils.py", line 650, in _get_config_dict
raise EnvironmentError(
OSError: Can't load the configuration of 'THUDM\chatglm2-6b-int4'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure 'THUDM\chatglm2-6b-int4' is the correct path to a directory containing a config.json file
(H:\OpenAI\ChatGLM2-6B\ENV) H:\OpenAI\ChatGLM2-6B>
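
This second failure is unrelated to CUDA. cli_demo.py was edited to point at the pre-quantized checkpoint, but with a backslash in the name: "THUDM\chatglm2-6b-int4" is not a local directory here (unlike "THUDM\chatglm2-6b" above), so transformers falls through to treating the string as a Hugging Face Hub repo id, and repo ids only ever use a forward slash, which is exactly what the HFValidationError is complaining about. A sketch of the fix, assuming the goal is the ready-made int4 checkpoint:

from transformers import AutoModel, AutoTokenizer

REPO = "THUDM/chatglm2-6b-int4"  # forward slash: a Hub repo id, not a Windows path

tokenizer = AutoTokenizer.from_pretrained(REPO, trust_remote_code=True)
# The -int4 checkpoint ships already quantized, so no .quantize(4) is needed
# before moving it to the GPU.
model = AutoModel.from_pretrained(REPO, trust_remote_code=True).cuda()
model = model.eval()

One caveat: the pre-quantized model still calls into cpm_kernels at inference time, so on a GPU that triggered the "no kernel image" error above, the fp16 route from the previous sketch is the safer fallback. A local checkout also works; a Windows path with backslashes is fine once the directory actually exists on disk (the path below is only an example):

model = AutoModel.from_pretrained(r"H:\OpenAI\models\chatglm2-6b-int4", trust_remote_code=True).cuda()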
(H:\OpenAI\ChatGLM2-6B\ENV) C:\Users\Administrator>python
Python 3.9.6 (default, Aug 18 2021, 15:44:49) [MSC v.1916 64 bit (AMD64)] :: Anaconda, Inc. on win32
Type "help", "copyright", "credits" or "license" for more information.
>>> import torch
>>> x=torch.rand(4,6)
>>> print(x)
tensor([[0.4784, 0.6858, 0.0851, 0.2220, 0.0233, 0.4879],
[0.2230, 0.8423, 0.2485, 0.6101, 0.2816, 0.4068],
[0.5379, 0.6412, 0.2049, 0.8542, 0.3221, 0.2888],
[0.4783, 0.4648, 0.6325, 0.8418, 0.9524, 0.1783]])
>>> torch.cuda.is_available()
True
>>> torch.cuda.current_device()
0
>>> torch.cuda.device_count()
1
>>> torch.__version__
'2.0.1'
>>> torch.backends.cudnn.is_acceptable(torch.cuda.FloatTensor(1))
True
>>> a=torch.Tensor([1,2])
>>> a=a.cuda()
>>> a
tensor([1., 2.], device='cuda:0')
>>>
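
The interactive check above confirms that the CUDA stack itself is healthy: torch sees the GPU, tensors move to cuda:0, and cuDNN accepts them. That narrows the original "no kernel image" error down to an architecture mismatch between this GPU and the kernels cpm_kernels ships, rather than a driver or install problem. A short diagnostic sketch using standard PyTorch APIs (what counts as "supported" ultimately depends on the installed cpm_kernels build):

import torch

print(torch.cuda.get_device_name(0))        # GPU model
print(torch.cuda.get_device_capability(0))  # (major, minor) compute capability
print(torch.cuda.get_arch_list())           # sm_* targets this torch build supports
print(torch.version.cuda)                   # CUDA version the wheel was built against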