近日工作记录20211205-1215
2022-06-26 22:57 作者:Nofear-wave | 我要投稿
这一周尝试遍历了StyleGAN2的14个特征层,并选取512维空间的标准正交基作为编辑向量进行了尝试,取得了一些进展,现将近期尝试和下一步工作简单梳理。

(一)近日工作总结
1、对爱因斯坦潜在向量进行编辑

(二)基本结果展示

(三)存在的问题
1、数据的问题:利用id_invert框架的时候只在部分数据上表现良好,需要实验筛选至少一两个合适的数据(名人照片等)
2、方法过于简单,很多基于直观的尝试都需要进一步深化,提出某些方法
(四)下一步工作

梳理如下:
1、首要:输入多张图片,并提取512维中变化剧烈的关键维度(具体就是多找几张图片,分层输入生成512个文件,查看在哪些维度上变化比较剧烈,最后从其中挑选50-100个就行)
2、提取之后观察是否可泛化;需要设计一种数学方法来自动提取这些维度(可以融合机器学习可解释性方面的知识),或者尝试用一个情绪识别器,看能否完成提取。
3、完成提取了某些维度之后,再与现有方法进行对比,随机扰动和已有语义编辑比较容易生成数据,但PCA方法之后需要研究观察。
4、结合对比中提到的三种方法,需要掌握这两个指标的代码,并将四种方法(三种对比方法加上本方法)生成的图片结果进行对比;这一步需要先完成第3步。
5、如果id_invert结果实在不行的话,可以先用别的encoder提取.npy向量,再放到这个代码里进行编辑。
(五)代码修改记录-防止忘记
1、这个是分层分维度编辑
def manipulate(latent_codes,
               boundary,
               start_distance=-5.0,
               end_distance=5.0,
               step=21,
               layerwise_manipulation=False,
               num_layers=1,
               manipulate_layers=None,
               is_code_layerwise=False,
               is_boundary_layerwise=False,
               layerwise_manipulation_strength=1.0,
               flag=1):
    """Sweep one latent dimension of the given codes, layer by layer.

    This is the "per-layer, per-dimension" editing variant: instead of moving
    the codes along `boundary`, every selected layer is moved along the
    standard basis vector whose only non-zero entry is at index `flag`.

    NOTE: `boundary` and `layerwise_manipulation_strength` are still validated
    (so malformed inputs raise exactly as before), but the scaled boundary is
    not applied to the result.

    Args:
        latent_codes: Array of shape [num, *code_shape], or
            [num, num_layers, *code_shape] when `is_code_layerwise` is set.
        boundary: Array of shape [1, *code_shape], or
            [1, num_layers, *code_shape] when `is_boundary_layerwise` is set.
        start_distance: First offset added along the edited dimension.
        end_distance: Last offset added along the edited dimension.
        step: Number of evenly spaced offsets between the two distances
            (both ends included, see `np.linspace`).
        layerwise_manipulation: Whether to edit layer-wise. When False, the
            layer-related arguments are reset to their single-layer defaults.
        num_layers: Number of layers of the layer-wise codes.
        manipulate_layers: Layers to edit, forwarded to `parse_indices`
            (presumably a list or string of indices -- confirm against that
            helper). An empty result selects all layers.
        is_code_layerwise: Whether `latent_codes` already has a layer axis.
        is_boundary_layerwise: Whether `boundary` already has a layer axis.
        layerwise_manipulation_strength: Scalar, list/tuple or `np.ndarray`
            with one strength per layer.
        flag: Index of the latent dimension to edit (last axis of the codes).

    Returns:
        Array of shape [num, step, num_layers, *code_shape] when
        `layerwise_manipulation` is True, otherwise [num, step, *code_shape].

    Raises:
        ValueError: If the boundary, the latent codes or the strength have an
            unexpected shape or type.
    """
    if not (boundary.ndim >= 2 and boundary.shape[0] == 1):
        raise ValueError(f'Boundary should be with shape [1, *code_shape] or '
                         f'[1, num_layers, *code_shape], but '
                         f'{boundary.shape} is received!')

    if not layerwise_manipulation:
        assert not is_code_layerwise
        assert not is_boundary_layerwise
        num_layers = 1
        manipulate_layers = None
        layerwise_manipulation_strength = 1.0

    # Preprocessing for layer-wise manipulation.
    # Parse indices of manipulation layers.
    layer_indices = parse_indices(
        manipulate_layers, min_val=0, max_val=num_layers - 1)
    if not layer_indices:
        layer_indices = list(range(num_layers))

    # Make latent codes layer-wise if needed.
    assert num_layers > 0
    if not is_code_layerwise:
        x = latent_codes[:, np.newaxis]
        x = np.tile(
            x, [num_layers if axis == 1 else 1 for axis in range(x.ndim)])
    else:
        x = latent_codes
        if x.shape[1] != num_layers:
            raise ValueError(f'Latent codes should be with shape [num, num_layers, '
                             f'*code_shape], where `num_layers` equals to '
                             f'{num_layers}, but {x.shape} is received!')

    # Make boundary layer-wise if needed.
    if not is_boundary_layerwise:
        b = boundary
        b = np.tile(
            b, [num_layers if axis == 0 else 1 for axis in range(b.ndim)])
    else:
        b = boundary[0]
        if b.shape[0] != num_layers:
            raise ValueError(f'Boundary should be with shape [num_layers, '
                             f'*code_shape], where `num_layers` equals to '
                             f'{num_layers}, but {b.shape} is received!')

    # Get layer-wise manipulation strength.
    if isinstance(layerwise_manipulation_strength, (int, float)):
        s = [float(layerwise_manipulation_strength) for _ in range(num_layers)]
    elif isinstance(layerwise_manipulation_strength, (list, tuple)):
        s = layerwise_manipulation_strength
        if len(s) != num_layers:
            raise ValueError(f'Shape of layer-wise manipulation strength `{len(s)}` '
                             f'mismatches number of layers `{num_layers}`!')
    elif isinstance(layerwise_manipulation_strength, np.ndarray):
        s = layerwise_manipulation_strength
        if s.size != num_layers:
            raise ValueError(f'Shape of layer-wise manipulation strength `{s.size}` '
                             f'mismatches number of layers `{num_layers}`!')
    else:
        raise ValueError(f'Unsupported type of `layerwise_manipulation_strength`!')
    s = np.array(s).reshape(
        [num_layers if axis == 0 else 1 for axis in range(b.ndim)])
    b = b * s

    if x.shape[1:] != b.shape:
        raise ValueError(f'Latent code shape {x.shape} and boundary shape '
                         f'{b.shape} mismatch!')
    num = x.shape[0]
    code_shape = x.shape[2:]
    # Width of the latent space, taken from the codes instead of hard-coding
    # 512 so the same routine works for other latent sizes.
    code_dim = x.shape[-1]

    # Insert the step axis: [num, 1, num_layers, *code_shape].
    x = x[:, np.newaxis]
    print(flag)

    # Single-layer, single-dimension edit: one-hot direction at index `flag`.
    # Variants tried during the experiments:
    #   * several dimensions at once: also set direction[flag + 1] = 1;
    #   * same dimension on layers 2 and 3 only: use a
    #     (1, 1, num_layers, code_dim) array with ones at [0, 0, 2, flag] and
    #     [0, 0, 3, flag] in place of `direction`.
    direction = np.zeros(code_dim)
    direction[flag] = 1
    direction = direction.reshape(1, 1, 1, code_dim)

    # Offsets laid out along the step axis so they broadcast against `x`.
    l = np.linspace(start_distance, end_distance, step).reshape(
        [step if axis == 1 else 1 for axis in range(x.ndim)])
    results = np.tile(x, [step if axis == 1 else 1 for axis in range(x.ndim)])
    # Only the selected layers receive the offset; others keep the input code.
    is_manipulatable = np.zeros(results.shape, dtype=bool)
    is_manipulatable[:, :, layer_indices] = True
    results = np.where(is_manipulatable, x + l * direction, results)
    assert results.shape == (num, step, num_layers, *code_shape)

    return results if layerwise_manipulation else results[:, :, 0]
2、这个是非线性尝试
# Step-wise non-linear attempt: like walking along the two legs of a right
# triangle (the linear edit would walk along the hypotenuse).
# NOTE(review): this is a function-body fragment; `x`, `b`, `flag`,
# `layer_indices`, `num`, `num_layers` and `code_shape` presumably come from
# the preceding part of `manipulate` -- confirm before reusing.
x = x[:, np.newaxis]
b = b[np.newaxis, np.newaxis, :]
print(flag)
# One-hot direction: only dimension `flag` is swept.
c = np.zeros(512)
c[flag]=1
c = c.reshape(1,1,1,512)
# Hold the next dimension (`flag + 1`) at a fixed offset of `start_distance`
# for every step.
# NOTE(review): `flag + 1` is out of range when flag == 511 (IndexError).
temp = np.zeros(512)
temp[flag+1]=start_distance
temp = temp.reshape(1,1,1,512)
# Evenly spaced offsets laid out along axis 1 (the step axis).
l = np.linspace(start_distance, end_distance, step).reshape(
[step if axis == 1 else 1 for axis in range(x.ndim)])
results = np.tile(x, [step if axis == 1 else 1 for axis in range(x.ndim)])
# Only the layers listed in `layer_indices` are edited.
is_manipulatable = np.zeros(results.shape, dtype=bool)
is_manipulatable[:, :, layer_indices] = True
# Swept offset on `flag` plus the constant offset on `flag + 1`.
results = np.where(is_manipulatable, x + l * c+temp, results)
assert results.shape == (num, step, num_layers, *code_shape)
return results if layerwise_manipulation else results[:, :, 0]
#结果生成也要两步
# Pass 1: sweep dimension `flag - 1` from `start_distance` to `end_distance`.
codes = manipulate(latent_codes=latent_codes,
                   boundary=boundary,
                   start_distance=args.start_distance,
                   end_distance=args.end_distance,
                   step=step,
                   layerwise_manipulation=True,
                   num_layers=generator.num_layers,
                   manipulate_layers=manipulate_layers,
                   is_code_layerwise=True,
                   is_boundary_layerwise=True,
                   flag=flag - 1)
for img_idx in tqdm(range(num_images), leave=False):
    output_images = generator.easy_synthesize(
        codes[img_idx], latent_space_type='wp')['image']
    for s, output_image in enumerate(output_images):
        visualizer.set_cell(img_idx, s + 3, image=output_image)
        save_image(f'{output_dir}/_AI{img_idx}linear{s + 1}.png', output_image)
# Save results.
visualizer.save(f'{output_dir}/{job_name}_{flag}.html')

# Pass 2: use the end point of pass 1 as the new starting point and sweep
# dimension `flag` over the same total distance.
# The last step of *every* image is taken (shape [num, num_layers, ...]),
# not only image 0, so the loop below can index `next_codes[img_idx]` for all
# `num_images` without running out of range.
next_start = codes[:, step - 1]
next_codes = manipulate(latent_codes=next_start,
                        boundary=boundary,
                        start_distance=0,
                        end_distance=args.end_distance - args.start_distance,
                        step=step,
                        layerwise_manipulation=True,
                        num_layers=generator.num_layers,
                        manipulate_layers=manipulate_layers,
                        is_code_layerwise=True,
                        is_boundary_layerwise=True,
                        flag=flag)
for img_idx in tqdm(range(num_images), leave=False):
    output_images = generator.easy_synthesize(
        next_codes[img_idx], latent_space_type='wp')['image']
    for s, output_image in enumerate(output_images):
        # Continue the file numbering after the `step` images of pass 1.
        save_image(f'{output_dir}/_AI{img_idx}linear{s + 1 + step}.png',
                   output_image)
(六)一些基本命令
1、winscp传输实在太慢了,所以尝试直接用命令行打包传输
# Pack the `results` directory into a .tar archive named data.tar
# (-c create, -v list files as they are added, -f archive file name).
tar -cvf data.tar results