将数据标签变为类似MNIST的one-hot编码形式
def one_hot(indices,
            depth,
            on_value=None,
            off_value=None,
            axis=None,
            dtype=None,
            name=None):
    """Returns a one-hot tensor.

    The locations represented by indices in `indices` take value `on_value`,
    while all other locations take value `off_value`.

    `on_value` and `off_value` must have matching data types. If `dtype` is
    also provided, they must be the same data type as specified by `dtype`.

    If `on_value` is not provided, it will default to the value `1` with type
    `dtype`.

    If `off_value` is not provided, it will default to the value `0` with type
    `dtype`.

    If the input `indices` is rank `N`, the output will have rank `N+1`. The
    new axis is created at dimension `axis` (default: the new axis is appended
    at the end).

    If `indices` is a scalar the output shape will be a vector of length
    `depth`.

    If `indices` is a vector of length `features`, the output shape will be:

    ```
      features x depth if axis == -1
      depth x features if axis == 0
    ```

    If `indices` is a matrix (batch) with shape `[batch, features]`, the
    output shape will be:

    ```
      batch x features x depth if axis == -1
      batch x depth x features if axis == 1
      depth x batch x features if axis == 0
    ```

    If `dtype` is not provided, it will attempt to assume the data type of
    `on_value` or `off_value`, if one or both are passed in. If none of
    `on_value`, `off_value`, or `dtype` are provided, `dtype` will default to
    the value `tf.float32`.

    Note: If a non-numeric data type output is desired (`tf.string`,
    `tf.bool`, etc.), both `on_value` and `off_value` _must_ be provided to
    `one_hot`.

    For example:

    ```python
      indices = [0, 1, 2]
      depth = 3
      tf.one_hot(indices, depth)  # output: [3 x 3]
      # [[1., 0., 0.],
      #  [0., 1., 0.],
      #  [0., 0., 1.]]

      indices = [0, 2, -1, 1]
      depth = 3
      tf.one_hot(indices, depth,
                 on_value=5.0, off_value=0.0,
                 axis=-1)  # output: [4 x 3]
      # [[5.0, 0.0, 0.0],  # one_hot(0)
      #  [0.0, 0.0, 5.0],  # one_hot(2)
      #  [0.0, 0.0, 0.0],  # one_hot(-1)
      #  [0.0, 5.0, 0.0]]  # one_hot(1)

      indices = [[0, 2], [1, -1]]
      depth = 3
      tf.one_hot(indices, depth,
                 on_value=1.0, off_value=0.0,
                 axis=-1)  # output: [2 x 2 x 3]
      # [[[1.0, 0.0, 0.0],   # one_hot(0)
      #   [0.0, 0.0, 1.0]],  # one_hot(2)
      #  [[0.0, 1.0, 0.0],   # one_hot(1)
      #   [0.0, 0.0, 0.0]]]  # one_hot(-1)
    ```

    Args:
      indices: A `Tensor` of indices.
      depth: A scalar defining the depth of the one hot dimension.
      on_value: A scalar defining the value to fill in output when
        `indices[j] = i`. (default: 1)
      off_value: A scalar defining the value to fill in output when
        `indices[j] != i`. (default: 0)
      axis: The axis to fill (default: -1, a new inner-most axis). The
        argument is forwarded unchanged to the generated op, including the
        signature default of `None`.
      dtype: The data type of the output tensor.
      name: Optional name, used for the enclosing name scope and forwarded to
        the generated op.

    Returns:
      output: The one-hot tensor.

    Raises:
      TypeError: If dtype of either `on_value` or `off_value` don't match
        `dtype`
      TypeError: If dtype of `on_value` and `off_value` don't match one
        another
    """
    with ops.name_scope(name, "one_hot",
                        [indices, depth, on_value, off_value, axis,
                         dtype]) as name:
        on_exists = on_value is not None
        off_exists = off_value is not None

        # Only inspect the dtype of a fill value the caller actually supplied.
        on_dtype = (ops.convert_to_tensor(on_value).dtype.base_dtype
                    if on_exists else None)
        off_dtype = (ops.convert_to_tensor(off_value).dtype.base_dtype
                     if off_exists else None)

        if on_exists or off_exists:
            if dtype is not None:
                # Ensure provided on_value and/or off_value match dtype
                if (on_exists and on_dtype != dtype):
                    raise TypeError("dtype {0} of on_value does not match "
                                    "dtype parameter {1}".format(on_dtype, dtype))
                if (off_exists and off_dtype != dtype):
                    raise TypeError("dtype {0} of off_value does not match "
                                    "dtype parameter {1}".format(off_dtype, dtype))
            else:
                # dtype not provided: automatically assign it
                dtype = on_dtype if on_exists else off_dtype
        elif dtype is None:
            # None of on_value, off_value, or dtype provided. Default dtype to
            # float32
            dtype = dtypes.float32

        if not on_exists:
            # on_value not provided: assign to value 1 of type dtype
            on_value = ops.convert_to_tensor(1, dtype, name="on_value")
            on_dtype = dtype
        if not off_exists:
            # off_value not provided: assign to value 0 of type dtype
            off_value = ops.convert_to_tensor(0, dtype, name="off_value")
            off_dtype = dtype

        # Reached with a mismatch only when both values were supplied
        # without an explicit dtype to check them against.
        if on_dtype != off_dtype:
            raise TypeError("dtype {0} of on_value does not match "
                            "dtype {1} of off_value".format(on_dtype, off_dtype))

        return gen_array_ops._one_hot(indices, depth, on_value, off_value,
                                      axis, name)
Enter: apply completion.
+ Ctrl: remove arguments and replace current word (no Pop-
up focus).
+ Shift: remove arguments (requires Pop-up focus).
import numpy as np
import tensorflow as tf

# Ten evenly spaced values from 0 to 9, used as the class labels to encode.
data = np.linspace(0, 9, 10)
# Build the one-hot op with depth 10 (one slot per label value).
label = tf.one_hot(data, 10)
# NOTE(review): tf.Session is the TensorFlow 1.x graph-mode API; the op has to
# be run inside a session to obtain concrete values.
with tf.Session() as sess:
    print(data)
    print(sess.run(label))
补充知识:数据清洗—制作one-hot
使用pandas进行one-hot编码
pandas.get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False, columns=None, sparse=False, drop_first=False, dtype=None)
pandas中get_dummies()函数可以将字段进行编码,转换为01形式,其中prefix可以为每个新展开的列名添加前缀。
但是,笔者发现它更适合直接用于每一列的取值都是单个类别(单个字符串)的数据,例如:
# Toy frame: two string-valued columns (A, B) and one numeric column (C).
df = pd.DataFrame(
    {
        'A': ['a', 'b', 'a'],
        'B': ['b', 'a', 'c'],
        'C': [1, 2, 3],
    }
)
# One-hot encode the frame; the result is kept in df_dumm and df is untouched.
df_dumm = pd.get_dummies(data=df)
my_one_hot
但是,如果某一列的每个取值是由逗号分隔的多个类别(例如 'a,b,c'),就不能直接用get_dummies转换了,需要先预处理一下,之后再转换为one-hot形式:
我的做法是:
## tqdm_notebook可以导入tqdm包来使用
def one_hot_my(dataframe, attri):
    """Multi-hot encode a column whose cells hold comma-separated categories.

    Every cell of ``dataframe[attri]`` is converted to ``str``, stripped of
    surrounding whitespace and split on ``','``. Each distinct piece becomes
    one output column, in order of first appearance, labelled
    ``attri + piece``. A row gets ``1`` in the columns of the pieces it
    contains and ``0`` everywhere else.

    Args:
        dataframe: Frame containing the column to encode. It is not modified.
        attri: Name of the column to encode; also used as the prefix of the
            generated column labels.

    Returns:
        A new ``pandas.DataFrame`` of 0/1 values with one row per input row
        and a fresh default index.
    """
    # The progress bar is cosmetic: use tqdm_notebook when the surrounding
    # module has imported it, otherwise iterate without one.
    try:
        progress = tqdm_notebook
    except NameError:
        def progress(iterable):
            return iterable

    # Work on a string copy so the caller's column keeps its original dtype.
    cells = dataframe[attri].astype(str)

    key_to_position = {}
    keys_per_row = []
    for cell in progress(cells):
        keys = cell.strip().split(',')
        for key in keys:
            # The first time a key is seen it claims the next free position.
            key_to_position.setdefault(key, len(key_to_position))
        keys_per_row.append(keys)

    print("开始完成one-hot.......")
    width = len(key_to_position)
    one_hot_rows = []
    for keys in progress(keys_per_row):
        row = [0] * width
        for key in keys:
            row[key_to_position[key]] = 1
        one_hot_rows.append(row)

    print("封装成dataframe.......")
    # Order the labels by assigned position so they always line up with the
    # row vectors, independent of dict iteration order.
    ordered_keys = sorted(key_to_position, key=key_to_position.get)
    columns = [attri + key for key in ordered_keys]
    return pd.DataFrame(one_hot_rows, columns=columns)
对属性二值化可以采用:
def binary_apply(key, attri, dataframe):
    """Add a 0/1 indicator column marking rows where ``attri`` equals ``key``.

    The new column is named ``'is_' + <romanized key> + '_' + attri``, where
    the romanized key is the concatenation of the pieces returned by
    ``lazy_pinyin(key)`` (presumably ``pypinyin.lazy_pinyin`` - confirm
    against the module's imports). The column name is printed before the
    column is added.

    Args:
        key: The value to test for.
        attri: Name of the column to compare against ``key``.
        dataframe: Frame to extend; it is modified in place.

    Returns:
        The same ``dataframe`` object, with the indicator column added.
    """
    key_modify = 'is_' + ''.join(lazy_pinyin(key)) + '_' + attri
    print(key_modify)
    # Vectorised comparison instead of a row-wise apply: same 1/0 result per
    # row, and it also works on an empty frame.
    dataframe[key_modify] = (dataframe[attri] == key).astype('int64')
    return dataframe
对字符进行编码,将字符转换为0,1,2…:
def encode_info(dataframe, columns):
    """Replace the values of the given columns with integer codes.

    Each listed column is passed through ``pd.factorize`` and overwritten
    with the resulting codes, so distinct values become 0, 1, 2, ... in order
    of first appearance. The column name is printed as it is processed.

    Example column list:
        ['job', 'marital', 'education', 'default', 'housing', 'loan',
         'contact', 'poutcome']

    Args:
        dataframe: Frame to encode; it is modified in place.
        columns: Iterable of column names to encode.

    Returns:
        The same ``dataframe`` object, with the listed columns encoded.
    """
    for col in columns:
        print(col)
        # factorize returns (codes, uniques); only the codes are kept.
        codes, _uniques = pd.factorize(dataframe[col])
        dataframe[col] = codes
    return dataframe
以上这篇Tensorflow实现将标签变为one-hot形式就是小编分享给大家的全部内容了,希望能给大家一个参考,也希望大家多多支持。
《魔兽世界》大逃杀!60人新游玩模式《强袭风暴》3月21日上线
暴雪近日发布了《魔兽世界》10.2.6 更新内容,新游玩模式《强袭风暴》即将于3月21 日在亚服上线,届时玩家将前往阿拉希高地展开一场 60 人大逃杀对战。
艾泽拉斯的冒险者已经征服了艾泽拉斯的大地及遥远的彼岸。他们在对抗世界上最致命的敌人时展现出过人的手腕,并且成功阻止终结宇宙等级的威胁。当他们在为即将于《魔兽世界》资料片《地心之战》中来袭的萨拉塔斯势力做战斗准备时,他们还需要在熟悉的阿拉希高地面对一个全新的敌人──那就是彼此。在《巨龙崛起》10.2.6 更新的《强袭风暴》中,玩家将会进入一个全新的海盗主题大逃杀式限时活动,其中包含极高的风险和史诗级的奖励。
《强袭风暴》不是普通的战场,作为一个独立于主游戏之外的活动,玩家可以用大逃杀的风格来体验《魔兽世界》,不分职业、不分装备(除了你在赛局中捡到的),光是技巧和战略的强弱之分就能决定出谁才是能坚持到最后的赢家。本次活动将会开放单人和双人模式,玩家在加入海盗主题的预赛大厅区域前,可以从强袭风暴角色画面新增好友。游玩游戏将可以累计名望轨迹,《巨龙崛起》和《魔兽世界:巫妖王之怒 经典版》的玩家都可以获得奖励。
更新动态
- 小骆驼-《草原狼2(蓝光CD)》[原抓WAV+CUE]
- 群星《欢迎来到我身边 电影原声专辑》[320K/MP3][105.02MB]
- 群星《欢迎来到我身边 电影原声专辑》[FLAC/分轨][480.9MB]
- 雷婷《梦里蓝天HQⅡ》 2023头版限量编号低速原抓[WAV+CUE][463M]
- 群星《2024好听新歌42》AI调整音效【WAV分轨】
- 王思雨-《思念陪着鸿雁飞》WAV
- 王思雨《喜马拉雅HQ》头版限量编号[WAV+CUE]
- 李健《无时无刻》[WAV+CUE][590M]
- 陈奕迅《酝酿》[WAV分轨][502M]
- 卓依婷《化蝶》2CD[WAV+CUE][1.1G]
- 群星《吉他王(黑胶CD)》[WAV+CUE]
- 齐秦《穿乐(穿越)》[WAV+CUE]
- 发烧珍品《数位CD音响测试-动向效果(九)》【WAV+CUE】
- 邝美云《邝美云精装歌集》[DSF][1.6G]
- 吕方《爱一回伤一回》[WAV+CUE][454M]




