'''Creates a constant tensor from a tensor-like object.'''
features = tf.constant([[1,3],[2,1],[3,3]])
features
<tf.Tensor: shape=(3,2), dtype=int32, numpy=
array([[1,3],[2,1],[3,3]], dtype=int32)>

'''If `shape` is set, the `value` is reshaped to match. Scalars are expanded to fill the `shape`:'''
a = tf.constant(0, shape=(2,3))
a
<tf.Tensor: shape=(2,3), dtype=int32, numpy=
array([[0,0,0],[0,0,0]], dtype=int32)>

'''Passing a flat (one-dimensional) list of values together with `shape` is equivalent to a reshape:'''
b = tf.constant([1,2,3,4,5,6], shape=[2,3])
b
<tf.Tensor: shape=(2,3), dtype=int32, numpy=
array([[1,2,3],[4,5,6]], dtype=int32)>
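
As a small addition (a sketch, not from the original post; it assumes TF 2.x with numpy imported), `tf.constant` also accepts a numpy array and can cast and reshape it in one call via `dtype` and `shape`:
import numpy as np
import tensorflow as tf

# cast float64 -> float32 and reshape the 6 values into (2, 3)
v = tf.constant(np.arange(6.0), dtype=tf.float32, shape=[2, 3])
v
<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[0., 1., 2.],[3., 4., 5.]], dtype=float32)>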
2. tf.data.Dataset.from_tensor_slices
'''Creates a `Dataset` whose elements are slices of the given tensors.'''
# If from_tensor_slices is given a tuple, each element of the resulting dataset is also a tuple
test_2 = tf.data.Dataset.from_tensor_slices((data_add[['col_1','col_2']][:13].values, data_add[['label']][:13].values))
test_2
<TensorSliceDataset shapes: ((2,), (1,)), types: (tf.int64, tf.int64)>

# If it is given a single array, each element is a single tensor as well
tf.data.Dataset.from_tensor_slices(data_add[['label']][:13].values)
<TensorSliceDataset shapes: (1,), types: tf.int64>
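
`data_add` is a pandas DataFrame defined earlier in the post. For a self-contained version, here is a minimal sketch with a small made-up DataFrame (`df` and its values are assumptions, not the original data) that reproduces the tuple behaviour:
import pandas as pd
import tensorflow as tf

# hypothetical stand-in for data_add
df = pd.DataFrame({'col_1': [3, 2, 1], 'col_2': [1, 1, 0], 'label': [1, 0, 1]})
ds = tf.data.Dataset.from_tensor_slices((df[['col_1', 'col_2']].values, df[['label']].values))
ds
<TensorSliceDataset shapes: ((2,), (1,)), types: (tf.int64, tf.int64)>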
3. TensorSliceDataset.as_numpy_iterator
'''Returns an iterator which converts all elements of the dataset to numpy.'''
# Continuing from the previous section
# Without as_numpy_iterator, iterating shows each element's shape, dtype and value
for ele in test_2:
    ele
    type(ele)
    type(ele[0])
(<tf.Tensor: shape=(2,), dtype=int64, numpy=array([3, 1])>, <tf.Tensor: shape=(1,), dtype=int64, numpy=array([1])>)
tuple
tensorflow.python.framework.ops.EagerTensor
# With as_numpy_iterator, the dataset elements are printed directly as numpy values
for ele in test_2.as_numpy_iterator():
    ele
    type(ele)
    type(ele[0])
(array([3, 1]), array([1]))
tuple
numpy.ndarray
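
As a quick self-contained sketch (using a range dataset rather than `test_2`, and the same `tf` import as above), `as_numpy_iterator` also makes it easy to materialise a small dataset into a plain Python list:
ds = tf.data.Dataset.range(3)
list(ds.as_numpy_iterator())
[0, 1, 2]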
4. TensorSliceDataset.shuffle
'''Randomly shuffles the elements of this dataset.'''
# Continuing from section 2
# For a perfect shuffle, buffer_size must be greater than or equal to the full size of the dataset.
test_2 = test_2.shuffle(buffer_size=16, seed=2021)
for ele in test_2.as_numpy_iterator():
    ele
    type(ele)
    type(ele[0])
(array([0, 0]), array([1]))
tuple
numpy.ndarray
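
One related knob worth noting (a sketch under TF 2.x defaults, not from the original post): `shuffle` reshuffles on every pass over the data by default; passing `reshuffle_each_iteration=False` together with a seed fixes the order across iterations:
ds = tf.data.Dataset.range(5).shuffle(buffer_size=5, seed=2021, reshuffle_each_iteration=False)
first_pass = list(ds.as_numpy_iterator())
second_pass = list(ds.as_numpy_iterator())
# the two passes produce the same order because reshuffling was disabled
first_pass == second_pass
True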
5. TensorSliceDataset.batch
'''Combines consecutive elements of this dataset into batches.'''
'''drop_remainder: whether the last batch should be dropped if it has fewer than `batch_size` elements; if True, it is dropped.'''
# Continuing from section 4; the data comes out in a new order because shuffle reshuffles on each iteration by default before batching
for ele in test_2.batch(3, drop_remainder=False):
    ele
(<tf.Tensor: shape=(3,2), dtype=int64, numpy=
array([[1,0],[1,1],[2,1]])>, <tf.Tensor: shape=(3,1), dtype=int64, numpy=
array([[1],[1],[1]])>)
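
To isolate what `drop_remainder` does, a self-contained sketch on a range dataset (not part of the original post):
ds = tf.data.Dataset.range(10)
# 10 elements in batches of 3: the last, incomplete batch of size 1 is kept ...
list(ds.batch(3, drop_remainder=False).as_numpy_iterator())
[array([0, 1, 2]), array([3, 4, 5]), array([6, 7, 8]), array([9])]
# ... or dropped
list(ds.batch(3, drop_remainder=True).as_numpy_iterator())
[array([0, 1, 2]), array([3, 4, 5]), array([6, 7, 8])]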
6. TensorSliceDataset.take / .skip / .shard
'''Creates a `Dataset` with at most `count` elements from this dataset.'''
# Take the first k elements of an existing dataset
dataset = tf.data.Dataset.range(10)
dataset = dataset.take(3)
list(dataset.as_numpy_iterator())
[0, 1, 2]

'''Creates a `Dataset` that skips `count` elements from this dataset.'''
# Skip the first k elements of an existing dataset and keep everything after them
dataset = tf.data.Dataset.range(10)
dataset = dataset.skip(7)
list(dataset.as_numpy_iterator())
[7, 8, 9]

'''Creates a `Dataset` that includes only 1/`num_shards` of this dataset.'''
# Keep the elements of the existing dataset whose position satisfies position mod num_shards == index
# e.g. for [1, 10, 2, 7, 6, 8, 9], the positions with position mod 3 == 1 are 1 and 4, whose values are 10 and 6
A = tf.data.Dataset.from_tensor_slices([1,10,2,7,6,8,9])
C = A.shard(num_shards=3, index=1)
list(C.as_numpy_iterator())
[10, 6]
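
Putting the three together, a common pattern (a sketch, not from the original post) is a quick train/validation split with `take`/`skip`, while `shard` splits the same dataset across workers:
ds = tf.data.Dataset.range(10)
train = ds.take(8)   # first 8 elements
val = ds.skip(8)     # remaining 2 elements
list(train.as_numpy_iterator())
[0, 1, 2, 3, 4, 5, 6, 7]
list(val.as_numpy_iterator())
[8, 9]
# with 2 workers, worker 0 gets positions 0, 2, 4, ... and worker 1 gets 1, 3, 5, ...
list(ds.shard(num_shards=2, index=0).as_numpy_iterator())
[0, 2, 4, 6, 8]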