TensorFlow 张量基础

什么是张量？

张量（Tensor）是TensorFlow中的基本数据结构，可以理解为多维数组的泛化。在TensorFlow中，所有数据都以张量的形式存在和流动。

python

import tensorflow as tf
import numpy as np

# 不同维度的张量
scalar = tf.constant(3.14)                    # 0维张量（标量）
vector = tf.constant([1, 2, 3])              # 1维张量（向量）
matrix = tf.constant([[1, 2], [3, 4]])       # 2维张量（矩阵）
tensor_3d = tf.zeros([2, 3, 4])              # 3维张量

print(f"标量: {scalar}")
print(f"向量: {vector}")
print(f"矩阵: {matrix}")
print(f"3维张量形状: {tensor_3d.shape}")

创建张量

1. 从常量创建

python

import tensorflow as tf

# 创建常量张量
const_tensor = tf.constant([1, 2, 3, 4])
print(f"常量张量: {const_tensor}")

# 指定数据类型
float_tensor = tf.constant([1.0, 2.0, 3.0], dtype=tf.float32)
int_tensor = tf.constant([1, 2, 3], dtype=tf.int32)

print(f"浮点张量: {float_tensor}")
print(f"整数张量: {int_tensor}")

# 创建多维张量
matrix = tf.constant([[1, 2, 3], [4, 5, 6]])
print(f"矩阵: {matrix}")

# 从嵌套列表创建
nested_tensor = tf.constant([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])
print(f"嵌套张量形状: {nested_tensor.shape}")

2. 使用内置函数创建

python

# 零张量
zeros = tf.zeros([3, 4])
print(f"零张量:\n{zeros}")

# 一张量
ones = tf.ones([2, 3])
print(f"一张量:\n{ones}")

# 单位矩阵
eye = tf.eye(3)
print(f"单位矩阵:\n{eye}")

# 填充特定值
fill = tf.fill([2, 3], 7)
print(f"填充张量:\n{fill}")

# 随机张量
random_normal = tf.random.normal([2, 3])  # 正态分布
random_uniform = tf.random.uniform([2, 3])  # 均匀分布
print(f"正态分布随机张量:\n{random_normal}")
print(f"均匀分布随机张量:\n{random_uniform}")

# 序列张量
range_tensor = tf.range(10)  # 0到9
linspace_tensor = tf.linspace(0.0, 1.0, 5)  # 0到1的5个等间距点
print(f"范围张量: {range_tensor}")
print(f"线性空间张量: {linspace_tensor}")

3. 从NumPy数组创建

python

import numpy as np

# 从NumPy数组创建张量
np_array = np.array([[1, 2, 3], [4, 5, 6]])
tf_tensor = tf.constant(np_array)
print(f"从NumPy创建: {tf_tensor}")

# 自动类型转换
np_float = np.array([1.0, 2.0, 3.0])
tf_from_np = tf.constant(np_float)
print(f"NumPy到TensorFlow: {tf_from_np.dtype}")

# 张量转换为NumPy
tf_tensor = tf.constant([1, 2, 3])
np_from_tf = tf_tensor.numpy()
print(f"TensorFlow到NumPy: {np_from_tf}")
print(f"类型: {type(np_from_tf)}")

4. 变量张量

python

# 创建变量（可训练参数）
variable = tf.Variable([1.0, 2.0, 3.0])
print(f"变量: {variable}")

# 变量可以被修改
variable.assign([4.0, 5.0, 6.0])
print(f"修改后的变量: {variable}")

# 部分修改
variable.assign_add([1.0, 1.0, 1.0])  # 加法
print(f"加法后: {variable}")

variable.assign_sub([0.5, 0.5, 0.5])  # 减法
print(f"减法后: {variable}")

# 创建可训练变量
trainable_var = tf.Variable(tf.random.normal([3, 3]), trainable=True)
print(f"可训练变量: {trainable_var.trainable}")

张量属性

python

# 创建示例张量
tensor = tf.random.normal([2, 3, 4])

print(f"形状: {tensor.shape}")           # 张量形状
print(f"维度: {tensor.ndim}")            # 张量维度数
print(f"大小: {tf.size(tensor)}")        # 元素总数
print(f"数据类型: {tensor.dtype}")       # 数据类型
print(f"设备: {tensor.device}")          # 所在设备

# 获取形状信息
print(f"形状列表: {tensor.shape.as_list()}")
print(f"动态形状: {tf.shape(tensor)}")

# 检查张量属性
print(f"是否为变量: {isinstance(tensor, tf.Variable)}")

数据类型

1. 基本数据类型

python

# 浮点类型
float16_tensor = tf.constant([1.0, 2.0], dtype=tf.float16)  # 半精度
float32_tensor = tf.constant([1.0, 2.0], dtype=tf.float32)  # 单精度
float64_tensor = tf.constant([1.0, 2.0], dtype=tf.float64)  # 双精度

# 整数类型
int8_tensor = tf.constant([1, 2], dtype=tf.int8)
int16_tensor = tf.constant([1, 2], dtype=tf.int16)
int32_tensor = tf.constant([1, 2], dtype=tf.int32)
int64_tensor = tf.constant([1, 2], dtype=tf.int64)

# 布尔类型
bool_tensor = tf.constant([True, False], dtype=tf.bool)

# 字符串类型
string_tensor = tf.constant(["hello", "world"], dtype=tf.string)

print(f"Float32: {float32_tensor.dtype}")
print(f"Int32: {int32_tensor.dtype}")
print(f"Bool: {bool_tensor.dtype}")
print(f"String: {string_tensor.dtype}")

2. 类型转换

python

# 创建整数张量
int_tensor = tf.constant([1, 2, 3], dtype=tf.int32)

# 类型转换
float_tensor = tf.cast(int_tensor, tf.float32)
bool_tensor = tf.cast(int_tensor, tf.bool)

print(f"原始类型: {int_tensor.dtype}")
print(f"转换为浮点: {float_tensor.dtype}")
print(f"转换为布尔: {bool_tensor.dtype}")

# TensorFlow does NOT promote dtypes automatically: adding an int32 tensor to
# a float32 tensor raises InvalidArgumentError. Cast explicitly so both
# operands share a dtype before the arithmetic op.
mixed_result = tf.add(tf.cast(tf.constant(1), tf.float32), tf.constant(2.0))
print(f"混合运算结果类型: {mixed_result.dtype}")

张量操作

1. 索引和切片

python

# 创建示例张量
tensor = tf.constant([[1, 2, 3, 4],
                     [5, 6, 7, 8],
                     [9, 10, 11, 12]])

# 基本索引
print(f"第一行: {tensor[0]}")
print(f"第一列: {tensor[:, 0]}")
print(f"特定元素: {tensor[1, 2]}")

# 切片操作
print(f"前两行: {tensor[:2]}")
print(f"后两列: {tensor[:, -2:]}")
print(f"子矩阵: {tensor[1:3, 1:3]}")

# 步长切片
print(f"每隔一行: {tensor[::2]}")
print(f"反向: {tensor[::-1]}")

# 高级索引
indices = tf.constant([0, 2])
selected_rows = tf.gather(tensor, indices)
print(f"选择特定行: {selected_rows}")

# 布尔索引
mask = tensor > 6
masked_values = tf.boolean_mask(tensor, mask)
print(f"大于6的元素: {masked_values}")

2. 形状变换

python

# 创建示例张量
tensor = tf.random.normal([2, 3, 4])
print(f"原始形状: {tensor.shape}")

# 重塑形状
reshaped = tf.reshape(tensor, [6, 4])
print(f"重塑后: {reshaped.shape}")

# 展平
flattened = tf.reshape(tensor, [-1])  # -1表示自动计算
print(f"展平后: {flattened.shape}")

# 增加维度
expanded = tf.expand_dims(tensor, axis=0)
print(f"增加维度: {expanded.shape}")

expanded_last = tf.expand_dims(tensor, axis=-1)
print(f"最后增加维度: {expanded_last.shape}")

# 减少维度
squeezed = tf.squeeze(expanded)
print(f"压缩维度: {squeezed.shape}")

# 转置
transposed = tf.transpose(tensor, perm=[2, 0, 1])
print(f"转置后: {transposed.shape}")

# 矩阵转置（2D）
matrix = tf.constant([[1, 2, 3], [4, 5, 6]])
matrix_t = tf.transpose(matrix)
print(f"矩阵转置: {matrix_t}")

3. 拼接和分割

python

# 创建示例张量
a = tf.constant([[1, 2], [3, 4]])
b = tf.constant([[5, 6], [7, 8]])

# 拼接
concat_0 = tf.concat([a, b], axis=0)  # 沿第0维拼接
concat_1 = tf.concat([a, b], axis=1)  # 沿第1维拼接
print(f"沿行拼接:\n{concat_0}")
print(f"沿列拼接:\n{concat_1}")

# 堆叠（增加新维度）
stacked = tf.stack([a, b], axis=0)
print(f"堆叠结果形状: {stacked.shape}")

# 分割
tensor = tf.constant([[1, 2, 3, 4, 5, 6]])
split_result = tf.split(tensor, num_or_size_splits=3, axis=1)
print(f"分割结果: {[t.numpy() for t in split_result]}")

# Uneven split: a list gives the size of each piece along `axis`. The sizes
# must add up to the length of that dimension (1 + 2 + 3 == 6 here).
uneven_split = tf.split(tensor, num_or_size_splits=[1, 2, 3], axis=1)
print(f"不等长分割: {[t.numpy() for t in uneven_split]}")

数学运算

1. 基本运算

python

# 创建示例张量
a = tf.constant([[1.0, 2.0], [3.0, 4.0]])
b = tf.constant([[5.0, 6.0], [7.0, 8.0]])

# 逐元素运算
add_result = tf.add(a, b)  # 或 a + b
sub_result = tf.subtract(a, b)  # 或 a - b
mul_result = tf.multiply(a, b)  # 或 a * b
div_result = tf.divide(a, b)  # 或 a / b

print(f"加法:\n{add_result}")
print(f"乘法:\n{mul_result}")

# 标量运算
scalar_mul = tf.multiply(a, 2.0)  # 或 a * 2.0
scalar_add = tf.add(a, 1.0)  # 或 a + 1.0

print(f"标量乘法:\n{scalar_mul}")
print(f"标量加法:\n{scalar_add}")

# 数学函数
sqrt_result = tf.sqrt(a)
exp_result = tf.exp(a)
log_result = tf.math.log(a)
sin_result = tf.sin(a)

print(f"平方根:\n{sqrt_result}")
print(f"指数:\n{exp_result}")

2. 矩阵运算

python

# 矩阵乘法
a = tf.constant([[1.0, 2.0], [3.0, 4.0]])
b = tf.constant([[5.0, 6.0], [7.0, 8.0]])

matmul_result = tf.matmul(a, b)  # 或 a @ b
print(f"矩阵乘法:\n{matmul_result}")

# 批量矩阵乘法
batch_a = tf.random.normal([3, 2, 4])
batch_b = tf.random.normal([3, 4, 5])
batch_result = tf.matmul(batch_a, batch_b)
print(f"批量矩阵乘法形状: {batch_result.shape}")

# Matrix inverse
matrix = tf.constant([[1.0, 2.0], [3.0, 4.0]])
try:
    inverse = tf.linalg.inv(matrix)
    print(f"矩阵求逆:\n{inverse}")
except tf.errors.InvalidArgumentError:
    # Catch only the error TensorFlow raises when it detects a singular
    # input, instead of a bare `except:` that would also hide typos,
    # KeyboardInterrupt, etc. NOTE: detection of singular matrices is not
    # guaranteed on every device; the op may return garbage instead.
    print("矩阵不可逆")

# 矩阵行列式
det = tf.linalg.det(matrix)
print(f"行列式: {det}")

# Eigendecomposition. `matrix` ([[1, 2], [3, 4]]) is not symmetric, so use the
# general solver tf.linalg.eig. tf.linalg.eigh assumes a self-adjoint input
# and would silently return the eigenvalues of a different (symmetrized)
# matrix. tf.linalg.eig returns complex-typed tensors even when the
# eigenvalues happen to be real.
eigenvalues, eigenvectors = tf.linalg.eig(matrix)
print(f"特征值: {eigenvalues}")

3. 统计运算

python

# 创建示例张量
tensor = tf.random.normal([3, 4])

# 基本统计
mean_all = tf.reduce_mean(tensor)  # 全局均值
mean_axis0 = tf.reduce_mean(tensor, axis=0)  # 沿第0维均值
mean_axis1 = tf.reduce_mean(tensor, axis=1)  # 沿第1维均值

print(f"全局均值: {mean_all}")
print(f"沿行均值: {mean_axis0}")
print(f"沿列均值: {mean_axis1}")

# 其他统计量
std_dev = tf.math.reduce_std(tensor)  # 标准差
variance = tf.math.reduce_variance(tensor)  # 方差
max_val = tf.reduce_max(tensor)  # 最大值
min_val = tf.reduce_min(tensor)  # 最小值
sum_val = tf.reduce_sum(tensor)  # 求和

print(f"标准差: {std_dev}")
print(f"方差: {variance}")
print(f"最大值: {max_val}")
print(f"最小值: {min_val}")

# 最值索引
argmax = tf.argmax(tensor, axis=1)  # 最大值索引
argmin = tf.argmin(tensor, axis=1)  # 最小值索引
print(f"最大值索引: {argmax}")
print(f"最小值索引: {argmin}")

# Top-k操作
top_k_values, top_k_indices = tf.nn.top_k(tensor, k=2)
print(f"Top-2值: {top_k_values}")
print(f"Top-2索引: {top_k_indices}")

广播机制

python

# TensorFlow支持广播机制
a = tf.constant([[1, 2, 3]])  # 形状: (1, 3)
b = tf.constant([[1], [2], [3]])  # 形状: (3, 1)

# 广播相加
result = a + b  # 结果形状: (3, 3)
print(f"广播结果:\n{result}")

# 不同维度的广播
scalar = tf.constant(10)
vector = tf.constant([1, 2, 3])
matrix = tf.constant([[1, 2, 3], [4, 5, 6]])

# 标量与向量
scalar_vector = scalar + vector
print(f"标量+向量: {scalar_vector}")

# 向量与矩阵
vector_matrix = vector + matrix
print(f"向量+矩阵:\n{vector_matrix}")

# 检查广播兼容性
def check_broadcast_compatibility(shape1, shape2):
    """Print whether two shapes can be broadcast together.

    The check is empirical: two all-ones tensors of the given shapes are
    added. On success the resulting shape is printed; if the addition
    raises, the error is printed instead.

    Args:
        shape1: Shape of the first operand (anything tf.ones accepts).
        shape2: Shape of the second operand.
    """
    try:
        result = tf.ones(shape1) + tf.ones(shape2)
        print(f"形状 {shape1} 和 {shape2} 兼容，结果形状: {result.shape}")
    except Exception as e:
        print(f"形状 {shape1} 和 {shape2} 不兼容: {e}")

check_broadcast_compatibility([3, 1], [1, 4])
check_broadcast_compatibility([3, 4], [2, 4])

设备管理

python

# 检查可用设备
print("可用设备:")
for device in tf.config.list_physical_devices():
    print(f"  {device}")

# 指定设备
with tf.device('/CPU:0'):
    cpu_tensor = tf.constant([1, 2, 3])
    print(f"CPU张量: {cpu_tensor.device}")

# GPU设备（如果可用）
if tf.config.list_physical_devices('GPU'):
    with tf.device('/GPU:0'):
        gpu_tensor = tf.constant([1, 2, 3])
        print(f"GPU张量: {gpu_tensor.device}")

# 设备间数据传输
cpu_data = tf.constant([1, 2, 3])
if tf.config.list_physical_devices('GPU'):
    # 复制到GPU
    gpu_data = tf.identity(cpu_data)  # 自动放置
    print(f"数据设备: {gpu_data.device}")

# 手动设备放置
def manual_placement():
    """Add a CPU-pinned tensor to one placed on the GPU when available.

    The second operand falls back to the CPU when no GPU is visible.

    Returns:
        The element-wise sum of the two constant vectors.
    """
    with tf.device('/CPU:0'):
        a = tf.constant([1.0, 2.0, 3.0])

    if tf.config.list_physical_devices('GPU'):
        second_device = '/GPU:0'
    else:
        second_device = '/CPU:0'

    with tf.device(second_device):
        b = tf.constant([4.0, 5.0, 6.0])

    # The addition runs outside any device scope; TensorFlow copies the
    # operands across devices as needed.
    return a + b

result = manual_placement()
print(f"跨设备运算结果: {result}")

内存管理

python

# 检查张量内存使用
def check_memory_usage():
    """Create a 1000x1000 random-normal tensor and print its shape and size in MB.

    The size is measured on the NumPy copy returned by `.numpy()`.
    """
    large_tensor = tf.random.normal([1000, 1000])
    print(f"大张量形状: {large_tensor.shape}")
    megabytes = large_tensor.numpy().nbytes / 1024 / 1024
    print(f"内存使用: {megabytes:.2f} MB")

check_memory_usage()

# Dropping the last Python reference to a tensor lets its memory be reclaimed.
# The tensor created inside check_memory_usage() is local to that call and is
# not visible here (a bare `del large_tensor` would raise NameError), so
# allocate one at module level to demonstrate `del`.
large_tensor = tf.random.normal([1000, 1000])
del large_tensor

# 使用tf.function优化内存
@tf.function
def memory_efficient_operation(x):
    """Return the sum of the squared elements of `x`.

    Decorated with tf.function, so the body is traced and run as a graph.
    """
    squared = x ** 2
    return tf.reduce_sum(squared)

# 内存映射（对于大数据集）
def create_memory_mapped_dataset():
    """Build a tf.data pipeline over the integers 0..999999 cast to float32.

    Despite the name, no memory-mapped file is involved; the function only
    builds a `Dataset.range` pipeline with a cast applied via `map`.

    Returns:
        The mapped tf.data.Dataset.
    """
    def to_float32(x):
        return tf.cast(x, tf.float32)

    return tf.data.Dataset.range(1000000).map(to_float32)

# 梯度带内存管理
def gradient_tape_memory():
    """Differentiate y = x ** 2 at x = 2.0 and print the gradient."""
    x = tf.Variable(2.0)

    # persistent=False is the default, spelled out here for emphasis: a
    # non-persistent tape releases its resources once gradient() is called.
    with tf.GradientTape(persistent=False) as tape:
        y = x ** 2

    grad = tape.gradient(y, x)
    print(f"梯度: {grad}")

gradient_tape_memory()

实用技巧

1. 张量调试

python

# 打印张量信息
def print_tensor_info(tensor, name="Tensor"):
    """Print a short report about `tensor`, followed by a blank line.

    Args:
        tensor: Object exposing `.shape`, `.dtype` and `.device`.
        name: Label printed as the report header.
    """
    report = [
        f"{name}:",
        f"  形状: {tensor.shape}",
        f"  数据类型: {tensor.dtype}",
        f"  设备: {tensor.device}",
        f"  值:\n{tensor}",
    ]
    print("\n".join(report))
    print()

tensor = tf.random.normal([2, 3])
print_tensor_info(tensor, "随机张量")

# 使用tf.print进行调试（在图模式下也能工作）
@tf.function
def debug_function(x):
    """Double `x`, logging the input and the result with tf.print.

    tf.print is used rather than print so the messages are emitted on every
    call, including when the function runs as a traced graph.
    """
    tf.print("输入张量:", x)
    doubled = x * 2
    tf.print("结果:", doubled)
    return doubled

debug_function(tf.constant([1, 2, 3]))

2. 条件操作

python

# 条件选择
condition = tf.constant([True, False, True])
x = tf.constant([1, 2, 3])
y = tf.constant([4, 5, 6])

result = tf.where(condition, x, y)
print(f"条件选择: {result}")

# 数值条件
values = tf.constant([-1, 0, 1, 2])
positive_mask = values > 0
positive_values = tf.where(positive_mask, values, 0)
print(f"正数掩码: {positive_values}")

# 复杂条件
def complex_condition(x):
    """Return sqrt(x) where x > 0 and 0 elsewhere, with finite gradients.

    tf.where evaluates both branches for every element. Calling tf.sqrt on
    non-positive entries produces NaN (or an infinite derivative at 0) in the
    unselected branch, which turns the gradient into NaN even though the
    forward value is masked out. Feeding sqrt a harmless placeholder (1.0) at
    those positions keeps both the forward result and the gradient finite.

    Args:
        x: Floating-point tensor.

    Returns:
        Tensor with the same shape and dtype as `x`.
    """
    is_positive = x > 0
    # Replace non-positive entries before sqrt; they are masked out below.
    safe_x = tf.where(is_positive, x, tf.ones_like(x))
    return tf.where(is_positive, tf.sqrt(safe_x), tf.zeros_like(x))

test_values = tf.constant([-1.0, 0.0, 1.0, 4.0])
result = complex_condition(test_values)
print(f"复杂条件结果: {result}")

3. 性能优化

python

# 使用tf.function装饰器
@tf.function
def optimized_computation(x, y):
    """Return matmul(x, y) plus the mean of all elements of `x`.

    The scalar mean is broadcast over every entry of the product.
    Decorated with tf.function, so the body is traced and run as a graph.
    """
    product = tf.matmul(x, y)
    x_mean = tf.reduce_mean(x)
    return product + x_mean

# 预分配张量
def preallocate_tensors():
    """Accumulate ten random-normal [1000, 1000] draws in one preallocated buffer.

    tf.Tensor objects are immutable, so `result = result + ...` allocates a
    fresh tensor on every iteration and is not an in-place update. A
    tf.Variable owns a single buffer that assign_add updates in place.

    Returns:
        A float32 tensor of shape [1000, 1000] holding the accumulated sum.
    """
    result = tf.Variable(tf.zeros([1000, 1000]), trainable=False)
    for _ in range(10):
        # In-place accumulation into the variable's existing buffer.
        result.assign_add(tf.random.normal([1000, 1000]))
    # Hand back a plain tensor, matching the original return type.
    return tf.convert_to_tensor(result)

# 使用合适的数据类型
# float32通常比float64更快，内存使用更少
fast_tensor = tf.constant([1.0, 2.0, 3.0], dtype=tf.float32)

总结

张量是TensorFlow的核心概念，掌握张量操作是深度学习的基础：

创建张量：了解各种创建方法和数据类型
张量属性：理解形状、维度、数据类型等概念
基本操作：索引、切片、形状变换、拼接分割
数学运算：逐元素运算、矩阵运算、统计运算
广播机制：理解不同形状张量的运算规则
设备管理：合理使用CPU和GPU资源
内存管理：优化内存使用，提高性能

这些基础操作将在后续的神经网络构建中频繁使用，建议多加练习！

TensorFlow 张量基础 ​

什么是张量？ ​

创建张量 ​

1. 从常量创建 ​

2. 使用内置函数创建 ​

3. 从NumPy数组创建 ​

4. 变量张量 ​

张量属性 ​

数据类型 ​

1. 基本数据类型 ​

2. 类型转换 ​

张量操作 ​

1. 索引和切片 ​

2. 形状变换 ​

3. 拼接和分割 ​

数学运算 ​

1. 基本运算 ​

2. 矩阵运算 ​

3. 统计运算 ​

广播机制 ​

设备管理 ​

内存管理 ​

实用技巧 ​

1. 张量调试 ​

2. 条件操作 ​

3. 性能优化 ​

总结 ​

TensorFlow 张量基础

什么是张量？

创建张量

1. 从常量创建

2. 使用内置函数创建

3. 从NumPy数组创建

4. 变量张量

张量属性

数据类型

1. 基本数据类型

2. 类型转换

张量操作

1. 索引和切片

2. 形状变换

3. 拼接和分割

数学运算

1. 基本运算

2. 矩阵运算

3. 统计运算

广播机制

设备管理

内存管理

实用技巧

1. 张量调试

2. 条件操作

3. 性能优化

总结