四个常用的随机数生成函数:np.random.randint、np.random.rand、tf.random_normal、tf.random_uniform

博客介绍了NumPy和TensorFlow中的随机函数原型。包括np.random.randint、np.random.rand、tf.random_normal和tf.random_uniform的具体参数,如np.random.randint有low、high、size等参数,为使用这些函数提供了基础信息。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

import tensorflow as tf

import numpy as np

原型:np.random.randint(low, high=None, size=None, dtype='l')

# With only `low` given, the returned values lie in [0, low).
# With both `low` and `high` given, the returned values lie in [low, high).
t1 = np.random.randint(2, size=10)
print(t1)
# Sample output: [0 0 0 0 1 0 1 1 1 1]

t2 = np.random.randint(low=1, high=3, size=10)
print(t2)
# Sample output: [2 1 2 1 2 2 2 1 1 2]

原型:np.random.rand(d0, d1, …, dn),其中 d0 … dn 为输出数组各维度的大小

# Samples are drawn from a uniform distribution over [0, 1).
t3 = np.random.rand(3, 2)
print(t3)
# Sample output:
# [[0.25586789 0.26593995]
#  [0.00827676 0.67958833]
#  [0.77343696 0.40320088]]

原型:tf.random_normal(shape, mean=0.0, stddev=1.0, dtype=dtypes.float32, seed=None, name=None)

shape: 输出张量的形状,必选
mean: 正态分布的均值,默认为0
stddev: 正态分布的标准差,默认为1.0
dtype: 输出的类型,默认为tf.float32
seed: 随机数种子,是一个整数,当设置之后,每次生成的随机数都一样
name: 操作的名称

# Build a tensor of the requested shape whose values follow a normal
# distribution; with the defaults that is mean 0 and standard deviation 1.
t4 = tf.random_normal((3, 2))
print(t4)
# Printing the tensor shows the graph node, not its values:
# Tensor("random_normal:0", shape=(3, 2), dtype=float32)

# The values only exist once the op is run inside a session. No variables are
# created here, so no initializer op has to be built or run first.
with tf.Session() as sess:
    print(sess.run(t4))
# Sample output:
# [[-0.1009187  -0.52692866]
#  [ 0.75775075  0.10555366]
#  [ 0.89376223 -1.5488473 ]]

原型:tf.random_uniform(shape, minval=0, maxval=None, dtype=dtypes.float32, seed=None, name=None)

# Draw values from a uniform distribution over [minval, maxval).
t5 = tf.random_uniform((3, 2), minval=1, maxval=3)
print(t5)
# Printing the tensor shows the graph node, not its values:
# Tensor("random_uniform:0", shape=(3, 2), dtype=float32)

# The values only exist once the op is run inside a session. No variables are
# created here, so no initializer op has to be built or run first.
with tf.Session() as sess:
    print(sess.run(t5))
# Sample output:
# [[2.8821492 1.3117931]
#  [2.6424809 1.5386689]
#  [1.4922662 1.0668414]]
# ==============================================
# Music-genre classification script: MFCC feature extraction, CNN training,
# evaluation plots and a single-file prediction. The dataset directory is
# expected to contain one sub-folder of .wav files per genre.
# ==============================================
import os
import numpy as np
import matplotlib.pyplot as plt
import librosa
import librosa.display
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.metrics import confusion_matrix, classification_report
import tensorflow as tf
from tensorflow.keras import layers, models, utils, callbacks
from tensorflow.keras.regularizers import l2

# Use a font that can render the Chinese plot labels, and keep the minus sign
# displayable with that font.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# ==============================================
# Path configuration
# ==============================================
DATASET_PATH = "E:/genres"
TEST_AUDIO_PATH = "D:/218.wav"

# Single source of truth for the class names; a sample's label is its index
# in this list.
GENRES = ['blues', 'classical', 'country', 'disco', 'hiphop',
          'jazz', 'metal', 'pop', 'reggae', 'rock']


# ==============================================
# 1. Feature extraction
# ==============================================
def _augment_audio(audio, sample_rate):
    """Apply a random combination of waveform augmentations to *audio*.

    Each of the four augmentations (pitch shift, time stretch, additive
    noise, random crop) is applied independently with probability 0.5.
    """
    full_len = sample_rate * 30

    if np.random.random() > 0.5:
        # Pitch shift by up to two semitones in either direction.
        n_steps = np.random.uniform(-2.0, 2.0)
        audio = librosa.effects.pitch_shift(audio, sr=sample_rate, n_steps=n_steps)

    if np.random.random() > 0.5:
        # Time stretch, then trim / zero-pad back to the 30 s length.
        rate = np.random.uniform(0.8, 1.2)
        audio = librosa.effects.time_stretch(audio, rate=rate)
        if len(audio) > full_len:
            audio = audio[:full_len]
        else:
            audio = np.pad(audio, (0, full_len - len(audio)))

    if np.random.random() > 0.5:
        # Gaussian noise scaled relative to the signal's own std.
        noise = np.random.normal(0, 0.005 * np.std(audio), len(audio))
        audio = audio + noise

    if np.random.random() > 0.5:
        # Keep a random 25 s window and zero-pad it back to 30 s.
        crop_len = sample_rate * 25
        start = np.random.randint(0, max(1, len(audio) - crop_len))
        audio = audio[start:start + crop_len]
        audio = np.pad(audio, (0, full_len - len(audio)))

    return audio


def extract_features(file_path, max_pad_len=174, augment=False):
    """Load an audio file and return per-coefficient normalized MFCCs.

    Args:
        file_path: Path of the audio file to load.
        max_pad_len: Number of MFCC frames in the returned array.
        augment: When True, randomly augment the waveform and take a random
            frame window; otherwise take the centre frame window.

    Returns:
        Array of shape (40, max_pad_len), or None if anything failed (the
        error is printed).
    """
    try:
        # Fixed sample rate and at most 30 s of audio.
        audio, sample_rate = librosa.load(file_path, sr=22050, duration=30)

        if augment:
            audio = _augment_audio(audio, sample_rate)

        mfccs = librosa.feature.mfcc(
            y=audio, sr=sample_rate, n_mfcc=40, n_fft=2048, hop_length=512
        )

        n_frames = mfccs.shape[1]
        if n_frames < max_pad_len:
            # Reflect padding avoids the hard edge that constant padding adds.
            pad_width = max_pad_len - n_frames
            mfccs = np.pad(mfccs, ((0, 0), (0, pad_width)), mode='reflect')
        elif n_frames > max_pad_len:
            if augment:
                # +1 so the last valid window start can also be drawn.
                start = np.random.randint(0, n_frames - max_pad_len + 1)
            else:
                # Deterministic centre crop for un-augmented samples.
                start = (n_frames - max_pad_len) // 2
            mfccs = mfccs[:, start:start + max_pad_len]

        # Standardize each coefficient over time; epsilon guards against a
        # zero standard deviation.
        mean = np.mean(mfccs, axis=1, keepdims=True)
        std = np.std(mfccs, axis=1, keepdims=True)
        mfccs = (mfccs - mean) / (std + 1e-8)

    except Exception as e:
        # Best-effort: one unreadable file must not abort the whole dataset load.
        print(f"Error processing {file_path}: {str(e)}")
        return None

    return mfccs


# ==============================================
# 2. Dataset loading
# ==============================================
def load_dataset(dataset_path, augment_train=False, return_groups=False):
    """Load the dataset and return a stratified train/test split.

    The split is made on the original recordings first; augmentation is then
    applied to training recordings only, so no variant of a test recording
    can end up in the training set.

    Args:
        dataset_path: Directory containing one sub-folder per genre.
        augment_train: When True, add 1-3 augmented copies of every training
            recording.
        return_groups: When True, additionally return an array giving, for
            each training sample, the id of the recording it came from. The
            first sample of each id is the un-augmented original.

    Returns:
        (X_train, y_train, X_test, y_test), plus train_groups when
        return_groups is True.

    Raises:
        ValueError: If no usable .wav file was found.
    """
    features, labels, paths = [], [], []
    class_counts = {genre: 0 for genre in GENRES}

    for genre_idx, genre in enumerate(GENRES):
        genre_path = os.path.join(dataset_path, genre)
        if not os.path.exists(genre_path):
            continue

        print(f"Processing: {genre}")
        # Sorted so the seeded split below is reproducible across runs.
        for audio_file in sorted(os.listdir(genre_path)):
            if not audio_file.endswith('.wav'):
                continue
            file_path = os.path.join(genre_path, audio_file)
            mfccs = extract_features(file_path)
            if mfccs is None:
                continue
            features.append(mfccs)
            labels.append(genre_idx)
            paths.append(file_path)
            class_counts[genre] += 1

    print("\n类别分布:")
    for genre, count in class_counts.items():
        print(f"{genre}: {count} samples")

    if not features:
        raise ValueError(f"No usable .wav files found under {dataset_path!r}")

    # Stratified split over the original recordings.
    sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
    train_idx, test_idx = next(sss.split(features, labels))

    train_features = [features[i] for i in train_idx]
    train_labels = [labels[i] for i in train_idx]
    train_groups = [int(i) for i in train_idx]

    if augment_train:
        # Originals stay first; augmented copies are appended after them.
        for i in train_idx:
            num_augmentations = np.random.randint(1, 4)
            for _ in range(num_augmentations):
                mfccs_aug = extract_features(paths[i], augment=True)
                if mfccs_aug is not None:
                    train_features.append(mfccs_aug)
                    train_labels.append(labels[i])
                    train_groups.append(int(i))

    X_train = np.array(train_features)
    y_train = np.array(train_labels)
    X_test = np.array([features[i] for i in test_idx])
    y_test = np.array([labels[i] for i in test_idx])

    if return_groups:
        return X_train, y_train, X_test, y_test, np.array(train_groups)
    return X_train, y_train, X_test, y_test


# ==============================================
# 3. Model architecture
# ==============================================
def _conv_block(filters, dropout_rate):
    """Return the layers of one Conv -> BatchNorm -> MaxPool -> SpatialDropout block."""
    return [
        layers.Conv2D(filters, (3, 3), activation='relu', padding='same',
                      kernel_regularizer=l2(0.0005)),
        layers.BatchNormalization(),
        layers.MaxPooling2D((2, 2)),
        layers.SpatialDropout2D(dropout_rate),
    ]


def build_and_compile_model(input_shape):
    """Build and compile the CNN classifier.

    Args:
        input_shape: Shape of one input sample, e.g. (n_mfcc, n_frames, 1).

    Returns:
        A compiled Keras model with one softmax output per entry in GENRES.
    """
    model = models.Sequential([
        layers.Input(shape=input_shape),
        *_conv_block(32, 0.2),
        *_conv_block(64, 0.3),
        *_conv_block(128, 0.4),

        # Fourth convolution block; global pooling replaces max-pool/flatten.
        layers.Conv2D(256, (3, 3), activation='relu', padding='same',
                      kernel_regularizer=l2(0.0005)),
        layers.BatchNormalization(),
        layers.GlobalAveragePooling2D(),

        # Fully connected head.
        layers.Dense(128, activation='relu', kernel_regularizer=l2(0.001)),
        layers.BatchNormalization(),
        layers.Dropout(0.5),

        # Output layer: one probability per genre.
        layers.Dense(len(GENRES), activation='softmax'),
    ])

    # Low learning rate plus gradient-norm clipping.
    optimizer = tf.keras.optimizers.Adam(
        learning_rate=0.0003,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-07,
        clipnorm=1.0,
    )

    # Label smoothing is applied here, in the loss, not in the output layer.
    loss = tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.1)

    model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
    return model


# ==============================================
# 4. Training and evaluation
# ==============================================
def _split_train_val(X_train, y_train, train_groups=None):
    """Return (train_idx, val_idx) for a stratified 75/25 hold-out split.

    Without groups, samples are split individually. With groups, whole groups
    are held out so that no sample of a validation group stays in the
    training part, and only the first sample of each held-out group is used
    for validation.
    """
    sss = StratifiedShuffleSplit(n_splits=1, test_size=0.25, random_state=42)
    if train_groups is None:
        return next(sss.split(X_train, y_train))

    groups = np.asarray(train_groups)
    # first_pos holds the index of the first occurrence of every group id.
    unique_groups, first_pos = np.unique(groups, return_index=True)
    group_labels = np.asarray(y_train)[first_pos]
    _, val_pos = next(sss.split(unique_groups, group_labels))

    held_out = np.isin(groups, unique_groups[val_pos])
    train_idx = np.flatnonzero(~held_out)
    val_idx = np.sort(first_pos[val_pos])
    return train_idx, val_idx


def train_and_evaluate(model, X_train, y_train, X_test, y_test, train_groups=None):
    """Train *model* with a validation hold-out, then evaluate and plot results.

    Args:
        model: Compiled Keras model.
        X_train, y_train: Training features and integer labels.
        X_test, y_test: Test features and integer labels.
        train_groups: Optional per-sample group ids (as returned by
            load_dataset with return_groups=True). When given, the
            train/validation split keeps every group on one side and
            validates on the first sample of each held-out group.

    Returns:
        (model, history): the best checkpointed model reloaded from
        'best_model.keras', and the Keras training history.
    """
    train_idx, val_idx = _split_train_val(X_train, y_train, train_groups)

    X_train_split = X_train[train_idx]
    y_train_split = y_train[train_idx]
    X_val = X_train[val_idx]
    y_val = y_train[val_idx]

    print(f"训练集: {len(X_train_split)}, 验证集: {len(X_val)}, 测试集: {len(X_test)}")

    # Add the trailing channel dimension expected by Conv2D.
    X_train_split = X_train_split[..., np.newaxis]
    X_val = X_val[..., np.newaxis]
    X_test = X_test[..., np.newaxis]

    # One-hot labels, as required by CategoricalCrossentropy.
    num_classes = len(GENRES)
    y_train_split = utils.to_categorical(y_train_split, num_classes)
    y_val = utils.to_categorical(y_val, num_classes)
    y_test = utils.to_categorical(y_test, num_classes)

    callbacks_list = [
        # Stop when validation accuracy stops improving.
        callbacks.EarlyStopping(
            monitor='val_accuracy',
            patience=15,
            restore_best_weights=True,
            verbose=1,
        ),
        # Halve the learning rate when validation loss plateaus.
        callbacks.ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.5,
            patience=5,
            min_lr=1e-6,
            verbose=1,
        ),
        # Keep the checkpoint with the best validation accuracy.
        callbacks.ModelCheckpoint(
            'best_model.keras',
            monitor='val_accuracy',
            save_best_only=True,
            mode='max',
            verbose=1,
        ),
        # Per-epoch metrics log.
        callbacks.CSVLogger('training_history.csv'),
    ]

    history = model.fit(
        X_train_split, y_train_split,
        validation_data=(X_val, y_val),
        epochs=150,
        batch_size=16,
        callbacks=callbacks_list,
        verbose=1,
    )

    # Evaluate the best checkpoint rather than the last epoch.
    model = tf.keras.models.load_model('best_model.keras')

    train_loss, train_acc = model.evaluate(X_train_split, y_train_split, verbose=0)
    val_loss, val_acc = model.evaluate(X_val, y_val, verbose=0)
    test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)

    print("\n=== 评估结果 ===")
    print(f"训练集 - 准确率: {train_acc:.4f}, 损失: {train_loss:.4f}")
    print(f"验证集 - 准确率: {val_acc:.4f}, 损失: {val_loss:.4f}")
    print(f"测试集 - 准确率: {test_acc:.4f}, 损失: {test_loss:.4f}")

    plot_training_history(history)
    plot_confusion_matrix(model, X_test, y_test)

    return model, history


# ==============================================
# 5. Visualization
# ==============================================
def plot_training_history(history):
    """Plot training/validation accuracy and loss curves side by side."""
    plt.figure(figsize=(12, 5))

    # Accuracy curves.
    plt.subplot(1, 2, 1)
    plt.plot(history.history['accuracy'], label='训练准确率')
    plt.plot(history.history['val_accuracy'], label='验证准确率')
    plt.title('模型准确率')
    plt.xlabel('训练轮次')
    plt.ylabel('准确率')
    plt.legend()

    # Loss curves.
    plt.subplot(1, 2, 2)
    plt.plot(history.history['loss'], label='训练损失')
    plt.plot(history.history['val_loss'], label='验证损失')
    plt.title('模型损失')
    plt.xlabel('训练轮次')
    plt.ylabel('损失')
    plt.legend()

    plt.tight_layout()
    plt.show()


def plot_confusion_matrix(model, X_test, y_test):
    """Plot the confusion matrix of *model* on the one-hot labelled test set."""
    y_pred = model.predict(X_test)
    y_pred_classes = np.argmax(y_pred, axis=1)
    y_true = np.argmax(y_test, axis=1)

    # Fix the label set so the matrix always has one row/column per genre,
    # even if a genre is absent from the test set.
    cm = confusion_matrix(y_true, y_pred_classes, labels=np.arange(len(GENRES)))

    plt.figure(figsize=(10, 8))
    plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
    plt.title('混淆矩阵')
    plt.colorbar()

    tick_marks = np.arange(len(GENRES))
    plt.xticks(tick_marks, GENRES, rotation=45)
    plt.yticks(tick_marks, GENRES)

    # Annotate each cell; switch text colour on dark cells for readability.
    thresh = cm.max() / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            plt.text(j, i, format(cm[i, j], 'd'),
                     horizontalalignment="center",
                     color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('真实类别')
    plt.xlabel('预测类别')
    # Lay out after the axis labels exist so they are not clipped.
    plt.tight_layout()
    plt.show()


# ==============================================
# Entry point
# ==============================================
def main():
    """Load the data, train and evaluate the model, then classify one test file."""
    if not os.path.exists(DATASET_PATH):
        print(f"错误:数据集路径不存在!\n当前路径: {os.path.abspath(DATASET_PATH)}")
        return

    print("\n=== 加载数据 ===")
    X_train, y_train, X_test, y_test, train_groups = load_dataset(
        DATASET_PATH, augment_train=True, return_groups=True
    )

    print("\n=== 构建模型 ===")
    model = build_and_compile_model((X_train.shape[1], X_train.shape[2], 1))
    model.summary()

    print("\n=== 开始训练 ===")
    model, history = train_and_evaluate(
        model, X_train, y_train, X_test, y_test, train_groups=train_groups
    )

    if os.path.exists(TEST_AUDIO_PATH):
        print("\n=== 测试预测 ===")
        mfccs = extract_features(TEST_AUDIO_PATH)
        if mfccs is not None:
            # Add batch and channel dimensions.
            mfccs = mfccs[np.newaxis, ..., np.newaxis]
            pred = model.predict(mfccs)

            print("\n预测概率分布:")
            for i, prob in enumerate(pred[0]):
                print(f"{GENRES[i]:<10}: {prob*100:.2f}%")
            print(f"\n最终预测: {GENRES[np.argmax(pred)]}")


if __name__ == "__main__":
    main()

# Reader's question attached to the code above (translated): this code is
# built on the genres dataset; please help modify it to improve training
# accuracy and to prevent the model from overfitting or underfitting.
最新发布
05-12
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

wujiekd

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值