Feature_col

这段代码展示了如何在 TensorFlow 中使用不同的特征列类型,包括数值列、桶化列、词汇列表分类列、哈希桶列、嵌入列以及加权分类列,并演示了线性模型和交叉列的创建与应用。
import tensorflow as tf
from tensorflow.python.estimator.inputs import numpy_io
import numpy as np
import collections
from tensorflow.python.framework import errors
from tensorflow.python.platform import test
from tensorflow.python.training import coordinator
from tensorflow import feature_column

from tensorflow.python.feature_column.feature_column import _LazyBuilder

def test_numeric():
    """Print a normalized numeric 'price' column obtained in two ways.

    The column is created with a normalizer that adds 2 to its input. Its
    dense tensor is first requested from the column itself through a
    ``_LazyBuilder`` and then produced via ``feature_column.input_layer``.
    Each tensor is evaluated in its own session and printed. The original
    author's note states that both approaches give the same result.

    :return: None
    """
    features = {'price': [[1.], [2.], [3.], [4.], [10.]]}  # five sample rows

    price_column = feature_column.numeric_column(
        'price', normalizer_fn=lambda value: value + 2)

    # Approach 1: ask the column directly for its dense tensor.
    lazy_builder = _LazyBuilder(features)
    direct_tensor = price_column._get_dense_tensor(lazy_builder)
    with tf.Session() as sess:
        print(sess.run([direct_tensor]))

    # Approach 2: go through input_layer with the same column.
    layer_tensor = feature_column.input_layer(features, [price_column])
    with tf.Session() as sess:
        # Separator line so the two outputs can be told apart.
        print('use input_layer' + '_' * 40)
        print(sess.run([layer_tensor]))

def test_bucketized_column():
    """Print the input_layer output of a bucketized 'price' column.

    A numeric 'price' column is bucketized with the boundaries
    0, 10, 20, 30 and 40, passed through ``feature_column.input_layer``
    together with the sample data, and the evaluated tensor is printed.

    :return: None
    """
    boundaries = [0, 10, 20, 30, 40]
    samples = {'price': [[5.], [15.], [25.], [35.], [55.], [45.]]}  # six sample rows

    bucketized = feature_column.bucketized_column(
        feature_column.numeric_column('price'), boundaries)
    bucket_tensor = feature_column.input_layer(samples, [bucketized])

    with tf.Session() as sess:
        print(sess.run([bucket_tensor]))

def test_categorical_column_with_vocabulary_list():

    # color_data = {'color': [['R', 'R'], ['G', 'R'], ['B', 'G'], ['A', 'A']]}  # 4行样本
    color_data = {'color
# Question from the original post (translated): the code below treats one
# center recipe as the center of each recipe cluster; every other recipe is
# diffed against that center recipe, and a separate model is then trained per
# cluster. This yields too little data. The author now wants the recipes
# inside each cluster to be diffed pairwise so that more data is available
# for training each cluster's model, and asks for the code to be rewritten.
#
# NOTE(review): pd, os, List/Optional/Dict, MinMaxScaler, LassoCV, r2_score,
# mean_squared_error and compute_weighted_l1_distance_vector are not defined
# in the visible source — presumably imported/defined alongside this snippet;
# confirm before running.


def construct_multi_base_differences_with_distance_vector(
    df: pd.DataFrame,
    feature_cols: List[str],
    target_col: str,
    recipe_id_col: str = "recipe_id",
    base_ids: Optional[List[str]] = None,
    max_distance: float = 0.5,
    top_k: Optional[int] = None,
    weights: Optional[Dict[str, float]] = None
) -> pd.DataFrame:
    """Build a table of feature/target differences relative to base recipes.

    Rows whose target is missing are dropped. The features are min-max scaled
    and, for every base recipe, a distance to every remaining row is obtained
    from ``compute_weighted_l1_distance_vector``. Rows whose distance is
    greater than 0 and at most ``max_distance`` are paired with the base, and
    the differences of the raw (unscaled) features and of the target are
    recorded.

    Args:
        df: Input data containing the feature, target and recipe-id columns.
        feature_cols: Names of the feature columns to diff.
        target_col: Name of the target column.
        recipe_id_col: Name of the column holding recipe identifiers.
        base_ids: Recipe ids to use as bases; every remaining id is used when
            this is None. Ids not present in the filtered data are skipped.
        max_distance: Upper bound (inclusive) on the distance of a compared
            row from its base.
        top_k: When truthy, keep only the ``top_k`` nearest qualifying rows
            per base.
        weights: Per-feature weights; features without an entry get 1.0.

    Returns:
        A DataFrame with one row per (base, compared) pair holding the
        ``Δ<feature>`` columns, ``Δ<target_col>``, ``base_id``,
        ``compare_id`` and ``distance``. It has no columns when no pair
        qualifies.
    """
    diffs = []
    data = df[df[target_col].notna()].copy()
    X = data[feature_cols].copy()
    y = data[target_col].values
    ids = data[recipe_id_col].values

    scaler = MinMaxScaler()
    X_scaled = scaler.fit_transform(X)

    if weights is None:
        weights = {col: 1.0 for col in feature_cols}
    weight_vector = np.array([weights.get(col, 1.0) for col in feature_cols])

    # With duplicated recipe ids the last occurrence wins.
    id_to_index = {rid: idx for idx, rid in enumerate(ids)}
    if base_ids is None:
        base_ids = list(ids)

    for base_id in base_ids:
        i = id_to_index.get(base_id)
        if i is None:
            continue
        base_vector = X_scaled[i]
        # presumably the weighted L1 distance from the base to every row —
        # the helper is not visible here; verify.
        dist_vector = compute_weighted_l1_distance_vector(
            base_vector, X_scaled, weight_vector
        )
        # A distance of exactly 0 is excluded, which drops the base itself.
        valid_idxs = np.where(
            (dist_vector > 0) & (dist_vector <= max_distance)
        )[0]
        if top_k:
            nearest_first = np.argsort(dist_vector[valid_idxs])[:top_k]
            valid_idxs = valid_idxs[nearest_first]
        for j in valid_idxs:
            # Differences are taken on the raw values, not the scaled ones.
            delta_x = X.iloc[j].values - X.iloc[i].values
            delta_y = y[j] - y[i]
            record = {
                f"Δ{col}": delta_x[k] for k, col in enumerate(feature_cols)
            }
            record[f"Δ{target_col}"] = delta_y
            record["base_id"] = ids[i]
            record["compare_id"] = ids[j]
            record["distance"] = dist_vector[j]
            diffs.append(record)

    df_diff = pd.DataFrame(diffs)
    return df_diff


def train_diff_models_by_base_no_shap(
    df_diff: pd.DataFrame,
    feature_cols: List[str],
    target_col: str,
    base_col: str = "base_id",
    model_type: str = "lasso",
    output_dir: str = "base_models"
) -> pd.DataFrame:
    """Fit one model per base recipe on its difference rows and report metrics.

    Args:
        df_diff: Difference table containing ``feature_cols``, ``target_col``
            and ``base_col``.
        feature_cols: Names of the (difference) feature columns in ``df_diff``.
        target_col: Name of the (difference) target column in ``df_diff``.
        base_col: Column identifying the base recipe of each row.
        model_type: Only ``"lasso"`` is supported.
        output_dir: Directory that is created if missing; the code shown here
            does not write anything into it.

    Returns:
        A DataFrame with one row per fitted base holding ``base_id``, ``r2``,
        ``mse``, ``n_samples`` and ``coef_dict``. The metrics are computed on
        the same rows the model was fitted on. Bases with fewer than 5 rows
        are skipped; the result is empty when nothing is fitted.

    Raises:
        NotImplementedError: If ``model_type`` is not ``"lasso"`` and at least
            one base has enough rows to be fitted.
    """
    os.makedirs(output_dir, exist_ok=True)

    # An empty difference table may have no columns at all (that is what
    # pd.DataFrame([]) yields), so looking up base_col would raise KeyError.
    if df_diff.empty:
        return pd.DataFrame([])

    base_ids = df_diff[base_col].unique()
    records = []
    for base in base_ids:
        df_base = df_diff[df_diff[base_col] == base]
        if len(df_base) < 5:
            continue
        X = df_base[feature_cols].values
        y = df_base[target_col].values
        if model_type == "lasso":
            model = LassoCV(cv=3, random_state=42).fit(X, y)
        else:
            raise NotImplementedError("Only 'lasso' is implemented.")
        y_pred = model.predict(X)
        r2 = r2_score(y, y_pred)
        mse = mean_squared_error(y, y_pred)
        records.append({
            "base_id": base,
            "r2": r2,
            "mse": mse,
            "n_samples": len(df_base),
            "coef_dict": dict(zip(feature_cols, model.coef_))
        })
    return pd.DataFrame(records)
07-04
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值