import numpy as np
class ComputationGraphFunction:
def __init__(self, inputs, outcomes, parameters, prediction, objective):
"""
Parameters:
inputs: list of ValueNode objects containing inputs (in the ML sense)
outcomes: list of ValueNode objects containing outcomes (in the ML sense)
parameters: list of ValueNode objects containing values we will optimize over
prediction: node whose 'out' variable contains our prediction
objective: node containing the objective for which we compute the gradient
"""
self.inputs = inputs
self.outcomes = outcomes
self.parameters = parameters
self.prediction = prediction
self.objective = objective
# Create name to node lookup, so users can just supply node_name to set parameters
self.name_to_node = {}
self.name_to_node[self.prediction.node_name] = self.prediction
self.name_to_node[self.objective.node_name] = self.objective
for node in self.inputs + self.outcomes + self.parameters:
self.name_to_node[node.node_name] = node
# Precompute the topological and reverse topological sort of the nodes
self.objective_node_list_forward = sort_topological(self.objective)
self.objective_node_list_backward = sort_topological(self.objective)
self.objective_node_list_backward.reverse()
self.prediction_node_list_forward = sort_topological(self.prediction)
def __set_values__(self, node_values):
for node_name in node_values:
node = self.name_to_node[node_name]
node.out = node_values[node_name]
def set_parameters(self, parameter_values):
self.__set_values__(parameter_values)
def increment_parameters(self, parameter_steps):
for node_name in parameter_steps:
node = self.name_to_node[node_name]
node.out += parameter_steps[node_name]
def get_objective(self, input_values, outcome_values):
self.__set_values__(input_values)
self.__set_values__(outcome_values)
obj = forward_graph(self.objective, node_list=self.objective_node_list_forward)
return obj
def get_gradients(self, input_values, outcome_values):
obj = self.get_objective(input_values, outcome_values) #need forward pass anyway
#print("backward node list: ",self.objective_node_list_backward)
backward_graph(self.objective, node_list=self.objective_node_list_backward)
parameter_gradients = {}
for node in self.parameters:
parameter_gradients[node.node_name] = node.d_out
return obj, parameter_gradients
def get_prediction(self, input_values):
self.__set_values__(input_values)
pred = forward_graph(self.prediction, node_list=self.prediction_node_list_forward)
return pred
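# Typical usage sketch: build ValueNodes for the inputs/outcomes/parameters and expression
# nodes for the prediction and objective (see the node classes further below), wrap them in
# a ComputationGraphFunction, and then repeat
#     obj, grads = cgf.get_gradients(input_values={"x": x_i}, outcome_values={"y": y_i})
#     cgf.increment_parameters({name: -step_size * g for name, g in grads.items()})
# as done in MLPRegression.fit near the bottom of this listing.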
###### Computation graph utilities
def sort_topological(sink):
"""Returns a list of the sink node and all its ancestors in topologically sorted order.
Subgraph of these nodes must form a DAG."""
L = [] # Empty list that will contain the sorted nodes
T = set() # Set of temporarily marked nodes
P = set() # Set of permanently marked nodes
def visit(node):
if node in P:
return
if node in T:
            raise ValueError('Your graph is not a DAG!')
T.add(node) # mark node temporarily
for predecessor in node.get_predecessors():
visit(predecessor)
P.add(node) # mark node permanently
L.append(node)
visit(sink)
return L
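# A tiny illustrative sketch (not part of the assignment): the _Stub class below is a
# hypothetical stand-in with just enough interface (get_predecessors) to exercise
# sort_topological and confirm that every node appears after its predecessors.
def _demo_sort_topological():
    class _Stub:
        def __init__(self, predecessors):
            self.predecessors = predecessors
        def get_predecessors(self):
            return self.predecessors
    a = _Stub([])
    b = _Stub([a])
    c = _Stub([a, b])
    order = sort_topological(c)
    assert order.index(a) < order.index(b) < order.index(c)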
def forward_graph(graph_output_node, node_list=None):
# If node_list is not None, it should be sort_topological(graph_output_node)
if node_list is None:
node_list = sort_topological(graph_output_node)
for node in node_list:
out = node.forward()
return out
def backward_graph(graph_output_node, node_list=None):
"""
If node_list is not None, it should be the reverse of sort_topological(graph_output_node).
Assumes that forward_graph has already been called on graph_output_node.
Sets d_out of each node to the appropriate derivative.
"""
if node_list is None:
node_list = sort_topological(graph_output_node)
node_list.reverse()
graph_output_node.d_out = np.array(1) # Derivative of graph output w.r.t. itself is 1
for node in node_list:
node.backward()
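# Note on the accumulation convention used throughout: each node's forward() resets its
# d_out to zeros, and each backward() *adds* its contribution to the predecessors' d_out,
# so a node whose output feeds several downstream nodes receives the correctly summed gradient.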
###### Node definitions (a separate module, imported below as `nodes`)
import numpy as np
class ValueNode(object):
"""计算图中没有输入的节点,仅持有一个值"""
def __init__(self, node_name):
self.node_name = node_name
self.out = None
self.d_out = None
def forward(self):
self.d_out = np.zeros(self.out.shape)
return self.out
def backward(self):
pass
def get_predecessors(self):
return []
class VectorScalarAffineNode(object):
"""计算一个将向量映射到标量的仿射函数的节点"""
def __init__(self, x, w, b, node_name):
"""
        Parameters:
        x: node for which x.out is a 1D numpy array
        w: node for which w.out is a 1D numpy array of the same size as x.out
        b: node for which b.out is a numpy scalar (i.e. a 0-dimensional array)
        node_name: the name of the node (a string)
"""
self.node_name = node_name
self.out = None
self.d_out = None
self.x = x
self.w = w
self.b = b
def forward(self):
self.out = np.dot(self.x.out, self.w.out) + self.b.out
self.d_out = np.zeros(self.out.shape)
return self.out
def backward(self):
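        # Chain rule for out = w . x + b (a scalar output):
        #   dJ/dx = d_out * w,  dJ/dw = d_out * x,  dJ/db = d_out, where d_out = dJ/d(out).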
d_x = self.d_out * self.w.out
d_w = self.d_out * self.x.out
d_b = self.d_out
self.x.d_out += d_x
self.w.d_out += d_w
self.b.d_out += d_b
def get_predecessors(self):
return [self.x, self.w, self.b]
class SquaredL2DistanceNode(object):
""" 计算两个数组之间的 L2 距离(平方差之和)的节点"""
def __init__(self, a, b, node_name):
"""
        Parameters:
        a: node for which a.out is a numpy array
        b: node for which b.out is a numpy array of the same shape as a.out
        node_name: the name of the node (a string)
"""
self.node_name = node_name
self.out = None
self.d_out = None
self.a = a
self.b = b
self.a_minus_b = None
def forward(self):
self.a_minus_b = self.a.out - self.b.out
self.out = np.sum(self.a_minus_b ** 2)
        self.d_out = np.zeros(self.out.shape)  # gradient accumulator initialized here (same pattern in the other nodes)
return self.out
def backward(self):
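        # out = sum((a - b)^2), so d(out)/da = 2(a - b) and d(out)/db = -2(a - b);
        # both contributions are scaled by the upstream derivative d_out.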
d_a = self.d_out * 2 * self.a_minus_b
d_b = -self.d_out * 2 * self.a_minus_b
self.a.d_out += d_a
self.b.d_out += d_b
return self.d_out
def get_predecessors(self):
return [self.a, self.b]
class L2NormPenaltyNode(object):
""" 计算 l2_reg * ||w||^2 节点,其中 l2_reg 为标量, w为向量"""
def __init__(self, l2_reg, w, node_name):
"""
        Parameters:
        l2_reg: a scalar value >= 0 (not a node)
        w: node for which w.out is a numpy vector
        node_name: the name of the node (a string)
"""
self.node_name = node_name
self.out = None
self.d_out = None
self.l2_reg = np.array(l2_reg)
self.w = w
def forward(self):
self.out = self.l2_reg * np.sum(self.w.out ** 2)
self.d_out = np.zeros(self.out.shape)
return self.out
def backward(self):
d_w = self.d_out * 2 * self.l2_reg * self.w.out
self.w.d_out += d_w
return self.d_out
def get_predecessors(self):
return [self.w]
## TODO (implemented below)
## Hint: implement the corresponding forward, backward, and get_predecessors methods.
class SumNode(object):
""" 计算 a + b 的节点,其中 a 和 b 是 numpy 数组。"""
def __init__(self, a, b, node_name):
"""
        Parameters:
        a: node for which a.out is a numpy array
        b: node for which b.out is a numpy array of the same shape as a.out
        node_name: the name of the node (a string)
"""
self.node_name = node_name
self.out = None
self.d_out = None
self.a = a
self.b = b
def forward(self):
self.out = self.a.out + self.b.out
self.d_out = np.zeros(self.out.shape)
return self.out
def backward(self):
d_a = self.d_out * np.ones_like(self.a.out)
d_b = self.d_out * np.ones_like(self.b.out)
self.a.d_out += d_a
self.b.d_out += d_b
return self.d_out
def get_predecessors(self):
return [self.a, self.b]
## TODO (implemented below)
## Hint: implement the corresponding forward, backward, and get_predecessors methods.
class AffineNode(object):
"""实现仿射变换 (W,x,b)-->Wx+b 的节点,其中 W 是一个矩阵,x 和 b 是向量
参数:
W: 节点,其 W.out 是形状为 (m,d) 的 numpy 数组
x: 节点,其 x.out 是形状为 (d) 的 numpy 数组
b: 节点,其 b.out 是形状为 (m) 的 numpy 数组(即长度为 m 的向量)
"""
def __init__(self, W, x, b, node_name):
self.node_name = node_name
self.out = None
self.d_out = None
self.W = W
self.x = x
self.b = b
def forward(self):
self.out = np.dot(self.W.out, self.x.out) + self.b.out
self.d_out = np.zeros(self.out.shape)
return self.out
def backward(self):
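        # Chain rule for out = W x + b:
        #   dJ/dW = outer(d_out, x),  dJ/dx = W^T d_out,  dJ/db = d_out.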
d_W = np.outer(self.d_out, self.x.out)
d_x = np.dot(self.W.out.T, self.d_out)
d_b = self.d_out.copy()
self.W.d_out += d_W
self.x.d_out += d_x
self.b.d_out += d_b
return self.d_out
def get_predecessors(self):
return [self.W, self.x, self.b]
## Hint: implement the corresponding forward, backward, and get_predecessors methods (implemented below).
class TanhNode(object):
"""节点 tanh(a),其中 tanh 是对数组 a 逐元素应用的
参数:
a: 节点,其 a.out 是一个 numpy 数组
"""
def __init__(self, a, node_name):
self.node_name = node_name
self.out = None
self.d_out = None
self.a = a
def forward(self):
self.out = np.tanh(self.a.out)
self.d_out = np.zeros(self.out.shape)
return self.out
def backward(self):
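        # d tanh(a)/da = 1 - tanh(a)^2, applied elementwise.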
d_a = self.d_out * (1 - self.out ** 2)
self.a.d_out += d_a
return self.d_out
def get_predecessors(self):
return [self.a]
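# Illustrative finite-difference gradient check (a sketch, not part of the assignment).
# It assumes the graph utilities above live in graph.py (consistent with `import graph`
# further below) so forward_graph/backward_graph can be imported; it is never called here.
def _finite_difference_check(eps=1e-6):
    from graph import forward_graph, backward_graph
    a = ValueNode(node_name="a")
    b = ValueNode(node_name="b")
    dist = SquaredL2DistanceNode(a, b, node_name="dist")
    a.out = np.array([0.3, -1.2, 0.7])
    b.out = np.array([1.0, 0.5, -0.4])
    forward_graph(dist)
    backward_graph(dist)  # analytic gradient of the output w.r.t. a lands in a.d_out
    for i in range(a.out.shape[0]):
        a_plus, a_minus = a.out.copy(), a.out.copy()
        a_plus[i] += eps
        a_minus[i] -= eps
        numeric = (np.sum((a_plus - b.out) ** 2) - np.sum((a_minus - b.out) ** 2)) / (2 * eps)
        assert abs(numeric - a.d_out[i]) < 1e-4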
###### MLP regression script (uses the `graph` and `nodes` modules defined above)
import matplotlib.pyplot as plt
import setup_problem
from sklearn.base import BaseEstimator, RegressorMixin
import numpy as np
import nodes
import graph
import plot_utils
#import pdb
#pdb.set_trace() #useful for debugging!
class MLPRegression(BaseEstimator, RegressorMixin):
""" 基于计算图的MLP实现 """
def __init__(self, num_hidden_units=10, step_size=.005, init_param_scale=0.01, max_num_epochs = 5000):
self.num_hidden_units = num_hidden_units
        self.init_param_scale = init_param_scale
self.max_num_epochs = max_num_epochs
self.step_size = step_size
        # Start building the computation graph
self.x = nodes.ValueNode(node_name="x") # to hold a vector input
self.y = nodes.ValueNode(node_name="y") # to hold a scalar response
        ## TODO (completed). The slide deck referenced by the original hint is not available
        ## here, so this assumes the standard single-hidden-layer MLP implied by the node classes:
        ##   h = tanh(W1 x + b1),  prediction = w2 . h + b2,  objective = (prediction - y)^2
        self.W1 = nodes.ValueNode(node_name="W1")  # shape (num_hidden_units, num_ftrs)
        self.b1 = nodes.ValueNode(node_name="b1")  # shape (num_hidden_units,)
        self.w2 = nodes.ValueNode(node_name="w2")  # shape (num_hidden_units,)
        self.b2 = nodes.ValueNode(node_name="b2")  # scalar
        self.L = nodes.AffineNode(W=self.W1, x=self.x, b=self.b1, node_name="affine")
        self.h = nodes.TanhNode(a=self.L, node_name="hidden")
        self.prediction = nodes.VectorScalarAffineNode(x=self.h, w=self.w2, b=self.b2, node_name="prediction")
        self.objective = nodes.SquaredL2DistanceNode(a=self.prediction, b=self.y, node_name="objective")
        self.graph = graph.ComputationGraphFunction([self.x], [self.y],
                                                    [self.W1, self.b1, self.w2, self.b2],
                                                    self.prediction, self.objective)
def fit(self, X, y):
num_instances, num_ftrs = X.shape
y = y.reshape(-1)
        ## TODO (completed): initialize parameters with small random values (not all zeros,
        ## to break symmetry); the shapes must match the ValueNodes created in __init__.
        s = self.init_param_scale
        init_values = {"W1": s * np.random.standard_normal((self.num_hidden_units, num_ftrs)),
                       "b1": s * np.random.standard_normal((self.num_hidden_units,)),
                       "w2": s * np.random.standard_normal((self.num_hidden_units,)),
                       "b2": s * np.array(np.random.standard_normal())}
self.graph.set_parameters(init_values)
for epoch in range(self.max_num_epochs):
shuffle = np.random.permutation(num_instances)
epoch_obj_tot = 0.0
for j in shuffle:
obj, grads = self.graph.get_gradients(input_values = {"x": X[j]},
outcome_values = {"y": y[j]})
#print(obj)
epoch_obj_tot += obj
# Take step in negative gradient direction
steps = {}
for param_name in grads:
steps[param_name] = -self.step_size * grads[param_name]
self.graph.increment_parameters(steps)
if epoch % 50 == 0:
train_loss = sum((y - self.predict(X,y)) **2)/num_instances
print("Epoch ",epoch,": Ave objective=",epoch_obj_tot/num_instances," Ave training loss: ",train_loss)
def predict(self, X, y=None):
try:
getattr(self, "graph")
except AttributeError:
raise RuntimeError("You must train classifer before predicting data!")
num_instances = X.shape[0]
preds = np.zeros(num_instances)
for j in range(num_instances):
preds[j] = self.graph.get_prediction(input_values={"x":X[j]})
return preds
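# Optional smoke-test sketch (not part of the assignment): fits the MLP on a tiny synthetic
# 1-d regression problem so the pipeline can be exercised without lasso_data.pickle.
# The data and hyperparameters here are illustrative only; the function is never called.
def _demo_mlp_on_synthetic_data():
    rng = np.random.RandomState(0)
    x = rng.uniform(0, 1, size=(50, 1))
    y = np.sin(4 * x[:, 0]) + 0.1 * rng.standard_normal(50)
    estimator = MLPRegression(num_hidden_units=5, step_size=0.01,
                              init_param_scale=0.01, max_num_epochs=200)
    estimator.fit(x, y)
    print("Synthetic-data train MSE:", np.mean((estimator.predict(x) - y) ** 2))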
def main():
#lasso_data_fname = "lasso_data.pickle"
lasso_data_fname = r"C:\Users\XM_Ta\OneDrive\Desktop\1120223544-汤阳光-实验四\Question\lasso_data.pickle"
x_train, y_train, x_val, y_val, target_fn, coefs_true, featurize = setup_problem.load_problem(lasso_data_fname)
# Generate features
X_train = featurize(x_train)
X_val = featurize(x_val)
# Let's plot prediction functions and compare coefficients for several fits
# and the target function.
pred_fns = []
x = np.sort(np.concatenate([np.arange(0,1,.001), x_train]))
pred_fns.append({"name": "Target Parameter Values (i.e. Bayes Optimal)", "coefs": coefs_true, "preds": target_fn(x)})
estimator = MLPRegression(num_hidden_units=10, step_size=0.001, init_param_scale=.0005, max_num_epochs=5000)
x_train_as_column_vector = x_train.reshape(x_train.shape[0],1) # fit expects a 2-dim array
x_as_column_vector = x.reshape(x.shape[0],1) # fit expects a 2-dim array
estimator.fit(x_train_as_column_vector, y_train)
name = "MLP regression - no features"
pred_fns.append({"name":name, "preds": estimator.predict(x_as_column_vector) })
X = featurize(x)
estimator = MLPRegression(num_hidden_units=10, step_size=0.0005, init_param_scale=.01, max_num_epochs=500)
estimator.fit(X_train, y_train)
name = "MLP regression - with features"
pred_fns.append({"name":name, "preds": estimator.predict(X) })
plot_utils.plot_prediction_functions(x, pred_fns, x_train, y_train, legend_loc="best")
if __name__ == '__main__':
main()