“”"Computation graph function and utilities
By linking nodes together, one creates a computation graph representing a
function, and one can use backpropagation to easily compute the gradient of the
graph output with respect all input values. However, when doing machine
learning, different nodes of the computation graph maybe treated differently
and have special meaning. For example, if we represent a linear function in a
computation graph, we will want the gradient w.r.t. the node representing the
parameter vector, we’ll frequently want to access the node that is the linear
function, since that is our predictions, but we’ll also need access to the
graph output node, since that contains the objective function value. In the
class ComputationGraphFunction below, we create a wrapper around a computation
graph to handle many of the standard things we need to do in ML. Once graph is
constructed, in the sense of constructing the nodes and linking them together,
we can construct a ComputationGraphFunction below by passing the nodes in
different lists, specifying whether a node is an input, outcome (i.e. label or
response), parameter, prediction, or objective node. [Note that not all nodes
of the graph will be one of these types. The nodes that are not explicitly
passed in one of these lists are still accessible, since they are linked to
other nodes.]
This computation graph framework was designed and implemented by Philipp
Meerkamp, Pierre Garapon, and David Rosenberg.
License: Creative Commons Attribution 4.0 International License
“”"
import numpy as np
class ComputationGraphFunction:
    def __init__(self, inputs, outcomes, parameters, prediction, objective):
        """
        Parameters:
        inputs: list of ValueNode objects containing inputs (in the ML sense)
        outcomes: list of ValueNode objects containing outcomes (in the ML sense)
        parameters: list of ValueNode objects containing values we will optimize over
        prediction: node whose 'out' variable contains our prediction
        objective: node containing the objective for which we compute the gradient
        """
        self.inputs = inputs
        self.outcomes = outcomes
        self.parameters = parameters
        self.prediction = prediction
        self.objective = objective

        # Create a name-to-node lookup, so users can supply just node_name to set parameters
        self.name_to_node = {}
        self.name_to_node[self.prediction.node_name] = self.prediction
        self.name_to_node[self.objective.node_name] = self.objective
        for node in self.inputs + self.outcomes + self.parameters:
            self.name_to_node[node.node_name] = node

        # Precompute the topological and reverse topological sorts of the nodes
        self.objective_node_list_forward = sort_topological(self.objective)
        self.objective_node_list_backward = sort_topological(self.objective)
        self.objective_node_list_backward.reverse()
        self.prediction_node_list_forward = sort_topological(self.prediction)
    def __set_values__(self, node_values):
        for node_name in node_values:
            node = self.name_to_node[node_name]
            node.out = node_values[node_name]

    def set_parameters(self, parameter_values):
        self.__set_values__(parameter_values)

    def increment_parameters(self, parameter_steps):
        for node_name in parameter_steps:
            node = self.name_to_node[node_name]
            node.out += parameter_steps[node_name]

    def get_objective(self, input_values, outcome_values):
        self.__set_values__(input_values)
        self.__set_values__(outcome_values)
        obj = forward_graph(self.objective, node_list=self.objective_node_list_forward)
        return obj

    def get_gradients(self, input_values, outcome_values):
        obj = self.get_objective(input_values, outcome_values)  # need the forward pass anyway
        backward_graph(self.objective, node_list=self.objective_node_list_backward)
        parameter_gradients = {}
        for node in self.parameters:
            parameter_gradients[node.node_name] = node.d_out
        return obj, parameter_gradients

    def get_prediction(self, input_values):
        self.__set_values__(input_values)
        pred = forward_graph(self.prediction, node_list=self.prediction_node_list_forward)
        return pred
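
# A minimal usage sketch (our own illustration, not part of the original file;
# it assumes the node classes defined further below, e.g. ValueNode,
# VectorScalarAffineNode, SquaredL2DistanceNode): build a least-squares graph
# and query its gradient.
#
#   x = ValueNode(node_name="x")   # input
#   y = ValueNode(node_name="y")   # outcome
#   w = ValueNode(node_name="w")   # parameter
#   b = ValueNode(node_name="b")   # parameter
#   prediction = VectorScalarAffineNode(x=x, w=w, b=b, node_name="prediction")
#   objective = SquaredL2DistanceNode(a=prediction, b=y, node_name="objective")
#   graph = ComputationGraphFunction(inputs=[x], outcomes=[y], parameters=[w, b],
#                                    prediction=prediction, objective=objective)
#   graph.set_parameters({"w": np.zeros(2), "b": np.array(0.0)})
#   obj, grads = graph.get_gradients(input_values={"x": np.array([1.0, 2.0])},
#                                    outcome_values={"y": np.array(3.0)})
#   # grads["w"] and grads["b"] now hold d(objective)/dw and d(objective)/db.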

# Computation graph utilities

def sort_topological(sink):
    """Returns a list of the sink node and all its ancestors in topologically sorted order.

    The subgraph consisting of these nodes must form a DAG."""
    L = []     # Empty list that will contain the sorted nodes
    T = set()  # Set of temporarily marked nodes
    P = set()  # Set of permanently marked nodes

    def visit(node):
        if node in P:
            return
        if node in T:
            raise ValueError('Your graph is not a DAG!')
        T.add(node)  # mark node temporarily
        for predecessor in node.get_predecessors():
            visit(predecessor)
        P.add(node)  # mark node permanently
        L.append(node)

    visit(sink)
    return L
def forward_graph(graph_output_node, node_list=None):
    # If node_list is not None, it should be sort_topological(graph_output_node)
    if node_list is None:
        node_list = sort_topological(graph_output_node)
    for node in node_list:
        out = node.forward()
    return out
def backward_graph(graph_output_node, node_list=None):
    """
    If node_list is not None, it should be the reverse of sort_topological(graph_output_node).
    Assumes that forward_graph has already been called on graph_output_node.
    Sets d_out of each node to the appropriate derivative.
    """
    if node_list is None:
        node_list = sort_topological(graph_output_node)
        node_list.reverse()
    graph_output_node.d_out = np.array(1)  # Derivative of graph output w.r.t. itself is 1
    for node in node_list:
        node.backward()
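
# A hedged sketch of a finite-difference gradient check (our own addition, not
# part of the original framework; the helper name and tolerance are
# illustrative). It perturbs one entry of a parameter at a time and compares
# the numerical slope of the objective against the backprop gradient returned
# by get_gradients. Useful for verifying a node's backward method.
def gradient_check_sketch(graph, param_name, param_value, input_values,
                          outcome_values, eps=1e-6):
    graph.set_parameters({param_name: param_value.copy()})
    obj, grads = graph.get_gradients(input_values, outcome_values)
    grad = np.array(grads[param_name])  # copy: later forward passes reset d_out
    numerical = np.zeros_like(param_value, dtype=float)
    it = np.nditer(param_value, flags=['multi_index'])
    for _ in it:
        idx = it.multi_index
        step = np.zeros_like(param_value, dtype=float)
        step[idx] = eps
        graph.set_parameters({param_name: param_value + step})
        obj_plus = graph.get_objective(input_values, outcome_values)
        graph.set_parameters({param_name: param_value - step})
        obj_minus = graph.get_objective(input_values, outcome_values)
        numerical[idx] = (obj_plus - obj_minus) / (2 * eps)
    graph.set_parameters({param_name: param_value})  # restore original value
    return np.max(np.abs(numerical - grad))  # near 0 when backward is correct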
“”"
计算图节点类型
节点必须实现以下方法:
init:初始化节点
forward:反向传播的第1步,从前置节点获取输出,更新自身输出,并将梯度设为零
backward:反向传播的第2步,假设已进行前向传播并调用了后续节点的backward方法,计算图输出关于节点输入的导数,将这些导数加到输入节点的d_out数组中
get_predecessors:返回节点的父节点列表
节点必须具有以下属性:
node_name:节点名称(字符串)
out:节点输出
d_out:图输出关于节点输出的导数
“”"
import numpy as np
class ValueNode(object):
    """Computation graph node that has no inputs and just holds a value"""
    def __init__(self, node_name):
        self.node_name = node_name
        self.out = None
        self.d_out = None

    def forward(self):
        self.d_out = np.zeros(self.out.shape)
        return self.out

    def backward(self):
        pass

    def get_predecessors(self):
        return []
class VectorScalarAffineNode(object):
    """Node computing an affine function that maps a vector to a scalar."""
    def __init__(self, x, w, b, node_name):
        """
        Parameters:
        x: node for which x.out is a 1D numpy array
        w: node for which w.out is a 1D numpy array of the same size as x.out
        b: node for which b.out is a numpy scalar (i.e. a 0-dimensional array)
        node_name: the name of the node (a string)
        """
        self.node_name = node_name
        self.out = None
        self.d_out = None
        self.x = x
        self.w = w
        self.b = b

    def forward(self):
        self.out = np.dot(self.x.out, self.w.out) + self.b.out
        self.d_out = np.zeros(self.out.shape)
        return self.out

    def backward(self):
        d_x = self.d_out * self.w.out
        d_w = self.d_out * self.x.out
        d_b = self.d_out
        self.x.d_out += d_x
        self.w.d_out += d_w
        self.b.d_out += d_b

    def get_predecessors(self):
        return [self.x, self.w, self.b]
class SquaredL2DistanceNode(object):
    """Node computing the squared L2 distance (sum of squared differences) between two arrays."""
    def __init__(self, a, b, node_name):
        """
        Parameters:
        a: node for which a.out is a numpy array
        b: node for which b.out is a numpy array of the same shape as a.out
        node_name: the name of the node (a string)
        """
        self.node_name = node_name
        self.out = None
        self.d_out = None
        self.a = a
        self.b = b
        self.a_minus_b = None

    def forward(self):
        self.a_minus_b = self.a.out - self.b.out
        self.out = np.sum(self.a_minus_b ** 2)
        self.d_out = np.zeros(self.out.shape)  # initialization; same pattern below
        return self.out

    def backward(self):
        d_a = self.d_out * 2 * self.a_minus_b
        d_b = -self.d_out * 2 * self.a_minus_b
        self.a.d_out += d_a
        self.b.d_out += d_b
        return self.d_out

    def get_predecessors(self):
        return [self.a, self.b]
class L2NormPenaltyNode(object):
    """Node computing l2_reg * ||w||^2, where l2_reg is a scalar and w is a vector."""
    def __init__(self, l2_reg, w, node_name):
        """
        Parameters:
        l2_reg: a scalar value >= 0 (not a node)
        w: node for which w.out is a numpy vector
        node_name: the name of the node (a string)
        """
        self.node_name = node_name
        self.out = None
        self.d_out = None
        self.l2_reg = np.array(l2_reg)
        self.w = w

    def forward(self):
        self.out = self.l2_reg * np.sum(self.w.out ** 2)
        self.d_out = np.zeros(self.out.shape)
        return self.out

    def backward(self):
        d_w = self.d_out * 2 * self.l2_reg * self.w.out
        self.w.d_out += d_w
        return self.d_out

    def get_predecessors(self):
        return [self.w]
## TODO
## Hint: implement the corresponding forward, backward, and get_predecessors methods
class SumNode(object):
    """Node computing a + b, where a and b are numpy arrays."""
    def __init__(self, a, b, node_name):
        """
        Parameters:
        a: node for which a.out is a numpy array
        b: node for which b.out is a numpy array of the same shape as a.out
        node_name: the name of the node (a string)
        """
        self.node_name = node_name
        self.out = None
        self.d_out = None
        self.a = a
        self.b = b

    def forward(self):
        self.out = self.a.out + self.b.out
        self.d_out = np.zeros(self.out.shape)
        return self.out

    def backward(self):
        # d(a + b)/da and d(a + b)/db are both the identity, so each gradient is d_out
        d_a = self.d_out * np.ones_like(self.a.out)
        d_b = self.d_out * np.ones_like(self.b.out)
        self.a.d_out += d_a
        self.b.d_out += d_b
        return self.d_out

    def get_predecessors(self):
        return [self.a, self.b]
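
# Typical composition (illustrative only; not used by the MLP below): SumNode
# can combine a data-fitting term with a penalty term, e.g.
#   objective = SumNode(a=square_loss, b=l2_penalty, node_name="objective")
# where square_loss is a SquaredL2DistanceNode and l2_penalty is an L2NormPenaltyNode.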
## TODO
## Hint: implement the corresponding forward, backward, and get_predecessors methods
class AffineNode(object):
    """Node implementing the affine transformation (W, x, b) --> Wx + b,
    where W is a matrix and x and b are vectors.

    Parameters:
    W: node for which W.out is a numpy array of shape (m, d)
    x: node for which x.out is a numpy array of shape (d,)
    b: node for which b.out is a numpy array of shape (m,) (i.e. a vector of length m)
    """
    def __init__(self, W, x, b, node_name):
        self.node_name = node_name
        self.out = None
        self.d_out = None
        self.W = W
        self.x = x
        self.b = b

    def forward(self):
        self.out = np.dot(self.W.out, self.x.out) + self.b.out
        self.d_out = np.zeros(self.out.shape)
        return self.out

    def backward(self):
        # d(Wx + b)/dW = outer(d_out, x); d/dx = W^T d_out; d/db = d_out
        d_W = np.outer(self.d_out, self.x.out)
        d_x = np.dot(self.W.out.T, self.d_out)
        d_b = self.d_out.copy()
        self.W.d_out += d_W
        self.x.d_out += d_x
        self.b.d_out += d_b
        return self.d_out

    def get_predecessors(self):
        return [self.W, self.x, self.b]
## Hint: implement the corresponding forward, backward, and get_predecessors methods
class TanhNode(object):
    """Node computing tanh(a), where tanh is applied elementwise to the array a.

    Parameters:
    a: node for which a.out is a numpy array
    """
    def __init__(self, a, node_name):
        self.node_name = node_name
        self.out = None
        self.d_out = None
        self.a = a

    def forward(self):
        self.out = np.tanh(self.a.out)
        self.d_out = np.zeros(self.out.shape)
        return self.out

    def backward(self):
        # d tanh(a)/da = 1 - tanh(a)^2 = 1 - out^2
        d_a = self.d_out * (1 - self.out ** 2)
        self.a.d_out += d_a
        return self.d_out

    def get_predecessors(self):
        return [self.a]
## TODO
## Hint: implement the corresponding forward, backward, and get_predecessors methods
## (the tanh function can be computed directly with np.tanh)
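
# A small, hedged sanity check of the node contract (our own illustration):
# run one hidden layer h = tanh(Wx + b) forward and backward by hand and
# confirm that gradients accumulate into the ValueNodes' d_out fields.
#
#   W = ValueNode(node_name="W"); W.out = np.array([[1.0, 0.0], [0.0, 1.0]])
#   x = ValueNode(node_name="x"); x.out = np.array([0.5, -0.5])
#   b = ValueNode(node_name="b"); b.out = np.array([0.0, 0.0])
#   affine = AffineNode(W=W, x=x, b=b, node_name="affine")
#   h = TanhNode(a=affine, node_name="h")
#   for node in [W, x, b, affine, h]:   # forward pass in topological order
#       node.forward()
#   h.d_out = np.ones(2)                # pretend d(objective)/dh = 1
#   for node in [h, affine]:            # backward pass in reverse order
#       node.backward()
#   # W.d_out, x.d_out, b.d_out now hold the accumulated gradients.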
import matplotlib.pyplot as plt
import setup_problem
from sklearn.base import BaseEstimator, RegressorMixin
import numpy as np
import nodes
import graph
import plot_utils
#import pdb
#pdb.set_trace() #useful for debugging!
class MLPRegression(BaseEstimator, RegressorMixin):
    """MLP regression implemented on a computation graph."""
    def __init__(self, num_hidden_units=10, step_size=.005, init_param_scale=0.01, max_num_epochs=5000):
        self.num_hidden_units = num_hidden_units
        self.init_param_scale = init_param_scale
        self.max_num_epochs = max_num_epochs
        self.step_size = step_size
        # Build the computation graph
        self.x = nodes.ValueNode(node_name="x")  # to hold a vector input
        self.y = nodes.ValueNode(node_name="y")  # to hold a scalar response
        ## TODO completed below: one possible MLP per the hint (the referenced PPT
        ## figure is not reproduced here): h = tanh(W1 x + b1), prediction = w2 . h + b2
        self.W1 = nodes.ValueNode(node_name="W1")  # hidden-layer weight matrix
        self.b1 = nodes.ValueNode(node_name="b1")  # hidden-layer bias vector
        self.w2 = nodes.ValueNode(node_name="w2")  # output weight vector
        self.b2 = nodes.ValueNode(node_name="b2")  # output bias (scalar)
        self.L = nodes.AffineNode(W=self.W1, x=self.x, b=self.b1, node_name="L")
        self.h = nodes.TanhNode(a=self.L, node_name="h")
        self.prediction = nodes.VectorScalarAffineNode(x=self.h, w=self.w2, b=self.b2, node_name="prediction")
        self.objective = nodes.SquaredL2DistanceNode(a=self.prediction, b=self.y, node_name="objective")
        self.inputs, self.outcomes = [self.x], [self.y]
        self.parameters = [self.W1, self.b1, self.w2, self.b2]
        self.graph = graph.ComputationGraphFunction(self.inputs, self.outcomes, self.parameters,
                                                    self.prediction, self.objective)
    def fit(self, X, y):
        num_instances, num_ftrs = X.shape
        y = y.reshape(-1)
        ## TODO completed: initialize parameters to small random values (not all
        ## zeros, to break symmetry); shapes follow the node definitions above
        s = self.init_param_scale
        init_values = {"W1": s * np.random.standard_normal((self.num_hidden_units, num_ftrs)),
                       "b1": s * np.random.standard_normal((self.num_hidden_units,)),
                       "w2": s * np.random.standard_normal((self.num_hidden_units,)),
                       "b2": s * np.array(np.random.standard_normal())}
        self.graph.set_parameters(init_values)

        for epoch in range(self.max_num_epochs):
            shuffle = np.random.permutation(num_instances)
            epoch_obj_tot = 0.0
            for j in shuffle:
                obj, grads = self.graph.get_gradients(input_values={"x": X[j]},
                                                      outcome_values={"y": y[j]})
                epoch_obj_tot += obj
                # Take a step in the negative gradient direction
                steps = {}
                for param_name in grads:
                    steps[param_name] = -self.step_size * grads[param_name]
                self.graph.increment_parameters(steps)
            if epoch % 50 == 0:
                train_loss = np.sum((y - self.predict(X, y)) ** 2) / num_instances
                print("Epoch ", epoch, ": Ave objective=", epoch_obj_tot / num_instances,
                      " Ave training loss: ", train_loss)
    def predict(self, X, y=None):
        try:
            getattr(self, "graph")
        except AttributeError:
            raise RuntimeError("You must train the classifier before predicting data!")
        num_instances = X.shape[0]
        preds = np.zeros(num_instances)
        for j in range(num_instances):
            preds[j] = self.graph.get_prediction(input_values={"x": X[j]})
        return preds
def main():
    lasso_data_fname = "lasso_data.pickle"
    x_train, y_train, x_val, y_val, target_fn, coefs_true, featurize = setup_problem.load_problem(lasso_data_fname)

    # Generate features
    X_train = featurize(x_train)
    X_val = featurize(x_val)

    # Let's plot prediction functions and compare coefficients for several fits
    # and the target function.
    pred_fns = []
    x = np.sort(np.concatenate([np.arange(0, 1, .001), x_train]))
    pred_fns.append({"name": "Target Parameter Values (i.e. Bayes Optimal)",
                     "coefs": coefs_true, "preds": target_fn(x)})

    estimator = MLPRegression(num_hidden_units=10, step_size=0.001, init_param_scale=.0005, max_num_epochs=5000)
    x_train_as_column_vector = x_train.reshape(x_train.shape[0], 1)  # fit expects a 2-dim array
    x_as_column_vector = x.reshape(x.shape[0], 1)  # predict expects a 2-dim array
    estimator.fit(x_train_as_column_vector, y_train)
    name = "MLP regression - no features"
    pred_fns.append({"name": name, "preds": estimator.predict(x_as_column_vector)})

    X = featurize(x)
    estimator = MLPRegression(num_hidden_units=10, step_size=0.0005, init_param_scale=.01, max_num_epochs=500)
    estimator.fit(X_train, y_train)
    name = "MLP regression - with features"
    pred_fns.append({"name": name, "preds": estimator.predict(X)})

    plot_utils.plot_prediction_functions(x, pred_fns, x_train, y_train, legend_loc="best")

if __name__ == '__main__':
    main()