





tensorflow中的计算图有三种,分别是静态计算图、动态计算图以及autograph。tf2默认采用动态计算图,即每使用一个算子后,该算子会被动态加入到隐含的默认计算图中并立即执行得到结果。pytorch也采用这种计算方式:每次搭建完一个计算图并完成反向传播之后,整个计算图就会在内存中被释放,如下示例,第二次loss.backward()会直接报错(除非第一次调用时传入retain_graph=True)。动态图不区分计算图的定义和执行,定义后立即执行,称之为eager execution。

# PyTorch illustration of the dynamic graph described in the text: the
# second backward() call on the same loss is expected to fail.
# NOTE(review): `torch` is not imported anywhere in this snippet.
a = torch.tensor([3.0, 1.0], requires_grad=True)
b = a * a
loss = b.mean()

loss.backward() # first backward pass: succeeds
loss.backward() # second backward pass: RuntimeError


import tensorflow as tf
# Static-graph illustration: the graph is defined first, then executed
# inside a session.
# NOTE(review): tf.placeholder / tf.string_join / tf.Session look like
# TF 1.x-style APIs; under TF 2.x they presumably live in tf.compat.v1 —
# confirm against the installed version.
g = tf.Graph()
with g.as_default():
    # Two scalar string inputs that are fed at run time.
    x = tf.placeholder(name='x', shape=[], dtype=tf.string)
    y = tf.placeholder(name='y', shape=[], dtype=tf.string)
    # Join the two inputs with a single space between them.
    z = tf.string_join([x,y],name = 'join',separator=' ')
with tf.Session(graph = g) as sess:
    # Evaluate z with concrete values for both placeholders and print it.
    print(sess.run(fetches = z,feed_dict = {x:"hello",y:"world"}))





class Graph(object):
    """Computational graph: holds the lists of nodes that belong to it.

    Used as a context manager, the graph installs itself as the module-level
    ``_default_graph`` on entry and restores the previous one on exit, so
    ``with`` blocks can be nested.
    """

    def __init__(self):
        self.operations = []
        self.placeholders = []
        self.variables = []
        self.constants = []

    def __enter__(self):
        """Make this graph the default one and return it."""
        global _default_graph
        try:
            # Remember whatever was the default so __exit__ can restore it.
            self.graph = _default_graph
        except NameError:
            # No default graph has been installed yet.
            self.graph = None
        _default_graph = self
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Restore the default graph that was active before ``__enter__``."""
        global _default_graph
        _default_graph = self.graph

    def as_default(self):
        """Return ``self`` so the graph can be used as ``with g.as_default():``."""
        return self

class Operation(object):
    """Graph node that computes its value from the values of its input nodes.

    Subclasses override ``compute``. The ``+``, unary ``-``, binary ``-`` and
    ``*`` operators build ``add``, ``negative``, ``add(negative(...))`` and
    ``matmul`` nodes respectively (note that ``*`` maps to ``matmul``).
    """

    def __init__(self, *input_nodes):
        self.input_nodes = input_nodes
        self.output_nodes = []

        # Add a reference to this node to every input node's output_nodes so
        # the consumers of a node can be found from the node itself.
        for node in input_nodes:
            node.output_nodes.append(self)

        # Add this node to the default graph so the graph's resources can be
        # cleaned up later. If no default graph is installed, registration is
        # skipped rather than failing.
        try:
            graph = _default_graph
        except NameError:
            graph = None
        if graph is not None:
            graph.operations.append(self)

    def compute(self):
        """Compute this node's output; concrete operations must override this."""
        raise NotImplementedError

    # The imports are local because the operations module itself imports
    # Operation from this module.
    def __add__(self, other):
        from .operations import add
        return add(self, other)

    def __neg__(self):
        from .operations import negative
        return negative(self)

    def __sub__(self, other):
        from .operations import add, negative
        return add(self, negative(other))

    def __mul__(self, other):
        from .operations import matmul
        return matmul(self, other)

class Placeholder(object):
    """Graph node whose value is supplied at run time (via a feed dict).

    Supports ``+``, unary ``-``, binary ``-`` and ``*``; ``*`` builds a
    ``matmul`` node.
    """

    def __init__(self):
        self.output_nodes = []

        # Register with the default graph's placeholder list; skipped when
        # no default graph is installed.
        try:
            graph = _default_graph
        except NameError:
            graph = None
        if graph is not None:
            graph.placeholders.append(self)

    # Local imports: the operations module imports from this module.
    def __add__(self, other):
        from .operations import add
        return add(self, other)

    def __neg__(self):
        from .operations import negative
        return negative(self)

    def __sub__(self, other):
        from .operations import add, negative
        return add(self, negative(other))

    def __mul__(self, other):
        from .operations import matmul
        return matmul(self, other)

class Variable(object):
    """Graph node that stores a value in ``self.value``.

    Supports ``+``, unary ``-``, binary ``-`` and ``*``; ``*`` builds a
    ``matmul`` node.
    """

    def __init__(self, initial_value=None):
        self.value = initial_value
        self.output_nodes = []

        # Register with the default graph's variable list; skipped when no
        # default graph is installed.
        try:
            graph = _default_graph
        except NameError:
            graph = None
        if graph is not None:
            graph.variables.append(self)

    # Local imports: the operations module imports from this module.
    def __add__(self, other):
        from .operations import add
        return add(self, other)

    def __neg__(self):
        from .operations import negative
        return negative(self)

    def __sub__(self, other):
        from .operations import add, negative
        return add(self, negative(other))

    def __mul__(self, other):
        from .operations import matmul
        return matmul(self, other)

class Constant(object):
    """Graph node that stores a fixed value in ``self.value``.

    Supports ``+``, unary ``-``, binary ``-`` and ``*``; ``*`` builds a
    ``matmul`` node.
    """

    def __init__(self, value=None):
        self.value = value
        self.output_nodes = []

        # Register with the default graph's constant list; skipped when no
        # default graph is installed.
        try:
            graph = _default_graph
        except NameError:
            graph = None
        if graph is not None:
            graph.constants.append(self)

    # Local imports: the operations module imports from this module.
    def __add__(self, other):
        from .operations import add
        return add(self, other)

    def __neg__(self):
        from .operations import negative
        return negative(self)

    def __sub__(self, other):
        from .operations import add, negative
        return add(self, negative(other))

    def __mul__(self, other):
        from .operations import matmul
        return matmul(self, other)

1. 对节点类重载运算符,这样节点之间就可以直接使用 +、-、* 进行运算(注意这里的 * 对应的是 matmul 矩阵乘法,而不是逐元素相乘)。由于 graph 模块和 operations 模块相互引用,为了避免循环导入,每个重载方法内部都要单独 from .operations import 一次。



import numpy as np
from .graph import Operation, Placeholder, Variable, Constant

class Session(object):
    """Runs the feed-forward evaluation of graph nodes.

    Usable as a context manager; leaving the ``with`` block calls ``close``.
    """

    def __init__(self):
        # NOTE(review): `_default_graph` is not among the names imported
        # next to this class; presumably it is provided elsewhere — confirm.
        self.graph = _default_graph

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # close() returns None, so exceptions are never suppressed.
        self.close()

    def close(self):
        """Reset the cached ``output`` of every node registered in the graph."""
        all_nodes = (self.graph.operations + self.graph.variables +
                     self.graph.constants + self.graph.placeholders)
        for node in all_nodes:
            node.output = None

    def run(self, operation, feed_dict=None):
        """Compute and return the output value of ``operation``.

        :param operation: the node whose value is wanted.
        :param feed_dict: mapping from Placeholder nodes to their values;
            ``None`` is treated as an empty mapping.
        :raises KeyError: if a required placeholder is missing from
            ``feed_dict``.
        """
        feed_dict = {} if feed_dict is None else feed_dict

        # Post-order guarantees every input is evaluated before its consumer.
        for node in traverse_postorder(operation):
            if isinstance(node, Placeholder):
                node.output = feed_dict[node]
            elif isinstance(node, (Variable, Constant)):
                node.output = node.value
            else:  # Operation
                # Collect the already-computed value of every input node ...
                node.inputs = [input_node.output for input_node in node.input_nodes]
                # ... and unpack them into the operation's forward computation.
                node.output = node.compute(*node.inputs)

            # Plain Python lists are converted so downstream ops get arrays.
            if isinstance(node.output, list):
                node.output = np.array(node.output)
        return operation.output

def traverse_postorder(operation):
    """Return the nodes needed to evaluate ``operation`` in post-order.

    Every node appears after all of its input nodes, with ``operation``
    itself last. A node reachable through several paths appears once per
    path.

    :param operation: the node to start the traversal from.
    """
    nodes_postorder = []

    def recurse(node):
        # Only Operation nodes have inputs to descend into.
        if isinstance(node, Operation):
            for input_node in node.input_nodes:
                recurse(input_node)
        nodes_postorder.append(node)

    recurse(operation)
    return nodes_postorder


import numpy as np
from .graph import Operation

class matmul(Operation):
    """Matrix-multiplication node: ``np.dot`` of its two inputs."""

    def __init__(self, x, y):
        super(matmul, self).__init__(x, y)

    def compute(self, x_value, y_value):
        """Return ``np.dot(x_value, y_value)``.

        ``x_value`` and ``y_value`` are the concrete values of the inputs,
        not graph nodes; ``self.input_nodes`` holds the nodes themselves.

        :param x_value: value of the left operand.
        :param y_value: value of the right operand.
        """
        return np.dot(x_value, y_value)

class add(Operation):
    """Addition node: ``np.add`` of its two inputs."""

    def __init__(self, x, y):
        # x and y are the two graph nodes whose values get summed.
        super().__init__(x, y)

    def compute(self, x_value, y_value):
        """Return the sum of the two already-evaluated input values."""
        total = np.add(x_value, y_value)
        return total

class negative(Operation):
    """Negation node: unary minus of its single input."""

    def __init__(self, x):
        # x is the graph node whose value gets negated.
        super().__init__(x)

    def compute(self, x_value):
        """Return ``-x_value``."""
        negated = -x_value
        return negated

class multiply(Operation):
    """Element-wise product node: ``np.multiply`` of its two inputs."""

    def __init__(self, x, y):
        # x and y are the two graph nodes whose values get multiplied.
        super().__init__(x, y)

    def compute(self, x_value, y_value):
        """Return the element-wise product of the two input values."""
        product = np.multiply(x_value, y_value)
        return product

class sigmoid(Operation):
    """Logistic sigmoid node: ``1 / (1 + exp(-x))`` of its single input."""

    def __init__(self, x):
        # x is the graph node the sigmoid is applied to.
        super().__init__(x)

    def compute(self, x_value):
        """Return the sigmoid of ``x_value``."""
        denominator = 1 + np.exp(-x_value)
        return 1 / denominator

class softmax(Operation):
    """Row-wise softmax node over axis 1 of its single input."""

    def __init__(self, x):
        super(softmax, self).__init__(x)

    def compute(self, x_value):
        """Return the softmax of each row of ``x_value``.

        The row maximum is subtracted before exponentiating. This leaves the
        result mathematically unchanged but keeps ``exp`` from overflowing
        to ``inf`` (and the division from producing ``nan``) on large inputs.

        :param x_value: 2-D input; the softmax is taken along axis 1.
        """
        shifted = x_value - np.max(x_value, axis=1, keepdims=True)
        exps = np.exp(shifted)
        return exps / np.sum(exps, axis=1, keepdims=True)

class log(Operation):
    """Natural-logarithm node: ``np.log`` of its single input."""

    def __init__(self, x):
        # x is the graph node whose logarithm is taken.
        super().__init__(x)

    def compute(self, x_value):
        """Return ``np.log(x_value)``."""
        result = np.log(x_value)
        return result

class square(Operation):
    """Element-wise square node: ``np.square`` of its single input."""

    def __init__(self, x):
        # x is the graph node whose value gets squared.
        super().__init__(x)

    def compute(self, x_value):
        """Return ``np.square(x_value)``."""
        squared = np.square(x_value)
        return squared

class reduce_sum(Operation):
    """Sum-reduction node: ``np.sum`` of its input along ``axis``."""

    def __init__(self, A, axis=None):
        # A is the graph node to reduce; axis is forwarded to np.sum as-is.
        super().__init__(A)
        self.axis = axis

    def compute(self, A_value):
        """Return ``A_value`` summed over ``self.axis``."""
        reduced = np.sum(A_value, self.axis)
        return reduced





import numpy as np

# Maps an operation class to the function that computes its gradient.
_gradient_registry = {}


class RegisterGradient(object):
    """Decorator that registers a gradient function for an operation type.

    ``op_type`` may be the operation class itself or a string naming it; a
    string is resolved with ``eval`` in this module's scope.

    NOTE(review): none of the gradient functions visible next to this class
    is decorated, so nothing populates ``_gradient_registry`` here —
    presumably the ``@RegisterGradient(...)`` lines were lost; confirm.
    """

    def __init__(self, op_type):
        if isinstance(op_type, str):
            # NOTE(security): eval executes arbitrary code; only ever pass
            # trusted, hard-coded operation names here.
            op_type = eval(op_type)
        self._op_type = op_type

    def __call__(self, f):
        """Store ``f`` under the operation type and return it unchanged."""
        _gradient_registry[self._op_type] = f
        return f

def _sum_to_shape(grad, shape):
    """Sum ``grad`` down to ``shape``, undoing the broadcasting of an add."""
    # Drop leading axes that broadcasting prepended to the operand.
    while np.ndim(grad) > len(shape):
        grad = np.sum(grad, axis=0)
    # Axes of size 1 in the operand were stretched; sum them back to size 1.
    for axis, size in enumerate(shape):
        if size == 1:
            grad = np.sum(grad, axis=axis, keepdims=True)
    return grad


def _add_gradient(op, grad):
    """Gradient of ``add`` with respect to both of its inputs.

    The incoming gradient is passed through to each operand, summed over
    whichever axes that operand was broadcast along.

    :param op: the add operation; ``op.inputs`` holds its two input values.
    :param grad: gradient of the loss with respect to the output of ``op``.
    :return: ``[grad_wrt_x, grad_wrt_y]``.
    """
    x, y = op.inputs[0], op.inputs[1]
    return [_sum_to_shape(grad, np.shape(x)), _sum_to_shape(grad, np.shape(y))]

def _matmul_gradient(op, grad):
    """Gradient of ``matmul`` with respect to both of its inputs.

    The gradient for x is ``grad`` times the transpose of y; the gradient
    for y is the transpose of x times ``grad``.

    :param op: the matmul operation; ``op.inputs`` holds its two input values.
    :param grad: gradient of the loss with respect to the output of ``op``.
    :return: ``[grad_wrt_x, grad_wrt_y]``.
    """
    x, y = op.inputs[0], op.inputs[1]
    return [np.dot(grad, np.transpose(y)), np.dot(np.transpose(x), grad)]

def _sigmoid_gradient(op, grad):
    """Gradient of ``sigmoid``, computed from the op's cached output."""
    activation = op.output
    complement = 1 - activation
    return grad * activation * complement

def _softmax_gradient(op, grad):
    """Gradient (derivative) of the row-wise ``softmax``.

    :param op: the softmax operation; ``op.output`` is its cached output.
    :param grad: gradient of the loss with respect to the output of ``op``.
    :return: gradient of the loss with respect to the softmax input.
    """
    softmax = op.output
    # Per-row sum of grad * softmax, reshaped to a column for broadcasting.
    row_dot = np.reshape(np.sum(grad * softmax, 1), [-1, 1])
    return (grad - row_dot) * softmax

def _log_gradient(op, grad):
    """Gradient of ``log``: the incoming gradient divided by the input."""
    operand = op.inputs[0]
    scaled = grad / operand
    return scaled

def _multiply_gradient(op, grad):
    """Gradient of ``multiply``: each side receives grad times the other."""
    left = op.inputs[0]
    right = op.inputs[1]
    grad_wrt_left = grad * right
    grad_wrt_right = grad * left
    return [grad_wrt_left, grad_wrt_right]

def _negative_gradient(op, grad):
    """Gradient of ``negative``: the incoming gradient with its sign flipped."""
    flipped = -grad
    return flipped

def _square_gradient(op, grad):
    """Gradient of ``square``: the incoming gradient times ``2 * input``."""
    operand = op.inputs[0]
    local_derivative = np.multiply(2.0, operand)
    return grad * local_derivative

def _reduce_sum_gradient(op, grad):
    """Gradient of ``reduce_sum``: spread ``grad`` back over the input shape.

    The gradient is reshaped to the input shape with the reduced axis set to
    1, then tiled along that axis up to the input's original size.
    """
    input_shape = np.shape(op.inputs[0])

    # Shape of the op's output if the reduced axis had been kept at size 1.
    collapsed_shape = np.array(input_shape)
    collapsed_shape[op.axis] = 1

    # How many times the gradient must be repeated along each axis.
    repeats = input_shape // collapsed_shape
    return np.tile(np.reshape(grad, collapsed_shape), repeats)


import numpy as np
from queue import Queue

from .graph import Operation, Variable
from .gradients import _gradient_registry

def compute_gradients(loss):
    """Back-propagate from ``loss`` and return the gradient of every node.

    Each operation's gradient function (looked up in ``_gradient_registry``
    by the operation's class) gives the gradient of its inputs from the
    gradient of its output. Nodes are processed in reverse topological
    order, so every consumer's gradient is known before the node that feeds
    it. Consumers that do not lead to ``loss`` are ignored.

    :param loss: the node to differentiate.
    :return: dict mapping each node that ``loss`` depends on (and ``loss``
        itself, with gradient 1) to the gradient of ``loss`` w.r.t. it.
    """
    # Iterative post-order DFS along input edges: inputs before consumers.
    order = []
    seen = set()
    stack = [(loss, False)]
    while stack:
        node, inputs_done = stack.pop()
        if inputs_done:
            order.append(node)
            continue
        if node in seen:
            continue
        seen.add(node)
        stack.append((node, True))
        # Only operations have input_nodes; leaf nodes contribute nothing.
        for input_node in getattr(node, "input_nodes", ()):
            if input_node not in seen:
                stack.append((input_node, False))

    grad_table = {loss: 1}  # gradient of the loss for each node
    for node in reversed(order):
        if node is loss:
            continue

        total = 0
        for output_node in node.output_nodes:
            # Skip consumers that are not on a path to the loss.
            if output_node not in grad_table:
                continue

            bprop = _gradient_registry[output_node.__class__]
            lossgrads_wrt_inputs = bprop(output_node, grad_table[output_node])

            if len(output_node.input_nodes) == 1:
                # Single-input ops return the gradient directly.
                total = total + lossgrads_wrt_inputs
            else:
                # Multi-input ops return one gradient per input; pick ours.
                index = output_node.input_nodes.index(node)
                total = total + lossgrads_wrt_inputs[index]

        # A node with several consumers receives the sum of their gradients.
        grad_table[node] = total

    return grad_table


class GradientDescentOptimizer(object):
    """Plain gradient descent: ``value -= learning_rate * gradient``."""

    def __init__(self, learning_rate):
        self.learning_rate = learning_rate

    def minimize(self, loss):
        """Return an operation that performs one descent step on ``loss``.

        Running the returned operation computes the gradients of ``loss``
        and updates the ``value`` of every Variable that ``loss`` depends on.
        Only Variable nodes are updated.

        :param loss: the node to minimize.
        """
        learning_rate = self.learning_rate

        class MinimizationOperation(Operation):
            def compute(self):
                grad_table = compute_gradients(loss)

                for node, grad in grad_table.items():
                    if isinstance(node, Variable):
                        # Rebind instead of `-=` so the array originally
                        # passed to Variable(...) is not mutated in place.
                        node.value = node.value - learning_rate * grad

        return MinimizationOperation()


import numpy as np
import matplotlib.pylab as plt
import similarflow as sf

# Linear-regression example: fit y = w * x + b to noisy samples of y = 3x.
input_x = np.linspace(-1, 1, 100)
input_y = input_x * 3 + np.random.randn(input_x.shape[0]) * 0.5

x = sf.Placeholder()
y = sf.Placeholder()
w = sf.Variable([[1.0]])
b = sf.Variable(0.0)

# Operator form of sf.add(sf.matmul(x, w), b). The model is built only once
# so that no unused nodes are left attached to x, w and b.
linear = x * w + b

# Operator form of sf.reduce_sum(sf.square(sf.add(linear, sf.negative(y)))).
loss = sf.reduce_sum(sf.square(linear - y))

train_op = sf.train.GradientDescentOptimizer(learning_rate=0.005).minimize(loss)

# Column vectors of shape (100, 1): `*` is matmul, and w has shape (1, 1).
feed_dict = {x: np.reshape(input_x, (-1, 1)), y: np.reshape(input_y, (-1, 1))}

with sf.Session() as sess:
    for step in range(20):
        # Forward pass
        loss_value = sess.run(loss, feed_dict)
        mse = loss_value / len(input_x)
        # Backward pass and parameter update
        sess.run(train_op, feed_dict)
    w_value = sess.run(w, feed_dict=feed_dict)
    b_value = sess.run(b, feed_dict=feed_dict)

# w holds a single weight in a (1, 1) array; squeeze it before converting.
w_value = float(np.squeeze(w_value))
max_x, min_x = np.max(input_x), np.min(input_x)
max_y, min_y = w_value * max_x + b_value, w_value * min_x + b_value

# Fitted line in red over the scattered training samples.
plt.plot([max_x, min_x], [max_y, min_y], color='r')
plt.scatter(input_x, input_y)


import numpy as np
import similarflow as sf
import matplotlib.pyplot as plt

# Create red points centered at (-2, -2)
red_points = np.random.randn(50, 2) - 2 * np.ones((50, 2))

# Create blue points centered at (2, 2)
blue_points = np.random.randn(50, 2) + 2 * np.ones((50, 2))

X = sf.Placeholder()
y = sf.Placeholder()
W = sf.Variable(np.random.randn(2, 2))
b = sf.Variable(np.random.randn(2))

# Class probabilities for each point.
p = sf.softmax(sf.add(sf.matmul(X, W), b))

# Cross-entropy loss summed over all points.
loss = sf.negative(sf.reduce_sum(sf.reduce_sum(sf.multiply(y, sf.log(p)), axis=1)))

train_op = sf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(loss)

# One-hot labels: blue points are class 0, red points are class 1.
feed_dict = {
    X: np.concatenate((blue_points, red_points)),
    y: [[1, 0]] * len(blue_points) + [[0, 1]] * len(red_points)
}

with sf.Session() as sess:
    for step in range(100):
        loss_value = sess.run(loss, feed_dict)
        if step % 10 == 0:
            print("Step:", step, " Loss:", loss_value)
        sess.run(train_op, feed_dict)

    # Print final result
    W_value = sess.run(W)
    print("Weight matrix:\n", W_value)
    b_value = sess.run(b)
    print("Bias:\n", b_value)

# Plot the line derived from the learned weights and bias
x_axis = np.linspace(-4, 4, 100)
y_axis = -W_value[0][0] / W_value[1][0] * x_axis - b_value[0] / W_value[1][0]
plt.plot(x_axis, y_axis)

# Add the red and blue points
plt.scatter(red_points[:, 0], red_points[:, 1], color='red')
plt.scatter(blue_points[:, 0], blue_points[:, 1], color='blue')


import numpy as np
import similarflow as sf
import matplotlib.pyplot as plt

# Create two clusters of red points centered at (0, 0) and (1, 1), respectively.
red_points = np.concatenate((
    0.2 * np.random.randn(25, 2) + np.array([[0, 0]] * 25),
    0.2 * np.random.randn(25, 2) + np.array([[1, 1]] * 25)
))

# Create two clusters of blue points centered at (0, 1) and (1, 0), respectively.
blue_points = np.concatenate((
    0.2 * np.random.randn(25, 2) + np.array([[0, 1]] * 25),
    0.2 * np.random.randn(25, 2) + np.array([[1, 0]] * 25)
))

# Plot them
plt.scatter(red_points[:, 0], red_points[:, 1], color='red')
plt.scatter(blue_points[:, 0], blue_points[:, 1], color='blue')

X = sf.Placeholder()
y = sf.Placeholder()

# Hidden layer: sigmoid(X W_hidden + b_hidden)
W_hidden = sf.Variable(np.random.randn(2, 2))
b_hidden = sf.Variable(np.random.randn(2))
p_hidden = sf.sigmoid(sf.add(sf.matmul(X, W_hidden), b_hidden))

# Output layer: softmax(p_hidden W_output + b_output)
W_output = sf.Variable(np.random.randn(2, 2))
b_output = sf.Variable(np.random.rand(2))
p_output = sf.softmax(sf.add(sf.matmul(p_hidden, W_output), b_output))

# Cross-entropy loss summed over all points.
loss = sf.negative(sf.reduce_sum(sf.reduce_sum(sf.multiply(y, sf.log(p_output)), axis=1)))

train_op = sf.train.GradientDescentOptimizer(learning_rate=0.03).minimize(loss)

# One-hot labels: blue points are class 0, red points are class 1.
feed_dict = {
    X: np.concatenate((blue_points, red_points)),
    y: [[1, 0]] * len(blue_points) + [[0, 1]] * len(red_points)
}

with sf.Session() as sess:
    for step in range(100):
        loss_value = sess.run(loss, feed_dict)
        if step % 10 == 0:
            print("Step:", step, " Loss:", loss_value)
        sess.run(train_op, feed_dict)

    # Print final result
    W_hidden_value = sess.run(W_hidden)
    print("Hidden layer weight matrix:\n", W_hidden_value)
    b_hidden_value = sess.run(b_hidden)
    print("Hidden layer bias:\n", b_hidden_value)
    W_output_value = sess.run(W_output)
    print("Output layer weight matrix:\n", W_output_value)
    b_output_value = sess.run(b_output)
    print("Output layer bias:\n", b_output_value)

# Visualize classification boundary
xs = np.linspace(-2, 2)
ys = np.linspace(-2, 2)
pred_classes = []
# px / py are used as loop names so the placeholder `y` is not overwritten.
for px in xs:
    for py in ys:
        pred_class = sess.run(p_output, feed_dict={X: [[px, py]]})[0]
        pred_classes.append((px, py, pred_class.argmax()))
xs_p, ys_p = [], []
xs_n, ys_n = [], []
for px, py, c in pred_classes:
    # Class 0 is the blue label above, so it goes to the series drawn 'bo'.
    if c == 0:
        xs_n.append(px)
        ys_n.append(py)
    else:
        xs_p.append(px)
        ys_p.append(py)
plt.plot(xs_p, ys_p, 'ro', xs_n, ys_n, 'bo')

Original: https://blog.csdn.net/u012193416/article/details/122958900
Author: Kun Li
Title: 用numpy实现tensorflow式的深度学习框架similarflow





