开发仓颉numpy替代包之路_仓颉和python-CSDN博客

本文链接：https://blog.csdn.net/sergeyyurkov1/article/details/144677614

大家好，今天我想分享一下我开始开发的另一个仓颉项目。当研究如何用仓颉编写自己的神经网络时，我遇到了一个障碍：没有像Python中那样简单的办法对数组执行各种操作。比如，对数组执行指数运算。在Python中我们可以使用numpy的`np.exp(array)`函数，而在仓颉中，此操作是这样执行的：

// Element-wise exponential over a 2-D Float64 array.
// Each inner array is mapped through the scalar `exp` overload
// (presumably std.math's exp for Float64 — confirm against the file's imports)
// and the mapped rows are returned as a new Array<Array<Float64>>.
public func exp(array: Array<Array<Float64>>) {
    // Rows are gathered in a growable list, then converted to an Array at the end.
    let rows = ArrayList<Array<Float64>>([])
    for (row in array) {
        // Lazily map the row, then materialise it as a fresh Array<Float64>.
        let mappedRow = row |> map {x => exp(x)} |> collectArray
        rows.append(mappedRow)
    }
    return rows.toArray()
}

太复杂了吧！我的想法是简化数组操作，在仓颉中重新创建numpy包。

介绍

numcj基于matrix4cj，重新创建了numpy中的函数和API：https://github.com/sergeyyurkov1/numcj

numcj创建一个新数据结构名为`N2Darray`( np.array()代替品 )。N2Darray使用矩阵和数组函数，并存储矩阵和数组以进行互操作。

目前，只有这些功能可用：

数组加法、减法、乘法、除法、点积（使用matrix4cj包）
数组和浮点数加法、减法、乘法、除法（原始数组函数）
array.transpose()和array.T
np.zeros()
np.randn()
np.broadcastTo()
np.maximum()
np.exp()
np.sum()

因为基于matrix4cj，所以numcj有一个限制：它只支持Float64二维数组，不能使用三维或整数数组。我们可以使用类型转换或扩展matrix4cj包以使用整数。目前，我们将继续使用Float64二维数组。

安装指南

将numcj添加到cjpm.toml文件中：

[dependencies]
  numcj = {git = "https://github.com/sergeyyurkov1/numcj.git", branch = "main"}

[package]
  cjc-version = "0.53.13"
  ...

import numcj.*

演示

let np = Numcj() // 初始化numcj

有三种方法可以创建N2Darray：从二维数组、形状元组或矩阵：

let test = np.n2darray([[1.0, 1.0], [1.0, 1.0]])

let test2 = np.n2darray((2, 2), 1.0) // 默认值是0.0

let test3 = np.n2darray(Matrix([[1.0, 1.0], [1.0, 1.0]]))

println("${test.toString()}, ${test2.toString()}, ${test3.toString()}")

>>> [[1.000000, 1.000000], [1.000000, 1.000000]], [[1.000000, 1.000000], [1.000000, 1.000000]], [[1.000000, 1.000000], [1.000000, 1.000000]]

我们可以使用`toString()`方法来获取N2Darray的字符串表示。

让我们来看一下我们的仓颉神经网络教程中的一些例子，把它们改成numcj的函数：

Neural Network from Scratch in Cangjie: Part 2 - 仓颉从头开始的神经网络：第二部分-CSDN博客

np.dot()、array.T和数组加法

let inputs = np.n2darray([[1.0, 2.0, 3.0, 2.5], [2.0, 5.0, -1.0, 2.0], [-1.5, 2.7, 3.3, -0.8]])

let weights = np.n2darray([[0.2, 0.8, -0.5, 1.0], [0.5, -0.91, 0.26, -0.5], [-0.26, -0.27, 0.17, 0.87]])

let biases = np.n2darray([2.0, 3.0, 0.5]) //一维数组自动转换为二维数组

let layerOutputs = np.dot(inputs, weights.T) + biases

println("layerOutputs: ${layerOutputs.toString()}")

>>> layerOutputs: [[4.800000, 1.210000, 2.385000], [8.900000, -1.810000, 0.200000], [1.410000, 1.051000, 0.026000]]

np.randn()和数组和浮点数乘法

let randomWeights = np.randn((inputs.shape[0], weights.shape[0])) * 0.01

println("randomWeights: ${randomWeights.toString()}")

>>> randomWeights: [[0.005169, 0.001264, 0.004598], [0.007986, 0.005451, 0.003594], [0.009959, 0.001109, 0.004150]]

np.zeros()

let biasesZeros = np.zeros((1, weights.shape[0])) // (1, 3) - 3 neurons

println("biasesZeros: ${biasesZeros.toString()}")

>>> biasesZeros: [[0.000000, 0.000000, 0.000000]]

np.broadcastTo()

let biasesBroadcasted = np.broadcastTo(biases, 3)

println("biases: ${biases.toString()}")

println("biasesBroadcasted: ${biasesBroadcasted.toString()}")

>>>

biases: [[2.000000, 3.000000, 0.500000]]
biasesBroadcasted: [[2.000000, 3.000000, 0.500000], [2.000000, 3.000000, 0.500000], [2.000000, 3.000000, 0.500000]]

np.maximum()

let inputs2 = Array<Float64>([0.0, 2.0, -1.0, 3.3, -2.7, 1.1, 2.2, -100.0])

println("inputs2: ${inputs2.toString()}")

let outputs2 = np.maximum(0.0, np.n2darray(inputs2))

println("outputs2: ${outputs2.toString()}")

>>>

inputs2: [0.000000, 2.000000, -1.000000, 3.300000, -2.700000, 1.100000, 2.200000, -100.000000]
outputs2: [[0.000000, 2.000000, 0.000000, 3.300000, 0.000000, 1.100000, 2.200000, 0.000000]]

小于0的每个数字都变为0。

let outputs3 = np.maximum(0.0, inputs2)

println("outputs3: ${outputs3.toString()}")

>>> outputs3: [[0.000000, 2.000000, 0.000000, 3.300000, 0.000000, 1.100000, 2.200000, 0.000000]]

我们还可以传入一个普通数组，结果是一样的。

np.exp()

let inputs4 = np.n2darray([[4.8, 1.21, 2.385], [8.9, -1.81, 0.2], [1.41, 1.051, 0.026]])

println("inputs4: ${inputs4.toString()}")

let inputs4Exp = np.exp(inputs4)

println("inputs4Exp: ${inputs4Exp.toString()}")

>>>

inputs4: [[4.800000, 1.210000, 2.385000], [8.900000, -1.810000, 0.200000], [1.410000, 1.051000, 0.026000]]
inputs4Exp: [[121.510418, 3.353485, 10.859063], [7331.973539, 0.163654, 1.221403], [4.095955, 2.860510, 1.026341]]

np.sum()

let inputs4Sum = np.sum(inputs4, axis: 0)

println("inputs4Sum: ${inputs4Sum.toString()}")

let inputs4SumAxis1 = np.sum(inputs4, axis: 1)

println("inputs4SumAxis1: ${inputs4SumAxis1.toString()}")

let inputs4SumAxis1Keepdims = np.sum(inputs4, axis: 1, keepdims: true)

println("inputs4SumAxis1Keepdims: ${inputs4SumAxis1Keepdims.toString()}")

>>>

inputs4Sum: [[15.110000, 0.451000, 2.611000]]
inputs4SumAxis1: [[8.395000, 7.290000, 2.487000]]
inputs4SumAxis1Keepdims: [[8.395000], [7.290000], [2.487000]]

数组除法

let inputs4ExpNorm = inputs4Exp / inputs4SumAxis1Keepdims

println("inputs4ExpNorm: ${inputs4ExpNorm.toString()}")

>>> inputs4ExpNorm: [[14.474141, 0.399462, 1.293516], [1005.757687, 0.022449, 0.167545], [1.646946, 1.150185, 0.412682]]

其他数组操作

let a = np.n2darray([[1.0, 1.0], [1.0, 1.0]])
let b = np.n2darray([[2.0, 2.0], [2.0, 2.0]])
println("a: ${a.toString()}")
println("b: ${b.toString()}")

let c = a + b
println("a + b: ${c.toString()}")

let d = a - b
println("a - b: ${d.toString()}")

// Element-wise multiplication, not dot product
let e = a * b
println("a * b: ${e.toString()}")

let e2 = a * 3.0
println("a * 3.0: ${e2.toString()}")

let f = a / b
println("a / b: ${f.toString()}")

let f2 = a / 3.0
println("a / 3.0: ${f2.toString()}")

>>>
a: [[1.000000, 1.000000], [1.000000, 1.000000]]
b: [[2.000000, 2.000000], [2.000000, 2.000000]]
a + b: [[3.000000, 3.000000], [3.000000, 3.000000]]
a - b: [[-1.000000, -1.000000], [-1.000000, -1.000000]]
a * b: [[2.000000, 2.000000], [2.000000, 2.000000]]
a * 3.0: [[3.000000, 3.000000], [3.000000, 3.000000]]
a / b: [[0.500000, 0.500000], [0.500000, 0.500000]]
a / 3.0: [[0.333333, 0.333333], [0.333333, 0.333333]]

随着时间的推移，将添加更多功能。同学们，如果有任何想法，请在下面发表评论。感谢阅读！