Python C API && pybind11

Python C API

什么是Python的C语言扩展

为什么要使用C API

  • C语言实现的复杂计算效率高;
  • 已经有成熟的C语言编写的函数库;
  • 和操作系统相关的操作只能使用C语言实现。

为什么Python能够使用C模块

  • CPython虚拟机本身是由C语言编写的;
  • 使用动态链接库的方式可以直接调用模块函数。

Python调用C模块都有哪些方法

  • C API;
  • pybind11;
  • ctypes;
  • SWIG;
  • Cython。

我理解的C模块调用原理

模块加载

python interpreter

1
2
3
4
5
6
7
8
9
10
  Module
|
\/ +-----------+
Load lib(libsum.so) ------> |PyModuleDef|
+-----------+ +-----------+
|PyMethodDef| ------> |PyMethodDef|
+-----------+ +-----------+
|C function | ------> sum(c function)
+-----------+

模块内函数调用

1
2
3
4
5
6
7
8
9
10
11
12
13
14
python --   call python function
|
\/
-- PyArg_ParseTuple
| |
| \/
C | call c module's function
| |
| \/
-- Py_BuildValue
|
\/
python -- python function return

简单示例

libsum.cc

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#include <Python.h>

/* The actual business logic: returns the sum of the two integers a and b. */
int sum(int a, int b) {
    const int total = a + b;
    return total;
}

/**
 * Wrapper around sum() that is registered in the PyMethodDef table so the
 * Python interpreter can call it.
 *
 * self: first argument supplied by the interpreter (unused here).
 * args: argument tuple; the format string "ii" unpacks it into two C ints.
 *
 * Returns the result of sum() boxed as a Python object via Py_BuildValue, or
 * NULL when the arguments cannot be parsed.
 **/
static PyObject* sum_wrapper(PyObject* self, PyObject* args) {
    int lhs = 0;
    int rhs = 0;

    if (PyArg_ParseTuple(args, "ii", &lhs, &rhs)) {
        return Py_BuildValue("i", sum(lhs, rhs));
    }
    return NULL;
}

/**
* Table of the functions registered in the module: one entry per function,
* terminated by the sentinel {NULL, NULL, 0, NULL}.
* Each entry has four fields: the name used from Python, the C wrapper that is
* actually called, the calling-convention flag describing how arguments are
* passed, and the function's description.
**/
static PyMethodDef SumMethods[] = {
{"sum", sum_wrapper, METH_VARARGS, "Calculate the sum of two integers."},
{NULL, NULL, 0, NULL}
};

/**
* Module definition handed to PyModule_Create. Fields, in order:
*   - PyModuleDef_HEAD_INIT: fixed initializer for the structure header;
*   - the module name, i.e. the name used after `import`;
*   - the module description;
*   - -1: the per-module state size; per the CPython documentation, -1 means
*     the module keeps its state in global variables and does not support
*     sub-interpreters;
*   - the method table registered with the module.
**/
static struct PyModuleDef summodule = {
PyModuleDef_HEAD_INIT,
"libsum",
"A module that adds two numbers",
-1,
SumMethods
};

// 初始化模块,以PyInit_模块名 命名
PyMODINIT_FUNC PyInit_libsum(void) {
return PyModule_Create(&summodule);
}

py_sum.py

1
2
3
4
5
6
7
# Minimal driver for the libsum extension module built from libsum.cc.
import libsum

a = 1
b = 2
# libsum.sum is the C function registered under the name "sum".
c = libsum.sum(a, b)

print(f"{a} + {b} = {c}")

setup.py

1
2
3
4
5
6
7
8
9
10
# Build script for the libsum extension module.
# setuptools replaces distutils here: distutils is deprecated (PEP 632) and was
# removed from the standard library in Python 3.12, while setuptools provides
# the same `setup` / `Extension` interface.
from setuptools import setup, Extension

# One extension named "libsum", compiled from libsum.cc with debug symbols.
libsum_module = Extension(
    'libsum',
    sources=['libsum.cc'],
    extra_compile_args=['-g'],
)

setup(
    name='libsum',
    version='1.0',
    description='This is a libsum module',
    ext_modules=[libsum_module],
)
1
2
3
4
5
6
python setup.py build_ext --inplace

生成
libsum.cpython-310-x86_64-linux-gnu.so

可以直接被python import进来

数组和自定义类型

数组

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
/*
 * Arithmetic mean of the first `len` elements of `arr`.
 * The elements are accumulated in index order and the total is divided by
 * `len`; no special handling is applied when `len` is 0.
 */
double avg(double *arr, size_t len) {
    double total = 0;
    size_t idx = 0;

    while (idx < len) {
        total += arr[idx];
        idx++;
    }
    return total / len;
}

/*
 * Python-callable wrapper around avg().
 *
 * Takes one argument, obtains a contiguous buffer view of it (with format
 * information), and requires that view to be one-dimensional with the format
 * string "d" (C double). Returns the mean as a Python float, or NULL with a
 * TypeError set when the shape or element type is not accepted, or NULL when
 * argument parsing or buffer acquisition fails.
 */
static PyObject *avg_wrapper(PyObject *self, PyObject *args) {
    PyObject *bufobj;
    Py_buffer view;

    if (!PyArg_ParseTuple(args, "O", &bufobj)) {
        return NULL;
    }

    /* Ask for contiguous memory so view.buf can be walked as a plain array. */
    if (PyObject_GetBuffer(bufobj, &view,
                           PyBUF_ANY_CONTIGUOUS | PyBUF_FORMAT) == -1) {
        return NULL;
    }

    const char *problem = NULL;
    double mean = 0.0;

    if (view.ndim != 1) {
        /* Only one-dimensional arrays are accepted. */
        problem = "Expected a 1-dimensional array";
    } else if (strcmp(view.format, "d") != 0) {
        /* Only arrays of doubles are accepted. */
        problem = "Expected an array of doubles";
    } else {
        /* view.buf is the first element; view.shape[0] is the element count. */
        mean = avg((double *)view.buf, view.shape[0]);
    }

    if (problem != NULL) {
        PyErr_SetString(PyExc_TypeError, problem);
    }

    /* The Py_buffer must be released explicitly on every path past GetBuffer. */
    PyBuffer_Release(&view);

    if (problem != NULL) {
        return NULL;
    }
    return Py_BuildValue("d", mean);
}

自定义类型

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
// The C++ type that is exposed to Python: a thin wrapper around one double
// with get/set accessors and the four arithmetic operators.
class DoubleWarpper {
private:
  double m_value;

public:
  DoubleWarpper(double value) : m_value(value) {}
  virtual ~DoubleWarpper() { m_value = 0; }

  // Read accessor. Const-qualified so it can be called on const objects and
  // through const references; it does not modify the wrapper.
  double get() const { return m_value; }
  // Replaces the stored value.
  void set(double value) { m_value = value; }

  // Each operator returns a new wrapper holding the result and leaves both
  // operands untouched.
  DoubleWarpper operator+(const DoubleWarpper &other) const {
    return DoubleWarpper(m_value + other.m_value);
  }
  DoubleWarpper operator-(const DoubleWarpper &other) const {
    return DoubleWarpper(m_value - other.m_value);
  }
  DoubleWarpper operator*(const DoubleWarpper &other) const {
    return DoubleWarpper(m_value * other.m_value);
  }
  DoubleWarpper operator/(const DoubleWarpper &other) const {
    return DoubleWarpper(m_value / other.m_value);
  }
};

// Object layout required by the Python C API: the standard object header
// followed by a pointer to the wrapped C++ instance.
//
// PyObject_HEAD already expands to a complete member declaration, so it must
// not be followed by a semicolon. The pointer carries no default member
// initializer: instances are created by the Python allocators, which never run
// a C++ constructor, so such an initializer would not take effect, and it is
// not permitted in an unnamed struct that is named through a typedef in C++20.
typedef struct {
  PyObject_HEAD
  DoubleWarpper *warpper;
} PyDoubleWarpper;

// tp_new slot: called when Python constructs a DoubleWarpper object and
// returns the new PyDoubleWarpper instance.
//
// type: the Python type being instantiated.
// args: positional arguments of the constructor call.
// kwds: keyword arguments of the constructor call; together with args it is
//       parsed for the single required float argument "value".
//
// Returns a new reference, or nullptr with a Python exception set when the
// arguments are invalid or memory cannot be obtained.
static PyObject *pycal_new(PyTypeObject *type, PyObject *args, PyObject *kwds) {
  // Keyword names live in writable storage: binding a string literal to a
  // non-const char * is not valid C++.
  static char kw_value[] = "value";
  static char *kwlist[] = {kw_value, nullptr};

  // Validate the arguments first so nothing has to be released on bad input.
  double value = 0;
  if (!PyArg_ParseTupleAndKeywords(args, kwds, "d", kwlist, &value)) {
    return nullptr;
  }

  PyDoubleWarpper *self = (PyDoubleWarpper *)type->tp_alloc(type, 0);
  if (self == nullptr) {
    // Allocation failed; do not touch the object.
    return nullptr;
  }

  try {
    self->warpper = new DoubleWarpper(value);
  } catch (...) {
    // A C++ exception must not propagate into the interpreter's C frames.
    Py_DECREF(self);
    return PyErr_NoMemory();
  }
  return (PyObject *)self;
}

// Python对象销毁时调用
static void *pycal_dealloc(PyObject *py_cal) {
delete ((PyDoubleWarpper *)py_cal)->warpper;
Py_TYPE(py_cal)->tp_free(py_cal);
return (void *)0;
}

// Extracts the wrapped DoubleWarpper pointer from a Python object. The object
// is reinterpreted as PyDoubleWarpper without any type check.
static DoubleWarpper *get_cal(PyObject *obj) {
  PyDoubleWarpper *holder = (PyDoubleWarpper *)obj;
  return holder->warpper;
}

// Builds a Python object of the given type around an existing DoubleWarpper.
//
// cal:  heap-allocated instance; ownership passes to this function. On success
//       the new Python object holds it; on failure it is deleted here so the
//       caller has nothing to clean up.
// type: the Python type of the object to create.
//
// Returns a new reference, or nullptr when the Python object cannot be
// allocated.
static PyObject *return_cal(DoubleWarpper *cal, PyTypeObject *type) {
  PyDoubleWarpper *obj = PyObject_NEW(PyDoubleWarpper, type);
  if (obj == nullptr) {
    // Without this check the assignment below would dereference a null
    // pointer and `cal` would leak.
    delete cal;
    return nullptr;
  }
  obj->warpper = cal;
  return (PyObject *)obj;
}

// Python-callable wrapper for DoubleWarpper::set. Parses one float argument,
// stores it in the wrapped instance and returns the Python integer 0; returns
// nullptr when the argument cannot be parsed.
static PyObject *pycal_set(PyObject *self, PyObject *args) {
  double new_value = 0;

  if (PyArg_ParseTuple(args, "d", &new_value)) {
    get_cal(self)->set(new_value);
    return Py_BuildValue("i", 0);
  }
  return nullptr;
}

// tp_str slot: renders the wrapped value through a std::stringstream and
// returns the resulting text as a Python string.
static PyObject *pycal_str(PyObject *self) {
  std::stringstream out;
  out << get_cal(self)->get();

  const std::string text = out.str();
  return Py_BuildValue("s", text.c_str());
}

// 实现加减乘除
static PyObject *pycal_add(PyObject *a, PyObject *b) {
DoubleWarpper *cal_a = get_cal(a);
DoubleWarpper *cal_b = get_cal(b);
DoubleWarpper *ret = new DoubleWarpper(*cal_a + *cal_b);
return return_cal(ret, a->ob_type);
}

static PyObject *pycal_minus(PyObject *a, PyObject *b) {
DoubleWarpper *cal_a = get_cal(a);
DoubleWarpper *cal_b = get_cal(b);
DoubleWarpper *ret = new DoubleWarpper(*cal_a - *cal_b);
return return_cal(ret, a->ob_type);
}

static PyObject *pycal_multipy(PyObject *a, PyObject *b) {
DoubleWarpper *cal_a = get_cal(a);
DoubleWarpper *cal_b = get_cal(b);
DoubleWarpper *ret = new DoubleWarpper(*cal_a * *cal_b);
return return_cal(ret, a->ob_type);
}

static PyObject *pycal_divide(PyObject *a, PyObject *b) {
DoubleWarpper *cal_a = get_cal(a);
DoubleWarpper *cal_b = get_cal(b);
DoubleWarpper *ret = new DoubleWarpper(*cal_a / *cal_b);
return return_cal(ret, a->ob_type);
}

// Number-protocol slot table of the object. Only addition, subtraction,
// multiplication and true division are implemented; every other slot is left
// empty. The initializers are positional, so their order has to follow the
// field order of PyNumberMethods named in the trailing comments.
static PyNumberMethods numberMethods = {
pycal_add, // nb_add
pycal_minus, // nb_subtract;
pycal_multipy, // nb_multiply
nullptr, // nb_remainder;
nullptr, // nb_divmod;
nullptr, // nb_power;
nullptr, // nb_negative;
nullptr, // nb_positive;
nullptr, // nb_absolute;
nullptr, // nb_bool;
nullptr, // nb_invert;
nullptr, // nb_lshift;
nullptr, // nb_rshift;
nullptr, // nb_and;
nullptr, // nb_xor;
nullptr, // nb_or;
nullptr, // nb_int;
nullptr, // nb_reserved;
nullptr, // nb_float;
nullptr, // nb_inplace_add;
nullptr, // nb_inplace_subtract;
nullptr, // nb_inplace_multiply;
nullptr, // nb_inplace_remainder;
nullptr, // nb_inplace_power;
nullptr, // nb_inplace_lshift;
nullptr, // nb_inplace_rshift;
nullptr, // nb_inplace_and;
nullptr, // nb_inplace_xor;
nullptr, // nb_inplace_or;
nullptr, // nb_floor_divide;
pycal_divide, // nb_true_divide;
nullptr, // nb_inplace_floor_divide;
nullptr, // nb_inplace_true_divide;
nullptr, // nb_index;
nullptr, // nb_matrix_multiply;
nullptr // nb_inplace_matrix_multiply;

};

// Member functions of the Python DoubleWarpper object. Only "set" is exposed,
// taking positional arguments (METH_VARARGS); the table ends with a sentinel
// entry whose name is null.
static PyMethodDef pycal_methods[] = {
{"set", (PyCFunction)pycal_set, METH_VARARGS, "set DoubleWarpper value."},
{nullptr}};

// Type object describing the Python DoubleWarpper class: its name and instance
// size plus the slots for deallocation, number operations, str(), methods and
// construction. All other slots are left empty.
// NOTE(review): the initializers are positional, so they depend on the field
// order of PyTypeObject in the Python version being compiled against (the
// build output elsewhere in this document targets CPython 3.10) — confirm when
// building against another version.
static PyTypeObject DoubleWarpperType = {
PyVarObject_HEAD_INIT(nullptr, 0) "libsum.DoubleWarpper", /* tp_name */
sizeof(PyDoubleWarpper), /* tp_basicsize */
0, /* tp_itemsize */
(destructor)pycal_dealloc, /* tp_dealloc */
0, /* tp_vectorcall_offset */
nullptr, /* tp_getattr */
nullptr, /* tp_setattr */
nullptr, /* tp_reserved */
nullptr, /* tp_repr */
&numberMethods, /* tp_as_number */
nullptr, /* tp_as_sequence */
nullptr, /* tp_as_mapping */
nullptr, /* tp_hash */
nullptr, /* tp_call */
pycal_str, /* tp_str */
nullptr, /* tp_getattro */
nullptr, /* tp_setattro */
nullptr, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
"Coustom DoubleWarpper class.", /* tp_doc */
nullptr, /* tp_traverse */
nullptr, /* tp_clear */
nullptr, /* tp_richcompare */
0, /* tp_weaklistoffset */
nullptr, /* tp_iter */
nullptr, /* tp_iternext */
pycal_methods, /* tp_methods */
nullptr, /* tp_members */
nullptr, /* tp_getset */
nullptr, /* tp_base */
nullptr, /* tp_dict */
nullptr, /* tp_descr_get */
nullptr, /* tp_descr_set */
0, /* tp_dictoffset */
nullptr, /* tp_init */
nullptr, /* tp_alloc */
pycal_new /* tp_new */
};


// Module initialization: finalizes the DoubleWarpper type, creates the module
// and publishes the type as the module attribute "DoubleWarpper". Returns the
// module, or nullptr when any step fails.
PyMODINIT_FUNC PyInit_libsum(void) {
  if (PyType_Ready(&DoubleWarpperType) < 0) {
    return nullptr;
  }

  PyObject *module = PyModule_Create(&summodule);
  if (!module) {
    return nullptr;
  }

  // Why the extra reference: per the CPython documentation,
  // PyModule_AddObject takes over ("steals") one reference to the value when
  // it succeeds, so a reference is acquired for it here. On failure nothing
  // was taken over and that reference is dropped again below.
  PyObject *type_obj = (PyObject *)&DoubleWarpperType;
  Py_INCREF(type_obj);
  if (PyModule_AddObject(module, "DoubleWarpper", type_obj) >= 0) {
    return module;
  }

  Py_DECREF(type_obj);
  Py_DECREF(module);
  return nullptr;
}

执行结果

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# Demo driver: exercises the plain function, the array-based function and the
# custom DoubleWarpper type exported by the libsum extension module.
import array
import libsum

# Plain function taking two ints.
print("Simple case:")
a = 1
b = 2
c = libsum.sum(a, b)
print(f"{a} + {b} = {c}\n")

# A typed array of doubles is handed to the C side.
print("Pass array:")
d = libsum.avg(array.array('d', [1.0, 2.0, 3.0, 4.0, 5.0]))
print(f"avg = {d}\n")

# Custom type: construct, mutate, then combine with the arithmetic operators.
print("Define new struct:")
e = libsum.DoubleWarpper(1)
f = libsum.DoubleWarpper(2)
print(f"e = {e}, f = {f}")

e.set(20)
f.set(10)
print(f"after set: e = {e}, f = {f}")

g = e + f
h = e - f
i = e * f
j = e / f
print(f"{e} + {f} = {g}")
print(f"{e} - {f} = {h}")
print(f"{e} * {f} = {i}")
print(f"{e} / {f} = {j}\n")

(base) hua@hfc-ascend:~/code/share$ python py_sum.py
Simple case:
1 + 2 = 3

Pass array:
avg = 3.0

Define new struct:
e = 1, f = 2
after set: e = 20, f = 10
20 + 10 = 30
20 - 10 = 10
20 * 10 = 200
20 / 10 = 2

pybind11

简单例子

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
#include <Python.h>
#include <pybind11/operators.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>

#include <sstream>
#include <string>
#include <vector>

namespace py = pybind11;

// Returns the sum of the two integers a and b.
int sum(int a, int b) {
  const int total = a + b;
  return total;
}

// Arithmetic mean of the elements of arr.
//
// arr is taken by const reference because the function only reads it; this
// also lets callers pass const vectors and temporaries. The elements are
// summed in order and the total is divided by the element count; an empty
// vector is not treated specially.
double avg(const std::vector<double> &arr) {
  double total = 0;
  for (double value : arr) {
    total += value;
  }
  return total / arr.size();
}

// Defines the Python module "libsum": sets the module docstring and exposes
// the free functions sum and avg, each with its own docstring.
PYBIND11_MODULE(libsum, m) {
  m.doc() = "Py module example.";
  m.def("sum", &sum, "Calculate the sum of two integers.");
  // Docstring typo fixed: it previously read "alculate".
  m.def("avg", &avg, "Calculate the avg of a double array.");
}

自定义类型

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
// Value type bound to Python below: wraps a single double and offers get/set
// plus the four arithmetic operators.
class DoubleWarpper {
private:
  double m_value;

public:
  DoubleWarpper(double value) : m_value(value) {}
  virtual ~DoubleWarpper() { m_value = 0; }

  // Const-qualified: reading the value does not modify the object, so the
  // accessor is usable on const instances and through const references.
  double get() const { return m_value; }
  void set(double value) { m_value = value; }

  // The operators produce a new wrapper and do not modify their operands.
  DoubleWarpper operator+(const DoubleWarpper &other) const {
    return DoubleWarpper(m_value + other.m_value);
  }
  DoubleWarpper operator-(const DoubleWarpper &other) const {
    return DoubleWarpper(m_value - other.m_value);
  }
  DoubleWarpper operator*(const DoubleWarpper &other) const {
    return DoubleWarpper(m_value * other.m_value);
  }
  DoubleWarpper operator/(const DoubleWarpper &other) const {
    return DoubleWarpper(m_value / other.m_value);
  }
};

// Defines the Python module "libsum": the free functions sum and avg plus the
// DoubleWarpper class with its constructor, accessors, arithmetic operators
// and __repr__.
PYBIND11_MODULE(libsum, m) {
  m.doc() = "Py module example.";
  m.def("sum", &sum, "Calculate the sum of two integers.");
  // Docstring typo fixed: it previously read "alculate".
  m.def("avg", &avg, "Calculate the avg of a double array.");

  py::class_<DoubleWarpper>(m, "DoubleWarpper")
      .def(py::init<double>())
      .def("set", &DoubleWarpper::set)
      .def("get", &DoubleWarpper::get)
      // Operator overloads (py::self comes from <pybind11/operators.h>).
      .def(py::self + py::self)
      .def(py::self - py::self)
      .def(py::self * py::self)
      .def(py::self / py::self)
      .def("__repr__", [](DoubleWarpper &warpper) {
        std::stringstream ss;
        ss << warpper.get();
        // Return the std::string by value. Returning ss.str().c_str() would
        // hand back a pointer into a temporary that is destroyed at the end
        // of the return statement, i.e. a dangling pointer.
        return ss.str();
      });
}

toml

pyproject.toml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# pyproject.toml
# Build configuration for the libsum package: setuptools is the build backend
# and a custom build_py command (see [tool.setuptools.cmdclass]) is plugged in.
[build-system]
# NOTE(review): "cython" is listed as a build requirement although nothing in
# this configuration references it — confirm whether it is needed.
requires = ["setuptools>=61.0", "cython"]
build-backend = "setuptools.build_meta"

# Package metadata.
[project]
name = "libsum"
description = "This is a libsum module"
version = "0.0.1"
readme = "README.md"
requires-python = ">=3.10"
authors = [
{ name="huafengchun", email="huafengchun@huawei.com" },
]
classifiers = [
"Programming Language :: Python :: 3",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
]
keywords = ["sum", "avg"]

[project.urls]
# NOTE(review): the value has no URL scheme (e.g. "https://") — confirm that
# the tooling accepts it as a project URL.
"Homepage" = "w3.huawei.com"

[tool.setuptools]
# Ships the helper module that defines the custom build command.
py-modules = ["_custom_build"]

[tool.setuptools.cmdclass]
# Replaces the standard build_py command with the class from _custom_build.py.
build_py = "_custom_build.build_py"
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# _custom_build.py

from setuptools import Extension
from setuptools.command.build_py import build_py as _build_py

class build_py(_build_py):
    """Custom ``build_py`` command that also builds the libsum extension.

    ``run`` triggers ``build_ext`` before the regular ``build_py`` work, and
    ``initialize_options`` registers the ``libsum`` extension with the
    distribution so that ``build_ext`` has something to build.
    """

    def run(self):
        """Build the extension module first, then run the standard build_py."""
        self.run_command("build_ext")
        return super().run()

    def initialize_options(self):
        """Initialize the base options and append the libsum extension."""
        super().initialize_options()
        # Identity comparison is the correct test for None; `== None` can be
        # overridden by an object's __eq__.
        if self.distribution.ext_modules is None:
            self.distribution.ext_modules = []

        # NOTE(review): the source is named "libsum.c" here while the directory
        # listing in this document shows libsum.cc / libsum_pybind11.cc, and
        # the include paths are absolute paths of one machine — confirm both
        # before reusing this file elsewhere.
        self.distribution.ext_modules.append(
            Extension(
                "libsum",
                sources=["libsum.c"],
                extra_compile_args=['-g', '-I/home/hua/anaconda3/include/python3.10', '-I/home/hua/anaconda3/lib/python3.10/site-packages/pybind11/include'],
            )
        )
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
python -m build

(base) hua@hfc-ascend:~/code/share$ ll
total 2488
drwxrwxr-x 7 hua hua 4096 Jun 5 09:20 ./
drwxrwxr-x 5 hua hua 4096 May 30 11:08 ../
drwxrwxr-x 4 hua hua 4096 Jun 5 09:17 build/
-rw-rw-r-- 1 hua hua 716 Jun 5 09:16 _custom_build.py
drwxrwxr-x 2 hua hua 4096 Jun 5 09:16 dist/
-rw-rw-r-- 1 hua hua 9015 May 29 19:14 libsum.cc
-rwxrwxr-x 1 hua hua 2485776 Jun 5 09:17 libsum.cpython-310-x86_64-linux-gnu.so*
drwxrwxr-x 2 hua hua 4096 Jun 5 09:16 libsum.egg-info/
-rw-rw-r-- 1 hua hua 1715 May 29 19:33 libsum_pybind11.cc
drwxrwxr-x 2 hua hua 4096 Jun 5 09:16 __pycache__/
-rw-rw-r-- 1 hua hua 660 Jun 5 09:16 pyproject.toml
-rw-rw-r-- 1 hua hua 535 May 29 19:17 py_sum.py
-rw-rw-r-- 1 hua hua 477 May 29 19:16 setup.py
drwxrwxr-x 2 hua hua 4096 May 30 11:32 .vscode/