字节码与虚拟机
当每次调用函数或刚开始运行时候,建立新 frame,然后在这个 frame 的环境下一条条的运行 bytecode,每一条 bytecode 都有相应的 c 语言代码执行,在每一个 frame python 会维护一个 stack,然后 bytecode 和 stack 进行交互,当然也会和 code object 保存信息进行交互,执行逻辑运算结果
import dis
def add(a, b):
return a + b
dis.dis(add)
5 0 LOAD_FAST 0 (a)
2 LOAD_FAST 1 (b)
4 BINARY_ADD
6 RETURN_VALUE
// https://github.com/python/cpython/blob/ddd1949fea59f256e51191540a4446f75ed608fa/Python/ceval.c#L1070
case TARGET(LOAD_FAST): {
PyObject *value = GETLOCAL(oparg);
if (value == NULL) {
format_exc_check_arg(tstate, PyExc_UnboundLocalError,
UNBOUNDLOCAL_ERROR_MSG,
PyTuple_GetItem(co->co_varnames, oparg));
goto error;
}
Py_INCREF(value); // 引用
PUSH(value);
FAST_DISPATCH(); // 进入下个循环
}
// https://github.com/python/cpython/blob/ddd1949fea59f256e51191540a4446f75ed608fa/Python/ceval.c#L1280
case TARGET(BINARY_ADD): {
PyObject *right = POP();
PyObject *left = TOP();
PyObject *sum;
/* NOTE(haypo): Please don't try to micro-optimize int+int on
CPython using bytecode, it is simply worthless.
See http://bugs.python.org/issue21955 and
http://bugs.python.org/issue10044 for the discussion. In short,
no patch shown any impact on a realistic benchmark, only a minor
speedup on microbenchmarks. */
// 关于 string 的优化
if (PyUnicode_CheckExact(left) &&
PyUnicode_CheckExact(right)) {
sum = unicode_concatenate(left, right, f, next_instr);
/* unicode_concatenate consumed the ref to left */
}
else {
sum = PyNumber_Add(left, right);
Py_DECREF(left);
}
Py_DECREF(right);
SET_TOP(sum);
if (sum == NULL)
goto error;
DISPATCH();
}
// https://github.com/python/cpython/blob/ddd1949fea59f256e51191540a4446f75ed608fa/Python/ceval.c#L1648
case TARGET(RETURN_VALUE): {
retval = POP();
assert(f->f_iblock == 0);
goto return_or_yield;
}
Code Object
编译一次就不会再改变
def f(a, *, b=3, **kwargs):
pass
code = f.__code__
# 代码位置
print(f"co_code: {code.co_code}") # 这段代码真正的 bytecode,用二进制表示
print(f"co_filename: {code.co_filename}") # 这段 code 的名字,在哪一个文件里被定义的
print(
f"co_lnotab: {code.co_lnotab}"
) # mapping,bytecode 对应的源代码行数,用二进制表示
# virtual machine 需要的数据
print(
f"co_flags: {code.co_flags}"
) # bitmap,编译时会判断 code 有没有特别属性根据这些来执行不同的逻辑,比如有没有 *args/**kwargs/generator/coroutine
print(f"co_stacksize: {code.co_stacksize}") # virtual machine 需要栈空间有多大
# 输入参数数量,python 函数重载基础
print(f"co_argcount: {code.co_argcount}") # 排除 * 和 ** 之后的数量
print(
f"co_posonlyargcount: {code.co_posonlyargcount}"
) # / 之前的所有参数都是 args 变量
print(
f"co_kwonlyargcount: {code.co_kwonlyargcount}"
) # * 之后的所有参数都是 kwargs 变量
f(1)
f(1, b=1)
f(a=1)
print(f"co_nlocals: {code.co_nlocals}") # 局部变量数量
print(f"co_varnames: {code.co_varnames}") # 局部变量
print(f"co_names: {code.co_names}") # 除了 varnames/cellvars/freevars 之外的变量
print(f"co_cellvars: {code.co_cellvars}") # 其他的 scope 也会用,一般用来实现闭包方式
print(f"co_freevars: {code.co_freevars}") # 这个 vars 是从其他 scope 中来的
print(f"co_consts: {code.co_consts}") # 函数中出现的常量
co_code: b'd\x00S\x00'
co_filename: /blog/Python深入学习/test.py
co_lnotab: b'\x00\x01'
co_flags: 75
co_stacksize: 1
co_argcount: 1
co_posonlyargcount: 0
co_kwonlyargcount: 1
co_nlocals: 3
co_varnames: ('a', 'b', 'kwargs')
co_names: ()
co_cellvars: ()
co_freevars: ()
co_consts: (None,)
Frame
- 和 Code Object 不同是动态的,记录当前运行时状态
- 每个函数只会编译出来一个 code object,大部分信息是保存在 code object 里面的,而 frame 根据执行逻辑不同,可能会对同一函数有不同的 frame
- 每一次函数的执行开始到结束可以认为是当前 frame 的开始结束,结束时返回上一个 frame(栈结构)
import inspect
# import sys
from objprint import op
def f():
frame = inspect.currentframe()
# frame = sys._getframe()
op(frame, honor_existing=False, depth=1)
f()
<frame 0x7f7064b28400
.f_back = <frame 0x560f77c639c0 ... >, # 上一个 frame
.f_builtins = { ... }, # 这个函数下的内置函数
.f_code = <code 0x7f7064e6f500 ... >, # 代码运行信息
.f_globals = { ... }, # 全局变量
.f_lasti = 18, # 运行到哪个字节码
.f_lineno = 10, # 运行到第几行
.f_locals = { ... }, # 局部变量,实质上是一个读出来的值,用了类似数组的方式保存的
.f_trace = None, # 下面 3 个变量和 trace 有关,比如 debugger/coverage,sys.settrace 设置后就不是 None
.f_trace_lines = True, # 是否每一行触发函数
.f_trace_opcodes = False # 是否每一个字节码触发函数
>
import inspect
def f():
frame = inspect.currentframe()
# 调用方函数的情况
print(frame.f_back.f_code.co_name)
print(frame.f_back.f_locals)
# 需要知道是调用了这个函数
print(frame.f_back.f_code.co_filename)
print(frame.f_back.f_lineno)
def g():
a = 3
b = 4
f()
g()
g
{'a': 3, 'b': 4}
路径/test4.py
20
BINARY_ADD
// https://github.com/python/cpython/blob/ddd1949fea59f256e51191540a4446f75ed608fa/Python/ceval.c#L1280
case TARGET(BINARY_ADD): {
PyObject *right = POP();
PyObject *left = TOP();
PyObject *sum;
/* NOTE(haypo): Please don't try to micro-optimize int+int on
CPython using bytecode, it is simply worthless.
See http://bugs.python.org/issue21955 and
http://bugs.python.org/issue10044 for the discussion. In short,
no patch shown any impact on a realistic benchmark, only a minor
speedup on microbenchmarks. */
// 关于 string 的优化
if (PyUnicode_CheckExact(left) &&
PyUnicode_CheckExact(right)) {
sum = unicode_concatenate(left, right, f, next_instr);
/* unicode_concatenate consumed the ref to left */
}
else {
// 耗时最长
sum = PyNumber_Add(left, right);
Py_DECREF(left);
}
Py_DECREF(right);
SET_TOP(sum);
if (sum == NULL)
goto error;
DISPATCH();
}
// https://github.com/python/cpython/blob/95c340a86b828cac6c0a2e4f2fa8a96695388c73/Objects/abstract.c#L956
PyObject *
PyNumber_Add(PyObject *v, PyObject *w)
{
PyObject *result = binary_op1(v, w, NB_SLOT(nb_add));
if (result == Py_NotImplemented) {
PySequenceMethods *m = v->ob_type->tp_as_sequence;
Py_DECREF(result);
if (m && m->sq_concat) {
return (*m->sq_concat)(v, w);
}
result = binop_type_error(v, w, "+");
}
return result;
}
// https://github.com/python/cpython/blob/95c340a86b828cac6c0a2e4f2fa8a96695388c73/Objects/abstract.c#L785
static PyObject *
binary_op1(PyObject *v, PyObject *w, const int op_slot)
{
PyObject *x;
binaryfunc slotv = NULL;
binaryfunc slotw = NULL;
// 左边是数字,开始加
if (v->ob_type->tp_as_number != NULL)
slotv = NB_BINOP(v->ob_type->tp_as_number, op_slot);
// 左右两边类型不同且右边是数字
if (w->ob_type != v->ob_type &&
w->ob_type->tp_as_number != NULL) {
slotw = NB_BINOP(w->ob_type->tp_as_number, op_slot);
if (slotw == slotv)
slotw = NULL;
}
if (slotv) {
// v/w 是同一类型,且 v/w 都有 + 函数指针,比如 1 + 1
if (slotw && PyType_IsSubtype(w->ob_type, v->ob_type)) {
x = slotw(v, w);
if (x != Py_NotImplemented)
return x;
Py_DECREF(x); /* can't do it */
slotw = NULL;
}
// 调用 long_add
x = slotv(v, w);
if (x != Py_NotImplemented)
return x;
Py_DECREF(x); /* can't do it */
}
if (slotw) {
x = slotw(v, w);
if (x != Py_NotImplemented)
return x;
Py_DECREF(x); /* can't do it */
}
Py_RETURN_NOTIMPLEMENTED;
}
static PyObject *
long_add(PyLongObject *a, PyLongObject *b)
{
// a, b 能不能做加法
CHECK_BINOP(a, b);
return _PyLong_Add(a, b);
}
PyObject *
_PyLong_Add(PyLongObject *a, PyLongObject *b)
{
// a, b 都比较小的时候,可以加
if (_PyLong_BothAreCompact(a, b)) {
// 耗时部分
return _PyLong_FromSTwoDigits(medium_value(a) + medium_value(b));
}
// 如果 a, b 很大的情况
PyLongObject *z;
if (_PyLong_IsNegative(a)) {
if (_PyLong_IsNegative(b)) {
z = x_add(a, b);
if (z != NULL) {
/* x_add received at least one multiple-digit int,
and thus z must be a multiple-digit int.
That also means z is not an element of
small_ints, so negating it in-place is safe. */
assert(Py_REFCNT(z) == 1);
_PyLong_FlipSign(z);
}
}
else
z = x_sub(b, a);
}
else {
if (_PyLong_IsNegative(b))
z = x_sub(a, b);
else
z = x_add(a, b);
}
return (PyObject *)z;
}
/* Create a new int object from a C word-sized int */
static inline PyObject *
_PyLong_FromSTwoDigits(stwodigits x)
{
if (IS_SMALL_INT(x)) {
return get_small_int((sdigit)x);
}
assert(x != 0);
if (is_medium_int(x)) {
return _PyLong_FromMedium((sdigit)x);
}
return _PyLong_FromLarge(x);
}
static PyObject *
get_small_int(sdigit ival)
{
assert(IS_SMALL_INT(ival));
return (PyObject *)&_PyLong_SMALL_INTS[_PY_NSMALLNEGINTS + ival];
}
static PyObject *
_PyLong_FromMedium(sdigit x)
{
assert(!IS_SMALL_INT(x));
assert(is_medium_int(x));
/* We could use a freelist here */
PyLongObject *v = PyObject_Malloc(sizeof(PyLongObject));
if (v == NULL) {
PyErr_NoMemory();
return NULL;
}
digit abs_x = x < 0 ? -x : x;
_PyLong_SetSignAndDigitCount(v, x<0?-1:1, 1);
_PyObject_Init((PyObject*)v, &PyLong_Type);
v->long_value.ob_digit[0] = abs_x;
return (PyObject*)v;
}
static PyObject *
_PyLong_FromLarge(stwodigits ival)
{
twodigits abs_ival;
int sign;
assert(!is_medium_int(ival));
if (ival < 0) {
/* negate: can't write this as abs_ival = -ival since that
invokes undefined behaviour when ival is LONG_MIN */
abs_ival = 0U-(twodigits)ival;
sign = -1;
}
else {
abs_ival = (twodigits)ival;
sign = 1;
}
/* Must be at least two digits */
assert(abs_ival >> PyLong_SHIFT != 0);
twodigits t = abs_ival >> (PyLong_SHIFT * 2);
Py_ssize_t ndigits = 2;
while (t) {
++ndigits;
t >>= PyLong_SHIFT;
}
PyLongObject *v = _PyLong_New(ndigits);
if (v != NULL) {
digit *p = v->long_value.ob_digit;
_PyLong_SetSignAndDigitCount(v, sign, ndigits);
t = abs_ival;
while (t) {
*p++ = Py_SAFE_DOWNCAST(
t & PyLong_MASK, twodigits, digit);
t >>= PyLong_SHIFT;
}
}
return (PyObject *)v;
}
PyLongObject *
_PyLong_New(Py_ssize_t size)
{
assert(size >= 0);
PyLongObject *result;
if (size > (Py_ssize_t)MAX_LONG_DIGITS) {
PyErr_SetString(PyExc_OverflowError,
"too many digits in integer");
return NULL;
}
/* Fast operations for single digit integers (including zero)
* assume that there is always at least one digit present. */
Py_ssize_t ndigits = size ? size : 1;
/* Number of bytes needed is: offsetof(PyLongObject, ob_digit) +
sizeof(digit)*size. Previous incarnations of this code used
sizeof() instead of the offsetof, but this risks being
incorrect in the presence of padding between the header
and the digits. */
// 申请新内存,耗时核心原因
result = PyObject_Malloc(offsetof(PyLongObject, long_value.ob_digit) +
ndigits*sizeof(digit));
if (!result) {
PyErr_NoMemory();
return NULL;
}
_PyLong_SetSignAndDigitCount(result, size != 0, size);
_PyObject_Init((PyObject*)result, &PyLong_Type);
/* The digit has to be initialized explicitly to avoid
* use-of-uninitialized-value. */
result->long_value.ob_digit[0] = 0;
return result;
}
综上,python 做一个简单加法这么复杂的原因:
- 灵活代价,复杂度交给了底层性能不高
- 建立对象消耗性能
Gil
// https://github.com/python/cpython/blob/e225bebc1409bcf68db74a35ed3c31222883bf8f/Python/ceval.c#L1234
if (_Py_atomic_load_relaxed(&ceval->gil_drop_request)) {
/* Give another thread a chance */
if (_PyThreadState_Swap(&runtime->gilstate, NULL) != tstate) {
Py_FatalError("ceval: tstate mix-up");
}
drop_gil(ceval, tstate);
/* Other threads may run now */
take_gil(ceval, tstate);
/* Check if we should make a quick exit. */
exit_thread_if_finalizing(tstate);
if (_PyThreadState_Swap(&runtime->gilstate, tstate) != NULL) {
Py_FatalError("ceval: orphan tstate");
}
}
以上代码保证了当前线程会拿到全局的 gil 锁,通过这种机制可以保证每个 bytecode 在运行时会拿到全局锁,不会被其他线程打断
有了这个全局锁之后好处是:
- 简单的设计不会出错:比每个 object 都实现一个锁要简单
- 由于只有一个线程锁,避免了死锁问题
- 对于单线程或没法并行的多线程程序,全局锁性能优秀,如果每个 object 都实现一个锁,那么由于每一行 bytecode 要读写很多 object,需要加很多锁性能较低
- cpython 维护简单,不需要在修改 python object 的时候关心锁问题,让第三方开发者心智负担降低
当然有做过拿掉 gil 的尝试,但是有以下问题:
- 不能保证单进程、单线程下运行速度
- 向后兼容问题,不能兼容之前的代码
坏处是:
-
python 多线程无法利用多核增加运行速度,但是可以用以下方式解决
- 多进程
- c 拓展在 c 里面做多线程
- 无 gil 的 python 解释器
Descriptor
# 情况 1
class Name:
def __get__(self, obj, objtype):
return "Peter"
class A:
name = Name()
print(A.name) # Peter
# 情况 2
class Name:
def __get__(self, obj, objtype):
return "Peter"
class A:
def __init__(self):
self.name = Name()
o = A()
print(o.name) # <__main__.Name object at 0x7fb1658443a0>
# 情况 3
class Name:
def __get__(self, obj, objtype):
return "Peter"
class A:
name = Name()
o = A()
o.name = "Bob"
print(o.name) # Bob
# 情况 4
class Name:
def __get__(self, obj, objtype):
return "Peter"
class A:
name = Name()
o = A()
o.name = "Bob"
Name.__set__ = lambda x, y, z: None
print(o.name) # Peter
本质上和字节码 LOAD_ATTR
、STORE_ATTR
有关,有着相似的逻辑,下面只说明 LOAD_ATTR
的情况
// https://github.com/python/cpython/blob/95c340a86b828cac6c0a2e4f2fa8a96695388c73/Python/ceval.c#L2963
case TARGET(LOAD_ATTR): {
PyObject *name = GETITEM(names, oparg);
PyObject *owner = TOP();
// 核心逻辑
PyObject *res = PyObject_GetAttr(owner, name);
Py_DECREF(owner);
SET_TOP(res);
if (res == NULL)
goto error;
DISPATCH();
}
// https://github.com/python/cpython/blob/95c340a86b828cac6c0a2e4f2fa8a96695388c73/Objects/object.c#L929
PyObject *
PyObject_GetAttr(PyObject *v, PyObject *name)
{
PyTypeObject *tp = Py_TYPE(v);
if (!PyUnicode_Check(name)) {
PyErr_Format(PyExc_TypeError,
"attribute name must be string, not '%.200s'",
name->ob_type->tp_name);
return NULL;
}
if (tp->tp_getattro != NULL)
// 核心逻辑 getattro
return (*tp->tp_getattro)(v, name);
if (tp->tp_getattr != NULL) {
const char *name_str = PyUnicode_AsUTF8(name);
if (name_str == NULL)
return NULL;
return (*tp->tp_getattr)(v, (char *)name_str);
}
PyErr_Format(PyExc_AttributeError,
"'%.50s' object has no attribute '%U'",
tp->tp_name, name);
return NULL;
}
// https://github.com/python/cpython/blob/95c340a86b828cac6c0a2e4f2fa8a96695388c73/Objects/typeobject.c#L4796
PyTypeObject PyBaseObject_Type = {
PyVarObject_HEAD_INIT(&PyType_Type, 0)
"object", /* tp_name */
sizeof(PyObject), /* tp_basicsize */
0, /* tp_itemsize */
object_dealloc, /* tp_dealloc */
0, /* tp_vectorcall_offset */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_as_async */
object_repr, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
(hashfunc)_Py_HashPointer, /* tp_hash */
0, /* tp_call */
object_str, /* tp_str */
// 没有重载的时候用的逻辑
PyObject_GenericGetAttr, /* tp_getattro */
PyObject_GenericSetAttr, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
object_doc, /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
object_richcompare, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
object_methods, /* tp_methods */
0, /* tp_members */
object_getsets, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
object_init, /* tp_init */
PyType_GenericAlloc, /* tp_alloc */
object_new, /* tp_new */
PyObject_Del, /* tp_free */
};
// https://github.com/python/cpython/blob/95c340a86b828cac6c0a2e4f2fa8a96695388c73/Objects/object.c#L1332C1-L1336C2
PyObject *
PyObject_GenericGetAttr(PyObject *obj, PyObject *name)
{
return _PyObject_GenericGetAttrWithDict(obj, name, NULL, 0);
}
// https://github.com/python/cpython/blob/95c340a86b828cac6c0a2e4f2fa8a96695388c73/Objects/object.c#L1217
// obj: obj 本身,name: attr 值,字符串
PyObject *
_PyObject_GenericGetAttrWithDict(PyObject *obj, PyObject *name,
PyObject *dict, int suppress)
{
/* Make sure the logic of _PyObject_GetMethod is in sync with
this method.
When suppress=1, this function suppress AttributeError.
*/
// obj 的 type 就是 class A
PyTypeObject *tp = Py_TYPE(obj);
PyObject *descr = NULL;
PyObject *res = NULL;
descrgetfunc f;
Py_ssize_t dictoffset;
PyObject **dictptr;
if (!PyUnicode_Check(name)){
PyErr_Format(PyExc_TypeError,
"attribute name must be string, not '%.200s'",
name->ob_type->tp_name);
return NULL;
}
Py_INCREF(name);
if (tp->tp_dict == NULL) {
if (PyType_Ready(tp) < 0)
goto done;
}
// 在这个 type 里找 name,也就是说 descriptor 必须定义在 type 上,即写到 class 的定义里面,解释了情况 2
descr = _PyType_Lookup(tp, name);
f = NULL;
if (descr != NULL) {
Py_INCREF(descr);
// 首先寻找了可能是 descriptor 的 object 有没有 tp_descr_get 函数,即有没有 __get__ 函数
f = descr->ob_type->tp_descr_get;
// 判断 IsData:实际上就是有没有 __set__ 函数
if (f != NULL && PyDescr_IsData(descr)) {
// 返回 __get__ 函数的返回值
res = f(descr, obj, (PyObject *)obj->ob_type);
if (res == NULL && suppress &&
PyErr_ExceptionMatches(PyExc_AttributeError)) {
PyErr_Clear();
}
goto done;
}
}
// 找到 object 本身的 __dict__,里面保存了所有的成员变量
if (dict == NULL) {
/* Inline _PyObject_GetDictPtr */
dictoffset = tp->tp_dictoffset;
if (dictoffset != 0) {
if (dictoffset < 0) {
Py_ssize_t tsize;
size_t size;
tsize = ((PyVarObject *)obj)->ob_size;
if (tsize < 0)
tsize = -tsize;
size = _PyObject_VAR_SIZE(tp, tsize);
_PyObject_ASSERT(obj, size <= PY_SSIZE_T_MAX);
dictoffset += (Py_ssize_t)size;
_PyObject_ASSERT(obj, dictoffset > 0);
_PyObject_ASSERT(obj, dictoffset % SIZEOF_VOID_P == 0);
}
dictptr = (PyObject **) ((char *)obj + dictoffset);
dict = *dictptr;
}
}
// object 的 key 里面找对应的值,和 object 绑定的变量
if (dict != NULL) {
Py_INCREF(dict);
res = PyDict_GetItemWithError(dict, name);
if (res != NULL) {
Py_INCREF(res);
Py_DECREF(dict);
goto done;
}
else {
Py_DECREF(dict);
if (PyErr_Occurred()) {
if (suppress && PyErr_ExceptionMatches(PyExc_AttributeError)) {
PyErr_Clear();
}
else {
goto done;
}
}
}
}
// 如果只有 __get__ 函数,没有 __set__ 函数
if (f != NULL) {
res = f(descr, obj, (PyObject *)Py_TYPE(obj));
if (res == NULL && suppress &&
PyErr_ExceptionMatches(PyExc_AttributeError)) {
PyErr_Clear();
}
goto done;
}
// 如果没有 __get__、__set__ 函数,原样返回
// 对应如下代码
// class A:
// name = "Alice"
// a = A()
// print(a.__dict__) # {},没有值
// print(a.name) # 没发现 name 有 __get__ 或 __set__ 函数,直接返回
// 实现的是实例成员变量没有找到,再到类的属性上找,甚至到父类
if (descr != NULL) {
res = descr;
descr = NULL;
goto done;
}
if (!suppress) {
PyErr_Format(PyExc_AttributeError,
"'%.50s' object has no attribute '%U'",
tp->tp_name, name);
}
done:
Py_XDECREF(descr);
Py_DECREF(name);
return res;
}
总结,LOAD_ATTR
优先级如下,从大到小:
- 如果
__get__
、__set__
都有,用__get__
返回值 - 在
__dict__
找对应的值 - 用
__get__
返回值 - 类的属性
Decorator
从字节码来看,是一个输入输出都是函数的语法糖
def dec(f):
pass
@dec
def double(x):
return x * 2
# 完全等价
double = dec(double)
但是输出不一定是函数
def dec(f):
return 1
@dec
def double(x):
return x * 2
当然,装饰器还可以带参数
import time
def timeit(iter):
def inner(f):
def wrapper(*args, **kwargs):
start = time.time()
for _ in range(iter):
ret = f(*args, **kwargs)
print(time.time() - start)
return ret
return wrapper
return inner
@timeit(1000)
def double(x):
return x * 2
# double(3)
# 等价于
inner = timeit(1000)
double = inner(double)
类装饰器
import time
class Timer:
def __init__(self, func):
self.func = func
def __call__(self, *args, **kwargs):
start = time.time()
ret = self.func(*args, **kwargs)
print(f"Time: {time.time() - start}")
return ret
@Timer
def add(a, b):
return a + b
# 等价于
# add = Timer(add)
print(add(2, 3))
import time
class Timer:
def __init__(self, prefix):
self.prefix = prefix
def __call__(self, func):
def wrapper(*args, **kwargs):
start = time.time()
ret = func(*args, **kwargs)
print(f"{self.prefix}{time.time() - start}")
return ret
return wrapper
@Timer(prefix="curr_time: ")
def add(a, b):
return a + b
# 等价于
# add = Timer(prefix="curr_time: ")(add)
print(add(2, 3))
上面的 2 种都是装饰器类,下面是真正的类装饰器
def add_str(cls):
def __str__(self):
return str(self.__dict__)
cls.__str__ = __str__
return cls
@add_str
class MyObject:
def __init__(self, a, b):
self.a = a
self.b = b
# 等价于
# MyObject = add_str(MyObject)
o = MyObject(1, 2)
print(o)
装饰器在类里面的调用方法
def log_function(func):
def wrapper(*args, **kwargs):
print(f"function start!")
print(f"args: {args}")
ret = func(*args, **kwargs)
print(f"function end!")
return ret
return wrapper
@log_function
def fib(n):
if n <= 1:
return 0
return fib(n - 1) + fib(n - 2)
fib(3)
如何把全局的装饰器移到类里面?下面是一种可能的实现
class Decorators:
def log_function(self, func):
def wrapper(*args, **kwargs):
print(f"function start!")
print(f"args: {args}")
ret = func(*args, **kwargs)
print(f"function end!")
return ret
return wrapper
d = Decorators()
@d.log_function
def fib(n):
if n <= 1:
return 0
return fib(n - 1) + fib(n - 2)
fib(3)
以上代码虽然可以,但是有以下缺点
- 为了使用这个装饰器,需要建立新对象(classmethod 解决,但还是会带有 self)
- 装饰器出现 self 参数(staticmethod 解决,不会带 self)
class Decorators:
@staticmethod
def log_function(func):
def wrapper(*args, **kwargs):
print(f"function start!")
print(f"args: {args}")
ret = func(*args, **kwargs)
print(f"function end!")
return ret
return wrapper
@Decorators.log_function
def fib(n):
if n <= 1:
return 0
return fib(n - 1) + fib(n - 2)
fib(3)
如果想用类里面的装饰器去装饰类里面的方法,可以直接去掉装饰器
class Decorators:
def log_function(func):
def wrapper(*args, **kwargs):
print(f"function start!")
print(f"args: {args}")
ret = func(*args, **kwargs)
print(f"function end!")
return ret
return wrapper
@log_function
def fib(self, n):
if n <= 1:
return 0
return self.fib(n - 1) + self.fib(n - 2)
d = Decorators()
d.fib(3)
当然如果需要支持以下 3 种情况的调用,最好的方式如下
- 在类内部调用
- 在类外部以类调用
- 在类外部以对象调用
class Decorators:
# 本身就支持在类外部以类调用
def log_function(func):
def wrapper(*args, **kwargs):
print(f"function start!")
print(f"args: {args}")
ret = func(*args, **kwargs)
print(f"function end!")
return ret
return wrapper
@log_function
def fib(self, n):
if n <= 1:
return 0
return self.fib(n - 1) + self.fib(n - 2)
# 在 fib 后声明,防止影响上面的 fib 装饰器调用
# 同时也提供给外面调用,即支持了在类外部以对象调用
log_function = staticmethod(log_function)
d = Decorators()
d.fib(3)
@Decorators.log_function
def f():
pass
@d.log_function
def g():
pass
f()
g()
迭代器
lst = [1, 3, 5]
for i in lst:
print(i)
d = {"a": 1, "b": 2}
for i in d:
print(i)
with open("my.txt") as f:
for i in f:
print(i)
可迭代对象 + 迭代器,以上都可迭代
class NodeIter:
def __init__(self, node):
self.curr_node = node
def __next__(self):
if self.curr_node is None:
raise StopIteration
node, self.curr_node = self.curr_node, self.curr_node.next
return node
class Node:
def __init__(self, name):
self.name = name
self.next = None
def __iter__(self):
return NodeIter(self)
node1 = Node("node1")
node2 = Node("node2")
node3 = Node("node3")
node1.next = node2
node2.next = node3
for node in iter(node1):
print(node.name)
上面的代码运行到 iter(node1)
就报错,说明迭代器不一定是可迭代对象(Iterator 不一定 Iterable),所以修改如下
class NodeIter:
def __init__(self, node):
self.curr_node = node
def __next__(self): # 是迭代器,即 Iterator
if self.curr_node is None:
raise StopIteration
node, self.curr_node = self.curr_node, self.curr_node.next
return node
def __iter__(self): # 是可迭代对象,即 Iterable return self
class Node:
def __init__(self, name):
self.name = name
self.next = None
def __iter__(self):
return NodeIter(self)
node1 = Node("node1")
node2 = Node("node2")
node3 = Node("node3")
node1.next = node2
node2.next = node3
for node in iter(node1):
print(node.name)
生成器
def gen(num):
while num > 0:
yield num
num -= 1
# 生成器函数里面 return 等价于 raise StopIteration
# 即使 return 有值也不会被 next() 返回,只有 yield 的值才会返回
# 如果一定要拿到 return 值,需要 catch StopIteration Exception
return
g = gen(5)
first = next(g)
for i in g:
print(i)
>>>
4
3
2
1
作用和迭代器一致,不同的点在于生成器函数 def gen(num):
和生成器对象 g = gen(5)
两种形式,而迭代器就是一个明确的 class
对应的源码为
case TARGET(YIELD_VALUE): {
retval = POP(); // 拿出栈顶值
if (co->co_flags & CO_ASYNC_GENERATOR) {
PyObject *w = _PyAsyncGenValueWrapperNew(retval);
Py_DECREF(retval);
if (w == NULL) {
retval = NULL;
goto error;
}
retval = w;
}
f->f_stacktop = stack_pointer;
goto exit_yielding;
}
以下生成器的写法比迭代器简洁
class Node:
def __init__(self, name):
self.name = name
self.next = None
def __iter__(self):
node = self
while node is not None:
yield node
node = node.next
node1 = Node("node1")
node2 = Node("node2")
node3 = Node("node3")
node1.next = node2
node2.next = node3
for node in iter(node1):
print(node.name)
生成器还有 send 的用法
def gen(num):
while num > 0:
tmp = yield num
if tmp is not None:
num = tmp
num -= 1
g = gen(5)
first = next(g) # 等价于 first = g.send(None)
print(f"first: {first}")
print(f"send: {g.send(10)}")
for i in g:
print(i)
闭包
def f():
data = []
def inner(value):
data.append(value)
return data
return inner
g = f()
print(g(1))
print(g(2))
>>>
[1]
[1, 2]
可以判断闭包的执行顺序
def f():
data = []
def inner(value):
data.append(value)
return data
data = [0]
return inner
g = f()
print(g(1))
print(g(2))
>>>
[0, 1]
[0, 1, 2]
cell 引用的闭包
def f():
data = []
def inner(value):
data.append(value)
return data
return inner
g = f() # 只要 g 存在就有对 data 的引用,即使 f 不存在了
print(g.__closure__) # 和下面的 g(3) 的地址完全一致,即是同一个 list
print(g(1))
print(g(2))
print(hex(id(g(3))))
>>>
(<cell at 0x7f7fbc6eafd0: list object at 0x7f7fbc7bcec0>,)
[1]
[1, 2]
0x7f7fbc7bcec0