为什么 python 的 select 分不清文件的可读可写?

使用 python 的 select.select

为了简单,就先没有使用 socket,而是使用 file

因为官方文档中说,可以是 python 文件对象,也可以是 socket 套接字
为什么 python 的 select 分不清文件的可读可写?

但是发现了和预期不符合的情况:

from pathlib import Path

import select

from loguru import logger

BASE_DIR = Path(__file__).resolve().parent

# Open the same file twice -- `fr` in read mode and `fw` in write mode --
# and ask select() which of the two handles it reports as ready for
# reading, ready for writing, or in an exceptional condition.
with open(BASE_DIR/'run.log', 'r', encoding='utf-8') as fr, open(BASE_DIR/'run.log', 'w', encoding='utf-8') as fw:
    # select() returns three lists of ready objects; each list is a subset
    # of the corresponding argument. When the timeout expires and no file
    # descriptor is ready, three empty lists are returned.
    ready_objects: tuple[list, list, list] = select.select(
        [fr, fw],
        [fr, fw],
        [fr, fw]
    )

    ready_readable_objects, ready_writeable_objects, ready_exception_objects = ready_objects

    logger.debug(ready_readable_objects)
    logger.debug(ready_writeable_objects)
    logger.debug(ready_exception_objects)

运行后输出如下:

2022-06-21 13:05:06.448 | DEBUG    | __main__:<module>:20 - [<_io.TextIOWrapper name='/Users/ponponon/Desktop/code/me/ideaboom/test_select/run.log' mode='r' encoding='utf-8'>, <_io.TextIOWrapper name='/Users/ponponon/Desktop/code/me/ideaboom/test_select/run.log' mode='w' encoding='utf-8'>]

2022-06-21 13:05:06.448 | DEBUG | __main__:<module>:21 - [<_io.TextIOWrapper name='/Users/ponponon/Desktop/code/me/ideaboom/test_select/run.log' mode='r' encoding='utf-8'>, <_io.TextIOWrapper name='/Users/ponponon/Desktop/code/me/ideaboom/test_select/run.log' mode='w' encoding='utf-8'>]

2022-06-21 13:05:06.448 | DEBUG | __main__:<module>:22 - [<_io.TextIOWrapper name='/Users/ponponon/Desktop/code/me/ideaboom/test_select/run.log' mode='r' encoding='utf-8'>, <_io.TextIOWrapper name='/Users/ponponon/Desktop/code/me/ideaboom/test_select/run.log' mode='w' encoding='utf-8'>]

从结果来看,fr 和 fw 都被认为是可读可写对象了。

但是我预期的是 fr 是可读不可写,fw 是可写不可读


使用下面的代码用例也可以证明:fr 不可写

from pathlib import Path

import select

from loguru import logger

BASE_DIR = Path(__file__).resolve().parent

with open(BASE_DIR/'run.log', 'r', encoding='utf-8') as fr, open(BASE_DIR/'run.log', 'w', encoding='utf-8') as fw:
    # `fr` was opened with mode 'r'; writing to it is expected to fail
    # with io.UnsupportedOperation ("not writable").
    fr.write('hihi')

运行后输出如下:

Traceback (most recent call last):

File "/Users/ponponon/Desktop/code/me/ideaboom/test_select/main.py", line 9, in <module>

fr.write('hihi')

io.UnsupportedOperation: not writable

可以看到,当我们调用 fr 的 write 方法的时候报错了,fr 是个不可写对象

使用下面的代码用例也可以证明:fw 不可读

from pathlib import Path

import select

from loguru import logger

BASE_DIR = Path(__file__).resolve().parent

with open(BASE_DIR/'run.log', 'r', encoding='utf-8') as fr, open(BASE_DIR/'run.log', 'w', encoding='utf-8') as fw:
    # `fw` was opened with mode 'w'; reading from it is expected to fail
    # with io.UnsupportedOperation ("not readable").
    fw.read()

运行后输出如下:

Traceback (most recent call last):

File "/Users/ponponon/Desktop/code/me/ideaboom/test_select/main.py", line 9, in <module>

fw.read()

io.UnsupportedOperation: not readable

可以看到,当我们调用 fw 的 read 方法的时候报错了,fw 是个不可读对象


那为什么 select 会把 fr 认为是可写对象,fw 是可读对象呢?


select 这个模块是 c 写的,没有办法直接看 python 版本源码,下了 cpython 的源代码,又看不懂,谁能从 cpython 来分析一下是为什么呢?

大概的位置:Modules/selectmodule.c

/* Implementation of Python's select.select(rlist, wlist, xlist, timeout).
 *
 * Converts the three Python iterables into fd_sets via seq2set(), calls the
 * C library select() between Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS,
 * retries when the call is interrupted by a signal (EINTR), and converts
 * the resulting fd_sets back into Python lists via set2list().
 *
 * Returns a new 3-tuple (rlist, wlist, xlist) on success, or NULL with a
 * Python exception set on failure.
 *
 * NOTE(review): seq2set(), set2list() and reap_obj() are defined elsewhere
 * in selectmodule.c and are not shown here; the comments below describe
 * only what this function itself does with them.
 */
static PyObject *

select_select_impl(PyObject *module, PyObject *rlist, PyObject *wlist,

PyObject *xlist, PyObject *timeout_obj)

/*[clinic end generated code: output=2b3cfa824f7ae4cf input=e467f5d68033de00]*/

{

#ifdef SELECT_USES_HEAP

pylist *rfd2obj, *wfd2obj, *efd2obj;

#else /* !SELECT_USES_HEAP */

/* XXX: All this should probably be implemented as follows:

* - find the highest descriptor we're interested in

* - add one

* - that's the size

* See: Stevens, APitUE, $12.5.1

*/

pylist rfd2obj[FD_SETSIZE + 1];

pylist wfd2obj[FD_SETSIZE + 1];

pylist efd2obj[FD_SETSIZE + 1];

#endif /* SELECT_USES_HEAP */

PyObject *ret = NULL;

fd_set ifdset, ofdset, efdset;

struct timeval tv, *tvp;

int imax, omax, emax, max;

int n;

_PyTime_t timeout, deadline = 0;

/* timeout=None: pass a NULL timeval pointer to select().  Otherwise
   convert the Python number to a struct timeval and reject negative
   values. */
if (timeout_obj == Py_None)

tvp = (struct timeval *)NULL;

else {

if (_PyTime_FromSecondsObject(&timeout, timeout_obj,

_PyTime_ROUND_TIMEOUT) < 0) {

if (PyErr_ExceptionMatches(PyExc_TypeError)) {

PyErr_SetString(PyExc_TypeError,

"timeout must be a float or None");

}

return NULL;

}

if (_PyTime_AsTimeval(timeout, &tv, _PyTime_ROUND_TIMEOUT) == -1)

return NULL;

if (tv.tv_sec < 0) {

PyErr_SetString(PyExc_ValueError, "timeout must be non-negative");

return NULL;

}

tvp = &tv;

}

#ifdef SELECT_USES_HEAP

/* Allocate memory for the lists */

rfd2obj = PyMem_NEW(pylist, FD_SETSIZE + 1);

wfd2obj = PyMem_NEW(pylist, FD_SETSIZE + 1);

efd2obj = PyMem_NEW(pylist, FD_SETSIZE + 1);

if (rfd2obj == NULL || wfd2obj == NULL || efd2obj == NULL) {

if (rfd2obj) PyMem_Free(rfd2obj);

if (wfd2obj) PyMem_Free(wfd2obj);

if (efd2obj) PyMem_Free(efd2obj);

return PyErr_NoMemory();

}

#endif /* SELECT_USES_HEAP */

/* Convert iterables to fd_sets, and get maximum fd number

* propagates the Python exception set in seq2set()

*/

/* NOTE(review): the sentinel presumably marks each table as empty so that
   reap_obj() at `finally` is safe even when a later seq2set() call fails --
   confirm against seq2set()/reap_obj(). */
rfd2obj[0].sentinel = -1;

wfd2obj[0].sentinel = -1;

efd2obj[0].sentinel = -1;

if ((imax = seq2set(rlist, &ifdset, rfd2obj)) < 0)

goto finally;

if ((omax = seq2set(wlist, &ofdset, wfd2obj)) < 0)

goto finally;

if ((emax = seq2set(xlist, &efdset, efd2obj)) < 0)

goto finally;

/* The largest of the three seq2set() results becomes select()'s first
   argument. */
max = imax;

if (omax > max) max = omax;

if (emax > max) max = emax;

if (tvp) {

deadline = _PyDeadline_Init(timeout);

}

/* Call select() and loop only while it is interrupted by a signal. */
do {

Py_BEGIN_ALLOW_THREADS

errno = 0;

n = select(

max,

imax ? &ifdset : NULL,

omax ? &ofdset : NULL,

emax ? &efdset : NULL,

tvp);

Py_END_ALLOW_THREADS

if (errno != EINTR)

break;

/* select() was interrupted by a signal */

if (PyErr_CheckSignals())

goto finally;

if (tvp) {

timeout = _PyDeadline_Get(deadline);

if (timeout < 0) {

/* bpo-35310: lists were unmodified -- clear them explicitly */

FD_ZERO(&ifdset);

FD_ZERO(&ofdset);

FD_ZERO(&efdset);

n = 0;

break;

}

_PyTime_AsTimeval_clamp(timeout, &tv, _PyTime_ROUND_CEILING);

/* retry select() with the recomputed timeout */

}

} while (1);

/* On failure only the exception is set; ret is still NULL and is returned
   as-is after the cleanup at `finally`. */
#ifdef MS_WINDOWS

if (n == SOCKET_ERROR) {

PyErr_SetExcFromWindowsErr(PyExc_OSError, WSAGetLastError());

}

#else

if (n < 0) {

PyErr_SetFromErrno(PyExc_OSError);

}

#endif

else {

/* The result lists are built purely from what select() left in the three
   fd_sets; nothing in this function inspects the mode a Python file object
   was opened with. */

/* any of these three calls can raise an exception. it's more

convenient to test for this after all three calls... but

is that acceptable?

*/

rlist = set2list(&ifdset, rfd2obj);

wlist = set2list(&ofdset, wfd2obj);

xlist = set2list(&efdset, efd2obj);

if (PyErr_Occurred())

ret = NULL;

else

ret = PyTuple_Pack(3, rlist, wlist, xlist);

Py_XDECREF(rlist);

Py_XDECREF(wlist);

Py_XDECREF(xlist);

}

/* Common exit path: reap_obj() on each fd->object table, then (heap build
   only) free the tables themselves. */
finally:

reap_obj(rfd2obj);

reap_obj(wfd2obj);

reap_obj(efd2obj);

#ifdef SELECT_USES_HEAP

PyMem_Free(rfd2obj);

PyMem_Free(wfd2obj);

PyMem_Free(efd2obj);

#endif /* SELECT_USES_HEAP */

return ret;

}


回答:

因为 Unix/Linux 底层设计就是如此。

首先你要理解 Unix/Linux 下“一切皆文件”,所有东西都在底层被抽象为“文件描述符”,无论 Socket 还是你所谓的“文件对象”皆是如此。

其次要注意区分 FileAccess 里的可读/可写(即文件权限中的可读、可写、可执行等等)、FileMode 里的可读/可写(即文件打开模式中的只读、追加、覆盖、创建或覆盖等等,也是你 Python 代码里通过 r/w 控制的)、和 select() 里判断可读/可写条件就绪,都可以叫“可读”/“可写”,但实质不是一回事儿。

最后 select 的实质是不断轮询队列是否有满足可读/可写条件就绪的文件描述符。但判断是否满足可读/可写,是由该文件自身的类型及其驱动决定的。对于 Socket 而言,可读就是接收缓冲区内有数据、可写就是发送缓冲区未满;但磁盘文件系统是没有缓冲区一说的、自然也就没有可读/可写条件就绪一说了。(其实还有个错误条件就绪,咱先忽略)

至于本地磁盘文件系统为什么被设计成永远是可读/可写、而不是永远不可读/不可写,这就是另一个话题了。

对细节感兴趣的话建议阅读《Advanced Programming in the UNIX Environment》这本书(中文翻译叫《UNIX 环境高级编程》)。

所以严格来说本地磁盘的文件描述符是不应该被用于 select 的。


回答:

在你自己提供的说明 select.select 中说得很详细(最顶上整体介绍的最后一句),就是 这个模块不能用于常规文件的!


已参与了 SegmentFault 思否社区 10 周年「问答」打卡 ,欢迎正在阅读的你也加入。


回答:

select

select() allows a program to monitor multiple file descriptors,
waiting until one or more of the file descriptors become "ready"
for some class of I/O operation (e.g., input possible). A file
descriptor is considered ready if it is possible to perform a
corresponding I/O operation (e.g., read(2), or a sufficiently
small write(2)) without blocking.

select 并不是用来判断通常文件意义上的“可读”,“可写”,而是判断对它调用 read, write 是否会阻塞。POSIX 对此有明确规定:普通(磁盘)文件的描述符在 select 中总是被报告为读就绪、写就绪以及异常条件就绪,这也正是上面三个列表里都同时出现 fr 和 fw 的原因。

以上是 为什么 python 的 select 分不清文件的可读可写? 的全部内容, 来源链接: utcz.com/p/938477.html

回到顶部