diff options
Diffstat (limited to 'src/pooptmodule.c')
-rw-r--r-- | src/pooptmodule.c | 486 |
1 files changed, 0 insertions, 486 deletions
diff --git a/src/pooptmodule.c b/src/pooptmodule.c deleted file mode 100644 index 69fb7f6f..00000000 --- a/src/pooptmodule.c +++ /dev/null @@ -1,486 +0,0 @@ -/* Copyright 2010-2011 Florent Le Coz <louiz@louiz.org> */ - -/* This file is part of Poezio. */ - -/* Poezio is free software: you can redistribute it and/or modify */ -/* it under the terms of the zlib license. See the COPYING file. */ - -/** The poopt python3 module -**/ - -/* This file is a python3 module for poezio, used to replace some time-critical -python functions that are too slow. */ - -#define PY_SSIZE_T_CLEAN - -#include "Python.h" - -PyObject *ErrorObject; - -/*** - Internal functions - ***/ - -/** - Just checking if the return value is -1. In some (all?) implementations, - wcwidth("😆") returns -1 while it should return 1. In these cases, we - return 1 instead because this is by far the most probable real value. - Since the string is received from python, and the unicode character is - extracted with mbrtowc(), and supposing these two compononents are not - bugged, and since poezio’s code should never pass '\t', '\n' or their - friends, a return value of -1 from wcwidth() is considered to be a bug in - wcwidth() (until proven otherwise). xwcwidth() is here to work around - this bug. */ -static int xwcwidth(wchar_t c) -{ - const int res = wcwidth(c); - if (res == -1 && c != '\x19') - return 1; - return res; -} - -/*** - The module functions - ***/ - -/** - cut_text: takes a string and returns a tuple of int. - - Each two int tuple is a line, represented by the ending position it - (where it should be cut). Not that this position is calculed using the - position of the python string characters, not just the individual bytes. - - For example, - poopt_cut_text("vivent les réfrigérateurs", 6); - will return [(0, 6), (7, 10), (11, 17), (17, 22), (22, 24)], meaning that - the lines are - "vivent", "les", "réfrig", "érateu" and "rs" - -*/ -PyDoc_STRVAR(poopt_cut_text_doc, "cut_text(text, width)\n\n\nReturn a list of two-tuple, the first int is the starting position of the line and the second is its end."); - -static PyObject* poopt_cut_text(PyObject* self, PyObject* args) -{ - /* The list of tuples that we return */ - PyObject* retlist = PyList_New(0); - /* The temporary name for the tuples */ - PyObject* tmp; - - /* Get the python arguments */ - const size_t width; - const char* buffer; - const Py_ssize_t buffer_len; - - if (PyArg_ParseTuple(args, "s#k", &buffer, &buffer_len, &width) == 0) - return NULL; - - /* Pointer to the end of the string */ - const char* const end = buffer + buffer_len; - - /* The position, considering unicode chars (aka, the position in the - * python string). This is used to determine the position in the python - * string at which we should cut */ - unsigned int spos = 0; - - /* The start position (in the python-string) of the next line */ - unsigned int start_pos = 0; - - /* The position of the last space seen in the current line. This is used - * to cut on spaces instead of cutting inside words, if possible (aka if - * there is a space) */ - int last_space = -1; - /* The number of columns taken by chars between start_pos and last_space */ - size_t cols_until_space = 0; - - /* The number of bytes consumed by mbrtowc. We advance the buffer ptr by this value */ - size_t consumed; - - /* Number of columns taken to display the current line so far */ - size_t columns = 0; - - /* The unicode character found by mbrtowc */ - wchar_t wc; - - while (buffer < end) - { - /* Special case to jump poezio special characters that are contained - * in the python string, but should not be counted as chars, because - * they will not be displayed. Those are the formatting chars (to - * insert colors or things like that in the string) */ - if (*buffer == 25) /* \x19 */ - { - /* Jump everything until the end of this format marker, but - * without increasing the number of columns of the current - * line. Because these chars are not printed. */ - while (buffer < end && *buffer != 'u' && - *buffer != 'a' && *buffer != 'i' && - *buffer != 'b' && *buffer != 'o' && - *buffer != '}') - { - buffer++; - spos++; - } - buffer++; - spos++; - continue; - } - /* Find the next unicode character (a wchar_t) in the string. This - * may consume from one to 4 bytes. */ - consumed = mbrtowc(&wc, buffer, end-buffer, NULL); - if (consumed == 0) - break ; - else if ((size_t)-1 == consumed) - { - PyErr_SetString(PyExc_UnicodeError, - "mbrtowc returned -1: Invalid multibyte sequence."); - return NULL; - } - else if ((size_t)-2 == consumed) - { - PyErr_SetString(PyExc_UnicodeError, - "mbrtowc returned -2: Could not parse a complete multibyte character."); - return NULL; - } - - buffer += consumed; - - /* This is one condition to end the line: an explicit \n is found */ - if (wc == (wchar_t)'\n') - { - spos++; - tmp = Py_BuildValue("II", start_pos, spos); - if (PyList_Append(retlist, tmp) == -1) - { - Py_XDECREF(tmp); - return NULL; - } - Py_XDECREF(tmp); - /* And then initiate a new line */ - start_pos = spos; - last_space = -1; - columns = 0; - continue ; - } - - /* Get the number of columns needed to display this character. May be 0, 1 or 2 */ - const size_t cols = xwcwidth(wc); - - /* This is the second condition to end the line: we have consumed - * enough columns to fill a whole line */ - if (columns + cols > width) - { /* If possible, cut on a space */ - if (last_space != -1) - { - tmp = Py_BuildValue("II", start_pos, last_space); - if (PyList_Append(retlist, tmp) == -1) - { - Py_XDECREF(tmp); - return NULL; - } - Py_XDECREF(tmp); - start_pos = last_space + 1; - last_space = -1; - columns -= (cols_until_space + 1); - } - else - { - /* Otherwise, cut in the middle of a word */ - tmp = Py_BuildValue("II", start_pos, spos); - if (PyList_Append(retlist, tmp) == -1) - { - Py_XDECREF(tmp); - return NULL; - } - Py_XDECREF(tmp); - start_pos = spos; - columns = 0; - } - } - /* We save the position of the last space seen in this line, and the - number of columns we have until now. This helps us keep track of - the columns to count when we will use that space as a cutting - point, later */ - if (wc == (wchar_t)' ') - { - last_space = spos; - cols_until_space = columns; - } - /* We advanced from one char, increment spos by one and add the - * char's columns to the line's columns */ - columns += cols; - spos++; - } - /* We are at the end of the string, append the last line, not finished */ - tmp = Py_BuildValue("II", start_pos, spos); - if (PyList_Append(retlist, tmp) == -1) - { - Py_XDECREF(tmp); - return NULL; - } - Py_XDECREF(tmp); - return retlist; -} - -/** - wcswidth: An emulation of the POSIX wcswidth(3) function using wcwidth - and mbrtowc. -*/ -PyDoc_STRVAR(poopt_wcswidth_doc, "wcswidth(s)\n\n\nThe wcswidth() function returns the number of columns needed to represent the wide-character string pointed to by s. Raise UnicodeError if an invalid unicode value is passed"); -static PyObject* poopt_wcswidth(PyObject* self, PyObject* args) -{ - const char* string; - const Py_ssize_t len; - if (PyArg_ParseTuple(args, "s#", &string, &len) == 0) - return NULL; - const char* const end = string + len; - wchar_t wc; - int res = 0; - while (string < end) - { - const size_t consumed = mbrtowc(&wc, string, end-string, NULL); - if (consumed == 0) - break ; - else if ((size_t)-1 == consumed) - { - PyErr_SetString(PyExc_UnicodeError, - "mbrtowc returned -1: Invalid multibyte sequence."); - return NULL; - } - else if ((size_t)-2 == consumed) - { - PyErr_SetString(PyExc_UnicodeError, - "mbrtowc returned -2: Could not parse a complete multibyte character."); - return NULL; - } - string += consumed; - res += xwcwidth(wc); - } - return Py_BuildValue("i", res); -} - -/** - cut_by_columns: takes a python string and a number of columns, returns a - python string truncated to take at most that many columns - For example cut_by_columns(n, "エメルカ") will return: - - n == 5 -> "エメ" (which takes only 4 columns since we can't cut the - next character in half) - - n == 2 -> "エ" - - n == 1 -> "" - - n == 42 -> "エメルカ" - - etc -*/ -PyDoc_STRVAR(poopt_cut_by_columns_doc, "cut_by_columns(n, string)\n\n\nreturns a string truncated to take at most n columns"); -static PyObject* poopt_cut_by_columns(PyObject* self, PyObject* args) -{ - const char* start; - const Py_ssize_t len; - const size_t limit; - if (PyArg_ParseTuple(args, "s#k", &start, &len, &limit) == 0) - return NULL; - - const char* const end = start + len; - const char* ptr = start; - wchar_t wc; - - /* The number of columns that the string would take so far */ - size_t columns = 0; - - while (ptr < end) - { - if (columns == limit) - break ; - const size_t consumed = mbrtowc(&wc, ptr, end-ptr, NULL); - if (consumed == 0) - break ; - else if ((size_t)-1 == consumed) - { - PyErr_SetString(PyExc_UnicodeError, - "mbrtowc returned -1: Invalid multibyte sequence."); - return NULL; - } - else if ((size_t)-2 == consumed) - { - PyErr_SetString(PyExc_UnicodeError, - "mbrtowc returned -2: Could not parse a complete multibyte character."); - return NULL; - } - const size_t cols = wcwidth(wc); - if (columns + cols > limit) - /* Adding the next character would exceed the column limit */ - break ; - ptr += consumed; - columns += cols; - } - return Py_BuildValue("s#", start, ptr - start); -} - -/*** - Module initialization. Just taken from the xxmodule.c template from the - python sources. - ***/ -static PyTypeObject Str_Type = { - PyVarObject_HEAD_INIT(NULL, 0) - "pooptmodule.Str", /*tp_name*/ - 0, /*tp_basicsize*/ - 0, /*tp_itemsize*/ - /* methods */ - 0, /*tp_dealloc*/ - 0, /*tp_print*/ - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - 0, /*tp_reserved*/ - 0, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - 0, /*tp_hash*/ - 0, /*tp_call*/ - 0, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ - 0, /*tp_doc*/ - 0, /*tp_traverse*/ - 0, /*tp_clear*/ - 0, /*tp_richcompare*/ - 0, /*tp_weaklistoffset*/ - 0, /*tp_iter*/ - 0, /*tp_iternext*/ - 0, /*tp_methods*/ - 0, /*tp_members*/ - 0, /*tp_getset*/ - 0, /* see PyInit_xx */ /*tp_base*/ - 0, /*tp_dict*/ - 0, /*tp_descr_get*/ - 0, /*tp_descr_set*/ - 0, /*tp_dictoffset*/ - 0, /*tp_init*/ - 0, /*tp_alloc*/ - 0, /*tp_new*/ - 0, /*tp_free*/ - 0, /*tp_is_gc*/ -}; - -static PyObject * -null_richcompare(PyObject *self, PyObject *other, int op) -{ - Py_INCREF(Py_NotImplemented); - return Py_NotImplemented; -} - -static PyTypeObject Null_Type = { - PyVarObject_HEAD_INIT(NULL, 0) - "pooptmodule.Null", /*tp_name*/ - 0, /*tp_basicsize*/ - 0, /*tp_itemsize*/ - /* methods */ - 0, /*tp_dealloc*/ - 0, /*tp_print*/ - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - 0, /*tp_reserved*/ - 0, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - 0, /*tp_hash*/ - 0, /*tp_call*/ - 0, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ - 0, /*tp_doc*/ - 0, /*tp_traverse*/ - 0, /*tp_clear*/ - null_richcompare, /*tp_richcompare*/ - 0, /*tp_weaklistoffset*/ - 0, /*tp_iter*/ - 0, /*tp_iternext*/ - 0, /*tp_methods*/ - 0, /*tp_members*/ - 0, /*tp_getset*/ - 0, /* see PyInit_xx */ /*tp_base*/ - 0, /*tp_dict*/ - 0, /*tp_descr_get*/ - 0, /*tp_descr_set*/ - 0, /*tp_dictoffset*/ - 0, /*tp_init*/ - 0, /*tp_alloc*/ - 0, /* see PyInit_xx */ /*tp_new*/ - 0, /*tp_free*/ - 0, /*tp_is_gc*/ -}; - - -/* List of functions defined in the module */ -static PyMethodDef poopt_methods[] = { - {"cut_text", poopt_cut_text, METH_VARARGS, poopt_cut_text_doc}, - {"wcswidth", poopt_wcswidth, METH_VARARGS, poopt_wcswidth_doc}, - {"cut_by_columns", poopt_cut_by_columns, METH_VARARGS, poopt_cut_by_columns_doc}, - {} /* sentinel */ -}; - -PyDoc_STRVAR(module_doc, - "This is a template module just for instruction. And poopt."); - -/* Initialization function for the module (*must* be called PyInit_xx) */ - -static struct PyModuleDef pooptmodule = { - PyModuleDef_HEAD_INIT, - "poopt", - module_doc, - -1, - poopt_methods, - NULL, - NULL, - NULL, - NULL -}; - -PyMODINIT_FUNC -PyInit_poopt(void) -{ - PyObject *m = NULL; - - /* Due to cross platform compiler issues the slots must be filled - * here. It's required for portability to Windows without requiring - * C++. */ - Null_Type.tp_base = &PyBaseObject_Type; - Null_Type.tp_new = PyType_GenericNew; - Str_Type.tp_base = &PyUnicode_Type; - - /* Finalize the type object including setting type of the new type - * object; doing it here is required for portability, too. */ - /* if (PyType_Ready(&Xxo_Type) < 0) */ - /* goto fail; */ - - /* Create the module and add the functions */ - m = PyModule_Create(&pooptmodule); - if (m == NULL) - goto fail; - - /* Add some symbolic constants to the module */ - if (ErrorObject == NULL) { - ErrorObject = PyErr_NewException("poopt.error", NULL, NULL); - if (ErrorObject == NULL) - goto fail; - } - Py_INCREF(ErrorObject); - PyModule_AddObject(m, "error", ErrorObject); - - /* Add Str */ - if (PyType_Ready(&Str_Type) < 0) - goto fail; - PyModule_AddObject(m, "Str", (PyObject *)&Str_Type); - - /* Add Null */ - if (PyType_Ready(&Null_Type) < 0) - goto fail; - PyModule_AddObject(m, "Null", (PyObject *)&Null_Type); - return m; - fail: - Py_XDECREF(m); - return NULL; -} |