summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Florent Le Coz <louiz@louiz.org> 2013-06-20 22:17:43 +0200
committer Florent Le Coz <louiz@louiz.org> 2013-06-20 22:17:43 +0200
commit da6c11603cf37c190bb30f3d2e9bcb5a83f17824 (patch)
tree de5d2cd3db53671abcae9866b0cf1305e57e10aa /src
parent a92163d173226bf3418de87fbf6bf957e7426f12 (diff)
download poezio-da6c11603cf37c190bb30f3d2e9bcb5a83f17824.tar.gz
poezio-da6c11603cf37c190bb30f3d2e9bcb5a83f17824.tar.bz2
poezio-da6c11603cf37c190bb30f3d2e9bcb5a83f17824.tar.xz
poezio-da6c11603cf37c190bb30f3d2e9bcb5a83f17824.zip
Add a poopt function to cut a string by the number of columns it takes
Diffstat (limited to 'src')
-rw-r--r-- src/pooptmodule.c 58
1 files changed, 58 insertions, 0 deletions
diff --git a/src/pooptmodule.c b/src/pooptmodule.c
index f6840abd..b80bc8dc 100644
--- a/src/pooptmodule.c
+++ b/src/pooptmodule.c
@@ -207,6 +207,63 @@ static PyObject* poopt_wcswidth(PyObject* self, PyObject* args)
return Py_BuildValue("i", res);
}
+/**
+   cut_by_columns: takes a number of columns and a python string, and returns
+   the longest prefix of that string which takes at most that many columns.
+   A character is never cut in half: if the next character does not fit
+   entirely, the result stops right before it.
+   For example cut_by_columns(n, "エメルカ") will return:
+   - n == 5 -> "エメ" (which takes only 4 columns since we can't cut the next character in half)
+   - n == 2 -> "エ"
+   - n == 1 -> ""
+   - n == 42 -> "エメルカ"
+   - etc
+   The bytes are decoded with mbrtowc() and measured with wcwidth(), so the
+   result depends on the current locale. A UnicodeError is raised if the
+   string contains an invalid or an incomplete multibyte sequence.
+*/
+PyDoc_STRVAR(poopt_cut_by_columns_doc, "cut_by_columns(n, string)\n\n\nreturns a string truncated to take at most n columns");
+static PyObject* poopt_cut_by_columns(PyObject* self, PyObject* args)
+{
+    /* The "I" format unit stores an unsigned int, so the destination must
+       have exactly that type: a size_t would only be partially written on
+       platforms where it is wider than an int. */
+    unsigned int limit;
+    /* "s*" fills a Py_buffer, whose length is always a Py_ssize_t. Unlike
+       "s#", this does not depend on PY_SSIZE_T_CLEAN being defined. */
+    Py_buffer text;
+    if (PyArg_ParseTuple(args, "Is*", &limit, &text) == 0)
+        return NULL;
+
+    const char* const start = text.buf;
+    const char* const end = start + text.len;
+    const char* ptr = start;
+
+    /* Private conversion state: passing NULL to mbrtowc() would use a hidden
+       static state that stays dirty after an incomplete sequence and would
+       corrupt the following calls. */
+    mbstate_t state;
+    memset(&state, 0, sizeof state);
+
+    /* The number of columns that the string would take so far */
+    size_t columns = 0;
+    /* Set to 1 as soon as a python exception has been raised */
+    int failed = 0;
+
+    while (ptr < end)
+    {
+        wchar_t wc;
+        const size_t consumed = mbrtowc(&wc, ptr, (size_t)(end - ptr), &state);
+        if (consumed == 0)
+            /* Embedded NUL character: stop there */
+            break;
+        if ((size_t)-1 == consumed)
+        {
+            PyErr_SetString(PyExc_UnicodeError,
+                            "mbrtowc returned -1: Invalid multibyte sequence.");
+            failed = 1;
+            break;
+        }
+        if ((size_t)-2 == consumed)
+        {
+            PyErr_SetString(PyExc_UnicodeError,
+                            "mbrtowc returned -2: Could not parse a complete multibyte character.");
+            failed = 1;
+            break;
+        }
+
+        /* wcwidth() returns -1 for non-printable characters. Count them as
+           taking no column instead of letting -1 wrap around to a huge
+           unsigned value and corrupt the column count.
+           NOTE(review): zero width is assumed to be the desired behaviour
+           for control characters -- confirm against the callers. */
+        const int width = wcwidth(wc);
+        const size_t cols = (width > 0) ? (size_t)width : 0;
+
+        /* columns <= limit always holds here, so the subtraction can't wrap */
+        if (cols > limit - columns)
+            /* Adding the next character would exceed the column limit */
+            break;
+        ptr += consumed;
+        columns += cols;
+        if (columns == limit)
+            /* With the new character we are exactly at the column limit. No
+               need to go check the next char */
+            break;
+    }
+
+    /* Build the result before releasing the buffer, since start and ptr
+       point into it. */
+    PyObject* result = NULL;
+    if (!failed)
+        result = PyUnicode_FromStringAndSize(start, (Py_ssize_t)(ptr - start));
+    PyBuffer_Release(&text);
+    return result;
+}
+
+
/***
Module initialization. Just taken from the xxmodule.c template from the python sources.
***/
@@ -310,6 +367,7 @@ static PyTypeObject Null_Type = {
/* Table of the functions listed for this module. Each entry gives the name
   used from python, the C function implementing it, the calling convention
   flag (METH_VARARGS: arguments are received as a tuple) and the docstring. */
static PyMethodDef poopt_methods[] = {
{"cut_text", poopt_cut_text, METH_VARARGS, poopt_cut_text_doc},
{"wcswidth", poopt_wcswidth, METH_VARARGS, poopt_wcswidth_doc},
+ {"cut_by_columns", poopt_cut_by_columns, METH_VARARGS, poopt_cut_by_columns_doc},
{} /* sentinel: an empty entry marks the end of the table */
};