Add a pure-Python alternative to pooptmodule, for pypy3.

author: Emmanuel Gil Peyrot <linkmauve@linkmauve.fr> 2017-04-01 19:10:33 +0100
committer: Emmanuel Gil Peyrot <linkmauve@linkmauve.fr> 2017-04-01 19:10:33 +0100
commit: 2406204d1cf4b9c3a8583b2d03e8935a6e57c453 (patch)
tree: 2382e8dc3ee0e4a10196c8e7eb109022ffe84776 /poezio
parent: d99558b9b8a3c359b7ed1ce8c0360bc7bbcc3ec5 (diff)
download: poezio-2406204d1cf4b9c3a8583b2d03e8935a6e57c453.tar.gz
poezio-2406204d1cf4b9c3a8583b2d03e8935a6e57c453.tar.bz2
poezio-2406204d1cf4b9c3a8583b2d03e8935a6e57c453.tar.xz
poezio-2406204d1cf4b9c3a8583b2d03e8935a6e57c453.zip
1 files changed, 181 insertions, 0 deletions
diff --git a/poezio/poopt.py b/poezio/poopt.py
new file mode 100644
index 00000000..016c04e1
--- /dev/null
+++ b/poezio/poopt.py
@@ -0,0 +1,181 @@
+# Copyright 2017 Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
+#
+# This file is part of Poezio.
+#
+# Poezio is free software: you can redistribute it and/or modify
+# it under the terms of the zlib license. See the COPYING file.
+
+'''This is a template module just for instruction. And poopt.'''
+
+from typing import List, Tuple
+
+# CFFI codepath.
+from cffi import FFI
+
+ffi = FFI()
+ffi.cdef("""
+    typedef long wchar_t;
+    int wcwidth(wchar_t c);
+""")
+libc = ffi.dlopen(None)
+
+# Cython codepath.
+#cdef extern from "wchar.h":
+#    ctypedef Py_UCS4 wchar_t
+#    int wcwidth(wchar_t c)
+
+# Just checking if the return value is -1.  In some (all?) implementations,
+# wcwidth("😆") returns -1 while it should return 1.  In these cases, we
+# return 1 instead because this is by far the most probable real value.
+# Since the string is received from python, and the unicode character is
+# extracted with mbrtowc(), and supposing these two compononents are not
+# bugged, and since poezio’s code should never pass '\t', '\n' or their
+# friends, a return value of -1 from wcwidth() is considered to be a bug in
+# wcwidth() (until proven otherwise). xwcwidth() is here to work around
+# this bug.
+def xwcwidth(c: str) -> int:
+    character = ord(c)
+    res = libc.wcwidth(character)
+    if res == -1 and c != '\x19':
+        return 1
+    return res
+
+# cut_text: takes a string and returns a tuple of int.
+#
+# Each two int tuple is a line, represented by the ending position it
+# (where it should be cut).  Not that this position is calculed using the
+# position of the python string characters, not just the individual bytes.
+#
+# For example,
+# poopt_cut_text("vivent les réfrigérateurs", 6);
+# will return [(0, 6), (7, 10), (11, 17), (17, 22), (22, 24)], meaning that
+# the lines are
+# "vivent", "les", "réfrig", "érateu" and "rs"
+def cut_text(string: str, width: int) -> List[Tuple[int, int]]:
+    '''cut_text(text, width)
+
+    Return a list of two-tuple, the first int is the starting position of the line and the second is its end.'''
+
+    # The list of tuples that we return
+    retlist = []
+
+    # The start position (in the python-string) of the next line
+    #: unsigned int
+    start_pos = 0
+
+    # The position of the last space seen in the current line. This is used
+    # to cut on spaces instead of cutting inside words, if possible (aka if
+    # there is a space)
+    #: int
+    last_space = -1
+    # The number of columns taken by chars between start_pos and last_space
+    #: size_t
+    cols_until_space = 0
+
+    # Number of columns taken to display the current line so far
+    #: size_t
+    columns = 0
+
+    #: wchar_t
+    #wc = 0
+
+    # The position, considering unicode chars (aka, the position in the
+    # python string). This is used to determine the position in the python
+    # string at which we should cut */
+    #: unsigned int
+    #spos = -1
+
+    in_special_character = False
+    for spos, wc in enumerate(string):
+        # Special case to skip poezio special characters that are contained
+        # in the python string, but should not be counted as chars because
+        # they will not be displayed. Those are the formatting chars (to
+        # insert colors or things like that in the string)
+        if in_special_character:
+            # Skip everything until the end of this format marker, but
+            # without increasing the number of columns of the current
+            # line. Because these chars are not printed.
+            if wc in ('u', 'a', 'i', 'b', 'o', '}'):
+                in_special_character = False
+            continue
+        if wc == '\x19':
+            in_special_character = True
+            continue
+
+        # This is one condition to end the line: an explicit \n is found
+        if wc == '\n':
+            retlist.append((start_pos, spos))
+
+            # And then initiate a new line
+            start_pos = spos
+            last_space = -1
+            columns = 0
+            continue
+
+        # Get the number of columns needed to display this character. May be 0, 1 or 2
+        cols = xwcwidth(wc)
+
+        # This is the second condition to end the line: we have consumed
+        # enough columns to fill a whole line
+        if columns + cols > width:
+            # If possible, cut on a space
+            if last_space != -1:
+                retlist.append((start_pos, last_space))
+                start_pos = last_space + 1
+                last_space = -1
+                columns -= (cols_until_space + 1)
+            else:
+                # Otherwise, cut in the middle of a word
+                retlist.append((start_pos, spos))
+                start_pos = spos
+                columns = 0
+        # We save the position of the last space seen in this line, and the
+        # number of columns we have until now. This helps us keep track of
+        # the columns to count when we will use that space as a cutting
+        # point, later
+        if wc == ' ':
+            last_space = spos
+            cols_until_space = columns
+        # We advanced from one char, increment spos by one and add the
+        # char's columns to the line's columns
+        columns += cols
+    # We are at the end of the string, append the last line, not finished
+    retlist.append((start_pos, spos+1))
+    return retlist
+
+# wcswidth: An emulation of the POSIX wcswidth(3) function using xwcwidth.
+def wcswidth(string: str) -> int:
+    '''wcswidth(s)
+
+    The wcswidth() function returns the number of columns needed to represent the wide-character string pointed to by s. Raise UnicodeError if an invalid unicode value is passed'''
+
+    columns = 0
+    for wc in string:
+        columns += xwcwidth(wc)
+    return columns
+
+# cut_by_columns: takes a python string and a number of columns, returns a
+# python string truncated to take at most that many columns
+# For example cut_by_columns(n, "エメルカ") will return:
+# - n == 5 -> "エメ" (which takes only 4 columns since we can't cut the
+#   next character in half)
+# - n == 2 -> "エ"
+# - n == 1 -> ""
+# - n == 42 -> "エメルカ"
+# - etc
+def cut_by_columns(string: str, limit: int) -> str:
+    '''cut_by_columns(string, limit)
+
+    returns a string truncated to take at most limit columns'''
+
+    spos = 0
+    columns = 0
+    for wc in string:
+        if columns == limit:
+            break
+        cols = xwcwidth(wc)
+        if columns + cols > limit:
+            break
+        spos += 1
+        columns += cols
+    return string[:spos]
author	Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>	2017-04-01 19:10:33 +0100
committer	Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>	2017-04-01 19:10:33 +0100
commit	2406204d1cf4b9c3a8583b2d03e8935a6e57c453 (patch)
tree	2382e8dc3ee0e4a10196c8e7eb109022ffe84776 /poezio
parent	d99558b9b8a3c359b7ed1ce8c0360bc7bbcc3ec5 (diff)
download	poezio-2406204d1cf4b9c3a8583b2d03e8935a6e57c453.tar.gz poezio-2406204d1cf4b9c3a8583b2d03e8935a6e57c453.tar.bz2 poezio-2406204d1cf4b9c3a8583b2d03e8935a6e57c453.tar.xz poezio-2406204d1cf4b9c3a8583b2d03e8935a6e57c453.zip