# Copyright 2017 Emmanuel Gil Peyrot
#
# This file is part of Poezio.
#
# Poezio is free software: you can redistribute it and/or modify
# it under the terms of the zlib license. See the COPYING file.
#
# Provenance: poezio/poopt.py as added by commit
# 2406204d1cf4b9c3a8583b2d03e8935a6e57c453 (Emmanuel Gil Peyrot,
# 2017-04-01, "Add a pure-Python alternative to pooptmodule, for pypy3.").

'''Pure-Python text-measuring and text-cutting helpers (poopt).

Provides xwcwidth(), cut_text(), wcswidth() and cut_by_columns(), all of
which measure characters in terminal columns using the C library's
wcwidth(3).
'''

from typing import List, Tuple

# CFFI codepath.  cffi is preferred; when it is not installed we fall back
# to the standard library's ctypes so that importing this module does not
# fail.  Both objects expose the same ``libc.wcwidth(int) -> int`` call.
try:
    from cffi import FFI
except ImportError:
    FFI = None

if FFI is not None:
    ffi = FFI()
    ffi.cdef("""
    typedef long wchar_t;
    int wcwidth(wchar_t c);
""")
    libc = ffi.dlopen(None)
else:
    import ctypes

    ffi = None
    # CDLL(None) gives access to the symbols already loaded in the process,
    # which includes the C library's wcwidth().
    libc = ctypes.CDLL(None)

# Cython codepath.
#cdef extern from "wchar.h":
#    ctypedef Py_UCS4 wchar_t
#    int wcwidth(wchar_t c)

# Characters that close a formatting sequence opened by '\x19'.
_ATTRIBUTE_END_CHARS = frozenset('uaibo}')


# Just checking if the return value is -1. In some (all?) implementations,
# wcwidth("😆") returns -1 while it should return 1. In these cases, we
# return 1 instead because this is by far the most probable real value.
# Since the character comes straight from a python string, and since
# poezio’s code should never pass '\t', '\n' or their friends, a return
# value of -1 from wcwidth() is considered to be a bug in wcwidth() (until
# proven otherwise). xwcwidth() is here to work around this bug.
def xwcwidth(c: str) -> int:
    '''xwcwidth(c)

    Return the number of columns needed to display the single character c.

    The value comes from the C library's wcwidth(); a result of -1 is
    replaced by 1, except for the formatting marker '\\x19', whose result
    is returned unchanged.'''
    res = libc.wcwidth(ord(c))
    if res == -1 and c != '\x19':
        return 1
    return res


# cut_text: takes a string and returns a list of two-int tuples.
#
# Each tuple is a line, represented by its starting position and its ending
# position (where it should be cut). Note that these positions are
# calculated using the position of the python string characters, not the
# individual bytes.
#
# For example,
#   cut_text("vivent les réfrigérateurs", 6)
# will return [(0, 6), (6, 10), (11, 17), (17, 23), (23, 25)], meaning that
# the lines are
#   "vivent", " les", "réfrig", "érateu" and "rs"
# (the space that overflowed the first line had no earlier space to cut on,
# so it starts the second line).
def cut_text(string: str, width: int) -> List[Tuple[int, int]]:
    '''cut_text(text, width)

    Return a list of two-tuples, one per line: the first int is the
    starting position of the line in the string and the second is its end
    (exclusive), so that string[start:end] is the text of the line.

    Lines are cut so that they take at most width columns, on a space when
    the current line contains one, otherwise in the middle of a word. An
    explicit '\\n' also ends the current line; the '\\n' itself is the first
    character of the following line. Formatting sequences (from '\\x19' up
    to one of 'u', 'a', 'i', 'b', 'o' or '}') take no column.

    An empty string gives [(0, 0)].'''

    # The list of tuples that we return
    retlist = []

    # The start position (in the python string) of the next line
    start_pos = 0

    # The position of the last space seen in the current line. This is used
    # to cut on spaces instead of cutting inside words, if possible (aka if
    # there is a space)
    last_space = -1
    # The number of columns taken by chars between start_pos and last_space
    cols_until_space = 0

    # Number of columns taken to display the current line so far
    columns = 0

    # The position in the python string of the character being examined.
    # It starts at -1 so that an empty string, for which the loop below
    # never runs, still produces the final (0, 0) line instead of failing
    # on an unbound name.
    spos = -1

    in_special_character = False
    for spos, wc in enumerate(string):
        # Special case to skip poezio special characters that are contained
        # in the python string, but should not be counted as chars because
        # they will not be displayed. Those are the formatting chars (to
        # insert colors or things like that in the string)
        if in_special_character:
            # Skip everything until the end of this format marker, but
            # without increasing the number of columns of the current
            # line. Because these chars are not printed.
            if wc in _ATTRIBUTE_END_CHARS:
                in_special_character = False
            continue
        if wc == '\x19':
            in_special_character = True
            continue

        # This is one condition to end the line: an explicit \n is found
        if wc == '\n':
            retlist.append((start_pos, spos))

            # And then initiate a new line
            start_pos = spos
            last_space = -1
            columns = 0
            continue

        # Get the number of columns needed to display this character. May
        # be 0, 1 or 2
        cols = xwcwidth(wc)

        # This is the second condition to end the line: we have consumed
        # enough columns to fill a whole line
        if columns + cols > width:
            if last_space != -1:
                # If possible, cut on a space. The chars after that space
                # stay on the new line, so only the columns up to and
                # including the space (one column) are removed.
                retlist.append((start_pos, last_space))
                start_pos = last_space + 1
                last_space = -1
                columns -= (cols_until_space + 1)
            else:
                # Otherwise, cut in the middle of a word
                retlist.append((start_pos, spos))
                start_pos = spos
                columns = 0

        # We save the position of the last space seen in this line, and the
        # number of columns we have until now. This helps us keep track of
        # the columns to count when we will use that space as a cutting
        # point, later
        if wc == ' ':
            last_space = spos
            cols_until_space = columns

        # Add the char's columns to the line's columns
        columns += cols

    # We are at the end of the string, append the last line, not finished
    retlist.append((start_pos, spos + 1))
    return retlist


# wcswidth: An emulation of the POSIX wcswidth(3) function using xwcwidth.
def wcswidth(string: str) -> int:
    '''wcswidth(s)

    Return the number of columns needed to represent the string s, computed
    as the sum of xwcwidth() over each of its characters.'''
    return sum(xwcwidth(wc) for wc in string)


# cut_by_columns: takes a python string and a number of columns, returns a
# python string truncated to take at most that many columns
# For example cut_by_columns("エメルカ", n) will return:
#  - n == 5 -> "エメ" (which takes only 4 columns since we can't cut the
#              next character in half)
#  - n == 2 -> "エ"
#  - n == 1 -> ""
#  - n == 42 -> "エメルカ"
#  - etc
def cut_by_columns(string: str, limit: int) -> str:
    '''cut_by_columns(string, limit)

    Return string truncated to take at most limit columns.

    Characters are kept from the start of the string until the limit is
    reached exactly or the next character would exceed it.'''

    spos = 0
    columns = 0
    for wc in string:
        # Stop as soon as the line is exactly full, even if the next char
        # would take no column.
        if columns == limit:
            break
        cols = xwcwidth(wc)
        if columns + cols > limit:
            break
        spos += 1
        columns += cols
    return string[:spos]