# Copyright 2017 Emmanuel Gil Peyrot # # This file is part of Poezio. # # Poezio is free software: you can redistribute it and/or modify # it under the terms of the zlib license. See the COPYING file. '''This is a template module just for instruction. And poopt.''' from typing import List, Tuple # CFFI codepath. from cffi import FFI ffi = FFI() ffi.cdef(""" typedef long wchar_t; int wcwidth(wchar_t c); """) libc = ffi.dlopen(None) # Cython codepath. #cdef extern from "wchar.h": # ctypedef Py_UCS4 wchar_t # int wcwidth(wchar_t c) # Just checking if the return value is -1. In some (all?) implementations, # wcwidth("😆") returns -1 while it should return 2. In these cases, we # return 1 instead because this is by far the most probable real value. # Since the string is received from python, and the unicode character is # extracted with mbrtowc(), and supposing these two compononents are not # bugged, and since poezio’s code should never pass '\t', '\n' or their # friends, a return value of -1 from wcwidth() is considered to be a bug in # wcwidth() (until proven otherwise). xwcwidth() is here to work around # this bug. def xwcwidth(c: str) -> int: character = ord(c) res = libc.wcwidth(character) if res == -1 and c != '\x19': return 1 return res # cut_text: takes a string and returns a tuple of int. # # Each two int tuple is a line, represented by the ending position it # (where it should be cut). Not that this position is calculed using the # position of the python string characters, not just the individual bytes. # # For example, # poopt_cut_text("vivent les réfrigérateurs", 6); # will return [(0, 6), (7, 10), (11, 17), (17, 22), (22, 24)], meaning that # the lines are # "vivent", "les", "réfrig", "érateu" and "rs" def cut_text(string: str, width: int) -> List[Tuple[int, int]]: '''cut_text(text, width) Return a list of two-tuple, the first int is the starting position of the line and the second is its end.''' # The list of tuples that we return retlist = [] # The start position (in the python-string) of the next line #: unsigned int start_pos = 0 # The position of the last space seen in the current line. This is used # to cut on spaces instead of cutting inside words, if possible (aka if # there is a space) #: int last_space = -1 # The number of columns taken by chars between start_pos and last_space #: size_t cols_until_space = 0 # Number of columns taken to display the current line so far #: size_t columns = 0 #: wchar_t #wc = 0 # The position, considering unicode chars (aka, the position in the # python string). This is used to determine the position in the python # string at which we should cut */ #: unsigned int #spos = -1 in_special_character = False for spos, wc in enumerate(string): # Special case to skip poezio special characters that are contained # in the python string, but should not be counted as chars because # they will not be displayed. Those are the formatting chars (to # insert colors or things like that in the string) if in_special_character: # Skip everything until the end of this format marker, but # without increasing the number of columns of the current # line. Because these chars are not printed. if wc in ('u', 'a', 'i', 'b', 'o', '}'): in_special_character = False continue if wc == '\x19': in_special_character = True continue # This is one condition to end the line: an explicit \n is found if wc == '\n': spos += 1 retlist.append((start_pos, spos)) # And then initiate a new line start_pos = spos last_space = -1 columns = 0 continue # Get the number of columns needed to display this character. May be 0, 1 or 2 cols = xwcwidth(wc) # This is the second condition to end the line: we have consumed # enough columns to fill a whole line if columns + cols > width: # If possible, cut on a space if last_space != -1: retlist.append((start_pos, last_space)) start_pos = last_space + 1 last_space = -1 columns -= (cols_until_space + 1) else: # Otherwise, cut in the middle of a word retlist.append((start_pos, spos)) start_pos = spos columns = 0 # We save the position of the last space seen in this line, and the # number of columns we have until now. This helps us keep track of # the columns to count when we will use that space as a cutting # point, later if wc == ' ': last_space = spos cols_until_space = columns # We advanced from one char, increment spos by one and add the # char's columns to the line's columns columns += cols # We are at the end of the string, append the last line, not finished retlist.append((start_pos, spos + 1)) return retlist # wcswidth: An emulation of the POSIX wcswidth(3) function using xwcwidth. def wcswidth(string: str) -> int: '''wcswidth(s) The wcswidth() function returns the number of columns needed to represent the wide-character string pointed to by s. Raise UnicodeError if an invalid unicode value is passed''' columns = 0 for wc in string: columns += xwcwidth(wc) return columns # cut_by_columns: takes a python string and a number of columns, returns a # python string truncated to take at most that many columns # For example cut_by_columns(n, "エメルカ") will return: # - n == 5 -> "エメ" (which takes only 4 columns since we can't cut the # next character in half) # - n == 2 -> "エ" # - n == 1 -> "" # - n == 42 -> "エメルカ" # - etc def cut_by_columns(string: str, limit: int) -> str: '''cut_by_columns(string, limit) returns a string truncated to take at most limit columns''' spos = 0 columns = 0 for wc in string: if columns == limit: break cols = xwcwidth(wc) if columns + cols > limit: break spos += 1 columns += cols return string[:spos]