# Source code for console.viewers

# -*- coding: future_fstrings -*-
'''
    .. console - Comprehensive utility library for ANSI terminals.
    .. © 2020, Mike Miller - Released under the LGPL, version 3+.

    An EXPERIMENTAL module containing an HTML to ANSI sequence converter.
    It supports quick rich-text in scripting applications for those familiar
    with HTML.  Why invent another styling language?

    It is currently only partially useful: there is no CSS class support yet,
    but many inline styles that correspond to terminal capabilities work.
'''
import re
import logging
from os.path import splitext
from enum import Enum, auto

from . import fg, bg, fx, defx
from .utils import make_hyperlink, make_line, make_sized
from .detection import _sized_char_support, get_size

from html.parser import HTMLParser


# Translation table (for str.translate) from printable ASCII code points to
# their Unicode full-width counterparts; the space maps to U+3000.
HALF2FULL = {i: i + 0xFEE0 for i in range(0x21, 0x7F)}  # Wide ASCII map
HALF2FULL[0x20] = 0x3000  # https://stackoverflow.com/a/36693548/450917
log = logging.getLogger(__name__)
debug = log.debug
# tags rendered through the fx/defx effect palettes (h1-h6 are styled as text)
fx_tags = ('b', 'i', 's', 'u', 'em', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'strong')
# tags whose text content is never rendered
skip_data_tags = ('script', 'style', 'title')
# generic block-level tags: each one simply starts/ends a paragraph.
# NOTE: "section" and "table" must remain two separate words; fused together
# they match neither tag.
block_tags = '''address article aside canvas dd div dl dt fieldset figcaption
figure footer form header main nav noscript p section table tfoot video
'''.split()
# blockquote h1-h6 hr pre ul ol li are block-level as well, but get dedicated
# handling in the parser.  https://www.w3schools.com/htmL/html_blocks.asp
multi_whitespace_hunter = re.compile(r'\s\s+')  # runs of 2+ whitespace chars
# Column count reported by get_size(), read once when the module is imported;
# used below for the Figlet font widths and for centering <h1> text.
_width = get_size().columns


# find html header mode for rendering
class HeaderMode(Enum):
    ''' Strategies available for rendering <h1>-<h6> header text.

        Exactly one is chosen at import time and stored in ``header_mode``.
    '''
    NORMAL = auto()     # widened text plus ANSI effects only
    FIGLET = auto()     # large letter art rendered through pyfiglet
    DOUBLE = auto()     # terminal sized-character support (via make_sized)


# Choose how headers get rendered: sized characters when the terminal
# supports them, otherwise pyfiglet when it can be imported, otherwise plain
# text with ANSI effects.
if _sized_char_support:
    header_mode = HeaderMode.DOUBLE
    # keyword arguments handed to make_sized() for each header level
    _double_fonts = {
        'h1': {'double': True},  # + bold
        'h2': {'double': True},
        'h3': {'double': False, 'wide': True},  # + bold
        'h4': {'double': False, 'wide': True},
    }

else:
    try:
        import sys
        if '-i' in sys.argv:  # testing: behave as if pyfiglet were missing
            raise ImportError
        from pyfiglet import Figlet  # gettin' figgy with it, na na na…
        header_mode = HeaderMode.FIGLET
        Figlet.render = Figlet.renderText  # alias, called as font.render()
        _figlet_fonts = {
            'h1': Figlet(font='standard', width=_width),
            'h2': Figlet(font='small', width=_width),
            #~ 'h3': Figlet(font='wideterm', width=_width),
        }
    except ImportError:
        header_mode = HeaderMode.NORMAL


class StringCache(dict):
    ''' Used to cache rendered ANSI color/fx strings with a dictionary lookup
        interface.

        Arguments:
            palette:    object whose attributes are looked up by name and
                        rendered with str().
            **kwargs:   renames applied before the lookup, e.g. em='i'.
                        A value may list several comma-separated attribute
                        names, which are added together, e.g. h1='b,u'.

        A key starting with '#' (hex color) is looked up as the attribute
        named 't' + the remaining characters.
    '''
    def __init__(self, palette, **kwargs):
        super().__init__()
        self._palette = palette
        # allows renames to happen, currently supports em --> i
        self._renames = kwargs

    def __missing__(self, key):
        ''' Not found, render, save, return.

            The result is saved under the key that was asked for as well as
            under the resolved name, so repeated lookups of a renamed key
            (em, h1, #abc, …) are served from the dict instead of being
            rendered again every time.
        '''
        requested = key
        # Slicing keeps one-character and empty keys from raising IndexError.
        if key[:2] in ('h2', 'h4') and _sized_char_support:
            self[requested] = ''  # disable styles on even headers
            return ''

        key = self._renames.get(key, key)
        if key.startswith('#'):                 # handle hex colors
            key = 't' + key[1:]

        entry = None  # there might be more than one, delimited by commas.
        for sub_key in key.split(','):
            next_entry = getattr(self._palette, sub_key)
            if entry:
                entry += next_entry  # add together
            else:
                entry = next_entry

        val = str(entry)  # render palette entry
        self[key] = val
        self[requested] = val
        #~ debug('missing called with: %r, returned %r', key, val)
        return val


class LiteHTMLParser(HTMLParser):
    ''' Parses simple HTML tags, returns as text and ANSI sequences.

        Rendered fragments are appended to ``self.tokens``; the caller joins
        them and clears the list once the text has been fed.

        Example::

            parser = LiteHTMLParser()
            parser.feed(text)
            result = ''.join(parser.tokens)  # build and return final string
            parser.tokens.clear()

    '''
    # Class-level defaults for the per-element state flags; instances shadow
    # them on first assignment.  The mutable containers (tokens, _anchor,
    # _list_stack) are created per instance in __init__ instead, so separate
    # parsers never share output.
    _setting_bg_color = None
    _setting_fg_color = _setting_fg_color_dim = None
    _setting_font_style = _setting_font_weight = None
    _setting_text_decoration_u = _setting_text_decoration_o = None
    _skip_data = None
    _preformatted_data = None
    _list_mode = None       # None, 'ul', or the next <ol> item number
    _blockquote = None
    _in_header = None       # name of the header tag currently open, if any

    def __init__(self, *args, **kwargs):
        ''' Accepts the same arguments as HTMLParser. '''
        super().__init__(*args, **kwargs)
        self.tokens = []        # rendered output fragments
        self._anchor = []       # [href, caption piece, ...] of the open <a>
        self._list_stack = []   # list modes of the enclosing (outer) lists

    def _to_full_width(self, data, is_ascii):
        ''' Converts ASCII characters to their full-width counterpart,
            adds spaces for the rest.
            https://stackoverflow.com/a/36693548/450917
        '''
        if is_ascii:
            data = data.translate(HALF2FULL)  # widen with wide chars
        else:
            data = ' '.join(data)  # widen with spaces
        return data

    def _set_fg_color(self, val):
        ''' Emit the foreground color sequence for val, note it is active. '''
        self.tokens.append(fg_cache[val])
        self._setting_fg_color = True

    def _set_bg_color(self, val):
        ''' Emit the background color sequence for val, note it is active. '''
        self.tokens.append(bg_cache[val])
        self._setting_bg_color = True

    def _set_fg_color_default(self):
        ''' Emit the default foreground sequence and clear the flag. '''
        self.tokens.append(fg_cache['default'])
        self._setting_fg_color = False

    def _set_bg_color_default(self):
        ''' Emit the default background sequence and clear the flag. '''
        self.tokens.append(bg_cache['default'])
        self._setting_bg_color = False

    def _new_paragraph(self, desc='start'):  # max two newlines at a time
        ''' Append just enough newlines that the output ends with a blank
            line, never stacking more than two newlines.  With no output yet,
            a single newline is added.

            desc is not used; call sites pass it as a start/end label.
        '''
        tokens = self.tokens
        if not tokens:
            tokens.append('\n')
            return

        last = tokens[-1]
        if not last.endswith('\n'):
            tokens.append('\n\n')   # in full effect
        elif last.endswith('\n\n'):
            pass                    # cap newlines at two
        elif last == '\n' and len(tokens) > 1 and tokens[-2].endswith('\n'):
            pass                    # penultimate token supplied the other one
        else:
            tokens.append('\n')

    def _handle_start_span(self, attrs):
        ''' Put bulky span/style/css handling here.

            Only the inline ``style`` attribute is inspected; supported
            properties are color, background(-color), font-style: italic,
            font-weight: bold and text-decoration: underline/overline.
        '''
        for key, val in attrs:
            if key != 'style' or not val:  # val is None for a bare "style"
                continue
            for pair in val.split(';'):
                prop, _, prop_val = [
                    x.strip() for x in pair.partition(':')
                ]
                if not prop_val:  # empty declaration, e.g. trailing ';'
                    continue
                if prop == 'color':
                    self._set_fg_color(prop_val)
                elif prop in ('background', 'background-color'):
                    self._set_bg_color(prop_val)
                elif prop == 'font-style' and prop_val == 'italic':
                    self.tokens.append(fx_cache['i'])
                    self._setting_font_style = True
                elif prop == 'font-weight' and prop_val == 'bold':
                    self.tokens.append(fx_cache['b'])
                    self._setting_font_weight = True
                elif prop == 'text-decoration':
                    # the property may carry several space-separated values
                    for decoration in prop_val.split():
                        if decoration == 'underline':
                            self.tokens.append(fx_cache['u'])
                            self._setting_text_decoration_u = True
                        elif decoration == 'overline':
                            self.tokens.append(fx_cache['overline'])
                            self._setting_text_decoration_o = True

    def _handle_header_styles(self, tag, data):
        ''' Header shizzle moved in here.

            Renders header text ``data`` for header ``tag`` according to the
            module-level ``header_mode`` and appends the result to tokens.
        '''
        # inspect data to find best rendition
        try:
            is_ascii = data.isascii()   # 3.7
        except AttributeError:          # :-/
            try:
                data.encode('ascii')
            except UnicodeEncodeError:
                is_ascii = False
            else:
                is_ascii = True

        is_latin1 = False  # only meaningful for non-ASCII text
        if not is_ascii:
            try:
                data.encode('latin1', errors='strict')
            except UnicodeEncodeError:
                pass
            else:
                is_latin1 = True

        # -- Double DECember -------------------------------------------------
        if header_mode is HeaderMode.DOUBLE and (is_ascii or is_latin1):
            font = _double_fonts.get(tag)
            if font:
                data = make_sized(data, **font)
            else:
                data = self._to_full_width(data, is_ascii)
            self.tokens.append(f'{fx_cache[tag]}{data}{dx_cache[tag]}')

        # -- Figlet ----------------------------------------------------------
        elif (
                header_mode is HeaderMode.FIGLET and
                is_ascii and
                tag in _figlet_fonts
            ):
            self.tokens.append(_figlet_fonts[tag].render(data))

        # -- Text + ANSI -----------------------------------------------------
        else:  # normal
            if tag in ('h1', 'h2'):
                data = data.upper()

            data = self._to_full_width(data, is_ascii)
            styled_data = f'{fx_cache[tag]}{data}{dx_cache[tag]}'
            if tag == 'h1':  # Style first, then center, harder than it sounds:
                # NOTE(review): the doubling assumes full-width ASCII; text
                # widened with spaces or Asian-width text is only estimated.
                len_data = len(data) * 2
                padding = (_width - len_data) // 2
                styled_data = f'{padding * " "}{styled_data}'
            self.tokens.append(styled_data)

    def handle_data(self, data):
        ''' Deals with text between and outside the tags.  Cases::

                ' '
                ' word\n    '
                '\n    '
                '\n    word'
                'word', 'word ', ' word'

        '''
        debug('data0: %r', data)
        if self._skip_data:
            pass
        elif self._anchor:
            self._anchor.append(data)  # caption
        elif self._in_header:  # tag, add styling
            self._handle_header_styles(self._in_header, data)
        elif self._preformatted_data:
            self.tokens.append(data.lstrip('\n'))  # new para already
        else:
            tokens = self.tokens
            new_line = bool(tokens) and tokens[-1].endswith('\n')
            if data.startswith('\n'):  # at the end of each line
                data = data.lstrip()
                if tokens and not new_line:
                    data = ' ' + data  # give breathing room
                elif not data:
                    return
                debug('data1: %r', data)

            if new_line:
                data = data.lstrip()
                debug('data2: %r', data)

            # consolidate remaining whitespace to a single space:
            data = multi_whitespace_hunter.sub(' ', data)
            if self._blockquote:  # seems to come in lines
                data = (f'    {fx_cache["dim"]}│{dx_cache["dim"]} '
                        f'{fx_cache["i"]}{data}{dx_cache["i"]}')

            tokens.append(data)
        debug('tokens: %r\n', self.tokens)

    def handle_starttag(self, tag, attrs):
        ''' Emit the sequences and/or update the state for an opening tag. '''
        debug('start tag: %s', tag)
        if tag in fx_tags:
            if tag.startswith('h'):
                self._new_paragraph()
                self._in_header = tag
            else:
                self.tokens.append(fx_cache[tag])
        else:
            if tag == 'span':
                self._handle_start_span(attrs)

            elif tag == 'a':
                for key, val in attrs:
                    # skip a valueless href (val is None); the enclosed text
                    # is then rendered as ordinary data
                    if key == 'href' and val is not None:
                        self._anchor.append(val)  # target

            elif tag == 'br':
                self.tokens.append('\n')

            elif tag == 'c':
                fore = True
                for key, val in attrs:
                    if key == 'on':
                        fore = False
                        continue
                    if key == 'dim' and fore:  # consider dim a color
                        self.tokens.append(fx_cache[key])
                        self._setting_fg_color_dim = True
                    elif fore:
                        self._set_fg_color(key)  # <-- key, not val!
                        fore = False  # 'on' not needed
                    else:
                        self._set_bg_color(key)  # <-- key, not val!

            elif tag == 'font':
                for key, val in attrs:
                    if key == 'color' and val:  # ignore a valueless color
                        self._set_fg_color(val)

            elif tag == 'q':
                self.tokens.append('“')

            elif tag == 'hr':
                self._new_paragraph()
                self.tokens.append(make_line())
                self._new_paragraph()

            elif tag == 'pre':
                self._new_paragraph()
                self._preformatted_data = True

            elif tag == 'blockquote':
                self._new_paragraph()
                self._blockquote = True

            elif tag in ('ul', 'ol'):
                self._new_paragraph()
                # remember the enclosing list so it can resume at the end tag
                self._list_stack.append(self._list_mode)
                self._list_mode = 'ul' if tag == 'ul' else 1

            elif tag == 'li':
                mode = self._list_mode
                if mode in ('ul', None):  # None: stray <li> outside a list
                    bullet = '•'
                else:
                    bullet = f'{mode}.'
                    self._list_mode = mode + 1
                self.tokens.append(f'  {bullet} ')

            elif tag in block_tags:  # behind pre, hr, etc
                self._new_paragraph()

            elif tag in skip_data_tags:
                self._skip_data = True

    def handle_endtag(self, tag):
        ''' Emit the closing sequences and/or reset state for an end tag. '''
        debug('end tag: %s', tag)
        if tag in fx_tags:
            if tag.startswith('h'):
                self._new_paragraph(desc='  end')
                self._in_header = False
            else:
                self.tokens.append(dx_cache[tag])
            if tag == 'h1' and _sized_char_support:  # xterm: one more nl,
                self.tokens.append('\n')  # underline is too close
        else:
            if tag == 'span':  # no elifs, could be multiple
                if self._setting_fg_color:
                    self._set_fg_color_default()
                if self._setting_bg_color:
                    self._set_bg_color_default()
                if self._setting_font_style:
                    self.tokens.append(dx_cache['i'])
                    self._setting_font_style = False
                if self._setting_font_weight:
                    self.tokens.append(dx_cache['b'])
                    self._setting_font_weight = False
                if self._setting_text_decoration_u:
                    self.tokens.append(dx_cache['u'])
                    self._setting_text_decoration_u = False
                if self._setting_text_decoration_o:
                    self.tokens.append(dx_cache['overline'])
                    self._setting_text_decoration_o = False

            elif tag == 'a':
                # Without a recorded href the enclosed text was already
                # emitted as plain data, so there is no link to build.
                if self._anchor:
                    self._set_fg_color('lightblue')
                    target = self._anchor[0]
                    caption = ''.join(self._anchor[1:])
                    self.tokens.append(
                        make_hyperlink(target, caption, icon='')  # 🔗?
                    )
                    self._set_fg_color_default()
                    self._anchor.clear()

            elif tag == 'c':
                if self._setting_fg_color:
                    self._set_fg_color_default()
                if self._setting_bg_color:  # no elif, could be multiple
                    self._set_bg_color_default()
                if self._setting_fg_color_dim:  # consider dim a color
                    self.tokens.append(dx_cache['dim'])
                    self._setting_fg_color_dim = False

            elif tag == 'font':
                if self._setting_fg_color:
                    self._set_fg_color_default()

            elif tag == 'q':
                self.tokens.append('”')

            elif tag == 'pre':
                self._preformatted_data = False
                self._new_paragraph()

            elif tag == 'blockquote':
                self._blockquote = False
                #~ self.tokens.append('”')
                self._new_paragraph()

            elif tag in ('ul', 'ol'):
                # resume the enclosing list, or leave list mode at top level
                if self._list_stack:
                    self._list_mode = self._list_stack.pop()
                else:
                    self._list_mode = None
                self._new_paragraph()

            elif tag == 'li':
                self.tokens.append('\n')

            elif tag in block_tags:
                self._new_paragraph()

            elif tag in skip_data_tags:
                self._skip_data = False

# Tag --> effect-name renames.  The "enable" (fx) and "disable" (defx) caches
# must use the same table, so it is written once and passed to both.
_effect_renames = dict(
    em='i', h1='b,u', h2='b', h3='b', h4='i', h5='b', h6='i', strong='b'
)

fg_cache = StringCache(fg)
bg_cache = StringCache(bg)
fx_cache = StringCache(fx, **_effect_renames)
# disables individual styles
dx_cache = StringCache(defx, **_effect_renames)

# single module-level parser instance shared by hrender() and view()
parser = LiteHTMLParser()


def hprint(*args, newline=False, **kwargs):
    ''' Print function for terminals, with limited HTML support.

        Arguments:
            *args:      strings to render with hrender() and print, one after
                        another with nothing in between.
            newline:    when true, the final terminator is NOT printed.
                        NOTE(review): the name reads inverted; behavior is
                        kept as-is for existing callers.
            end:        terminator printed once after all args; defaults to
                        print()'s own default.
            **kwargs:   remaining keywords are passed to print(), e.g. file
                        and flush.
    '''
    end = kwargs.pop('end', None)

    for arg in args:
        result = hrender(arg)

        debug('called with: %r %s', result, kwargs)
        print(result, end='', **kwargs)

    if not newline:
        # pass kwargs here too, so the terminator goes to the same file (and
        # honors flush) as the text printed above
        print(end=end, **kwargs)


def hrender(text):
    ''' Renders HTML to an ANSI-compatible string.

        Text without a '<' character is returned unchanged, without being
        parsed at all.

        Arguments:
            text:   string, possibly containing the supported HTML subset.

        Returns:
            The rendered string.
    '''
    if '<' not in text:
        return text

    try:
        parser.feed(text)
        # feed() alone may hold back trailing text (e.g. ending in what could
        # be a partial "&entity"); close() forces everything through.
        parser.close()
        return ''.join(parser.tokens)
    finally:
        # always leave the shared parser empty for the next call, even when
        # parsing raised part-way through
        parser.tokens.clear()
        parser.reset()


def view(path):
    ''' Display text files, converting formatting to equivalent ANSI escapes.
        Currently supports limited-HTML only.

        Arguments:
            path:   file to read; only names ending in .html or .htm (in any
                    letter case) are processed.

        Returns:
            The rendered text, or '' for any other file extension.
    '''
    result = ''
    if splitext(path)[1].lower() in ('.html', '.htm'):
        # NOTE(review): the file is decoded with the platform's default
        # encoding; confirm whether UTF-8 should be forced here.
        with open(path) as infile:
            markup = infile.read()
        try:
            parser.feed(markup)
            parser.close()  # flush text that feed() may still be holding
            result = ''.join(parser.tokens)
        finally:
            # leave the shared parser clean, even if parsing failed
            parser.tokens.clear()
            parser.reset()
    return result


if __name__ == '__main__':
    # Demo/self-test: renders a sample document exercising the supported
    # tags.  Pass -d to turn on debug logging.

    import sys

    debug_requested = '-d' in sys.argv
    if debug_requested:
        try:
            import out
            out.configure(level='debug')
        except ImportError:  # fall back to the standard logging module
            fallback_format = (
                '%(levelname)s '
                f'{fx.dim}%(funcName)s:{fg.green}%(lineno)s{fg.default}{defx.dim}'
                ' %(message)s'
            )
            logging.basicConfig(level='DEBUG', format=fallback_format)

    html = '''
    <script> var Mr_Bill = "Oh No!"; // nothing to see here </script>
    <style foo=bar>Dad { how-bout-you: "shut yer big YAPPER" !important; }</style>
    <h1>H1. HTML Print Test</h1>
    <h1>I ♥ ラーメン</h1>
    <c dim>fx:</c> <b>bold</b> <i>italic</i><em>/em</em> <s>strike</s>
    <u>undy</u><br>
    ¶<c dim>(span tag)</c>
    To<span style="color: red">Bill</span> Brasky!
    To <span style="color: red">Bill</span>Brasky!
    <b><span style=background:green>gr&euro;&euro;n</span></b>
      <c dim>(w/ entities)</c>
    <span style="color:cyan;font-style:italic;font-weight:bold">cyan</span>
    <span style="background: yellow; color: black">yellow</span>
    <span style="color:#444;text-decoration:overline;text-decoration: underline">
        #444</span> <c dim>(web/hex colors)</c><br>
    <c dim>(font tag)</c>
    <font color=blue>blue </font> <font color=purple> purple</font><br>
    <c dim>(c/color tag, with web/X11 color names)</c>
    <c orange>l'orange</c>
    <c black on bisque3>bisque3</c>
    <hr>
    <h2>H2. Part II</h2>
    <c #b0b>B0B</c> -&gt; <a href="http://example.com/">click here!</a>
    <p>
        A bit of <q>plain text</q> in its own paragraph.
    </p>
    <pre>
        var canvas = document.getElementById('myCanvas');
        var context = canvas.getContext('2d');
    </pre>
    <blockquote>
        This is a blockquote.
        This is a long line.<br>
        This is a long line.
        This is a long line.
        This is a long line.
    </blockquote>
    <!-- nothing in this comment should be shown, Buh-BYE ! -->
    Hello <h3>H3. woild!</h3><h4>H4. Heré</h4><h5>H5. Here</h5><h6>H6. Here</h6>
    ;-) '''
    hprint(html)
# Source code for console.viewers
#
# console
#
# Navigation
#
# Related Topics