Source code for emlib.textlib

"""
Routines for working with text
"""
from __future__ import annotations
import textwrap
import re

from typing import TYPE_CHECKING
if TYPE_CHECKING:
    from typing import Sequence, Callable


[docs] def stripLines(text: str, which='all', splitregex=''): """ Strip leading and trailing empty lines Args: text: the text to work on which: one of 'all', 'top', 'bottom', where all removes all empty lines at the top and bottom, top removes only leading empty lines and bottom removes only trailing empty lines Returns: the transformed text """ lines = text.splitlines() if not splitregex else re.split(text, splitregex) if which == 'all': lines = linesStrip(lines) elif which == 'top': lines = linesStripTop(lines) elif which == 'bottom': lines = linesStripBottom(lines) else: raise ValueError(f"Expected one of 'all', 'top', 'bottom', got '{which}'") return '\n'.join(lines)
[docs] def linesStrip(lines: list[str]) -> list[str]: """ Remove empty lines from the top and bottom Args: lines: lines already split Returns: a list of lines without any empty lines at the beginning and at the end """ startidx, endidx = 0, 0 for startidx, line in enumerate(lines): if line and not line.isspace(): break for endidx, line in enumerate(reversed(lines)): if line and not line.isspace(): break return lines[startidx:len(lines)-endidx]
[docs] def reindent(text: str, prefix: str) -> str: """ Reindent a given text. Replaces the indentation with a new prefix. Args: text: the text to reindent prefix: the new prefix to add to each line Returns: the reindented text """ text = textwrap.dedent(text) if prefix: text = textwrap.indent(text, prefix=prefix) return text
[docs] def getIndentation(code: str) -> int: """ get the number of spaces used to indent code """ for line in code.splitlines(): stripped = line.lstrip() if not stripped: # skip empty lines continue return len(line) - len(stripped) return 0
[docs] def matchIndentation(code: str, modelcode: str) -> str: """ Indent code matching modelcode Args: code: the code to indent modelcode: the code to match Returns: code indented to match modelcode Example ~~~~~~~ >>> a = " # This is some code" >>> b = " # This is some other code" >>> matchIndentation(a, b) ' # This is some code' """ indentation = getIndentation(modelcode) code = textwrap.dedent(code) return textwrap.indent(code, prefix=" " * indentation)
[docs] def linesStripTop(lines: list[str]) -> list[str]: """ Remove empty lines from the top Args: lines: lines already split Returns: a list of lines without any empty lines at the beginning """ for i, line in enumerate(lines): if line and not line.isspace(): break else: return [] return lines[i:]
[docs] def linesStripBottom(lines: list[str], maxlines: int = 0) -> list[str]: """ Strip empty lines from the end of the list Args: lines: lines already split maxlines: the max. number of empty lines to leave at the end Returns: a list of lines with at most `maxlines` empty lines at the end """ for i, line in enumerate(reversed(lines)): if line and not line.isspace(): break if i - maxlines > 0: return lines[:maxlines - i] return lines
[docs] def joinPreservingIndentation(fragments: Sequence[str]) -> str: """ Like join, but preserving indentation Args: fragments: a list of code strings maxEmptyLines: if given, the max. number of empty lines between fragments Returns: the joint code """ if any(not isinstance(fragment, str) for fragment in fragments): fragment = next(_ for _ in fragments if not isinstance(_, str)) raise TypeError(f"Expected a string, got {fragment}") jointtext = "\n".join(textwrap.dedent(frag) for frag in fragments if frag) numspaces = getIndentation(fragments[0]) if numspaces: jointtext = textwrap.indent(jointtext, prefix=" "*numspaces) return jointtext
[docs] def fuzzymatch(pattern: str, strings: list[str] ) -> list[tuple[float, str]]: """ Find possible matches to pattern in ``strings``. This implements a **very** simple algorithm. Returns a subseq. of strings sorted by best score. Only strings representing possible matches are returned Args: pattern: the string to search for within *strings* strings: a list os possible strings Returns: a list of (score, string match) """ pattern = '.*?'.join(map(re.escape, list(pattern))) def calculate_score(pattern: str, s: str) -> float: match = re.search(pattern, s) if match is None: return 0. return 100.0 / ((1 + match.start()) * (match.end() - match.start() + 1)) matches = [(score, s) for s in strings if (score := calculate_score(pattern, s)) > 0] matches.sort(reverse=True) return matches
[docs] def ljust(s: str, width: int, fillchar=" ") -> str: """ Like str.ljust, but ensures that the output is always the given width Even if s is longer than ``width`` """ s = s.ljust(width, fillchar) if len(s) > width: s = s[:width] return s
[docs] def makeReplacer(conditions: dict) -> Callable: """ Create a function to replace many subtrings at once Args: conditions: a dictionary mapping a string to its replacement Returns: a function to be called to produce the given transformation Example ~~~~~~~ Create a function to remove some unwanted characters >>> import emlib.textlib >>> replacer = emlib.textlib.makeReplacer({"[": "", "]": "", '"': '', "'": "", "{": "", "}": ""}) >>> replacer("[foo:'{bar}']") foo:bar """ rep = {re.escape(k): v for k, v in conditions.items()} pattern = re.compile("|".join(rep.keys())) return lambda txt: pattern.sub(lambda m: rep[re.escape(m.group(0))], txt)
[docs] def firstSentence(txt: str) -> str: """ Returns the first sentence from txt Args: txt: the text to analyze Returns: the first sentence Example ~~~~~~~ >>> firstSentence(''' ... ... This is my text. It is amazing ... It continues here ... ''') "This is my text" >>> firstSentence(''' ... ... This is also my text ... It continues here ... ''') "This is also my text" """ txt = txt.strip() lines = txt.splitlines() return lines[0].split('.', maxsplit=1)[0]
[docs] def escapeAnsi(line: str) -> str: """ Escape ansi codes """ return re.compile(r'(\x9B|\x1B\[)[0-?]*[ -\/]*[@-~]').sub('', line)
[docs] def splitInChunks(s: str, maxlen: int) -> list[str]: """ Split `s` into strings of max. size `maxlen` Args: s: the str/bytes to split maxlen: the max. length of each substring Returns: a list of substrings, where each substring has a max. length of *maxlen* """ out = [] idx = 0 L = len(s) while idx < L: n = min(L-idx, maxlen) subs = s[idx:idx+n] out.append(subs) idx += n return out
[docs] def quoteIfNeeded(s: str, quote='', defaultquote='"') -> str: """ Add quotation marks around `s` if needed Args: s: the string which might need quoting quote: which quote sign to use. If not given, it will be detected and if not found a default quote is used defaultquote: quote used when autodetection is used and no quote was found Returns: a string where it is ensured that it is surrounded by `quote` Example ~~~~~~~ >>> quoteIfNeeded('test') "test" >>> quoteIfNeeded("'foo'", "'") 'foo' """ if not quote: s0 = s[0] if s0 == s[-1] and (s0 == '"' or s0 == "'"): return s else: return f'{defaultquote}{s}{defaultquote}' else: if s[0] == s[-1] == quote: return s return f'{quote}{s}{quote}'
_fractions = { (1, 3): "⅓", (2, 3): "⅔", (1, 4): "¼", (2, 4): "½", (3, 4): "¾", (1, 5): "⅕", (2, 5): "⅖", (3, 5): "⅗", (4, 5): "⅘", (1, 6): "⅙", (2, 6): "⅔", (3, 6): "½", (4, 6): "⅔", (5, 6): "⅚", (1, 7): "⅐", (1, 8): "⅛", (3, 8): "⅜", (4, 8): "½", (5, 8): "⅝", (6, 8): "¾", (7, 8): "⅞", (1, 9): "⅑", (3, 9): "⅓", (6, 9): "⅔", (1, 10): "⅒", (2, 10): "⅕", (4, 10): "⅖", }
[docs] def unicodeFraction(numerator: int, denominator: int, simplify=True) -> str: if simplify: from fractions import Fraction frac = Fraction(numerator, denominator) numerator, denominator = frac.numerator, frac.denominator ufraction = _fractions.get((numerator, denominator)) if ufraction is not None: return ufraction return f"{numerator}/{denominator}"