"""
Routines for working with text
"""
from __future__ import annotations
import textwrap
import re
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from typing import Sequence, Callable
[docs]
def stripLines(text: str, which: str = 'all', splitregex: str = '') -> str:
    """
    Strip leading and/or trailing empty lines

    Args:
        text: the text to work on
        which: one of 'all', 'top', 'bottom', where all removes all
            empty lines at the top and bottom, top removes only
            leading empty lines and bottom removes only
            trailing empty lines
        splitregex: if given, a regular expression used to split *text*
            into lines. If empty, ``str.splitlines`` is used

    Returns:
        the transformed text. The remaining lines are always joined
        with a newline, regardless of how they were split

    Raises:
        ValueError: if *which* is not one of 'all', 'top', 'bottom'
    """
    if which not in ('all', 'top', 'bottom'):
        raise ValueError(f"Expected one of 'all', 'top', 'bottom', got '{which}'")
    # NB: re.split expects the pattern first and the string to split second
    lines = re.split(splitregex, text) if splitregex else text.splitlines()
    if which == 'all':
        lines = linesStrip(lines)
    elif which == 'top':
        lines = linesStripTop(lines)
    else:
        lines = linesStripBottom(lines)
    return '\n'.join(lines)
[docs]
def linesStrip(lines: list[str]) -> list[str]:
    """
    Remove empty lines from the top and bottom

    A line is considered empty if it has no characters or consists
    only of whitespace. Empty lines between non-empty lines are kept.

    Args:
        lines: lines already split

    Returns:
        a new list of lines without any empty lines at the beginning and
        at the end. If all lines are empty, an empty list is returned
    """
    start, end = 0, len(lines)
    # Advance past leading blank lines
    while start < end and (not lines[start] or lines[start].isspace()):
        start += 1
    # Walk back over trailing blank lines, never crossing ``start`` so that
    # an all-blank input (including a single blank line) yields []
    while end > start and (not lines[end - 1] or lines[end - 1].isspace()):
        end -= 1
    return lines[start:end]
[docs]
def reindent(text: str, prefix: str) -> str:
    """
    Replace the common indentation of a text with a new prefix

    The text is first dedented (its common leading whitespace is removed)
    and then, if a prefix is given, indented with it.

    Args:
        text: the text to reindent
        prefix: the new prefix to add to each line. If empty, the text
            is only dedented

    Returns:
        the reindented text
    """
    dedented = textwrap.dedent(text)
    return textwrap.indent(dedented, prefix=prefix) if prefix else dedented
[docs]
def getIndentation(code: str) -> int:
    """
    Get the indentation width of the first non-blank line of code

    Args:
        code: the code to inspect

    Returns:
        the number of leading whitespace characters of the first line
        which is not blank, or 0 if there is no such line
    """
    widths = (len(row) - len(row.lstrip())
              for row in code.splitlines()
              if row.lstrip())  # blank lines carry no indentation info
    return next(widths, 0)
[docs]
def matchIndentation(code: str, modelcode: str) -> str:
    """
    Indent code so that it matches the indentation of modelcode

    The code is dedented first and then indented with as many spaces
    as the indentation measured in *modelcode*.

    Args:
        code: the code to indent
        modelcode: the code whose indentation should be matched

    Returns:
        code indented to match modelcode

    Example
    ~~~~~~~

        >>> a = "    # This is some code"
        >>> b = "  # This is some other code"
        >>> matchIndentation(a, b)
        '  # This is some code'
    """
    width = getIndentation(modelcode)
    dedented = textwrap.dedent(code)
    return textwrap.indent(dedented, prefix=" " * width)
[docs]
def linesStripTop(lines: list[str]) -> list[str]:
    """
    Drop the empty lines found at the beginning of a list of lines

    A line counts as empty if it has no characters or is made of
    whitespace only.

    Args:
        lines: lines already split

    Returns:
        a new list starting at the first non-empty line, or an empty
        list if there is no such line
    """
    firstidx = next((idx for idx, row in enumerate(lines)
                     if row and not row.isspace()), None)
    if firstidx is None:
        return []
    return lines[firstidx:]
[docs]
def linesStripBottom(lines: list[str], maxlines: int = 0) -> list[str]:
    """
    Strip empty lines from the end of the list

    A line is considered empty if it has no characters or consists
    only of whitespace.

    Args:
        lines: lines already split
        maxlines: the max. number of empty lines to leave at the end

    Returns:
        a list of lines with at most `maxlines` empty lines at the end.
        If nothing needs to be removed, the original list is returned
    """
    # Count the trailing empty lines explicitly; this also covers an empty
    # list (count is 0) and a list where every line is empty (count is
    # len(lines))
    numempty = 0
    for line in reversed(lines):
        if line and not line.isspace():
            break
        numempty += 1
    excess = numempty - maxlines
    if excess > 0:
        return lines[:-excess]
    return lines
[docs]
def joinPreservingIndentation(fragments: Sequence[str]) -> str:
    """
    Like join, but preserving indentation

    Each non-empty fragment is dedented and the fragments are joined with
    a newline. The result is then indented with spaces to match the
    indentation of the first fragment.

    Args:
        fragments: a list of code strings

    Returns:
        the joint code. An empty string if no fragments are given

    Raises:
        TypeError: if any fragment is not a string
    """
    for fragment in fragments:
        if not isinstance(fragment, str):
            raise TypeError(f"Expected a string, got {fragment}")
    if not fragments:
        # Nothing to join and no first fragment to take the indentation from
        return ""
    jointtext = "\n".join(textwrap.dedent(frag) for frag in fragments if frag)
    numspaces = getIndentation(fragments[0])
    if numspaces:
        jointtext = textwrap.indent(jointtext, prefix=" " * numspaces)
    return jointtext
[docs]
def fuzzymatch(pattern: str, strings: list[str]
               ) -> list[tuple[float, str]]:
    """
    Find possible matches to pattern in ``strings``.

    This is a **very** simple algorithm: a string is a possible match
    if it contains all the characters of *pattern*, in order, with
    anything in between. A match scores higher the earlier it starts
    within the string and the shorter the matched span is.

    Args:
        pattern: the string to search for within *strings*
        strings: a list of possible strings

    Returns:
        a list of (score, string match), sorted by best score first.
        Only strings representing possible matches are included
    """
    # Each char is matched literally, separated by a non-greedy wildcard
    regex = re.compile('.*?'.join(re.escape(char) for char in pattern))
    scored = []
    for candidate in strings:
        found = regex.search(candidate)
        if found is None:
            continue
        start, end = found.span()
        score = 100.0 / ((1 + start) * (end - start + 1))
        if score > 0:
            scored.append((score, candidate))
    return sorted(scored, reverse=True)
[docs]
def ljust(s: str, width: int, fillchar=" ") -> str:
    """
    Left-justify a string so that the output has always the given width

    Like str.ljust, but if s is longer than ``width`` it is truncated
    instead of being returned unchanged.

    Args:
        s: the string to justify
        width: the width of the resulting string
        fillchar: the character used for padding

    Returns:
        the padded or truncated string
    """
    # After padding the string is at least `width` long, so slicing only
    # has an effect when s was longer than width
    return s.ljust(width, fillchar)[:width]
[docs]
def makeReplacer(conditions: dict) -> Callable:
    """
    Create a function to replace many substrings at once

    All replacements are performed in one pass, so the output of one
    replacement is never processed by another. Keys are matched
    literally. If more than one key matches at the same position, the
    key which comes first in *conditions* is used.

    Args:
        conditions: a dictionary mapping a string to its replacement

    Returns:
        a function to be called to produce the given transformation. If
        *conditions* is empty, the returned function leaves its input
        unchanged

    Example
    ~~~~~~~

    Create a function to remove some unwanted characters

        >>> import emlib.textlib
        >>> replacer = emlib.textlib.makeReplacer({"[": "", "]": "", '"': '', "'": "", "{": "", "}": ""})
        >>> replacer("[foo:'{bar}']")
        'foo:bar'
    """
    if not conditions:
        # An empty alternation would match the empty string everywhere
        # and fail when looking up the replacement
        return lambda txt: txt
    # Copy, so that later changes to the given dict do not affect the replacer
    replacements = dict(conditions)
    pattern = re.compile("|".join(re.escape(key) for key in replacements))
    # The matched text is always exactly one of the keys
    return lambda txt: pattern.sub(lambda m: replacements[m.group(0)], txt)
[docs]
def firstSentence(txt: str) -> str:
    """
    Returns the first sentence from txt

    The first sentence is the text of the first non-empty line up to
    (not including) the first period, or the whole line if it has no
    period.

    Args:
        txt: the text to analyze

    Returns:
        the first sentence, or an empty string if txt is empty or
        consists only of whitespace

    Example
    ~~~~~~~

        >>> firstSentence('''
        ...
        ... This is my text. It is amazing
        ... It continues here
        ... ''')
        'This is my text'
        >>> firstSentence('''
        ...
        ... This is also my text
        ... It continues here
        ... ''')
        'This is also my text'
    """
    lines = txt.strip().splitlines()
    if not lines:
        # Nothing left after stripping: there is no first line to inspect
        return ""
    return lines[0].split('.', maxsplit=1)[0]
[docs]
def escapeAnsi(line: str) -> str:
    """
    Remove ansi escape sequences from a line

    Sequences starting with either the single-character introducer
    (``\\x9B``) or the two-character one (``ESC [``) are deleted.

    Args:
        line: the text to clean

    Returns:
        the text without the matched escape sequences
    """
    ansiSequence = r'(\x9B|\x1B\[)[0-?]*[ -\/]*[@-~]'
    return re.sub(ansiSequence, '', line)
[docs]
def splitInChunks(s: str, maxlen: int) -> list[str]:
    """
    Split `s` into strings of max. size `maxlen`

    Args:
        s: the str/bytes to split
        maxlen: the max. length of each substring. Must be at least 1

    Returns:
        a list of substrings, where each substring has a max. length
        of *maxlen*. All substrings but the last have exactly this
        length. An empty `s` results in an empty list

    Raises:
        ValueError: if maxlen is less than 1
    """
    if maxlen < 1:
        # A non-positive chunk size can never consume the input
        raise ValueError(f"maxlen must be at least 1, got {maxlen}")
    return [s[idx:idx + maxlen] for idx in range(0, len(s), maxlen)]
[docs]
def quoteIfNeeded(s: str, quote: str = '', defaultquote: str = '"') -> str:
    """
    Add quotation marks around `s` if needed

    Args:
        s: the string which might need quoting
        quote: which quote sign to use. If not given, it will be detected
            (both single and double quotes are recognized) and if not
            found a default quote is used
        defaultquote: quote used when autodetection is used and no quote was
            found

    Returns:
        a string where it is ensured that it is surrounded by `quote`.
        An empty string results in a pair of quotes

    Example
    ~~~~~~~

        >>> quoteIfNeeded('test')
        '"test"'
        >>> quoteIfNeeded("'foo'", "'")
        "'foo'"
    """
    if quote:
        accepted = (quote,)
        fallback = quote
    else:
        accepted = ('"', "'")
        fallback = defaultquote
    for mark in accepted:
        # The length check ensures that the opening and closing quotes are
        # distinct characters: a lone quote sign is not a quoted string
        if len(s) >= 2 * len(mark) and s.startswith(mark) and s.endswith(mark):
            return s
    return f'{fallback}{s}{fallback}'
# Maps (numerator, denominator) to the corresponding unicode glyph.
# Unreduced fractions are included so that they are also found when
# no simplification is applied
_fractions = {
    (1, 2): "½",
    (1, 3): "⅓",
    (2, 3): "⅔",
    (1, 4): "¼",
    (2, 4): "½",
    (3, 4): "¾",
    (1, 5): "⅕",
    (2, 5): "⅖",
    (3, 5): "⅗",
    (4, 5): "⅘",
    (1, 6): "⅙",
    (2, 6): "⅓",
    (3, 6): "½",
    (4, 6): "⅔",
    (5, 6): "⅚",
    (1, 7): "⅐",
    (1, 8): "⅛",
    (2, 8): "¼",
    (3, 8): "⅜",
    (4, 8): "½",
    (5, 8): "⅝",
    (6, 8): "¾",
    (7, 8): "⅞",
    (1, 9): "⅑",
    (3, 9): "⅓",
    (6, 9): "⅔",
    (1, 10): "⅒",
    (2, 10): "⅕",
    (4, 10): "⅖",
    (5, 10): "½",
    (6, 10): "⅗",
    (8, 10): "⅘",
}


def unicodeFraction(numerator: int, denominator: int, simplify=True) -> str:
    """
    Represent a fraction as a unicode glyph, if possible

    Args:
        numerator: the numerator of the fraction
        denominator: the denominator of the fraction
        simplify: if True, reduce the fraction to its lowest terms
            before looking for a glyph

    Returns:
        the unicode glyph representing the fraction if there is one,
        otherwise a string of the form "numerator/denominator"

    Raises:
        ZeroDivisionError: if simplify is True and denominator is 0

    Example
    ~~~~~~~

        >>> unicodeFraction(2, 4)
        '½'
        >>> unicodeFraction(3, 7)
        '3/7'
    """
    if simplify:
        from fractions import Fraction
        frac = Fraction(numerator, denominator)
        numerator, denominator = frac.numerator, frac.denominator
    ufraction = _fractions.get((numerator, denominator))
    if ufraction is not None:
        return ufraction
    return f"{numerator}/{denominator}"