xdev.regex_builder module

Helpers to build cross-flavor regular expressions.

class xdev.regex_builder.RegexBuilder[source]

Bases: object

Notes

The way to have multiple negative lookaheads/lookbehinds is to chain them together (see Stack Overflow question 12689046).

References

Example

>>> b = RegexBuilder.coerce('python')
>>> import re
>>> pat = re.compile('[A-Z-]+')

common_patterns = [{'docs': 'An alphanumeric word, i.e. [a-zA-Z0-9_] (also matches unicode characters in Python)', 'key': 'word', 'pattern': '\\w'}, {'docs': 'Anything not a word', 'key': 'non-word', 'pattern': '\\W'}, {'docs': 'Any space character including: " " "\\t", "\\n", "\\r"', 'key': 'space', 'pattern': '\\s'}, {'docs': 'Any non-space character', 'key': 'non-space', 'pattern': '\\S'}, {'docs': 'any number 0-9', 'key': 'digit', 'pattern': '\\d'}, {'docs': 'any non-digit', 'key': 'digit', 'pattern': '\\D'}, {'alias': ['kleene_star'], 'docs': 'zero or more of the pattern to the left', 'key': 'zero_or_more', 'pattern': '*'}]
lookahead(pat, positive=True, mode='positive')[source]

A lookahead pattern that can be positive or negative.

lookbehind(pat, positive=True)[source]

A lookbehind pattern that can be positive or negative

named_field(pat, name=None)[source]
bref_field(name)[source]
escape(pat)[source]
optional(pat)[source]
group(pat)[source]
oneof(*paterns)[source]
classmethod coerce(backend='python')[source]
property identifier

A word, except it must start with a letter or underscore (not a number)

References

https://stackoverflow.com/questions/5474008/regular-expression-to-confirm-whether-a-string-is-a-valid-python-identifier

Example

>>> from xdev.regex_builder import *  # NOQA
>>> b = PythonRegexBuilder()
>>> assert re.match(b.identifier, 'hello')
>>> assert re.match(b.identifier, 'hello')
>>> assert re.match(b.identifier, '𝛣_ello')
>>> assert re.match(b.identifier, 'h_1e8llo')
>>> assert not re.match(b.identifier, '1hello')
property hex

A case-independent hex character

property word
property whitespace
property nongreedy
property number

Matches a generic floating-point number.

References

https://www.regular-expressions.info/floatingpoint.html

Example

>>> from xdev.regex_builder import *  # NOQA
>>> b = PythonRegexBuilder()
>>> pat = re.compile('^' + b.number + '$')
>>> assert pat.match('3.4')
>>> assert pat.match('3.4e-1')
>>> assert pat.match('3.4')
>>> assert pat.match('3.4e+1')
>>> assert not pat.match('3.4a+1')
>>> b = PythonRegexBuilder()
>>> num_part = b.named_field(b.number, name='number')
>>> space_part = b.named_field(' *', name='spaces')
>>> unit_part = b.named_field('.*', name='unit')
>>> pat = re.compile('^' + num_part + space_part + unit_part + '$')
>>> pat.match('3.4').groupdict()
>>> pat.match('3.1415 foobars').groupdict()
>>> pat.match('3.1415foobars').groupdict()
>>> pat.match('+3.1415e9foobars').groupdict()
class xdev.regex_builder.VimRegexBuilder[source]

Bases: RegexBuilder

Reference: https://dev.to/iggredible/learning-vim-regex-26ep

vim_patterns = [{'alias': ['nongreedy_kleene_star'], 'docs': 'non-greedily matches zero or more of the pattern to the left', 'key': 'nongreedy_zero_or_more', 'pattern': '\\{-}'}]
previous(min=None, max=None, exact=None, greedy=True)[source]

Match the previous pattern some number of times.

Parameters:
  • min (int | None) – minimum number of matches

  • max (int | None) – maximum number of matches

  • exact (int | None) – exact number of matches. Mutually exclusive with min and max.

  • greedy (bool) – if True match as many as possible, otherwise match as few as possible

Example

>>> from xdev.regex_builder import *  # NOQA
>>> b = VimRegexBuilder()
>>> assert b.previous(exact=1) == r'\{1}'
>>> assert b.previous(min=1, max=3) == r'\{1,3}'
>>> assert b.previous(min=1, max=3, greedy=False) == r'\{-1,3}'
>>> assert b.previous(max=3) == r'\{,3}'
>>> assert b.previous(min=3) == r'\{3,}'
>>> assert b.previous() == '*'
>>> assert b.previous(greedy=False) == r'\{-}'
class xdev.regex_builder.PythonRegexBuilder[source]

Bases: RegexBuilder

Contains helper methods to construct a regex

Example

>>> b = PythonRegexBuilder()
>>> pat_text = b.lookbehind('_') + r'v\d+' + b.optional(b.lookahead('_'))
>>> pat = re.compile(pat_text)
>>> print(pat.search('_v321_').group())
v321
>>> print(pat.search('_v321').group())
v321
>>> print(pat.search('fdsfds_v321_fdsfsd').group())
v321
>>> print(pat.search('fdsfds_v321fdsfsd').group())
v321
>>> print(pat.search('fdsfdsv321fdsfsd'))
None

Example

>>> # Test multiple negative lookbehind
>>> b = PythonRegexBuilder()
>>> suffix = 'foo'
>>> neg_prefix1 = b.lookbehind('abc', positive=0)
>>> neg_prefix2 = b.lookbehind('efg', positive=0)
>>> pat1 = re.compile(neg_prefix1 + suffix)
>>> pat2 = re.compile(neg_prefix2 + suffix)
>>> patB = re.compile(neg_prefix1 + neg_prefix2 + suffix)
>>> cases = ['abcfoo', 'efgfoo', 'hijfoo', 'foo']
>>> print([bool(pat1.search(c)) for c in cases])
>>> print([bool(pat2.search(c)) for c in cases])
>>> print([bool(patB.search(c)) for c in cases])
[False, True, True, True]
[True, False, True, True]
[False, False, True, True]

References

https://www.dataquest.io/blog/regex-cheatsheet/

https://docs.python.org/3/library/re.html#regular-expression-syntax

python_patterns = [{'alias': ['nongreedy_kleene_star'], 'docs': 'non-greedily matches zero or more of the pattern to the left', 'key': 'nongreedy_zero_or_more', 'pattern': '*?'}, {'docs': 'The boundary at the start or end of a word', 'key': 'boundary', 'pattern': '\\b'}, {'key': 'non-boundary', 'pattern': '\\B'}, {'key': 'left-expr', 'pattern': '\\A'}, {'docs': 'Matches only at the end of the string', 'key': 'right-expr', 'pattern': '\\Z'}]
previous(min=None, max=None, exact=None, greedy=True)[source]

Match the previous pattern some number of times.

Parameters:
  • min (int | None) – minimum number of matches

  • max (int | None) – maximum number of matches

  • exact (int | None) – exact number of matches. Mutually exclusive with min and max.

  • greedy (bool) – if True match as many as possible, otherwise match as few as possible

Example

>>> from xdev.regex_builder import *  # NOQA
>>> b = PythonRegexBuilder()
>>> assert b.previous(exact=1) == '{1}'
>>> assert b.previous(min=1, max=3) == '{1,3}'
>>> assert b.previous(min=1, max=3, greedy=False) == '{1,3}?'
>>> assert b.previous(max=3) == '{,3}'
>>> assert b.previous(min=3) == '{3,}'
>>> assert b.previous() == '*'
>>> assert b.previous(greedy=False) == '*?'

Example

>>> from xdev.regex_builder import *  # NOQA
>>> b = PythonRegexBuilder()
>>> assert re.compile('a' + b.previous(exact=2) + '$').match('aa')
>>> assert not re.compile('a' + b.previous(exact=2) + '$').match('aaa')
>>> assert not re.compile('a' + b.previous(exact=2) + '$').match('a')