-
-
Notifications
You must be signed in to change notification settings - Fork 1.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #731 from pallets/feature/kill-stringdefs
Use str.isidentifier to match idents on python 3
- Loading branch information
Showing
6 changed files
with
140 additions
and
97 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# generated by scripts/generate_identifier_pattern.py | ||
pattern = '·̀-ͯ·҃-֑҇-ׇֽֿׁׂׅׄؐ-ًؚ-ٰٟۖ-ۜ۟-۪ۤۧۨ-ܑۭܰ-݊ަ-ް߫-߳ࠖ-࠙ࠛ-ࠣࠥ-ࠧࠩ-࡙࠭-࡛ࣔ-ࣣ࣡-ःऺ-़ा-ॏ॑-ॗॢॣঁ-ঃ়া-ৄেৈো-্ৗৢৣਁ-ਃ਼ਾ-ੂੇੈੋ-੍ੑੰੱੵઁ-ઃ઼ા-ૅે-ૉો-્ૢૣଁ-ଃ଼ା-ୄେୈୋ-୍ୖୗୢୣஂா-ூெ-ைொ-்ௗఀ-ఃా-ౄె-ైొ-్ౕౖౢౣಁ-ಃ಼ಾ-ೄೆ-ೈೊ-್ೕೖೢೣഁ-ഃാ-ൄെ-ൈൊ-്ൗൢൣංඃ්ා-ුූෘ-ෟෲෳัิ-ฺ็-๎ັິ-ູົຼ່-ໍ༹༘༙༵༷༾༿ཱ-྄྆྇ྍ-ྗྙ-ྼ࿆ါ-ှၖ-ၙၞ-ၠၢ-ၤၧ-ၭၱ-ၴႂ-ႍႏႚ-ႝ፝-፟ᜒ-᜔ᜲ-᜴ᝒᝓᝲᝳ឴-៓៝᠋-᠍ᢅᢆᢩᤠ-ᤫᤰ-᤻ᨗ-ᨛᩕ-ᩞ᩠-᩿᩼᪰-᪽ᬀ-ᬄ᬴-᭄᭫-᭳ᮀ-ᮂᮡ-ᮭ᯦-᯳ᰤ-᰷᳐-᳔᳒-᳨᳭ᳲ-᳴᳸᳹᷀-᷵᷻-᷿‿⁀⁔⃐-⃥⃜⃡-⃰℘℮⳯-⵿⳱ⷠ-〪ⷿ-゙゚〯꙯ꙴ-꙽ꚞꚟ꛰꛱ꠂ꠆ꠋꠣ-ꠧꢀꢁꢴ-ꣅ꣠-꣱ꤦ-꤭ꥇ-꥓ꦀ-ꦃ꦳-꧀ꧥꨩ-ꨶꩃꩌꩍꩻ-ꩽꪰꪲ-ꪴꪷꪸꪾ꪿꫁ꫫ-ꫯꫵ꫶ꯣ-ꯪ꯬꯭ﬞ︀-️︠-︯︳︴﹍-﹏_𐇽𐋠𐍶-𐍺𐨁-𐨃𐨅𐨆𐨌-𐨏𐨸-𐨿𐨺𐫦𐫥𑀀-𑀂𑀸-𑁆𑁿-𑂂𑂰-𑂺𑄀-𑄂𑄧-𑅳𑄴𑆀-𑆂𑆳-𑇊𑇀-𑇌𑈬-𑈷𑈾𑋟-𑋪𑌀-𑌃𑌼𑌾-𑍄𑍇𑍈𑍋-𑍍𑍗𑍢𑍣𑍦-𑍬𑍰-𑍴𑐵-𑑆𑒰-𑓃𑖯-𑖵𑖸-𑗀𑗜𑗝𑘰-𑙀𑚫-𑚷𑜝-𑜫𑰯-𑰶𑰸-𑰿𑲒-𑲧𑲩-𑲶𖫰-𖫴𖬰-𖬶𖽑-𖽾𖾏-𖾒𛲝𛲞𝅥-𝅩𝅭-𝅲𝅻-𝆂𝆅-𝆋𝆪-𝆭𝉂-𝉄𝨀-𝨶𝨻-𝩬𝩵𝪄𝪛-𝪟𝪡-𝪯𞀀-𞀆𞀈-𞀘𞀛-𞀡𞀣𞀤𞀦-𞣐𞀪-𞣖𞥄-𞥊󠄀-󠇯' |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
#!/usr/bin/env python3 | ||
import itertools | ||
import os | ||
import re | ||
import sys | ||
|
||
# Refuse to run under Python 2: the generator relies on ``str.isidentifier``
# and on ``chr`` covering the full Unicode range.
# NOTE(review): the f-strings used further down are a syntax error before
# Python 3.6, so on older interpreters the file fails to parse before this
# check can run.
if sys.version_info < (3,):
    raise RuntimeError('This needs to run on Python 3.')
|
||
|
||
def get_characters():
    r"""Find every Unicode character that is valid in a Python `identifier`_
    but is not matched by the regex ``\w`` group.

    ``\w`` matches some characters that aren't valid in identifiers, but
    :meth:`str.isidentifier` will catch that later in lexing.

    All start characters are valid continue characters, so we only test for
    continue characters.

    Yields one-character strings in ascending code point order.

    .. _identifier: https://docs.python.org/3/reference/lexical_analysis.html#identifiers
    """
    # Compile once up front: the loop below runs for every code point.
    matches_word = re.compile(r'\w').match

    for cp in range(sys.maxunicode + 1):
        s = chr(cp)

        # Prefixing with 'a' tests ``s`` as a continue character rather than
        # as a start character.
        if ('a' + s).isidentifier() and not matches_word(s):
            yield s
|
||
|
||
def collapse_ranges(data):
    """Collapse sorted, unique characters into runs of consecutive code points.

    Each yielded item is a ``(first, last)`` tuple of characters delimiting
    one run; a run of a single character yields that character twice.

    Source: https://stackoverflow.com/a/4629241/400617
    """
    def offset(indexed):
        # Within a run of consecutive code points, code point minus position
        # stays constant, so it serves as the grouping key.
        position, char = indexed
        return ord(char) - position

    for _, run in itertools.groupby(enumerate(data), key=offset):
        members = [char for _, char in run]
        yield members[0], members[-1]
|
||
|
||
def build_pattern(ranges):
    """Render ``(first, last)`` character ranges as regex character-class text.

    Ranges covering one or two characters are written as the individual
    characters; anything longer is written as ``first-last``.
    """
    pieces = []

    for first, last in ranges:
        if first == last:
            # Single character.
            piece = first
        elif ord(last) - ord(first) == 1:
            # Two adjacent characters: a dash would not save anything.
            piece = first + last
        else:
            piece = f'{first}-{last}'

        pieces.append(piece)

    return ''.join(pieces)
|
||
|
||
def main():
    """Generate the identifier pattern and write it to
    :file:`jinja2/_identifier.py`.

    The target path is resolved relative to this script's directory. The
    file is overwritten with a header comment and a single ``pattern``
    assignment.
    """
    characters = get_characters()
    pattern = build_pattern(collapse_ranges(characters))

    script_dir = os.path.dirname(__file__)
    filename = os.path.abspath(
        os.path.join(script_dir, '..', 'jinja2', '_identifier.py')
    )

    with open(filename, 'w', encoding='utf8') as f:
        f.write('# generated by scripts/generate_identifier_pattern.py\n')
        f.write(f'pattern = \'{pattern}\'\n')


# Only regenerate the file when executed as a script, not when imported.
if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters