carpedm20 · TahirJalilov · Oct 31, 2022 · Sep 22, 2022 · Sep 22, 2022 · Sep 23, 2022
diff --git a/README.rst b/README.rst
@@ -32,7 +32,7 @@ both the full list and aliases.
 
 By default, the language is English (``language='en'``) but  also supported languages are:
 
-Spanish (``'es'``), Portuguese (``'pt'``), Italian (``'it'``), French (``'fr'``), German (``'de'``)
+Spanish (``'es'``), Portuguese (``'pt'``), Italian (``'it'``), French (``'fr'``), German (``'de'``), Farsi/Persian (``'fa'``)
 
 
 .. code-block:: python

diff --git a/docs/README.md b/docs/README.md
@@ -23,6 +23,13 @@ pip install -r requirements.txt
 make html
 ```
 
+Check for warnings:
+
+```bash
+make clean
+sphinx-build -n -T -b html . _build
+```
+
 Test code in code blocks:
 
 ```bash

diff --git a/docs/api.rst b/docs/api.rst
@@ -7,8 +7,8 @@ API Reference
    :noindex:
 
 
-+--------------------------------------------------------------------------------------------+
-| Table of Contents                                                                          |
++-----------------------------+--------------------------------------------------------------+
+| Table of Contents           |                                                              |
 +=============================+==============================================================+
 | **Functions:**              |                                                              |
 +-----------------------------+--------------------------------------------------------------+
@@ -28,8 +28,6 @@ API Reference
 +-----------------------------+--------------------------------------------------------------+
 | :func:`version`             | Find Unicode/Emoji version of an emoji                       |
 +-----------------------------+--------------------------------------------------------------+
-| :func:`get_emoji_regexp`    | Returns compiled regular expression that matches all emojis  |
-+-----------------------------+--------------------------------------------------------------+
 | **Module variables:**       |                                                              |
 +-----------------------------+--------------------------------------------------------------+
 | :data:`EMOJI_DATA`          | Dict of all emoji                                            |

diff --git a/docs/index.rst b/docs/index.rst
@@ -54,7 +54,7 @@ Languages
 
 By default, the language is English (``language='en'``) but  also supported languages are:
 
-Spanish (``'es'``), Portuguese (``'pt'``), Italian (``'it'``), French (``'fr'``), German (``'de'``)
+Spanish (``'es'``), Portuguese (``'pt'``), Italian (``'it'``), French (``'fr'``), German (``'de'``), Farsi/Persian (``'fa'``)
 
 .. doctest::
 
@@ -313,8 +313,8 @@ Reference documentation of all functions and properties in the module:
 
    api
 
-+--------------------------------------------------------------------------------------------+
-| API Reference                                                                              |
++-----------------------------+--------------------------------------------------------------+
+| API Reference               |                                                              |
 +=============================+==============================================================+
 | **Functions:**              |                                                              |
 +-----------------------------+--------------------------------------------------------------+
@@ -334,8 +334,6 @@ Reference documentation of all functions and properties in the module:
 +-----------------------------+--------------------------------------------------------------+
 | :func:`version`             | Find Unicode/Emoji version of an emoji                       |
 +-----------------------------+--------------------------------------------------------------+
-| :func:`get_emoji_regexp`    | Returns compiled regular expression that matches all emojis  |
-+-----------------------------+--------------------------------------------------------------+
 | **Module variables:**       |                                                              |
 +-----------------------------+--------------------------------------------------------------+
 | :data:`EMOJI_DATA`          | Dict of all emoji                                            |

diff --git a/docs/requirements.txt b/docs/requirements.txt
@@ -1,2 +1,2 @@
-sphinx>=4.4.0
+sphinx>=5.2.2
 alabaster>=0.7.12
diff --git a/emoji/core.py b/emoji/core.py
@@ -9,6 +9,8 @@
 
 """
 
+import sys
+import unicodedata
 import re
 
 from emoji import unicode_codes
@@ -21,6 +23,14 @@
 
 _SEARCH_TREE = None
 _DEFAULT_DELIMITER = ':'
+_EMOJI_NAME_PATTERN = u'\\w\\-&.’”“()!#*+?–,/«»\u0300\u0301\u0302\u0303\u0308\u030a\u0327\u064b\u064e\u064f\u0650\u0653\u0654'
+_PY2 = sys.version_info[0] == 2
+
+
+def _normalize(form, s):  # thin wrapper around unicodedata.normalize(form, s)
+    if _PY2:
+        s = unicode(s)  # Python 2 only: convert to a unicode object before normalizing
+    return unicodedata.normalize(form, s)
 
 
 def emojize(
@@ -47,7 +57,8 @@ def emojize(
 
     :param string: String contains emoji names.
     :param delimiters: (optional) Use delimiters other than _DEFAULT_DELIMITER. Each delimiter
-        should contain at least one character that is not part of a-zA-Z0-9 and ``_-–&.’”“()!?#*+,/\``
+        should contain at least one character that is not part of a-zA-Z0-9 and ``_-&.()!?#*+,``.
+        See ``emoji.core._EMOJI_NAME_PATTERN`` for the regular expression of unsafe characters.
     :param variant: (optional) Choose variation selector between "base"(None), VS-15 ("text_type") and VS-16 ("emoji_type")
     :param language: Choose language of emoji name: language code 'es', 'de', etc. or 'alias'
         to use English aliases
@@ -78,12 +89,12 @@ def emojize(
     else:
         language_pack = unicode_codes.get_emoji_unicode_dict(language)
 
-    pattern = re.compile(u'(%s[\\w\\-&.’”“()!#*+?–,/ًٌٍَُِّْؤئيإأآةك‌ٔء«»]+%s)' %
-                         (re.escape(delimiters[0]), re.escape(delimiters[1])), flags=re.UNICODE)
+    pattern = re.compile(u'(%s[%s]+%s)' %
+                         (re.escape(delimiters[0]), _EMOJI_NAME_PATTERN, re.escape(delimiters[1])), flags=re.UNICODE)
 
     def replace(match):
-        mg = match.group(1)[len(delimiters[0]):-len(delimiters[1])]
-        emj = language_pack.get(_DEFAULT_DELIMITER + mg + _DEFAULT_DELIMITER)
+        name = match.group(1)[len(delimiters[0]):-len(delimiters[1])]
+        emj = language_pack.get(_DEFAULT_DELIMITER + _normalize('NFKC', name) + _DEFAULT_DELIMITER)
         if emj is None:
             return match.group(1)