rope/base/worder.py

import bisect
import keyword

import rope.base.simplify

MINIMAL_LEN_FOR_AS = 5


def get_name_at(resource, offset):
    source_code = resource.read()
    word_finder = Worder(source_code)
    return word_finder.get_word_at(offset)


class Worder:
    """A class for finding boundaries of words and expressions

    Note that in these methods, offset should be the index of the
    character not the index of the character after it.

    Some of the methods here doesn't exactly do what their name might lead you
    to think they do, these probably should be fixed. Refer to
    ropetest/codeanalyzetest.py for what these methods returns. Note that
    codeanalyzetest.py documents the current behavior, rather than what they
    should've been.
    """

    def __init__(self, code, handle_ignores=False):
        simplified = rope.base.simplify.real_code(code)
        self.code_finder = _RealFinder(simplified, code)
        self.handle_ignores = handle_ignores
        self.code = code

    def _init_ignores(self):
        ignores = rope.base.simplify.ignored_regions(self.code)
        self.dumb_finder = _RealFinder(self.code, self.code)
        self.starts = [ignored[0] for ignored in ignores]
        self.ends = [ignored[1] for ignored in ignores]

    def _context_call(self, name, offset):
        if self.handle_ignores:
            if not hasattr(self, "starts"):
                self._init_ignores()
            start = bisect.bisect(self.starts, offset)
            if start > 0 and offset < self.ends[start - 1]:
                return getattr(self.dumb_finder, name)(offset)
        return getattr(self.code_finder, name)(offset)

    def get_primary_at(self, offset):
        return self._context_call("get_primary_at", offset)

    def get_word_at(self, offset):
        return self._context_call("get_word_at", offset)

    def get_primary_range(self, offset):
        return self._context_call("get_primary_range", offset)

    def get_split_primary_before(self, offset):
        return self._context_call("get_split_primary_before", offset)

    def get_word_range(self, offset):
        return self._context_call("get_word_range", offset)

    def is_function_keyword_parameter(self, offset):
        return self.code_finder.is_function_keyword_parameter(offset)

    def is_a_class_or_function_name_in_header(self, offset):
        return self.code_finder.is_a_class_or_function_name_in_header(offset)

    def is_from_statement_module(self, offset):
        return self.code_finder.is_from_statement_module(offset)

    def is_from_aliased(self, offset):
        return self.code_finder.is_from_aliased(offset)

    def is_import_statement_aliased_module(self, offset):
        return self.code_finder.is_import_statement_aliased_module(offset)

    def find_parens_start_from_inside(self, offset):
        return self.code_finder.find_parens_start_from_inside(offset)

    def is_a_name_after_from_import(self, offset):
        return self.code_finder.is_a_name_after_from_import(offset)

    def is_from_statement(self, offset):
        return self.code_finder.is_from_statement(offset)

    def get_from_aliased(self, offset):
        return self.code_finder.get_from_aliased(offset)

    def is_import_statement(self, offset):
        return self.code_finder.is_import_statement(offset)

    def is_assigned_here(self, offset):
        return self.code_finder.is_assigned_here(offset)

    def is_a_function_being_called(self, offset):
        return self.code_finder.is_a_function_being_called(offset)

    def get_word_parens_range(self, offset):
        return self.code_finder.get_word_parens_range(offset)

    def is_name_assigned_in_class_body(self, offset):
        return self.code_finder.is_name_assigned_in_class_body(offset)

    def is_on_function_call_keyword(self, offset):
        return self.code_finder.is_on_function_call_keyword(offset)

    def _find_parens_start(self, offset):
        return self.code_finder._find_parens_start(offset)

    def get_parameters(self, first, last):
        return self.code_finder.get_parameters(first, last)

    def get_from_module(self, offset):
        return self.code_finder.get_from_module(offset)

    def is_assigned_in_a_tuple_assignment(self, offset):
        return self.code_finder.is_assigned_in_a_tuple_assignment(offset)

    def get_assignment_type(self, offset):
        return self.code_finder.get_assignment_type(offset)

    def get_function_and_args_in_header(self, offset):
        return self.code_finder.get_function_and_args_in_header(offset)

    def get_lambda_and_args(self, offset):
        return self.code_finder.get_lambda_and_args(offset)

    def find_function_offset(self, offset):
        return self.code_finder.find_function_offset(offset)


class _RealFinder:
    def __init__(self, code, raw):
        self.code = code
        self.raw = raw

    def _find_word_start(self, offset):
        current_offset = offset
        while current_offset >= 0 and self._is_id_char(current_offset):
            current_offset -= 1
        return current_offset + 1

    def _find_word_end(self, offset):
        while offset + 1 < len(self.code) and self._is_id_char(offset + 1):
            offset += 1
        return offset

    def _find_last_non_space_char(self, offset):
        while offset >= 0 and self.code[offset].isspace():
            if self.code[offset] == "\n":
                return offset
            offset -= 1
        return max(-1, offset)

    def get_word_at(self, offset):
        offset = self._get_fixed_offset(offset)
        return self.raw[self._find_word_start(offset) : self._find_word_end(offset) + 1]

    def _get_fixed_offset(self, offset):
        if offset >= len(self.code):
            return offset - 1
        if not self._is_id_char(offset):
            if offset > 0 and self._is_id_char(offset - 1):
                return offset - 1
            if offset < len(self.code) - 1 and self._is_id_char(offset + 1):
                return offset + 1
        return offset

    def _is_id_char(self, offset):
        return self.code[offset].isalnum() or self.code[offset] == "_"

    def _find_string_start(self, offset):
        kind = self.code[offset]
        try:
            return self.code.rindex(kind, 0, offset)
        except ValueError:
            return 0

    def _find_parens_start(self, offset):
        offset = self._find_last_non_space_char(offset - 1)
        while offset >= 0 and self.code[offset] not in "[({":
            if self.code[offset] not in ":,":
                offset = self._find_primary_start(offset)
            offset = self._find_last_non_space_char(offset - 1)
        return offset

    def _find_atom_start(self, offset):
        old_offset = offset
        if self.code[offset] == "\n":
            return offset + 1
        if self.code[offset].isspace():
            offset = self._find_last_non_space_char(offset)
        if self.code[offset] in "'\"":
            return self._find_string_start(offset)
        if self.code[offset] in ")]}":
            return self._find_parens_start(offset)
        if self._is_id_char(offset):
            return self._find_word_start(offset)
        return old_offset

    def _find_primary_without_dot_start(self, offset):
        """It tries to find the undotted primary start

        It is different from `self._get_atom_start()` in that it
        follows function calls, too; such as in ``f(x)``.

        """
        last_atom = offset
        offset = self._find_last_non_space_char(last_atom)
        while offset > 0 and self.code[offset] in ")]":
            last_atom = self._find_parens_start(offset)
            offset = self._find_last_non_space_char(last_atom - 1)
        if offset >= 0 and (self.code[offset] in "\"'})]" or self._is_id_char(offset)):
            atom_start = self._find_atom_start(offset)
            if not keyword.iskeyword(self.code[atom_start : offset + 1]) or (
                offset + 1 < len(self.code) and self._is_id_char(offset + 1)
            ):
                return atom_start
        return last_atom

    def _find_primary_start(self, offset):
        if offset >= len(self.code):
            offset = len(self.code) - 1
        if self.code[offset] != ".":
            offset = self._find_primary_without_dot_start(offset)
        else:
            offset = offset + 1
        while offset > 0:
            prev = self._find_last_non_space_char(offset - 1)
            if offset <= 0 or self.code[prev] != ".":
                break

            # Check if relative import
            # XXX: Looks like a hack...
            prev_word_end = self._find_last_non_space_char(prev - 1)
            if self.code[prev_word_end - 3 : prev_word_end + 1] == "from":
                offset = prev
                break

            offset = self._find_primary_without_dot_start(prev - 1)
            if not self._is_id_char(offset):
                break

        return offset

    def get_primary_at(self, offset):
        offset = self._get_fixed_offset(offset)
        start, end = self.get_primary_range(offset)
        return self.raw[start:end].strip()

    def get_split_primary_before(self, offset):
        """returns expression, starting, starting_offset

        This function is used in `rope.codeassist.assist` function.
        """
        if offset == 0:
            return ("", "", 0)
        end = offset - 1
        word_start = self._find_atom_start(end)
        real_start = self._find_primary_start(end)
        if self.code[word_start:offset].strip() == "":
            word_start = end
        if self.code[end].isspace():
            word_start = end
        if self.code[real_start:word_start].strip() == "":
            real_start = word_start
        if real_start == word_start == end and not self._is_id_char(end):
            return ("", "", offset)
        if real_start == word_start:
            return ("", self.raw[word_start:offset], word_start)
        else:
            if self.code[end] == ".":
                return (self.raw[real_start:end], "", offset)
            last_dot_position = word_start
            if self.code[word_start] != ".":
                last_dot_position = self._find_last_non_space_char(word_start - 1)
            last_char_position = self._find_last_non_space_char(last_dot_position - 1)
            if self.code[word_start].isspace():
                word_start = offset
            return (
                self.raw[real_start : last_char_position + 1],
                self.raw[word_start:offset],
                word_start,
            )

    def _get_line_start(self, offset):
        try:
            return self.code.rindex("\n", 0, offset + 1)
        except ValueError:
            return 0

    def _get_line_end(self, offset):
        try:
            return self.code.index("\n", offset)
        except ValueError:
            return len(self.code)

    def is_name_assigned_in_class_body(self, offset):
        word_start = self._find_word_start(offset - 1)
        word_end = self._find_word_end(offset) + 1
        if "." in self.code[word_start:word_end]:
            return False
        line_start = self._get_line_start(word_start)
        line = self.code[line_start:word_start].strip()
        return not line and self.get_assignment_type(offset) == "="

    def is_a_class_or_function_name_in_header(self, offset):
        word_start = self._find_word_start(offset - 1)
        line_start = self._get_line_start(word_start)
        prev_word = self.code[line_start:word_start].strip()
        return prev_word in ["def", "class"]

    def _find_first_non_space_char(self, offset):
        if offset >= len(self.code):
            return len(self.code)
        while offset < len(self.code) and self.code[offset].isspace():
            if self.code[offset] == "\n":
                return offset
            offset += 1
        return offset

    def is_a_function_being_called(self, offset):
        word_end = self._find_word_end(offset) + 1
        next_char = self._find_first_non_space_char(word_end)
        return (
            next_char < len(self.code)
            and self.code[next_char] == "("
            and not self.is_a_class_or_function_name_in_header(offset)
        )

    def _find_import_end(self, start):
        return self._get_line_end(start)

    def is_import_statement(self, offset):
        try:
            last_import = self.code.rindex("import ", 0, offset)
        except ValueError:
            return False
        line_start = self._get_line_start(last_import)
        return (
            self._find_import_end(last_import + 7) >= offset
            and self._find_word_start(line_start) == last_import
        )

    def is_from_statement(self, offset):
        try:
            last_from = self.code.rindex("from ", 0, offset)
            from_import = self.code.index(" import ", last_from)
            from_names = from_import + 8
        except ValueError:
            return False
        from_names = self._find_first_non_space_char(from_names)
        return self._find_import_end(from_names) >= offset

    def is_from_statement_module(self, offset):
        if offset >= len(self.code) - 1:
            return False
        stmt_start = self._find_primary_start(offset)
        line_start = self._get_line_start(stmt_start)
        prev_word = self.code[line_start:stmt_start].strip()
        return prev_word == "from"

    def is_import_statement_aliased_module(self, offset):
        if not self.is_import_statement(offset):
            return False
        try:
            line_start = self._get_line_start(offset)
            import_idx = self.code.rindex("import", line_start, offset)
            imported_names = import_idx + 7
        except ValueError:
            return False
        # Check if the offset is within the imported names
        if (
            imported_names - 1 > offset
            or self._find_import_end(imported_names) < offset
        ):
            return False
        try:
            end = self._find_import_main_part_end(offset)
            if not self._has_enough_len_for_as(end):
                return False
            as_end = min(self._find_word_end(end + 1), len(self.code))
            as_start = self._find_word_start(as_end)
            return self.code[as_start : as_end + 1] == "as"
        except ValueError:
            return False

    def _has_enough_len_for_as(self, end):
        return len(self.code) > end + MINIMAL_LEN_FOR_AS

    def _find_import_main_part_end(self, offset):
        end = self._find_word_end(offset)
        while len(self.code) > end + 2 and self.code[end + 1] == ".":
            end = self._find_word_end(end + 2)
        return end

    def is_a_name_after_from_import(self, offset):
        try:
            if len(self.code) > offset and self.code[offset] == "\n":
                line_start = self._get_line_start(offset - 1)
            else:
                line_start = self._get_line_start(offset)
            last_from = self.code.rindex("from ", line_start, offset)
            from_import = self.code.index(" import ", last_from)
            from_names = from_import + 8
        except ValueError:
            return False
        if from_names - 1 > offset:
            return False
        return self._find_import_end(from_names) >= offset

    def get_from_module(self, offset):
        try:
            last_from = self.code.rindex("from ", 0, offset)
            import_offset = self.code.index(" import ", last_from)
            end = self._find_last_non_space_char(import_offset)
            return self.get_primary_at(end)
        except ValueError:
            pass

    def is_from_aliased(self, offset):
        if not self.is_a_name_after_from_import(offset):
            return False
        try:
            end = self._find_word_end(offset)
            as_end = min(self._find_word_end(end + 1), len(self.code))
            as_start = self._find_word_start(as_end)
            return self.code[as_start : as_end + 1] == "as"
        except ValueError:
            return False

    def get_from_aliased(self, offset):
        try:
            end = self._find_word_end(offset)
            as_ = self._find_word_end(end + 1)
            alias = self._find_word_end(as_ + 1)
            start = self._find_word_start(alias)
            return self.raw[start : alias + 1]
        except ValueError:
            pass

    def is_function_keyword_parameter(self, offset):
        word_end = self._find_word_end(offset)
        if word_end + 1 == len(self.code):
            return False
        next_char = self._find_first_non_space_char(word_end + 1)
        equals = self.code[next_char : next_char + 2]
        if equals == "==" or not equals.startswith("="):
            return False
        word_start = self._find_word_start(offset)
        prev_char = self._find_last_non_space_char(word_start - 1)
        return prev_char - 1 >= 0 and self.code[prev_char] in ",("

    def is_on_function_call_keyword(self, offset):
        stop = self._get_line_start(offset)
        if self._is_id_char(offset):
            offset = self._find_word_start(offset) - 1
        offset = self._find_last_non_space_char(offset)
        if offset <= stop or self.code[offset] not in "(,":
            return False
        parens_start = self.find_parens_start_from_inside(offset)
        return stop < parens_start

    def find_parens_start_from_inside(self, offset):
        stop = self._get_line_start(offset)
        while offset > stop:
            if self.code[offset] == "(":
                break
            if self.code[offset] != ",":
                offset = self._find_primary_start(offset)
            offset -= 1
        return max(stop, offset)

    def is_assigned_here(self, offset):
        return self.get_assignment_type(offset) is not None

    def get_assignment_type(self, offset):
        # XXX: does not handle tuple assignments
        word_end = self._find_word_end(offset)
        next_char = self._find_first_non_space_char(word_end + 1)
        single = self.code[next_char : next_char + 1]
        double = self.code[next_char : next_char + 2]
        triple = self.code[next_char : next_char + 3]
        if double not in ("==", "<=", ">=", "!="):
            for op in [single, double, triple]:
                if op.endswith("="):
                    return op

    def get_primary_range(self, offset):
        start = self._find_primary_start(offset)
        end = self._find_word_end(offset) + 1
        return (start, end)

    def get_word_range(self, offset):
        offset = max(0, offset)
        start = self._find_word_start(offset)
        end = self._find_word_end(offset) + 1
        return (start, end)

    def get_word_parens_range(self, offset, opening="(", closing=")"):
        end = self._find_word_end(offset)
        start_parens = self.code.index(opening, end)
        index = start_parens
        open_count = 0
        while index < len(self.code):
            if self.code[index] == opening:
                open_count += 1
            if self.code[index] == closing:
                open_count -= 1
            if open_count == 0:
                return (start_parens, index + 1)
            index += 1
        return (start_parens, index)

    def get_parameters(self, first, last):
        keywords = []
        args = []
        current = self._find_last_non_space_char(last - 1)
        while current > first:
            primary_start = current
            current = self._find_primary_start(current)
            while current != first and (
                self.code[current] not in "=," or self.code[current - 1] in "=!<>"
            ):
                current = self._find_last_non_space_char(current - 1)
            primary = self.raw[current + 1 : primary_start + 1].strip()
            if self.code[current] == "=":
                primary_start = current - 1
                current -= 1
                while current != first and self.code[current] not in ",":
                    current = self._find_last_non_space_char(current - 1)
                param_name = self.raw[current + 1 : primary_start + 1].strip()
                keywords.append((self.__strip_type_hint(param_name), primary))
            else:
                args.append(self.__strip_type_hint(primary))
            current = self._find_last_non_space_char(current - 1)
        args.reverse()
        keywords.reverse()
        return args, keywords

    def __strip_type_hint(self, name):
        return name.split(":", 1)[0]

    def is_assigned_in_a_tuple_assignment(self, offset):
        start = self._get_line_start(offset)
        end = self._get_line_end(offset)
        primary_start = self._find_primary_start(offset)
        primary_end = self._find_word_end(offset)

        prev_char_offset = self._find_last_non_space_char(primary_start - 1)
        next_char_offset = self._find_first_non_space_char(primary_end + 1)
        next_char = prev_char = ""
        if prev_char_offset >= start:
            prev_char = self.code[prev_char_offset]
        if next_char_offset < end:
            next_char = self.code[next_char_offset]
        try:
            equals_offset = self.code.index("=", start, end)
        except ValueError:
            return False
        if prev_char not in "(," and next_char not in ",)":
            return False
        parens_start = self.find_parens_start_from_inside(offset)
        # XXX: only handling (x, y) = value
        return offset < equals_offset and self.code[start:parens_start].strip() == ""

    def get_function_and_args_in_header(self, offset):
        offset = self.find_function_offset(offset)
        lparens, rparens = self.get_word_parens_range(offset)
        return self.raw[offset : rparens + 1]

    def find_function_offset(self, offset, definition="def "):
        while True:
            offset = self.code.index(definition, offset)
            if offset == 0 or not self._is_id_char(offset - 1):
                break
            offset += 1
        def_ = offset + 4
        return self._find_first_non_space_char(def_)

    def get_lambda_and_args(self, offset):
        offset = self.find_function_offset(offset, definition="lambda ")
        lparens, rparens = self.get_word_parens_range(offset, opening=" ", closing=":")
        return self.raw[offset : rparens + 1]