Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pre-process parsed docstrings to add hints on how to resolve names #723

Draft
wants to merge 14 commits into
base: master
Choose a base branch
from
11 changes: 7 additions & 4 deletions pydoctor/astbuilder.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ def visit_Module(self, node: ast.Module) -> None:
def depart_Module(self, node: ast.Module) -> None:
self._infer_attr_annotations(self.builder.current)
self.builder.pop(self.module)
epydoc2stan.transform_parsed_names(self.module)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be done in post-processing


def visit_ClassDef(self, node: ast.ClassDef) -> None:
# Ignore classes within functions.
Expand Down Expand Up @@ -1020,12 +1021,12 @@ class _ValueFormatter:
"""

def __init__(self, value: ast.expr, ctx: model.Documentable):
self._colorized = colorize_inline_pyval(value)
self.parsed = colorize_inline_pyval(value)
"""
The colorized value as L{ParsedDocstring}.
"""

self._linker = ctx.docstring_linker
self.linker = ctx.docstring_linker
"""
Linker.
"""
Expand All @@ -1038,15 +1039,15 @@ def __repr__(self) -> str:
# Using node2stan.node2html instead of flatten(to_stan()).
# This avoids calling flatten() twice,
# but potential XML parser errors caused by XMLString needs to be handled later.
return ''.join(node2stan.node2html(self._colorized.to_node(), self._linker))
return ''.join(node2stan.node2html(self.parsed.to_node(), self.linker))

class _AnnotationValueFormatter(_ValueFormatter):
"""
Special L{_ValueFormatter} for function annotations.
"""
def __init__(self, value: ast.expr, ctx: model.Function):
super().__init__(value, ctx)
self._linker = linker._AnnotationLinker(ctx)
self.linker = linker._AnnotationLinker(ctx)

def __repr__(self) -> str:
"""
Expand All @@ -1059,6 +1060,8 @@ def __repr__(self) -> str:
class ASTBuilder:
"""
Keeps tracks of the state of the AST build, creates documentable and adds objects to the system.

One ASTBuilder instance is only suitable to build one Module.
"""
ModuleVistor = ModuleVistor

Expand Down
1 change: 1 addition & 0 deletions pydoctor/epydoc/markup/_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from docutils import nodes
from twisted.web.template import Tag, tags

# TODO: this class should support to_node() like others.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should create an issue for that

class ParsedTypeDocstring(TypeDocstring, ParsedDocstring):
"""
Add L{ParsedDocstring} interface on top of L{TypeDocstring} and
Expand Down
246 changes: 219 additions & 27 deletions pydoctor/epydoc2stan.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,21 @@

from collections import defaultdict
import enum
import inspect
import builtins
from itertools import chain
from typing import (
TYPE_CHECKING, Any, Callable, ClassVar, DefaultDict, Dict, Generator,
Iterator, List, Mapping, Optional, Sequence, Tuple, Union,
Iterator, List, Mapping, Optional, Sequence, Tuple, TypeVar, Union,
)
import ast
import re

import attr
from docutils.transforms import Transform
from docutils import nodes

from pydoctor import model, linker, node2stan
from pydoctor.node2stan import parse_reference
from pydoctor.astutils import is_none_literal
from pydoctor.epydoc.markup import Field as EpydocField, ParseError, get_parser_by_name, processtypes
from twisted.web.template import Tag, tags
Expand Down Expand Up @@ -266,18 +271,19 @@ def __init__(self, obj: model.Documentable):
self.sinces: List[Field] = []
self.unknowns: DefaultDict[str, List[FieldDesc]] = defaultdict(list)

def set_param_types_from_annotations(
self, annotations: Mapping[str, Optional[ast.expr]]
) -> None:
def set_param_types_from_annotations(self) -> None:
if not isinstance(self.obj, model.Function):
return
annotations = self.obj.annotations
_linker = linker._AnnotationLinker(self.obj)
formatted_annotations = {
name: None if value is None
else ParamType(safe_to_stan(colorize_inline_pyval(value), _linker,
name: None if parsed_annotation is None
else ParamType(safe_to_stan(parsed_annotation, _linker,
self.obj, fallback=colorized_pyval_fallback, section='annotation', report=False),
# don't spam the log, invalid annotation are going to be reported when the signature gets colorized
origin=FieldOrigin.FROM_AST)

for name, value in annotations.items()
for name, parsed_annotation in get_parsed_annotations(self.obj).items()
}

ret_type = formatted_annotations.pop('return', None)
Expand Down Expand Up @@ -794,8 +800,7 @@ def format_docstring(obj: model.Documentable) -> Tag:
ret(unwrap_docstring_stan(stan))

fh = FieldHandler(obj)
if isinstance(obj, model.Function):
fh.set_param_types_from_annotations(obj.annotations)
fh.set_param_types_from_annotations()
if source is not None:
assert obj.parsed_docstring is not None, "ensure_parsed_docstring() did not do it's job"
for field in obj.parsed_docstring.fields:
Expand Down Expand Up @@ -874,20 +879,90 @@ def type2stan(obj: model.Documentable) -> Optional[Tag]:
return safe_to_stan(parsed_type, _linker, obj,
fallback=colorized_pyval_fallback, section='annotation')

_T = TypeVar('_T')
def _memoize(o:object, attrname:str, getter:Callable[[], _T]) -> _T:
parsed = getattr(o, attrname, None)
if parsed is not None:
return parsed #type:ignore
parsed = getter()
setattr(o, attrname, parsed)
return parsed

def get_parsed_type(obj: model.Documentable) -> Optional[ParsedDocstring]:
"""
Get the type of this attribute as parsed docstring.
"""
parsed_type = obj.parsed_type
if parsed_type is not None:
return parsed_type
def _get_parsed_type() -> Optional[ParsedDocstring]:
annotation = getattr(obj, 'annotation', None)
if annotation is not None:
v = colorize_inline_pyval(annotation)
reportWarnings(obj, v.warnings, section='colorize annotation')
return v
return None
return _memoize(obj, 'parsed_type', _get_parsed_type)

# Only Attribute instances have the 'annotation' attribute.
annotation: Optional[ast.expr] = getattr(obj, 'annotation', None)
if annotation is not None:
return colorize_inline_pyval(annotation)
def get_parsed_decorators(obj: Union[model.Attribute, model.Function,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Once #640 is merged, this should be changed to the following:

Suggested change
def get_parsed_decorators(obj: Union[model.Attribute, model.Function,
def get_parsed_decorators(obj: Union[model.Property, model.Function,

model.FunctionOverload]) -> Optional[Sequence[ParsedDocstring]]:
"""
Get the decorators of this function as parsed docstring.
"""
def _get_parsed_decorators() -> Optional[Sequence[ParsedDocstring]]:
v = [colorize_inline_pyval(dec) for dec in obj.decorators] if \
obj.decorators is not None else None
documentable_obj = obj if not isinstance(obj, model.FunctionOverload) else obj.primary
for c in v or ():
if c:
reportWarnings(documentable_obj, c.warnings, section='colorize decorators')
return v
return _memoize(obj, 'parsed_decorators', _get_parsed_decorators)

def get_parsed_value(obj:model.Attribute) -> Optional[ParsedDocstring]:
"""
Get the value of this constant as parsed docstring.
"""
def _get_parsed_value() -> Optional[ParsedDocstring]:
v = colorize_pyval(obj.value,
linelen=obj.system.options.pyvalreprlinelen,
maxlines=obj.system.options.pyvalreprmaxlines) if obj.value is not None else None
# Report eventual warnings.
if v:
reportWarnings(obj, v.warnings, section='colorize constant')
return v
return _memoize(obj, 'parsed_value', _get_parsed_value)

def get_parsed_annotations(obj:model.Function) -> Mapping[str, Optional[ParsedDocstring]]:
"""
Get the annotations of this function as dict from str to parsed docstring.
"""
def _get_parsed_annotations() -> Mapping[str, Optional[ParsedDocstring]]:
return {name:colorize_inline_pyval(ann) if ann else None for \
(name, ann) in obj.annotations.items()}
# do not warn here
return _memoize(obj, 'parsed_annotations', _get_parsed_annotations)

return None
def get_parsed_bases(obj:model.Class) -> Sequence[ParsedDocstring]:
"""
Get the bases of this class as a seqeunce of parsed docstrings.
"""
def _get_parsed_bases() -> Sequence[ParsedDocstring]:
r = []
for (str_base, base_node), base_obj in zip(obj.rawbases, obj.baseobjects):
# Make sure we bypass the linker’s resolver process for base object,
# because it has been resolved already (with two passes).
# Otherwise, since the class declaration wins over the imported names,
# a class with the same name as a base class confused pydoctor and it would link
# to it self: https://github.com/twisted/pydoctor/issues/662
refmap = None
if base_obj is not None:
refmap = {str_base:base_obj.fullName()}

# link to external class, using the colorizer here
# to link to classes with generics (subscripts and other AST expr).
p = colorize_inline_pyval(base_node, refmap=refmap)
r.append(p)
reportWarnings(obj, p.warnings, section='colorize bases')
return r
return _memoize(obj, 'parsed_bases', _get_parsed_bases)

def format_toc(obj: model.Documentable) -> Optional[Tag]:
# Load the parsed_docstring if it's not already done.
Expand Down Expand Up @@ -985,23 +1060,19 @@ def colorized_pyval_fallback(_: List[ParseError], doc:ParsedDocstring, __:model.
return Tag('code')(node2stan.gettext(doc.to_node()))

def _format_constant_value(obj: model.Attribute) -> Iterator["Flattenable"]:

doc = get_parsed_value(obj)
if doc is None:
return

# yield the table title, "Value"
row = tags.tr(class_="fieldStart")
row(tags.td(class_="fieldName")("Value"))
# yield the first row.
yield row

doc = colorize_pyval(obj.value,
linelen=obj.system.options.pyvalreprlinelen,
maxlines=obj.system.options.pyvalreprmaxlines)

value_repr = safe_to_stan(doc, obj.docstring_linker, obj,
fallback=colorized_pyval_fallback, section='rendering of constant')

# Report eventual warnings. It warns when a regex failed to parse.
reportWarnings(obj, doc.warnings, section='colorize constant')

# yield the value repr.
row = tags.tr()
row(tags.td(tags.pre(class_='constant-value')(value_repr)))
Expand Down Expand Up @@ -1146,6 +1217,127 @@ def populate_constructors_extra_info(cls:model.Class) -> None:
extra_epytext += ', '
short_text = format_constructor_short_text(c, cls)
extra_epytext += '`%s <%s>`' % (short_text, c.fullName())

cls.extra_info.append(parse_docstring(
cls, extra_epytext, cls, 'restructuredtext', section='constructor extra'))

_builtin_names = set(dir(builtins))

class _ReferenceTransform(Transform):

def __init__(self, document:nodes.document,
ctx:'model.Documentable', is_annotation:bool):
super().__init__(document)
self.ctx = ctx
self.module = ctx.module
self.is_annotation = is_annotation

def _transform(self, node:nodes.title_reference) -> None:
ctx = self.ctx
module = self.module
_, target = parse_reference(node)
# we're setting two attributes here: 'refuri' and 'rawtarget'.
# 'refuri' might already be created by the colorizer or docstring parser,
# but 'rawtarget' is only created from within this transform, so we can
# use that information to ensure this process is only ever applied once
# per title_reference element.
attribs = node.attributes
if target == attribs.get('refuri', target) and 'rawtarget' not in attribs:
# save the raw target name
attribs['rawtarget'] = target
name, *rest = target.split('.')
is_name_defined = ctx.isNameDefined(name)
# check if it's a non-shadowed builtins
if not is_name_defined and name in _builtin_names:
# transform bare builtin name into builtins.<name>
attribs['refuri'] = '.'.join(('builtins', name, *rest))
return
# no-op for unbound name
if not is_name_defined:
attribs['refuri'] = target
return
# kindda duplicate a little part of the annotation linker logic here,
# there are no simple way of doing it otherwise at the moment.
# Once all presented parsed elements are stored as Documentable attributes
# we might be able to simply use that and drop the use of the annotation linker,
# but for now this will do the trick:
lookup_context = ctx
if self.is_annotation and ctx is not module and module.isNameDefined(name,
only_locals=True) and ctx.isNameDefined(name, only_locals=True):
# If we're dealing with an annotation, give precedence to the module's
# lookup (wrt PEP 563)
lookup_context = module
linker.warn_ambiguous_annotation(module, ctx, target)
# save pre-resolved refuri
attribs['refuri'] = '.'.join(chain(lookup_context.expandName(name).split('.'), rest))

def apply(self) -> None:
for node in self.document.findall(nodes.title_reference):
self._transform(node)


def _apply_reference_transform(doc:ParsedDocstring, ctx:'model.Documentable',
is_annotation:bool=False) -> None:
"""
Runs L{_ReferenceTransform} on the underlying docutils document.
No-op if L{to_node} raises L{NotImplementedError}.
"""
try:
document = doc.to_node()
except NotImplementedError:
return
else:
_ReferenceTransform(document, ctx, is_annotation).apply()

def transform_parsed_names(node:'model.Module') -> None:
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should not be re-run on nodes that have been reparented. Meaning we must keep track of reparented nodes for every modules.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This issue will be fixed if we move the reparenting in post process and call transform_parsed_names() after.

"""
Walk this module's content and apply in-place transformations to the
L{ParsedDocstring} instances that olds L{obj_reference} or L{nodes.title_reference} nodes.

Fixing "Lookup of name in annotation fails on reparented object #295".
The fix is not 100% complete at the moment: attribute values and decorators
are not handled.
Comment on lines +1296 to +1298
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
Fixing "Lookup of name in annotation fails on reparented object #295".
The fix is not 100% complete at the moment: attribute values and decorators
are not handled.
Fixing "Lookup of name in annotation fails on reparented object #295".

"""
from pydoctor import model, astbuilder
# resolve names early when possible
for ob in model.walk(node):
# resolve names in parsed_docstring, do not forget field bodies
if ob.parsed_docstring:
_apply_reference_transform(ob.parsed_docstring, ob)
for f in ob.parsed_docstring.fields:
_apply_reference_transform(f.body(), ob)
Comment on lines +1304 to +1307
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we're not using ensure_parsed_docstring, we might miss some docs

if isinstance(ob, model.Function):
if ob.signature:
for p in ob.signature.parameters.values():
ann = p.annotation if p.annotation is not inspect.Parameter.empty else None
if isinstance(ann, astbuilder._ValueFormatter):
_apply_reference_transform(ann.parsed, ob, is_annotation=True)
default = p.default if p.default is not inspect.Parameter.empty else None
if isinstance(default, astbuilder._ValueFormatter):
_apply_reference_transform(default.parsed, ob)
for _,ann in get_parsed_annotations(ob).items():
if ann:
_apply_reference_transform(ann, ob, is_annotation=True)
for dec in get_parsed_decorators(ob) or ():
if dec:
_apply_reference_transform(dec, ob)
for overload in ob.overloads:
for dec in get_parsed_decorators(overload) or ():
if dec:
_apply_reference_transform(dec, ob)
elif isinstance(ob, model.Attribute):
# resolve attribute annotation with parsed_type attribute
parsed_type = get_parsed_type(ob)
if parsed_type:
_apply_reference_transform(parsed_type, ob, is_annotation=True)
if ob.kind in ob.system.show_attr_value:
parsed_value = get_parsed_value(ob)
if parsed_value:
_apply_reference_transform(parsed_value, ob)
for dec in get_parsed_decorators(ob) or ():
if dec:
_apply_reference_transform(dec, ob)
elif isinstance(ob, model.Class):
for base in get_parsed_bases(ob):
_apply_reference_transform(base, ob)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
_apply_reference_transform(base, ob)
_apply_reference_transform(base, ob, is_annotation=True)


# TODO: do one test with parsed type docstrings