Skip to content

Commit

Permalink
fix: improve Go doc formatter
Browse files Browse the repository at this point in the history
Improvements:
- Use Go doc links introduced in Go 1.19
- Separate paragraphs so they are rendered as separate blocks of text

fixes aws/aws-sdk-go-v2#2597
  • Loading branch information
Luis Madrigal 🐧 committed May 8, 2024
1 parent 5aba977 commit fb3ea81
Show file tree
Hide file tree
Showing 5 changed files with 79 additions and 39 deletions.
2 changes: 1 addition & 1 deletion codegen/build.gradle.kts
Expand Up @@ -90,7 +90,7 @@ subprojects {
dependencies {
testImplementation("org.junit.jupiter:junit-jupiter-api:5.4.0")
testImplementation("org.junit.jupiter:junit-jupiter-engine:5.4.0")
testCompileOnly("org.junit.jupiter:junit-jupiter-params:5.4.0")
testImplementation("org.junit.jupiter:junit-jupiter-params:5.4.0")
testImplementation("org.hamcrest:hamcrest:2.1")
}

Expand Down
Expand Up @@ -15,7 +15,10 @@

package software.amazon.smithy.go.codegen;

import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import org.commonmark.node.BlockQuote;
import org.commonmark.node.FencedCodeBlock;
import org.commonmark.node.Heading;
Expand All @@ -25,6 +28,7 @@
import org.commonmark.parser.Parser;
import org.commonmark.renderer.html.HtmlRenderer;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;
import org.jsoup.safety.Safelist;
Expand All @@ -42,9 +46,9 @@ public final class DocumentationConverter {
// This allowlist strips out anything we can't reasonably convert, vastly simplifying the
// node tree we end up having to crawl through.
private static final Safelist GODOC_ALLOWLIST = new Safelist()
.addTags("code", "pre", "ul", "ol", "li", "a", "br", "h1", "h2", "h3", "h4", "h5", "h6")
.addTags("code", "pre", "ul", "ol", "li", "a", "br", "h1", "h2", "h3", "h4", "h5", "h6", "p")
.addAttributes("a", "href")
.addProtocols("a", "href", "http", "https", "mailto");
.addProtocols("a", "href", "http", "https");

// Construct a markdown parser that specifically ignores parsing indented code blocks. This
// is because HTML blocks can have really wonky formatting that can be mis-attributed to an
Expand Down Expand Up @@ -85,6 +89,8 @@ private static class FormattingVisitor implements NodeVisitor {
private static final Set<String> LIST_BLOCK_NODES = SetUtils.of("ul", "ol");
private static final Set<String> CODE_BLOCK_NODES = SetUtils.of("pre", "code");
private final CodeWriter writer;
// Godoc links are added at the end as `[text]: http://example.com` so keeping a collection to add them later
private final Map<String, String> links;

private boolean needsListPrefix = false;
private boolean shouldStripPrefixWhitespace = false;
Expand All @@ -100,6 +106,7 @@ private static class FormattingVisitor implements NodeVisitor {
writer.trimBlankLines();
writer.insertTrailingNewline(false);
this.docWrapLength = docWrapLength;
this.links = new HashMap<>();
this.lastLineString = "";
}

Expand All @@ -110,9 +117,31 @@ public void head(Node node, int depth) {
writer.indent();
}

if (node.nodeName().equals("a")) {
// Logic to format anchors as Go Links https://tip.golang.org/doc/comment#links
Element element = (Element) node;
String text = element.text();
String url = element.absUrl("href");
if (url.isEmpty()) {
// an empty anchor won't have a reference at the end, so just
// output it directly to the output
writer.writeInlineWithNoFormatting(text);
return;
}
String wrappedAnchorText = "[" + text + "]";
writer.writeInlineWithNoFormatting(wrappedAnchorText);
links.put(text, url);
}

Node parentNode = node.parentNode();
if (parentNode != null && parentNode.nodeName().equals("a") && node instanceof TextNode) {
// anchor tags get processed twice: once as anchor tags and another one as
// textNodes. Since this was already processed as anchor, no need to do anything else
return;
}
if (node instanceof TextNode) {
writeText((TextNode) node);
} else if (TEXT_BLOCK_NODES.contains(name) || isTopLevelCodeBlock(node, depth)) {
} else if (isTopLevelCodeBlock(node, depth)) {
writeNewline();
writeIndent();
} else if (LIST_BLOCK_NODES.contains(name)) {
Expand Down Expand Up @@ -279,19 +308,16 @@ public void tail(Node node, int depth) {
}

if (TEXT_BLOCK_NODES.contains(name) || isTopLevelCodeBlock(node, depth)) {
// A single newline is just treated as a line break, but gets
// rendered as the same block.
// Two ensure the text gets treated as a separate text block
writeNewline();
writeNewline();
} else if (LIST_BLOCK_NODES.contains(name)) {
listDepth--;
if (listDepth == 0) {
writeNewline();
}
} else if (name.equals("a")) {
String url = node.absUrl("href");
if (!url.isEmpty()) {
// godoc can't render links with text bodies, so we simply append the link.
// Full links do get rendered.
writeInline(" ($L)", url);
}
} else if (name.equals("li")) {
// Clear out the expectation of a list element if the element's body is empty.
needsListPrefix = false;
Expand All @@ -313,8 +339,22 @@ public String toString() {
}
result = String.join("\n", lines);

// iterate over every link found and append it at the end
String allLinks = links.entrySet().stream()
.map(link -> formatLink(link))
.collect(Collectors.joining("\n"));
if (!allLinks.isEmpty()) {
result += "\n\n" + allLinks;
}

// Strip out leading and trailing newlines.
return StringUtils.strip(result, "\n");
}

private String formatLink(Map.Entry<String, String> link) {
String anchor = link.getKey();
String destination = link.getValue();
return "[%s]: %s".formatted(anchor, destination);
}
}
}

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Expand Up @@ -25,11 +25,7 @@ private static Stream<Arguments> cases() {
),
Arguments.of(
"<a href=\"https://example.com\">a link</a>",
"a link (https://example.com)"
),
Arguments.of(
"<a href=\" https://example.com\">a link</a>",
"a link (https://example.com)"
"[a link]\n\n[a link]: https://example.com"
),
Arguments.of(
"<a>empty link</a>",
Expand All @@ -49,15 +45,15 @@ private static Stream<Arguments> cases() {
),
Arguments.of(
"<ul> <li> <p>Testing 1 2 3</p> </li><li> <p>FooBar</p></li></ul>",
" - Testing 1 2 3\n - FooBar"
" - Testing 1 2 3\n\n - FooBar"
),
Arguments.of(
"<ul> <li><code>Testing</code>: 1 2 3</li> <li>FooBar</li> </ul>",
" - Testing : 1 2 3\n - FooBar"
),
Arguments.of(
"<ul> <li><p><code>FOO</code> Bar</p></li><li><p><code>Xyz</code> ABC</p></li></ul>",
" - FOO Bar\n - Xyz ABC"
" - FOO Bar\n\n - Xyz ABC"
),
Arguments.of(
"<ul><li> foo</li><li>\tbar</li><li>\nbaz</li></ul>",
Expand Down Expand Up @@ -90,37 +86,39 @@ private static Stream<Arguments> cases() {
+ " configuration, see "
+ "<a href=\" https://docs.aws.amazon.com/AmazonS3/latest/dev/crr.html\">"
+ "Cross-Region Replication (CRR)</a> in the <i>Amazon S3 Developer Guide</i>. </p>",
"Deletes the replication configuration from the bucket. For information about\n" +
"replication configuration, see Cross-Region Replication (CRR) (https://docs.aws.amazon.com/AmazonS3/latest/dev/crr.html)\n" +
"in the Amazon S3 Developer Guide."
" Deletes the replication configuration from the bucket. For information about\n" +
"replication configuration, see [Cross-Region Replication (CRR)]in the Amazon S3 Developer Guide.\n\n" +
"[Cross-Region Replication (CRR)]: https://docs.aws.amazon.com/AmazonS3/latest/dev/crr.html"
),
Arguments.of(
"* foo\n* bar",
" - foo\n - bar"
),
Arguments.of(
"[a link](https://example.com)",
"a link (https://example.com)"
"[a link]\n\n[a link]: https://example.com"
),
Arguments.of("", ""),
Arguments.of("<!-- foo bar -->", ""),
Arguments.of("# Foo\nbar", "Foo\nbar"),
Arguments.of("<h1>Foo</h1>bar", "Foo\nbar"),
Arguments.of("## Foo\nbar", "Foo\nbar"),
Arguments.of("<h2>Foo</h2>bar", "Foo\nbar"),
Arguments.of("### Foo\nbar", "Foo\nbar"),
Arguments.of("<h3>Foo</h3>bar", "Foo\nbar"),
Arguments.of("#### Foo\nbar", "Foo\nbar"),
Arguments.of("<h4>Foo</h4>bar", "Foo\nbar"),
Arguments.of("##### Foo\nbar", "Foo\nbar"),
Arguments.of("<h5>Foo</h5>bar", "Foo\nbar"),
Arguments.of("###### Foo\nbar", "Foo\nbar"),
Arguments.of("<h6>Foo</h6>bar", "Foo\nbar"),
Arguments.of("# Foo\nbar", "Foo\n\nbar"),
Arguments.of("<h1>Foo</h1>bar", "Foo\n\nbar"),
Arguments.of("## Foo\nbar", "Foo\n\nbar"),
Arguments.of("<h2>Foo</h2>bar", "Foo\n\nbar"),
Arguments.of("### Foo\nbar", "Foo\n\nbar"),
Arguments.of("<h3>Foo</h3>bar", "Foo\n\nbar"),
Arguments.of("#### Foo\nbar", "Foo\n\nbar"),
Arguments.of("<h4>Foo</h4>bar", "Foo\n\nbar"),
Arguments.of("##### Foo\nbar", "Foo\n\nbar"),
Arguments.of("<h5>Foo</h5>bar", "Foo\n\nbar"),
Arguments.of("###### Foo\nbar", "Foo\n\nbar"),
Arguments.of("<h6>Foo</h6>bar", "Foo\n\nbar"),
Arguments.of("Inline `code`", "Inline code"),
Arguments.of("```\ncode block\n ```", " code block"),
Arguments.of("```java\ncode block\n ```", " code block"),
Arguments.of("foo<br/>bar", "foo\n\nbar"),
Arguments.of(" <p>foo</p>", "foo")
Arguments.of(" <p>foo</p>", "foo"),
Arguments.of("<p>paragraph one</p><p>paragraph two</p>", "paragraph one\n\nparagraph two"),
Arguments.of("<a href=\"mailto:someone@example.com\">someone</a>", "someone")
);
}
}

0 comments on commit fb3ea81

Please sign in to comment.