From 551890adba0339eca05f37148d8f2963aba9718b Mon Sep 17 00:00:00 2001
From: George Pollard <gpollard@microsoft.com>
Date: Tue, 12 Oct 2021 06:49:03 +1300
Subject: [PATCH] =?UTF-8?q?=E2=9A=A0=EF=B8=8F=20=20Trim=20leading/trailing?=
 =?UTF-8?q?=20whitespace=20on=20lines=20in=20documentation=20(#517)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Trim whitespace in doc-comments

This is important for input files that use `/*…*/`-style comments,
where `go/ast` preserves the leading and trailing (and inner) whitespace
and this ends up in the generated CRD schema descriptions.

* Fix spelling

* Use trimspace

* Update golden file
---
 pkg/crd/markers/zz_generated.markerhelp.go    |  6 +--
 .../testdata.kubebuilder.io_cronjobs.yaml     | 45 +++++++++----------
 pkg/markers/collect_test.go                   | 32 ++++++++++++-
 pkg/markers/markers_suite_test.go             | 11 +++++
 pkg/markers/zip.go                            | 12 ++++-
 5 files changed, 77 insertions(+), 29 deletions(-)

diff --git a/pkg/crd/markers/zz_generated.markerhelp.go b/pkg/crd/markers/zz_generated.markerhelp.go
index 03e3e44bf..cfb88141d 100644
--- a/pkg/crd/markers/zz_generated.markerhelp.go
+++ b/pkg/crd/markers/zz_generated.markerhelp.go
@@ -116,7 +116,7 @@ func (ListType) Help() *markers.DefinitionHelp {
 		Category: "CRD processing",
 		DetailedHelp: markers.DetailedHelp{
 			Summary: "specifies the type of data-structure that the list represents (map, set, atomic). ",
-			Details: "Possible data-structure types of a list are: \n - \"map\": it needs to have a key field, which will be used to build an   associative list. A typical example is a the pod container list,   which is indexed by the container name. \n - \"set\": Fields need to be \"scalar\", and there can be only one   occurrence of each. \n - \"atomic\": All the fields in the list are treated as a single value,   are typically manipulated together by the same actor.",
+			Details: "Possible data-structure types of a list are: \n - \"map\": it needs to have a key field, which will be used to build an associative list. A typical example is a the pod container list, which is indexed by the container name. \n - \"set\": Fields need to be \"scalar\", and there can be only one occurrence of each. \n - \"atomic\": All the fields in the list are treated as a single value, are typically manipulated together by the same actor.",
 		},
 		FieldHelp: map[string]markers.DetailedHelp{},
 	}
@@ -127,7 +127,7 @@ func (MapType) Help() *markers.DefinitionHelp {
 		Category: "CRD processing",
 		DetailedHelp: markers.DetailedHelp{
 			Summary: "specifies the level of atomicity of the map; i.e. whether each item in the map is independent of the others, or all fields are treated as a single unit. ",
-			Details: "Possible values: \n - \"granular\": items in the map are independent of each other,   and can be manipulated by different actors.   This is the default behavior. \n - \"atomic\": all fields are treated as one unit.   Any changes have to replace the entire map.",
+			Details: "Possible values: \n - \"granular\": items in the map are independent of each other, and can be manipulated by different actors. This is the default behavior. \n - \"atomic\": all fields are treated as one unit. Any changes have to replace the entire map.",
 		},
 		FieldHelp: map[string]markers.DetailedHelp{},
 	}
@@ -360,7 +360,7 @@ func (StructType) Help() *markers.DefinitionHelp {
 		Category: "CRD processing",
 		DetailedHelp: markers.DetailedHelp{
 			Summary: "specifies the level of atomicity of the struct; i.e. whether each field in the struct is independent of the others, or all fields are treated as a single unit. ",
-			Details: "Possible values: \n - \"granular\": fields in the struct are independent of each other,   and can be manipulated by different actors.   This is the default behavior. \n - \"atomic\": all fields are treated as one unit.   Any changes have to replace the entire struct.",
+			Details: "Possible values: \n - \"granular\": fields in the struct are independent of each other, and can be manipulated by different actors. This is the default behavior. \n - \"atomic\": all fields are treated as one unit. Any changes have to replace the entire struct.",
 		},
 		FieldHelp: map[string]markers.DetailedHelp{},
 	}
diff --git a/pkg/crd/testdata/testdata.kubebuilder.io_cronjobs.yaml b/pkg/crd/testdata/testdata.kubebuilder.io_cronjobs.yaml
index ceb55772f..eaf91f5c8 100644
--- a/pkg/crd/testdata/testdata.kubebuilder.io_cronjobs.yaml
+++ b/pkg/crd/testdata/testdata.kubebuilder.io_cronjobs.yaml
@@ -1,4 +1,3 @@
-
 ---
 apiVersion: apiextensions.k8s.io/v1
 kind: CustomResourceDefinition
@@ -5387,9 +5386,9 @@ spec:
                                         the spread constraint. - DoNotSchedule (default)
                                         tells the scheduler not to schedule it. -
                                         ScheduleAnyway tells the scheduler to schedule
-                                        the pod in any location,   but giving higher
+                                        the pod in any location, but giving higher
                                         precedence to topologies that would help reduce
-                                        the   skew. A constraint is considered "Unsatisfiable"
+                                        the skew. A constraint is considered "Unsatisfiable"
                                         for an incoming pod if and only if every possible
                                         node assigment for that pod would violate
                                         "MaxSkew" on some topology. For example, in
@@ -5879,13 +5878,13 @@ spec:
                                         when the pod is removed. \n Use this if: a)
                                         the volume is only needed while the pod runs,
                                         b) features of normal volumes like restoring
-                                        from snapshot or capacity    tracking are
-                                        needed, c) the storage driver is specified
-                                        through a storage class, and d) the storage
-                                        driver supports dynamic volume provisioning
-                                        through    a PersistentVolumeClaim (see EphemeralVolumeSource
-                                        for more    information on the connection
-                                        between this volume type    and PersistentVolumeClaim).
+                                        from snapshot or capacity tracking are needed,
+                                        c) the storage driver is specified through
+                                        a storage class, and d) the storage driver
+                                        supports dynamic volume provisioning through
+                                        a PersistentVolumeClaim (see EphemeralVolumeSource
+                                        for more information on the connection between
+                                        this volume type and PersistentVolumeClaim).
                                         \n Use PersistentVolumeClaim or one of the
                                         vendor-specific APIs for volumes that persist
                                         for longer than the lifecycle of an individual
@@ -7339,23 +7338,23 @@ spec:
                   description: 'ObjectReference contains enough information to let
                     you inspect or modify the referred object. --- New uses of this
                     type are discouraged because of difficulty describing its usage
-                    when embedded in APIs.  1. Ignored fields.  It includes many fields
+                    when embedded in APIs. 1. Ignored fields.  It includes many fields
                     which are not generally honored.  For instance, ResourceVersion
-                    and FieldPath are both very rarely valid in actual usage.  2.
-                    Invalid usage help.  It is impossible to add specific help for
-                    individual usage.  In most embedded usages, there are particular     restrictions
+                    and FieldPath are both very rarely valid in actual usage. 2. Invalid
+                    usage help.  It is impossible to add specific help for individual
+                    usage.  In most embedded usages, there are particular restrictions
                     like, "must refer only to types A and B" or "UID not honored"
-                    or "name must be restricted".     Those cannot be well described
-                    when embedded.  3. Inconsistent validation.  Because the usages
-                    are different, the validation rules are different by usage, which
-                    makes it hard for users to predict what will happen.  4. The fields
+                    or "name must be restricted". Those cannot be well described when
+                    embedded. 3. Inconsistent validation.  Because the usages are
+                    different, the validation rules are different by usage, which
+                    makes it hard for users to predict what will happen. 4. The fields
                     are both imprecise and overly precise.  Kind is not a precise
-                    mapping to a URL. This can produce ambiguity     during interpretation
+                    mapping to a URL. This can produce ambiguity during interpretation
                     and require a REST mapping.  In most cases, the dependency is
-                    on the group,resource tuple     and the version of the actual
-                    struct is irrelevant.  5. We cannot easily change it.  Because
-                    this type is embedded in many locations, updates to this type     will
-                    affect numerous schemas.  Don''t make new APIs embed an underspecified
+                    on the group,resource tuple and the version of the actual struct
+                    is irrelevant. 5. We cannot easily change it.  Because this type
+                    is embedded in many locations, updates to this type will affect
+                    numerous schemas.  Don''t make new APIs embed an underspecified
                     API type they do not control. Instead of using this type, create
                     a locally provided and used type that is well-focused on your
                     reference. For example, ServiceReferences for admission registration:
diff --git a/pkg/markers/collect_test.go b/pkg/markers/collect_test.go
index 3f24023e6..eaa7bbea8 100644
--- a/pkg/markers/collect_test.go
+++ b/pkg/markers/collect_test.go
@@ -31,7 +31,10 @@ type fieldPath struct {
 var _ = Describe("Collecting", func() {
 	var col *Collector
 	var markersByType map[string]MarkerValues
+	var docsByType map[string]string
+
 	var markersByField map[fieldPath]MarkerValues
+	var docsByField map[fieldPath]string
 
 	JustBeforeEach(func() {
 		By("setting up the registry and collector")
@@ -45,15 +48,23 @@ var _ = Describe("Collecting", func() {
 
 		col = &Collector{Registry: reg}
 
-		By("gathering markers by type name")
+		By("gathering markers/docs by type/field name")
 		markersByType = make(map[string]MarkerValues)
+		docsByType = make(map[string]string)
 		markersByField = make(map[fieldPath]MarkerValues)
+		docsByField = make(map[fieldPath]string)
+
 		err := EachType(col, fakePkg, func(info *TypeInfo) {
 			markersByType[info.Name] = info.Markers
+			docsByType[info.Name] = info.Doc
+
 			for _, field := range info.Fields {
-				markersByField[fieldPath{typ: info.Name, field: field.Name}] = field.Markers
+				fieldPath := fieldPath{typ: info.Name, field: field.Name}
+				markersByField[fieldPath] = field.Markers
+				docsByField[fieldPath] = field.Doc
 			}
 		})
+
 		Expect(err).NotTo(HaveOccurred())
 		Expect(fakePkg.Errors).To(HaveLen(0))
 	})
@@ -94,6 +105,10 @@ var _ = Describe("Collecting", func() {
 					HaveKeyWithValue("testing:typelvl", ContainElement("here on type"))))
 			})
 
+			It("should have docs without markers", func() {
+				Expect(docsByType).To(HaveKeyWithValue("Foo", "normal godoc normal godoc"))
+			})
+
 			It("should associate markers in the closest non-godoc block", func() {
 				Expect(markersByType).To(HaveKeyWithValue("Foo",
 					HaveKeyWithValue("testing:typelvl", ContainElement("here before type"))))
@@ -109,6 +124,19 @@ var _ = Describe("Collecting", func() {
 				Expect(pkgMarkers).To(HaveKeyWithValue("testing:pkglvl", ContainElement("here reassociated")))
 			})
 		})
+
+		Context("types with /*…*/-style comments", func() {
+			It("should have doc without extraneous spaces", func() {
+				Expect(docsByType).To(HaveKeyWithValue("HasDocsWithSpaces",
+					"This type of doc has spaces preserved in go-ast, but we'd like to trim them."))
+			})
+
+			It("should have doc without extraneous spaces, even over multiple lines", func() {
+				Expect(docsByType).To(HaveKeyWithValue("HasDocsWithSpaces2",
+					"This type of doc has spaces preserved in go-ast, but we'd like to trim them, especially when formatted like this."))
+			})
+		})
+
 		Context("without godoc", func() {
 			It("should associate markers in the closest block", func() {
 				Expect(markersByType).To(HaveKeyWithValue("Quux",
diff --git a/pkg/markers/markers_suite_test.go b/pkg/markers/markers_suite_test.go
index 84d5c5b45..ad8491cb1 100644
--- a/pkg/markers/markers_suite_test.go
+++ b/pkg/markers/markers_suite_test.go
@@ -88,6 +88,17 @@ var _ = BeforeSuite(func() {
 						Bar = "foo"
 					)
 
+					/* This type of doc has spaces preserved in go-ast, but we'd like to trim them. */
+					type HasDocsWithSpaces struct {
+					}
+
+					/*
+					This type of doc has spaces preserved in go-ast, but we'd like to trim them,
+					especially when formatted like this.
+					*/
+					type HasDocsWithSpaces2 struct {
+					}
+
 					type Baz interface {
 						// +testing:pkglvl="not here in interface"
 					}
diff --git a/pkg/markers/zip.go b/pkg/markers/zip.go
index a9b4c98af..0ef1fb133 100644
--- a/pkg/markers/zip.go
+++ b/pkg/markers/zip.go
@@ -67,12 +67,22 @@ func extractDoc(node ast.Node, decl *ast.GenDecl) string {
 		// chop off the extraneous last part
 		outLines = outLines[:len(outLines)-1]
 	}
-	// respect double-newline meaning actual newline
+
 	for i, line := range outLines {
+		// Trim any extranous whitespace,
+		// for handling /*…*/-style comments,
+		// which have whitespace preserved in go/ast:
+		line = strings.TrimSpace(line)
+
+		// Respect that double-newline means
+		// actual newline:
 		if line == "" {
 			outLines[i] = "\n"
+		} else {
+			outLines[i] = line
 		}
 	}
+
 	return strings.Join(outLines, " ")
 }