@@ -37,6 +37,7 @@ import (
37
37
bf "github.com/russross/blackfriday/v2"
38
38
"github.com/spf13/afero"
39
39
"github.com/yuin/goldmark"
40
+ "github.com/yuin/goldmark/ast"
40
41
gmast "github.com/yuin/goldmark/ast"
41
42
gmtext "github.com/yuin/goldmark/text"
42
43
"golang.org/x/text/cases"
@@ -46,6 +47,7 @@ import (
46
47
"github.com/pulumi/pulumi-terraform-bridge/v3/pkg/tfbridge"
47
48
"github.com/pulumi/pulumi-terraform-bridge/v3/pkg/tfbridge/info"
48
49
"github.com/pulumi/pulumi-terraform-bridge/v3/pkg/tfgen/parse"
50
+ "github.com/pulumi/pulumi-terraform-bridge/v3/pkg/tfgen/parse/section"
49
51
)
50
52
51
53
const (
@@ -1511,70 +1513,70 @@ func (g *Generator) convertExamples(docs string, path examplePath) string {
1511
1513
// codeBlock represents a code block found in the upstream docs, delineated by code fences (```).
// It also tracks which header it is part of.
type codeBlock struct {
	start       int    // The index of the first backtick of an opening code fence
	end         int    // The index of the first backtick of a closing code fence
	headerStart int    // The index of the first "#" in a Markdown header. A value of -1 indicates there's no header.
	language    string // The language of the code block.
}

// code returns the text of the code inside the block: everything in
// document[cb.start:cb.end] that follows the opening fence line.
//
// Given the document
//
//	```sh
//	$ cmd \
//	  --flag
//	```
//
// with cb.start at the opening fence and cb.end at the closing fence, code
// returns "$ cmd \\\n  --flag\n".
//
// The result is copied out of document verbatim; no padding or indentation found in the
// original document is removed.
//
// If the span contains no newline there is no code after the opening fence line, so the
// empty string is returned rather than the fence text itself.
//
// document must be the byte slice that cb.start and cb.end index into, with
// cb.start <= cb.end <= len(document); otherwise the slice expression panics.
func (cb codeBlock) code(document []byte) string {
	block := document[cb.start:cb.end]

	// The opening fence (and its info string) occupies the first line of the span.
	fenceLineEnd := bytes.IndexByte(block, '\n')
	if fenceLineEnd == -1 {
		return ""
	}
	return string(block[fenceLineEnd+1:])
}
1560
1540
1561
- func findFencesAndHeaders (doc string ) []codeBlock {
1562
- codeFence := "```"
1541
+ func findCodeBlocks (docs []byte ) []codeBlock {
1542
+ rootNode := goldmark .New (goldmark .WithExtensions (parse .TFRegistryExtension )).
1543
+ Parser ().Parse (gmtext .NewReader (docs ))
1544
+
1563
1545
var codeBlocks []codeBlock
1564
- headerStart := - 1
1565
- for i := 0 ; i < (len (doc ) - len (codeFence )); i ++ {
1566
- block , found := findCodeBlock (doc , i )
1567
- if found {
1568
- block .headerStart = headerStart
1569
- codeBlocks = append (codeBlocks , block )
1570
- i = block .end + 1
1546
+ parse .WalkNode (rootNode , func (cb * ast.FencedCodeBlock ) {
1547
+ lines := cb .Lines ()
1548
+
1549
+ headerStart := - 1
1550
+ for p := cb .Parent (); p != nil ; p = p .Parent () {
1551
+ if s , ok := p .(* section.Section ); ok {
1552
+ l := s .FirstChild ().Lines ()
1553
+ if l .Len () == 0 {
1554
+ // A header doesn't have any lines if there is no text associated with the
1555
+ // header, then we can't find its location due to limitations of goldmark.
1556
+ //
1557
+ // Just give up on finding a header here.
1558
+ break
1559
+ }
1560
+ headerStart = bytes .LastIndexByte (docs [:l .At (0 ).Start ], '\n' ) + 1
1561
+ break
1562
+ }
1571
1563
}
1572
- headerEnd , found := findHeader (doc , i )
1573
- if found {
1574
- headerStart = i
1575
- i = headerEnd
1564
+
1565
+ firstNewlineOfCodeBlock := bytes .LastIndexByte (docs [:lines .At (0 ).Start ], '\n' )
1566
+ firstNewlineOfCodeFence := bytes .LastIndexByte (docs [:firstNewlineOfCodeBlock ], '\n' )
1567
+ if firstNewlineOfCodeFence == - 1 {
1568
+ // This means that docs starts with a code block
1569
+ firstNewlineOfCodeFence = 0
1576
1570
}
1577
- }
1571
+ firstBacktickOfCodeFence := bytes .IndexByte (docs [firstNewlineOfCodeFence :], '`' ) + firstNewlineOfCodeFence
1572
+
1573
+ codeBlocks = append (codeBlocks , codeBlock {
1574
+ start : firstBacktickOfCodeFence ,
1575
+ end : lines .At (lines .Len () - 1 ).Stop ,
1576
+ headerStart : headerStart ,
1577
+ language : string (cb .Language (docs )),
1578
+ })
1579
+ })
1578
1580
return codeBlocks
1579
1581
}
1580
1582
@@ -1593,14 +1595,13 @@ func (g *Generator) convertExamplesInner(
1593
1595
_ , err := fmt .Fprintf (output , f , args ... )
1594
1596
contract .AssertNoErrorf (err , "Cannot fail to write out output buffer" )
1595
1597
}
1596
- codeBlocks := findFencesAndHeaders (docs )
1597
1598
const codeFence = "```"
1598
1599
1599
1600
// Traverse the code blocks and take appropriate action before appending to output
1600
1601
textStart := 0
1601
1602
stripSection := false
1602
- stripSectionHeader := 0
1603
- for _ , tfBlock := range codeBlocks {
1603
+ stripSectionHeader := 0 // The index of the header that we might want to strip.
1604
+ for _ , tfBlock := range findCodeBlocks ([] byte ( docs )) {
1604
1605
// if the section has a header we append the header after trying to convert the code.
1605
1606
hasHeader := tfBlock .headerStart >= 0 && textStart < tfBlock .headerStart
1606
1607
@@ -1615,75 +1616,77 @@ func (g *Generator) convertExamplesInner(
1615
1616
// if we are stripping this section and still have the same header, we append nothing and skip to the next
1616
1617
// code block.
1617
1618
if stripSectionHeader == tfBlock .headerStart {
1618
- textStart = tfBlock .end + len (codeFence )
1619
+ if eol := strings .IndexRune (docs [tfBlock .end :], '\n' ); eol > - 1 {
1620
+ textStart = tfBlock .end + eol
1621
+ } else {
1622
+ // If no newline character is found, we are at the end of the doc.
1623
+ textStart = len (docs )
1624
+ }
1619
1625
continue
1620
1626
}
1621
1627
if stripSectionHeader < tfBlock .headerStart {
1622
1628
stripSection = false
1623
1629
}
1624
1630
}
1625
- // find the actual start index of the code
1626
- nextNewLine := strings .Index (docs [tfBlock .start :tfBlock .end ], "\n " )
1627
- if nextNewLine == - 1 {
1628
- // write the line as-is; this is an in-line fence
1629
- fprintf ("%s%s" , docs [tfBlock .start :tfBlock .end ], codeFence )
1630
- } else {
1631
- fenceLanguage := docs [tfBlock .start : tfBlock .start + nextNewLine + 1 ]
1632
- hcl := docs [tfBlock .start + nextNewLine + 1 : tfBlock .end ]
1633
-
1634
- // Only attempt to convert code blocks that are either explicitly marked as Terraform, or
1635
- // unmarked. For unmarked snippets further gate by a regex guess if it is actually Terraform.
1636
- if isHCL (fenceLanguage , hcl ) {
1637
- // generate the code block and append
1638
- if g .language .shouldConvertExamples () {
1639
- hcl := docs [tfBlock .start + nextNewLine + 1 : tfBlock .end ]
1640
-
1641
- // Most of our results should be HCL, so we try to convert it.
1642
- var e * Example
1643
- if useCoverageTracker {
1644
- e = g .coverageTracker .getOrCreateExample (
1645
- path .String (), hcl )
1631
+ // Only attempt to convert code blocks that are either explicitly marked as Terraform, or
1632
+ // unmarked. For unmarked snippets further gate by a regex guess if it is actually Terraform.
1633
+ if hcl := tfBlock .code ([]byte (docs )); isHCL (tfBlock .language , hcl ) {
1634
+ // generate the code block and append
1635
+ if g .language .shouldConvertExamples () {
1636
+ // Most of our results should be HCL, so we try to convert it.
1637
+ var e * Example
1638
+ if useCoverageTracker {
1639
+ e = g .coverageTracker .getOrCreateExample (
1640
+ path .String (), hcl )
1641
+ }
1642
+ langs := genLanguageToSlice (g .language )
1643
+ convertedBlock , err := convertHCL (e , hcl , path .String (), langs )
1644
+ if err != nil {
1645
+ // We do not write this section, ever.
1646
+ //
1647
+ // We have to strip the entire section: any header, the code
1648
+ // block, and any surrounding text.
1649
+ stripSection = true
1650
+ stripSectionHeader = tfBlock .headerStart
1651
+ } else {
1652
+ // append any headers and following text first
1653
+ if hasHeader {
1654
+ fprintf ("%s" , docs [tfBlock .headerStart :tfBlock .start ])
1646
1655
}
1647
- langs := genLanguageToSlice (g .language )
1648
- convertedBlock , err := convertHCL (e , hcl , path .String (), langs )
1649
- if err != nil {
1650
- // We do not write this section, ever.
1651
- //
1652
- // We have to strip the entire section: any header, the code
1653
- // block, and any surrounding text.
1654
- stripSection = true
1655
- stripSectionHeader = tfBlock .headerStart
1656
- } else {
1657
- // append any headers and following text first
1658
- if hasHeader {
1659
- fprintf ("%s" , docs [tfBlock .headerStart :tfBlock .start ])
1660
- }
1661
-
1662
- switch g .language {
1663
- // If we are targeting the schema, then print code switcher
1664
- // fences for the registry.
1665
- case Schema :
1666
- fprintf ("%s\n %s\n %s" ,
1667
- startPulumiCodeChooser ,
1668
- convertedBlock ,
1669
- endPulumiCodeChooser )
1670
- // Otherwise skip code switcher fences so they don't show up
1671
- // in generated SDKs.
1672
- default :
1673
- fprintf ("%s" , convertedBlock )
1674
- }
1656
+
1657
+ switch g .language {
1658
+ // If we are targeting the schema, then print code switcher
1659
+ // fences for the registry.
1660
+ case Schema :
1661
+ fprintf ("%s\n %s\n %s" ,
1662
+ startPulumiCodeChooser ,
1663
+ convertedBlock ,
1664
+ endPulumiCodeChooser )
1665
+ // Otherwise skip code switcher fences so they don't show up
1666
+ // in generated SDKs.
1667
+ default :
1668
+ fprintf ("%s" , convertedBlock )
1675
1669
}
1676
1670
}
1677
- } else {
1678
- // Take already-valid code blocks as-is.
1679
- if hasHeader {
1680
- fprintf ("%s" , docs [tfBlock .headerStart :tfBlock .start ])
1681
- }
1682
- fprintf ("%s" + codeFence , docs [tfBlock .start :tfBlock .end ])
1683
1671
}
1672
+ } else {
1673
+ // Take already-valid code blocks as-is.
1674
+ if hasHeader {
1675
+ fprintf ("%s" , docs [tfBlock .headerStart :tfBlock .start ])
1676
+ }
1677
+ fprintf ("%s" + codeFence , docs [tfBlock .start :tfBlock .end ])
1678
+ }
1679
+
1680
+ // We want to start including non-code text after the end of the code block.
1681
+ //
1682
+ // The codeblock "ends" with the newline character at the end of the
1683
+ // closing fence.
1684
+ if eol := strings .IndexRune (docs [tfBlock .end :], '\n' ); eol > - 1 {
1685
+ textStart = tfBlock .end + eol
1686
+ } else {
1687
+ // If no newline character is found, we are at the end of the doc.
1688
+ textStart = len (docs )
1684
1689
}
1685
- // The non-code text starts up again after the last closing fences
1686
- textStart = tfBlock .end + len (codeFence )
1687
1690
}
1688
1691
// Append any remainder of the docs string to the output
1689
1692
if ! stripSection {
@@ -2342,6 +2345,12 @@ func guessIsHCL(code string) bool {
2342
2345
}
2343
2346
2344
2347
func isHCL (fenceLanguage , code string ) bool {
2345
- return fenceLanguage == "```terraform\n " || fenceLanguage == "```hcl\n " || fenceLanguage == "```tf\n " ||
2346
- (fenceLanguage == "```\n " && guessIsHCL (code ))
2348
+ switch fenceLanguage {
2349
+ case "terraform" , "hcl" , "tf" :
2350
+ return true
2351
+ case "" :
2352
+ return guessIsHCL (code )
2353
+ default :
2354
+ return false
2355
+ }
2347
2356
}
0 commit comments