Skip to content

Commit 7332dce

Browse files
feat(html): Prepare processing system (#4358)
1 parent 9dc30a5 commit 7332dce

File tree

236 files changed

+45485
-86287
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

236 files changed

+45485
-86287
lines changed

.gitattributes

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
*.rs text eol=lf merge=union
88
*.js text eol=lf merge=union
99
*.json text eol=lf merge=union
10+
*.html text eol=lf merge=union
1011
*.debug text eol=lf merge=union
1112

1213
**/tests/**/*.js linguist-detectable=false

crates/swc_html_ast/src/base.rs

+96-4
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,101 @@
1-
use swc_common::{ast_node, Span};
2-
3-
use crate::TokenAndSpan;
1+
use is_macro::Is;
2+
use string_enum::StringEnum;
3+
use swc_atoms::JsWord;
4+
use swc_common::{ast_node, EqIgnoreSpan, Span};
45

56
/// AST node for a whole HTML document: a document mode plus a list of
/// child nodes.
#[ast_node("Document")]
7+
#[derive(Eq, Hash, EqIgnoreSpan)]
68
pub struct Document {
79
/// Span of this node.
pub span: Span,
8-
pub children: Vec<TokenAndSpan>,
10+
/// Mode of the document (see `DocumentMode`).
pub mode: DocumentMode,
11+
/// Child nodes of the document.
pub children: Vec<Child>,
12+
}
13+
14+
/// A list of child nodes without a document mode. `Element::content` uses
/// this type for the child nodes of `<template>`.
#[ast_node("DocumentFragment")]
15+
#[derive(Eq, Hash, EqIgnoreSpan)]
16+
pub struct DocumentFragment {
17+
/// Span of this node.
pub span: Span,
18+
/// Child nodes of the fragment.
pub children: Vec<Child>,
19+
}
20+
21+
// Mode of a `Document`: no-quirks, limited-quirks or quirks.
//
// NOTE(review): `StringEnum` presumably derives each variant's string form
// from the backticked text in its doc comment, which would make those doc
// comments part of the program's behavior — confirm before rewording them.
#[derive(StringEnum, Clone, Copy, Eq, PartialEq, PartialOrd, Ord, Hash, EqIgnoreSpan)]
21+
pub enum DocumentMode {
22+
/// `no-quirks`
23+
NoQuirks,
24+
/// `limited-quirks`
25+
LimitedQuirks,
26+
/// `quirks`
27+
Quirks,
28+
}
30+
31+
/// Any node that can appear in a `children` list of `Document`,
/// `DocumentFragment` or `Element`.
#[ast_node]
31+
#[derive(Eq, Hash, Is, EqIgnoreSpan)]
32+
pub enum Child {
33+
/// A document type node (see `DocumentType`).
#[tag("DocumentType")]
34+
DocumentType(DocumentType),
35+
/// An element with attributes and children (see `Element`).
#[tag("Element")]
36+
Element(Element),
37+
/// A text node (see `Text`).
#[tag("Text")]
38+
Text(Text),
39+
/// A comment node (see `Comment`).
#[tag("Comment")]
40+
Comment(Comment),
41+
}
43+
44+
/// A document type node. Every part is optional, so a missing name or
/// identifier is represented as `None`.
#[ast_node("DocumentType")]
45+
#[derive(Eq, Hash, EqIgnoreSpan)]
46+
pub struct DocumentType {
47+
/// Span of this node.
pub span: Span,
48+
/// Doctype name, if present.
pub name: Option<JsWord>,
49+
/// Public identifier, if present.
pub public_id: Option<JsWord>,
50+
/// System identifier, if present.
pub system_id: Option<JsWord>,
51+
}
52+
53+
// Namespace of an `Element` (see `Element::namespace`).
//
// NOTE(review): `StringEnum` presumably derives each variant's string form
// (the namespace URI) from the backticked text in its doc comment, which
// would make those doc comments part of the program's behavior — confirm
// before rewording them.
#[derive(StringEnum, Clone, Copy, Eq, PartialEq, PartialOrd, Ord, Hash, EqIgnoreSpan)]
54+
pub enum Namespace {
55+
/// `http://www.w3.org/1999/xhtml`
56+
HTML,
57+
/// `http://www.w3.org/1998/Math/MathML`
58+
MATHML,
59+
/// `http://www.w3.org/2000/svg`
60+
SVG,
61+
/// `http://www.w3.org/1999/xlink`
62+
XLINK,
63+
/// `http://www.w3.org/XML/1998/namespace`
64+
XML,
65+
/// `http://www.w3.org/2000/xmlns/`
66+
XMLNS,
67+
}
68+
69+
/// An element node: tag name, namespace, attributes and child nodes.
#[ast_node("Element")]
70+
#[derive(Eq, Hash, EqIgnoreSpan)]
71+
pub struct Element {
72+
/// Span of this node.
pub span: Span,
73+
/// Tag name of the element.
pub tag_name: JsWord,
74+
/// Namespace the element belongs to.
pub namespace: Namespace,
75+
/// Attributes of the element.
pub attributes: Vec<Attribute>,
76+
/// Child nodes of the element.
pub children: Vec<Child>,
77+
/// For child nodes in `<template>`
78+
pub content: Option<DocumentFragment>,
79+
}
80+
81+
/// A single attribute of an `Element`.
#[ast_node("Attribute")]
82+
#[derive(Eq, Hash, EqIgnoreSpan)]
83+
pub struct Attribute {
84+
/// Span of this node.
pub span: Span,
85+
/// Attribute name.
pub name: JsWord,
86+
/// Attribute value; `None` when the attribute has no value.
pub value: Option<JsWord>,
87+
}
88+
89+
/// A text node.
#[ast_node("Text")]
90+
#[derive(Eq, Hash, EqIgnoreSpan)]
91+
pub struct Text {
92+
/// Span of this node.
pub span: Span,
93+
/// Text content of the node.
pub value: JsWord,
94+
}
95+
96+
/// A comment node.
#[ast_node("Comment")]
97+
#[derive(Eq, Hash, EqIgnoreSpan)]
98+
pub struct Comment {
99+
/// Span of this node.
pub span: Span,
100+
/// Comment data.
pub data: JsWord,
9101
}

crates/swc_html_ast/src/token.rs

+7-6
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,24 @@
11
use serde::{Deserialize, Serialize};
22
use swc_atoms::JsWord;
3-
use swc_common::{ast_node, Span};
3+
use swc_common::{ast_node, EqIgnoreSpan, Span};
44

55
/// A `Token` paired with its span.
#[ast_node("TokenAndSpan")]
6+
#[derive(Eq, Hash, EqIgnoreSpan)]
67
pub struct TokenAndSpan {
78
/// Span of the token.
pub span: Span,
89
/// The token itself.
pub token: Token,
910
}
1011

11-
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
12-
pub struct Attribute {
12+
/// An attribute carried by `Token::StartTag` / `Token::EndTag`. Besides the
/// name and value it has optional `raw_*` counterparts.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize, EqIgnoreSpan)]
13+
pub struct AttributeToken {
1314
/// Attribute name.
pub name: JsWord,
1415
/// Raw form of the name, if recorded.
pub raw_name: Option<JsWord>,
1516
/// Attribute value; `None` when the attribute has no value.
pub value: Option<JsWord>,
1617
// TODO: improve raw value handling for HTML entities.
1718
/// Raw form of the value, if recorded.
pub raw_value: Option<JsWord>,
1819
}
1920

20-
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
21+
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize, EqIgnoreSpan)]
2122
pub enum Token {
2223
// TODO raw for bogus doctype
2324
Doctype {
@@ -49,13 +50,13 @@ pub enum Token {
4950
tag_name: JsWord,
5051
raw_tag_name: Option<JsWord>,
5152
self_closing: bool,
52-
attributes: Vec<Attribute>,
53+
attributes: Vec<AttributeToken>,
5354
},
5455
EndTag {
5556
tag_name: JsWord,
5657
raw_tag_name: Option<JsWord>,
5758
self_closing: bool,
58-
attributes: Vec<Attribute>,
59+
attributes: Vec<AttributeToken>,
5960
},
6061
Comment {
6162
data: JsWord,

crates/swc_html_codegen/src/lib.rs

+153-11
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,158 @@ where
4343
self.emit_list(&n.children, ListFormat::NotDelimited)?;
4444
}
4545

46+
#[emitter]
47+
fn emit_child(&mut self, n: &Child) -> Result {
48+
match n {
49+
Child::DocumentType(n) => emit!(self, n),
50+
Child::Element(n) => emit!(self, n),
51+
Child::Text(n) => emit!(self, n),
52+
Child::Comment(n) => emit!(self, n),
53+
}
54+
}
55+
56+
#[emitter]
57+
fn emit_document_doctype(&mut self, n: &DocumentType) -> Result {
58+
let mut doctype = String::new();
59+
60+
doctype.push('<');
61+
doctype.push('!');
62+
doctype.push_str("DOCTYPE");
63+
64+
if let Some(name) = &n.name {
65+
doctype.push(' ');
66+
doctype.push_str(name);
67+
}
68+
69+
if let Some(public_id) = &n.public_id {
70+
doctype.push(' ');
71+
doctype.push_str("PUBLIC");
72+
doctype.push(' ');
73+
doctype.push('"');
74+
doctype.push_str(public_id);
75+
doctype.push('"');
76+
77+
if let Some(system_id) = &n.system_id {
78+
doctype.push(' ');
79+
doctype.push('"');
80+
doctype.push_str(system_id);
81+
doctype.push('"');
82+
}
83+
} else if let Some(system_id) = &n.system_id {
84+
doctype.push(' ');
85+
doctype.push_str("SYSTEM");
86+
doctype.push(' ');
87+
doctype.push('"');
88+
doctype.push_str(system_id);
89+
doctype.push('"');
90+
}
91+
92+
doctype.push('>');
93+
94+
write_raw!(self, n.span, &doctype);
95+
}
96+
97+
#[emitter]
98+
fn emit_element(&mut self, n: &Element) -> Result {
99+
let mut start_tag = String::new();
100+
101+
start_tag.push('<');
102+
start_tag.push_str(&n.tag_name);
103+
104+
for attribute in &n.attributes {
105+
start_tag.push(' ');
106+
start_tag.push_str(&attribute.name);
107+
108+
if let Some(value) = &attribute.value {
109+
start_tag.push('=');
110+
111+
let quote = if value.contains('"') { '\'' } else { '"' };
112+
113+
start_tag.push(quote);
114+
start_tag.push_str(value);
115+
start_tag.push(quote);
116+
}
117+
}
118+
119+
start_tag.push('>');
120+
121+
write_str!(self, n.span, &start_tag);
122+
123+
let no_children = n.namespace == Namespace::HTML
124+
&& matches!(
125+
&*n.tag_name,
126+
"area"
127+
| "base"
128+
| "basefont"
129+
| "bgsound"
130+
| "br"
131+
| "col"
132+
| "embed"
133+
| "frame"
134+
| "hr"
135+
| "img"
136+
| "input"
137+
| "keygen"
138+
| "link"
139+
| "meta"
140+
| "param"
141+
| "source"
142+
| "track"
143+
| "wbr"
144+
);
145+
146+
if no_children {
147+
return Ok(());
148+
}
149+
150+
if !n.children.is_empty() {
151+
self.emit_list(&n.children, ListFormat::NotDelimited)?;
152+
}
153+
154+
let mut end_tag = String::new();
155+
156+
end_tag.push('<');
157+
end_tag.push('/');
158+
end_tag.push_str(&n.tag_name);
159+
end_tag.push('>');
160+
161+
write_str!(self, n.span, &end_tag);
162+
}
163+
164+
#[emitter]
165+
fn emit_text(&mut self, n: &Text) -> Result {
166+
let mut text = String::new();
167+
168+
for c in n.value.chars() {
169+
match c {
170+
'&' => {
171+
text.push_str(&String::from("&amp;"));
172+
}
173+
'<' => {
174+
text.push_str(&String::from("&lt;"));
175+
}
176+
'>' => {
177+
text.push_str(&String::from("&gt;"));
178+
}
179+
'\u{00A0}' => text.push_str(&String::from("&nbsp;")),
180+
_ => text.push(c),
181+
}
182+
}
183+
184+
write_str!(self, n.span, &text);
185+
}
186+
187+
#[emitter]
188+
fn emit_comment(&mut self, n: &Comment) -> Result {
189+
let mut comment = String::new();
190+
191+
comment.push_str("<!--");
192+
comment.push_str(&n.data);
193+
comment.push_str("-->");
194+
195+
write_str!(self, n.span, &comment);
196+
}
197+
46198
#[emitter]
47199
fn emit_token_and_span(&mut self, n: &TokenAndSpan) -> Result {
48200
let span = n.span;
@@ -226,7 +378,7 @@ where
226378

227379
start_tag.push('>');
228380

229-
write_str!(self, span, &start_tag);
381+
write_raw!(self, span, &start_tag);
230382
}
231383
Token::EndTag {
232384
tag_name,
@@ -325,19 +477,9 @@ where
325477
fn write_delim(&mut self, f: ListFormat) -> Result {
326478
match f & ListFormat::DelimitersMask {
327479
ListFormat::None => {}
328-
ListFormat::CommaDelimited => {
329-
write_raw!(self, ",");
330-
formatting_space!(self);
331-
}
332480
ListFormat::SpaceDelimited => {
333481
space!(self)
334482
}
335-
ListFormat::SemiDelimited => {
336-
write_raw!(self, ";")
337-
}
338-
ListFormat::DotDelimited => {
339-
write_raw!(self, ".");
340-
}
341483
_ => unreachable!(),
342484
}
343485

crates/swc_html_codegen/src/list.rs

+1-9
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,6 @@ add_bitflags!(
2626
Values {
2727
NotDelimited: 0,
2828
SpaceDelimited: 1 << 2,
29-
/// There is no delimiter between list items (default).
30-
SemiDelimited: 1 << 3,
31-
CommaDelimited: 1 << 4,
32-
DotDelimited: 1 << 5,
33-
DelimitersMask: SpaceDelimited | SemiDelimited | CommaDelimited | DotDelimited,
34-
},
35-
Values {
36-
/// Write a trailing comma (",") if present.
37-
AllowTrailingComma: 1 << 6,
29+
DelimitersMask: SpaceDelimited,
3830
},
3931
);

crates/swc_html_codegen/src/macros.rs

-8
Original file line numberDiff line numberDiff line change
@@ -36,14 +36,6 @@ macro_rules! space {
3636
}};
3737
}
3838

39-
macro_rules! formatting_space {
40-
($g:expr) => {{
41-
if !$g.config.minify {
42-
$g.wr.write_space()?;
43-
}
44-
}};
45-
}
46-
4739
// macro_rules! increase_indent {
4840
// ($g:expr) => {{
4941
// if !$g.config.minify {

crates/swc_html_codegen/tests/fixture.rs

+6-1
Original file line numberDiff line numberDiff line change
@@ -115,8 +115,13 @@ fn run(input: &Path, minify: bool) {
115115
struct NormalizeTest;
116116

117117
impl VisitMut for NormalizeTest {
118-
fn visit_mut_token_and_span(&mut self, n: &mut TokenAndSpan) {
118+
// TODO: also investigate the empty-body case (why is there one empty node?).
119+
// TODO: fix this. Only the last text node in the document should need
120+
// normalizing because of how HTML parsing works; alternatively, improve the
// AST so that this behavior is clear to developers.
121+
fn visit_mut_text(&mut self, n: &mut Text) {
119122
n.visit_mut_children_with(self);
123+
124+
// Blank out the value of every text node visited (see the TODOs above).
n.value = "".into();
120125
}
121126

122127
fn visit_mut_span(&mut self, n: &mut Span) {

0 commit comments

Comments
 (0)