Skip to content

Commit 900dc05

Browse files
committed
build: release v0.3.1
- fix missing col start event; handle a row start without `|-` (wikitext_tables/11.txt)
1 parent b9c0e27 commit 900dc05

File tree

6 files changed

+41
-21
lines changed

6 files changed

+41
-21
lines changed

Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "wikitext_table_parser"
3-
version = "0.3.0"
3+
version = "0.3.1"
44
edition = "2021"
55
license = "Apache-2.0"
66
keywords = ["wiki", "wikitext","table","parser"]

README.md

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ A table in wikitext should look like:
2727
#### Installation
2828
```toml
2929
[dependencies]
30-
wikitext_table_parser = "0.3.0"
30+
wikitext_table_parser = "0.3.1"
3131
```
3232
#### Usage Example
3333
```rust
@@ -64,25 +64,28 @@ fn main() {
6464
WikitextTableParser::new(table_tokenizer, cell_tokenizer, &content, true);
6565
for event in wikitext_table_parser {
6666
match event {
67-
Event::TableStart => {
67+
Event::TableStart {} => {
6868
println!("Table START!");
6969
}
70-
Event::TableStyle(table_style) => {
70+
Event::TableStyle { text: table_style } => {
7171
println!("table style{:?}#", table_style);
7272
}
73-
Event::TableCaption(text) => {
73+
Event::TableCaption { text } => {
7474
println!("table name{:?}#", text);
7575
}
76-
Event::RowStyle(row_style) => {
76+
Event::RowStyle { text: row_style } => {
7777
println!("----- {:?} -----", row_style);
7878
}
79-
Event::ColStyle(col_style) => {
80-
print!("col style: {:?} -- ", col_style);
79+
Event::ColStart { cell_type } =>{
80+
print!("{:?} ",cell_type);
8181
}
82-
Event::ColEnd(text) => {
83-
println!("col data: {:?}", text);
82+
Event::ColStyle { text: col_style } => {
83+
print!("style: {:?} -> ", col_style);
8484
}
85-
Event::TableEnd => {
85+
Event::ColEnd { text } => {
86+
println!("data: {:?}", text);
87+
}
88+
Event::TableEnd {} => {
8689
println!("Table END!");
8790
}
8891
_ => {}
@@ -139,6 +142,7 @@ for event in parser.event_log_queue:
139142
print("col style:", event.text)
140143
elif isinstance(event, Event.ColEnd):
141144
print("col data:", event.text)
145+
print("-"*20)
142146
elif isinstance(event, Event.TableCaptionStart):
143147
pass
144148
elif isinstance(event, Event.TableCaption):
@@ -148,7 +152,7 @@ for event in parser.event_log_queue:
148152
elif isinstance(event, Event.RowStyle):
149153
print("row style:", event.text)
150154
elif isinstance(event, Event.RowEnd):
151-
print("-"*20)
155+
print("="*30)
152156
else:
153157
raise NotImplementedError(event)
154158
```

examples/use_in_python.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
print("col style:", event.text)
3535
elif isinstance(event, Event.ColEnd):
3636
print("col data:", event.text)
37+
print("-"*20)
3738
elif isinstance(event, Event.TableCaptionStart):
3839
pass
3940
elif isinstance(event, Event.TableCaption):
@@ -43,6 +44,6 @@
4344
elif isinstance(event, Event.RowStyle):
4445
print("row style:", event.text)
4546
elif isinstance(event, Event.RowEnd):
46-
print("-"*20)
47+
print("="*30)
4748
else:
4849
raise NotImplementedError(event)

src/main.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,11 +43,14 @@ fn main() {
4343
Event::RowStyle { text: row_style } => {
4444
println!("----- {:?} -----", row_style);
4545
}
46+
Event::ColStart { cell_type } =>{
47+
print!("{:?} ",cell_type);
48+
}
4649
Event::ColStyle { text: col_style } => {
47-
print!("col style: {:?} -- ", col_style);
50+
print!("style: {:?} -> ", col_style);
4851
}
4952
Event::ColEnd { text } => {
50-
println!("col data: {:?}", text);
53+
println!("data: {:?}", text);
5154
}
5255
Event::TableEnd {} => {
5356
println!("Table END!");

src/parser.rs

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,9 @@ pub enum CellType {
4242
#[derive(Debug)]
4343
pub struct WikitextTableParser {
4444
state: State,
45-
#[pyo3(get,set)]
45+
#[pyo3(get, set)]
4646
event_log_queue: Vec<Event>,
47-
#[pyo3(get,set)]
47+
#[pyo3(get, set)]
4848
tokens: Vec<String>,
4949
text_buffer: String,
5050
table_tokenizer: Tokenizer,
@@ -165,6 +165,7 @@ impl WikitextTableParser {
165165

166166
fn step(&mut self) {
167167
let token = self.tokens.remove(0);
168+
// println!("{:?}", token);
168169
match self.state {
169170
State::Idle => {
170171
if &token == TableSpecialTokens::TableStart.as_ref() {
@@ -185,6 +186,12 @@ impl WikitextTableParser {
185186
});
186187
self.clear_text_buffer();
187188
self.transition(Event::RowStart {});
189+
} else if &token == TableSpecialTokens::TableHeaderCell.as_ref() {
190+
self.transition(Event::TableStyle {
191+
text: self.get_text_buffer_data(),
192+
});
193+
self.clear_text_buffer();
194+
self.transition(Event::RowStart {});
188195
}
189196
// end of table
190197
else if &token == TableSpecialTokens::TableEnd.as_ref() {
@@ -204,6 +211,7 @@ impl WikitextTableParser {
204211
// match ! after the caption; this type will not have a row style
205212
// and should transition into the read-col state
206213
else if &token == TableSpecialTokens::TableHeaderCell.as_ref() {
214+
// catch the table caption and transition the state to "State::ReadTable"
207215
self.transition(Event::TableCaption {
208216
text: self.get_text_buffer_data(),
209217
});
@@ -264,6 +272,9 @@ impl WikitextTableParser {
264272
text: self.get_text_buffer_data(),
265273
});
266274
self.clear_text_buffer();
275+
self.transition(Event::ColStart {
276+
cell_type: CellType::DataCell,
277+
});
267278
}
268279
// match \n! or \n!!
269280
else if &token == TableSpecialTokens::TableHeaderCell.as_ref()
@@ -276,6 +287,9 @@ impl WikitextTableParser {
276287
text: self.get_text_buffer_data(),
277288
});
278289
self.clear_text_buffer();
290+
self.transition(Event::ColStart {
291+
cell_type: CellType::HeaderCell,
292+
});
279293
} else if &token == TableSpecialTokens::TableRow.as_ref() {
280294
self.transition(Event::ColStyle {
281295
text: self.get_style_text_buffer_data(),
@@ -325,9 +339,7 @@ impl WikitextTableParser {
325339

326340
// State::ReadCol
327341
(State::ReadCol, Event::ColStyle { text }) => {}
328-
(State::ReadCol, Event::ColEnd { text }) => {
329-
self.state = State::ReadCol
330-
},
342+
(State::ReadCol, Event::ColEnd { text }) => self.state = State::ReadCol,
331343
(State::ReadCol, Event::RowStart {}) => self.state = State::ReadRow,
332344

333345
// Else

0 commit comments

Comments
 (0)