Skip to content

Commit 22cac66

Browse files
committed
MemoryPackSerializeOptions.Utf8(WIP)
1 parent 0b70baa commit 22cac66

17 files changed

+364
-67
lines changed

README.md

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -548,15 +548,16 @@ If you request it, there is a possibility to make a detuned Unity version. Pleas
548548

549549
Binary wire format specification
550550
---
551-
The type of `T` defined in `Serialize<T>` and `Deserialize<T>` is called C# schema. MemoryPack format is not self described format. Deserialize requires the corresponding C# schema. Four types exist as internal representations of binaries, but types cannot be determined without a C# schema.
551+
The type of `T` defined in `Serialize<T>` and `Deserialize<T>` is called the C# schema. The MemoryPack format is not a self-describing format; deserialization requires the corresponding C# schema. Five types exist as internal representations of binaries, but the type cannot be determined without a C# schema.
552552

553553
There is no endianness specification, so data cannot be exchanged between machines with different endianness. However, modern computers are usually little-endian.
554554

555-
There are four value types of format.
555+
The format has five kinds of values.
556556

557557
* Unmanaged struct
558558
* Object
559559
* Collection
560+
* String
560561
* Union
561562

562563
### Unmanaged struct
@@ -574,7 +575,14 @@ Object has 1byte unsigned byte as member count in header. Member count allows `0
574575

575576
`[int length, values...]`
576577

577-
Collection has 4byte signed interger as data count in header, `-1` represents `null`. Values store memorypack value for the number of length. String is collection(serialize as `ReadOnlySpan<char>`, in other words, UTF16).
578+
A collection has a 4-byte signed integer as the data count in its header; `-1` represents `null`. The values section stores `length` MemoryPack values.
579+
580+
### String
581+
582+
`(int utf16-length, utf16-value)`
583+
`(int ~utf8-length, int utf16-length, utf8-value)`
584+
585+
A string has two forms, UTF16 and UTF8. If the first 4-byte signed integer is `-1`, the string is `null`; if it is `0`, the string is empty. The UTF16 form is the same as a collection (serialized as `ReadOnlySpan<char>`; the byte count of utf16-value is utf16-length * 2). If the first signed integer is <= `-2`, the value is encoded as UTF8. In that case utf8-length is stored as its bitwise complement (`~utf8-length`), so apply `~` to the stored integer to retrieve the length. The next signed integer is utf16-length; it may be `-1`, which represents an unknown length. utf8-value stores utf8-length bytes.
578586

579587
### Union
580588

sandbox/Benchmark/Benchmarks/SerializeTest.cs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,10 @@ namespace Benchmark.Benchmarks;
3030
//[GenericTypeArguments(typeof(MyClass))]
3131

3232

33-
[GenericTypeArguments(typeof(int))]
34-
[GenericTypeArguments(typeof(Vector3[]))]
35-
[GenericTypeArguments(typeof(JsonResponseModel))]
36-
[GenericTypeArguments(typeof(NeuralNetworkLayerModel))]
33+
//[GenericTypeArguments(typeof(int))]
34+
//[GenericTypeArguments(typeof(Vector3[]))]
35+
//[GenericTypeArguments(typeof(JsonResponseModel))]
36+
//[GenericTypeArguments(typeof(NeuralNetworkLayerModel))]
3737
[CategoriesColumn]
3838
[PayloadColumn]
3939
[GroupBenchmarksBy(BenchmarkLogicalGroupRule.ByCategory)]
@@ -79,7 +79,7 @@ public byte[] MessagePackSerialize()
7979
[Benchmark, BenchmarkCategory(Categories.Bytes)]
8080
public byte[] MemoryPackSerialize()
8181
{
82-
return MemoryPackSerializer.Serialize(value);
82+
return MemoryPackSerializer.Serialize(value, MemoryPackSerializeOptions.Default);
8383
}
8484

8585
// requires T:new(), can't test it.
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
using Benchmark.BenchmarkNetUtilities;
2+
using BinaryPack.Models.Helpers;
3+
using MemoryPack;
4+
using System.Net.Http;
5+
6+
namespace Benchmark.Benchmarks;
7+
8+
[PayloadColumn]
9+
public class Utf16VsUtf8
10+
{
11+
readonly string ascii;
12+
readonly string japanese;
13+
readonly string largeAscii;
14+
15+
readonly byte[] utf16Jpn;
16+
readonly byte[] utf8Jpn;
17+
readonly byte[] utf16Ascii;
18+
readonly byte[] utf8Ascii;
19+
readonly byte[] utf16LargeAscii;
20+
readonly byte[] utf8LargeAscii;
21+
22+
public Utf16VsUtf8()
23+
{
24+
this.japanese = "あいうえおかきくけこさしすせそたちつてとなにぬねのはひふへほまみむめもやゆよらりるれろわをん";
25+
this.ascii = "abcedfghijklmnopqrstuvwxyz0123456789";
26+
this.utf16Jpn = MemoryPackSerializer.Serialize(japanese, MemoryPackSerializeOptions.Default);
27+
this.utf8Jpn = MemoryPackSerializer.Serialize(japanese, MemoryPackSerializeOptions.Utf8);
28+
this.utf16Ascii = MemoryPackSerializer.Serialize(ascii, MemoryPackSerializeOptions.Default);
29+
this.utf8Ascii = MemoryPackSerializer.Serialize(ascii, MemoryPackSerializeOptions.Utf8);
30+
31+
this.largeAscii = RandomProvider.NextString(600);
32+
this.utf16LargeAscii = MemoryPackSerializer.Serialize(largeAscii, MemoryPackSerializeOptions.Default);
33+
this.utf8LargeAscii = MemoryPackSerializer.Serialize(largeAscii, MemoryPackSerializeOptions.Utf8);
34+
}
35+
36+
[Benchmark]
37+
public byte[] SerializeUtf16Ascii()
38+
{
39+
return MemoryPackSerializer.Serialize(ascii);
40+
}
41+
42+
[Benchmark]
43+
public byte[] SerializeUtf16Japanese()
44+
{
45+
return MemoryPackSerializer.Serialize(japanese);
46+
}
47+
48+
[Benchmark]
49+
public byte[] SerializeUtf8Ascii()
50+
{
51+
return MemoryPackSerializer.Serialize(ascii, MemoryPackSerializeOptions.Utf8);
52+
}
53+
54+
[Benchmark]
55+
public byte[] SerializeUtf8Japanese()
56+
{
57+
return MemoryPackSerializer.Serialize(japanese, MemoryPackSerializeOptions.Utf8);
58+
}
59+
60+
[Benchmark]
61+
public byte[] SerializeUtf16LargeAscii()
62+
{
63+
return MemoryPackSerializer.Serialize(largeAscii, MemoryPackSerializeOptions.Default);
64+
}
65+
66+
[Benchmark]
67+
public byte[] SerializeUtf8LargeAscii()
68+
{
69+
return MemoryPackSerializer.Serialize(largeAscii, MemoryPackSerializeOptions.Utf8);
70+
}
71+
72+
[Benchmark]
73+
public void DeserializeUtf16Ascii()
74+
{
75+
MemoryPackSerializer.Deserialize<string>(utf16Ascii);
76+
}
77+
78+
[Benchmark]
79+
public void DeserializeUtf16Japanese()
80+
{
81+
MemoryPackSerializer.Deserialize<string>(utf16Jpn);
82+
}
83+
84+
[Benchmark]
85+
public void DeserializeUtf8Ascii()
86+
{
87+
MemoryPackSerializer.Deserialize<string>(utf8Ascii);
88+
}
89+
90+
[Benchmark]
91+
public void DeserializeUtf8Japanese()
92+
{
93+
MemoryPackSerializer.Deserialize<string>(utf8Jpn);
94+
}
95+
96+
[Benchmark]
97+
public void DeserializeUtf16LargeAscii()
98+
{
99+
MemoryPackSerializer.Deserialize<string>(utf16LargeAscii);
100+
}
101+
102+
[Benchmark]
103+
public void DeserializeUtf8LargeAscii()
104+
{
105+
MemoryPackSerializer.Deserialize<string>(utf8LargeAscii);
106+
}
107+
}

sandbox/Benchmark/Micro/GetLocalVsStaticField.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ public GetLocalVsStaticField()
2424
[Benchmark(Baseline = true)]
2525
public void GetFromProvider()
2626
{
27-
var writer = new MemoryPackWriter<ArrayBufferWriter<byte>>(ref bufferWriter);
27+
var writer = new MemoryPackWriter<ArrayBufferWriter<byte>>(ref bufferWriter, MemoryPackSerializeOptions.Default);
2828
for (int i = 0; i < 100; i++)
2929
{
3030
writer.GetFormatter<int>().Serialize(ref writer, ref i);
@@ -35,7 +35,7 @@ public void GetFromProvider()
3535
[Benchmark]
3636
public void GetFromLocal()
3737
{
38-
var writer = new MemoryPackWriter<ArrayBufferWriter<byte>>(ref bufferWriter);
38+
var writer = new MemoryPackWriter<ArrayBufferWriter<byte>>(ref bufferWriter, MemoryPackSerializeOptions.Default);
3939
var provider = writer.GetFormatter<int>();
4040
for (int i = 0; i < 100; i++)
4141
{

sandbox/Benchmark/Micro/RawSerialize.cs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ public byte[] HandMemoryPackWriterEmpty()
7171
bufWriter = staticWriter = new ReusableLinkedArrayBufferWriter(true, true);
7272
}
7373

74-
var writer = new MemoryPackWriter<ReusableLinkedArrayBufferWriter>(ref bufWriter, bufWriter.DangerousGetFirstBuffer());
74+
var writer = new MemoryPackWriter<ReusableLinkedArrayBufferWriter>(ref bufWriter, bufWriter.DangerousGetFirstBuffer(), MemoryPackSerializeOptions.Default);
7575
try
7676
{
7777
if (value == null)
@@ -106,7 +106,7 @@ public byte[] HandMemoryPackWriterHeaderOnly()
106106
bufWriter = staticWriter = new ReusableLinkedArrayBufferWriter(true, true);
107107
}
108108

109-
var writer = new MemoryPackWriter<ReusableLinkedArrayBufferWriter>(ref bufWriter, bufWriter.DangerousGetFirstBuffer());
109+
var writer = new MemoryPackWriter<ReusableLinkedArrayBufferWriter>(ref bufWriter, bufWriter.DangerousGetFirstBuffer(), MemoryPackSerializeOptions.Default);
110110
try
111111
{
112112
if (value == null)
@@ -140,7 +140,7 @@ public byte[] HandMemoryPackWriterHeaderInt3()
140140
bufWriter = staticWriter = new ReusableLinkedArrayBufferWriter(true, true);
141141
}
142142

143-
var writer = new MemoryPackWriter<ReusableLinkedArrayBufferWriter>(ref bufWriter, bufWriter.DangerousGetFirstBuffer());
143+
var writer = new MemoryPackWriter<ReusableLinkedArrayBufferWriter>(ref bufWriter, bufWriter.DangerousGetFirstBuffer(), MemoryPackSerializeOptions.Default);
144144
try
145145
{
146146
if (value == null)
@@ -174,7 +174,7 @@ public byte[] HandMemoryPackWriterHeaderInt3String1()
174174
bufWriter = staticWriter = new ReusableLinkedArrayBufferWriter(true, true);
175175
}
176176

177-
var writer = new MemoryPackWriter<ReusableLinkedArrayBufferWriter>(ref bufWriter, bufWriter.DangerousGetFirstBuffer());
177+
var writer = new MemoryPackWriter<ReusableLinkedArrayBufferWriter>(ref bufWriter, bufWriter.DangerousGetFirstBuffer(), MemoryPackSerializeOptions.Default);
178178
try
179179
{
180180
if (value == null)
@@ -208,7 +208,7 @@ public byte[] HandMemoryPackFull()
208208
bufWriter = staticWriter = new ReusableLinkedArrayBufferWriter(true, true);
209209
}
210210

211-
var writer = new MemoryPackWriter<ReusableLinkedArrayBufferWriter>(ref bufWriter, bufWriter.DangerousGetFirstBuffer());
211+
var writer = new MemoryPackWriter<ReusableLinkedArrayBufferWriter>(ref bufWriter, bufWriter.DangerousGetFirstBuffer(), MemoryPackSerializeOptions.Default);
212212
try
213213
{
214214
if (value == null)
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Linq;
4+
using System.Text;
5+
using System.Text.Unicode;
6+
using System.Threading.Tasks;
7+
8+
namespace Benchmark.Micro;
9+
10+
public class Utf8Decoding
11+
{
12+
byte[] utf8bytes;
13+
int utf8length;
14+
int utf16length;
15+
16+
public Utf8Decoding()
17+
{
18+
// Japanese Hiragana
19+
var text = "あいうえおかきくけこさしすせそたちつてとなにぬねのはひふへほまみむめもやゆよらりるれろわをん";
20+
utf8bytes = Encoding.UTF8.GetBytes(text);
21+
utf8length = utf8bytes.Length;
22+
utf16length = text.Length;
23+
}
24+
25+
[Benchmark]
26+
public string UTF8GetString()
27+
{
28+
return Encoding.UTF8.GetString(utf8bytes);
29+
}
30+
31+
[Benchmark]
32+
public string Utf16LengthUtf8ToUtf16()
33+
{
34+
return string.Create(utf16length, utf8bytes, static (dest, source) =>
35+
{
36+
Utf8.ToUtf16(source, dest, out var read, out var written);
37+
});
38+
}
39+
}

sandbox/Benchmark/Program.cs

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
using MemoryPack.Formatters;
1515
using System.Reflection;
1616

17+
#if !DEBUG
18+
1719
var config = ManualConfig.CreateMinimumViable()
1820
.AddDiagnoser(MemoryDiagnoser.Default)
1921
.AddExporter(DefaultExporters.Plain)
@@ -23,6 +25,8 @@
2325
//BenchmarkSwitcher.FromAssembly(Assembly.GetEntryAssembly()!).Run(args, config);
2426

2527

28+
//BenchmarkRunner.Run<Utf8Decoding>(config, args);
29+
2630
//BenchmarkSwitcher.FromAssembly(Assembly.GetEntryAssembly()!).RunAllJoined(config);
2731

2832

@@ -40,6 +44,8 @@
4044

4145
//BenchmarkRunner.Run<SerializeTest<JsonResponseModel>>(config, args);
4246

47+
BenchmarkRunner.Run<Utf16VsUtf8>(config, args);
48+
4349
//BenchmarkRunner.Run<SerializeTest<NeuralNetworkLayerModel>>(config, args);
4450

4551
// BenchmarkRunner.Run<DeserializeTest<NeuralNetworkLayerModel>>(config, args);
@@ -48,18 +54,19 @@
4854

4955
//BenchmarkRunner.Run<GetLocalVsStaticField>(config, args);
5056

51-
BenchmarkSwitcher.FromTypes(new[]{
52-
typeof(SerializeTest<>),
53-
typeof(DeserializeTest<>) })
54-
.RunAllJoined(config);
57+
//BenchmarkSwitcher.FromTypes(new[]{
58+
// typeof(SerializeTest<>),
59+
// typeof(DeserializeTest<>) })
60+
// .RunAllJoined(config);
5561

62+
#endif
5663

5764
#if DEBUG
5865

66+
var foo = new Utf8Decoding().Utf16LengthUtf8ToUtf16();
67+
Console.WriteLine(foo);
5968

60-
61-
62-
Check<JsonResponseModel>();
69+
Check<JsonResponseModel>();
6370
//Check<NeuralNetworkLayerModel>();
6471

6572
void Check<T>()

sandbox/SandboxConsoleApp/Program.cs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,13 @@
2020
using System.Text;
2121
using System.Xml.Linq;
2222

23-
var a = int.MaxValue;
24-
var b = ~a;
25-
Console.WriteLine(b);
2623

2724

25+
var bin = MemoryPackSerializer.Serialize("hogehoge");
26+
var takotako = MemoryPackSerializer.Deserialize<string>(bin);
27+
28+
Console.WriteLine(takotako);
29+
2830
// ---
2931

3032

src/MemoryPack.Core/Formatters/ImmutableCollectionFormatters.cs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ public override void Serialize<TBufferWriter>(ref MemoryPackWriter<TBufferWriter
144144
var tempBuffer = ReusableLinkedArrayBufferWriterPool.Rent();
145145
try
146146
{
147-
var tempWriter = new MemoryPackWriter<ReusableLinkedArrayBufferWriter>(ref tempBuffer);
147+
var tempWriter = new MemoryPackWriter<ReusableLinkedArrayBufferWriter>(ref tempBuffer, writer.Options);
148148

149149
var count = 0;
150150
var formatter = writer.GetFormatter<T?>();
@@ -233,7 +233,7 @@ public override void Serialize<TBufferWriter>(ref MemoryPackWriter<TBufferWriter
233233
var tempBuffer = ReusableLinkedArrayBufferWriterPool.Rent();
234234
try
235235
{
236-
var tempWriter = new MemoryPackWriter<ReusableLinkedArrayBufferWriter>(ref tempBuffer);
236+
var tempWriter = new MemoryPackWriter<ReusableLinkedArrayBufferWriter>(ref tempBuffer, writer.Options);
237237

238238
var count = 0;
239239
var formatter = writer.GetFormatter<T?>();
@@ -596,7 +596,7 @@ public override void Serialize<TBufferWriter>(ref MemoryPackWriter<TBufferWriter
596596
var tempBuffer = ReusableLinkedArrayBufferWriterPool.Rent();
597597
try
598598
{
599-
var tempWriter = new MemoryPackWriter<ReusableLinkedArrayBufferWriter>(ref tempBuffer);
599+
var tempWriter = new MemoryPackWriter<ReusableLinkedArrayBufferWriter>(ref tempBuffer, writer.Options);
600600

601601
var count = 0;
602602
var formatter = writer.GetFormatter<T?>();
@@ -685,7 +685,7 @@ public override void Serialize<TBufferWriter>(ref MemoryPackWriter<TBufferWriter
685685
var tempBuffer = ReusableLinkedArrayBufferWriterPool.Rent();
686686
try
687687
{
688-
var tempWriter = new MemoryPackWriter<ReusableLinkedArrayBufferWriter>(ref tempBuffer);
688+
var tempWriter = new MemoryPackWriter<ReusableLinkedArrayBufferWriter>(ref tempBuffer, writer.Options);
689689

690690
var count = 0;
691691
var formatter = writer.GetFormatter<T?>();

src/MemoryPack.Core/Formatters/InterfaceCollectionFormatters.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ public override void Serialize<TBufferWriter>(ref MemoryPackWriter<TBufferWriter
126126
var tempBuffer = ReusableLinkedArrayBufferWriterPool.Rent();
127127
try
128128
{
129-
var tempWriter = new MemoryPackWriter<ReusableLinkedArrayBufferWriter>(ref tempBuffer);
129+
var tempWriter = new MemoryPackWriter<ReusableLinkedArrayBufferWriter>(ref tempBuffer, writer.Options);
130130

131131
count = 0;
132132
var formatter = writer.GetFormatter<T?>();

0 commit comments

Comments
 (0)