Skip to content

Commit 32d0868

Browse files
committed
Compressed syscall database with O(1) lookup
libkafel.so 5x smaller (x86_64, stripped): down to 88KiB from 440KiB. Closes google#20
1 parent d907da7 commit 32d0868

11 files changed

+711
-79
lines changed

src/Makefile

Lines changed: 11 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -17,18 +17,14 @@
1717
# limitations under the License.
1818
#
1919

20+
SUBDIRS:=syscalls
21+
2022
OBJCOPY?=objcopy
2123

2224
CFLAGS+=-fPIC -fvisibility=hidden
23-
GENERATED_SRCS:=lexer.c parser.c
25+
GENERATED_SRCS:=lexer.c parser.c syscalldb.c
2426
GENERATED:=lexer.h parser.h ${GENERATED_SRCS}
25-
TEMPORARY:=libkafel_r.o libkafel.o
26-
SYSCALL_LISTS:=amd64_syscalls.c \
27-
i386_syscalls.c \
28-
aarch64_syscalls.c \
29-
mipso32_syscalls.c \
30-
mips64_syscalls.c \
31-
arm_syscalls.c
27+
TEMPORARY:=libkafel_r.o libkafel.o syscalldb.gperf
3228
SRCS:=kafel.c \
3329
context.c \
3430
codegen.c \
@@ -37,8 +33,7 @@ SRCS:=kafel.c \
3733
policy.c \
3834
range_rules.c \
3935
syscall.c \
40-
${GENERATED_SRCS} \
41-
$(SYSCALL_LISTS:%.c=syscalls/%.c)
36+
${GENERATED_SRCS}
4237
DYNAMIC_TARGET:=${PROJECT_ROOT}libkafel.so
4338
STATIC_TARGET:=${PROJECT_ROOT}libkafel.a
4439
TARGET=${DYNAMIC_TARGET} ${STATIC_TARGET}
@@ -65,6 +60,10 @@ lexer.h lexer.c: lexer.l
6560
parser.h parser.c: parser.y
6661
bison $<
6762

63+
syscalldb.c: syscalls
64+
./syscalls/syscalldb_generator > ./syscalldb.gperf
65+
gperf -m10 --output-file=./syscalldb.c ./syscalldb.gperf
66+
6867
# DO NOT DELETE THIS LINE -- make depend depends on it.
6968

7069
kafel.o: codegen.h context.h includes.h policy.h expression.h syscall.h
@@ -76,14 +75,9 @@ expression.o: expression.h common.h
7675
includes.o: includes.h common.h
7776
policy.o: policy.h expression.h common.h
7877
range_rules.o: range_rules.h policy.h expression.h common.h syscall.h
79-
syscall.o: syscall.h common.h
78+
syscall.o: syscall.h syscalldb.h common.h
79+
syscalldb.o: syscall.h syscalldb.h syscalldb.inl
8080
lexer.o: parser.h context.h includes.h policy.h expression.h syscall.h
8181
lexer.o: common.h
8282
parser.o: parser.h context.h includes.h policy.h expression.h syscall.h
8383
parser.o: lexer.h
84-
syscalls/amd64_syscalls.o: syscall.h
85-
syscalls/i386_syscalls.o: syscall.h
86-
syscalls/aarch64_syscalls.o: syscall.h
87-
syscalls/mipso32_syscalls.o: syscall.h
88-
syscalls/mips64_syscalls.o: syscall.h
89-
syscalls/arm_syscalls.o: syscall.h

src/context.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,6 @@ void kafel_ctxt_reset(kafel_ctxt_t ctxt) {
7070
}
7171
ctxt->default_action = 0;
7272
ctxt->lexical_error = false;
73-
ctxt->syscalls = NULL;
7473
}
7574

7675
void kafel_ctxt_clean(kafel_ctxt_t ctxt) {

src/context.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ struct kafel_ctxt {
4646
struct policy* main_policy;
4747
int default_action;
4848
uint32_t target_arch;
49-
const struct syscall_list* syscalls;
49+
uint32_t target_arch_mask;
5050
struct {
5151
enum {
5252
INPUT_NONE,

src/kafel.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,8 @@ static int parse(struct kafel_ctxt* ctxt) {
5757
kafel_yyset_column(1, scanner);
5858
kafel_yyset_lineno(1, scanner);
5959

60-
ctxt->syscalls = syscalls_lookup(ctxt->target_arch);
61-
if (ctxt->syscalls == NULL) {
60+
ctxt->target_arch_mask = syscall_get_arch_mask(ctxt->target_arch);
61+
if (!ctxt->target_arch_mask) {
6262
append_error(ctxt, "Cannot resolve syscall list for architecture %#x\n",
6363
ctxt->target_arch);
6464
kafel_yylex_destroy(scanner);

src/parser.y

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,7 @@ syscall_id
298298
$$ = syscall_custom(value);
299299
} else {
300300
$$ = (struct syscall_descriptor*)
301-
syscall_lookup(ctxt->syscalls, $1);
301+
syscall_lookup(ctxt->target_arch_mask, $1);
302302
if ($$ == NULL) {
303303
emit_error(@1, "Undefined syscall `%s'", $1);
304304
free($1); $1 = NULL;

src/syscall.c

Lines changed: 29 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -25,75 +25,57 @@
2525
#include <string.h>
2626

2727
#include "common.h"
28+
#include "syscalldb.h"
2829

2930
// Fix for Linux <3.12
3031
#ifndef EM_ARM
3132
#define EM_ARM 40
3233
#endif
3334

34-
#define SYSCALL_LIST_DECL(arch) \
35-
extern const struct syscall_descriptor arch##_syscall_list[]; \
36-
extern const size_t arch##_syscall_list_size;
37-
38-
#define SYSCALL_LIST(audit_arch, arch) \
39-
{ audit_arch, arch##_syscall_list, &arch##_syscall_list_size }
40-
41-
SYSCALL_LIST_DECL(arm)
42-
SYSCALL_LIST_DECL(aarch64)
43-
SYSCALL_LIST_DECL(amd64)
44-
SYSCALL_LIST_DECL(mipso32)
45-
SYSCALL_LIST_DECL(mips64)
46-
SYSCALL_LIST_DECL(i386)
35+
struct syscall_descriptor* syscall_custom(uint32_t nr) {
36+
struct syscall_descriptor* rv = calloc(1, sizeof(*rv));
37+
rv->nr = nr;
38+
return rv;
39+
}
4740

48-
const struct syscall_list syscall_lists[] = {
41+
uint32_t syscall_get_arch_mask(uint32_t arch) {
42+
switch (arch) {
43+
default:
44+
return 0;
4945
#ifdef AUDIT_ARCH_ARM
50-
SYSCALL_LIST(AUDIT_ARCH_ARM, arm),
46+
case AUDIT_ARCH_ARM:
47+
return SYSCALLDB_ARCH_ARM_FLAG;
5148
#endif
5249
#ifdef AUDIT_ARCH_AARCH64
53-
SYSCALL_LIST(AUDIT_ARCH_AARCH64, aarch64),
50+
case AUDIT_ARCH_AARCH64:
51+
return SYSCALLDB_ARCH_AARCH64_FLAG;
5452
#endif
5553
#ifdef AUDIT_ARCH_X86_64
56-
SYSCALL_LIST(AUDIT_ARCH_X86_64, amd64),
54+
case AUDIT_ARCH_X86_64:
55+
return SYSCALLDB_ARCH_X86_64_FLAG;
5756
#endif
5857
#ifdef AUDIT_ARCH_MIPS
59-
SYSCALL_LIST(AUDIT_ARCH_MIPS, mipso32),
58+
case AUDIT_ARCH_MIPS:
59+
return SYSCALLDB_ARCH_MIPS_FLAG;
6060
#endif
6161
#ifdef AUDIT_ARCH_MIPS64
62-
SYSCALL_LIST(AUDIT_ARCH_MIPS64, mips64),
62+
case AUDIT_ARCH_MIPS64:
63+
return SYSCALLDB_ARCH_MIPS64_FLAG;
6364
#endif
6465
#ifdef AUDIT_ARCH_I386
65-
SYSCALL_LIST(AUDIT_ARCH_I386, i386),
66+
case AUDIT_ARCH_I386:
67+
return SYSCALLDB_ARCH_I386_FLAG;
6668
#endif
67-
};
68-
69-
struct syscall_descriptor* syscall_custom(uint32_t nr) {
70-
struct syscall_descriptor* rv = calloc(1, sizeof(*rv));
71-
rv->nr = nr;
72-
rv->is_custom = true;
73-
return rv;
74-
}
75-
76-
const struct syscall_list* syscalls_lookup(uint32_t arch) {
77-
for (size_t i = 0; i < sizeof(syscall_lists) / sizeof(syscall_lists[0]);
78-
++i) {
79-
if (syscall_lists[i].arch == arch) {
80-
return &syscall_lists[i];
81-
}
8269
}
83-
return NULL;
8470
}
8571

86-
const struct syscall_descriptor* syscall_lookup(const struct syscall_list* list,
72+
const struct syscall_descriptor* syscall_lookup(uint32_t mask,
8773
const char* name) {
88-
ASSERT(list != NULL);
89-
ASSERT(name != NULL);
90-
/* TODO use binary search if syscalls can be guaranteed to be
91-
* sorted alphabetically
92-
*/
93-
for (size_t i = 0; i < *list->size; ++i) {
94-
if (strcmp(name, list->syscalls[i].name) == 0) {
95-
return &list->syscalls[i];
96-
}
74+
const struct syscalldb_definition* def = syscalldb_lookup(name);
75+
if (def && mask & def->arch_mask) {
76+
struct syscall_descriptor* rv = calloc(1, sizeof(*rv));
77+
syscalldb_unpack(def, mask, rv);
78+
return rv;
9779
}
9880
return NULL;
9981
}
@@ -102,8 +84,6 @@ void syscall_descriptor_destroy(struct syscall_descriptor** desc) {
10284
ASSERT(desc != NULL);
10385
ASSERT((*desc) != NULL);
10486

105-
if ((*desc)->is_custom) {
106-
free(*desc);
107-
}
87+
free(*desc);
10888
(*desc) = NULL;
10989
}

src/syscall.h

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -40,15 +40,9 @@ struct syscall_descriptor {
4040
struct syscall_arg args[SYSCALL_MAX_ARGS];
4141
};
4242

43-
struct syscall_list {
44-
uint32_t arch;
45-
const struct syscall_descriptor* const syscalls;
46-
const size_t* const size;
47-
};
48-
4943
struct syscall_descriptor* syscall_custom(uint32_t nr);
50-
const struct syscall_list* syscalls_lookup(uint32_t arch);
51-
const struct syscall_descriptor* syscall_lookup(const struct syscall_list* list,
44+
uint32_t syscall_get_arch_mask(uint32_t arch);
45+
const struct syscall_descriptor* syscall_lookup(uint32_t arch_mask,
5246
const char* name);
5347
void syscall_descriptor_destroy(struct syscall_descriptor** desc);
5448

src/syscalldb.h

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
/*
2+
Kafel - syscall database
3+
-----------------------------------------
4+
5+
Copyright 2019 Google Inc. All Rights Reserved.
6+
7+
Licensed under the Apache License, Version 2.0 (the "License");
8+
you may not use this file except in compliance with the License.
9+
You may obtain a copy of the License at
10+
11+
http://www.apache.org/licenses/LICENSE-2.0
12+
13+
Unless required by applicable law or agreed to in writing, software
14+
distributed under the License is distributed on an "AS IS" BASIS,
15+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
See the License for the specific language governing permissions and
17+
limitations under the License.
18+
19+
*/
20+
21+
#ifndef KAFEL_SYSCALLDB_H
22+
#define KAFEL_SYSCALLDB_H
23+
24+
#include <stddef.h>
25+
#include <stdint.h>
26+
#include <string.h>
27+
28+
struct syscalldb_definition;
29+
struct syscall_descriptor;
30+
31+
enum {
32+
SYSCALLDB_ARCH_ARM_FLAG = 0x01,
33+
SYSCALLDB_ARCH_AARCH64_FLAG = 0x02,
34+
SYSCALLDB_ARCH_X86_64_FLAG = 0x04,
35+
SYSCALLDB_ARCH_MIPS_FLAG = 0x08,
36+
SYSCALLDB_ARCH_MIPS64_FLAG = 0x10,
37+
SYSCALLDB_ARCH_I386_FLAG = 0x20,
38+
};
39+
40+
const struct syscalldb_definition* syscalldb_lookup(const char* name);
41+
const char* syscalldb_reverse_lookup(uint32_t arch_mask, uint32_t nr);
42+
43+
void syscalldb_unpack(const struct syscalldb_definition* definition,
44+
uint32_t arch_mask, struct syscall_descriptor* dest);
45+
46+
/*
47+
internals
48+
49+
Generated from individual syscall lists, has O(1) lookups and takes
50+
advantage of the redundancy in the data set to reduce footprint
51+
dramatically.
52+
53+
O(1) lookups are courtesy of the perfect hash function generated with
54+
GNU gperf. PHF maps a name to an index in the table of <name, offset>
55+
tuples. If names match, syscall definition is found at the given
56+
offset.
57+
58+
Syscall definitions are of variable length and stored back to
59+
back. For details, consult syscalldb_definition struct.
60+
61+
*/
62+
63+
#define SYSCALLDB_MAX_ARGTYPE 8
64+
#define SYSCALLDB_MAX_ARGNAME 0xffff
65+
66+
#define SYSCALLDB_ARGNO(no) (((uint32_t)(no)) << 24)
67+
#define SYSCALLDB_ARGTYPE(type) (((uint32_t)(type)) << 16)
68+
#define SYSCALLDB_ARGNAME(name) ((uint32_t)(name))
69+
70+
#define SYSCALLDB_GET_ARGNO(x) (((x)&UINT32_C(0xff000000)) >> 24)
71+
#define SYSCALLDB_GET_ARGTYPE(x) (((x)&UINT32_C(0x00ff0000)) >> 16)
72+
#define SYSCALLDB_GET_ARGNAME(x) (((x)&UINT32_C(0x0000ffff)))
73+
74+
struct syscalldb_entry {
75+
uint16_t name;
76+
uint16_t definition_offset;
77+
};
78+
79+
/*
80+
Observations:
81+
82+
(1) very few syscalls are arch-specific;
83+
84+
(2) syscall numbers vary wildly across archs;
85+
86+
(3) argument names and sizes (modulo pointer size differences) are the same
87+
across archs with a few notable exceptions (ex: clone).
88+
89+
Last but not least, avoid pointers in static data structures with
90+
initializers! Due to PIC requirements every single one of these
91+
requires relocation. This increases the footprint and has runtime overhead.
92+
93+
*/
94+
struct syscalldb_definition {
95+
uint32_t arch_mask; /* archs providing this syscall */
96+
uint32_t n_arg_info; /* if >INT32_MAX, consult ext_arg_info;
97+
it has -n_arg_info entries */
98+
union {
99+
uint32_t arg_info[1]; /* argno, argtype, argname */
100+
struct {
101+
uint32_t arch_mask; /* archs this entry applies to */
102+
uint32_t arg_info; /* argno, argtype, argname */
103+
} ext_arg_info[1];
104+
};
105+
/* uint32_t nr[]; syscall numbers, one value per bit set in arch_mask */
106+
};
107+
108+
#define SYSCALLDB_DEFINITION_NR(d) \
109+
(&(d)->arch_mask + 2 + \
110+
((d)->n_arg_info > INT32_MAX ? 2 * -(d)->n_arg_info : (d)->n_arg_info))
111+
112+
#define SYSCALLDB_DEFINITION_NEXT(d) \
113+
(typeof(d))(SYSCALLDB_DEFINITION_NR(d) + __builtin_popcount((d)->arch_mask))
114+
115+
#endif /* KAFEL_SYSCALLDB_H */

0 commit comments

Comments
 (0)