summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Rich Felker <dalias@aerifal.cx> 2018-10-16 01:08:21 -0400
committer Rich Felker <dalias@aerifal.cx> 2018-10-17 23:16:35 -0400
commit dd8f02b7dce53d6b1c4282439f1636a2d63bee01 (patch)
tree 22b57b941720ddefb64108da93268b2ea288567a
parent 7136836e14e5286afe74a354c289601375bd472d (diff)
download musl-dd8f02b7dce53d6b1c4282439f1636a2d63bee01.tar.gz
optimize hot paths of getc with manual shrink-wrapping
with these changes, in a program that has not created any threads besides the main thread and that has not called f[try]lockfile, getc performs indistinguishably from getc_unlocked. this was measured on several i386 and x86_64 models, and should hold on other archs too simply by the properties of the code generation. the case where the caller already holds the lock (via flockfile) is improved significantly as well (40-60% reduction in time on machines tested) and the case where locking is needed is improved somewhat (roughly 10%). the key technique used here is forcing the non-hot path out-of-line and enabling it to be a tail call. a static noinline function (conditional on __GNUC__) is used rather than the extern hiddens used elsewhere for this purpose, so that the compiler can choose non-default calling conventions, making it possible to tail-call to a callee that takes more arguments than the caller on archs where arguments are passed on the stack or must have space reserved on the stack for spilling them. the tid could just be reloaded via the thread pointer in locking_getc, but that would be ridiculously expensive on some archs where thread pointer load requires a trap or syscall.
-rw-r--r-- src/stdio/fgetc.c 10
-rw-r--r-- src/stdio/getc.c 10
-rw-r--r-- src/stdio/getc.h 22
-rw-r--r-- src/stdio/getchar.c 3
4 files changed, 30 insertions, 15 deletions
diff --git a/src/stdio/fgetc.c b/src/stdio/fgetc.c
index e1224164..2578afcc 100644
--- a/src/stdio/fgetc.c
+++ b/src/stdio/fgetc.c
@@ -1,11 +1,7 @@
-#include "stdio_impl.h"
+#include <stdio.h>
+#include "getc.h"
int fgetc(FILE *f)
{
- int c;
- if (f->lock < 0 || !__lockfile(f))
- return getc_unlocked(f);
- c = getc_unlocked(f);
- __unlockfile(f);
- return c;
+ return do_getc(f);
}
diff --git a/src/stdio/getc.c b/src/stdio/getc.c
index b3f351d1..8409fc23 100644
--- a/src/stdio/getc.c
+++ b/src/stdio/getc.c
@@ -1,13 +1,9 @@
-#include "stdio_impl.h"
+#include <stdio.h>
+#include "getc.h"
int getc(FILE *f)
{
- int c;
- if (f->lock < 0 || !__lockfile(f))
- return getc_unlocked(f);
- c = getc_unlocked(f);
- __unlockfile(f);
- return c;
+ return do_getc(f);
}
weak_alias(getc, _IO_getc);
diff --git a/src/stdio/getc.h b/src/stdio/getc.h
new file mode 100644
index 00000000..0657ab6f
--- /dev/null
+++ b/src/stdio/getc.h
@@ -0,0 +1,22 @@
+#include "stdio_impl.h"
+#include "pthread_impl.h"
+
+#ifdef __GNUC__
+__attribute__((__noinline__))
+#endif
+static int locking_getc(FILE *f, int tid)
+{
+ if (a_cas(&f->lock, 0, tid)) __lockfile(f);
+ int c = getc_unlocked(f);
+ if (a_swap(&f->lock, 0) & MAYBE_WAITERS)
+ __wake(&f->lock, 1, 1);
+ return c;
+}
+
+static inline int do_getc(FILE *f)
+{
+ int tid, l = f->lock;
+ if (l < 0 || (l & ~MAYBE_WAITERS) == (tid=__pthread_self()->tid))
+ return getc_unlocked(f);
+ return locking_getc(f, tid);
+}
diff --git a/src/stdio/getchar.c b/src/stdio/getchar.c
index c1012658..df395ca9 100644
--- a/src/stdio/getchar.c
+++ b/src/stdio/getchar.c
@@ -1,6 +1,7 @@
#include <stdio.h>
+#include "getc.h"
int getchar(void)
{
- return fgetc(stdin);
+ return do_getc(stdin);
}