diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/network/res_msend.c | 119 | 
1 file changed, 117 insertions, 2 deletions
| diff --git a/src/network/res_msend.c b/src/network/res_msend.c index 98e6d8a3..1e76886a 100644 --- a/src/network/res_msend.c +++ b/src/network/res_msend.c @@ -1,5 +1,6 @@  #include <sys/socket.h>  #include <netinet/in.h> +#include <netinet/tcp.h>  #include <netdb.h>  #include <arpa/inet.h>  #include <stdint.h> @@ -29,6 +30,51 @@ static unsigned long mtime()  		+ ts.tv_nsec / 1000000;  } +static int start_tcp(struct pollfd *pfd, int family, const void *sa, socklen_t sl, const unsigned char *q, int ql) +{ +	struct msghdr mh = { +		.msg_name = (void *)sa, +		.msg_namelen = sl, +		.msg_iovlen = 2, +		.msg_iov = (struct iovec [2]){ +			{ .iov_base = (uint8_t[]){ ql>>8, ql }, .iov_len = 2 }, +			{ .iov_base = (void *)q, .iov_len = ql } } +	}; +	int r; +	int fd = socket(family, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0); +	pfd->fd = fd; +	pfd->events = POLLOUT; +	if (!setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN_CONNECT, +	    &(int){1}, sizeof(int))) { +		r = sendmsg(fd, &mh, MSG_FASTOPEN|MSG_NOSIGNAL); +		if (r == ql+2) pfd->events = POLLIN; +		if (r >= 0) return r; +		if (errno == EINPROGRESS) return 0; +	} +	r = connect(fd, sa, sl); +	if (!r || errno == EINPROGRESS) return 0; +	close(fd); +	pfd->fd = -1; +	return -1; +} + +static void step_mh(struct msghdr *mh, size_t n) +{ +	/* Adjust iovec in msghdr to skip first n bytes. */ +	while (mh->msg_iovlen && n >= mh->msg_iov->iov_len) { +		n -= mh->msg_iov->iov_len; +		mh->msg_iov++; +		mh->msg_iovlen--; +	} +	if (!mh->msg_iovlen) return; +	mh->msg_iov->iov_base = (char *)mh->msg_iov->iov_base + n; +	mh->msg_iov->iov_len -= n; +} + +/* Internal contract for __res_msend[_rc]: asize must be >=512, nqueries + * must be sufficiently small to be safe as VLA size. In practice it's + * either 1 or 2, anyway. 
*/ +  int __res_msend_rc(int nqueries, const unsigned char *const *queries,  	const int *qlens, unsigned char *const *answers, int *alens, int asize,  	const struct resolvconf *conf) @@ -47,6 +93,9 @@ int __res_msend_rc(int nqueries, const unsigned char *const *queries,  	int i, j;  	int cs;  	struct pollfd pfd[nqueries+2]; +	int qpos[nqueries], apos[nqueries]; +	unsigned char alen_buf[nqueries][2]; +	int r;  	unsigned long t0, t1, t2;  	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs); @@ -125,6 +174,11 @@ int __res_msend_rc(int nqueries, const unsigned char *const *queries,  	t1 = t2 - retry_interval;  	for (; t2-t0 < timeout; t2=mtime()) { +		/* This is the loop exit condition: that all queries +		 * have an accepted answer. */ +		for (i=0; i<nqueries && alens[i]>0; i++); +		if (i==nqueries) break; +  		if (t2-t1 >= retry_interval) {  			/* Query all configured namservers in parallel */  			for (i=0; i<nqueries; i++) @@ -140,7 +194,8 @@ int __res_msend_rc(int nqueries, const unsigned char *const *queries,  		/* Wait for a response, or until time to retry */  		if (poll(pfd, nqueries+1, t1+retry_interval-t2) <= 0) continue; -		while ((rlen = recvfrom(fd, answers[next], asize, 0, +		while (next < nqueries && +		  (rlen = recvfrom(fd, answers[next], asize, 0,  		  (void *)&sa, (socklen_t[1]){sl})) >= 0) {  			/* Ignore non-identifiable packets */ @@ -181,12 +236,72 @@ int __res_msend_rc(int nqueries, const unsigned char *const *queries,  			else  				memcpy(answers[i], answers[next], rlen); -			if (next == nqueries) goto out; +			/* Ignore further UDP if all slots full or TCP-mode */ +			if (next == nqueries) pfd[nqueries].events = 0; + +			/* If answer is truncated (TC bit), fallback to TCP */ +			if (answers[i][2] & 2) { +				alens[i] = -1; +				pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, 0); +				r = start_tcp(pfd+i, family, ns+j, sl, queries[i], qlens[i]); +				pthread_setcancelstate(cs, 0); +				if (r >= 0) { +					qpos[i] = r; +					apos[i] = 0; +				} 
+				continue; +			} +		} + +		for (i=0; i<nqueries; i++) if (pfd[i].revents & POLLOUT) { +			struct msghdr mh = { +				.msg_iovlen = 2, +				.msg_iov = (struct iovec [2]){ +					{ .iov_base = (uint8_t[]){ qlens[i]>>8, qlens[i] }, .iov_len = 2 }, +					{ .iov_base = (void *)queries[i], .iov_len = qlens[i] } } +			}; +			step_mh(&mh, qpos[i]); +			r = sendmsg(pfd[i].fd, &mh, MSG_NOSIGNAL); +			if (r < 0) goto out; +			qpos[i] += r; +			if (qpos[i] == qlens[i]+2) +				pfd[i].events = POLLIN; +		} + +		for (i=0; i<nqueries; i++) if (pfd[i].revents & POLLIN) { +			struct msghdr mh = { +				.msg_iovlen = 2, +				.msg_iov = (struct iovec [2]){ +					{ .iov_base = alen_buf[i], .iov_len = 2 }, +					{ .iov_base = answers[i], .iov_len = asize } } +			}; +			step_mh(&mh, apos[i]); +			r = recvmsg(pfd[i].fd, &mh, 0); +			if (r < 0) goto out; +			apos[i] += r; +			if (apos[i] < 2) continue; +			int alen = alen_buf[i][0]*256 + alen_buf[i][1]; +			if (alen < 13) goto out; +			if (apos[i] < alen+2 && apos[i] < asize+2) +				continue; +			int rcode = answers[i][3] & 15; +			if (rcode != 0 && rcode != 3) +				goto out; + +			/* Storing the length here commits the accepted answer. +			 * Immediately close TCP socket so as not to consume +			 * resources we no longer need. */ +			alens[i] = alen; +			__syscall(SYS_close, pfd[i].fd); +			pfd[i].fd = -1;  		}  	}  out:  	pthread_cleanup_pop(1); +	/* Disregard any incomplete TCP results */ +	for (i=0; i<nqueries; i++) if (alens[i]<0) alens[i] = 0; +  	return 0;  } | 
