aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJakub Wojciech Klama <jceel@FreeBSD.org>2020-04-29 16:24:32 +0000
committerJakub Wojciech Klama <jceel@FreeBSD.org>2020-04-29 16:24:32 +0000
commit6d9daf7da2883bacd006132f8a1b6a7c388e191c (patch)
tree0a8368af46b0e674f120a4dde9a1609648f59c07
Import lib9p 7ddb1164407da19b9b1afb83df83ae65a71a9a66.vendor/lib9p/7ddb1164407da19b9b1afb83df83ae65a71a9a66
Approved by: trasz (mentor) MFC after: 1 month Sponsored by: Conclusive Engineering
Notes
Notes: svn path=/vendor/lib9p/dist/; revision=360470 svn path=/vendor/lib9p/7ddb1164407da19b9b1afb83df83ae65a71a9a66/; revision=360471; tag=vendor/lib9p/7ddb1164407da19b9b1afb83df83ae65a71a9a66
-rw-r--r--.gitignore37
-rw-r--r--COPYRIGHT47
-rw-r--r--GNUmakefile76
-rw-r--r--Makefile27
-rw-r--r--README.md20
-rw-r--r--apple_endian.h27
-rw-r--r--backend/backend.h69
-rw-r--r--backend/fs.c3061
-rw-r--r--backend/fs.h37
-rw-r--r--connection.c215
-rw-r--r--example/Makefile10
-rw-r--r--example/server.c89
-rw-r--r--fcall.h624
-rw-r--r--fid.h160
-rw-r--r--genacl.c720
-rw-r--r--genacl.h307
-rw-r--r--hashtable.c267
-rw-r--r--hashtable.h107
-rw-r--r--lib9p.h249
-rw-r--r--lib9p_impl.h78
-rw-r--r--linux_errno.h247
-rw-r--r--log.c67
-rw-r--r--log.h46
-rw-r--r--pack.c993
-rw-r--r--pytest/.gitignore3
-rw-r--r--pytest/Makefile9
-rw-r--r--pytest/README32
-rwxr-xr-xpytest/client.py643
-rw-r--r--pytest/lerrno.py291
-rw-r--r--pytest/numalloc.py379
-rw-r--r--pytest/p9conn.py1788
-rw-r--r--pytest/p9err.py146
-rw-r--r--pytest/pfod.py204
-rw-r--r--pytest/protocol.py1998
-rw-r--r--pytest/sequencer.py653
-rw-r--r--pytest/testconf.ini.sample16
-rw-r--r--request.c1440
-rw-r--r--rfuncs.c320
-rw-r--r--rfuncs.h79
-rw-r--r--sbuf/sbuf.c127
-rw-r--r--sbuf/sbuf.h55
-rw-r--r--threadpool.c422
-rw-r--r--threadpool.h118
-rw-r--r--transport/socket.c363
-rw-r--r--transport/socket.h40
-rw-r--r--utils.c1268
46 files changed, 17974 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 000000000000..beccfc6b48ec
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,37 @@
+# Object files
+*.o
+*.ko
+*.obj
+*.elf
+
+# Precompiled Headers
+*.gch
+*.pch
+
+# Libraries
+*.lib
+*.a
+*.la
+*.lo
+
+# Shared objects (inc. Windows DLLs)
+*.dll
+*.so
+*.so.*
+*.dylib
+
+# Executables
+*.exe
+*.out
+*.app
+*.i*86
+*.x86_64
+*.hex
+
+# Debug files
+*.dSYM/
+/build/
+
+*.po
+*.pico
+*.depend
diff --git a/COPYRIGHT b/COPYRIGHT
new file mode 100644
index 000000000000..b02f09aabdd9
--- /dev/null
+++ b/COPYRIGHT
@@ -0,0 +1,47 @@
+Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+All rights reserved
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted providing that the following conditions
+are met:
+1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+Some parts of the code are based on libixp (http://libs.suckless.org/libixp)
+library code released under following license:
+
+© 2005-2006 Anselm R. Garbe <garbeam@gmail.com>
+© 2006-2010 Kris Maglione <maglione.k at Gmail>
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, sublicense,
+and/or sell copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
diff --git a/GNUmakefile b/GNUmakefile
new file mode 100644
index 000000000000..79108f2ceb61
--- /dev/null
+++ b/GNUmakefile
@@ -0,0 +1,76 @@
+CC_VERSION := $(shell $(CC) --version | \
+ sed -n -e '/clang-/s/.*clang-\([0-9][0-9]*\).*/\1/p')
+ifeq ($(CC_VERSION),)
+# probably not clang
+CC_VERSION := 0
+endif
+
+WFLAGS :=
+
+# Warnings are version-dependent, unfortunately,
+# so test for version before adding a -W flag.
+# Note: gnu make requires $(shell test ...) for "a > b" type tests.
+ifeq ($(shell test $(CC_VERSION) -gt 0; echo $$?),0)
+WFLAGS += -Weverything
+WFLAGS += -Wno-padded
+WFLAGS += -Wno-gnu-zero-variadic-macro-arguments
+WFLAGS += -Wno-format-nonliteral
+WFLAGS += -Wno-unused-macros
+WFLAGS += -Wno-disabled-macro-expansion
+WFLAGS += -Werror
+endif
+
+ifeq ($(shell test $(CC_VERSION) -gt 600; echo $$?),0)
+WFLAGS += -Wno-reserved-id-macro
+endif
+
+CFLAGS := $(WFLAGS) \
+ -g \
+ -O0 \
+ -DL9P_DEBUG=L9P_DEBUG
+# Note: to turn on debugging, build with -DL9P_DEBUG=L9P_DEBUG
+# and set the environment variable LIB9P_LOGGING to stderr or to
+# the (preferably full) path name of the debug log file.
+
+LIB_SRCS := \
+ pack.c \
+ connection.c \
+ request.c \
+ genacl.c \
+ log.c \
+ hashtable.c \
+ utils.c \
+ rfuncs.c \
+ threadpool.c \
+ sbuf/sbuf.c \
+ transport/socket.c \
+ backend/fs.c
+
+SERVER_SRCS := \
+ example/server.c
+
+BUILD_DIR := build
+LIB_OBJS := $(addprefix build/,$(LIB_SRCS:.c=.o))
+SERVER_OBJS := $(SERVER_SRCS:.c=.o)
+LIB := lib9p.dylib
+SERVER := server
+
+all: build $(LIB) $(SERVER)
+
+$(LIB): $(LIB_OBJS)
+ cc -dynamiclib $^ -o build/$@
+
+$(SERVER): $(SERVER_OBJS) $(LIB)
+ cc $< -o build/$(SERVER) -Lbuild/ -l9p
+
+clean:
+ rm -rf build
+ rm -f $(SERVER_OBJS)
+build:
+ mkdir build
+ mkdir build/sbuf
+ mkdir build/transport
+ mkdir build/backend
+
+build/%.o: %.c
+ $(CC) $(CFLAGS) -c $< -o $@
diff --git a/Makefile b/Makefile
new file mode 100644
index 000000000000..cb9f364329e5
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,27 @@
+# Note: to turn on debugging, build with -DL9P_DEBUG=L9P_DEBUG
+# and set the environment variable LIB9P_LOGGING to stderr or to
+# the (preferably full) path name of the debug log file.
+
+LIB= 9p
+SHLIB_MAJOR= 1
+SRCS= pack.c \
+ connection.c \
+ request.c log.c \
+ hashtable.c \
+ genacl.c \
+ utils.c \
+ rfuncs.c \
+ threadpool.c \
+ transport/socket.c \
+ backend/fs.c
+
+INCS= lib9p.h
+CC= clang
+CFLAGS= -g -O0 -DL9P_DEBUG=L9P_DEBUG -DWITH_CASPER
+LIBADD= sbuf libcasper libcap_pwd libcap_grp
+SUBDIR= example
+
+cscope: .PHONY
+ cd ${.CURDIR}; cscope -buq $$(find . -name '*.[ch]' -print)
+
+.include <bsd.lib.mk>
diff --git a/README.md b/README.md
new file mode 100644
index 000000000000..f74a978fb52b
--- /dev/null
+++ b/README.md
@@ -0,0 +1,20 @@
+# lib9p
+
+lib9p is a server library implementing the 9p2000, 9p2000.u and 9p2000.L
+revisions of the 9P protocol. It is being developed primarily as a backend
+for virtio-9p in BHyVe, the FreeBSD hypervisor.
+
+# Features
+
+* 9p2000, 9p2000.u and 9p2000.L protocol support
+* Built-in TCP transport
+
+# Supported operating systems
+
+* FreeBSD (>=10)
+* macOS (>=10.9)
+
+# Authors
+
+* Jakub Klama [jceel](https://github.com/jceel)
+* Chris Torek [chris3torek](https://github.com/chris3torek)
diff --git a/apple_endian.h b/apple_endian.h
new file mode 100644
index 000000000000..d061b643b8f2
--- /dev/null
+++ b/apple_endian.h
@@ -0,0 +1,27 @@
+#ifndef _APPLE_ENDIAN_H
+#define _APPLE_ENDIAN_H
+
+/*
+ * Shims to make Apple's endian headers and macros compatible
+ * with <sys/endian.h> (which is awful).
+ *
+ * _BYTE_ORDER is derived from the __LITTLE_ENDIAN__ / __BIG_ENDIAN__
+ * macros, and the 32-bit host <-> little/big-endian conversions are
+ * mapped onto the OSSwap* macros from <libkern/OSByteOrder.h>.
+ * Only the 32-bit conversions are provided here.
+ *
+ * NOTE(review): _BYTE_ORDER stays undefined if neither
+ * __LITTLE_ENDIAN__ nor __BIG_ENDIAN__ is defined.
+ */
+
+# include <libkern/OSByteOrder.h>
+
+# define _LITTLE_ENDIAN 0x12345678
+# define _BIG_ENDIAN 0x87654321
+
+# ifdef __LITTLE_ENDIAN__
+# define _BYTE_ORDER _LITTLE_ENDIAN
+# endif
+# ifdef __BIG_ENDIAN__
+# define _BYTE_ORDER _BIG_ENDIAN
+# endif
+
+# define htole32(x) OSSwapHostToLittleInt32(x)
+# define le32toh(x) OSSwapLittleToHostInt32(x)
+
+# define htobe32(x) OSSwapHostToBigInt32(x)
+# define be32toh(x) OSSwapBigToHostInt32(x)
+
+#endif /* _APPLE_ENDIAN_H */
diff --git a/backend/backend.h b/backend/backend.h
new file mode 100644
index 000000000000..2b4bf2d8e480
--- /dev/null
+++ b/backend/backend.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+
+#ifndef LIB9P_BACKEND_H
+#define LIB9P_BACKEND_H
+
+/*
+ * Backend operations table: one function pointer per operation.
+ *
+ * Every callback takes a void * as its first argument (presumably the
+ * softc pointer stored below -- confirm against the callers), followed
+ * by either the request being served or the fid being operated on.
+ * All callbacks except freefid return int; presumably 0 on success and
+ * an errno value on failure -- TODO confirm against the dispatcher.
+ */
+struct l9p_backend {
+ void *softc; /* backend-private state */
+ void (*freefid)(void *, struct l9p_fid *);
+ int (*attach)(void *, struct l9p_request *);
+ int (*clunk)(void *, struct l9p_fid *);
+ int (*create)(void *, struct l9p_request *);
+ int (*open)(void *, struct l9p_request *);
+ int (*read)(void *, struct l9p_request *);
+ int (*remove)(void *, struct l9p_fid *);
+ int (*stat)(void *, struct l9p_request *);
+ int (*walk)(void *, struct l9p_request *);
+ int (*write)(void *, struct l9p_request *);
+ int (*wstat)(void *, struct l9p_request *);
+ int (*statfs)(void *, struct l9p_request *);
+ int (*lopen)(void *, struct l9p_request *);
+ int (*lcreate)(void *, struct l9p_request *);
+ int (*symlink)(void *, struct l9p_request *);
+ int (*mknod)(void *, struct l9p_request *);
+ int (*rename)(void *, struct l9p_request *);
+ int (*readlink)(void *, struct l9p_request *);
+ int (*getattr)(void *, struct l9p_request *);
+ int (*setattr)(void *, struct l9p_request *);
+ int (*xattrwalk)(void *, struct l9p_request *);
+ int (*xattrcreate)(void *, struct l9p_request *);
+ int (*xattrread)(void *, struct l9p_request *);
+ int (*xattrwrite)(void *, struct l9p_request *);
+ int (*xattrclunk)(void *, struct l9p_fid *);
+ int (*readdir)(void *, struct l9p_request *);
+ int (*fsync)(void *, struct l9p_request *);
+ int (*lock)(void *, struct l9p_request *);
+ int (*getlock)(void *, struct l9p_request *);
+ int (*link)(void *, struct l9p_request *);
+ int (*mkdir)(void *, struct l9p_request *);
+ int (*renameat)(void *, struct l9p_request *);
+ int (*unlinkat)(void *, struct l9p_request *);
+};
+
+#endif /* LIB9P_BACKEND_H */
diff --git a/backend/fs.c b/backend/fs.c
new file mode 100644
index 000000000000..474de756734e
--- /dev/null
+++ b/backend/fs.c
@@ -0,0 +1,3061 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * Based on libixp code: ©2007-2010 Kris Maglione <maglione.k at Gmail>
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <sys/param.h>
+#include <sys/queue.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <dirent.h>
+#include <pwd.h>
+#include <grp.h>
+#include <libgen.h>
+#include <pthread.h>
+#include "../lib9p.h"
+#include "../lib9p_impl.h"
+#include "../fid.h"
+#include "../log.h"
+#include "../rfuncs.h"
+#include "../genacl.h"
+#include "backend.h"
+#include "fs.h"
+
+#if defined(WITH_CASPER)
+ #include <libcasper.h>
+ #include <casper/cap_pwd.h>
+ #include <casper/cap_grp.h>
+#endif
+
+#if defined(__FreeBSD__)
+ #include <sys/param.h>
+ #if __FreeBSD_version >= 1000000
+ #define HAVE_BINDAT
+ #endif
+#endif
+
+#if defined(__FreeBSD__)
+ #define HAVE_BIRTHTIME
+#endif
+
+#if defined(__APPLE__)
+ #include <sys/syscall.h>
+ #include "Availability.h"
+ #define ACL_TYPE_NFS4 ACL_TYPE_EXTENDED
+#endif
+
+/*
+ * State of one fs backend instance.
+ */
+struct fs_softc {
+ int fs_rootfd; /* presumably the fd of the exported root -- confirm */
+ bool fs_readonly; /* when set, fs_nde() refuses creation with EROFS */
+#if defined(WITH_CASPER)
+ cap_channel_t *fs_cappwd; /* channel handed to r_cap_getpwuid() */
+ cap_channel_t *fs_capgrp; /* channel handed to r_cap_getgrgid() */
+#endif
+};
+
+/*
+ * Open-file data attached to a fid (allocated by open_fid()).
+ */
+struct fs_fid {
+ DIR *ff_dir;
+ int ff_dirfd; /* directory fd that ff_name is resolved against */
+ int ff_fd; /* open_fid() sets this to -1; presumably the open fd */
+ int ff_flags; /* FF_* flags, e.g. FF_NO_NFSV4_ACL */
+ char *ff_name; /* strdup()ed path name, set by open_fid() */
+ struct fs_authinfo *ff_ai; /* open_fid() takes a reference on this */
+ pthread_mutex_t ff_mtx;
+ struct l9p_acl *ff_acl; /* cached ACL if any */
+};
+
+#define FF_NO_NFSV4_ACL 0x01 /* don't go looking for NFSv4 ACLs */
+/* FF_NO_POSIX_ACL 0x02 -- not yet */
+
+/*
+ * Our authinfo consists of:
+ *
+ * - a reference count
+ * - a uid
+ * - a gid-set
+ *
+ * The "default" gid is the first gid in the gid-set, provided the
+ * set size is at least 1. The set-size may be zero, though.
+ *
+ * Adjustments to the ref-count must be atomic, once it's shared.
+ * It would be nice to use C11 atomics here but they are not common
+ * enough to all systems just yet; for now, we use a mutex.
+ *
+ * Note that some ops (Linux style ones) pass an effective gid for
+ * the op, in which case, that gid may override. To achieve this
+ * effect, permissions testing functions also take an extra gid.
+ * If this gid is (gid_t)-1 it is not used and only the remaining
+ * gids take part.
+ *
+ * The uid may also be (uid_t)-1, meaning "no uid was available
+ * at all at attach time". In this case, new files inherit parent
+ * directory uids.
+ *
+ * The refcount is simply the number of "openfile"s using this
+ * authinfo (so that when the last ref goes away, we can free it).
+ *
+ * There are also master ACL flags (same as in ff_flags).
+ */
+struct fs_authinfo {
+ pthread_mutex_t ai_mtx; /* lock for refcnt */
+ uint32_t ai_refcnt; /* incremented by open_fid() while holding ai_mtx */
+ int ai_flags;
+ uid_t ai_uid; /* (uid_t)-1 makes fs_nde() fall back to the parent's uid */
+ int ai_ngids; /* fs_nde() uses ai_gids[0] only when this is > 0 */
+ gid_t ai_gids[]; /* NB: flexible array member */
+};
+
+/*
+ * We have a global-static mutex for single-threading Tattach
+ * requests, which use getpwnam (and indirectly, getgr* functions)
+ * which are not reentrant.
+ */
+static bool fs_attach_mutex_inited;
+static pthread_mutex_t fs_attach_mutex;
+
+/*
+ * Internal functions (except inline functions).
+ */
+static struct passwd *fs_getpwuid(struct fs_softc *, uid_t, struct r_pgdata *);
+static struct group *fs_getgrgid(struct fs_softc *, gid_t, struct r_pgdata *);
+static int fs_buildname(struct l9p_fid *, char *, char *, size_t);
+static int fs_pdir(struct fs_softc *, struct l9p_fid *, char *, size_t,
+ struct stat *st);
+static int fs_dpf(char *, char *, size_t);
+static int fs_oflags_dotu(int, int *);
+static int fs_oflags_dotl(uint32_t, int *, enum l9p_omode *);
+static int fs_nde(struct fs_softc *, struct l9p_fid *, bool, gid_t,
+ struct stat *, uid_t *, gid_t *);
+static struct fs_fid *open_fid(int, const char *, struct fs_authinfo *, bool);
+static void dostat(struct fs_softc *, struct l9p_stat *, char *,
+ struct stat *, bool dotu);
+static void dostatfs(struct l9p_statfs *, struct statfs *, long);
+static void fillacl(struct fs_fid *ff);
+static struct l9p_acl *getacl(struct fs_fid *ff, int fd, const char *path);
+static void dropacl(struct fs_fid *ff);
+static struct l9p_acl *look_for_nfsv4_acl(struct fs_fid *ff, int fd,
+ const char *path);
+static int check_access(int32_t,
+ struct l9p_acl *, struct stat *, struct l9p_acl *, struct stat *,
+ struct fs_authinfo *, gid_t);
+static void generate_qid(struct stat *, struct l9p_qid *);
+
+static int fs_icreate(void *, struct l9p_fid *, char *, int,
+ bool, mode_t, gid_t, struct stat *);
+static int fs_iopen(void *, struct l9p_fid *, int, enum l9p_omode,
+ gid_t, struct stat *);
+static int fs_imkdir(void *, struct l9p_fid *, char *,
+ bool, mode_t, gid_t, struct stat *);
+static int fs_imkfifo(void *, struct l9p_fid *, char *,
+ bool, mode_t, gid_t, struct stat *);
+static int fs_imknod(void *, struct l9p_fid *, char *,
+ bool, mode_t, dev_t, gid_t, struct stat *);
+static int fs_imksocket(void *, struct l9p_fid *, char *,
+ bool, mode_t, gid_t, struct stat *);
+static int fs_isymlink(void *, struct l9p_fid *, char *, char *,
+ gid_t, struct stat *);
+
+/*
+ * Internal functions implementing backend.
+ */
+static int fs_attach(void *, struct l9p_request *);
+static int fs_clunk(void *, struct l9p_fid *);
+static int fs_create(void *, struct l9p_request *);
+static int fs_open(void *, struct l9p_request *);
+static int fs_read(void *, struct l9p_request *);
+static int fs_remove(void *, struct l9p_fid *);
+static int fs_stat(void *, struct l9p_request *);
+static int fs_walk(void *, struct l9p_request *);
+static int fs_write(void *, struct l9p_request *);
+static int fs_wstat(void *, struct l9p_request *);
+static int fs_statfs(void *, struct l9p_request *);
+static int fs_lopen(void *, struct l9p_request *);
+static int fs_lcreate(void *, struct l9p_request *);
+static int fs_symlink(void *, struct l9p_request *);
+static int fs_mknod(void *, struct l9p_request *);
+static int fs_rename(void *, struct l9p_request *);
+static int fs_readlink(void *, struct l9p_request *);
+static int fs_getattr(void *, struct l9p_request *);
+static int fs_setattr(void *, struct l9p_request *);
+static int fs_xattrwalk(void *, struct l9p_request *);
+static int fs_xattrcreate(void *, struct l9p_request *);
+static int fs_readdir(void *, struct l9p_request *);
+static int fs_fsync(void *, struct l9p_request *);
+static int fs_lock(void *, struct l9p_request *);
+static int fs_getlock(void *, struct l9p_request *);
+static int fs_link(void *, struct l9p_request *);
+static int fs_renameat(void *, struct l9p_request *);
+static int fs_unlinkat(void *, struct l9p_request *);
+static void fs_freefid(void *, struct l9p_fid *);
+
+/*
+ * Convert from 9p2000 open/create mode to Unix-style O_* flags.
+ * This includes 9p2000.u extensions, but not 9p2000.L protocol,
+ * which has entirely different open, create, etc., flag bits.
+ *
+ * The <mode> given here is the one-byte (uint8_t) "mode"
+ * argument to Tcreate or Topen, so it can have at most 8 bits.
+ *
+ * https://swtch.com/plan9port/man/man9/open.html and
+ * http://plan9.bell-labs.com/magic/man2html/5/open
+ * both say:
+ *
+ * The [low two bits of the] mode field determines the
+ * type of I/O ... [I]f mode has the OTRUNC (0x10) bit
+ * set, the file is to be truncated, which requires write
+ * permission ...; if the mode has the ORCLOSE (0x40) bit
+ * set, the file is to be removed when the fid is clunked,
+ * which requires permission to remove the file from its
+ * directory. All other bits in mode should be zero. It
+ * is illegal to write a directory, truncate it, or
+ * attempt to remove it on close.
+ *
+ * 9P2000.u may add ODIRECT (0x80); this is not completely clear.
+ * The fcall.h header defines OCEXEC (0x20) as well, but it makes
+ * no sense to send this to a server. There seem to be no bits
+ * 0x04 and 0x08.
+ *
+ * We always turn on O_NOCTTY since as a server, we never want
+ * to gain a controlling terminal. We always turn on O_NOFOLLOW
+ * for reasons described elsewhere.
+ */
+static int
+fs_oflags_dotu(int mode, int *aflags)
+{
+	const int accmode = mode & L9P_OACCMODE;
+	int oflags;
+
+	/* Translate the access-mode field; anything unrecognized reads. */
+	if (accmode == L9P_OWRITE) {
+		oflags = O_WRONLY;
+	} else if (accmode == L9P_ORDWR) {
+		oflags = O_RDWR;
+	} else if (accmode == L9P_OEXEC) {
+		/* Open-for-exec reads the file and must not truncate it. */
+		if ((mode & L9P_OTRUNC) != 0)
+			return (EINVAL);
+		oflags = O_RDONLY;
+	} else {
+		oflags = O_RDONLY;
+	}
+
+	/* Unconditional server-side flags (see the comment above). */
+	oflags |= O_NOCTTY | O_NOFOLLOW;
+
+	/* OTRUNC is the only remaining bit with an O_* equivalent. */
+	if ((mode & L9P_OTRUNC) != 0) {
+		mode &= ~L9P_OTRUNC;
+		oflags |= O_TRUNC;
+	}
+
+	/*
+	 * Strip the bits that are either already handled or that we
+	 * deliberately do not act on here:
+	 *	the access mode (translated above)
+	 *	ORCLOSE - left to the caller
+	 *	OCEXEC  - meaningless in a server
+	 *	ODIRECT - not applicable here
+	 * Whatever survives is a bit we could not translate; reject
+	 * it with EINVAL so that such requests get noticed.
+	 */
+	mode &= ~(L9P_OACCMODE | L9P_ORCLOSE | L9P_OCEXEC | L9P_ODIRECT);
+	if (mode != 0) {
+		L9P_LOG(L9P_INFO,
+		    "fs_oflags_dotu: untranslated bits: %#x",
+		    (unsigned)mode);
+		return (EINVAL);
+	}
+
+	*aflags = oflags;
+	return (0);
+}
+
+/*
+ * Convert from 9P2000.L (Linux) open mode bits to O_* flags.
+ * See fs_oflags_dotu above.
+ *
+ * Linux currently does not have open-for-exec, but there is a
+ * proposal for it using O_PATH|O_NOFOLLOW, now handled here.
+ *
+ * We may eventually also set L9P_ORCLOSE for L_O_TMPFILE.
+ *
+ * On success, returns 0 and stores the O_* flags in <*aflags> and
+ * the 9P2000-style open mode in <*ap9>.  Returns EINVAL, leaving
+ * both outputs untouched, if the access-mode field is 3 or if any
+ * bits of <l_mode> are left untranslated at the end.
+ */
+static int
+fs_oflags_dotl(uint32_t l_mode, int *aflags, enum l9p_omode *ap9)
+{
+ int flags;
+ enum l9p_omode p9;
+#define CLEAR(theirs) l_mode &= ~(uint32_t)(theirs)
+#define CONVERT(theirs, ours) \
+ do { \
+ if (l_mode & (theirs)) { \
+ CLEAR(theirs); \
+ flags |= ours; \
+ } \
+ } while (0)
+
+ /*
+ * Linux O_RDONLY, O_WRONLY, O_RDWR (0,1,2) match BSD/MacOS.
+ */
+ flags = l_mode & O_ACCMODE;
+ if (flags == 3)
+ return (EINVAL);
+ CLEAR(O_ACCMODE);
+
+ if ((l_mode & (L9P_L_O_PATH | L9P_L_O_NOFOLLOW)) ==
+ (L9P_L_O_PATH | L9P_L_O_NOFOLLOW)) {
+ CLEAR(L9P_L_O_PATH | L9P_L_O_NOFOLLOW);
+ p9 = L9P_OEXEC;
+ } else {
+ /*
+ * Slightly dirty, but same dirt, really, as
+ * setting flags from l_mode & O_ACCMODE.
+ */
+ p9 = (enum l9p_omode)flags; /* slightly dirty */
+ }
+
+ /* turn L_O_TMPFILE into L9P_ORCLOSE in *p9? */
+ if (l_mode & L9P_L_O_TRUNC)
+ p9 |= L9P_OTRUNC; /* but don't CLEAR yet */
+
+ flags |= O_NOCTTY | O_NOFOLLOW;
+
+ /*
+ * L_O_CREAT seems to be noise, since we get separate open
+ * and create. But it is actually set sometimes. We just
+ * throw it out here; create ops must set it themselves and
+ * open ops have no permissions bits and hence cannot create.
+ *
+ * L_O_EXCL does make sense on create ops, i.e., we can
+ * take a create op with or without L_O_EXCL. We pass that
+ * through.
+ */
+ CLEAR(L9P_L_O_CREAT);
+ CONVERT(L9P_L_O_EXCL, O_EXCL);
+ CONVERT(L9P_L_O_TRUNC, O_TRUNC); /* the L_O_TRUNC bit is cleared here */
+ CONVERT(L9P_L_O_DIRECTORY, O_DIRECTORY);
+ CONVERT(L9P_L_O_APPEND, O_APPEND);
+ CONVERT(L9P_L_O_NONBLOCK, O_NONBLOCK);
+
+ /*
+ * Discard these as useless noise at our (server) end.
+ * (NOATIME might be useful but we can only set it on a
+ * per-mount basis.)
+ */
+ CLEAR(L9P_L_O_CLOEXEC);
+ CLEAR(L9P_L_O_DIRECT);
+ CLEAR(L9P_L_O_DSYNC);
+ CLEAR(L9P_L_O_FASYNC);
+ CLEAR(L9P_L_O_LARGEFILE);
+ CLEAR(L9P_L_O_NOATIME);
+ CLEAR(L9P_L_O_NOCTTY);
+ CLEAR(L9P_L_O_NOFOLLOW);
+ CLEAR(L9P_L_O_SYNC);
+
+ if (l_mode != 0) {
+ L9P_LOG(L9P_INFO,
+ "fs_oflags_dotl: untranslated bits: %#x",
+ (unsigned)l_mode);
+ return (EINVAL);
+ }
+
+ *aflags = flags;
+ *ap9 = p9;
+ return (0);
+#undef CLEAR
+#undef CONVERT
+}
+
+/*
+ * Look up the passwd entry for <uid>, passing <pg> through to the
+ * underlying r_*() helper.  With WITH_CASPER the lookup goes through
+ * the sc->fs_cappwd channel; otherwise <sc> is unused and
+ * r_getpwuid() is called directly.  Callers in this file treat a
+ * NULL return as "no entry" and release <pg> with r_pgfree().
+ */
+static struct passwd *
+fs_getpwuid(struct fs_softc *sc, uid_t uid, struct r_pgdata *pg)
+{
+#if defined(WITH_CASPER)
+ return (r_cap_getpwuid(sc->fs_cappwd, uid, pg));
+#else
+ (void)sc;
+ return (r_getpwuid(uid, pg));
+#endif
+}
+
+/*
+ * Look up the group entry for <gid>, passing <pg> through to the
+ * underlying r_*() helper.  With WITH_CASPER the lookup goes through
+ * the sc->fs_capgrp channel; otherwise <sc> is unused and
+ * r_getgrgid() is called directly.  Callers in this file treat a
+ * NULL return as "no entry" and release <pg> with r_pgfree().
+ */
+static struct group *
+fs_getgrgid(struct fs_softc *sc, gid_t gid, struct r_pgdata *pg)
+{
+#if defined(WITH_CASPER)
+ return (r_cap_getgrgid(sc->fs_capgrp, gid, pg));
+#else
+ (void)sc;
+ return (r_getgrgid(gid, pg));
+#endif
+}
+
+/*
+ * Compose "<directory name>/<name>" into <buf>, where the directory
+ * name is the path recorded in <dir>'s open-file data and <buf> has
+ * room for <size> bytes.
+ *
+ * Returns 0 on success, or ENAMETOOLONG (without writing to <buf>)
+ * if the result, including its terminating NUL, would not fit.
+ */
+static int
+fs_buildname(struct l9p_fid *dir, char *name, char *buf, size_t size)
+{
+	struct fs_fid *parent = dir->lo_aux;
+	size_t dirlen, namesz;
+	char *out;
+
+	assert(parent != NULL);
+	dirlen = strlen(parent->ff_name);
+	namesz = strlen(name) + 1;	/* name plus its NUL */
+	if (dirlen + 1 + namesz > size)
+		return (ENAMETOOLONG);
+
+	out = buf;
+	memcpy(out, parent->ff_name, dirlen);
+	out += dirlen;
+	*out++ = '/';
+	memcpy(out, name, namesz);
+	return (0);
+}
+
+/*
+ * Split the parent directory name off <fid>'s path into <buf> (of
+ * <size> bytes) and stat that parent into <*st>.
+ *
+ * Returns ENAMETOOLONG if r_dirname() cannot produce the name, the
+ * errno from fstatat() if the parent cannot be examined, ENOTDIR if
+ * the discovered parent is not a directory, and 0 otherwise.
+ *
+ * An error is also expected when <fid> represents the root, which
+ * has no such parent.  NOTE(review): no explicit root test is made
+ * here; presumably one of the checks above catches it -- confirm.
+ */
+static int
+fs_pdir(struct fs_softc *sc __unused, struct l9p_fid *fid, char *buf,
+    size_t size, struct stat *st)
+{
+	struct fs_fid *ff = fid->lo_aux;
+	char *parent;
+
+	assert(ff != NULL);
+	parent = r_dirname(ff->ff_name, buf, size);
+	if (parent == NULL)
+		return (ENAMETOOLONG);
+	if (fstatat(ff->ff_dirfd, parent, st, AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+	return (S_ISDIR(st->st_mode) ? 0 : ENOTDIR);
+}
+
+/*
+ * In-place variant of fs_buildname(): append "/" and <fname> to the
+ * NUL-terminated directory name already held in <dbuf>, which has
+ * room for <size> bytes in total.  Equivalent to
+ *	strcat(dbuf, "/");
+ *	strcat(dbuf, fname);
+ * with bounds checking.
+ *
+ * Returns 0 on success, or ENAMETOOLONG (leaving <dbuf> unmodified)
+ * if the result, including its terminating NUL, would not fit.
+ *
+ * (Think of the function name as "directory plus-equals file".)
+ */
+static int
+fs_dpf(char *dbuf, char *fname, size_t size)
+{
+	const size_t dirlen = strlen(dbuf);
+	const size_t namesz = strlen(fname) + 1;	/* name plus its NUL */
+	char *tail;
+
+	if (dirlen + 1 + namesz > size)
+		return (ENAMETOOLONG);
+
+	tail = dbuf + dirlen;
+	*tail++ = '/';
+	memcpy(tail, fname, namesz);
+	return (0);
+}
+
+/*
+ * Prepare to create a new directory entry (open with O_CREAT,
+ * mkdir, etc -- any operation that creates a new inode),
+ * operating in parent data <dir>, based on authinfo <ai> and
+ * effective gid <egid>.
+ *
+ * The new entity should be owned by user/group <*nuid, *ngid>,
+ * if it's really a new entity. It will be a directory if isdir.
+ *
+ * Returns an error number if the entry should not be created
+ * (e.g., read-only file system or no permission to write in
+ * parent directory). Always sets *nuid and *ngid on success:
+ * in the worst case, when there is no available ID, this will
+ * use the parent directory's IDs. Fills in <*st> on success.
+ */
+static int
+fs_nde(struct fs_softc *sc, struct l9p_fid *dir, bool isdir, gid_t egid,
+    struct stat *st, uid_t *nuid, gid_t *ngid)
+{
+	struct fs_fid *parent;
+	struct fs_authinfo *auth;
+	int32_t wanted;
+
+	/* Nothing may be created on a read-only export. */
+	if (sc->fs_readonly)
+		return (EROFS);
+
+	/* The parent must exist and be a directory. */
+	parent = dir->lo_aux;
+	assert(parent != NULL);
+	if (fstatat(parent->ff_dirfd, parent->ff_name, st,
+	    AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+	if (!S_ISDIR(st->st_mode))
+		return (ENOTDIR);
+
+	/* Any denial from the access check is reported as EPERM. */
+	auth = parent->ff_ai;
+	fillacl(parent);
+	if (isdir)
+		wanted = L9P_ACE_ADD_SUBDIRECTORY;
+	else
+		wanted = L9P_ACE_ADD_FILE;
+	if (check_access(wanted, parent->ff_acl, st, NULL, NULL, auth,
+	    egid) != 0)
+		return (EPERM);
+
+	/* Owner: the attach-time uid if there is one, else the parent's. */
+	if (auth->ai_uid != (uid_t)-1)
+		*nuid = auth->ai_uid;
+	else
+		*nuid = st->st_uid;
+
+	/*
+	 * Group: the effective gid if given, else the first gid of the
+	 * authinfo's set, else the parent directory's group.
+	 */
+	if (egid != (gid_t)-1)
+		*ngid = egid;
+	else if (auth->ai_ngids > 0)
+		*ngid = auth->ai_gids[0];
+	else
+		*ngid = st->st_gid;
+	return (0);
+}
+
+/*
+ * Allocate and initialize the open-file data for <path>, which is
+ * recorded together with the directory fd <dirfd>.
+ *
+ * The new fs_fid uses the given authinfo <ai> and takes one
+ * reference on it.  <creating> must be true when this is expected
+ * to be the very first reference to <ai>.
+ *
+ * Returns NULL if the mutex cannot be initialized or the path name
+ * cannot be duplicated; no reference is taken in that case.
+ */
+static struct fs_fid *
+open_fid(int dirfd, const char *path, struct fs_authinfo *ai, bool creating)
+{
+	struct fs_fid *ff;
+	uint32_t nrefs;
+
+	ff = l9p_calloc(1, sizeof(*ff));
+	if (pthread_mutex_init(&ff->ff_mtx, NULL) != 0)
+		goto fail_free;
+
+	ff->ff_fd = -1;
+	ff->ff_dirfd = dirfd;
+	ff->ff_name = strdup(path);
+	if (ff->ff_name == NULL)
+		goto fail_mutex;
+
+	/* Take our reference on the shared authinfo. */
+	pthread_mutex_lock(&ai->ai_mtx);
+	ai->ai_refcnt++;
+	nrefs = ai->ai_refcnt;
+	pthread_mutex_unlock(&ai->ai_mtx);
+
+	/*
+	 * A count of exactly 1 means we hold the *first* reference,
+	 * which is legitimate only while the authinfo is being
+	 * created.  Anything else indicates a problem such as reuse
+	 * of a freed authinfo; this will not catch every such case,
+	 * but it may catch some.
+	 */
+	assert(nrefs > 1 || creating);
+	L9P_LOG(L9P_DEBUG, "authinfo %p now used by %lu",
+	    (void *)ai, (u_long)nrefs);
+	ff->ff_ai = ai;
+	return (ff);
+
+fail_mutex:
+	pthread_mutex_destroy(&ff->ff_mtx);
+fail_free:
+	free(ff);
+	return (NULL);
+}
+
+/*
+ * Fill in the 9P stat structure <s> for the file called <name>,
+ * whose lstat/stat result is <buf>.
+ *
+ * <s> is zeroed first.  The mode is the low permission bits of
+ * st_mode plus the L9P_DM* bit matching the file type; atime and
+ * mtime are truncated to 32 bits.
+ *
+ * Without <dotu>, the owner and group are reported by name: s->uid,
+ * s->gid and s->muid are strdup()ed copies, or NULL when the lookup
+ * (or the copy) fails.  With <dotu>, numeric ids are reported
+ * instead and s->extension describes symlinks (the link target) and
+ * block/character devices ("b major minor" / "c major minor").
+ * s->extension is left NULL whenever it cannot be produced.
+ */
+static void
+dostat(struct fs_softc *sc, struct l9p_stat *s, char *name,
+    struct stat *buf, bool dotu)
+{
+	struct passwd *user;
+	struct group *group;
+
+	memset(s, 0, sizeof(struct l9p_stat));
+
+	generate_qid(buf, &s->qid);
+
+	s->type = 0;
+	s->dev = 0;
+	s->mode = buf->st_mode & 0777;
+
+	if (S_ISDIR(buf->st_mode))
+		s->mode |= L9P_DMDIR;
+
+	if (S_ISLNK(buf->st_mode) && dotu)
+		s->mode |= L9P_DMSYMLINK;
+
+	if (S_ISCHR(buf->st_mode) || S_ISBLK(buf->st_mode))
+		s->mode |= L9P_DMDEVICE;
+
+	if (S_ISSOCK(buf->st_mode))
+		s->mode |= L9P_DMSOCKET;
+
+	if (S_ISFIFO(buf->st_mode))
+		s->mode |= L9P_DMNAMEDPIPE;
+
+	s->atime = (uint32_t)buf->st_atime;
+	s->mtime = (uint32_t)buf->st_mtime;
+	s->length = (uint64_t)buf->st_size;
+
+	s->name = r_basename(name, NULL, 0);
+
+	if (!dotu) {
+		struct r_pgdata udata, gdata;
+
+		user = fs_getpwuid(sc, buf->st_uid, &udata);
+		group = fs_getgrgid(sc, buf->st_gid, &gdata);
+		s->uid = user != NULL ? strdup(user->pw_name) : NULL;
+		s->gid = group != NULL ? strdup(group->gr_name) : NULL;
+		s->muid = user != NULL ? strdup(user->pw_name) : NULL;
+		r_pgfree(&udata);
+		r_pgfree(&gdata);
+	} else {
+		/*
+		 * When using 9P2000.u, we don't need to bother about
+		 * providing user and group names in textual form.
+		 */
+		s->n_uid = buf->st_uid;
+		s->n_gid = buf->st_gid;
+		s->n_muid = buf->st_uid;
+
+		if (S_ISLNK(buf->st_mode)) {
+			char target[MAXPATHLEN];
+			ssize_t ret = readlink(name, target, MAXPATHLEN);
+
+			if (ret < 0) {
+				s->extension = NULL;
+				return;
+			}
+
+			s->extension = strndup(target, (size_t)ret);
+		}
+
+		/*
+		 * If asprintf() fails, the contents of s->extension
+		 * are not reliable on every libc, so reset it
+		 * explicitly: a NULL extension means "none".
+		 */
+		if (S_ISBLK(buf->st_mode)) {
+			if (asprintf(&s->extension, "b %d %d",
+			    major(buf->st_rdev), minor(buf->st_rdev)) < 0)
+				s->extension = NULL;
+		}
+
+		if (S_ISCHR(buf->st_mode)) {
+			if (asprintf(&s->extension, "c %d %d",
+			    major(buf->st_rdev), minor(buf->st_rdev)) < 0)
+				s->extension = NULL;
+		}
+	}
+}
+
+/*
+ * Copy the host statfs data <in> into the 9P statfs reply <out>.
+ * <namelen> is the maximum file name length to report.
+ */
+static void
+dostatfs(struct l9p_statfs *out, struct statfs *in, long namelen)
+{
+
+	out->type = L9P_FSTYPE;
+	out->bsize = in->f_bsize;
+	out->blocks = in->f_blocks;
+	out->bfree = in->f_bfree;
+	out->bavail = in->f_bavail;
+	out->files = in->f_files;
+	out->ffree = in->f_ffree;
+	out->namelen = (uint32_t)namelen;
+	/*
+	 * The two fsid words are signed 32-bit values.  Go through
+	 * uint32_t so a negative word is not sign-extended to 64
+	 * bits, which would overwrite the upper half with ones.
+	 */
+	out->fsid = ((uint64_t)(uint32_t)in->f_fsid.val[0] << 32) |
+	    (uint64_t)(uint32_t)in->f_fsid.val[1];
+}
+
+/*
+ * Build the 9P qid for a file from its host stat data: the inode
+ * number becomes the qid path, the version is always 0, and the
+ * type bits reflect regular file / directory / symlink.
+ */
+static void
+generate_qid(struct stat *buf, struct l9p_qid *qid)
+{
+	qid->path = buf->st_ino;
+	qid->version = 0;
+	/*
+	 * Start from a clean type so that stale or uninitialized
+	 * bits in the caller's buffer cannot leak into the reply.
+	 */
+	qid->type = 0;
+
+	if (S_ISREG(buf->st_mode))
+		qid->type |= L9P_QTFILE;
+
+	if (S_ISDIR(buf->st_mode))
+		qid->type |= L9P_QTDIR;
+
+	if (S_ISLNK(buf->st_mode))
+		qid->type |= L9P_QTSYMLINK;
+}
+
+/*
+ * Make sure ff->ff_acl is populated, if it can be.
+ *
+ * Nothing happens when an ACL is already cached or when the
+ * "don't use ACLs" flag is set.  When the lookup comes back empty
+ * the flag is set, so that the failure is remembered and we do not
+ * retry on every call.
+ */
+static void
+fillacl(struct fs_fid *ff)
+{
+
+	if (ff->ff_acl != NULL)
+		return;
+	if ((ff->ff_flags & FF_NO_NFSV4_ACL) != 0)
+		return;
+
+	ff->ff_acl = look_for_nfsv4_acl(ff, ff->ff_fd, ff->ff_name);
+	if (ff->ff_acl == NULL)
+		ff->ff_flags |= FF_NO_NFSV4_ACL;
+}
+
+/*
+ * Fetch an ACL for an arbitrary fd and/or path name, e.g. for a
+ * parent directory.
+ *
+ * The "don't get ACL" flag in <ff> is honored (NULL is returned
+ * when it is set) but, unlike fillacl(), it is never set here and
+ * nothing is cached in <ff>.
+ */
+static struct l9p_acl *
+getacl(struct fs_fid *ff, int fd, const char *path)
+{
+	bool disabled = (ff->ff_flags & FF_NO_NFSV4_ACL) != 0;
+
+	return (disabled ? NULL : look_for_nfsv4_acl(ff, fd, path));
+}
+
+/*
+ * Forget the cached ff->ff_acl and reset ff_flags from the
+ * authinfo flags.  Used, e.g., after moving from one directory to
+ * another, where inherited ACLs might change.
+ */
+static void
+dropacl(struct fs_fid *ff)
+{
+	struct l9p_acl *stale = ff->ff_acl;
+
+	ff->ff_acl = NULL;
+	ff->ff_flags = ff->ff_ai->ai_flags;
+	l9p_acl_free(stale);
+}
+
+/*
+ * Check to see if we can find NFSv4 ACLs for the given file.
+ * If we have an open fd (fd >= 0), we use that; otherwise <path>
+ * is opened relative to ff->ff_dirfd for the duration of the call.
+ *
+ * Returns the converted ACL, or NULL if the file has no NFSv4 ACL,
+ * the file cannot be opened, the ACL cannot be retrieved, or no
+ * conversion routine is compiled in.
+ */
+static struct l9p_acl *
+look_for_nfsv4_acl(struct fs_fid *ff, int fd, const char *path)
+{
+	struct l9p_acl *acl;
+	acl_t sysacl;
+	int doclose = 0;
+
+	if (fd < 0) {
+		fd = openat(ff->ff_dirfd, path, 0);
+		if (fd < 0) {
+			/*
+			 * Report the real open error rather than
+			 * passing an invalid descriptor on and
+			 * logging the resulting EBADF.
+			 */
+			L9P_LOG(L9P_ERROR,
+			    "cannot open %s to retrieve NFSv4 ACL: %s",
+			    path, strerror(errno));
+			return (NULL);
+		}
+		doclose = 1;
+	}
+
+	sysacl = acl_get_fd_np(fd, ACL_TYPE_NFS4);
+	if (sysacl == NULL) {
+		/*
+		 * EINVAL means no NFSv4 ACLs apply for this file.
+		 * Other error numbers indicate some kind of problem.
+		 */
+		if (errno != EINVAL) {
+			L9P_LOG(L9P_ERROR,
+			    "error retrieving NFSv4 ACL from "
+			    "fdesc %d (%s): %s", fd,
+			    path, strerror(errno));
+		}
+		acl = NULL;
+	} else {
+#if defined(HAVE_FREEBSD_ACLS)
+		acl = l9p_freebsd_nfsv4acl_to_acl(sysacl);
+#else
+		acl = NULL; /* XXX need a l9p_darwin_acl_to_acl */
+#endif
+		acl_free(sysacl);
+	}
+
+	/* Only close descriptors we opened ourselves. */
+	if (doclose)
+		close(fd);
+
+	return (acl);
+}
+
+/*
+ * Decide whether the user described by authinfo <ai>, with
+ * effective group ID <egid> ((gid_t)-1 means no egid supplied), may
+ * perform the operation(s) in <opmask>.
+ *
+ * <opmask> may be an NFSv4 style operation mask.  The caller passes
+ * the parent ACL and stat data (pacl, pst) and the child ACL and
+ * stat data (cacl, cst).  At most one of pacl+pst and cacl+cst can
+ * be NULL, unless ACLs are not supported; then pacl and cacl can
+ * both be NULL but pst or cst must be non-NULL depending on the
+ * operation.
+ *
+ * Returns whatever l9p_acl_check_access() returns.
+ */
+static int
+check_access(int32_t opmask,
+    struct l9p_acl *pacl, struct stat *pst,
+    struct l9p_acl *cacl, struct stat *cst,
+    struct fs_authinfo *ai, gid_t egid)
+{
+	struct l9p_acl_check_args args;
+	bool have_acl = (pacl != NULL || cacl != NULL);
+
+	/* Who is asking. */
+	args.aca_uid = ai->ai_uid;
+	args.aca_gid = egid;
+	args.aca_groups = ai->ai_gids;
+	args.aca_ngroups = (size_t)ai->ai_ngids;
+
+	/* What they are asking about. */
+	args.aca_parent = pacl;
+	args.aca_pstat = pst;
+	args.aca_child = cacl;
+	args.aca_cstat = cst;
+
+	/*
+	 * If we have ACLs, use them exclusively, ignoring Unix
+	 * permissions.  Otherwise, fall back on stat st_mode
+	 * bits.  The super-user is allowed either way.
+	 */
+	if (have_acl)
+		args.aca_aclmode = L9P_ACM_NFS_ACL | L9P_ACM_ZFS_ACL;
+	else
+		args.aca_aclmode = L9P_ACM_STAT_MODE;
+	args.aca_superuser = true;
+
+	return (l9p_acl_check_access(opmask, &args));
+}
+
+/*
+ * Handle Tattach: identify the user (by numeric n_uname if given,
+ * otherwise by name), verify that the export root is a directory,
+ * build the per-attach authinfo (uid plus supplementary groups) and
+ * bind a new fs_fid for the root directory to the request's fid.
+ *
+ * Returns 0 on success or an errno value.  ENOENT and ENOTDIR are
+ * passed through for diagnosis; other early failures become EPERM.
+ */
+static int
+fs_attach(void *softc, struct l9p_request *req)
+{
+	struct fs_authinfo *ai;
+	struct fs_softc *sc = (struct fs_softc *)softc;
+	struct fs_fid *file;
+	struct passwd *pwd;
+	struct stat st;
+	struct r_pgdata udata;
+	uint32_t n_uname;
+	gid_t *gids;
+	uid_t uid;
+	int error;
+	int ngroups;
+	bool have_udata = false;
+
+	assert(req->lr_fid != NULL);
+
+	/*
+	 * Single-thread pwd/group related items.  We have a reentrant
+	 * r_getpwuid but not a reentrant r_getpwnam, and l9p_getgrlist
+	 * may use non-reentrant C library getgr* routines.
+	 */
+	pthread_mutex_lock(&fs_attach_mutex);
+
+	n_uname = req->lr_req.tattach.n_uname;
+	if (n_uname != L9P_NONUNAME) {
+		uid = (uid_t)n_uname;
+		pwd = fs_getpwuid(sc, uid, &udata);
+		/* udata now backs *pwd and must be released below. */
+		have_udata = true;
+		if (pwd == NULL)
+			L9P_LOG(L9P_DEBUG,
+			    "Tattach: uid %ld: no such user", (long)uid);
+	} else {
+		uid = (uid_t)-1;
+#if defined(WITH_CASPER)
+		pwd = cap_getpwnam(sc->fs_cappwd, req->lr_req.tattach.uname);
+#else
+		pwd = getpwnam(req->lr_req.tattach.uname);
+#endif
+		if (pwd == NULL)
+			L9P_LOG(L9P_DEBUG,
+			    "Tattach: %s: no such user",
+			    req->lr_req.tattach.uname);
+	}
+
+	/*
+	 * If caller didn't give a numeric UID, pick it up from pwd
+	 * if possible.  If that doesn't work we can't continue.
+	 *
+	 * Note that pwd also supplies the group set.  This assumes
+	 * the server has the right mapping; this needs improvement.
+	 * We do at least support ai->ai_ngids==0 properly now though.
+	 */
+	if (uid == (uid_t)-1 && pwd != NULL)
+		uid = pwd->pw_uid;
+	if (uid == (uid_t)-1)
+		error = EPERM;
+	else {
+		error = 0;
+		if (fstat(sc->fs_rootfd, &st) != 0)
+			error = errno;
+		else if (!S_ISDIR(st.st_mode))
+			error = ENOTDIR;
+	}
+	if (error) {
+		if (have_udata)
+			r_pgfree(&udata);
+		pthread_mutex_unlock(&fs_attach_mutex);
+		L9P_LOG(L9P_DEBUG,
+		    "Tattach: denying uid=%ld access to rootdir: %s",
+		    (long)uid, strerror(error));
+		/*
+		 * Pass ENOENT and ENOTDIR through for diagnosis;
+		 * others become EPERM.  This should not leak too
+		 * much security.
+		 */
+		return (error == ENOENT || error == ENOTDIR ? error : EPERM);
+	}
+
+	if (pwd != NULL) {
+		/*
+		 * This either succeeds and fills in ngroups and
+		 * returns non-NULL, or fails and sets ngroups to 0
+		 * and returns NULL.  Either way ngroups is correct.
+		 */
+		gids = l9p_getgrlist(pwd->pw_name, pwd->pw_gid, &ngroups);
+	} else {
+		gids = NULL;
+		ngroups = 0;
+	}
+
+	/*
+	 * pwd is not used past this point, so the lookup buffer can
+	 * go (same fs_getpwuid()/r_pgfree() pairing as in dostat()).
+	 */
+	if (have_udata)
+		r_pgfree(&udata);
+
+	/*
+	 * Done with pwd and group related items that may use
+	 * non-reentrant C library routines; allow other threads in.
+	 */
+	pthread_mutex_unlock(&fs_attach_mutex);
+
+	ai = malloc(sizeof(*ai) + (size_t)ngroups * sizeof(gid_t));
+	if (ai == NULL) {
+		free(gids);
+		return (ENOMEM);
+	}
+	error = pthread_mutex_init(&ai->ai_mtx, NULL);
+	if (error) {
+		free(gids);
+		free(ai);
+		return (error);
+	}
+	ai->ai_refcnt = 0;
+	ai->ai_uid = uid;
+	ai->ai_flags = 0; /* XXX for now */
+	ai->ai_ngids = ngroups;
+	/* gids is NULL when ngroups is 0; memcpy() from NULL is undefined. */
+	if (ngroups > 0)
+		memcpy(ai->ai_gids, gids, (size_t)ngroups * sizeof(gid_t));
+	free(gids);
+
+	file = open_fid(sc->fs_rootfd, ".", ai, true);
+	if (file == NULL) {
+		pthread_mutex_destroy(&ai->ai_mtx);
+		free(ai);
+		return (ENOMEM);
+	}
+
+	req->lr_fid->lo_aux = file;
+	generate_qid(&st, &req->lr_resp.rattach.qid);
+	return (0);
+}
+
+/*
+ * Handle Tclunk: close whatever the fid has open.  A fid holding a
+ * directory stream has that stream closed; otherwise an open file
+ * descriptor, if any, is closed.  Always returns 0.
+ */
+static int
+fs_clunk(void *softc __unused, struct l9p_fid *fid)
+{
+	struct fs_fid *file = fid->lo_aux;
+
+	assert(file != NULL);
+
+	if (file->ff_dir) {
+		closedir(file->ff_dir);
+		file->ff_dir = NULL;
+		return (0);
+	}
+
+	if (file->ff_fd != -1) {
+		close(file->ff_fd);
+		file->ff_fd = -1;
+	}
+	return (0);
+}
+
+/*
+ * Create ops.
+ *
+ * We are to create a new file under some existing path,
+ * where the new file's name is in the Tcreate request and the
+ * existing path is due to a fid-based file (req->lr_fid).
+ *
+ * The DM* bits in the requested permissions select what gets
+ * created: directory, symlink, named pipe, socket, device, or
+ * (none of those) a regular file.  One op (create regular file)
+ * sets file->fd, the rest do not.
+ *
+ * Returns 0 and fills in the reply qid on success, else an errno
+ * value (EINVAL for a missing or malformed extension string).
+ */
+static int
+fs_create(void *softc, struct l9p_request *req)
+{
+	struct l9p_fid *dir;
+	struct stat st;
+	uint32_t dmperm;
+	mode_t perm;
+	char *name;
+	int error;
+
+	dir = req->lr_fid;
+	name = req->lr_req.tcreate.name;
+	dmperm = req->lr_req.tcreate.perm;
+	perm = (mode_t)(dmperm & 0777);
+
+	if (dmperm & L9P_DMDIR) {
+		error = fs_imkdir(softc, dir, name, true,
+		    perm, (gid_t)-1, &st);
+	} else if (dmperm & L9P_DMSYMLINK) {
+		/*
+		 * The mode bits come from the client; do not assume
+		 * an extension (the link target) came with them.
+		 */
+		if (req->lr_req.tcreate.extension == NULL)
+			return (EINVAL);
+		error = fs_isymlink(softc, dir, name,
+		    req->lr_req.tcreate.extension, (gid_t)-1, &st);
+	} else if (dmperm & L9P_DMNAMEDPIPE) {
+		error = fs_imkfifo(softc, dir, name, true,
+		    perm, (gid_t)-1, &st);
+	} else if (dmperm & L9P_DMSOCKET) {
+		error = fs_imksocket(softc, dir, name, true,
+		    perm, (gid_t)-1, &st);
+	} else if (dmperm & L9P_DMDEVICE) {
+		unsigned int major, minor;
+		char type;
+		dev_t dev;
+
+		/* As above: the device description is client-supplied. */
+		if (req->lr_req.tcreate.extension == NULL)
+			return (EINVAL);
+
+		/*
+		 * ??? Should this be testing < 3?  For now, allow a single
+		 * integer mode with minor==0 implied.
+		 */
+		minor = 0;
+		if (sscanf(req->lr_req.tcreate.extension, "%c %u %u",
+		    &type, &major, &minor) < 2) {
+			return (EINVAL);
+		}
+
+		switch (type) {
+		case 'b':
+			perm |= S_IFBLK;
+			break;
+		case 'c':
+			perm |= S_IFCHR;
+			break;
+		default:
+			return (EINVAL);
+		}
+		dev = makedev(major, minor);
+		error = fs_imknod(softc, dir, name, true, perm, dev,
+		    (gid_t)-1, &st);
+	} else {
+		enum l9p_omode p9;
+		int flags;
+
+		p9 = req->lr_req.tcreate.mode;
+		error = fs_oflags_dotu(p9, &flags);
+		if (error)
+			return (error);
+		error = fs_icreate(softc, dir, name, flags,
+		    true, perm, (gid_t)-1, &st);
+		req->lr_resp.rcreate.iounit = req->lr_conn->lc_max_io_size;
+	}
+
+	if (error == 0)
+		generate_qid(&st, &req->lr_resp.rcreate.qid);
+
+	return (error);
+}
+
+/*
+ * Apply the Plan 9 create-permission rule.
+ *
+ * https://swtch.com/plan9port/man/man9/open.html and
+ * http://plan9.bell-labs.com/magic/man2html/5/open
+ * say that the permissions actually granted are
+ *	perm & (~0666 | (dir.perm & 0666))
+ * for files, and
+ *	perm & (~0777 | (dir.perm & 0777))
+ * for directories.  That is, the parent directory may
+ * take away permissions granted by the operation.
+ *
+ * This seems a bit restrictive; probably
+ * there should be a control knob for this.
+ */
+static inline mode_t
+fs_p9perm(mode_t perm, mode_t dir_perm, bool isdir)
+{
+	/* Bits the parent directory is allowed to veto. */
+	mode_t inherit = isdir ? 0777 : 0666;
+
+	return (perm & (~inherit | (dir_perm & inherit)));
+}
+
+/*
+ * Internal form of create (plain file).
+ *
+ * Our caller takes care of splitting off all the special
+ * types of create (mknod, etc), so this is purely for files.
+ *
+ * Arguments: the fs_softc <softc>; the directory fid <dir> in
+ * which the new file is to be created; the <name> of the new file;
+ * the open <flags>; <isp9>, selecting plan9 style rather than
+ * Linux style permissions; the permissions <perm>; an effective
+ * group id <egid>; and a stat structure <st> that is filled in to
+ * describe the final result on success.
+ *
+ * On successful create, the fid switches to the newly created
+ * file, which is now open; its associated file-name changes too.
+ * On failure the fid is left as it was and an errno is returned.
+ *
+ * Note that the original (dir) fid is never currently open,
+ * so there is nothing to close.
+ */
+static int
+fs_icreate(void *softc, struct l9p_fid *dir, char *name, int flags,
+    bool isp9, mode_t perm, gid_t egid, struct stat *st)
+{
+	struct fs_fid *file = dir->lo_aux;
+	char newname[MAXPATHLEN];
+	char *saved;
+	gid_t gid;
+	uid_t uid;
+	int error, fd;
+
+	/*
+	 * Build full path name from directory + file name.  We'll
+	 * check permissions on the parent directory, then race to
+	 * create the file before anything bad happens like symlinks.
+	 *
+	 * (To close this race we need to use openat(), which is
+	 * left for a later version of this code.)
+	 */
+	error = fs_buildname(dir, name, newname, sizeof(newname));
+	if (error)
+		return (error);
+
+	/* This copy becomes file->ff_name if everything works out. */
+	saved = strdup(newname);
+	if (saved == NULL)
+		return (ENOMEM);
+
+	/* Check create permission and compute new file ownership. */
+	error = fs_nde(softc, dir, false, egid, st, &uid, &gid);
+	if (error)
+		goto fail;
+
+	/* Adjust new-file permissions for Plan9 protocol. */
+	if (isp9)
+		perm = fs_p9perm(perm, st->st_mode, false);
+
+	/* Create is always exclusive so O_TRUNC is irrelevant. */
+	fd = openat(file->ff_dirfd, newname, flags | O_CREAT | O_EXCL, perm);
+	if (fd < 0) {
+		error = errno;
+		goto fail;
+	}
+
+	/* Fix permissions and owner. */
+	if (fchmod(fd, perm) != 0 ||
+	    fchown(fd, uid, gid) != 0 ||
+	    fstat(fd, st) != 0) {
+		error = errno;
+		(void) close(fd);
+		/* unlink(newname); ? */
+		goto fail;
+	}
+
+	/* It *was* a directory; now it's a file, and it's open. */
+	free(file->ff_name);
+	file->ff_name = saved;
+	file->ff_fd = fd;
+	return (0);
+
+fail:
+	free(saved);
+	return (error);
+}
+
+/*
+ * Internal form of open: stat file and verify permissions (from p9
+ * argument), then open the file-or-directory, leaving the internal
+ * fs_fid fields set up.  If we cannot open the file, return a
+ * suitable error number, and leave everything unchanged.
+ *
+ * To mitigate the race between permissions testing and the actual
+ * open, we can stat the file twice (once with lstat() before open,
+ * then with fstat() after).  We assume O_NOFOLLOW is set in flags,
+ * so if some other race-winner substitutes in a symlink we won't
+ * open it here.  (However, embedded symlinks, if they occur, are
+ * still an issue.  Ideally we would like to have an O_NEVERFOLLOW
+ * that fails on embedded symlinks, and a way to pass this to
+ * lstat() as well.)
+ *
+ * When we use opendir() we cannot pass O_NOFOLLOW, so we must rely
+ * on substitution-detection via fstat().  To simplify the code we
+ * just always re-check.
+ *
+ * (For a proper fix in the future, we can require openat(), keep
+ * each parent directory open during walk etc, and allow only final
+ * name components with O_NOFOLLOW.)
+ *
+ * On successful return, st has been filled in, and either
+ * file->ff_dir (directories) or file->ff_fd (everything else)
+ * holds the open object.
+ */
+static int
+fs_iopen(void *softc, struct l9p_fid *fid, int flags, enum l9p_omode p9,
+    gid_t egid __unused, struct stat *st)
+{
+	struct fs_softc *sc = softc;
+	struct fs_fid *file;
+	struct stat first;
+	int32_t op;
+	char *name;
+	int error;
+	int fd;
+	DIR *dirp;
+
+	/* Forbid write ops on read-only file system. */
+	if (sc->fs_readonly) {
+		if ((flags & O_TRUNC) != 0)
+			return (EROFS);
+		if ((flags & O_ACCMODE) != O_RDONLY)
+			return (EROFS);
+		if (p9 & L9P_ORCLOSE)
+			return (EROFS);
+	}
+
+	file = fid->lo_aux;
+	assert(file != NULL);
+	name = file->ff_name;
+
+	if (fstatat(file->ff_dirfd, name, &first, AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+	if (S_ISLNK(first.st_mode))
+		return (EPERM);
+
+	/* Can we rely on O_APPEND here?  Best not, can be cleared. */
+	switch (flags & O_ACCMODE) {
+	case O_RDONLY:
+		op = L9P_ACE_READ_DATA;
+		break;
+	case O_WRONLY:
+		op = L9P_ACE_WRITE_DATA;
+		break;
+	case O_RDWR:
+		op = L9P_ACE_READ_DATA | L9P_ACE_WRITE_DATA;
+		break;
+	default:
+		return (EINVAL);
+	}
+	fillacl(file);
+	error = check_access(op, NULL, NULL, file->ff_acl, &first,
+	    file->ff_ai, (gid_t)-1);
+	if (error)
+		return (error);
+
+	if (S_ISDIR(first.st_mode)) {
+		/* Forbid write or truncate on directory. */
+		if ((flags & O_ACCMODE) != O_RDONLY || (flags & O_TRUNC))
+			return (EPERM);
+		fd = openat(file->ff_dirfd, name, O_DIRECTORY);
+		if (fd < 0)
+			return (EPERM);
+		dirp = fdopendir(fd);
+		if (dirp == NULL) {
+			/* The stream did not take over fd; don't leak it. */
+			(void) close(fd);
+			return (EPERM);
+		}
+	} else {
+		dirp = NULL;
+		fd = openat(file->ff_dirfd, name, flags);
+		if (fd < 0)
+			return (EPERM);
+	}
+
+	/*
+	 * We have a valid fd, and maybe non-null dirp.  Re-check
+	 * the file, and fail if st_dev or st_ino changed.
+	 */
+	if (fstat(fd, st) != 0 ||
+	    first.st_dev != st->st_dev ||
+	    first.st_ino != st->st_ino) {
+		/* closedir() also closes the underlying fd. */
+		if (dirp != NULL)
+			(void) closedir(dirp);
+		else
+			(void) close(fd);
+		return (EPERM);
+	}
+	if (dirp != NULL)
+		file->ff_dir = dirp;
+	else
+		file->ff_fd = fd;
+	return (0);
+}
+
+/*
+ * Internal form of mkdir (common code for all forms).
+ *
+ * Arguments: the fs_softc <softc>; the directory fid <dir> in
+ * which the new entry is to be created; the <name> of the new
+ * entry; <isp9>, selecting plan9 style rather than Linux style
+ * permissions; the permissions <perm>; an effective group id
+ * <egid>; and a stat structure <st> that is filled in to describe
+ * the final result on success.
+ *
+ * Unlike fs_icreate(), the fid itself is not changed and nothing
+ * is left open.  Returns 0 or an errno value.
+ */
+static int
+fs_imkdir(void *softc, struct l9p_fid *dir, char *name,
+    bool isp9, mode_t perm, gid_t egid, struct stat *st)
+{
+	struct fs_fid *ff = dir->lo_aux;
+	char newname[MAXPATHLEN];
+	gid_t gid;
+	uid_t uid;
+	int error, fd;
+
+	error = fs_buildname(dir, name, newname, sizeof(newname));
+	if (error)
+		return (error);
+
+	error = fs_nde(softc, dir, true, egid, st, &uid, &gid);
+	if (error)
+		return (error);
+
+	if (isp9)
+		perm = fs_p9perm(perm, st->st_mode, true);
+
+	if (mkdirat(ff->ff_dirfd, newname, perm) != 0)
+		return (errno);
+
+	/* Re-open the new directory to fix owner and mode via the fd. */
+	fd = openat(ff->ff_dirfd, newname,
+	    O_DIRECTORY | O_RDONLY | O_NOFOLLOW);
+	if (fd < 0) {
+		/* rmdir(newname) ? */
+		return (errno);
+	}
+
+	if (fchown(fd, uid, gid) != 0 ||
+	    fchmod(fd, perm) != 0 ||
+	    fstat(fd, st) != 0) {
+		error = errno;
+		/* rmdir(newname) ? */
+	}
+	(void) close(fd);
+
+	return (error);
+}
+
+#ifdef __APPLE__
+/*
+ * Invoke the SYS___pthread_fchdir system call on <fd> and return
+ * its result.
+ *
+ * This is an undocumented OS X syscall.  It would be best to avoid it,
+ * but there doesn't seem to be another safe way to implement mknodat.
+ * Dear Apple, please implement mknodat before you remove this syscall.
+ */
+static int fs_ifchdir_thread_local(int fd)
+{
+	int rc;
+
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wdeprecated-declarations"
+	rc = syscall(SYS___pthread_fchdir, fd);
+#pragma clang diagnostic pop
+	return rc;
+}
+#endif
+
+/*
+ * Internal form of mknod (special device).
+ *
+ * The device type (S_IFBLK, S_IFCHR) is included in the <mode>
+ * parameter; <dev> is the device number.  The other arguments are
+ * as for fs_imkdir().  On success <st> describes the new node.
+ * Returns 0 or an errno value; EPERM is returned if the node found
+ * afterwards is not of the requested type.
+ */
+static int
+fs_imknod(void *softc, struct l9p_fid *dir, char *name,
+    bool isp9, mode_t mode, dev_t dev, gid_t egid, struct stat *st)
+{
+	struct fs_fid *ff;
+	mode_t perm;
+	gid_t gid;
+	uid_t uid;
+	char newname[MAXPATHLEN];
+	int error;
+
+	ff = dir->lo_aux;
+	error = fs_buildname(dir, name, newname, sizeof(newname));
+	if (error)
+		return (error);
+
+	error = fs_nde(softc, dir, false, egid, st, &uid, &gid);
+	if (error)
+		return (error);
+
+	if (isp9) {
+		perm = fs_p9perm(mode & 0777, st->st_mode, false);
+		mode = (mode & ~0777) | perm;
+	} else {
+		perm = mode & 0777;
+	}
+
+#ifdef __APPLE__
+	/*
+	 * No mknodat(): temporarily point this thread's working
+	 * directory at the parent and use plain mknod().  Callers
+	 * expect an errno value, so never return a bare -1.
+	 */
+	if (fs_ifchdir_thread_local(ff->ff_dirfd) < 0)
+		return (errno);
+	error = mknod(newname, mode, dev) != 0 ? errno : 0;
+	/* Stop using the thread-local cwd */
+	(void) fs_ifchdir_thread_local(-1);
+	if (error)
+		return (error);
+#else
+	if (mknodat(ff->ff_dirfd, newname, mode, dev) != 0)
+		return (errno);
+#endif
+
+	/* We cannot open the new name; race to use l* syscalls. */
+	if (fchownat(ff->ff_dirfd, newname, uid, gid, AT_SYMLINK_NOFOLLOW) != 0 ||
+	    fchmodat(ff->ff_dirfd, newname, perm, AT_SYMLINK_NOFOLLOW) != 0 ||
+	    fstatat(ff->ff_dirfd, newname, st, AT_SYMLINK_NOFOLLOW) != 0)
+		error = errno;
+	else if ((st->st_mode & S_IFMT) != (mode & S_IFMT))
+		error = EPERM;	/* ??? lost a race anyway */
+
+	/* if (error) unlink(newname) ? */
+
+	return (error);
+}
+
+/*
+ * Internal form of mkfifo.
+ */
+static int
+fs_imkfifo(void *softc, struct l9p_fid *dir, char *name,
+ bool isp9, mode_t perm, gid_t egid, struct stat *st)
+{
+ struct fs_fid *ff;
+ gid_t gid;
+ uid_t uid;
+ char newname[MAXPATHLEN];
+ int error;
+
+ ff = dir->lo_aux;
+ error = fs_buildname(dir, name, newname, sizeof(newname));
+ if (error)
+ return (error);
+
+ error = fs_nde(softc, dir, false, egid, st, &uid, &gid);
+ if (error)
+ return (error);
+
+ if (isp9)
+ perm = fs_p9perm(perm, st->st_mode, false);
+
+ if (mkfifo(newname, perm) != 0)
+ return (errno);
+
+ /* We cannot open the new name; race to use l* syscalls. */
+ if (fchownat(ff->ff_dirfd, newname, uid, gid, AT_SYMLINK_NOFOLLOW) != 0 ||
+ fchmodat(ff->ff_dirfd, newname, perm, AT_SYMLINK_NOFOLLOW) != 0 ||
+ fstatat(ff->ff_dirfd, newname, st, AT_SYMLINK_NOFOLLOW) != 0)
+ error = errno;
+ else if (!S_ISFIFO(st->st_mode))
+ error = EPERM; /* ??? lost a race anyway */
+
+ /* if (error) unlink(newname) ? */
+
+ return (error);
+}
+
+/*
+ * Internal form of mksocket.
+ *
+ * This is a bit different because of the horrible socket naming
+ * system (bind() with sockaddr_un sun_path).
+ *
+ * The socket inode is created by bind()ing a throw-away AF_UNIX
+ * socket to the new name; ownership and mode are then fixed up and
+ * the result is stat'ed into <st>. Both the temporary socket and
+ * any directory descriptor opened for bindat() are closed before
+ * returning. Returns 0 or an errno value (ENAMETOOLONG if the name
+ * does not fit in sun_path, EPERM if the node found afterwards is
+ * not a socket).
+ *
+ * NOTE(review): the plain bind() path hands <newname> to bind()
+ * directly, while the fix-up calls resolve it relative to
+ * ff->ff_dirfd; these presumably agree only when the process
+ * working directory matches ff_dirfd -- confirm.
+ */
+static int
+fs_imksocket(void *softc, struct l9p_fid *dir, char *name,
+ bool isp9, mode_t perm, gid_t egid, struct stat *st)
+{
+ struct fs_fid *ff;
+ struct sockaddr_un sun;
+ char *path;
+ char newname[MAXPATHLEN];
+ gid_t gid;
+ uid_t uid;
+ int error = 0, s, fd;
+
+ ff = dir->lo_aux;
+ error = fs_buildname(dir, name, newname, sizeof(newname));
+ if (error)
+ return (error);
+
+ error = fs_nde(softc, dir, false, egid, st, &uid, &gid);
+ if (error)
+ return (error);
+
+ if (isp9)
+ perm = fs_p9perm(perm, st->st_mode, false);
+
+ s = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (s < 0)
+ return (errno);
+
+ path = newname;
+ fd = -1;
+#ifdef HAVE_BINDAT
+ /* Try bindat() if needed. */
+ if (strlen(path) >= sizeof(sun.sun_path)) {
+ fd = openat(ff->ff_dirfd, ff->ff_name,
+ O_RDONLY | O_DIRECTORY | O_NOFOLLOW);
+ if (fd >= 0)
+ path = name;
+ }
+#endif
+
+ /*
+ * Can only create the socket if the path will fit.
+ * Even if we are using bindat() there are limits
+ * (the API for AF_UNIX sockets is ... not good).
+ *
+ * Note: in theory we can fill sun_path to the end
+ * (omitting a terminating '\0') but in at least one
+ * Unix-like system, this was known to behave oddly,
+ * so we test for ">=" rather than just ">".
+ */
+ if (strlen(path) >= sizeof(sun.sun_path)) {
+ error = ENAMETOOLONG;
+ goto out;
+ }
+ sun.sun_family = AF_UNIX;
+ sun.sun_len = sizeof(struct sockaddr_un);
+ strncpy(sun.sun_path, path, sizeof(sun.sun_path));
+
+#ifdef HAVE_BINDAT
+ if (fd >= 0) {
+ if (bindat(fd, s, (struct sockaddr *)&sun, sun.sun_len) < 0)
+ error = errno;
+ goto out; /* done now, for good or ill */
+ }
+#endif
+
+ if (bind(s, (struct sockaddr *)&sun, sun.sun_len) < 0)
+ error = errno;
+out:
+
+ if (error == 0) {
+ /*
+ * We believe we created the socket-inode. Fix
+ * permissions etc. Note that we cannot use
+ * fstat() on the socket descriptor: it succeeds,
+ * but we get bogus data!
+ */
+ if (fchownat(ff->ff_dirfd, newname, uid, gid, AT_SYMLINK_NOFOLLOW) != 0 ||
+ fchmodat(ff->ff_dirfd, newname, perm, AT_SYMLINK_NOFOLLOW) != 0 ||
+ fstatat(ff->ff_dirfd, newname, st, AT_SYMLINK_NOFOLLOW) != 0)
+ error = errno;
+ else if (!S_ISSOCK(st->st_mode))
+ error = EPERM; /* ??? lost a race anyway */
+
+ /* if (error) unlink(newname) ? */
+ }
+
+ /*
+ * It's not clear which error should override, although
+ * ideally we should never see either close() call fail.
+ * In any case we do want to try to close both fd and s,
+ * always. Let's set error only if it is not already set,
+ * so that all exit paths can use the same code.
+ */
+ if (fd >= 0 && close(fd) != 0)
+ if (error == 0)
+ error = errno;
+ if (close(s) != 0)
+ if (error == 0)
+ error = errno;
+
+ return (error);
+}
+
+/*
+ * Internal form of symlink.
+ *
+ * Creates <name> under the directory fid <dir> as a symbolic link
+ * to <symtgt>.  Note that symlinks are presumed to carry no
+ * permission bits.  They do have owners, however (who may be
+ * charged for quotas), so the owner is set after creation.
+ *
+ * On success <st> describes the new link.  Returns 0 or an errno
+ * value; EPERM is returned if the node found afterwards is not a
+ * symlink.
+ */
+static int
+fs_isymlink(void *softc, struct l9p_fid *dir, char *name,
+    char *symtgt, gid_t egid, struct stat *st)
+{
+	struct fs_fid *ff = dir->lo_aux;
+	char newname[MAXPATHLEN];
+	gid_t gid;
+	uid_t uid;
+	int error;
+
+	error = fs_buildname(dir, name, newname, sizeof(newname));
+	if (error)
+		return (error);
+
+	error = fs_nde(softc, dir, false, egid, st, &uid, &gid);
+	if (error)
+		return (error);
+
+	if (symlinkat(symtgt, ff->ff_dirfd, newname) != 0)
+		return (errno);
+
+	/* We cannot open the new name; race to use l* syscalls. */
+	if (fchownat(ff->ff_dirfd, newname, uid, gid,
+	    AT_SYMLINK_NOFOLLOW) != 0) {
+		error = errno;
+	} else if (fstatat(ff->ff_dirfd, newname, st,
+	    AT_SYMLINK_NOFOLLOW) != 0) {
+		error = errno;
+	} else if (!S_ISLNK(st->st_mode)) {
+		error = EPERM;	/* ??? lost a race anyway */
+	}
+
+	/* if (error) unlink(newname) ? */
+
+	return (error);
+}
+
+/*
+ * Handle Topen: translate the 9P open mode into host open flags,
+ * open the fid via fs_iopen(), and fill in the reply qid and
+ * iounit.  Returns 0 or an errno value.
+ */
+static int
+fs_open(void *softc, struct l9p_request *req)
+{
+	enum l9p_omode p9 = req->lr_req.topen.mode;
+	struct stat st;
+	int error, flags;
+
+	error = fs_oflags_dotu(p9, &flags);
+	if (error == 0)
+		error = fs_iopen(softc, req->lr_fid, flags, p9,
+		    (gid_t)-1, &st);
+	if (error != 0)
+		return (error);
+
+	generate_qid(&st, &req->lr_resp.ropen.qid);
+	req->lr_resp.ropen.iounit = req->lr_conn->lc_max_io_size;
+	return (0);
+}
+
+/*
+ * Helper for directory read: lstat <name> relative to the open
+ * directory stream of <file>, without following a final symlink.
+ *
+ * This is a lot faster if we have lstatat (or fstatat with
+ * AT_SYMLINK_NOFOLLOW), but not all systems do, so the details are
+ * hidden in this inline function.
+ */
+static inline int
+fs_lstatat(struct fs_fid *file, char *name, struct stat *st)
+{
+	int dfd = dirfd(file->ff_dir);
+
+	return (fstatat(dfd, name, st, AT_SYMLINK_NOFOLLOW));
+}
+
+/*
+ * Handle Tread.
+ *
+ * For a fid with an open directory stream, pack one stat record
+ * per directory entry into the reply until the entries run out or
+ * the next record no longer fits.  For any other fid, read up to
+ * io.count bytes at io.offset into the request's data iovec and
+ * report the number of bytes read in the reply.
+ *
+ * Returns 0 or an errno value.
+ */
+static int
+fs_read(void *softc, struct l9p_request *req)
+{
+	struct l9p_stat l9stat;
+	struct fs_softc *sc;
+	struct fs_fid *file;
+	bool dotu = req->lr_conn->lc_version >= L9P_2000U;
+	ssize_t ret;
+
+	sc = softc;
+	file = req->lr_fid->lo_aux;
+	assert(file != NULL);
+
+	if (file->ff_dir != NULL) {
+		struct dirent *d;
+		struct stat st;
+		struct l9p_message msg;
+		long o;
+
+		pthread_mutex_lock(&file->ff_mtx);
+
+		/*
+		 * Must use telldir before readdir since seekdir
+		 * takes cookie values.  Unfortunately this wastes
+		 * a lot of time (and memory) building unneeded
+		 * cookies that can only be flushed by closing
+		 * the directory.
+		 *
+		 * NB: FreeBSD libc seekdir has SINGLEUSE defined,
+		 * so in fact, we can discard the cookies by
+		 * calling seekdir on them.  This clears up wasted
+		 * memory at the cost of even more wasted time...
+		 *
+		 * XXX: readdir/telldir/seekdir not thread safe
+		 *
+		 * NOTE(review): the strings dostat() allocates in
+		 * l9stat are not released in this loop -- confirm
+		 * whether l9p_pack_stat() takes ownership of them.
+		 */
+		l9p_init_msg(&msg, req, L9P_PACK);
+		for (;;) {
+			o = telldir(file->ff_dir);
+			d = readdir(file->ff_dir);
+			if (d == NULL)
+				break;
+			/* Entries that vanished meanwhile are skipped. */
+			if (fs_lstatat(file, d->d_name, &st))
+				continue;
+			dostat(sc, &l9stat, d->d_name, &st, dotu);
+			if (l9p_pack_stat(&msg, req, &l9stat) != 0) {
+				/* Rewind so the next read resumes here. */
+				seekdir(file->ff_dir, o);
+				break;
+			}
+#if defined(__FreeBSD__)
+			seekdir(file->ff_dir, o);
+			(void) readdir(file->ff_dir);
+#endif
+		}
+
+		pthread_mutex_unlock(&file->ff_mtx);
+	} else {
+		size_t niov = l9p_truncate_iov(req->lr_data_iov,
+		    req->lr_data_niov, req->lr_req.io.count);
+
+#if defined(__FreeBSD__)
+		ret = preadv(file->ff_fd, req->lr_data_iov, niov,
+		    req->lr_req.io.offset);
+#else
+		/* XXX: not thread safe, should really use aio_listio. */
+		if (lseek(file->ff_fd, (off_t)req->lr_req.io.offset, SEEK_SET) < 0)
+			return (errno);
+
+		/*
+		 * Keep the signed result: narrowing it to uint32_t
+		 * here would turn -1 into a large positive count and
+		 * hide the failure from the check below.
+		 */
+		ret = readv(file->ff_fd, req->lr_data_iov, (int)niov);
+#endif
+
+		if (ret < 0)
+			return (errno);
+
+		req->lr_resp.io.count = (uint32_t)ret;
+	}
+
+	return (0);
+}
+
+/*
+ * Handle Tremove: check that the fid's user may unlink the file
+ * from its parent directory, then remove it (with AT_REMOVEDIR if
+ * it is a directory).
+ *
+ * Returns 0 or an errno value (EROFS on a read-only export).
+ */
+static int
+fs_remove(void *softc, struct l9p_fid *fid)
+{
+	struct fs_softc *sc = softc;
+	struct l9p_acl *parent_acl;
+	struct fs_fid *file;
+	struct stat pst, cst;
+	char dirname[MAXPATHLEN];
+	int error;
+
+	if (sc->fs_readonly)
+		return (EROFS);
+
+	error = fs_pdir(sc, fid, dirname, sizeof(dirname), &pst);
+	if (error)
+		return (error);
+
+	file = fid->lo_aux;
+	/*
+	 * error is 0 at this point, so report the stat failure via
+	 * errno; otherwise a failed remove would look successful.
+	 */
+	if (fstatat(file->ff_dirfd, file->ff_name, &cst, AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+
+	parent_acl = getacl(file, -1, dirname);
+	fillacl(file);
+
+	error = check_access(L9P_ACOP_UNLINK,
+	    parent_acl, &pst, file->ff_acl, &cst, file->ff_ai, (gid_t)-1);
+	/* The parent ACL was fetched just for this check. */
+	l9p_acl_free(parent_acl);
+	if (error)
+		return (error);
+
+	if (unlinkat(file->ff_dirfd, file->ff_name,
+	    S_ISDIR(cst.st_mode) ? AT_REMOVEDIR : 0) != 0)
+		error = errno;
+
+	return (error);
+}
+
+/*
+ * Handle Tstat: lstat the fid's file (relative to its directory
+ * descriptor, not following a final symlink) and convert the result
+ * into the reply's 9P stat structure.  Returns 0 or an errno value.
+ */
+static int
+fs_stat(void *softc, struct l9p_request *req)
+{
+	struct fs_softc *sc = softc;
+	struct fs_fid *file = req->lr_fid->lo_aux;
+	bool dotu = req->lr_conn->lc_version >= L9P_2000U;
+	struct stat st;
+	int rc;
+
+	assert(file);
+
+	rc = fstatat(file->ff_dirfd, file->ff_name, &st,
+	    AT_SYMLINK_NOFOLLOW);
+	if (rc != 0)
+		return (errno);
+
+	dostat(sc, &req->lr_resp.rstat.stat, file->ff_name, &st, dotu);
+	return (0);
+}
+
+/*
+ * Handle Twalk: starting from the request's fid, walk through the
+ * twalk.wname[] components one at a time, checking search
+ * permission on each directory, and bind the request's newfid to
+ * the last name successfully walked.  One qid is returned per
+ * component walked.  Returns 0 or an errno value.
+ */
+static int
+fs_walk(void *softc, struct l9p_request *req)
+{
+	struct l9p_acl *acl;
+	struct fs_authinfo *ai;
+	struct fs_fid *file = req->lr_fid->lo_aux;
+	struct fs_fid *newfile;
+	struct stat st;
+	size_t clen, namelen, need;
+	char *comp, *succ, *next, *swtmp;
+	bool atroot;
+	bool dotdot;
+	int i, nwname;
+	int error = 0;
+	char namebufs[2][MAXPATHLEN];
+
+	/*
+	 * https://swtch.com/plan9port/man/man9/walk.html:
+	 *
+	 *    It is legal for nwname to be zero, in which case newfid
+	 *    will represent the same file as fid and the walk will
+	 *    usually succeed; this is equivalent to walking to dot.
+	 * [Aside: it's not clear if we should test S_ISDIR here.]
+	 *    ...
+	 *    The name ".." ... represents the parent directory.
+	 *    The name "." ... is not used in the protocol.
+	 *    ... A walk of the name ".." in the root directory
+	 *    of the server is equivalent to a walk with no name
+	 *    elements.
+	 *
+	 * Note that req.twalk.nwname never exceeds L9P_MAX_WELEM,
+	 * so it is safe to convert to plain int.
+	 *
+	 * We are to return an error only if the first walk fails,
+	 * else stop at the end of the names or on the first error.
+	 * The final fid is based on the last name successfully
+	 * walked.
+	 *
+	 * Note that we *do* get Twalk requests with nwname==0 on files.
+	 *
+	 * Set up "successful name" buffer pointer with base fid name,
+	 * initially.  We'll swap each new success into it as we go.
+	 *
+	 * Invariant: atroot, stat data and acl correspond to the
+	 * current (succ) path.
+	 */
+	succ = namebufs[0];
+	next = namebufs[1];
+	namelen = strlcpy(succ, file->ff_name, MAXPATHLEN);
+	if (namelen >= MAXPATHLEN)
+		return (ENAMETOOLONG);
+	if (fstatat(file->ff_dirfd, succ, &st, AT_SYMLINK_NOFOLLOW) < 0)
+		return (errno);
+	ai = file->ff_ai;
+	atroot = strlen(succ) == 0; /* XXX? */
+	fillacl(file);
+	acl = file->ff_acl;
+
+	nwname = (int)req->lr_req.twalk.nwname;
+
+	for (i = 0; i < nwname; i++) {
+		/*
+		 * Must have execute permission to search a directory.
+		 * Then, look up each component in its directory-so-far.
+		 * Check for ".." along the way, handling specially
+		 * as needed.  Forbid "/" in name components.
+		 */
+		if (!S_ISDIR(st.st_mode)) {
+			error = ENOTDIR;
+			goto out;
+		}
+		error = check_access(L9P_ACE_EXECUTE,
+		    NULL, NULL, acl, &st, ai, (gid_t)-1);
+		if (error) {
+			L9P_LOG(L9P_DEBUG,
+			    "Twalk: denying dir-walk on \"%s\" for uid %u",
+			    succ, (unsigned)ai->ai_uid);
+			error = EPERM;
+			goto out;
+		}
+		comp = req->lr_req.twalk.wname[i];
+		if (strchr(comp, '/') != NULL) {
+			error = EINVAL;
+			break;
+		}
+
+		clen = strlen(comp);
+		dotdot = false;
+
+		/*
+		 * Build next pathname (into "next").  If "..",
+		 * just strip one name component off the success
+		 * name so far.  Since we know this name fits, the
+		 * stripped down version also fits.  Otherwise,
+		 * the name is the base name plus '/' plus the
+		 * component name plus terminating '\0'; this may
+		 * or may not fit.
+		 */
+		if (comp[0] == '.') {
+			if (clen == 1) {
+				error = EINVAL;
+				break;
+			}
+			if (comp[1] == '.' && clen == 2)
+				dotdot = true;
+		}
+		if (dotdot) {
+			/*
+			 * It's not clear how ".." at root should
+			 * be handled when i > 0.  Obeying the man
+			 * page exactly, we reset i to 0 and stop,
+			 * declaring terminal success.
+			 *
+			 * Otherwise, we just climbed up one level
+			 * so adjust "atroot".
+			 */
+			if (atroot) {
+				i = 0;
+				break;
+			}
+			(void) r_dirname(succ, next, MAXPATHLEN);
+			namelen = strlen(next);
+			atroot = strlen(next) == 0; /* XXX? */
+		} else {
+			need = namelen + 1 + clen + 1;
+			if (need > MAXPATHLEN) {
+				error = ENAMETOOLONG;
+				break;
+			}
+			memcpy(next, succ, namelen);
+			next[namelen++] = '/';
+			memcpy(&next[namelen], comp, clen + 1);
+			namelen += clen;
+			/*
+			 * Since name is never ".", we are necessarily
+			 * descending below the root now.
+			 */
+			atroot = false;
+		}
+
+		if (fstatat(file->ff_dirfd, next, &st, AT_SYMLINK_NOFOLLOW) < 0) {
+			error = ENOENT;
+			break;
+		}
+
+		/*
+		 * Success: generate qid and swap this
+		 * successful name into place.  Update acl.
+		 */
+		generate_qid(&st, &req->lr_resp.rwalk.wqid[i]);
+		swtmp = succ;
+		succ = next;
+		next = swtmp;
+		if (acl != NULL && acl != file->ff_acl)
+			l9p_acl_free(acl);
+		/*
+		 * After the swap "succ" is the path we just stat'ed
+		 * and "next" is the previous one, so the ACL that
+		 * goes with st is the one for succ.
+		 */
+		acl = getacl(file, -1, succ);
+	}
+
+	/*
+	 * Fail only if we failed on the first name.
+	 * Otherwise we succeeded on something, and "succ"
+	 * points to the last successful name in namebufs[].
+	 */
+	if (error) {
+		if (i == 0)
+			goto out;
+		error = 0;
+	}
+
+	newfile = open_fid(file->ff_dirfd, succ, ai, false);
+	if (newfile == NULL) {
+		error = ENOMEM;
+		goto out;
+	}
+	if (req->lr_newfid == req->lr_fid) {
+		/*
+		 * Before overwriting fid->lo_aux, free the old value.
+		 * Note that this doesn't free the l9p_fid data,
+		 * just the fs_fid data.  (But it does ditch ff_acl.)
+		 */
+		if (acl == file->ff_acl)
+			acl = NULL;
+		fs_freefid(softc, req->lr_fid);
+		file = NULL;
+	}
+	req->lr_newfid->lo_aux = newfile;
+	/*
+	 * Hand a privately fetched ACL over to the new fid.  With
+	 * file == NULL (old fid replaced above) a non-NULL acl is
+	 * necessarily private, since a shared one was cleared before
+	 * the old fid was freed; not handing it over would leak it.
+	 */
+	if (file == NULL) {
+		if (acl != NULL) {
+			newfile->ff_acl = acl;
+			acl = NULL;
+		}
+	} else if (acl != file->ff_acl) {
+		newfile->ff_acl = acl;
+		acl = NULL;
+	}
+	req->lr_resp.rwalk.nwqid = (uint16_t)i;
+out:
+	/* Release an ACL fetched during the walk that nobody adopted. */
+	if (file != NULL && acl != file->ff_acl)
+		l9p_acl_free(acl);
+	return (error);
+}
+
+/*
+ * Handle a write request: write the request's data iovecs to the
+ * fid's open descriptor at the requested offset and report how many
+ * bytes were written in the response.
+ *
+ * Returns 0 on success, EROFS when the backend is read-only, or the
+ * errno of the failing system call.
+ */
+static int
+fs_write(void *softc, struct l9p_request *req)
+{
+	struct fs_softc *sc = softc;
+	struct fs_fid *file = req->lr_fid->lo_aux;
+	size_t niov;
+	ssize_t nwritten;
+
+	assert(file != NULL);
+
+	if (sc->fs_readonly)
+		return (EROFS);
+
+	/* Number of iovec entries to hand to the write call. */
+	niov = l9p_truncate_iov(req->lr_data_iov, req->lr_data_niov,
+	    req->lr_req.io.count);
+
+#if defined(__FreeBSD__)
+	nwritten = pwritev(file->ff_fd, req->lr_data_iov, niov,
+	    req->lr_req.io.offset);
+#else
+	/* XXX: not thread safe, should really use aio_listio. */
+	if (lseek(file->ff_fd, (off_t)req->lr_req.io.offset, SEEK_SET) < 0)
+		return (errno);
+
+	nwritten = writev(file->ff_fd, req->lr_data_iov, (int)niov);
+#endif
+
+	if (nwritten < 0)
+		return (errno);
+
+	req->lr_resp.io.count = (uint32_t)nwritten;
+	return (0);
+}
+
+/*
+ * Handle a wstat request: apply the changes described by the request's
+ * stat structure (length, ownership, mode, name) to the file behind
+ * lr_fid.  Fields holding the all-ones "don't touch" value are skipped;
+ * atime and mtime changes are not implemented and are ignored.
+ *
+ * Returns 0 on success, EROFS when the backend is read-only, EPERM for
+ * an attempt to change dev, EINVAL for a length change on a directory,
+ * ENOMEM, or the errno / error code of the failing step.
+ */
+static int
+fs_wstat(void *softc, struct l9p_request *req)
+{
+	struct fs_softc *sc = softc;
+	struct l9p_stat *l9stat = &req->lr_req.twstat.stat;
+	struct l9p_fid *fid;
+	struct fs_fid *file;
+	int error = 0;
+	int fd;
+
+	fid = req->lr_fid;
+	file = fid->lo_aux;
+	assert(file != NULL);
+
+	/*
+	 * XXX:
+	 *
+	 * stat(9P) sez:
+	 *
+	 * Either all the changes in wstat request happen, or none of them
+	 * does: if the request succeeds, all changes were made; if it fails,
+	 * none were.
+	 *
+	 * Atomicity is clearly missing in current implementation.
+	 */
+
+	if (sc->fs_readonly)
+		return (EROFS);
+
+	/* XXX: atime and mtime updates are not implemented, ignore them. */
+
+	if (l9stat->dev != (uint32_t)~0) {
+		error = EPERM;
+		goto out;
+	}
+
+	if (l9stat->length != (uint64_t)~0) {
+		if (file->ff_dir != NULL) {
+			error = EINVAL;
+			goto out;
+		}
+
+		/*
+		 * ff_name is resolved against ff_dirfd by every other
+		 * call in this function, so truncate through a descriptor
+		 * opened the same way instead of a cwd-relative
+		 * truncate(2).
+		 */
+		fd = openat(file->ff_dirfd, file->ff_name, O_WRONLY);
+		if (fd < 0) {
+			error = errno;
+			goto out;
+		}
+		if (ftruncate(fd, (off_t)l9stat->length) != 0) {
+			error = errno;
+			(void) close(fd);
+			goto out;
+		}
+		(void) close(fd);
+	}
+
+	if (req->lr_conn->lc_version >= L9P_2000U) {
+		if (fchownat(file->ff_dirfd, file->ff_name, l9stat->n_uid,
+		    l9stat->n_gid, AT_SYMLINK_NOFOLLOW) != 0) {
+			error = errno;
+			goto out;
+		}
+	}
+
+	if (l9stat->mode != (uint32_t)~0) {
+		if (fchmodat(file->ff_dirfd, file->ff_name,
+		    l9stat->mode & 0777, 0) != 0) {
+			error = errno;
+			goto out;
+		}
+	}
+
+	if (strlen(l9stat->name) > 0) {
+		struct l9p_acl *parent_acl;
+		struct stat st;
+		char *tmp;
+		char newname[MAXPATHLEN];
+
+		/*
+		 * Rename-within-directory: it's not deleting anything,
+		 * but we need write permission on the directory.  This
+		 * should suffice.
+		 */
+		error = fs_pdir(softc, fid, newname, sizeof(newname), &st);
+		if (error)
+			goto out;
+		parent_acl = getacl(file, -1, newname);
+		error = check_access(L9P_ACE_ADD_FILE,
+		    parent_acl, &st, NULL, NULL, file->ff_ai, (gid_t)-1);
+		l9p_acl_free(parent_acl);
+		if (error)
+			goto out;
+		error = fs_dpf(newname, l9stat->name, sizeof(newname));
+		if (error)
+			goto out;
+		/* Heap copy of the new path; becomes ff_name on success. */
+		tmp = strdup(newname);
+		if (tmp == NULL) {
+			error = ENOMEM;
+			goto out;
+		}
+		if (renameat(file->ff_dirfd, file->ff_name, file->ff_dirfd,
+		    tmp) != 0) {
+			error = errno;
+			free(tmp);
+			goto out;
+		}
+		/* Successful rename, update file->ff_name.  ACL can stay. */
+		free(file->ff_name);
+		file->ff_name = tmp;
+	}
+out:
+	return (error);
+}
+
+/*
+ * Handle a statfs request: after checking that the caller has
+ * read-data access to the fid's file, open it, query the file system
+ * statistics and the maximum name length, and fill in the response.
+ *
+ * Returns 0 on success, the check_access() error, or the errno of the
+ * failing system call.
+ */
+static int
+fs_statfs(void *softc __unused, struct l9p_request *req)
+{
+	struct fs_fid *file;
+	struct stat st;
+	struct statfs f;
+	long name_max;
+	int error;
+	int fd;
+
+	file = req->lr_fid->lo_aux;
+	assert(file);
+
+	if (fstatat(file->ff_dirfd, file->ff_name, &st,
+	    AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+
+	/*
+	 * Not entirely clear what access to require; we'll go
+	 * for "read data".
+	 */
+	fillacl(file);
+	error = check_access(L9P_ACE_READ_DATA, NULL, NULL,
+	    file->ff_acl, &st, file->ff_ai, (gid_t)-1);
+	if (error)
+		return (error);
+
+	fd = openat(file->ff_dirfd, file->ff_name, 0);
+	if (fd < 0)
+		return (errno);
+
+	if (fstatfs(fd, &f) != 0) {
+		/* Save errno before close() can clobber it; don't leak fd. */
+		error = errno;
+		(void) close(fd);
+		return (error);
+	}
+
+	name_max = fpathconf(fd, _PC_NAME_MAX);
+	/* Capture errno before close() so a failure is reported as-is. */
+	error = errno;
+	(void) close(fd);
+
+	if (name_max == -1)
+		return (error);
+
+	dostatfs(&req->lr_resp.rstatfs.statfs, &f, name_max);
+
+	return (0);
+}
+
+/*
+ * Handle an lopen request: convert the request's open flags with
+ * fs_oflags_dotl(), open the fid through fs_iopen(), and fill in the
+ * qid and iounit of the response.  Returns 0 or the error from either
+ * helper.
+ */
+static int
+fs_lopen(void *softc, struct l9p_request *req)
+{
+	struct stat sb;
+	enum l9p_omode omode;
+	gid_t gid;
+	int oflags;
+	int rc;
+
+	rc = fs_oflags_dotl(req->lr_req.tlopen.flags, &oflags, &omode);
+	if (rc != 0)
+		return (rc);
+
+	gid = req->lr_req.tlopen.gid;
+	rc = fs_iopen(softc, req->lr_fid, oflags, omode, gid, &sb);
+	if (rc != 0)
+		return (rc);
+
+	req->lr_resp.rlopen.iounit = req->lr_conn->lc_max_io_size;
+	generate_qid(&sb, &req->lr_resp.rlopen.qid);
+	return (0);
+}
+
+/*
+ * Handle an lcreate request: convert the open flags, create the named
+ * file in the directory fid via fs_icreate() with the low nine
+ * permission bits of the requested mode, and fill in the response.
+ * The iounit is stored whether or not the create succeeded; the qid
+ * only on success.
+ */
+static int
+fs_lcreate(void *softc, struct l9p_request *req)
+{
+	struct stat sb;
+	enum l9p_omode omode;
+	mode_t perm;
+	gid_t gid;
+	int oflags;
+	int rc;
+
+	rc = fs_oflags_dotl(req->lr_req.tlcreate.flags, &oflags, &omode);
+	if (rc != 0)
+		return (rc);
+
+	/* ? set-id bits? */
+	perm = (mode_t)req->lr_req.tlcreate.mode & 0777;
+	gid = req->lr_req.tlcreate.gid;
+
+	rc = fs_icreate(softc, req->lr_fid, req->lr_req.tlcreate.name,
+	    oflags, false, perm, gid, &sb);
+	if (rc == 0)
+		generate_qid(&sb, &req->lr_resp.rlcreate.qid);
+	req->lr_resp.rlcreate.iounit = req->lr_conn->lc_max_io_size;
+	return (rc);
+}
+
+/*
+ * Handle a symlink request: hand the directory fid, link name, link
+ * target and gid to fs_isymlink(), and on success store the new
+ * object's qid in the response.
+ */
+static int
+fs_symlink(void *softc, struct l9p_request *req)
+{
+	struct stat sb;
+	gid_t gid = req->lr_req.tsymlink.gid;
+	int rc;
+
+	rc = fs_isymlink(softc, req->lr_fid, req->lr_req.tsymlink.name,
+	    req->lr_req.tsymlink.symtgt, gid, &sb);
+	if (rc != 0)
+		return (rc);
+
+	generate_qid(&sb, &req->lr_resp.rsymlink.qid);
+	return (0);
+}
+
+/*
+ * Handle a mknod request: depending on the file type bits of the
+ * requested mode, create a block/character device node, a FIFO or a
+ * socket in the directory fid, and on success store the new object's
+ * qid in the response.
+ *
+ * Returns 0 on success, EINVAL for any other file type, or the error
+ * from the fs_imknod() / fs_imkfifo() / fs_imksocket() helper.
+ */
+static int
+fs_mknod(void *softc, struct l9p_request *req)
+{
+	struct l9p_fid *dir;
+	struct stat st;
+	uint32_t mode, major, minor;
+	dev_t dev;
+	gid_t gid;
+	char *name;
+	int error;
+
+	dir = req->lr_fid;
+	name = req->lr_req.tmknod.name;
+	mode = req->lr_req.tmknod.mode;
+	gid = req->lr_req.tmknod.gid;
+
+	switch (mode & S_IFMT) {
+	case S_IFBLK:
+	case S_IFCHR:
+		mode = (mode & S_IFMT) | (mode & 0777); /* ??? */
+		major = req->lr_req.tmknod.major;
+		/* Take the minor number from its own field, not major. */
+		minor = req->lr_req.tmknod.minor;
+		dev = makedev(major, minor);
+		error = fs_imknod(softc, dir, name, false,
+		    (mode_t)mode, dev, gid, &st);
+		break;
+
+	case S_IFIFO:
+		error = fs_imkfifo(softc, dir, name, false,
+		    (mode_t)(mode & 0777), gid, &st);
+		break;
+
+	case S_IFSOCK:
+		error = fs_imksocket(softc, dir, name, false,
+		    (mode_t)(mode & 0777), gid, &st);
+		break;
+
+	default:
+		error = EINVAL;
+		break;
+	}
+	if (error == 0)
+		generate_qid(&st, &req->lr_resp.rmknod.qid);
+	return (error);
+}
+
+/*
+ * Handle a rename request: move the file behind lr_fid into the
+ * directory behind lr_fid2 under the name given in the request.
+ *
+ * Returns 0 on success, EROFS when the backend is read-only,
+ * ENAMETOOLONG, ENOMEM, the check_access() / helper error, or the
+ * errno of the failing system call.  On success the fid's ff_name is
+ * replaced by the new path and its cached ACL is dropped.
+ */
+static int
+fs_rename(void *softc, struct l9p_request *req)
+{
+ struct fs_softc *sc = softc;
+ struct fs_authinfo *ai;
+ struct l9p_acl *oparent_acl;
+ struct l9p_fid *fid, *f2;
+ struct fs_fid *file, *f2ff;
+ struct stat cst, opst, npst;
+ int32_t op;
+ bool reparenting;
+ char *tmp;
+ char olddir[MAXPATHLEN], newname[MAXPATHLEN];
+ int error;
+
+ if (sc->fs_readonly)
+ return (EROFS);
+
+ /*
+ * Note: lr_fid represents the file that is to be renamed,
+ * so we must locate its parent directory and verify that
+ * both this parent directory and the new directory f2 are
+ * writable. But if the new parent directory is the same
+ * path as the old parent directory, our job is simpler.
+ */
+ fid = req->lr_fid;
+ file = fid->lo_aux;
+ assert(file != NULL);
+ ai = file->ff_ai;
+
+ /* Fill olddir with the parent directory path and opst with its stat. */
+ error = fs_pdir(sc, fid, olddir, sizeof(olddir), &opst);
+ if (error)
+ return (error);
+
+ f2 = req->lr_fid2;
+ f2ff = f2->lo_aux;
+ assert(f2ff != NULL);
+
+ /* Path comparison only: differing strings are treated as a move. */
+ reparenting = strcmp(olddir, f2ff->ff_name) != 0;
+
+ fillacl(file);
+ fillacl(f2ff);
+
+ if (fstatat(file->ff_dirfd, file->ff_name, &cst,
+ AT_SYMLINK_NOFOLLOW) != 0)
+ return (errno);
+
+ /*
+ * Are we moving from olddir? If so, we're unlinking
+ * from it, in terms of ACL access.
+ */
+ if (reparenting) {
+ oparent_acl = getacl(file, -1, olddir);
+ error = check_access(L9P_ACOP_UNLINK,
+ oparent_acl, &opst, file->ff_acl, &cst, ai, (gid_t)-1);
+ l9p_acl_free(oparent_acl);
+ if (error)
+ return (error);
+ }
+
+ /*
+ * Now check that we're allowed to "create" a file or directory in
+ * f2. (Should we do this, too, only if reparenting? Maybe check
+ * for dir write permission if not reparenting -- but that's just
+ * add-file/add-subdir, which means doing this always.)
+ */
+ if (fstatat(f2ff->ff_dirfd, f2ff->ff_name, &npst,
+ AT_SYMLINK_NOFOLLOW) != 0)
+ return (errno);
+
+ op = S_ISDIR(cst.st_mode) ? L9P_ACE_ADD_SUBDIRECTORY : L9P_ACE_ADD_FILE;
+ error = check_access(op, f2ff->ff_acl, &npst, NULL, NULL,
+ ai, (gid_t)-1);
+ if (error)
+ return (error);
+
+ /*
+ * Directories OK, file systems not R/O, etc; build final name.
+ * f2ff->ff_name cannot exceed MAXPATHLEN, but out of general
+ * paranoia, let's double check anyway.
+ */
+ if (strlcpy(newname, f2ff->ff_name, sizeof(newname)) >= sizeof(newname))
+ return (ENAMETOOLONG);
+ error = fs_dpf(newname, req->lr_req.trename.name, sizeof(newname));
+ if (error)
+ return (error);
+ /* Heap copy of the new path; it becomes file->ff_name on success. */
+ tmp = strdup(newname);
+ if (tmp == NULL)
+ return (ENOMEM);
+
+ /*
+ * NOTE(review): both the source and the target are resolved against
+ * file->ff_dirfd, not f2ff->ff_dirfd -- confirm the two fids always
+ * share the same directory descriptor.
+ */
+ if (renameat(file->ff_dirfd, file->ff_name, file->ff_dirfd, tmp) != 0) {
+ error = errno;
+ free(tmp);
+ return (error);
+ }
+
+ /* file has been renamed but old fid is not clunked */
+ free(file->ff_name);
+ file->ff_name = tmp;
+
+ dropacl(file);
+ return (0);
+}
+
+/*
+ * Handle a readlink request: read the symlink behind the fid into a
+ * stack buffer and store a heap copy of its target in the response.
+ *
+ * Returns 0 on success, the readlinkat() errno, or ENOMEM when the
+ * target fills the whole buffer or the copy cannot be allocated.
+ */
+static int
+fs_readlink(void *softc __unused, struct l9p_request *req)
+{
+	struct fs_fid *file = req->lr_fid->lo_aux;
+	char target[MAXPATHLEN];
+	ssize_t len;
+
+	assert(file);
+
+	len = readlinkat(file->ff_dirfd, file->ff_name, target,
+	    sizeof(target));
+	if (len < 0)
+		return (errno);
+
+	/* A completely full buffer may mean the target was truncated. */
+	if ((size_t)len >= sizeof(target))
+		return (ENOMEM); /* todo: allocate dynamically */
+
+	req->lr_resp.rreadlink.target = strndup(target, (size_t)len);
+	if (req->lr_resp.rreadlink.target == NULL)
+		return (ENOMEM);
+
+	return (0);
+}
+
+/*
+ * Handle a getattr request: stat the fid's file (without following a
+ * symlink) and copy each attribute named in the request mask into the
+ * response, recording in rgetattr.valid which ones were supplied.
+ *
+ * Returns 0 on success or the fstatat() errno, in which case valid is
+ * set to 0.
+ */
+static int
+fs_getattr(void *softc __unused, struct l9p_request *req)
+{
+	struct fs_fid *file = req->lr_fid->lo_aux;
+	struct stat sb;
+	uint64_t wanted;
+	uint64_t supplied = 0;
+	int rc;
+
+	assert(file);
+
+	if (fstatat(file->ff_dirfd, file->ff_name, &sb, AT_SYMLINK_NOFOLLOW)) {
+		rc = errno;
+		req->lr_resp.rgetattr.valid = supplied;
+		return (rc);
+	}
+
+	/* ?? Can we provide items not-requested?  If so, can skip tests. */
+	wanted = req->lr_req.tgetattr.request_mask;
+
+	if (wanted & L9PL_GETATTR_MODE) {
+		/* It is not clear if we need any translations. */
+		req->lr_resp.rgetattr.mode = sb.st_mode;
+		supplied |= L9PL_GETATTR_MODE;
+	}
+	if (wanted & L9PL_GETATTR_NLINK) {
+		req->lr_resp.rgetattr.nlink = sb.st_nlink;
+		supplied |= L9PL_GETATTR_NLINK;
+	}
+	if (wanted & L9PL_GETATTR_UID) {
+		/* provide st_uid, or file->ff_uid? */
+		req->lr_resp.rgetattr.uid = sb.st_uid;
+		supplied |= L9PL_GETATTR_UID;
+	}
+	if (wanted & L9PL_GETATTR_GID) {
+		/* provide st_gid, or file->ff_gid? */
+		req->lr_resp.rgetattr.gid = sb.st_gid;
+		supplied |= L9PL_GETATTR_GID;
+	}
+	if (wanted & L9PL_GETATTR_RDEV) {
+		/* It is not clear if we need any translations. */
+		req->lr_resp.rgetattr.rdev = (uint64_t)sb.st_rdev;
+		supplied |= L9PL_GETATTR_RDEV;
+	}
+	if (wanted & L9PL_GETATTR_ATIME) {
+		req->lr_resp.rgetattr.atime_sec =
+		    (uint64_t)sb.st_atimespec.tv_sec;
+		req->lr_resp.rgetattr.atime_nsec =
+		    (uint64_t)sb.st_atimespec.tv_nsec;
+		supplied |= L9PL_GETATTR_ATIME;
+	}
+	if (wanted & L9PL_GETATTR_MTIME) {
+		req->lr_resp.rgetattr.mtime_sec =
+		    (uint64_t)sb.st_mtimespec.tv_sec;
+		req->lr_resp.rgetattr.mtime_nsec =
+		    (uint64_t)sb.st_mtimespec.tv_nsec;
+		supplied |= L9PL_GETATTR_MTIME;
+	}
+	if (wanted & L9PL_GETATTR_CTIME) {
+		req->lr_resp.rgetattr.ctime_sec =
+		    (uint64_t)sb.st_ctimespec.tv_sec;
+		req->lr_resp.rgetattr.ctime_nsec =
+		    (uint64_t)sb.st_ctimespec.tv_nsec;
+		supplied |= L9PL_GETATTR_CTIME;
+	}
+	if (wanted & L9PL_GETATTR_BTIME) {
+		/* Without birth-time support, zeros are reported as valid. */
+#if defined(HAVE_BIRTHTIME)
+		req->lr_resp.rgetattr.btime_sec =
+		    (uint64_t)sb.st_birthtim.tv_sec;
+		req->lr_resp.rgetattr.btime_nsec =
+		    (uint64_t)sb.st_birthtim.tv_nsec;
+#else
+		req->lr_resp.rgetattr.btime_sec = 0;
+		req->lr_resp.rgetattr.btime_nsec = 0;
+#endif
+		supplied |= L9PL_GETATTR_BTIME;
+	}
+	/* No separate field is written for INO; only the bit is set. */
+	if (wanted & L9PL_GETATTR_INO)
+		supplied |= L9PL_GETATTR_INO;
+	if (wanted & L9PL_GETATTR_SIZE) {
+		req->lr_resp.rgetattr.size = (uint64_t)sb.st_size;
+		supplied |= L9PL_GETATTR_SIZE;
+	}
+	if (wanted & L9PL_GETATTR_BLOCKS) {
+		req->lr_resp.rgetattr.blksize = (uint64_t)sb.st_blksize;
+		req->lr_resp.rgetattr.blocks = (uint64_t)sb.st_blocks;
+		supplied |= L9PL_GETATTR_BLOCKS;
+	}
+	if (wanted & L9PL_GETATTR_GEN) {
+		req->lr_resp.rgetattr.gen = sb.st_gen;
+		supplied |= L9PL_GETATTR_GEN;
+	}
+	/* don't know what to do with data version yet */
+
+	generate_qid(&sb, &req->lr_resp.rgetattr.qid);
+	req->lr_resp.rgetattr.valid = supplied;
+	return (0);
+}
+
+/*
+ * Handle a setattr request: apply the attributes selected by the
+ * request's valid mask (mode, uid/gid, size, atime/mtime) to the fid's
+ * file.  As with wstat the changes are not atomic: an error can leave
+ * earlier changes applied.
+ *
+ * Returns 0 on success, EROFS when the backend is read-only, EISDIR
+ * for a size change on a directory, or the errno of the failing
+ * system call.
+ *
+ * Should combine some of this with wstat code.
+ */
+static int
+fs_setattr(void *softc, struct l9p_request *req)
+{
+	uint64_t mask;
+	struct fs_softc *sc = softc;
+	struct timespec ts[2];
+	struct fs_fid *file;
+	struct stat st;
+	int error = 0;
+	uid_t uid;
+	gid_t gid;
+	int fd;
+
+	file = req->lr_fid->lo_aux;
+	assert(file);
+
+	if (sc->fs_readonly)
+		return (EROFS);
+
+	/*
+	 * As with WSTAT we have atomicity issues.
+	 */
+	mask = req->lr_req.tsetattr.valid;
+
+	if (fstatat(file->ff_dirfd, file->ff_name, &st, AT_SYMLINK_NOFOLLOW)) {
+		error = errno;
+		goto out;
+	}
+
+	if ((mask & L9PL_SETATTR_SIZE) && S_ISDIR(st.st_mode)) {
+		error = EISDIR;
+		goto out;
+	}
+
+	if (mask & L9PL_SETATTR_MODE) {
+		if (fchmodat(file->ff_dirfd, file->ff_name,
+		    req->lr_req.tsetattr.mode & 0777,
+		    AT_SYMLINK_NOFOLLOW)) {
+			error = errno;
+			goto out;
+		}
+	}
+
+	if (mask & (L9PL_SETATTR_UID | L9PL_SETATTR_GID)) {
+		/* An id of -1 tells fchownat() to leave that id alone. */
+		uid = mask & L9PL_SETATTR_UID
+		    ? req->lr_req.tsetattr.uid
+		    : (uid_t)-1;
+
+		gid = mask & L9PL_SETATTR_GID
+		    ? req->lr_req.tsetattr.gid
+		    : (gid_t)-1;
+
+		if (fchownat(file->ff_dirfd, file->ff_name, uid, gid,
+		    AT_SYMLINK_NOFOLLOW)) {
+			error = errno;
+			goto out;
+		}
+	}
+
+	if (mask & L9PL_SETATTR_SIZE) {
+		/* Truncate follows symlinks, is this OK? */
+		fd = openat(file->ff_dirfd, file->ff_name, O_RDWR);
+		if (fd < 0) {
+			/* Report the open failure itself, not EBADF. */
+			error = errno;
+			goto out;
+		}
+		if (ftruncate(fd, (off_t)req->lr_req.tsetattr.size)) {
+			error = errno;
+			(void) close(fd);
+			goto out;
+		}
+		(void) close(fd);
+	}
+
+	if (mask & (L9PL_SETATTR_ATIME | L9PL_SETATTR_MTIME)) {
+		/* Start from the current times so an unselected one is kept. */
+		ts[0].tv_sec = st.st_atimespec.tv_sec;
+		ts[0].tv_nsec = st.st_atimespec.tv_nsec;
+		ts[1].tv_sec = st.st_mtimespec.tv_sec;
+		ts[1].tv_nsec = st.st_mtimespec.tv_nsec;
+
+		if (mask & L9PL_SETATTR_ATIME) {
+			if (mask & L9PL_SETATTR_ATIME_SET) {
+				ts[0].tv_sec = req->lr_req.tsetattr.atime_sec;
+				ts[0].tv_nsec = req->lr_req.tsetattr.atime_nsec;
+			} else {
+				if (clock_gettime(CLOCK_REALTIME, &ts[0]) != 0) {
+					error = errno;
+					goto out;
+				}
+			}
+		}
+
+		if (mask & L9PL_SETATTR_MTIME) {
+			if (mask & L9PL_SETATTR_MTIME_SET) {
+				ts[1].tv_sec = req->lr_req.tsetattr.mtime_sec;
+				ts[1].tv_nsec = req->lr_req.tsetattr.mtime_nsec;
+			} else {
+				if (clock_gettime(CLOCK_REALTIME, &ts[1]) != 0) {
+					error = errno;
+					goto out;
+				}
+			}
+		}
+
+		if (utimensat(file->ff_dirfd, file->ff_name, ts,
+		    AT_SYMLINK_NOFOLLOW)) {
+			error = errno;
+			goto out;
+		}
+	}
+out:
+	return (error);
+}
+
+/*
+ * Handle an xattrwalk request.  Not implemented by this backend:
+ * always returns EOPNOTSUPP.
+ */
+static int
+fs_xattrwalk(void *softc __unused, struct l9p_request *req __unused)
+{
+ return (EOPNOTSUPP);
+}
+
+/*
+ * Handle an xattrcreate request.  Not implemented by this backend:
+ * always returns EOPNOTSUPP.
+ */
+static int
+fs_xattrcreate(void *softc __unused, struct l9p_request *req __unused)
+{
+ return (EOPNOTSUPP);
+}
+
+/*
+ * Handle a readdir request: position the fid's directory stream at
+ * the requested offset, then pack one l9p_dirent per entry into the
+ * response message until the stream ends or an entry no longer fits.
+ * The response count is the message size after the last entry that
+ * was packed completely.  The stream is used with ff_mtx held.
+ *
+ * Returns ENOTDIR when the fid has no directory stream, 0 otherwise.
+ */
+static int
+fs_readdir(void *softc __unused, struct l9p_request *req)
+{
+	struct fs_fid *file = req->lr_fid->lo_aux;
+	struct l9p_message msg;
+	struct l9p_dirent entry;
+	struct dirent *dp;
+	struct stat sb;
+	uint32_t packed;
+
+	assert(file);
+
+	if (file->ff_dir == NULL)
+		return (ENOTDIR);
+
+	pthread_mutex_lock(&file->ff_mtx);
+
+	/*
+	 * It's not clear whether we can use the same trick for
+	 * discarding offsets here as we do in fs_read.  It
+	 * probably should work, we'll have to see if some
+	 * client(s) use the zero-offset thing to rescan without
+	 * clunking the directory first.
+	 *
+	 * Probably the thing to do is switch to calling
+	 * getdirentries() / getdents() directly, instead of
+	 * going through libc.
+	 */
+	if (req->lr_req.io.offset != 0)
+		seekdir(file->ff_dir, (long)req->lr_req.io.offset);
+	else
+		rewinddir(file->ff_dir);
+
+	l9p_init_msg(&msg, req, L9P_PACK);
+	packed = (uint32_t)msg.lm_size;	/* in case we get no entries */
+
+	for (;;) {
+		dp = readdir(file->ff_dir);
+		if (dp == NULL)
+			break;
+
+		/*
+		 * Although "." is forbidden in naming and ".." is
+		 * special cased, testing shows that we must transmit
+		 * them through readdir.  (For ".." at root, we
+		 * should perhaps alter the inode number, but not
+		 * yet.)
+		 */
+
+		/*
+		 * TODO: we do a full lstat here; could use dp->d_*
+		 * to construct the qid more efficiently, as long
+		 * as dp->d_type != DT_UNKNOWN.
+		 */
+		if (fs_lstatat(file, dp->d_name, &sb))
+			continue;	/* skip entries we cannot stat */
+
+		entry.qid.type = 0;
+		generate_qid(&sb, &entry.qid);
+		entry.offset = (uint64_t)telldir(file->ff_dir);
+		entry.type = dp->d_type;
+		entry.name = dp->d_name;
+
+		/* Update count only if we completely pack the dirent. */
+		if (l9p_pudirent(&msg, &entry) < 0)
+			break;
+		packed = (uint32_t)msg.lm_size;
+	}
+
+	pthread_mutex_unlock(&file->ff_mtx);
+
+	req->lr_resp.io.count = packed;
+	return (0);
+}
+
+/*
+ * Handle an fsync request: fsync the descriptor behind the fid, using
+ * the directory stream's descriptor when the fid has one and ff_fd
+ * otherwise.  Returns 0 or the fsync() errno.
+ */
+static int
+fs_fsync(void *softc __unused, struct l9p_request *req)
+{
+	struct fs_fid *file = req->lr_fid->lo_aux;
+	int fd;
+
+	assert(file);
+
+	if (file->ff_dir != NULL)
+		fd = dirfd(file->ff_dir);
+	else
+		fd = file->ff_fd;
+
+	if (fsync(fd) != 0)
+		return (errno);
+	return (0);
+}
+
+/*
+ * Handle a lock request.  No lock is actually taken: any of the three
+ * known lock types is answered with L9PL_LOCK_SUCCESS, anything else
+ * with EINVAL.
+ */
+static int
+fs_lock(void *softc __unused, struct l9p_request *req)
+{
+	bool known;
+
+	known = req->lr_req.tlock.type == L9PL_LOCK_TYPE_RDLOCK ||
+	    req->lr_req.tlock.type == L9PL_LOCK_TYPE_WRLOCK ||
+	    req->lr_req.tlock.type == L9PL_LOCK_TYPE_UNLOCK;
+	if (!known)
+		return (EINVAL);
+
+	req->lr_resp.rlock.status = L9PL_LOCK_SUCCESS;
+	return (0);
+}
+
+/*
+ * Handle a getlock request.
+ *
+ * Client wants to see if a request to lock a region would
+ * block.  This is, of course, not atomic anyway, so the
+ * op is useless.  QEMU simply says "unlocked!", so we do
+ * too: the response echoes the request with the type forced to
+ * L9PL_LOCK_TYPE_UNLOCK and a freshly allocated empty client_id.
+ *
+ * Returns 0 on success, EINVAL for an unknown lock type, or ENOMEM
+ * when the client_id string cannot be allocated.
+ */
+static int
+fs_getlock(void *softc __unused, struct l9p_request *req)
+{
+	char *client_id;
+
+	switch (req->lr_req.getlock.type) {
+	case L9PL_LOCK_TYPE_RDLOCK:
+	case L9PL_LOCK_TYPE_WRLOCK:
+	case L9PL_LOCK_TYPE_UNLOCK:
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	/* Don't hand a NULL string back with a success status. */
+	client_id = strdup("");	/* XXX what should go here? */
+	if (client_id == NULL)
+		return (ENOMEM);
+
+	req->lr_resp.getlock = req->lr_req.getlock;
+	req->lr_resp.getlock.type = L9PL_LOCK_TYPE_UNLOCK;
+	req->lr_resp.getlock.client_id = client_id;
+	return (0);
+}
+
+/*
+ * Handle a link request: create a hard link to the file behind lr_fid
+ * inside the directory behind lr_fid2, under the name given in the
+ * request.  Directories cannot be linked.
+ *
+ * Returns 0 on success, EROFS when the backend is read-only, EISDIR
+ * when the source is a directory, the fs_buildname() / check_access()
+ * error, or the errno of the failing system call.  On success the
+ * source fid's cached ACL is dropped.
+ */
+static int
+fs_link(void *softc, struct l9p_request *req)
+{
+	struct fs_softc *sc = softc;
+	struct l9p_fid *dir;
+	struct fs_fid *file;
+	struct fs_fid *dirf;
+	struct stat fst, tdst;
+	char *name;
+	char newname[MAXPATHLEN];
+	int error;
+
+	/* Creating a link modifies the tree; refuse on a read-only export. */
+	if (sc->fs_readonly)
+		return (EROFS);
+
+	/* N.B.: lr_fid is the file to link, lr_fid2 is the target dir */
+	dir = req->lr_fid2;
+	dirf = dir->lo_aux;
+	assert(dirf != NULL);
+
+	name = req->lr_req.tlink.name;
+	error = fs_buildname(dir, name, newname, sizeof(newname));
+	if (error)
+		return (error);
+
+	file = req->lr_fid->lo_aux;
+	assert(file != NULL);
+
+	if (fstatat(dirf->ff_dirfd, dirf->ff_name, &tdst,
+	    AT_SYMLINK_NOFOLLOW) != 0 ||
+	    fstatat(file->ff_dirfd, file->ff_name, &fst,
+	    AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+	if (S_ISDIR(fst.st_mode))
+		return (EISDIR);
+
+	/* The source is never a directory here, so this adds a file. */
+	fillacl(dirf);
+	error = check_access(L9P_ACE_ADD_FILE,
+	    dirf->ff_acl, &tdst, NULL, NULL, file->ff_ai, (gid_t)-1);
+	if (error)
+		return (error);
+
+	/* newname was built from the target dir, so resolve it there. */
+	if (linkat(file->ff_dirfd, file->ff_name, dirf->ff_dirfd,
+	    newname, 0) != 0)
+		error = errno;
+	else
+		dropacl(file);
+
+	return (error);
+}
+
+/*
+ * Handle a mkdir request: pass the directory fid, name, mode and gid
+ * from the request to fs_imkdir(), and on success store the new
+ * directory's qid in the response.
+ */
+static int
+fs_mkdir(void *softc, struct l9p_request *req)
+{
+	struct stat sb;
+	mode_t perm = (mode_t)req->lr_req.tmkdir.mode;
+	gid_t gid = req->lr_req.tmkdir.gid;
+	int rc;
+
+	rc = fs_imkdir(softc, req->lr_fid, req->lr_req.tmkdir.name, false,
+	    perm, gid, &sb);
+	if (rc != 0)
+		return (rc);
+
+	generate_qid(&sb, &req->lr_resp.rmkdir.qid);
+	return (0);
+}
+
+/*
+ * Handle a renameat request: rename oldname inside the directory
+ * behind lr_fid to newname inside the directory behind lr_fid2.
+ *
+ * Returns 0 on success, EROFS when the backend is read-only, ENOTDIR
+ * when a directory fid does not refer to a directory, the
+ * fs_buildname() / check_access() error, or the errno of the failing
+ * system call.
+ */
+static int
+fs_renameat(void *softc, struct l9p_request *req)
+{
+ struct fs_softc *sc = softc;
+ struct l9p_fid *olddir, *newdir;
+ struct l9p_acl *facl;
+ struct fs_fid *off, *nff;
+ struct stat odst, ndst, fst;
+ int32_t op;
+ bool reparenting;
+ char *onp, *nnp;
+ char onb[MAXPATHLEN], nnb[MAXPATHLEN];
+ int error;
+
+ if (sc->fs_readonly)
+ return (EROFS);
+
+ olddir = req->lr_fid;
+ newdir = req->lr_fid2;
+ assert(olddir != NULL && newdir != NULL);
+ off = olddir->lo_aux;
+ nff = newdir->lo_aux;
+ assert(off != NULL && nff != NULL);
+
+ /* Build the full old path (onb) and new path (nnb). */
+ onp = req->lr_req.trenameat.oldname;
+ nnp = req->lr_req.trenameat.newname;
+ error = fs_buildname(olddir, onp, onb, sizeof(onb));
+ if (error)
+ return (error);
+ error = fs_buildname(newdir, nnp, nnb, sizeof(nnb));
+ if (error)
+ return (error);
+ /* fst describes the object being renamed. */
+ if (fstatat(off->ff_dirfd, onb, &fst, AT_SYMLINK_NOFOLLOW) != 0)
+ return (errno);
+
+ /* Same fid, or same directory path, means no change of parent. */
+ reparenting = olddir != newdir &&
+ strcmp(off->ff_name, nff->ff_name) != 0;
+
+ if (fstatat(off->ff_dirfd, off->ff_name, &odst, AT_SYMLINK_NOFOLLOW) != 0)
+ return (errno);
+ if (!S_ISDIR(odst.st_mode))
+ return (ENOTDIR);
+ fillacl(off);
+
+ /*
+ * NOTE(review): access is only checked when reparenting; a rename
+ * within one directory reaches renameat() with no check_access()
+ * call in this function -- confirm that is intended.
+ */
+ if (reparenting) {
+ if (fstatat(nff->ff_dirfd, nff->ff_name, &ndst, AT_SYMLINK_NOFOLLOW) != 0)
+ return (errno);
+ if (!S_ISDIR(ndst.st_mode))
+ return (ENOTDIR);
+ facl = getacl(off, -1, onb);
+ fillacl(nff);
+
+ /* Leaving the old directory counts as an unlink from it. */
+ error = check_access(L9P_ACOP_UNLINK,
+ off->ff_acl, &odst, facl, &fst, off->ff_ai, (gid_t)-1);
+ l9p_acl_free(facl);
+ if (error)
+ return (error);
+ /* Entering the new directory counts as adding a file or subdir. */
+ op = S_ISDIR(fst.st_mode) ? L9P_ACE_ADD_SUBDIRECTORY :
+ L9P_ACE_ADD_FILE;
+ error = check_access(op,
+ nff->ff_acl, &ndst, NULL, NULL, nff->ff_ai, (gid_t)-1);
+ if (error)
+ return (error);
+ }
+
+ if (renameat(off->ff_dirfd, onb, nff->ff_dirfd, nnb))
+ error = errno;
+
+ return (error);
+}
+
+/*
+ * Handle an unlinkat request: after an unlink access check against
+ * both the directory and the named entry, remove the entry from the
+ * directory fid.  The L9PL_AT_REMOVEDIR request flag selects directory
+ * removal (AT_REMOVEDIR) instead of a plain unlink.
+ *
+ * Returns 0 on success, EROFS when the backend is read-only, the
+ * fs_buildname() / check_access() error, or the errno of the failing
+ * system call.
+ */
+static int
+fs_unlinkat(void *softc, struct l9p_request *req)
+{
+	struct fs_softc *sc = softc;
+	struct l9p_fid *dir = req->lr_fid;
+	struct l9p_acl *entry_acl;
+	struct fs_fid *dirff;
+	struct stat dir_sb, entry_sb;
+	char path[MAXPATHLEN];
+	int rmflags;
+	int rc;
+
+	if (sc->fs_readonly)
+		return (EROFS);
+
+	dirff = dir->lo_aux;
+	assert(dirff != NULL);
+
+	rc = fs_buildname(dir, req->lr_req.tunlinkat.name, path,
+	    sizeof(path));
+	if (rc != 0)
+		return (rc);
+
+	/* Stat the entry first, then its directory. */
+	if (fstatat(dirff->ff_dirfd, path, &entry_sb,
+	    AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+	if (fstatat(dirff->ff_dirfd, dirff->ff_name, &dir_sb,
+	    AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+
+	fillacl(dirff);
+	entry_acl = getacl(dirff, -1, path);
+	rc = check_access(L9P_ACOP_UNLINK,
+	    dirff->ff_acl, &dir_sb, entry_acl, &entry_sb, dirff->ff_ai,
+	    (gid_t)-1);
+	l9p_acl_free(entry_acl);
+	if (rc != 0)
+		return (rc);
+
+	if (req->lr_req.tunlinkat.flags & L9PL_AT_REMOVEDIR)
+		rmflags = AT_REMOVEDIR;
+	else
+		rmflags = 0;
+
+	if (unlinkat(dirff->ff_dirfd, path, rmflags) != 0)
+		return (errno);
+	return (0);
+}
+
+/*
+ * Release the backend data (fs_fid) attached to a fid: close its
+ * descriptor and directory stream, free its name, ACL and the fs_fid
+ * itself, then drop its reference on the shared authinfo, freeing the
+ * authinfo when this was the last reference.  Does nothing when the
+ * fid has no backend data.
+ */
+static void
+fs_freefid(void *softc __unused, struct l9p_fid *fid)
+{
+	struct fs_fid *ff = fid->lo_aux;
+	struct fs_authinfo *ai;
+	uint32_t remaining;
+
+	/* Nothing to do here */
+	if (ff == NULL)
+		return;
+
+	if (ff->ff_fd != -1)
+		close(ff->ff_fd);
+	if (ff->ff_dir)
+		closedir(ff->ff_dir);
+
+	pthread_mutex_destroy(&ff->ff_mtx);
+	free(ff->ff_name);
+	/* Remember the authinfo before the fs_fid goes away. */
+	ai = ff->ff_ai;
+	l9p_acl_free(ff->ff_acl);
+	free(ff);
+
+	pthread_mutex_lock(&ai->ai_mtx);
+	remaining = --ai->ai_refcnt;
+	pthread_mutex_unlock(&ai->ai_mtx);
+
+	if (remaining != 0) {
+		L9P_LOG(L9P_DEBUG, "authinfo %p now used by %lu",
+		    (void *)ai, (u_long)remaining);
+		return;
+	}
+
+	/*
+	 * We *were* the last ref, no one can have gained a ref.
+	 */
+	L9P_LOG(L9P_DEBUG, "dropped last ref to authinfo %p",
+	    (void *)ai);
+	pthread_mutex_destroy(&ai->ai_mtx);
+	free(ai);
+}
+
+/*
+ * Create a filesystem backend.
+ *
+ * Allocates an l9p_backend, points each of its operation hooks at the
+ * corresponding fs_* function in this file, and attaches a newly
+ * allocated fs_softc that records rootfd and the read-only flag.  The
+ * backend is stored in *backendp.
+ *
+ * Returns 0 on success.  Returns -1 when the attach mutex cannot be
+ * initialized (errno is set to the pthread error) or, with
+ * WITH_CASPER, when a Casper channel or service cannot be opened.
+ */
+int
+l9p_backend_fs_init(struct l9p_backend **backendp, int rootfd, bool ro)
+{
+ struct l9p_backend *backend;
+ struct fs_softc *sc;
+ int error;
+#if defined(WITH_CASPER)
+ cap_channel_t *capcas;
+#endif
+
+ /*
+ * One-time setup of the file-scope attach mutex.
+ * NOTE(review): the flag is tested and set without any locking --
+ * confirm this function is never called concurrently.
+ */
+ if (!fs_attach_mutex_inited) {
+ error = pthread_mutex_init(&fs_attach_mutex, NULL);
+ if (error) {
+ errno = error;
+ return (-1);
+ }
+ fs_attach_mutex_inited = true;
+ }
+
+ /* The l9p_malloc() results are used without a NULL check. */
+ backend = l9p_malloc(sizeof(*backend));
+ backend->attach = fs_attach;
+ backend->clunk = fs_clunk;
+ backend->create = fs_create;
+ backend->open = fs_open;
+ backend->read = fs_read;
+ backend->remove = fs_remove;
+ backend->stat = fs_stat;
+ backend->walk = fs_walk;
+ backend->write = fs_write;
+ backend->wstat = fs_wstat;
+ backend->statfs = fs_statfs;
+ backend->lopen = fs_lopen;
+ backend->lcreate = fs_lcreate;
+ backend->symlink = fs_symlink;
+ backend->mknod = fs_mknod;
+ backend->rename = fs_rename;
+ backend->readlink = fs_readlink;
+ backend->getattr = fs_getattr;
+ backend->setattr = fs_setattr;
+ backend->xattrwalk = fs_xattrwalk;
+ backend->xattrcreate = fs_xattrcreate;
+ backend->readdir = fs_readdir;
+ backend->fsync = fs_fsync;
+ backend->lock = fs_lock;
+ backend->getlock = fs_getlock;
+ backend->link = fs_link;
+ backend->mkdir = fs_mkdir;
+ backend->renameat = fs_renameat;
+ backend->unlinkat = fs_unlinkat;
+ backend->freefid = fs_freefid;
+
+ sc = l9p_malloc(sizeof(*sc));
+ sc->fs_rootfd = rootfd;
+ sc->fs_readonly = ro;
+ backend->softc = sc;
+
+#if defined(WITH_CASPER)
+ /*
+ * NOTE(review): each early return below leaves backend and sc
+ * allocated, and the later ones also leave capcas (and possibly
+ * fs_cappwd) open -- nothing is released on these failure paths.
+ */
+ capcas = cap_init();
+ if (capcas == NULL)
+ return (-1);
+
+ sc->fs_cappwd = cap_service_open(capcas, "system.pwd");
+ if (sc->fs_cappwd == NULL)
+ return (-1);
+
+ sc->fs_capgrp = cap_service_open(capcas, "system.grp");
+ if (sc->fs_capgrp == NULL)
+ return (-1);
+
+ cap_setpassent(sc->fs_cappwd, 1);
+ cap_setgroupent(sc->fs_capgrp, 1);
+ cap_close(capcas);
+#else
+ setpassent(1);
+#endif
+
+ *backendp = backend;
+ return (0);
+}
diff --git a/backend/fs.h b/backend/fs.h
new file mode 100644
index 000000000000..84b37171c271
--- /dev/null
+++ b/backend/fs.h
@@ -0,0 +1,37 @@
+
+/*
+ * Copyright 2016 Chris Torek <torek@ixsystems.com>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef LIB9P_BACKEND_FS_H
+#define LIB9P_BACKEND_FS_H
+
+#include <stdbool.h>
+#include "backend.h"
+
+/*
+ * Create a filesystem backend and store it in *backendp.
+ *
+ * rootfd and ro are recorded in the backend's private state as its
+ * root descriptor and read-only flag.
+ *
+ * Returns 0 on success and -1 on failure.
+ */
+int l9p_backend_fs_init(struct l9p_backend **backendp, int rootfd, bool ro);
+
+#endif /* LIB9P_BACKEND_FS_H */
diff --git a/connection.c b/connection.c
new file mode 100644
index 000000000000..20c27796b829
--- /dev/null
+++ b/connection.c
@@ -0,0 +1,215 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+#include <sys/queue.h>
+#include "lib9p.h"
+#include "lib9p_impl.h"
+#include "fid.h"
+#include "hashtable.h"
+#include "log.h"
+#include "threadpool.h"
+#include "backend/backend.h"
+
+/*
+ * Allocate a server object, attach the given backend to it and hand
+ * it back through *serverp.  The new server has an empty connection
+ * list and ls_max_version set to L9P_2000L.
+ *
+ * The l9p_calloc() result is used unchecked (NOTE(review): presumably
+ * l9p_calloc never returns NULL -- confirm).  Always returns 0.
+ */
+int
+l9p_server_init(struct l9p_server **serverp, struct l9p_backend *backend)
+{
+	struct l9p_server *srv = l9p_calloc(1, sizeof (*srv));
+
+	LIST_INIT(&srv->ls_conns);
+	srv->ls_backend = backend;
+	srv->ls_max_version = L9P_2000L;
+
+	*serverp = srv;
+	return (0);
+}
+
+/*
+ * Create a new connection belonging to `server' and return it through
+ * *conn.
+ *
+ * The connection starts with the default message size, its own thread
+ * pool of L9P_NUMTHREADS workers, and empty fid and request tables.
+ * It is linked onto the server's ls_conns list.
+ *
+ * Returns 0 on success, -1 if the connection cannot be allocated or
+ * its thread pool cannot be set up (in which case *conn is untouched).
+ */
+int
+l9p_connection_init(struct l9p_server *server, struct l9p_connection **conn)
+{
+	struct l9p_connection *lc;
+
+	assert(server != NULL);
+	assert(conn != NULL);
+
+	if ((lc = calloc(1, sizeof (*lc))) == NULL)
+		return (-1);
+
+	lc->lc_server = server;
+	lc->lc_msize = L9P_DEFAULT_MSIZE;
+
+	if (l9p_threadpool_init(&lc->lc_tp, L9P_NUMTHREADS) != 0) {
+		free(lc);
+		return (-1);
+	}
+
+	ht_init(&lc->lc_files, 100);
+	ht_init(&lc->lc_requests, 100);
+
+	LIST_INSERT_HEAD(&server->ls_conns, lc, lc_link);
+	*conn = lc;
+	return (0);
+}
+
+/*
+ * Unlink a connection from the list it was placed on by
+ * l9p_connection_init() and free the connection structure itself.
+ *
+ * Nothing else is released here (hash tables, thread pool, fids).
+ * NOTE(review): callers presumably run l9p_connection_close() first --
+ * confirm.
+ */
+void
+l9p_connection_free(struct l9p_connection *conn)
+{
+
+ LIST_REMOVE(conn, lc_link);
+ free(conn);
+}
+
+/*
+ * Accept one incoming 9P message on a connection.
+ *
+ * The caller's iovec array (niov entries) is copied into a freshly
+ * allocated request, the message is unpacked, the request is
+ * registered under its tag, response buffers are obtained from the
+ * transport, and the request is handed to the thread pool.
+ *
+ * aux is stored in the request (lr_aux) for the transport's use.
+ *
+ * On any failure the problem is logged and the message is dropped:
+ * the request is freed and nothing is passed to the thread pool.
+ */
+void
+l9p_connection_recv(struct l9p_connection *conn, const struct iovec *iov,
+    const size_t niov, void *aux)
+{
+	struct l9p_request *req;
+	size_t maxiov;
+
+	req = l9p_calloc(1, sizeof (struct l9p_request));
+	req->lr_aux = aux;
+	req->lr_conn = conn;
+
+	/*
+	 * lm_iov is fixed-size storage inside the request, so refuse
+	 * anything that would not fit instead of writing past its end.
+	 * Nothing has been unpacked yet, so only req itself needs freeing.
+	 */
+	maxiov = sizeof (req->lr_req_msg.lm_iov) /
+	    sizeof (req->lr_req_msg.lm_iov[0]);
+	if (niov > maxiov) {
+		L9P_LOG(L9P_WARNING, "too many iovecs in received message");
+		free(req);
+		return;
+	}
+
+	req->lr_req_msg.lm_mode = L9P_UNPACK;
+	req->lr_req_msg.lm_niov = niov;
+	memcpy(req->lr_req_msg.lm_iov, iov, sizeof (struct iovec) * niov);
+
+	req->lr_resp_msg.lm_mode = L9P_PACK;
+
+	if (l9p_pufcall(&req->lr_req_msg, &req->lr_req, conn->lc_version) != 0) {
+		L9P_LOG(L9P_WARNING, "cannot unpack received message");
+		goto drop;
+	}
+
+	if (ht_add(&conn->lc_requests, req->lr_req.hdr.tag, req)) {
+		L9P_LOG(L9P_WARNING, "client reusing outstanding tag %d",
+		    req->lr_req.hdr.tag);
+		goto drop;
+	}
+
+	if (conn->lc_lt.lt_get_response_buffer(req,
+	    req->lr_resp_msg.lm_iov,
+	    &req->lr_resp_msg.lm_niov,
+	    conn->lc_lt.lt_aux)) {
+		L9P_LOG(L9P_WARNING, "cannot obtain buffers for response");
+		/* The request was registered above; undo that first. */
+		ht_remove(&conn->lc_requests, req->lr_req.hdr.tag);
+		goto drop;
+	}
+
+	/*
+	 * NB: it's up to l9p_threadpool_run to decide whether
+	 * to queue the work or to run it immediately and wait
+	 * (it must do the latter for Tflush requests).
+	 */
+	l9p_threadpool_run(&conn->lc_tp, req);
+	return;
+
+drop:
+	l9p_freefcall(&req->lr_req);
+	free(req);
+}
+
+/*
+ * Shut down a connection: shut down its thread pool, dispose of every
+ * request still registered in lc_requests, release every fid in
+ * lc_files through the backend's freefid hook, and destroy both hash
+ * tables.
+ *
+ * Leftover requests are finished with l9p_respond(req, true, false),
+ * which the code labels the "no-answer path".  The text under
+ * "#ifdef notyet" is a placeholder sketch, not compilable code.
+ *
+ * The connection structure itself is not freed here.
+ */
+void
+l9p_connection_close(struct l9p_connection *conn)
+{
+ struct ht_iter iter;
+ struct l9p_fid *fid;
+ struct l9p_request *req;
+
+ L9P_LOG(L9P_DEBUG, "waiting for thread pool to shut down");
+ l9p_threadpool_shutdown(&conn->lc_tp);
+
+ /* Drain pending requests (if any) */
+ L9P_LOG(L9P_DEBUG, "draining pending requests");
+ ht_iter(&conn->lc_requests, &iter);
+ while ((req = ht_next(&iter)) != NULL) {
+#ifdef notyet
+ /* XXX would be good to know if there is anyone listening */
+ if (anyone listening) {
+ /* XXX crude - ops like Tclunk should succeed */
+ req->lr_error = EINTR;
+ l9p_respond(req, false, false);
+ } else
+#endif
+ l9p_respond(req, true, false); /* use no-answer path */
+ /* drop the table entry the iterator is currently positioned on */
+ ht_remove_at_iter(&iter);
+ }
+
+ /* Close opened files (if any) */
+ L9P_LOG(L9P_DEBUG, "closing opened files");
+ ht_iter(&conn->lc_files, &iter);
+ while ((fid = ht_next(&iter)) != NULL) {
+ /* let the backend release its per-fid state before the fid goes away */
+ conn->lc_server->ls_backend->freefid(
+ conn->lc_server->ls_backend->softc, fid);
+ free(fid);
+ ht_remove_at_iter(&iter);
+ }
+
+ ht_destroy(&conn->lc_requests);
+ ht_destroy(&conn->lc_files);
+}
+
+/*
+ * Reserve fid number `fid' on this connection.
+ *
+ * Returns the newly allocated, zero-filled fid structure, or NULL if
+ * that fid number is already present in the connection's fid table.
+ */
+struct l9p_fid *
+l9p_connection_alloc_fid(struct l9p_connection *conn, uint32_t fid)
+{
+	struct l9p_fid *newfid = l9p_calloc(1, sizeof (*newfid));
+
+	newfid->lo_fid = fid;
+
+	/*
+	 * The fid is deliberately not marked valid at this point.
+	 * Either the insert fails because the number is taken, or
+	 * the table now holds a reserved-but-invalid fid, which is
+	 * exactly what we want.
+	 */
+	if (ht_add(&conn->lc_files, fid, newfid) == 0)
+		return (newfid);
+
+	free(newfid);
+	return (NULL);
+}
+
+/*
+ * Dispose of a fid: let the backend release its per-fid state, take
+ * the fid out of the connection's fid table, and free it.
+ *
+ * The caller must already have marked the fid invalid; this is
+ * asserted.
+ */
+void
+l9p_connection_remove_fid(struct l9p_connection *conn, struct l9p_fid *fid)
+{
+
+	/* a fid that is still valid must never be torn down */
+	assert(!l9p_fid_isvalid(fid));
+
+	conn->lc_server->ls_backend->freefid(
+	    conn->lc_server->ls_backend->softc, fid);
+
+	ht_remove(&conn->lc_files, fid->lo_fid);
+	free(fid);
+}
diff --git a/example/Makefile b/example/Makefile
new file mode 100644
index 000000000000..94b218099c34
--- /dev/null
+++ b/example/Makefile
@@ -0,0 +1,10 @@
+PROG= server
+SRCS= server.c
+MAN=
+
+CFLAGS= -pthread -g -O0
+
+LDFLAGS=-L..
+LDADD= -lsbuf -l9p -lcasper -lcap_pwd -lcap_grp
+
+.include <bsd.prog.mk>
diff --git a/example/server.c b/example/server.c
new file mode 100644
index 000000000000..971c239c8526
--- /dev/null
+++ b/example/server.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <err.h>
+#include <unistd.h>
+#include "../lib9p.h"
+#include "../backend/fs.h"
+#include "../transport/socket.h"
+
+/*
+ * Example 9P server.
+ *
+ * usage: server [-h <host>] [-p <port>] [-r] <path>
+ *
+ * Opens <path> as a directory, builds a file-system backend on top of
+ * it (-r is passed to the backend as its `ro' argument), creates a
+ * server and starts it on host:port (defaults 0.0.0.0:564).
+ * Any setup failure terminates the program with exit status 1.
+ */
+int
+main(int argc, char **argv)
+{
+	struct l9p_backend *fs_backend;
+	struct l9p_server *server;
+	char *host = "0.0.0.0";
+	char *port = "564";
+	char *path;
+	bool ro = false;
+	int rootfd;
+	int opt;
+
+	while ((opt = getopt(argc, argv, "h:p:r")) != -1) {
+		switch (opt) {
+		case 'h':
+			host = optarg;
+			break;
+		case 'p':
+			port = optarg;
+			break;
+		case 'r':
+			ro = true;
+			break;
+		case '?':
+		default:
+			goto usage;
+		}
+	}
+
+	/* exactly the first non-option argument is used as the path */
+	if (optind >= argc)
+		goto usage;
+
+	path = argv[optind];
+
+	/*
+	 * POSIX requires an access mode in the open() flags; spell out
+	 * O_RDONLY rather than relying on it happening to be zero.
+	 */
+	rootfd = open(path, O_RDONLY | O_DIRECTORY);
+	if (rootfd < 0)
+		err(1, "cannot open root directory");
+
+	if (l9p_backend_fs_init(&fs_backend, rootfd, ro) != 0)
+		err(1, "cannot init backend");
+
+	if (l9p_server_init(&server, fs_backend) != 0)
+		err(1, "cannot create server");
+
+	server->ls_max_version = L9P_2000L;
+	if (l9p_start_server(server, host, port))
+		err(1, "l9p_start_server() failed");
+
+	/* XXX - we never get here, l9p_start_server does not return */
+	exit(0);
+
+usage:
+	errx(1, "Usage: server [-h <host>] [-p <port>] [-r] <path>");
+}
diff --git a/fcall.h b/fcall.h
new file mode 100644
index 000000000000..f779ea6ad56f
--- /dev/null
+++ b/fcall.h
@@ -0,0 +1,624 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * Based on libixp code: ©2007-2010 Kris Maglione <maglione.k at Gmail>
+ */
+
+#ifndef LIB9P_FCALL_H
+#define LIB9P_FCALL_H
+
+#include <stdint.h>
+
+#define L9P_MAX_WELEM 256
+
+/*
+ * Function call/reply (Tfoo/Rfoo) numbers.
+ *
+ * These are protocol code numbers, so the exact values
+ * matter. However, __FIRST and __LAST_PLUS_ONE are for
+ * debug code, and just need to encompass the entire range.
+ *
+ * Note that we rely (in the debug code) on Rfoo == Tfoo+1.
+ */
+enum l9p_ftype {
+ L9P__FIRST = 6, /* NB: must be <= all legal values */
+ L9P_TLERROR = 6, /* illegal; exists for parity with Rlerror */
+ L9P_RLERROR,
+ L9P_TSTATFS = 8,
+ L9P_RSTATFS,
+ L9P_TLOPEN = 12,
+ L9P_RLOPEN,
+ L9P_TLCREATE = 14,
+ L9P_RLCREATE,
+ L9P_TSYMLINK = 16,
+ L9P_RSYMLINK,
+ L9P_TMKNOD = 18,
+ L9P_RMKNOD,
+ L9P_TRENAME = 20,
+ L9P_RRENAME,
+ L9P_TREADLINK = 22,
+ L9P_RREADLINK,
+ L9P_TGETATTR = 24,
+ L9P_RGETATTR,
+ L9P_TSETATTR = 26,
+ L9P_RSETATTR,
+ L9P_TXATTRWALK = 30,
+ L9P_RXATTRWALK,
+ L9P_TXATTRCREATE = 32,
+ L9P_RXATTRCREATE,
+ L9P_TREADDIR = 40,
+ L9P_RREADDIR,
+ L9P_TFSYNC = 50,
+ L9P_RFSYNC,
+ L9P_TLOCK = 52,
+ L9P_RLOCK,
+ L9P_TGETLOCK = 54,
+ L9P_RGETLOCK,
+ L9P_TLINK = 70,
+ L9P_RLINK,
+ L9P_TMKDIR = 72,
+ L9P_RMKDIR,
+ L9P_TRENAMEAT = 74,
+ L9P_RRENAMEAT,
+ L9P_TUNLINKAT = 76,
+ L9P_RUNLINKAT,
+ L9P_TVERSION = 100,
+ L9P_RVERSION,
+ L9P_TAUTH = 102,
+ L9P_RAUTH,
+ L9P_TATTACH = 104,
+ L9P_RATTACH,
+ L9P_TERROR = 106, /* illegal */
+ L9P_RERROR,
+ L9P_TFLUSH = 108,
+ L9P_RFLUSH,
+ L9P_TWALK = 110,
+ L9P_RWALK,
+ L9P_TOPEN = 112,
+ L9P_ROPEN,
+ L9P_TCREATE = 114,
+ L9P_RCREATE,
+ L9P_TREAD = 116,
+ L9P_RREAD,
+ L9P_TWRITE = 118,
+ L9P_RWRITE,
+ L9P_TCLUNK = 120,
+ L9P_RCLUNK,
+ L9P_TREMOVE = 122,
+ L9P_RREMOVE,
+ L9P_TSTAT = 124,
+ L9P_RSTAT,
+ L9P_TWSTAT = 126,
+ L9P_RWSTAT,
+ L9P__LAST_PLUS_1, /* NB: must be last */
+};
+
+/*
+ * When a Tfoo request comes over the wire, we decode it
+ * (pack.c) from wire format into a request laid out in
+ * a "union l9p_fcall" object. This object is not in wire
+ * format, but rather in something more convenient for us
+ * to operate on.
+ *
+ * We then dispatch the request (request.c, backend/fs.c) and
+ * use another "union l9p_fcall" object to build a reply.
+ * The reply is converted to wire format on the way back out
+ * (pack.c again).
+ *
+ * All sub-objects start with a header containing the request
+ * or reply type code and two-byte tag, and whether or not it
+ * is needed, a four-byte fid.
+ *
+ * What this means here is that the data structures within
+ * the union can be shared across various requests and replies.
+ * For instance, replies to OPEN, CREATE, LCREATE, LOPEN, MKDIR, and
+ * SYMLINK are all fairly similar (providing a qid and sometimes
+ * an iounit) and hence can all use the l9p_f_ropen structure.
+ * Which structures are used for which operations is somewhat
+ * arbitrary; for programming ease, if an operation shares a
+ * data structure, it still has its own name: there are union
+ * members named ropen, rcreate, rlcreate, rlopen, rmkdir, and
+ * rsymlink, even though all use struct l9p_f_ropen.
+ *
+ * The big exception to the above rule is struct l9p_f_io, which
+ * is used as both request and reply for all of READ, WRITE, and
+ * READDIR. Moreover, the READDIR reply must be pre-packed into
+ * wire format (it is handled like raw data a la READ).
+ *
+ * Some request messages (e.g., TREADLINK) fit in a header, having
+ * just type code, tag, and fid. These have no separate data
+ * structure, nor union member name. Similarly, some reply
+ * messages (e.g., RCLUNK, RREMOVE, RRENAME) have just the type
+ * code and tag.
+ */
+
+/*
+ * Type code bits in (the first byte of) a qid.
+ */
+enum l9p_qid_type {
+ L9P_QTDIR = 0x80, /* type bit for directories */
+ L9P_QTAPPEND = 0x40, /* type bit for append only files */
+ L9P_QTEXCL = 0x20, /* type bit for exclusive use files */
+ L9P_QTMOUNT = 0x10, /* type bit for mounted channel */
+ L9P_QTAUTH = 0x08, /* type bit for authentication file */
+ L9P_QTTMP = 0x04, /* type bit for non-backed-up file */
+ L9P_QTSYMLINK = 0x02, /* type bit for symbolic link */
+ L9P_QTFILE = 0x00 /* type bits for plain file */
+};
+
+/*
+ * Extra permission bits in create and file modes (stat).
+ */
+/*
+ * L9P_DMDIR is a macro rather than a member of the enum below,
+ * presumably because its value (0x80000000) does not fit in an int
+ * and C enumeration constants must be representable as int.
+ */
+#define L9P_DMDIR 0x80000000
+enum {
+ L9P_DMAPPEND = 0x40000000,
+ L9P_DMEXCL = 0x20000000,
+ L9P_DMMOUNT = 0x10000000,
+ L9P_DMAUTH = 0x08000000,
+ L9P_DMTMP = 0x04000000,
+ L9P_DMSYMLINK = 0x02000000,
+ /* 9P2000.u extensions */
+ L9P_DMDEVICE = 0x00800000,
+ L9P_DMNAMEDPIPE = 0x00200000,
+ L9P_DMSOCKET = 0x00100000,
+ L9P_DMSETUID = 0x00080000,
+ L9P_DMSETGID = 0x00040000,
+};
+
+/*
+ * Open/create mode bits in 9P2000 and 9P2000.u operations
+ * (not Linux lopen and lcreate flags, which are different).
+ * Note that the mode field is only one byte wide.
+ */
+enum l9p_omode {
+ L9P_OREAD = 0, /* open for read */
+ L9P_OWRITE = 1, /* write */
+ L9P_ORDWR = 2, /* read and write */
+ L9P_OEXEC = 3, /* execute, == read but check execute permission */
+ L9P_OACCMODE = 3, /* mask for the above access-mode bits */
+ L9P_OTRUNC = 16, /* or'ed in (except for exec), truncate file first */
+ L9P_OCEXEC = 32, /* or'ed in, close on exec */
+ L9P_ORCLOSE = 64, /* or'ed in, remove on close */
+ L9P_ODIRECT = 128, /* or'ed in, direct access */
+};
+
+/*
+ * Flag bits in 9P2000.L operations (Tlopen, Tlcreate). These are
+ * basically just the Linux O_* flags. The bottom 2 bits (the access
+ * mode) are the same as for l9p_omode, although open-for-exec is not
+ * used: instead, the client does a Tgetattr and checks the mode for
+ * execute bits, then just opens for reading.
+ *
+ * Each L_O_xxx is just value O_xxx has on Linux in <fcntl.h>;
+ * not all are necessarily used. From observation, we do get
+ * L_O_CREAT and L_O_EXCL when creating with exclusive, and always
+ * get L_O_LARGEFILE. We do get L_O_APPEND when opening for
+ * append. We also get both L_O_DIRECT and L_O_DIRECTORY set
+ * when opening directories.
+ *
+ * We probably never get L_O_NOCTTY which makes no sense, and
+ * some of the other options may need to be handled on the client.
+ */
+enum l9p_l_o_flags {
+ L9P_L_O_CREAT = 000000100U,
+ L9P_L_O_EXCL = 000000200U,
+ L9P_L_O_NOCTTY = 000000400U,
+ L9P_L_O_TRUNC = 000001000U,
+ L9P_L_O_APPEND = 000002000U,
+ L9P_L_O_NONBLOCK = 000004000U,
+ L9P_L_O_DSYNC = 000010000U,
+ L9P_L_O_FASYNC = 000020000U,
+ L9P_L_O_DIRECT = 000040000U,
+ L9P_L_O_LARGEFILE = 000100000U,
+ L9P_L_O_DIRECTORY = 000200000U,
+ L9P_L_O_NOFOLLOW = 000400000U,
+ L9P_L_O_NOATIME = 001000000U,
+ L9P_L_O_CLOEXEC = 002000000U,
+ L9P_L_O_SYNC = 004000000U,
+ L9P_L_O_PATH = 010000000U,
+ L9P_L_O_TMPFILE = 020000000U,
+};
+
+/*
+ * Common header placed at the start of every request/reply structure
+ * below, so it can be read no matter which member of union l9p_fcall
+ * is in use.
+ */
+struct l9p_hdr {
+ uint8_t type; /* request or reply type code */
+ uint16_t tag; /* two-byte tag */
+ uint32_t fid; /* four-byte fid; present whether or not the op needs it */
+};
+
+/*
+ * Decoded (not wire-format) qid.
+ */
+struct l9p_qid {
+ uint8_t type; /* L9P_QT* bits, see enum l9p_qid_type */
+ uint32_t version; /* NOTE(review): presumably the 9P per-file version -- confirm */
+ uint64_t path; /* NOTE(review): presumably the 9P unique file id -- confirm */
+};
+
+/*
+ * Decoded stat data, carried by struct l9p_f_rstat and
+ * struct l9p_f_twstat.
+ */
+struct l9p_stat {
+ uint16_t type;
+ uint32_t dev;
+ struct l9p_qid qid;
+ uint32_t mode; /* file mode; may include the L9P_DM* bits */
+ uint32_t atime;
+ uint32_t mtime;
+ uint64_t length;
+ char *name;
+ char *uid;
+ char *gid;
+ char *muid;
+ /* NOTE(review): the fields below look like the 9P2000.u additions -- confirm */
+ char *extension;
+ uint32_t n_uid;
+ uint32_t n_gid;
+ uint32_t n_muid;
+};
+
+/* NOTE(review): presumably the value reported in l9p_statfs.type -- confirm */
+#define L9P_FSTYPE 0x01021997
+
+/*
+ * File system statistics, carried by struct l9p_f_rstatfs.
+ */
+struct l9p_statfs {
+ uint32_t type; /* file system type */
+ uint32_t bsize; /* block size for I/O */
+ uint64_t blocks; /* file system size (bsize-byte blocks) */
+ uint64_t bfree; /* free blocks in fs */
+ uint64_t bavail; /* free blocks avail to non-superuser */
+ uint64_t files; /* file nodes in file system (# inodes) */
+ uint64_t ffree; /* free file nodes in fs */
+ uint64_t fsid; /* file system identifier */
+ uint32_t namelen; /* maximum length of filenames */
+};
+
+struct l9p_f_version {
+ struct l9p_hdr hdr;
+ uint32_t msize;
+ char *version;
+};
+
+struct l9p_f_tflush {
+ struct l9p_hdr hdr;
+ uint16_t oldtag;
+};
+
+struct l9p_f_error {
+ struct l9p_hdr hdr;
+ char *ename;
+ uint32_t errnum;
+};
+
+struct l9p_f_ropen {
+ struct l9p_hdr hdr;
+ struct l9p_qid qid;
+ uint32_t iounit;
+};
+
+struct l9p_f_rauth {
+ struct l9p_hdr hdr;
+ struct l9p_qid aqid;
+};
+
+/*
+ * Request body shared by Tattach and Tauth (union members tattach
+ * and tauth).
+ */
+struct l9p_f_attach {
+ struct l9p_hdr hdr;
+ uint32_t afid; /* auth fid, or L9P_NOFID */
+ char *uname;
+ char *aname;
+ uint32_t n_uname; /* numeric user name, or L9P_NONUNAME */
+};
+#define L9P_NOFID ((uint32_t)-1) /* in Tattach, no auth fid */
+#define L9P_NONUNAME ((uint32_t)-1) /* in Tattach, no n_uname */
+
+struct l9p_f_tcreate {
+ struct l9p_hdr hdr;
+ uint32_t perm;
+ char *name;
+ uint8_t mode; /* +Topen */
+ char *extension;
+};
+
+struct l9p_f_twalk {
+ struct l9p_hdr hdr;
+ uint32_t newfid;
+ uint16_t nwname;
+ char *wname[L9P_MAX_WELEM];
+};
+
+struct l9p_f_rwalk {
+ struct l9p_hdr hdr;
+ uint16_t nwqid;
+ struct l9p_qid wqid[L9P_MAX_WELEM];
+};
+
+struct l9p_f_io {
+ struct l9p_hdr hdr;
+ uint64_t offset; /* Tread, Twrite, Treaddir */
+ uint32_t count; /* Tread, Twrite, Rread, Treaddir, Rreaddir */
+};
+
+struct l9p_f_rstat {
+ struct l9p_hdr hdr;
+ struct l9p_stat stat;
+};
+
+struct l9p_f_twstat {
+ struct l9p_hdr hdr;
+ struct l9p_stat stat;
+};
+
+struct l9p_f_rstatfs {
+ struct l9p_hdr hdr;
+ struct l9p_statfs statfs;
+};
+
+/* Used for Tlcreate, Tlopen, Tmkdir, Tunlinkat. */
+struct l9p_f_tlcreate {
+ struct l9p_hdr hdr;
+ char *name; /* Tlcreate, Tmkdir, Tunlinkat */
+ uint32_t flags; /* Tlcreate, Tlopen, Tmkdir, Tunlinkat */
+ uint32_t mode; /* Tlcreate, Tmkdir */
+ uint32_t gid; /* Tlcreate, Tmkdir */
+};
+
+struct l9p_f_tsymlink {
+ struct l9p_hdr hdr;
+ char *name;
+ char *symtgt;
+ uint32_t gid;
+};
+
+struct l9p_f_tmknod {
+ struct l9p_hdr hdr;
+ char *name;
+ uint32_t mode;
+ uint32_t major;
+ uint32_t minor;
+ uint32_t gid;
+};
+
+struct l9p_f_trename {
+ struct l9p_hdr hdr;
+ uint32_t dfid;
+ char *name;
+};
+
+struct l9p_f_rreadlink {
+ struct l9p_hdr hdr;
+ char *target;
+};
+
+struct l9p_f_tgetattr {
+ struct l9p_hdr hdr;
+ uint64_t request_mask;
+};
+
+struct l9p_f_rgetattr {
+ struct l9p_hdr hdr;
+ uint64_t valid;
+ struct l9p_qid qid;
+ uint32_t mode;
+ uint32_t uid;
+ uint32_t gid;
+ uint64_t nlink;
+ uint64_t rdev;
+ uint64_t size;
+ uint64_t blksize;
+ uint64_t blocks;
+ uint64_t atime_sec;
+ uint64_t atime_nsec;
+ uint64_t mtime_sec;
+ uint64_t mtime_nsec;
+ uint64_t ctime_sec;
+ uint64_t ctime_nsec;
+ uint64_t btime_sec;
+ uint64_t btime_nsec;
+ uint64_t gen;
+ uint64_t data_version;
+};
+
+/* Fields in req->request_mask and reply->valid for Tgetattr, Rgetattr. */
+enum l9pl_getattr_flags {
+ L9PL_GETATTR_MODE = 0x00000001,
+ L9PL_GETATTR_NLINK = 0x00000002,
+ L9PL_GETATTR_UID = 0x00000004,
+ L9PL_GETATTR_GID = 0x00000008,
+ L9PL_GETATTR_RDEV = 0x00000010,
+ L9PL_GETATTR_ATIME = 0x00000020,
+ L9PL_GETATTR_MTIME = 0x00000040,
+ L9PL_GETATTR_CTIME = 0x00000080,
+ L9PL_GETATTR_INO = 0x00000100,
+ L9PL_GETATTR_SIZE = 0x00000200,
+ L9PL_GETATTR_BLOCKS = 0x00000400,
+ /* everything up to and including BLOCKS is BASIC */
+ L9PL_GETATTR_BASIC = L9PL_GETATTR_MODE |
+ L9PL_GETATTR_NLINK |
+ L9PL_GETATTR_UID |
+ L9PL_GETATTR_GID |
+ L9PL_GETATTR_RDEV |
+ L9PL_GETATTR_ATIME |
+ L9PL_GETATTR_MTIME |
+ L9PL_GETATTR_CTIME |
+ L9PL_GETATTR_INO |
+ L9PL_GETATTR_SIZE |
+ L9PL_GETATTR_BLOCKS,
+ L9PL_GETATTR_BTIME = 0x00000800,
+ L9PL_GETATTR_GEN = 0x00001000,
+ L9PL_GETATTR_DATA_VERSION = 0x00002000,
+ /* BASIC + birthtime + gen + data-version = ALL */
+ L9PL_GETATTR_ALL = L9PL_GETATTR_BASIC |
+ L9PL_GETATTR_BTIME |
+ L9PL_GETATTR_GEN |
+ L9PL_GETATTR_DATA_VERSION,
+};
+
+struct l9p_f_tsetattr {
+ struct l9p_hdr hdr;
+ uint32_t valid;
+ uint32_t mode;
+ uint32_t uid;
+ uint32_t gid;
+ uint64_t size;
+ uint64_t atime_sec; /* if valid & L9PL_SETATTR_ATIME_SET */
+ uint64_t atime_nsec; /* (else use on-server time) */
+ uint64_t mtime_sec; /* if valid & L9PL_SETATTR_MTIME_SET */
+ uint64_t mtime_nsec; /* (else use on-server time) */
+};
+
+/* Fields in req->valid for Tsetattr. */
+enum l9pl_setattr_flags {
+ L9PL_SETATTR_MODE = 0x00000001,
+ L9PL_SETATTR_UID = 0x00000002,
+ L9PL_SETATTR_GID = 0x00000004,
+ L9PL_SETATTR_SIZE = 0x00000008,
+ L9PL_SETATTR_ATIME = 0x00000010,
+ L9PL_SETATTR_MTIME = 0x00000020,
+ L9PL_SETATTR_CTIME = 0x00000040,
+ L9PL_SETATTR_ATIME_SET = 0x00000080,
+ L9PL_SETATTR_MTIME_SET = 0x00000100,
+};
+
+struct l9p_f_txattrwalk {
+ struct l9p_hdr hdr;
+ uint32_t newfid;
+ char *name;
+};
+
+struct l9p_f_rxattrwalk {
+ struct l9p_hdr hdr;
+ uint64_t size;
+};
+
+struct l9p_f_txattrcreate {
+ struct l9p_hdr hdr;
+ char *name;
+ uint64_t attr_size;
+ uint32_t flags;
+};
+
+struct l9p_f_tlock {
+ struct l9p_hdr hdr;
+ uint8_t type; /* from l9pl_lock_type */
+ uint32_t flags; /* from l9pl_lock_flags */
+ uint64_t start;
+ uint64_t length;
+ uint32_t proc_id;
+ char *client_id;
+};
+
+enum l9pl_lock_type {
+ L9PL_LOCK_TYPE_RDLOCK = 0,
+ L9PL_LOCK_TYPE_WRLOCK = 1,
+ L9PL_LOCK_TYPE_UNLOCK = 2,
+};
+
+/*
+ * Values for l9p_f_tlock.flags.  Despite the L9PL_LOCK_TYPE_ prefix
+ * these are flag values, not lock types; they overlap numerically
+ * with enum l9pl_lock_type, which is used for the separate
+ * l9p_f_tlock.type field.
+ */
+enum l9pl_lock_flags {
+ L9PL_LOCK_TYPE_BLOCK = 1,
+ L9PL_LOCK_TYPE_RECLAIM = 2,
+};
+
+struct l9p_f_rlock {
+ struct l9p_hdr hdr;
+ uint8_t status; /* from l9pl_lock_status */
+};
+
+enum l9pl_lock_status {
+ L9PL_LOCK_SUCCESS = 0,
+ L9PL_LOCK_BLOCKED = 1,
+ L9PL_LOCK_ERROR = 2,
+ L9PL_LOCK_GRACE = 3,
+};
+
+struct l9p_f_getlock {
+ struct l9p_hdr hdr;
+ uint8_t type; /* from l9pl_lock_type */
+ uint64_t start;
+ uint64_t length;
+ uint32_t proc_id;
+ char *client_id;
+};
+
+struct l9p_f_tlink {
+ struct l9p_hdr hdr;
+ uint32_t dfid;
+ char *name;
+};
+
+struct l9p_f_trenameat {
+ struct l9p_hdr hdr;
+ char *oldname;
+ uint32_t newdirfid;
+ char *newname;
+};
+
+/*
+ * Flags in Tunlinkat (which re-uses f_tlcreate data structure but
+ * with different meaning).
+ */
+enum l9p_l_unlinkat_flags {
+ /* not sure if any other AT_* flags are passed through */
+ L9PL_AT_REMOVEDIR = 0x0200,
+};
+
+/*
+ * Decoded form of any request or reply.
+ *
+ * Every member structure begins with a struct l9p_hdr, so the `hdr'
+ * member can be used to read the type, tag and fid regardless of
+ * which operation the object holds.  Several operations share one
+ * structure type but still get their own member name (for example
+ * ropen, rcreate, rlopen, rlcreate, rmkdir and rsymlink are all
+ * struct l9p_f_ropen).  Operations that need nothing beyond the
+ * header have no member of their own.
+ */
+union l9p_fcall {
+ struct l9p_hdr hdr;
+ struct l9p_f_version version;
+ struct l9p_f_tflush tflush;
+ struct l9p_f_ropen ropen;
+ struct l9p_f_ropen rcreate;
+ struct l9p_f_ropen rattach;
+ struct l9p_f_error error;
+ struct l9p_f_rauth rauth;
+ struct l9p_f_attach tattach;
+ struct l9p_f_attach tauth;
+ struct l9p_f_tcreate tcreate;
+ struct l9p_f_tcreate topen;
+ struct l9p_f_twalk twalk;
+ struct l9p_f_rwalk rwalk;
+ struct l9p_f_twstat twstat;
+ struct l9p_f_rstat rstat;
+ struct l9p_f_rstatfs rstatfs;
+ struct l9p_f_tlcreate tlopen;
+ struct l9p_f_ropen rlopen;
+ struct l9p_f_tlcreate tlcreate;
+ struct l9p_f_ropen rlcreate;
+ struct l9p_f_tsymlink tsymlink;
+ struct l9p_f_ropen rsymlink;
+ struct l9p_f_tmknod tmknod;
+ struct l9p_f_ropen rmknod;
+ struct l9p_f_trename trename;
+ struct l9p_f_rreadlink rreadlink;
+ struct l9p_f_tgetattr tgetattr;
+ struct l9p_f_rgetattr rgetattr;
+ struct l9p_f_tsetattr tsetattr;
+ struct l9p_f_txattrwalk txattrwalk;
+ struct l9p_f_rxattrwalk rxattrwalk;
+ struct l9p_f_txattrcreate txattrcreate;
+ struct l9p_f_tlock tlock;
+ struct l9p_f_rlock rlock;
+ struct l9p_f_getlock getlock;
+ struct l9p_f_tlink tlink;
+ struct l9p_f_tlcreate tmkdir;
+ struct l9p_f_ropen rmkdir;
+ struct l9p_f_trenameat trenameat;
+ struct l9p_f_tlcreate tunlinkat;
+ struct l9p_f_io io; /* Tread/Rread, Twrite/Rwrite, Treaddir/Rreaddir */
+};
+
+#endif /* LIB9P_FCALL_H */
diff --git a/fid.h b/fid.h
new file mode 100644
index 000000000000..cdfdd7ec93d0
--- /dev/null
+++ b/fid.h
@@ -0,0 +1,160 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef LIB9P_FID_H
+#define LIB9P_FID_H
+
+#include <stdbool.h>
+
+/*
+ * Data structure for a fid. All active fids in one session
+ * are stored in a hash table; the hash table provides the
+ * iterator to process them. (See also l9p_connection in lib9p.h.)
+ *
+ * The back-end code has additional data per fid, found via
+ * lo_aux. Currently this is allocated with a separate calloc().
+ *
+ * Most fids represent a file or directory, but a few are special
+ * purpose, including the auth fid from Tauth+Tattach, and the
+ * fids used for extended attributes. We have our own set of
+ * flags here in lo_flags.
+ *
+ * Note that all new fids start as potentially-valid (reserving
+ * their 32-bit fid value), but not actually-valid. If another
+ * (threaded) op is invoked on a not-yet-valid fid, the fid cannot
+ * be used. A fid can also be locked against other threads, in
+ * which case they must wait for it: this happens during create
+ * and open, which on success result in the fid changing from a
+ * directory to a file. (At least, all this applies in principle
+ * -- we're currently single-threaded per connection so the locks
+ * are nop-ed out and the valid bit is mainly just for debug.)
+ *
+ * Fids that are "open" (the underlying file or directory is open)
+ * are marked as well.
+ *
+ * Locking is managed by the front end (request.c); validation
+ * and type-marking can be done by either side as needed.
+ *
+ * Fid types and validity are manipulated by set* and unset*
+ * functions, and tested by is* ops. Note that we only
+ * distinguish between "directory" and "not directory" at this
+ * level, i.e., symlinks and devices are just "not a directory
+ * fid". Also, fids cannot be unset as auth or xattr fids,
+ * nor can an open fid become closed, except by being clunked.
+ * While files should not normally become directories, it IS normal
+ * for directory fids to become file fids due to Twalk operations.
+ *
+ * (These accessor functions are just to leave wiggle room for
+ * different future implementations.)
+ */
+/*
+ * One fid, as stored in a connection's fid hash table.
+ *
+ * NOTE(review): this header uses uint32_t but includes only
+ * <stdbool.h>; it relies on the including file having pulled in
+ * <stdint.h> first.
+ */
+struct l9p_fid {
+ void *lo_aux; /* back-end's private per-fid data */
+ uint32_t lo_fid; /* the 32-bit fid number */
+ uint32_t lo_flags; /* volatile atomic_t when threaded? */
+};
+
+/*
+ * Bits stored in l9p_fid.lo_flags; use the l9p_fid_is*, l9p_fid_set*
+ * and l9p_fid_unset* accessors below rather than touching them
+ * directly.
+ */
+enum l9p_lo_flags {
+ L9P_LO_ISAUTH = 0x01, /* auth fid (from Tauth+Tattach) */
+ L9P_LO_ISDIR = 0x02, /* directory, as opposed to anything else */
+ L9P_LO_ISOPEN = 0x04, /* underlying file or directory is open */
+ L9P_LO_ISVALID = 0x08, /* fid is actually valid, not merely reserved */
+ L9P_LO_ISXATTR = 0x10, /* fid used for extended attributes */
+};
+
+/* Report whether the fid has L9P_LO_ISAUTH set, i.e. is an auth fid. */
+static inline bool
+l9p_fid_isauth(struct l9p_fid *fid)
+{
+	uint32_t authbit = fid->lo_flags & L9P_LO_ISAUTH;
+
+	return (authbit != 0);
+}
+
+/* Mark the fid as an auth fid.  There is no matching unset operation. */
+static inline void
+l9p_fid_setauth(struct l9p_fid *fid)
+{
+	fid->lo_flags = fid->lo_flags | L9P_LO_ISAUTH;
+}
+
+/* Report whether the fid has L9P_LO_ISDIR set, i.e. is a directory fid. */
+static inline bool
+l9p_fid_isdir(struct l9p_fid *fid)
+{
+	uint32_t dirbit = fid->lo_flags & L9P_LO_ISDIR;
+
+	return (dirbit != 0);
+}
+
+/* Mark the fid as a directory fid. */
+static inline void
+l9p_fid_setdir(struct l9p_fid *fid)
+{
+	fid->lo_flags = fid->lo_flags | L9P_LO_ISDIR;
+}
+
+/* Clear the directory mark; the other flag bits are left untouched. */
+static inline void
+l9p_fid_unsetdir(struct l9p_fid *fid)
+{
+	const uint32_t keep = ~(uint32_t)L9P_LO_ISDIR;
+
+	fid->lo_flags = fid->lo_flags & keep;
+}
+
+/* Report whether the fid has L9P_LO_ISOPEN set, i.e. is marked open. */
+static inline bool
+l9p_fid_isopen(struct l9p_fid *fid)
+{
+	uint32_t openbit = fid->lo_flags & L9P_LO_ISOPEN;
+
+	return (openbit != 0);
+}
+
+/* Mark the fid as open.  There is no matching unset operation. */
+static inline void
+l9p_fid_setopen(struct l9p_fid *fid)
+{
+	fid->lo_flags = fid->lo_flags | L9P_LO_ISOPEN;
+}
+
+/* Report whether the fid has L9P_LO_ISVALID set. */
+static inline bool
+l9p_fid_isvalid(struct l9p_fid *fid)
+{
+	uint32_t validbit = fid->lo_flags & L9P_LO_ISVALID;
+
+	return (validbit != 0);
+}
+
+/* Mark the fid as valid. */
+static inline void
+l9p_fid_setvalid(struct l9p_fid *fid)
+{
+	fid->lo_flags = fid->lo_flags | L9P_LO_ISVALID;
+}
+
+/* Clear the valid mark; the other flag bits are left untouched. */
+static inline void
+l9p_fid_unsetvalid(struct l9p_fid *fid)
+{
+	const uint32_t keep = ~(uint32_t)L9P_LO_ISVALID;
+
+	fid->lo_flags = fid->lo_flags & keep;
+}
+
+/* Report whether the fid has L9P_LO_ISXATTR set, i.e. is an xattr fid. */
+static inline bool
+l9p_fid_isxattr(struct l9p_fid *fid)
+{
+	uint32_t xattrbit = fid->lo_flags & L9P_LO_ISXATTR;
+
+	return (xattrbit != 0);
+}
+
+/* Mark the fid as an xattr fid.  There is no matching unset operation. */
+static inline void
+l9p_fid_setxattr(struct l9p_fid *fid)
+{
+	fid->lo_flags = fid->lo_flags | L9P_LO_ISXATTR;
+}
+
+#endif /* LIB9P_FID_H */
diff --git a/genacl.c b/genacl.c
new file mode 100644
index 000000000000..fed3d2ba10ce
--- /dev/null
+++ b/genacl.c
@@ -0,0 +1,720 @@
+/*
+ * Copyright 2016 Chris Torek <torek@ixsystems.com>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/acl.h>
+#include <sys/stat.h>
+
+#include "lib9p.h"
+#include "lib9p_impl.h"
+#include "genacl.h"
+#include "fid.h"
+#include "log.h"
+
+typedef int econvertfn(acl_entry_t, struct l9p_ace *);
+
+#ifndef __APPLE__
+static struct l9p_acl *l9p_new_acl(uint32_t acetype, uint32_t aceasize);
+static struct l9p_acl *l9p_growacl(struct l9p_acl *acl, uint32_t aceasize);
+static int l9p_count_aces(acl_t sysacl);
+static struct l9p_acl *l9p_sysacl_to_acl(int, acl_t, econvertfn *);
+#endif
+static bool l9p_ingroup(gid_t tid, gid_t gid, gid_t *gids, size_t ngids);
+static int l9p_check_aces(int32_t mask, struct l9p_acl *acl, struct stat *st,
+ uid_t uid, gid_t gid, gid_t *gids, size_t ngids);
+
+/*
+ * Release an ACL.  The header and its trailing ACE array are one
+ * heap allocation (see l9p_new_acl() and l9p_growacl()), so a
+ * single free() releases everything.
+ */
+void
+l9p_acl_free(struct l9p_acl *acl)
+{
+
+ free(acl);
+}
+
+/*
+ * Group-membership test: is the group ID tid (test-id) equal to
+ * the primary group gid, or to any of the ngids group IDs stored
+ * in gids?  The gids array is not touched when ngids is 0.
+ */
+static bool
+l9p_ingroup(gid_t tid, gid_t gid, gid_t *gids, size_t ngids)
+{
+	const gid_t *cur;
+	size_t left;
+
+	if (gid == tid)
+		return (true);
+
+	/* Linear scan of the supplementary group set. */
+	cur = gids;
+	for (left = ngids; left > 0; left--) {
+		if (*cur++ == tid)
+			return (true);
+	}
+	return (false);
+}
+
+/* #define ACE_DEBUG */
+
+/*
+ * Note that NFSv4 tests are done on a "first match" basis.
+ * That is, we check each ACE sequentially until we run out
+ * of ACEs, or find something explicitly denied (DENIED!),
+ * or have cleared out all our attempt-something bits.  Once
+ * we come across an ALLOW entry for the bits we're trying,
+ * we clear those from the bits we're still looking for, in
+ * the order they appear.
+ *
+ * The result is either "definitely allowed" (we cleared
+ * all the bits), "definitely denied" (we hit a deny with
+ * some or all of the bits), or "unspecified".  We
+ * represent these three states as +1 (positive = yes = allow),
+ * -1 (negative = no = denied), or 0 (no strong answer).
+ *
+ * Audit and alarm ACEs are ignored, as are ACEs carrying the
+ * inherit-only flag: those exist only to be copied onto newly
+ * created children and say nothing about the object they sit on.
+ * An ACE with an explicit user or group ID is also skipped when
+ * its ace_idsize is not sizeof(uid_t).
+ *
+ * For our caller's convenience, if we are called with a
+ * mask of 0, we return 0 (no answer).
+ */
+static int
+l9p_check_aces(int32_t mask, struct l9p_acl *acl, struct stat *st,
+    uid_t uid, gid_t gid, gid_t *gids, size_t ngids)
+{
+	uint32_t i;
+	struct l9p_ace *ace;
+#ifdef ACE_DEBUG
+	const char *acetype, *allowdeny;
+	bool show_tid;
+#endif
+	bool match;
+	uid_t tid;
+
+	if (mask == 0)
+		return (0);
+
+	for (i = 0; mask != 0 && i < acl->acl_nace; i++) {
+		ace = &acl->acl_aces[i];
+		switch (ace->ace_type) {
+		case L9P_ACET_ACCESS_ALLOWED:
+		case L9P_ACET_ACCESS_DENIED:
+			break;
+		default:
+			/* audit, alarm - ignore */
+			continue;
+		}
+		/*
+		 * Inherit-only ACEs do not apply to the object that
+		 * holds them, so they must neither grant nor deny here.
+		 */
+		if (ace->ace_flags & L9P_ACEF_INHERIT_ONLY_ACE)
+			continue;
+#ifdef ACE_DEBUG
+		show_tid = false;
+#endif
+		if (ace->ace_flags & L9P_ACEF_OWNER) {
+#ifdef ACE_DEBUG
+			acetype = "OWNER@";
+#endif
+			match = st->st_uid == uid;
+		} else if (ace->ace_flags & L9P_ACEF_GROUP) {
+#ifdef ACE_DEBUG
+			acetype = "GROUP@";
+#endif
+			match = l9p_ingroup(st->st_gid, gid, gids, ngids);
+		} else if (ace->ace_flags & L9P_ACEF_EVERYONE) {
+#ifdef ACE_DEBUG
+			acetype = "EVERYONE@";
+#endif
+			match = true;
+		} else {
+			/* Explicit ID: only uid_t-sized IDs are understood. */
+			if (ace->ace_idsize != sizeof(tid))
+				continue;
+#ifdef ACE_DEBUG
+			show_tid = true;
+#endif
+			memcpy(&tid, &ace->ace_idbytes, sizeof(tid));
+			if (ace->ace_flags & L9P_ACEF_IDENTIFIER_GROUP) {
+#ifdef ACE_DEBUG
+				acetype = "group";
+#endif
+				match = l9p_ingroup(tid, gid, gids, ngids);
+			} else {
+#ifdef ACE_DEBUG
+				acetype = "user";
+#endif
+				match = tid == uid;
+			}
+		}
+		/*
+		 * If this ACE applies to us, check remaining bits.
+		 * If any of those bits also apply, check the type:
+		 * DENY means "stop now", ALLOW means allow these bits
+		 * and keep checking.
+		 */
+#ifdef ACE_DEBUG
+		allowdeny = ace->ace_type == L9P_ACET_ACCESS_DENIED ?
+		    "deny" : "allow";
+#endif
+		if (match && (ace->ace_mask & (uint32_t)mask) != 0) {
+#ifdef ACE_DEBUG
+			if (show_tid)
+				L9P_LOG(L9P_DEBUG,
+				    "ACE: %s %s %d: mask 0x%x ace_mask 0x%x",
+				    allowdeny, acetype, (int)tid,
+				    (u_int)mask, (u_int)ace->ace_mask);
+			else
+				L9P_LOG(L9P_DEBUG,
+				    "ACE: %s %s: mask 0x%x ace_mask 0x%x",
+				    allowdeny, acetype,
+				    (u_int)mask, (u_int)ace->ace_mask);
+#endif
+			if (ace->ace_type == L9P_ACET_ACCESS_DENIED)
+				return (-1);
+			mask &= ~ace->ace_mask;
+#ifdef ACE_DEBUG
+			L9P_LOG(L9P_DEBUG, "clear 0x%x: now mask=0x%x",
+			    (u_int)ace->ace_mask, (u_int)mask);
+#endif
+		} else {
+#ifdef ACE_DEBUG
+			if (show_tid)
+				L9P_LOG(L9P_DEBUG,
+				    "ACE: SKIP %s %s %d: "
+				    "match %d mask 0x%x ace_mask 0x%x",
+				    allowdeny, acetype, (int)tid,
+				    (int)match, (u_int)mask,
+				    (u_int)ace->ace_mask);
+			else
+				L9P_LOG(L9P_DEBUG,
+				    "ACE: SKIP %s %s: "
+				    "match %d mask 0x%x ace_mask 0x%x",
+				    allowdeny, acetype,
+				    (int)match, (u_int)mask,
+				    (u_int)ace->ace_mask);
+#endif
+		}
+	}
+
+	/* Return 1 if access definitely granted. */
+#ifdef ACE_DEBUG
+	L9P_LOG(L9P_DEBUG, "ACE: end of ACEs, mask now 0x%x: %s",
+	    (u_int)mask, mask ? "no-definitive-answer" : "ALLOW");
+#endif
+	return (mask == 0 ? 1 : 0);
+}
+
+/*
+ * Test against ACLs.
+ *
+ * The return value is normally 0 (access allowed) or EPERM
+ * (access denied), so it could just be a boolean....
+ *
+ * For "make new dir in dir" and "remove dir in dir", you must
+ * set the mask to test the directory permissions (not ADD_FILE but
+ * ADD_SUBDIRECTORY, and DELETE_CHILD). For "make new file in dir"
+ * you must set the opmask to test file ADD_FILE.
+ *
+ * The L9P_ACE_DELETE flag means "can delete this thing"; it's not
+ * clear whether it should override the parent directory's ACL if
+ * any. In our case it does not, but a caller may try
+ * L9P_ACE_DELETE_CHILD (separately, on its own) and then a
+ * (second, separate) L9P_ACE_DELETE, to make the permissions work
+ * as "or" instead of "and".
+ *
+ * Pass a NULL parent/pstat if they are not applicable, e.g.,
+ * for doing operations on an existing file, such as reading or
+ * writing data or attributes. Pass in a null child/cstat if
+ * that's not applicable, such as creating a new file/dir.
+ *
+ * NB: it's probably wise to allow the owner of any file to update
+ * the ACLs of that file, but we leave that test to the caller.
+ */
+int
+l9p_acl_check_access(int32_t opmask, struct l9p_acl_check_args *args)
+{
+	struct l9p_acl *parent, *child;
+	struct stat *pstat, *cstat;
+	int32_t pop, cop;
+	size_t ngids;
+	uid_t uid;
+	gid_t gid, *gids;
+	int panswer, canswer;
+
+	assert(opmask != 0);
+	parent = args->aca_parent;
+	pstat = args->aca_pstat;
+	child = args->aca_child;
+	cstat = args->aca_cstat;
+	uid = args->aca_uid;
+	gid = args->aca_gid;
+	gids = args->aca_groups;
+	ngids = args->aca_ngroups;
+
+#ifdef ACE_DEBUG
+	L9P_LOG(L9P_DEBUG,
+	    "l9p_acl_check_access: opmask=0x%x uid=%ld gid=%ld ngids=%zd",
+	    (u_int)opmask, (long)uid, (long)gid, ngids);
+#endif
+	/*
+	 * If caller said "superuser semantics", check that first.
+	 * Note that we apply them regardless of ACLs.
+	 */
+	if (uid == 0 && args->aca_superuser)
+		return (0);
+
+	/*
+	 * If told to ignore ACLs and use only stat-based permissions,
+	 * discard any non-NULL ACL pointers.
+	 *
+	 * This will need some fancying up when we support POSIX ACLs.
+	 */
+	if ((args->aca_aclmode & L9P_ACM_NFS_ACL) == 0)
+		parent = child = NULL;
+
+	assert(parent == NULL || parent->acl_acetype == L9P_ACLTYPE_NFSv4);
+	assert(parent == NULL || pstat != NULL);
+	assert(child == NULL || child->acl_acetype == L9P_ACLTYPE_NFSv4);
+	assert(child == NULL || cstat != NULL);
+	assert(pstat != NULL || cstat != NULL);
+
+	/*
+	 * If the operation is UNLINK we should have either both ACLs
+	 * or no ACLs, but we won't require that here.
+	 *
+	 * If a parent ACL is supplied, it's a directory by definition.
+	 * Make sure we're allowed to do this there, whatever this is.
+	 * If a child ACL is supplied, check it too.  Note that the
+	 * DELETE permission only applies in the child though, not
+	 * in the parent, and the DELETE_CHILD only applies in the
+	 * parent.
+	 */
+	pop = cop = opmask;
+	if (parent != NULL || pstat != NULL) {
+		/*
+		 * Remove child-only bits from parent op and
+		 * parent-only bits from child op.
+		 *
+		 * L9P_ACE_DELETE is child-only.
+		 *
+		 * L9P_ACE_DELETE_CHILD is parent-only, and three data
+		 * access bits overlap with three directory access bits.
+		 * We should have child==NULL && cstat==NULL, so the
+		 * three data bits should be redundant, but it's
+		 * both trivial and safest to remove them anyway.
+		 */
+		pop &= ~L9P_ACE_DELETE;
+		cop &= ~(L9P_ACE_DELETE_CHILD | L9P_ACE_LIST_DIRECTORY |
+		    L9P_ACE_ADD_FILE | L9P_ACE_ADD_SUBDIRECTORY);
+	} else {
+		/*
+		 * Remove child-only bits from parent op.  We need
+		 * not bother since we just found we have no parent
+		 * and no pstat, and hence won't actually *use* pop.
+		 *
+		 * pop &= ~(L9P_ACE_READ_DATA | L9P_ACE_WRITE_DATA |
+		 *     L9P_ACE_APPEND_DATA);
+		 */
+	}
+	panswer = 0;
+	canswer = 0;
+	if (parent != NULL)
+		panswer = l9p_check_aces(pop, parent, pstat,
+		    uid, gid, gids, ngids);
+	if (child != NULL)
+		canswer = l9p_check_aces(cop, child, cstat,
+		    uid, gid, gids, ngids);
+
+	if (panswer || canswer) {
+		/*
+		 * Got a definitive answer from parent and/or
+		 * child ACLs.  We're not quite done yet though.
+		 */
+		if (opmask == L9P_ACOP_UNLINK) {
+			/*
+			 * For UNLINK, we can get an allow from child
+			 * and deny from parent, or vice versa.  It's
+			 * not 100% clear how to handle the two-answer
+			 * case.  ZFS says that if either says "allow",
+			 * we allow, and if both definitely say "deny",
+			 * we deny.  This makes sense, so we do that
+			 * here for all cases, even "strict".
+			 */
+			if (panswer > 0 || canswer > 0)
+				return (0);
+			if (panswer < 0 && canswer < 0)
+				return (EPERM);
+			/* non-definitive answer from one! move on */
+		} else {
+			/*
+			 * Have at least one definitive answer, and
+			 * should have only one; obey whichever
+			 * one it is.
+			 */
+			if (panswer)
+				return (panswer < 0 ? EPERM : 0);
+			return (canswer < 0 ? EPERM : 0);
+		}
+	}
+
+	/*
+	 * No definitive answer from ACLs alone.  Check for ZFS style
+	 * permissions checking and an "UNLINK" operation under ACLs.
+	 * If so, find write-and-execute permission on parent.
+	 * Note that WRITE overlaps with ADD_FILE -- that's ZFS's
+	 * way of saying "allow write to dir" -- but EXECUTE is
+	 * separate from LIST_DIRECTORY, so that's at least a little
+	 * bit cleaner.
+	 *
+	 * Note also that only a definitive yes (both bits are
+	 * explicitly allowed) results in granting unlink, and
+	 * a definitive no (at least one bit explicitly denied)
+	 * results in EPERM.  Only "no answer" moves on.
+	 */
+	if ((args->aca_aclmode & L9P_ACM_ZFS_ACL) &&
+	    opmask == L9P_ACOP_UNLINK && parent != NULL) {
+		panswer = l9p_check_aces(L9P_ACE_ADD_FILE | L9P_ACE_EXECUTE,
+		    parent, pstat, uid, gid, gids, ngids);
+		if (panswer)
+			return (panswer < 0 ? EPERM : 0);
+	}
+
+	/*
+	 * No definitive answer from ACLs.
+	 *
+	 * Try POSIX style rwx permissions if allowed.  This should
+	 * be rare, occurring mainly when caller supplied no ACLs
+	 * or set the mode to suppress them.
+	 *
+	 * The stat to check is the parent's if we don't have a child
+	 * (i.e., this is a dir op), or if the DELETE_CHILD bit is set
+	 * (i.e., this is an unlink or similar).  Otherwise it's the
+	 * child's.
+	 */
+	if (args->aca_aclmode & L9P_ACM_STAT_MODE) {
+		struct stat *st;
+		int rwx, bits;
+
+		rwx = l9p_ace_mask_to_rwx(opmask);
+		if ((st = cstat) == NULL || (opmask & L9P_ACE_DELETE_CHILD))
+			st = pstat;
+		/*
+		 * The stat we picked may be missing, e.g. an
+		 * unlink-style request that supplied only child data
+		 * (or no stat at all when asserts are compiled out).
+		 * With nothing to test against we cannot grant access,
+		 * so skip the mode test and fall through to EPERM
+		 * rather than dereference NULL.
+		 */
+		if (st != NULL) {
+			if (uid == st->st_uid)
+				bits = (st->st_mode >> 6) & 7;
+			else if (l9p_ingroup(st->st_gid, gid, gids, ngids))
+				bits = (st->st_mode >> 3) & 7;
+			else
+				bits = st->st_mode & 7;
+			/*
+			 * If all the desired bits are set, we're OK.
+			 */
+			if ((rwx & bits) == rwx)
+				return (0);
+		}
+	}
+
+	/* all methods have failed, return EPERM */
+	return (EPERM);
+}
+
+/*
+ * Reduce a detailed ACL operation mask to plain Unix permission
+ * bits: 4 (read), 2 (write), 1 (execute), OR-ed together.
+ *
+ * Directory operations have no exact equivalent, but listing a
+ * directory does need read permission and adding or deleting
+ * entries does need write permission, so the mapping is probably
+ * good enough.  Mask bits not named below (for example
+ * L9P_ACE_WRITE_OWNER and L9P_ACE_SYNCHRONIZE) contribute nothing
+ * to the result.
+ */
+int
+l9p_ace_mask_to_rwx(int32_t opmask)
+{
+	const int32_t rbits = L9P_ACE_READ_DATA | L9P_ACE_READ_NAMED_ATTRS |
+	    L9P_ACE_READ_ATTRIBUTES | L9P_ACE_READ_ACL;
+	const int32_t wbits = L9P_ACE_WRITE_DATA | L9P_ACE_APPEND_DATA |
+	    L9P_ACE_ADD_FILE | L9P_ACE_ADD_SUBDIRECTORY |
+	    L9P_ACE_DELETE | L9P_ACE_DELETE_CHILD |
+	    L9P_ACE_WRITE_NAMED_ATTRS | L9P_ACE_WRITE_ATTRIBUTES |
+	    L9P_ACE_WRITE_ACL;
+	int r, w, x;
+
+	r = (opmask & rbits) != 0 ? 4 : 0;
+	w = (opmask & wbits) != 0 ? 2 : 0;
+	x = (opmask & L9P_ACE_EXECUTE) != 0 ? 1 : 0;
+	return (r | w | x);
+}
+
+#ifndef __APPLE__
+/*
+ * Allocate an ACL header together with room for aceasize ACEs, as
+ * one malloc() block.  On success the header records the type, zero
+ * ACEs in use, and the capacity; the ACE slots themselves are left
+ * uninitialized.  Returns NULL if malloc() fails.
+ */
+static struct l9p_acl *
+l9p_new_acl(uint32_t acetype, uint32_t aceasize)
+{
+	struct l9p_acl *acl;
+
+	acl = malloc(sizeof(struct l9p_acl) +
+	    aceasize * sizeof(struct l9p_ace));
+	if (acl == NULL)
+		return (NULL);
+
+	acl->acl_acetype = acetype;
+	acl->acl_nace = 0;
+	acl->acl_aceasize = aceasize;
+	return (acl);
+}
+
+/*
+ * Expand ACL to accommodate more entries.
+ *
+ * Currently won't shrink, only grow, so it's a fast no-op until
+ * we hit the allocated size.  After that, it's best to grow in
+ * big chunks, or this will be O(n**2).
+ *
+ * Returns the (possibly moved) ACL with room for at least aceasize
+ * ACEs.  If the reallocation fails the original ACL is freed and
+ * NULL is returned, so the caller must not touch the old pointer
+ * afterwards.
+ */
+static struct l9p_acl *
+l9p_growacl(struct l9p_acl *acl, uint32_t aceasize)
+{
+	struct l9p_acl *tmp;
+	size_t asize, size;
+
+	if (acl->acl_aceasize >= aceasize)
+		return (acl);
+
+	asize = aceasize * sizeof(struct l9p_ace);
+	size = sizeof(struct l9p_acl) + asize;
+	tmp = realloc(acl, size);
+	if (tmp == NULL) {
+		free(acl);
+		return (NULL);
+	}
+	/*
+	 * Record the new capacity; without this every later call
+	 * would see the stale size and reallocate again.
+	 */
+	tmp->acl_aceasize = aceasize;
+	return (tmp);
+}
+
+/*
+ * Count the entries in a system ACL.  POSIX gives us no direct
+ * way to ask, so we simply walk the whole list once.  That makes
+ * conversion a two-pass affair, but 2n is still O(n) and the
+ * lists are short.  (Should the system ACL grow between the two
+ * passes, growacl() copes with that as well.)
+ */
+static int
+l9p_count_aces(acl_t sysacl)
+{
+	acl_entry_t entry;
+	uint32_t count;
+	int which;
+
+	count = 0;
+	which = ACL_FIRST_ENTRY;
+	/* acl_get_entry() yields 1 for as long as it hands back an entry. */
+	while (acl_get_entry(sysacl, which, &entry) == 1) {
+		which = ACL_NEXT_ENTRY;
+		count++;
+	}
+
+	return ((int)count);
+}
+
+/*
+ * Build an l9p_acl of the given type from a system acl_t, passing
+ * every system entry through the convert function.
+ *
+ * An entry for which convert reports an error (non-zero) is
+ * dropped: its slot is simply reused for the next entry.  Returns
+ * NULL if memory runs out; otherwise the result's acl_nace is the
+ * number of entries converted successfully.
+ */
+static struct l9p_acl *
+l9p_sysacl_to_acl(int acetype, acl_t sysacl, econvertfn *convert)
+{
+	struct l9p_acl *acl;
+	acl_entry_t entry;
+	uint32_t nconv;
+	int which;
+
+	acl = l9p_new_acl((uint32_t)acetype, (uint32_t)l9p_count_aces(sysacl));
+	if (acl == NULL)
+		return (NULL);
+
+	nconv = 0;
+	which = ACL_FIRST_ENTRY;
+	while (acl_get_entry(sysacl, which, &entry) == 1) {
+		which = ACL_NEXT_ENTRY;
+		/* Make sure slot nconv exists before converting into it. */
+		acl = l9p_growacl(acl, nconv + 1);
+		if (acl == NULL)
+			return (NULL);
+		if ((*convert)(entry, &acl->acl_aces[nconv]) == 0)
+			nconv++;
+	}
+	acl->acl_nace = nconv;
+	return (acl);
+}
+#endif
+
+/*
+ * Placeholder for a POSIX-ACL converter.  The "&& 0" in the guard
+ * keeps this empty stub out of every build until it is written.
+ */
+#if defined(HAVE_POSIX_ACLS) && 0 /* not yet */
+struct l9p_acl *
+l9p_posix_acl_to_acl(acl_t sysacl)
+{
+}
+#endif
+
+#if defined(HAVE_FREEBSD_ACLS)
+/*
+ * Convert one FreeBSD NFSv4 ACL entry (sysace) into our own
+ * representation (*ace).
+ *
+ * Returns 0 on success.  On failure returns non-zero: whatever the
+ * failing acl_get_*() call returned, ENOMEM if the qualifier could
+ * not be fetched, or EINVAL for an unrecognized tag or entry type.
+ * *ace is not written at all unless the conversion succeeds.
+ */
+static int
+l9p_frombsdnfs4(acl_entry_t sysace, struct l9p_ace *ace)
+{
+	acl_tag_t tag;			/* e.g., USER_OBJ, GROUP, etc */
+	acl_entry_type_t entry_type;	/* e.g., allow/deny */
+	acl_permset_t absdperm;
+	acl_flagset_t absdflag;
+	acl_perm_t bsdperm;		/* e.g., READ_DATA */
+	acl_flag_t bsdflag;		/* e.g., FILE_INHERIT_ACE */
+	uint32_t flags, mask;
+	int error;
+	uid_t uid, *aid;
+
+	error = acl_get_tag_type(sysace, &tag);
+	if (error == 0)
+		error = acl_get_entry_type_np(sysace, &entry_type);
+	if (error == 0)
+		error = acl_get_flagset_np(sysace, &absdflag);
+	if (error == 0)
+		error = acl_get_permset(sysace, &absdperm);
+	if (error)
+		return (error);
+
+	flags = 0;
+	uid = 0;
+	aid = NULL;
+
+	/* move user/group/everyone + id-is-group-id into flags */
+	switch (tag) {
+	case ACL_USER_OBJ:
+		flags |= L9P_ACEF_OWNER;
+		break;
+	case ACL_GROUP_OBJ:
+		flags |= L9P_ACEF_GROUP;
+		break;
+	case ACL_EVERYONE:
+		flags |= L9P_ACEF_EVERYONE;
+		break;
+	case ACL_GROUP:
+		flags |= L9P_ACEF_IDENTIFIER_GROUP;
+		/* FALLTHROUGH */
+	case ACL_USER:
+		/*
+		 * The qualifier comes back in freshly allocated storage.
+		 * Copy the ID out, hand the storage back with
+		 * acl_free(), and point aid at our local copy so the
+		 * ID bytes can be filled in below.
+		 */
+		aid = acl_get_qualifier(sysace);
+		if (aid == NULL)
+			return (ENOMEM);
+		uid = *aid;
+		(void)acl_free(aid);
+		aid = &uid;
+		break;
+	default:
+		return (EINVAL);	/* can't happen */
+	}
+
+	switch (entry_type) {
+
+	case ACL_ENTRY_TYPE_ALLOW:
+		ace->ace_type = L9P_ACET_ACCESS_ALLOWED;
+		break;
+
+	case ACL_ENTRY_TYPE_DENY:
+		ace->ace_type = L9P_ACET_ACCESS_DENIED;
+		break;
+
+	case ACL_ENTRY_TYPE_AUDIT:
+		ace->ace_type = L9P_ACET_SYSTEM_AUDIT;
+		break;
+
+	case ACL_ENTRY_TYPE_ALARM:
+		ace->ace_type = L9P_ACET_SYSTEM_ALARM;
+		break;
+
+	default:
+		return (EINVAL);	/* can't happen */
+	}
+
+	/* transform remaining BSD flags to internal NFS-y form */
+	bsdflag = *absdflag;
+	if (bsdflag & ACL_ENTRY_FILE_INHERIT)
+		flags |= L9P_ACEF_FILE_INHERIT_ACE;
+	if (bsdflag & ACL_ENTRY_DIRECTORY_INHERIT)
+		flags |= L9P_ACEF_DIRECTORY_INHERIT_ACE;
+	if (bsdflag & ACL_ENTRY_NO_PROPAGATE_INHERIT)
+		flags |= L9P_ACEF_NO_PROPAGATE_INHERIT_ACE;
+	if (bsdflag & ACL_ENTRY_INHERIT_ONLY)
+		flags |= L9P_ACEF_INHERIT_ONLY_ACE;
+	if (bsdflag & ACL_ENTRY_SUCCESSFUL_ACCESS)
+		flags |= L9P_ACEF_SUCCESSFUL_ACCESS_ACE_FLAG;
+	if (bsdflag & ACL_ENTRY_FAILED_ACCESS)
+		flags |= L9P_ACEF_FAILED_ACCESS_ACE_FLAG;
+	ace->ace_flags = flags;
+
+	/*
+	 * Transform BSD permissions to ace_mask.  Note that directory
+	 * vs file bits are the same in both sets, so we don't need
+	 * to worry about that, at least.
+	 *
+	 * There seem to be no BSD equivalents for WRITE_RETENTION
+	 * and WRITE_RETENTION_HOLD.
+	 */
+	mask = 0;
+	bsdperm = *absdperm;
+	if (bsdperm & ACL_READ_DATA)
+		mask |= L9P_ACE_READ_DATA;
+	if (bsdperm & ACL_WRITE_DATA)
+		mask |= L9P_ACE_WRITE_DATA;
+	if (bsdperm & ACL_APPEND_DATA)
+		mask |= L9P_ACE_APPEND_DATA;
+	if (bsdperm & ACL_READ_NAMED_ATTRS)
+		mask |= L9P_ACE_READ_NAMED_ATTRS;
+	if (bsdperm & ACL_WRITE_NAMED_ATTRS)
+		mask |= L9P_ACE_WRITE_NAMED_ATTRS;
+	if (bsdperm & ACL_EXECUTE)
+		mask |= L9P_ACE_EXECUTE;
+	if (bsdperm & ACL_DELETE_CHILD)
+		mask |= L9P_ACE_DELETE_CHILD;
+	if (bsdperm & ACL_READ_ATTRIBUTES)
+		mask |= L9P_ACE_READ_ATTRIBUTES;
+	if (bsdperm & ACL_WRITE_ATTRIBUTES)
+		mask |= L9P_ACE_WRITE_ATTRIBUTES;
+	/* L9P_ACE_WRITE_RETENTION */
+	/* L9P_ACE_WRITE_RETENTION_HOLD */
+	/* 0x00800 */
+	if (bsdperm & ACL_DELETE)
+		mask |= L9P_ACE_DELETE;
+	if (bsdperm & ACL_READ_ACL)
+		mask |= L9P_ACE_READ_ACL;
+	if (bsdperm & ACL_WRITE_ACL)
+		mask |= L9P_ACE_WRITE_ACL;
+	if (bsdperm & ACL_WRITE_OWNER)
+		mask |= L9P_ACE_WRITE_OWNER;
+	if (bsdperm & ACL_SYNCHRONIZE)
+		mask |= L9P_ACE_SYNCHRONIZE;
+	ace->ace_mask = mask;
+
+	/* fill in variable-size user or group ID bytes */
+	if (aid == NULL)
+		ace->ace_idsize = 0;
+	else {
+		ace->ace_idsize = sizeof(uid);
+		memcpy(&ace->ace_idbytes[0], aid, sizeof(uid));
+	}
+
+	return (0);
+}
+
+/*
+ * Convert a FreeBSD NFSv4 system ACL to an l9p_acl of type
+ * L9P_ACLTYPE_NFSv4, using l9p_frombsdnfs4() on each entry.
+ * Returns NULL if l9p_sysacl_to_acl() runs out of memory.
+ */
+struct l9p_acl *
+l9p_freebsd_nfsv4acl_to_acl(acl_t sysacl)
+{
+
+ return (l9p_sysacl_to_acl(L9P_ACLTYPE_NFSv4, sysacl, l9p_frombsdnfs4));
+}
+#endif
+
+/*
+ * Placeholder for a Darwin ACL converter.  The "&& 0" in the guard
+ * keeps this empty stub out of every build until it is written.
+ */
+#if defined(HAVE_DARWIN_ACLS) && 0 /* not yet */
+struct l9p_acl *
+l9p_darwin_nfsv4acl_to_acl(acl_t sysacl)
+{
+}
+#endif
diff --git a/genacl.h b/genacl.h
new file mode 100644
index 000000000000..f5feee716177
--- /dev/null
+++ b/genacl.h
@@ -0,0 +1,307 @@
+/*
+ * Copyright 2016 Chris Torek <torek@ixsystems.com>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * General ACL support for 9P2000.L.
+ *
+ * We mostly use Linux's xattr name space and nfs4 ACL bits, as
+ * these are the most general forms available.
+ *
+ * Linux requests attributes named
+ *
+ * "system.posix_acl_default"
+ * "system.posix_acl_access"
+ *
+ * to get POSIX style ACLs, and:
+ *
+ * "system.nfs4_acl"
+ *
+ * to get NFSv4 style ACLs. The v9fs client does not explicitly
+ * ask for the latter, but if you use the Ubuntu nfs4-acl-tools
+ * package, it should be able to read and write these.
+ *
+ * For the record, the Linux kernel source code also shows:
+ *
+ * - Lustre uses "trusted.*", with "*" matching "lov", "lma",
+ * "lmv", "dmv", "link", "fid", "version", "som", "hsm", and
+ * "lfsck_namespace".
+ *
+ * - ceph has a name tree of the form "ceph.<type>.<name>" with
+ * <type,name> pairs like <"dir","entries">, <"dir","files">,
+ * <"file","layout">, and so on.
+ *
+ * - ext4 uses the POSIX names, plus some special ext4-specific
+ * goop that might not get externalized.
+ *
+ * - NFS uses both the POSIX names and the NFSv4 ACLs. However,
+ * what it mainly does is have nfsd generate fake NFSv4 ACLs
+ * from POSIX ACLs. If you run an NFS client, the client
+ * relies on the server actually implementing the ACLs, and
+ * lets nfs4-acl-tools read and write the system.nfs4_acl xattr
+ * data. If you run an NFS server off, e.g., an ext4 file system,
+ * the server looks for the system.nfs4_acl xattr, serves that
+ * out if found, and otherwise just generates the fakes.
+ *
+ * - "security.*" and "selinux.*" are reserved.
+ *
+ * - "security.capability" is the name for capabilities.
+ *
+ * - sockets use "system.sockprotoname".
+ */
+
+#if defined(__APPLE__)
+ #define HAVE_POSIX_ACLS
+ #define HAVE_DARWIN_ACLS
+#endif
+
+#if defined(__FreeBSD__)
+ #define HAVE_POSIX_ACLS
+ #define HAVE_FREEBSD_ACLS
+#endif
+
+#include <sys/types.h>
+#include <sys/acl.h> /* XXX assumes existence of sys/acl.h */
+
+/*
+ * An ACL consists of a number of ACEs that grant some kind of
+ * "allow" or "deny" to some specific entity.
+ *
+ * The number of ACEs is potentially unlimited, although in practice
+ * they tend not to be that long.
+ *
+ * It's the responsibility of the back-end to supply the ACL
+ * for each test. However, the ACL may be in some sort of
+ * system-specific form. It's the responsibility of some
+ * (system-specific) code to translate it to *this* form, after
+ * which the backend may use l9p_acl_check_access() to get
+ * access granted or denied (and, eventually, audits and alarms
+ * recorded and raised, although that's yet to be designed).
+ *
+ * The reason for all this faffing-about with formats is so that
+ * we can *report* the ACLs using Linux 9p style xattrs.
+ */
+
+struct l9p_acl;
+struct l9p_fid;
+
+void l9p_acl_free(struct l9p_acl *);
+
+/*
+ * An ACL is made up of ACEs.
+ *
+ * Each ACE has:
+ *
+ * - a type: allow, deny, audit, alarm
+ * - a set of flags
+ * - permissions bits: a "mask"
+ * - an optional, nominally-variable-length identity
+ *
+ * The last part is especially tricky and currently has limited
+ * support here: it's always a 16 byte field on Darwin, and just
+ * a uint32_t on BSD (should be larger, really). Linux supports
+ * very large, actually-variable-size values; we'll deal with
+ * this later, maybe.
+ *
+ * We will define the mask first, below, since these are also the bits
+ * passed in for the accmask argument to l9p_acl_check_access().
+ */
+
+/*
+ * ACL entry mask, and accmask argument flags.
+ *
+ * NB: not every bit is implemented, but they are all here because
+ * they are all defined as part of an NFSv4 ACL entry, which is
+ * more or less a superset of a POSIX ACL entry. This means you
+ * can put a complete NFSv4 ACL in and we can reproduce it.
+ *
+ * Note that the LIST_DIRECTORY, ADD_FILE, and ADD_SUBDIRECTORY bits
+ * apply only to a directory, while the READ_DATA, WRITE_DATA, and
+ * APPEND_DATA bits apply only to a file. See aca_parent/aca_child
+ * below.
+ */
+#define L9P_ACE_READ_DATA 0x00001
+#define L9P_ACE_LIST_DIRECTORY 0x00001 /* same as READ_DATA */
+#define L9P_ACE_WRITE_DATA 0x00002
+#define L9P_ACE_ADD_FILE 0x00002 /* same as WRITE_DATA */
+#define L9P_ACE_APPEND_DATA 0x00004
+#define L9P_ACE_ADD_SUBDIRECTORY 0x00004 /* same as APPEND_DATA */
+#define L9P_ACE_READ_NAMED_ATTRS 0x00008
+#define L9P_ACE_WRITE_NAMED_ATTRS 0x00010
+#define L9P_ACE_EXECUTE 0x00020
+#define L9P_ACE_DELETE_CHILD 0x00040
+#define L9P_ACE_READ_ATTRIBUTES 0x00080
+#define L9P_ACE_WRITE_ATTRIBUTES 0x00100
+#define L9P_ACE_WRITE_RETENTION 0x00200 /* not used here */
+#define L9P_ACE_WRITE_RETENTION_HOLD 0x00400 /* not used here */
+/* 0x00800 unused? */
+#define L9P_ACE_DELETE 0x01000
+#define L9P_ACE_READ_ACL 0x02000
+#define L9P_ACE_WRITE_ACL 0x04000
+#define L9P_ACE_WRITE_OWNER 0x08000
+#define L9P_ACE_SYNCHRONIZE 0x10000 /* not used here */
+
+/*
+ * This is not an ACE bit, but is used with the access checking
+ * below. It represents a request to unlink (delete child /
+ * delete) an entity, and is equivalent to asking for *either*
+ * (not both) permission.
+ */
+#define L9P_ACOP_UNLINK (L9P_ACE_DELETE_CHILD | L9P_ACE_DELETE)
+
+/*
+ * Access checking takes a lot of arguments, so they are
+ * collected into a "struct" here.
+ *
+ * The aca_parent and aca_pstat fields may/must be NULL if the
+ * operation itself does not involve "directory" permissions.
+ * The aca_child and aca_cstat fields may/must be NULL if the
+ * operation does not involve anything *but* a directory. This
+ * is how we decide whether you're interested in L9P_ACE_READ_DATA
+ * vs L9P_ACE_LIST_DIRECTORY, for instance.
+ *
+ * Note that it's OK for both parent and child to be directories
+ * (as is the case when we're adding or deleting a subdirectory).
+ */
+struct l9p_acl_check_args {
+ uid_t aca_uid; /* the uid that is requesting access */
+ gid_t aca_gid; /* the gid that is requesting access */
+ gid_t *aca_groups; /* the additional group-set, if any */
+ size_t aca_ngroups; /* number of groups in group-set */
+ struct l9p_acl *aca_parent; /* ACLs associated with parent/dir */
+ struct stat *aca_pstat; /* stat data for parent/dir */
+ struct l9p_acl *aca_child; /* ACLs associated with file */
+ struct stat *aca_cstat; /* stat data for file */
+ int aca_aclmode; /* mode checking bits, see below */
+ bool aca_superuser; /* always allow uid==0, before any ACL or mode test */
+};
+
+/*
+ * Access checking mode bits in aca_aclmode. If you enable
+ * ACLs, they are used first, optionally with ZFS style ACLs.
+ * Note, however, that the aca_superuser test is made before any ACL
+ * is consulted, so uid 0 is allowed even where an ACL would deny it.
+ *
+ * NFS style ACLs run before POSIX style ACLs (though POSIX
+ * ACLs aren't done yet anyway).
+ *
+ * N.B.: you probably want L9P_ACM_ZFS_ACL, especially when operating
+ * with a ZFS file system on FreeBSD.
+ */
+#define L9P_ACM_NFS_ACL 0x0001 /* enable NFS ACL checking */
+#define L9P_ACM_ZFS_ACL 0x0002 /* use ZFS ACL unlink semantics */
+#define L9P_ACM_POSIX_ACL 0x0004 /* enable POSIX ACL checking (notyet) */
+#define L9P_ACM_STAT_MODE 0x0008 /* enable st_mode bits */
+
+/*
+ * Requests to access some file or directory must provide:
+ *
+ * - An operation. This should usually be just one bit from the
+ * L9P_ACE_* bit-sets above, or our special L9P_ACOP_UNLINK.
+ * For a few file-open operations it may be multiple bits,
+ * e.g., both read and write data.
+ * - The identity of the accessor: uid + gid + gid-set.
+ * - The type of access desired: this may be multiple bits.
+ * - The parent directory, if applicable.
+ * - The child file/dir being accessed, if applicable.
+ * - stat data for parent and/or child, if applicable.
+ *
+ * The ACLs and/or stat data of the parent and/or child get used
+ * here, so the caller must provide them. We should have a way to
+ * cache these on fids, but not yet. The parent and child
+ * arguments are a bit tricky; see the code in genacl.c.
+ */
+int l9p_acl_check_access(int32_t op, struct l9p_acl_check_args *args);
+
+/*
+ * When falling back to POSIX ACL or Unix-style permissions
+ * testing, it's nice to collapse the above detailed permissions
+ * into simple read/write/execute bits (value 0..7). We provide
+ * a small utility function that does this.
+ */
+int l9p_ace_mask_to_rwx(int32_t);
+
+/*
+ * The rest of the data in an ACE.
+ */
+
+/* type in ace_type */
+#define L9P_ACET_ACCESS_ALLOWED 0
+#define L9P_ACET_ACCESS_DENIED 1
+#define L9P_ACET_SYSTEM_AUDIT 2
+#define L9P_ACET_SYSTEM_ALARM 3
+
+/* flags in ace_flags */
+#define L9P_ACEF_FILE_INHERIT_ACE 0x001
+#define L9P_ACEF_DIRECTORY_INHERIT_ACE 0x002
+#define L9P_ACEF_NO_PROPAGATE_INHERIT_ACE 0x004
+#define L9P_ACEF_INHERIT_ONLY_ACE 0x008
+#define L9P_ACEF_SUCCESSFUL_ACCESS_ACE_FLAG 0x010
+#define L9P_ACEF_FAILED_ACCESS_ACE_FLAG 0x020
+#define L9P_ACEF_IDENTIFIER_GROUP 0x040
+#define L9P_ACEF_OWNER 0x080
+#define L9P_ACEF_GROUP 0x100
+#define L9P_ACEF_EVERYONE 0x200
+
+#if defined(__APPLE__)
+# define L9P_ACE_IDSIZE 16 /* but, how do we map Darwin uuid? */
+#else
+# define L9P_ACE_IDSIZE 4
+#endif
+
+/*
+ * One access-control entry.  ace_idbytes carries an explicit user
+ * or group ID only when ace_idsize is non-zero; the converter in
+ * genacl.c stores an ace_idsize of 0 for owner/group/everyone entries.
+ */
+struct l9p_ace {
+ uint16_t ace_type; /* ACL entry type */
+ uint16_t ace_flags; /* ACL entry flags */
+ uint32_t ace_mask; /* ACL entry mask */
+ uint32_t ace_idsize; /* length of ace_idbytes */
+ unsigned char ace_idbytes[L9P_ACE_IDSIZE];
+};
+
+#define L9P_ACLTYPE_NFSv4 1 /* currently the only valid type */
+/*
+ * An ACL: a small header followed directly by its ACEs, all in one
+ * allocation (the ACE array is a flexible array member).
+ */
+struct l9p_acl {
+ uint32_t acl_acetype; /* ACL type (L9P_ACLTYPE_*); reserved for future expansion */
+ uint32_t acl_nace; /* number of occupied ACEs */
+ uint32_t acl_aceasize; /* actual size of ACE array, counted in ACEs */
+ struct l9p_ace acl_aces[]; /* variable length ACE array */
+};
+
+/*
+ * These are the system-specific converters.
+ *
+ * Right now the backend needs to just find BSD NFSv4 ACLs
+ * and convert them before each operation that needs to be
+ * tested.
+ */
+#if defined(HAVE_DARWIN_ACLS)
+struct l9p_acl *l9p_darwin_nfsv4acl_to_acl(acl_t acl);
+#endif
+
+#if defined(HAVE_FREEBSD_ACLS)
+struct l9p_acl *l9p_freebsd_nfsv4acl_to_acl(acl_t acl);
+#endif
+
+#if defined(HAVE_POSIX_ACLS) && 0 /* not yet */
+struct l9p_acl *l9p_posix_acl_to_acl(acl_t acl);
+#endif
diff --git a/hashtable.c b/hashtable.c
new file mode 100644
index 000000000000..d6558eb65598
--- /dev/null
+++ b/hashtable.c
@@ -0,0 +1,267 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+#include <pthread.h>
+#include <sys/types.h>
+#include <sys/queue.h>
+#include "lib9p_impl.h"
+#include "hashtable.h"
+
+static struct ht_item *ht_iter_advance(struct ht_iter *, struct ht_item *);
+
+/* Set up h as an empty table of `size` chains with a freshly made rwlock. */
+void
+ht_init(struct ht *h, ssize_t size)
+{
+	ssize_t slot;
+
+	memset(h, 0, sizeof(*h));
+	h->ht_entries = l9p_calloc((size_t)size, sizeof(*h->ht_entries));
+	h->ht_nentries = size;
+	pthread_rwlock_init(&h->ht_rwlock, NULL);
+	for (slot = 0; slot < size; slot++)
+		TAILQ_INIT(&h->ht_entries[slot].hte_items);
+}
+
+/*
+ * Free every item in every chain, then the lock and the chain array.
+ * The hti_data pointers stored in the items are not freed here.
+ */
+void
+ht_destroy(struct ht *h)
+{
+	struct ht_item *cur, *following;
+	ssize_t slot;
+
+	for (slot = 0; slot < h->ht_nentries; slot++)
+		TAILQ_FOREACH_SAFE(cur, &h->ht_entries[slot].hte_items,
+		    hti_link, following)
+			free(cur);
+	pthread_rwlock_destroy(&h->ht_rwlock);
+	free(h->ht_entries);
+	h->ht_entries = NULL;
+}
+
+/* Read-locked lookup: the data stored under hash, or NULL if no match. */
+void *
+ht_find(struct ht *h, uint32_t hash)
+{
+	void *data;
+	ht_rdlock(h);
+	data = ht_find_locked(h, hash);
+	ht_unlock(h);
+	return (data);
+}
+
+/*
+ * Scan the chain at hash % ht_nentries for an exact hash match and return
+ * its data pointer, or NULL.  Takes no lock; ht_find() locks around it.
+ */
+void *
+ht_find_locked(struct ht *h, uint32_t hash)
+{
+	struct ht_item *it;
+
+	for (it = TAILQ_FIRST(&h->ht_entries[hash % h->ht_nentries].hte_items);
+	    it != NULL; it = TAILQ_NEXT(it, hti_link))
+		if (it->hti_hash == hash)
+			return (it->hti_data);
+	return (NULL);
+}
+
+/* Insert value under hash; fails with -1/EEXIST if the hash is present. */
+int
+ht_add(struct ht *h, uint32_t hash, void *value)
+{
+	struct ht_entry *bucket;
+	struct ht_item *it;
+
+	ht_wrlock(h);
+	bucket = &h->ht_entries[hash % h->ht_nentries];
+	TAILQ_FOREACH(it, &bucket->hte_items, hti_link)
+		if (it->hti_hash == hash)
+			break;
+	if (it != NULL) {
+		errno = EEXIST;
+		ht_unlock(h);
+		return (-1);
+	}
+
+	it = l9p_calloc(1, sizeof(*it));
+	it->hti_hash = hash;
+	it->hti_data = value;
+	TAILQ_INSERT_TAIL(&bucket->hte_items, it, hti_link);
+	ht_unlock(h);
+	return (0);
+}
+
+/* Write-locked wrapper around ht_remove_locked(); passes its result on. */
+int
+ht_remove(struct ht *h, uint32_t hash)
+{
+	int rc;
+	ht_wrlock(h);
+	rc = ht_remove_locked(h, hash);
+	ht_unlock(h);
+	return (rc);
+}
+
+/*
+ * Takes no lock.  Unlinks and frees the match, or returns -1 with ENOENT.
+ */
+int
+ht_remove_locked(struct ht *h, uint32_t hash)
+{
+	struct ht_entry *bucket = &h->ht_entries[hash % h->ht_nentries];
+	struct ht_item *it;
+
+	TAILQ_FOREACH(it, &bucket->hte_items, hti_link)
+		if (it->hti_hash == hash)
+			break;
+	if (it == NULL) {
+		errno = ENOENT;
+		return (-1);
+	}
+	TAILQ_REMOVE(&bucket->hte_items, it, hti_link);
+	free(it);
+	return (0);
+}
+
+/*
+ * Step the iterator forward by one item (internal helper).
+ *
+ * With a current item, its successor in the same chain is the
+ * answer whenever there is one.  Otherwise the scan resumes at
+ * the slot after iter->htit_slot and takes the head of the first
+ * non-empty chain.  In that case iter->htit_slot is updated to
+ * the slot where the scan stopped, which lies past the last slot
+ * when no item is left.
+ *
+ * A freshly initialized iterator has htit_slot == -1, so its
+ * scan begins at slot 0.
+ *
+ * Caller must hold a lock on the table.
+ */
+static struct ht_item *
+ht_iter_advance(struct ht_iter *iter, struct ht_item *cur)
+{
+	struct ht *table = iter->htit_parent;
+	struct ht_item *found;
+	ssize_t slot;
+
+	/* stay within the current chain while it has more items */
+	found = (cur != NULL) ? TAILQ_NEXT(cur, hti_link) : NULL;
+	if (found != NULL)
+		return (found);
+
+	/* otherwise take the head of the next non-empty chain */
+	for (slot = iter->htit_slot + 1; slot < table->ht_nentries; slot++) {
+		found = TAILQ_FIRST(&table->ht_entries[slot].hte_items);
+		if (found != NULL)
+			break;
+	}
+
+	/* record where the scan stopped, even if that is past the end */
+	iter->htit_slot = slot;
+	return (found);
+}
+
+/*
+ * Remove the item the iterator is currently on (htit_curr).  Returns 0,
+ * or -1 with errno = EINVAL when there is no current item.  The successor
+ * is pre-located into htit_next, so do not use on an actively-changing table.
+ */
+int
+ht_remove_at_iter(struct ht_iter *iter)
+{
+ struct ht_item *item;
+ struct ht *h;
+ ssize_t slot;
+
+ assert(iter != NULL);
+
+ if ((item = iter->htit_curr) == NULL) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ /* under the write lock: pre-load the NEXT item, then unlink this one */
+ h = iter->htit_parent;
+ ht_wrlock(h);
+ slot = iter->htit_slot; /* save first: ht_iter_advance() may move htit_slot */
+ iter->htit_next = ht_iter_advance(iter, item);
+ TAILQ_REMOVE(&h->ht_entries[slot].hte_items, item, hti_link);
+ ht_unlock(h);
+
+ /* mark us as no longer on an item, then free it */
+ iter->htit_curr = NULL;
+ free(item); /* frees the table's item only, not its hti_data */
+
+ return (0);
+}
+
+/*
+ * Prepare iter for a walk over h.  Nothing is fetched yet: the first
+ * ht_next() call yields the first item, later calls the following ones.
+ * Walking a table that is being modified is discouraged in general,
+ * although the iterator tries to behave semi-sensibly if that happens.
+ */
+void
+ht_iter(struct ht *h, struct ht_iter *iter)
+{
+
+	iter->htit_slot = -1;	/* ht_iter_advance() starts at slot + 1 == 0 */
+	iter->htit_next = NULL;
+	iter->htit_curr = NULL;
+	iter->htit_parent = h;
+}
+
+/*
+ * Advance the iterator; returns the new current item's data, or NULL.
+ */
+void *
+ht_next(struct ht_iter *iter)
+{
+	struct ht_item *it = iter->htit_next;
+
+	if (it != NULL) {
+		/* consume the item pre-loaded by ht_remove_at_iter() */
+		iter->htit_next = NULL;
+	} else {
+		/* nothing pre-loaded: step on from the current item */
+		ht_rdlock(iter->htit_parent);
+		it = ht_iter_advance(iter, iter->htit_curr);
+		ht_unlock(iter->htit_parent);
+	}
+	iter->htit_curr = it;
+	return (it != NULL ? it->hti_data : NULL);
+}
+
diff --git a/hashtable.h b/hashtable.h
new file mode 100644
index 000000000000..60b8dfff7ba4
--- /dev/null
+++ b/hashtable.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef LIB9P_HASHTABLE_H
+#define LIB9P_HASHTABLE_H
+
+#include <pthread.h>
+#include <sys/queue.h>
+
+struct ht {
+ struct ht_entry * ht_entries; /* array of ht_nentries chain heads */
+ ssize_t ht_nentries; /* number of slots in ht_entries */
+ pthread_rwlock_t ht_rwlock; /* taken via ht_rdlock()/ht_wrlock() */
+};
+
+struct ht_entry {
+ TAILQ_HEAD(, ht_item) hte_items; /* items whose hash maps to this slot */
+};
+
+struct ht_item {
+ uint32_t hti_hash; /* lookup key; ht_add() rejects duplicates */
+ void * hti_data; /* caller-supplied value; not freed by the table */
+ TAILQ_ENTRY(ht_item) hti_link; /* linkage within the slot's chain */
+};
+
+struct ht_iter {
+ struct ht * htit_parent; /* table being walked */
+ struct ht_item * htit_curr; /* item ht_next() last stopped on, or NULL */
+ struct ht_item * htit_next; /* pre-loaded by ht_remove_at_iter(), else NULL */
+ ssize_t htit_slot; /* slot of the walk; -1 before the first item */
+};
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wthread-safety-analysis"
+#endif
+
+/*
+ * Take the table's rwlock for reading; passes on the pthread result.
+ */
+static inline int
+ht_rdlock(struct ht *h)
+{
+	pthread_rwlock_t *const lock = &h->ht_rwlock;
+	return (pthread_rwlock_rdlock(lock));
+}
+
+/*
+ * Take the table's rwlock for writing; passes on the pthread result.
+ */
+static inline int
+ht_wrlock(struct ht *h)
+{
+	pthread_rwlock_t *const lock = &h->ht_rwlock;
+	return (pthread_rwlock_wrlock(lock));
+}
+
+/*
+ * Drop the table's rwlock (read or write); passes on the pthread result.
+ */
+static inline int
+ht_unlock(struct ht *h)
+{
+	pthread_rwlock_t *const lock = &h->ht_rwlock;
+	return (pthread_rwlock_unlock(lock));
+}
+
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+
+void ht_init(struct ht *h, ssize_t size);
+void ht_destroy(struct ht *h);
+void *ht_find(struct ht *h, uint32_t hash);
+void *ht_find_locked(struct ht *h, uint32_t hash);
+int ht_add(struct ht *h, uint32_t hash, void *value);
+int ht_remove(struct ht *h, uint32_t hash);
+int ht_remove_locked(struct ht *h, uint32_t hash);
+int ht_remove_at_iter(struct ht_iter *iter);
+void ht_iter(struct ht *h, struct ht_iter *iter);
+void *ht_next(struct ht_iter *iter);
+
+#endif /* LIB9P_HASHTABLE_H */
diff --git a/lib9p.h b/lib9p.h
new file mode 100644
index 000000000000..79b741c98887
--- /dev/null
+++ b/lib9p.h
@@ -0,0 +1,249 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+
+#ifndef LIB9P_LIB9P_H
+#define LIB9P_LIB9P_H
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/queue.h>
+#include <sys/uio.h>
+#include <pthread.h>
+
+#if defined(__FreeBSD__)
+#include <sys/sbuf.h>
+#else
+#include "sbuf/sbuf.h"
+#endif
+
+#include "fcall.h"
+#include "threadpool.h"
+#include "hashtable.h"
+
+#define L9P_DEFAULT_MSIZE 8192
+#define L9P_MAX_IOV 128
+#define L9P_NUMTHREADS 8
+
+struct l9p_request;
+struct l9p_backend;
+struct l9p_fid;
+
+/*
+ * Functions to implement underlying transport for lib9p.
+ *
+ * The transport is responsible for:
+ *
+ * - allocating a response buffer (filling in the iovec and niov)
+ * (gets req, pointer to base of iov array of size L9P_MAX_IOV,
+ * pointer to niov, lt_aux)
+ *
+ * - sending a response, when a request has a reply ready
+ * (gets req, pointer to iov, niov, actual response length, lt_aux)
+ *
+ * - dropping the response buffer, when a request has been
+ * flushed or otherwise dropped without a response
+ * (gets req, pointer to iov, niov, lt_aux)
+ *
+ * The transport is of course also responsible for feeding in
+ * request-buffers, but that happens by the transport calling
+ * l9p_connection_recv().
+ */
+struct l9p_transport {
+ void *lt_aux; /* opaque pointer handed to each callback (see above) */
+ int (*lt_get_response_buffer)(struct l9p_request *, struct iovec *,
+ size_t *, void *); /* req, iov base, pointer to niov, lt_aux */
+ int (*lt_send_response)(struct l9p_request *, const struct iovec *,
+ size_t, size_t, void *); /* req, iov, niov, response length, lt_aux */
+ void (*lt_drop_response)(struct l9p_request *, const struct iovec *,
+ size_t, void *); /* req, iov, niov, lt_aux */
+};
+
+enum l9p_pack_mode {
+ L9P_PACK, /* packing (encoding) outgoing results */
+ L9P_UNPACK /* unpacking (decoding) incoming requests */
+};
+
+enum l9p_integer_type { /* values look like widths in bytes - TODO confirm */
+ L9P_BYTE = 1,
+ L9P_WORD = 2,
+ L9P_DWORD = 4,
+ L9P_QWORD = 8
+};
+
+enum l9p_version {
+ L9P_INVALID_VERSION = 0,
+ L9P_2000 = 1, /* presumably plain 9P2000 - confirm */
+ L9P_2000U = 2, /* presumably 9P2000.u - confirm */
+ L9P_2000L = 3 /* presumably 9P2000.L - confirm */
+};
+
+/*
+ * This structure is used for unpacking (decoding) incoming
+ * requests and packing (encoding) outgoing results. It has its
+ * own copy of the iov array, with its own counters for working
+ * through that array, but it borrows the actual DATA from the
+ * original iov array associated with the original request (see
+ * below).
+ */
+struct l9p_message {
+ enum l9p_pack_mode lm_mode; /* L9P_PACK or L9P_UNPACK */
+ struct iovec lm_iov[L9P_MAX_IOV]; /* own iov array; the data is borrowed */
+ size_t lm_niov; /* presumably entries of lm_iov in use - confirm */
+ size_t lm_cursor_iov; /* presumably index of the iov being worked on */
+ size_t lm_cursor_offset; /* presumably byte offset within that iov */
+ size_t lm_size; /* NOTE(review): meaning not visible in this header */
+};
+
+/*
+ * Data structure for a request/response pair (Tfoo/Rfoo).
+ *
+ * Note that the response is not formatted out into raw data
+ * (overwriting the request raw data) until we are really
+ * responding, with the exception of read operations Tread
+ * and Treaddir, which overlay their result-data into the
+ * iov array in the process of reading.
+ *
+ * We have room for two incoming fids, in case we are
+ * using 9P2000.L protocol. Note that nothing that uses two
+ * fids also has an output fid (newfid), so we could have a
+ * union of lr_fid2 and lr_newfid, but keeping them separate
+ * is probably a bit less error-prone. (If we want to shave
+ * memory requirements there are more places to look.)
+ *
+ * (The fid, fid2, and newfid fields should be removed via
+ * reorganization, as they are only used for smuggling data
+ * between request.c and the backend and should just be
+ * parameters to backend ops.)
+ */
+struct l9p_request {
+ struct l9p_message lr_req_msg; /* for unpacking the request */
+ struct l9p_message lr_resp_msg; /* for packing the response */
+ union l9p_fcall lr_req; /* the request, decoded/unpacked */
+ union l9p_fcall lr_resp; /* the response, not yet packed */
+
+ struct l9p_fid *lr_fid; /* first incoming fid */
+ struct l9p_fid *lr_fid2; /* second incoming fid (9P2000.L) */
+ struct l9p_fid *lr_newfid; /* output fid */
+
+ struct l9p_connection *lr_conn; /* containing connection */
+ void *lr_aux; /* reserved for transport layer */
+
+ struct iovec lr_data_iov[L9P_MAX_IOV]; /* iovecs for req + resp */
+ size_t lr_data_niov; /* actual size of data_iov */
+
+ int lr_error; /* result from l9p_dispatch_request */
+
+ /* protected by threadpool mutex */
+ enum l9p_workstate lr_workstate; /* threadpool: work state */
+ enum l9p_flushstate lr_flushstate; /* flush state if flushee */
+ struct l9p_worker *lr_worker; /* threadpool: worker */
+ STAILQ_ENTRY(l9p_request) lr_worklink; /* reserved to threadpool */
+
+ /* protected by tag hash table lock */
+ struct l9p_request_queue lr_flushq; /* q of flushers */
+ STAILQ_ENTRY(l9p_request) lr_flushlink; /* link w/in flush queue */
+};
+
+/* N.B.: these dirents are variable length and for 9P2000.L only */
+struct l9p_dirent {
+ struct l9p_qid qid;
+ uint64_t offset;
+ uint8_t type;
+ char *name; /* NOTE(review): ownership of name not visible here - confirm */
+};
+
+/*
+ * The 9pfs protocol has the notion of a "session", which is
+ * traffic between any two "Tversion" requests. All fids
+ * (lc_files, below) are specific to one particular session.
+ *
+ * We need a data structure per connection (client/server
+ * pair). This data structure lasts longer than these 9pfs
+ * sessions, but contains the request/response pairs and fids.
+ * Logically, the per-session data should be separate, but
+ * most of the time that would just require an extra
+ * indirection. Instead, a new session simply clunks all
+ * fids, and otherwise keeps using this same connection.
+ */
+struct l9p_connection {
+ struct l9p_server *lc_server; /* presumably the owning server - confirm */
+ struct l9p_transport lc_lt; /* transport callbacks and lt_aux */
+ struct l9p_threadpool lc_tp;
+ enum l9p_version lc_version;
+ uint32_t lc_msize;
+ uint32_t lc_max_io_size;
+ struct ht lc_files; /* fids of the current session (see above) */
+ struct ht lc_requests; /* presumably outstanding requests - confirm */
+ LIST_ENTRY(l9p_connection) lc_link; /* presumably linkage in ls_conns */
+};
+
+struct l9p_server {
+ struct l9p_backend *ls_backend;
+ enum l9p_version ls_max_version; /* presumably highest version offered - confirm */
+ LIST_HEAD(, l9p_connection) ls_conns; /* list of l9p_connection objects */
+};
+
+int l9p_pufcall(struct l9p_message *msg, union l9p_fcall *fcall,
+ enum l9p_version version);
+ssize_t l9p_pustat(struct l9p_message *msg, struct l9p_stat *s,
+ enum l9p_version version);
+uint16_t l9p_sizeof_stat(struct l9p_stat *stat, enum l9p_version version);
+int l9p_pack_stat(struct l9p_message *msg, struct l9p_request *req,
+ struct l9p_stat *s);
+ssize_t l9p_pudirent(struct l9p_message *msg, struct l9p_dirent *de);
+
+int l9p_server_init(struct l9p_server **serverp, struct l9p_backend *backend);
+
+int l9p_connection_init(struct l9p_server *server,
+ struct l9p_connection **connp);
+void l9p_connection_free(struct l9p_connection *conn);
+void l9p_connection_recv(struct l9p_connection *conn, const struct iovec *iov,
+ size_t niov, void *aux);
+void l9p_connection_close(struct l9p_connection *conn);
+struct l9p_fid *l9p_connection_alloc_fid(struct l9p_connection *conn,
+ uint32_t fid);
+void l9p_connection_remove_fid(struct l9p_connection *conn,
+ struct l9p_fid *fid);
+
+int l9p_dispatch_request(struct l9p_request *req);
+void l9p_respond(struct l9p_request *req, bool drop, bool rmtag);
+
+void l9p_init_msg(struct l9p_message *msg, struct l9p_request *req,
+ enum l9p_pack_mode mode);
+void l9p_seek_iov(struct iovec *iov1, size_t niov1, struct iovec *iov2,
+ size_t *niov2, size_t seek);
+size_t l9p_truncate_iov(struct iovec *iov, size_t niov, size_t length);
+void l9p_describe_fcall(union l9p_fcall *fcall, enum l9p_version version,
+ struct sbuf *sb);
+void l9p_freefcall(union l9p_fcall *fcall);
+void l9p_freestat(struct l9p_stat *stat);
+
+gid_t *l9p_getgrlist(const char *, gid_t, int *);
+
+#endif /* LIB9P_LIB9P_H */
diff --git a/lib9p_impl.h b/lib9p_impl.h
new file mode 100644
index 000000000000..41ff07ae189c
--- /dev/null
+++ b/lib9p_impl.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef LIB9P_LIB9P_IMPL_H
+#define LIB9P_LIB9P_IMPL_H
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifndef _KERNEL
+/* malloc() or die; the size (a size_t) is printed with %zu before abort(). */
+static inline void *
+l9p_malloc(size_t size)
+{
+	void *r = malloc(size);
+
+	if (r == NULL) {
+		fprintf(stderr, "cannot allocate %zu bytes: out of memory\n",
+		    size);
+		abort();
+	}
+	return (r);
+}
+
+/* calloc() or die; n * size (a size_t) is printed with %zu before abort(). */
+static inline void *
+l9p_calloc(size_t n, size_t size)
+{
+	void *r = calloc(n, size);
+
+	if (r == NULL) {
+		fprintf(stderr, "cannot allocate %zu bytes: out of memory\n",
+		    n * size);
+		abort();
+	}
+	return (r);
+}
+
+/* realloc() or die; newsize (a size_t) is printed with %zu before abort(). */
+static inline void *
+l9p_realloc(void *ptr, size_t newsize)
+{
+	void *r = realloc(ptr, newsize);
+
+	if (r == NULL) {
+		fprintf(stderr, "cannot allocate %zu bytes: out of memory\n",
+		    newsize);
+		abort();
+	}
+	return (r);
+}
+#endif /* _KERNEL */
+
+#endif /* LIB9P_LIB9P_IMPL_H */
diff --git a/linux_errno.h b/linux_errno.h
new file mode 100644
index 000000000000..72778daa2336
--- /dev/null
+++ b/linux_errno.h
@@ -0,0 +1,247 @@
+/*
+ * Copyright 2016 Chris Torek <torek@ixsystems.com>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef LIB9P_LINUX_ERRNO_H
+#define LIB9P_LINUX_ERRNO_H
+
+/*
+ * Linux error numbers that are outside of the original base range
+ * (which ends with ERANGE).
+ *
+ * This is pretty much the same as Linux's errno.h except that the
+ * names are prefixed with "LINUX_", and we add _STR with the
+ * string name.
+ *
+ * The string expansions were obtained with a little program to
+ * print every strerror().
+ *
+ * Note that BSD EDEADLK is 11 and BSD EAGAIN is 35, vs
+ * Linux / Plan9 EAGAIN at 11. So one value in the ERANGE
+ * range still needs translation too.
+ */
+
+#define LINUX_EAGAIN 11
+#define LINUX_EAGAIN_STR "Resource temporarily unavailable"
+
+#define LINUX_EDEADLK 35
+#define LINUX_EDEADLK_STR "Resource deadlock avoided"
+#define LINUX_ENAMETOOLONG 36
+#define LINUX_ENAMETOOLONG_STR "File name too long"
+#define LINUX_ENOLCK 37
+#define LINUX_ENOLCK_STR "No locks available"
+#define LINUX_ENOSYS 38
+#define LINUX_ENOSYS_STR "Function not implemented"
+#define LINUX_ENOTEMPTY 39
+#define LINUX_ENOTEMPTY_STR "Directory not empty"
+#define LINUX_ELOOP 40
+#define LINUX_ELOOP_STR "Too many levels of symbolic links"
+/* 41 unused */
+#define LINUX_ENOMSG 42
+#define LINUX_ENOMSG_STR "No message of desired type"
+#define LINUX_EIDRM 43
+#define LINUX_EIDRM_STR "Identifier removed"
+#define LINUX_ECHRNG 44
+#define LINUX_ECHRNG_STR "Channel number out of range"
+#define LINUX_EL2NSYNC 45
+#define LINUX_EL2NSYNC_STR "Level 2 not synchronized"
+#define LINUX_EL3HLT 46
+#define LINUX_EL3HLT_STR "Level 3 halted"
+#define LINUX_EL3RST 47
+#define LINUX_EL3RST_STR "Level 3 reset"
+#define LINUX_ELNRNG 48
+#define LINUX_ELNRNG_STR "Link number out of range"
+#define LINUX_EUNATCH 49
+#define LINUX_EUNATCH_STR "Protocol driver not attached"
+#define LINUX_ENOCSI 50
+#define LINUX_ENOCSI_STR "No CSI structure available"
+#define LINUX_EL2HLT 51
+#define LINUX_EL2HLT_STR "Level 2 halted"
+#define LINUX_EBADE 52
+#define LINUX_EBADE_STR "Invalid exchange"
+#define LINUX_EBADR 53
+#define LINUX_EBADR_STR "Invalid request descriptor"
+#define LINUX_EXFULL 54
+#define LINUX_EXFULL_STR "Exchange full"
+#define LINUX_ENOANO 55
+#define LINUX_ENOANO_STR "No anode"
+#define LINUX_EBADRQC 56
+#define LINUX_EBADRQC_STR "Invalid request code"
+#define LINUX_EBADSLT 57
+#define LINUX_EBADSLT_STR "Invalid slot"
+/* 58 unused */
+#define LINUX_EBFONT 59
+#define LINUX_EBFONT_STR "Bad font file format"
+#define LINUX_ENOSTR 60
+#define LINUX_ENOSTR_STR "Device not a stream"
+#define LINUX_ENODATA 61
+#define LINUX_ENODATA_STR "No data available"
+#define LINUX_ETIME 62
+#define LINUX_ETIME_STR "Timer expired"
+#define LINUX_ENOSR 63
+#define LINUX_ENOSR_STR "Out of streams resources"
+#define LINUX_ENONET 64
+#define LINUX_ENONET_STR "Machine is not on the network"
+#define LINUX_ENOPKG 65
+#define LINUX_ENOPKG_STR "Package not installed"
+#define LINUX_EREMOTE 66
+#define LINUX_EREMOTE_STR "Object is remote"
+#define LINUX_ENOLINK 67
+#define LINUX_ENOLINK_STR "Link has been severed"
+#define LINUX_EADV 68
+#define LINUX_EADV_STR "Advertise error"
+#define LINUX_ESRMNT 69
+#define LINUX_ESRMNT_STR "Srmount error"
+#define LINUX_ECOMM 70
+#define LINUX_ECOMM_STR "Communication error on send"
+#define LINUX_EPROTO 71
+#define LINUX_EPROTO_STR "Protocol error"
+#define LINUX_EMULTIHOP 72
+#define LINUX_EMULTIHOP_STR "Multihop attempted"
+#define LINUX_EDOTDOT 73
+#define LINUX_EDOTDOT_STR "RFS specific error"
+#define LINUX_EBADMSG 74
+#define LINUX_EBADMSG_STR "Bad message"
+#define LINUX_EOVERFLOW 75
+#define LINUX_EOVERFLOW_STR "Value too large for defined data type"
+#define LINUX_ENOTUNIQ 76
+#define LINUX_ENOTUNIQ_STR "Name not unique on network"
+#define LINUX_EBADFD 77
+#define LINUX_EBADFD_STR "File descriptor in bad state"
+#define LINUX_EREMCHG 78
+#define LINUX_EREMCHG_STR "Remote address changed"
+#define LINUX_ELIBACC 79
+#define LINUX_ELIBACC_STR "Can not access a needed shared library"
+#define LINUX_ELIBBAD 80
+#define LINUX_ELIBBAD_STR "Accessing a corrupted shared library"
+#define LINUX_ELIBSCN 81
+#define LINUX_ELIBSCN_STR ".lib section in a.out corrupted"
+#define LINUX_ELIBMAX 82
+#define LINUX_ELIBMAX_STR "Attempting to link in too many shared libraries"
+#define LINUX_ELIBEXEC 83
+#define LINUX_ELIBEXEC_STR "Cannot exec a shared library directly"
+#define LINUX_EILSEQ 84
+#define LINUX_EILSEQ_STR "Invalid or incomplete multibyte or wide character"
+#define LINUX_ERESTART 85
+#define LINUX_ERESTART_STR "Interrupted system call should be restarted"
+#define LINUX_ESTRPIPE 86
+#define LINUX_ESTRPIPE_STR "Streams pipe error"
+#define LINUX_EUSERS 87
+#define LINUX_EUSERS_STR "Too many users"
+#define LINUX_ENOTSOCK 88
+#define LINUX_ENOTSOCK_STR "Socket operation on non-socket"
+#define LINUX_EDESTADDRREQ 89
+#define LINUX_EDESTADDRREQ_STR "Destination address required"
+#define LINUX_EMSGSIZE 90
+#define LINUX_EMSGSIZE_STR "Message too long"
+#define LINUX_EPROTOTYPE 91
+#define LINUX_EPROTOTYPE_STR "Protocol wrong type for socket"
+#define LINUX_ENOPROTOOPT 92
+#define LINUX_ENOPROTOOPT_STR "Protocol not available"
+#define LINUX_EPROTONOSUPPORT 93
+#define LINUX_EPROTONOSUPPORT_STR "Protocol not supported"
+#define LINUX_ESOCKTNOSUPPORT 94
+#define LINUX_ESOCKTNOSUPPORT_STR "Socket type not supported"
+#define LINUX_EOPNOTSUPP 95
+#define LINUX_EOPNOTSUPP_STR "Operation not supported"
+#define LINUX_EPFNOSUPPORT 96
+#define LINUX_EPFNOSUPPORT_STR "Protocol family not supported"
+#define LINUX_EAFNOSUPPORT 97
+#define LINUX_EAFNOSUPPORT_STR "Address family not supported by protocol"
+#define LINUX_EADDRINUSE 98
+#define LINUX_EADDRINUSE_STR "Address already in use"
+#define LINUX_EADDRNOTAVAIL 99
+#define LINUX_EADDRNOTAVAIL_STR "Cannot assign requested address"
+#define LINUX_ENETDOWN 100
+#define LINUX_ENETDOWN_STR "Network is down"
+#define LINUX_ENETUNREACH 101
+#define LINUX_ENETUNREACH_STR "Network is unreachable"
+#define LINUX_ENETRESET 102
+#define LINUX_ENETRESET_STR "Network dropped connection on reset"
+#define LINUX_ECONNABORTED 103
+#define LINUX_ECONNABORTED_STR "Software caused connection abort"
+#define LINUX_ECONNRESET 104
+#define LINUX_ECONNRESET_STR "Connection reset by peer"
+#define LINUX_ENOBUFS 105
+#define LINUX_ENOBUFS_STR "No buffer space available"
+#define LINUX_EISCONN 106
+#define LINUX_EISCONN_STR "Transport endpoint is already connected"
+#define LINUX_ENOTCONN 107
+#define LINUX_ENOTCONN_STR "Transport endpoint is not connected"
+#define LINUX_ESHUTDOWN 108
+#define LINUX_ESHUTDOWN_STR "Cannot send after transport endpoint shutdown"
+#define LINUX_ETOOMANYREFS 109
+#define LINUX_ETOOMANYREFS_STR "Too many references: cannot splice"
+#define LINUX_ETIMEDOUT 110
+#define LINUX_ETIMEDOUT_STR "Connection timed out"
+#define LINUX_ECONNREFUSED 111
+#define LINUX_ECONNREFUSED_STR "Connection refused"
+#define LINUX_EHOSTDOWN 112
+#define LINUX_EHOSTDOWN_STR "Host is down"
+#define LINUX_EHOSTUNREACH 113
+#define LINUX_EHOSTUNREACH_STR "No route to host"
+#define LINUX_EALREADY 114
+#define LINUX_EALREADY_STR "Operation already in progress"
+#define LINUX_EINPROGRESS 115
+#define LINUX_EINPROGRESS_STR "Operation now in progress"
+#define LINUX_ESTALE 116
+#define LINUX_ESTALE_STR "Stale file handle"
+#define LINUX_EUCLEAN 117
+#define LINUX_EUCLEAN_STR "Structure needs cleaning"
+#define LINUX_ENOTNAM 118
+#define LINUX_ENOTNAM_STR "Not a XENIX named type file"
+#define LINUX_ENAVAIL 119
+#define LINUX_ENAVAIL_STR "No XENIX semaphores available"
+#define LINUX_EISNAM 120
+#define LINUX_EISNAM_STR "Is a named type file"
+#define LINUX_EREMOTEIO 121
+#define LINUX_EREMOTEIO_STR "Remote I/O error"
+#define LINUX_EDQUOT 122
+#define LINUX_EDQUOT_STR "Quota exceeded"
+#define LINUX_ENOMEDIUM 123
+#define LINUX_ENOMEDIUM_STR "No medium found"
+#define LINUX_EMEDIUMTYPE 124
+#define LINUX_EMEDIUMTYPE_STR "Wrong medium type"
+#define LINUX_ECANCELED 125
+#define LINUX_ECANCELED_STR "Operation canceled"
+#define LINUX_ENOKEY 126
+#define LINUX_ENOKEY_STR "Required key not available"
+#define LINUX_EKEYEXPIRED 127
+#define LINUX_EKEYEXPIRED_STR "Key has expired"
+#define LINUX_EKEYREVOKED 128
+#define LINUX_EKEYREVOKED_STR "Key has been revoked"
+#define LINUX_EKEYREJECTED 129
+#define LINUX_EKEYREJECTED_STR "Key was rejected by service"
+#define LINUX_EOWNERDEAD 130
+#define LINUX_EOWNERDEAD_STR "Owner died"
+#define LINUX_ENOTRECOVERABLE 131
+#define LINUX_ENOTRECOVERABLE_STR "State not recoverable"
+#define LINUX_ERFKILL 132
+#define LINUX_ERFKILL_STR "Operation not possible due to RF-kill"
+#define LINUX_EHWPOISON 133
+#define LINUX_EHWPOISON_STR "Memory page has hardware error"
+
+#endif /* LIB9P_LINUX_ERRNO_H */
diff --git a/log.c b/log.c
new file mode 100644
index 000000000000..fb2596a16f4e
--- /dev/null
+++ b/log.c
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include "log.h"
+
+/*
+ * Printable tag for each log level.  l9p_logf() indexes this table
+ * directly with its enum l9p_log_level argument, so the entries must
+ * stay in the same order as the enumerators declared in log.h
+ * (L9P_DEBUG, L9P_INFO, L9P_WARNING, L9P_ERROR).
+ */
+static const char *l9p_log_level_names[] = {
+ "DEBUG",
+ "INFO",
+ "WARN",
+ "ERROR"
+};
+
+/*
+ * Emit one log line of the form "[LEVEL]\t func: message\n".
+ *
+ * The destination is taken from the LIB9P_LOGGING environment variable
+ * the first time a message can be logged: the value "stderr" selects
+ * stderr, any other value is opened as a file in append mode.  If the
+ * variable is unset or the file cannot be opened, the message is
+ * dropped and the lookup is retried on the next call.  Once a stream
+ * has been chosen it is cached in a function-local static and flushed
+ * after every line.
+ *
+ * level selects the printed tag; a value outside the known levels is
+ * printed as "UNKNOWN" instead of indexing past the end of the name
+ * table.  func is the caller's name (the L9P_LOG macro passes
+ * __func__); fmt and the remaining arguments are printf-style.
+ */
+void
+l9p_logf(enum l9p_log_level level, const char *func, const char *fmt, ...)
+{
+    static FILE *stream = NULL;
+    const char *dest;
+    const char *tag;
+    va_list ap;
+
+    if (stream == NULL) {
+        dest = getenv("LIB9P_LOGGING");
+        if (dest == NULL)
+            return;
+
+        if (strcmp(dest, "stderr") == 0)
+            stream = stderr;
+        else {
+            stream = fopen(dest, "a");
+            if (stream == NULL)
+                return;
+        }
+    }
+
+    /*
+     * Converting to size_t makes a negative level look huge, so a
+     * single comparison rejects values on either side of the table.
+     */
+    if ((size_t)level <
+        sizeof(l9p_log_level_names) / sizeof(l9p_log_level_names[0]))
+        tag = l9p_log_level_names[level];
+    else
+        tag = "UNKNOWN";
+
+    va_start(ap, fmt);
+    fprintf(stream, "[%s]\t %s: ", tag, func);
+    vfprintf(stream, fmt, ap);
+    fprintf(stream, "\n");
+    fflush(stream);
+    va_end(ap);
+}
diff --git a/log.h b/log.h
new file mode 100644
index 000000000000..b801d4017afa
--- /dev/null
+++ b/log.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef LIB9P_LOG_H
+#define LIB9P_LOG_H
+
+/*
+ * Severity of a log message, from least to most severe.
+ *
+ * log.c uses these values as indices into its table of printable
+ * level names, so new levels must be added to both places in the
+ * same order.
+ */
+enum l9p_log_level {
+ L9P_DEBUG,
+ L9P_INFO,
+ L9P_WARNING,
+ L9P_ERROR
+};
+
+/*
+ * Backend for L9P_LOG: writes one message tagged with level and the
+ * name of the calling function.  fmt and the trailing arguments are
+ * printf-style.  Defined in log.c.
+ */
+void l9p_logf(enum l9p_log_level level, const char *func, const char *fmt, ...);
+
+/*
+ * L9P_LOG(level, fmt, ...) logs through l9p_logf(), supplying __func__
+ * automatically, but only when a preprocessor macro named L9P_DEBUG is
+ * defined.  Otherwise it expands to nothing and its arguments are not
+ * evaluated.
+ *
+ * NOTE(review): L9P_DEBUG is also an enumerator of enum l9p_log_level
+ * above, and defined() only sees preprocessor macros, so the build
+ * presumably has to pass something like -DL9P_DEBUG=L9P_DEBUG to turn
+ * logging on -- confirm against the makefiles.
+ */
+#if defined(L9P_DEBUG)
+#define L9P_LOG(level, fmt, ...) l9p_logf(level, __func__, fmt, ##__VA_ARGS__)
+#else
+#define L9P_LOG(level, fmt, ...)
+#endif
+
+#endif /* LIB9P_LOG_H */
diff --git a/pack.c b/pack.c
new file mode 100644
index 000000000000..88f0ccb4ad73
--- /dev/null
+++ b/pack.c
@@ -0,0 +1,993 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * Based on libixp code: ©2007-2010 Kris Maglione <maglione.k at Gmail>
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#ifdef __APPLE__
+# include "apple_endian.h"
+#else
+# include <sys/endian.h>
+#endif
+#include <sys/uio.h>
+#include "lib9p.h"
+#include "lib9p_impl.h"
+#include "log.h"
+
+/* Number of elements in a true array (must not be used on a pointer). */
+#define N(ary) (sizeof(ary) / sizeof((ary)[0]))
+/*
+ * Wire size of a string: the length prefix (L9P_WORD) plus the string
+ * bytes, with a NULL pointer counted as an empty string.  Note that
+ * the argument is evaluated twice.
+ */
+#define STRING_SIZE(s) (L9P_WORD + ((s) != NULL ? (uint16_t)strlen(s) : 0))
+/* Wire size of a qid: type byte, version dword, path qword. */
+#define QID_SIZE (L9P_BYTE + L9P_DWORD + L9P_QWORD)
+
+/*
+ * Forward declarations for the file-local pack/unpack primitives.
+ * Each one transfers a single wire-format item in the direction given
+ * by the message's lm_mode and returns the number of bytes moved, or
+ * -1 on error; see the comment above each definition for details.
+ */
+static ssize_t l9p_iov_io(struct l9p_message *, void *, size_t);
+static inline ssize_t l9p_pu8(struct l9p_message *, uint8_t *);
+static inline ssize_t l9p_pu16(struct l9p_message *, uint16_t *);
+static inline ssize_t l9p_pu32(struct l9p_message *, uint32_t *);
+static inline ssize_t l9p_pu64(struct l9p_message *, uint64_t *);
+static ssize_t l9p_pustring(struct l9p_message *, char **s);
+static ssize_t l9p_pustrings(struct l9p_message *, uint16_t *, char **, size_t);
+static ssize_t l9p_puqid(struct l9p_message *, struct l9p_qid *);
+static ssize_t l9p_puqids(struct l9p_message *, uint16_t *, struct l9p_qid *q);
+
+/*
+ * Core transfer routine: move len bytes between the caller's buffer
+ * and the message's iovec array, starting at the message cursor.
+ * msg->lm_mode picks the direction (L9P_PACK copies buffer -> iovecs,
+ * L9P_UNPACK copies iovecs -> buffer).  A transfer may span several
+ * iovecs; the cursor (lm_cursor_iov / lm_cursor_offset) is advanced
+ * as bytes are consumed.
+ *
+ * Returns len (as a signed value) when everything was transferred,
+ * in which case lm_size is increased by len.  Returns -1 when the
+ * iovecs are exhausted first; lm_size is then left alone, although
+ * the cursor has moved past the last iovec.
+ *
+ * Because a failure always leaves the cursor beyond the final iovec,
+ * every later call on the same (un-reset) msg with len > 0 fails as
+ * well.  Callers doing a series of transfers can therefore usually
+ * get away with checking only the last result.
+ */
+static ssize_t
+l9p_iov_io(struct l9p_message *msg, void *buffer, size_t len)
+{
+    size_t copied, chunk, avail;
+    char *user, *wire;
+
+    assert(msg != NULL);
+
+    /* A zero-length transfer always succeeds, even with no room. */
+    if (len == 0)
+        return (0);
+
+    /* Already ran off the end on an earlier call (or no iovecs). */
+    if (msg->lm_cursor_iov >= msg->lm_niov)
+        return (-1);
+
+    assert(buffer != NULL);
+
+    user = buffer;
+    for (copied = 0; copied < len; copied += chunk) {
+        size_t slot = msg->lm_cursor_iov;
+
+        /* Move as much as fits in what remains of this iovec. */
+        avail = msg->lm_iov[slot].iov_len - msg->lm_cursor_offset;
+        chunk = MIN(avail, len - copied);
+        wire = (char *)msg->lm_iov[slot].iov_base +
+            msg->lm_cursor_offset;
+
+        switch (msg->lm_mode) {
+        case L9P_PACK:
+            memcpy(wire, user + copied, chunk);
+            break;
+        case L9P_UNPACK:
+            memcpy(user + copied, wire, chunk);
+            break;
+        default:
+            break;
+        }
+
+        msg->lm_cursor_offset += chunk;
+
+        if (chunk == avail) {
+            /* This iovec is used up; continue with the next one. */
+            msg->lm_cursor_iov++;
+            msg->lm_cursor_offset = 0;
+
+            /* No next iovec, yet bytes are still outstanding. */
+            if (msg->lm_cursor_iov >= msg->lm_niov &&
+                copied + chunk < len)
+                return (-1);
+        }
+    }
+
+    msg->lm_size += copied;
+    return ((ssize_t)copied);
+}
+
+/*
+ * Pack or unpack a single byte.  No byte-order handling is needed,
+ * so this is a plain one-byte transfer through l9p_iov_io().
+ *
+ * Returns 1 on success, -1 on error.
+ */
+static inline ssize_t
+l9p_pu8(struct l9p_message *msg, uint8_t *val)
+{
+    ssize_t nbytes;
+
+    nbytes = l9p_iov_io(msg, val, sizeof (*val));
+    return (nbytes);
+}
+
+/*
+ * Pack or unpack a 16-bit value, which is little-endian on the wire.
+ *
+ * Returns 2 on success, -1 on error.
+ */
+static inline ssize_t
+l9p_pu16(struct l9p_message *msg, uint16_t *val)
+{
+#if _BYTE_ORDER == _LITTLE_ENDIAN
+    /* Host order already matches wire order: transfer in place. */
+    return (l9p_iov_io(msg, val, sizeof (*val)));
+#else
+    /*
+     * Byte swapping is confined to hosts that need it, rather than
+     * relying on the compiler to discard no-op conversions.
+     */
+    uint16_t wire;
+    ssize_t nbytes;
+
+    if (msg->lm_mode != L9P_PACK) {
+        /* Read the wire bytes, then convert in place. */
+        nbytes = l9p_iov_io(msg, val, sizeof (*val));
+        *val = le16toh(*val);
+    } else {
+        /* Convert into a temporary so *val is left untouched. */
+        wire = htole16(*val);
+        nbytes = l9p_iov_io(msg, &wire, sizeof (wire));
+    }
+    return (nbytes);
+#endif
+}
+
+/*
+ * Pack or unpack a 32-bit value, which is little-endian on the wire.
+ *
+ * Returns 4 on success, -1 on error.
+ */
+static inline ssize_t
+l9p_pu32(struct l9p_message *msg, uint32_t *val)
+{
+#if _BYTE_ORDER == _LITTLE_ENDIAN
+    /* Host order already matches wire order: transfer in place. */
+    return (l9p_iov_io(msg, val, sizeof (*val)));
+#else
+    uint32_t wire;
+    ssize_t nbytes;
+
+    if (msg->lm_mode != L9P_PACK) {
+        /* Read the wire bytes, then convert in place. */
+        nbytes = l9p_iov_io(msg, val, sizeof (*val));
+        *val = le32toh(*val);
+    } else {
+        /* Convert into a temporary so *val is left untouched. */
+        wire = htole32(*val);
+        nbytes = l9p_iov_io(msg, &wire, sizeof (wire));
+    }
+    return (nbytes);
+#endif
+}
+
+/*
+ * Pack or unpack a 64-bit value, which is little-endian on the wire.
+ *
+ * On hosts that are not little-endian the value is byte-swapped:
+ * when packing, a converted copy is written so *val is untouched;
+ * when unpacking, the wire bytes are read into *val and converted
+ * in place.
+ *
+ * Returns 8 on success, -1 on error.
+ */
+static inline ssize_t
+l9p_pu64(struct l9p_message *msg, uint64_t *val)
+{
+#if _BYTE_ORDER != _LITTLE_ENDIAN
+    uint64_t copy;
+    ssize_t ret;
+
+    if (msg->lm_mode == L9P_PACK) {
+        copy = htole64(*val);
+        return (l9p_iov_io(msg, &copy, sizeof (uint64_t)));
+    }
+    /* All eight bytes must be consumed before converting. */
+    ret = l9p_iov_io(msg, val, sizeof (uint64_t));
+    *val = le64toh(*val);
+    return (ret);
+#else
+    return (l9p_iov_io(msg, val, sizeof (uint64_t)));
+#endif
+}
+
+/*
+ * Pack or unpack one string.  On the wire a string is a 16-bit
+ * length followed by that many bytes, with no terminator.
+ *
+ * Packing: *s may be NULL, which is sent as a zero-length string.
+ * The length is narrowed to 16 bits before use.
+ *
+ * Unpacking: a fresh NUL-terminated buffer is obtained from
+ * l9p_calloc() and stored in *s.  It stays in *s even when this
+ * function subsequently fails, so the caller is responsible for
+ * releasing it in either case.
+ *
+ * Returns the string length plus 2 on success.  Returns -1 on error:
+ * no space in the message, allocation failure, or an illegal string
+ * (one containing an embedded NUL byte).  Note that the last two are
+ * errors even though l9p_iov_io() itself succeeded.
+ */
+static ssize_t
+l9p_pustring(struct l9p_message *msg, char **s)
+{
+    uint16_t len;
+
+    if (msg->lm_mode == L9P_PACK) {
+        if (*s == NULL)
+            len = 0;
+        else
+            len = (uint16_t)strlen(*s);
+    }
+
+    /* Length prefix: written when packing, filled in when unpacking. */
+    if (l9p_pu16(msg, &len) < 0)
+        return (-1);
+
+    if (msg->lm_mode == L9P_UNPACK) {
+        /* One extra zeroed byte supplies the terminator. */
+        *s = l9p_calloc(1, len + 1);
+        if (*s == NULL)
+            return (-1);
+    }
+
+    if (l9p_iov_io(msg, *s, len) < 0)
+        return (-1);
+
+    /*
+     * Reject incoming strings with a NUL inside them.  Accepting
+     * them would merely yield a shorter C string, but such input
+     * is illegal, so it is refused outright.
+     */
+    if (msg->lm_mode == L9P_UNPACK && memchr(*s, '\0', len) != NULL)
+        return (-1);
+
+    return ((ssize_t)len + 2);
+}
+
+/*
+ * Pack or unpack a counted list of strings: a 16-bit count followed
+ * by that many strings.  At most max strings are transferred.
+ *
+ * The clamping is deliberately asymmetric:
+ *  - packing: if *num exceeds max, only the count placed on the wire
+ *    is reduced; *num itself is left as the caller set it.
+ *  - unpacking: *num receives the count from the wire and is then
+ *    lowered to max if it was larger.
+ *
+ * Returns the total number of bytes transferred, including the two
+ * bytes of the count, or -1 on error.
+ */
+static ssize_t
+l9p_pustrings(struct l9p_message *msg, uint16_t *num, char **strings,
+    size_t max)
+{
+    size_t count, k;
+    ssize_t total, n;
+    uint16_t wirecount;
+
+    if (msg->lm_mode != L9P_PACK) {
+        total = l9p_pu16(msg, num);
+        count = *num;
+        if (count > max) {
+            count = max;
+            *num = (uint16_t)max;
+        }
+    } else {
+        count = *num;
+        if (count > max)
+            count = max;
+        wirecount = (uint16_t)count;
+        total = l9p_pu16(msg, &wirecount);
+    }
+    if (total < 0)
+        return (-1);
+
+    k = 0;
+    while (k < count) {
+        n = l9p_pustring(msg, &strings[k]);
+        if (n < 1)
+            return (-1);
+        total += n;
+        k++;
+    }
+
+    return (total);
+}
+
+/*
+ * Pack or unpack a qid: type (1 byte), version (4 bytes) and path
+ * (8 bytes), in that order.  The type travels through a uint8_t
+ * temporary rather than being transferred from qid->type directly.
+ *
+ * Returns QID_SIZE (13) on success.  On failure the result of the
+ * first field transfer that did not succeed is returned (-1).
+ */
+static ssize_t
+l9p_puqid(struct l9p_message *msg, struct l9p_qid *qid)
+{
+    uint8_t wtype;
+    ssize_t rv;
+
+    if (msg->lm_mode != L9P_PACK) {
+        rv = l9p_pu8(msg, &wtype);
+        qid->type = wtype;
+    } else {
+        wtype = qid->type;
+        rv = l9p_pu8(msg, &wtype);
+    }
+    if (rv <= 0)
+        return (rv);
+
+    rv = l9p_pu32(msg, &qid->version);
+    if (rv <= 0)
+        return (rv);
+
+    rv = l9p_pu64(msg, &qid->path);
+    if (rv <= 0)
+        return (rv);
+
+    return (QID_SIZE);
+}
+
+/*
+ * Pack or unpack a counted list of qids: a 16-bit count (*num)
+ * followed by that many qids.  When unpacking, *num is set from
+ * the wire first.
+ *
+ * Returns 2 + 13 * *num on success.  Returns -1 if a qid could not
+ * be transferred; if the count itself could not be transferred, the
+ * result of that transfer is returned.
+ *
+ * NOTE(review): unlike l9p_pustrings() there is no limit argument,
+ * so an unpacked count is not checked against the capacity of the
+ * qids array -- confirm that callers only unpack trusted input or
+ * that the array is large enough for any 16-bit count.
+ */
+static ssize_t
+l9p_puqids(struct l9p_message *msg, uint16_t *num, struct l9p_qid *qids)
+{
+    ssize_t total, n;
+    size_t k, count;
+
+    total = l9p_pu16(msg, num);
+    if (total <= 0)
+        return (total);
+
+    count = *num;
+    for (k = 0; k < count; k++) {
+        n = l9p_puqid(msg, &qids[k]);
+        if (n < 0)
+            return (-1);
+        total += n;
+    }
+
+    return (total);
+}
+
+/*
+ * Pack or unpack a l9p_stat.
+ *
+ * These have variable size, and the size further depends on
+ * the protocol version: for L9P_2000U and later the extension
+ * string and the numeric uid/gid/muid follow the classic fields.
+ *
+ * When unpacking, the string fields are allocated by l9p_pustring()
+ * and remain in *stat even if this function fails; the caller can
+ * release them with l9p_freestat().
+ *
+ * Returns the number of bytes packed/unpacked, or -1 on error.
+ */
+ssize_t
+l9p_pustat(struct l9p_message *msg, struct l9p_stat *stat,
+ enum l9p_version version)
+{
+ ssize_t r = 0;
+ uint16_t size;
+
+ /* The on-wire size field excludes the size of the size field. */
+ if (msg->lm_mode == L9P_PACK)
+ size = l9p_sizeof_stat(stat, version) - 2;
+
+ /*
+ * Transfer every field unconditionally, summing the results.
+ * A failing helper contributes -1 instead of its byte count, so
+ * errors are not checked per field; they are detected by the
+ * total falling short in the comparison below.  When unpacking,
+ * size is whatever the first transfer read from the wire.
+ */
+ r += l9p_pu16(msg, &size);
+ r += l9p_pu16(msg, &stat->type);
+ r += l9p_pu32(msg, &stat->dev);
+ r += l9p_puqid(msg, &stat->qid);
+ r += l9p_pu32(msg, &stat->mode);
+ r += l9p_pu32(msg, &stat->atime);
+ r += l9p_pu32(msg, &stat->mtime);
+ r += l9p_pu64(msg, &stat->length);
+ r += l9p_pustring(msg, &stat->name);
+ r += l9p_pustring(msg, &stat->uid);
+ r += l9p_pustring(msg, &stat->gid);
+ r += l9p_pustring(msg, &stat->muid);
+
+ if (version >= L9P_2000U) {
+ r += l9p_pustring(msg, &stat->extension);
+ r += l9p_pu32(msg, &stat->n_uid);
+ r += l9p_pu32(msg, &stat->n_gid);
+ r += l9p_pu32(msg, &stat->n_muid);
+ }
+
+ /*
+ * The bytes transferred (including the 2-byte size field) must
+ * cover at least what the size field announced.
+ * NOTE(review): this is only a lower bound; a wire size smaller
+ * than the data actually consumed is accepted -- confirm that is
+ * intended.
+ */
+ if (r < size + 2)
+ return (-1);
+
+ return (r);
+}
+
+/*
+ * Pack or unpack one directory entry: qid, 64-bit offset, one-byte
+ * type, then the variable-length name.
+ *
+ * When unpacking, the name is allocated by l9p_pustring() and
+ * ownership passes to the caller, who must free it.
+ *
+ * Returns the wire-format length of the entry, or -1 if there was
+ * not enough room or the name could not be transferred.
+ */
+ssize_t
+l9p_pudirent(struct l9p_message *msg, struct l9p_dirent *de)
+{
+    ssize_t fixed, namelen;
+
+    /* Fixed-size part; its total is validated after all transfers. */
+    fixed = l9p_puqid(msg, &de->qid);
+    fixed += l9p_pu64(msg, &de->offset);
+    fixed += l9p_pu8(msg, &de->type);
+
+    namelen = l9p_pustring(msg, &de->name);
+
+    if (namelen < 0)
+        return (-1);
+    if (fixed < QID_SIZE + 8 + 1)
+        return (-1);
+
+    return (fixed + namelen);
+}
+
+/*
+ * Pack or unpack a request or response (fcall).
+ *
+ * Every message starts with a 32-bit total length, a one-byte type
+ * and a 16-bit tag; the remaining fields depend on the type and, for
+ * a few messages, on the protocol version.
+ *
+ * Most intermediate transfers are deliberately unchecked: once
+ * l9p_iov_io() has failed, all later non-empty transfers on the same
+ * message fail too, so checking the last transfer of each case is
+ * enough.  String transfers are the exception and are checked where
+ * more fields follow, because l9p_pustring() can fail for reasons
+ * other than running out of room.
+ *
+ * When packing, the length word is first written as zero and then
+ * rewritten with the real size once the body has been packed.
+ *
+ * Returns 0 on success, -1 on error.  (It's up to the caller
+ * to call l9p_freefcall on our failure.)
+ */
+int
+l9p_pufcall(struct l9p_message *msg, union l9p_fcall *fcall,
+    enum l9p_version version)
+{
+    uint32_t length = 0;
+    ssize_t r;
+
+    /*
+     * Get overall length, type, and tag, which should appear
+     * in all messages.  If not even that works, abort immediately.
+     */
+    l9p_pu32(msg, &length);
+    l9p_pu8(msg, &fcall->hdr.type);
+    r = l9p_pu16(msg, &fcall->hdr.tag);
+    if (r < 0)
+        return (-1);
+
+    /*
+     * Decode remainder of message.  When unpacking, this may
+     * allocate memory, even if we fail during the decode.
+     * Note that the initial fcall is zeroed out, though, so
+     * we can just freefcall() to release whatever might have
+     * gotten allocated, if the unpack fails due to a short
+     * packet.
+     *
+     * Cases with no body leave r at the (successful) result of
+     * the tag transfer above.
+     */
+    switch (fcall->hdr.type) {
+    case L9P_TVERSION:
+    case L9P_RVERSION:
+        l9p_pu32(msg, &fcall->version.msize);
+        r = l9p_pustring(msg, &fcall->version.version);
+        break;
+
+    case L9P_TAUTH:
+        l9p_pu32(msg, &fcall->tauth.afid);
+        r = l9p_pustring(msg, &fcall->tauth.uname);
+        if (r < 0)
+            break;
+        r = l9p_pustring(msg, &fcall->tauth.aname);
+        if (r < 0)
+            break;
+        if (version >= L9P_2000U)
+            r = l9p_pu32(msg, &fcall->tauth.n_uname);
+        break;
+
+    case L9P_RAUTH:
+        r = l9p_puqid(msg, &fcall->rauth.aqid);
+        break;
+
+    case L9P_TATTACH:
+        l9p_pu32(msg, &fcall->hdr.fid);
+        l9p_pu32(msg, &fcall->tattach.afid);
+        r = l9p_pustring(msg, &fcall->tattach.uname);
+        if (r < 0)
+            break;
+        r = l9p_pustring(msg, &fcall->tattach.aname);
+        if (r < 0)
+            break;
+        if (version >= L9P_2000U)
+            r = l9p_pu32(msg, &fcall->tattach.n_uname);
+        break;
+
+    case L9P_RATTACH:
+        r = l9p_puqid(msg, &fcall->rattach.qid);
+        break;
+
+    case L9P_RERROR:
+        r = l9p_pustring(msg, &fcall->error.ename);
+        if (r < 0)
+            break;
+        if (version >= L9P_2000U)
+            r = l9p_pu32(msg, &fcall->error.errnum);
+        break;
+
+    case L9P_RLERROR:
+        r = l9p_pu32(msg, &fcall->error.errnum);
+        break;
+
+    case L9P_TFLUSH:
+        r = l9p_pu16(msg, &fcall->tflush.oldtag);
+        break;
+
+    case L9P_RFLUSH:
+        break;
+
+    case L9P_TWALK:
+        l9p_pu32(msg, &fcall->hdr.fid);
+        l9p_pu32(msg, &fcall->twalk.newfid);
+        r = l9p_pustrings(msg, &fcall->twalk.nwname,
+            fcall->twalk.wname, N(fcall->twalk.wname));
+        break;
+
+    case L9P_RWALK:
+        r = l9p_puqids(msg, &fcall->rwalk.nwqid, fcall->rwalk.wqid);
+        break;
+
+    case L9P_TOPEN:
+        l9p_pu32(msg, &fcall->hdr.fid);
+        r = l9p_pu8(msg, &fcall->topen.mode);
+        break;
+
+    case L9P_ROPEN:
+        l9p_puqid(msg, &fcall->ropen.qid);
+        r = l9p_pu32(msg, &fcall->ropen.iounit);
+        break;
+
+    case L9P_TCREATE:
+        l9p_pu32(msg, &fcall->hdr.fid);
+        r = l9p_pustring(msg, &fcall->tcreate.name);
+        if (r < 0)
+            break;
+        l9p_pu32(msg, &fcall->tcreate.perm);
+        r = l9p_pu8(msg, &fcall->tcreate.mode);
+        if (version >= L9P_2000U)
+            r = l9p_pustring(msg, &fcall->tcreate.extension);
+        break;
+
+    case L9P_RCREATE:
+        l9p_puqid(msg, &fcall->rcreate.qid);
+        r = l9p_pu32(msg, &fcall->rcreate.iounit);
+        break;
+
+    case L9P_TREAD:
+    case L9P_TREADDIR:
+        l9p_pu32(msg, &fcall->hdr.fid);
+        l9p_pu64(msg, &fcall->io.offset);
+        r = l9p_pu32(msg, &fcall->io.count);
+        break;
+
+    case L9P_RREAD:
+    case L9P_RREADDIR:
+        r = l9p_pu32(msg, &fcall->io.count);
+        break;
+
+    case L9P_TWRITE:
+        l9p_pu32(msg, &fcall->hdr.fid);
+        l9p_pu64(msg, &fcall->io.offset);
+        r = l9p_pu32(msg, &fcall->io.count);
+        break;
+
+    case L9P_RWRITE:
+        r = l9p_pu32(msg, &fcall->io.count);
+        break;
+
+    case L9P_TCLUNK:
+    case L9P_TSTAT:
+    case L9P_TREMOVE:
+    case L9P_TSTATFS:
+        r = l9p_pu32(msg, &fcall->hdr.fid);
+        break;
+
+    case L9P_RCLUNK:
+    case L9P_RREMOVE:
+        break;
+
+    case L9P_RSTAT:
+        {
+            /* Outer length prefix that precedes the stat itself. */
+            uint16_t size = l9p_sizeof_stat(&fcall->rstat.stat,
+                version);
+            l9p_pu16(msg, &size);
+            r = l9p_pustat(msg, &fcall->rstat.stat, version);
+        }
+        break;
+
+    case L9P_TWSTAT:
+        {
+            /*
+             * Outer length prefix.  It is computed only when
+             * packing, so that a defined value goes on the
+             * wire; when unpacking it is read and ignored.
+             */
+            uint16_t size = 0;
+
+            if (msg->lm_mode == L9P_PACK)
+                size = l9p_sizeof_stat(&fcall->twstat.stat,
+                    version);
+            l9p_pu32(msg, &fcall->hdr.fid);
+            l9p_pu16(msg, &size);
+            r = l9p_pustat(msg, &fcall->twstat.stat, version);
+        }
+        break;
+
+    case L9P_RWSTAT:
+        break;
+
+    case L9P_RSTATFS:
+        l9p_pu32(msg, &fcall->rstatfs.statfs.type);
+        l9p_pu32(msg, &fcall->rstatfs.statfs.bsize);
+        l9p_pu64(msg, &fcall->rstatfs.statfs.blocks);
+        l9p_pu64(msg, &fcall->rstatfs.statfs.bfree);
+        l9p_pu64(msg, &fcall->rstatfs.statfs.bavail);
+        l9p_pu64(msg, &fcall->rstatfs.statfs.files);
+        l9p_pu64(msg, &fcall->rstatfs.statfs.ffree);
+        l9p_pu64(msg, &fcall->rstatfs.statfs.fsid);
+        r = l9p_pu32(msg, &fcall->rstatfs.statfs.namelen);
+        break;
+
+    case L9P_TLOPEN:
+        l9p_pu32(msg, &fcall->hdr.fid);
+        r = l9p_pu32(msg, &fcall->tlopen.flags);
+        break;
+
+    case L9P_RLOPEN:
+        l9p_puqid(msg, &fcall->rlopen.qid);
+        r = l9p_pu32(msg, &fcall->rlopen.iounit);
+        break;
+
+    case L9P_TLCREATE:
+        l9p_pu32(msg, &fcall->hdr.fid);
+        r = l9p_pustring(msg, &fcall->tlcreate.name);
+        if (r < 0)
+            break;
+        l9p_pu32(msg, &fcall->tlcreate.flags);
+        l9p_pu32(msg, &fcall->tlcreate.mode);
+        r = l9p_pu32(msg, &fcall->tlcreate.gid);
+        break;
+
+    case L9P_RLCREATE:
+        l9p_puqid(msg, &fcall->rlcreate.qid);
+        r = l9p_pu32(msg, &fcall->rlcreate.iounit);
+        break;
+
+    case L9P_TSYMLINK:
+        l9p_pu32(msg, &fcall->hdr.fid);
+        r = l9p_pustring(msg, &fcall->tsymlink.name);
+        if (r < 0)
+            break;
+        r = l9p_pustring(msg, &fcall->tsymlink.symtgt);
+        if (r < 0)
+            break;
+        /*
+         * Use this message's own gid member; another union
+         * member's gid is not guaranteed to sit at the same
+         * offset.
+         */
+        r = l9p_pu32(msg, &fcall->tsymlink.gid);
+        break;
+
+    case L9P_RSYMLINK:
+        r = l9p_puqid(msg, &fcall->rsymlink.qid);
+        break;
+
+    case L9P_TMKNOD:
+        l9p_pu32(msg, &fcall->hdr.fid);
+        r = l9p_pustring(msg, &fcall->tmknod.name);
+        if (r < 0)
+            break;
+        l9p_pu32(msg, &fcall->tmknod.mode);
+        l9p_pu32(msg, &fcall->tmknod.major);
+        l9p_pu32(msg, &fcall->tmknod.minor);
+        r = l9p_pu32(msg, &fcall->tmknod.gid);
+        break;
+
+    case L9P_RMKNOD:
+        r = l9p_puqid(msg, &fcall->rmknod.qid);
+        break;
+
+    case L9P_TRENAME:
+        l9p_pu32(msg, &fcall->hdr.fid);
+        l9p_pu32(msg, &fcall->trename.dfid);
+        r = l9p_pustring(msg, &fcall->trename.name);
+        break;
+
+    case L9P_RRENAME:
+        break;
+
+    case L9P_TREADLINK:
+        r = l9p_pu32(msg, &fcall->hdr.fid);
+        break;
+
+    case L9P_RREADLINK:
+        r = l9p_pustring(msg, &fcall->rreadlink.target);
+        break;
+
+    case L9P_TGETATTR:
+        l9p_pu32(msg, &fcall->hdr.fid);
+        r = l9p_pu64(msg, &fcall->tgetattr.request_mask);
+        break;
+
+    case L9P_RGETATTR:
+        l9p_pu64(msg, &fcall->rgetattr.valid);
+        l9p_puqid(msg, &fcall->rgetattr.qid);
+        l9p_pu32(msg, &fcall->rgetattr.mode);
+        l9p_pu32(msg, &fcall->rgetattr.uid);
+        l9p_pu32(msg, &fcall->rgetattr.gid);
+        l9p_pu64(msg, &fcall->rgetattr.nlink);
+        l9p_pu64(msg, &fcall->rgetattr.rdev);
+        l9p_pu64(msg, &fcall->rgetattr.size);
+        l9p_pu64(msg, &fcall->rgetattr.blksize);
+        l9p_pu64(msg, &fcall->rgetattr.blocks);
+        l9p_pu64(msg, &fcall->rgetattr.atime_sec);
+        l9p_pu64(msg, &fcall->rgetattr.atime_nsec);
+        l9p_pu64(msg, &fcall->rgetattr.mtime_sec);
+        l9p_pu64(msg, &fcall->rgetattr.mtime_nsec);
+        l9p_pu64(msg, &fcall->rgetattr.ctime_sec);
+        l9p_pu64(msg, &fcall->rgetattr.ctime_nsec);
+        l9p_pu64(msg, &fcall->rgetattr.btime_sec);
+        l9p_pu64(msg, &fcall->rgetattr.btime_nsec);
+        l9p_pu64(msg, &fcall->rgetattr.gen);
+        r = l9p_pu64(msg, &fcall->rgetattr.data_version);
+        break;
+
+    case L9P_TSETATTR:
+        l9p_pu32(msg, &fcall->hdr.fid);
+        l9p_pu32(msg, &fcall->tsetattr.valid);
+        l9p_pu32(msg, &fcall->tsetattr.mode);
+        l9p_pu32(msg, &fcall->tsetattr.uid);
+        l9p_pu32(msg, &fcall->tsetattr.gid);
+        l9p_pu64(msg, &fcall->tsetattr.size);
+        l9p_pu64(msg, &fcall->tsetattr.atime_sec);
+        l9p_pu64(msg, &fcall->tsetattr.atime_nsec);
+        l9p_pu64(msg, &fcall->tsetattr.mtime_sec);
+        r = l9p_pu64(msg, &fcall->tsetattr.mtime_nsec);
+        break;
+
+    case L9P_RSETATTR:
+        break;
+
+    case L9P_TXATTRWALK:
+        l9p_pu32(msg, &fcall->hdr.fid);
+        l9p_pu32(msg, &fcall->txattrwalk.newfid);
+        r = l9p_pustring(msg, &fcall->txattrwalk.name);
+        break;
+
+    case L9P_RXATTRWALK:
+        r = l9p_pu64(msg, &fcall->rxattrwalk.size);
+        break;
+
+    case L9P_TXATTRCREATE:
+        l9p_pu32(msg, &fcall->hdr.fid);
+        r = l9p_pustring(msg, &fcall->txattrcreate.name);
+        if (r < 0)
+            break;
+        l9p_pu64(msg, &fcall->txattrcreate.attr_size);
+        r = l9p_pu32(msg, &fcall->txattrcreate.flags);
+        break;
+
+    case L9P_RXATTRCREATE:
+        break;
+
+    case L9P_TFSYNC:
+        r = l9p_pu32(msg, &fcall->hdr.fid);
+        break;
+
+    case L9P_RFSYNC:
+        break;
+
+    case L9P_TLOCK:
+        l9p_pu32(msg, &fcall->hdr.fid);
+        l9p_pu8(msg, &fcall->tlock.type);
+        l9p_pu32(msg, &fcall->tlock.flags);
+        l9p_pu64(msg, &fcall->tlock.start);
+        l9p_pu64(msg, &fcall->tlock.length);
+        l9p_pu32(msg, &fcall->tlock.proc_id);
+        r = l9p_pustring(msg, &fcall->tlock.client_id);
+        break;
+
+    case L9P_RLOCK:
+        r = l9p_pu8(msg, &fcall->rlock.status);
+        break;
+
+    case L9P_TGETLOCK:
+        l9p_pu32(msg, &fcall->hdr.fid);
+        /* FALLTHROUGH */
+
+    case L9P_RGETLOCK:
+        l9p_pu8(msg, &fcall->getlock.type);
+        l9p_pu64(msg, &fcall->getlock.start);
+        l9p_pu64(msg, &fcall->getlock.length);
+        l9p_pu32(msg, &fcall->getlock.proc_id);
+        r = l9p_pustring(msg, &fcall->getlock.client_id);
+        break;
+
+    case L9P_TLINK:
+        l9p_pu32(msg, &fcall->tlink.dfid);
+        l9p_pu32(msg, &fcall->hdr.fid);
+        r = l9p_pustring(msg, &fcall->tlink.name);
+        break;
+
+    case L9P_RLINK:
+        break;
+
+    case L9P_TMKDIR:
+        l9p_pu32(msg, &fcall->hdr.fid);
+        r = l9p_pustring(msg, &fcall->tmkdir.name);
+        if (r < 0)
+            break;
+        l9p_pu32(msg, &fcall->tmkdir.mode);
+        r = l9p_pu32(msg, &fcall->tmkdir.gid);
+        break;
+
+    case L9P_RMKDIR:
+        r = l9p_puqid(msg, &fcall->rmkdir.qid);
+        break;
+
+    case L9P_TRENAMEAT:
+        l9p_pu32(msg, &fcall->hdr.fid);
+        r = l9p_pustring(msg, &fcall->trenameat.oldname);
+        if (r < 0)
+            break;
+        l9p_pu32(msg, &fcall->trenameat.newdirfid);
+        r = l9p_pustring(msg, &fcall->trenameat.newname);
+        break;
+
+    case L9P_RRENAMEAT:
+        break;
+
+    case L9P_TUNLINKAT:
+        l9p_pu32(msg, &fcall->hdr.fid);
+        r = l9p_pustring(msg, &fcall->tunlinkat.name);
+        if (r < 0)
+            break;
+        r = l9p_pu32(msg, &fcall->tunlinkat.flags);
+        break;
+
+    case L9P_RUNLINKAT:
+        break;
+
+    default:
+        /* Unknown type: only logged; r still holds the tag result. */
+        L9P_LOG(L9P_ERROR, "%s(): missing case for type %d",
+            __func__, fcall->hdr.type);
+        break;
+    }
+
+    /* Check for over- or under-run, or pustring error. */
+    if (r < 0)
+        return (-1);
+
+    if (msg->lm_mode == L9P_PACK) {
+        /* Rewind to the beginning and install size at front. */
+        uint32_t len = (uint32_t)msg->lm_size;
+        msg->lm_cursor_offset = 0;
+        msg->lm_cursor_iov = 0;
+
+        /*
+         * Subtract 4 bytes from current size, because we're
+         * overwriting size (rewinding message to the beginning)
+         * and writing again, which will increase it 4 more.
+         */
+        msg->lm_size -= sizeof(uint32_t);
+
+        /*
+         * Read replies announce io.count data bytes that were not
+         * packed here, so they are added to the length written on
+         * the wire (presumably the caller supplies that payload
+         * separately -- verify against the callers).
+         */
+        if (fcall->hdr.type == L9P_RREAD ||
+            fcall->hdr.type == L9P_RREADDIR)
+            len += fcall->io.count;
+
+        l9p_pu32(msg, &len);
+    }
+
+    return (0);
+}
+
+/*
+ * Free any strings or other data malloc'ed in the process of
+ * packing or unpacking an fcall.
+ *
+ * Dispatches on fcall->hdr.type and releases the heap-allocated
+ * members that belong to that message type.  Types that are not
+ * listed are left untouched.  The pointers themselves are not
+ * cleared, so the fcall must not be freed twice.
+ */
+void
+l9p_freefcall(union l9p_fcall *fcall)
+{
+    uint16_t i;
+
+    switch (fcall->hdr.type) {
+
+    case L9P_TVERSION:
+    case L9P_RVERSION:
+        free(fcall->version.version);
+        return;
+
+    case L9P_TAUTH:
+        /* Both strings are allocated when a Tauth is unpacked. */
+        free(fcall->tauth.aname);
+        free(fcall->tauth.uname);
+        return;
+
+    case L9P_TATTACH:
+        free(fcall->tattach.aname);
+        free(fcall->tattach.uname);
+        return;
+
+    case L9P_TWALK:
+        for (i = 0; i < fcall->twalk.nwname; i++)
+            free(fcall->twalk.wname[i]);
+        return;
+
+    case L9P_TCREATE:
+    case L9P_TOPEN:
+        /*
+         * NOTE(review): Topen shares this case and so has
+         * tcreate.name and tcreate.extension freed as well;
+         * confirm in fcall.h that this is safe for an fcall
+         * that was filled in as a Topen.
+         */
+        free(fcall->tcreate.name);
+        free(fcall->tcreate.extension);
+        return;
+
+    case L9P_RSTAT:
+        l9p_freestat(&fcall->rstat.stat);
+        return;
+
+    case L9P_TWSTAT:
+        l9p_freestat(&fcall->twstat.stat);
+        return;
+
+    case L9P_TLCREATE:
+        free(fcall->tlcreate.name);
+        return;
+
+    case L9P_TSYMLINK:
+        free(fcall->tsymlink.name);
+        free(fcall->tsymlink.symtgt);
+        return;
+
+    case L9P_TMKNOD:
+        free(fcall->tmknod.name);
+        return;
+
+    case L9P_TRENAME:
+        free(fcall->trename.name);
+        return;
+
+    case L9P_RREADLINK:
+        free(fcall->rreadlink.target);
+        return;
+
+    case L9P_TXATTRWALK:
+        free(fcall->txattrwalk.name);
+        return;
+
+    case L9P_TXATTRCREATE:
+        free(fcall->txattrcreate.name);
+        return;
+
+    case L9P_TLOCK:
+        free(fcall->tlock.client_id);
+        return;
+
+    case L9P_TGETLOCK:
+    case L9P_RGETLOCK:
+        free(fcall->getlock.client_id);
+        return;
+
+    case L9P_TLINK:
+        free(fcall->tlink.name);
+        return;
+
+    case L9P_TMKDIR:
+        free(fcall->tmkdir.name);
+        return;
+
+    case L9P_TRENAMEAT:
+        free(fcall->trenameat.oldname);
+        free(fcall->trenameat.newname);
+        return;
+
+    case L9P_TUNLINKAT:
+        free(fcall->tunlinkat.name);
+        return;
+    }
+}
+
+/*
+ * Release the five heap-allocated strings held by a l9p_stat.
+ * The structure itself is not freed and the pointers are not reset.
+ */
+void
+l9p_freestat(struct l9p_stat *stat)
+{
+    /* Classic 9P2000 strings. */
+    free(stat->name);
+    free(stat->uid);
+    free(stat->gid);
+    free(stat->muid);
+
+    /* String used only from L9P_2000U onward. */
+    free(stat->extension);
+}
+
+/*
+ * Compute the wire size of a l9p_stat for the given protocol
+ * version, including the leading size field itself.  NULL strings
+ * count as empty.  For L9P_2000U and later the extension string and
+ * three numeric ids are included.
+ *
+ * The result is a uint16_t, so the sum is reduced modulo 65536.
+ */
+uint16_t
+l9p_sizeof_stat(struct l9p_stat *stat, enum l9p_version version)
+{
+    uint16_t total;
+
+    /* Fixed-width fields. */
+    total = L9P_WORD        /* size */
+        + L9P_WORD          /* type */
+        + L9P_DWORD         /* dev */
+        + QID_SIZE          /* qid */
+        + 3 * L9P_DWORD     /* mode, atime, mtime */
+        + L9P_QWORD;        /* length */
+
+    /* Variable-length strings, each with its own length prefix. */
+    total += STRING_SIZE(stat->name);
+    total += STRING_SIZE(stat->uid);
+    total += STRING_SIZE(stat->gid);
+    total += STRING_SIZE(stat->muid);
+
+    if (version >= L9P_2000U) {
+        total += STRING_SIZE(stat->extension);
+        total += 3 * L9P_DWORD;     /* n_uid, n_gid, n_muid */
+    }
+
+    return (total);
+}
diff --git a/pytest/.gitignore b/pytest/.gitignore
new file mode 100644
index 000000000000..72be9ceecd4d
--- /dev/null
+++ b/pytest/.gitignore
@@ -0,0 +1,3 @@
+*.pyc
+__pycache__
+testconf.ini
diff --git a/pytest/Makefile b/pytest/Makefile
new file mode 100644
index 000000000000..14dd17777636
--- /dev/null
+++ b/pytest/Makefile
@@ -0,0 +1,9 @@
+PYTHON?=python
+
+selftest:
+ for f in lerrno p9err pfod protocol sequencer; do \
+ ${PYTHON} $$f.py; \
+ done
+
+clean cleandir:
+ rm -rf *.pyc __pycache__ *.log
diff --git a/pytest/README b/pytest/README
new file mode 100644
index 000000000000..6c8369d9521d
--- /dev/null
+++ b/pytest/README
@@ -0,0 +1,32 @@
+Here are some very skeletal instructions for using
+the client test code.
+
+on server (assumes BSD style LD_LIBRARY_PATH):
+
+mkdir /tmp/foo
+cd lib9p
+env LD_LIBRARY_PATH=. LIB9P_LOGGING=stderr example/server -h localhost -p 12345 /tmp/foo
+
+(this can be run as a non-root user for now, but some things
+only work when run as root)
+
+on client (same machine as server, but can always be run as
+non-root user):
+
+cd lib9p/pytest
+ONE TIME ONLY: copy testconf.ini.sample to testconf.ini, adjust to taste
+./client.py
+
+TODO: rework ./client.py so that it can locate the .ini file more reliably
+
+########
+
+IF USING diod (http://github.com/chaos/diod) AS THE SERVER ON
+A LINUX MACHINE:
+
+ - The instructions for running the server are (or were):
+ sudo ./diod -f -d 1 -n -e /tmp/9
+ - You must mkdir the exported 9pfs file system (e.g., mkdir /tmp/9).
+ - While uname is not really used, aname (the attach name) IS used
+ and must match the exported file system, e.g., testconf.ini
+ must have "aname = /tmp/9".
diff --git a/pytest/client.py b/pytest/client.py
new file mode 100755
index 000000000000..1746d4f6e277
--- /dev/null
+++ b/pytest/client.py
@@ -0,0 +1,643 @@
+#! /usr/bin/env python
+
+"""
+Run various tests, as a client.
+"""
+
+from __future__ import print_function
+
+import argparse
+try:
+ import ConfigParser as configparser
+except ImportError:
+ import configparser
+import functools
+import logging
+import os
+import socket
+import struct
+import sys
+import time
+import traceback
+
+import p9conn
+import protocol
+
+LocalError = p9conn.LocalError
+RemoteError = p9conn.RemoteError
+TEError = p9conn.TEError
+
+class TestState(object):
+    """
+    State shared by all test cases in one run: the parsed
+    configuration, the logger, result counters, and a table of
+    client connections keyed by client ID.
+    """
+    def __init__(self):
+        self.config = None
+        self.logger = None
+        # result counters, updated by TestCase.__exit__
+        self.successes = 0
+        self.skips = 0
+        self.failures = 0
+        self.exceptions = 0
+        # cid -> [client, session-info-or-None]
+        self.clnt_tab = {}
+        # zero-argument callable that builds a new (unconnected) client
+        self.mkclient = None
+        # set True when later tests should not be attempted
+        self.stop = False
+        self.gid = 0
+
+    def ccc(self, cid=None):
+        """
+        Connect or reconnect as client (ccc = check and connect client).
+
+        If caller provides a cid (client ID) we check that specific
+        client.  Otherwise the default ID ('base') is used.
+        In any case we return the [client, attachment] pair for the
+        now-connected client; the attachment (session info) is None
+        until ccs() has attached.
+        """
+        if cid is None:
+            cid = 'base'
+        pair = self.clnt_tab.get(cid)
+        if pair is None:
+            clnt = self.mkclient()
+            pair = [clnt, None]
+            self.clnt_tab[cid] = pair
+        else:
+            clnt = pair[0]
+        if not clnt.is_connected():
+            clnt.connect()
+        return pair
+
+    def dcc(self, cid=None):
+        """
+        Disconnect client (disconnect checked client).  If no specific
+        client ID is provided, this disconnects ALL checked clients!
+
+        An unknown cid is silently ignored.
+        """
+        if cid is None:
+            # iterate over a copy: each recursive call deletes its entry
+            for cid in list(self.clnt_tab.keys()):
+                self.dcc(cid)
+            return
+        pair = self.clnt_tab.get(cid)
+        if pair is not None:
+            clnt = pair[0]
+            if clnt.is_connected():
+                clnt.shutdown()
+            del self.clnt_tab[cid]
+
+    def ccs(self, cid=None):
+        """
+        Like ccc, but establish a session as well, by attaching with
+        the configured aname/uname (and n_uname, when the client's
+        protocol is newer than protocol.plain).
+
+        Settings are read from section [client-<cid>] when a cid is
+        given; getconf falls back to [client] otherwise.
+
+        Return the client instance (only).
+        """
+        pair = self.ccc(cid)
+        clnt = pair[0]
+        if pair[1] is None:
+            # No session yet - establish one.  Note, this may fail.
+            section = None if cid is None else ('client-' + cid)
+            aname = getconf(self.config, section, 'aname', '')
+            uname = getconf(self.config, section, 'uname', '')
+            if clnt.proto > protocol.plain:
+                n_uname = getint(self.config, section, 'n_uname', 1001)
+            else:
+                n_uname = None
+            clnt.attach(afid=None, aname=aname, uname=uname, n_uname=n_uname)
+            # remember the session so we do not attach again
+            pair[1] = (aname, uname, n_uname)
+        return clnt
+
+def getconf(conf, section, name, default=None, rtype=str):
+    """
+    Get configuration item for given section, or for "client" if
+    there is no entry for that particular section (or if section
+    is None).
+
+    This lets us get specific values for specific tests or
+    groups ([foo] name=value), falling back to general values
+    ([client] name=value).
+
+    The type of the returned value <rtype> can be str, int, bool,
+    or float.  The default is str (and see getint, getbool,
+    getfloat below).
+
+    A default value may be supplied; if it is, that's the default
+    return value (this default should have the right type, as it
+    is returned without conversion).  If no default is supplied,
+    a missing value raises LocalError.  A missing [client] section
+    exits the program.
+
+    A value that cannot be converted to <rtype> raises ValueError
+    naming the section, item and offending value.
+    """
+    try:
+        # note: conf.get(None, 'foo') raises NoSectionError
+        where = section
+        result = conf.get(where, name)
+    except (configparser.NoSectionError, configparser.NoOptionError):
+        try:
+            where = 'client'
+            result = conf.get(where, name)
+        except configparser.NoSectionError:
+            sys.exit('no [{0}] section in configuration!'.format(where))
+        except configparser.NoOptionError:
+            if default is not None:
+                return default
+            if section is not None:
+                where = '[{0}] or [{1}]'.format(section, where)
+            else:
+                where = '[{0}]'.format(where)
+            raise LocalError('need {0}=value in {1}'.format(name, where))
+    # from here on "where" is only used to label error messages
+    where = '[{0}]'.format(where)
+    if rtype is str:
+        return result
+    if rtype is int or rtype is float:
+        try:
+            return rtype(result)
+        except ValueError:
+            # same exception type as a bare int()/float() failure,
+            # but say which configuration item was bad
+            raise ValueError('{0} {1}={2}: invalid {3}'.format(where, name,
+                result, rtype.__name__))
+    if rtype is bool:
+        if result.lower() in ('1', 't', 'true', 'y', 'yes'):
+            return True
+        if result.lower() in ('0', 'f', 'false', 'n', 'no'):
+            return False
+        raise ValueError('{0} {1}={2}: invalid boolean'.format(where, name,
+            result))
+    raise ValueError('{0} {1}={2}: internal error: bad result type '
+        '{3!r}'.format(where, name, result, rtype))
+
+def getint(conf, section, name, default=None):
+    """
+    Look up a config item through getconf(), converting the
+    configured text to int.
+    """
+    return getconf(conf, section, name, default=default, rtype=int)
+
+def getfloat(conf, section, name, default=None):
+    """
+    Look up a config item through getconf(), converting the
+    configured text to float.
+    """
+    return getconf(conf, section, name, default=default, rtype=float)
+
+def getbool(conf, section, name, default=None):
+    """
+    Look up a config item through getconf(), interpreting the
+    configured text as a boolean.
+    """
+    return getconf(conf, section, name, default=default, rtype=bool)
+
+def pluralize(n, singular, plural):
+    """
+    Pick the word form that agrees with the count n: the singular
+    only when n equals 1, the plural otherwise (including 0).
+    """
+    if n == 1:
+        return singular
+    return plural
+
+class TCDone(Exception):
+    """
+    Raised by TestCase.succ/fail/skip to abandon the rest of the
+    test case body once a status has been recorded.
+    """
+
+class TestCase(object):
+    """
+    Start a test case.  Most callers must then do a ccs() to connect.
+
+    A failed test will generally disconnect from the server; a
+    new ccs() will reconnect, if the server is still alive.
+
+    Used as a context manager: the body calls succ(), fail() or
+    skip() (each raises TCDone to leave the body), and __exit__
+    logs the outcome and updates the counters in tstate.
+    """
+    def __init__(self, name, tstate):
+        self.name = name
+        self.status = None      # 'SUCC', 'FAIL', 'SKIP', 'EXCP' or '<status> EXC'
+        self.detail = None      # optional text logged with the status
+        self.tstate = tstate
+        self._shutdown = None   # connection to shut down on exit, if any
+        self._autoclunk = None  # fids to clunk on exit (list, set in __enter__)
+        self._acconn = None     # client used for the auto-clunks (set by ccs)
+
+    def auto_disconnect(self, conn):
+        "arrange for conn.shutdown() to be called when the test case exits"
+        self._shutdown = conn
+
+    def succ(self, detail=None):
+        "set success status and leave the test case body"
+        self.status = 'SUCC'
+        self.detail = detail
+        raise TCDone()
+
+    def fail(self, detail):
+        "set failure status and leave the test case body"
+        self.status = 'FAIL'
+        self.detail = detail
+        raise TCDone()
+
+    def skip(self, detail=None):
+        "set skip status and leave the test case body"
+        self.status = 'SKIP'
+        self.detail = detail
+        raise TCDone()
+
+    def autoclunk(self, fid):
+        """
+        Mark fid to be closed/clunked on test exit.
+
+        Raises ValueError if ccs() has not been called on this test
+        case, since there is then no client to clunk with.
+        """
+        if self._acconn is None:
+            raise ValueError('autoclunk: no _acconn')
+        self._autoclunk.append(fid)
+
+    def trace(self, msg, *args, **kwargs):
+        """
+        Add tracing info to log-file output.  Arguments are as for
+        logger.log(), except that the level is given by the optional
+        keyword argument 'level' (default logging.INFO).
+        """
+        level = kwargs.pop('level', logging.INFO)
+        self.tstate.logger.log(level, ' ' + msg, *args, **kwargs)
+
+    def ccs(self):
+        "call tstate ccs, turn socket.error connect failure into test fail"
+        try:
+            # shown as the detail if connecting raises something else
+            self.detail = 'connecting'
+            ret = self.tstate.ccs()
+            self.detail = None
+            self._acconn = ret
+            return ret
+        except socket.error as err:
+            self.fail(str(err))
+
+    def __enter__(self):
+        self.tstate.logger.log(logging.DEBUG, 'ENTER: %s', self.name)
+        self._autoclunk = []
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """
+        Record and log the outcome, then clean up.
+
+        TCDone, TEError, socket.error, RemoteError and LocalError are
+        swallowed; any other exception is counted and logged but
+        still propagates to the caller.
+        """
+        tstate = self.tstate
+        eat_exc = False
+        tb_detail = None
+        if exc_type is TCDone:
+            # we exited with succ, fail, or skip
+            eat_exc = True
+            exc_type = None
+        if exc_type is not None:
+            if self.status is None:
+                self.status = 'EXCP'
+            else:
+                self.status += ' EXC'
+            if exc_type == TEError:
+                # timeout/eof - best guess is that we crashed the server!
+                eat_exc = True
+                tb_detail = ['timeout or EOF']
+            elif exc_type in (socket.error, RemoteError, LocalError):
+                eat_exc = True
+                tb_detail = traceback.format_exception(exc_type, exc_val,
+                    exc_tb)
+            level = logging.ERROR
+            tstate.failures += 1
+            tstate.exceptions += 1
+        else:
+            # a body that finishes without calling succ/fail/skip passed
+            if self.status is None:
+                self.status = 'SUCC'
+            if self.status == 'SUCC':
+                level = logging.INFO
+                tstate.successes += 1
+            elif self.status == 'SKIP':
+                level = logging.INFO
+                tstate.skips += 1
+            else:
+                level = logging.ERROR
+                tstate.failures += 1
+        tstate.logger.log(level, '%s: %s', self.status, self.name)
+        if self.detail:
+            tstate.logger.log(level, ' detail: %s', self.detail)
+        if tb_detail:
+            for line in tb_detail:
+                tstate.logger.log(level, ' %s', line.rstrip())
+        try:
+            for fid in self._autoclunk:
+                self._acconn.clunk(fid, ignore_error=True)
+        finally:
+            # disconnect even if one of the clunks above raised
+            if self._shutdown:
+                self._shutdown.shutdown()
+        return eat_exc
+
+def main():
+    """
+    Run the client tests against the configured server.
+
+    Reads testconf.ini from the current directory, plus any files
+    given with -c/--config, then runs the connection-level test
+    cases.  If none of them set tstate.stop, more_test_cases() is
+    run as well.  Prints a summary and returns 1 if testing stopped
+    early or any test failed or raised an exception, otherwise 0.
+    """
+    parser = argparse.ArgumentParser(description='run tests against a server')
+
+    parser.add_argument('-c', '--config',
+        action='append',
+        help='specify additional file(s) to read (beyond testconf.ini)')
+
+    args = parser.parse_args()
+    # SafeConfigParser and readfp() were removed in Python 3.12; on
+    # Python 3 the plain ConfigParser is the same "safe" parser.
+    if sys.version_info[0] >= 3:
+        config = configparser.ConfigParser()
+    else:
+        config = configparser.SafeConfigParser()
+    # use case sensitive keys
+    config.optionxform = str
+
+    try:
+        with open('testconf.ini', 'r') as stream:
+            if hasattr(config, 'read_file'):
+                config.read_file(stream)
+            else:
+                config.readfp(stream)
+    except (OSError, IOError) as err:
+        sys.exit(str(err))
+    if args.config:
+        ok = config.read(args.config)
+        # config.read() returns the files it managed to parse, so the
+        # failures are the requested files missing from that list.
+        failed = [path for path in args.config if path not in ok]
+        if failed:
+            nfailed = len(failed)
+            word = 'files' if nfailed > 1 else 'file'
+            failed = ', '.join(failed)
+            print('failed to read {0} {1}: {2}'.format(nfailed, word, failed))
+            sys.exit(1)
+
+    logging.basicConfig(level=config.get('client', 'loglevel').upper())
+    logger = logging.getLogger(__name__)
+    tstate = TestState()
+    tstate.logger = logger
+    tstate.config = config
+
+    server = config.get('client', 'server')
+    port = config.getint('client', 'port')
+    proto = config.get('client', 'protocol')
+    may_downgrade = config.getboolean('client', 'may_downgrade')
+    timeout = config.getfloat('client', 'timeout')
+
+    tstate.stop = True # unless overwritten below
+    with TestCase('send bad packet', tstate) as tc:
+        tc.detail = 'connecting to {0}:{1}'.format(server, port)
+        try:
+            conn = p9conn.P9SockIO(logger, server=server, port=port)
+        except socket.error as err:
+            tc.fail('cannot connect at all (server down?)')
+        tc.auto_disconnect(conn)
+        tc.detail = None
+        # just four bytes: a little-endian 256 with nothing after it
+        pkt = struct.pack('<I', 256)
+        conn.write(pkt)
+        # ignore reply if any, we're just trying to trip the server
+        tstate.stop = False
+        tc.succ()
+
+    if not tstate.stop:
+        tstate.mkclient = functools.partial(p9conn.P9Client, logger,
+            timeout, proto, may_downgrade,
+            server=server, port=port)
+        tstate.stop = True
+        with TestCase('send bad Tversion', tstate) as tc:
+            try:
+                clnt = tstate.mkclient()
+            except socket.error as err:
+                tc.fail('can no longer connect, did bad pkt crash server?')
+            tc.auto_disconnect(clnt)
+            clnt.set_monkey('version', b'wrongo, fishbreath!')
+            tc.detail = 'connecting'
+            try:
+                clnt.connect()
+            except RemoteError as err:
+                tstate.stop = False
+                tc.succ(err.args[0])
+            tc.fail('server accepted a bad Tversion')
+
+    if not tstate.stop:
+        # All NUL characters in strings are invalid.
+        with TestCase('send illegal NUL in Tversion', tstate) as tc:
+            clnt = tstate.mkclient()
+            tc.auto_disconnect(clnt)
+            clnt.set_monkey('version', b'9P2000\0')
+            # Forcibly allow downgrade so that Tversion
+            # succeeds if they ignore the \0.
+            clnt.may_downgrade = True
+            tc.detail = 'connecting'
+            try:
+                clnt.connect()
+            except (TEError, RemoteError) as err:
+                tc.succ(err.args[0])
+            tc.fail('server accepted NUL in Tversion')
+
+    if not tstate.stop:
+        with TestCase('connect normally', tstate) as tc:
+            tc.detail = 'connecting'
+            try:
+                tstate.ccc()
+            except RemoteError as err:
+                # can't test any further, but this might be success
+                tstate.stop = True
+                if 'they only support version' in err.args[0]:
+                    tc.succ(err.args[0])
+                tc.fail(err.args[0])
+            tc.succ()
+
+    if not tstate.stop:
+        with TestCase('attach with bad afid', tstate) as tc:
+            clnt = tstate.ccc()[0]
+            section = 'attach-with-bad-afid'
+            aname = getconf(tstate.config, section, 'aname', '')
+            uname = getconf(tstate.config, section, 'uname', '')
+            if clnt.proto > protocol.plain:
+                n_uname = getint(tstate.config, section, 'n_uname', 1001)
+            else:
+                n_uname = None
+            try:
+                clnt.attach(afid=42, aname=aname, uname=uname, n_uname=n_uname)
+            except RemoteError as err:
+                tc.succ(err.args[0])
+            # The bogus attach was accepted: drop the client connections
+            # (they live in tstate, not in the test case) before failing.
+            tstate.dcc()
+            tc.fail('bad attach afid not rejected')
+
+    try:
+        if not tstate.stop:
+            # Various Linux tests need gids.  Just get them for everyone.
+            tstate.gid = getint(tstate.config, 'client', 'gid', 0)
+            more_test_cases(tstate)
+    finally:
+        tstate.dcc()
+
+    # skipped tests are reported separately, not counted in the total
+    n_tests = tstate.successes + tstate.failures
+    print('summary:')
+    if tstate.successes:
+        print('{0}/{1} tests succeeded'.format(tstate.successes, n_tests))
+    if tstate.failures:
+        print('{0}/{1} tests failed'.format(tstate.failures, n_tests))
+    if tstate.skips:
+        print('{0} {1} skipped'.format(tstate.skips,
+            pluralize(tstate.skips,
+                'test', 'tests')))
+    if tstate.exceptions:
+        print('{0} {1} occurred'.format(tstate.exceptions,
+            pluralize(tstate.exceptions,
+                'exception', 'exceptions')))
+    if tstate.stop:
+        print('tests stopped early')
+    return 1 if tstate.stop or tstate.exceptions or tstate.failures else 0
+
+def more_test_cases(tstate):
+    """
+    Run cases that can only proceed if connecting works at all.
+
+    Each case runs inside a TestCase context, which records the
+    result in tstate.  Returns early (with tstate.stop set) when a
+    case leaves the server's tree in a state the later cases
+    cannot work with.
+    """
+    with TestCase('attach normally', tstate) as tc:
+        tc.ccs()
+        tc.succ()
+    if tstate.stop:
+        return
+
+    # Empty string is not technically illegal.  It's not clear
+    # whether it should be accepted or rejected.  However, it
+    # used to crash the server entirely, so it's a desirable
+    # test case.
+    with TestCase('empty string in Twalk request', tstate) as tc:
+        clnt = tc.ccs()
+        try:
+            fid, qid = clnt.lookup(clnt.rootfid, [b''])
+        except RemoteError as err:
+            tc.succ(err.args[0])
+        clnt.clunk(fid)
+        tc.succ('note: empty Twalk component name not rejected')
+
+    # Name components may not contain /
+    with TestCase('embedded / in lookup component name', tstate) as tc:
+        clnt = tc.ccs()
+        try:
+            fid, qid = clnt.lookup(clnt.rootfid, [b'/'])
+            tc.autoclunk(fid)
+        except RemoteError as err:
+            tc.succ(err.args[0])
+        tc.fail('/ in lookup component name not rejected')
+
+    # Proceed from a clean tree.  As a side effect, this also tests
+    # either the old style readdir (read() on a directory fid) or
+    # the dot-L readdir().
+    #
+    # The test case will fail if we don't have permission to remove
+    # some file(s).
+    with TestCase('clean up tree (readdir+remove)', tstate) as tc:
+        clnt = tc.ccs()
+        fset = clnt.uxreaddir(b'/')
+        fset = [i for i in fset if i != '.' and i != '..']
+        tc.trace("what's there initially: {0!r}".format(fset))
+        try:
+            clnt.uxremove(b'/', force=False, recurse=True)
+        except RemoteError as err:
+            tc.trace('failed to read or clean up tree', level=logging.ERROR)
+            tc.trace('this might be a permissions error', level=logging.ERROR)
+            tstate.stop = True
+            tc.fail(str(err))
+        fset = clnt.uxreaddir(b'/')
+        fset = [i for i in fset if i != '.' and i != '..']
+        tc.trace("what's left after removing everything: {0!r}".format(fset))
+        if fset:
+            tstate.stop = True
+            tc.trace('note: could be a permissions error', level=logging.ERROR)
+            tc.fail('/ not empty after removing all: {0!r}'.format(fset))
+        tc.succ()
+    if tstate.stop:
+        return
+
+    # Name supplied to create, mkdir, etc, may not contain /.
+    # Note that this test may fail for the wrong reason if /dir
+    # itself does not already exist, so first let's make /dir.
+    only_dotl = getbool(tstate.config, 'client', 'only_dotl', False)
+    with TestCase('mkdir', tstate) as tc:
+        clnt = tc.ccs()
+        if only_dotl and not clnt.supports(protocol.td.Tmkdir):
+            tc.skip('cannot test dot-L mkdir on {0}'.format(clnt.proto))
+        try:
+            fid, qid = clnt.uxlookup(b'/dir', None)
+            tc.autoclunk(fid)
+            tstate.stop = True
+            tc.fail('found existing /dir after cleaning tree')
+        except RemoteError as err:
+            # we'll just assume it's "no such file or directory"
+            pass
+        if only_dotl:
+            qid = clnt.mkdir(clnt.rootfid, b'dir', 0o777, tstate.gid)
+        else:
+            qid, _ = clnt.create(clnt.rootfid, b'dir',
+                protocol.td.DMDIR | 0o777,
+                protocol.td.OREAD)
+        if qid.type != protocol.td.QTDIR:
+            tstate.stop = True
+            tc.fail('creating /dir: result is not a directory')
+        tc.trace('now attempting to create /dir/sub the wrong way')
+        try:
+            if only_dotl:
+                qid = clnt.mkdir(clnt.rootfid, b'dir/sub', 0o777, tstate.gid)
+            else:
+                qid, _ = clnt.create(clnt.rootfid, b'dir/sub',
+                    protocol.td.DMDIR | 0o777,
+                    protocol.td.OREAD)
+            # it's not clear what happened on the server at this point!
+            tc.trace("creating dir/sub (with embedded '/') should have "
+                'failed but did not')
+            # stop further tests unless we manage to clean up below
+            tstate.stop = True
+            fset = clnt.uxreaddir(b'/dir')
+            if 'sub' in fset:
+                tc.trace('(found our dir/sub detritus)')
+                clnt.uxremove(b'dir/sub', force=True)
+                fset = clnt.uxreaddir(b'/dir')
+                if 'sub' not in fset:
+                    tc.trace('(successfully removed our dir/sub detritus)')
+                    tstate.stop = False
+            tc.fail('created dir/sub as single directory with embedded slash')
+        except RemoteError as err:
+            # we'll just assume it's the right kind of error
+            tc.trace('invalid path dir/sub failed with: %s', str(err))
+            tc.succ('embedded slash in mkdir correctly refused')
+    if tstate.stop:
+        return
+
+    with TestCase('getattr/setattr', tstate) as tc:
+        # This test is not really thorough enough, need to test
+        # all combinations of settings.  Should also test that
+        # old values are restored on failure, although it is not
+        # clear how to trigger failures.
+        clnt = tc.ccs()
+        if not clnt.supports(protocol.td.Tgetattr):
+            # skip() takes one preformatted detail string
+            tc.skip('{0} does not support Tgetattr'.format(clnt))
+        fid, _, _, _ = clnt.uxopen(b'/dir/file', os.O_CREAT | os.O_RDWR, 0o666,
+            gid=tstate.gid)
+        tc.autoclunk(fid)
+        written = clnt.write(fid, 0, 'bytes\n')
+        if written != 6:
+            tc.trace('expected to write 6 bytes, actually wrote %d', written,
+                level=logging.WARN)
+        attrs = clnt.Tgetattr(fid)
+        #tc.trace('getattr: after write, before setattr: got %s', attrs)
+        if attrs.size != written:
+            # fail() takes one preformatted detail string
+            tc.fail('getattr: expected size={0}, got size={1}'.format(
+                written, attrs.size))
+        # now truncate, set mtime to 0.14 seconds (sec=0,
+        # nsec=140000000), and check result
+        set_time_to = p9conn.Timespec(sec=0, nsec=140000000)
+        clnt.Tsetattr(fid, size=0, mtime=set_time_to)
+        attrs = clnt.Tgetattr(fid)
+        #tc.trace('getattr: after setattr: got %s', attrs)
+        if attrs.mtime.sec != set_time_to.sec or attrs.size != 0:
+            tc.fail('setattr: expected to get back mtime.sec={0}, size=0; '
+                'got mtime.sec={1}, size='
+                '{2}'.format(set_time_to.sec, attrs.mtime.sec, attrs.size))
+        # nsec is not as stable but let's check
+        if attrs.mtime.nsec != set_time_to.nsec:
+            tc.trace('setattr: expected to get back mtime_nsec=%d; '
+                'got %d', set_time_to.nsec, attrs.mtime.nsec)
+        tc.succ('able to set and see size and mtime')
+
+    # this test should be much later, but we know the current
+    # server is broken...
+    with TestCase('rename adjusts other fids', tstate) as tc:
+        clnt = tc.ccs()
+        dirfid, _ = clnt.uxlookup(b'/dir')
+        tc.autoclunk(dirfid)
+        clnt.uxmkdir(b'd1', 0o777, tstate.gid, startdir=dirfid)
+        clnt.uxmkdir(b'd1/sub', 0o777, tstate.gid, startdir=dirfid)
+        d1fid, _ = clnt.uxlookup(b'd1', dirfid)
+        tc.autoclunk(d1fid)
+        subfid, _ = clnt.uxlookup(b'sub', d1fid)
+        tc.autoclunk(subfid)
+        fid, _, _, _ = clnt.uxopen(b'file', os.O_CREAT | os.O_RDWR,
+            0o666, startdir=subfid, gid=tstate.gid)
+        tc.autoclunk(fid)
+        written = clnt.write(fid, 0, 'filedata\n')
+        if written != 9:
+            tc.trace('expected to write 9 bytes, actually wrote %d', written,
+                level=logging.WARN)
+        # Now if we rename /dir/d1 to /dir/d2, the fids for both
+        # sub/file and sub itself should still be usable.  This
+        # holds for both Trename (Linux only) and Twstat based
+        # rename ops.
+        #
+        # Note that some servers may cache some number of files and/or
+        # directories held open, so we should open many fids to wipe
+        # out the cache (XXX notyet).
+        if clnt.supports(protocol.td.Trename):
+            clnt.rename(d1fid, dirfid, name=b'd2')
+        else:
+            clnt.wstat(d1fid, name=b'd2')
+        try:
+            rofid, _, _, _ = clnt.uxopen(b'file', os.O_RDONLY, startdir=subfid)
+            clnt.clunk(rofid)
+        except RemoteError as err:
+            tc.fail('open file in renamed dir/d2/sub: {0}'.format(err))
+        tc.succ()
+
+    # Even if xattrwalk is supported by the protocol, it's optional
+    # on the server.
+    with TestCase('xattrwalk', tstate) as tc:
+        clnt = tc.ccs()
+        if not clnt.supports(protocol.td.Txattrwalk):
+            tc.skip('{0} does not support Txattrwalk'.format(clnt))
+        dirfid, _ = clnt.uxlookup(b'/dir')
+        tc.autoclunk(dirfid)
+        try:
+            # need better tests...
+            attrfid, size = clnt.xattrwalk(dirfid)
+            tc.autoclunk(attrfid)
+            data = clnt.read(attrfid, 0, size)
+            tc.trace('xattrwalk with no name: data=%r', data)
+            tc.succ('xattrwalk size={0} datalen={1}'.format(size, len(data)))
+        except RemoteError as err:
+            tc.trace('xattrwalk on /dir: {0}'.format(err))
+            tc.succ('xattrwalk apparently not implemented')
+
+if __name__ == '__main__':
+ try:
+ sys.exit(main())
+ except KeyboardInterrupt:
+ sys.exit('\nInterrupted')
diff --git a/pytest/lerrno.py b/pytest/lerrno.py
new file mode 100644
index 000000000000..80a9a5098b87
--- /dev/null
+++ b/pytest/lerrno.py
@@ -0,0 +1,291 @@
+#! /usr/bin/env python
+
+"""
+Error number definitions for Linux.
+"""
+
+EPERM = 1
+ENOENT = 2
+ESRCH = 3
+EINTR = 4
+EIO = 5
+ENXIO = 6
+E2BIG = 7
+ENOEXEC = 8
+EBADF = 9
+ECHILD = 10
+EAGAIN = 11
+ENOMEM = 12
+EACCES = 13
+EFAULT = 14
+ENOTBLK = 15
+EBUSY = 16
+EEXIST = 17
+EXDEV = 18
+ENODEV = 19
+ENOTDIR = 20
+EISDIR = 21
+EINVAL = 22
+ENFILE = 23
+EMFILE = 24
+ENOTTY = 25
+ETXTBSY = 26
+EFBIG = 27
+ENOSPC = 28
+ESPIPE = 29
+EROFS = 30
+EMLINK = 31
+EPIPE = 32
+EDOM = 33
+ERANGE = 34
+EDEADLK = 35
+ENAMETOOLONG = 36
+ENOLCK = 37
+ENOSYS = 38
+ENOTEMPTY = 39
+ELOOP = 40
+# 41 unused
+ENOMSG = 42
+EIDRM = 43
+ECHRNG = 44
+EL2NSYNC = 45
+EL3HLT = 46
+EL3RST = 47
+ELNRNG = 48
+EUNATCH = 49
+ENOCSI = 50
+EL2HLT = 51
+EBADE = 52
+EBADR = 53
+EXFULL = 54
+ENOANO = 55
+EBADRQC = 56
+EBADSLT = 57
+# 58 unused
+EBFONT = 59
+ENOSTR = 60
+ENODATA = 61
+ETIME = 62
+ENOSR = 63
+ENONET = 64
+ENOPKG = 65
+EREMOTE = 66
+ENOLINK = 67
+EADV = 68
+ESRMNT = 69
+ECOMM = 70
+EPROTO = 71
+EMULTIHOP = 72
+EDOTDOT = 73
+EBADMSG = 74
+EOVERFLOW = 75
+ENOTUNIQ = 76
+EBADFD = 77
+EREMCHG = 78
+ELIBACC = 79
+ELIBBAD = 80
+ELIBSCN = 81
+ELIBMAX = 82
+ELIBEXEC = 83
+EILSEQ = 84
+ERESTART = 85
+ESTRPIPE = 86
+EUSERS = 87
+ENOTSOCK = 88
+EDESTADDRREQ = 89
+EMSGSIZE = 90
+EPROTOTYPE = 91
+ENOPROTOOPT = 92
+EPROTONOSUPPORT = 93
+ESOCKTNOSUPPORT = 94
+EOPNOTSUPP = 95
+EPFNOSUPPORT = 96
+EAFNOSUPPORT = 97
+EADDRINUSE = 98
+EADDRNOTAVAIL = 99
+ENETDOWN = 100
+ENETUNREACH = 101
+ENETRESET = 102
+ECONNABORTED = 103
+ECONNRESET = 104
+ENOBUFS = 105
+EISCONN = 106
+ENOTCONN = 107
+ESHUTDOWN = 108
+ETOOMANYREFS = 109
+ETIMEDOUT = 110
+ECONNREFUSED = 111
+EHOSTDOWN = 112
+EHOSTUNREACH = 113
+EALREADY = 114
+EINPROGRESS = 115
+ESTALE = 116
+EUCLEAN = 117
+ENOTNAM = 118
+ENAVAIL = 119
+EISNAM = 120
+EREMOTEIO = 121
+EDQUOT = 122
+ENOMEDIUM = 123
+EMEDIUMTYPE = 124
+ECANCELED = 125
+ENOKEY = 126
+EKEYEXPIRED = 127
+EKEYREVOKED = 128
+EKEYREJECTED = 129
+EOWNERDEAD = 130
+ENOTRECOVERABLE = 131
+ERFKILL = 132
+EHWPOISON = 133
+
+# Message text for each error number defined above; used by strerror().
+# Error numbers with no entry here are reported as unknown.
+_strerror = {
+    EPERM: 'Operation not permitted',
+    ENOENT: 'No such file or directory',
+    ESRCH: 'No such process',
+    EINTR: 'Interrupted system call',
+    EIO: 'Input/output error',
+    ENXIO: 'Device not configured',
+    E2BIG: 'Argument list too long',
+    ENOEXEC: 'Exec format error',
+    EBADF: 'Bad file descriptor',
+    ECHILD: 'No child processes',
+    EAGAIN: 'Resource temporarily unavailable',
+    ENOMEM: 'Cannot allocate memory',
+    EACCES: 'Permission denied',
+    EFAULT: 'Bad address',
+    ENOTBLK: 'Block device required',
+    EBUSY: 'Device busy',
+    EEXIST: 'File exists',
+    EXDEV: 'Cross-device link',
+    ENODEV: 'Operation not supported by device',
+    ENOTDIR: 'Not a directory',
+    EISDIR: 'Is a directory',
+    EINVAL: 'Invalid argument',
+    ENFILE: 'Too many open files in system',
+    EMFILE: 'Too many open files',
+    ENOTTY: 'Inappropriate ioctl for device',
+    ETXTBSY: 'Text file busy',
+    EFBIG: 'File too large',
+    ENOSPC: 'No space left on device',
+    ESPIPE: 'Illegal seek',
+    EROFS: 'Read-only filesystem',
+    EMLINK: 'Too many links',
+    EPIPE: 'Broken pipe',
+    EDOM: 'Numerical argument out of domain',
+    ERANGE: 'Result too large',
+    EDEADLK: 'Resource deadlock avoided',
+    ENAMETOOLONG: 'File name too long',
+    ENOLCK: 'No locks available',
+    ENOSYS: 'Function not implemented',
+    ENOTEMPTY: 'Directory not empty',
+    ELOOP: 'Too many levels of symbolic links',
+    ENOMSG: 'No message of desired type',
+    EIDRM: 'Identifier removed',
+    ECHRNG: 'Channel number out of range',
+    EL2NSYNC: 'Level 2 not synchronized',
+    EL3HLT: 'Level 3 halted',
+    EL3RST: 'Level 3 reset',
+    ELNRNG: 'Link number out of range',
+    EUNATCH: 'Protocol driver not attached',
+    ENOCSI: 'No CSI structure available',
+    EL2HLT: 'Level 2 halted',
+    EBADE: 'Invalid exchange',
+    EBADR: 'Invalid request descriptor',
+    EXFULL: 'Exchange full',
+    ENOANO: 'No anode',
+    EBADRQC: 'Invalid request code',
+    EBADSLT: 'Invalid slot',
+    EBFONT: 'Bad font file format',
+    ENOSTR: 'Device not a stream',
+    ENODATA: 'No data available',
+    ETIME: 'Timer expired',
+    ENOSR: 'Out of streams resources',
+    ENONET: 'Machine is not on the network',
+    ENOPKG: 'Package not installed',
+    EREMOTE: 'Object is remote',
+    ENOLINK: 'Link has been severed',
+    EADV: 'Advertise error',
+    ESRMNT: 'Srmount error',
+    ECOMM: 'Communication error on send',
+    EPROTO: 'Protocol error',
+    EMULTIHOP: 'Multihop attempted',
+    EDOTDOT: 'RFS specific error',
+    EBADMSG: 'Bad message',
+    EOVERFLOW: 'Value too large for defined data type',
+    ENOTUNIQ: 'Name not unique on network',
+    EBADFD: 'File descriptor in bad state',
+    EREMCHG: 'Remote address changed',
+    ELIBACC: 'Can not access a needed shared library',
+    ELIBBAD: 'Accessing a corrupted shared library',
+    ELIBSCN: '.lib section in a.out corrupted',
+    ELIBMAX: 'Attempting to link in too many shared libraries',
+    ELIBEXEC: 'Cannot exec a shared library directly',
+    EILSEQ: 'Invalid or incomplete multibyte or wide character',
+    ERESTART: 'Interrupted system call should be restarted',
+    ESTRPIPE: 'Streams pipe error',
+    EUSERS: 'Too many users',
+    ENOTSOCK: 'Socket operation on non-socket',
+    EDESTADDRREQ: 'Destination address required',
+    EMSGSIZE: 'Message too long',
+    EPROTOTYPE: 'Protocol wrong type for socket',
+    ENOPROTOOPT: 'Protocol not available',
+    EPROTONOSUPPORT: 'Protocol not supported',
+    ESOCKTNOSUPPORT: 'Socket type not supported',
+    EOPNOTSUPP: 'Operation not supported',
+    EPFNOSUPPORT: 'Protocol family not supported',
+    EAFNOSUPPORT: 'Address family not supported by protocol',
+    EADDRINUSE: 'Address already in use',
+    EADDRNOTAVAIL: 'Cannot assign requested address',
+    ENETDOWN: 'Network is down',
+    ENETUNREACH: 'Network is unreachable',
+    ENETRESET: 'Network dropped connection on reset',
+    ECONNABORTED: 'Software caused connection abort',
+    ECONNRESET: 'Connection reset by peer',
+    ENOBUFS: 'No buffer space available',
+    EISCONN: 'Transport endpoint is already connected',
+    ENOTCONN: 'Transport endpoint is not connected',
+    ESHUTDOWN: 'Cannot send after transport endpoint shutdown',
+    ETOOMANYREFS: 'Too many references: cannot splice',
+    ETIMEDOUT: 'Connection timed out',
+    ECONNREFUSED: 'Connection refused',
+    EHOSTDOWN: 'Host is down',
+    EHOSTUNREACH: 'No route to host',
+    EALREADY: 'Operation already in progress',
+    EINPROGRESS: 'Operation now in progress',
+    ESTALE: 'Stale file handle',
+    EUCLEAN: 'Structure needs cleaning',
+    ENOTNAM: 'Not a XENIX named type file',
+    ENAVAIL: 'No XENIX semaphores available',
+    EISNAM: 'Is a named type file',
+    EREMOTEIO: 'Remote I/O error',
+    EDQUOT: 'Quota exceeded',
+    ENOMEDIUM: 'No medium found',
+    EMEDIUMTYPE: 'Wrong medium type',
+    ECANCELED: 'Operation canceled',
+    ENOKEY: 'Required key not available',
+    EKEYEXPIRED: 'Key has expired',
+    EKEYREVOKED: 'Key has been revoked',
+    EKEYREJECTED: 'Key was rejected by service',
+    EOWNERDEAD: 'Owner died',
+    ENOTRECOVERABLE: 'State not recoverable',
+    ERFKILL: 'Operation not possible due to RF-kill',
+    EHWPOISON: 'Memory page has hardware error',
+}
+
+def strerror(errnum):
+    """
+    Return the message text for a Linux error number, or a
+    generic 'Unknown error N' string when there is none.
+
+    >>> strerror(ENOKEY)
+    'Required key not available'
+    >>> strerror(41)
+    'Unknown error 41'
+    """
+    text = _strerror.get(errnum)
+    return text or 'Unknown error {0}'.format(errnum)
+
+if __name__ == '__main__':
+    import doctest
+    import sys
+    # Run the doctests in this module and report any failure through
+    # the exit status, so a caller such as make can notice it.
+    sys.exit(1 if doctest.testmod().failed else 0)
diff --git a/pytest/numalloc.py b/pytest/numalloc.py
new file mode 100644
index 000000000000..4623e88e6c79
--- /dev/null
+++ b/pytest/numalloc.py
@@ -0,0 +1,379 @@
+#! /usr/bin/env python
+
+"""
+Integer number allocator.
+
+Basically, these keep track of a set of allocatable values in
+some range (you provide min and max) and let you allocate out of
+the range and return values into the range.
+
+You may pick a value using "next since last time", or "next
+available after provided value". Note that next-after will
+wrap around as needed (modular arithmetic style).
+
+The free lists are thread-locked so that this code can be used
+with threads.
+
+ >>> a = NumAlloc(5, 10) # note closed interval: 5..10 inclusive
+ >>> a
+ NumAlloc(5, 10)
+ >>> a.avail
+ [[5, 10]]
+ >>> a.alloc()
+ 5
+ >>> a.avail
+ [[6, 10]]
+ >>> a.alloc(8)
+ 8
+ >>> a.avail
+ [[6, 7], [9, 10]]
+ >>> a.free(5)
+ >>> a.avail
+ [[5, 7], [9, 10]]
+ >>> a.free(8)
+ >>> a.avail
+ [[5, 10]]
+
+Attempting to free a value that is already free is an error:
+
+ >>> a.free(5)
+ Traceback (most recent call last):
+ ...
+ ValueError: free: 5 already available
+
+You can, however, free a value that is outside the min/max
+range. You can also free multiple values at once:
+
+ >>> a.free_multi([0, 1, 2, 4])
+ >>> a.avail
+ [[0, 2], [4, 10]]
+ >>> a.free_multi([3, 12])
+ >>> a.avail
+ [[0, 10], [12, 12]]
+
+Note that this changes the min/max values:
+
+ >>> a
+ NumAlloc(0, 12)
+
+To prevent adding values outside the min/max range, create the
+NumAlloc with autoextend=False, or set .autoextend=False at any
+time:
+
+ >>> a.autoextend = False
+ >>> a
+ NumAlloc(0, 12, autoextend=False)
+ >>> a.free(13)
+ Traceback (most recent call last):
+ ...
+ ValueError: free: 13 is outside range limit
+
+You can create an empty range, which is really only useful once
+you free values into it:
+
+ >>> r = NumAlloc(0, -1)
+ >>> r
+ NumAlloc(0, -1)
+ >>> r.alloc() is None
+ True
+ >>> r.free_multi(range(50))
+ >>> r
+ NumAlloc(0, 49)
+
+Note that r.alloc() starts from where you last left off, even if
+you've freed a value:
+
+ >>> r.alloc()
+ 0
+ >>> r.free(0)
+ >>> r.alloc()
+ 1
+
+Of course, in multithreaded code you can't really depend on this
+since it will race other threads. Still, it generally makes for
+efficient allocation. To force allocation to start from the
+range's minimum, provide the minimum (e.g., r.min_val) as an
+argument to r.alloc():
+
+ >>> r.alloc()
+ 2
+ >>> r.alloc(r.min_val)
+ 0
+
+Providing a number to alloc() tries to allocate that number,
+but wraps around to the next one if needed:
+
+ >>> r.alloc(49)
+ 49
+ >>> r.alloc(49)
+ 3
+ >>> r.alloc(99999)
+ 4
+ >>> r.avail
+ [[5, 48]]
+
+There is currently no way to find all allocated values, although
+the obvious method (going through r.avail) will work. Any iterator
+would not be thread-safe.
+"""
+
+import threading
+
+class NumAlloc(object):
+ """
+ Number allocator object.
+ """
+ def __init__(self, min_val, max_val, autoextend=True):
+ self.min_val = min_val
+ self.max_val = max_val
+ if min_val <= max_val:
+ self.avail = [[min_val, max_val]]
+ else:
+ self.avail = []
+ self.autoextend = autoextend
+ self.last = None
+ self.lock = threading.Lock()
+
+ def __repr__(self):
+ myname = self.__class__.__name__
+ if self.autoextend:
+ ae = ''
+ else:
+ ae = ', autoextend=False'
+ return '{0}({1}, {2}{3})'.format(myname, self.min_val, self.max_val, ae)
+
+ def _find_block(self, val):
+ """
+ Find the block that contains val, or that should contain val.
+ Remember that self.avail is a list of available ranges of
+ the form [[min1, max1], [min2, max2], ..., [minN, maxN]]
+ where max1 < min2, max2 < min3, ..., < minN.
+
+ The input value either falls into one of the available
+ blocks, or falls into a gap between two available blocks.
+ We want to know which block it goes in, or if it goes
+ between two, which block it comes before.
+
+ We can do a binary search to find this block. When we
+ find it, return its index and its values.
+
+ If we find that val is not in a block, return the position
+ where the value should go, were it to be put into a new
+ block by itself. E.g., suppose val is 17, and there is a
+ block [14,16] and a block [18,20]. We would make this
+ [14,16],[17,17],[18,20] by inserting [17,17] between them.
+ (Afterward, we will want to fuse all three blocks to make
+ [14,20]. However, if we insert as block 0, e.g., if the
+ list starts with [18,20] and we insert to get
+ [17,17][18,20], we really end up just modifying block 0 to
+ [17,20]. Or, if we insert as the new final block, we
+ might end up modifying the last block.)
+ """
+ low = 0
+ high = len(self.avail) - 1
+ while low <= high:
+ mid = low + ((high - low) // 2)
+ pair = self.avail[mid]
+ if val < pair[0]:
+ # must go before block mid
+ high = mid - 1
+ elif val > pair[1]:
+ # must go after block mid
+ low = mid + 1
+ else:
+ # val >= first and val <= last, so we found it
+ return mid, pair
+ # Low > high: no block actually contains val, or there are
+ # no blocks at all. Return the index at which a new block
+ # for val would be inserted (0 if there are no blocks), and None.
+ return low, None
+
+ def alloc(self, val=None):
+ """
+ Get new available value.
+
+ If val is None, we start from the most recently
+ allocated value, plus 1.
+
+ If val is a numeric value, we start from that value.
+ Hence, since the range is min_val..max_val, you can
+ provide min_val to take the first available value.
+
+ This may return None, if no values are still available.
+ """
+ with self.lock:
+ if val is None:
+ val = self.last + 1 if self.last is not None else self.min_val
+ if val is None or val > self.max_val or val < self.min_val:
+ val = self.min_val
+ i, pair = self._find_block(val)
+ if pair is None:
+ # Value is not available. The next
+ # available value that is greater than val
+ # is in the block right after block i.
+ # If there is no block after i, the next
+ # available value is in block 0. If there
+ # is no block 0, there are no available
+ # values.
+ nblocks = len(self.avail)
+ i += 1
+ if i >= nblocks:
+ if nblocks == 0:
+ return None
+ i = 0
+ pair = self.avail[i]
+ val = pair[0]
+ # Value val is available - take it.
+ #
+ # There are four special cases to handle.
+ #
+ # 1. pair[0] < val < pair[1]: split the pair.
+ # 2. pair[0] == val < pair[1]: increase pair[0].
+ # 3. pair[0] == val == pair[1]: delete the pair
+ # 4. pair[0] < val == pair[1]: decrease pair[1].
+ assert pair[0] <= val <= pair[1]
+ if pair[0] == val:
+ # case 2 or 3: Take the left edge or delete the pair.
+ if val == pair[1]:
+ del self.avail[i]
+ else:
+ pair[0] = val + 1
+ else:
+ # case 1 or 4: split the pair or take the right edge.
+ if val == pair[1]:
+ pair[1] = val - 1
+ else:
+ newpair = [val + 1, pair[1]]
+ pair[1] = val - 1
+ self.avail.insert(i + 1, newpair)
+ self.last = val
+ return val
+
+ def free(self, val):
+ "Free one value"
+ self._free_multi('free', [val])
+
+ def free_multi(self, values):
+ "Free many values (provide any iterable)"
+ values = list(values)
+ values.sort()
+ self._free_multi('free_multi', values)
+
+ def _free_multi(self, how, values):
+ """
+ Free a (sorted) list of values.
+ """
+ if len(values) == 0:
+ return
+ with self.lock:
+ while values:
+ # Take highest value, and any contiguous lower values.
+ # Note that it can be significantly faster this way
+ # since coalesced ranges make for shorter copies.
+ highval = values.pop()
+ val = highval
+ while len(values) and values[-1] == val - 1:
+ val = values.pop()
+ self._free_range(how, val, highval)
+
+ def _maybe_increase_max(self, how, val):
+ """
+ If needed, widen our range to include new high val -- i.e.,
+ possibly increase self.max_val. Do nothing if this is not a
+ new all time high; fail if we have autoextend disabled.
+ """
+ if val <= self.max_val:
+ return
+ if self.autoextend:
+ self.max_val = val
+ return
+ raise ValueError('{0}: {1} is outside range limit'.format(how, val))
+
+ def _maybe_decrease_min(self, how, val):
+ """
+ If needed, widen our range to include new low val -- i.e.,
+ possibly decrease self.min_val. Do nothing if this is not a
+ new all time low; fail if we have autoextend disabled.
+ """
+ if val >= self.min_val:
+ return
+ if self.autoextend:
+ self.min_val = val
+ return
+ raise ValueError('{0}: {1} is outside range limit'.format(how, val))
+
+ def _free_range(self, how, val, highval):
+ """
+ Free the range [val..highval]. Note: if val == highval, this is
+ just a one-element range.
+
+ The lock is already held.
+ """
+ # Find the place to store the lower value.
+ # We should never find an actual pair here.
+ i, pair = self._find_block(val)
+ if pair:
+ raise ValueError('{0}: {1} already available'.format(how, val))
+ # If we're freeing a range, check that the high val
+ # does not span into the *next* range, either.
+ if highval > val and i < len(self.avail):
+ if self.avail[i][0] <= highval:
+ raise ValueError('{0}: {2} (from {{1}..{2}) already '
+ 'available'.format(how, val, highval))
+
+ # We'll need to insert a block and perhaps fuse it
+ # with blocks before and/or after. First, check
+ # whether there *is* a before and/or after, and find
+ # their corresponding edges and whether we abut them.
+ if i > 0:
+ abuts_below = self.avail[i - 1][1] + 1 == val
+ else:
+ abuts_below = False
+ if i < len(self.avail):
+ abuts_above = self.avail[i][0] - 1 == highval
+ else:
+ abuts_above = False
+ # Now there are these four cases:
+ # 1. abuts below and above: fuse the two blocks.
+ # 2. abuts below only: adjust previous (i-1'th) block
+ # 3. abuts above only: adjust next (i'th) block
+ # 4. doesn't abut: insert new block
+ if abuts_below:
+ if abuts_above:
+ # case 1
+ self.avail[i - 1][1] = self.avail[i][1]
+ del self.avail[i]
+ else:
+ # case 2
+ self._maybe_increase_max(how, highval)
+ self.avail[i - 1][1] = highval
+ else:
+ if abuts_above:
+ # case 3
+ self._maybe_decrease_min(how, val)
+ self.avail[i][0] = val
+ else:
+ # case 4
+ self._maybe_decrease_min(how, val)
+ self._maybe_increase_max(how, highval)
+ newblock = [val, highval]
+ self.avail.insert(i, newblock)
+
+if __name__ == '__main__':
+ import doctest
+ import sys
+
+ doctest.testmod()
+ if sys.version_info[0] >= 3:
+ xrange = range
+ # run some worst case tests
+ # NB: coalesce is terribly slow when done bottom up
+ r = NumAlloc(0, 2**16 - 1)
+ for i in xrange(r.min_val, r.max_val, 2):
+ r.alloc(i)
+ print('worst case alloc: len(r.avail) = {0}'.format(len(r.avail)))
+ for i in xrange(r.max_val - 1, r.min_val, -2):
+ r.free(i)
+ print('free again; len(r.avail) should be 1; is {0}'.format(len(r.avail)))
+ if len(r.avail) != 1:
+ sys.exit('failure')
diff --git a/pytest/p9conn.py b/pytest/p9conn.py
new file mode 100644
index 000000000000..459cd1169944
--- /dev/null
+++ b/pytest/p9conn.py
@@ -0,0 +1,1788 @@
+#! /usr/bin/env python
+
+"""
+handle plan9 server <-> client connections
+
+(We can act as either server or client.)
+
+This code needs some doctests or other unit tests...
+"""
+
+import collections
+import errno
+import logging
+import math
+import os
+import socket
+import stat
+import struct
+import sys
+import threading
+import time
+
+import lerrno
+import numalloc
+import p9err
+import pfod
+import protocol
+
+# Timespec based timestamps, if present, have
+# both seconds and nanoseconds.
+Timespec = collections.namedtuple('Timespec', 'sec nsec')
+
+# File attributes from Tgetattr, or given to Tsetattr.
+# (move to protocol.py?) We use pfod here instead of
+# namedtuple so that we can create instances with all-None
+# fields easily.
+Fileattrs = pfod.pfod('Fileattrs',
+ 'ino mode uid gid nlink rdev size blksize blocks '
+ 'atime mtime ctime btime gen data_version')
+
+qt2n = protocol.qid_type2name
+
+STD_P9_PORT=564
+
+class P9Error(Exception):
+ pass
+
+class RemoteError(P9Error):
+ """
+ Used when the remote returns an error. We track the client
+ (connection instance), the operation being attempted, the
+ message, and an error number and type. The message may be
+ from the Rerror reply, or from converting the errno in a dot-L
+ or dot-u Rerror reply. The error number may be None if the
+ type is 'Rerror' rather than 'Rlerror'. The message may be
+ None or empty string if a non-None errno supplies the error
+ instead.
+ """
+ def __init__(self, client, op, msg, etype, errno):
+ self.client = str(client)
+ self.op = op
+ self.msg = msg
+ self.etype = etype # 'Rerror' or 'Rlerror'
+ self.errno = errno # may be None
+ self.message = self._get_message()
+ super(RemoteError, self).__init__(self, self.message)
+
+ def __repr__(self):
+ return ('{0!r}({1}, {2}, {3}, {4}, '
+ '{5})'.format(self.__class__.__name__, self.client, self.op,
+ self.msg, self.errno, self.etype))
+ def __str__(self):
+ prefix = '{0}: {1}: '.format(self.client, self.op)
+ if self.errno: # check for "is not None", or just non-false-y?
+ name = {'Rerror': '.u', 'Rlerror': 'Linux'}[self.etype]
+ middle = '[{0} error {1}] '.format(name, self.errno)
+ else:
+ middle = ''
+ return '{0}{1}{2}'.format(prefix, middle, self.message)
+
+ def is_ENOTSUP(self):
+ if self.etype == 'Rlerror':
+ return self.errno == lerrno.EOPNOTSUPP
+ return self.errno == errno.EOPNOTSUPP
+
+ def _get_message(self):
+ "get message based on self.msg or self.errno"
+ if self.errno is not None:
+ return {
+ 'Rlerror': p9err.dotl_strerror,
+ 'Rerror' : p9err.dotu_strerror,
+ }[self.etype](self.errno)
+ return self.msg
+
+class LocalError(P9Error):
+ pass
+
+class TEError(LocalError):
+ pass
+
+class P9SockIO(object):
+ """
+ Common base for server and client, handle send and
+ receive to communications channel. Note that this
+ need not set up the channel initially, only the logger.
+ The channel is typically connected later. However, you
+ can provide one initially.
+ """
+ def __init__(self, logger, name=None, server=None, port=STD_P9_PORT):
+ self.logger = logger
+ self.channel = None
+ self.name = name
+ self.maxio = None
+ self.size_coder = struct.Struct('<I')
+ if server is not None:
+ self.connect(server, port)
+ self.max_payload = 2**32 - self.size_coder.size
+
+ def __str__(self):
+ if self.name:
+ return self.name
+ return repr(self)
+
+ def get_recommended_maxio(self):
+ "suggest a max I/O size, for when self.maxio is 0 / unset"
+ return 16 * 4096
+
+ def min_maxio(self):
+ "return a minimum size below which we refuse to work"
+ return self.size_coder.size + 100
+
+ def connect(self, server, port=STD_P9_PORT):
+ """
+ Connect to given server name / IP address.
+
+ If self.name was none, sets self.name to ip:port on success.
+ """
+ if self.is_connected():
+ raise LocalError('already connected')
+ sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0)
+ sock.connect((server, port))
+ if self.name is None:
+ if port == STD_P9_PORT:
+ name = server
+ else:
+ name = '{0}:{1}'.format(server, port)
+ else:
+ name = None
+ self.declare_connected(sock, name, None)
+
+ def is_connected(self):
+ "predicate: are we connected?"
+ return self.channel != None
+
+ def declare_connected(self, chan, name, maxio):
+ """
+ Now available for normal protocol (size-prefixed) I/O.
+
+ Replaces chan and name and adjusts maxio, if those
+ parameters are not None.
+ """
+ if maxio:
+ minio = self.min_maxio()
+ if maxio < minio:
+ raise LocalError('maxio={0} < minimum {1}'.format(maxio, minio))
+ if chan is not None:
+ self.channel = chan
+ if name is not None:
+ self.name = name
+ if maxio is not None:
+ self.maxio = maxio
+ self.max_payload = maxio - self.size_coder.size
+
+ def reduce_maxio(self, maxio):
+ "Reduce maximum I/O size per other-side request"
+ minio = self.min_maxio()
+ if maxio < minio:
+ raise LocalError('new maxio={0} < minimum {1}'.format(maxio, minio))
+ if maxio > self.maxio:
+ raise LocalError('new maxio={0} > current {1}'.format(maxio,
+ self.maxio))
+ self.maxio = maxio
+ self.max_payload = maxio - self.size_coder.size
+
+ def declare_disconnected(self):
+ "Declare comm channel dead (note: leaves self.name set!)"
+ self.channel = None
+ self.maxio = None
+
+ def shutwrite(self):
+ "Do a SHUT_WR on the outbound channel - can't send more"
+ chan = self.channel
+ # we're racing other threads here
+ try:
+ chan.shutdown(socket.SHUT_WR)
+ except (OSError, AttributeError):
+ pass
+
+ def shutdown(self):
+ "Shut down comm channel"
+ if self.channel:
+ try:
+ self.channel.shutdown(socket.SHUT_RDWR)
+ except socket.error:
+ pass
+ self.channel.close()
+ self.declare_disconnected()
+
+ def read(self):
+ """
+ Try to read a complete packet.
+
+ Returns '' for EOF, as read() usually does.
+
+ If we can't even get the size, this still returns ''.
+ If we get a sensible size but are missing some data,
+ we can return a short packet. Since we know if we did
+ this, we also return a boolean: True means "really got a
+ complete packet."
+
+ Note that '' EOF always returns False: EOF is never a
+ complete packet.
+ """
+ if self.channel is None:
+ return b'', False
+ size_field = self.xread(self.size_coder.size)
+ if len(size_field) < self.size_coder.size:
+ if len(size_field) == 0:
+ self.logger.log(logging.INFO, '%s: normal EOF', self)
+ else:
+ self.logger.log(logging.ERROR,
+ '%s: EOF while reading size (got %d bytes)',
+ self, len(size_field))
+ # should we raise an error here?
+ return b'', False
+
+ size = self.size_coder.unpack(size_field)[0] - self.size_coder.size
+ if size <= 0 or size > self.max_payload:
+ self.logger.log(logging.ERROR,
+ '%s: incoming size %d is insane '
+ '(max payload is %d)',
+ self, size, self.max_payload)
+ # indicate EOF - should we raise an error instead, here?
+ return b'', False
+ data = self.xread(size)
+ return data, len(data) == size
+
+ def xread(self, nbytes):
+ """
+ Read nbytes bytes, looping if necessary. Return '' for
+ EOF; may return a short count if we get some data, then
+ EOF.
+ """
+ assert nbytes > 0
+ # Try to get everything at once (should usually succeed).
+ # Return immediately for EOF or got-all-data.
+ data = self.channel.recv(nbytes)
+ if data == b'' or len(data) == nbytes:
+ return data
+
+ # Gather data fragments into an array, then join it all at
+ # the end.
+ count = len(data)
+ data = [data]
+ while count < nbytes:
+ more = self.channel.recv(nbytes - count)
+ if more == b'':
+ break
+ count += len(more)
+ data.append(more)
+ return b''.join(data)
+
+ def write(self, data):
+ """
+ Write all the data, in the usual encoding. Note that
+ the length of the data, including the length of the length
+ itself, is already encoded in the first 4 bytes of the
+ data.
+
+ Raises IOError if we can't write everything.
+
+ Raises LocalError if len(data) exceeds max_payload.
+ """
+ size = len(data)
+ assert size >= 4
+ if size > self.max_payload:
+ raise LocalError('data length {0} exceeds '
+ 'maximum {1}'.format(size, self.max_payload))
+ self.channel.sendall(data)
+
+def _pathcat(prefix, suffix):
+ """
+ Concatenate paths we are using on the server side. This is
+ basically just prefix + / + suffix, with two complications:
+
+ It's possible we don't have a prefix path, in which case
+ we want the suffix without a leading slash.
+
+ It's possible that the prefix is just b'/', in which case we
+ want prefix + suffix.
+ """
+ if prefix:
+ if prefix == b'/': # or prefix.endswith(b'/')?
+ return prefix + suffix
+ return prefix + b'/' + suffix
+ return suffix
+
+class P9Client(P9SockIO):
+ """
+ Act as client.
+
+ We need a logger (see logging), a timeout, and a protocol
+ version to request. By default, we will downgrade to a lower
+ version if asked.
+
+ If server and port are supplied, they are remembered and become
+ the default for .connect() (which is still deferred).
+
+ Note that we keep a table of fid-to-path in self.live_fids,
+ but at any time (except while holding the lock) a fid can
+ be deleted entirely, and the table entry may just be True
+ if we have no path name. In general, we update the name
+ when we can.
+ """
+ def __init__(self, logger, timeout, version, may_downgrade=True,
+ server=None, port=None):
+ super(P9Client, self).__init__(logger)
+ self.timeout = timeout
+ self.iproto = protocol.p9_version(version)
+ self.may_downgrade = may_downgrade
+ self.tagalloc = numalloc.NumAlloc(0, 65534)
+ self.tagstate = {}
+ # The next bit is slightly dirty: perhaps we should just
+ # allocate NOFID out of the 2**32-1 range, so as to avoid
+ # "knowing" that it's 2**32-1.
+ self.fidalloc = numalloc.NumAlloc(0, protocol.td.NOFID - 1)
+ self.live_fids = {}
+ self.rootfid = None
+ self.rootqid = None
+ self.rthread = None
+ self.lock = threading.Lock()
+ self.new_replies = threading.Condition(self.lock)
+ self._monkeywrench = {}
+ self._server = server
+ self._port = port
+ self._unsup = {}
+
+ def get_monkey(self, what):
+ "check for a monkey-wrench"
+ with self.lock:
+ wrench = self._monkeywrench.get(what)
+ if wrench is None:
+ return None
+ if isinstance(wrench, list):
+ # repeats wrench[0] times, or forever if that's 0
+ ret = wrench[1]
+ if wrench[0] > 0:
+ wrench[0] -= 1
+ if wrench[0] == 0:
+ del self._monkeywrench[what]
+ else:
+ ret = wrench
+ del self._monkeywrench[what]
+ return ret
+
+ def set_monkey(self, what, how, repeat=None):
+ """
+ Set a monkey-wrench. If repeat is not None it is the number of
+ times the wrench is applied (0 means forever, or until you call
+ set again with how=None). What is what to monkey-wrench, which
+ depends on the op. How is generally a replacement value.
+ """
+ if how is None:
+ with self.lock:
+ try:
+ del self._monkeywrench[what]
+ except KeyError:
+ pass
+ return
+ if repeat is not None:
+ how = [repeat, how]
+ with self.lock:
+ self._monkeywrench[what] = how
+
+ def get_tag(self, for_Tversion=False):
+ "get next available tag ID"
+ with self.lock:
+ if for_Tversion:
+ tag = 65535
+ else:
+ tag = self.tagalloc.alloc()
+ if tag is None:
+ raise LocalError('all tags in use')
+ self.tagstate[tag] = True # ie, in use, still waiting
+ return tag
+
+ def set_tag(self, tag, reply):
+ "set the reply info for the given tag"
+ assert tag >= 0 and tag < 65536
+ with self.lock:
+ # check whether we're still waiting for the tag
+ state = self.tagstate.get(tag)
+ if state is True:
+ self.tagstate[tag] = reply # i.e., here's the answer
+ self.new_replies.notify_all()
+ return
+ # state must be one of these...
+ if state is False:
+ # We gave up on this tag. Reply came anyway.
+ self.logger.log(logging.INFO,
+ '%s: got tag %d = %r after timing out on it',
+ self, tag, reply)
+ self.retire_tag_locked(tag)
+ return
+ if state is None:
+ # We got a tag back from the server that was not
+ # outstanding!
+ self.logger.log(logging.WARNING,
+ '%s: got tag %d = %r when tag %d not in use!',
+ self, tag, reply, tag)
+ return
+ # We got a second reply before handling the first reply!
+ self.logger.log(logging.WARNING,
+ '%s: got tag %d = %r when tag %d = %r!',
+ self, tag, reply, tag, state)
+ return
+
+ def retire_tag(self, tag):
+ "retire the given tag - only used by the thread that handled the result"
+ if tag == 65535:
+ return
+ assert tag >= 0 and tag < 65535
+ with self.lock:
+ self.retire_tag_locked(tag)
+
+ def retire_tag_locked(self, tag):
+ "retire the given tag while holding self.lock"
+ # must check "in tagstate" because we can race
+ # with retire_all_tags.
+ if tag in self.tagstate:
+ del self.tagstate[tag]
+ self.tagalloc.free(tag)
+
+ def retire_all_tags(self):
+ "retire all tags, after connection drop"
+ with self.lock:
+ # release all tags in any state (waiting, answered, timedout)
+ self.tagalloc.free_multi(self.tagstate.keys())
+ self.tagstate = {}
+ self.new_replies.notify_all()
+
+ def alloc_fid(self):
+ "allocate new fid"
+ with self.lock:
+ fid = self.fidalloc.alloc()
+ self.live_fids[fid] = True
+ return fid
+
+ def getpath(self, fid):
+ "get path from fid, or return None if no path known, or not valid"
+ with self.lock:
+ path = self.live_fids.get(fid)
+ if path is True:
+ path = None
+ return path
+
+ def getpathX(self, fid):
+ """
+ Much like getpath, but return <fid N, unknown path> if necessary.
+ If we do have a path, return its repr().
+ """
+ path = self.getpath(fid)
+ if path is None:
+ return '<fid {0}, unknown path>'.format(fid)
+ return repr(path)
+
+ def setpath(self, fid, path):
+ "associate fid with new path (possibly from another fid)"
+ with self.lock:
+ if isinstance(path, int):
+ path = self.live_fids.get(path)
+ # path might now be None (not a live fid after all), or
+ # True (we have no path name), or potentially even the
+ # empty string (invalid for our purposes). Treat all of
+ # those as True, meaning "no known path".
+ if not path:
+ path = True
+ if self.live_fids.get(fid):
+ # Existing fid maps to either True or its old path.
+ # Set the new path (which may be just a placeholder).
+ self.live_fids[fid] = path
+
+ def did_rename(self, fid, ncomp, newdir=None):
+ """
+ Announce that we renamed using a fid - we'll try to update
+ other fids based on this (we can't really do it perfectly).
+
+ NOTE: caller must provide a final-component.
+ The caller can supply the new path (and should
+ do so if the rename is not based on the retained path
+ for the supplied fid, i.e., for rename ops where fid
+ can move across directories). The rules:
+
+ - If newdir is None (default), we use stored path.
+ - Otherwise, newdir provides the best approximation
+ we have to the path that needs ncomp appended.
+
+ (This is based on the fact that renames happen via Twstat
+ or Trename, or Trenameat, which change just one tail component,
+ but the path names vary.)
+ """
+ if ncomp is None:
+ return
+ opath = self.getpath(fid)
+ if newdir is None:
+ if opath is None:
+ return
+ ocomps = opath.split(b'/')
+ ncomps = ocomps[0:-1]
+ else:
+ ocomps = None # well, none yet anyway
+ ncomps = newdir.split(b'/')
+ ncomps.append(ncomp)
+ if opath is None or opath[0] != '/':
+ # We don't have enough information to fix anything else.
+ # Just store the new path and return. We have at least
+ # a partial path now, which is no worse than before.
+ npath = b'/'.join(ncomps)
+ with self.lock:
+ if fid in self.live_fids:
+ self.live_fids[fid] = npath
+ return
+ if ocomps is None:
+ ocomps = opath.split(b'/')
+ olen = len(ocomps)
+ ofinal = ocomps[olen - 1]
+ # The old path is a full path. Find any other fids that start
+ # with some or all the components in ocomps. Note that if
+ # we renamed /one/two/three to /four/five this winds up
+ # renaming files /one/a to /four/a, /one/two/b to /four/five/b,
+ # and so on.
+ with self.lock:
+ for fid2, path2 in self.live_fids.iteritems():
+ # Skip fids without byte-string paths
+ if not isinstance(path2, bytes):
+ continue
+ # Before splitting (which is a bit expensive), try
+ # a straightforward prefix match. This might give
+ # some false hits, e.g., prefix /one/two/threepenny
+ # starts with /one/two/three, but it quickly eliminates
+ # /raz/baz/mataz and the like.
+ if not path2.startswith(opath):
+ continue
+ # Split up the path, and use that to make sure that
+ # the final component is a full match.
+ parts2 = path2.split(b'/')
+ if parts2[olen - 1] != ofinal:
+ continue
+ # OK, path2 starts with the old (renamed) sequence.
+ # Replace the old components with the new ones.
+ # This updates the renamed fid when we come across
+ # it! It also handles a change in the number of
+ # components, thanks to Python's slice assignment.
+ parts2[0:olen] = ncomps
+ self.live_fids[fid2] = b'/'.join(parts2)
+
+ def retire_fid(self, fid):
+ "retire one fid"
+ with self.lock:
+ self.fidalloc.free(fid)
+ del self.live_fids[fid]
+
+ def retire_all_fids(self):
+ "return live fids to pool"
+ # this is useful for debugging fid leaks:
+ #for fid in self.live_fids:
+ # print 'retiring', fid, self.getpathX(fid)
+ with self.lock:
+ self.fidalloc.free_multi(self.live_fids.keys())
+ self.live_fids = {}
+
+ def read_responses(self):
+ "Read responses. This gets spun off as a thread."
+ while self.is_connected():
+ pkt, is_full = super(P9Client, self).read()
+ if pkt == b'':
+ self.shutwrite()
+ self.retire_all_tags()
+ return
+ if not is_full:
+ self.logger.log(logging.WARNING, '%s: got short packet', self)
+ try:
+ # We have one special case: if we're not yet connected
+ # with a version, we must unpack *as if* it's a plain
+ # 9P2000 response.
+ if self.have_version:
+ resp = self.proto.unpack(pkt)
+ else:
+ resp = protocol.plain.unpack(pkt)
+ except protocol.SequenceError as err:
+ self.logger.log(logging.ERROR, '%s: bad response: %s',
+ self, err)
+ try:
+ resp = self.proto.unpack(pkt, noerror=True)
+ except protocol.SequenceError:
+ header = self.proto.unpack_header(pkt, noerror=True)
+ self.logger.log(logging.ERROR,
+ '%s: (not even raw-decodable)', self)
+ self.logger.log(logging.ERROR,
+ '%s: header decode produced %r',
+ self, header)
+ else:
+ self.logger.log(logging.ERROR,
+ '%s: raw decode produced %r',
+ self, resp)
+ # after this kind of problem, probably need to
+ # shut down, but let's leave that out for a bit
+ else:
+ # NB: all protocol responses have a "tag",
+ # so resp['tag'] always exists.
+ self.logger.log(logging.DEBUG, "read_resp: tag %d resp %r", resp.tag, resp)
+ self.set_tag(resp.tag, resp)
+
+ def wait_for(self, tag):
+ """
+ Wait for a response to the given tag. Return the response,
+ releasing the tag. If self.timeout is not None, wait at most
+ that long (and release the tag even if there's no reply), else
+ wait forever.
+
+ If this returns None, either the tag was bad initially, or
+ a timeout occurred, or the connection got shut down.
+ """
+ self.logger.log(logging.DEBUG, "wait_for: tag %d", tag)
+ if self.timeout is None:
+ deadline = None
+ else:
+ deadline = time.time() + self.timeout
+ with self.lock:
+ while True:
+ # tagstate is True (waiting) or False (timedout) or
+ # a valid response, or None if we've reset the tag
+ # states (retire_all_tags, after connection drop).
+ resp = self.tagstate.get(tag, None)
+ if resp is None:
+ # out of sync, exit loop
+ break
+ if resp is True:
+ # still waiting for a response - wait some more
+ self.new_replies.wait(self.timeout)
+ if deadline and time.time() > deadline:
+ # Halt the waiting, but go around once more.
+ # Note we may have killed the tag by now though.
+ if tag in self.tagstate:
+ self.tagstate[tag] = False
+ continue
+ # resp is either False (timeout) or a reply.
+ # If resp is False, change it to None; the tag
+ # is now dead until we get a reply (then we
+ # just toss the reply).
+ # Otherwise, we're done with the tag: free it.
+ # In either case, stop now.
+ if resp is False:
+ resp = None
+ else:
+ self.tagalloc.free(tag)
+ del self.tagstate[tag]
+ break
+ return resp
+
+ def badresp(self, req, resp):
+ """
+ Complain that a response was not something expected.
+ """
+ if resp is None:
+ self.shutdown()
+ raise TEError('{0}: {1}: timeout or EOF'.format(self, req))
+ if isinstance(resp, protocol.rrd.Rlerror):
+ raise RemoteError(self, req, None, 'Rlerror', resp.ecode)
+ if isinstance(resp, protocol.rrd.Rerror):
+ if resp.errnum is None:
+ raise RemoteError(self, req, resp.errstr, 'Rerror', None)
+ raise RemoteError(self, req, None, 'Rerror', resp.errnum)
+ raise LocalError('{0}: {1} got response {2!r}'.format(self, req, resp))
+
+ def supports(self, req_code):
+ """
+ Test self.proto.supports(req_code) unless we've recorded that
+ while the protocol supports it, the client does not.
+ """
+ return req_code not in self._unsup and self.proto.supports(req_code)
+
+ def supports_all(self, *req_codes):
+ "basically just all(supports(...))"
+ return all(self.supports(code) for code in req_codes)
+
+ def unsupported(self, req_code):
+ """
+ Record an ENOTSUP (RemoteError was ENOTSUP) for a request.
+ Must be called from the op, this does not happen automatically.
+ (It's just an optimization.)
+ """
+ self._unsup[req_code] = True
+
def connect(self, server=None, port=None):
    """
    Connect to a server/port pair and negotiate the protocol version.

    A None server or port means "use the remembered value"; the
    remembered values start out as whatever this client instance
    was created with.  A missing port finally falls back to
    STD_P9_PORT, a missing server raises LocalError.

    The supplied values replace the remembered ones only after
    the connect *and* version exchange have both succeeded.
    """
    if server is None:
        server = self._server
        if server is None:
            raise LocalError('connect: no server specified and no default')
    if port is None:
        port = self._port
        if port is None:
            port = STD_P9_PORT

    # Forget any previous name, then bring up the transport.
    self.name = None
    super(P9Client, self).connect(server, port)
    maxio = self.get_recommended_maxio()
    self.declare_connected(None, None, maxio)

    # Every (re)connect starts over from the initial protocol.
    self.proto = self.iproto
    self.have_version = False

    # Replies are collected by a reader thread.
    reader = threading.Thread(target=self.read_responses)
    self.rthread = reader
    reader.start()

    # Version exchange.
    vtag = self.get_tag(for_Tversion=True)
    treq = protocol.rrd.Tversion(tag=vtag, msize=maxio,
                                 version=self.get_monkey('version'))
    super(P9Client, self).write(self.proto.pack_from(treq))
    reply = self.wait_for(vtag)
    if not isinstance(reply, protocol.rrd.Rversion):
        self.shutdown()
        if isinstance(reply, protocol.rrd.Rerror):
            version = treq.version or self.proto.get_version()
            # for python3, the version must be a str before we
            # can concatenate it into the message
            if not isinstance(version, str):
                version = version.decode('utf-8', 'surrogateescape')
            raise RemoteError(self, 'version ' + version,
                              reply.errstr, 'Rerror', None)
        self.badresp('version', reply)

    # Adopt the server's message size.
    their_maxio = reply.msize
    try:
        self.reduce_maxio(their_maxio)
    except LocalError as err:
        raise LocalError('{0}: sent maxio={1}, they tried {2}: {3}'.format(
            self, maxio, their_maxio, err.args[0]))

    # Downgrade (if permitted) when the server answered with a
    # different version than the one we asked for.
    if reply.version != self.proto.get_version():
        if not self.may_downgrade:
            self.shutdown()
            raise LocalError('{0}: they only support '
                             'version {1!r}'.format(self, reply.version))
        # raises LocalError if the version is bad
        # (should we wrap it with a connect-to-{0} msg?)
        self.proto = self.proto.downgrade_to(reply.version)

    self._server = server
    self._port = port
    self.have_version = True
+
def attach(self, afid, uname, aname, n_uname):
    """
    Issue Tattach and record the resulting root fid / root qid.

    Any argument given as None is replaced by its "not supplied"
    value (NOFID, empty string, NONUNAME).  Authentication is not
    implemented here; a caller-provided afid is simply passed on.
    """
    afid = protocol.td.NOFID if afid is None else afid
    uname = '' if uname is None else uname
    aname = '' if aname is None else aname
    n_uname = protocol.td.NONUNAME if n_uname is None else n_uname

    tag = self.get_tag()
    rootfid = self.alloc_fid()
    request = self.proto.Tattach(tag=tag, fid=rootfid, afid=afid,
                                 uname=uname, aname=aname,
                                 n_uname=n_uname)
    super(P9Client, self).write(request)
    reply = self.wait_for(tag)
    if not isinstance(reply, protocol.rrd.Rattach):
        # attach failed: give the fid back, then raise
        self.retire_fid(rootfid)
        self.badresp('attach', reply)
    # probably should check reply.qid
    self.rootfid = rootfid
    self.rootqid = reply.qid
    self.setpath(rootfid, b'/')
+
def shutdown(self):
    """
    Disconnect from the server: clunk the root fid (ignoring
    errors), drop all tags and fids, shut down the underlying
    connection, and join the reader thread if there is one.
    """
    root = self.rootfid
    if root is not None:
        self.clunk(root, ignore_error=True)
    self.retire_all_tags()
    self.retire_all_fids()
    self.rootfid = self.rootqid = None
    super(P9Client, self).shutdown()
    reader = self.rthread
    if reader:
        reader.join()
        self.rthread = None
+
def dupfid(self, fid):
    """
    Duplicate fid: a zero-component Twalk onto a newly allocated
    fid.  Returns the new fid, which inherits fid's recorded path.
    """
    tag = self.get_tag()
    clone = self.alloc_fid()
    request = self.proto.Twalk(tag=tag, fid=fid, newfid=clone, nwname=0,
                               wname=[])
    super(P9Client, self).write(request)
    reply = self.wait_for(tag)
    if isinstance(reply, protocol.rrd.Rwalk):
        # Copy path too
        self.setpath(clone, fid)
        return clone
    # Walk failed: release the fid and raise.
    self.retire_fid(clone)
    self.badresp('walk {0}'.format(self.getpathX(fid)), reply)
+
def lookup(self, fid, components):
    """
    Do Twalk.  Caller must provide a starting fid, which should
    be rootfid to look up from '/' - we do not do / vs . here.
    Caller must also provide a component-ized path (on purpose,
    so that caller can provide invalid components like '' or '/').
    The components must be byte-strings as well, for the same
    reason.

    We do allocate the new fid ourselves here, though.

    There's no logic here to split up long walks (yet?).

    Returns (newfid, list-of-qids).  Raises LocalError if we are
    not attached, if components is a bare str/bytes rather than a
    sequence, or if any component is not a byte-string; raises
    OSError(ENOENT) if the server walked only part of the way.
    """
    # these are too easy to screw up, so check
    if self.rootfid is None:
        raise LocalError('{0}: not attached'.format(self))
    # Both tests must be separate operands of "or": a single
    # string is rejected outright, and otherwise every component
    # has to be bytes.
    if (isinstance(components, (str, bytes)) or
            not all(isinstance(i, bytes) for i in components)):
        raise LocalError('{0}: lookup: invalid '
                         'components {1!r}'.format(self, components))
    tag = self.get_tag()
    newfid = self.alloc_fid()
    startpath = self.getpath(fid)
    pkt = self.proto.Twalk(tag=tag, fid=fid, newfid=newfid,
                           nwname=len(components), wname=components)
    super(P9Client, self).write(pkt)
    resp = self.wait_for(tag)
    if not isinstance(resp, protocol.rrd.Rwalk):
        self.retire_fid(newfid)
        self.badresp('walk {0} in '
                     '{1}'.format(components, self.getpathX(fid)),
                     resp)
    # Just because we got Rwalk does not mean we got ALL the
    # way down the path.  Raise OSError(ENOENT) if we're short.
    if resp.nwqid > len(components):
        # ??? this should be impossible.  Local error?  Remote error?
        # OS Error?
        self.clunk(newfid, ignore_error=True)
        raise LocalError('{0}: walk {1} in {2} returned {3} '
                         'items'.format(self, components,
                                        self.getpathX(fid), resp.nwqid))
    if resp.nwqid < len(components):
        self.clunk(newfid, ignore_error=True)
        # Looking up a/b/c and got just a/b, c is what's missing.
        # Looking up a/b/c and got just a, b is what's missing.
        missing = components[resp.nwqid]
        within = _pathcat(startpath, b'/'.join(components[:resp.nwqid]))
        raise OSError(errno.ENOENT,
                      '{0}: {1} in {2}'.format(os.strerror(errno.ENOENT),
                                               missing, within))
    self.setpath(newfid, _pathcat(startpath, b'/'.join(components)))
    return newfid, resp.wqid
+
def lookup_last(self, fid, components):
    """
    Run lookup() and hand back (newfid, qid-of-final-component).

    With an empty component list the walk yields no qids, so the
    qid is obtained another way: from the remembered root qid when
    starting at the root fid, or else through a Tstat on the
    newly walked fid.
    """
    rfid, wqid = self.lookup(fid, components)
    if len(wqid) > 0:
        return rfid, wqid[-1]
    if fid == self.rootfid:  # usually true, if we get here at all
        return rfid, self.rootqid
    tag = self.get_tag()
    request = self.proto.Tstat(tag=tag, fid=rfid)
    super(P9Client, self).write(request)
    reply = self.wait_for(tag)
    if not isinstance(reply, protocol.rrd.Rstat):
        self.badresp('stat {0}'.format(self.getpathX(fid)), reply)
    # NOTE(review): decode_stat_objects() calls unpack_wirestat with
    # (bstring, offset, noerror) and unpacks an (obj, offset) pair;
    # this call passes one argument and uses the result as the stat
    # object itself.  One of the two looks wrong - confirm against
    # the protocol module.
    stat = self.proto.unpack_wirestat(reply.data)
    return rfid, stat.qid
+
def clunk(self, fid, ignore_error=False):
    """
    Issue Tclunk for fid.  On success the fid is retired locally.
    On failure, either return quietly (ignore_error) or raise via
    badresp(); the fid is not retired in that case.
    """
    tag = self.get_tag()
    request = self.proto.Tclunk(tag=tag, fid=fid)
    super(P9Client, self).write(request)
    reply = self.wait_for(tag)
    if isinstance(reply, protocol.rrd.Rclunk):
        self.retire_fid(fid)
        return
    if not ignore_error:
        self.badresp('clunk {0}'.format(self.getpathX(fid)), reply)
+
def remove(self, fid, ignore_error=False):
    """
    Issue the old-style Tremove, which on success also disposes
    of fid (we retire it locally).  If the remove fails and
    ignore_error is set, the fid is clunked instead; otherwise
    the failure is raised via badresp().
    """
    tag = self.get_tag()
    request = self.proto.Tremove(tag=tag, fid=fid)
    super(P9Client, self).write(request)
    reply = self.wait_for(tag)
    if isinstance(reply, protocol.rrd.Rremove):
        self.retire_fid(fid)
        return
    if ignore_error:
        # remove failed: still need to clunk the fid
        self.clunk(fid, True)
        return
    self.badresp('remove {0}'.format(self.getpathX(fid)), reply)
+
def create(self, fid, name, perm, mode, filetype=None, extension=b''):
    """
    Issue Tcreate (which may really be a mkdir, symlink, etc).

    fid names the directory in which to create; when a regular
    file is created, fid ends up referring to the new, open file.
    perm is the permission value (0644, 0755, ...), optionally
    with extra high bits.  mode is the one-byte open mode
    (protocol.td.ORDWR, OWRONLY|OTRUNC, ...).

    For convenience, filetype may be one of 'dir', 'symlink',
    'device', 'fifo' or 'socket', in which case the matching DM
    bit is or-ed into perm.  extension carries the extra data
    some types need: the link target (bytes) for a symlink, or
    b"b <major> <minor>" / b"c <major> <minor>" for a device.
    With filetype=None the caller encodes the type bits in perm
    directly (and still supplies extension if one is needed).

    Nothing is cross-checked (extension vs. DM bits, multiple DM
    bits, ...) since this is a testing setup.

    Returns (qid, iounit).
    """
    tag = self.get_tag()
    if filetype is not None:
        dm_bits = {
            'dir': protocol.td.DMDIR,
            'symlink': protocol.td.DMSYMLINK,
            'device': protocol.td.DMDEVICE,
            'fifo': protocol.td.DMNAMEDPIPE,
            'socket': protocol.td.DMSOCKET,
        }
        perm |= dm_bits[filetype]
    request = self.proto.Tcreate(tag=tag, fid=fid, name=name,
                                 perm=perm, mode=mode, extension=extension)
    super(P9Client, self).write(request)
    reply = self.wait_for(tag)
    if not isinstance(reply, protocol.rrd.Rcreate):
        what = 'create {0} in {1}'.format(name, self.getpathX(fid))
        self.badresp(what, reply)
    if reply.qid.type == protocol.td.QTFILE:
        # Creating a regular file opens the file,
        # thus changing the fid's path.
        self.setpath(fid, _pathcat(self.getpath(fid), name))
    return reply.qid, reply.iounit
+
def open(self, fid, mode):
    "Topen the file or directory behind fid (mode is 1 byte); returns (qid, iounit)"
    tag = self.get_tag()
    request = self.proto.Topen(tag=tag, fid=fid, mode=mode)
    super(P9Client, self).write(request)
    reply = self.wait_for(tag)
    if isinstance(reply, protocol.rrd.Ropen):
        return reply.qid, reply.iounit
    self.badresp('open {0}'.format(self.getpathX(fid)), reply)
+
def lopen(self, fid, flags):
    "Tlopen the file or directory behind fid (flags from L_O_*); returns (qid, iounit)"
    tag = self.get_tag()
    request = self.proto.Tlopen(tag=tag, fid=fid, flags=flags)
    super(P9Client, self).write(request)
    reply = self.wait_for(tag)
    if isinstance(reply, protocol.rrd.Rlopen):
        return reply.qid, reply.iounit
    self.badresp('lopen {0}'.format(self.getpathX(fid)), reply)
+
def read(self, fid, offset, count):
    "Tread at most count bytes at offset from the open fid; returns the data"
    tag = self.get_tag()
    request = self.proto.Tread(tag=tag, fid=fid, offset=offset, count=count)
    super(P9Client, self).write(request)
    reply = self.wait_for(tag)
    if isinstance(reply, protocol.rrd.Rread):
        return reply.data
    what = 'read {0} bytes at offset {1} in {2}'.format(
        count, offset, self.getpathX(fid))
    self.badresp(what, reply)
+
def write(self, fid, offset, data):
    "Twrite data at offset to the open fid; returns the count the server reports"
    tag = self.get_tag()
    nbytes = len(data)
    request = self.proto.Twrite(tag=tag, fid=fid, offset=offset,
                                count=nbytes, data=data)
    super(P9Client, self).write(request)
    reply = self.wait_for(tag)
    if isinstance(reply, protocol.rrd.Rwrite):
        return reply.count
    what = 'write {0} bytes at offset {1} in {2}'.format(
        len(data), offset, self.getpathX(fid))
    self.badresp(what, reply)
+
# The caller has three ways to say what to change:
#  - hand in a complete stat object,
#  - name each field to set as a keyword argument, or
#  - combine the two: start from an existing stat and override
#    individual fields by keyword.
# None values are turned into the wire-level "do not change"
# values before sending; when reporting a failure we turn the
# "do not change" values back into None so the message is readable.
def wstat(self, fid, statobj=None, **kwargs):
    if statobj is None:
        statobj = protocol.td.stat()
    else:
        statobj = statobj._copy()

    # type and qid are informational and cannot be written;
    # 'extension' is an input to create but read-only in stat.
    #
    # What setting dev would mean is unclear; it is left settable
    # here (the original note says fs/backend.c rejects attempts
    # to change it).
    if self.proto == protocol.plain:
        readonly = ('type', 'qid', 'extension',
                    'n_uid', 'n_gid', 'n_muid')
    else:
        readonly = ('type', 'qid', 'extension')

    all_ones_32 = 2**32 - 1
    unchanged = {
        'type': 0,
        'qid': protocol.td.qid(0, 0, 0),
        'dev': all_ones_32,
        'mode': all_ones_32,
        'atime': all_ones_32,
        'mtime': all_ones_32,
        'length': 2**64 - 1,
        'name': b'',
        'uid': b'',
        'gid': b'',
        'muid': b'',
        'extension': b'',
        'n_uid': all_ones_32,
        'n_gid': all_ones_32,
        'n_muid': all_ones_32,
    }

    # Merge keyword overrides, and fill in "do not change" for
    # everything that is unwritable or left as None.
    for field in statobj._fields:
        if field not in kwargs:
            if field in readonly or statobj[field] is None:
                statobj[field] = unchanged[field]
            continue
        if field in readonly:
            raise ValueError('cannot wstat a stat.{0}'.format(field))
        statobj[field] = kwargs.pop(field)
    if kwargs:
        raise TypeError('wstat() got an unexpected keyword argument '
                        '{0!r}'.format(kwargs.popitem()))

    data = self.proto.pack_wirestat(statobj)
    tag = self.get_tag()
    request = self.proto.Twstat(tag=tag, fid=fid, data=data)
    super(P9Client, self).write(request)
    reply = self.wait_for(tag)
    if not isinstance(reply, protocol.rrd.Rwstat):
        # For error viewing, switch all the do-not-change
        # and can't-change fields to None.
        statobj.qid = None
        for field in statobj._fields:
            if field in readonly:
                statobj[field] = None
            elif field in unchanged and statobj[field] == unchanged[field]:
                statobj[field] = None
        self.badresp('wstat {0}={1}'.format(self.getpathX(fid), statobj),
                     reply)
    # wstat worked - change path names if needed
    if statobj.name != b'':
        self.did_rename(fid, statobj.name)
+
def readdir(self, fid, offset, count):
    "Treaddir at most count bytes of directory data at offset from the open fid"
    tag = self.get_tag()
    request = self.proto.Treaddir(tag=tag, fid=fid, offset=offset,
                                  count=count)
    super(P9Client, self).write(request)
    reply = self.wait_for(tag)
    if isinstance(reply, protocol.rrd.Rreaddir):
        return reply.data
    what = 'readdir {0} bytes at offset {1} in {2}'.format(
        count, offset, self.getpathX(fid))
    self.badresp(what, reply)
+
def rename(self, fid, dfid, name):
    "Trename: move the file behind <fid> to <dfid>/name, updating fid's recorded path"
    tag = self.get_tag()
    request = self.proto.Trename(tag=tag, fid=fid, dfid=dfid, name=name)
    super(P9Client, self).write(request)
    reply = self.wait_for(tag)
    if not isinstance(reply, protocol.rrd.Rrename):
        old_path = self.getpathX(fid)
        dir_path = self.getpathX(dfid)
        self.badresp('rename {0} to {2} in {1}'.format(old_path, dir_path,
                                                      name),
                     reply)
    self.did_rename(fid, name, self.getpath(dfid))
+
def renameat(self, olddirfid, oldname, newdirfid, newname):
    """
    Invoke Trenameat: rename <olddirfid>/oldname to
    <newdirfid>/newname.

    Raises (via badresp) if the server does not answer with
    Rrenameat.  No fid path is updated: what gets renamed is a
    file named relative to directory fids, not a fid.
    """
    tag = self.get_tag()
    pkt = self.proto.Trenameat(tag=tag,
                               olddirfid=olddirfid, oldname=oldname,
                               newdirfid=newdirfid, newname=newname)
    super(P9Client, self).write(pkt)
    resp = self.wait_for(tag)
    if not isinstance(resp, protocol.rrd.Rrenameat):
        # Positional indices follow argument order so that each
        # name is reported together with its own directory.
        self.badresp('rename {0} in {1} to {2} in '
                     '{3}'.format(oldname, self.getpathX(olddirfid),
                                  newname, self.getpathX(newdirfid)),
                     resp)
    # There's no renamed *fid*, just a renamed file!  So no
    # call to self.did_rename().
+
def unlinkat(self, dirfd, name, flags):
    "Tunlinkat name within dirfd - flags should be 0 or protocol.td.AT_REMOVEDIR"
    tag = self.get_tag()
    request = self.proto.Tunlinkat(tag=tag, dirfd=dirfd,
                                   name=name, flags=flags)
    super(P9Client, self).write(request)
    reply = self.wait_for(tag)
    if isinstance(reply, protocol.rrd.Runlinkat):
        return
    what = 'unlinkat {0} in {1}'.format(name, self.getpathX(dirfd))
    self.badresp(what, reply)
+
def decode_stat_objects(self, bstring, noerror=False):
    """
    Decode the result of a read on a directory: a back-to-back
    sequence of stat objects (for .u these carry extra data).

    Returns the list of decoded objects.  Bad data can make the
    decoder raise a SequenceError unless noerror=True is passed.
    """
    decoded = []
    pos, end = 0, len(bstring)
    while pos < end:
        entry, pos = self.proto.unpack_wirestat(bstring, pos, noerror)
        decoded.append(entry)
    return decoded
+
def decode_readdir_dirents(self, bstring, noerror=False):
    """
    Decode the result of a readdir on a directory: a back-to-back
    sequence of dirent objects.

    Returns the list of decoded objects.  Bad data can make the
    decoder raise a SequenceError unless noerror=True is passed.
    """
    decoded = []
    pos, end = 0, len(bstring)
    while pos < end:
        entry, pos = self.proto.unpack_dirent(bstring, pos, noerror)
        decoded.append(entry)
    return decoded
+
def lcreate(self, fid, name, lflags, mode, gid):
    "Tlcreate (.L) name in directory fid; fid then names the new open file. Returns (qid, iounit)"
    tag = self.get_tag()
    request = self.proto.Tlcreate(tag=tag, fid=fid, name=name,
                                  flags=lflags, mode=mode, gid=gid)
    super(P9Client, self).write(request)
    reply = self.wait_for(tag)
    if not isinstance(reply, protocol.rrd.Rlcreate):
        what = 'create {0} in {1}'.format(name, self.getpathX(fid))
        self.badresp(what, reply)
    # Creating a file opens the file,
    # thus changing the fid's path.
    new_path = _pathcat(self.getpath(fid), name)
    self.setpath(fid, new_path)
    return reply.qid, reply.iounit
+
def mkdir(self, dfid, name, mode, gid):
    "Tmkdir (.L) name in directory dfid; returns the new directory's qid"
    tag = self.get_tag()
    request = self.proto.Tmkdir(tag=tag, dfid=dfid, name=name,
                                mode=mode, gid=gid)
    super(P9Client, self).write(request)
    reply = self.wait_for(tag)
    if isinstance(reply, protocol.rrd.Rmkdir):
        return reply.qid
    what = 'mkdir {0} in {1}'.format(name, self.getpathX(dfid))
    self.badresp(what, reply)
+
# Deliberately not named getattr(): that would collide with
# Python's attribute-lookup machinery.
def Tgetattr(self, fid, request_mask=protocol.td.GETATTR_ALL):
    "issue Tgetattr.L - returns a Fileattrs holding what the reply marks valid"
    tag = self.get_tag()
    request = self.proto.Tgetattr(tag=tag, fid=fid,
                                  request_mask=request_mask)
    super(P9Client, self).write(request)
    reply = self.wait_for(tag)
    if not isinstance(reply, protocol.rrd.Rgetattr):
        what = 'Tgetattr {0} of {1}'.format(request_mask,
                                           self.getpathX(fid))
        self.badresp(what, reply)

    def is_valid(name):
        "did the server set the GETATTR_<NAME> bit in reply.valid?"
        return reply.valid & getattr(protocol.td, 'GETATTR_' + name.upper())

    result = Fileattrs()
    # Fields that are copied straight across when valid.
    for name in ('mode', 'nlink', 'uid', 'gid', 'rdev',
                 'size', 'blocks', 'gen', 'data_version'):
        if is_valid(name):
            result[name] = reply[name]
    # Timestamps arrive as separate _sec/_nsec fields.
    for name in ('atime', 'mtime', 'ctime', 'btime'):
        if is_valid(name):
            result[name] = Timespec(sec=reply[name + '_sec'],
                                    nsec=reply[name + '_nsec'])
    # There is no control bit for blksize; qemu and Linux always
    # provide one.
    result.blksize = reply.blksize
    # The inode number is carried in qid.path.
    if reply.valid & protocol.td.GETATTR_INO:
        result.ino = reply.qid.path
    return result
+
# We don't call this setattr(), for the obvious reason.
# See wstat for usage.  Note that time fields can be set
# with either second or nanosecond resolutions, and some
# can be set without supplying an actual timestamp, so
# this is all pretty ad-hoc.
#
# There's also one keyword-only argument, ctime=<anything>,
# which means "set SETATTR_CTIME".  This has the same effect
# as supplying valid=protocol.td.SETATTR_CTIME.
def Tsetattr(self, fid, valid=0, attrs=None, **kwargs):
    """
    Issue Tsetattr.L on fid.

    valid is the initial SETATTR_* bit mask; bits are added for
    every attribute supplied through attrs (a Fileattrs, copied
    before use) or through keyword arguments.  Attributes that
    cannot be set (ino, nlink, rdev, blksize, blocks, btime, gen,
    data_version) are ignored when they come in via attrs and
    raise ValueError when given as keywords.  Unknown keywords
    raise TypeError.  A non-Rsetattr reply raises via badresp().
    """
    if attrs is None:
        attrs = Fileattrs()
    else:
        attrs = attrs._copy()

    # Start with an empty (all-zero) Tsetattr instance.  We
    # don't really need to zero out tag and fid, but it doesn't
    # hurt.  Note that if caller says, e.g., valid=SETATTR_SIZE
    # but does not supply an incoming size (via "attrs" or a size=
    # argument), we'll ask to set that field to 0.
    attrobj = protocol.rrd.Tsetattr()
    for field in attrobj._fields:
        attrobj[field] = 0

    # In this case, forbid means "not as kwargs": these values
    # in an incoming attrs object are merely ignored.
    forbid = ('ino', 'nlink', 'rdev', 'blksize', 'blocks', 'btime',
              'gen', 'data_version')
    for field in attrs._fields:
        from_kwarg = field in kwargs
        if from_kwarg:
            if field in forbid:
                raise ValueError('cannot Tsetattr {0}'.format(field))
            attrs[field] = kwargs.pop(field)
        elif attrs[field] is None or field in forbid:
            # Nothing supplied, or an unsettable value that came
            # along in attrs (e.g. from Tgetattr): skip it before
            # looking for a SETATTR_ bit that does not exist.
            continue

        # The ctime field's value, if any, is *ignored*: only an
        # explicit ctime= keyword asks for SETATTR_CTIME.  The
        # keyword has been popped above, so act on it here.
        if field == 'ctime':
            if from_kwarg:
                valid |= protocol.td.SETATTR_CTIME
            continue

        # OK, we're setting this attribute.  Many are just
        # numeric - if that's the case, we're good, set the
        # field and the appropriate bit.
        bitname = 'SETATTR_' + field.upper()
        bit = getattr(protocol.td, bitname)
        if field in ('mode', 'uid', 'gid', 'size'):
            valid |= bit
            attrobj[field] = attrs[field]
            continue

        # Timestamps are special: The value may be given as
        # an integer (seconds), or as a float (we convert to
        # sec+nsec), or as a timespec (sec+nsec).
        # If specified as 0, we mean "we are not providing the
        # actual time, use the server's time."
        if field in ('atime', 'mtime'):
            value = attrs[field]
            if hasattr(value, '__len__'):
                if len(value) != 2:
                    raise ValueError('invalid {0}={1!r}'.format(field,
                                                                value))
                sec = value[0]
                nsec = value[1]
            else:
                sec = value
                if isinstance(sec, float):
                    # Split into integer seconds and nanoseconds;
                    # carry if rounding reaches a full second.
                    frac, whole = math.modf(sec)
                    sec = int(whole)
                    nsec = int(round(frac * 1000000000))
                    if nsec >= 1000000000:
                        sec += 1
                        nsec -= 1000000000
                else:
                    nsec = 0
            valid |= bit
            attrobj[field + '_sec'] = sec
            attrobj[field + '_nsec'] = nsec
            if sec != 0 or nsec != 0:
                # Add SETATTR_ATIME_SET or SETATTR_MTIME_SET
                # as appropriate, to tell the server to use *this
                # specific* time, instead of just "server now".
                bit = getattr(protocol.td, bitname + '_SET')
                valid |= bit
    # Still honor ctime= when attrs has no ctime field at all.
    if 'ctime' in kwargs:
        kwargs.pop('ctime')
        valid |= protocol.td.SETATTR_CTIME
    if kwargs:
        raise TypeError('Tsetattr() got an unexpected keyword argument '
                        '{0!r}'.format(kwargs.popitem()))

    tag = self.get_tag()
    attrobj.valid = valid
    attrobj.tag = tag
    attrobj.fid = fid
    pkt = self.proto.pack(attrobj)
    super(P9Client, self).write(pkt)
    resp = self.wait_for(tag)
    if not isinstance(resp, protocol.rrd.Rsetattr):
        self.badresp('Tsetattr {0} {1} of '
                     '{2}'.format(valid, attrs, self.getpathX(fid)), resp)
+
def xattrwalk(self, fid, name=None):
    "Txattrwalk for one name (or all, if name is empty); caller should read() the returned fid. Returns (newfid, size)"
    tag = self.get_tag()
    xfid = self.alloc_fid()
    request = self.proto.Txattrwalk(tag=tag, fid=fid, newfid=xfid,
                                    name=name or '')
    super(P9Client, self).write(request)
    reply = self.wait_for(tag)
    if not isinstance(reply, protocol.rrd.Rxattrwalk):
        # Failed: release the fid, then raise.
        self.retire_fid(xfid)
        what = 'Txattrwalk {0} of {1}'.format(name, self.getpathX(fid))
        self.badresp(what, reply)
    # Give the new fid a recognizable pseudo-path.
    label = 'xattr:' + name if name else 'xattr'
    self.setpath(xfid, label)
    return xfid, reply.size
+
+ def _pathsplit(self, path, startdir, allow_empty=False):
+ "common code for uxlookup and uxopen"
+ if self.rootfid is None:
+ raise LocalError('{0}: not attached'.format(self))
+ if path.startswith(b'/') or startdir is None:
+ startdir = self.rootfid
+ components = [i for i in path.split(b'/') if i != b'']
+ if len(components) == 0 and not allow_empty:
+ raise LocalError('{0}: {1!r}: empty path'.format(self, path))
+ return components, startdir
+
def uxlookup(self, path, startdir=None):
    """
    Unix-style lookup of '/foo/bar' or 'foo/bar'.  A relative
    path is resolved from startdir when one is given, otherwise
    (and for absolute paths) from the root.  An empty path is
    allowed.  Returns what lookup_last() returns: (fid, qid).
    """
    parts, base = self._pathsplit(path, startdir, allow_empty=True)
    return self.lookup_last(base, parts)
+
def uxopen(self, path, oflags=0, perm=None, gid=None,
           startdir=None, filetype=None):
    """
    Unix-style open()-with-option-to-create, or mkdir().
    oflags is 0/1/2 with optional os.O_CREAT, perm defaults
    to 0o666 (files) or 0o777 (directories).  If we use
    a Linux create or mkdir op, we will need a gid, but it's
    not required if you are opening an existing file.

    Returns (fid, qid, iounit, created), where the final boolean
    says whether we actually created the file or directory.
    Raises OSError if you ask for a directory but it's a file,
    or vice versa.  (??? reconsider this later)

    Note that this does not handle other file types, only
    directories.
    """
    needtype = {
        'dir': protocol.td.QTDIR,
        None: protocol.td.QTFILE,
    }[filetype]
    omode_byte = oflags & 3     # cheating
    # allow looking up /, but not creating /
    allow_empty = (oflags & os.O_CREAT) == 0
    components, startdir = self._pathsplit(path, startdir,
                                           allow_empty=allow_empty)
    if not (oflags & os.O_CREAT):
        # Not creating, i.e., just look up and open existing file/dir.
        fid, qid = self.lookup_last(startdir, components)
        # If we got this far, use Topen on the fid; we did not
        # create the file.
        return self._uxopen2(needtype, fid, qid, omode_byte, False)

    # Only used if using dot-L, but make sure it's always provided
    # since this is generic.
    if gid is None:
        raise ValueError('gid is required when creating file or dir')

    clunk_startdir = False
    if len(components) > 1:
        # Look up all but last component; this part must succeed.
        # The resulting fid becomes the start dir for the final
        # component and is ours to clunk when done.
        startdir, _ = self.lookup(startdir, components[:-1])
        clunk_startdir = True
        components = components[-1:]

    try:
        # Look up the (single) remaining component.  Always walk
        # to a fresh fid, so that the parent directory fid stays
        # valid (and distinct from the result) whatever happens.
        fid = self.alloc_fid()
        tag = self.get_tag()
        pkt = self.proto.Twalk(tag=tag, fid=startdir, newfid=fid,
                               nwname=1, wname=components)
        super(P9Client, self).write(pkt)
        resp = self.wait_for(tag)
        if isinstance(resp, protocol.rrd.Rwalk):
            # fid successfully walked to refer to final component.
            # Just need to actually open the file.
            self.setpath(fid, _pathcat(self.getpath(startdir),
                                       components[0]))
            qid = resp.wqid[0]
            return self._uxopen2(needtype, fid, qid, omode_byte, False)

        # Walk failed, so assume the file or directory needs to
        # be created.  Retire the unused fid, then set up a fid
        # that points to the parent directory in which to create.
        # If we're creating a file, this fid will get changed so
        # that it points to the file instead of the directory.
        self.retire_fid(fid)
        fid = self.dupfid(startdir)

        qid, iounit = self._uxcreate(filetype, fid, components[0],
                                     oflags, omode_byte, perm, gid)

        # Success.  If we created an ordinary file, we have everything
        # now as create alters the incoming (dir) fid to open the file.
        # Otherwise (mkdir), we need to open the file, as with
        # a successful lookup.
        #
        # Note that qid type should match "needtype".
        if filetype != 'dir':
            if qid.type == needtype:
                return fid, qid, iounit, True
            self.clunk(fid, ignore_error=True)
            raise OSError(_wrong_file_type(qid),
                          '{0}: server told to create {1} but '
                          'created {2} instead'.format(path,
                                                       qt2n(needtype),
                                                       qt2n(qid.type)))

        # Success: created dir; but now need to walk to and open it.
        # The duplicate of the parent is no longer needed: release
        # it rather than losing track of it.
        self.clunk(fid, ignore_error=True)
        fid = self.alloc_fid()
        tag = self.get_tag()
        pkt = self.proto.Twalk(tag=tag, fid=startdir, newfid=fid,
                               nwname=1, wname=components)
        super(P9Client, self).write(pkt)
        resp = self.wait_for(tag)
        if not isinstance(resp, protocol.rrd.Rwalk):
            # The walk never established this fid, so there is
            # nothing to clunk: just give the number back.
            self.retire_fid(fid)
            raise OSError(errno.ENOENT,
                          '{0}: server made dir but then failed to '
                          'find it again'.format(path))
        self.setpath(fid, _pathcat(self.getpath(startdir), components[0]))
        return self._uxopen2(needtype, fid, qid, omode_byte, True)
    finally:
        # Regardless of success/failure/exception, make sure
        # we clunk startdir if needed.
        if clunk_startdir:
            self.clunk(startdir, ignore_error=True)
+
+ def _uxcreate(self, filetype, fid, name, oflags, omode_byte, perm, gid):
+ """
+ Helper for creating dir-or-file. The fid argument is the
+ parent directory on input, but will point to the file (if
+ we're creating a file) on return. oflags only applies if
+ we're creating a file (even then we use omode_byte if we
+ are using the plan9 create op).
+ """
+ # Try to create or mkdir as appropriate.
+ if self.supports_all(protocol.td.Tlcreate, protocol.td.Tmkdir):
+ # Use Linux style create / mkdir.
+ if filetype == 'dir':
+ if perm is None:
+ perm = 0o777
+ return self.mkdir(startdir, name, perm, gid), None
+ if perm is None:
+ perm = 0o666
+ lflags = flags_to_linux_flags(oflags)
+ return self.lcreate(fid, name, lflags, perm, gid)
+
+ if filetype == 'dir':
+ if perm is None:
+ perm = protocol.td.DMDIR | 0o777
+ else:
+ perm |= protocol.td.DMDIR
+ else:
+ if perm is None:
+ perm = 0o666
+ return self.create(fid, name, perm, omode_byte)
+
+ def _uxopen2(self, needtype, fid, qid, omode_byte, didcreate):
+ "common code for finishing up uxopen"
+ if qid.type != needtype:
+ self.clunk(fid, ignore_error=True)
+ raise OSError(_wrong_file_type(qid),
+ '{0}: is {1}, expected '
+ '{2}'.format(path, qt2n(qid.type), qt2n(needtype)))
+ qid, iounit = self.open(fid, omode_byte)
+ # ? should we re-check qid? it should not have changed
+ return fid, qid, iounit, didcreate
+
def uxmkdir(self, path, perm, gid, startdir=None):
    """
    Unix-style mkdir.  Returns the qid of the new directory.

    All but the last path component are looked up first; the fid
    obtained that way is clunked again before returning, whether
    or not the mkdir worked.

    The gid is only applied if we are using .L style mkdir.
    """
    components, startdir = self._pathsplit(path, startdir)
    clunkme = None
    if len(components) > 1:
        fid, _ = self.lookup(startdir, components[:-1])
        startdir = fid
        clunkme = fid
        components = components[-1:]
    try:
        if self.supports(protocol.td.Tmkdir):
            qid = self.mkdir(startdir, components[0], perm, gid)
        else:
            qid, _ = self.create(startdir, components[0],
                                 protocol.td.DMDIR | perm,
                                 protocol.td.OREAD)
            # Should we chown/chgrp the dir?
    finally:
        # Compare against None: a fid is a number, and a
        # truthiness test would skip fid 0.
        if clunkme is not None:
            self.clunk(clunkme, ignore_error=True)
    return qid
+
def uxreaddir(self, path, startdir=None, no_dotl=False):
    """
    Return the list of names in the directory at path (the list
    may or may not contain '.' and '..').

    The dot-L readdir is used when the connection supports both
    Tlopen and Treaddir, unless no_dotl is true-ish; otherwise the
    plain / .u directory-read format is used.  Raises
    OSError(ENOTDIR) if path is not a directory.  The fid used
    for the lookup is always clunked before returning.
    """
    parts, base = self._pathsplit(path, startdir, allow_empty=True)
    fid, qid = self.lookup_last(base, parts)
    try:
        if qid.type != protocol.td.QTDIR:
            raise OSError(errno.ENOTDIR,
                          '{0}: {1}'.format(self.getpathX(fid),
                                            os.strerror(errno.ENOTDIR)))
        # We need both Tlopen and Treaddir to use Treaddir.
        dotl_ok = self.supports_all(protocol.td.Tlopen,
                                    protocol.td.Treaddir)
        if no_dotl or not dotl_ok:
            return [entry.name for entry in self.uxreaddir_stat_fid(fid)]
        return [entry.name for entry in self.uxreaddir_dotl_fid(fid)]
    finally:
        self.clunk(fid, ignore_error=True)
+
def uxreaddir_stat(self, path, startdir=None):
    """
    Use directory read to get plan9 style stat data (plain or .u readdir).

    Note that this gets a fid, then opens it, reads, then clunks
    the fid.  If you already have a fid, you may want to use
    uxreaddir_stat_fid (but note that this opens, yet does not
    clunk, the fid).

    Returns the directory's qid plus the list of stat objects for
    its contents.

    Raises OSError(ENOTDIR) if this is applied to a non-directory.
    """
    components, startdir = self._pathsplit(path, startdir)
    fid, qid = self.lookup_last(startdir, components)
    try:
        if qid.type != protocol.td.QTDIR:
            raise OSError(errno.ENOTDIR,
                          '{0}: {1}'.format(self.getpathX(fid),
                                            os.strerror(errno.ENOTDIR)))
        statvals = self.uxreaddir_stat_fid(fid)
        return qid, statvals
    finally:
        self.clunk(fid, ignore_error=True)
+
def uxreaddir_stat_fid(self, fid):
    """
    Open fid for reading and collect the stat objects of a
    directory by reading until an empty result comes back.
    The fid is left open (not clunked).

    In contrast to uxreaddir_stat(), a fid that turns out not to
    be a directory is not an error: the result is simply empty.
    """
    collected = []
    qid, iounit = self.open(fid, protocol.td.OREAD)
    if qid.type != protocol.td.QTDIR:
        return collected
    # ?? is a zero iounit allowed?  if so, what do we use here?
    if iounit <= 0:
        iounit = 512        # probably good enough
    position = 0
    while True:
        chunk = self.read(fid, position, iounit)
        if chunk == b'':
            return collected
        collected.extend(self.decode_stat_objects(chunk))
        position += len(chunk)
+
def uxreaddir_dotl_fid(self, fid):
    """
    Tlopen fid as a directory and collect dot-L style dirents by
    issuing readdir until nothing more comes back.  The fid is
    left open (not clunked).

    L_O_DIRECTORY is passed to the open, so applying this to a
    file should make the lopen fail.
    """
    collected = []
    open_flags = protocol.td.OREAD | protocol.td.L_O_DIRECTORY
    qid, iounit = self.lopen(fid, open_flags)
    # ?? is a zero iounit allowed?  if so, what do we use here?
    # but, we want a minimum of over 256 anyway, let's go for 512
    if iounit < 512:
        iounit = 512
    position = 0
    while True:
        chunk = self.readdir(fid, position, iounit)
        if chunk == b'':
            break
        batch = self.decode_readdir_dirents(chunk)
        if len(batch) == 0:
            break           # ???
        collected.extend(batch)
        # Resume from the offset recorded in the last entry.
        position = batch[-1].offset
    return collected
+
def uxremove(self, path, startdir=None, filetype=None,
             force=False, recurse=False):
    """
    rm / rmdir, optionally with -r and/or -f.

    filetype=None removes whatever is found; 'dir' or 'file'
    removes only that kind and raises OSError (?) on a mismatch.
    force means failures to remove are ignored; recurse means the
    contents of a directory are removed (recursively) first.
    """
    parts, base = self._pathsplit(path, startdir, allow_empty=True)
    # Look up the whole path.  A RemoteError here is taken to
    # mean the file does not exist (is this good?), in which
    # case there is nothing to do.
    try:
        fid, qid = self.lookup_last(base, parts)
    except RemoteError:
        return

    if qid.type != protocol.td.QTDIR:
        # Not a directory, call it a file (even if socket or fifo etc).
        if filetype == 'dir':
            self.clunk(fid, ignore_error=True)
            raise OSError(_wrong_file_type(qid),
                          '{0}: is file, expected dir'.format(path))
        self.remove(fid, ignore_error=force)
        return

    # It's a directory, remove only if allowed.
    if filetype == 'file':
        self.clunk(fid, ignore_error=True)
        raise OSError(_wrong_file_type(qid),
                      '{0}: is dir, expected file'.format(path))
    # "rm -r /" shows up as an empty component list: the root
    # itself is only clunked, never removed.
    at_root = len(parts) == 0
    finish = self.clunk if at_root else self.remove
    if recurse:
        # NB: _rm_recursive does not clunk fid
        self._rm_recursive(fid, filetype, force)
    # This will fail if the directory is non-empty, unless of
    # course we tell it to ignore error.
    finish(fid, ignore_error=force)
+
+    def _rm_file_by_dfid(self, dfid, name, force=False):
+        """
+        Remove a file whose name is <name> (no path, just a component
+        name) whose parent directory is <dfid>.  We may assume that the
+        file really is a file (or a socket, or fifo, or some such, but
+        definitely not a directory).
+
+        If force is set, ignore failures -- on both the unlinkat
+        fast path and the lookup + remove fallback.
+        """
+        # If we have unlinkat, that's the fast way.  But it may
+        # return an ENOTSUP error.  If it does we shouldn't bother
+        # doing this again.
+        if self.supports(protocol.td.Tunlinkat):
+            try:
+                self.unlinkat(dfid, name, 0)
+                return
+            except RemoteError as err:
+                if not err.is_ENOTSUP():
+                    # A real failure: honor force, as documented.
+                    if force:
+                        return
+                    raise
+                self.unsupported(protocol.td.Tunlinkat)
+                # fall through to remove() op
+        # Fall back to lookup + remove.
+        try:
+            fid, qid = self.lookup_last(dfid, [name])
+        except RemoteError:
+            # If this has an errno we could tell ENOENT from EPERM,
+            # and actually raise an error for the latter.  Should we?
+            return
+        self.remove(fid, ignore_error=force)
+
+    def _rm_recursive(self, dfid, filetype, force):
+        """
+        Recursively remove the contents of a directory.  filetype is
+        probably None, but if it's 'dir' we fail if the directory
+        contains non-dir files.
+
+        If force is set, ignore failures to remove individual entries.
+
+        Although we open dfid (via the readdir.*_fid calls) we
+        do not clunk it here; that's the caller's job.
+        """
+        # first, remove contents
+        if self.supports_all(protocol.td.Tlopen, protocol.td.Treaddir):
+            for entry in self.uxreaddir_dotl_fid(dfid):
+                if entry.name in (b'.', b'..'):
+                    continue
+                fid, qid = self.lookup(dfid, [entry.name])
+                try:
+                    # NOTE(review): other client operations in this file
+                    # are spelled without the leading 'T' (open, lopen,
+                    # readdir, unlinkat); confirm that Tgetattr is the
+                    # intended method name.
+                    attrs = self.Tgetattr(fid, protocol.td.GETATTR_MODE)
+                    if stat.S_ISDIR(attrs.mode):
+                        # uxremove does its own lookup; our fid is
+                        # released by the finally clause below.
+                        self.uxremove(entry.name, dfid, filetype, force, True)
+                    else:
+                        # Pass force along, matching the non-dot-L branch
+                        # and the documented contract.
+                        self.remove(fid, ignore_error=force)
+                        fid = None
+                finally:
+                    if fid is not None:
+                        self.clunk(fid, ignore_error=True)
+        else:
+            for statobj in self.uxreaddir_stat_fid(dfid):
+                # skip . and ..
+                name = statobj.name
+                if name in (b'.', b'..'):
+                    continue
+                if statobj.qid.type == protocol.td.QTDIR:
+                    self.uxremove(name, dfid, filetype, force, True)
+                else:
+                    self._rm_file_by_dfid(dfid, name, force)
+
+def _wrong_file_type(qid):
+    "pick the errno (EISDIR for a directory qid, else ENOTDIR) to hand to OSError"
+    is_directory = qid.type == protocol.td.QTDIR
+    return errno.EISDIR if is_directory else errno.ENOTDIR
+
+def flags_to_linux_flags(flags):
+    """
+    Convert OS flags (O_CREAT etc) to Linux flags (protocol.td.L_O_CREAT etc).
+
+    The access-mode bits (O_RDONLY / O_WRONLY / O_RDWR) are passed
+    through unchanged; every other recognized bit is translated via
+    the table below.
+
+    Raises ValueError if any bit remains that has no translation.
+    """
+    flagmap = {
+        os.O_CREAT: protocol.td.L_O_CREAT,
+        os.O_EXCL: protocol.td.L_O_EXCL,
+        os.O_NOCTTY: protocol.td.L_O_NOCTTY,
+        os.O_TRUNC: protocol.td.L_O_TRUNC,
+        os.O_APPEND: protocol.td.L_O_APPEND,
+        os.O_DIRECTORY: protocol.td.L_O_DIRECTORY,
+    }
+
+    # Mask the whole access-mode field, not just O_RDWR: masking with
+    # O_RDWR alone loses O_WRONLY and then reports it as untranslated.
+    accmode = getattr(os, 'O_ACCMODE',
+                      os.O_RDONLY | os.O_WRONLY | os.O_RDWR)
+    result = flags & accmode
+    flags &= ~accmode
+    # .items() works on both Python 2 and Python 3.
+    for key, value in flagmap.items():
+        if flags & key:
+            result |= value
+            flags &= ~key
+    if flags:
+        raise ValueError('untranslated bits 0x{0:x} in os flags'.format(flags))
+    return result
diff --git a/pytest/p9err.py b/pytest/p9err.py
new file mode 100644
index 000000000000..407278b2a404
--- /dev/null
+++ b/pytest/p9err.py
@@ -0,0 +1,146 @@
+#! /usr/bin/env python
+
+"""
+Error number definitions for 9P2000, .u, and .L.
+
+Note that there is no native-to-9P2000 (plain) translation
+table since 9P2000 takes error *strings* rather than error
+*numbers*.
+"""
+
+import errno as _errno
+import lerrno as _lerrno
+import os as _os
+
+_native_to_dotu = {
+    # Native errors that 9P2000.u reports as EPERM rather than as
+    # themselves.  They are entered first so that the setdefault()
+    # pass below can never replace them.
+    _errno.ENOTEMPTY: _errno.EPERM,
+    _errno.EDQUOT: _errno.EPERM,
+    _errno.ENOSYS: _errno.EPERM,
+}
+
+_native_to_dotl = {}
+
+# Add standard errno's: 1 through ERANGE *inclusive* map to themselves.
+# (range() excludes its upper bound, hence the + 1; without it ERANGE
+# would fall through to the EIO / ENOTRECOVERABLE defaults.)
+for _i in range(1, _errno.ERANGE + 1):
+    _native_to_dotu.setdefault(_i, _i)
+    _native_to_dotl[_i] = _i
+
+# Add linux errno's.  Note that Linux EAGAIN at #11 overrides BSD EDEADLK,
+# but Linux has EDEADLK at #35 which overrides BSD EAGAIN, so it all
+# works out.
+#
+# We just list every BSD error name here, since the hasattr()s do
+# the real work: a name is translated only when both the native errno
+# module and lerrno define it.
+for _i in (
+    'EDEADLK',
+    'EAGAIN',
+    'EINPROGRESS',
+    'EALREADY',
+    'ENOTSOCK',
+    'EDESTADDRREQ',
+    'EMSGSIZE',
+    'EPROTOTYPE',
+    'ENOPROTOOPT',
+    'EPROTONOSUPPORT',
+    'ESOCKTNOSUPPORT',
+    'EOPNOTSUPP',
+    'EPFNOSUPPORT',
+    'EAFNOSUPPORT',
+    'EADDRINUSE',
+    'EADDRNOTAVAIL',
+    'ENETDOWN',
+    'ENETUNREACH',
+    'ENETRESET',
+    'ECONNABORTED',
+    'ECONNRESET',
+    'ENOBUFS',
+    'EISCONN',
+    'ENOTCONN',
+    'ESHUTDOWN',
+    'ETOOMANYREFS',
+    'ETIMEDOUT',
+    'ECONNREFUSED',
+    'ELOOP',
+    'ENAMETOOLONG',
+    'EHOSTDOWN',
+    'EHOSTUNREACH',
+    'ENOTEMPTY',
+    'EPROCLIM',
+    'EUSERS',
+    'EDQUOT',
+    'ESTALE',
+    'EREMOTE',
+    'EBADRPC',
+    'ERPCMISMATCH',
+    'EPROGUNAVAIL',
+    'EPROGMISMATCH',
+    'EPROCUNAVAIL',
+    'ENOLCK',
+    'ENOSYS',
+    'EFTYPE',
+    'EAUTH',
+    'ENEEDAUTH',
+    'EIDRM',
+    'ENOMSG',
+    'EOVERFLOW',
+    'ECANCELED',
+    'EILSEQ',
+    'EDOOFUS',
+    'EBADMSG',
+    'EMULTIHOP',
+    'ENOLINK',
+    'EPROTO',
+    'ENOTCAPABLE',
+    'ECAPMODE',
+    'ENOTRECOVERABLE',
+    'EOWNERDEAD',
+):
+    if hasattr(_errno, _i) and hasattr(_lerrno, _i):
+        _native_to_dotl[getattr(_errno, _i)] = getattr(_lerrno, _i)
+del _i
+
+def to_dotu(errnum):
+    """
+    Map a native errno onto the value sent in a 9P2000.u Rerror.
+
+    Anything without an entry in the translation table comes
+    back as EIO.
+
+    >>> import errno
+    >>> to_dotu(errno.EIO)
+    5
+    >>> to_dotu(errno.EDQUOT)
+    1
+    >>> to_dotu(errno.ELOOP)
+    5
+
+    There is a corresponding dotu_strerror() (which is really
+    just os.strerror):
+
+    >>> dotu_strerror(5)
+    'Input/output error'
+
+    """
+    if errnum in _native_to_dotu:
+        return _native_to_dotu[errnum]
+    return _errno.EIO       # default to EIO
+
+def to_dotl(errnum):
+    """
+    Map a native errno onto the Linux errno sent in a 9P2000.L
+    Rlerror.
+
+    Anything without an entry in the translation table comes
+    back as the Linux ENOTRECOVERABLE.
+
+    >>> import errno
+    >>> to_dotl(errno.ELOOP)
+    40
+
+    There is a corresponding dotl_strerror():
+
+    >>> dotl_strerror(40)
+    'Too many levels of symbolic links'
+    """
+    if errnum in _native_to_dotl:
+        return _native_to_dotl[errnum]
+    return _lerrno.ENOTRECOVERABLE
+
+# Message lookup for 9P2000.u error numbers: simply the native os.strerror.
+dotu_strerror = _os.strerror
+
+# Message lookup for 9P2000.L (Linux) error numbers, provided by lerrno.
+dotl_strerror = _lerrno.strerror
+
+if __name__ == '__main__':
+ import doctest
+ doctest.testmod()
diff --git a/pytest/pfod.py b/pytest/pfod.py
new file mode 100644
index 000000000000..6167354e88cc
--- /dev/null
+++ b/pytest/pfod.py
@@ -0,0 +1,204 @@
+#! /usr/bin/env python
+
+from __future__ import print_function
+
+__all__ = ['pfod', 'OrderedDict']
+
+### shameless stealing from namedtuple here
+
+"""
+pfod - prefilled OrderedDict
+
+This is basically a hybrid of a class and an OrderedDict,
+or, sort of a data-only class. When an instance of the
+class is created, all its fields are set to None if not
+initialized.
+
+Because it is an OrderedDict you can add extra fields to an
+instance, and they will be in inst.keys(). Because it
+behaves in a class-like way, if the keys are 'foo' and 'bar'
+you can write print(inst.foo) or inst.bar = 3. Setting an
+attribute that does not currently exist causes a new key
+to be added to the instance.
+"""
+
+import sys as _sys
+from keyword import iskeyword as _iskeyword
+from collections import OrderedDict
+from collections import deque as _deque
+
+# Source text of each generated class.  pfod() fills in {typename},
+# {arg_list}, {field_names} and {repr_fmt} with str.format() and then
+# executes the result through _exec() below.
+_class_template = '''\
+class {typename}(OrderedDict):
+ '{typename}({arg_list})'
+ __slots__ = ()
+
+ _fields = {field_names!r}
+
+ def __init__(self, *args, **kwargs):
+ 'Create new instance of {typename}()'
+ super({typename}, self).__init__()
+ args = _deque(args)
+ for field in self._fields:
+ if field in kwargs:
+ self[field] = kwargs.pop(field)
+ elif len(args) > 0:
+ self[field] = args.popleft()
+ else:
+ self[field] = None
+ if len(kwargs):
+ raise TypeError('unexpected kwargs %s' % kwargs.keys())
+ if len(args):
+ raise TypeError('unconsumed args %r' % tuple(args))
+
+ def _copy(self):
+ 'copy to new instance'
+ new = {typename}()
+ new.update(self)
+ return new
+
+ def __getattr__(self, attr):
+ if attr in self:
+ return self[attr]
+ raise AttributeError('%r object has no attribute %r' %
+ (self.__class__.__name__, attr))
+
+ def __setattr__(self, attr, val):
+ if attr.startswith('_OrderedDict_'):
+ super({typename}, self).__setattr__(attr, val)
+ else:
+ self[attr] = val
+
+ def __repr__(self):
+ 'Return a nicely formatted representation string'
+ return '{typename}({repr_fmt})'.format(**self)
+'''
+
+# One "name={name!r}" fragment of the generated __repr__ format string.
+# The doubled braces survive this template's own format() call, leaving
+# a real {name!r} placeholder for the generated __repr__ to fill in.
+_repr_template = '{name}={{{name}!r}}'
+
+# Workaround for py2k exec-as-statement, vs py3k exec-as-function.
+# Since the syntax differs, we have to exec the definition of _exec!
+# Either way the result is _exec(string, gdict, ldict).
+if _sys.version_info[0] < 3:
+ # py2k: need a real function. (There is a way to deal with
+ # this without a function if the py2k is new enough, but this
+ # works in more cases.)
+ exec("""def _exec(string, gdict, ldict):
+ "Python 2: exec string in gdict, ldict"
+ exec string in gdict, ldict""")
+else:
+ # py3k: just make an alias for builtin function exec
+ exec("_exec = exec")
+
+def pfod(typename, field_names, verbose=False, rename=False):
+    """
+    Return a new subclass of OrderedDict with named fields.
+
+    typename is the name of the new class.  field_names is either a
+    sequence of names or one string of names separated by commas
+    and/or whitespace.  If verbose is true the generated class source
+    is printed (to verbose itself if it is a writable file-like
+    object, otherwise to stdout).  If rename is true, invalid or
+    duplicate field names are replaced by '_<index>' instead of
+    raising an error.
+
+    Raises TypeError or ValueError for invalid names.
+
+    Fields are accessible by name.  Note that this means
+    that to copy a PFOD you must use _copy() - field names
+    may not start with '_' unless they are all numeric.
+
+    When creating an instance of the new class, fields
+    that are not initialized are set to None.
+
+    >>> Point = pfod('Point', ['x', 'y'])
+    >>> Point.__doc__                   # docstring for the new class
+    'Point(x, y)'
+    >>> p = Point(11, y=22)             # instantiate with positional args or keywords
+    >>> p
+    Point(x=11, y=22)
+    >>> p['x'] + p['y']                 # indexable
+    33
+    >>> p.x + p.y                       # fields also accessible by name
+    33
+    >>> p._copy()
+    Point(x=11, y=22)
+    >>> p2 = Point()
+    >>> p2.extra = 2
+    >>> p2
+    Point(x=None, y=None)
+    >>> p2.extra
+    2
+    >>> p2['extra']
+    2
+    """
+
+    # py2k has basestring (str + unicode); py3k has only str.
+    if _sys.version_info[0] >= 3:
+        string_type = str
+    else:
+        string_type = basestring
+
+    # Validate the field names.  At the user's option, either generate an
+    # error message or automatically replace the field name with a valid name.
+    if isinstance(field_names, string_type):
+        field_names = field_names.replace(',', ' ').split()
+    field_names = list(map(str, field_names))
+    typename = str(typename)
+    if rename:
+        seen = set()
+        for index, name in enumerate(field_names):
+            if (not all(c.isalnum() or c=='_' for c in name)
+                or _iskeyword(name)
+                or not name
+                or name[0].isdigit()
+                or name.startswith('_')
+                or name in seen):
+                field_names[index] = '_%d' % index
+            seen.add(name)
+    for name in [typename] + field_names:
+        if type(name) != str:
+            raise TypeError('Type names and field names must be strings')
+        if not name:
+            # Without this, name[0] below fails with a bare IndexError.
+            raise ValueError('Type names and field names cannot be empty')
+        if not all(c.isalnum() or c=='_' for c in name):
+            raise ValueError('Type names and field names can only contain '
+                             'alphanumeric characters and underscores: %r' % name)
+        if _iskeyword(name):
+            raise ValueError('Type names and field names cannot be a '
+                             'keyword: %r' % name)
+        if name[0].isdigit():
+            raise ValueError('Type names and field names cannot start with '
+                             'a number: %r' % name)
+    seen = set()
+    for name in field_names:
+        # The generated __setattr__ passes names with this prefix to the
+        # base class instead of storing them as keys.
+        if name.startswith('_OrderedDict_'):
+            raise ValueError('Field names cannot start with _OrderedDict_: '
+                             '%r' % name)
+        if name.startswith('_') and not rename:
+            raise ValueError('Field names cannot start with an underscore: '
+                             '%r' % name)
+        if name in seen:
+            raise ValueError('Encountered duplicate field name: %r' % name)
+        seen.add(name)
+
+    # Fill-in the class template
+    class_definition = _class_template.format(
+        typename = typename,
+        field_names = tuple(field_names),
+        arg_list = repr(tuple(field_names)).replace("'", "")[1:-1],
+        repr_fmt = ', '.join(_repr_template.format(name=name)
+                             for name in field_names),
+    )
+    if verbose:
+        # The builtin 'file' type does not exist on py3k, so detect a
+        # usable output stream by its write() method instead.
+        print(class_definition,
+              file=verbose if hasattr(verbose, 'write') else _sys.stdout)
+
+    # Execute the template string in a temporary namespace and support
+    # tracing utilities by setting a value for frame.f_globals['__name__']
+    namespace = dict(__name__='PFOD%s' % typename,
+                     OrderedDict=OrderedDict, _deque=_deque)
+    try:
+        _exec(class_definition, namespace, namespace)
+    except SyntaxError as e:
+        # SyntaxError.msg exists on both py2k and py3k; .message is py2k-only.
+        raise SyntaxError(e.msg + ':\n' + class_definition)
+    result = namespace[typename]
+
+    # For pickling to work, the __module__ variable needs to be set to the frame
+    # where the named tuple is created.  Bypass this step in environments where
+    # sys._getframe is not defined (Jython for example) or sys._getframe is not
+    # defined for arguments greater than 0 (IronPython).
+    try:
+        result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
+    except (AttributeError, ValueError):
+        pass
+
+    return result
+
+if __name__ == '__main__':
+ import doctest
+ doctest.testmod()
diff --git a/pytest/protocol.py b/pytest/protocol.py
new file mode 100644
index 000000000000..6b0cd9ad0327
--- /dev/null
+++ b/pytest/protocol.py
@@ -0,0 +1,1998 @@
+#! /usr/bin/env python
+
+"""
+Protocol definitions for python based lib9p server/client.
+
+The sub-namespace td has type definitions (qid, stat) and values
+that are "#define" constants in C code (e.g., DMDIR, QTFILE, etc).
+This also contains the byte values for protocol codes like Tversion,
+Rversion, Rerror, and so on.
+
+ >>> td.Tversion
+ 100
+ >>> td.Rlerror
+ 7
+
+The qid and stat types are PFOD classes and generate instances that
+are a cross between namedtuple and OrderedDict (see pfod.py
+for details):
+
+ >>> td.qid(type=td.QTFILE, path=2, version=1)
+ qid(type=0, version=1, path=2)
+
+The td.stat() type output is pretty long, since it has all the
+dotu-specific members (used only when packing for dotu/dotl and
+set only when unpacking those), so here's just one field:
+
+ >>> td.stat(*(15 * [0])).mode
+ 0
+ >>> import pprint; pprint.pprint(td.stat()._fields)
+ ('type',
+ 'dev',
+ 'qid',
+ 'mode',
+ 'atime',
+ 'mtime',
+ 'length',
+ 'name',
+ 'uid',
+ 'gid',
+ 'muid',
+ 'extension',
+ 'n_uid',
+ 'n_gid',
+ 'n_muid')
+
+Stat objects sent across the protocol must first be encoded into
+wirestat objects, which are basically size-counted pre-sequenced
+stat objects. The pre-sequencing uses:
+
+ >>> td.stat_seq
+ Sequencer('stat')
+
+For parsing bytes returned in a Tread on a directory, td.wirestat_seq
+is the sequencer. However, most users should rely on the packers and
+unpackers in each protocol (see {pack,unpack}_wirestat below).
+
+ >>> td.wirestat_seq
+ Sequencer('wirestat')
+
+There is a dictionary fcall_names that maps from byte value
+to protocol code. Names map to themselves as well:
+
+ >>> fcall_names[101]
+ 'Rversion'
+ >>> fcall_names['Tversion']
+ 'Tversion'
+
+The sub-namespace rrd has request (Tversion, Topen, etc) and
+response (Rversion, Ropen, etc) data definitions. Each of these
+is a PFOD class:
+
+ >>> rrd.Tversion(1000, 'hello', tag=0)
+ Tversion(tag=0, msize=1000, version='hello')
+
+The function p9_version() looks up the instance of each supported
+protocol, or raises a KeyError when given an invalid protocol.
+The names may be spelled in any mixture of cases.
+
+The names plain, dotu, and dotl are predefined as the three
+supported protocols:
+
+ >>> p9_version('invalid')
+ Traceback (most recent call last):
+ ...
+ KeyError: 'invalid'
+ >>> p9_version('9p2000') == plain
+ True
+ >>> p9_version('9P2000') == plain
+ True
+ >>> p9_version('9P2000.u') == dotu
+ True
+ >>> p9_version('9p2000.L') == dotl
+ True
+
+Protocol instances have a pack() method that encodes a set of
+arguments into a packet. To know what to encode, pack() must
+receive an fcall value and a dictionary containing argument
+values, or something equivalent. The required argument values
+depend on the fcall. For instance, a Tversion fcall needs three
+arguments: the version name, the tag, and the msize (these of
+course are the pre-filled fields in a Tversion PFOD instance).
+
+ >>> args = {'version': '!', 'tag': 1, 'msize': 1000}
+ >>> pkt = dotu.pack(fcall='Tversion', args=args)
+ >>> len(pkt)
+ 14
+
+The length of string '!' is 1, and the packet (or wire) format of
+a Tversion request is:
+
+ size[4] fcall[1] tag[2] msize[4] version[s]
+
+which corresponds to a struct's IBHIH (for the fixed size parts)
+followed by 1 B (for the string). The overall packet is 14 bytes
+long, so we have size=14, fcall=100, tag=1, msize=1000, and the
+version string is length=1, value=33 (ord('!')).
+
+ >>> import struct
+ >>> struct.unpack('<IBHIHB', pkt)
+ (14, 100, 1, 1000, 1, 33)
+
+Of course, this packed a completely bogus "version" string, but
+that's what we told it to do. Protocol instances remember their
+version, so we can get it right by omitting the version from the
+arguments:
+
+ >>> dotu.version
+ '9P2000.u'
+ >>> args = {'tag': 99, 'msize': 1000}
+ >>> pkt = dotu.pack(fcall='Tversion', args=args)
+ >>> len(pkt)
+ 21
+
+The fcall can be supplied numerically:
+
+ >>> pkt2 = dotu.pack(fcall=td.Tversion, args=args)
+ >>> pkt == pkt2
+ True
+
+Instead of providing an fcall you can provide an instance of
+the appropriate PFOD. In this case pack() finds the type from
+the PFOD instance. As usual, the version parameter is filled in
+for you:
+
+ >>> pkt2 = dotu.pack(rrd.Tversion(tag=99, msize=1000))
+ >>> pkt == pkt2
+ True
+
+Note that it's up to you to check the other end's version and
+switch to a "lower" protocol as needed. Each instance does provide
+a downgrade_to() method that gets you a possibly-downgraded instance.
+This will fail if you are actually trying to upgrade, and also if
+you provide a bogus version:
+
+ >>> dotu.downgrade_to('9P2000.L')
+ Traceback (most recent call last):
+ ...
+ KeyError: '9P2000.L'
+ >>> dotu.downgrade_to('we never heard of this protocol')
+ Traceback (most recent call last):
+ ...
+ KeyError: 'we never heard of this protocol'
+
+Hence you might use:
+
+ try:
+ proto = protocol.dotl.downgrade_to(vstr)
+ except KeyError:
+ pkt = protocol.plain.pack(fcall='Rerror',
+ args={'tag': tag, 'errstr': 'unknown protocol version '
+ '{0!r}'.format(vstr)})
+ else:
+ pkt = proto.pack(fcall='Rversion', args={'tag': tag, 'msize': msize})
+
+When using a PFOD instance, it is slightly more efficient to use
+pack_from():
+
+ try:
+ proto = protocol.dotl.downgrade_to(vstr)
+ reply = protocol.rrd.Rversion(tag=tag, msize=msize)
+ except KeyError:
+ proto = protocol.plain
+ reply = protocol.rrd.Rerror(tag=tag,
+ errstr='unknown protocol version {0!r}'.format(vstr))
+ pkt = proto.pack_from(reply)
+
+does the equivalent of the try/except/else variant. Note that
+the protocol.rrd.Rversion() instance has version=None. Like
+proto.pack, the pack_from will detect this "missing" value and
+fill it in.
+
+Because errors vary (one should use Rlerror for dotl and Rerror
+for dotu and plain), and it's convenient to use an Exception
+instance for an error, all protocols provide .error(). This
+builds the appropriate kind of error response, extracting and
+converting errno's and error messages as appropriate.
+
+If <err> is an instance of Exception, err.errno provides the errnum
+or ecode value (if used, for dotu and dotl) and err.strerror as the
+errstr value (if used, for plain 9p2000). Otherwise err should be
+an integer, and we'll use os.strerror() to get a message.
+
+When using plain 9P2000 this sends error *messages*:
+
+ >>> import errno, os
+ >>> utf8 = os.strerror(errno.ENOENT).encode('utf-8')
+ >>> pkt = None
+ >>> try:
+ ... os.open('presumably this file does not exist here', 0)
+ ... except OSError as err:
+ ... pkt = plain.error(1, err)
+ ...
+ >>> pkt[-len(utf8):] == utf8
+ True
+ >>> pkt2 = plain.error(1, errno.ENOENT)
+ >>> pkt == pkt2
+ True
+
+When using 9P2000.u it sends the error code as well, and when
+using 9P2000.L it sends only the error code (and more error
+codes can pass through):
+
+ >>> len(pkt)
+ 34
+ >>> len(dotu.error(1, errno.ENOENT))
+ 38
+ >>> len(dotl.error(1, errno.ENOENT))
+ 11
+
+For even more convenience (and another slight speed hack), the
+protocol has member functions for each valid pfod, which
+effectively do a pack_from of a pfod built from the arguments. In
+the above example this is not very useful (because we want two
+different replies), but for Rlink, for instance, which has only
+a tag, a server might implement Tlink() as:
+
+ def do_Tlink(proto, data): # data will be a protocol.rrd.Tlink(...)
+ tag = data.tag
+ dfid = data.dfid
+ fid = data.fid
+ name = data.name
+ ... some code to set up for doing the link ...
+ try:
+ os.link(path1, path2)
+ except OSError as err:
+ return proto.error(tag, err)
+ else:
+ return proto.Rlink(tag)
+
+ >>> pkt = dotl.Rlink(12345)
+ >>> struct.unpack('<IBH', pkt)
+ (7, 71, 12345)
+
+Similarly, a client can build a Tversion packet quite trivially:
+
+ >>> vpkt = dotl.Tversion(tag=0, msize=12345)
+
+To see that this is a valid version packet, let's unpack its bytes.
+The overall length is 21 bytes: 4 bytes of size, 1 byte of code 100
+for Tversion, 2 bytes of tag, 4 bytes of msize, 2 bytes of string
+length, and 8 bytes of string '9P2000.L'.
+
+ >>> tup = struct.unpack('<IBHIH8B', vpkt)
+ >>> tup[0:5]
+ (21, 100, 0, 12345, 8)
+ >>> ''.join(chr(i) for i in tup[5:])
+ '9P2000.L'
+
+Of course, since you can *pack*, you can also *unpack*. It's
+possible that the incoming packet is malformed. If so, this
+raises various errors (see below).
+
+Unpack is actually a two step process: first we unpack a header
+(where the size is already removed and is implied by len(data)),
+then we unpack the data within the packet. You can invoke the
+first step separately. Furthermore, there's a noerror argument
+that leaves some fields set to None or empty strings, if the
+packet is too short. (Note that we need a hack for py2k vs py3k
+strings here, for doctests. Also, encoding 12345 into a byte
+string produces '90', by ASCII luck!)
+
+ >>> pkt = pkt[4:] # strip generated size
+ >>> import sys
+ >>> py3k = sys.version_info[0] >= 3
+ >>> b2s = lambda x: x.decode('utf-8') if py3k else x
+ >>> d = plain.unpack_header(pkt[0:1], noerror=True)
+ >>> d.data = b2s(d.data)
+ >>> d
+ Header(size=5, dsize=0, fcall=71, data='')
+ >>> d = plain.unpack_header(pkt[0:2], noerror=True)
+ >>> d.data = b2s(d.data)
+ >>> d
+ Header(size=6, dsize=1, fcall=71, data='9')
+
+Without noerror=True a short packet raises a SequenceError:
+
+ >>> plain.unpack_header(pkt[0:0]) # doctest: +IGNORE_EXCEPTION_DETAIL
+ Traceback (most recent call last):
+ ...
+ SequenceError: out of data while unpacking 'fcall'
+
+Of course, a normal packet decodes fine:
+
+ >>> d = plain.unpack_header(pkt)
+ >>> d.data = b2s(d.data)
+ >>> d
+ Header(size=7, dsize=2, fcall=71, data='90')
+
+but one that is too *long* potentially raises a SequenceError.
+(This is impossible for a header, though, since the size and
+data size are both implied: either there is an fcall code, and
+the rest of the bytes are "data", or there isn't and the packet
+is too short. So we can only demonstrate this for regular
+unpack; see below.)
+
+Note that all along, this has been decoding Rlink (fcall=71),
+which is not valid for plain 9P2000 protocol. It's up to the
+caller to check:
+
+ >>> plain.supports(71)
+ False
+
+ >>> plain.unpack(pkt) # doctest: +IGNORE_EXCEPTION_DETAIL
+ Traceback (most recent call last):
+ ...
+ SequenceError: invalid fcall 'Rlink' for 9P2000
+ >>> dotl.unpack(pkt)
+ Rlink(tag=12345)
+
+However, the unpack() method DOES check that the fcall type is
+valid, even if you supply noerror=True. This is because we can
+only really decode the header, not the data, if the fcall is
+invalid:
+
+ >>> plain.unpack(pkt, noerror=True) # doctest: +IGNORE_EXCEPTION_DETAIL
+ Traceback (most recent call last):
+ ...
+ SequenceError: invalid fcall 'Rlink' for 9P2000
+
+The same applies to much-too-short packets even if noerror is set.
+Specifically, if the (post-"size") header shortens down to the empty
+string, the fcall will be None:
+
+ >>> dotl.unpack(b'', noerror=True) # doctest: +IGNORE_EXCEPTION_DETAIL
+ Traceback (most recent call last):
+ ...
+ SequenceError: invalid fcall None for 9P2000.L
+
+If there is at least a full header, though, noerror will do the obvious:
+
+ >>> dotl.unpack(pkt[0:1], noerror=True)
+ Rlink(tag=None)
+ >>> dotl.unpack(pkt[0:2], noerror=True)
+ Rlink(tag=None)
+
+If the packet is too long, noerror suppresses the SequenceError:
+
+ >>> dotl.unpack(pkt + b'x') # doctest: +IGNORE_EXCEPTION_DETAIL
+ Traceback (most recent call last):
+ ...
+ SequenceError: 1 byte(s) unconsumed
+ >>> dotl.unpack(pkt + b'x', noerror=True)
+ Rlink(tag=12345)
+
+To pack a stat object when producing data for reading a directory,
+use pack_wirestat. This puts a size in front of the packed stat
+data (they're represented this way in read()-of-directory data,
+but not elsewhere).
+
+To unpack the result of a Tstat or a read() on a directory, use
+unpack_wirestat. The stat values are variable length so this
+works with offsets. If the packet is truncated, you'll get a
+SequenceError, but just as for header unpacking, you can use
+noerror to suppress this.
+
+(First, we'll need to build some valid packet data.)
+
+ >>> statobj = td.stat(type=0,dev=0,qid=td.qid(0,0,0),mode=0,
+ ... atime=0,mtime=0,length=0,name=b'foo',uid=b'0',gid=b'0',muid=b'0')
+ >>> data = plain.pack_wirestat(statobj)
+ >>> len(data)
+ 55
+
+Now we can unpack it:
+
+ >>> newobj, offset = plain.unpack_wirestat(data, 0)
+ >>> newobj == statobj
+ True
+ >>> offset
+ 55
+
+Since the packed data do not include the dotu extensions, we get
+a SequenceError if we try to unpack with dotu or dotl:
+
+ >>> dotu.unpack_wirestat(data, 0) # doctest: +IGNORE_EXCEPTION_DETAIL
+ Traceback (most recent call last):
+ ...
+ SequenceError: out of data while unpacking 'extension'
+
+When using noerror, the returned new offset will be greater
+than the length of the packet, after a failed unpack, and some
+elements may be None:
+
+ >>> newobj, offset = plain.unpack_wirestat(data[0:10], 0, noerror=True)
+ >>> offset
+ 55
+ >>> newobj.length is None
+ True
+
+Similarly, use unpack_dirent to unpack the result of a dot-L
+readdir(), using offsets. (Build them with pack_dirent.)
+
+ >>> dirent = td.dirent(qid=td.qid(1,2,3),offset=0,
+ ... type=td.DT_REG,name=b'foo')
+ >>> pkt = dotl.pack_dirent(dirent)
+ >>> len(pkt)
+ 27
+
+and then:
+
+ >>> newde, offset = dotl.unpack_dirent(pkt, 0)
+ >>> newde == dirent
+ True
+ >>> offset
+ 27
+
+"""
+
+from __future__ import print_function
+
+import collections
+import os
+import re
+import sys
+
+import p9err
+import pfod
+import sequencer
+
+SequenceError = sequencer.SequenceError
+
+fcall_names = {}
+
+# begin ???
+# to interfere with (eg) the size part of the packet:
+# pkt = proto.pack(fcall=protocol.td.Tversion,
+# size=123, # wrong
+# args={ 'tag': 1, 'msize': 1000, 'version': '9p2000.u' })
+# a standard Twrite:
+# pkt = proto.pack(fcall=protocol.td.Twrite,
+# args={ 'tag': 1, 'fid': 2, 'offset': 0, 'data': b'rawdata' })
+# or:
+# pkt = proto.pack(fcall=protocol.td.Twrite,
+# data=proto.Twrite(tag=1, fid=2, offset=0, data=b'rawdata'))
+# a broken Twrite:
+# pkt = proto.pack(fcall=protocol.td.Twrite,
+# args={ 'tag': 1, 'fid': 2, 'offset': 0, 'count': 99,
+# 'data': b'rawdata' }) -- XXX won't work (yet?)
+#
+# build a QID: (td => typedefs and defines)
+# qid = protocol.td.qid(type=protocol.td.QTFILE, version=1, path=2)
+# build the Twrite data as a data structure:
+# wrdata = protocol.rrd.Twrite(tag=1, fid=2, offset=0, data=b'rawdata')
+#
+# turn incoming byte stream data into a Header and remaining data:
+# foo = proto.unpack_header(data)
+
+class _PackInfo(object):
+    """
+    A thin wrapper around a Sequencer that additionally records
+    which of its coders carry an ':auto' or ':len' annotation
+    (e.g. the automatic 'version', or the count of 'data[count]').
+
+    Only the top level of the sequence is inspected.  In practice
+    that is where all the automatics live, and there is no
+    mechanism for passing inner automatics down anyway.
+    """
+    def __init__(self, seq):
+        self.seq = seq
+        self.autos = None       # stays None unless something is annotated
+        annotated = []
+        for pair in seq:        # (cond, code) pair
+            coder = pair[1]
+            if coder.aux is None:
+                continue
+            assert coder.aux == 'auto' or coder.aux == 'len'
+            annotated.append(pair)
+        if annotated:
+            self.autos = annotated
+
+    def __repr__(self):
+        return '{0}({1!r})'.format(self.__class__.__name__, self.seq)
+
+    @staticmethod
+    def _writable(data, rodata):
+        "return (data, rodata) such that data may be modified in place"
+        if rodata:
+            # data is a PFOD, not a plain ordered dictionary: use _copy().
+            return data._copy(), False
+        return data, rodata
+
+    def pack(self, auto_vars, conditions, data, rodata):
+        """
+        Pack data, first filling in any automatic variables and
+        automatic lengths that the caller left unset (None).
+
+        When rodata ("read-only data") is True the caller's data
+        is never modified: a copy is made before the first fill-in.
+        """
+        for cond, sub in self.autos or ():
+            # False conditionals don't need to be filled-in.
+            if cond is not None and not conditions[cond]:
+                continue
+            if sub.aux == 'auto':
+                # Automatic variable, e.g., version.  The sub-coder's
+                # own name ('version') is the item to test and set.
+                target = sub.name
+                if data.get(target) is None:
+                    data, rodata = self._writable(data, rodata)
+                    data[target] = auto_vars[target]
+                continue
+            # Automatic length, e.g., data[count].  The sub-coder's
+            # repeat item ('count') is the item to test and set.
+            assert sub.aux == 'len'
+            target = sub.repeat
+            if data.get(target) is not None:
+                continue
+            counted = data.get(sub.name)
+            if counted is None:
+                # The counted item is missing too: leave both None
+                # and take the encoding error.
+                continue
+            data, rodata = self._writable(data, rodata)
+            data[target] = len(counted)
+        return self.seq.pack(data, conditions)
+
+class _P9Proto(object):
+ def __init__(self, auto_vars, conditions, p9_data, pfods, index):
+ # Record the per-protocol tables, then install one packing method
+ # per Txxx/Rxxx PFOD type found in pfods.
+ self.auto_vars = auto_vars # currently, just version
+ self.conditions = conditions # '.u'
+ self.pfods = pfods # dictionary, maps pfod to packinfo
+ self.index = index # for comparison: plain < dotu < dotl
+
+ # error() sends Rlerror instead of Rerror when this is set.
+ self.use_rlerror = rrd.Rlerror in pfods
+
+ for dtype in pfods:
+ name = dtype.__name__
+ # For each Txxx/Rxxx, define a self.<name>() to
+ # call self.pack_from().
+ #
+ # The packinfo is from _Packinfo(seq); the fcall and
+ # seq come from p9_data.protocol[<name>].
+ proto_tuple = p9_data.protocol[name]
+ assert dtype == proto_tuple[0]
+ packinfo = pfods[dtype]
+ # in theory we can do this with no names using nested
+ # lambdas, but that's just too confusing, so let's
+ # do it with nested functions instead.
+ # The default arguments bind this iteration's dtype and
+ # packinfo, so later iterations cannot change them.
+ def builder(constructor=dtype, packinfo=packinfo):
+ "return function that calls _pack_from with built PFOD"
+ def invoker(self, *args, **kwargs):
+ "build PFOD and call _pack_from"
+ return self._pack_from(constructor(*args, **kwargs),
+ rodata=False, caller=None,
+ packinfo=packinfo)
+ return invoker
+ func = builder()
+ func.__name__ = name
+ func.__doc__ = 'pack from {0}'.format(name)
+ # NOTE(review): this sets the method on the class, not on the
+ # instance, so it is visible to every instance of that class.
+ setattr(self.__class__, name, func)
+
+ # NOTE(review): self.version is not assigned in __init__ above;
+ # presumably it is supplied elsewhere in the class -- confirm.
+ def __repr__(self):
+ return '{0}({1!r})'.format(self.__class__.__name__, self.version)
+
+ def __str__(self):
+ return self.version
+
+ # define rich-comparison operators, so we can, e.g., test vers > plain
+ # Ordering and equality are decided solely by self.index
+ # (plain < dotu < dotl); other must also have an index attribute.
+ def __lt__(self, other):
+ return self.index < other.index
+ def __le__(self, other):
+ return self.index <= other.index
+ def __eq__(self, other):
+ return self.index == other.index
+ def __ne__(self, other):
+ return self.index != other.index
+ def __gt__(self, other):
+ return self.index > other.index
+ def __ge__(self, other):
+ return self.index >= other.index
+
+ def downgrade_to(self, other_name):
+ """
+ Downgrade from this protocol to a not-greater one.
+
+ Raises KeyError if other_name is not a valid protocol,
+ or this is not a downgrade (with setting back to self
+ considered a valid "downgrade", i.e., we're doing subseteq
+ rather than subset).
+ """
+ if not isinstance(other_name, str) and isinstance(other_name, bytes):
+ other_name = other_name.decode('utf-8', 'surrogateescape')
+ other = p9_version(other_name)
+ if other > self:
+ raise KeyError(other_name)
+ return other
+
+ def error(self, tag, err):
+ "produce Rerror or Rlerror, whichever is appropriate"
+ if isinstance(err, Exception):
+ errnum = err.errno
+ errmsg = err.strerror
+ else:
+ errnum = err
+ errmsg = os.strerror(errnum)
+ if self.use_rlerror:
+ return self.Rlerror(tag=tag, ecode=p9err.to_dotl(errnum))
+ return self.Rerror(tag=tag, errstr=errmsg,
+ errnum=p9err.to_dotu(errnum))
+
+ def pack(self, *args, **kwargs):
+ "pack up a pfod or fcall-and-arguments"
+ fcall = kwargs.pop('fcall', None)
+ if fcall is None:
+ # Called without fcall=...
+ # This requires that args have one argument that
+ # is the PFOD; kwargs should be empty (but we'll take
+ # data=pfod as well). The size is implied, and
+ # fcall comes from the pfod.
+ data = kwargs.pop('data', None)
+ if data is None:
+ if len(args) != 1:
+ raise TypeError('pack() with no fcall requires 1 argument')
+ data = args[0]
+ if len(kwargs):
+ raise TypeError('pack() got an unexpected keyword argument '
+ '{0}'.format(kwargs.popitem()[0]))
+ return self._pack_from(data, True, 'pack', None)
+
+ # Called as pack(fcall=whatever, data={...}).
+ # The data argument must be a dictionary since we're going to
+ # apply ** to it in the call to build the PFOD. Note that
+ # it could already be a PFOD, which is OK, but we're going to
+ # copy it to a new one regardless (callers that have a PFOD
+ # should use pack_from instead).
+ if len(args):
+ raise TypeError('pack() got unexpected arguments '
+ '{0!r}'.format(args))
+ data = kwargs.pop('args', None)
+ if len(kwargs):
+ raise TypeError('pack() got an unexpected keyword argument '
+ '{0}'.format(kwargs.popitem()[0]))
+ if not isinstance(data, dict):
+ raise TypeError('pack() with fcall and data '
+ 'requires data to be a dictionary')
+ try:
+ name = fcall_names[fcall]
+ except KeyError:
+ raise TypeError('pack(): {0} is not a valid '
+ 'fcall value'.format(fcall))
+ cls = getattr(rrd, name)
+ data = cls(**data)
+ return self._pack_from(data, False, 'pack', None)
+
+ def pack_from(self, data):
+ "pack from pfod data, using its type to determine fcall"
+ return self._pack_from(data, True, 'pack_from', None)
+
+ def _pack_from(self, data, rodata, caller, packinfo):
+ """
+ Internal pack(): called from both invokers (self.Tversion,
+ self.Rwalk, etc.) and from pack and pack_from methods.
+ "caller" says which. If rodata is True we're not supposed to
+ modify the incoming data, as it may belong to someone
+ else. Some calls to pack() build a PFOD and hence pass in
+ False.
+
+ The predefined invokers pass in a preconstructed PFOD,
+ *and* set rodata=False, *and* provide a packinfo, so that
+ we never have to copy, nor look up the packinfo.
+ """
+ if caller is not None:
+ assert caller in ('pack', 'pack_from') and packinfo is None
+ # Indirect call from pack_from(), or from pack() after
+ # pack() built a PFOD. We make sure this kind of PFOD
+ # is allowed for this protocol.
+ packinfo = self.pfods.get(data.__class__, None)
+ if packinfo is None:
+ raise TypeError('{0}({1!r}): invalid '
+ 'input'.format(caller, data))
+
+ # Pack the data
+ pkt = packinfo.pack(self.auto_vars, self.conditions, data, rodata)
+
+ fcall = data.__class__.__name__
+ fcall_code = getattr(td, fcall)
+
+ # That's the inner data; now we must add the header,
+ # with fcall (translated back to byte code value) and
+ # outer data. The size is implied by len(pkt). There
+ # are no other auto variables, and no conditions.
+ #
+ # NB: the size includes the size of the header itself
+ # and the fcall code byte, plus the size of the data.
+ data = _9p_data.header_pfod(size=4 + 1 + len(pkt), dsize=len(pkt),
+ fcall=fcall_code, data=pkt)
+ empty = None # logically should be {}, but not actually used below
+ pkt = _9p_data.header_pack_seq.pack(data, empty)
+ return pkt
+
+ @staticmethod
+ def unpack_header(bstring, noerror=False):
+ """
+ Unpack header.
+
+ We know that our caller has already stripped off the
+ overall size field (4 bytes), leaving us with the fcall
+ (1 byte) and data (len(bstring)-1 bytes). If len(bstring)
+ is 0, this is an invalid header: set dsize to 0 and let
+ fcall become None, if noerror is set.
+ """
+ vdict = _9p_data.header_pfod()
+ vdict['size'] = len(bstring) + 4
+ vdict['dsize'] = max(0, len(bstring) - 1)
+ _9p_data.header_unpack_seq.unpack(vdict, None, bstring, noerror)
+ return vdict
+
+ def unpack(self, bstring, noerror=False):
+ "produce filled PFOD from fcall in packet"
+ vdict = self.unpack_header(bstring, noerror)
+ # NB: vdict['dsize'] is used internally during unpack, to
+ # find out how many bytes to copy to vdict['data'], but by
+ # the time unpack is done, we no longer need it.
+ #
+ # size = vdict['size']
+ # dsize = vdict['dsize']
+ fcall = vdict['fcall']
+ data = vdict['data']
+ # Note: it's possible for size and/or fcall to be None,
+ # when noerror is true. However, if we support fcall, then
+ # clearly fcall is not None; and since fcall follows size,
+ # we can always proceed if we support fcall.
+ if self.supports(fcall):
+ fcall = fcall_names[fcall]
+ cls = getattr(rrd, fcall)
+ seq = self.pfods[cls].seq
+ elif fcall == td.Rlerror:
+ # As a special case for diod, we accept Rlerror even
+ # if it's not formally part of the protocol.
+ cls = rrd.Rlerror
+ seq = dotl.pfods[rrd.Rlerror].seq
+ else:
+ fcall = fcall_names.get(fcall, fcall)
+ raise SequenceError('invalid fcall {0!r} for '
+ '{1}'.format(fcall, self))
+ vdict = cls()
+ seq.unpack(vdict, self.conditions, data, noerror)
+ return vdict
+
+ def pack_wirestat(self, statobj):
+ """
+ Pack a stat object to appear as data returned by read()
+ on a directory. Essentially, we prefix the data with a size.
+ """
+ data = td.stat_seq.pack(statobj, self.conditions)
+ return td.wirestat_seq.pack({'size': len(data), 'data': data}, {})
+
+ def unpack_wirestat(self, bstring, offset, noerror=False):
+ """
+ Produce the next td.stat object from byte-string,
+ returning it and new offset.
+ """
+ statobj = td.stat()
+ d = { 'size': None }
+ newoff = td.wirestat_seq.unpack_from(d, self.conditions, bstring,
+ offset, noerror)
+ size = d['size']
+ if size is None: # implies noerror; newoff==offset+2
+ return statobj, newoff
+ # We now have size and data. If noerror, data might be
+ # too short, in which case we'll unpack a partial statobj.
+ # Or (with or without noeror), data might be too long, so
+ # that while len(data) == size, not all the data get used.
+ # That may be allowed by the protocol: it's not clear.
+ data = d['data']
+ used = td.stat_seq.unpack_from(statobj, self.conditions, data,
+ 0, noerror)
+ # if size != used ... then what?
+ return statobj, newoff
+
+ def pack_dirent(self, dirent):
+ """
+ Dirents (dot-L only) are easy to pack, but we provide
+ this function for symmetry. (Should we raise an error
+ if called on plain or dotu?)
+ """
+ return td.dirent_seq.pack(dirent, self.conditions)
+
+ def unpack_dirent(self, bstring, offset, noerror=False):
+ """
+ Produces the next td.dirent object from byte-string,
+ returning it and new offset.
+ """
+ deobj = td.dirent()
+ offset = td.dirent_seq.unpack_from(deobj, self.conditions, bstring,
+ offset, noerror)
+ return deobj, offset
+
+ def supports(self, fcall):
+ """
+ Return True if and only if this protocol supports the
+ given fcall.
+
+ >>> plain.supports(100)
+ True
+ >>> plain.supports('Tversion')
+ True
+ >>> plain.supports('Rlink')
+ False
+ """
+ fcall = fcall_names.get(fcall, None)
+ if fcall is None:
+ return False
+ cls = getattr(rrd, fcall)
+ return cls in self.pfods
+
+ def get_version(self, as_bytes=True):
+ "get Plan 9 protocol version, as string or (default) as bytes"
+ ret = self.auto_vars['version']
+ if as_bytes and not isinstance(ret, bytes):
+ ret = ret.encode('utf-8')
+ return ret
+
+ @property
+ def version(self):
+ "Plan 9 protocol version"
+ return self.get_version(as_bytes=False)
+
+DEBUG = False
+
+# This defines a special en/decoder named "s" using a magic
+# builtin. This and stat are the only variable-length
+# decoders, and this is the only recursively-variable-length
+# one (i.e., stat decoding is effectively fixed size once we
+# handle strings). So this magic avoids the need for recursion.
+#
+# Note that _string_ is, in effect, size[2] orig_var[size].
+_STRING_MAGIC = '_string_'
+SDesc = "typedef s: " + _STRING_MAGIC
+
+# This defines an en/decoder for type "qid",
+# which en/decodes 1 byte called type, 4 called version, and
+# 8 called path (for a total of 13 bytes).
+#
+# It also defines QTDIR, QTAPPEND, etc. (These are not used
+# for en/decode, or at least not yet.)
+QIDDesc = """\
+typedef qid: type[1] version[4] path[8]
+
+ #define QTDIR 0x80
+ #define QTAPPEND 0x40
+ #define QTEXCL 0x20
+ #define QTMOUNT 0x10
+ #define QTAUTH 0x08
+ #define QTTMP 0x04
+ #define QTSYMLINK 0x02
+ #define QTFILE 0x00
+"""
+
+# This defines a stat decoder, which has a 9p2000 standard front,
+# followed by an optional additional portion.
+#
+# The constants are named DMDIR etc.
+STATDesc = """
+typedef stat: type[2] dev[4] qid[qid] mode[4] atime[4] mtime[4] \
+length[8] name[s] uid[s] gid[s] muid[s] \
+{.u: extension[s] n_uid[4] n_gid[4] n_muid[4] }
+
+ #define DMDIR 0x80000000
+ #define DMAPPEND 0x40000000
+ #define DMMOUNT 0x10000000
+ #define DMAUTH 0x08000000
+ #define DMTMP 0x04000000
+ #define DMSYMLINK 0x02000000
+ /* 9P2000.u extensions */
+ #define DMDEVICE 0x00800000
+ #define DMNAMEDPIPE 0x00200000
+ #define DMSOCKET 0x00100000
+ #define DMSETUID 0x00080000
+ #define DMSETGID 0x00040000
+"""
+
+# This defines a wirestat decoder. A wirestat is a size and then
+# a (previously encoded, or future-decoded) stat.
+WirestatDesc = """
+typedef wirestat: size[2] data[size]
+"""
+
+# This defines a dirent decoder, which has a dot-L specific format.
+#
+# The dirent type fields are defined as DT_* (same as BSD and Linux).
+DirentDesc = """
+typedef dirent: qid[qid] offset[8] type[1] name[s]
+
+ #define DT_UNKNOWN 0
+ #define DT_FIFO 1
+ #define DT_CHR 2
+ #define DT_DIR 4
+ #define DT_BLK 6
+ #define DT_REG 8
+ #define DT_LNK 10
+ #define DT_SOCK 12
+ #define DT_WHT 14
+"""
+
+# N.B.: this is largely a slightly more rigidly formatted variant of
+# the contents of:
+# https://github.com/chaos/diod/blob/master/protocol.md
+#
+# Note that <name> = <value>: ... assigns names for the
+# fcall (function call) table. Names without "= value" are
+# assumed to be the previous value +1 (and the two names are
+# also checked to make sure they are Tfoo,Rfoo).
+ProtocolDesc = """\
+Rlerror.L = 7: tag[2] ecode[4]
+ ecode is a numerical Linux errno
+
+Tstatfs.L = 8: tag[2] fid[4]
+Rstatfs.L: tag[2] type[4] bsize[4] blocks[8] bfree[8] bavail[8] \
+ files[8] ffree[8] fsid[8] namelen[4]
+ Rstatfs corresponds to Linux statfs structure:
+ struct statfs {
+ long f_type; /* type of file system */
+ long f_bsize; /* optimal transfer block size */
+ long f_blocks; /* total data blocks in file system */
+ long f_bfree; /* free blocks in fs */
+ long f_bavail; /* free blocks avail to non-superuser */
+ long f_files; /* total file nodes in file system */
+ long f_ffree; /* free file nodes in fs */
+ fsid_t f_fsid; /* file system id */
+ long f_namelen; /* maximum length of filenames */
+ };
+
+ This comes from nowhere obvious...
+ #define FSTYPE 0x01021997
+
+Tlopen.L = 12: tag[2] fid[4] flags[4]
+Rlopen.L: tag[2] qid[qid] iounit[4]
+ lopen prepares fid for file (or directory) I/O.
+
+ flags contains Linux open(2) flag bits, e.g., O_RDONLY, O_RDWR, O_WRONLY.
+
+ #define L_O_CREAT 000000100
+ #define L_O_EXCL 000000200
+ #define L_O_NOCTTY 000000400
+ #define L_O_TRUNC 000001000
+ #define L_O_APPEND 000002000
+ #define L_O_NONBLOCK 000004000
+ #define L_O_DSYNC 000010000
+ #define L_O_FASYNC 000020000
+ #define L_O_DIRECT 000040000
+ #define L_O_LARGEFILE 000100000
+ #define L_O_DIRECTORY 000200000
+ #define L_O_NOFOLLOW 000400000
+ #define L_O_NOATIME 001000000
+ #define L_O_CLOEXEC 002000000
+ #define L_O_SYNC 004000000
+ #define L_O_PATH 010000000
+ #define L_O_TMPFILE 020000000
+
+Tlcreate.L = 14: tag[2] fid[4] name[s] flags[4] mode[4] gid[4]
+Rlcreate.L: tag[2] qid[qid] iounit[4]
+ lcreate creates a regular file name in directory fid and prepares
+ it for I/O.
+
+ fid initially represents the parent directory of the new file.
+ After the call it represents the new file.
+
+ flags contains Linux open(2) flag bits (including O_CREAT).
+
+ mode contains Linux creat(2) mode (permissions) bits.
+
+ gid is the effective gid of the caller.
+
+Tsymlink.L = 16: tag[2] dfid[4] name[s] symtgt[s] gid[4]
+Rsymlink.L: tag[2] qid[qid]
+ symlink creates a symbolic link name in directory dfid. The
+ link will point to symtgt.
+
+ gid is the effective group id of the caller.
+
+ The qid for the new symbolic link is returned in the reply.
+
+Tmknod.L = 18: tag[2] dfid[4] name[s] mode[4] major[4] minor[4] gid[4]
+Rmknod.L: tag[2] qid[qid]
+ mknod creates a device node name in directory dfid with major
+ and minor numbers.
+
+ mode contains Linux mknod(2) mode bits. (Note that these
+ include the S_IFMT bits which may be S_IFBLK, S_IFCHR, or
+ S_IFSOCK.)
+
+ gid is the effective group id of the caller.
+
+ The qid for the new device node is returned in the reply.
+
+Trename.L = 20: tag[2] fid[4] dfid[4] name[s]
+Rrename.L: tag[2]
+ rename renames a file system object referenced by fid, to name
+ in the directory referenced by dfid.
+
+ This operation will eventually be replaced by renameat.
+
+Treadlink.L = 22: tag[2] fid[4]
+Rreadlink.L: tag[2] target[s]
+ readlink returns the contents of teh symbolic link referenced by fid.
+
+Tgetattr.L = 24: tag[2] fid[4] request_mask[8]
+Rgetattr.L: tag[2] valid[8] qid[qid] mode[4] uid[4] gid[4] nlink[8] \
+ rdev[8] size[8] blksize[8] blocks[8] \
+ atime_sec[8] atime_nsec[8] mtime_sec[8] mtime_nsec[8] \
+ ctime_sec[8] ctime_nsec[8] btime_sec[8] btime_nsec[8] \
+ gen[8] data_version[8]
+
+ getattr gets attributes of a file system object referenced by fid.
+ The response is intended to follow pretty closely the fields
+ returned by the stat(2) system call:
+
+ struct stat {
+ dev_t st_dev; /* ID of device containing file */
+ ino_t st_ino; /* inode number */
+ mode_t st_mode; /* protection */
+ nlink_t st_nlink; /* number of hard links */
+ uid_t st_uid; /* user ID of owner */
+ gid_t st_gid; /* group ID of owner */
+ dev_t st_rdev; /* device ID (if special file) */
+ off_t st_size; /* total size, in bytes */
+ blksize_t st_blksize; /* blocksize for file system I/O */
+ blkcnt_t st_blocks; /* number of 512B blocks allocated */
+ time_t st_atime; /* time of last access */
+ time_t st_mtime; /* time of last modification */
+ time_t st_ctime; /* time of last status change */
+ };
+
+ The differences are:
+
+ * st_dev is omitted
+ * st_ino is contained in the path component of qid
+ * times are nanosecond resolution
+ * btime, gen and data_version fields are reserved for future use
+
+ Not all fields are valid in every call. request_mask is a bitmask
+ indicating which fields are requested. valid is a bitmask
+ indicating which fields are valid in the response. The mask
+ values are as follows:
+
+ #define GETATTR_MODE 0x00000001
+ #define GETATTR_NLINK 0x00000002
+ #define GETATTR_UID 0x00000004
+ #define GETATTR_GID 0x00000008
+ #define GETATTR_RDEV 0x00000010
+ #define GETATTR_ATIME 0x00000020
+ #define GETATTR_MTIME 0x00000040
+ #define GETATTR_CTIME 0x00000080
+ #define GETATTR_INO 0x00000100
+ #define GETATTR_SIZE 0x00000200
+ #define GETATTR_BLOCKS 0x00000400
+
+ #define GETATTR_BTIME 0x00000800
+ #define GETATTR_GEN 0x00001000
+ #define GETATTR_DATA_VERSION 0x00002000
+
+ #define GETATTR_BASIC 0x000007ff /* Mask for fields up to BLOCKS */
+ #define GETATTR_ALL 0x00003fff /* Mask for All fields above */
+
+Tsetattr.L = 26: tag[2] fid[4] valid[4] mode[4] uid[4] gid[4] size[8] \
+ atime_sec[8] atime_nsec[8] mtime_sec[8] mtime_nsec[8]
+Rsetattr.L: tag[2]
+ setattr sets attributes of a file system object referenced by
+ fid. As with getattr, valid is a bitmask selecting which
+ fields to set, which can be any combination of:
+
+ mode - Linux chmod(2) mode bits.
+
+ uid, gid - New owner, group of the file as described in Linux chown(2).
+
+ size - New file size as handled by Linux truncate(2).
+
+ atime_sec, atime_nsec - Time of last file access.
+
+ mtime_sec, mtime_nsec - Time of last file modification.
+
+ The valid bits are defined as follows:
+
+ #define SETATTR_MODE 0x00000001
+ #define SETATTR_UID 0x00000002
+ #define SETATTR_GID 0x00000004
+ #define SETATTR_SIZE 0x00000008
+ #define SETATTR_ATIME 0x00000010
+ #define SETATTR_MTIME 0x00000020
+ #define SETATTR_CTIME 0x00000040
+ #define SETATTR_ATIME_SET 0x00000080
+ #define SETATTR_MTIME_SET 0x00000100
+
+ If a time bit is set without the corresponding SET bit, the
+ current system time on the server is used instead of the value
+ sent in the request.
+
+Txattrwalk.L = 30: tag[2] fid[4] newfid[4] name[s]
+Rxattrwalk.L: tag[2] size[8]
+ xattrwalk gets a newfid pointing to xattr name. This fid can
+ later be used to read the xattr value. If name is NULL newfid
+ can be used to get the list of extended attributes associated
+ with the file system object.
+
+Txattrcreate.L = 32: tag[2] fid[4] name[s] attr_size[8] flags[4]
+Rxattrcreate.L: tag[2]
+ xattrcreate gets a fid pointing to the xattr name. This fid
+ can later be used to set the xattr value.
+
+ flag is derived from set Linux setxattr. The manpage says
+
+ The flags parameter can be used to refine the semantics of
+ the operation. XATTR_CREATE specifies a pure create,
+ which fails if the named attribute exists already.
+ XATTR_REPLACE specifies a pure replace operation, which
+ fails if the named attribute does not already exist. By
+ default (no flags), the extended attribute will be created
+ if need be, or will simply replace the value if the
+ attribute exists.
+
+ The actual setxattr operation happens when the fid is clunked.
+ At that point the written byte count and the attr_size
+ specified in TXATTRCREATE should be same otherwise an error
+ will be returned.
+
+Treaddir.L = 40: tag[2] fid[4] offset[8] count[4]
+Rreaddir.L: tag[2] count[4] data[count]
+ readdir requests that the server return directory entries from
+ the directory represented by fid, previously opened with
+ lopen. offset is zero on the first call.
+
+ Directory entries are represented as variable-length records:
+ qid[qid] offset[8] type[1] name[s]
+ At most count bytes will be returned in data. If count is not
+ zero in the response, more data is available. On subsequent
+ calls, offset is the offset returned in the last directory
+ entry of the previous call.
+
+Tfsync.L = 50: tag[2] fid[4]
+Rfsync.L: tag[2]
+ fsync tells the server to flush any cached data associated
+ with fid, previously opened with lopen.
+
+Tlock.L = 52: tag[2] fid[4] type[1] flags[4] start[8] length[8] \
+ proc_id[4] client_id[s]
+Rlock.L: tag[2] status[1]
+ lock is used to acquire or release a POSIX record lock on fid
+ and has semantics similar to Linux fcntl(F_SETLK).
+
+ type has one of the values:
+
+ #define LOCK_TYPE_RDLCK 0
+ #define LOCK_TYPE_WRLCK 1
+ #define LOCK_TYPE_UNLCK 2
+
+ start, length, and proc_id correspond to the analagous fields
+ passed to Linux fcntl(F_SETLK):
+
+ struct flock {
+ short l_type; /* Type of lock: F_RDLCK, F_WRLCK, F_UNLCK */
+ short l_whence;/* How to intrprt l_start: SEEK_SET,SEEK_CUR,SEEK_END */
+ off_t l_start; /* Starting offset for lock */
+ off_t l_len; /* Number of bytes to lock */
+ pid_t l_pid; /* PID of process blocking our lock (F_GETLK only) */
+ };
+
+ flags bits are:
+
+ #define LOCK_SUCCESS 0
+ #define LOCK_BLOCKED 1
+ #define LOCK_ERROR 2
+ #define LOCK_GRACE 3
+
+ The Linux v9fs client implements the fcntl(F_SETLKW)
+ (blocking) lock request by calling lock with
+ LOCK_FLAGS_BLOCK set. If the response is LOCK_BLOCKED,
+ it retries the lock request in an interruptible loop until
+ status is no longer LOCK_BLOCKED.
+
+ The Linux v9fs client translates BSD advisory locks (flock) to
+ whole-file POSIX record locks. v9fs does not implement
+ mandatory locks and will return ENOLCK if use is attempted.
+
+ Because of POSIX record lock inheritance and upgrade
+ properties, pass-through servers must be implemented
+ carefully.
+
+Tgetlock.L = 54: tag[2] fid[4] type[1] start[8] length[8] proc_id[4] \
+ client_id[s]
+Rgetlock.L: tag[2] type[1] start[8] length[8] proc_id[4] client_id[s]
+ getlock tests for the existence of a POSIX record lock and has
+ semantics similar to Linux fcntl(F_GETLK).
+
+ As with lock, type has one of the values defined above, and
+ start, length, and proc_id correspond to the analagous fields
+ in struct flock passed to Linux fcntl(F_GETLK), and client_Id
+ is an additional mechanism for uniquely identifying the lock
+ requester and is set to the nodename by the Linux v9fs client.
+
+Tlink.L = 70: tag[2] dfid[4] fid[4] name[s]
+Rlink.L: tag[2]
+ link creates a hard link name in directory dfid. The link
+ target is referenced by fid.
+
+Tmkdir.L = 72: tag[2] dfid[4] name[s] mode[4] gid[4]
+Rmkdir.L: tag[2] qid[qid]
+ mkdir creates a new directory name in parent directory dfid.
+
+ mode contains Linux mkdir(2) mode bits.
+
+ gid is the effective group ID of the caller.
+
+ The qid of the new directory is returned in the response.
+
+Trenameat.L = 74: tag[2] olddirfid[4] oldname[s] newdirfid[4] newname[s]
+Rrenameat.L: tag[2]
+ Change the name of a file from oldname to newname, possible
+ moving it from old directory represented by olddirfid to new
+ directory represented by newdirfid.
+
+ If the server returns ENOTSUPP, the client should fall back to
+ the rename operation.
+
+Tunlinkat.L = 76: tag[2] dirfd[4] name[s] flags[4]
+Runlinkat.L: tag[2]
+ Unlink name from directory represented by dirfd. If the file
+ is represented by a fid, that fid is not clunked. If the
+ server returns ENOTSUPP, the client should fall back to the
+ remove operation.
+
+ There seems to be only one defined flag:
+
+ #define AT_REMOVEDIR 0x200
+
+Tversion = 100: tag[2] msize[4] version[s]:auto
+Rversion: tag[2] msize[4] version[s]
+
+ negotiate protocol version
+
+ version establishes the msize, which is the maximum message
+ size inclusive of the size value that can be handled by both
+ client and server.
+
+ It also establishes the protocol version. For 9P2000.L
+ version must be the string 9P2000.L.
+
+Tauth = 102: tag[2] afid[4] uname[s] aname[s] n_uname[4]
+Rauth: tag[2] aqid[qid]
+ auth initiates an authentication handshake for n_uname.
+ Rlerror is returned if authentication is not required. If
+ successful, afid is used to read/write the authentication
+ handshake (protocol does not specify what is read/written),
+ and afid is presented in the attach.
+
+Tattach = 104: tag[2] fid[4] afid[4] uname[s] aname[s] {.u: n_uname[4] }
+Rattach: tag[2] qid[qid]
+ attach introduces a new user to the server, and establishes
+ fid as the root for that user on the file tree selected by
+ aname.
+
+ afid can be NOFID (~0) or the fid from a previous auth
+ handshake. The afid can be clunked immediately after the
+ attach.
+
+ #define NOFID 0xffffffff
+
+ n_uname, if not set to NONUNAME (~0), is the uid of the
+ user and is used in preference to uname. Note that it appears
+ in both .u and .L (unlike most .u-specific features).
+
+ #define NONUNAME 0xffffffff
+
+ v9fs has several modes of access which determine how it uses
+ attach. In the default access=user, an initial attach is sent
+ for the user provided in the uname=name mount option, and for
+ each user that accesses the file system thereafter. For
+ access=, only the initial attach is sent for and all other
+ users are denied access by the client.
+
+Rerror = 107: tag[2] errstr[s] {.u: errnum[4] }
+
+Tflush = 108: tag[2] oldtag[2]
+Rflush: tag[2]
+ flush aborts an in-flight request referenced by oldtag, if any.
+
+Twalk = 110: tag[2] fid[4] newfid[4] nwname[2] nwname*(wname[s])
+Rwalk: tag[2] nwqid[2] nwqid*(wqid[qid])
+ walk is used to descend a directory represented by fid using
+ successive path elements provided in the wname array. If
+ succesful, newfid represents the new path.
+
+ fid can be cloned to newfid by calling walk with nwname set to
+ zero.
+
+ if nwname==0, fid need not represent a directory.
+
+Topen = 112: tag[2] fid[4] mode[1]
+Ropen: tag[2] qid[qid] iounit[4]
+ open prepares fid for file (or directory) I/O.
+
+ mode is:
+ #define OREAD 0 /* open for read */
+ #define OWRITE 1 /* open for write */
+ #define ORDWR 2 /* open for read and write */
+ #define OEXEC 3 /* open for execute */
+
+ #define OTRUNC 16 /* truncate (illegal if OEXEC) */
+ #define OCEXEC 32 /* close on exec (nonsensical) */
+ #define ORCLOSE 64 /* remove on close */
+ #define ODIRECT 128 /* direct access (.u extension?) */
+
+Tcreate = 114: tag[2] fid[4] name[s] perm[4] mode[1] {.u: extension[s] }
+Rcreate: tag[2] qid[qid] iounit[4]
+ create is similar to open; however, the incoming fid is the
+ diretory in which the file is to be created, and on success,
+ return, the fid refers to the then-created file.
+
+Tread = 116: tag[2] fid[4] offset[8] count[4]
+Rread: tag[2] count[4] data[count]
+ perform a read on the file represented by fid. Note that in
+ v9fs, a read(2) or write(2) system call for a chunk of the
+ file that won't fit in a single request is broken up into
+ multiple requests.
+
+ Under 9P2000.L, read cannot be used on directories. See readdir.
+
+Twrite = 118: tag[2] fid[4] offset[8] count[4] data[count]
+Rwrite: tag[2] count[4]
+ perform a write on the file represented by fid. Note that in
+ v9fs, a read(2) or write(2) system call for a chunk of the
+ file that won't fit in a single request is broken up into
+ multiple requests.
+
+ write cannot be used on directories.
+
+Tclunk = 120: tag[2] fid[4]
+Rclunk: tag[2]
+ clunk signifies that fid is no longer needed by the client.
+
+Tremove = 122: tag[2] fid[4]
+Rremove: tag[2]
+ remove removes the file system object represented by fid.
+
+ The fid is always clunked (even on error).
+
+Tstat = 124: tag[2] fid[4]
+Rstat: tag[2] size[2] data[size]
+
+Twstat = 126: tag[2] fid[4] size[2] data[size]
+Rwstat: tag[2]
+"""
+
+class _Token(object):
+ r"""
+ A scanned token.
+
+ Tokens have a type (tok.ttype) and value (tok.value). The value
+ is generally the token itself, although sometimes a prefix and/or
+ suffix has been removed (for 'label', 'word*', ':aux', and
+ '[type]' tokens). If prefix and/or suffix are removed, the full
+ original token is
+ in its .orig.
+
+ Tokens are:
+ - 'word', 'word*', or 'label':
+ '[.\w]+' followed by optional '*' or ':':
+
+ - 'aux': ':' followed by '\w+' (used for :auto annotation)
+
+ - 'type':
+ open bracket '[', followed by '\w+' or '\d+' (only one of these),
+ followed by close bracket ']'
+
+ - '(', ')', '{', '}': themeselves
+
+ Each token can have arbitrary leading white space (which is
+ discarded).
+
+ (Probably should return ':' as a char and handle it in parser,
+ but oh well.)
+ """
+ def __init__(self, ttype, value, orig=None):
+ self.ttype = ttype
+ self.value = value
+ self.orig = value if orig is None else orig
+ if self.ttype == 'type' and self.value.isdigit():
+ self.ival = int(self.value)
+ else:
+ self.ival = None
+ def __str__(self):
+ return self.orig
+
+_Token.tok_expr = re.compile(r'\s*([.\w]+(?:\*|:)?'
+ r'|:\w+'
+ r'|\[(?:\w+|\d+)\]'
+ r'|[(){}])')
+
+def _scan(string):
+ """
+ Tokenize a string.
+
+ Note: This raises a ValueError with the position of any unmatched
+ character in the string.
+ """
+ tlist = []
+
+ # make sure entire string is tokenized properly
+ pos = 0
+ for item in _Token.tok_expr.finditer(string):
+ span = item.span()
+ if span[0] != pos:
+ print('error: unmatched character(s) in input\n{0}\n{1}^'.format(
+ string, ' ' * pos))
+ raise ValueError('unmatched lexeme', pos)
+ pos = span[1]
+ tlist.append(item.group(1))
+ if pos != len(string):
+ print('error: unmatched character(s) in input\n{0}\n{1}^'.format(
+ string, ' ' * pos))
+ raise ValueError('unmatched lexeme', pos)
+
+ # classify each token, stripping decorations
+ result = []
+ for item in tlist:
+ if item in ('(', ')', '{', '}'):
+ tok = _Token(item, item)
+ elif item[0] == ':':
+ tok = _Token('aux', item[1:], item)
+ elif item.endswith(':'):
+ tok = _Token('label', item[0:-1], item)
+ elif item.endswith('*'):
+ tok = _Token('word*', item[0:-1], item)
+ elif item[0] == '[':
+ # integer or named type
+ if item[-1] != ']':
+ raise ValueError('internal error: "{0}" is not [...]'.format(
+ item))
+ tok = _Token('type', item[1:-1], item)
+ else:
+ tok = _Token('word', item)
+ result.append(tok)
+ return result
+
+def _debug_print_sequencer(seq):
+ """for debugging"""
+ print('sequencer is {0!r}'.format(seq), file=sys.stderr)
+ for i, enc in enumerate(seq):
+ print(' [{0:d}] = {1}'.format(i, enc), file=sys.stderr)
+
+def _parse_expr(seq, string, typedefs):
+ """
+ Parse "expression-ish" items, which is a list of:
+ name[type]
+ name*(subexpr) (a literal asterisk)
+ { label ... }
+
+ The "type" may be an integer or a second name. In the case
+ of a second name it must be something from <typedefs>.
+
+ The meaning of name[integer] is that we are going to encode
+ or decode a fixed-size field of <integer> bytes, using the
+ given name.
+
+ For name[name2], we can look up name2 in our typedefs table.
+ The only real typedefs's used here are "stat" and "s"; each
+ of these expands to a variable-size encode/decode. See the
+ special case below, though.
+
+ The meaning of name*(...) is: the earlier name will have been
+ defined by an earlier _parse_expr for this same line. That
+ earlier name provides a repeat-count.
+
+ Inside the parens we get a name[type] sub-expressino. This may
+ not recurse further, so we can use a pretty cheesy parser.
+
+ As a special case, given name[name2], we first check whether
+ name2 is an earlier name a la name*(...). Here the meaning
+ is much like name2*(name[1]), except that the result is a
+ simple byte string, rather than an array.
+
+ The meaning of "{ label ... " is that everything following up
+ to "}" is optional and used only with 9P2000.u and/or 9P2000.L.
+ Inside the {...} pair is the usual set of tokens, but again
+ {...} cannot recurse.
+
+ The parse fills in a Sequencer instance, and returns a list
+ of the parsed names.
+ """
+ names = []
+ cond = None
+
+ tokens = collections.deque(_scan(string))
+
+ def get_subscripted(tokens):
+ """
+ Allows name[integer] and name1[name2] only; returns
+ tuple after stripping off both tokens, or returns None
+ and does not strip tokens.
+ """
+ if len(tokens) == 0 or tokens[0].ttype != 'word':
+ return None
+ if len(tokens) > 1 and tokens[1].ttype == 'type':
+ word = tokens.popleft()
+ return word, tokens.popleft()
+ return None
+
+ def lookup(name, typeinfo, aux=None):
+ """
+ Convert cond (if not None) to its .value, so that instead
+ of (x, '.u') we get '.u'.
+
+ Convert typeinfo to an encdec. Typeinfo may be 1/2/4/8, or
+ one of our typedef names. If it's a typedef name it will
+ normally correspond to an EncDecTyped, but we have one special
+ case for string types, and another for using an earlier-defined
+ variable.
+ """
+ condval = None if cond is None else cond.value
+ if typeinfo.ival is None:
+ try:
+ cls, sub = typedefs[typeinfo.value]
+ except KeyError:
+ raise ValueError('unknown type name {0}'.format(typeinfo))
+ # the type name is typeinfo.value; the corresponding
+ # pfod class is cls; the *variable* name is name;
+ # and the sub-sequence is sub. But if cls is None
+ # then it's our string type.
+ if cls is None:
+ encdec = sequencer.EncDecSimple(name, _STRING_MAGIC, aux)
+ else:
+ encdec = sequencer.EncDecTyped(cls, name, sub, aux)
+ else:
+ if typeinfo.ival not in (1, 2, 4, 8):
+ raise ValueError('bad integer code in {0}'.format(typeinfo))
+ encdec = sequencer.EncDecSimple(name, typeinfo.ival, aux)
+ return condval, encdec
+
+ def emit_simple(name, typeinfo, aux=None):
+ """
+ Emit name[type]. We may be inside a conditional; if so
+ cond is not None.
+ """
+ condval, encdec = lookup(name, typeinfo, aux)
+ seq.append_encdec(condval, encdec)
+ names.append(name)
+
+ def emit_repeat(name1, name2, typeinfo):
+ """
+ Emit name1*(name2[type]).
+
+ Note that the conditional is buried in the sub-coder for
+ name2. It must be passed through anyway in case the sub-
+ coder is only partly conditional. If the sub-coder is
+ fully conditional, each sub-coding uses or produces no
+ bytes and hence the array itself is effectively conditional
+ as well (it becomes name1 * [None]).
+
+ We don't (currently) have any auxiliary data for arrays.
+ """
+ if name1 not in names:
+ raise ValueError('{0}*({1}[{2}]): '
+ '{0} undefined'.format(name1, name2,
+ typeinfo.value))
+ condval, encdec = lookup(name2, typeinfo)
+ encdec = sequencer.EncDecA(name1, name2, encdec)
+ seq.append_encdec(condval, encdec)
+ names.append(name2)
+
+ def emit_bytes_repeat(name1, name2):
+ """
+ Emit name1[name2], e.g., data[count].
+ """
+ condval = None if cond is None else cond.value
+ # Note that the two names are reversed when compared to
+ # count*(data[type]). The "sub-coder" is handled directly
+ # by EncDecA, hence is None.
+ #
+ # As a peculiar side effect, all bytes-repeats cause the
+ # count itself to become automatic (to have an aux of 'len').
+ encdec = sequencer.EncDecA(name2, name1, None, 'len')
+ seq.append_encdec(condval, encdec)
+ names.append(name1)
+
+ supported_conditions = ('.u')
+ while tokens:
+ token = tokens.popleft()
+ if token.ttype == 'label':
+ raise ValueError('misplaced label')
+ if token.ttype == 'aux':
+ raise ValueError('misplaced auxiliary')
+ if token.ttype == '{':
+ if cond is not None:
+ raise ValueError('nested "{"')
+ if len(tokens) == 0:
+ raise ValueError('unclosed "{"')
+ cond = tokens.popleft()
+ if cond.ttype != 'label':
+ raise ValueError('"{" not followed by cond label')
+ if cond.value not in supported_conditions:
+ raise ValueError('unsupported condition "{0}"'.format(
+ cond.value))
+ continue
+ if token.ttype == '}':
+ if cond is None:
+ raise ValueError('closing "}" w/o opening "{"')
+ cond = None
+ continue
+ if token.ttype == 'word*':
+ if len(tokens) == 0 or tokens[0].ttype != '(':
+ raise ValueError('{0} not followed by (...)'.format(token))
+ tokens.popleft()
+ repeat = get_subscripted(tokens)
+ if repeat is None:
+ raise ValueError('parse error after {0}('.format(token))
+ if len(tokens) == 0 or tokens[0].ttype != ')':
+ raise ValueError('missing ")" after {0}({1}{2}'.format(
+ token, repeat[0], repeat[1]))
+ tokens.popleft()
+ # N.B.: a repeat cannot have an auxiliary info (yet?).
+ emit_repeat(token.value, repeat[0].value, repeat[1])
+ continue
+ if token.ttype == 'word':
+ # Special case: _STRING_MAGIC turns into a string
+ # sequencer. This should be used with just one
+ # typedef (typedef s: _string_).
+ if token.value == _STRING_MAGIC:
+ names.append(_STRING_MAGIC) # XXX temporary
+ continue
+ if len(tokens) == 0 or tokens[0].ttype != 'type':
+ raise ValueError('parse error after {0}'.format(token))
+ type_or_size = tokens.popleft()
+ # Check for name[name2] where name2 is a word (not a
+ # number) that is in the names[] array.
+ if type_or_size.value in names:
+ # NB: this cannot have auxiliary info.
+ emit_bytes_repeat(token.value, type_or_size.value)
+ continue
+ if len(tokens) > 0 and tokens[0].ttype == 'aux':
+ aux = tokens.popleft()
+ if aux.value != 'auto':
+ raise ValueError('{0}{1}: only know "auto", not '
+ '{2}'.format(token, type_or_size,
+ aux.value))
+ emit_simple(token.value, type_or_size, aux.value)
+ else:
+ emit_simple(token.value, type_or_size)
+ continue
+ raise ValueError('"{0}" not valid here"'.format(token))
+
+ if cond is not None:
+ raise ValueError('unclosed "}"')
+
+ return names
+
+class _ProtoDefs(object):
+ def __init__(self):
+ # Scan our typedefs. This may execute '#define's as well.
+ self.typedefs = {}
+ self.defines = {}
+ typedef_re = re.compile(r'\s*typedef\s+(\w+)\s*:\s*(.*)')
+ self.parse_lines('SDesc', SDesc, typedef_re, self.handle_typedef)
+ self.parse_lines('QIDDesc', QIDDesc, typedef_re, self.handle_typedef)
+ self.parse_lines('STATDesc', STATDesc, typedef_re, self.handle_typedef)
+ self.parse_lines('WirestatDesc', WirestatDesc, typedef_re,
+ self.handle_typedef)
+ self.parse_lines('DirentDesc', DirentDesc, typedef_re,
+ self.handle_typedef)
+
+ # Scan protocol (the bulk of the work). This, too, may
+ # execute '#define's.
+ self.protocol = {}
+ proto_re = re.compile(r'(\*?\w+)(\.\w+)?\s*(?:=\s*(\d+))?\s*:\s*(.*)')
+ self.prev_proto_value = None
+ self.parse_lines('ProtocolDesc', ProtocolDesc,
+ proto_re, self.handle_proto_def)
+
+ self.setup_header()
+
+ # set these up for export()
+ self.plain = {}
+ self.dotu = {}
+ self.dotl = {}
+
+ def parse_lines(self, name, text, regexp, match_handler):
+ """
+ Parse a sequence of lines. Match each line using the
+ given regexp, or (first) as a #define line. Note that
+ indented lines are either #defines or are commentary!
+
+ If handling raises a ValueError, we complain and include
+ the appropriate line offset. Then we sys.exit(1) (!).
+ """
+ define = re.compile(r'\s*#define\s+(\w+)\s+([^/]*)'
+ r'(\s*/\*.*\*/)?\s*$')
+ for lineoff, line in enumerate(text.splitlines()):
+ try:
+ match = define.match(line)
+ if match:
+ self.handle_define(*match.groups())
+ continue
+ match = regexp.match(line)
+ if match:
+ match_handler(*match.groups())
+ continue
+ if len(line) and not line[0].isspace():
+ raise ValueError('unhandled line: {0}'.format(line))
+ except ValueError as err:
+ print('Internal error while parsing {0}:\n'
+ ' {1}\n'
+ '(at line offset +{2}, discounting \\-newline)\n'
+ 'The original line in question reads:\n'
+ '{3}'.format(name, err.args[0], lineoff, line),
+ file=sys.stderr)
+ sys.exit(1)
+
+ def handle_define(self, name, value, comment):
+ """
+ Handle #define match.
+
+ The regexp has three fields, matching the name, value,
+ and possibly-empty comment; these are our arguments.
+ """
+ # Obnoxious: int(,0) requires new 0o syntax in py3k;
+ # work around by trying twice, once with base 0, then again
+ # with explicit base 8 if the first attempt fails.
+ try:
+ value = int(value, 0)
+ except ValueError:
+ value = int(value, 8)
+ if DEBUG:
+ print('define: defining {0} as {1:x}'.format(name, value),
+ file=sys.stderr)
+ if name in self.defines:
+ raise ValueError('redefining {0}'.format(name))
+ self.defines[name] = (value, comment)
+
+ def handle_typedef(self, name, expr):
+ """
+ Handle typedef match.
+
+ The regexp has just two fields, the name and the expression
+ to parse (note that the expression must fit all on one line,
+ using backslash-newline if needed).
+
+ Typedefs may refer back to existing typedefs, so we pass
+ self.typedefs to _parse_expr().
+ """
+ seq = sequencer.Sequencer(name)
+ fields = _parse_expr(seq, expr, self.typedefs)
+ # Check for special string magic typedef. (The name
+ # probably should be just 's' but we won't check that
+ # here.)
+ if len(fields) == 1 and fields[0] == _STRING_MAGIC:
+ cls = None
+ else:
+ cls = pfod.pfod(name, fields)
+ if DEBUG:
+ print('typedef: {0} = {1!r}; '.format(name, fields),
+ end='', file=sys.stderr)
+ _debug_print_sequencer(seq)
+ if name in self.typedefs:
+ raise ValueError('redefining {0}'.format(name))
+ self.typedefs[name] = cls, seq
+
+ def handle_proto_def(self, name, proto_version, value, expr):
+ """
+ Handle protocol definition.
+
+ The regexp matched:
+ - The name of the protocol option such as Tversion,
+ Rversion, Rlerror, etc.
+ - The protocol version, if any (.u or .L).
+ - The value, if specified. If no value is specified
+ we use "the next value".
+ - The expression to parse.
+
+ As with typedefs, the expression must fit all on one
+ line.
+ """
+ if value:
+ value = int(value)
+ elif self.prev_proto_value is not None:
+ value = self.prev_proto_value + 1
+ else:
+ raise ValueError('{0}: missing protocol value'.format(name))
+ if value < 0 or value > 255:
+ raise ValueError('{0}: protocol value {1} out of '
+ 'range'.format(name, value))
+ self.prev_proto_value = value
+
+ seq = sequencer.Sequencer(name)
+ fields = _parse_expr(seq, expr, self.typedefs)
+ cls = pfod.pfod(name, fields)
+ if DEBUG:
+ print('proto: {0} = {1}; '.format(name, value),
+ end='', file=sys.stderr)
+ _debug_print_sequencer(seq)
+ if name in self.protocol:
+ raise ValueError('redefining {0}'.format(name))
+ self.protocol[name] = cls, value, proto_version, seq
+
+ def setup_header(self):
+ """
+ Handle header definition.
+
+ This is a bit gimmicky and uses some special cases,
+ because data is sized to dsize which is effectively
+ just size - 5. We can't express this in our mini language,
+ so we just hard-code the sequencer and pfod.
+
+ In addition, the unpacker never gets the original packet's
+ size field, only the fcall and the data.
+ """
+ self.header_pfod = pfod.pfod('Header', 'size dsize fcall data')
+
+ seq = sequencer.Sequencer('Header-pack')
+ # size: 4 bytes
+ seq.append_encdec(None, sequencer.EncDecSimple('size', 4, None))
+ # fcall: 1 byte
+ seq.append_encdec(None, sequencer.EncDecSimple('fcall', 1, None))
+ # data: string of length dsize
+ seq.append_encdec(None, sequencer.EncDecA('dsize', 'data', None))
+ if DEBUG:
+ print('Header-pack:', file=sys.stderr)
+ _debug_print_sequencer(seq)
+ self.header_pack_seq = seq
+
+ seq = sequencer.Sequencer('Header-unpack')
+ seq.append_encdec(None, sequencer.EncDecSimple('fcall', 1, None))
+ seq.append_encdec(None, sequencer.EncDecA('dsize', 'data', None))
+ if DEBUG:
+ print('Header-unpack:', file=sys.stderr)
+ _debug_print_sequencer(seq)
+ self.header_unpack_seq = seq
+
+ def export(self, mod):
+ """
+ Dump results of internal parsing process
+ into our module namespace.
+
+ Note that we do not export the 's' typedef, which
+ did not define a data structure.
+
+ Check for name collisions while we're at it.
+ """
+ namespace = type('td', (object,), {})
+
+ # Export the typedefs (qid, stat).
+ setattr(mod, 'td', namespace)
+ for key in self.typedefs:
+ cls = self.typedefs[key][0]
+ if cls is None:
+ continue
+ setattr(namespace, key, cls)
+
+ # Export two sequencers for en/decoding stat fields
+ # (needed for reading directories and doing Twstat).
+ setattr(namespace, 'stat_seq', self.typedefs['stat'][1])
+ setattr(namespace, 'wirestat_seq', self.typedefs['wirestat'][1])
+
+ # Export the similar dirent decoder.
+ setattr(namespace, 'dirent_seq', self.typedefs['dirent'][1])
+
+ # Export the #define values
+ for key, val in self.defines.items():
+ if hasattr(namespace, key):
+ print('{0!r} is both a #define and a typedef'.format(key))
+ raise AssertionError('bad internal names')
+ setattr(namespace, key, val[0])
+
+ # Export Tattach, Rattach, Twrite, Rversion, etc values.
+ # Set up fcall_names[] table to map from value back to name.
+ # We also map fcall names to themselves, so given either a
+ # name or a byte code we can find out whether it's a valid
+ # fcall.
+ for key, val in self.protocol.items():
+ if hasattr(namespace, key):
+ prev_def = '#define' if key in self.defines else 'typedef'
+ print('{0!r} is both a {1} and a protocol '
+ 'value'.format(key, prev_def))
+ raise AssertionError('bad internal names')
+ setattr(namespace, key, val[1])
+ fcall_names[key] = key
+ fcall_names[val[1]] = key
+
+ # Hook up PFOD's for each protocol object -- for
+ # Tversion/Rversion, Twrite/Rwrite, Tlopen/Rlopen, etc.
+ # They go in the rrd name-space, and also in dictionaries
+ # per-protocol here, with the lookup pointing to a _PackInfo
+ # for the corresponding sequencer.
+ #
+ # Note that each protocol PFOD is optionally annotated with
+ # its specific version. We know that .L > .u > plain; but
+ # all the "lesser" PFODs are available to all "greater"
+ # protocols at all times.
+ #
+ # (This is sort-of-wrong for Rerror vs Rlerror, but we
+ # don't bother to exclude Rerror from .L.)
+ #
+ # The PFODs themselves were already created, at parse time.
+ namespace = type('rrd', (object,), {})
+ setattr(mod, 'rrd', namespace)
+ for key, val in self.protocol.items():
+ cls = val[0]
+ proto_version = val[2]
+ seq = val[3]
+ packinfo = _PackInfo(seq)
+ if proto_version is None:
+ # all three protocols have it
+ self.plain[cls] = packinfo
+ self.dotu[cls] = packinfo
+ self.dotl[cls] = packinfo
+ elif proto_version == '.u':
+ # only .u and .L have it
+ self.dotu[cls] = packinfo
+ self.dotl[cls] = packinfo
+ elif proto_version == '.L':
+ # only .L has it
+ self.dotl[cls] = packinfo
+ else:
+ raise AssertionError('unknown protocol {1} for '
+ '{0}'.format(key, proto_version))
+ setattr(namespace, key, cls)
+
+_9p_data = _ProtoDefs()
+_9p_data.export(sys.modules[__name__])
+
+# Currently we look up by text-string, in lowercase.
+_9p_versions = {
+ '9p2000': _P9Proto({'version': '9P2000'},
+ {'.u': False},
+ _9p_data,
+ _9p_data.plain,
+ 0),
+ '9p2000.u': _P9Proto({'version': '9P2000.u'},
+ {'.u': True},
+ _9p_data,
+ _9p_data.dotu,
+ 1),
+ '9p2000.l': _P9Proto({'version': '9P2000.L'},
+ {'.u': True},
+ _9p_data,
+ _9p_data.dotl,
+ 2),
+}
+def p9_version(vers_string):
+ """
+ Return protocol implementation of given version. Raises
+ KeyError if the version is invalid. Note that the KeyError
+ will be on a string-ified, lower-cased version of the vers_string
+ argument, even if it comes in as a bytes instance in py3k.
+ """
+ if not isinstance(vers_string, str) and isinstance(vers_string, bytes):
+ vers_string = vers_string.decode('utf-8', 'surrogateescape')
+ return _9p_versions[vers_string.lower()]
+
+plain = p9_version('9p2000')
+dotu = p9_version('9p2000.u')
+dotl = p9_version('9p2000.L')
+
+def qid_type2name(qidtype):
+ """
+ Convert qid type field to printable string.
+
+ >>> qid_type2name(td.QTDIR)
+ 'dir'
+ >>> qid_type2name(td.QTAPPEND)
+ 'append-only'
+ >>> qid_type2name(0xff)
+ 'invalid(0xff)'
+ """
+ try:
+ # Is it ever OK to have multiple bits set,
+ # e.g., both QTAPPEND and QTEXCL?
+ return {
+ td.QTDIR: 'dir',
+ td.QTAPPEND: 'append-only',
+ td.QTEXCL: 'exclusive',
+ td.QTMOUNT: 'mount',
+ td.QTAUTH: 'auth',
+ td.QTTMP: 'tmp',
+ td.QTSYMLINK: 'symlink',
+ td.QTFILE: 'file',
+ }[qidtype]
+ except KeyError:
+ pass
+ return 'invalid({0:#x})'.format(qidtype)
+
+if __name__ == '__main__':
+ import doctest
+ doctest.testmod()
diff --git a/pytest/sequencer.py b/pytest/sequencer.py
new file mode 100644
index 000000000000..207d22986470
--- /dev/null
+++ b/pytest/sequencer.py
@@ -0,0 +1,653 @@
+#! /usr/bin/env python
+
+from __future__ import print_function
+
+#__all__ = ['EncDec', 'EncDecSimple', 'EncDecTyped', 'EncDecA',
+# 'SequenceError', 'Sequencer']
+
+import abc
+import struct
+import sys
+
+_ProtoStruct = {
+ '1': struct.Struct('<B'),
+ '2': struct.Struct('<H'),
+ '4': struct.Struct('<I'),
+ '8': struct.Struct('<Q'),
+ '_string_': None, # handled specially
+}
+for _i in (1, 2, 4, 8):
+ _ProtoStruct[_i] = _ProtoStruct[str(_i)]
+del _i
+
+class EncDec(object):
+ __metaclass__ = abc.ABCMeta
+ """
+ Base class for en/de-coders, which are put into sequencers.
+
+ All have a name and arbitrary user-supplied auxiliary data
+ (default=None).
+
+ All provide a pack() and unpack(). The pack() function
+ returns a "bytes" value. This is internally implemented as a
+ function apack() that returns a list of struct.pack() bytes,
+ and pack() just joins them up as needed.
+
+ The pack/unpack functions take a dictionary of variable names
+ and values, and a second dictionary for conditionals, but at
+ this level conditionals don't apply: they are just being
+ passed through. Variable names do apply to array encoders
+
+ EncDec also provide b2s() and s2b() static methods, which
+ convert strings to bytes and vice versa, as reversibly as
+ possible (using surrogateescape encoding). In Python2 this is
+ a no-op since the string type *is* the bytes type (<type
+ 'unicode'>) is the unicode-ized string type).
+
+ EncDec also provides b2u() and u2b() to do conversion to/from
+ Unicode.
+
+ These are partly for internal use (all strings get converted
+ to UTF-8 byte sequences when coding a _string_ type) and partly
+ for doctests, where we just want some py2k/py3k compat hacks.
+ """
+ def __init__(self, name, aux):
+ self.name = name
+ self.aux = aux
+
+ @staticmethod
+ def b2u(byte_sequence):
+ "transform bytes to unicode"
+ return byte_sequence.decode('utf-8', 'surrogateescape')
+
+ @staticmethod
+ def u2b(unicode_sequence):
+ "transform unicode to bytes"
+ return unicode_sequence.encode('utf-8', 'surrogateescape')
+
+ if sys.version_info[0] >= 3:
+ b2s = b2u
+ @staticmethod
+ def s2b(string):
+ "transform string to bytes (leaves raw byte sequence unchanged)"
+ if isinstance(string, bytes):
+ return string
+ return string.encode('utf-8', 'surrogateescape')
+ else:
+ @staticmethod
+ def b2s(byte_sequence):
+ "transform bytes to string - no-op in python2.7"
+ return byte_sequence
+ @staticmethod
+ def s2b(string):
+ "transform string or unicode to bytes"
+ if isinstance(string, unicode):
+ return string.encode('utf-8', 'surrogateescape')
+ return string
+
+ def pack(self, vdict, cdict, val):
+ "encode value <val> into a byte-string"
+ return b''.join(self.apack(vdict, cdict, val))
+
+ @abc.abstractmethod
+ def apack(self, vdict, cdict, val):
+ "encode value <val> into [bytes1, b2, ..., bN]"
+
+ @abc.abstractmethod
+ def unpack(self, vdict, cdict, bstring, offset, noerror=False):
+ "unpack bytes from <bstring> at <offset>"
+
+
+class EncDecSimple(EncDec):
+ r"""
+ Encode/decode a simple (but named) field. The field is not an
+ array, which requires using EncDecA, nor a typed object
+ like a qid or stat instance -- those require a Sequencer and
+ EncDecTyped.
+
+ The format is one of '1'/1, '2'/2, '4'/4, '8'/8, or '_string_'.
+
+ Note: using b2s here is purely a doctest/testmod python2/python3
+ compat hack. The output of e.pack is <type 'bytes'>; b2s
+ converts it to a string, purely for display purposes. (It might
+ be better to map py2 output to bytes but they just print as a
+ string anyway.) In normal use, you should not call b2s here.
+
+ >>> e = EncDecSimple('eggs', 2)
+ >>> e.b2s(e.pack({}, {}, 0))
+ '\x00\x00'
+ >>> e.b2s(e.pack({}, {}, 256))
+ '\x00\x01'
+
+ Values that cannot be packed produce a SequenceError:
+
+ >>> e.pack({}, {}, None)
+ Traceback (most recent call last):
+ ...
+ SequenceError: failed while packing 'eggs'=None
+ >>> e.pack({}, {}, -1)
+ Traceback (most recent call last):
+ ...
+ SequenceError: failed while packing 'eggs'=-1
+
+ Unpacking both returns a value, and tells how many bytes it
+ used out of the bytestring or byte-array argument. If there
+ are not enough bytes remaining at the starting offset, it
+ raises a SequenceError, unless noerror=True (then unset
+ values are None)
+
+ >>> e.unpack({}, {}, b'\x00\x01', 0)
+ (256, 2)
+ >>> e.unpack({}, {}, b'', 0)
+ Traceback (most recent call last):
+ ...
+ SequenceError: out of data while unpacking 'eggs'
+ >>> e.unpack({}, {}, b'', 0, noerror=True)
+ (None, 2)
+
+ Note that strings can be provided as regular strings, byte
+ strings (same as regular strings in py2k), or Unicode strings
+ (same as regular strings in py3k). Unicode strings will be
+ converted to UTF-8 before being packed. Since this leaves
+ 7-bit characters alone, these examples work in both py2k and
+ py3k. (Note: the UTF-8 encoding of u'\u1234' is
+ '\xe1\x88\xb4' or 225, 136, 180. The b2i trick below is
+ another py2k vs py3k special case just for doctests: py2k
+ tries to display the utf-8 encoded data as a string.)
+
+ >>> e = EncDecSimple('spam', '_string_')
+ >>> e.b2s(e.pack({}, {}, 'p3=unicode,p2=bytes'))
+ '\x13\x00p3=unicode,p2=bytes'
+
+ >>> e.b2s(e.pack({}, {}, b'bytes'))
+ '\x05\x00bytes'
+
+ >>> import sys
+ >>> ispy3k = sys.version_info[0] >= 3
+
+ >>> b2i = lambda x: x if ispy3k else ord(x)
+ >>> [b2i(x) for x in e.pack({}, {}, u'\u1234')]
+ [3, 0, 225, 136, 180]
+
+ The byte length of the utf-8 data cannot exceed 65535 since
+ the encoding has the length as a 2-byte field (a la the
+ encoding for 'eggs' here). A too-long string produces
+ a SequenceError as well.
+
+ >>> e.pack({}, {}, 16384 * 'spam')
+ Traceback (most recent call last):
+ ...
+ SequenceError: string too long (len=65536) while packing 'spam'
+
+ Unpacking strings produces byte arrays. (Of course,
+ in py2k these are also known as <type 'str'>.)
+
+ >>> unpacked = e.unpack({}, {}, b'\x04\x00data', 0)
+ >>> etype = bytes if ispy3k else str
+ >>> print(isinstance(unpacked[0], etype))
+ True
+ >>> e.b2s(unpacked[0])
+ 'data'
+ >>> unpacked[1]
+ 6
+
+ You may use e.b2s() to convert them to unicode strings in py3k,
+ or you may set e.autob2s. This still only really does
+ anything in py3k, since py2k strings *are* bytes, so it's
+ really just intended for doctest purposes (see EncDecA):
+
+ >>> e.autob2s = True
+ >>> e.unpack({}, {}, b'\x07\x00stringy', 0)
+ ('stringy', 9)
+ """
+ def __init__(self, name, fmt, aux=None):
+ super(EncDecSimple, self).__init__(name, aux)
+ self.fmt = fmt
+ self.struct = _ProtoStruct[fmt]
+ self.autob2s = False
+
+ def __repr__(self):
+ if self.aux is None:
+ return '{0}({1!r}, {2!r})'.format(self.__class__.__name__,
+ self.name, self.fmt)
+ return '{0}({1!r}, {2!r}, {3!r})'.format(self.__class__.__name__,
+ self.name, self.fmt, self.aux)
+
+ __str__ = __repr__
+
+ def apack(self, vdict, cdict, val):
+ "encode a value"
+ try:
+ if self.struct:
+ return [self.struct.pack(val)]
+ sval = self.s2b(val)
+ if len(sval) > 65535:
+ raise SequenceError('string too long (len={0:d}) '
+ 'while packing {1!r}'.format(len(sval), self.name))
+ return [EncDecSimple.string_len.pack(len(sval)), sval]
+ # Include AttributeError in case someone tries to, e.g.,
+ # pack name=None and self.s2b() tries to use .encode on it.
+ except (struct.error, AttributeError):
+ raise SequenceError('failed '
+ 'while packing {0!r}={1!r}'.format(self.name, val))
+
+ def _unpack1(self, via, bstring, offset, noerror):
+ "internal function to unpack single item"
+ try:
+ tup = via.unpack_from(bstring, offset)
+ except struct.error as err:
+ if 'unpack_from requires a buffer of at least' in str(err):
+ if noerror:
+ return None, offset + via.size
+ raise SequenceError('out of data '
+ 'while unpacking {0!r}'.format(self.name))
+ # not clear what to do here if noerror
+ raise SequenceError('failed '
+ 'while unpacking {0!r}'.format(self.name))
+ assert len(tup) == 1
+ return tup[0], offset + via.size
+
+ def unpack(self, vdict, cdict, bstring, offset, noerror=False):
+ "decode a value; return the value and the new offset"
+ if self.struct:
+ return self._unpack1(self.struct, bstring, offset, noerror)
+ slen, offset = self._unpack1(EncDecSimple.string_len, bstring, offset,
+ noerror)
+ if slen is None:
+ return None, offset
+ nexto = offset + slen
+ if len(bstring) < nexto:
+ if noerror:
+ val = None
+ else:
+ raise SequenceError('out of data '
+ 'while unpacking {0!r}'.format(self.name))
+ else:
+ val = bstring[offset:nexto]
+ if self.autob2s:
+ val = self.b2s(val)
+ return val, nexto
+
+# string length: 2 byte unsigned field
+EncDecSimple.string_len = _ProtoStruct[2]
+
+class EncDecTyped(EncDec):
+ r"""
+ EncDec for typed objects (which are built from PFODs, which are
+ a sneaky class variant of OrderedDict similar to namedtuple).
+
+ Calling the klass() function with no arguments must create an
+ instance with all-None members.
+
+ We also require a Sequencer to pack and unpack the members of
+ the underlying pfod.
+
+ >>> qid_s = Sequencer('qid')
+ >>> qid_s.append_encdec(None, EncDecSimple('type', 1))
+ >>> qid_s.append_encdec(None, EncDecSimple('version', 4))
+ >>> qid_s.append_encdec(None, EncDecSimple('path', 8))
+ >>> len(qid_s)
+ 3
+
+ >>> from pfod import pfod
+ >>> qid = pfod('qid', ['type', 'version', 'path'])
+ >>> len(qid._fields)
+ 3
+ >>> qid_inst = qid(1, 2, 3)
+ >>> qid_inst
+ qid(type=1, version=2, path=3)
+
+ >>> e = EncDecTyped(qid, 'aqid', qid_s)
+ >>> e.b2s(e.pack({}, {}, qid_inst))
+ '\x01\x02\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00'
+ >>> e.unpack({}, {},
+ ... b'\x01\x02\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00', 0)
+ (qid(type=1, version=2, path=3), 13)
+
+ If an EncDecTyped instance has a conditional sequencer, note
+ that unpacking will leave un-selected items set to None (see
+ the Sequencer example below):
+
+ >>> breakfast = pfod('breakfast', 'eggs spam ham')
+ >>> breakfast()
+ breakfast(eggs=None, spam=None, ham=None)
+ >>> bfseq = Sequencer('breakfast')
+ >>> bfseq.append_encdec(None, EncDecSimple('eggs', 1))
+ >>> bfseq.append_encdec('yuck', EncDecSimple('spam', 1))
+ >>> bfseq.append_encdec(None, EncDecSimple('ham', 1))
+ >>> e = EncDecTyped(breakfast, 'bfname', bfseq)
+ >>> e.unpack({}, {'yuck': False}, b'\x02\x01\x04', 0)
+ (breakfast(eggs=2, spam=None, ham=1), 2)
+
+ This used just two of the three bytes: eggs=2, ham=1.
+
+ >>> e.unpack({}, {'yuck': True}, b'\x02\x01\x04', 0)
+ (breakfast(eggs=2, spam=1, ham=4), 3)
+
+ This used the third byte, so ham=4.
+ """
+ def __init__(self, klass, name, sequence, aux=None):
+ assert len(sequence) == len(klass()._fields) # temporary
+ super(EncDecTyped, self).__init__(name, aux)
+ self.klass = klass
+ self.name = name
+ self.sequence = sequence
+
+ def __repr__(self):
+ if self.aux is None:
+ return '{0}({1!r}, {2!r}, {3!r})'.format(self.__class__.__name__,
+ self.klass, self.name, self.sequence)
+ return '{0}({1!r}, {2!r}, {3!r}, {4!r})'.format(self.__class__.__name__,
+ self.klass, self.name, self.sequence, self.aux)
+
+ __str__ = __repr__
+
+ def apack(self, vdict, cdict, val):
+ """
+ Pack each of our instance variables.
+
+ Note that some packing may be conditional.
+ """
+ return self.sequence.apack(val, cdict)
+
+ def unpack(self, vdict, cdict, bstring, offset, noerror=False):
+ """
+ Unpack each instance variable, into a new object of
+ self.klass. Return the new instance and new offset.
+
+ Note that some unpacking may be conditional.
+ """
+ obj = self.klass()
+ offset = self.sequence.unpack_from(obj, cdict, bstring, offset, noerror)
+ return obj, offset
+
+class EncDecA(EncDec):
+ r"""
+ EncDec for arrays (repeated objects).
+
+ We take the name of the repeat count variable, and a sub-coder
+ (an EncDec instance). For example, we can en/de-code
+ repeat='nwname' copies of name='wname', or nwname of
+ name='wqid', in a Twalk en/de-code.
+
+ Note that we don't pack or unpack the repeat count itself --
+ that must be done by higher level code. We just get its value
+ from vdict.
+
+ >>> subcode = EncDecSimple('wname', '_string_')
+ >>> e = EncDecA('nwname', 'wname', subcode)
+ >>> e.b2s(e.pack({'nwname': 2}, {}, ['A', 'BC']))
+ '\x01\x00A\x02\x00BC'
+
+ >>> subcode.autob2s = True # so that A and BC decode to py3k str
+ >>> e.unpack({'nwname': 2}, {}, b'\x01\x00A\x02\x00BC', 0)
+ (['A', 'BC'], 7)
+
+ When using noerror, the first sub-item that fails to decode
+ completely starts the None-s. Strings whose length fails to
+ decode are assumed to be zero bytes long as well, for the
+ purpose of showing the expected packet length:
+
+ >>> e.unpack({'nwname': 2}, {}, b'\x01\x00A\x02\x00', 0, noerror=True)
+ (['A', None], 7)
+ >>> e.unpack({'nwname': 2}, {}, b'\x01\x00A\x02', 0, noerror=True)
+ (['A', None], 5)
+ >>> e.unpack({'nwname': 3}, {}, b'\x01\x00A\x02', 0, noerror=True)
+ (['A', None, None], 7)
+
+ As a special case, supplying None for the sub-coder
+ makes the repeated item pack or unpack a simple byte
+ string. (Note that autob2s is not supported here.)
+ A too-short byte string is simply truncated!
+
+ >>> e = EncDecA('count', 'data', None)
+ >>> e.b2s(e.pack({'count': 5}, {}, b'12345'))
+ '12345'
+ >>> x = list(e.unpack({'count': 3}, {}, b'123', 0))
+ >>> x[0] = e.b2s(x[0])
+ >>> x
+ ['123', 3]
+ >>> x = list(e.unpack({'count': 3}, {}, b'12', 0, noerror=True))
+ >>> x[0] = e.b2s(x[0])
+ >>> x
+ ['12', 3]
+ """
+ def __init__(self, repeat, name, sub, aux=None):
+ super(EncDecA, self).__init__(name, aux)
+ self.repeat = repeat
+ self.name = name
+ self.sub = sub
+
+ def __repr__(self):
+ if self.aux is None:
+ return '{0}({1!r}, {2!r}, {3!r})'.format(self.__class__.__name__,
+ self.repeat, self.name, self.sub)
+ return '{0}({1!r}, {2!r}, {3!r}, {4!r})'.format(self.__class__.__name__,
+ self.repeat, self.name, self.sub, self.aux)
+
+ __str__ = __repr__
+
+ def apack(self, vdict, cdict, val):
+ "pack each val[i], for i in range(vdict[self.repeat])"
+ num = vdict[self.repeat]
+ assert num == len(val)
+ if self.sub is None:
+ assert isinstance(val, bytes)
+ return [val]
+ parts = []
+ for i in val:
+ parts.extend(self.sub.apack(vdict, cdict, i))
+ return parts
+
+ def unpack(self, vdict, cdict, bstring, offset, noerror=False):
+ "unpack repeatedly, per self.repeat, into new array."
+ num = vdict[self.repeat]
+ if num is None and noerror:
+ num = 0
+ else:
+ assert num >= 0
+ if self.sub is None:
+ nexto = offset + num
+ if len(bstring) < nexto and not noerror:
+ raise SequenceError('out of data '
+ 'while unpacking {0!r}'.format(self.name))
+ return bstring[offset:nexto], nexto
+ array = []
+ for i in range(num):
+ obj, offset = self.sub.unpack(vdict, cdict, bstring, offset,
+ noerror)
+ array.append(obj)
+ return array, offset
+
+class SequenceError(Exception):
+ "sequence error: item too big, or ran out of data"
+ pass
+
+class Sequencer(object):
+ r"""
+ A sequencer is an object that packs (marshals) or unpacks
+ (unmarshals) a series of objects, according to their EncDec
+ instances.
+
+ The objects themselves (and their values) come from, or
+ go into, a dictionary: <vdict>, the first argument to
+ pack/unpack.
+
+ Some fields may be conditional. The conditions are in a
+ separate dictionary (the second or <cdict> argument).
+
+ Some objects may be dictionaries or PFODs, e.g., they may
+ be a Plan9 qid or stat structure. These have their own
+ sub-encoding.
+
+ As with each encoder, we have both an apack() function
+ (returns a list of parts) and a plain pack(). Users should
+ mostly stick with plain pack().
+
+ >>> s = Sequencer('monty')
+ >>> s
+ Sequencer('monty')
+ >>> e = EncDecSimple('eggs', 2)
+ >>> s.append_encdec(None, e)
+ >>> s.append_encdec(None, EncDecSimple('spam', 1))
+ >>> s[0]
+ (None, EncDecSimple('eggs', 2))
+ >>> e.b2s(s.pack({'eggs': 513, 'spam': 65}, {}))
+ '\x01\x02A'
+
+ When particular fields are conditional, they appear in
+ packed output, or are taken from the byte-string during
+ unpacking, only if their condition is true.
+
+ As with struct, use unpack_from to start at an arbitrary
+ offset and/or omit verification that the entire byte-string
+ is consumed.
+
+ >>> s = Sequencer('python')
+ >>> s.append_encdec(None, e)
+ >>> s.append_encdec('.u', EncDecSimple('spam', 1))
+ >>> s[1]
+ ('.u', EncDecSimple('spam', 1))
+ >>> e.b2s(s.pack({'eggs': 513, 'spam': 65}, {'.u': True}))
+ '\x01\x02A'
+ >>> e.b2s(s.pack({'eggs': 513, 'spam': 65}, {'.u': False}))
+ '\x01\x02'
+
+ >>> d = {}
+ >>> s.unpack(d, {'.u': True}, b'\x01\x02A')
+ >>> print(d['eggs'], d['spam'])
+ 513 65
+ >>> d = {}
+ >>> s.unpack(d, {'.u': False}, b'\x01\x02A', 0)
+ Traceback (most recent call last):
+ ...
+ SequenceError: 1 byte(s) unconsumed
+ >>> s.unpack_from(d, {'.u': False}, b'\x01\x02A', 0)
+ 2
+ >>> print(d)
+ {'eggs': 513}
+
+ The incoming dictionary-like object may be pre-initialized
+ if you like; only sequences that decode are filled-in:
+
+ >>> d = {'eggs': None, 'spam': None}
+ >>> s.unpack_from(d, {'.u': False}, b'\x01\x02A', 0)
+ 2
+ >>> print(d['eggs'], d['spam'])
+ 513 None
+
+ Some objects may be arrays; if so their EncDec is actually
+ an EncDecA, the repeat count must be in the dictionary, and
+ the object itself must have a len() and be index-able:
+
+ >>> s = Sequencer('arr')
+ >>> s.append_encdec(None, EncDecSimple('n', 1))
+ >>> ae = EncDecSimple('array', 2)
+ >>> s.append_encdec(None, EncDecA('n', 'array', ae))
+ >>> ae.b2s(s.pack({'n': 2, 'array': [257, 514]}, {}))
+ '\x02\x01\x01\x02\x02'
+
+ Unpacking an array creates a list of the number of items.
+ The EncDec encoder that decodes the number of items needs to
+ occur first in the sequencer, so that the dictionary will have
+ acquired the repeat-count variable's value by the time we hit
+ the array's encdec:
+
+ >>> d = {}
+ >>> s.unpack(d, {}, b'\x01\x04\x00')
+ >>> d['n'], d['array']
+ (1, [4])
+ """
+ def __init__(self, name):
+ self.name = name
+ self._codes = []
+ self.debug = False # or sys.stderr
+
+ def __repr__(self):
+ return '{0}({1!r})'.format(self.__class__.__name__, self.name)
+
+ __str__ = __repr__
+
+ def __len__(self):
+ return len(self._codes)
+
+ def __iter__(self):
+ return iter(self._codes)
+
+ def __getitem__(self, index):
+ return self._codes[index]
+
+ def dprint(self, *args, **kwargs):
+ if not self.debug:
+ return
+ if isinstance(self.debug, bool):
+ dest = sys.stdout
+ else:
+ dest = self.debug
+ print(*args, file=dest, **kwargs)
+
+ def append_encdec(self, cond, code):
+ "add EncDec en/de-coder, conditional on cond"
+ self._codes.append((cond, code))
+
+ def apack(self, vdict, cdict):
+ """
+ Produce packed representation of each field.
+ """
+ packed_data = []
+ for cond, code in self._codes:
+ # Skip this item if it's conditional on a false thing.
+ if cond is not None and not cdict[cond]:
+ self.dprint('skip %r - %r is False' % (code, cond))
+ continue
+
+ # Pack the item.
+ self.dprint('pack %r - no cond or %r is True' % (code, cond))
+ packed_data.extend(code.apack(vdict, cdict, vdict[code.name]))
+
+ return packed_data
+
+ def pack(self, vdict, cdict):
+ """
+ Flatten packed data.
+ """
+ return b''.join(self.apack(vdict, cdict))
+
+ def unpack_from(self, vdict, cdict, bstring, offset=0, noerror=False):
+ """
+ Unpack from byte string.
+
+ The values are unpacked into a dictionary vdict;
+ some of its entries may themselves be ordered
+ dictionaries created by typedefed codes.
+
+ Raises SequenceError if the string is too short,
+ unless you set noerror, in which case we assume
+ you want to see what you can get out of the data.
+ """
+ for cond, code in self._codes:
+ # Skip this item if it's conditional on a false thing.
+ if cond is not None and not cdict[cond]:
+ self.dprint('skip %r - %r is False' % (code, cond))
+ continue
+
+ # Unpack the item.
+ self.dprint('unpack %r - no cond or %r is True' % (code, cond))
+ obj, offset = code.unpack(vdict, cdict, bstring, offset, noerror)
+ vdict[code.name] = obj
+
+ return offset
+
+ def unpack(self, vdict, cdict, bstring, noerror=False):
+ """
+ Like unpack_from but unless noerror=True, requires that
+ we completely use up the given byte string.
+ """
+ offset = self.unpack_from(vdict, cdict, bstring, 0, noerror)
+ if not noerror and offset != len(bstring):
+ raise SequenceError('{0} byte(s) unconsumed'.format(
+ len(bstring) - offset))
+
+if __name__ == '__main__':
+ import doctest
+ doctest.testmod()
diff --git a/pytest/testconf.ini.sample b/pytest/testconf.ini.sample
new file mode 100644
index 000000000000..a9e252ac7419
--- /dev/null
+++ b/pytest/testconf.ini.sample
@@ -0,0 +1,16 @@
+# test configuration
+
+[client]
+server = localhost
+port = 12345
+# timeout is in seconds
+timeout = 0.1
+loglevel = INFO
+logfile = ./ctest.log
+# logfmt = ...
+# protocol = 9p2000, 9p2000.u, or 9p2000.L
+protocol = 9p2000.L
+only_dotl = true
+may_downgrade = False
+uname = anonymous
+n_uname = 1001
diff --git a/request.c b/request.c
new file mode 100644
index 000000000000..357bd23bf98a
--- /dev/null
+++ b/request.c
@@ -0,0 +1,1440 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <errno.h>
+#include <sys/param.h>
+#include <sys/uio.h>
+#if defined(__FreeBSD__)
+#include <sys/sbuf.h>
+#else
+#include "sbuf/sbuf.h"
+#endif
+#include "lib9p.h"
+#include "lib9p_impl.h"
+#include "fcall.h"
+#include "fid.h"
+#include "hashtable.h"
+#include "log.h"
+#include "linux_errno.h"
+#include "backend/backend.h"
+#include "threadpool.h"
+
+#define N(x) (sizeof(x) / sizeof(x[0]))
+
+static int l9p_dispatch_tversion(struct l9p_request *req);
+static int l9p_dispatch_tattach(struct l9p_request *req);
+static int l9p_dispatch_tclunk(struct l9p_request *req);
+static int l9p_dispatch_tcreate(struct l9p_request *req);
+static int l9p_dispatch_topen(struct l9p_request *req);
+static int l9p_dispatch_tread(struct l9p_request *req);
+static int l9p_dispatch_tremove(struct l9p_request *req);
+static int l9p_dispatch_tstat(struct l9p_request *req);
+static int l9p_dispatch_twalk(struct l9p_request *req);
+static int l9p_dispatch_twrite(struct l9p_request *req);
+static int l9p_dispatch_twstat(struct l9p_request *req);
+static int l9p_dispatch_tstatfs(struct l9p_request *req);
+static int l9p_dispatch_tlopen(struct l9p_request *req);
+static int l9p_dispatch_tlcreate(struct l9p_request *req);
+static int l9p_dispatch_tsymlink(struct l9p_request *req);
+static int l9p_dispatch_tmknod(struct l9p_request *req);
+static int l9p_dispatch_trename(struct l9p_request *req);
+static int l9p_dispatch_treadlink(struct l9p_request *req);
+static int l9p_dispatch_tgetattr(struct l9p_request *req);
+static int l9p_dispatch_tsetattr(struct l9p_request *req);
+static int l9p_dispatch_txattrwalk(struct l9p_request *req);
+static int l9p_dispatch_txattrcreate(struct l9p_request *req);
+static int l9p_dispatch_treaddir(struct l9p_request *req);
+static int l9p_dispatch_tfsync(struct l9p_request *req);
+static int l9p_dispatch_tlock(struct l9p_request *req);
+static int l9p_dispatch_tgetlock(struct l9p_request *req);
+static int l9p_dispatch_tlink(struct l9p_request *req);
+static int l9p_dispatch_tmkdir(struct l9p_request *req);
+static int l9p_dispatch_trenameat(struct l9p_request *req);
+static int l9p_dispatch_tunlinkat(struct l9p_request *req);
+
+/*
+ * Each Txxx handler has a "must run" flag. If it is false,
+ * we check for a flush request before calling the handler.
+ * If a flush is already requested we can instantly fail the
+ * request with EINTR.
+ *
+ * Tclunk and Tremove must run because they make their fids
+ * become invalid. Tversion and Tattach should never get
+ * a flush request applied (it makes no sense as the connection
+ * is not really running yet), so it should be harmless to
+ * set them either way, but for now we have them as must-run.
+ * Flushing a Tflush is not really allowed either, so we keep
+ * Tflush as must-run too (although it runs without being
+ * threaded anyway).
+ *
+ * NOTE(review): the 9P2000.L table also marks Tlock and Tgetlock
+ * as must-run; the reason is not recorded here.
+ */
+struct l9p_handler {
+ enum l9p_ftype type; /* request message type this entry serves */
+ int (*handler)(struct l9p_request *); /* returns 0 or an errno value */
+ bool must_run; /* true: run even if a flush was requested pre-dispatch */
+};
+
+static const struct l9p_handler l9p_handlers_no_version[] = { /* before version negotiation: Tversion only */
+ {L9P_TVERSION, l9p_dispatch_tversion, true},
+};
+
+static const struct l9p_handler l9p_handlers_base[] = { /* plain 9P2000; same entries as l9p_handlers_dotu */
+ {L9P_TVERSION, l9p_dispatch_tversion, true},
+ {L9P_TATTACH, l9p_dispatch_tattach, true},
+ {L9P_TCLUNK, l9p_dispatch_tclunk, true},
+ {L9P_TFLUSH, l9p_threadpool_tflush, true},
+ {L9P_TCREATE, l9p_dispatch_tcreate, false},
+ {L9P_TOPEN, l9p_dispatch_topen, false},
+ {L9P_TREAD, l9p_dispatch_tread, false},
+ {L9P_TWRITE, l9p_dispatch_twrite, false},
+ {L9P_TREMOVE, l9p_dispatch_tremove, true},
+ {L9P_TSTAT, l9p_dispatch_tstat, false},
+ {L9P_TWALK, l9p_dispatch_twalk, false},
+ {L9P_TWSTAT, l9p_dispatch_twstat, false}
+};
+static const struct l9p_handler l9p_handlers_dotu[] = { /* 9P2000.u; same entries as l9p_handlers_base */
+ {L9P_TVERSION, l9p_dispatch_tversion, true},
+ {L9P_TATTACH, l9p_dispatch_tattach, true},
+ {L9P_TCLUNK, l9p_dispatch_tclunk, true},
+ {L9P_TFLUSH, l9p_threadpool_tflush, true},
+ {L9P_TCREATE, l9p_dispatch_tcreate, false},
+ {L9P_TOPEN, l9p_dispatch_topen, false},
+ {L9P_TREAD, l9p_dispatch_tread, false},
+ {L9P_TWRITE, l9p_dispatch_twrite, false},
+ {L9P_TREMOVE, l9p_dispatch_tremove, true},
+ {L9P_TSTAT, l9p_dispatch_tstat, false},
+ {L9P_TWALK, l9p_dispatch_twalk, false},
+ {L9P_TWSTAT, l9p_dispatch_twstat, false}
+};
+static const struct l9p_handler l9p_handlers_dotL[] = { /* 9P2000.L: the twelve base entries plus the .L-only requests */
+ {L9P_TVERSION, l9p_dispatch_tversion, true},
+ {L9P_TATTACH, l9p_dispatch_tattach, true},
+ {L9P_TCLUNK, l9p_dispatch_tclunk, true},
+ {L9P_TFLUSH, l9p_threadpool_tflush, true},
+ {L9P_TCREATE, l9p_dispatch_tcreate, false},
+ {L9P_TOPEN, l9p_dispatch_topen, false},
+ {L9P_TREAD, l9p_dispatch_tread, false},
+ {L9P_TWRITE, l9p_dispatch_twrite, false},
+ {L9P_TREMOVE, l9p_dispatch_tremove, true},
+ {L9P_TSTAT, l9p_dispatch_tstat, false},
+ {L9P_TWALK, l9p_dispatch_twalk, false},
+ {L9P_TWSTAT, l9p_dispatch_twstat, false},
+ {L9P_TSTATFS, l9p_dispatch_tstatfs, false},
+ {L9P_TLOPEN, l9p_dispatch_tlopen, false},
+ {L9P_TLCREATE, l9p_dispatch_tlcreate, false},
+ {L9P_TSYMLINK, l9p_dispatch_tsymlink, false},
+ {L9P_TMKNOD, l9p_dispatch_tmknod, false},
+ {L9P_TRENAME, l9p_dispatch_trename, false},
+ {L9P_TREADLINK, l9p_dispatch_treadlink, false},
+ {L9P_TGETATTR, l9p_dispatch_tgetattr, false},
+ {L9P_TSETATTR, l9p_dispatch_tsetattr, false},
+ {L9P_TXATTRWALK, l9p_dispatch_txattrwalk, false},
+ {L9P_TXATTRCREATE, l9p_dispatch_txattrcreate, false},
+ {L9P_TREADDIR, l9p_dispatch_treaddir, false},
+ {L9P_TFSYNC, l9p_dispatch_tfsync, false},
+ {L9P_TLOCK, l9p_dispatch_tlock, true}, /* NOTE(review): must-run; rationale not documented */
+ {L9P_TGETLOCK, l9p_dispatch_tgetlock, true}, /* NOTE(review): must-run; rationale not documented */
+ {L9P_TLINK, l9p_dispatch_tlink, false},
+ {L9P_TMKDIR, l9p_dispatch_tmkdir, false},
+ {L9P_TRENAMEAT, l9p_dispatch_trenameat, false},
+ {L9P_TUNLINKAT, l9p_dispatch_tunlinkat, false},
+};
+
+/*
+ * NB: version index 0 is reserved for new connections, and
+ * is a protocol that handles only L9P_TVERSION. Once we get a
+ * valid version, we start a new session using its dispatch table.
+ *
+ * The array index doubles as the enum l9p_version value: the
+ * dispatcher indexes this table with conn->lc_version and Tversion
+ * casts the matching index back to enum l9p_version, so the order
+ * of the rows must not change.
+ */
+static const struct {
+ const char *name; /* version string as it appears in Tversion/Rversion */
+ const struct l9p_handler *handlers; /* dispatch table for this version */
+ int n_handlers; /* number of entries in handlers */
+} l9p_versions[] = {
+ { "<none>", l9p_handlers_no_version, N(l9p_handlers_no_version) },
+ { "9P2000", l9p_handlers_base, N(l9p_handlers_base) },
+ { "9P2000.u", l9p_handlers_dotu, N(l9p_handlers_dotu), },
+ { "9P2000.L", l9p_handlers_dotL, N(l9p_handlers_dotL), },
+};
+
+/*
+ * Find the handler for this request's message type in the dispatch
+ * table of the connection's current protocol version and invoke it.
+ * Sending the reply is left to our caller.
+ *
+ * Returns the handler's result, EINTR when a flush was requested
+ * before dispatch and the handler is not marked must-run, or ENOSYS
+ * when the current version has no handler for the message type.
+ */
+int
+l9p_dispatch_request(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	const struct l9p_handler *table, *hp = NULL;
+	size_t idx, count;
+	bool flush_requested;
+#if defined(L9P_DEBUG)
+	struct sbuf *sb;
+#endif
+
+	flush_requested =
+	    (req->lr_flushstate == L9P_FLUSH_REQUESTED_PRE_START);
+
+	/* Linear search; hp stays NULL when nothing matches. */
+	table = l9p_versions[conn->lc_version].handlers;
+	count = (size_t)l9p_versions[conn->lc_version].n_handlers;
+	for (idx = 0; idx < count; idx++) {
+		if (table[idx].type == req->lr_req.hdr.type) {
+			hp = &table[idx];
+			break;
+		}
+	}
+
+#if defined(L9P_DEBUG)
+	sb = sbuf_new_auto();
+	if (flush_requested) {
+		sbuf_cat(sb, "FLUSH requested pre-dispatch");
+		if (hp != NULL && hp->must_run)
+			sbuf_cat(sb, ", but must run");
+		sbuf_cat(sb, ": ");
+	}
+	l9p_describe_fcall(&req->lr_req, conn->lc_version, sb);
+	sbuf_finish(sb);
+
+	L9P_LOG(L9P_DEBUG, "%s", sbuf_data(sb));
+	sbuf_delete(sb);
+#endif
+
+	if (hp == NULL) {
+		L9P_LOG(L9P_WARNING, "unknown request of type %d",
+		    req->lr_req.hdr.type);
+		return (ENOSYS);
+	}
+
+	/* A pending flush cancels everything except must-run handlers. */
+	if (flush_requested && !hp->must_run)
+		return (EINTR);
+	return (hp->handler(req));
+}
+
+/*
+ * Map a BSD errno onto the number carried in a 9P2000/9P2000.u
+ * error reply.  Codes with a nonzero table entry are replaced by
+ * that entry, other codes not greater than ERANGE pass through
+ * unchanged, and everything else is reported as EIO.
+ */
+static inline int
+e29p(int errnum)
+{
+	static int const remap[] = {
+		[ENOTEMPTY] = EPERM,
+		[EDQUOT] = EPERM,
+		[ENOSYS] = EPERM,	/* ??? */
+	};
+	int mapped;
+
+	/* The cast sends negative values past the end of the table. */
+	mapped = ((size_t)errnum < N(remap)) ? remap[errnum] : 0;
+	if (mapped != 0)
+		return (mapped);
+	return (errnum <= ERANGE ? errnum : EIO);	/* EIO: ??? */
+}
+
+/*
+ * Translate BSD errno to Linux errno.
+ *
+ * A negative argument is taken to be an already-Linux errno and is
+ * returned with its sign flipped.  Otherwise a nonzero table entry
+ * wins, then values up to ERANGE pass through unchanged; anything
+ * left over is logged and reported as LINUX_ENOTRECOVERABLE.
+ */
+static inline int
+e2linux(int errnum)
+{
+ static int const table[] = {
+ [EDEADLK] = LINUX_EDEADLK,
+ [EAGAIN] = LINUX_EAGAIN,
+ [EINPROGRESS] = LINUX_EINPROGRESS,
+ [EALREADY] = LINUX_EALREADY,
+ [ENOTSOCK] = LINUX_ENOTSOCK,
+ [EDESTADDRREQ] = LINUX_EDESTADDRREQ,
+ [EMSGSIZE] = LINUX_EMSGSIZE,
+ [EPROTOTYPE] = LINUX_EPROTOTYPE,
+ [ENOPROTOOPT] = LINUX_ENOPROTOOPT,
+ [EPROTONOSUPPORT] = LINUX_EPROTONOSUPPORT,
+ [ESOCKTNOSUPPORT] = LINUX_ESOCKTNOSUPPORT,
+ [EOPNOTSUPP] = LINUX_EOPNOTSUPP,
+ [EPFNOSUPPORT] = LINUX_EPFNOSUPPORT,
+ [EAFNOSUPPORT] = LINUX_EAFNOSUPPORT,
+ [EADDRINUSE] = LINUX_EADDRINUSE,
+ [EADDRNOTAVAIL] = LINUX_EADDRNOTAVAIL,
+ [ENETDOWN] = LINUX_ENETDOWN,
+ [ENETUNREACH] = LINUX_ENETUNREACH,
+ [ENETRESET] = LINUX_ENETRESET,
+ [ECONNABORTED] = LINUX_ECONNABORTED,
+ [ECONNRESET] = LINUX_ECONNRESET,
+ [ENOBUFS] = LINUX_ENOBUFS,
+ [EISCONN] = LINUX_EISCONN,
+ [ENOTCONN] = LINUX_ENOTCONN,
+ [ESHUTDOWN] = LINUX_ESHUTDOWN,
+ [ETOOMANYREFS] = LINUX_ETOOMANYREFS,
+ [ETIMEDOUT] = LINUX_ETIMEDOUT,
+ [ECONNREFUSED] = LINUX_ECONNREFUSED,
+ [ELOOP] = LINUX_ELOOP,
+ [ENAMETOOLONG] = LINUX_ENAMETOOLONG,
+ [EHOSTDOWN] = LINUX_EHOSTDOWN,
+ [EHOSTUNREACH] = LINUX_EHOSTUNREACH,
+ [ENOTEMPTY] = LINUX_ENOTEMPTY,
+ [EPROCLIM] = LINUX_EAGAIN, /* folded into EAGAIN rather than a dedicated code */
+ [EUSERS] = LINUX_EUSERS,
+ [EDQUOT] = LINUX_EDQUOT,
+ [ESTALE] = LINUX_ESTALE,
+ [EREMOTE] = LINUX_EREMOTE,
+ /* EBADRPC = unmappable? */
+ /* ERPCMISMATCH = unmappable? */
+ /* EPROGUNAVAIL = unmappable? */
+ /* EPROGMISMATCH = unmappable? */
+ /* EPROCUNAVAIL = unmappable? */
+ [ENOLCK] = LINUX_ENOLCK,
+ [ENOSYS] = LINUX_ENOSYS,
+ /* EFTYPE = unmappable? */
+ /* EAUTH = unmappable? */
+ /* ENEEDAUTH = unmappable? */
+ [EIDRM] = LINUX_EIDRM,
+ [ENOMSG] = LINUX_ENOMSG,
+ [EOVERFLOW] = LINUX_EOVERFLOW,
+ [ECANCELED] = LINUX_ECANCELED,
+ [EILSEQ] = LINUX_EILSEQ,
+ /* EDOOFUS = unmappable? */
+ [EBADMSG] = LINUX_EBADMSG,
+ [EMULTIHOP] = LINUX_EMULTIHOP,
+ [ENOLINK] = LINUX_ENOLINK,
+ [EPROTO] = LINUX_EPROTO,
+ /* ENOTCAPABLE = unmappable? */
+#ifdef ECAPMODE
+ [ECAPMODE] = EPERM, /* native EPERM, not a LINUX_* constant */
+#endif
+#ifdef ENOTRECOVERABLE
+ [ENOTRECOVERABLE] = LINUX_ENOTRECOVERABLE,
+#endif
+#ifdef EOWNERDEAD
+ [EOWNERDEAD] = LINUX_EOWNERDEAD,
+#endif
+ };
+
+ /*
+ * In case we want to return a raw Linux errno, allow negative
+ * values a la Linux kernel internals.
+ *
+ * Values up to ERANGE are shared across systems (see
+ * linux_errno.h), except for EAGAIN.
+ */
+ if (errnum < 0)
+ return (-errnum);
+
+ if ((size_t)errnum < N(table) && table[errnum] != 0) /* unset slots are 0 and fall through */
+ return (table[errnum]);
+
+ if (errnum <= ERANGE)
+ return (errnum);
+
+ L9P_LOG(L9P_WARNING, "cannot map errno %d to anything reasonable",
+ errnum);
+
+ return (LINUX_ENOTRECOVERABLE); /* ??? */
+}
+
+/*
+ * Send response to request, or possibly just drop request.
+ * We also need to know whether to remove the request from
+ * the tag hash table.
+ *
+ * When req->lr_error is 0 the reply type is the request type + 1;
+ * otherwise an Rlerror (9P2000.L) or Rerror (other versions) is
+ * built from the translated errno.  If packing the reply fails,
+ * the response is dropped instead of sent.  In every case both
+ * fcalls are released and req itself is freed before returning,
+ * so the caller must not touch req afterwards.
+ */
+void
+l9p_respond(struct l9p_request *req, bool drop, bool rmtag)
+{
+ struct l9p_connection *conn = req->lr_conn;
+ size_t iosize;
+#if defined(L9P_DEBUG)
+ struct sbuf *sb;
+ const char *ftype;
+#endif
+ int error;
+
+ req->lr_resp.hdr.tag = req->lr_req.hdr.tag;
+
+ error = req->lr_error;
+ if (error == 0)
+ req->lr_resp.hdr.type = req->lr_req.hdr.type + 1;
+ else {
+ if (conn->lc_version == L9P_2000L) {
+ req->lr_resp.hdr.type = L9P_RLERROR;
+ req->lr_resp.error.errnum = (uint32_t)e2linux(error);
+ } else {
+ req->lr_resp.hdr.type = L9P_RERROR;
+ req->lr_resp.error.ename = strerror(error);
+ req->lr_resp.error.errnum = (uint32_t)e29p(error);
+ }
+ }
+
+#if defined(L9P_DEBUG)
+ sb = sbuf_new_auto();
+ l9p_describe_fcall(&req->lr_resp, conn->lc_version, sb);
+ sbuf_finish(sb);
+
+ switch (req->lr_flushstate) { /* NOTE(review): no default case; ftype stays unset for any other value */
+ case L9P_FLUSH_NONE:
+ ftype = "";
+ break;
+ case L9P_FLUSH_REQUESTED_PRE_START:
+ ftype = "FLUSH requested pre-dispatch: ";
+ break;
+ case L9P_FLUSH_REQUESTED_POST_START:
+ ftype = "FLUSH requested while running: ";
+ break;
+ case L9P_FLUSH_TOOLATE:
+ ftype = "FLUSH requested too late: ";
+ break;
+ }
+ L9P_LOG(L9P_DEBUG, "%s%s%s",
+ drop ? "DROP: " : "", ftype, sbuf_data(sb));
+ sbuf_delete(sb);
+#endif
+
+ error = drop ? 0 : /* nothing to pack when the reply is being dropped */
+ l9p_pufcall(&req->lr_resp_msg, &req->lr_resp, conn->lc_version);
+ if (rmtag)
+ ht_remove(&conn->lc_requests, req->lr_req.hdr.tag);
+ if (error != 0) {
+ L9P_LOG(L9P_ERROR, "cannot pack response");
+ drop = true; /* an unpackable reply is dropped rather than sent */
+ }
+
+ if (drop) {
+ conn->lc_lt.lt_drop_response(req,
+ req->lr_resp_msg.lm_iov, req->lr_resp_msg.lm_niov,
+ conn->lc_lt.lt_aux);
+ } else {
+ iosize = req->lr_resp_msg.lm_size;
+
+ /*
+ * Include I/O size in calculation for Rread and
+ * Rreaddir responses.
+ */
+ if (req->lr_resp.hdr.type == L9P_RREAD ||
+ req->lr_resp.hdr.type == L9P_RREADDIR)
+ iosize += req->lr_resp.io.count;
+
+ conn->lc_lt.lt_send_response(req,
+ req->lr_resp_msg.lm_iov, req->lr_resp_msg.lm_niov,
+ iosize, conn->lc_lt.lt_aux);
+ }
+
+ l9p_freefcall(&req->lr_req);
+ l9p_freefcall(&req->lr_resp);
+
+ free(req);
+}
+
+/*
+ * Prepare a message object for walking over the data area of a
+ * read or write request: producing the data when mode is L9P_PACK,
+ * scanning it when mode is L9P_UNPACK.  The readdir code uses this
+ * with L9P_PACK to emit entries; L9P_UNPACK is accepted so that
+ * debug code can re-scan the data afterwards.
+ *
+ * The request's data iov must already point at the start of the
+ * data buffer (see the l9p_seek_iov call in l9p_dispatch_tread).
+ */
+void
+l9p_init_msg(struct l9p_message *msg, struct l9p_request *req,
+    enum l9p_pack_mode mode)
+{
+
+	/* Start with an empty message and the cursor at the very front. */
+	msg->lm_size = 0;
+	msg->lm_mode = mode;
+	msg->lm_cursor_iov = 0;
+	msg->lm_cursor_offset = 0;
+
+	/* Take a private copy of the request's data iovec array. */
+	msg->lm_niov = req->lr_data_niov;
+	memcpy(msg->lm_iov, req->lr_data_iov,
+	    req->lr_data_niov * sizeof(struct iovec));
+}
+
+enum fid_lookup_flags { /* constraint bits OR-ed together into fid_lookup()'s flags argument */
+ F_REQUIRE_OPEN = 0x01, /* require that the file be marked OPEN */
+ F_REQUIRE_DIR = 0x02, /* require that the file be marked ISDIR */
+ F_REQUIRE_XATTR = 0x04, /* require that the file be marked XATTR */
+ F_REQUIRE_AUTH = 0x08, /* require that the fid be marked AUTH */
+ F_FORBID_OPEN = 0x10, /* forbid that the file be marked OPEN */
+ F_FORBID_DIR = 0x20, /* forbid that the file be marked ISDIR */
+ F_FORBID_XATTR = 0x40, /* forbid that the file be marked XATTR */
+ F_ALLOW_AUTH = 0x80, /* allow that the fid be marked AUTH */
+};
+
+/*
+ * Resolve a fid number to its l9p_fid.  An unknown fid yields the
+ * caller-supplied errno: different ops must answer "no such fid"
+ * with EIO, EINVAL or (as qemu does) ENOENT, so the caller chooses.
+ *
+ * The flags argument adds constraints: the fid must (or must not)
+ * be open, be a directory, or be an xattr fid.  Auth fids are
+ * rejected unless F_ALLOW_AUTH or F_REQUIRE_AUTH is given, since
+ * only one op needs an auth fid and most ops forbid them.
+ *
+ * A violated constraint returns EINVAL, except the directory
+ * constraints, which return ENOTDIR or EISDIR.  On success 0 is
+ * returned and *afile is set; on failure *afile is left untouched.
+ */
+static inline int
+fid_lookup(struct l9p_connection *conn, uint32_t fid, int err, int flags,
+    struct l9p_fid **afile)
+{
+	struct l9p_fid *f = ht_find(&conn->lc_files, fid);
+	bool auth_ok;
+
+	if (f == NULL)
+		return (err);
+
+	/*
+	 * Bug trap for now.  Once requests run multithreaded or
+	 * async this has to turn into "return EINVAL" (or err), and
+	 * we may need a way to mark a fid as busy in an async op:
+	 * usable for some purposes but not others until the op
+	 * completes or is aborted.
+	 */
+	assert(l9p_fid_isvalid(f));
+
+	/*
+	 * We are expanded inline with a constant flags argument, so
+	 * tests for bits the caller did not ask for compile away.
+	 */
+	if ((flags & F_REQUIRE_OPEN) != 0 && !l9p_fid_isopen(f))
+		return (EINVAL);
+	if ((flags & F_FORBID_OPEN) != 0 && l9p_fid_isopen(f))
+		return (EINVAL);
+	if ((flags & F_REQUIRE_DIR) != 0 && !l9p_fid_isdir(f))
+		return (ENOTDIR);
+	if ((flags & F_FORBID_DIR) != 0 && l9p_fid_isdir(f))
+		return (EISDIR);
+	if ((flags & F_REQUIRE_XATTR) != 0 && !l9p_fid_isxattr(f))
+		return (EINVAL);
+	if ((flags & F_FORBID_XATTR) != 0 && l9p_fid_isxattr(f))
+		return (EINVAL);
+
+	/* Auth fids need ALLOW or REQUIRE; others must not be REQUIREd. */
+	if (l9p_fid_isauth(f))
+		auth_ok = (flags & (F_REQUIRE_AUTH | F_ALLOW_AUTH)) != 0;
+	else
+		auth_ok = (flags & F_REQUIRE_AUTH) == 0;
+	if (!auth_ok)
+		return (EINVAL);
+
+	*afile = f;
+	return (0);
+}
+
+/*
+ * Append one variable-size stat object to msg.
+ *
+ * Returns 0 when the whole object was packed, in which case the
+ * response's io count grows by the object's size.  Returns -1 when
+ * the object would push the response past the count the client
+ * asked for, or when packing fails.
+ *
+ * A partially packed object leaves msg unusable, so callers must
+ * pass a private l9p_message.  The stat object is always freed.
+ */
+int
+l9p_pack_stat(struct l9p_message *msg, struct l9p_request *req,
+    struct l9p_stat *st)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	uint16_t size = l9p_sizeof_stat(st, conn->lc_version);
+	int ret;
+
+	assert(msg->lm_mode == L9P_PACK);
+
+	/* Packing is attempted only when the object still fits. */
+	if (req->lr_resp.io.count + size <= req->lr_req.io.count &&
+	    l9p_pustat(msg, st, conn->lc_version) >= 0) {
+		req->lr_resp.io.count += size;
+		ret = 0;
+	} else
+		ret = -1;
+
+	l9p_freestat(st);
+	return (ret);
+}
+
+/*
+ * Tversion: negotiate protocol version and maximum message size.
+ *
+ * The client's version string must match an entry of l9p_versions;
+ * otherwise ENOSYS is returned.  The connection then runs at the
+ * lower of the client's version and the server's maximum, and the
+ * Rversion reply names that negotiated version, so the client and
+ * the dispatch table selected by conn->lc_version agree.  The
+ * message size becomes the smaller of the client's msize and the
+ * connection's current msize.
+ *
+ * Returns ENOMEM, leaving the connection unchanged, if the reply
+ * string cannot be allocated.
+ */
+static int
+l9p_dispatch_tversion(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_server *server = conn->lc_server;
+	enum l9p_version remote_version = L9P_INVALID_VERSION;
+	enum l9p_version version;
+	size_t i;
+	char *reply_name;
+
+	/* The table index doubles as the enum l9p_version value. */
+	for (i = 0; i < N(l9p_versions); i++) {
+		if (strcmp(req->lr_req.version.version,
+		    l9p_versions[i].name) == 0) {
+			remote_version = (enum l9p_version)i;
+			break;
+		}
+	}
+
+	if (remote_version == L9P_INVALID_VERSION) {
+		L9P_LOG(L9P_ERROR, "unsupported remote version: %s",
+		    req->lr_req.version.version);
+		return (ENOSYS);
+	}
+
+	L9P_LOG(L9P_INFO, "remote version: %s",
+	    l9p_versions[remote_version].name);
+	L9P_LOG(L9P_INFO, "local version: %s",
+	    l9p_versions[server->ls_max_version].name);
+
+	/*
+	 * Build the reply string before touching the connection so
+	 * that an allocation failure leaves it as it was.
+	 */
+	version = MIN(remote_version, server->ls_max_version);
+	reply_name = strdup(l9p_versions[version].name);
+	if (reply_name == NULL)
+		return (ENOMEM);
+
+	conn->lc_version = version;
+	conn->lc_msize = MIN(req->lr_req.version.msize, conn->lc_msize);
+	/*
+	 * NOTE(review): an msize below 24 makes this subtraction wrap
+	 * if lc_max_io_size is unsigned; confirm whether tiny msize
+	 * values are rejected elsewhere.
+	 */
+	conn->lc_max_io_size = conn->lc_msize - 24;
+	req->lr_resp.version.version = reply_name;
+	req->lr_resp.version.msize = conn->lc_msize;
+	return (0);
+}
+
+/*
+ * Tattach: allocate the client's fid and let the backend attach it.
+ * A supplied afid must name an existing auth fid.  On success the
+ * new fid is marked valid (and as a directory when the returned qid
+ * says so); on failure it is removed again.
+ */
+static int
+l9p_dispatch_tattach(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	struct l9p_fid *newfid;
+	int rc;
+
+	/*
+	 * Tauth does not exist yet, but check the auth fid anyway,
+	 * and do it first: if it is bad we can leave before anything
+	 * has been allocated.
+	 */
+	if (req->lr_req.tattach.afid == L9P_NOFID) {
+		req->lr_fid2 = NULL;
+	} else {
+		rc = fid_lookup(conn, req->lr_req.tattach.afid, EINVAL,
+		    F_REQUIRE_AUTH, &req->lr_fid2);
+		if (rc != 0)
+			return (rc);
+	}
+
+	newfid = l9p_connection_alloc_fid(conn, req->lr_req.hdr.fid);
+	if (newfid == NULL)
+		return (EINVAL);
+
+	backend = conn->lc_server->ls_backend;
+	req->lr_fid = newfid;
+
+	/* Plain 9P2000 has no n_uname; hand the backend NONUNAME. */
+	if (conn->lc_version == L9P_2000)
+		req->lr_req.tattach.n_uname = L9P_NONUNAME;
+
+	rc = backend->attach(backend->softc, req);
+	if (rc != 0) {
+		/* Attach failed: the fid never becomes valid. */
+		l9p_connection_remove_fid(conn, newfid);
+		return (rc);
+	}
+
+	/* It certainly *should* be a directory here... */
+	l9p_fid_setvalid(newfid);
+	if (req->lr_resp.rattach.qid.type & L9P_QTDIR)
+		l9p_fid_setdir(newfid);
+	return (0);
+}
+
+/*
+ * Tclunk: release a fid.  The fid is invalidated before the backend
+ * runs and removed afterwards whatever the backend returns.
+ */
+static int
+l9p_dispatch_tclunk(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	struct l9p_fid *victim;
+	int rc;
+
+	/* Clunk is the only way to get rid of an auth fid: allow them. */
+	rc = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_ALLOW_AUTH, &victim);
+	if (rc != 0)
+		return (rc);
+
+	backend = conn->lc_server->ls_backend;
+	l9p_fid_unsetvalid(victim);
+
+	/*
+	 * An xattr fid implies that the backend has xattrclunk: it
+	 * could only be NULL if xattrwalk and xattrcreate are NULL
+	 * or always fail.
+	 *
+	 * Q: do we want to allow async xattrclunk in case of very
+	 * large xattr create?  This will make things difficult,
+	 * so probably not.
+	 */
+	rc = l9p_fid_isxattr(victim) ?
+	    backend->xattrclunk(backend->softc, victim) :
+	    backend->clunk(backend->softc, victim);
+
+	/* The fid is gone now, error or not. */
+	l9p_connection_remove_fid(conn, victim);
+	return (rc);
+}
+
+/*
+ * Tcreate: create a new entry in the directory named by the fid.
+ * When a regular file is created, the fid stops being a directory
+ * fid and becomes the open new file.
+ */
+static int
+l9p_dispatch_tcreate(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	uint32_t perm;
+	int rc;
+
+	/* The fid must be a directory that has not been opened. */
+	rc = fid_lookup(conn, req->lr_req.hdr.fid, EINVAL,
+	    F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+	backend = conn->lc_server->ls_backend;
+	/* Sampled before the backend gets to see the request. */
+	perm = req->lr_req.tcreate.perm;
+#define MKDIR_OR_SIMILAR \
+	(L9P_DMDIR | L9P_DMSYMLINK | L9P_DMNAMEDPIPE | L9P_DMSOCKET | L9P_DMDEVICE)
+
+	/*
+	 * TODO:
+	 *  - check new file name
+	 *  - break out different kinds of create (file vs mkdir etc)
+	 *  - add async file-create (leaves req->lr_fid in limbo)
+	 */
+	rc = backend->create(backend->softc, req);
+	if (rc != 0)
+		return (rc);
+
+	/* Only a plain file-create turns the fid into an open file. */
+	if ((perm & MKDIR_OR_SIMILAR) == 0) {
+		l9p_fid_unsetdir(req->lr_fid);
+		l9p_fid_setopen(req->lr_fid);
+	}
+	return (0);
+}
+
+/*
+ * Topen: open the file behind a fid that is neither open already
+ * nor an xattr fid.  The fid is marked open once the backend
+ * succeeds.
+ */
+static int
+l9p_dispatch_topen(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	int rc;
+
+	rc = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_FORBID_OPEN | F_FORBID_XATTR, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+	/*
+	 * TODO:
+	 *  - add async open (leaves req->lr_fid in limbo)
+	 */
+	backend = conn->lc_server->ls_backend;
+	rc = backend->open(backend->softc, req);
+	if (rc != 0)
+		return (rc);
+
+	l9p_fid_setopen(req->lr_fid);
+	return (0);
+}
+
+/*
+ * Tread: read from an xattr fid or from an open file.  A fid that
+ * is neither gets EINVAL.
+ */
+static int
+l9p_dispatch_tread(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	int rc;
+
+	/*
+	 * No open requirement in the lookup: xattr fids are never
+	 * open, so the open test is made by hand below.
+	 */
+	rc = fid_lookup(conn, req->lr_req.hdr.fid, EINVAL, 0, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+	/*
+	 * Aim the data iov at the first byte after the count field
+	 * of the response, which is where packed data must begin:
+	 *
+	 * size[4] + Rread[1] + tag[2] + count[4] = 11
+	 */
+	l9p_seek_iov(req->lr_resp_msg.lm_iov, req->lr_resp_msg.lm_niov,
+	    req->lr_data_iov, &req->lr_data_niov, 11);
+
+	/*
+	 * An xattr fid implies that the backend has xattrread: it
+	 * could only be NULL if xattrwalk and xattrcreate are NULL
+	 * or always fail.
+	 *
+	 * TODO:
+	 *   separate out directory-read
+	 *   allow async read
+	 */
+	backend = conn->lc_server->ls_backend;
+	if (l9p_fid_isxattr(req->lr_fid))
+		return (backend->xattrread(backend->softc, req));
+	if (l9p_fid_isopen(req->lr_fid))
+		return (backend->read(backend->softc, req));
+	return (EINVAL);
+}
+
+/*
+ * Tremove: remove the file behind a fid.  As with Tclunk, the fid
+ * is invalidated first and removed afterwards whatever the backend
+ * returns.
+ */
+static int
+l9p_dispatch_tremove(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	struct l9p_fid *victim;
+	int rc;
+
+	/*
+	 * ?? Should Tremove be allowed on auth fids?  If so, would
+	 * it simply behave like Tclunk?
+	 */
+	rc = fid_lookup(conn, req->lr_req.hdr.fid, EINVAL, 0, &victim);
+	if (rc != 0)
+		return (rc);
+
+	backend = conn->lc_server->ls_backend;
+	l9p_fid_unsetvalid(victim);
+	rc = backend->remove(backend->softc, victim);
+
+	/* The fid is gone now, error or not. */
+	l9p_connection_remove_fid(conn, victim);
+	return (rc);
+}
+
+/*
+ * Tstat: let the backend fill in the stat reply for a fid, then
+ * bring the fid's directory flag in line with the returned qid.
+ *
+ * The qid type in the reply is only tested for L9P_QTDIR, never
+ * rewritten by that test, so the other type bits (including the
+ * L9P_QTAUTH bit added for auth fids) reach the client intact.
+ */
+static int
+l9p_dispatch_tstat(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *be;
+	struct l9p_fid *fid;
+	int error;
+
+	/* Allow Tstat on auth fid?  Seems harmless enough... */
+	error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_ALLOW_AUTH, &fid);
+	if (error)
+		return (error);
+
+	be = conn->lc_server->ls_backend;
+	req->lr_fid = fid;
+	error = be->stat(be->softc, req);
+
+	if (error == 0) {
+		if (l9p_fid_isauth(fid))
+			req->lr_resp.rstat.stat.qid.type |= L9P_QTAUTH;
+
+		/*
+		 * Test the directory bit with '&'.  A compound '&='
+		 * here would store the masked value back and wipe
+		 * every other type bit from the reply.
+		 */
+		if (req->lr_resp.rstat.stat.qid.type & L9P_QTDIR)
+			l9p_fid_setdir(fid);
+		else
+			l9p_fid_unsetdir(fid);
+	}
+
+	return (error);
+}
+
+/*
+ * Twalk: walk from fid to newfid through zero or more names.
+ *
+ * When newfid differs from fid it is allocated here, marked valid
+ * if the backend succeeds and removed again if it fails.  When the
+ * two are equal the fid is walked in place and stays valid.
+ *
+ * After a successful walk the directory flag of newfid follows the
+ * last qid returned: set for a directory, cleared otherwise.  The
+ * clearing matters for an in-place walk from a directory to a
+ * non-directory, which would otherwise keep a stale flag.
+ */
+static int
+l9p_dispatch_twalk(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *be;
+	struct l9p_fid *fid, *newfid;
+	uint16_t n;
+	int error;
+
+	/* Can forbid XATTR, but cannot require DIR. */
+	error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_FORBID_XATTR, &fid);
+	if (error)
+		return (error);
+
+	if (req->lr_req.twalk.hdr.fid != req->lr_req.twalk.newfid) {
+		newfid = l9p_connection_alloc_fid(conn,
+		    req->lr_req.twalk.newfid);
+		if (newfid == NULL)
+			return (EINVAL);
+	} else
+		newfid = fid;
+
+	be = conn->lc_server->ls_backend;
+	req->lr_fid = fid;
+	req->lr_newfid = newfid;
+	error = be->walk(be->softc, req);
+
+	/*
+	 * If newfid == fid, then fid itself has (potentially) changed,
+	 * but is still valid.  Otherwise set newfid valid on
+	 * success, and destroy it on error.
+	 */
+	if (newfid != fid) {
+		if (error == 0)
+			l9p_fid_setvalid(newfid);
+		else
+			l9p_connection_remove_fid(conn, newfid);
+	}
+
+	/*
+	 * If we walked any name elements, the last (n-1'th) qid
+	 * has the type (dir vs file) for the new fid.  Otherwise
+	 * the type of newfid is the same as fid.  Of course, if
+	 * n==0 and fid==newfid, fid is already set up correctly
+	 * as the whole thing was a big no-op, but it's safe to
+	 * copy its dir bit to itself.
+	 */
+	if (error == 0) {
+		n = req->lr_resp.rwalk.nwqid;
+		if (n > 0) {
+			if (req->lr_resp.rwalk.wqid[n - 1].type & L9P_QTDIR)
+				l9p_fid_setdir(newfid);
+			else
+				l9p_fid_unsetdir(newfid);
+		} else {
+			if (l9p_fid_isdir(fid))
+				l9p_fid_setdir(newfid);
+		}
+	}
+	return (error);
+}
+
+/*
+ * Twrite: write to an xattr fid or to an open file.  A fid that is
+ * neither gets EINVAL; a backend without the matching write
+ * operation gets ENOSYS.
+ */
+static int
+l9p_dispatch_twrite(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	int rc;
+
+	/* Open cannot be required (xattr writes), but dirs are out. */
+	rc = fid_lookup(conn, req->lr_req.hdr.fid, EINVAL,
+	    F_FORBID_DIR, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+	/*
+	 * Aim the data iov at the payload, as l9p_dispatch_tread
+	 * does, except that here it lives in the request buffer
+	 * rather than the response:
+	 *
+	 * size[4] + Twrite[1] + tag[2] + fid[4] + offset[8] + count[4] = 23
+	 */
+	l9p_seek_iov(req->lr_req_msg.lm_iov, req->lr_req_msg.lm_niov,
+	    req->lr_data_iov, &req->lr_data_niov, 23);
+
+	/*
+	 * Unlike read, write and xattrwrite are optional (for R/O fs).
+	 *
+	 * TODO:
+	 *   allow async write
+	 */
+	backend = conn->lc_server->ls_backend;
+	if (l9p_fid_isxattr(req->lr_fid)) {
+		if (backend->xattrwrite == NULL)
+			return (ENOSYS);
+		return (backend->xattrwrite(backend->softc, req));
+	}
+	if (l9p_fid_isopen(req->lr_fid)) {
+		if (backend->write == NULL)
+			return (ENOSYS);
+		return (backend->write(backend->softc, req));
+	}
+	return (EINVAL);
+}
+
+/*
+ * Twstat: change file metadata through a non-xattr fid.  Returns
+ * ENOSYS when the backend has no wstat operation.
+ */
+static int
+l9p_dispatch_twstat(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	int rc;
+
+	rc = fid_lookup(conn, req->lr_req.hdr.fid, EINVAL,
+	    F_FORBID_XATTR, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+	backend = conn->lc_server->ls_backend;
+	if (backend->wstat == NULL)
+		return (ENOSYS);
+	return (backend->wstat(backend->softc, req));
+}
+
+/*
+ * Tstatfs: report file system statistics for the fid's file system.
+ * Returns ENOSYS when the backend has no statfs operation, like the
+ * other 9P2000.L handlers do for their backend operations.
+ */
+static int
+l9p_dispatch_tstatfs(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *be;
+	int error;
+
+	/* Should we allow statfs on auth fids? */
+	error = fid_lookup(conn, req->lr_req.hdr.fid, EINVAL, 0, &req->lr_fid);
+	if (error)
+		return (error);
+
+	be = conn->lc_server->ls_backend;
+	/* Guard against a backend that leaves statfs unset. */
+	error = be->statfs != NULL ? be->statfs(be->softc, req) : ENOSYS;
+	return (error);
+}
+
+/*
+ * Tlopen: 9P2000.L open of a fid that is neither open already nor
+ * an xattr fid.  Returns ENOSYS when the backend has no lopen; the
+ * fid is marked open once the backend succeeds.
+ */
+static int
+l9p_dispatch_tlopen(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	int rc;
+
+	rc = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_FORBID_OPEN | F_FORBID_XATTR, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+	/*
+	 * TODO:
+	 *  - add async open (leaves req->lr_fid in limbo)
+	 */
+	backend = conn->lc_server->ls_backend;
+	if (backend->lopen == NULL)
+		return (ENOSYS);
+	rc = backend->lopen(backend->softc, req);
+	if (rc != 0)
+		return (rc);
+
+	l9p_fid_setopen(req->lr_fid);
+	return (0);
+}
+
+/*
+ * Tlcreate: 9P2000.L file create inside the unopened directory
+ * named by the fid.  Returns ENOSYS when the backend has no
+ * lcreate.  On success the fid stops being a directory fid and
+ * becomes the open new file.
+ */
+static int
+l9p_dispatch_tlcreate(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	int rc;
+
+	rc = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+	/*
+	 * TODO:
+	 *  - check new file name
+	 *  - add async create (leaves req->lr_fid in limbo)
+	 */
+	backend = conn->lc_server->ls_backend;
+	if (backend->lcreate == NULL)
+		return (ENOSYS);
+	rc = backend->lcreate(backend->softc, req);
+	if (rc != 0)
+		return (rc);
+
+	l9p_fid_unsetdir(req->lr_fid);
+	l9p_fid_setopen(req->lr_fid);
+	return (0);
+}
+
+/*
+ * Tsymlink: create a symbolic link inside the unopened directory
+ * named by the fid.  Returns ENOSYS when the backend has no
+ * symlink operation.
+ */
+static int
+l9p_dispatch_tsymlink(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	int rc;
+
+	/* The containing dir is not affected; maybe allow OPEN? */
+	rc = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+	/*
+	 * TODO:
+	 *  - check new file name
+	 */
+	backend = conn->lc_server->ls_backend;
+	if (backend->symlink == NULL)
+		return (ENOSYS);
+	return (backend->symlink(backend->softc, req));
+}
+
+/*
+ * Tmknod: create a node inside the unopened directory named by the
+ * fid.  Returns ENOSYS when the backend has no mknod operation.
+ */
+static int
+l9p_dispatch_tmknod(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	int rc;
+
+	/* The containing dir is not affected; maybe allow OPEN? */
+	rc = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+	/*
+	 * TODO:
+	 *  - check new file name
+	 */
+	backend = conn->lc_server->ls_backend;
+	if (backend->mknod == NULL)
+		return (ENOSYS);
+	return (backend->mknod(backend->softc, req));
+}
+
+/*
+ * Trename: rename the object behind the fid into the directory
+ * named by dfid.  Returns ENOSYS when the backend has no rename
+ * operation.
+ */
+static int
+l9p_dispatch_trename(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	int rc;
+
+	/* The source may be a directory or a file (symlink etc). */
+	rc = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_FORBID_XATTR, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+	/* The new dir fid is not affected; maybe allow OPEN? */
+	rc = fid_lookup(conn, req->lr_req.trename.dfid, ENOENT,
+	    F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid2);
+	if (rc != 0)
+		return (rc);
+
+	/*
+	 * TODO:
+	 *  - check new file name (trename.name)
+	 */
+	backend = conn->lc_server->ls_backend;
+	if (backend->rename == NULL)
+		return (ENOSYS);
+	return (backend->rename(backend->softc, req));
+}
+
+static int
+l9p_dispatch_treadlink(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	int rc;
+
+	/*
+	 * Only a symlink can be read this way, and the back end has
+	 * to verify that itself.  Rejecting directories and open
+	 * files up front is cheap, so do it here.
+	 */
+	rc = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_FORBID_DIR | F_FORBID_OPEN, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+	backend = conn->lc_server->ls_backend;
+	if (backend->readlink == NULL)
+		return (ENOSYS);
+	return (backend->readlink(backend->softc, req));
+}
+
+static int
+l9p_dispatch_tgetattr(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	int rc;
+
+	/* Any fid except an xattr fid is acceptable here. */
+	rc = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_FORBID_XATTR, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+	backend = conn->lc_server->ls_backend;
+	if (backend->getattr == NULL)
+		return (ENOSYS);
+	return (backend->getattr(backend->softc, req));
+}
+
+static int
+l9p_dispatch_tsetattr(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	int rc;
+
+	/* Any fid except an xattr fid is acceptable here. */
+	rc = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_FORBID_XATTR, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+	backend = conn->lc_server->ls_backend;
+	if (backend->setattr == NULL)
+		return (ENOSYS);
+	return (backend->setattr(backend->softc, req));
+}
+
+static int
+l9p_dispatch_txattrwalk(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	struct l9p_fid *srcfid, *xfid;
+	int rc;
+
+	/*
+	 * Not sure if we care if file-or-dir is open or not.
+	 * The source fid should always be a file or dir, while the
+	 * newfid argument must be supplied, must be different, and
+	 * always becomes a new xattr fid -- so this is not very
+	 * much like Twalk.
+	 */
+	rc = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_FORBID_XATTR, &srcfid);
+	if (rc != 0)
+		return (rc);
+
+	xfid = l9p_connection_alloc_fid(conn, req->lr_req.txattrwalk.newfid);
+	if (xfid == NULL)
+		return (EINVAL);
+
+	req->lr_fid = srcfid;
+	req->lr_newfid = xfid;
+
+	backend = conn->lc_server->ls_backend;
+	rc = ENOSYS;
+	if (backend->xattrwalk != NULL)
+		rc = backend->xattrwalk(backend->softc, req);
+
+	/*
+	 * Success/fail is similar to Twalk, except that the new fid
+	 * also gets its xattr type bit set.  It is simpler, too,
+	 * because newfid is always a freshly allocated fid: on
+	 * failure we just remove it again.
+	 */
+	if (rc != 0) {
+		l9p_connection_remove_fid(conn, xfid);
+		return (rc);
+	}
+	l9p_fid_setvalid(xfid);
+	l9p_fid_setxattr(xfid);
+	return (0);
+}
+
+static int
+l9p_dispatch_txattrcreate(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	struct l9p_fid *target;
+	int rc;
+
+	/*
+	 * An already-open fid is refused because the fid is about
+	 * to turn into an xattr fid.  Should it ever be necessary
+	 * to allow that, the fs code will have to cope with it.
+	 *
+	 * Curiously, qemu 9pfs uses ENOENT for a bad txattrwalk
+	 * fid, but EINVAL for txattrcreate (so we do too).
+	 */
+	rc = fid_lookup(conn, req->lr_req.hdr.fid, EINVAL,
+	    F_FORBID_XATTR | F_FORBID_OPEN, &target);
+	if (rc != 0)
+		return (rc);
+
+	req->lr_fid = target;
+
+	backend = conn->lc_server->ls_backend;
+	if (backend->xattrcreate == NULL)
+		return (ENOSYS);
+
+	rc = backend->xattrcreate(backend->softc, req);
+	if (rc != 0)
+		return (rc);
+
+	/*
+	 * The fid has now changed from a regular (file or dir) fid
+	 * into an xattr fid.
+	 */
+	l9p_fid_unsetdir(target);
+	l9p_fid_setxattr(target);
+	return (0);
+}
+
+static int
+l9p_dispatch_treaddir(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	int rc;
+
+	/* Readdir needs a directory fid that has been opened. */
+	rc = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_REQUIRE_DIR | F_REQUIRE_OPEN, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+	/*
+	 * Position the data iov so that packing of entries begins
+	 * immediately after the count field of the response:
+	 *
+	 * size[4] + Rreaddir[1] + tag[2] + count[4] = 11
+	 */
+	l9p_seek_iov(req->lr_resp_msg.lm_iov, req->lr_resp_msg.lm_niov,
+	    req->lr_data_iov, &req->lr_data_niov, 11);
+
+	backend = conn->lc_server->ls_backend;
+	if (backend->readdir == NULL)
+		return (ENOSYS);
+	return (backend->readdir(backend->softc, req));
+}
+
+static int
+l9p_dispatch_tfsync(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	int rc;
+
+	/* Only an open fid can be synced. */
+	rc = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_REQUIRE_OPEN, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+	backend = conn->lc_server->ls_backend;
+	if (backend->fsync == NULL)
+		return (ENOSYS);
+	return (backend->fsync(backend->softc, req));
+}
+
+static int
+l9p_dispatch_tlock(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	int rc;
+
+	/* Requires an open fid.  Forbid directories? */
+	rc = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_REQUIRE_OPEN, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+	/*
+	 * TODO: multiple client handling; perhaps async locking.
+	 */
+	backend = conn->lc_server->ls_backend;
+	if (backend->lock == NULL)
+		return (ENOSYS);
+	return (backend->lock(backend->softc, req));
+}
+
+static int
+l9p_dispatch_tgetlock(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	int rc;
+
+	/* Requires an open fid. */
+	rc = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_REQUIRE_OPEN, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+	/*
+	 * TODO: multiple client handling; perhaps async locking.
+	 */
+	backend = conn->lc_server->ls_backend;
+	if (backend->getlock == NULL)
+		return (ENOSYS);
+	return (backend->getlock(backend->softc, req));
+}
+
+static int
+l9p_dispatch_tlink(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	int rc;
+
+	/*
+	 * Note, dfid goes into fid2 in current scheme.
+	 *
+	 * Allow open dir?  Target dir fid is not modified...
+	 */
+	rc = fid_lookup(conn, req->lr_req.tlink.dfid, ENOENT,
+	    F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid2);
+	if (rc != 0)
+		return (rc);
+
+	/* The object being linked: neither a directory nor an xattr fid. */
+	rc = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_FORBID_DIR | F_FORBID_XATTR, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+	backend = conn->lc_server->ls_backend;
+	if (backend->link == NULL)
+		return (ENOSYS);
+	return (backend->link(backend->softc, req));
+}
+
+/*
+ * Dispatch a Tmkdir request: validate the parent directory fid and
+ * the new name, then hand the request to the back end.
+ *
+ * Returns 0 on success, the fid_lookup() error for a bad fid, EINVAL
+ * if the name contains a slash, or ENOSYS if the back end provides
+ * no mkdir operation.
+ */
+static int
+l9p_dispatch_tmkdir(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *be;
+	int error;
+
+	error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid);
+	if (error)
+		return (error);
+
+	/*
+	 * Slashes embedded in the name are not allowed.  Read the
+	 * name through the tmkdir member, which is the one matching
+	 * this message type, rather than through tlcreate.
+	 */
+	if (strchr(req->lr_req.tmkdir.name, '/') != NULL)
+		return (EINVAL);
+
+	be = conn->lc_server->ls_backend;
+	error = be->mkdir != NULL ? be->mkdir(be->softc, req) : ENOSYS;
+	return (error);
+}
+
+static int
+l9p_dispatch_trenameat(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	int rc;
+
+	/* Old directory fid. */
+	rc = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+	/* New directory fid; it is stored in lr_fid2. */
+	rc = fid_lookup(conn, req->lr_req.trenameat.newdirfid, ENOENT,
+	    F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid2);
+	if (rc != 0)
+		return (rc);
+
+	/* TODO: check old and new names */
+	backend = conn->lc_server->ls_backend;
+	if (backend->renameat == NULL)
+		return (ENOSYS);
+	return (backend->renameat(backend->softc, req));
+}
+
+static int
+l9p_dispatch_tunlinkat(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	int rc;
+
+	/* The fid names the containing directory. */
+	rc = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+	/* TODO: check dir-or-file name */
+	backend = conn->lc_server->ls_backend;
+	if (backend->unlinkat == NULL)
+		return (ENOSYS);
+	return (backend->unlinkat(backend->softc, req));
+}
diff --git a/rfuncs.c b/rfuncs.c
new file mode 100644
index 000000000000..3995d413e3a6
--- /dev/null
+++ b/rfuncs.c
@@ -0,0 +1,320 @@
+/*
+ * Copyright 2016 Chris Torek <chris.torek@gmail.com>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#if defined(WITH_CASPER)
+#include <libcasper.h>
+#include <casper/cap_pwd.h>
+#include <casper/cap_grp.h>
+#endif
+
+#include "rfuncs.h"
+
+/*
+ * This is essentially a clone of the BSD basename_r function,
+ * which is like POSIX basename() but puts the result in a user
+ * supplied buffer.
+ *
+ * In BSD basename_r, the buffer must be least MAXPATHLEN bytes
+ * long. In our case we take the size of the buffer as an argument.
+ *
+ * Note that it's impossible in general to do this without
+ * a temporary buffer since basename("foo/bar") is "bar",
+ * but basename("foo/bar/") is still "bar" -- no trailing
+ * slash is allowed.
+ *
+ * The return value is your supplied buffer <buf>, or NULL if
+ * the length of the basename of the supplied <path> equals or
+ * exceeds your indicated <bufsize>.
+ *
+ * As a special but useful case, if you supply NULL for the <buf>
+ * argument, we allocate the buffer dynamically to match the
+ * basename, i.e., the result is basically strdup()ed for you.
+ * In this case <bufsize> is ignored (recommended: pass 0 here).
+ */
+char *
+r_basename(const char *path, char *buf, size_t bufsize)
+{
+	const char *last, *start;
+	size_t n;
+
+	if (path == NULL || path[0] == '\0') {
+		/*
+		 * A NULL or empty path is treated as ".".  That is
+		 * generous, but it matches libc basename_r() and
+		 * keeps the pointer arithmetic below well defined.
+		 */
+		start = ".";
+		n = 1;
+	} else {
+		/*
+		 * Step backwards over trailing slashes, never going
+		 * before the first byte.  Afterwards either *last is
+		 * not a slash, or last == path and the whole path is
+		 * made of slashes ("/", "//", ...).
+		 */
+		last = path + strlen(path) - 1;
+		for (; last > path && *last == '/'; last--)
+			continue;
+
+		if (*last == '/') {
+			/* Nothing but slashes: the basename is "/". */
+			start = "/";
+			n = 1;
+		} else {
+			/*
+			 * *last is the final byte of a non-empty
+			 * basename.  Walk back until we are at the
+			 * beginning of the path or just after a
+			 * slash; the basename is then the bytes
+			 * from *start through *last inclusive.
+			 */
+			for (start = last; start > path && start[-1] != '/';
+			    start--)
+				continue;
+			n = (size_t)(last - start) + 1;
+		}
+	}
+
+	if (buf != NULL) {
+		/* Caller's buffer: need room for n bytes plus the NUL. */
+		if (n >= bufsize) {
+			errno = ENAMETOOLONG;
+			return (NULL);
+		}
+	} else {
+		/* No buffer given: allocate one of exactly the right size. */
+		buf = malloc(n + 1);
+		if (buf == NULL)
+			return (NULL);
+	}
+	memcpy(buf, start, n);
+	buf[n] = '\0';
+	return (buf);
+}
+
+/*
+ * This is much like POSIX dirname(), but is reentrant.
+ *
+ * We examine a path, find the directory portion, and copy that
+ * to a user supplied buffer <buf> of the given size <bufsize>.
+ *
+ * Note that dirname("/foo/bar/") is "/foo", dirname("/foo") is "/",
+ * and dirname("////") is "/". However, dirname("////foo/bar") is
+ * "////foo" (we do not resolve these leading slashes away -- this
+ * matches the BSD libc behavior).
+ *
+ * The return value is your supplied buffer <buf>, or NULL if
+ * the length of the dirname of the supplied <path> equals or
+ * exceeds your indicated <bufsize>.
+ *
+ * As a special but useful case, if you supply NULL for the <buf>
+ * argument, we allocate the buffer dynamically to match the
+ * dirname, i.e., the result is basically strdup()ed for you.
+ * In this case <bufsize> is ignored (recommended: pass 0 here).
+ */
+char *
+r_dirname(const char *path, char *buf, size_t bufsize)
+{
+	const char *endp, *dirpart;
+	size_t len;
+
+	/*
+	 * NULL or empty path means ".".  This is perhaps overly
+	 * forgiving but matches libc dirname(), and avoids breaking
+	 * the code below.
+	 */
+	if (path == NULL || *path == '\0') {
+		dirpart = ".";
+		len = 1;
+	} else {
+		/*
+		 * Back up over any trailing slashes, then back up
+		 * over the last path name component.  In all cases,
+		 * stop as soon as endp==path so that we do not back
+		 * out of the buffer entirely.
+		 */
+		endp = path + strlen(path) - 1;
+		while (endp > path && *endp == '/')
+			endp--;
+		while (endp > path && *endp != '/')
+			endp--;
+
+		if (endp == path) {
+			/*
+			 * We ran into the start of the path while
+			 * stripping the last component.  Either the
+			 * path is of the form "/foo" (or just "/"),
+			 * whose dirname is "/", or it has no slash
+			 * before the last component ("foo", "foo/"),
+			 * whose dirname is ".".
+			 *
+			 * This must be decided here, before skipping
+			 * the separating slashes: for "a/b" that
+			 * skip also ends at path[0], which is then a
+			 * real one-character directory name and not
+			 * a sign that there is no directory part.
+			 */
+			dirpart = *endp == '/' ? "/" : ".";
+			len = 1;
+		} else {
+			/*
+			 * *endp is the slash that separates the
+			 * directory part from the last component.
+			 * Back up over it and any further slashes;
+			 * endp then points at the last byte of the
+			 * directory part (which is the leading "/"
+			 * for names such as "//foo").
+			 */
+			while (endp > path && *endp == '/')
+				endp--;
+			dirpart = path;
+			len = (size_t)(endp - path + 1);
+		}
+	}
+	if (buf == NULL) {
+		buf = malloc(len + 1);
+		if (buf == NULL)
+			return (NULL);
+	} else {
+		if (len >= bufsize) {
+			errno = ENAMETOOLONG;
+			return (NULL);
+		}
+	}
+	memcpy(buf, dirpart, len);
+	buf[len] = '\0';
+	return (buf);
+}
+
+static void
+r_pginit(struct r_pgdata *pg)
+{
+
+	/* No buffer yet; note that realloc(NULL) == malloc. */
+	pg->r_pgbuf = NULL;
+	/*
+	 * Start at half the intended initial size (512 bytes):
+	 * r_pgexpand() computes double this value for its first
+	 * allocation.
+	 */
+	pg->r_pgbufsize = 512;
+}
+
+/*
+ * Double the size of the lookup buffer in <pg>.
+ *
+ * Returns 0 on success, with both r_pgbuf and r_pgbufsize updated,
+ * or ENOMEM if the new size would reach 1 MiB or the allocation
+ * fails.  On failure the previous buffer and size are left intact,
+ * so the caller can still release the buffer with r_pgfree().
+ */
+static int
+r_pgexpand(struct r_pgdata *pg)
+{
+	size_t nsize;
+	char *nbuf;
+
+	nsize = pg->r_pgbufsize << 1;
+	if (nsize >= (1 << 20))
+		return (ENOMEM);
+
+	/* Use a temporary so a failed realloc does not leak the old buffer. */
+	nbuf = realloc(pg->r_pgbuf, nsize);
+	if (nbuf == NULL)
+		return (ENOMEM);
+
+	/*
+	 * Record the new size as well as the new buffer.  Without
+	 * this the callers would keep passing the same length to
+	 * the get*_r function and retry forever on ERANGE.
+	 */
+	pg->r_pgbuf = nbuf;
+	pg->r_pgbufsize = nsize;
+	return (0);
+}
+
+/*
+ * Release the lookup buffer held in <pg>.  The pointer is cleared
+ * afterwards, so calling r_pgfree() again on the same r_pgdata is
+ * harmless.
+ */
+void
+r_pgfree(struct r_pgdata *pg)
+{
+
+	free(pg->r_pgbuf);
+	pg->r_pgbuf = NULL;
+}
+
+struct passwd *
+r_getpwuid(uid_t uid, struct r_pgdata *pg)
+{
+	struct passwd *pw = NULL;
+	int rc;
+
+	/*
+	 * Expand the buffer and retry for as long as getpwuid_r()
+	 * reports ERANGE; any other outcome ends the loop.
+	 */
+	r_pginit(pg);
+	for (;;) {
+		rc = r_pgexpand(pg);
+		if (rc == 0)
+			rc = getpwuid_r(uid, &pg->r_pgun.un_pw,
+			    pg->r_pgbuf, pg->r_pgbufsize, &pw);
+		if (rc != ERANGE)
+			break;
+	}
+
+	if (rc != 0)
+		return (NULL);
+	return (pw);
+}
+
+struct group *
+r_getgrgid(gid_t gid, struct r_pgdata *pg)
+{
+	struct group *gr = NULL;
+	int rc;
+
+	/*
+	 * Expand the buffer and retry for as long as getgrgid_r()
+	 * reports ERANGE; any other outcome ends the loop.
+	 */
+	r_pginit(pg);
+	for (;;) {
+		rc = r_pgexpand(pg);
+		if (rc == 0)
+			rc = getgrgid_r(gid, &pg->r_pgun.un_gr,
+			    pg->r_pgbuf, pg->r_pgbufsize, &gr);
+		if (rc != ERANGE)
+			break;
+	}
+
+	if (rc != 0)
+		return (NULL);
+	return (gr);
+}
+
+#if defined(WITH_CASPER)
+struct passwd *
+r_cap_getpwuid(cap_channel_t *cap, uid_t uid, struct r_pgdata *pg)
+{
+	struct passwd *pw = NULL;
+	int rc;
+
+	/*
+	 * Same retry scheme as r_getpwuid(), but the lookup goes
+	 * through cap_getpwuid_r() on the supplied channel.
+	 */
+	r_pginit(pg);
+	for (;;) {
+		rc = r_pgexpand(pg);
+		if (rc == 0)
+			rc = cap_getpwuid_r(cap, uid, &pg->r_pgun.un_pw,
+			    pg->r_pgbuf, pg->r_pgbufsize, &pw);
+		if (rc != ERANGE)
+			break;
+	}
+
+	if (rc != 0)
+		return (NULL);
+	return (pw);
+}
+
+struct group *
+r_cap_getgrgid(cap_channel_t *cap, gid_t gid, struct r_pgdata *pg)
+{
+	struct group *gr = NULL;
+	int rc;
+
+	/*
+	 * Same retry scheme as r_getgrgid(), but the lookup goes
+	 * through cap_getgrgid_r() on the supplied channel.
+	 */
+	r_pginit(pg);
+	for (;;) {
+		rc = r_pgexpand(pg);
+		if (rc == 0)
+			rc = cap_getgrgid_r(cap, gid, &pg->r_pgun.un_gr,
+			    pg->r_pgbuf, pg->r_pgbufsize, &gr);
+		if (rc != ERANGE)
+			break;
+	}
+
+	if (rc != 0)
+		return (NULL);
+	return (gr);
+}
+#endif
diff --git a/rfuncs.h b/rfuncs.h
new file mode 100644
index 000000000000..22d329311d9f
--- /dev/null
+++ b/rfuncs.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright 2016 Chris Torek <chris.torek@gmail.com>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef LIB9P_RFUNCS_H
+#define LIB9P_RFUNCS_H
+
+#include <grp.h>
+#include <pwd.h>
+#include <string.h>
+
+#if defined(WITH_CASPER)
+#include <libcasper.h>
+#endif
+
+/*
+ * Reentrant, optionally-malloc-ing versions of
+ * basename() and dirname().
+ */
+char *r_basename(const char *, char *, size_t);
+char *r_dirname(const char *, char *, size_t);
+
+/*
+ * Yuck: getpwuid, getgrgid are not thread-safe, and the
+ * POSIX replacements (getpwuid_r, getgrgid_r) are horrible.
+ * This is to allow us to loop over the get.*_r calls with ever
+ * increasing buffers until they succeed or get unreasonable
+ * (same idea as the libc code for the non-reentrant versions,
+ * although prettier).
+ *
+ * The getpwuid/getgrgid functions auto-init one of these,
+ * but the caller must call r_pgfree() when done with the
+ * return values.
+ *
+ * If we need more later, we may have to expose the init function.
+ */
+/*
+ * Working storage for one r_getpwuid()/r_getgrgid() style lookup.
+ */
+struct r_pgdata {
+ /* buffer handed to the get*_r function; released by r_pgfree() */
+ char *r_pgbuf;
+ /* buffer length passed to the get*_r function */
+ size_t r_pgbufsize;
+ /* storage for the result record; which member is used depends on the lookup */
+ union {
+ struct passwd un_pw;
+ struct group un_gr;
+ } r_pgun;
+};
+
+/* void r_pginit(struct r_pgdata *); */
+/* Free the buffer held in an r_pgdata after a lookup. */
+void r_pgfree(struct r_pgdata *);
+/* Wrappers around getpwuid_r()/getgrgid_r(); return NULL on failure. */
+struct passwd *r_getpwuid(uid_t, struct r_pgdata *);
+struct group *r_getgrgid(gid_t, struct r_pgdata *);
+
+#if defined(WITH_CASPER)
+/* Same, but the lookup is made through the given Casper channel. */
+struct passwd *r_cap_getpwuid(cap_channel_t *, uid_t, struct r_pgdata *);
+struct group *r_cap_getgrgid(cap_channel_t *, gid_t, struct r_pgdata *);
+#endif
+
+#endif /* LIB9P_RFUNCS_H */
diff --git a/sbuf/sbuf.c b/sbuf/sbuf.c
new file mode 100644
index 000000000000..525bb52db3f6
--- /dev/null
+++ b/sbuf/sbuf.c
@@ -0,0 +1,127 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * Minimal libsbuf reimplementation for Mac OS X.
+ */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include "sbuf.h"
+
+#define SBUF_INITIAL_SIZE 128
+
+/*
+ * Allocate a new, empty string buffer.
+ *
+ * Returns NULL if the sbuf structure itself cannot be allocated.
+ * If only the initial data buffer cannot be allocated, an sbuf with
+ * zero capacity is returned; the append functions will then try to
+ * allocate on first use.  Release the result with sbuf_delete().
+ */
+struct sbuf *
+sbuf_new_auto(void)
+{
+	struct sbuf *s;
+
+	s = malloc(sizeof(*s));
+	if (s == NULL)
+		return (NULL);
+
+	/* One extra byte so that SBUF_INITIAL_SIZE characters plus a NUL fit. */
+	s->s_buf = calloc(1, SBUF_INITIAL_SIZE + 1);
+	s->s_capacity = s->s_buf != NULL ? SBUF_INITIAL_SIZE : 0;
+	s->s_size = 0;
+	s->s_position = 0;
+
+	return (s);
+}
+
+/*
+ * Append the NUL-terminated string <str> to <s>.
+ *
+ * Returns 0 on success or -1 if memory cannot be obtained; in the
+ * failure case the existing contents of <s> are left untouched.
+ */
+int
+sbuf_cat(struct sbuf *s, const char *str)
+{
+	int req = (int)strlen(str);
+
+	if (s->s_size + req >= s->s_capacity) {
+		int ncap = s->s_size + req + 1;
+		char *nbuf;
+
+		/*
+		 * Go through a temporary and commit pointer and
+		 * capacity together, so that a failed realloc
+		 * neither leaks nor discards the current data.
+		 */
+		nbuf = realloc(s->s_buf, (size_t)ncap);
+		if (nbuf == NULL)
+			return (-1);
+		s->s_buf = nbuf;
+		s->s_capacity = ncap;
+	}
+	if (s->s_buf == NULL)
+		return (-1);
+
+	/* Copy the string together with its terminating NUL. */
+	memcpy(s->s_buf + s->s_size, str, (size_t)req + 1);
+	s->s_size += req;
+
+	return (0);
+}
+
+int
+sbuf_printf(struct sbuf *s, const char *fmt, ...)
+{
+	va_list args;
+	int rc;
+
+	/* Thin variadic front end: all the work is in sbuf_vprintf(). */
+	va_start(args, fmt);
+	rc = sbuf_vprintf(s, fmt, args);
+	va_end(args);
+
+	return (rc);
+}
+
+/*
+ * Append printf-style formatted output to <s>.
+ *
+ * Returns 0 on success, or -1 if formatting fails or memory cannot
+ * be obtained; in the failure case the recorded size of <s> is not
+ * changed.
+ */
+int
+sbuf_vprintf(struct sbuf *s, const char *fmt, va_list args)
+{
+	va_list copy;
+	int req;
+
+	/* First pass, on a copy of the arguments, only measures the output. */
+	va_copy(copy, args);
+	req = vsnprintf(NULL, 0, fmt, copy);
+	va_end(copy);
+
+	/* vsnprintf reports an error as a negative value, not a length. */
+	if (req < 0)
+		return (-1);
+
+	if (s->s_size + req >= s->s_capacity) {
+		int ncap = s->s_size + req + 1;
+		char *nbuf;
+
+		/*
+		 * Go through a temporary and commit pointer and
+		 * capacity together, so that a failed realloc
+		 * neither leaks nor discards the current data.
+		 */
+		nbuf = realloc(s->s_buf, (size_t)ncap);
+		if (nbuf == NULL)
+			return (-1);
+		s->s_buf = nbuf;
+		s->s_capacity = ncap;
+	}
+	if (s->s_buf == NULL)
+		return (-1);
+
+	req = vsnprintf(s->s_buf + s->s_size, (size_t)req + 1, fmt, args);
+	if (req < 0)
+		return (-1);
+	s->s_size += req;
+
+	return (0);
+}
+
+char *
+sbuf_data(struct sbuf *s)
+{
+	/* Hand back the internal buffer itself, not a copy. */
+	char *data = s->s_buf;
+
+	return (data);
+}
+
+int
+sbuf_finish(struct sbuf *s)
+{
+	/* With no buffer there is nothing to terminate. */
+	if (s->s_buf == NULL)
+		return (0);
+
+	/* Write the NUL terminator right after the current contents. */
+	s->s_buf[s->s_size] = '\0';
+	return (0);
+}
+
+void
+sbuf_delete(struct sbuf *s)
+{
+	char *data = s->s_buf;
+
+	/* Release the character data first, then the sbuf itself. */
+	free(data);
+	free(s);
+}
diff --git a/sbuf/sbuf.h b/sbuf/sbuf.h
new file mode 100644
index 000000000000..e3ac87dd4142
--- /dev/null
+++ b/sbuf/sbuf.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * Minimal libsbuf reimplementation for Mac OS X.
+ */
+
+#ifndef LIB9P_SBUF_H
+#define LIB9P_SBUF_H
+
+#include <stdarg.h>
+
+/*
+ * Growable string buffer.
+ */
+struct sbuf
+{
+ /* heap-allocated character data; may be NULL if allocation failed */
+ char *s_buf;
+ /* number of characters currently stored (offset where the next append goes) */
+ int s_size;
+ /* capacity recorded for s_buf; compared against the needed length to decide when to realloc */
+ int s_capacity;
+ /* NOTE(review): not referenced by any function in sbuf/sbuf.c */
+ int s_position;
+};
+
+/* Allocate a new, empty sbuf. */
+struct sbuf *sbuf_new_auto(void);
+/* Append a string; returns 0 on success, -1 on failure. */
+int sbuf_cat(struct sbuf *s, const char *str);
+/* Append printf-style formatted text; return 0 on success, -1 on failure. */
+int sbuf_printf(struct sbuf *s, const char *fmt, ...);
+int sbuf_vprintf(struct sbuf *s, const char *fmt, va_list args);
+/* NOTE(review): sbuf_done() is declared here but sbuf/sbuf.c does not define it. */
+int sbuf_done(struct sbuf *s);
+/* Free the sbuf and its character data. */
+void sbuf_delete(struct sbuf *s);
+/* NUL-terminate the contents; always returns 0. */
+int sbuf_finish(struct sbuf *s);
+/* Return a pointer to the internal character buffer. */
+char *sbuf_data(struct sbuf *s);
+
+#endif /* LIB9P_SBUF_H */
+
diff --git a/threadpool.c b/threadpool.c
new file mode 100644
index 000000000000..a973a143e9e2
--- /dev/null
+++ b/threadpool.c
@@ -0,0 +1,422 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <pthread.h>
+#if defined(__FreeBSD__)
+#include <pthread_np.h>
+#endif
+#include <sys/queue.h>
+#include "lib9p.h"
+#include "threadpool.h"
+
+static void l9p_threadpool_rflush(struct l9p_threadpool *tp,
+ struct l9p_request *req);
+
+/*
+ * Thread body for the pool's single responder.
+ *
+ * arg is the responder's own struct l9p_worker.  The thread sleeps on
+ * ltp_reply_cv until the reply queue is non-empty or it is told to
+ * exit, pulls the head request, marks it L9P_WS_REPLYING, releases any
+ * Tflush requests parked on it, and then sends the response with the
+ * pool mutex dropped.  Returns NULL once ltw_exiting is observed.
+ */
+static void *
+l9p_responder(void *arg)
+{
+	struct l9p_worker *self = arg;
+	struct l9p_threadpool *pool = self->ltw_tp;
+	struct l9p_request *reply;
+
+	pthread_mutex_lock(&pool->ltp_mtx);
+	for (;;) {
+		/* An exit request wins even if replies are still queued. */
+		if (self->ltw_exiting)
+			break;
+
+		/* Nothing to send yet: sleep, then re-evaluate from the top. */
+		if (STAILQ_EMPTY(&pool->ltp_replyq)) {
+			pthread_cond_wait(&pool->ltp_reply_cv, &pool->ltp_mtx);
+			continue;
+		}
+
+		/* Take the next reply off the queue. */
+		reply = STAILQ_FIRST(&pool->ltp_replyq);
+		STAILQ_REMOVE_HEAD(&pool->ltp_replyq, lr_worklink);
+
+		/* From here on the request can no longer be Tflush-ed. */
+		reply->lr_workstate = L9P_WS_REPLYING;
+
+		/* Let any Tflush requests waiting on this one proceed. */
+		if (reply->lr_flushstate != L9P_FLUSH_NONE)
+			l9p_threadpool_rflush(pool, reply);
+
+		/* Send the response without holding the pool mutex. */
+		pthread_mutex_unlock(&pool->ltp_mtx);
+		l9p_respond(reply, false, true);
+		pthread_mutex_lock(&pool->ltp_mtx);
+	}
+	pthread_mutex_unlock(&pool->ltp_mtx);
+
+	return (NULL);
+}
+
+/*
+ * Thread body for a regular (non-responder) worker.
+ *
+ * arg is the worker's own struct l9p_worker.  Each iteration takes the
+ * head request off tp->ltp_workq, runs it through
+ * l9p_dispatch_request(), stores the result in req->lr_error, appends
+ * the request to tp->ltp_replyq and signals ltp_reply_cv.  The thread
+ * returns NULL once worker->ltw_exiting is observed.
+ *
+ * Locking: ltp_mtx is taken once before the loop and is held at the top
+ * of every iteration; it is released only around the dispatch call (and
+ * inside pthread_cond_wait()), and finally after the loop.
+ */
+static void *
+l9p_worker(void *arg)
+{
+ struct l9p_threadpool *tp;
+ struct l9p_worker *worker = arg;
+ struct l9p_request *req;
+
+ tp = worker->ltw_tp;
+ pthread_mutex_lock(&tp->ltp_mtx);
+ for (;;) {
+ /* sleep until there is work or we are asked to exit */
+ while (STAILQ_EMPTY(&tp->ltp_workq) && !worker->ltw_exiting)
+ pthread_cond_wait(&tp->ltp_work_cv, &tp->ltp_mtx);
+ /* exit takes priority over any work still queued */
+ if (worker->ltw_exiting)
+ break;
+
+ /* off work queue; now work-in-progress, by us */
+ req = STAILQ_FIRST(&tp->ltp_workq);
+ STAILQ_REMOVE_HEAD(&tp->ltp_workq, lr_worklink);
+ req->lr_workstate = L9P_WS_INPROGRESS;
+ req->lr_worker = worker;
+ pthread_mutex_unlock(&tp->ltp_mtx);
+
+ /* actually try the request (mutex not held) */
+ req->lr_error = l9p_dispatch_request(req);
+
+ /* move to responder queue, updating work-state */
+ pthread_mutex_lock(&tp->ltp_mtx);
+ req->lr_workstate = L9P_WS_RESPQUEUED;
+ req->lr_worker = NULL;
+ STAILQ_INSERT_TAIL(&tp->ltp_replyq, req, lr_worklink);
+
+ /* signal the responder; mutex stays held for the next iteration */
+ pthread_cond_signal(&tp->ltp_reply_cv);
+ }
+ pthread_mutex_unlock(&tp->ltp_mtx);
+ return (NULL);
+}
+
+/*
+ * Release the Tflush requests that were waiting on req.
+ *
+ * Called by the responder, with the pool mutex held, just before it
+ * replies to a request whose flush state is not L9P_FLUSH_NONE.  Every
+ * flusher linked on req->lr_flushq is marked L9P_WS_RESPQUEUED and
+ * appended, in order, to the pool's reply queue.
+ */
+static void
+l9p_threadpool_rflush(struct l9p_threadpool *tp, struct l9p_request *req)
+{
+	struct l9p_request *waiter;
+
+	/*
+	 * https://swtch.com/plan9port/man/man9/flush.html says:
+	 *
+	 *   "Should multiple Tflushes be received for a pending
+	 *   request, they must be answered in order.  A Rflush for
+	 *   any of the multiple Tflushes implies an answer for all
+	 *   previous ones.  Therefore, should a server receive a
+	 *   request and then multiple flushes for that request, it
+	 *   need respond only to the last flush."
+	 *
+	 * So in principle every flusher except the last could be
+	 * marked "to be dropped" here.  That is left undone for now:
+	 * answering each one, in order, should be harmless.
+	 */
+	for (waiter = STAILQ_FIRST(&req->lr_flushq); waiter != NULL;
+	    waiter = STAILQ_NEXT(waiter, lr_flushlink)) {
+		waiter->lr_workstate = L9P_WS_RESPQUEUED;
+#ifdef notdef
+		if (not the last) {
+			waiter->lr_flushstate = L9P_FLUSH_NOT_RUN;
+			/* or, waiter->lr_drop = true ? */
+		}
+#endif
+		/* Reply queue uses lr_worklink, so the walk is undisturbed. */
+		STAILQ_INSERT_TAIL(&tp->ltp_replyq, waiter, lr_worklink);
+	}
+}
+
+/*
+ * Initialize a thread pool and start its threads.
+ *
+ * Sets up the pool mutex, both condition variables, the work and reply
+ * queues and the worker list, then tries to start size + 1 threads: the
+ * responder first, followed by size workers.
+ *
+ * Returns 0 when the responder and at least one worker are running,
+ * even if fewer than size workers could be started.  Otherwise returns
+ * an errno-style code: EINVAL for size <= 0, ENOMEM when a worker
+ * control structure cannot be allocated, or the code from the failing
+ * pthread call.  When thread start-up fails that badly, whatever was
+ * started is torn down again through l9p_threadpool_shutdown().
+ */
+int
+l9p_threadpool_init(struct l9p_threadpool *tp, int size)
+{
+	struct l9p_worker *worker;
+#if defined(__FreeBSD__)
+	char threadname[16];
+#endif
+	int error;
+	int i, nworkers, nresponders;
+
+	if (size <= 0)
+		return (EINVAL);
+	error = pthread_mutex_init(&tp->ltp_mtx, NULL);
+	if (error)
+		return (error);
+	error = pthread_cond_init(&tp->ltp_work_cv, NULL);
+	if (error)
+		goto fail_work_cv;
+	error = pthread_cond_init(&tp->ltp_reply_cv, NULL);
+	if (error)
+		goto fail_reply_cv;
+
+	STAILQ_INIT(&tp->ltp_workq);
+	STAILQ_INIT(&tp->ltp_replyq);
+	LIST_INIT(&tp->ltp_workers);
+
+	nresponders = 0;
+	nworkers = 0;
+	/* Index 0 is the responder; indices 1..size are the workers. */
+	for (i = 0; i <= size; i++) {
+		worker = calloc(1, sizeof(*worker));
+		if (worker == NULL) {
+			/* Handle exactly like a thread that failed to start. */
+			error = ENOMEM;
+			break;
+		}
+		worker->ltw_tp = tp;
+		worker->ltw_responder = i == 0;
+		error = pthread_create(&worker->ltw_thread, NULL,
+		    worker->ltw_responder ? l9p_responder : l9p_worker,
+		    (void *)worker);
+		if (error) {
+			free(worker);
+			break;
+		}
+		if (worker->ltw_responder)
+			nresponders++;
+		else
+			nworkers++;
+
+#if defined(__FreeBSD__)
+		if (worker->ltw_responder) {
+			pthread_set_name_np(worker->ltw_thread, "9p-responder");
+		} else {
+			/* Bounded: a large index is truncated, not overrun. */
+			snprintf(threadname, sizeof(threadname),
+			    "9p-worker:%d", i - 1);
+			pthread_set_name_np(worker->ltw_thread, threadname);
+		}
+#endif
+
+		LIST_INSERT_HEAD(&tp->ltp_workers, worker, ltw_link);
+	}
+	if (nresponders == 0 || nworkers == 0) {
+		/* need the one responder, and at least one worker */
+		l9p_threadpool_shutdown(tp);
+		return (error);
+	}
+	return (0);
+
+	/*
+	 * We could avoid these labels by having multiple destroy
+	 * paths (one for each error case), or by having booleans
+	 * for which variables were initialized.  Neither is very
+	 * appealing...
+	 */
+fail_reply_cv:
+	pthread_cond_destroy(&tp->ltp_work_cv);
+fail_work_cv:
+	pthread_mutex_destroy(&tp->ltp_mtx);
+
+	return (error);
+}
+
+/*
+ * Hand a freshly received request to the pool.
+ *
+ * Ordinary requests are appended to the work queue and one worker is
+ * woken.  Tflush is the exception: because it can cancel other
+ * requests it is dispatched right here, on the calling thread, through
+ * the normal dispatch routine.
+ */
+void
+l9p_threadpool_run(struct l9p_threadpool *tp, struct l9p_request *req)
+{
+
+	if (req->lr_req.hdr.type != L9P_TFLUSH) {
+		/* Regular request: queue it for the next free worker. */
+		pthread_mutex_lock(&tp->ltp_mtx);
+		req->lr_workstate = L9P_WS_NOTSTARTED;
+		STAILQ_INSERT_TAIL(&tp->ltp_workq, req, lr_worklink);
+		pthread_cond_signal(&tp->ltp_work_cv);
+		pthread_mutex_unlock(&tp->ltp_mtx);
+		return;
+	}
+
+	/*
+	 * Tflush: the request is not on any queue yet, so its state
+	 * can be set without taking the pool mutex.
+	 */
+	req->lr_workstate = L9P_WS_IMMEDIATE;
+	(void) l9p_dispatch_request(req);
+}
+
+/*
+ * Run a Tflush request. Called via l9p_dispatch_request() since
+ * it has some debug code in it, but not called from worker thread.
+ *
+ * req is the Tflush itself; the request it targets (the "flushee") is
+ * looked up by req->lr_req.tflush.oldtag in the connection's request
+ * table.  Depending on the flushee's work-state, req is either parked
+ * on the flushee's lr_flushq (to be queued for reply later by
+ * l9p_threadpool_rflush()) or put on the reply queue right away.
+ *
+ * Always returns 0, and always sets req->lr_error to 0.
+ *
+ * Locking: the request table is write-locked for the lookup; when a
+ * flushee is found, the pool mutex is taken before that lock is
+ * dropped.  Every path leaves this function with the pool mutex
+ * released.
+ */
+int
+l9p_threadpool_tflush(struct l9p_request *req)
+{
+ struct l9p_connection *conn;
+ struct l9p_threadpool *tp;
+ struct l9p_request *flushee;
+ uint16_t oldtag;
+ enum l9p_flushstate nstate;
+
+ /*
+ * Find what we're supposed to flush (the flushee, as it were).
+ */
+ req->lr_error = 0; /* Tflush always succeeds */
+ conn = req->lr_conn;
+ tp = &conn->lc_tp;
+ oldtag = req->lr_req.tflush.oldtag;
+ ht_wrlock(&conn->lc_requests);
+ flushee = ht_find_locked(&conn->lc_requests, oldtag);
+ if (flushee == NULL) {
+ /*
+ * Nothing to flush! The old request must have
+ * been done and gone already. Just queue this
+ * Tflush for a success reply.
+ *
+ * (The "done" path expects the pool mutex to be held,
+ * hence the lock taken here.)
+ */
+ ht_unlock(&conn->lc_requests);
+ pthread_mutex_lock(&tp->ltp_mtx);
+ goto done;
+ }
+
+ /*
+ * Found the original request. We'll need to inspect its
+ * work-state to figure out what to do.
+ *
+ * The pool mutex is taken before the table lock is released.
+ */
+ pthread_mutex_lock(&tp->ltp_mtx);
+ ht_unlock(&conn->lc_requests);
+
+ /*
+ * Every enum l9p_workstate value has a case below; each one
+ * either sets nstate or jumps to "done".
+ */
+ switch (flushee->lr_workstate) {
+
+ case L9P_WS_NOTSTARTED:
+ /*
+ * Flushee is on work queue, but not yet being
+ * handled by a worker.
+ *
+ * The documentation -- see
+ * http://ericvh.github.io/9p-rfc/rfc9p2000.html
+ * https://swtch.com/plan9port/man/man9/flush.html
+ * -- says that "the server should answer the
+ * flush message immediately". However, Linux
+ * sends flush requests for operations that
+ * must finish, such as Tclunk, and it's not
+ * possible to *answer* the flush request until
+ * it has been handled (if necessary) or aborted
+ * (if allowed).
+ *
+ * We therefore now just mark the original request
+ * and let the request-handler do whatever is
+ * appropriate. NOTE: we could have a table of
+ * "requests that can be aborted without being
+ * run" vs "requests that must be run to be
+ * aborted", but for now that seems like an
+ * unnecessary complication.
+ */
+ nstate = L9P_FLUSH_REQUESTED_PRE_START;
+ break;
+
+ case L9P_WS_IMMEDIATE:
+ /*
+ * This state only applies to Tflush requests, and
+ * flushing a Tflush is illegal. But we'll do nothing
+ * special here, which will make us act like a flush
+ * request for the flushee that arrived too late to
+ * do anything about the flushee.
+ */
+ nstate = L9P_FLUSH_REQUESTED_POST_START;
+ break;
+
+ case L9P_WS_INPROGRESS:
+ /*
+ * Worker thread flushee->lr_worker is working on it.
+ * Kick it to get it out of blocking system calls.
+ * (This requires that it carefully set up some
+ * signal handlers, and may be FreeBSD-dependent,
+ * it probably cannot be handled this way on MacOS.)
+ *
+ * The kick itself is not implemented yet (see the
+ * "notyet" block); only the flush state is recorded.
+ */
+#ifdef notyet
+ pthread_kill(...);
+#endif
+ nstate = L9P_FLUSH_REQUESTED_POST_START;
+ break;
+
+ case L9P_WS_RESPQUEUED:
+ /*
+ * The flushee is already in the response queue.
+ * We'll just mark it as having had some flush
+ * action applied.
+ */
+ nstate = L9P_FLUSH_TOOLATE;
+ break;
+
+ case L9P_WS_REPLYING:
+ /*
+ * Although we found the flushee, it's too late to
+ * make us depend on it: it's already heading out
+ * the door as a reply.
+ *
+ * We don't want to do anything to the flushee.
+ * Instead, we want to work the same way as if
+ * we had never found the tag.
+ */
+ goto done;
+ }
+
+ /*
+ * Now add us to the list of Tflush-es that are waiting
+ * for the flushee (creating the list if needed, i.e., if
+ * this is the first Tflush for the flushee). We (req)
+ * will get queued for reply later, when the responder
+ * processes the flushee and calls l9p_threadpool_rflush().
+ */
+ if (flushee->lr_flushstate == L9P_FLUSH_NONE)
+ STAILQ_INIT(&flushee->lr_flushq);
+ flushee->lr_flushstate = nstate;
+ STAILQ_INSERT_TAIL(&flushee->lr_flushq, req, lr_flushlink);
+
+ pthread_mutex_unlock(&tp->ltp_mtx);
+
+ return (0);
+
+done:
+ /*
+ * This immediate op is ready to be replied-to now, so just
+ * stick it onto the reply queue.
+ *
+ * Entered with the pool mutex held; the responder is signalled
+ * after the mutex has been released.
+ */
+ req->lr_workstate = L9P_WS_RESPQUEUED;
+ STAILQ_INSERT_TAIL(&tp->ltp_replyq, req, lr_worklink);
+ pthread_mutex_unlock(&tp->ltp_mtx);
+ pthread_cond_signal(&tp->ltp_reply_cv);
+ return (0);
+}
+
+/*
+ * Stop and reap every thread in the pool, then destroy the pool's
+ * condition variables and mutex.
+ *
+ * Threads are handled one at a time: the thread's exit flag is set
+ * under the pool mutex, the matching condition variable is poked, and
+ * the thread is joined before its control structure is unlinked and
+ * freed.  Always returns 0.
+ */
+int
+l9p_threadpool_shutdown(struct l9p_threadpool *tp)
+{
+	struct l9p_worker *victim;
+
+	/* Each pass removes the list head, so just keep taking the head. */
+	while ((victim = LIST_FIRST(&tp->ltp_workers)) != NULL) {
+		pthread_mutex_lock(&tp->ltp_mtx);
+		victim->ltw_exiting = true;
+		if (!victim->ltw_responder) {
+			/* All workers share this cv; wake them all. */
+			pthread_cond_broadcast(&tp->ltp_work_cv);
+		} else {
+			pthread_cond_signal(&tp->ltp_reply_cv);
+		}
+		pthread_mutex_unlock(&tp->ltp_mtx);
+
+		pthread_join(victim->ltw_thread, NULL);
+		LIST_REMOVE(victim, ltw_link);
+		free(victim);
+	}
+
+	pthread_cond_destroy(&tp->ltp_reply_cv);
+	pthread_cond_destroy(&tp->ltp_work_cv);
+	pthread_mutex_destroy(&tp->ltp_mtx);
+
+	return (0);
+}
diff --git a/threadpool.h b/threadpool.h
new file mode 100644
index 000000000000..2855c1c54577
--- /dev/null
+++ b/threadpool.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef LIB9P_THREADPOOL_H
+#define LIB9P_THREADPOOL_H
+
+#include <stdbool.h>
+#include <pthread.h>
+#include <sys/queue.h>
+#include "lib9p.h"
+
+STAILQ_HEAD(l9p_request_queue, l9p_request);
+
+/*
+ * Most of the workers in the threadpool run requests.
+ *
+ * One distinguished worker delivers responses from the
+ * response queue. The reason this worker exists is to
+ * guarantee response order, so that flush responses go
+ * after their flushed requests.
+ *
+ * The queues are manipulated with ltp_mtx held.  Note that
+ * l9p_threadpool_init() does not assign ltp_conn.
+ */
+struct l9p_threadpool {
+ struct l9p_connection * ltp_conn; /* the connection */
+ struct l9p_request_queue ltp_workq; /* requests awaiting a worker */
+ struct l9p_request_queue ltp_replyq; /* requests that are done */
+ pthread_mutex_t ltp_mtx; /* locks queues and cond vars */
+ pthread_cond_t ltp_work_cv; /* to signal regular workers */
+ pthread_cond_t ltp_reply_cv; /* to signal reply-worker */
+ LIST_HEAD(, l9p_worker) ltp_workers; /* list of all workers */
+};
+
+/*
+ * All workers, including the responder, use this as their
+ * control structure. (The only thing that distinguishes the
+ * responder is that it runs different code and waits on the
+ * reply_cv.)
+ *
+ * One of these is allocated per thread by l9p_threadpool_init()
+ * and freed by l9p_threadpool_shutdown() after the thread has
+ * been joined.
+ */
+struct l9p_worker {
+ struct l9p_threadpool * ltw_tp; /* pool this thread belongs to */
+ pthread_t ltw_thread; /* the thread itself */
+ bool ltw_exiting; /* set (under ltp_mtx) to ask the thread to exit */
+ bool ltw_responder; /* true for the responder, false for workers */
+ LIST_ENTRY(l9p_worker) ltw_link; /* linkage on ltp_workers */
+};
+
+/*
+ * Each request has a "work state" telling where the request is,
+ * in terms of workers working on it. That is, this tells us
+ * which threadpool queue, if any, the request is in now or would
+ * go in, or what's happening with it.
+ *
+ * l9p_threadpool_tflush() switches on this value to decide how a
+ * Tflush aimed at the request is handled.
+ */
+enum l9p_workstate {
+ L9P_WS_NOTSTARTED, /* on the work queue, not yet started */
+ L9P_WS_IMMEDIATE, /* Tflush being done sans worker */
+ L9P_WS_INPROGRESS, /* worker is working on it */
+ L9P_WS_RESPQUEUED, /* work is done, on the reply queue */
+ L9P_WS_REPLYING, /* responder is in final reply path */
+};
+
+/*
+ * Each request has a "flush state", initially NONE meaning no
+ * Tflush affected the request.
+ *
+ * If a Tflush comes in before we ever assign a work thread,
+ * the flush state goes to FLUSH_REQUESTED_PRE_START.
+ *
+ * If a Tflush comes in after we assign a work thread, the
+ * flush state goes to FLUSH_REQUESTED_POST_START. The flush
+ * request may be too late: the request might finish anyway.
+ * Or it might be soon enough to abort. In all cases, though, the
+ * operation requesting the flush (the "flusher") must wait for
+ * the other request (the "flushee") to go through the respond
+ * path. The respond routine gets to decide whether to send a
+ * normal response, send an error, or drop the request
+ * entirely.
+ *
+ * If a Tflush comes in once the flushee's response has already
+ * been queued, the flush state goes to FLUSH_TOOLATE.
+ *
+ * There's one especially annoying case: what if a Tflush comes in
+ * *while* we're sending a response? In this case it's too late:
+ * the flush just waits for the fully-composed response.
+ */
+enum l9p_flushstate {
+ L9P_FLUSH_NONE = 0, /* must be zero */
+ L9P_FLUSH_REQUESTED_PRE_START, /* not even started before flush */
+ L9P_FLUSH_REQUESTED_POST_START, /* started, then someone said flush */
+ L9P_FLUSH_TOOLATE /* too late, response already queued */
+};
+
+/*
+ * Thread pool entry points.
+ *
+ * init:     start one responder plus the requested number of workers;
+ *           returns 0 or an errno-style code.
+ * run:      queue a request for a worker (Tflush is dispatched on the
+ *           calling thread instead).
+ * shutdown: stop and join all threads, destroy the pool's locks.
+ * tflush:   handle a Tflush request; always returns 0.
+ *
+ * NOTE(review): threadpool.c contains no definition of
+ * l9p_threadpool_flushee_done(); the declaration may be stale --
+ * confirm whether it is defined elsewhere.
+ */
+void l9p_threadpool_flushee_done(struct l9p_request *);
+int l9p_threadpool_init(struct l9p_threadpool *, int);
+void l9p_threadpool_run(struct l9p_threadpool *, struct l9p_request *);
+int l9p_threadpool_shutdown(struct l9p_threadpool *);
+int l9p_threadpool_tflush(struct l9p_request *);
+
+#endif /* LIB9P_THREADPOOL_H */
diff --git a/transport/socket.c b/transport/socket.c
new file mode 100644
index 000000000000..8b6a9e59c8e9
--- /dev/null
+++ b/transport/socket.c
@@ -0,0 +1,363 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <assert.h>
+#include <sys/types.h>
+#ifdef __APPLE__
+# include "../apple_endian.h"
+#else
+# include <sys/endian.h>
+#endif
+#include <sys/socket.h>
+#include <sys/event.h>
+#include <sys/uio.h>
+#include <netdb.h>
+#include "../lib9p.h"
+#include "../lib9p_impl.h"
+#include "../log.h"
+#include "socket.h"
+
+/*
+ * Per-connection state of the socket transport.  One is allocated in
+ * l9p_socket_accept(), installed as the transport's aux pointer, and
+ * freed by the connection thread when it exits.
+ */
+struct l9p_socket_softc
+{
+ struct l9p_connection *ls_conn; /* connection served over this socket */
+ struct sockaddr ls_sockaddr; /* NOTE(review): not filled in by l9p_socket_accept() */
+ socklen_t ls_socklen; /* NOTE(review): not filled in by l9p_socket_accept() */
+ pthread_t ls_thread; /* thread running l9p_socket_thread() */
+ int ls_fd; /* connected socket descriptor */
+};
+
+static int l9p_socket_readmsg(struct l9p_socket_softc *, void **, size_t *);
+static int l9p_socket_get_response_buffer(struct l9p_request *,
+ struct iovec *, size_t *, void *);
+static int l9p_socket_send_response(struct l9p_request *, const struct iovec *,
+ const size_t, const size_t, void *);
+static void l9p_socket_drop_response(struct l9p_request *, const struct iovec *,
+ size_t, void *);
+static void *l9p_socket_thread(void *);
+static ssize_t xread(int, void *, size_t);
+static ssize_t xwrite(int, void *, size_t);
+
+/*
+ * Listen on host:port and serve incoming 9P connections.
+ *
+ * Resolves host/port, binds and listens on up to two of the resulting
+ * addresses (the size of the local socket/kevent arrays), and then
+ * loops forever accepting connections, handing each one to
+ * l9p_socket_accept().
+ *
+ * Does not return while serving.  Returns -1 if the address cannot be
+ * resolved, no listening socket can be set up, or the kqueue fails;
+ * any listening sockets and the kqueue are closed before returning.
+ */
+int
+l9p_start_server(struct l9p_server *server, const char *host, const char *port)
+{
+	struct addrinfo *res, *res0, hints;
+	struct kevent kev[2];
+	struct kevent event[2];
+	int sockets[2];
+	const int maxsockets = (int)(sizeof(sockets) / sizeof(sockets[0]));
+	int err, i, val, evs, nsockets = 0;
+	int kq = -1;
+	int saved_errno = 0;
+
+	memset(&hints, 0, sizeof(hints));
+	hints.ai_family = PF_UNSPEC;
+	hints.ai_socktype = SOCK_STREAM;
+	err = getaddrinfo(host, port, &hints, &res0);
+
+	if (err)
+		return (-1);
+
+	/* Stop once the fixed-size arrays are full. */
+	for (res = res0; res != NULL && nsockets < maxsockets;
+	    res = res->ai_next) {
+		int s = socket(res->ai_family, res->ai_socktype,
+		    res->ai_protocol);
+
+		if (s < 0) {
+			saved_errno = errno;
+			continue;
+		}
+
+		/* Best effort: failure to set the option is not fatal. */
+		val = 1;
+		(void)setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &val,
+		    sizeof(val));
+
+		if (bind(s, res->ai_addr, res->ai_addrlen) < 0 ||
+		    listen(s, 10) < 0) {
+			/* Remember why before close() can disturb errno. */
+			saved_errno = errno;
+			close(s);
+			continue;
+		}
+
+		sockets[nsockets] = s;
+		EV_SET(&kev[nsockets++], s, EVFILT_READ, EV_ADD | EV_ENABLE, 0,
+		    0, 0);
+	}
+	freeaddrinfo(res0);
+
+	if (nsockets < 1) {
+		L9P_LOG(L9P_ERROR, "bind(): %s", strerror(saved_errno));
+		return(-1);
+	}
+
+	kq = kqueue();
+	if (kq < 0) {
+		L9P_LOG(L9P_ERROR, "kqueue(): %s", strerror(errno));
+		goto fail;
+	}
+
+	if (kevent(kq, kev, nsockets, NULL, 0, NULL) < 0) {
+		L9P_LOG(L9P_ERROR, "kevent(): %s", strerror(errno));
+		goto fail;
+	}
+
+	for (;;) {
+		evs = kevent(kq, NULL, 0, event, nsockets, NULL);
+		if (evs < 0) {
+			if (errno == EINTR)
+				continue;
+
+			L9P_LOG(L9P_ERROR, "kevent(): %s", strerror(errno));
+			goto fail;
+		}
+
+		for (i = 0; i < evs; i++) {
+			/*
+			 * sockaddr_storage is large enough for any
+			 * address family; a bare struct sockaddr is
+			 * too small for an IPv6 peer address.
+			 */
+			struct sockaddr_storage client_addr;
+			socklen_t client_addr_len = sizeof(client_addr);
+			int news = accept((int)event[i].ident,
+			    (struct sockaddr *)&client_addr,
+			    &client_addr_len);
+
+			if (news < 0) {
+				L9P_LOG(L9P_WARNING, "accept(): %s",
+				    strerror(errno));
+				continue;
+			}
+
+			l9p_socket_accept(server, news,
+			    (struct sockaddr *)&client_addr,
+			    client_addr_len);
+		}
+	}
+
+fail:
+	/* Release everything opened above before reporting failure. */
+	if (kq >= 0)
+		close(kq);
+	for (i = 0; i < nsockets; i++)
+		close(sockets[i]);
+	return (-1);
+}
+
+/*
+ * Start serving 9P on an already-accepted socket.
+ *
+ * Creates a connection object on server, installs the socket
+ * transport callbacks on it, and starts a thread
+ * (l9p_socket_thread) that reads requests from conn_fd.
+ * client_addr / client_addr_len are used only to log the peer's
+ * numeric address.
+ *
+ * Failures are logged and nothing is returned, so the caller cannot
+ * tell whether set-up succeeded; conn_fd is therefore closed here on
+ * every failure path.
+ */
+void
+l9p_socket_accept(struct l9p_server *server, int conn_fd,
+    struct sockaddr *client_addr, socklen_t client_addr_len)
+{
+	struct l9p_socket_softc *sc;
+	struct l9p_connection *conn;
+	char host[NI_MAXHOST + 1];
+	char serv[NI_MAXSERV + 1];
+	const char *hostname = host;
+	const char *servname = serv;
+	int err;
+
+	err = getnameinfo(client_addr, client_addr_len, host, NI_MAXHOST, serv,
+	    NI_MAXSERV, NI_NUMERICHOST | NI_NUMERICSERV);
+
+	if (err != 0) {
+		L9P_LOG(L9P_WARNING, "cannot look up client name: %s",
+		    gai_strerror(err));
+		/* host/serv hold no valid string; never print them. */
+		hostname = "unknown";
+		servname = "unknown";
+	} else {
+		L9P_LOG(L9P_INFO, "new connection from %s:%s", host, serv);
+	}
+
+	if (l9p_connection_init(server, &conn) != 0) {
+		L9P_LOG(L9P_ERROR, "cannot create new connection");
+		close(conn_fd);
+		return;
+	}
+
+	sc = l9p_calloc(1, sizeof(*sc));
+	sc->ls_conn = conn;
+	sc->ls_fd = conn_fd;
+
+	/*
+	 * Fill in transport handler functions and aux argument.
+	 */
+	conn->lc_lt.lt_aux = sc;
+	conn->lc_lt.lt_get_response_buffer = l9p_socket_get_response_buffer;
+	conn->lc_lt.lt_send_response = l9p_socket_send_response;
+	conn->lc_lt.lt_drop_response = l9p_socket_drop_response;
+
+	err = pthread_create(&sc->ls_thread, NULL, l9p_socket_thread, sc);
+	if (err) {
+		L9P_LOG(L9P_ERROR,
+		    "pthread_create (for connection from %s:%s): error %s",
+		    hostname, servname, strerror(err));
+		l9p_connection_close(sc->ls_conn);
+		close(conn_fd);
+		free(sc);
+	}
+}
+
+/*
+ * Per-connection receive thread.
+ *
+ * arg is the connection's struct l9p_socket_softc.  Messages are read
+ * one at a time and passed to l9p_connection_recv() as a single-element
+ * iovec; the message buffer is freed once that call returns.  When
+ * reading fails or hits end-of-file, the connection is closed and the
+ * softc freed.  Always returns NULL.
+ *
+ * NOTE(review): sc->ls_fd is not closed here -- confirm where the
+ * descriptor is released.
+ */
+static void *
+l9p_socket_thread(void *arg)
+{
+	struct l9p_socket_softc *sc = arg;
+	struct iovec iov;
+	void *msg;
+	size_t msglen;
+
+	while (l9p_socket_readmsg(sc, &msg, &msglen) == 0) {
+		iov.iov_base = msg;
+		iov.iov_len = msglen;
+		l9p_connection_recv(sc->ls_conn, &iov, 1, NULL);
+		free(msg);
+	}
+
+	L9P_LOG(L9P_INFO, "connection closed");
+	l9p_connection_close(sc->ls_conn);
+	free(sc);
+	return (NULL);
+}
+
+/*
+ * Read one complete 9P message from the connection's socket.
+ *
+ * A message starts with a 32-bit little-endian size that counts the
+ * whole message, size field included.  On success, *buf receives a
+ * newly allocated buffer holding the entire message (the caller
+ * releases it with free()), *size receives its length, and 0 is
+ * returned.
+ *
+ * Returns -1 on end-of-file, read error, short read, allocation
+ * failure, or a size field smaller than the size field itself; in all
+ * of those cases nothing is stored through buf/size and no memory is
+ * left allocated.
+ *
+ * NOTE(review): the size comes from the peer and has no upper bound
+ * here; consider capping it at the negotiated msize.
+ */
+static int
+l9p_socket_readmsg(struct l9p_socket_softc *sc, void **buf, size_t *size)
+{
+	uint32_t msize;
+	size_t toread;
+	ssize_t ret;
+	void *buffer, *grown;
+	int fd = sc->ls_fd;
+
+	/* Descriptor 0 is valid; only negative values are not. */
+	assert(fd >= 0);
+
+	buffer = l9p_malloc(sizeof(uint32_t));
+	if (buffer == NULL)
+		return (-1);
+
+	ret = xread(fd, buffer, sizeof(uint32_t));
+	if (ret < 0) {
+		L9P_LOG(L9P_ERROR, "read(): %s", strerror(errno));
+		goto fail;
+	}
+
+	if (ret != (ssize_t)sizeof(uint32_t)) {
+		if (ret == 0)
+			L9P_LOG(L9P_DEBUG, "%p: EOF", (void *)sc->ls_conn);
+		else
+			L9P_LOG(L9P_ERROR,
+			    "short read: %zd bytes of %zu expected",
+			    ret, sizeof(uint32_t));
+		goto fail;
+	}
+
+	memcpy(&msize, buffer, sizeof(msize));
+	msize = le32toh(msize);
+
+	/*
+	 * The size includes its own four bytes.  Anything smaller
+	 * would make the subtraction below wrap to a huge count while
+	 * the buffer is resized to only msize bytes.
+	 */
+	if (msize < sizeof(uint32_t)) {
+		L9P_LOG(L9P_ERROR, "invalid message size %u",
+		    (unsigned int)msize);
+		goto fail;
+	}
+	toread = msize - sizeof(uint32_t);
+
+	/* Keep the old pointer so it can still be freed on failure. */
+	grown = l9p_realloc(buffer, msize);
+	if (grown == NULL)
+		goto fail;
+	buffer = grown;
+
+	ret = xread(fd, (char *)buffer + sizeof(uint32_t), toread);
+	if (ret < 0) {
+		L9P_LOG(L9P_ERROR, "read(): %s", strerror(errno));
+		goto fail;
+	}
+
+	if (ret != (ssize_t)toread) {
+		L9P_LOG(L9P_ERROR, "short read: %zd bytes of %zu expected",
+		    ret, toread);
+		goto fail;
+	}
+
+	*size = msize;
+	*buf = buffer;
+	L9P_LOG(L9P_INFO, "%p: read complete message, buf=%p size=%u",
+	    (void *)sc->ls_conn, buffer, (unsigned int)msize);
+
+	return (0);
+
+fail:
+	free(buffer);
+	return (-1);
+}
+
+/*
+ * Transport callback: provide a buffer for building a response.
+ *
+ * Allocates a single buffer of the connection's lc_msize bytes,
+ * describes it in iov[0], and stores 1 in *niovp.  Always returns 0.
+ * The buffer is later released by the send or drop callback.
+ */
+static int
+l9p_socket_get_response_buffer(struct l9p_request *req, struct iovec *iov,
+    size_t *niovp, void *arg __unused)
+{
+
+	iov[0].iov_len = req->lr_conn->lc_msize;
+	iov[0].iov_base = l9p_malloc(iov[0].iov_len);
+	*niovp = 1;
+
+	return (0);
+}
+
+/*
+ * Transport callback: write a finished response to the socket.
+ *
+ * Sends the first iolen bytes of iov[0] on the connection's
+ * descriptor (arg is the connection's softc).  Returns 0 after
+ * freeing the buffer on success, or -1 if the data could not be
+ * written in full.
+ *
+ * NOTE(review): on failure the buffer is not freed here; confirm
+ * whether the caller releases it (for example through the drop
+ * callback) before adding a free() on this path.
+ */
+static int
+l9p_socket_send_response(struct l9p_request *req __unused,
+    const struct iovec *iov, const size_t niov __unused, const size_t iolen,
+    void *arg)
+{
+	struct l9p_socket_softc *sc = (struct l9p_socket_softc *)arg;
+
+	assert(sc->ls_fd >= 0);
+
+	L9P_LOG(L9P_DEBUG, "%p: sending reply, buf=%p, size=%zu", arg,
+	    iov[0].iov_base, iolen);
+
+	/* Compare at full width; an int cast could truncate iolen. */
+	if (xwrite(sc->ls_fd, iov[0].iov_base, iolen) != (ssize_t)iolen) {
+		L9P_LOG(L9P_ERROR, "short write: %s", strerror(errno));
+		return (-1);
+	}
+
+	free(iov[0].iov_base);
+	return (0);
+}
+
+/*
+ * Transport callback: discard a response without sending it.
+ * Logs the buffer address and frees the buffer held in iov[0].
+ */
+static void
+l9p_socket_drop_response(struct l9p_request *req __unused,
+    const struct iovec *iov, size_t niov __unused, void *arg)
+{
+	void *unsent = iov[0].iov_base;
+
+	L9P_LOG(L9P_DEBUG, "%p: drop buf=%p", arg, unsent);
+	free(unsent);
+}
+
+/*
+ * Read exactly count bytes from fd into buf, retrying after EINTR and
+ * after partial reads.
+ *
+ * Returns the number of bytes stored: count on full success, fewer if
+ * end-of-file was reached first.  Returns -1 on any other read error.
+ */
+static ssize_t
+xread(int fd, void *buf, size_t count)
+{
+	char *cursor = buf;
+	size_t remaining = count;
+	ssize_t n;
+
+	while (remaining > 0) {
+		n = read(fd, cursor, remaining);
+		if (n == 0)
+			break;		/* end of file */
+		if (n < 0) {
+			if (errno != EINTR)
+				return (-1);
+			continue;	/* interrupted: try again */
+		}
+		cursor += n;
+		remaining -= (size_t)n;
+	}
+
+	return ((ssize_t)(count - remaining));
+}
+
+/*
+ * Write exactly count bytes from buf to fd, retrying after EINTR and
+ * after partial writes.
+ *
+ * Returns the number of bytes written: count on full success, fewer if
+ * write() reported zero bytes written.  Returns -1 on any other write
+ * error.
+ */
+static ssize_t
+xwrite(int fd, void *buf, size_t count)
+{
+	const char *cursor = buf;
+	size_t remaining = count;
+	ssize_t n;
+
+	while (remaining > 0) {
+		n = write(fd, cursor, remaining);
+		if (n == 0)
+			break;		/* no progress possible */
+		if (n < 0) {
+			if (errno != EINTR)
+				return (-1);
+			continue;	/* interrupted: try again */
+		}
+		cursor += n;
+		remaining -= (size_t)n;
+	}
+
+	return ((ssize_t)(count - remaining));
+}
diff --git a/transport/socket.h b/transport/socket.h
new file mode 100644
index 000000000000..b022da1a923e
--- /dev/null
+++ b/transport/socket.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef LIB9P_SOCKET_H
+#define LIB9P_SOCKET_H
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include "../lib9p.h"
+
+/*
+ * Resolve host/port, listen on the resulting address(es) and serve
+ * connections in a loop.  Returns -1 on failure; it does not return
+ * while the server is running.
+ */
+int l9p_start_server(struct l9p_server *server, const char *host,
+ const char *port);
+/*
+ * Attach an already-accepted socket (conn_fd) to the server and start
+ * a thread to service it.  client_addr / client_addr_len identify the
+ * peer and are used for logging.  Failures are logged, not returned.
+ */
+void l9p_socket_accept(struct l9p_server *server, int conn_fd,
+ struct sockaddr *client_addr, socklen_t client_addr_len);
+
+#endif /* LIB9P_SOCKET_H */
diff --git a/utils.c b/utils.c
new file mode 100644
index 000000000000..609d7a216c82
--- /dev/null
+++ b/utils.c
@@ -0,0 +1,1268 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <assert.h>
+#include <inttypes.h>
+#include <sys/param.h>
+#include <sys/stat.h>
+#include <sys/uio.h>
+#if defined(__FreeBSD__)
+#include <sys/sbuf.h>
+#else
+#include "sbuf/sbuf.h"
+#endif
+#include "lib9p.h"
+#include "fcall.h"
+#include "linux_errno.h"
+
+#ifdef __APPLE__
+ #define GETGROUPS_GROUP_TYPE_IS_INT
+#endif
+
+/* Number of elements in <ary>; only valid for true arrays, not pointers. */
+#define N(ary) (sizeof(ary) / sizeof((ary)[0]))
+
+/*
+ * One row of a bit-description table consumed by l9p_describe_bits()
+ * below.  A row applies when (value & db_mask) == db_match; a row
+ * whose db_name is NULL terminates the table.
+ */
+struct descbits {
+ uint64_t db_mask; /* bits of the value this row examines */
+ uint64_t db_match; /* value those bits must have to match */
+ const char *db_name; /* text printed on a match; NULL ends the table */
+};
+
+
+/*
+ * Forward declarations for the file-local helpers.  Every static
+ * l9p_describe_* function defined in this file is listed, so the
+ * definitions below may appear in any order.
+ */
+static bool l9p_describe_bits(const char *, uint64_t, const char *,
+    const struct descbits *, struct sbuf *);
+static void l9p_describe_fid(const char *, uint32_t, struct sbuf *);
+static void l9p_describe_mode(const char *, uint32_t, struct sbuf *);
+static void l9p_describe_lflags(const char *, uint32_t, struct sbuf *);
+static void l9p_describe_name(const char *, char *, struct sbuf *);
+static void l9p_describe_perm(const char *, uint32_t, struct sbuf *);
+static void l9p_describe_ext_perm(const char *, uint32_t, struct sbuf *);
+static void l9p_describe_lperm(const char *, uint32_t, struct sbuf *);
+static void l9p_describe_qid(const char *, struct l9p_qid *, struct sbuf *);
+static void l9p_describe_l9stat(const char *, struct l9p_stat *,
+    enum l9p_version, struct sbuf *);
+static void l9p_describe_statfs(const char *, struct l9p_statfs *,
+    struct sbuf *);
+static void l9p_describe_time(struct sbuf *, const char *, uint64_t, uint64_t);
+static void l9p_describe_readdir(struct sbuf *, struct l9p_f_io *);
+static void l9p_describe_size(const char *, uint64_t, struct sbuf *);
+static void l9p_describe_ugid(const char *, uint32_t, struct sbuf *);
+static void l9p_describe_getattr_mask(uint64_t, struct sbuf *);
+static void l9p_describe_unlinkat_flags(const char *, uint32_t, struct sbuf *);
+static const char *lookup_linux_errno(uint32_t);
+
+/*
+ * Printable names of the 9P message types, indexed by
+ * (type - L9P__FIRST).
+ *
+ * Using indexed initializers, we can have these occur in any order.
+ * Using adjacent-string concatenation ("T" name, "R" name), we
+ * get both Tfoo and Rfoo strings with one copy of the name.
+ * Alas, there is no stupid cpp trick to lowercase-ify, so we
+ * have to write each name twice. In which case we might as well
+ * make the second one a string in the first place and not bother
+ * with the stringizing.
+ *
+ * This table should have entries for each enum value in fcall.h.
+ * A slot that no X() line initializes stays NULL;
+ * l9p_describe_fcall() checks for that and reports the message as
+ * unknown.
+ */
+#define X(NAME, name) [L9P_T##NAME - L9P__FIRST] = "T" name, \
+ [L9P_R##NAME - L9P__FIRST] = "R" name
+static const char *ftype_names[] = {
+ X(VERSION, "version"),
+ X(AUTH, "auth"),
+ X(ATTACH, "attach"),
+ X(ERROR, "error"),
+ X(LERROR, "lerror"),
+ X(FLUSH, "flush"),
+ X(WALK, "walk"),
+ X(OPEN, "open"),
+ X(CREATE, "create"),
+ X(READ, "read"),
+ X(WRITE, "write"),
+ X(CLUNK, "clunk"),
+ X(REMOVE, "remove"),
+ X(STAT, "stat"),
+ X(WSTAT, "wstat"),
+ X(STATFS, "statfs"),
+ X(LOPEN, "lopen"),
+ X(LCREATE, "lcreate"),
+ X(SYMLINK, "symlink"),
+ X(MKNOD, "mknod"),
+ X(RENAME, "rename"),
+ X(READLINK, "readlink"),
+ X(GETATTR, "getattr"),
+ X(SETATTR, "setattr"),
+ X(XATTRWALK, "xattrwalk"),
+ X(XATTRCREATE, "xattrcreate"),
+ X(READDIR, "readdir"),
+ X(FSYNC, "fsync"),
+ X(LOCK, "lock"),
+ X(GETLOCK, "getlock"),
+ X(LINK, "link"),
+ X(MKDIR, "mkdir"),
+ X(RENAMEAT, "renameat"),
+ X(UNLINKAT, "unlinkat"),
+};
+#undef X
+
+/*
+ * Build in iov2 a view of iov1 with the first <seek> bytes skipped.
+ *
+ * Leading elements of iov1 that are consumed entirely by the seek are
+ * dropped; the first surviving element has its base advanced and its
+ * length reduced by whatever part of the seek is left, and the
+ * remaining elements are copied unchanged.  The number of elements
+ * stored in iov2 is returned through *niov2 (0 if <seek> covers all
+ * of iov1).  iov2 needs room for up to niov1 elements.
+ */
+void
+l9p_seek_iov(struct iovec *iov1, size_t niov1, struct iovec *iov2,
+    size_t *niov2, size_t seek)
+{
+	size_t skip = seek;	/* bytes still to be skipped */
+	size_t src = 0;		/* next element of iov1 to look at */
+	size_t dst = 0;		/* next free slot in iov2 */
+
+	/* Drop every leading element the seek swallows whole. */
+	while (src < niov1 && iov1[src].iov_len <= skip) {
+		skip -= iov1[src].iov_len;
+		src++;
+	}
+
+	/* Only the first element copied can be partially consumed. */
+	for (; src < niov1; src++, dst++) {
+		iov2[dst].iov_base = (char *)iov1[src].iov_base + skip;
+		iov2[dst].iov_len = iov1[src].iov_len - skip;
+		skip = 0;
+	}
+
+	*niov2 = dst;
+}
+
+/*
+ * Shorten an iovec array in place so that it describes at most
+ * <length> bytes.
+ *
+ * The first element that does not fit in what is left of <length>
+ * has its iov_len cut down (possibly to 0) and becomes the last
+ * element; the return value is the new element count.  If every
+ * element fits, nothing is modified and niov is returned.
+ */
+size_t
+l9p_truncate_iov(struct iovec *iov, size_t niov, size_t length)
+{
+	size_t room = length;	/* bytes still allowed */
+	size_t idx;
+
+	for (idx = 0; idx < niov; idx++) {
+		if (iov[idx].iov_len > room) {
+			iov[idx].iov_len = room;
+			return (idx + 1);
+		}
+		room -= iov[idx].iov_len;
+	}
+
+	return (niov);
+}
+
+/*
+ * Wrapper for getgrouplist() that returns the group list in malloc'ed
+ * memory (the caller must free() it), and papers over FreeBSD vs Mac
+ * differences in the getgrouplist() argument types.
+ *
+ * Note that this function guarantees that *either*:
+ *     return value != NULL and *angroups has been set
+ * or: return value == NULL and *angroups is 0
+ *
+ * The returned array always has room for NGROUPS_MAX entries and
+ * *angroups never exceeds that, even when getgrouplist() reports that
+ * the user is in more groups than fit.
+ */
+gid_t *
+l9p_getgrlist(const char *name, gid_t basegid, int *angroups)
+{
+	const int capacity = NGROUPS_MAX;
+#ifdef GETGROUPS_GROUP_TYPE_IS_INT
+	int i, *int_groups;
+#endif
+	gid_t *groups;
+	int ngroups;
+
+	/*
+	 * Todo, perhaps: while getgrouplist() returns -1, expand.
+	 * For now just use NGROUPS_MAX.
+	 */
+	ngroups = capacity;
+	groups = malloc((size_t)capacity * sizeof(*groups));
+#ifdef GETGROUPS_GROUP_TYPE_IS_INT
+	int_groups = groups ? malloc((size_t)capacity * sizeof(*int_groups)) :
+	    NULL;
+	if (int_groups == NULL) {
+		free(groups);
+		groups = NULL;
+	}
+#endif
+	if (groups == NULL) {
+		*angroups = 0;
+		return (NULL);
+	}
+#ifdef GETGROUPS_GROUP_TYPE_IS_INT
+	(void) getgrouplist(name, (int)basegid, int_groups, &ngroups);
+#else
+	(void) getgrouplist(name, basegid, groups, &ngroups);
+#endif
+	/*
+	 * When the list does not fit, getgrouplist() fails and may leave
+	 * in ngroups the number of groups it wanted to store rather than
+	 * the number it did store.  Never report more entries than the
+	 * buffer can hold.
+	 */
+	if (ngroups > capacity)
+		ngroups = capacity;
+	else if (ngroups < 0)
+		ngroups = 0;
+#ifdef GETGROUPS_GROUP_TYPE_IS_INT
+	for (i = 0; i < ngroups; i++)
+		groups[i] = (gid_t)int_groups[i];
+	/* The int-typed scratch copy is no longer needed. */
+	free(int_groups);
+#endif
+	*angroups = ngroups;
+	return (groups);
+}
+
+/*
+ * For the various debug describe ops: decode the fields of a
+ * bit-field-y value.  For example, we might produce:
+ *     value=0x3c[FOO,BAR,QUUX,?0x20]
+ * when FOO is bit 0x10, BAR is 0x08, and QUUX is 0x04 (as defined
+ * by the table).  This leaves 0x20 (bit 5) as a mystery, while bits
+ * 4, 3, and 2 were decoded.
+ *
+ * <value> is the (up to 64-bit) value to decode.  The description
+ * table <db> is an array of {mask, match, name} rows ending with a
+ * row whose name is NULL.  Rows are tried in order; a row matches
+ * when (value & mask) == match.  On a match the name is printed and
+ * the mask bits are cleared from the value, so later rows see only
+ * what is still undecoded.
+ *
+ * A multi-bit field is described by several rows sharing one mask,
+ * e.g. mask 3 with match values 0, 1, 2 and 3 for a 2-bit field in
+ * bits 0 and 1.  Such rows must be adjacent: once one of them has
+ * matched, the directly following rows with the same mask are
+ * skipped, so that the now-cleared field is not matched again by a
+ * zero-valued row.
+ *
+ * If <str> is non-NULL, <str> and the original value (always in hex)
+ * are printed first; if it is NULL neither is printed.
+ *
+ * Any nonzero bits still undecoded after the whole table has been
+ * tried are printed last, as ?0x<bits>.
+ *
+ * <oc> supplies the open and close bracket characters, typically
+ * "[]".  oc[0] is emitted in front of the first item printed and
+ * oc[1] after the last one; if nothing is printed, no brackets
+ * appear.  With a NULL <oc> no brackets are added at all.
+ *
+ * Returns true if it printed anything (other than the implied
+ * str-and-value, that is).
+ */
+static bool
+l9p_describe_bits(const char *str, uint64_t value, const char *oc,
+    const struct descbits *db, struct sbuf *sb)
+{
+	const struct descbits *row;
+	const char *lead;
+	char bracket[2] = { '\0', '\0' };
+	bool any = false;
+
+	if (str != NULL)
+		sbuf_printf(sb, "%s0x%" PRIx64, str, value);
+
+	/* The first item is introduced by the open bracket, if any. */
+	if (oc != NULL)
+		bracket[0] = oc[0];
+	lead = bracket;
+
+	row = db;
+	while (row->db_name != NULL) {
+		if ((value & row->db_mask) != row->db_match) {
+			row++;
+			continue;
+		}
+
+		sbuf_printf(sb, "%s%s", lead, row->db_name);
+		lead = ",";
+		any = true;
+
+		/*
+		 * Clear the field, and step over the following rows
+		 * that share this mask so that a zero-valued one does
+		 * not match the field we have just cleared.
+		 */
+		value &= ~row->db_mask;
+		while (row[1].db_name != NULL &&
+		    row[1].db_mask == row->db_mask)
+			row++;
+		row++;
+	}
+
+	/* Whatever is left could not be decoded by the table. */
+	if (value != 0) {
+		sbuf_printf(sb, "%s?0x%" PRIx64, lead, value);
+		any = true;
+	}
+
+	if (any && oc != NULL) {
+		bracket[0] = oc[1];
+		sbuf_cat(sb, bracket);
+	}
+	return (any);
+}
+
+/*
+ * Show file ID: appends <str> followed by the fid, in decimal, to <sb>.
+ */
+static void
+l9p_describe_fid(const char *str, uint32_t fid, struct sbuf *sb)
+{
+
+ sbuf_printf(sb, "%s%" PRIu32, str, fid);
+}
+
+/*
+ * Show user or group ID: appends <str> followed by the numeric ID,
+ * in decimal, to <sb>.
+ */
+static void
+l9p_describe_ugid(const char *str, uint32_t ugid, struct sbuf *sb)
+{
+
+ sbuf_printf(sb, "%s%" PRIu32, str, ugid);
+}
+
+/*
+ * Show file mode (O_RDWR, O_RDONLY, etc).  The argument is
+ * an l9p_omode, not a Linux flags mode.  Linux flags are
+ * decoded with l9p_describe_lflags.
+ *
+ * Output is <str>, the mode in hex, and the decoded names in [].
+ */
+static void
+l9p_describe_mode(const char *str, uint32_t mode, struct sbuf *sb)
+{
+/* The access mode is one multi-bit field; the rest are single flags. */
+#define ACCESS(val, name)	{ L9P_OACCMODE, (val), name }
+#define FLAG(bit, name)		{ (bit), (bit), name }
+	static const struct descbits omode_bits[] = {
+		ACCESS(L9P_OREAD, "OREAD"),
+		ACCESS(L9P_OWRITE, "OWRITE"),
+		ACCESS(L9P_ORDWR, "ORDWR"),
+		ACCESS(L9P_OEXEC, "OEXEC"),
+
+		FLAG(L9P_OCEXEC, "OCEXEC"),
+		FLAG(L9P_ODIRECT, "ODIRECT"),
+		FLAG(L9P_ORCLOSE, "ORCLOSE"),
+		FLAG(L9P_OTRUNC, "OTRUNC"),
+		{ 0, 0, NULL }
+	};
+#undef ACCESS
+#undef FLAG
+
+	(void) l9p_describe_bits(str, mode, "[]", omode_bits, sb);
+}
+
+/*
+ * Show Linux mode/flags: <str>, the flags in hex, and the decoded
+ * names in [].
+ *
+ * The access mode (L9P_OACCMODE) is decoded as one multi-bit field;
+ * every other row is a single flag.  Each flag is listed exactly
+ * once: l9p_describe_bits() clears a flag once it has matched, so a
+ * repeated row could never produce any output.
+ */
+static void
+l9p_describe_lflags(const char *str, uint32_t flags, struct sbuf *sb)
+{
+	static const struct descbits bits[] = {
+		{ L9P_OACCMODE, L9P_OREAD, "O_READ" },
+		{ L9P_OACCMODE, L9P_OWRITE, "O_WRITE" },
+		{ L9P_OACCMODE, L9P_ORDWR, "O_RDWR" },
+		{ L9P_OACCMODE, L9P_OEXEC, "O_EXEC" },
+
+		{ L9P_L_O_APPEND, L9P_L_O_APPEND, "O_APPEND" },
+		{ L9P_L_O_CLOEXEC, L9P_L_O_CLOEXEC, "O_CLOEXEC" },
+		{ L9P_L_O_CREAT, L9P_L_O_CREAT, "O_CREAT" },
+		{ L9P_L_O_DIRECT, L9P_L_O_DIRECT, "O_DIRECT" },
+		{ L9P_L_O_DIRECTORY, L9P_L_O_DIRECTORY, "O_DIRECTORY" },
+		{ L9P_L_O_DSYNC, L9P_L_O_DSYNC, "O_DSYNC" },
+		{ L9P_L_O_EXCL, L9P_L_O_EXCL, "O_EXCL" },
+		{ L9P_L_O_FASYNC, L9P_L_O_FASYNC, "O_FASYNC" },
+		{ L9P_L_O_LARGEFILE, L9P_L_O_LARGEFILE, "O_LARGEFILE" },
+		{ L9P_L_O_NOATIME, L9P_L_O_NOATIME, "O_NOATIME" },
+		{ L9P_L_O_NOCTTY, L9P_L_O_NOCTTY, "O_NOCTTY" },
+		{ L9P_L_O_NOFOLLOW, L9P_L_O_NOFOLLOW, "O_NOFOLLOW" },
+		{ L9P_L_O_NONBLOCK, L9P_L_O_NONBLOCK, "O_NONBLOCK" },
+		{ L9P_L_O_PATH, L9P_L_O_PATH, "O_PATH" },
+		{ L9P_L_O_SYNC, L9P_L_O_SYNC, "O_SYNC" },
+		{ L9P_L_O_TMPFILE, L9P_L_O_TMPFILE, "O_TMPFILE" },
+		{ L9P_L_O_TRUNC, L9P_L_O_TRUNC, "O_TRUNC" },
+		{ 0, 0, NULL }
+	};
+
+	(void) l9p_describe_bits(str, flags, "[]", bits, sb);
+}
+
+/*
+ * Show file name or other similar, potentially-very-long string.
+ * Actual strings get quotes, a NULL name (if it occurs) gets
+ * <null> (no quotes), so you can tell the difference.
+ *
+ * Names longer than 32 bytes are cut down to their first 29 bytes
+ * followed by "...".
+ */
+static void
+l9p_describe_name(const char *str, char *name, struct sbuf *sb)
+{
+	enum { DESC_NAME_LIMIT = 32, DESC_ELLIPSIS_LEN = 3 };
+	size_t namelen;
+
+	if (name != NULL) {
+		namelen = strlen(name);
+		if (namelen <= DESC_NAME_LIMIT)
+			sbuf_printf(sb, "%s\"%.*s\"", str, (int)namelen,
+			    name);
+		else
+			sbuf_printf(sb, "%s\"%.*s...\"", str,
+			    DESC_NAME_LIMIT - DESC_ELLIPSIS_LEN, name);
+	} else {
+		sbuf_printf(sb, "%s<null>", str);
+	}
+}
+
+/*
+ * Show permissions (rwx etc) as <str> followed by the nine
+ * permission characters in <>.  The whole value is additionally
+ * printed in hex, before the <>, only if it has bits set outside
+ * the rwx bits (0777).
+ */
+static void
+l9p_describe_perm(const char *str, uint32_t mode, struct sbuf *sb)
+{
+	const uint32_t extra = mode & ~(uint32_t)0777;
+	char modestr[12];
+	const char *rwx;
+
+	strmode(mode & 0777, modestr);
+	/* Only the nine characters after the first one are shown. */
+	rwx = modestr + 1;
+
+	if (extra == 0)
+		sbuf_printf(sb, "%s<%.9s>", str, rwx);
+	else
+		sbuf_printf(sb, "%s0x%" PRIx32 "<%.9s>", str, mode, rwx);
+}
+
+/*
+ * Show "extended" permissions: regular permissions, but also the
+ * various DM* extension bits from 9P2000.u.
+ *
+ * Output is <str>[<DM names>,<rwx>]: the bits above 0777 are decoded
+ * by name first, then the low nine bits are shown by
+ * l9p_describe_perm(), separated by a comma if any name was printed.
+ */
+static void
+l9p_describe_ext_perm(const char *str, uint32_t mode, struct sbuf *sb)
+{
+#define DMBIT(bit, name)	{ (bit), (bit), name }
+	static const struct descbits dm_bits[] = {
+		DMBIT(L9P_DMDIR, "DMDIR"),
+		DMBIT(L9P_DMAPPEND, "DMAPPEND"),
+		DMBIT(L9P_DMEXCL, "DMEXCL"),
+		DMBIT(L9P_DMMOUNT, "DMMOUNT"),
+		DMBIT(L9P_DMAUTH, "DMAUTH"),
+		DMBIT(L9P_DMTMP, "DMTMP"),
+		DMBIT(L9P_DMSYMLINK, "DMSYMLINK"),
+		DMBIT(L9P_DMDEVICE, "DMDEVICE"),
+		DMBIT(L9P_DMNAMEDPIPE, "DMNAMEDPIPE"),
+		DMBIT(L9P_DMSOCKET, "DMSOCKET"),
+		DMBIT(L9P_DMSETUID, "DMSETUID"),
+		DMBIT(L9P_DMSETGID, "DMSETGID"),
+		{ 0, 0, NULL }
+	};
+#undef DMBIT
+	const uint32_t high = mode & ~(uint32_t)0777;
+	const uint32_t rwx = mode & 0777;
+	const char *sep;
+
+	sbuf_printf(sb, "%s[", str);
+	sep = l9p_describe_bits(NULL, high, NULL, dm_bits, sb) ? "," : "";
+	l9p_describe_perm(sep, rwx, sb);
+	sbuf_cat(sb, "]");
+}
+
+/*
+ * Show Linux-specific permissions: regular permissions, but also
+ * the S_IFMT field.
+ *
+ * Output is <str>[<file type>,<rwx>]: the bits above 0777 are
+ * decoded first (the S_IFMT field by name, anything else as leftover
+ * hex), then the low nine bits are shown by l9p_describe_perm(),
+ * separated by a comma if the first part printed anything.
+ */
+static void
+l9p_describe_lperm(const char *str, uint32_t mode, struct sbuf *sb)
+{
+#define FTYPE(val, name)	{ S_IFMT, (val), name }
+	static const struct descbits ifmt_bits[] = {
+		FTYPE(S_IFIFO, "S_IFIFO"),
+		FTYPE(S_IFCHR, "S_IFCHR"),
+		FTYPE(S_IFDIR, "S_IFDIR"),
+		FTYPE(S_IFBLK, "S_IFBLK"),
+		FTYPE(S_IFREG, "S_IFREG"),
+		FTYPE(S_IFLNK, "S_IFLNK"),
+		FTYPE(S_IFSOCK, "S_IFSOCK"),
+		{ 0, 0, NULL }
+	};
+#undef FTYPE
+	const uint32_t high = mode & ~(uint32_t)0777;
+	const uint32_t rwx = mode & 0777;
+	const char *sep;
+
+	sbuf_printf(sb, "%s[", str);
+	sep = l9p_describe_bits(NULL, high, NULL, ifmt_bits, sb) ? "," : "";
+	l9p_describe_perm(sep, rwx, sb);
+	sbuf_cat(sb, "]");
+}
+
+/*
+ * Show qid (<type, version, path> tuple).
+ *
+ * Output is <str>, then "<" and the type in hex with its decoded
+ * names in [], then the version in decimal and the path as 16 hex
+ * digits, then ">".  <qid> must not be NULL.
+ */
+static void
+l9p_describe_qid(const char *str, struct l9p_qid *qid, struct sbuf *sb)
+{
+#define QTBIT(bit, name)	{ (bit), (bit), name }
+	static const struct descbits qt_bits[] = {
+		/*
+		 * NB: L9P_QTFILE is 0, i.e., is implied by no
+		 * other bits being set.  We get this produced
+		 * when we mask against 0xff and compare for
+		 * L9P_QTFILE, but we must do it first so that
+		 * we mask against the original (not-adjusted)
+		 * value.
+		 */
+		{ 0xff, L9P_QTFILE, "FILE" },
+		QTBIT(L9P_QTDIR, "DIR"),
+		QTBIT(L9P_QTAPPEND, "APPEND"),
+		QTBIT(L9P_QTEXCL, "EXCL"),
+		QTBIT(L9P_QTMOUNT, "MOUNT"),
+		QTBIT(L9P_QTAUTH, "AUTH"),
+		QTBIT(L9P_QTTMP, "TMP"),
+		QTBIT(L9P_QTSYMLINK, "SYMLINK"),
+		{ 0, 0, NULL }
+	};
+#undef QTBIT
+
+	assert(qid != NULL);
+
+	sbuf_cat(sb, str);
+	(void) l9p_describe_bits("<", qid->type, "[]", qt_bits, sb);
+	sbuf_printf(sb, ",%" PRIu32 ",0x%016" PRIx64 ">",
+	    qid->version, qid->path);
+}
+
+/*
+ * Show size: appends <str> followed by the size, in decimal, to <sb>.
+ */
+static void
+l9p_describe_size(const char *str, uint64_t size, struct sbuf *sb)
+{
+
+ sbuf_printf(sb, "%s%" PRIu64, str, size);
+}
+
+/*
+ * Show l9stat (including 9P2000.u extensions if appropriate).
+ *
+ * atime, mtime and length are left out when they hold the all-ones
+ * value.  The numeric n_uid/n_gid/n_muid fields, and the extension
+ * string when it is non-NULL, are shown only for 9P2000.u and later.
+ * <st> must not be NULL.
+ */
+static void
+l9p_describe_l9stat(const char *str, struct l9p_stat *st,
+    enum l9p_version version, struct sbuf *sb)
+{
+	struct {
+		const char *label;
+		char *value;
+	} owners[3];
+	const bool dotu = version >= L9P_2000U;
+	size_t k;
+
+	assert(st != NULL);
+
+	sbuf_printf(sb, "%stype=0x%04" PRIx32 " dev=0x%08" PRIx32, str,
+	    st->type, st->dev);
+	l9p_describe_qid(" qid=", &st->qid, sb);
+	l9p_describe_ext_perm(" mode=", st->mode, sb);
+
+	if (st->atime != (uint32_t)-1)
+		sbuf_printf(sb, " atime=%" PRIu32, st->atime);
+	if (st->mtime != (uint32_t)-1)
+		sbuf_printf(sb, " mtime=%" PRIu32, st->mtime);
+	if (st->length != (uint64_t)-1)
+		sbuf_printf(sb, " length=%" PRIu64, st->length);
+
+	l9p_describe_name(" name=", st->name, sb);
+
+	/*
+	 * It's pretty common to have NULL uid+gid+muid.  They're
+	 * just noise if NULL *and* dot-u; decode only if non-null
+	 * or not-dot-u.
+	 */
+	owners[0].label = " uid=";
+	owners[0].value = st->uid;
+	owners[1].label = " gid=";
+	owners[1].value = st->gid;
+	owners[2].label = " muid=";
+	owners[2].value = st->muid;
+	for (k = 0; k < N(owners); k++) {
+		if (owners[k].value != NULL || !dotu)
+			l9p_describe_name(owners[k].label, owners[k].value,
+			    sb);
+	}
+
+	if (!dotu)
+		return;
+
+	if (st->extension != NULL)
+		l9p_describe_name(" extension=", st->extension, sb);
+	sbuf_printf(sb,
+	    " n_uid=%" PRIu32 " n_gid=%" PRIu32 " n_muid=%" PRIu32,
+	    st->n_uid, st->n_gid, st->n_muid);
+}
+
+/*
+ * Show statfs data: <str> followed by space-separated field=value
+ * pairs.  type and fsid are printed in hex, everything else in
+ * decimal.  <st> must not be NULL.
+ *
+ * Nothing here opens a '<', so the line must not end with '>' either.
+ */
+static void
+l9p_describe_statfs(const char *str, struct l9p_statfs *st, struct sbuf *sb)
+{
+
+	assert(st != NULL);
+
+	sbuf_printf(sb, "%stype=0x%04lx bsize=%lu blocks=%" PRIu64
+	    " bfree=%" PRIu64 " bavail=%" PRIu64 " files=%" PRIu64
+	    " ffree=%" PRIu64 " fsid=0x%" PRIx64 " namelen=%" PRIu32,
+	    str, (u_long)st->type, (u_long)st->bsize, st->blocks,
+	    st->bfree, st->bavail, st->files,
+	    st->ffree, st->fsid, st->namelen);
+}
+
+/*
+ * Decode a <seconds,nsec> timestamp.
+ *
+ * Prints <s> followed by "sec.nsec", with nsec zero-padded to nine
+ * digits.  An nsec value above 999999999 cannot be a fraction of a
+ * second and is flagged as "<invalid nsec N>" instead.
+ *
+ * Perhaps should use asctime_r.  For now, raw values.
+ */
+static void
+l9p_describe_time(struct sbuf *sb, const char *s, uint64_t sec, uint64_t nsec)
+{
+
+	sbuf_cat(sb, s);
+	if (nsec > 999999999)
+		sbuf_printf(sb, "%" PRIu64 ".<invalid nsec %" PRIu64 ">",
+		    sec, nsec);
+	else
+		sbuf_printf(sb, "%" PRIu64 ".%09" PRIu64, sec, nsec);
+}
+
+/*
+ * Decode readdir data (.L format, variable length names).
+ *
+ * As compiled today (notyet is not defined) this only reports the
+ * byte count, or EOF when the count is 0.  The per-entry decoding is
+ * kept under "notyet" because it needs the original request (req),
+ * which is not available to this function.
+ */
+static void
+l9p_describe_readdir(struct sbuf *sb, struct l9p_f_io *io)
+{
+	uint32_t count;
+#ifdef notyet
+	int i;
+	struct l9p_message msg;
+	struct l9p_dirent de;
+#endif
+
+	if ((count = io->count) == 0) {
+		sbuf_printf(sb, " EOF (count=0)");
+		return;
+	}
+
+	/*
+	 * Can't do this yet because we do not have the original
+	 * req.
+	 */
+#ifdef notyet
+	sbuf_printf(sb, " count=%" PRIu32 " [", count);
+
+	l9p_init_msg(&msg, req, L9P_UNPACK);
+	for (i = 0; msg.lm_size < count; i++) {
+		if (l9p_pudirent(&msg, &de) < 0) {
+			sbuf_printf(sb, " bad count");
+			break;
+		}
+
+		sbuf_printf(sb, i ? ", " : " ");
+		l9p_describe_qid(" qid=", &de.qid, sb);
+		sbuf_printf(sb, " offset=%" PRIu64 " type=%d",
+		    de.offset, de.type);
+		/* l9p_describe_name() takes the sbuf as its last argument. */
+		l9p_describe_name(" name=", de.name, sb);
+		free(de.name);
+	}
+	sbuf_printf(sb, "]=%d dir entries", i);
+#else /* notyet */
+	sbuf_printf(sb, " count=%" PRIu32, count);
+#endif
+}
+
+/*
+ * Decode Tgetattr request_mask field: prints " request_mask=", the
+ * mask in hex, and the decoded names in [].
+ */
+static void
+l9p_describe_getattr_mask(uint64_t request_mask, struct sbuf *sb)
+{
+#define GABIT(bit, name)	{ (bit), (bit), name }
+	static const struct descbits getattr_bits[] = {
+		/*
+		 * Note: ALL and BASIC must occur first and second.
+		 * This is a little dirty: it depends on the way the
+		 * describe_bits code clears the values.  If we
+		 * match ALL, we clear all those bits and do not
+		 * match BASIC; if we match BASIC, we clear all
+		 * those bits and do not match individual bits.  Thus
+		 * if we have BASIC but not all the additional bits,
+		 * we'll see, e.g., [BASIC,BTIME,GEN]; if we have
+		 * all the additional bits too, we'll see [ALL].
+		 *
+		 * Since describe_bits also prints whatever bits are
+		 * left undecoded, we'll spot any bits added to the
+		 * protocol since we made this table.
+		 */
+		GABIT(L9PL_GETATTR_ALL, "ALL"),
+		GABIT(L9PL_GETATTR_BASIC, "BASIC"),
+
+		/* individual bits in BASIC */
+		GABIT(L9PL_GETATTR_MODE, "MODE"),
+		GABIT(L9PL_GETATTR_NLINK, "NLINK"),
+		GABIT(L9PL_GETATTR_UID, "UID"),
+		GABIT(L9PL_GETATTR_GID, "GID"),
+		GABIT(L9PL_GETATTR_RDEV, "RDEV"),
+		GABIT(L9PL_GETATTR_ATIME, "ATIME"),
+		GABIT(L9PL_GETATTR_MTIME, "MTIME"),
+		GABIT(L9PL_GETATTR_CTIME, "CTIME"),
+		GABIT(L9PL_GETATTR_INO, "INO"),
+		GABIT(L9PL_GETATTR_SIZE, "SIZE"),
+		GABIT(L9PL_GETATTR_BLOCKS, "BLOCKS"),
+
+		/* additional bits in ALL */
+		GABIT(L9PL_GETATTR_BTIME, "BTIME"),
+		GABIT(L9PL_GETATTR_GEN, "GEN"),
+		GABIT(L9PL_GETATTR_DATA_VERSION, "DATA_VERSION"),
+		{ 0, 0, NULL }
+	};
+#undef GABIT
+
+	(void) l9p_describe_bits(" request_mask=", request_mask, "[]",
+	    getattr_bits, sb);
+}
+
+/*
+ * Decode Tunlinkat flags: prints <str>, the flags in hex, and the
+ * decoded names in [].  AT_REMOVEDIR is the only flag in the table;
+ * any other set bit is left for l9p_describe_bits() to report as
+ * undecoded.
+ */
+static void
+l9p_describe_unlinkat_flags(const char *str, uint32_t flags, struct sbuf *sb)
+{
+ static const struct descbits bits[] = {
+ { L9PL_AT_REMOVEDIR, L9PL_AT_REMOVEDIR, "AT_REMOVEDIR" },
+ { 0, 0, NULL }
+ };
+
+ (void) l9p_describe_bits(str, flags, "[]", bits, sb);
+}
+
+/*
+ * Return a printable description of a Linux error number.
+ *
+ * Numbers that have an entry in the table below get that entry's
+ * string; other numbers up to ERANGE are passed to strerror(); for
+ * anything else an "Unknown error N" message is built.
+ *
+ * The "Unknown error" text lives in a single static buffer that is
+ * overwritten by the next such call, so the result must be used
+ * before this function is called again.
+ */
+static const char *
+lookup_linux_errno(uint32_t linux_errno)
+{
+	static char unknown[50];
+
+	/*
+	 * Error numbers in the "base" range (1..ERANGE) are common
+	 * across BSD, MacOS, Linux, and Plan 9.
+	 *
+	 * Error numbers outside that range require translation.
+	 *
+	 * The table is static so that it is built once, not on
+	 * every call.
+	 */
+	static const char *const table[] = {
+#define X(name) [name] = name ## _STR
+		X(LINUX_EAGAIN),
+		X(LINUX_EDEADLK),
+		X(LINUX_ENAMETOOLONG),
+		X(LINUX_ENOLCK),
+		X(LINUX_ENOSYS),
+		X(LINUX_ENOTEMPTY),
+		X(LINUX_ELOOP),
+		X(LINUX_ENOMSG),
+		X(LINUX_EIDRM),
+		X(LINUX_ECHRNG),
+		X(LINUX_EL2NSYNC),
+		X(LINUX_EL3HLT),
+		X(LINUX_EL3RST),
+		X(LINUX_ELNRNG),
+		X(LINUX_EUNATCH),
+		X(LINUX_ENOCSI),
+		X(LINUX_EL2HLT),
+		X(LINUX_EBADE),
+		X(LINUX_EBADR),
+		X(LINUX_EXFULL),
+		X(LINUX_ENOANO),
+		X(LINUX_EBADRQC),
+		X(LINUX_EBADSLT),
+		X(LINUX_EBFONT),
+		X(LINUX_ENOSTR),
+		X(LINUX_ENODATA),
+		X(LINUX_ETIME),
+		X(LINUX_ENOSR),
+		X(LINUX_ENONET),
+		X(LINUX_ENOPKG),
+		X(LINUX_EREMOTE),
+		X(LINUX_ENOLINK),
+		X(LINUX_EADV),
+		X(LINUX_ESRMNT),
+		X(LINUX_ECOMM),
+		X(LINUX_EPROTO),
+		X(LINUX_EMULTIHOP),
+		X(LINUX_EDOTDOT),
+		X(LINUX_EBADMSG),
+		X(LINUX_EOVERFLOW),
+		X(LINUX_ENOTUNIQ),
+		X(LINUX_EBADFD),
+		X(LINUX_EREMCHG),
+		X(LINUX_ELIBACC),
+		X(LINUX_ELIBBAD),
+		X(LINUX_ELIBSCN),
+		X(LINUX_ELIBMAX),
+		X(LINUX_ELIBEXEC),
+		X(LINUX_EILSEQ),
+		X(LINUX_ERESTART),
+		X(LINUX_ESTRPIPE),
+		X(LINUX_EUSERS),
+		X(LINUX_ENOTSOCK),
+		X(LINUX_EDESTADDRREQ),
+		X(LINUX_EMSGSIZE),
+		X(LINUX_EPROTOTYPE),
+		X(LINUX_ENOPROTOOPT),
+		X(LINUX_EPROTONOSUPPORT),
+		X(LINUX_ESOCKTNOSUPPORT),
+		X(LINUX_EOPNOTSUPP),
+		X(LINUX_EPFNOSUPPORT),
+		X(LINUX_EAFNOSUPPORT),
+		X(LINUX_EADDRINUSE),
+		X(LINUX_EADDRNOTAVAIL),
+		X(LINUX_ENETDOWN),
+		X(LINUX_ENETUNREACH),
+		X(LINUX_ENETRESET),
+		X(LINUX_ECONNABORTED),
+		X(LINUX_ECONNRESET),
+		X(LINUX_ENOBUFS),
+		X(LINUX_EISCONN),
+		X(LINUX_ENOTCONN),
+		X(LINUX_ESHUTDOWN),
+		X(LINUX_ETOOMANYREFS),
+		X(LINUX_ETIMEDOUT),
+		X(LINUX_ECONNREFUSED),
+		X(LINUX_EHOSTDOWN),
+		X(LINUX_EHOSTUNREACH),
+		X(LINUX_EALREADY),
+		X(LINUX_EINPROGRESS),
+		X(LINUX_ESTALE),
+		X(LINUX_EUCLEAN),
+		X(LINUX_ENOTNAM),
+		X(LINUX_ENAVAIL),
+		X(LINUX_EISNAM),
+		X(LINUX_EREMOTEIO),
+		X(LINUX_EDQUOT),
+		X(LINUX_ENOMEDIUM),
+		X(LINUX_EMEDIUMTYPE),
+		X(LINUX_ECANCELED),
+		X(LINUX_ENOKEY),
+		X(LINUX_EKEYEXPIRED),
+		X(LINUX_EKEYREVOKED),
+		X(LINUX_EKEYREJECTED),
+		X(LINUX_EOWNERDEAD),
+		X(LINUX_ENOTRECOVERABLE),
+		X(LINUX_ERFKILL),
+		X(LINUX_EHWPOISON),
+#undef X
+	};
+
+	if ((size_t)linux_errno < N(table) && table[linux_errno] != NULL)
+		return (table[linux_errno]);
+	if (linux_errno <= ERANGE)
+		return (strerror((int)linux_errno));
+	/* linux_errno is unsigned: print it with the matching conversion. */
+	(void) snprintf(unknown, sizeof(unknown),
+	    "Unknown error %" PRIu32, linux_errno);
+	return (unknown);
+}
+
+void
+l9p_describe_fcall(union l9p_fcall *fcall, enum l9p_version version,
+ struct sbuf *sb)
+{
+ uint64_t mask;
+ uint8_t type;
+ int i;
+
+ assert(fcall != NULL);
+ assert(sb != NULL);
+ assert(version <= L9P_2000L && version >= L9P_INVALID_VERSION);
+
+ type = fcall->hdr.type;
+
+ if (type < L9P__FIRST || type >= L9P__LAST_PLUS_1 ||
+ ftype_names[type - L9P__FIRST] == NULL) {
+ const char *rr;
+
+ /*
+ * Can't say for sure that this distinction --
+ * an even number is a request, an odd one is
+ * a response -- will be maintained forever,
+ * but it's good enough for now.
+ */
+ rr = (type & 1) != 0 ? "response" : "request";
+ sbuf_printf(sb, "<unknown %s %d> tag=%d", rr, type,
+ fcall->hdr.tag);
+ } else {
+ sbuf_printf(sb, "%s tag=%d", ftype_names[type - L9P__FIRST],
+ fcall->hdr.tag);
+ }
+
+ switch (type) {
+ case L9P_TVERSION:
+ case L9P_RVERSION:
+ sbuf_printf(sb, " version=\"%s\" msize=%d", fcall->version.version,
+ fcall->version.msize);
+ return;
+
+ case L9P_TAUTH:
+ l9p_describe_fid(" afid=", fcall->hdr.fid, sb);
+ sbuf_printf(sb, " uname=\"%s\" aname=\"%s\"",
+ fcall->tauth.uname, fcall->tauth.aname);
+ return;
+
+ case L9P_TATTACH:
+ l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+ l9p_describe_fid(" afid=", fcall->tattach.afid, sb);
+ sbuf_printf(sb, " uname=\"%s\" aname=\"%s\"",
+ fcall->tattach.uname, fcall->tattach.aname);
+ if (version >= L9P_2000U)
+ sbuf_printf(sb, " n_uname=%d", fcall->tattach.n_uname);
+ return;
+
+ case L9P_RATTACH:
+ l9p_describe_qid(" ", &fcall->rattach.qid, sb);
+ return;
+
+ case L9P_RERROR:
+ sbuf_printf(sb, " ename=\"%s\" errnum=%d", fcall->error.ename,
+ fcall->error.errnum);
+ return;
+
+ case L9P_RLERROR:
+ sbuf_printf(sb, " errnum=%d (%s)", fcall->error.errnum,
+ lookup_linux_errno(fcall->error.errnum));
+ return;
+
+ case L9P_TFLUSH:
+ sbuf_printf(sb, " oldtag=%d", fcall->tflush.oldtag);
+ return;
+
+ case L9P_RFLUSH:
+ return;
+
+ case L9P_TWALK:
+ l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+ l9p_describe_fid(" newfid=", fcall->twalk.newfid, sb);
+ if (fcall->twalk.nwname) {
+ sbuf_cat(sb, " wname=\"");
+ for (i = 0; i < fcall->twalk.nwname; i++)
+ sbuf_printf(sb, "%s%s", i == 0 ? "" : "/",
+ fcall->twalk.wname[i]);
+ sbuf_cat(sb, "\"");
+ }
+ return;
+
+ case L9P_RWALK:
+ sbuf_printf(sb, " wqid=[");
+ for (i = 0; i < fcall->rwalk.nwqid; i++)
+ l9p_describe_qid(i == 0 ? "" : ",",
+ &fcall->rwalk.wqid[i], sb);
+ sbuf_cat(sb, "]");
+ return;
+
+ case L9P_TOPEN:
+ l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+ l9p_describe_mode(" mode=", fcall->tcreate.mode, sb);
+ return;
+
+ case L9P_ROPEN:
+ l9p_describe_qid(" qid=", &fcall->ropen.qid, sb);
+ sbuf_printf(sb, " iounit=%d", fcall->ropen.iounit);
+ return;
+
+ case L9P_TCREATE:
+ l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+ l9p_describe_name(" name=", fcall->tcreate.name, sb);
+ l9p_describe_ext_perm(" perm=", fcall->tcreate.perm, sb);
+ l9p_describe_mode(" mode=", fcall->tcreate.mode, sb);
+ if (version >= L9P_2000U && fcall->tcreate.extension != NULL)
+ l9p_describe_name(" extension=",
+ fcall->tcreate.extension, sb);
+ return;
+
+ case L9P_RCREATE:
+ l9p_describe_qid(" qid=", &fcall->rcreate.qid, sb);
+ sbuf_printf(sb, " iounit=%d", fcall->rcreate.iounit);
+ return;
+
+ case L9P_TREAD:
+ l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+ sbuf_printf(sb, " offset=%" PRIu64 " count=%" PRIu32,
+ fcall->io.offset, fcall->io.count);
+ return;
+
+ case L9P_RREAD:
+ case L9P_RWRITE:
+ sbuf_printf(sb, " count=%" PRIu32, fcall->io.count);
+ return;
+
+ case L9P_TWRITE:
+ case L9P_TREADDIR:
+ l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+ sbuf_printf(sb, " offset=%" PRIu64 " count=%" PRIu32,
+ fcall->io.offset, fcall->io.count);
+ return;
+
+ case L9P_TCLUNK:
+ l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+ return;
+
+ case L9P_RCLUNK:
+ return;
+
+ case L9P_TREMOVE:
+ l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+ return;
+
+ case L9P_RREMOVE:
+ return;
+
+ case L9P_TSTAT:
+ l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+ return;
+
+ case L9P_RSTAT:
+ l9p_describe_l9stat(" ", &fcall->rstat.stat, version, sb);
+ return;
+
+ case L9P_TWSTAT:
+ l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+ l9p_describe_l9stat(" ", &fcall->twstat.stat, version, sb);
+ return;
+
+ case L9P_RWSTAT:
+ return;
+
+ case L9P_TSTATFS:
+ l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+ return;
+
+ case L9P_RSTATFS:
+ l9p_describe_statfs(" ", &fcall->rstatfs.statfs, sb);
+ return;
+
+ case L9P_TLOPEN:
+ l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+ l9p_describe_lflags(" flags=", fcall->tlcreate.flags, sb);
+ return;
+
+ case L9P_RLOPEN:
+ l9p_describe_qid(" qid=", &fcall->rlopen.qid, sb);
+ sbuf_printf(sb, " iounit=%d", fcall->rlopen.iounit);
+ return;
+
+ case L9P_TLCREATE:
+ l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+ l9p_describe_name(" name=", fcall->tlcreate.name, sb);
+ /* confusing: "flags" is open-mode, "mode" is permissions */
+ l9p_describe_lflags(" flags=", fcall->tlcreate.flags, sb);
+ /* TLCREATE mode/permissions have S_IFREG (0x8000) set */
+ l9p_describe_lperm(" mode=", fcall->tlcreate.mode, sb);
+ l9p_describe_ugid(" gid=", fcall->tlcreate.gid, sb);
+ return;
+
+ case L9P_RLCREATE:
+ l9p_describe_qid(" qid=", &fcall->rlcreate.qid, sb);
+ sbuf_printf(sb, " iounit=%d", fcall->rlcreate.iounit);
+ return;
+
+ case L9P_TSYMLINK:
+ l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+ l9p_describe_name(" name=", fcall->tsymlink.name, sb);
+ l9p_describe_name(" symtgt=", fcall->tsymlink.symtgt, sb);
+ l9p_describe_ugid(" gid=", fcall->tsymlink.gid, sb);
+ return;
+
+ case L9P_RSYMLINK:
+ l9p_describe_qid(" qid=", &fcall->ropen.qid, sb);
+ return;
+
+ case L9P_TMKNOD:
+ l9p_describe_fid(" dfid=", fcall->hdr.fid, sb);
+ l9p_describe_name(" name=", fcall->tmknod.name, sb);
+ /*
+ * TMKNOD mode/permissions have S_IFBLK/S_IFCHR/S_IFIFO
+ * bits. The major and minor values are only meaningful
+ * for S_IFBLK and S_IFCHR, but just decode always here.
+ */
+ l9p_describe_lperm(" mode=", fcall->tmknod.mode, sb);
+ sbuf_printf(sb, " major=%u minor=%u",
+ fcall->tmknod.major, fcall->tmknod.minor);
+ l9p_describe_ugid(" gid=", fcall->tmknod.gid, sb);
+ return;
+
+ case L9P_RMKNOD:
+ l9p_describe_qid(" qid=", &fcall->rmknod.qid, sb);
+ return;
+
+ case L9P_TRENAME:
+ l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+ l9p_describe_fid(" dfid=", fcall->trename.dfid, sb);
+ l9p_describe_name(" name=", fcall->trename.name, sb);
+ return;
+
+ case L9P_RRENAME:
+ return;
+
+ case L9P_TREADLINK:
+ l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+ return;
+
+ case L9P_RREADLINK:
+ l9p_describe_name(" target=", fcall->rreadlink.target, sb);
+ return;
+
+ case L9P_TGETATTR:
+ l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+ l9p_describe_getattr_mask(fcall->tgetattr.request_mask, sb);
+ return;
+
+ case L9P_RGETATTR:
+ /* Don't need to decode bits: they're implied by the output */
+ mask = fcall->rgetattr.valid;
+ sbuf_printf(sb, " valid=0x%016" PRIx64, mask);
+ l9p_describe_qid(" qid=", &fcall->rgetattr.qid, sb);
+ if (mask & L9PL_GETATTR_MODE)
+ l9p_describe_lperm(" mode=", fcall->rgetattr.mode, sb);
+ if (mask & L9PL_GETATTR_UID)
+ l9p_describe_ugid(" uid=", fcall->rgetattr.uid, sb);
+ if (mask & L9PL_GETATTR_GID)
+ l9p_describe_ugid(" gid=", fcall->rgetattr.gid, sb);
+ if (mask & L9PL_GETATTR_NLINK)
+ sbuf_printf(sb, " nlink=%" PRIu64,
+ fcall->rgetattr.nlink);
+ if (mask & L9PL_GETATTR_RDEV)
+ sbuf_printf(sb, " rdev=0x%" PRIx64,
+ fcall->rgetattr.rdev);
+ if (mask & L9PL_GETATTR_SIZE)
+ l9p_describe_size(" size=", fcall->rgetattr.size, sb);
+ if (mask & L9PL_GETATTR_BLOCKS)
+ sbuf_printf(sb, " blksize=%" PRIu64 " blocks=%" PRIu64,
+ fcall->rgetattr.blksize, fcall->rgetattr.blocks);
+ if (mask & L9PL_GETATTR_ATIME)
+ l9p_describe_time(sb, " atime=",
+ fcall->rgetattr.atime_sec,
+ fcall->rgetattr.atime_nsec);
+ if (mask & L9PL_GETATTR_MTIME)
+ l9p_describe_time(sb, " mtime=",
+ fcall->rgetattr.mtime_sec,
+ fcall->rgetattr.mtime_nsec);
+ if (mask & L9PL_GETATTR_CTIME)
+ l9p_describe_time(sb, " ctime=",
+ fcall->rgetattr.ctime_sec,
+ fcall->rgetattr.ctime_nsec);
+ if (mask & L9PL_GETATTR_BTIME)
+ l9p_describe_time(sb, " btime=",
+ fcall->rgetattr.btime_sec,
+ fcall->rgetattr.btime_nsec);
+ if (mask & L9PL_GETATTR_GEN)
+ sbuf_printf(sb, " gen=0x%" PRIx64, fcall->rgetattr.gen);
+ if (mask & L9PL_GETATTR_DATA_VERSION)
+ sbuf_printf(sb, " data_version=0x%" PRIx64,
+ fcall->rgetattr.data_version);
+ return;
+
+ case L9P_TSETATTR:
+ /* As with RGETATTR, we'll imply decode via output. */
+ l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+ mask = fcall->tsetattr.valid;
+ /* NB: tsetattr valid mask is only 32 bits, hence %08x */
+ sbuf_printf(sb, " valid=0x%08" PRIx64, mask);
+ if (mask & L9PL_SETATTR_MODE)
+ l9p_describe_lperm(" mode=", fcall->tsetattr.mode, sb);
+ if (mask & L9PL_SETATTR_UID)
+ l9p_describe_ugid(" uid=", fcall->tsetattr.uid, sb);
+ if (mask & L9PL_SETATTR_GID)
+ l9p_describe_ugid(" uid=", fcall->tsetattr.gid, sb);
+ if (mask & L9PL_SETATTR_SIZE)
+ l9p_describe_size(" size=", fcall->tsetattr.size, sb);
+ if (mask & L9PL_SETATTR_ATIME) {
+ if (mask & L9PL_SETATTR_ATIME_SET)
+ l9p_describe_time(sb, " atime=",
+ fcall->tsetattr.atime_sec,
+ fcall->tsetattr.atime_nsec);
+ else
+ sbuf_cat(sb, " atime=now");
+ }
+ if (mask & L9PL_SETATTR_MTIME) {
+ if (mask & L9PL_SETATTR_MTIME_SET)
+ l9p_describe_time(sb, " mtime=",
+ fcall->tsetattr.mtime_sec,
+ fcall->tsetattr.mtime_nsec);
+ else
+ sbuf_cat(sb, " mtime=now");
+ }
+ if (mask & L9PL_SETATTR_CTIME)
+ sbuf_cat(sb, " ctime=now");
+ return;
+
+ case L9P_RSETATTR:
+ return;
+
+ case L9P_TXATTRWALK:
+ l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+ l9p_describe_fid(" newfid=", fcall->txattrwalk.newfid, sb);
+ l9p_describe_name(" name=", fcall->txattrwalk.name, sb);
+ return;
+
+ case L9P_RXATTRWALK:
+ l9p_describe_size(" size=", fcall->rxattrwalk.size, sb);
+ return;
+
+ case L9P_TXATTRCREATE:
+ l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+ l9p_describe_name(" name=", fcall->txattrcreate.name, sb);
+ l9p_describe_size(" size=", fcall->txattrcreate.attr_size, sb);
+ sbuf_printf(sb, " flags=%" PRIu32, fcall->txattrcreate.flags);
+ return;
+
+ case L9P_RXATTRCREATE:
+ return;
+
+ case L9P_RREADDIR:
+ l9p_describe_readdir(sb, &fcall->io);
+ return;
+
+ case L9P_TFSYNC:
+ l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+ return;
+
+ case L9P_RFSYNC:
+ return;
+
+ case L9P_TLOCK:
+ l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+ /* decode better later */
+ sbuf_printf(sb, " type=%d flags=0x%" PRIx32
+ " start=%" PRIu64 " length=%" PRIu64
+ " proc_id=0x%" PRIx32 " client_id=\"%s\"",
+ fcall->tlock.type, fcall->tlock.flags,
+ fcall->tlock.start, fcall->tlock.length,
+ fcall->tlock.proc_id, fcall->tlock.client_id);
+ return;
+
+ case L9P_RLOCK:
+ sbuf_printf(sb, " status=%d", fcall->rlock.status);
+ return;
+
+ case L9P_TGETLOCK:
+ l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+ /* FALLTHROUGH */
+
+ case L9P_RGETLOCK:
+ /* decode better later */
+ sbuf_printf(sb, " type=%d "
+ " start=%" PRIu64 " length=%" PRIu64
+ " proc_id=0x%" PRIx32 " client_id=\"%s\"",
+ fcall->getlock.type,
+ fcall->getlock.start, fcall->getlock.length,
+ fcall->getlock.proc_id, fcall->getlock.client_id);
+ return;
+
+ case L9P_TLINK:
+ l9p_describe_fid(" dfid=", fcall->tlink.dfid, sb);
+ l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+ l9p_describe_name(" name=", fcall->tlink.name, sb);
+ return;
+
+ case L9P_RLINK:
+ return;
+
+ case L9P_TMKDIR:
+ l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+ l9p_describe_name(" name=", fcall->tmkdir.name, sb);
+ /* TMKDIR mode/permissions have S_IFDIR set */
+ l9p_describe_lperm(" mode=", fcall->tmkdir.mode, sb);
+ l9p_describe_ugid(" gid=", fcall->tmkdir.gid, sb);
+ return;
+
+ case L9P_RMKDIR:
+ l9p_describe_qid(" qid=", &fcall->rmkdir.qid, sb);
+ return;
+
+ case L9P_TRENAMEAT:
+ l9p_describe_fid(" olddirfid=", fcall->hdr.fid, sb);
+ l9p_describe_name(" oldname=", fcall->trenameat.oldname,
+ sb);
+ l9p_describe_fid(" newdirfid=", fcall->trenameat.newdirfid, sb);
+ l9p_describe_name(" newname=", fcall->trenameat.newname,
+ sb);
+ return;
+
+ case L9P_RRENAMEAT:
+ return;
+
+ case L9P_TUNLINKAT:
+ l9p_describe_fid(" dirfd=", fcall->hdr.fid, sb);
+ l9p_describe_name(" name=", fcall->tunlinkat.name, sb);
+ l9p_describe_unlinkat_flags(" flags=",
+ fcall->tunlinkat.flags, sb);
+ return;
+
+ case L9P_RUNLINKAT:
+ return;
+
+ default:
+ sbuf_printf(sb, " <missing case in %s()>", __func__);
+ }
+}