/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Principles of emulation 101. * * * *** Setting errno * * Just don't do it. This emulation library is loaded onto a * seperate link map from the application who's address space we're * running in. We have our own private copy of libc, so there for, * the errno value accessible from here is is also private and changing * it will not affect any errno value that the processes who's address * space we are running in will see. To return an error condition we * should return the negated errno value we'd like the system to return. * For more information about this see the comment in s10_handler(). * Basically, when we return to the caller that initiated the system * call it's their responsibility to set errno. * * * *** Recursion Considerations * * When emulating system calls we need to be very careful about what * library calls we invoke. 
Library calls should be kept to a minimum.
 * One issue is that library calls can invoke system calls, so if we're
 * emulating a system call and we invoke a library call that depends on
 * that system call we will probably enter a recursive loop, which would
 * be bad.
 *
 *
 * *** Return Values.
 *
 * When declaring new syscall emulation functions, it is very important
 * to set the proper RV_* flags in the s10_sysent_table.  Upon failure,
 * syscall emulation functions should return an errno value.  Upon success
 * syscall emulation functions should return 0 and set the sysret_t return
 * value parameters accordingly.
 *
 *
 * *** Agent lwp considerations
 *
 * It is currently impossible to do any emulation for these system calls
 * when they are being invoked on behalf of an agent lwp.  To understand why
 * it's impossible you have to understand how agent lwp syscalls work.
 *
 * The agent lwp syscall process works as follows:
 *   1	The controlling process stops the target.
 *   2	The controlling process injects an agent lwp which is also stopped.
 *	This agent lwp assumes the userland stack and register values
 *	of another stopped lwp in the current process.
 *   3	The controlling process configures the agent lwp to start
 *	executing the requested system call.
 *   4	The controlling process configures /proc to stop the agent lwp when
 *	it enters the requested system call.
 *   5	The controlling process allows the agent lwp to start executing.
 *   6	The agent lwp traps into the kernel to perform the requested system
 *	call and immediately stops.
 *   7	The controlling process copies all the arguments for the requested
 *	system call onto the agent lwp's stack.
 *   8	The controlling process configures /proc to stop the agent lwp
 *	when it completes the requested system call.
 *   9	The controlling process allows the agent lwp to start executing.
 *  10	The agent lwp executes the system call and then stops before
 *	returning to userland.
 *  11	The controlling process copies the return value and return arguments
 *	back from the agent lwp's stack.
 *  12	The controlling process destroys the agent lwp and restarts
 *	the target process.
 *
 * The fundamental problem is that when the agent executes the requested
 * system call in step 5, if we're emulating that system call then the
 * lwp is redirected back to our emulation layer without blocking
 * in the kernel.  But our emulation layer can't access the arguments
 * for the system call because they haven't been copied to the stack
 * yet and they still only exist in the controlling process's address
 * space.  This prevents us from being able to do any emulation of
 * agent lwp system calls.  Hence, currently our brand trap interposition
 * callback (s10_brand_syscall_callback_common) will detect if a system
 * call is being made by an agent lwp, and if this is the case it will
 * never redirect the system call to this emulation library.
 *
 * In the future, if this proves to be a problem the easiest solution
 * would probably be to replace the branded versions of these applications
 * with their native counterparts.  Ie, truss, plimit, and pfiles could be
 * replaced with wrapper scripts that execute the native versions of these
 * applications.  In the case of plimit and pfiles this should be pretty
 * straightforward.  Truss would probably be more tricky since it can
 * execute applications which would be branded applications, so in that
 * case it might be necessary to create a loadable library which could
 * be LD_PRELOADed into truss and this library would interpose on the
 * exec() system call to allow truss to correctly execute branded
 * processes.
It should be pointed out that this solution could work * because "native agent lwps" (ie, agent lwps created by native * processes) can be treated differently from "branded aged lwps" (ie, * agent lwps created by branded processes), since native agent lwps * would presumably be making native system calls and hence not need * any interposition. * * * *** s10 brand emulation scope considerations * * One of the differences between the lx brand and the s8 and s9 * brands, is that the s8 and s9 brands only interpose on syscalls * that need some kind of emulation, where as the lx brand interposes * on _all_ system calls. Lx branded system calls that don't need * any emulation are then redirected back to the kernel from the * userland library via the IN_KERNEL_SYSCALL macro. The lx-syscall * dtrace provider depends on this behavior. * */ static zoneid_t zoneid; static boolean_t ipshared; static boolean_t emul_global_zone = B_FALSE; static int emul_vers; pid_t zone_init_pid; #define EMULATE(cb, args) { (sysent_cb_t)(cb), (args) } #define NOSYS EMULATE(s10_unimpl, (0 | RV_DEFAULT)) typedef long (*sysent_cb_t)(); typedef struct s10_sysent_table { sysent_cb_t st_callc; uintptr_t st_args; } s10_sysent_table_t; s10_sysent_table_t s10_sysent_table[]; #define S10_UTS_RELEASE "5.10" #define S10_UTS_VERSION "Generic_Virtual" /*LINTED: static unused*/ static volatile int s10_abort_err; /*LINTED: static unused*/ static volatile const char *s10_abort_msg; /*LINTED: static unused*/ static volatile const char *s10_abort_file; /*LINTED: static unused*/ static volatile int s10_abort_line; extern int errno; /*ARGSUSED*/ void _s10_abort(int err, const char *msg, const char *file, int line) { sysret_t rval; /* Save the error message into convenient globals */ s10_abort_err = err; s10_abort_msg = msg; s10_abort_file = file; s10_abort_line = line; /* kill ourselves */ abort(); /* If abort() didn't work, try something stronger. 
*/ (void) __systemcall(&rval, SYS_lwp_kill + 1024, _lwp_self(), SIGKILL); } static int s10_uucopy(const void *from, void *to, size_t size) { sysret_t rval; int err; err = __systemcall(&rval, SYS_uucopy + 1024, from, to, size); if (err == 0) return (0); return (EFAULT); } /* * ATTENTION: uucopystr() does NOT ensure that string are null terminated! */ static int s10_uucopystr(const void *from, void *to, size_t size) { sysret_t rval; int err; err = __systemcall(&rval, SYS_uucopystr + 1024, from, to, size); if (err == 0) return (0); return (EFAULT); } /* * Figures out the PID of init for the zone. Also returns a boolean * indicating whether this process currently has that pid: if so, * then at this moment, we are init. */ static boolean_t get_initpid_info(void) { pid_t pid; sysret_t rval; int err; /* * Determine the current process PID and the PID of the zone's init. * We use care not to call getpid() here, because we're not supposed * to call getpid() until after the program is fully linked-- the * first call to getpid() is a signal from the linker to debuggers * that linking has been completed. */ if ((err = __systemcall(&rval, SYS_brand, B_S10_PIDINFO, &pid, &zone_init_pid)) != 0) { s10_abort(err, "Failed to get init's pid"); } /* * Note that we need to be cautious with the pid we get back-- * it should not be stashed and used in place of getpid(), since * we might fork(2). So we keep zone_init_pid and toss the pid * we otherwise got. */ if (pid == zone_init_pid) return (B_TRUE); return (B_FALSE); } /* * This function is defined to be NOSYS but it won't be called from the * the kernel since the NOSYS system calls are not enabled in the kernel. * Thus, the only time this function is called is directly from within the * indirect system call path. 
*/ /*ARGSUSED*/ static long s10_unimpl(sysret_t *rv, uintptr_t p1) { sysret_t rval; /* * We'd like to print out some kind of error message here like * "unsupported syscall", but we can't because it's not safe to * assume that stderr or STDERR_FILENO actually points to something * that is a terminal, and if we wrote to those files we could * inadvertantly write to some applications open files, which would * be bad. * * Normally, if an application calls an invalid system call * it get a SIGSYS sent to it. So we'll just go ahead and send * ourselves a signal here. Note that this is far from ideal since * if the application has registered a signal handler, that signal * handler may recieve a ucontext_t as the third parameter to * indicate the context of the process when the signal was * generated, and in this case that context will not be what the * application is expecting. Hence, we should probably create a * brandsys() kernel function that can deliver the signal to us * with the correct ucontext_t. */ (void) __systemcall(&rval, SYS_lwp_kill + 1024, _lwp_self(), SIGSYS); return (ENOSYS); } #if defined(__sparc) && !defined(__sparcv9) /* * Yuck. For 32-bit sparc applications, handle indirect system calls. * Note that we declare this interface to use the maximum number of * system call arguments. If we recieve a system call that uses less * arguments, then the additional arguments will be garbage, but they * will also be ignored so that should be ok. 
*/ static long s10_indir(sysret_t *rv, int code, uintptr_t a0, uintptr_t a1, uintptr_t a2, uintptr_t a3, uintptr_t a4, uintptr_t a5, uintptr_t a6, uintptr_t a7) { s10_sysent_table_t *sst = &(s10_sysent_table[code]); s10_assert(code < NSYSCALL); switch (sst->st_args & NARGS_MASK) { case 0: return ((sst->st_callc)(rv)); case 1: return ((sst->st_callc)(rv, a0)); case 2: return ((sst->st_callc)(rv, a0, a1)); case 3: return ((sst->st_callc)(rv, a0, a1, a2)); case 4: return ((sst->st_callc)(rv, a0, a1, a2, a3)); case 5: return ((sst->st_callc)(rv, a0, a1, a2, a3, a4)); case 6: return ((sst->st_callc)(rv, rv, a0, a1, a2, a3, a4, a5)); case 7: return ((sst->st_callc)(rv, a0, a1, a2, a3, a4, a5, a6)); case 8: return ((sst->st_callc)(rv, a0, a1, a2, a3, a4, a5, a6, a7)); } s10_abort(0, "invalid entry in s10_sysent_table"); return (EINVAL); } #endif /* __sparc && !__sparcv9 */ /* * The process contract CT_TGET and CT_TSET parameter structure ct_param_t * changed between S10 and Nevada, so we have to emulate the old S10 * ct_param_t structure when interposing on the ioctl syscall. */ typedef struct s10_ct_param { uint32_t ctpm_id; uint32_t ctpm_pad; uint64_t ctpm_value; } s10_ct_param_t; /* * New first arg "legacy" should be set to 1. */ static int s10_getpagesizes(sysret_t *rval, size_t *buf, int nelem) { int err; if ((err = __systemcall(rval, SYS_getpagesizes + 1024, 1, buf, nelem)) != 0) return (err); return (0); } int s10_ioctl(sysret_t *rval, int fdes, int cmd, intptr_t arg) { int err; s10_ct_param_t s10param; ct_param_t param; struct stat statbuf; /* * We have to emulate process contract ioctls for init(1M) because the * ioctl parameter structure changed between S10 and Nevada. This is * a relatively simple process of filling Nevada structure fields, * shuffling values, and initiating a native system call. * * For now, we'll assume that all consumers of CT_TGET and CT_TSET will * need emulation. 
We'll issue a stat to make sure that the ioctl * is meant for the contract file system. * */ switch (cmd) { case CT_TGET: if ((err = __systemcall(rval, SYS_fstat + 1024, fdes, &statbuf)) != 0) return (err); if (strcmp(statbuf.st_fstype, MNTTYPE_CTFS) != 0) goto nonemuioctl; if (s10_uucopy((const void *)arg, &s10param, sizeof (s10param)) != 0) return (EFAULT); param.ctpm_id = s10param.ctpm_id; param.ctpm_size = sizeof (uint64_t); param.ctpm_value = &s10param.ctpm_value; if ((err = __systemcall(rval, SYS_ioctl + 1024, fdes, cmd, ¶m)) != 0) return (err); if (s10_uucopy(&s10param, (void *)arg, sizeof (s10param)) != 0) return (EFAULT); return (0); case CT_TSET: if ((err = __systemcall(rval, SYS_fstat + 1024, fdes, &statbuf)) != 0) return (err); if (strcmp(statbuf.st_fstype, MNTTYPE_CTFS) != 0) goto nonemuioctl; if (s10_uucopy((const void *)arg, &s10param, sizeof (s10param)) != 0) return (EFAULT); param.ctpm_id = s10param.ctpm_id; param.ctpm_size = sizeof (uint64_t); param.ctpm_value = &s10param.ctpm_value; if ((err = __systemcall(rval, SYS_ioctl + 1024, fdes, cmd, ¶m)) != 0) return (err); return (0); } nonemuioctl: if ((err = __systemcall(rval, SYS_ioctl + 1024, fdes, cmd, arg)) != 0) return (err); return (0); } /* * Unfortunately, pwrite()'s behavior differs between S10 and Nevada when * applied to files opened with O_APPEND. The offset argument is ignored and * the buffer is appended to the target file in S10, whereas the current file * position is ignored in Nevada (i.e., pwrite() acts as though the target file * wasn't opened with O_APPEND). This is a result of the fix for CR 6655660 * (pwrite() must ignore the O_APPEND/FAPPEND flag). * * We emulate the old S10 pwrite() behavior by checking whether the target file * was opened with O_APPEND. If it was, then invoke the write() system call * instead of pwrite(); otherwise, invoke the pwrite() system call as usual. 
*/ static int s10_pwrite(sysret_t *rval, int fd, const void *bufferp, size_t num_bytes, off_t offset) { int err; if ((err = __systemcall(rval, SYS_fcntl + 1024, fd, F_GETFL)) != 0) return (err); if (rval->sys_rval1 & O_APPEND) return (__systemcall(rval, SYS_write + 1024, fd, bufferp, num_bytes)); return (__systemcall(rval, SYS_pwrite + 1024, fd, bufferp, num_bytes, offset)); } #ifndef _LP64 /* * This is the large file version of the pwrite() system call for 32-bit * processes. This exists for the same reason that s10_pwrite() exists; see * the comment above s10_pwrite(). */ static int s10_pwrite64(sysret_t *rval, int fd, const void *bufferp, size32_t num_bytes, uint32_t offset_1, uint32_t offset_2) { int err; if ((err = __systemcall(rval, SYS_fcntl + 1024, fd, F_GETFL)) != 0) return (err); if (rval->sys_rval1 & O_APPEND) return (__systemcall(rval, SYS_write + 1024, fd, bufferp, num_bytes)); return (__systemcall(rval, SYS_pwrite64 + 1024, fd, bufferp, num_bytes, offset_1, offset_2)); } #endif /* !_LP64 */ #define S10_AC_PROC (0x1 << 28) #define S10_AC_TASK (0x2 << 28) #define S10_AC_FLOW (0x4 << 28) #define S10_AC_MODE(x) ((x) & 0xf0000000) #define S10_AC_OPTION(x) ((x) & 0x0fffffff) /* * The mode shift, mode mask and option mask for acctctl have changed. The * mode is currently the top full byte and the option is the lower 3 full bytes. */ int s10_acctctl(sysret_t *rval, int cmd, void *buf, size_t bufsz) { int mode = S10_AC_MODE(cmd); int option = S10_AC_OPTION(cmd); switch (mode) { case S10_AC_PROC: mode = AC_PROC; break; case S10_AC_TASK: mode = AC_TASK; break; case S10_AC_FLOW: mode = AC_FLOW; break; default: return (S10_TRUSS_POINT_3(rval, SYS_acctctl, EINVAL, cmd, buf, bufsz)); } return (__systemcall(rval, SYS_acctctl + 1024, mode | option, buf, bufsz)); } /* * Determine whether the executable passed to SYS_exec or SYS_execve is a * wrapper around a native executable. 
If so, then fudge the executable's * name and parameters to eliminate any trace of the wrapper. This will make * pgrep and other commands that examine process' executable names and * command-line parameters work properly. */ static int s10_exec_native(sysret_t *rval, const char *fname, const char **argp, const char **envp) { const char *filename = fname; char path[64]; int err; /* Get a copy of the executable we're trying to run */ path[0] = '\0'; (void) s10_uucopystr(filename, path, sizeof (path)); /* Check if we're trying to run a native binary */ if (strncmp(path, "/.SUNWnative/usr/lib/brand/solaris10/s10_native", sizeof (path)) != 0) return (0); /* Skip the first element in the argv array */ argp++; /* * The name of the new program to execute was the second parameter * passed to s10_exec_native(). */ if (s10_uucopy(argp, &filename, sizeof (char *)) != 0) return (EFAULT); /* If an exec call succeeds, it never returns */ err = __systemcall(rval, SYS_brand + 1024, B_EXEC_NATIVE, filename, argp, envp, NULL, NULL, NULL); s10_assert(err != 0); return (err); } /* * Interpose on the SYS_exec syscall to detect native wrappers. */ int s10_exec(sysret_t *rval, const char *fname, const char **argp) { int err; if ((err = s10_exec_native(rval, fname, argp, NULL)) != 0) return (err); /* If an exec call succeeds, it never returns */ err = __systemcall(rval, SYS_exec + 1024, fname, argp); s10_assert(err != 0); return (err); } /* * Interpose on the SYS_execve syscall to detect native wrappers. */ int s10_execve(sysret_t *rval, const char *fname, const char **argp, const char **envp) { int err; if ((err = s10_exec_native(rval, fname, argp, envp)) != 0) return (err); /* If an exec call succeeds, it never returns */ err = __systemcall(rval, SYS_execve + 1024, fname, argp, envp); s10_assert(err != 0); return (err); } /* * S10's issetugid() syscall is now a subcode to privsys(). 
*/ static int s10_issetugid(sysret_t *rval) { int err; if ((err = __systemcall(rval, SYS_privsys + 1024, PRIVSYS_ISSETUGID, 0, 0, 0, 0, 0)) != 0) return (err); return (0); } /* * New last arg "block" flag should be zero. The block flag is used by * the Opensolaris AIO implementation, which is now part of libc. */ static int s10_sigqueue(sysret_t *rval, pid_t pid, int signo, void *value, int si_code) { int err; if ((err = __systemcall(rval, SYS_sigqueue + 1024, pid, signo, value, si_code, 0)) != 0) return (err); return (0); } static long s10_uname(sysret_t *rv, uintptr_t p1) { struct utsname un, *unp = (struct utsname *)p1; int rev, err; if ((err = __systemcall(rv, SYS_uname + 1024, &un)) != 0) return (err); rev = atoi(&un.release[2]); s10_assert(rev >= 11); bzero(un.release, _SYS_NMLN); (void) strlcpy(un.release, S10_UTS_RELEASE, _SYS_NMLN); bzero(un.version, _SYS_NMLN); (void) strlcpy(un.version, S10_UTS_VERSION, _SYS_NMLN); /* copy out the modified uname info */ if (s10_uucopy(&un, unp, sizeof (un)) != 0) return (EFAULT); return (0); } int s10_sysinfo(sysret_t *rv, int command, char *buf, long count) { char *value; int err, len; /* * We must interpose on the sysinfo(2) commands SI_RELEASE and * SI_VERSION; all others get passed to the native sysinfo(2) * command. */ switch (command) { case SI_RELEASE: value = S10_UTS_RELEASE; break; case SI_VERSION: value = S10_UTS_VERSION; break; default: /* * The default action is to pass the command to the * native sysinfo(2) syscall. */ if ((err = __systemcall(rv, SYS_systeminfo + 1024, command, buf, count)) != 0) return (err); return (0); } len = strlen(value) + 1; if (count > 0) { if (s10_uucopystr(value, buf, count) != 0) return (EFAULT); /* Assure NULL termination of buf as s10_uucopystr() doesn't. */ if (len > count && s10_uucopy("\0", buf + (count - 1), 1) != 0) return (EFAULT); } /* * On success, sysinfo(2) returns the size of buffer required to hold * the complete value plus its terminating NULL byte. 
*/ rv->sys_rval1 = len; rv->sys_rval2 = 0; S10_TRUSS_POINT_3(rv, SYS_systeminfo, 0, command, buf, count); return (0); } /* * If the emul_global_zone flag is set then emulate some aspects of the * zone system call. In particular, emulate the global zone ID on the * ZONE_LOOKUP subcommand and emulate some of the global zone attributes * on the ZONE_GETATTR subcommand. If the flag is not set or we're performing * some other operation, simply pass the calls through. */ int s10_zone(sysret_t *rval, int cmd, void *arg1, void *arg2, void *arg3, void *arg4) { char *aval; int len; zoneid_t zid; int attr; char *buf; size_t bufsize; /* * We only emulate the zone syscall for a subset of specific commands, * otherwise we just pass the call through. */ if (!emul_global_zone) return (__systemcall(rval, SYS_zone + 1024, cmd, arg1, arg2, arg3, arg4)); switch (cmd) { case ZONE_LOOKUP: (void) S10_TRUSS_POINT_1(rval, SYS_zone, 0, cmd); rval->sys_rval1 = GLOBAL_ZONEID; rval->sys_rval2 = 0; return (0); case ZONE_GETATTR: zid = (zoneid_t)(uintptr_t)arg1; attr = (int)(uintptr_t)arg2; buf = (char *)arg3; bufsize = (size_t)arg4; /* * If the request is for the global zone then we're emulating * that, otherwise pass this thru. */ if (zid != GLOBAL_ZONEID) goto passthru; (void) S10_TRUSS_POINT_3(rval, SYS_zone, 0, cmd, zid, attr); switch (attr) { case ZONE_ATTR_NAME: aval = GLOBAL_ZONENAME; break; case ZONE_ATTR_BRAND: aval = NATIVE_BRAND_NAME; break; default: /* * We only emulate a subset of the attrs, use the * real zone id to pass thru the rest. */ arg1 = (void *)(uintptr_t)zoneid; goto passthru; } len = strlen(aval) + 1; if (len > bufsize) return (ENAMETOOLONG); if (buf != NULL) { if (len == 1) { if (s10_uucopy("\0", buf, 1) != 0) return (EFAULT); } else { if (s10_uucopystr(aval, buf, len) != 0) return (EFAULT); /* * Assure NULL termination of "buf" as * s10_uucopystr() does NOT. 
*/ if (s10_uucopy("\0", buf + (len - 1), 1) != 0) return (EFAULT); } } rval->sys_rval1 = len; rval->sys_rval2 = 0; return (0); default: break; } passthru: return (__systemcall(rval, SYS_zone + 1024, cmd, arg1, arg2, arg3, arg4)); } /* * This routine is run only when the init daemon starts up, in order * to do any pre-initialization needed before the environment boots. */ static void s10_init1m_handler() { /* * Take special actions in advance of starting init(1m). * * XXX Nothing to do (yet). */ } /* * Close a libc file handle, but don't actually close the underlying * file descriptor. */ static void s10_close_fh(FILE *file) { int fd, fd_new; if (file == NULL) return; if ((fd = fileno(file)) < 0) return; fd_new = dup(fd); if (fd_new == -1) return; (void) fclose(file); (void) dup2(fd_new, fd); (void) close(fd_new); } /*ARGSUSED*/ int s10_init(int argc, char *argv[], char *envp[]) { sysret_t rval; s10_brand_reg_t reg; s10_elf_data_t sed; auxv_t *ap; uintptr_t *p; int i, err; ushort_t flags; char *bname; /* Sanity check our translation table return value codes */ for (i = 0; i < NSYSCALL; i++) { s10_sysent_table_t *est = &(s10_sysent_table[i]); s10_assert(BIT_ONLYONESET(est->st_args & RV_MASK)); } /* * We need to shutdown all libc stdio. libc stdio normally goes to * file descriptors, but since we're actually part of a another * process we don't own these file descriptors and we can't make * any assumptions about their state. */ s10_close_fh(stdin); s10_close_fh(stdout); s10_close_fh(stderr); /* * Cache the pid of the zone's init process and determine if * we're init(1m) for the zone. Remember: we might be init * now, but as soon as we fork(2) we won't be. */ if (get_initpid_info()) { s10_init1m_handler(); } /* get the current zoneid */ err = __systemcall(&rval, SYS_zone, ZONE_LOOKUP, NULL); s10_assert(err == 0); zoneid = (zoneid_t)rval.sys_rval1; /* Get the emulation version number. 
*/ if ((err = __systemcall(&rval, SYS_zone, ZONE_GETATTR, zoneid, S10_EMUL_VERSION_NUM, &emul_vers, sizeof (emul_vers))) != 0 || emul_vers != 0) { s10_abort(err, "The zone's patch level is unsupported"); /*NOTREACHED*/ } /* Figure out if this zone has a shared ip */ err = __systemcall(&rval, SYS_zone, ZONE_GETATTR, zoneid, ZONE_ATTR_FLAGS, &flags, sizeof (flags)); s10_assert(err == 0); ipshared = ((flags & ZF_NET_EXCL) == 0); bname = basename(argv[0]); /* * In general we want the S10 commands that are zone-aware to continue * to behave as they normally do within a zone. Since these commands * are zone-aware, they should continue to "do the right thing". * However, some zone-aware commands aren't going to work the way * we expect them to inside the branded zone. In particular, the pkg * and patch commands will not properly manage all pkgs/patches * unless the commands think they are running in the global zone. For * these commands we want to emulate the global zone. * * XXX One issue is the handling of hollow pkgs. This is not normally * a problem since the p2v/v2v process handles those. However, if * the user attempts to install a hollow pkg after the zone is running, * the pkg code will do the wrong thing. Luckily, most of the hollow * pkgs are core pkgs which will already be installed in the image * before we p2v/v2v it into the zone and there should be little need * to pkgadd these later. */ if (strcmp("pkgadd", bname) == 0 || strcmp("pkgrm", bname) == 0 || strcmp("pkgcond", bname) == 0 || strcmp("patchadd", bname) == 0 || strcmp("patchrm", bname) == 0) emul_global_zone = B_TRUE; /* * Register our syscall emulation table with the kernel. * Note that we don't have to do invoke (syscall_number + 1024) * until we've actually establised a syscall emulation callback * handler address, which is what we're doing with this brand * syscall. 
*/ reg.sbr_version = S10_VERSION; reg.sbr_handler = (caddr_t)s10_handler; if ((err = __systemcall(&rval, SYS_brand, B_REGISTER, ®)) != 0) { s10_abort(err, "Failed to brand current process"); /*NOTREACHED*/ } /* Get data about the executable we're running from the kernel. */ if ((err = __systemcall(&rval, SYS_brand + 1024, B_ELFDATA, (void *)&sed)) != 0) { s10_abort(err, "Failed to get required brand ELF data from the kernel"); /*NOTREACHED*/ } /* * Find the aux vector on the stack. */ p = (uintptr_t *)envp; while (*p != NULL) p++; /* * p is now pointing at the 0 word after the environ pointers. * After that is the aux vectors. * * The aux vectors are currently pointing to the brand emulation * library and associated linker. We're going to change them to * point to the brand executable and associated linker (or to no * linker for static binaries). This matches the process data * stored within the kernel and visible from /proc, which was * all setup in s10_elfexec(). We do this so that when a debugger * attaches to the process it sees the process as a normal solaris * process, this brand emulation library and everything on it's * link map will not be visible, unless our librtld_db plugin * is used. Note that this is very different from how Linux * branded processes are implemented within lx branded zones. * In that situation, the primary linkmap of the process is the * brand emulation libraries linkmap, not the Linux applications * linkmap. * * We also need to clear the AF_SUN_NOPLM flag from the AT_SUN_AUXFLAGS * aux vector. This flag told our linker that we don't have a * primary link map. Now that our linker is done initializing, we * want to clear this flag before we transfer control to the * applications copy of the linker, since we want that linker to have * a primary link map which will be the link map for the application * we're running. 
*/ p++; for (ap = (auxv_t *)p; ap->a_type != AT_NULL; ap++) { switch (ap->a_type) { case AT_BASE: /* Hide AT_BASE if static binary */ if (sed.sed_base == NULL) { ap->a_type = AT_IGNORE; ap->a_un.a_val = NULL; } else { ap->a_un.a_val = sed.sed_base; } break; case AT_ENTRY: ap->a_un.a_val = sed.sed_entry; break; case AT_PHDR: ap->a_un.a_val = sed.sed_phdr; break; case AT_PHENT: ap->a_un.a_val = sed.sed_phent; break; case AT_PHNUM: ap->a_un.a_val = sed.sed_phnum; break; case AT_SUN_AUXFLAGS: ap->a_un.a_val &= ~AF_SUN_NOPLM; break; case AT_SUN_EMULATOR: /* * ld.so.1 inspects AT_SUN_EMULATOR to see if * if it is the linker for the brand emulation * library. Hide AT_SUN_EMULATOR, as the * linker we are about to jump to is the linker * for the binary. */ ap->a_type = AT_IGNORE; ap->a_un.a_val = NULL; break; case AT_SUN_LDDATA: /* Hide AT_SUN_LDDATA if static binary */ if (sed.sed_lddata == NULL) { ap->a_type = AT_IGNORE; ap->a_un.a_val = NULL; } else { ap->a_un.a_val = sed.sed_lddata; } break; default: break; } } s10_runexe(argv, sed.sed_ldentry); /*NOTREACHED*/ s10_abort(0, "s10_runexe() returned"); return (-1); } /* * This table must have at least NSYSCALL entries in it. * * The second parameter of each entry in the s10_sysent_table * contains the number of parameters and flags that describe the * syscall return value encoding. See the block comments at the * top of this file for more information about the syscall return * value flags and when they should be used. 
*/
/*
 * Entries are positional: the array index is the system call number, and
 * the trailing comment on each line repeats that index.  NOSYS entries
 * expand to s10_unimpl(); EMULATE entries name the emulation routine
 * together with its argument count and RV_* flag.
 */
s10_sysent_table_t s10_sysent_table[] = {
	/* Slot 0 is the indirect syscall, emulated only on 32-bit sparc. */
#if defined(__sparc) && !defined(__sparcv9)
	EMULATE(s10_indir, 9 | RV_64RVAL),	/* 0 */
#else /* !__sparc || __sparcv9 */
	NOSYS,					/* 0 */
#endif /* !__sparc || __sparcv9 */
	NOSYS,					/* 1 */
	NOSYS,					/* 2 */
	NOSYS,					/* 3 */
	NOSYS,					/* 4 */
	NOSYS,					/* 5 */
	NOSYS,					/* 6 */
	NOSYS,					/* 7 */
	NOSYS,					/* 8 */
	NOSYS,					/* 9 */
	NOSYS,					/* 10 */
	EMULATE(s10_exec, 2 | RV_DEFAULT),	/* 11 */
	NOSYS,					/* 12 */
	NOSYS,					/* 13 */
	NOSYS,					/* 14 */
	NOSYS,					/* 15 */
	NOSYS,					/* 16 */
	NOSYS,					/* 17 */
	NOSYS,					/* 18 */
	NOSYS,					/* 19 */
	NOSYS,					/* 20 */
	NOSYS,					/* 21 */
	NOSYS,					/* 22 */
	NOSYS,					/* 23 */
	NOSYS,					/* 24 */
	NOSYS,					/* 25 */
	NOSYS,					/* 26 */
	NOSYS,					/* 27 */
	NOSYS,					/* 28 */
	NOSYS,					/* 29 */
	NOSYS,					/* 30 */
	NOSYS,					/* 31 */
	NOSYS,					/* 32 */
	NOSYS,					/* 33 */
	NOSYS,					/* 34 */
	NOSYS,					/* 35 */
	NOSYS,					/* 36 */
	NOSYS,					/* 37 */
	NOSYS,					/* 38 */
	NOSYS,					/* 39 */
	NOSYS,					/* 40 */
	NOSYS,					/* 41 */
	NOSYS,					/* 42 */
	NOSYS,					/* 43 */
	NOSYS,					/* 44 */
	NOSYS,					/* 45 */
	NOSYS,					/* 46 */
	NOSYS,					/* 47 */
	NOSYS,					/* 48 */
	NOSYS,					/* 49 */
	NOSYS,					/* 50 */
	NOSYS,					/* 51 */
	NOSYS,					/* 52 */
	NOSYS,					/* 53 */
	EMULATE(s10_ioctl, 3 | RV_DEFAULT),	/* 54 */
	NOSYS,					/* 55 */
	NOSYS,					/* 56 */
	NOSYS,					/* 57 */
	NOSYS,					/* 58 */
	EMULATE(s10_execve, 3 | RV_DEFAULT),	/* 59 */
	NOSYS,					/* 60 */
	NOSYS,					/* 61 */
	NOSYS,					/* 62 */
	NOSYS,					/* 63 */
	NOSYS,					/* 64 */
	NOSYS,					/* 65 */
	NOSYS,					/* 66 */
	NOSYS,					/* 67 */
	NOSYS,					/* 68 */
	NOSYS,					/* 69 */
	NOSYS,					/* 70 */
	EMULATE(s10_acctctl, 3 | RV_DEFAULT),	/* 71 */
	NOSYS,					/* 72 */
	EMULATE(s10_getpagesizes, 2 | RV_DEFAULT),	/* 73 */
	NOSYS,					/* 74 */
	EMULATE(s10_issetugid, 0 | RV_DEFAULT),	/* 75 */
	NOSYS,					/* 76 */
	NOSYS,					/* 77 */
	NOSYS,					/* 78 */
	NOSYS,					/* 79 */
	NOSYS,					/* 80 */
	NOSYS,					/* 81 */
	NOSYS,					/* 82 */
	NOSYS,					/* 83 */
	NOSYS,					/* 84 */
	NOSYS,					/* 85 */
	NOSYS,					/* 86 */
	NOSYS,					/* 87 */
	NOSYS,					/* 88 */
	NOSYS,					/* 89 */
	NOSYS,					/* 90 */
	NOSYS,					/* 91 */
	NOSYS,					/* 92 */
	NOSYS,					/* 93 */
	NOSYS,					/* 94 */
	NOSYS,					/* 95 */
	NOSYS,					/* 96 */
	NOSYS,					/* 97 */
	NOSYS,					/* 98 */
	NOSYS,					/* 99 */
	NOSYS,					/* 100 */
	NOSYS,					/* 101 */
	NOSYS,					/* 102 */
	NOSYS,					/* 103 */
	NOSYS,					/* 104 */
	NOSYS,					/* 105 */
	NOSYS,					/* 106 */
	NOSYS,					/* 107 */
	NOSYS,					/* 108 */
	NOSYS,					/* 109 */
	NOSYS,					/* 110 */
	NOSYS,					/* 111 */
	NOSYS,					/* 112 */
	NOSYS,					/* 113 */
	NOSYS,					/* 114 */
	NOSYS,					/* 115 */
	NOSYS,					/* 116 */
	NOSYS,					/* 117 */
	NOSYS,					/* 118 */
	NOSYS,					/* 119 */
	NOSYS,					/* 120 */
	NOSYS,					/* 121 */
	NOSYS,					/* 122 */
	NOSYS,					/* 123 */
	NOSYS,					/* 124 */
	NOSYS,					/* 125 */
	NOSYS,					/* 126 */
	NOSYS,					/* 127 */
	NOSYS,					/* 128 */
	NOSYS,					/* 129 */
	NOSYS,					/* 130 */
	NOSYS,					/* 131 */
	NOSYS,					/* 132 */
	NOSYS,					/* 133 */
	NOSYS,					/* 134 */
	EMULATE(s10_uname, 1 | RV_DEFAULT),	/* 135 */
	NOSYS,					/* 136 */
	NOSYS,					/* 137 */
	NOSYS,					/* 138 */
	EMULATE(s10_sysinfo, 3 | RV_DEFAULT),	/* 139 */
	NOSYS,					/* 140 */
	NOSYS,					/* 141 */
	NOSYS,					/* 142 */
	NOSYS,					/* 143 */
	NOSYS,					/* 144 */
	NOSYS,					/* 145 */
	NOSYS,					/* 146 */
	NOSYS,					/* 147 */
	NOSYS,					/* 148 */
	NOSYS,					/* 149 */
	NOSYS,					/* 150 */
	NOSYS,					/* 151 */
	NOSYS,					/* 152 */
	NOSYS,					/* 153 */
	NOSYS,					/* 154 */
	NOSYS,					/* 155 */
	NOSYS,					/* 156 */
	NOSYS,					/* 157 */
	NOSYS,					/* 158 */
	NOSYS,					/* 159 */
	NOSYS,					/* 160 */
	NOSYS,					/* 161 */
	NOSYS,					/* 162 */
	NOSYS,					/* 163 */
	NOSYS,					/* 164 */
	NOSYS,					/* 165 */
	NOSYS,					/* 166 */
	NOSYS,					/* 167 */
	NOSYS,					/* 168 */
	NOSYS,					/* 169 */
	NOSYS,					/* 170 */
	NOSYS,					/* 171 */
	NOSYS,					/* 172 */
	NOSYS,					/* 173 */
	EMULATE(s10_pwrite, 4 | RV_DEFAULT),	/* 174 */
	NOSYS,					/* 175 */
	NOSYS,					/* 176 */
	NOSYS,					/* 177 */
	NOSYS,					/* 178 */
	NOSYS,					/* 179 */
	NOSYS,					/* 180 */
	NOSYS,					/* 181 */
	NOSYS,					/* 182 */
	NOSYS,					/* 183 */
	NOSYS,					/* 184 */
	NOSYS,					/* 185 */
	NOSYS,					/* 186 */
	NOSYS,					/* 187 */
	NOSYS,					/* 188 */
	NOSYS,					/* 189 */
	EMULATE(s10_sigqueue, 4 | RV_DEFAULT),	/* 190 */
	NOSYS,					/* 191 */
	NOSYS,					/* 192 */
	NOSYS,					/* 193 */
	NOSYS,					/* 194 */
	NOSYS,					/* 195 */
	NOSYS,					/* 196 */
	NOSYS,					/* 197 */
	NOSYS,					/* 198 */
	NOSYS,					/* 199 */
	NOSYS,					/* 200 */
	NOSYS,					/* 201 */
	NOSYS,					/* 202 */
	NOSYS,					/* 203 */
	NOSYS,					/* 204 */
	NOSYS,					/* 205 */
	NOSYS,					/* 206 */
	NOSYS,					/* 207 */
	NOSYS,					/* 208 */
	NOSYS,					/* 209 */
	NOSYS,					/* 210 */
	NOSYS,					/* 211 */
	NOSYS,					/* 212 */
	NOSYS,					/* 213 */
	NOSYS,					/* 214 */
	NOSYS,					/* 215 */
	NOSYS,					/* 216 */
	NOSYS,					/* 217 */
	NOSYS,					/* 218 */
	NOSYS,					/* 219 */
	NOSYS,					/* 220 */
	NOSYS,					/* 221 */
	NOSYS,					/* 222 */
	/* s10_pwrite64 is only compiled for 32-bit (!_LP64) builds. */
#ifdef _LP64
	NOSYS,					/* 223 */
#else /* !_LP64 */
	EMULATE(s10_pwrite64, 5 | RV_DEFAULT),	/* 223 */
#endif /* !_LP64 */
	NOSYS,					/* 224 */
	NOSYS,					/* 225 */
	NOSYS,					/* 226 */
	EMULATE(s10_zone, 5 | RV_DEFAULT),	/* 227 */
	NOSYS,					/* 228 */
	NOSYS,					/* 229 */
	NOSYS,					/* 230 */
	NOSYS,					/* 231 */
	NOSYS,					/* 232 */
	NOSYS,					/* 233 */
	NOSYS,					/* 234 */
	NOSYS,					/* 235 */
	NOSYS,					/* 236 */
	NOSYS,					/* 237 */
	NOSYS,					/* 238 */
	NOSYS,					/* 239 */
	NOSYS,					/* 240 */
	NOSYS,					/* 241 */
	NOSYS,					/* 242 */
	NOSYS,					/* 243 */
	NOSYS,					/* 244 */
	NOSYS,					/* 245 */
	NOSYS,					/* 246 */
	NOSYS,					/* 247 */
	NOSYS,					/* 248 */
	NOSYS,					/* 249 */
	NOSYS,					/* 250 */
	NOSYS,					/* 251 */
	NOSYS,					/* 252 */
	NOSYS,					/* 253 */
	NOSYS,					/* 254 */
	NOSYS					/* 255 */
};