Merge version 0.45.
authorMalte S. Stretz <mss@apache.org>
Wed, 11 Aug 2010 20:31:15 +0000 (22:31 +0200)
committerMalte S. Stretz <mss@apache.org>
Wed, 11 Aug 2010 20:31:15 +0000 (22:31 +0200)
nscd-0.45.c [moved from nscd-0.36.c with 75% similarity]

similarity index 75%
rename from nscd-0.36.c
rename to nscd-0.45.c
index 3037240..979e654 100644 (file)
@@ -17,7 +17,7 @@
 /*
 Build instructions:
 
-gcc -Os -o nscd nscd.c
+gcc -Wall -Wunused-parameter -Os -o nscd nscd.c
 
 gcc -fomit-frame-pointer -Wl,--sort-section -Wl,alignment -Wl,--sort-common
       -Os -o nscd nscd.c
@@ -29,7 +29,7 @@ quite a bit of bugs in it. This leads people to invent babysitters
 which restart crashed/hung nscd. This is ugly.
 
 After looking at nscd source in glibc I arrived to the conclusion
-that its desidn is contributing to this significantly. Even if nscd's
+that its design is contributing to this significantly. Even if nscd's
 code is 100.00% perfect and bug-free, it can still suffer from bugs
 in libraries it calls.
 
@@ -42,7 +42,7 @@ or file descriptor leaks and other bugs.
 
 Since nscd is multithreaded program with single shared cache,
 any resource leak in any NSS library has cumulative effect.
-Even if an NSS library leaks a file descriptor 0.01% of the time,
+Even if a NSS library leaks a file descriptor 0.01% of the time,
 this will make nscd crash or hang after some time.
 
 Of course bugs in NSS .so modules should be fixed, but meanwhile
@@ -120,8 +120,25 @@ vda.linux@googlemail.com
  * 0.36 excercise extreme paranoia handling server-user option;
  *      a little bit more verbose logging:
  *      L_DEBUG2 log level added, use debug-level 7 to get it
+ * 0.37 users reported over-zealous "detected change in /etc/passwd",
+ *      apparently stat() returns random garbage in unused padding
+ *      on some systems. Made the check less paranoid.
+ * 0.38 log POLLHUP better
+ * 0.39 log answers to client better, log getpwnam in the worker,
+ *      pass debug level value down to worker.
+ * 0.40   fix handling of shutdown and invalidate requests;
+ *        fix bug with answer written in several pieces
+ * 0.40.1 set hints.ai_socktype = SOCK_STREAM in GETAI request
+ * 0.41   eliminate double caching of two near-simultaneous identical requests -
+ *        EXPERIMENTAL
+ * 0.42   execute /proc/self/exe by link name first (better comm field)
+ * 0.43   fix off-by-one error in setgroups
+ * 0.44   make -d[ddd] bump up debug - easier to explain to users
+ *        how to produce detailed log (no nscd.conf tweaking)
+ * 0.45   Fix out-of-bounds array access and log/pid file permissions -
+ *        thanks to Sebastian Krahmer (krahmer AT suse.de)
  */
-#define PROGRAM_VERSION "0.36"
+#define PROGRAM_VERSION "0.45"
 
 #define DEBUG_BUILD 1
 
@@ -130,6 +147,8 @@ vda.linux@googlemail.com
 ** Generic helpers
 */
 
+#define ARRAY_SIZE(x) ((unsigned)(sizeof(x) / sizeof((x)[0])))
+
 #define NORETURN __attribute__ ((__noreturn__))
 
 
@@ -473,6 +492,12 @@ enum {
        SRV_HOSTS,
 };
 
+static const char srv_name[3][7] = {
+       "passwd",
+       "group",
+       "hosts"
+};
+
 static struct {
        const char *logfile;
        const char *user;
@@ -503,9 +528,9 @@ static const char *self_exe_points_to = "/proc/self/exe";
 
 /* Header common to all requests */
 #define USER_REQ_STRUCT \
-       int32_t version; /* Version number of the daemon interface */ \
-       int32_t type;    /* Service requested */ \
-       int32_t key_len; /* Key length */
+       uint32_t version; /* Version number of the daemon interface */ \
+       uint32_t type;    /* Service requested */ \
+       uint32_t key_len; /* Key length */
 
 typedef struct user_req_header {
        USER_REQ_STRUCT
@@ -528,7 +553,7 @@ typedef struct user_req {
                        USER_REQ_STRUCT
                };
                struct { /* when stored in cache, overlaps .version */
-                       unsigned refcount:8; /* actually, can be 1 or 0 only */
+                       unsigned refcount:8;
                        /* (timestamp24 * 256) == timestamp in ms */
                        unsigned timestamp24:24;
                };
@@ -607,16 +632,12 @@ static int unsupported_ureq_type(unsigned type)
 {
        if (type == GETAI) return 0;
        if (type == INITGROUPS) return 0;
-       if (type > GETHOSTBYADDRv6) return 1;
+       if (type == GETSTAT) return 1;
+       if (type > INVALIDATE) return 1;
        return 0;
 }
 
-/* Possible reductions:
- * fd, bufidx - uint8_t
- * started_ms -> uint16_t started_s
- * ureq - eliminate (derivable from bufidx?)
- * cell - eliminate (derivable from resptr?)
- */
+
 typedef struct client_info {
        /* if client_fd != 0, we are waiting for the reply from worker
         * on pfd[i].fd, and client_fd is saved client's fd
@@ -626,12 +647,12 @@ typedef struct client_info {
        unsigned bufidx;        /* buffer# in global client_buf[] */
        unsigned started_ms;
        unsigned respos;        /* response */
-       //unsigned resp_sz;
        user_req *resptr;       /* response */
+       user_req **cache_pp;    /* cache entry address */
        user_req *ureq;         /* request (points to client_buf[x]) */
-       user_req **cell;        /* cache cell ptr */
 } client_info;
 
+static unsigned g_now_ms;
 static int min_closed = INT_MAX;
 static int cnt_closed = 0;
 static int num_clients = 2; /* two listening sockets are "clients" too */
@@ -639,25 +660,86 @@ static int num_clients = 2; /* two listening sockets are "clients" too */
 /* We read up to max_reqnum requests in parallel */
 static unsigned max_reqnum = 14;
 static int next_buf;
-/* Each of these points to [max_reqnum] sized array */
 static char          (*client_buf)[MAX_USER_REQ_SIZE];
 static char          *busy_cbuf;
 static struct pollfd *pfd;
 static client_info   *cinfo;
 
+/* Request, response and cache data structures:
+ *
+ * cache[] (defined later):
+ * cacheline_t cache[cache_size] array, or in other words,
+ * user_req* cache[cache_size][8] array.
+ * Every client request is hashed, hash value determines which cache[x]
+ * will have the response stored in one of its 8 elements.
+ * Cache entries have this format: request, then padding to 32 bits,
+ * then the response.
+ * Addresses in cache[x][y] may be NULL or:
+ * (&client_buf[z]) & 1: the cache miss is in progress ("future entry"):
+ * "the data is not in the cache (yet), wait for it to appear"
+ * (&client_buf[z]) & 3: the cache miss is in progress and other clients
+ * also want the same data ("shared future entry")
+ * else (non-NULL but low two bits are 0): cached data in malloc'ed block
+ *
+ * Each of these is a [max_reqnum] sized array:
+ * pfd[i] - given to poll() to wait for requests and replies.
+ *      .fd: first two pfd[i]: listening Unix domain sockets, else
+ *      .fd: open fd to a client, for reading client's request, or
+ *      .fd: open fd to a worker, to send request and get response back
+ * cinfo[i] - auxiliary client data for pfd[i]
+ *      .client_fd: open fd to a client, in case we already had read its
+ *          request and got a cache miss, and created a worker or
+ *          wait for another client's worker.
+ *          Otherwise, it's 0 and client's fd is in pfd[i].fd
+ *      .bufidx: index in client_buf[] we store client's request in
+ *      .bytecnt: size of the request
+ *      .started_ms: used to time out unresponsive clients
+ *      .respos:
+ *      .resptr:
+ *      .cache_pp: &cache[x][y] where the response is, or will be stored.
+ *      .ureq:
+ * When a client has received its reply (or otherwise closed (timeout etc)),
+ * corresponding pfd[i] and cinfo[i] are removed by shifting [i+1], [i+2] etc
+ * elements down, so that both arrays never have free holes.
+ * [num_clients] is always the first free element.
+ *
+ * Each of these also is a [max_reqnum] sized array, but indexes
+ * do not correspond directly to pfd[i] and cinfo[i]:
+ * client_buf[n][MAX_USER_REQ_SIZE] - buffers we read client requests into
+ * busy_cbuf[n] - bool flags marking busy client_buf[]
+ */
+/* Possible reductions:
+ * fd, bufidx - uint8_t
+ * started_ms -> uint16_t started_s
+ * ureq - eliminate (derivable from bufidx?)
+ */
+
+/* Are special bits 0? is it a true cached entry? */
+#define CACHED_ENTRY(p)     ( ((long)(p) & 3) == 0 )
+/* Are special bits 11? is it a shared future cache entry? */
+#define CACHE_SHARED(p)     ( ((long)(p) & 3) == 3 )
+/* Return a ptr with special bits cleared (used for accessing data) */
+#define CACHE_PTR(p)        ( (void*) ((long)(p) & ~(long)3) )
+/* Return a ptr with special bits set to x1: make future cache entry ptr */
+#define MAKE_FUTURE_PTR(p)  ( (void*) ((long)(p) | 1) )
+/* Modify ptr, set special bits to 11: shared future cache entry */
+#define MARK_PTR_SHARED(pp) ( *(long*)(pp) |= 3 )
+
 static inline unsigned ureq_size(const user_req *ureq)
 {
        return sizeof(user_req_header) + ureq->key_len;
 }
 
-static unsigned cache_age(unsigned now_ms, const user_req *ureq)
+static unsigned cache_age(const user_req *ureq)
 {
-       return (uint32_t)now_ms - (ureq->timestamp24 << 8);
+       if (!CACHED_ENTRY(ureq))
+               return 0;
+       return (uint32_t) (g_now_ms - (ureq->timestamp24 << 8));
 }
 
-static void set_cache_timestamp(user_req *ureq, unsigned now_ms)
+static void set_cache_timestamp(user_req *ureq)
 {
-       ureq->timestamp24 = now_ms >> 8;
+       ureq->timestamp24 = g_now_ms >> 8;
 }
 
 static int alloc_buf_no(void)
@@ -679,11 +761,16 @@ static inline void *bufno2buf(int i)
        return client_buf[i];
 }
 
-static void close_client(int i)
+static void close_client(unsigned i)
 {
-       log(L_DEBUG, "closing client %d (fd %d)", i, pfd[i].fd);
+       log(L_DEBUG, "closing client %u (fd %u,%u)", i, pfd[i].fd, cinfo[i].client_fd);
+       /* Paranoia. We had nasty bugs where client was closed twice. */
+       if (pfd[i].fd == 0) ////
+               return;
        close(pfd[i].fd);
-       pfd[i].fd = 0; /* flag as unused */
+       if (cinfo[i].client_fd && cinfo[i].client_fd != pfd[i].fd)
+               close(cinfo[i].client_fd);
+       pfd[i].fd = 0; /* flag as unused (coalescing needs this) */
        busy_cbuf[cinfo[i].bufidx] = 0;
        cnt_closed++;
        if (i < min_closed)
@@ -692,7 +779,7 @@ static void close_client(int i)
 
 
 /*
-** ncsd API <-> C API conversion
+** nscd API <-> C API conversion
 */
 
 typedef struct response_header {
@@ -995,7 +1082,9 @@ static ai_response_header *obtain_addrinfo(const char *hostname)
 
        memset(&hints, 0, sizeof(hints));
        hints.ai_flags = AI_CANONNAME;
-       /* hinst.ai_socktype = SOCK_STREAM; - can kill dups (one for each possible SOCK_xxx) */
+       /* kills dups (one for each possible SOCK_xxx) */
+       /* this matches glibc behavior */
+       hints.ai_socktype = SOCK_STREAM;
        ai = NULL; /* on failure getaddrinfo may leave it as-is */
        err = getaddrinfo(hostname, NULL, &hints, &ai);
 
@@ -1085,108 +1174,118 @@ static uint32_t bernstein_hash(void *p, unsigned sz, uint32_t hash)
        return hash;
 }
 
-static user_req *find_cell_and_response(user_req ***cellp, user_req *ureq)
+static void free_refcounted_ureq(user_req **ureqp)
 {
-       user_req **cell;
+       user_req *ureq = *ureqp;
+
+       if (!CACHED_ENTRY(ureq))
+               return;
+
+       if (ureq->refcount) {
+               ureq->refcount--;
+       } else {
+               log(L_DEBUG2, "refcount == 0, free(%p)", ureq);
+               free(ureq);
+       }
+       *ureqp = NULL;
+}
+
+static user_req **lookup_in_cache(user_req *ureq)
+{
+       user_req **cacheline;
+       int free_cache;
        unsigned hash;
        unsigned i;
        unsigned ureq_sz = ureq_size(ureq);
 
        /* prevent overflow and division by zero */
-       if ((int)(cache_access_cnt+1) < 0) {
+       cache_access_cnt++;
+       if ((int)cache_access_cnt < 0) {
                cache_access_cnt = (cache_access_cnt >> 1) + 1;
                cache_hit_cnt = (cache_hit_cnt >> 1) + 1;
        }
-       cache_access_cnt++;
 
        hash = bernstein_hash(&ureq->key_len, ureq_sz - offsetof(user_req, key_len), ureq->type);
-       log(L_DEBUG, "hash:%08x", hash);
+       log(L_DEBUG2, "hash:%08x", hash);
        hash = hash % cache_size;
-       (*cellp) = cell = cache[hash];
+       cacheline = cache[hash];
 
+       free_cache = -1;
        for (i = 0; i < 8; i++) {
-               if (!cell[i])
+               user_req *cached = CACHE_PTR(cacheline[i]);
+               if (!cached) {
+                       if (free_cache == -1)
+                               free_cache = i;
                        continue;
-// TODO: do secondary hash match
+               }
                /* ureq->version is always 2 and is reused in cache
                 * for other purposes, we need to skip it here */
-               if (memcmp(&ureq->type, &cell[i]->type, ureq_sz - offsetof(user_req, type)) == 0) {
+               if (memcmp(&ureq->type, &cached->type, ureq_sz - offsetof(user_req, type)) == 0) {
                        log(L_DEBUG, "found in cache[%u][%u]", hash, i);
                        cache_hit_cnt++;
-                       return cell[i];
+                       return &cacheline[i];
                }
        }
-       log(L_DEBUG, "not found in cache[%u][x]", hash);
-       return NULL;
-}
 
-static void free_refcounted_ureq(user_req **ureqp)
-{
-       user_req *ureq = *ureqp;
-       /* is it in use? */
-       if (ureq->refcount) {
-               ureq->refcount = 0; /* since it can be only 1 or 0... */
-       } else {
-               free(ureq);
+       if (free_cache >= 0) {
+               cached_cnt++;
+               i = free_cache;
+               log(L_DEBUG, "not found, using free cache[%u][%u]", hash, i);
+               goto ret;
        }
-       *ureqp = NULL;
-}
 
-static void save_in_cell(user_req **cell, user_req *new_cached, unsigned now_ms)
-{
        unsigned oldest_idx = 0;
        unsigned oldest_age = 0;
-       unsigned age;
-       unsigned i;
-
        for (i = 0; i < 8; i++) {
-               if (!cell[i]) {
-                       log(L_DEBUG, "using free cache[x][%u]", i);
-                       cached_cnt++;
-                       cell[i] = new_cached;
-                       aging_interval_ms = min_aging_interval_ms;
-                       return;
-               }
-               age = cache_age(now_ms, cell[i]);
+               unsigned age = cache_age(cacheline[i]);
                if (age > oldest_age) {
                        oldest_age = age;
                        oldest_idx = i;
                }
        }
-       log(L_DEBUG, "freeing and reusing cache[x][%u] (age %u)", oldest_idx, oldest_age);
-       if (cell[oldest_idx]) {
-               free_refcounted_ureq(&cell[oldest_idx]);
-       } else {
-               cached_cnt++;
+       if (oldest_age == 0) {
+               /* All entries in cacheline are "future" entries!
+                * This is very unlikely, but we must still work correctly.
+                * We call this "fake cache entry".
+                * The data will be "cached" only for the duration
+                * of this client's request lifetime.
+                */
+               log(L_DEBUG, "not found, and cache[%u] is full: using fake cache entry", hash);
+               return NULL;
        }
-       cell[oldest_idx] = new_cached;
-       aging_interval_ms = min_aging_interval_ms;
+       i = oldest_idx;
+       log(L_DEBUG, "not found, freeing and reusing cache[%u][%u] (age %u)", hash, i, oldest_age);
+       free_refcounted_ureq(&cacheline[i]);
+
+ ret:
+       cacheline[i] = MAKE_FUTURE_PTR(ureq);
+       return &cacheline[i];
 }
 
-static void age_cache(unsigned now_ms, int srv)
+static void age_cache(unsigned free_all, int srv)
 {
        user_req **cp = *cache;
        int i;
        unsigned sv = cached_cnt;
 
-       log(L_DEBUG, "aging cache, srv:%d, now:%u", srv, now_ms);
-       if (srv == -1 || !now_ms)
+       log(L_DEBUG, "aging cache, srv:%d, free_all:%u", srv, free_all);
+       if (srv == -1 || free_all)
                aging_interval_ms = INT_MAX;
        i = cache_size * 8;
        do {
                user_req *cached = *cp;
-               if (cached) {
+               if (CACHED_ENTRY(cached) && cached != NULL) {
                        int csrv = type_to_srv[cached->type];
                        if (srv == -1 || srv == csrv) {
-                               if (!now_ms) {
+                               if (free_all) {
                                        cached_cnt--;
                                        free_refcounted_ureq(cp);
                                } else {
-                                       unsigned age = cache_age(now_ms, cached);
+                                       unsigned age = cache_age(cached);
                                        response_header *resp = ureq_response(cached);
                                        unsigned ttl = (resp->found ? config.pttl : config.nttl)[csrv];
                                        if (age >= ttl) {
-                                               log(L_DEBUG, "freeing: age %u positive %d ttl %u", age, resp->found, ttl);
+                                               log(L_DEBUG2, "freeing: age %u positive %d ttl %u", age, resp->found, ttl);
                                                cached_cnt--;
                                                free_refcounted_ureq(cp);
                                        } else if (srv == -1) {
@@ -1200,8 +1299,7 @@ static void age_cache(unsigned now_ms, int srv)
                cp++;
        } while (--i);
        log(L_INFO, "aged cache, freed:%u, remain:%u", sv - cached_cnt, cached_cnt);
-       if (srv == -1 || !now_ms)
-               log(L_DEBUG, "aging interval now %u ms", aging_interval_ms);
+       log(L_DEBUG2, "aging interval now %u ms", aging_interval_ms);
 }
 
 
@@ -1213,8 +1311,6 @@ static void age_cache(unsigned now_ms, int srv)
 /* Returns stdout fd of the worker, in blocking mode */
 static int create_and_feed_worker(user_req *ureq)
 {
-       static const char *const argv[] = { "worker_nscd", NULL };
-
        pid_t pid;
        struct {
                int rd;
@@ -1229,15 +1325,25 @@ static int create_and_feed_worker(user_req *ureq)
        if (pid < 0) /* error */
                perror_and_die("vfork");
        if (!pid) { /* child */
+               char param[sizeof(int)*3 + 2];
+               char *argv[3];
+
                close(to_child.wr);
                close(to_parent.rd);
                xmovefd(to_child.rd, 0);
                xmovefd(to_parent.wr, 1);
+               sprintf(param, "%u", debug);
+               argv[0] = (char*) "worker_nscd";
+               argv[1] = param;
+               argv[2] = NULL;
                /* Re-exec ourself, cleaning up all allocated memory.
                 * fds in parent are marked CLOEXEC and will be closed too
                 * (modulo bugs) */
-               execve("/proc/self/exe", (char**)argv, (char**)(argv+1));
-               xexecve(self_exe_points_to, (char**)argv, (char**)(argv+1));
+               /* Try link name first: it's better to have comm field
+                * of "nscd" than "exe" (pgrep reported to fail to find us
+                * by name when comm field contains "exe") */
+               execve(self_exe_points_to, argv, argv+2);
+               xexecve("/proc/self/exe", argv, argv+2);
        }
 
        /* parent */
@@ -1250,6 +1356,7 @@ static int create_and_feed_worker(user_req *ureq)
        xsafe_write(to_child.wr, ureq, ureq_size(ureq));
 
        close(to_child.wr);
+       close_on_exec(to_parent.rd);
        return to_parent.rd;
 }
 
@@ -1291,12 +1398,14 @@ static void worker_signal_handler(int sig)
        _exit(0);
 }
 
-static void worker(void) NORETURN;
-static void worker(void)
+static void worker(const char *param) NORETURN;
+static void worker(const char *param)
 {
        user_req ureq;
        void *resp;
 
+       debug = atoi(param);
+
        worker_ureq = &ureq; /* for signal handler */
 
        /* Make sure we won't hang, but rather die */
@@ -1315,7 +1424,9 @@ static void worker(void)
        signal(SIGILL,    worker_signal_handler);
        signal(SIGFPE,    worker_signal_handler);
        signal(SIGABRT,   worker_signal_handler);
+#ifdef SIGSTKFLT
        signal(SIGSTKFLT, worker_signal_handler);
+#endif
 
        if (ureq.type == GETHOSTBYNAME
         || ureq.type == GETHOSTBYNAMEv6
@@ -1332,7 +1443,11 @@ static void worker(void)
                        (ureq.type == GETHOSTBYADDR ? AF_INET : AF_INET6)
                ));
        } else if (ureq.type == GETPWBYNAME) {
-               resp = marshal_passwd(getpwnam(ureq.reqbuf));
+               struct passwd *pw;
+               log(L_DEBUG2, "getpwnam('%s')", ureq.reqbuf);
+               pw = getpwnam(ureq.reqbuf);
+               log(L_DEBUG2, "getpwnam result:%p", pw);
+               resp = marshal_passwd(pw);
        } else if (ureq.type == GETPWBYUID) {
                resp = marshal_passwd(getpwuid(atoi(ureq.reqbuf)));
        } else if (ureq.type == GETGRBYNAME) {
@@ -1364,27 +1479,30 @@ static void worker(void)
 ** Main loop
 */
 
-static const char check_filenames[][sizeof("/etc/passwd")] = {
+static const char checked_filenames[][sizeof("/etc/passwd")] = {
        [SRV_PASSWD] = "/etc/passwd", /*  "/etc/shadow"? */
        [SRV_GROUP]  = "/etc/group",
        [SRV_HOSTS]  = "/etc/hosts", /* "/etc/resolv.conf" "/etc/nsswitch.conf"? */
 };
 
-static struct stat check_statbuf[sizeof(check_filenames) / sizeof(check_filenames[0])];
+static long checked_status[ARRAY_SIZE(checked_filenames)];
 
 static void check_files(int srv)
 {
-       const char *file = check_filenames[srv];
-       struct stat *sb = &check_statbuf[srv];
-       struct stat tempbuf;
-       
-       memset(&tempbuf, 0, sizeof(tempbuf));
-       stat(file, &tempbuf); /* ignore errors */
-       tempbuf.st_atime = 0; /* this is not a change */
-       if (memcmp(sb, &tempbuf, sizeof(tempbuf)) != 0) {
+       struct stat tsb;
+       const char *file = checked_filenames[srv];
+       long v;
+
+       memset(&tsb, 0, sizeof(tsb));
+       stat(file, &tsb); /* ignore errors */
+       /* Comparing struct stat's was giving false positives.
+        * Extracting only those fields which are interesting: */
+       v = (long)tsb.st_mtime ^ (long)tsb.st_size ^ (long)tsb.st_ino; /* ^ (long)tsb.st_dev ? */
+
+       if (v != checked_status[srv]) {
+               checked_status[srv] = v;
                log(L_INFO, "detected change in %s", file);
-               memcpy(sb, &tempbuf, sizeof(tempbuf));
-               age_cache(0, srv); /* frees entire cache */
+               age_cache(/*free_all:*/ 1, srv);
        }
 }
 
@@ -1393,30 +1511,28 @@ static int handle_client(int i)
 {
        int srv;
        user_req *ureq = cinfo[i].ureq;
-       user_req **cell;
+       user_req **cache_pp;
        user_req *ureq_and_resp;
 
+#if DEBUG_BUILD
        log(L_DEBUG, "version:%d type:%d(%s) key_len:%d '%s'",
-                       ureq->version, ureq->type, typestr[ureq->type],
+                       ureq->version, ureq->type,
+                       ureq->type < ARRAY_SIZE(typestr) ? typestr[ureq->type] : "BAD",
                        ureq->key_len, req_str(ureq->type, ureq->reqbuf));
-       hex_dump(cinfo[i].ureq, cinfo[i].bytecnt);
+#endif
 
-       if (unsupported_ureq_type(ureq->type)) {
-               /* We don't know this request. Just close the connection */
-               /* (glibc client interprets this like "not supported by this nscd") */
-               log(L_INFO, "unsupported query, dropping");
+       if (ureq->version != NSCD_VERSION) {
+               log(L_INFO, "wrong version");
                close_client(i);
                return 0;
        }
-       srv = type_to_srv[ureq->type];
-       if (!config.srv_enable[srv]) {
-               log(L_INFO, "service %d is disabled, dropping", srv);
+       if (ureq->key_len > sizeof(ureq->reqbuf)) {
+               log(L_INFO, "bogus key_len %u - ignoring", ureq->key_len);
                close_client(i);
                return 0;
        }
-
        if (cinfo[i].bytecnt < USER_HDR_SIZE + ureq->key_len) {
-               log(L_INFO, "read %d, need %d more to read",
+               log(L_INFO, "read %d, need to read %d",
                        cinfo[i].bytecnt, USER_HDR_SIZE + ureq->key_len);
                return 0; /* more to read */
        }
@@ -1425,11 +1541,56 @@ static int handle_client(int i)
                close_client(i);
                return 0;
        }
-       if (ureq->version != NSCD_VERSION) {
-               log(L_INFO, "wrong version");
+       if (unsupported_ureq_type(ureq->type)) {
+               /* We don't know this request. Just close the connection.
+                * (glibc client interprets this like "not supported by this nscd")
+                * Happens very often, thus DEBUG, not INFO */
+               log(L_DEBUG, "unsupported query, dropping");
+               close_client(i);
+               return 0;
+       }
+       srv = type_to_srv[ureq->type];
+       if (!config.srv_enable[srv]) {
+               log(L_INFO, "service %d is disabled, dropping", srv);
+               close_client(i);
+               return 0;
+       }
+
+       hex_dump(cinfo[i].ureq, cinfo[i].bytecnt);
+
+       if (ureq->type == SHUTDOWN
+        || ureq->type == INVALIDATE
+       ) {
+#ifdef SO_PEERCRED
+               struct ucred caller;
+               socklen_t optlen = sizeof(caller);
+               if (getsockopt(pfd[i].fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0) {
+                       log(L_INFO, "ignoring special request - cannot get caller's id: %s", strerror(errno));
+                       close_client(i);
+                       return 0;
+               }
+               if (caller.uid != 0) {
+                       log(L_INFO, "special request from non-root - ignoring");
+                       close_client(i);
+                       return 0;
+               }
+#endif
+               if (ureq->type == SHUTDOWN) {
+                       log(L_INFO, "got shutdown request, exiting");
+                       exit(0);
+               }
+               if (!ureq->key_len || ureq->reqbuf[ureq->key_len - 1]) {
+                       log(L_INFO, "malformed invalidate request - ignoring");
+                       close_client(i);
+                       return 0;
+               }
+               log(L_INFO, "got invalidate request, flushing cache");
+               /* Frees entire cache. TODO: replace -1 with service (in ureq->reqbuf) */
+               age_cache(/*free_all:*/ 1, -1);
                close_client(i);
                return 0;
        }
+
        if (ureq->type != GETHOSTBYADDR
         && ureq->type != GETHOSTBYADDRv6
        ) {
@@ -1444,104 +1605,166 @@ static int handle_client(int i)
                check_files(srv);
        }
 
-       /* If in cache, save ptr to response into cinfo and return */
-       ureq_and_resp = find_cell_and_response(&cell, ureq);
+       cache_pp = lookup_in_cache(ureq);
+       ureq_and_resp = cache_pp ? *cache_pp : NULL;
+
        if (ureq_and_resp) {
-               response_header *resp = ureq_response(ureq_and_resp);
-               unsigned sz = resp->version_or_size;
-               log(L_DEBUG, "sz:%u", sz);
-               hex_dump(resp, sz);
-               ureq_and_resp->refcount = 1; /* cache shouldn't free it under us! */
-               pfd[i].events = POLLOUT; /* we want to write out */
-               cinfo[i].resptr = ureq_and_resp;
-               cinfo[i].respos = 0;
-               //cinfo[i].resp_sz = sz;
-               return 1;
+               if (CACHED_ENTRY(ureq_and_resp)) {
+                       /* Found. Save ptr to response into cinfo and return */
+                       response_header *resp = ureq_response(ureq_and_resp);
+                       unsigned sz = resp->version_or_size;
+
+                       log(L_DEBUG, "sz:%u", sz);
+                       hex_dump(resp, sz);
+                       ureq_and_resp->refcount++; /* cache shouldn't free it under us! */
+                       pfd[i].events = POLLOUT; /* we want to write out */
+                       cinfo[i].resptr = ureq_and_resp;
+                       /*cinfo[i].respos = 0; - already is */
+                       /* prevent future matches with anything */
+                       cinfo[i].cache_pp = (void *) 1;
+                       return 1; /* "ready to write data out to client" */
+               }
+
+               /* Not found. Remember a pointer where it will appear */
+               cinfo[i].cache_pp = cache_pp;
+
+               if (CACHE_PTR(ureq_and_resp) != ureq) {
+                       /* We are not the first client who wants this */
+                       log(L_DEBUG, "another request is in progress (%p), waiting for its result", ureq_and_resp);
+                       MARK_PTR_SHARED(cache_pp); /* "please inform us when it's ready" */
+                       /* "we do not wait for client anymore" */
+                       cinfo[i].client_fd = pfd[i].fd;
+                       /* Don't wait on fd. Worker response will unblock us */
+                       pfd[i].events = 0;
+                       return 0;
+               }
+               /* else: lookup_in_cache inserted (ureq & 1) into *cache_pp:
+                * we are the first client to miss on this ureq. */
        }
 
        /* Start worker thread */
-       cinfo[i].cell = cell;
+       log(L_DEBUG, "stored %p in cache, starting a worker", ureq_and_resp);
        /* Now we will wait on worker's fd, not client's! */
        cinfo[i].client_fd = pfd[i].fd;
        pfd[i].fd = create_and_feed_worker(ureq);
-
-/* We can do it here, but we don't really need to.
- * We need to have client_buf[] big enough anyway for worst case scenario,
- * so we can simply keep cbuf allocated until we close a client.
-       cinfo[i].ureq = NULL;
-       busy_cbuf[cinfo[i].bufidx] = 0;
- */
        return 0;
 }
 
-/* When we return, reply is fully read and stored in cache,
+static void prepare_for_writeout(unsigned i, user_req *cached)
+{
+       log(L_DEBUG2, "client %u: data is ready at %p", i, cached);
+
+       if (cinfo[i].client_fd) {
+               pfd[i].fd = cinfo[i].client_fd;
+               cinfo[i].client_fd = 0; /* "we don't wait for worker reply" */
+       }
+       pfd[i].events = POLLOUT;
+
+       /* Writeout position etc */
+       cinfo[i].resptr = cached;
+       /*cinfo[i].respos = 0; - already is */
+       /* if worker took some time to get info (e.g. DNS query),
+        * prevent client timeout from triggering at once */
+       cinfo[i].started_ms = g_now_ms;
+}
+
+/* Worker seems to be ready to write the response.
+ * When we return, response is fully read and stored in cache,
  * worker's fd is closed, pfd[i] and cinfo[i] are updated. */
-static void handle_worker_response(int i, unsigned now_ms)
+static void handle_worker_response(int i)
 {
-       response_header sz_and_found;
+       struct { /* struct response_header + small body */
+               uint32_t version_or_size;
+               int32_t found;
+               char body[256 - 8];
+       } sz_and_found;
        user_req *cached;
-       user_req *ureq = cinfo[i].ureq;
+       user_req *ureq;
        response_header *resp;
-       unsigned resp_sz;
-       unsigned ureq_sz_aligned = (char*)ureq_response(ureq) - (char*)ureq;
-
-       /* Replies can be big (getgrnam("guest") on a big user db),
-        * we cannot rely on them being atomic. However, we know that worker
-        * _always_ gives reply in one full_write(), so loop and read it all
-        * (looping is implemented inside full_read()) */
-       resp_sz = full_read(pfd[i].fd, &sz_and_found, 8);
-       if (resp_sz != 8) {
+       unsigned sz, resp_sz;
+       unsigned ureq_sz_aligned;
+
+       cached = NULL;
+       ureq = cinfo[i].ureq;
+       ureq_sz_aligned = (char*)ureq_response(ureq) - (char*)ureq;
+
+       sz = full_read(pfd[i].fd, &sz_and_found, sizeof(sz_and_found));
+       if (sz < 8) {
                /* worker was killed? */
-               log(L_DEBUG, "worker gave short reply:%u != 8", resp_sz);
+               log(L_DEBUG, "worker gave short reply:%u < 8", sz);
                goto err;
        }
 
        resp_sz = sz_and_found.version_or_size;
-       if (resp_sz < 8 || resp_sz > 0xfffffff) { /* 256 mb */
+       if (resp_sz < sz || resp_sz > 0xfffffff) { /* 256 mb */
                error("BUG: bad size from worker:%u", resp_sz);
                goto err;
        }
 
        /* Create new block of cached info */
        cached = xzalloc(ureq_sz_aligned + resp_sz);
-       resp = (void*) ((char*)cached + ureq_sz_aligned);
+       log(L_DEBUG2, "xzalloc(%u):%p", ureq_sz_aligned + resp_sz, cached);
+       resp = (void*) (((char*) cached) + ureq_sz_aligned);
        memcpy(cached, ureq, ureq_size(ureq));
-       resp->version_or_size = resp_sz;
-       resp->found = sz_and_found.found;
-       if (sz_and_found.found) {
+       memcpy(resp, &sz_and_found, sz);
+       if (sz_and_found.found && resp_sz > sz) {
                /* We need to read data only if it's found
-                * (otherwise worker sends only 8 bytes) */
-               if (full_read(pfd[i].fd, resp->body, resp_sz - 8) != resp_sz - 8) {
+                * (otherwise worker sends only 8 bytes).
+                *
+                * Replies can be big (getgrnam("guest") on a big user db),
+                * we cannot rely on them being atomic. However, we know
+                * that worker _always_ gives reply in one full_write(),
+                * so we loop and read it all
+                * (looping is implemented inside full_read())
+                */
+               if (full_read(pfd[i].fd, ((char*) resp) + sz, resp_sz - sz) != resp_sz - sz) {
                        /* worker was killed? */
-                       log(L_DEBUG, "worker gave short reply");
-                       free(cached);
+                       log(L_DEBUG, "worker gave short reply, free(%p)", cached);
  err:
+                       free(cached);
                        cached = NULL;
                        goto wo;
                }
        }
+       set_cache_timestamp(cached);
        hex_dump(resp, resp_sz);
-       /* save in cache */
-       cached->refcount = 1; /* cache shouldn't free it under us! */
-       set_cache_timestamp(cached, now_ms);
-       save_in_cell(cinfo[i].cell, cached, now_ms);
 
  wo:
        close(pfd[i].fd);
 
-       /* schedule for writeout */
-       pfd[i].fd = cinfo[i].client_fd;
-       cinfo[i].client_fd = 0; /* no, we don't wait for worker reply anymore */
-       pfd[i].events = POLLOUT;
-       /* pfd[i].revents = 0; - not needed? */
+       /* Save in cache */
+       unsigned ref = 0;
+       user_req **cache_pp = cinfo[i].cache_pp;
+       if (cache_pp != NULL) { /* if not a fake entry */
+               ureq = *cache_pp;
+               *cache_pp = cached;
+               if (CACHE_SHARED(ureq)) {
+                       /* Other clients wait for this response too,
+                        * wake them (and us) up and set refcount = no_of_clients */
+                       unsigned j;
+
+                       for (j = 2; j < num_clients; j++) {
+                               if (cinfo[j].cache_pp == cache_pp) {
+                                       /* This client uses the same cache entry */
+                                       ref++;
+                                       /* prevent future matches with anything */
+                                       cinfo[j].cache_pp = (void *) 1;
+                                       prepare_for_writeout(j, cached);
+                               }
+                       }
+                       goto ret;
+               }
+               /* prevent future matches with anything */
+               cinfo[i].cache_pp = (void *) 1;
+               ref = 1;
+       }
 
-       /* writeout position etc */
-       cinfo[i].resptr = cached;
-       cinfo[i].respos = 0;
-       //cinfo[i].resp_sz = resp_sz;
-       /* if worker took some time to get info (e.g. DNS query),
-        * prevent client timeout from triggering at once */
-       cinfo[i].started_ms = now_ms;
+       prepare_for_writeout(i, cached);
+ret:
+       /* cache shouldn't free it under us! */
+       if (cached)
+               cached->refcount = ref;
+       aging_interval_ms = min_aging_interval_ms;
 }
 
 static void main_loop(void)
@@ -1556,26 +1779,32 @@ static void main_loop(void)
        while (1) {
                int i, j;
                int r;
-               unsigned now_ms;
 
                r = SMALL_POLL_TIMEOUT_MS;
                if (num_clients <= 2 && !cached_cnt)
                        r = -1; /* infinite */
                else if (num_clients < max_reqnum)
                        r = aging_interval_ms;
-
 #if 0 /* Debug: leak detector */
                {
                        static unsigned long long cnt;
-                       void *p = malloc(240); /* should not be too small */
+                       static unsigned long low_malloc = -1L;
+                       static unsigned long low_sbrk = -1L;
+                       void *p = malloc(540); /* should not be too small */
                        void *s = sbrk(0);
                        free(p);
-                       log(L_INFO, "entering poll %llu (%d ms). num_clients:%u cached:%u %u/%u next malloc:%p, sbrk:%p",
-                               cnt, r, num_clients, cached_cnt, cache_hit_cnt, cache_access_cnt, p, s);
+                       if ((unsigned long)p < low_malloc)
+                               low_malloc = (unsigned long)p;
+                       if ((unsigned long)s < low_sbrk)
+                               low_sbrk = (unsigned long)s;
+                       log(L_INFO, "poll %llu (%d ms). clients:%u cached:%u %u/%u malloc:%p (%lu), sbrk:%p (%lu)",
+                               cnt, r, num_clients, cached_cnt, cache_hit_cnt, cache_access_cnt,
+                               p, (unsigned long)p - low_malloc,
+                               s, (unsigned long)s - low_sbrk);
                        cnt++;
                }
 #else
-               log(L_DEBUG, "entering poll (%d ms). num_clients:%u cached:%u hit_ratio:%u/%u",
+               log(L_DEBUG, "poll (%d ms). clients:%u cached:%u hit ratio:%u/%u",
                                r, num_clients, cached_cnt, cache_hit_cnt, cache_access_cnt);
 #endif
 
@@ -1591,7 +1820,7 @@ static void main_loop(void)
                 * There is no blocking I/O (except when we talk to worker thread
                 * which is guaranteed to not block us for long) */
 
-               now_ms = monotonic_ms();
+               g_now_ms = monotonic_ms();
                if (r == 0)
                        goto skip_fd_checks;
 
@@ -1609,13 +1838,14 @@ static void main_loop(void)
                        ndelay_on(cfd);
                        close_on_exec(cfd);
                        /* x[num_clients] is next free element, taking it */
+                       log(L_DEBUG2, "new client %d, fd %d", num_clients, cfd);
                        pfd[num_clients].fd = cfd;
                        pfd[num_clients].events = POLLIN;
                        /* this will make us do read() in next for() loop: */
                        pfd[num_clients].revents = POLLIN;
                        memset(&cinfo[num_clients], 0, sizeof(cinfo[num_clients]));
                        /* cinfo[num_clients].bytecnt = 0; - done */
-                       cinfo[num_clients].started_ms = now_ms;
+                       cinfo[num_clients].started_ms = g_now_ms;
                        cinfo[num_clients].bufidx = alloc_buf_no();
                        cinfo[num_clients].ureq = bufno2buf(cinfo[num_clients].bufidx);
                        num_clients++;
@@ -1644,14 +1874,24 @@ static void main_loop(void)
                                resp = ureq_response(cinfo[i].resptr);
                                resp_sz = resp->version_or_size;
                                resp->version_or_size = NSCD_VERSION;
-                               r = safe_write(pfd[i].fd, resp + cinfo[i].respos, resp_sz - cinfo[i].respos);
+                               r = safe_write(pfd[i].fd, ((char*) resp) + cinfo[i].respos, resp_sz - cinfo[i].respos);
                                resp->version_or_size = resp_sz;
 
                                if (r < 0 && errno == EAGAIN)
                                        continue;
                                if (r <= 0) { /* client isn't there anymore */
+                                       log(L_DEBUG, "client %d is gone (write returned %d)", i, r);
  write_out_is_done:
-                                       free_refcounted_ureq(&cinfo[i].resptr);
+                                       if (cinfo[i].cache_pp == NULL) {
+                                               log(L_DEBUG, "client %d: freeing fake cache entry %p", i, cinfo[i].resptr);
+                                               free(cinfo[i].resptr);
+                                       } else {
+                                               /* Most of the time, it is not freed here,
+                                                * only refcounted--. Freeing happens
+                                                * if it was deleted from cache[] but retained
+                                                * for writeout. */
+                                               free_refcounted_ureq(&cinfo[i].resptr);
+                                       }
                                        close_client(i);
                                        continue;
                                }
@@ -1665,6 +1905,7 @@ static void main_loop(void)
                                         * read(3, "www.google.com\0\0", 16) = 16
                                         * close(3) = 0
                                         */
+                                       log(L_DEBUG, "client %u: sent answer %u bytes", i, cinfo[i].respos);
                                        goto write_out_is_done;
                                }
                        }
@@ -1673,15 +1914,22 @@ static void main_loop(void)
                         * already dead, revents may contain other bits too */
                        if ((pfd[i].revents & POLLIN) && cinfo[i].client_fd) {
                                log(L_DEBUG, "reading response for client %u", i);
-                               handle_worker_response(i, now_ms);
+                               handle_worker_response(i);
                                /* We can immediately try to write a response
                                 * to client */
                                goto write_out;
                        }
 
+                       if (pfd[i].revents == POLLHUP) {
+                               /* POLLHUP means we can't write to it anymore */
+                               log(L_INFO, "client %u disappered (got POLLHUP)", i);
+                               close_client(i);
+                               continue;
+                       }
+
                        /* All strange and unexpected cases */
                        if (pfd[i].revents != POLLIN) {
-                               /* Not just "can read" - prolly POLLHUP too */
+                               /* Not just "can read", but some other bits are there */
                                log(L_INFO, "client %u revents is strange:%x", i, pfd[i].revents);
                                close_client(i);
                                continue;
@@ -1703,44 +1951,6 @@ static void main_loop(void)
                        }
                        cinfo[i].bytecnt += r;
                        if (cinfo[i].bytecnt >= sizeof(user_req_header)) {
-                               if (cinfo[i].ureq->type == SHUTDOWN
-                                || cinfo[i].ureq->type == INVALIDATE
-                               ) {
-                                       const char *service;
-                                       unsigned len;
-#ifdef SO_PEERCRED
-                                       struct ucred caller;
-                                       socklen_t optlen = sizeof(caller);
-                                       if (getsockopt(pfd[i].fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0) {
-                                               log(L_INFO, "ignoring special request - cannot get caller's id: %s", strerror(errno));
-                                               close_client(i);
-                                               continue;
-                                       }
-                                       if (caller.uid != 0) {
-                                               log(L_INFO, "special request from non-root - ignoring");
-                                               close_client(i);
-                                               continue;
-                                       }
-#endif
-                                       if (cinfo[i].ureq->type == SHUTDOWN) {
-                                               log(L_INFO, "got shutdown request, exiting");
-                                               return; /* exits nscd */;
-                                       }
-                                       len = cinfo[i].ureq->key_len;
-                                       service = (char*)&cinfo[i].ureq + len;
-                                       if (sizeof(user_req_header) + len != cinfo[i].bytecnt
-                                        || !len
-                                        || service[len-1] != '\0'
-                                       ) {
-                                               log(L_INFO, "malformed invalidate request - ignoring");
-                                               close_client(i);
-                                               continue;
-                                       }
-                                       log(L_INFO, "got invalidate request, flushing cache");
-                                       age_cache(0, -1); /* frees entire cache. TODO: replace -1 with service */
-                                       close_client(i);
-                                       continue;
-                               }
                                if (handle_client(i)) {
                                        /* Response is found in cache! */
                                        goto write_out;
@@ -1750,17 +1960,19 @@ static void main_loop(void)
 
  skip_fd_checks:
                /* Age cache */
-               if ((now_ms - last_age_time) >= aging_interval_ms) {
-                       last_age_time = now_ms;
-                       age_cache(now_ms, -1);
+               if ((g_now_ms - last_age_time) >= aging_interval_ms) {
+                       last_age_time = g_now_ms;
+                       age_cache(/*free_all:*/ 0, -1);
                }
 
                /* Close timed out client connections */
                for (i = 2; i < num_clients; i++) {
-                       if (pfd[i].fd && !cinfo[i].client_fd
-                        && (now_ms - cinfo[i].started_ms) > CLIENT_TIMEOUT_MS
+                       if (pfd[i].fd != 0 /* not closed yet? */ ////
+                        && cinfo[i].client_fd == 0 /* do we still wait for client, not worker? */
+                        && (g_now_ms - cinfo[i].started_ms) > CLIENT_TIMEOUT_MS
                        ) {
-                               log(L_INFO, "timed out waiting for client %u, dropping", i);
+                               log(L_INFO, "timed out waiting for client %u (%u ms), dropping",
+                                       i, (unsigned)(g_now_ms - cinfo[i].started_ms));
                                close_client(i);
                        }
                }
@@ -1789,7 +2001,7 @@ static void main_loop(void)
 
  coalesce_done:
                num_clients -= cnt_closed;
-               log(L_DEBUG, "removing %d closed clients. num_clients:%d", cnt_closed, num_clients);
+               log(L_DEBUG, "removing %d closed clients. clients:%d", cnt_closed, num_clients);
                min_closed = INT_MAX;
                cnt_closed = 0;
                /* start accepting new connects */
@@ -1914,7 +2126,7 @@ static void handle_logfile(const char *str, int srv)
 
 static void handle_debuglvl(const char *str, int srv)
 {
-       debug |= getnum(str);
+       debug |= (uint8_t) getnum(str);
 }
 
 static void handle_threads(const char *str, int srv)
@@ -2017,7 +2229,7 @@ static void parse_conffile(const char *conffile, int warn)
                word = conf_words;
                while (1) {
                        if (strcmp(word->str + 1, p) == 0) {
-                               int srv;
+                               int srv = 0;
                                p = skip_whitespace(p + strlen(p) + 1);
                                *skip_non_whitespace(p) = '\0';
                                if (word->str[0] == 'S') {
@@ -2147,8 +2359,7 @@ static void special_op(const char *arg)
 
        if (!arg) { /* shutdown */
                xfull_write(sock, &ureq, sizeof(ureq));
-               dup2(2, 1);
-               error_and_die("sent shutdown request, exiting");
+               printf("sent shutdown request, exiting\n");
        } else { /* invalidate */
                size_t arg_len = strlen(arg) + 1;
                struct {
@@ -2160,9 +2371,9 @@ static void special_op(const char *arg)
                reqdata.req.key_len = arg_len;
                memcpy(reqdata.arg, arg, arg_len);
                xfull_write(sock, &reqdata, arg_len + sizeof(ureq));
-               dup2(2, 1);
-               error_and_die("sent invalidate(%s) request, exiting", arg);
+               printf("sent invalidate(%s) request, exiting\n", arg);
        }
+       exit(0);
 }
 
 
@@ -2173,6 +2384,7 @@ void __nss_disable_nscd(void);
 int main(int argc, char **argv)
 {
        int n;
+       unsigned opt_d_cnt;
        const char *env_U;
        const char *conffile;
 
@@ -2180,18 +2392,30 @@ int main(int argc, char **argv)
        __nss_disable_nscd();
 
        if (argv[0][0] == 'w') /* "worker_nscd" */
-               worker();
+               worker(argv[1]);
 
        setlinebuf(stdout);
        setlinebuf(stderr);
 
+       /* Make sure stdio is not closed */
+       n = xopen3("/dev/null", O_RDWR, 0);
+       while (n < 2)
+               n = dup(n);
+       /* Close unexpected open file descriptors */
+       n |= 0xff; /* start from at least fd# 255 */
+       do {
+               close(n--);
+       } while (n > 2);
+
        /* For idiotic kernels which disallow "exec /proc/self/exe" */
        readlink_self_exe();
 
        conffile = default_conffile;
+       opt_d_cnt = 0;
        while ((n = getopt_long(argc, argv, "df:i:KVgt:", longopt, NULL)) != -1) {
                switch (n) {
                case 'd':
+                       opt_d_cnt++;
                        debug &= ~D_DAEMON;
                        break;
                case 'f':
@@ -2219,6 +2443,11 @@ int main(int argc, char **argv)
                        print_help_and_die();
                }
        }
+       /* Multiple -d can bump debug regardless of nscd.conf:
+        * no -d or -d: 0, -dd: 1,
+        * -ddd: 3, -dddd: 7, -ddddd: 15
+        */
+       debug |= (((1U << opt_d_cnt) >> 1) - 1) & L_ALL;
 
        env_U = getenv("U");
        /* Avoid duplicate warnings if $U exists */
@@ -2244,37 +2473,27 @@ int main(int argc, char **argv)
                if (putenv(user_to_env_U(config.user)))
                        error_and_die("out of memory");
                /* fds leaked by nss will be closed by execed copy */
-               execv("/proc/self/exe", argv);
-               xexecve(self_exe_points_to, argv, environ);
+               execv(self_exe_points_to, argv);
+               xexecve("/proc/self/exe", argv, environ);
        }
 
        /* Allocate dynamically sized stuff */
        max_reqnum += 2; /* account for 2 first "fake" clients */
        if (max_reqnum < 8) max_reqnum = 8; /* sanitize */
-       if (max_reqnum > 0xffff) max_reqnum = 0xffff;
-       log(L_DEBUG, "will handle %u requests in parallel", max_reqnum - 2);
+       /* Since refcount is a byte, can't serve more than 255-2 clients
+        * at once. The rest will block in connect() */
+       if (max_reqnum > 0xff) max_reqnum = 0xff;
        client_buf = xzalloc(max_reqnum * sizeof(client_buf[0]));
        busy_cbuf  = xzalloc(max_reqnum * sizeof(busy_cbuf[0]));
        pfd        = xzalloc(max_reqnum * sizeof(pfd[0]));
        cinfo      = xzalloc(max_reqnum * sizeof(cinfo[0]));
 
        cache_size = (config.size[0] + config.size[1] + config.size[2]) / 8;
-       if (cache_size < 64) cache_size = 64; /* 8*64 = 512 entries min */
+       if (cache_size < 8) cache_size = 8; /* 8*8 = 64 entries min */
        if (cache_size > 0xffff) cache_size = 0xffff; /* 8*64k entries max */
        cache_size |= 1; /* force it to be odd */
-       log(L_DEBUG, "cache size %u x 8 entries", cache_size);
        cache = xzalloc(cache_size * sizeof(cache[0]));
 
-       /* Make sure stdio is not closed */
-       n = xopen3("/dev/null", O_RDWR, 0);
-       while (n < 2)
-               n = dup(n);
-       /* Close unexpected open file descriptors */
-       n |= 0xff; /* start from at least fd# 255 */
-       do {
-               close(n--);
-       } while (n > 2);
-
        /* Register cleanup hooks */
        signal(SIGINT, cleanup_on_signal);
        signal(SIGTERM, cleanup_on_signal);
@@ -2287,12 +2506,15 @@ int main(int argc, char **argv)
        signal(SIGALRM, SIG_DFL);
 #endif
 
-       mkdir(NSCD_DIR, 0777);
+       if (mkdir(NSCD_DIR, 0755) == 0) {
+               /* prevent bad mode of NSCD_DIR if umask is e.g. 077 */
+               chmod(NSCD_DIR, 0755);
+       }
        pfd[0].fd = open_socket(NSCD_SOCKET);
        pfd[1].fd = open_socket(NSCD_SOCKET_OLD);
        pfd[0].events = POLLIN;
        pfd[1].events = POLLIN;
-       
+
        if (debug & D_DAEMON) {
                daemon(/*nochdir*/ 1, /*noclose*/ 0);
                if (config.logfile) {
@@ -2311,31 +2533,28 @@ int main(int argc, char **argv)
                signal(SIGTSTP, SIG_IGN);
        }
 
+       log(L_ALL, "nscd v" PROGRAM_VERSION ", debug level 0x%x", debug & L_ALL);
+       log(L_DEBUG, "max %u requests in parallel", max_reqnum - 2);
+       log(L_DEBUG, "cache size %u x 8 entries", cache_size);
+
        if (env_U) {
                int size;
                gid_t *ug = env_U_to_uid_and_gids(env_U, &size);
-               if (setgroups(size, &ug[1]))
-                       perror_and_die("cannot set groups for user '%s'", config.user);
-               if (setuid(ug[0]))
-                       perror_and_die("cannot set uid to %u", (unsigned)(ug[0]));
+               if (size > 1)
+                       if (setgroups(size - 1, &ug[1]) || setgid(ug[1]))
+                               perror_and_die("cannot set groups for user '%s'", config.user);
+               if (size > 0)
+                       if (setuid(ug[0]))
+                               perror_and_die("cannot set uid to %u", (unsigned)(ug[0]));
                free(ug);
        }
 
-       log(L_ALL, "nscd v" PROGRAM_VERSION ", debug level %x", debug & L_ALL);
-       log(L_DEBUG, "passwd cache: %d pttl %u nttl %u",
-                               config.srv_enable[SRV_PASSWD],
-                               config.pttl[SRV_PASSWD],
-                               config.nttl[SRV_PASSWD]);
-       log(L_DEBUG, " group cache: %d pttl %u nttl %u",
-                               config.srv_enable[SRV_GROUP ],
-                               config.pttl[SRV_GROUP],
-                               config.nttl[SRV_GROUP]);
-       log(L_DEBUG, " hosts cache: %d pttl %u nttl %u",
-                               config.srv_enable[SRV_HOSTS ],
-                               config.pttl[SRV_HOSTS],
-                               config.nttl[SRV_HOSTS]);
-
        for (n = 0; n < 3; n++) {
+               log(L_DEBUG, "%s cache enabled:%u pttl:%u nttl:%u",
+                               srv_name[n],
+                               config.srv_enable[n],
+                               config.pttl[n],
+                               config.nttl[n]);
                config.pttl[n] *= 1000;
                config.nttl[n] *= 1000;
        }