From e194809e547eb5bc8e8d1ad09d874ebfde0efe4f Mon Sep 17 00:00:00 2001
From: Johannes Schindelin <johannes.schindelin@gmx.de>
Date: Tue, 2 Mar 2021 13:18:22 +0100
Subject: [PATCH 01/39] fsmonitor: fix memory corruption in some corner cases

In 56c6910028a (fsmonitor: change last update timestamp on the
index_state to opaque token, 2020-01-07), we forgot to adjust the part
of `unpack_trees()` that copies the FSMonitor "last-update" information
that we copy from the source index to the result index since 679f2f9fdd2
(unpack-trees: skip stat on fsmonitor-valid files, 2019-11-20).

Since the "last-update" information is no longer a 64-bit number, but a
free-form string that has been allocated, we need to duplicate it rather
than just copying it.

This is important because there _are_ cases when `unpack_trees()` will
perform a oneway merge that implicitly calls `refresh_fsmonitor()`
(which will allocate that "last-update" token). This happens _after_
that token was copied into the result index. However, we _then_ call
`check_updates()` on that index, which will _also_ call
`refresh_fsmonitor()`, accessing the "last-update" string, which by now
would be released already.

In the instance that lead to this patch, this caused a segmentation
fault during a lengthy, complicated rebase involving the todo command
`reset` that (crucially) had to updated many files. Unfortunately, it
seems very hard to trigger that crash, therefore this patch is not
accompanied by a regression test.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 unpack-trees.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/unpack-trees.c b/unpack-trees.c
index 2399b6818be6dd..63e3d961b37803 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -1544,8 +1544,8 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options
 	o->merge_size = len;
 	mark_all_ce_unused(o->src_index);
 
-	if (o->src_index->fsmonitor_last_update)
-		o->result.fsmonitor_last_update = o->src_index->fsmonitor_last_update;
+	o->result.fsmonitor_last_update =
+		xstrdup_or_null(o->src_index->fsmonitor_last_update);
 
 	/*
 	 * Sparse checkout loop #1: set NEW_SKIP_WORKTREE on existing entries

From 2b4dd0c9160ebf1d676c63585869f1a35da1a0bd Mon Sep 17 00:00:00 2001
From: Johannes Schindelin <johannes.schindelin@gmx.de>
Date: Tue, 2 Mar 2021 13:42:06 +0100
Subject: [PATCH 02/39] fsmonitor: do not forget to release the token in
 `discard_index()`

In 56c6910028a (fsmonitor: change last update timestamp on the
index_state to opaque token, 2020-01-07), we forgot to adjust
`discard_index()` to release the "last-update" token: it is no longer a
64-bit number, but a free-form string that has been allocated.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 read-cache.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/read-cache.c b/read-cache.c
index aa427c5c170f02..cf5ff315855015 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -2364,6 +2364,7 @@ int discard_index(struct index_state *istate)
 	cache_tree_free(&(istate->cache_tree));
 	istate->initialized = 0;
 	istate->fsmonitor_has_run_once = 0;
+	FREE_AND_NULL(istate->fsmonitor_last_update);
 	FREE_AND_NULL(istate->cache);
 	istate->cache_alloc = 0;
 	discard_split_index(istate);

From ebf106b42c520a3995f271ce48a3f110e1a5fa48 Mon Sep 17 00:00:00 2001
From: Jeff Hostetler <jeffhost@microsoft.com>
Date: Tue, 26 Jan 2021 12:59:07 -0500
Subject: [PATCH 03/39] pkt-line: eliminate the need for static buffer in
 packet_write_gently()

Teach `packet_write_gently()` to write the pkt-line header and the actual
buffer in 2 separate calls to `write_in_full()` and avoid the need for a
static buffer, thread-safe scratch space, or an excessively large stack
buffer.

Change `write_packetized_from_fd()` to allocate a temporary buffer rather
than using a static buffer to avoid similar issues here.

These changes are intended to make it easier to use pkt-line routines in
a multi-threaded context with multiple concurrent writers writing to
different streams.

Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
---
 pkt-line.c | 27 +++++++++++++++++++--------
 1 file changed, 19 insertions(+), 8 deletions(-)

diff --git a/pkt-line.c b/pkt-line.c
index d633005ef746ad..8b35121904429e 100644
--- a/pkt-line.c
+++ b/pkt-line.c
@@ -196,17 +196,25 @@ int packet_write_fmt_gently(int fd, const char *fmt, ...)
 
 static int packet_write_gently(const int fd_out, const char *buf, size_t size)
 {
-	static char packet_write_buffer[LARGE_PACKET_MAX];
+	char header[4];
 	size_t packet_size;
 
-	if (size > sizeof(packet_write_buffer) - 4)
+	if (size > LARGE_PACKET_DATA_MAX)
 		return error(_("packet write failed - data exceeds max packet size"));
 
 	packet_trace(buf, size, 1);
 	packet_size = size + 4;
-	set_packet_header(packet_write_buffer, packet_size);
-	memcpy(packet_write_buffer + 4, buf, size);
-	if (write_in_full(fd_out, packet_write_buffer, packet_size) < 0)
+
+	set_packet_header(header, packet_size);
+
+	/*
+	 * Write the header and the buffer in 2 parts so that we do not need
+	 * to allocate a buffer or rely on a static buffer.  This avoids perf
+	 * and multi-threading issues.
+	 */
+
+	if (write_in_full(fd_out, header, 4) < 0 ||
+	    write_in_full(fd_out, buf, size) < 0)
 		return error(_("packet write failed"));
 	return 0;
 }
@@ -244,20 +252,23 @@ void packet_buf_write_len(struct strbuf *buf, const char *data, size_t len)
 
 int write_packetized_from_fd(int fd_in, int fd_out)
 {
-	static char buf[LARGE_PACKET_DATA_MAX];
+	char *buf = xmalloc(LARGE_PACKET_DATA_MAX);
 	int err = 0;
 	ssize_t bytes_to_write;
 
 	while (!err) {
-		bytes_to_write = xread(fd_in, buf, sizeof(buf));
-		if (bytes_to_write < 0)
+		bytes_to_write = xread(fd_in, buf, LARGE_PACKET_DATA_MAX);
+		if (bytes_to_write < 0) {
+			free(buf);
 			return COPY_READ_ERROR;
+		}
 		if (bytes_to_write == 0)
 			break;
 		err = packet_write_gently(fd_out, buf, bytes_to_write);
 	}
 	if (!err)
 		err = packet_flush_gently(fd_out);
+	free(buf);
 	return err;
 }
 

From a6c0c4222d1fe8abfeb968f2e2ac06b0eb296650 Mon Sep 17 00:00:00 2001
From: Johannes Schindelin <johannes.schindelin@gmx.de>
Date: Tue, 11 Feb 2020 15:13:11 +0100
Subject: [PATCH 04/39] pkt-line: do not issue flush packets in
 write_packetized_*()

Remove the `packet_flush_gently()` call in `write_packetized_from_buf() and
`write_packetized_from_fd()` and require the caller to call it if desired.
Rename both functions to `write_packetized_from_*_no_flush()` to prevent
later merge accidents.

`write_packetized_from_buf()` currently only has one caller:
`apply_multi_file_filter()` in `convert.c`.  It always wants a flush packet
to be written after writing the payload.

However, we are about to introduce a caller that wants to write many
packets before a final flush packet, so let's make the caller responsible
for emitting the flush packet.

Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 convert.c  | 8 ++++++--
 pkt-line.c | 8 ++------
 pkt-line.h | 4 ++--
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/convert.c b/convert.c
index ee360c2f07ced0..976d4905cb3a1f 100644
--- a/convert.c
+++ b/convert.c
@@ -884,9 +884,13 @@ static int apply_multi_file_filter(const char *path, const char *src, size_t len
 		goto done;
 
 	if (fd >= 0)
-		err = write_packetized_from_fd(fd, process->in);
+		err = write_packetized_from_fd_no_flush(fd, process->in);
 	else
-		err = write_packetized_from_buf(src, len, process->in);
+		err = write_packetized_from_buf_no_flush(src, len, process->in);
+	if (err)
+		goto done;
+
+	err = packet_flush_gently(process->in);
 	if (err)
 		goto done;
 
diff --git a/pkt-line.c b/pkt-line.c
index 8b35121904429e..434da3a0c48dfc 100644
--- a/pkt-line.c
+++ b/pkt-line.c
@@ -250,7 +250,7 @@ void packet_buf_write_len(struct strbuf *buf, const char *data, size_t len)
 	packet_trace(data, len, 1);
 }
 
-int write_packetized_from_fd(int fd_in, int fd_out)
+int write_packetized_from_fd_no_flush(int fd_in, int fd_out)
 {
 	char *buf = xmalloc(LARGE_PACKET_DATA_MAX);
 	int err = 0;
@@ -266,13 +266,11 @@ int write_packetized_from_fd(int fd_in, int fd_out)
 			break;
 		err = packet_write_gently(fd_out, buf, bytes_to_write);
 	}
-	if (!err)
-		err = packet_flush_gently(fd_out);
 	free(buf);
 	return err;
 }
 
-int write_packetized_from_buf(const char *src_in, size_t len, int fd_out)
+int write_packetized_from_buf_no_flush(const char *src_in, size_t len, int fd_out)
 {
 	int err = 0;
 	size_t bytes_written = 0;
@@ -288,8 +286,6 @@ int write_packetized_from_buf(const char *src_in, size_t len, int fd_out)
 		err = packet_write_gently(fd_out, src_in + bytes_written, bytes_to_write);
 		bytes_written += bytes_to_write;
 	}
-	if (!err)
-		err = packet_flush_gently(fd_out);
 	return err;
 }
 
diff --git a/pkt-line.h b/pkt-line.h
index 8c90daa59ef0ca..31012b9943bf35 100644
--- a/pkt-line.h
+++ b/pkt-line.h
@@ -32,8 +32,8 @@ void packet_buf_write(struct strbuf *buf, const char *fmt, ...) __attribute__((f
 void packet_buf_write_len(struct strbuf *buf, const char *data, size_t len);
 int packet_flush_gently(int fd);
 int packet_write_fmt_gently(int fd, const char *fmt, ...) __attribute__((format (printf, 2, 3)));
-int write_packetized_from_fd(int fd_in, int fd_out);
-int write_packetized_from_buf(const char *src_in, size_t len, int fd_out);
+int write_packetized_from_fd_no_flush(int fd_in, int fd_out);
+int write_packetized_from_buf_no_flush(const char *src_in, size_t len, int fd_out);
 
 /*
  * Read a packetized line into the buffer, which must be at least size bytes

From e24c74bdb8f517fac4909a5b9bb540be6f7a323e Mon Sep 17 00:00:00 2001
From: Johannes Schindelin <johannes.schindelin@gmx.de>
Date: Mon, 10 Feb 2020 23:37:07 +0100
Subject: [PATCH 05/39] pkt-line: add PACKET_READ_GENTLE_ON_READ_ERROR option

Introduce PACKET_READ_GENTLE_ON_READ_ERROR option to help libify the
packet readers.

So far, the (possibly indirect) callers of `get_packet_data()` can ask
that function to return an error instead of `die()`ing upon end-of-file.
However, random read errors will still cause the process to die.

So let's introduce an explicit option to tell the packet reader
machinery to please be nice and only return an error on read errors.

This change prepares pkt-line for use by long-running daemon processes.
Such processes should be able to serve multiple concurrent clients and
and survive random IO errors.  If there is an error on one connection,
a daemon should be able to drop that connection and continue serving
existing and future connections.

This ability will be used by a Git-aware "Builtin FSMonitor" feature
in a later patch series.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
---
 pkt-line.c | 19 +++++++++++++++++--
 pkt-line.h | 11 ++++++++---
 2 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/pkt-line.c b/pkt-line.c
index 434da3a0c48dfc..22775e37a72b15 100644
--- a/pkt-line.c
+++ b/pkt-line.c
@@ -305,8 +305,11 @@ static int get_packet_data(int fd, char **src_buf, size_t *src_size,
 		*src_size -= ret;
 	} else {
 		ret = read_in_full(fd, dst, size);
-		if (ret < 0)
+		if (ret < 0) {
+			if (options & PACKET_READ_GENTLE_ON_READ_ERROR)
+				return error_errno(_("read error"));
 			die_errno(_("read error"));
+		}
 	}
 
 	/* And complain if we didn't get enough bytes to satisfy the read. */
@@ -314,6 +317,8 @@ static int get_packet_data(int fd, char **src_buf, size_t *src_size,
 		if (options & PACKET_READ_GENTLE_ON_EOF)
 			return -1;
 
+		if (options & PACKET_READ_GENTLE_ON_READ_ERROR)
+			return error(_("the remote end hung up unexpectedly"));
 		die(_("the remote end hung up unexpectedly"));
 	}
 
@@ -342,6 +347,9 @@ enum packet_read_status packet_read_with_status(int fd, char **src_buffer,
 	len = packet_length(linelen);
 
 	if (len < 0) {
+		if (options & PACKET_READ_GENTLE_ON_READ_ERROR)
+			return error(_("protocol error: bad line length "
+				       "character: %.4s"), linelen);
 		die(_("protocol error: bad line length character: %.4s"), linelen);
 	} else if (!len) {
 		packet_trace("0000", 4, 0);
@@ -356,12 +364,19 @@ enum packet_read_status packet_read_with_status(int fd, char **src_buffer,
 		*pktlen = 0;
 		return PACKET_READ_RESPONSE_END;
 	} else if (len < 4) {
+		if (options & PACKET_READ_GENTLE_ON_READ_ERROR)
+			return error(_("protocol error: bad line length %d"),
+				     len);
 		die(_("protocol error: bad line length %d"), len);
 	}
 
 	len -= 4;
-	if ((unsigned)len >= size)
+	if ((unsigned)len >= size) {
+		if (options & PACKET_READ_GENTLE_ON_READ_ERROR)
+			return error(_("protocol error: bad line length %d"),
+				     len);
 		die(_("protocol error: bad line length %d"), len);
+	}
 
 	if (get_packet_data(fd, src_buffer, src_len, buffer, len, options) < 0) {
 		*pktlen = -1;
diff --git a/pkt-line.h b/pkt-line.h
index 31012b9943bf35..80ce0187e2ea7e 100644
--- a/pkt-line.h
+++ b/pkt-line.h
@@ -68,10 +68,15 @@ int write_packetized_from_buf_no_flush(const char *src_in, size_t len, int fd_ou
  *
  * If options contains PACKET_READ_DIE_ON_ERR_PACKET, it dies when it sees an
  * ERR packet.
+ *
+ * If options contains PACKET_READ_GENTLE_ON_READ_ERROR, we will not die
+ * on read errors, but instead return -1.  However, we may still die on an
+ * ERR packet (if requested).
  */
-#define PACKET_READ_GENTLE_ON_EOF     (1u<<0)
-#define PACKET_READ_CHOMP_NEWLINE     (1u<<1)
-#define PACKET_READ_DIE_ON_ERR_PACKET (1u<<2)
+#define PACKET_READ_GENTLE_ON_EOF        (1u<<0)
+#define PACKET_READ_CHOMP_NEWLINE        (1u<<1)
+#define PACKET_READ_DIE_ON_ERR_PACKET    (1u<<2)
+#define PACKET_READ_GENTLE_ON_READ_ERROR (1u<<3)
 int packet_read(int fd, char **src_buffer, size_t *src_len, char
 		*buffer, unsigned size, int options);
 

From c6e4e015e69a4d50646410cdaab1137d8b0d80d5 Mon Sep 17 00:00:00 2001
From: Johannes Schindelin <johannes.schindelin@gmx.de>
Date: Tue, 11 Feb 2020 15:15:12 +0100
Subject: [PATCH 06/39] pkt-line: add options argument to
 read_packetized_to_strbuf()

Update the calling sequence of `read_packetized_to_strbuf()` to take
an options argument and not assume a fixed set of options.  Update the
only existing caller accordingly to explicitly pass the
formerly-assumed flags.

The `read_packetized_to_strbuf()` function calls `packet_read()` with
a fixed set of assumed options (`PACKET_READ_GENTLE_ON_EOF`).  This
assumption has been fine for the single existing caller
`apply_multi_file_filter()` in `convert.c`.

In a later commit we would like to add other callers to
`read_packetized_to_strbuf()` that need a different set of options.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
---
 convert.c  | 3 ++-
 pkt-line.c | 4 ++--
 pkt-line.h | 2 +-
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/convert.c b/convert.c
index 976d4905cb3a1f..516f1095b06eea 100644
--- a/convert.c
+++ b/convert.c
@@ -907,7 +907,8 @@ static int apply_multi_file_filter(const char *path, const char *src, size_t len
 		if (err)
 			goto done;
 
-		err = read_packetized_to_strbuf(process->out, &nbuf) < 0;
+		err = read_packetized_to_strbuf(process->out, &nbuf,
+						PACKET_READ_GENTLE_ON_EOF) < 0;
 		if (err)
 			goto done;
 
diff --git a/pkt-line.c b/pkt-line.c
index 22775e37a72b15..695ea37b9d3087 100644
--- a/pkt-line.c
+++ b/pkt-line.c
@@ -443,7 +443,7 @@ char *packet_read_line_buf(char **src, size_t *src_len, int *dst_len)
 	return packet_read_line_generic(-1, src, src_len, dst_len);
 }
 
-ssize_t read_packetized_to_strbuf(int fd_in, struct strbuf *sb_out)
+ssize_t read_packetized_to_strbuf(int fd_in, struct strbuf *sb_out, int options)
 {
 	int packet_len;
 
@@ -459,7 +459,7 @@ ssize_t read_packetized_to_strbuf(int fd_in, struct strbuf *sb_out)
 			 * that there is already room for the extra byte.
 			 */
 			sb_out->buf + sb_out->len, LARGE_PACKET_DATA_MAX+1,
-			PACKET_READ_GENTLE_ON_EOF);
+			options);
 		if (packet_len <= 0)
 			break;
 		sb_out->len += packet_len;
diff --git a/pkt-line.h b/pkt-line.h
index 80ce0187e2ea7e..5af5f456876841 100644
--- a/pkt-line.h
+++ b/pkt-line.h
@@ -136,7 +136,7 @@ char *packet_read_line_buf(char **src_buf, size_t *src_len, int *size);
 /*
  * Reads a stream of variable sized packets until a flush packet is detected.
  */
-ssize_t read_packetized_to_strbuf(int fd_in, struct strbuf *sb_out);
+ssize_t read_packetized_to_strbuf(int fd_in, struct strbuf *sb_out, int options);
 
 /*
  * Receive multiplexed output stream over git native protocol.

From 0213ed95d2f556046c1b9d9766ad2b65254b542a Mon Sep 17 00:00:00 2001
From: Jeff Hostetler <jeffhost@microsoft.com>
Date: Wed, 8 Jul 2020 13:26:00 -0400
Subject: [PATCH 07/39] simple-ipc: design documentation for new IPC mechanism

Brief design documentation for new IPC mechanism allowing
foreground Git client to talk with an existing daemon process
at a known location using a named pipe or unix domain socket.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
---
 Documentation/technical/api-simple-ipc.txt | 105 +++++++++++++++++++++
 1 file changed, 105 insertions(+)
 create mode 100644 Documentation/technical/api-simple-ipc.txt

diff --git a/Documentation/technical/api-simple-ipc.txt b/Documentation/technical/api-simple-ipc.txt
new file mode 100644
index 00000000000000..d79ad323e67530
--- /dev/null
+++ b/Documentation/technical/api-simple-ipc.txt
@@ -0,0 +1,105 @@
+Simple-IPC API
+==============
+
+The Simple-IPC API is a collection of `ipc_` prefixed library routines
+and a basic communication protocol that allow an IPC-client process to
+send an application-specific IPC-request message to an IPC-server
+process and receive an application-specific IPC-response message.
+
+Communication occurs over a named pipe on Windows and a Unix domain
+socket on other platforms.  IPC-clients and IPC-servers rendezvous at
+a previously agreed-to application-specific pathname (which is outside
+the scope of this design) that is local to the computer system.
+
+The IPC-server routines within the server application process create a
+thread pool to listen for connections and receive request messages
+from multiple concurrent IPC-clients.  When received, these messages
+are dispatched up to the server application callbacks for handling.
+IPC-server routines then incrementally relay responses back to the
+IPC-client.
+
+The IPC-client routines within a client application process connect
+to the IPC-server and send a request message and wait for a response.
+When received, the response is returned back the caller.
+
+For example, the `fsmonitor--daemon` feature will be built as a server
+application on top of the IPC-server library routines.  It will have
+threads watching for file system events and a thread pool waiting for
+client connections.  Clients, such as `git status` will request a list
+of file system events since a point in time and the server will
+respond with a list of changed files and directories.  The formats of
+the request and response are application-specific; the IPC-client and
+IPC-server routines treat them as opaque byte streams.
+
+
+Comparison with sub-process model
+---------------------------------
+
+The Simple-IPC mechanism differs from the existing `sub-process.c`
+model (Documentation/technical/long-running-process-protocol.txt) and
+used by applications like Git-LFS.  In the LFS-style sub-process model
+the helper is started by the foreground process, communication happens
+via a pair of file descriptors bound to the stdin/stdout of the
+sub-process, the sub-process only serves the current foreground
+process, and the sub-process exits when the foreground process
+terminates.
+
+In the Simple-IPC model the server is a very long-running service.  It
+can service many clients at the same time and has a private socket or
+named pipe connection to each active client.  It might be started
+(on-demand) by the current client process or it might have been
+started by a previous client or by the OS at boot time.  The server
+process is not associated with a terminal and it persists after
+clients terminate.  Clients do not have access to the stdin/stdout of
+the server process and therefore must communicate over sockets or
+named pipes.
+
+
+Server startup and shutdown
+---------------------------
+
+How an application server based upon IPC-server is started is also
+outside the scope of the Simple-IPC design and is a property of the
+application using it.  For example, the server might be started or
+restarted during routine maintenance operations, or it might be
+started as a system service during the system boot-up sequence, or it
+might be started on-demand by a foreground Git command when needed.
+
+Similarly, server shutdown is a property of the application using
+the simple-ipc routines.  For example, the server might decide to
+shutdown when idle or only upon explicit request.
+
+
+Simple-IPC protocol
+-------------------
+
+The Simple-IPC protocol consists of a single request message from the
+client and an optional response message from the server.  Both the
+client and server messages are unlimited in length and are terminated
+with a flush packet.
+
+The pkt-line routines (Documentation/technical/protocol-common.txt)
+are used to simplify buffer management during message generation,
+transmission, and reception.  A flush packet is used to mark the end
+of the message.  This allows the sender to incrementally generate and
+transmit the message.  It allows the receiver to incrementally receive
+the message in chunks and to know when they have received the entire
+message.
+
+The actual byte format of the client request and server response
+messages are application specific.  The IPC layer transmits and
+receives them as opaque byte buffers without any concern for the
+content within.  It is the job of the calling application layer to
+understand the contents of the request and response messages.
+
+
+Summary
+-------
+
+Conceptually, the Simple-IPC protocol is similar to an HTTP REST
+request.  Clients connect, make an application-specific and
+stateless request, receive an application-specific
+response, and disconnect.  It is a one round trip facility for
+querying the server.  The Simple-IPC routines hide the socket,
+named pipe, and thread pool details and allow the application
+layer to focus on the application at hand.

From ae8103d3e0a1bef5246f78da2044a8bb830f6f49 Mon Sep 17 00:00:00 2001
From: Jeff Hostetler <jeffhost@microsoft.com>
Date: Thu, 9 Jul 2020 08:23:12 -0400
Subject: [PATCH 08/39] simple-ipc: add win32 implementation

Create Windows implementation of "simple-ipc" using named pipes.

Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
---
 Makefile                            |   5 +
 compat/simple-ipc/ipc-shared.c      |  28 ++
 compat/simple-ipc/ipc-win32.c       | 751 ++++++++++++++++++++++++++++
 config.mak.uname                    |   2 +
 contrib/buildsystems/CMakeLists.txt |   4 +
 simple-ipc.h                        | 228 +++++++++
 6 files changed, 1018 insertions(+)
 create mode 100644 compat/simple-ipc/ipc-shared.c
 create mode 100644 compat/simple-ipc/ipc-win32.c
 create mode 100644 simple-ipc.h

diff --git a/Makefile b/Makefile
index 4128b457e14bda..40d5cab78d3f17 100644
--- a/Makefile
+++ b/Makefile
@@ -1679,6 +1679,11 @@ else
 	LIB_OBJS += unix-socket.o
 endif
 
+ifdef USE_WIN32_IPC
+	LIB_OBJS += compat/simple-ipc/ipc-shared.o
+	LIB_OBJS += compat/simple-ipc/ipc-win32.o
+endif
+
 ifdef NO_ICONV
 	BASIC_CFLAGS += -DNO_ICONV
 endif
diff --git a/compat/simple-ipc/ipc-shared.c b/compat/simple-ipc/ipc-shared.c
new file mode 100644
index 00000000000000..1edec8159532eb
--- /dev/null
+++ b/compat/simple-ipc/ipc-shared.c
@@ -0,0 +1,28 @@
+#include "cache.h"
+#include "simple-ipc.h"
+#include "strbuf.h"
+#include "pkt-line.h"
+#include "thread-utils.h"
+
+#ifdef SUPPORTS_SIMPLE_IPC
+
+int ipc_server_run(const char *path, const struct ipc_server_opts *opts,
+		   ipc_server_application_cb *application_cb,
+		   void *application_data)
+{
+	struct ipc_server_data *server_data = NULL;
+	int ret;
+
+	ret = ipc_server_run_async(&server_data, path, opts,
+				   application_cb, application_data);
+	if (ret)
+		return ret;
+
+	ret = ipc_server_await(server_data);
+
+	ipc_server_free(server_data);
+
+	return ret;
+}
+
+#endif /* SUPPORTS_SIMPLE_IPC */
diff --git a/compat/simple-ipc/ipc-win32.c b/compat/simple-ipc/ipc-win32.c
new file mode 100644
index 00000000000000..8f89c02037e36c
--- /dev/null
+++ b/compat/simple-ipc/ipc-win32.c
@@ -0,0 +1,751 @@
+#include "cache.h"
+#include "simple-ipc.h"
+#include "strbuf.h"
+#include "pkt-line.h"
+#include "thread-utils.h"
+
+#ifndef GIT_WINDOWS_NATIVE
+#error This file can only be compiled on Windows
+#endif
+
+static int initialize_pipe_name(const char *path, wchar_t *wpath, size_t alloc)
+{
+	int off = 0;
+	struct strbuf realpath = STRBUF_INIT;
+
+	if (!strbuf_realpath(&realpath, path, 0))
+		return -1;
+
+	off = swprintf(wpath, alloc, L"\\\\.\\pipe\\");
+	if (xutftowcs(wpath + off, realpath.buf, alloc - off) < 0)
+		return -1;
+
+	/* Handle drive prefix */
+	if (wpath[off] && wpath[off + 1] == L':') {
+		wpath[off + 1] = L'_';
+		off += 2;
+	}
+
+	for (; wpath[off]; off++)
+		if (wpath[off] == L'/')
+			wpath[off] = L'\\';
+
+	strbuf_release(&realpath);
+	return 0;
+}
+
+static enum ipc_active_state get_active_state(wchar_t *pipe_path)
+{
+	if (WaitNamedPipeW(pipe_path, NMPWAIT_USE_DEFAULT_WAIT))
+		return IPC_STATE__LISTENING;
+
+	if (GetLastError() == ERROR_SEM_TIMEOUT)
+		return IPC_STATE__NOT_LISTENING;
+
+	if (GetLastError() == ERROR_FILE_NOT_FOUND)
+		return IPC_STATE__PATH_NOT_FOUND;
+
+	return IPC_STATE__OTHER_ERROR;
+}
+
+enum ipc_active_state ipc_get_active_state(const char *path)
+{
+	wchar_t pipe_path[MAX_PATH];
+
+	if (initialize_pipe_name(path, pipe_path, ARRAY_SIZE(pipe_path)) < 0)
+		return IPC_STATE__INVALID_PATH;
+
+	return get_active_state(pipe_path);
+}
+
+#define WAIT_STEP_MS (50)
+
+static enum ipc_active_state connect_to_server(
+	const wchar_t *wpath,
+	DWORD timeout_ms,
+	const struct ipc_client_connect_options *options,
+	int *pfd)
+{
+	DWORD t_start_ms, t_waited_ms;
+	DWORD step_ms;
+	HANDLE hPipe = INVALID_HANDLE_VALUE;
+	DWORD mode = PIPE_READMODE_BYTE;
+	DWORD gle;
+
+	*pfd = -1;
+
+	for (;;) {
+		hPipe = CreateFileW(wpath, GENERIC_READ | GENERIC_WRITE,
+				    0, NULL, OPEN_EXISTING, 0, NULL);
+		if (hPipe != INVALID_HANDLE_VALUE)
+			break;
+
+		gle = GetLastError();
+
+		switch (gle) {
+		case ERROR_FILE_NOT_FOUND:
+			if (!options->wait_if_not_found)
+				return IPC_STATE__PATH_NOT_FOUND;
+			if (!timeout_ms)
+				return IPC_STATE__PATH_NOT_FOUND;
+
+			step_ms = (timeout_ms < WAIT_STEP_MS) ?
+				timeout_ms : WAIT_STEP_MS;
+			sleep_millisec(step_ms);
+
+			timeout_ms -= step_ms;
+			break; /* try again */
+
+		case ERROR_PIPE_BUSY:
+			if (!options->wait_if_busy)
+				return IPC_STATE__NOT_LISTENING;
+			if (!timeout_ms)
+				return IPC_STATE__NOT_LISTENING;
+
+			t_start_ms = (DWORD)(getnanotime() / 1000000);
+
+			if (!WaitNamedPipeW(wpath, timeout_ms)) {
+				if (GetLastError() == ERROR_SEM_TIMEOUT)
+					return IPC_STATE__NOT_LISTENING;
+
+				return IPC_STATE__OTHER_ERROR;
+			}
+
+			/*
+			 * A pipe server instance became available.
+			 * Race other client processes to connect to
+			 * it.
+			 *
+			 * But first decrement our overall timeout so
+			 * that we don't starve if we keep losing the
+			 * race.  But also guard against special
+			 * NPMWAIT_ values (0 and -1).
+			 */
+			t_waited_ms = (DWORD)(getnanotime() / 1000000) - t_start_ms;
+			if (t_waited_ms < timeout_ms)
+				timeout_ms -= t_waited_ms;
+			else
+				timeout_ms = 1;
+			break; /* try again */
+
+		default:
+			return IPC_STATE__OTHER_ERROR;
+		}
+	}
+
+	if (!SetNamedPipeHandleState(hPipe, &mode, NULL, NULL)) {
+		CloseHandle(hPipe);
+		return IPC_STATE__OTHER_ERROR;
+	}
+
+	*pfd = _open_osfhandle((intptr_t)hPipe, O_RDWR|O_BINARY);
+	if (*pfd < 0) {
+		CloseHandle(hPipe);
+		return IPC_STATE__OTHER_ERROR;
+	}
+
+	/* fd now owns hPipe */
+
+	return IPC_STATE__LISTENING;
+}
+
+/*
+ * The default connection timeout for Windows clients.
+ *
+ * This is not currently part of the ipc_ API (nor the config settings)
+ * because of differences between Windows and other platforms.
+ *
+ * This value was chosen at random.
+ */
+#define WINDOWS_CONNECTION_TIMEOUT_MS (30000)
+
+enum ipc_active_state ipc_client_try_connect(
+	const char *path,
+	const struct ipc_client_connect_options *options,
+	struct ipc_client_connection **p_connection)
+{
+	wchar_t wpath[MAX_PATH];
+	enum ipc_active_state state = IPC_STATE__OTHER_ERROR;
+	int fd = -1;
+
+	*p_connection = NULL;
+
+	trace2_region_enter("ipc-client", "try-connect", NULL);
+	trace2_data_string("ipc-client", NULL, "try-connect/path", path);
+
+	if (initialize_pipe_name(path, wpath, ARRAY_SIZE(wpath)) < 0)
+		state = IPC_STATE__INVALID_PATH;
+	else
+		state = connect_to_server(wpath, WINDOWS_CONNECTION_TIMEOUT_MS,
+					  options, &fd);
+
+	trace2_data_intmax("ipc-client", NULL, "try-connect/state",
+			   (intmax_t)state);
+	trace2_region_leave("ipc-client", "try-connect", NULL);
+
+	if (state == IPC_STATE__LISTENING) {
+		(*p_connection) = xcalloc(1, sizeof(struct ipc_client_connection));
+		(*p_connection)->fd = fd;
+	}
+
+	return state;
+}
+
+void ipc_client_close_connection(struct ipc_client_connection *connection)
+{
+	if (!connection)
+		return;
+
+	if (connection->fd != -1)
+		close(connection->fd);
+
+	free(connection);
+}
+
+int ipc_client_send_command_to_connection(
+	struct ipc_client_connection *connection,
+	const char *message, struct strbuf *answer)
+{
+	int ret = 0;
+
+	strbuf_setlen(answer, 0);
+
+	trace2_region_enter("ipc-client", "send-command", NULL);
+
+	if (write_packetized_from_buf_no_flush(message, strlen(message),
+					       connection->fd) < 0 ||
+	    packet_flush_gently(connection->fd) < 0) {
+		ret = error(_("could not send IPC command"));
+		goto done;
+	}
+
+	FlushFileBuffers((HANDLE)_get_osfhandle(connection->fd));
+
+	if (read_packetized_to_strbuf(
+		    connection->fd, answer,
+		    PACKET_READ_GENTLE_ON_EOF | PACKET_READ_GENTLE_ON_READ_ERROR) < 0) {
+		ret = error(_("could not read IPC response"));
+		goto done;
+	}
+
+done:
+	trace2_region_leave("ipc-client", "send-command", NULL);
+	return ret;
+}
+
+int ipc_client_send_command(const char *path,
+			    const struct ipc_client_connect_options *options,
+			    const char *message, struct strbuf *response)
+{
+	int ret = -1;
+	enum ipc_active_state state;
+	struct ipc_client_connection *connection = NULL;
+
+	state = ipc_client_try_connect(path, options, &connection);
+
+	if (state != IPC_STATE__LISTENING)
+		return ret;
+
+	ret = ipc_client_send_command_to_connection(connection, message, response);
+
+	ipc_client_close_connection(connection);
+
+	return ret;
+}
+
+/*
+ * Duplicate the given pipe handle and wrap it in a file descriptor so
+ * that we can use pkt-line on it.
+ */
+static int dup_fd_from_pipe(const HANDLE pipe)
+{
+	HANDLE process = GetCurrentProcess();
+	HANDLE handle;
+	int fd;
+
+	if (!DuplicateHandle(process, pipe, process, &handle, 0, FALSE,
+			     DUPLICATE_SAME_ACCESS)) {
+		errno = err_win_to_posix(GetLastError());
+		return -1;
+	}
+
+	fd = _open_osfhandle((intptr_t)handle, O_RDWR|O_BINARY);
+	if (fd < 0) {
+		errno = err_win_to_posix(GetLastError());
+		CloseHandle(handle);
+		return -1;
+	}
+
+	/*
+	 * `handle` is now owned by `fd` and will be automatically closed
+	 * when the descriptor is closed.
+	 */
+
+	return fd;
+}
+
+/*
+ * Magic numbers used to annotate callback instance data.
+ * These are used to help guard against accidentally passing the
+ * wrong instance data across multiple levels of callbacks (which
+ * is easy to do if there are `void*` arguments).
+ */
+enum magic {
+	MAGIC_SERVER_REPLY_DATA,
+	MAGIC_SERVER_THREAD_DATA,
+	MAGIC_SERVER_DATA,
+};
+
+struct ipc_server_reply_data {
+	enum magic magic;
+	int fd;
+	struct ipc_server_thread_data *server_thread_data;
+};
+
+struct ipc_server_thread_data {
+	enum magic magic;
+	struct ipc_server_thread_data *next_thread;
+	struct ipc_server_data *server_data;
+	pthread_t pthread_id;
+	HANDLE hPipe;
+};
+
+/*
+ * On Windows, the conceptual "ipc-server" is implemented as a pool of
+ * n idential/peer "server-thread" threads.  That is, there is no
+ * hierarchy of threads; and therefore no controller thread managing
+ * the pool.  Each thread has an independent handle to the named pipe,
+ * receives incoming connections, processes the client, and re-uses
+ * the pipe for the next client connection.
+ *
+ * Therefore, the "ipc-server" only needs to maintain a list of the
+ * spawned threads for eventual "join" purposes.
+ *
+ * A single "stop-event" is visible to all of the server threads to
+ * tell them to shutdown (when idle).
+ */
+struct ipc_server_data {
+	enum magic magic;
+	ipc_server_application_cb *application_cb;
+	void *application_data;
+	struct strbuf buf_path;
+	wchar_t wpath[MAX_PATH];
+
+	HANDLE hEventStopRequested;
+	struct ipc_server_thread_data *thread_list;
+	int is_stopped;
+};
+
+enum connect_result {
+	CR_CONNECTED = 0,
+	CR_CONNECT_PENDING,
+	CR_CONNECT_ERROR,
+	CR_WAIT_ERROR,
+	CR_SHUTDOWN,
+};
+
+static enum connect_result queue_overlapped_connect(
+	struct ipc_server_thread_data *server_thread_data,
+	OVERLAPPED *lpo)
+{
+	if (ConnectNamedPipe(server_thread_data->hPipe, lpo))
+		goto failed;
+
+	switch (GetLastError()) {
+	case ERROR_IO_PENDING:
+		return CR_CONNECT_PENDING;
+
+	case ERROR_PIPE_CONNECTED:
+		SetEvent(lpo->hEvent);
+		return CR_CONNECTED;
+
+	default:
+		break;
+	}
+
+failed:
+	error(_("ConnectNamedPipe failed for '%s' (%lu)"),
+	      server_thread_data->server_data->buf_path.buf,
+	      GetLastError());
+	return CR_CONNECT_ERROR;
+}
+
+/*
+ * Use Windows Overlapped IO to wait for a connection or for our event
+ * to be signalled.
+ */
+static enum connect_result wait_for_connection(
+	struct ipc_server_thread_data *server_thread_data,
+	OVERLAPPED *lpo)
+{
+	enum connect_result r;
+	HANDLE waitHandles[2];
+	DWORD dwWaitResult;
+
+	r = queue_overlapped_connect(server_thread_data, lpo);
+	if (r != CR_CONNECT_PENDING)
+		return r;
+
+	waitHandles[0] = server_thread_data->server_data->hEventStopRequested;
+	waitHandles[1] = lpo->hEvent;
+
+	dwWaitResult = WaitForMultipleObjects(2, waitHandles, FALSE, INFINITE);
+	switch (dwWaitResult) {
+	case WAIT_OBJECT_0 + 0:
+		return CR_SHUTDOWN;
+
+	case WAIT_OBJECT_0 + 1:
+		ResetEvent(lpo->hEvent);
+		return CR_CONNECTED;
+
+	default:
+		return CR_WAIT_ERROR;
+	}
+}
+
+/*
+ * Forward declare our reply callback function so that any compiler
+ * errors are reported when we actually define the function (in addition
+ * to any errors reported when we try to pass this callback function as
+ * a parameter in a function call).  The former are easier to understand.
+ */
+static ipc_server_reply_cb do_io_reply_callback;
+
+/*
+ * Relay application's response message to the client process.
+ * (We do not flush at this point because we allow the caller
+ * to chunk data to the client thru us.)
+ */
+static int do_io_reply_callback(struct ipc_server_reply_data *reply_data,
+		       const char *response, size_t response_len)
+{
+	if (reply_data->magic != MAGIC_SERVER_REPLY_DATA)
+		BUG("reply_cb called with wrong instance data");
+
+	return write_packetized_from_buf_no_flush(response, response_len,
+						  reply_data->fd);
+}
+
+/*
+ * Receive the request/command from the client and pass it to the
+ * registered request-callback.  The request-callback will compose
+ * a response and call our reply-callback to send it to the client.
+ *
+ * Simple-IPC only contains one round trip, so we flush and close
+ * here after the response.
+ */
+static int do_io(struct ipc_server_thread_data *server_thread_data)
+{
+	struct strbuf buf = STRBUF_INIT;
+	struct ipc_server_reply_data reply_data;
+	int ret = 0;
+
+	reply_data.magic = MAGIC_SERVER_REPLY_DATA;
+	reply_data.server_thread_data = server_thread_data;
+
+	reply_data.fd = dup_fd_from_pipe(server_thread_data->hPipe);
+	if (reply_data.fd < 0)
+		return error(_("could not create fd from pipe for '%s'"),
+			     server_thread_data->server_data->buf_path.buf);
+
+	ret = read_packetized_to_strbuf(
+		reply_data.fd, &buf,
+		PACKET_READ_GENTLE_ON_EOF | PACKET_READ_GENTLE_ON_READ_ERROR);
+	if (ret >= 0) {
+		ret = server_thread_data->server_data->application_cb(
+			server_thread_data->server_data->application_data,
+			buf.buf, do_io_reply_callback, &reply_data);
+
+		packet_flush_gently(reply_data.fd);
+
+		FlushFileBuffers((HANDLE)_get_osfhandle((reply_data.fd)));
+	}
+	else {
+		/*
+		 * The client probably disconnected/shutdown before it
+		 * could send a well-formed message.  Ignore it.
+		 */
+	}
+
+	strbuf_release(&buf);
+	close(reply_data.fd);
+
+	return ret;
+}
+
+/*
+ * Handle IPC request and response with this connected client.  And reset
+ * the pipe to prepare for the next client.
+ */
+static int use_connection(struct ipc_server_thread_data *server_thread_data)
+{
+	int ret;
+
+	ret = do_io(server_thread_data);
+
+	FlushFileBuffers(server_thread_data->hPipe);
+	DisconnectNamedPipe(server_thread_data->hPipe);
+
+	return ret;
+}
+
+/*
+ * Thread proc for an IPC server worker thread.  It handles a series of
+ * connections from clients.  It cleans and reuses the hPipe between each
+ * client.
+ */
+static void *server_thread_proc(void *_server_thread_data)
+{
+	struct ipc_server_thread_data *server_thread_data = _server_thread_data;
+	HANDLE hEventConnected = INVALID_HANDLE_VALUE;
+	OVERLAPPED oConnect;
+	enum connect_result cr;
+	int ret;
+
+	assert(server_thread_data->hPipe != INVALID_HANDLE_VALUE);
+
+	trace2_thread_start("ipc-server");
+	trace2_data_string("ipc-server", NULL, "pipe",
+			   server_thread_data->server_data->buf_path.buf);
+
+	hEventConnected = CreateEventW(NULL, TRUE, FALSE, NULL);
+
+	memset(&oConnect, 0, sizeof(oConnect));
+	oConnect.hEvent = hEventConnected;
+
+	for (;;) {
+		cr = wait_for_connection(server_thread_data, &oConnect);
+
+		switch (cr) {
+		case CR_SHUTDOWN:
+			goto finished;
+
+		case CR_CONNECTED:
+			ret = use_connection(server_thread_data);
+			if (ret == SIMPLE_IPC_QUIT) {
+				ipc_server_stop_async(
+					server_thread_data->server_data);
+				goto finished;
+			}
+			if (ret > 0) {
+				/*
+				 * Ignore (transient) IO errors with this
+				 * client and reset for the next client.
+				 */
+			}
+			break;
+
+		case CR_CONNECT_PENDING:
+			/* By construction, this should not happen. */
+			BUG("ipc-server[%s]: unexpeced CR_CONNECT_PENDING",
+			    server_thread_data->server_data->buf_path.buf);
+
+		case CR_CONNECT_ERROR:
+		case CR_WAIT_ERROR:
+			/*
+			 * Ignore these theoretical errors.
+			 */
+			DisconnectNamedPipe(server_thread_data->hPipe);
+			break;
+
+		default:
+			BUG("unandled case after wait_for_connection");
+		}
+	}
+
+finished:
+	CloseHandle(server_thread_data->hPipe);
+	CloseHandle(hEventConnected);
+
+	trace2_thread_exit();
+	return NULL;
+}
+
+static HANDLE create_new_pipe(wchar_t *wpath, int is_first)
+{
+	HANDLE hPipe;
+	DWORD dwOpenMode, dwPipeMode;
+	LPSECURITY_ATTRIBUTES lpsa = NULL;
+
+	dwOpenMode = PIPE_ACCESS_INBOUND | PIPE_ACCESS_OUTBOUND |
+		FILE_FLAG_OVERLAPPED;
+
+	dwPipeMode = PIPE_TYPE_MESSAGE | PIPE_READMODE_BYTE | PIPE_WAIT |
+		PIPE_REJECT_REMOTE_CLIENTS;
+
+	if (is_first) {
+		dwOpenMode |= FILE_FLAG_FIRST_PIPE_INSTANCE;
+
+		/*
+		 * On Windows, the first server pipe instance gets to
+		 * set the ACL / Security Attributes on the named
+		 * pipe; subsequent instances inherit and cannot
+		 * change them.
+		 *
+		 * TODO Should we allow the application layer to
+		 * specify security attributes, such as `LocalService`
+		 * or `LocalSystem`, when we create the named pipe?
+		 * This question is probably not important when the
+		 * daemon is started by a foreground user process and
+		 * only needs to talk to the current user, but may be
+		 * if the daemon is run via the Control Panel as a
+		 * System Service.
+		 */
+	}
+
+	hPipe = CreateNamedPipeW(wpath, dwOpenMode, dwPipeMode,
+				 PIPE_UNLIMITED_INSTANCES, 1024, 1024, 0, lpsa);
+
+	return hPipe;
+}
+
+int ipc_server_run_async(struct ipc_server_data **returned_server_data,
+			 const char *path, const struct ipc_server_opts *opts,
+			 ipc_server_application_cb *application_cb,
+			 void *application_data)
+{
+	struct ipc_server_data *server_data;
+	wchar_t wpath[MAX_PATH];
+	HANDLE hPipeFirst = INVALID_HANDLE_VALUE;
+	int k;
+	int ret = 0;
+	int nr_threads = opts->nr_threads;
+
+	*returned_server_data = NULL;
+
+	ret = initialize_pipe_name(path, wpath, ARRAY_SIZE(wpath));
+	if (ret < 0) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	hPipeFirst = create_new_pipe(wpath, 1);
+	if (hPipeFirst == INVALID_HANDLE_VALUE) {
+		errno = EADDRINUSE;
+		return -2;
+	}
+
+	server_data = xcalloc(1, sizeof(*server_data));
+	server_data->magic = MAGIC_SERVER_DATA;
+	server_data->application_cb = application_cb;
+	server_data->application_data = application_data;
+	server_data->hEventStopRequested = CreateEvent(NULL, TRUE, FALSE, NULL);
+	strbuf_init(&server_data->buf_path, 0);
+	strbuf_addstr(&server_data->buf_path, path);
+	wcscpy(server_data->wpath, wpath);
+
+	if (nr_threads < 1)
+		nr_threads = 1;
+
+	for (k = 0; k < nr_threads; k++) {
+		struct ipc_server_thread_data *std;
+
+		std = xcalloc(1, sizeof(*std));
+		std->magic = MAGIC_SERVER_THREAD_DATA;
+		std->server_data = server_data;
+		std->hPipe = INVALID_HANDLE_VALUE;
+
+		std->hPipe = (k == 0)
+			? hPipeFirst
+			: create_new_pipe(server_data->wpath, 0);
+
+		if (std->hPipe == INVALID_HANDLE_VALUE) {
+			/*
+			 * If we've reached a pipe instance limit for
+			 * this path, just use fewer threads.
+			 */
+			free(std);
+			break;
+		}
+
+		if (pthread_create(&std->pthread_id, NULL,
+				   server_thread_proc, std)) {
+			/*
+			 * Likewise, if we're out of threads, just use
+			 * fewer threads than requested.
+			 *
+			 * However, we just give up if we can't even get
+			 * one thread.  This should not happen.
+			 */
+			if (k == 0)
+				die(_("could not start thread[0] for '%s'"),
+				    path);
+
+			CloseHandle(std->hPipe);
+			free(std);
+			break;
+		}
+
+		std->next_thread = server_data->thread_list;
+		server_data->thread_list = std;
+	}
+
+	*returned_server_data = server_data;
+	return 0;
+}
+
+int ipc_server_stop_async(struct ipc_server_data *server_data)
+{
+	if (!server_data)
+		return 0;
+
+	/*
+	 * Gently tell all of the ipc_server threads to shutdown.
+	 * This will be seen the next time they are idle (and waiting
+	 * for a connection).
+	 *
+	 * We DO NOT attempt to force them to drop an active connection.
+	 */
+	SetEvent(server_data->hEventStopRequested);
+	return 0;
+}
+
+int ipc_server_await(struct ipc_server_data *server_data)
+{
+	DWORD dwWaitResult;
+
+	if (!server_data)
+		return 0;
+
+	dwWaitResult = WaitForSingleObject(server_data->hEventStopRequested, INFINITE);
+	if (dwWaitResult != WAIT_OBJECT_0)
+		return error(_("wait for hEvent failed for '%s'"),
+			     server_data->buf_path.buf);
+
+	while (server_data->thread_list) {
+		struct ipc_server_thread_data *std = server_data->thread_list;
+
+		pthread_join(std->pthread_id, NULL);
+
+		server_data->thread_list = std->next_thread;
+		free(std);
+	}
+
+	server_data->is_stopped = 1;
+
+	return 0;
+}
+
+void ipc_server_free(struct ipc_server_data *server_data)
+{
+	if (!server_data)
+		return;
+
+	if (!server_data->is_stopped)
+		BUG("cannot free ipc-server while running for '%s'",
+		    server_data->buf_path.buf);
+
+	strbuf_release(&server_data->buf_path);
+
+	if (server_data->hEventStopRequested != INVALID_HANDLE_VALUE)
+		CloseHandle(server_data->hEventStopRequested);
+
+	while (server_data->thread_list) {
+		struct ipc_server_thread_data *std = server_data->thread_list;
+
+		server_data->thread_list = std->next_thread;
+		free(std);
+	}
+
+	free(server_data);
+}
diff --git a/config.mak.uname b/config.mak.uname
index 198ab1e58f837d..76087cff678905 100644
--- a/config.mak.uname
+++ b/config.mak.uname
@@ -421,6 +421,7 @@ ifeq ($(uname_S),Windows)
 	RUNTIME_PREFIX = YesPlease
 	HAVE_WPGMPTR = YesWeDo
 	NO_ST_BLOCKS_IN_STRUCT_STAT = YesPlease
+	USE_WIN32_IPC = YesPlease
 	USE_WIN32_MMAP = YesPlease
 	MMAP_PREVENTS_DELETE = UnfortunatelyYes
 	# USE_NED_ALLOCATOR = YesPlease
@@ -597,6 +598,7 @@ ifneq (,$(findstring MINGW,$(uname_S)))
 	RUNTIME_PREFIX = YesPlease
 	HAVE_WPGMPTR = YesWeDo
 	NO_ST_BLOCKS_IN_STRUCT_STAT = YesPlease
+	USE_WIN32_IPC = YesPlease
 	USE_WIN32_MMAP = YesPlease
 	MMAP_PREVENTS_DELETE = UnfortunatelyYes
 	USE_NED_ALLOCATOR = YesPlease
diff --git a/contrib/buildsystems/CMakeLists.txt b/contrib/buildsystems/CMakeLists.txt
index c151dd7257f31c..4bd41054ee707e 100644
--- a/contrib/buildsystems/CMakeLists.txt
+++ b/contrib/buildsystems/CMakeLists.txt
@@ -246,6 +246,10 @@ elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux")
 	list(APPEND compat_SOURCES unix-socket.c)
 endif()
 
+if(CMAKE_SYSTEM_NAME STREQUAL "Windows")
+	list(APPEND compat_SOURCES compat/simple-ipc/ipc-shared.c compat/simple-ipc/ipc-win32.c)
+endif()
+
 set(EXE_EXTENSION ${CMAKE_EXECUTABLE_SUFFIX})
 
 #header checks
diff --git a/simple-ipc.h b/simple-ipc.h
new file mode 100644
index 00000000000000..ab5619e3d76f9b
--- /dev/null
+++ b/simple-ipc.h
@@ -0,0 +1,228 @@
+#ifndef GIT_SIMPLE_IPC_H
+#define GIT_SIMPLE_IPC_H
+
+/*
+ * See Documentation/technical/api-simple-ipc.txt
+ */
+
+#if defined(GIT_WINDOWS_NATIVE)
+#define SUPPORTS_SIMPLE_IPC
+#endif
+
+#ifdef SUPPORTS_SIMPLE_IPC
+#include "pkt-line.h"
+
+/*
+ * Simple IPC Client Side API.
+ */
+
+enum ipc_active_state {
+	/*
+	 * The pipe/socket exists and the daemon is waiting for connections.
+	 */
+	IPC_STATE__LISTENING = 0,
+
+	/*
+	 * The pipe/socket exists, but the daemon is not listening.
+	 * Perhaps it is very busy.
+	 * Perhaps the daemon died without deleting the path.
+	 * Perhaps it is shutting down and draining existing clients.
+	 * Perhaps it is dead, but other clients are lingering and
+	 * still holding a reference to the pathname.
+	 */
+	IPC_STATE__NOT_LISTENING,
+
+	/*
+	 * The requested pathname is bogus and no amount of retries
+	 * will fix that.
+	 */
+	IPC_STATE__INVALID_PATH,
+
+	/*
+	 * The requested pathname is not found.  This usually means
+	 * that there is no daemon present.
+	 */
+	IPC_STATE__PATH_NOT_FOUND,
+
+	IPC_STATE__OTHER_ERROR,
+};
+
+struct ipc_client_connect_options {
+	/*
+	 * Spin under timeout if the server is running but can't
+	 * accept our connection yet.  This should always be set
+	 * unless you just want to poke the server and see if it
+	 * is alive.
+	 */
+	unsigned int wait_if_busy:1;
+
+	/*
+	 * Spin under timeout if the pipe/socket is not yet present
+	 * on the file system.  This is useful if we just started
+	 * the service and need to wait for it to become ready.
+	 */
+	unsigned int wait_if_not_found:1;
+};
+
+#define IPC_CLIENT_CONNECT_OPTIONS_INIT { \
+	.wait_if_busy = 0, \
+	.wait_if_not_found = 0, \
+}
+
+/*
+ * Determine if a server is listening on this named pipe or socket using
+ * platform-specific logic.  This might just probe the filesystem or it
+ * might make a trivial connection to the server using this pathname.
+ */
+enum ipc_active_state ipc_get_active_state(const char *path);
+
+struct ipc_client_connection {
+	int fd;
+};
+
+/*
+ * Try to connect to the daemon on the named pipe or socket.
+ *
+ * Returns IPC_STATE__LISTENING and a connection handle.
+ *
+ * Otherwise, returns info to help decide whether to retry or to
+ * spawn/respawn the server.
+ */
+enum ipc_active_state ipc_client_try_connect(
+	const char *path,
+	const struct ipc_client_connect_options *options,
+	struct ipc_client_connection **p_connection);
+
+void ipc_client_close_connection(struct ipc_client_connection *connection);
+
+/*
+ * Used by the client to synchronously send and receive a message with
+ * the server on the provided client connection.
+ *
+ * Returns 0 when successful.
+ *
+ * Calls error() and returns non-zero otherwise.
+ */
+int ipc_client_send_command_to_connection(
+	struct ipc_client_connection *connection,
+	const char *message, struct strbuf *answer);
+
+/*
+ * Used by the client to synchronously connect and send and receive a
+ * message to the server listening at the given path.
+ *
+ * Returns 0 when successful.
+ *
+ * Calls error() and returns non-zero otherwise.
+ */
+int ipc_client_send_command(const char *path,
+			    const struct ipc_client_connect_options *options,
+			    const char *message, struct strbuf *answer);
+
+/*
+ * Simple IPC Server Side API.
+ */
+
+struct ipc_server_reply_data;
+
+typedef int (ipc_server_reply_cb)(struct ipc_server_reply_data *,
+				  const char *response,
+				  size_t response_len);
+
+/*
+ * Prototype for an application-supplied callback to process incoming
+ * client IPC messages and compose a reply.  The `application_cb` should
+ * use the provided `reply_cb` and `reply_data` to send an IPC response
+ * back to the client.  The `reply_cb` callback can be called multiple
+ * times for chunking purposes.  A reply message is optional and may be
+ * omitted if not necessary for the application.
+ *
+ * The return value from the application callback is ignored.
+ * The value `SIMPLE_IPC_QUIT` can be used to shutdown the server.
+ */
+typedef int (ipc_server_application_cb)(void *application_data,
+					const char *request,
+					ipc_server_reply_cb *reply_cb,
+					struct ipc_server_reply_data *reply_data);
+
+#define SIMPLE_IPC_QUIT -2
+
+/*
+ * Opaque instance data to represent an IPC server instance.
+ */
+struct ipc_server_data;
+
+/*
+ * Control parameters for the IPC server instance.
+ * Use this to hide platform-specific settings.
+ */
+struct ipc_server_opts
+{
+	int nr_threads;
+};
+
+/*
+ * Start an IPC server instance in one or more background threads
+ * and return a handle to the pool.
+ *
+ * Returns 0 if the asynchronous server pool was started successfully.
+ * Returns -1 if not.
+ * Returns -2 if we could not startup because another server is using
+ * the socket or named pipe.
+ *
+ * When a client IPC message is received, the `application_cb` will be
+ * called (possibly on a random thread) to handle the message and
+ * optionally compose a reply message.
+ */
+int ipc_server_run_async(struct ipc_server_data **returned_server_data,
+			 const char *path, const struct ipc_server_opts *opts,
+			 ipc_server_application_cb *application_cb,
+			 void *application_data);
+
+/*
+ * Gently signal the IPC server pool to shutdown.  No new client
+ * connections will be accepted, but existing connections will be
+ * allowed to complete.
+ */
+int ipc_server_stop_async(struct ipc_server_data *server_data);
+
+/*
+ * Block the calling thread until all threads in the IPC server pool
+ * have completed and been joined.
+ */
+int ipc_server_await(struct ipc_server_data *server_data);
+
+/*
+ * Close and free all resource handles associated with the IPC server
+ * pool.
+ */
+void ipc_server_free(struct ipc_server_data *server_data);
+
+/*
+ * Run an IPC server instance and block the calling thread of the
+ * current process.  It does not return until the IPC server has
+ * either shutdown or had an unrecoverable error.
+ *
+ * The IPC server handles incoming IPC messages from client processes
+ * and may use one or more background threads as necessary.
+ *
+ * Returns 0 after the server has completed successfully.
+ * Returns -1 if the server cannot be started.
+ * Returns -2 if we could not startup because another server is using
+ * the socket or named pipe.
+ *
+ * When a client IPC message is received, the `application_cb` will be
+ * called (possibly on a random thread) to handle the message and
+ * optionally compose a reply message.
+ *
+ * Note that `ipc_server_run()` is a synchronous wrapper around the
+ * above asynchronous routines.  It effectively hides all of the
+ * server state and thread details from the caller and presents a
+ * simple synchronous interface.
+ */
+int ipc_server_run(const char *path, const struct ipc_server_opts *opts,
+		   ipc_server_application_cb *application_cb,
+		   void *application_data);
+
+#endif /* SUPPORTS_SIMPLE_IPC */
+#endif /* GIT_SIMPLE_IPC_H */

From e287fcb78351ac9afe2d13e81dab2472519307be Mon Sep 17 00:00:00 2001
From: Jeff Hostetler <jeffhost@microsoft.com>
Date: Wed, 27 Jan 2021 13:55:20 -0500
Subject: [PATCH 09/39] unix-socket: eliminate static unix_stream_socket()
 helper function

The static helper function `unix_stream_socket()` calls `die()`.  This
is not appropriate for all callers.  Eliminate the wrapper function
and make the callers propagate the error.

Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
---
 unix-socket.c | 27 +++++++++++++--------------
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/unix-socket.c b/unix-socket.c
index 19ed48be9902f3..69f81d64e9d556 100644
--- a/unix-socket.c
+++ b/unix-socket.c
@@ -1,14 +1,6 @@
 #include "cache.h"
 #include "unix-socket.h"
 
-static int unix_stream_socket(void)
-{
-	int fd = socket(AF_UNIX, SOCK_STREAM, 0);
-	if (fd < 0)
-		die_errno("unable to create socket");
-	return fd;
-}
-
 static int chdir_len(const char *orig, int len)
 {
 	char *path = xmemdupz(orig, len);
@@ -73,13 +65,16 @@ static int unix_sockaddr_init(struct sockaddr_un *sa, const char *path,
 
 int unix_stream_connect(const char *path)
 {
-	int fd, saved_errno;
+	int fd = -1, saved_errno;
 	struct sockaddr_un sa;
 	struct unix_sockaddr_context ctx;
 
 	if (unix_sockaddr_init(&sa, path, &ctx) < 0)
 		return -1;
-	fd = unix_stream_socket();
+	fd = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (fd < 0)
+		goto fail;
+
 	if (connect(fd, (struct sockaddr *)&sa, sizeof(sa)) < 0)
 		goto fail;
 	unix_sockaddr_cleanup(&ctx);
@@ -87,15 +82,16 @@ int unix_stream_connect(const char *path)
 
 fail:
 	saved_errno = errno;
+	if (fd != -1)
+		close(fd);
 	unix_sockaddr_cleanup(&ctx);
-	close(fd);
 	errno = saved_errno;
 	return -1;
 }
 
 int unix_stream_listen(const char *path)
 {
-	int fd, saved_errno;
+	int fd = -1, saved_errno;
 	struct sockaddr_un sa;
 	struct unix_sockaddr_context ctx;
 
@@ -103,7 +99,9 @@ int unix_stream_listen(const char *path)
 
 	if (unix_sockaddr_init(&sa, path, &ctx) < 0)
 		return -1;
-	fd = unix_stream_socket();
+	fd = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (fd < 0)
+		goto fail;
 
 	if (bind(fd, (struct sockaddr *)&sa, sizeof(sa)) < 0)
 		goto fail;
@@ -116,8 +114,9 @@ int unix_stream_listen(const char *path)
 
 fail:
 	saved_errno = errno;
+	if (fd != -1)
+		close(fd);
 	unix_sockaddr_cleanup(&ctx);
-	close(fd);
 	errno = saved_errno;
 	return -1;
 }

From 5b0f4e91fb5e39f2e2a3a25485c16e35f1ec13d3 Mon Sep 17 00:00:00 2001
From: Jeff Hostetler <jeffhost@microsoft.com>
Date: Tue, 28 Jul 2020 14:57:11 -0400
Subject: [PATCH 10/39] unix-socket: add backlog size option to
 unix_stream_listen()

Update `unix_stream_listen()` to take an options structure to override
default behaviors.  This commit includes the size of the `listen()` backlog.

Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
---
 builtin/credential-cache--daemon.c |  3 ++-
 unix-socket.c                      | 11 +++++++++--
 unix-socket.h                      |  9 ++++++++-
 3 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/builtin/credential-cache--daemon.c b/builtin/credential-cache--daemon.c
index c61f123a3b81be..4c6c89ab0de2eb 100644
--- a/builtin/credential-cache--daemon.c
+++ b/builtin/credential-cache--daemon.c
@@ -203,9 +203,10 @@ static int serve_cache_loop(int fd)
 
 static void serve_cache(const char *socket_path, int debug)
 {
+	struct unix_stream_listen_opts opts = UNIX_STREAM_LISTEN_OPTS_INIT;
 	int fd;
 
-	fd = unix_stream_listen(socket_path);
+	fd = unix_stream_listen(socket_path, &opts);
 	if (fd < 0)
 		die_errno("unable to bind to '%s'", socket_path);
 
diff --git a/unix-socket.c b/unix-socket.c
index 69f81d64e9d556..012becd93d577d 100644
--- a/unix-socket.c
+++ b/unix-socket.c
@@ -1,6 +1,8 @@
 #include "cache.h"
 #include "unix-socket.h"
 
+#define DEFAULT_UNIX_STREAM_LISTEN_BACKLOG (5)
+
 static int chdir_len(const char *orig, int len)
 {
 	char *path = xmemdupz(orig, len);
@@ -89,9 +91,11 @@ int unix_stream_connect(const char *path)
 	return -1;
 }
 
-int unix_stream_listen(const char *path)
+int unix_stream_listen(const char *path,
+		       const struct unix_stream_listen_opts *opts)
 {
 	int fd = -1, saved_errno;
+	int backlog;
 	struct sockaddr_un sa;
 	struct unix_sockaddr_context ctx;
 
@@ -106,7 +110,10 @@ int unix_stream_listen(const char *path)
 	if (bind(fd, (struct sockaddr *)&sa, sizeof(sa)) < 0)
 		goto fail;
 
-	if (listen(fd, 5) < 0)
+	backlog = opts->listen_backlog_size;
+	if (backlog <= 0)
+		backlog = DEFAULT_UNIX_STREAM_LISTEN_BACKLOG;
+	if (listen(fd, backlog) < 0)
 		goto fail;
 
 	unix_sockaddr_cleanup(&ctx);
diff --git a/unix-socket.h b/unix-socket.h
index e271aeec5a07dc..ec2fb3ea726714 100644
--- a/unix-socket.h
+++ b/unix-socket.h
@@ -1,7 +1,14 @@
 #ifndef UNIX_SOCKET_H
 #define UNIX_SOCKET_H
 
+struct unix_stream_listen_opts {
+	int listen_backlog_size;
+};
+
+#define UNIX_STREAM_LISTEN_OPTS_INIT { 0 }
+
 int unix_stream_connect(const char *path);
-int unix_stream_listen(const char *path);
+int unix_stream_listen(const char *path,
+		       const struct unix_stream_listen_opts *opts);
 
 #endif /* UNIX_SOCKET_H */

From d74719ac8b2965432107a5a14b68456d1b923156 Mon Sep 17 00:00:00 2001
From: Jeff Hostetler <jeffhost@microsoft.com>
Date: Wed, 29 Jul 2020 14:22:00 -0400
Subject: [PATCH 11/39] unix-socket: disallow chdir() when creating unix domain
 sockets

Calls to `chdir()` are dangerous in a multi-threaded context.  If
`unix_stream_listen()` or `unix_stream_connect()` is given a socket
pathname that is too long to fit in a `sockaddr_un` structure, it will
`chdir()` to the parent directory of the requested socket pathname,
create the socket using a relative pathname, and then `chdir()` back.
This is not thread-safe.

Teach `unix_sockaddr_init()` to not allow calls to `chdir()` when this
flag is set.

Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
---
 builtin/credential-cache.c |  2 +-
 unix-socket.c              | 17 ++++++++++++-----
 unix-socket.h              |  3 ++-
 3 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/builtin/credential-cache.c b/builtin/credential-cache.c
index 9b3f709905979e..76a6ba37223fa9 100644
--- a/builtin/credential-cache.c
+++ b/builtin/credential-cache.c
@@ -14,7 +14,7 @@
 static int send_request(const char *socket, const struct strbuf *out)
 {
 	int got_data = 0;
-	int fd = unix_stream_connect(socket);
+	int fd = unix_stream_connect(socket, 0);
 
 	if (fd < 0)
 		return -1;
diff --git a/unix-socket.c b/unix-socket.c
index 012becd93d577d..e0be1badb58dee 100644
--- a/unix-socket.c
+++ b/unix-socket.c
@@ -30,16 +30,23 @@ static void unix_sockaddr_cleanup(struct unix_sockaddr_context *ctx)
 }
 
 static int unix_sockaddr_init(struct sockaddr_un *sa, const char *path,
-			      struct unix_sockaddr_context *ctx)
+			      struct unix_sockaddr_context *ctx,
+			      int disallow_chdir)
 {
 	int size = strlen(path) + 1;
 
 	ctx->orig_dir = NULL;
 	if (size > sizeof(sa->sun_path)) {
-		const char *slash = find_last_dir_sep(path);
+		const char *slash;
 		const char *dir;
 		struct strbuf cwd = STRBUF_INIT;
 
+		if (disallow_chdir) {
+			errno = ENAMETOOLONG;
+			return -1;
+		}
+
+		slash = find_last_dir_sep(path);
 		if (!slash) {
 			errno = ENAMETOOLONG;
 			return -1;
@@ -65,13 +72,13 @@ static int unix_sockaddr_init(struct sockaddr_un *sa, const char *path,
 	return 0;
 }
 
-int unix_stream_connect(const char *path)
+int unix_stream_connect(const char *path, int disallow_chdir)
 {
 	int fd = -1, saved_errno;
 	struct sockaddr_un sa;
 	struct unix_sockaddr_context ctx;
 
-	if (unix_sockaddr_init(&sa, path, &ctx) < 0)
+	if (unix_sockaddr_init(&sa, path, &ctx, disallow_chdir) < 0)
 		return -1;
 	fd = socket(AF_UNIX, SOCK_STREAM, 0);
 	if (fd < 0)
@@ -101,7 +108,7 @@ int unix_stream_listen(const char *path,
 
 	unlink(path);
 
-	if (unix_sockaddr_init(&sa, path, &ctx) < 0)
+	if (unix_sockaddr_init(&sa, path, &ctx, opts->disallow_chdir) < 0)
 		return -1;
 	fd = socket(AF_UNIX, SOCK_STREAM, 0);
 	if (fd < 0)
diff --git a/unix-socket.h b/unix-socket.h
index ec2fb3ea726714..8542cdd7995d79 100644
--- a/unix-socket.h
+++ b/unix-socket.h
@@ -3,11 +3,12 @@
 
 struct unix_stream_listen_opts {
 	int listen_backlog_size;
+	unsigned int disallow_chdir:1;
 };
 
 #define UNIX_STREAM_LISTEN_OPTS_INIT { 0 }
 
-int unix_stream_connect(const char *path);
+int unix_stream_connect(const char *path, int disallow_chdir);
 int unix_stream_listen(const char *path,
 		       const struct unix_stream_listen_opts *opts);
 

From c4bd6a02d70036885d3cb49eb7b4e633c8ba7bec Mon Sep 17 00:00:00 2001
From: Jeff Hostetler <jeffhost@microsoft.com>
Date: Tue, 9 Feb 2021 15:56:31 -0500
Subject: [PATCH 12/39] unix-stream-server: create unix domain socket under
 lock

Create a wrapper class for `unix_stream_listen()` that uses a ".lock"
lockfile to create the unix domain socket in a race-free manner.

Unix domain sockets have a fundamental problem on Unix systems because
they persist in the filesystem until they are deleted.  This is
independent of whether a server is actually listening for connections.
Well-behaved servers are expected to delete the socket when they
shutdown.  A new server cannot easily tell if a found socket is
attached to an active server or is leftover cruft from a dead server.
The traditional solution used by `unix_stream_listen()` is to force
delete the socket pathname and then create a new socket.  This solves
the latter (cruft) problem, but in the case of the former, it orphans
the existing server (by stealing the pathname associated with the
socket it is listening on).

We cannot directly use a .lock lockfile to create the socket because
the socket is created by `bind(2)` rather than the `open(2)` mechanism
used by `tempfile.c`.

As an alternative, we hold a plain lockfile ("<path>.lock") as a
mutual exclusion device.  Under the lock, we test if an existing
socket ("<path>") is has an active server.  If not, we create a new
socket and begin listening.  Then we use "rollback" to delete the
lockfile in all cases.

This wrapper code conceptually exists at a higher-level than the core
unix_stream_connect() and unix_stream_listen() routines that it
consumes.  It is isolated in a wrapper class for clarity.

Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
---
 Makefile                            |   1 +
 contrib/buildsystems/CMakeLists.txt |   2 +-
 unix-stream-server.c                | 128 ++++++++++++++++++++++++++++
 unix-stream-server.h                |  36 ++++++++
 4 files changed, 166 insertions(+), 1 deletion(-)
 create mode 100644 unix-stream-server.c
 create mode 100644 unix-stream-server.h

diff --git a/Makefile b/Makefile
index 40d5cab78d3f17..f29a9e82e1036e 100644
--- a/Makefile
+++ b/Makefile
@@ -1677,6 +1677,7 @@ ifdef NO_UNIX_SOCKETS
 	BASIC_CFLAGS += -DNO_UNIX_SOCKETS
 else
 	LIB_OBJS += unix-socket.o
+	LIB_OBJS += unix-stream-server.o
 endif
 
 ifdef USE_WIN32_IPC
diff --git a/contrib/buildsystems/CMakeLists.txt b/contrib/buildsystems/CMakeLists.txt
index 4bd41054ee707e..6ffbbb6a6fb360 100644
--- a/contrib/buildsystems/CMakeLists.txt
+++ b/contrib/buildsystems/CMakeLists.txt
@@ -243,7 +243,7 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Windows")
 
 elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux")
 	add_compile_definitions(PROCFS_EXECUTABLE_PATH="/proc/self/exe" HAVE_DEV_TTY )
-	list(APPEND compat_SOURCES unix-socket.c)
+	list(APPEND compat_SOURCES unix-socket.c unix-stream-server.c)
 endif()
 
 if(CMAKE_SYSTEM_NAME STREQUAL "Windows")
diff --git a/unix-stream-server.c b/unix-stream-server.c
new file mode 100644
index 00000000000000..5dfe2a9ac2c0ed
--- /dev/null
+++ b/unix-stream-server.c
@@ -0,0 +1,128 @@
+#include "cache.h"
+#include "lockfile.h"
+#include "unix-socket.h"
+#include "unix-stream-server.h"
+
+#define DEFAULT_LOCK_TIMEOUT (100)
+
+/*
+ * Try to connect to a unix domain socket at `path` (if it exists) and
+ * see if there is a server listening.
+ *
+ * We don't know if the socket exists, whether a server died and
+ * failed to cleanup, or whether we have a live server listening, so
+ * we "poke" it.
+ *
+ * We immediately hangup without sending/receiving any data because we
+ * don't know anything about the protocol spoken and don't want to
+ * block while writing/reading data.  It is sufficient to just know
+ * that someone is listening.
+ */
+static int is_another_server_alive(const char *path,
+				   const struct unix_stream_listen_opts *opts)
+{
+	int fd = unix_stream_connect(path, opts->disallow_chdir);
+	if (fd >= 0) {
+		close(fd);
+		return 1;
+	}
+
+	return 0;
+}
+
+int unix_stream_server__create(
+	const char *path,
+	const struct unix_stream_listen_opts *opts,
+	long timeout_ms,
+	struct unix_stream_server_socket **new_server_socket)
+{
+	struct lock_file lock = LOCK_INIT;
+	int fd_socket;
+	struct unix_stream_server_socket *server_socket;
+
+	*new_server_socket = NULL;
+
+	if (timeout_ms < 0)
+		timeout_ms = DEFAULT_LOCK_TIMEOUT;
+
+	/*
+	 * Create a lock at "<path>.lock" if we can.
+	 */
+	if (hold_lock_file_for_update_timeout(&lock, path, 0, timeout_ms) < 0)
+		return -1;
+
+	/*
+	 * If another server is listening on "<path>" give up.  We do not
+	 * want to create a socket and steal future connections from them.
+	 */
+	if (is_another_server_alive(path, opts)) {
+		rollback_lock_file(&lock);
+		errno = EADDRINUSE;
+		return -2;
+	}
+
+	/*
+	 * Create and bind to a Unix domain socket at "<path>".
+	 */
+	fd_socket = unix_stream_listen(path, opts);
+	if (fd_socket < 0) {
+		int saved_errno = errno;
+		rollback_lock_file(&lock);
+		errno = saved_errno;
+		return -1;
+	}
+
+	server_socket = xcalloc(1, sizeof(*server_socket));
+	server_socket->path_socket = strdup(path);
+	server_socket->fd_socket = fd_socket;
+	lstat(path, &server_socket->st_socket);
+
+	*new_server_socket = server_socket;
+
+	/*
+	 * Always rollback (just delete) "<path>.lock" because we already created
+	 * "<path>" as a socket and do not want to commit_lock to do the atomic
+	 * rename trick.
+	 */
+	rollback_lock_file(&lock);
+
+	return 0;
+}
+
+void unix_stream_server__free(
+	struct unix_stream_server_socket *server_socket)
+{
+	if (!server_socket)
+		return;
+
+	if (server_socket->fd_socket >= 0) {
+		if (!unix_stream_server__was_stolen(server_socket))
+			unlink(server_socket->path_socket);
+		close(server_socket->fd_socket);
+	}
+
+	free(server_socket->path_socket);
+	free(server_socket);
+}
+
+int unix_stream_server__was_stolen(
+	struct unix_stream_server_socket *server_socket)
+{
+	struct stat st_now;
+
+	if (!server_socket)
+		return 0;
+
+	if (lstat(server_socket->path_socket, &st_now) == -1)
+		return 1;
+
+	if (st_now.st_ino != server_socket->st_socket.st_ino)
+		return 1;
+	if (st_now.st_dev != server_socket->st_socket.st_dev)
+		return 1;
+
+	if (!S_ISSOCK(st_now.st_mode))
+		return 1;
+
+	return 0;
+}
diff --git a/unix-stream-server.h b/unix-stream-server.h
new file mode 100644
index 00000000000000..ef9241d0ef70c5
--- /dev/null
+++ b/unix-stream-server.h
@@ -0,0 +1,36 @@
+#ifndef UNIX_STREAM_SERVER_H
+#define UNIX_STREAM_SERVER_H
+
+#include "unix-socket.h"
+
+struct unix_stream_server_socket {
+	char *path_socket;
+	struct stat st_socket;
+	int fd_socket;
+};
+
+/*
+ * Create a Unix Domain Socket at the given path under the protection
+ * of a '.lock' lockfile.
+ *
+ * Returns 0 on success, -1 on error, -2 if socket is in use.
+ */
+int unix_stream_server__create(
+	const char *path,
+	const struct unix_stream_listen_opts *opts,
+	long timeout_ms,
+	struct unix_stream_server_socket **server_socket);
+
+/*
+ * Close and delete the socket.
+ */
+void unix_stream_server__free(
+	struct unix_stream_server_socket *server_socket);
+
+/*
+ * Return 1 if the inode of the pathname to our socket changes.
+ */
+int unix_stream_server__was_stolen(
+	struct unix_stream_server_socket *server_socket);
+
+#endif /* UNIX_STREAM_SERVER_H */

From ff57cc0606c50653980d8dcd49d52497863388cc Mon Sep 17 00:00:00 2001
From: Jeff Hostetler <jeffhost@microsoft.com>
Date: Fri, 12 Feb 2021 11:32:20 -0500
Subject: [PATCH 13/39] simple-ipc: add Unix domain socket implementation

Create Unix domain socket based implementation of "simple-ipc".

A set of `ipc_client` routines implement a client library to connect
to an `ipc_server` over a Unix domain socket, send a simple request,
and receive a single response.  Clients use blocking IO on the socket.

A set of `ipc_server` routines implement a thread pool to listen for
and concurrently service client connections.

The server creates a new Unix domain socket at a known location.  If a
socket already exists with that name, the server tries to determine if
another server is already listening on the socket or if the socket is
dead.  If socket is busy, the server exits with an error rather than
stealing the socket.  If the socket is dead, the server creates a new
one and starts up.

If while running, the server detects that its socket has been stolen
by another server, it automatically exits.

Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
---
 Makefile                            |   2 +
 compat/simple-ipc/ipc-unix-socket.c | 986 ++++++++++++++++++++++++++++
 contrib/buildsystems/CMakeLists.txt |   2 +
 simple-ipc.h                        |  13 +-
 4 files changed, 1002 insertions(+), 1 deletion(-)
 create mode 100644 compat/simple-ipc/ipc-unix-socket.c

diff --git a/Makefile b/Makefile
index f29a9e82e1036e..8e4bde2d81ac51 100644
--- a/Makefile
+++ b/Makefile
@@ -1678,6 +1678,8 @@ ifdef NO_UNIX_SOCKETS
 else
 	LIB_OBJS += unix-socket.o
 	LIB_OBJS += unix-stream-server.o
+	LIB_OBJS += compat/simple-ipc/ipc-shared.o
+	LIB_OBJS += compat/simple-ipc/ipc-unix-socket.o
 endif
 
 ifdef USE_WIN32_IPC
diff --git a/compat/simple-ipc/ipc-unix-socket.c b/compat/simple-ipc/ipc-unix-socket.c
new file mode 100644
index 00000000000000..6e381a9e030eb5
--- /dev/null
+++ b/compat/simple-ipc/ipc-unix-socket.c
@@ -0,0 +1,986 @@
+#include "cache.h"
+#include "simple-ipc.h"
+#include "strbuf.h"
+#include "pkt-line.h"
+#include "thread-utils.h"
+#include "unix-socket.h"
+#include "unix-stream-server.h"
+
+#ifdef NO_UNIX_SOCKETS
+#error compat/simple-ipc/ipc-unix-socket.c requires Unix sockets
+#endif
+
+enum ipc_active_state ipc_get_active_state(const char *path)
+{
+	enum ipc_active_state state = IPC_STATE__OTHER_ERROR;
+	struct ipc_client_connect_options options
+		= IPC_CLIENT_CONNECT_OPTIONS_INIT;
+	struct stat st;
+	struct ipc_client_connection *connection_test = NULL;
+
+	options.wait_if_busy = 0;
+	options.wait_if_not_found = 0;
+
+	if (lstat(path, &st) == -1) {
+		switch (errno) {
+		case ENOENT:
+		case ENOTDIR:
+			return IPC_STATE__NOT_LISTENING;
+		default:
+			return IPC_STATE__INVALID_PATH;
+		}
+	}
+
+	/* also complain if a plain file is in the way */
+	if ((st.st_mode & S_IFMT) != S_IFSOCK)
+		return IPC_STATE__INVALID_PATH;
+
+	/*
+	 * Just because the filesystem has a S_IFSOCK type inode
+	 * at `path`, doesn't mean it that there is a server listening.
+	 * Ping it to be sure.
+	 */
+	state = ipc_client_try_connect(path, &options, &connection_test);
+	ipc_client_close_connection(connection_test);
+
+	return state;
+}
+
+/*
+ * This value was chosen at random.
+ */
+#define WAIT_STEP_MS (50)
+
+/*
+ * Try to connect to the server.  If the server is just starting up or
+ * is very busy, we may not get a connection the first time.
+ */
+static enum ipc_active_state connect_to_server(
+	const char *path,
+	int timeout_ms,
+	const struct ipc_client_connect_options *options,
+	int *pfd)
+{
+	int wait_ms = 50;
+	int k;
+
+	*pfd = -1;
+
+	for (k = 0; k < timeout_ms; k += wait_ms) {
+		int fd = unix_stream_connect(path, options->uds_disallow_chdir);
+
+		if (fd != -1) {
+			*pfd = fd;
+			return IPC_STATE__LISTENING;
+		}
+
+		if (errno == ENOENT) {
+			if (!options->wait_if_not_found)
+				return IPC_STATE__PATH_NOT_FOUND;
+
+			goto sleep_and_try_again;
+		}
+
+		if (errno == ETIMEDOUT) {
+			if (!options->wait_if_busy)
+				return IPC_STATE__NOT_LISTENING;
+
+			goto sleep_and_try_again;
+		}
+
+		if (errno == ECONNREFUSED) {
+			if (!options->wait_if_busy)
+				return IPC_STATE__NOT_LISTENING;
+
+			goto sleep_and_try_again;
+		}
+
+		return IPC_STATE__OTHER_ERROR;
+
+	sleep_and_try_again:
+		sleep_millisec(wait_ms);
+	}
+
+	return IPC_STATE__NOT_LISTENING;
+}
+
+/*
+ * A randomly chosen timeout value.
+ */
+#define MY_CONNECTION_TIMEOUT_MS (1000)
+
+enum ipc_active_state ipc_client_try_connect(
+	const char *path,
+	const struct ipc_client_connect_options *options,
+	struct ipc_client_connection **p_connection)
+{
+	enum ipc_active_state state = IPC_STATE__OTHER_ERROR;
+	int fd = -1;
+
+	*p_connection = NULL;
+
+	trace2_region_enter("ipc-client", "try-connect", NULL);
+	trace2_data_string("ipc-client", NULL, "try-connect/path", path);
+
+	state = connect_to_server(path, MY_CONNECTION_TIMEOUT_MS,
+				  options, &fd);
+
+	trace2_data_intmax("ipc-client", NULL, "try-connect/state",
+			   (intmax_t)state);
+	trace2_region_leave("ipc-client", "try-connect", NULL);
+
+	if (state == IPC_STATE__LISTENING) {
+		(*p_connection) = xcalloc(1, sizeof(struct ipc_client_connection));
+		(*p_connection)->fd = fd;
+	}
+
+	return state;
+}
+
+void ipc_client_close_connection(struct ipc_client_connection *connection)
+{
+	if (!connection)
+		return;
+
+	if (connection->fd != -1)
+		close(connection->fd);
+
+	free(connection);
+}
+
+int ipc_client_send_command_to_connection(
+	struct ipc_client_connection *connection,
+	const char *message, struct strbuf *answer)
+{
+	int ret = 0;
+
+	strbuf_setlen(answer, 0);
+
+	trace2_region_enter("ipc-client", "send-command", NULL);
+
+	if (write_packetized_from_buf_no_flush(message, strlen(message),
+					       connection->fd) < 0 ||
+	    packet_flush_gently(connection->fd) < 0) {
+		ret = error(_("could not send IPC command"));
+		goto done;
+	}
+
+	if (read_packetized_to_strbuf(
+		    connection->fd, answer,
+		    PACKET_READ_GENTLE_ON_EOF | PACKET_READ_GENTLE_ON_READ_ERROR) < 0) {
+		ret = error(_("could not read IPC response"));
+		goto done;
+	}
+
+done:
+	trace2_region_leave("ipc-client", "send-command", NULL);
+	return ret;
+}
+
+int ipc_client_send_command(const char *path,
+			    const struct ipc_client_connect_options *options,
+			    const char *message, struct strbuf *answer)
+{
+	int ret = -1;
+	enum ipc_active_state state;
+	struct ipc_client_connection *connection = NULL;
+
+	state = ipc_client_try_connect(path, options, &connection);
+
+	if (state != IPC_STATE__LISTENING)
+		return ret;
+
+	ret = ipc_client_send_command_to_connection(connection, message, answer);
+
+	ipc_client_close_connection(connection);
+
+	return ret;
+}
+
+static int set_socket_blocking_flag(int fd, int make_nonblocking)
+{
+	int flags;
+
+	flags = fcntl(fd, F_GETFL, NULL);
+
+	if (flags < 0)
+		return -1;
+
+	if (make_nonblocking)
+		flags |= O_NONBLOCK;
+	else
+		flags &= ~O_NONBLOCK;
+
+	return fcntl(fd, F_SETFL, flags);
+}
+
+/*
+ * Magic numbers used to annotate callback instance data.
+ * These are used to help guard against accidentally passing the
+ * wrong instance data across multiple levels of callbacks (which
+ * is easy to do if there are `void*` arguments).
+ */
+enum magic {
+	MAGIC_SERVER_REPLY_DATA,
+	MAGIC_WORKER_THREAD_DATA,
+	MAGIC_ACCEPT_THREAD_DATA,
+	MAGIC_SERVER_DATA,
+};
+
+struct ipc_server_reply_data {
+	enum magic magic;
+	int fd;
+	struct ipc_worker_thread_data *worker_thread_data;
+};
+
+struct ipc_worker_thread_data {
+	enum magic magic;
+	struct ipc_worker_thread_data *next_thread;
+	struct ipc_server_data *server_data;
+	pthread_t pthread_id;
+};
+
+struct ipc_accept_thread_data {
+	enum magic magic;
+	struct ipc_server_data *server_data;
+
+	struct unix_stream_server_socket *server_socket;
+
+	int fd_send_shutdown;
+	int fd_wait_shutdown;
+	pthread_t pthread_id;
+};
+
+/*
+ * With unix-sockets, the conceptual "ipc-server" is implemented as a single
+ * controller "accept-thread" thread and a pool of "worker-thread" threads.
+ * The former does the usual `accept()` loop and dispatches connections
+ * to an idle worker thread.  The worker threads wait in an idle loop for
+ * a new connection, communicate with the client and relay data to/from
+ * the `application_cb` and then wait for another connection from the
+ * server thread.  This avoids the overhead of constantly creating and
+ * destroying threads.
+ */
+struct ipc_server_data {
+	enum magic magic;
+	ipc_server_application_cb *application_cb;
+	void *application_data;
+	struct strbuf buf_path;
+
+	struct ipc_accept_thread_data *accept_thread;
+	struct ipc_worker_thread_data *worker_thread_list;
+
+	pthread_mutex_t work_available_mutex;
+	pthread_cond_t work_available_cond;
+
+	/*
+	 * Accepted but not yet processed client connections are kept
+	 * in a circular buffer FIFO.  The queue is empty when the
+	 * positions are equal.
+	 */
+	int *fifo_fds;
+	int queue_size;
+	int back_pos;
+	int front_pos;
+
+	int shutdown_requested;
+	int is_stopped;
+};
+
+/*
+ * Remove and return the oldest queued connection.
+ *
+ * Returns -1 if empty.
+ */
+static int fifo_dequeue(struct ipc_server_data *server_data)
+{
+	/* ASSERT holding mutex */
+
+	int fd;
+
+	if (server_data->back_pos == server_data->front_pos)
+		return -1;
+
+	fd = server_data->fifo_fds[server_data->front_pos];
+	server_data->fifo_fds[server_data->front_pos] = -1;
+
+	server_data->front_pos++;
+	if (server_data->front_pos == server_data->queue_size)
+		server_data->front_pos = 0;
+
+	return fd;
+}
+
+/*
+ * Push a new fd onto the back of the queue.
+ *
+ * Drop it and return -1 if queue is already full.
+ */
+static int fifo_enqueue(struct ipc_server_data *server_data, int fd)
+{
+	/* ASSERT holding mutex */
+
+	int next_back_pos;
+
+	next_back_pos = server_data->back_pos + 1;
+	if (next_back_pos == server_data->queue_size)
+		next_back_pos = 0;
+
+	if (next_back_pos == server_data->front_pos) {
+		/* Queue is full. Just drop it. */
+		close(fd);
+		return -1;
+	}
+
+	server_data->fifo_fds[server_data->back_pos] = fd;
+	server_data->back_pos = next_back_pos;
+
+	return fd;
+}
+
+/*
+ * Wait for a connection to be queued to the FIFO and return it.
+ *
+ * Returns -1 if someone has already requested a shutdown.
+ */
+static int worker_thread__wait_for_connection(
+	struct ipc_worker_thread_data *worker_thread_data)
+{
+	/* ASSERT NOT holding mutex */
+
+	struct ipc_server_data *server_data = worker_thread_data->server_data;
+	int fd = -1;
+
+	pthread_mutex_lock(&server_data->work_available_mutex);
+	for (;;) {
+		if (server_data->shutdown_requested)
+			break;
+
+		fd = fifo_dequeue(server_data);
+		if (fd >= 0)
+			break;
+
+		pthread_cond_wait(&server_data->work_available_cond,
+				  &server_data->work_available_mutex);
+	}
+	pthread_mutex_unlock(&server_data->work_available_mutex);
+
+	return fd;
+}
+
+/*
+ * Forward declare our reply callback function so that any compiler
+ * errors are reported when we actually define the function (in addition
+ * to any errors reported when we try to pass this callback function as
+ * a parameter in a function call).  The former are easier to understand.
+ */
+static ipc_server_reply_cb do_io_reply_callback;
+
+/*
+ * Relay application's response message to the client process.
+ * (We do not flush at this point because we allow the caller
+ * to chunk data to the client thru us.)
+ */
+static int do_io_reply_callback(struct ipc_server_reply_data *reply_data,
+		       const char *response, size_t response_len)
+{
+	if (reply_data->magic != MAGIC_SERVER_REPLY_DATA)
+		BUG("reply_cb called with wrong instance data");
+
+	return write_packetized_from_buf_no_flush(response, response_len,
+						  reply_data->fd);
+}
+
+/* A randomly chosen value. */
+#define MY_WAIT_POLL_TIMEOUT_MS (10)
+
+/*
+ * If the client hangs up without sending any data on the wire, just
+ * quietly close the socket and ignore this client.
+ *
+ * This worker thread is committed to reading the IPC request data
+ * from the client at the other end of this fd.  Wait here for the
+ * client to actually put something on the wire -- because if the
+ * client just does a ping (connect and hangup without sending any
+ * data), our use of the pkt-line read routines will spew an error
+ * message.
+ *
+ * Return -1 if the client hung up.
+ * Return 0 if data (possibly incomplete) is ready.
+ */
+static int worker_thread__wait_for_io_start(
+	struct ipc_worker_thread_data *worker_thread_data,
+	int fd)
+{
+	struct ipc_server_data *server_data = worker_thread_data->server_data;
+	struct pollfd pollfd[1];
+	int result;
+
+	for (;;) {
+		pollfd[0].fd = fd;
+		pollfd[0].events = POLLIN;
+
+		result = poll(pollfd, 1, MY_WAIT_POLL_TIMEOUT_MS);
+		if (result < 0) {
+			if (errno == EINTR)
+				continue;
+			goto cleanup;
+		}
+
+		if (result == 0) {
+			/* a timeout */
+
+			int in_shutdown;
+
+			pthread_mutex_lock(&server_data->work_available_mutex);
+			in_shutdown = server_data->shutdown_requested;
+			pthread_mutex_unlock(&server_data->work_available_mutex);
+
+			/*
+			 * If a shutdown is already in progress and this
+			 * client has not started talking yet, just drop it.
+			 */
+			if (in_shutdown)
+				goto cleanup;
+			continue;
+		}
+
+		if (pollfd[0].revents & POLLHUP)
+			goto cleanup;
+
+		if (pollfd[0].revents & POLLIN)
+			return 0;
+
+		goto cleanup;
+	}
+
+cleanup:
+	close(fd);
+	return -1;
+}
+
+/*
+ * Receive the request/command from the client and pass it to the
+ * registered request-callback.  The request-callback will compose
+ * a response and call our reply-callback to send it to the client.
+ */
+static int worker_thread__do_io(
+	struct ipc_worker_thread_data *worker_thread_data,
+	int fd)
+{
+	/* ASSERT NOT holding lock */
+
+	struct strbuf buf = STRBUF_INIT;
+	struct ipc_server_reply_data reply_data;
+	int ret = 0;
+
+	reply_data.magic = MAGIC_SERVER_REPLY_DATA;
+	reply_data.worker_thread_data = worker_thread_data;
+
+	reply_data.fd = fd;
+
+	ret = read_packetized_to_strbuf(
+		reply_data.fd, &buf,
+		PACKET_READ_GENTLE_ON_EOF | PACKET_READ_GENTLE_ON_READ_ERROR);
+	if (ret >= 0) {
+		ret = worker_thread_data->server_data->application_cb(
+			worker_thread_data->server_data->application_data,
+			buf.buf, do_io_reply_callback, &reply_data);
+
+		packet_flush_gently(reply_data.fd);
+	}
+	else {
+		/*
+		 * The client probably disconnected/shutdown before it
+		 * could send a well-formed message.  Ignore it.
+		 */
+	}
+
+	strbuf_release(&buf);
+	close(reply_data.fd);
+
+	return ret;
+}
+
+/*
+ * Block SIGPIPE on the current thread (so that we get EPIPE from
+ * write() rather than an actual signal).
+ *
+ * Note that using sigchain_push() and _pop() to control SIGPIPE
+ * around our IO calls is not thread safe:
+ * [] It uses a global stack of handler frames.
+ * [] It uses ALLOC_GROW() to resize it.
+ * [] Finally, according to the `signal(2)` man-page:
+ *    "The effects of `signal()` in a multithreaded process are unspecified."
+ */
+static void thread_block_sigpipe(sigset_t *old_set)
+{
+	sigset_t new_set;
+
+	sigemptyset(&new_set);
+	sigaddset(&new_set, SIGPIPE);
+
+	sigemptyset(old_set);
+	pthread_sigmask(SIG_BLOCK, &new_set, old_set);
+}
+
+/*
+ * Thread proc for an IPC worker thread.  It handles a series of
+ * connections from clients.  It pulls the next fd from the queue
+ * processes it, and then waits for the next client.
+ *
+ * Block SIGPIPE in this worker thread for the life of the thread.
+ * This avoids stray (and sometimes delayed) SIGPIPE signals caused
+ * by client errors and/or when we are under extremely heavy IO load.
+ *
+ * This means that the application callback will have SIGPIPE blocked.
+ * The callback should not change it.
+ */
+static void *worker_thread_proc(void *_worker_thread_data)
+{
+	struct ipc_worker_thread_data *worker_thread_data = _worker_thread_data;
+	struct ipc_server_data *server_data = worker_thread_data->server_data;
+	sigset_t old_set;
+	int fd, io;
+	int ret;
+
+	trace2_thread_start("ipc-worker");
+
+	thread_block_sigpipe(&old_set);
+
+	for (;;) {
+		fd = worker_thread__wait_for_connection(worker_thread_data);
+		if (fd == -1)
+			break; /* in shutdown */
+
+		io = worker_thread__wait_for_io_start(worker_thread_data, fd);
+		if (io == -1)
+			continue; /* client hung up without sending anything */
+
+		ret = worker_thread__do_io(worker_thread_data, fd);
+
+		if (ret == SIMPLE_IPC_QUIT) {
+			trace2_data_string("ipc-worker", NULL, "queue_stop_async",
+					   "application_quit");
+			/*
+			 * The application layer is telling the ipc-server
+			 * layer to shutdown.
+			 *
+			 * We DO NOT have a response to send to the client.
+			 *
+			 * Queue an async stop (to stop the other threads) and
+			 * allow this worker thread to exit now (no sense waiting
+			 * for the thread-pool shutdown signal).
+			 *
+			 * Other non-idle worker threads are allowed to finish
+			 * responding to their current clients.
+			 */
+			ipc_server_stop_async(server_data);
+			break;
+		}
+	}
+
+	trace2_thread_exit();
+	return NULL;
+}
+
+/* A randomly chosen value. */
+#define MY_ACCEPT_POLL_TIMEOUT_MS (60 * 1000)
+
+/*
+ * Accept a new client connection on our socket.  This uses non-blocking
+ * IO so that we can also wait for shutdown requests on our socket-pair
+ * without actually spinning on a fast timeout.
+ */
+static int accept_thread__wait_for_connection(
+	struct ipc_accept_thread_data *accept_thread_data)
+{
+	struct pollfd pollfd[2];
+	int result;
+
+	for (;;) {
+		pollfd[0].fd = accept_thread_data->fd_wait_shutdown;
+		pollfd[0].events = POLLIN;
+
+		pollfd[1].fd = accept_thread_data->server_socket->fd_socket;
+		pollfd[1].events = POLLIN;
+
+		result = poll(pollfd, 2, MY_ACCEPT_POLL_TIMEOUT_MS);
+		if (result < 0) {
+			if (errno == EINTR)
+				continue;
+			return result;
+		}
+
+		if (result == 0) {
+			/* a timeout */
+
+			/*
+			 * If someone deletes or force-creates a new unix
+			 * domain socket at our path, all future clients
+			 * will be routed elsewhere and we silently starve.
+			 * If that happens, just queue a shutdown.
+			 */
+			if (unix_stream_server__was_stolen(
+				    accept_thread_data->server_socket)) {
+				trace2_data_string("ipc-accept", NULL,
+						   "queue_stop_async",
+						   "socket_stolen");
+				ipc_server_stop_async(
+					accept_thread_data->server_data);
+			}
+			continue;
+		}
+
+		if (pollfd[0].revents & POLLIN) {
+			/* shutdown message queued to socketpair */
+			return -1;
+		}
+
+		if (pollfd[1].revents & POLLIN) {
+			/* a connection is available on server_socket */
+
+			int client_fd =
+				accept(accept_thread_data->server_socket->fd_socket,
+				       NULL, NULL);
+			if (client_fd >= 0)
+				return client_fd;
+
+			/*
+			 * An error here is unlikely -- it probably
+			 * indicates that the connecting process has
+			 * already dropped the connection.
+			 */
+			continue;
+		}
+
+		BUG("unandled poll result errno=%d r[0]=%d r[1]=%d",
+		    errno, pollfd[0].revents, pollfd[1].revents);
+	}
+}
+
+/*
+ * Thread proc for the IPC server "accept thread".  This waits for
+ * an incoming socket connection, appends it to the queue of available
+ * connections, and notifies a worker thread to process it.
+ *
+ * Block SIGPIPE in this thread for the life of the thread.  This
+ * avoids any stray SIGPIPE signals when closing pipe fds under
+ * extremely heavy loads (such as when the fifo queue is full and we
+ * drop incomming connections).
+ */
+static void *accept_thread_proc(void *_accept_thread_data)
+{
+	struct ipc_accept_thread_data *accept_thread_data = _accept_thread_data;
+	struct ipc_server_data *server_data = accept_thread_data->server_data;
+	sigset_t old_set;
+
+	trace2_thread_start("ipc-accept");
+
+	thread_block_sigpipe(&old_set);
+
+	for (;;) {
+		int client_fd = accept_thread__wait_for_connection(
+			accept_thread_data);
+
+		pthread_mutex_lock(&server_data->work_available_mutex);
+		if (server_data->shutdown_requested) {
+			pthread_mutex_unlock(&server_data->work_available_mutex);
+			if (client_fd >= 0)
+				close(client_fd);
+			break;
+		}
+
+		if (client_fd < 0) {
+			/* ignore transient accept() errors */
+		}
+		else {
+			fifo_enqueue(server_data, client_fd);
+			pthread_cond_broadcast(&server_data->work_available_cond);
+		}
+		pthread_mutex_unlock(&server_data->work_available_mutex);
+	}
+
+	trace2_thread_exit();
+	return NULL;
+}
+
+/*
+ * We can't predict the connection arrival rate relative to the worker
+ * processing rate, therefore we allow the "accept-thread" to queue up
+ * a generous number of connections, since we'd rather have the client
+ * not unnecessarily timeout if we can avoid it.  (The assumption is
+ * that this will be used for FSMonitor and a few second wait on a
+ * connection is better than having the client timeout and do the full
+ * computation itself.)
+ *
+ * The FIFO queue size is set to a multiple of the worker pool size.
+ * This value chosen at random.
+ */
+#define FIFO_SCALE (100)
+
+/*
+ * The backlog value for `listen(2)`.  This doesn't need to huge,
+ * rather just large enough for our "accept-thread" to wake up and
+ * queue incoming connections onto the FIFO without the kernel
+ * dropping any.
+ *
+ * This value chosen at random.
+ */
+#define LISTEN_BACKLOG (50)
+
+static int create_listener_socket(
+	const char *path,
+	const struct ipc_server_opts *ipc_opts,
+	struct unix_stream_server_socket **new_server_socket)
+{
+	struct unix_stream_server_socket *server_socket = NULL;
+	struct unix_stream_listen_opts uslg_opts = UNIX_STREAM_LISTEN_OPTS_INIT;
+	int ret;
+
+	uslg_opts.listen_backlog_size = LISTEN_BACKLOG;
+	uslg_opts.disallow_chdir = ipc_opts->uds_disallow_chdir;
+
+	ret = unix_stream_server__create(path, &uslg_opts, -1, &server_socket);
+	if (ret)
+		return ret;
+
+	if (set_socket_blocking_flag(server_socket->fd_socket, 1)) {
+		int saved_errno = errno;
+		unix_stream_server__free(server_socket);
+		errno = saved_errno;
+		return -1;
+	}
+
+	*new_server_socket = server_socket;
+
+	trace2_data_string("ipc-server", NULL, "listen-with-lock", path);
+	return 0;
+}
+
+static int setup_listener_socket(
+	const char *path,
+	const struct ipc_server_opts *ipc_opts,
+	struct unix_stream_server_socket **new_server_socket)
+{
+	int ret, saved_errno;
+
+	trace2_region_enter("ipc-server", "create-listener_socket", NULL);
+
+	ret = create_listener_socket(path, ipc_opts, new_server_socket);
+
+	saved_errno = errno;
+	trace2_region_leave("ipc-server", "create-listener_socket", NULL);
+	errno = saved_errno;
+
+	return ret;
+}
+
+/*
+ * Start IPC server in a pool of background threads.
+ */
+int ipc_server_run_async(struct ipc_server_data **returned_server_data,
+			 const char *path, const struct ipc_server_opts *opts,
+			 ipc_server_application_cb *application_cb,
+			 void *application_data)
+{
+	struct unix_stream_server_socket *server_socket = NULL;
+	struct ipc_server_data *server_data;
+	int sv[2];
+	int k;
+	int ret;
+	int nr_threads = opts->nr_threads;
+
+	*returned_server_data = NULL;
+
+	/*
+	 * Create a socketpair and set sv[1] to non-blocking.  This
+	 * will used to send a shutdown message to the accept-thread
+	 * and allows the accept-thread to wait on EITHER a client
+	 * connection or a shutdown request without spinning.
+	 */
+	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) < 0)
+		return -1;
+
+	if (set_socket_blocking_flag(sv[1], 1)) {
+		int saved_errno = errno;
+		close(sv[0]);
+		close(sv[1]);
+		errno = saved_errno;
+		return -1;
+	}
+
+	ret = setup_listener_socket(path, opts, &server_socket);
+	if (ret) {
+		int saved_errno = errno;
+		close(sv[0]);
+		close(sv[1]);
+		errno = saved_errno;
+		return ret;
+	}
+
+	server_data = xcalloc(1, sizeof(*server_data));
+	server_data->magic = MAGIC_SERVER_DATA;
+	server_data->application_cb = application_cb;
+	server_data->application_data = application_data;
+	strbuf_init(&server_data->buf_path, 0);
+	strbuf_addstr(&server_data->buf_path, path);
+
+	if (nr_threads < 1)
+		nr_threads = 1;
+
+	pthread_mutex_init(&server_data->work_available_mutex, NULL);
+	pthread_cond_init(&server_data->work_available_cond, NULL);
+
+	server_data->queue_size = nr_threads * FIFO_SCALE;
+	server_data->fifo_fds = xcalloc(server_data->queue_size,
+					sizeof(*server_data->fifo_fds));
+
+	server_data->accept_thread =
+		xcalloc(1, sizeof(*server_data->accept_thread));
+	server_data->accept_thread->magic = MAGIC_ACCEPT_THREAD_DATA;
+	server_data->accept_thread->server_data = server_data;
+	server_data->accept_thread->server_socket = server_socket;
+	server_data->accept_thread->fd_send_shutdown = sv[0];
+	server_data->accept_thread->fd_wait_shutdown = sv[1];
+
+	if (pthread_create(&server_data->accept_thread->pthread_id, NULL,
+			   accept_thread_proc, server_data->accept_thread))
+		die_errno(_("could not start accept_thread '%s'"), path);
+
+	for (k = 0; k < nr_threads; k++) {
+		struct ipc_worker_thread_data *wtd;
+
+		wtd = xcalloc(1, sizeof(*wtd));
+		wtd->magic = MAGIC_WORKER_THREAD_DATA;
+		wtd->server_data = server_data;
+
+		if (pthread_create(&wtd->pthread_id, NULL, worker_thread_proc,
+				   wtd)) {
+			if (k == 0)
+				die(_("could not start worker[0] for '%s'"),
+				    path);
+			/*
+			 * Limp along with the thread pool that we have.
+			 */
+			break;
+		}
+
+		wtd->next_thread = server_data->worker_thread_list;
+		server_data->worker_thread_list = wtd;
+	}
+
+	*returned_server_data = server_data;
+	return 0;
+}
+
+/*
+ * Gently tell the IPC server treads to shutdown.
+ * Can be run on any thread.
+ */
+int ipc_server_stop_async(struct ipc_server_data *server_data)
+{
+	/* ASSERT NOT holding mutex */
+
+	int fd;
+
+	if (!server_data)
+		return 0;
+
+	trace2_region_enter("ipc-server", "server-stop-async", NULL);
+
+	pthread_mutex_lock(&server_data->work_available_mutex);
+
+	server_data->shutdown_requested = 1;
+
+	/*
+	 * Write a byte to the shutdown socket pair to wake up the
+	 * accept-thread.
+	 */
+	if (write(server_data->accept_thread->fd_send_shutdown, "Q", 1) < 0)
+		error_errno("could not write to fd_send_shutdown");
+
+	/*
+	 * Drain the queue of existing connections.
+	 */
+	while ((fd = fifo_dequeue(server_data)) != -1)
+		close(fd);
+
+	/*
+	 * Gently tell worker threads to stop processing new connections
+	 * and exit.  (This does not abort in-process conversations.)
+	 */
+	pthread_cond_broadcast(&server_data->work_available_cond);
+
+	pthread_mutex_unlock(&server_data->work_available_mutex);
+
+	trace2_region_leave("ipc-server", "server-stop-async", NULL);
+
+	return 0;
+}
+
+/*
+ * Wait for all IPC server threads to stop.
+ */
+int ipc_server_await(struct ipc_server_data *server_data)
+{
+	pthread_join(server_data->accept_thread->pthread_id, NULL);
+
+	if (!server_data->shutdown_requested)
+		BUG("ipc-server: accept-thread stopped for '%s'",
+		    server_data->buf_path.buf);
+
+	while (server_data->worker_thread_list) {
+		struct ipc_worker_thread_data *wtd =
+			server_data->worker_thread_list;
+
+		pthread_join(wtd->pthread_id, NULL);
+
+		server_data->worker_thread_list = wtd->next_thread;
+		free(wtd);
+	}
+
+	server_data->is_stopped = 1;
+
+	return 0;
+}
+
+void ipc_server_free(struct ipc_server_data *server_data)
+{
+	struct ipc_accept_thread_data * accept_thread_data;
+
+	if (!server_data)
+		return;
+
+	if (!server_data->is_stopped)
+		BUG("cannot free ipc-server while running for '%s'",
+		    server_data->buf_path.buf);
+
+	accept_thread_data = server_data->accept_thread;
+	if (accept_thread_data) {
+		unix_stream_server__free(accept_thread_data->server_socket);
+
+		if (accept_thread_data->fd_send_shutdown != -1)
+			close(accept_thread_data->fd_send_shutdown);
+		if (accept_thread_data->fd_wait_shutdown != -1)
+			close(accept_thread_data->fd_wait_shutdown);
+
+		free(server_data->accept_thread);
+	}
+
+	while (server_data->worker_thread_list) {
+		struct ipc_worker_thread_data *wtd =
+			server_data->worker_thread_list;
+
+		server_data->worker_thread_list = wtd->next_thread;
+		free(wtd);
+	}
+
+	pthread_cond_destroy(&server_data->work_available_cond);
+	pthread_mutex_destroy(&server_data->work_available_mutex);
+
+	strbuf_release(&server_data->buf_path);
+
+	free(server_data->fifo_fds);
+	free(server_data);
+}
diff --git a/contrib/buildsystems/CMakeLists.txt b/contrib/buildsystems/CMakeLists.txt
index 6ffbbb6a6fb360..629a1817459b8c 100644
--- a/contrib/buildsystems/CMakeLists.txt
+++ b/contrib/buildsystems/CMakeLists.txt
@@ -248,6 +248,8 @@ endif()
 
 if(CMAKE_SYSTEM_NAME STREQUAL "Windows")
 	list(APPEND compat_SOURCES compat/simple-ipc/ipc-shared.c compat/simple-ipc/ipc-win32.c)
+else()
+	list(APPEND compat_SOURCES compat/simple-ipc/ipc-shared.c compat/simple-ipc/ipc-unix-socket.c)
 endif()
 
 set(EXE_EXTENSION ${CMAKE_EXECUTABLE_SUFFIX})
diff --git a/simple-ipc.h b/simple-ipc.h
index ab5619e3d76f9b..dc3606e30bd619 100644
--- a/simple-ipc.h
+++ b/simple-ipc.h
@@ -5,7 +5,7 @@
  * See Documentation/technical/api-simple-ipc.txt
  */
 
-#if defined(GIT_WINDOWS_NATIVE)
+#if defined(GIT_WINDOWS_NATIVE) || !defined(NO_UNIX_SOCKETS)
 #define SUPPORTS_SIMPLE_IPC
 #endif
 
@@ -62,11 +62,17 @@ struct ipc_client_connect_options {
 	 * the service and need to wait for it to become ready.
 	 */
 	unsigned int wait_if_not_found:1;
+
+	/*
+	 * Disallow chdir() when creating a Unix domain socket.
+	 */
+	unsigned int uds_disallow_chdir:1;
 };
 
 #define IPC_CLIENT_CONNECT_OPTIONS_INIT { \
 	.wait_if_busy = 0, \
 	.wait_if_not_found = 0, \
+	.uds_disallow_chdir = 0, \
 }
 
 /*
@@ -159,6 +165,11 @@ struct ipc_server_data;
 struct ipc_server_opts
 {
 	int nr_threads;
+
+	/*
+	 * Disallow chdir() when creating a Unix domain socket.
+	 */
+	unsigned int uds_disallow_chdir:1;
 };
 
 /*

From 6202d0eba51f92cabfd5ae61f779a82c04088a4a Mon Sep 17 00:00:00 2001
From: Jeff Hostetler <jeffhost@microsoft.com>
Date: Wed, 8 Jul 2020 16:08:58 -0400
Subject: [PATCH 14/39] t0052: add simple-ipc tests and
 t/helper/test-simple-ipc tool

Create t0052-simple-ipc.sh with unit tests for the "simple-ipc" mechanism.

Create t/helper/test-simple-ipc test tool to exercise the "simple-ipc"
functions.

When the tool is invoked with "run-daemon", it runs a server to listen
for "simple-ipc" connections on a test socket or named pipe and
responds to a set of commands to exercise/stress the communication
setup.

When the tool is invoked with "start-daemon", it spawns a "run-daemon"
command in the background and waits for the server to become ready
before exiting.  (This helps make unit tests in t0052 more predictable
and avoids the need for arbitrary sleeps in the test script.)

The tool also has a series of client "send" commands to send commands
and data to a server instance.

Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
---
 Makefile                   |   1 +
 t/helper/test-simple-ipc.c | 787 +++++++++++++++++++++++++++++++++++++
 t/helper/test-tool.c       |   1 +
 t/helper/test-tool.h       |   1 +
 t/t0052-simple-ipc.sh      | 122 ++++++
 5 files changed, 912 insertions(+)
 create mode 100644 t/helper/test-simple-ipc.c
 create mode 100755 t/t0052-simple-ipc.sh

diff --git a/Makefile b/Makefile
index 8e4bde2d81ac51..dfc37b0480e481 100644
--- a/Makefile
+++ b/Makefile
@@ -740,6 +740,7 @@ TEST_BUILTINS_OBJS += test-serve-v2.o
 TEST_BUILTINS_OBJS += test-sha1.o
 TEST_BUILTINS_OBJS += test-sha256.o
 TEST_BUILTINS_OBJS += test-sigchain.o
+TEST_BUILTINS_OBJS += test-simple-ipc.o
 TEST_BUILTINS_OBJS += test-strcmp-offset.o
 TEST_BUILTINS_OBJS += test-string-list.o
 TEST_BUILTINS_OBJS += test-submodule-config.o
diff --git a/t/helper/test-simple-ipc.c b/t/helper/test-simple-ipc.c
new file mode 100644
index 00000000000000..42040ef81b1e84
--- /dev/null
+++ b/t/helper/test-simple-ipc.c
@@ -0,0 +1,787 @@
+/*
+ * test-simple-ipc.c: verify that the Inter-Process Communication works.
+ */
+
+#include "test-tool.h"
+#include "cache.h"
+#include "strbuf.h"
+#include "simple-ipc.h"
+#include "parse-options.h"
+#include "thread-utils.h"
+#include "strvec.h"
+
+#ifndef SUPPORTS_SIMPLE_IPC
+int cmd__simple_ipc(int argc, const char **argv)
+{
+	die("simple IPC not available on this platform");
+}
+#else
+
+/*
+ * The test daemon defines an "application callback" that supports a
+ * series of commands (see `test_app_cb()`).
+ *
+ * Unknown commands are caught here and we send an error message back
+ * to the client process.
+ */
+static int app__unhandled_command(const char *command,
+				  ipc_server_reply_cb *reply_cb,
+				  struct ipc_server_reply_data *reply_data)
+{
+	struct strbuf buf = STRBUF_INIT;
+	int ret;
+
+	strbuf_addf(&buf, "unhandled command: %s", command);
+	ret = reply_cb(reply_data, buf.buf, buf.len);
+	strbuf_release(&buf);
+
+	return ret;
+}
+
+/*
+ * Reply with a single very large buffer.  This is to ensure that
+ * long response are properly handled -- whether the chunking occurs
+ * in the kernel or in the (probably pkt-line) layer.
+ */
+#define BIG_ROWS (10000)
+static int app__big_command(ipc_server_reply_cb *reply_cb,
+			    struct ipc_server_reply_data *reply_data)
+{
+	struct strbuf buf = STRBUF_INIT;
+	int row;
+	int ret;
+
+	for (row = 0; row < BIG_ROWS; row++)
+		strbuf_addf(&buf, "big: %.75d\n", row);
+
+	ret = reply_cb(reply_data, buf.buf, buf.len);
+	strbuf_release(&buf);
+
+	return ret;
+}
+
+/*
+ * Reply with a series of lines.  This is to ensure that we can incrementally
+ * compute the response and chunk it to the client.
+ */
+#define CHUNK_ROWS (10000)
+static int app__chunk_command(ipc_server_reply_cb *reply_cb,
+			      struct ipc_server_reply_data *reply_data)
+{
+	struct strbuf buf = STRBUF_INIT;
+	int row;
+	int ret;
+
+	for (row = 0; row < CHUNK_ROWS; row++) {
+		strbuf_setlen(&buf, 0);
+		strbuf_addf(&buf, "big: %.75d\n", row);
+		ret = reply_cb(reply_data, buf.buf, buf.len);
+	}
+
+	strbuf_release(&buf);
+
+	return ret;
+}
+
+/*
+ * Slowly reply with a series of lines.  This is to model an expensive to
+ * compute chunked response (which might happen if this callback is running
+ * in a thread and is fighting for a lock with other threads).
+ */
+#define SLOW_ROWS     (1000)
+#define SLOW_DELAY_MS (10)
+static int app__slow_command(ipc_server_reply_cb *reply_cb,
+			     struct ipc_server_reply_data *reply_data)
+{
+	struct strbuf buf = STRBUF_INIT;
+	int row;
+	int ret;
+
+	for (row = 0; row < SLOW_ROWS; row++) {
+		strbuf_setlen(&buf, 0);
+		strbuf_addf(&buf, "big: %.75d\n", row);
+		ret = reply_cb(reply_data, buf.buf, buf.len);
+		sleep_millisec(SLOW_DELAY_MS);
+	}
+
+	strbuf_release(&buf);
+
+	return ret;
+}
+
+/*
+ * The client sent a command followed by a (possibly very) large buffer.
+ */
+static int app__sendbytes_command(const char *received,
+				  ipc_server_reply_cb *reply_cb,
+				  struct ipc_server_reply_data *reply_data)
+{
+	struct strbuf buf_resp = STRBUF_INIT;
+	const char *p = "?";
+	int len_ballast = 0;
+	int k;
+	int errs = 0;
+	int ret;
+
+	if (skip_prefix(received, "sendbytes ", &p))
+		len_ballast = strlen(p);
+
+	/*
+	 * Verify that the ballast is n copies of a single letter.
+	 * And that the multi-threaded IO layer didn't cross the streams.
+	 */
+	for (k = 1; k < len_ballast; k++)
+		if (p[k] != p[0])
+			errs++;
+
+	if (errs)
+		strbuf_addf(&buf_resp, "errs:%d\n", errs);
+	else
+		strbuf_addf(&buf_resp, "rcvd:%c%08d\n", p[0], len_ballast);
+
+	ret = reply_cb(reply_data, buf_resp.buf, buf_resp.len);
+
+	strbuf_release(&buf_resp);
+
+	return ret;
+}
+
+/*
+ * An arbitrary fixed address to verify that the application instance
+ * data is handled properly.
+ */
+static int my_app_data = 42;
+
+static ipc_server_application_cb test_app_cb;
+
+/*
+ * This is the "application callback" that sits on top of the
+ * "ipc-server".  It completely defines the set of commands supported
+ * by this application.
+ */
+static int test_app_cb(void *application_data,
+		       const char *command,
+		       ipc_server_reply_cb *reply_cb,
+		       struct ipc_server_reply_data *reply_data)
+{
+	/*
+	 * Verify that we received the application-data that we passed
+	 * when we started the ipc-server.  (We have several layers of
+	 * callbacks calling callbacks and it's easy to get things mixed
+	 * up (especially when some are "void*").)
+	 */
+	if (application_data != (void*)&my_app_data)
+		BUG("application_cb: application_data pointer wrong");
+
+	if (!strcmp(command, "quit")) {
+		/*
+		 * The client sent a "quit" command.  This is an async
+		 * request for the server to shutdown.
+		 *
+		 * We DO NOT send the client a response message
+		 * (because we have nothing to say and the other
+		 * server threads have not yet stopped).
+		 *
+		 * Tell the ipc-server layer to start shutting down.
+		 * This includes: stop listening for new connections
+		 * on the socket/pipe and telling all worker threads
+		 * to finish/drain their outgoing responses to other
+		 * clients.
+		 *
+		 * This DOES NOT force an immediate sync shutdown.
+		 */
+		return SIMPLE_IPC_QUIT;
+	}
+
+	if (!strcmp(command, "ping")) {
+		const char *answer = "pong";
+		return reply_cb(reply_data, answer, strlen(answer));
+	}
+
+	if (!strcmp(command, "big"))
+		return app__big_command(reply_cb, reply_data);
+
+	if (!strcmp(command, "chunk"))
+		return app__chunk_command(reply_cb, reply_data);
+
+	if (!strcmp(command, "slow"))
+		return app__slow_command(reply_cb, reply_data);
+
+	if (starts_with(command, "sendbytes "))
+		return app__sendbytes_command(command, reply_cb, reply_data);
+
+	return app__unhandled_command(command, reply_cb, reply_data);
+}
+
+struct cl_args
+{
+	const char *subcommand;
+	const char *path;
+	const char *token;
+
+	int nr_threads;
+	int max_wait_sec;
+	int bytecount;
+	int batchsize;
+
+	char bytevalue;
+};
+
+static struct cl_args cl_args = {
+	.subcommand = NULL,
+	.path = "ipc-test",
+	.token = NULL,
+
+	.nr_threads = 5,
+	.max_wait_sec = 60,
+	.bytecount = 1024,
+	.batchsize = 10,
+
+	.bytevalue = 'x',
+};
+
+/*
+ * This process will run as a simple-ipc server and listen for IPC commands
+ * from client processes.
+ */
+static int daemon__run_server(void)
+{
+	int ret;
+
+	struct ipc_server_opts opts = {
+		.nr_threads = cl_args.nr_threads,
+	};
+
+	/*
+	 * Synchronously run the ipc-server.  We don't need any application
+	 * instance data, so pass an arbitrary pointer (that we'll later
+	 * verify made the round trip).
+	 */
+	ret = ipc_server_run(cl_args.path, &opts, test_app_cb, (void*)&my_app_data);
+	if (ret == -2)
+		error(_("socket/pipe already in use: '%s'"), cl_args.path);
+	else if (ret == -1)
+		error_errno(_("could not start server on: '%s'"), cl_args.path);
+
+	return ret;
+}
+
+#ifndef GIT_WINDOWS_NATIVE
+/*
+ * This is adapted from `daemonize()`.  Use `fork()` to directly create and
+ * run the daemon in a child process.
+ */
+static int spawn_server(pid_t *pid)
+{
+	struct ipc_server_opts opts = {
+		.nr_threads = cl_args.nr_threads,
+	};
+
+	*pid = fork();
+
+	switch (*pid) {
+	case 0:
+		if (setsid() == -1)
+			error_errno(_("setsid failed"));
+		close(0);
+		close(1);
+		close(2);
+		sanitize_stdfds();
+
+		return ipc_server_run(cl_args.path, &opts, test_app_cb,
+				      (void*)&my_app_data);
+
+	case -1:
+		return error_errno(_("could not spawn daemon in the background"));
+
+	default:
+		return 0;
+	}
+}
+#else
+/*
+ * Conceptually like `daemonize()` but different because Windows does not
+ * have `fork(2)`.  Spawn a normal Windows child process but without the
+ * limitations of `start_command()` and `finish_command()`.
+ */
+static int spawn_server(pid_t *pid)
+{
+	char test_tool_exe[MAX_PATH];
+	struct strvec args = STRVEC_INIT;
+	int in, out;
+
+	GetModuleFileNameA(NULL, test_tool_exe, MAX_PATH);
+
+	in = open("/dev/null", O_RDONLY);
+	out = open("/dev/null", O_WRONLY);
+
+	strvec_push(&args, test_tool_exe);
+	strvec_push(&args, "simple-ipc");
+	strvec_push(&args, "run-daemon");
+	strvec_pushf(&args, "--name=%s", cl_args.path);
+	strvec_pushf(&args, "--threads=%d", cl_args.nr_threads);
+
+	*pid = mingw_spawnvpe(args.v[0], args.v, NULL, NULL, in, out, out);
+	close(in);
+	close(out);
+
+	strvec_clear(&args);
+
+	if (*pid < 0)
+		return error(_("could not spawn daemon in the background"));
+
+	return 0;
+}
+#endif
+
+/*
+ * This is adapted from `wait_or_whine()`.  Watch the child process and
+ * let it get started and begin listening for requests on the socket
+ * before reporting our success.
+ */
+static int wait_for_server_startup(pid_t pid_child)
+{
+	int status;
+	pid_t pid_seen;
+	enum ipc_active_state s;
+	time_t time_limit, now;
+
+	time(&time_limit);
+	time_limit += cl_args.max_wait_sec;
+
+	for (;;) {
+		pid_seen = waitpid(pid_child, &status, WNOHANG);
+
+		if (pid_seen == -1)
+			return error_errno(_("waitpid failed"));
+
+		else if (pid_seen == 0) {
+			/*
+			 * The child is still running (this should be
+			 * the normal case).  Try to connect to it on
+			 * the socket and see if it is ready for
+			 * business.
+			 *
+			 * If there is another daemon already running,
+			 * our child will fail to start (possibly
+			 * after a timeout on the lock), but we don't
+			 * care (who responds) if the socket is live.
+			 */
+			s = ipc_get_active_state(cl_args.path);
+			if (s == IPC_STATE__LISTENING)
+				return 0;
+
+			time(&now);
+			if (now > time_limit)
+				return error(_("daemon not online yet"));
+
+			continue;
+		}
+
+		else if (pid_seen == pid_child) {
+			/*
+			 * The new child daemon process shutdown while
+			 * it was starting up, so it is not listening
+			 * on the socket.
+			 *
+			 * Try to ping the socket in the odd chance
+			 * that another daemon started (or was already
+			 * running) while our child was starting.
+			 *
+			 * Again, we don't care who services the socket.
+			 */
+			s = ipc_get_active_state(cl_args.path);
+			if (s == IPC_STATE__LISTENING)
+				return 0;
+
+			/*
+			 * We don't care about the WEXITSTATUS() nor
+			 * any of the WIF*(status) values because
+			 * `cmd__simple_ipc()` does the `!!result`
+			 * trick on all function return values.
+			 *
+			 * So it is sufficient to just report the
+			 * early shutdown as an error.
+			 */
+			return error(_("daemon failed to start"));
+		}
+
+		else
+			return error(_("waitpid is confused"));
+	}
+}
+
+/*
+ * This process will start a simple-ipc server in a background process and
+ * wait for it to become ready.  This is like `daemonize()` but gives us
+ * more control and better error reporting (and makes it easier to write
+ * unit tests).
+ */
+static int daemon__start_server(void)
+{
+	pid_t pid_child;
+	int ret;
+
+	/*
+	 * Run the actual daemon in a background process.
+	 */
+	ret = spawn_server(&pid_child);
+	if (pid_child <= 0)
+		return ret;
+
+	/*
+	 * Let the parent wait for the child process to get started
+	 * and begin listening for requests on the socket.
+	 */
+	ret = wait_for_server_startup(pid_child);
+
+	return ret;
+}
+
+/*
+ * This process will run a quick probe to see if a simple-ipc server
+ * is active on this path.
+ *
+ * Returns 0 if the server is alive.
+ */
+static int client__probe_server(void)
+{
+	enum ipc_active_state s;
+
+	s = ipc_get_active_state(cl_args.path);
+	switch (s) {
+	case IPC_STATE__LISTENING:
+		return 0;
+
+	case IPC_STATE__NOT_LISTENING:
+		return error("no server listening at '%s'", cl_args.path);
+
+	case IPC_STATE__PATH_NOT_FOUND:
+		return error("path not found '%s'", cl_args.path);
+
+	case IPC_STATE__INVALID_PATH:
+		return error("invalid pipe/socket name '%s'", cl_args.path);
+
+	case IPC_STATE__OTHER_ERROR:
+	default:
+		return error("other error for '%s'", cl_args.path);
+	}
+}
+
+/*
+ * Send an IPC command token to an already-running server daemon and
+ * print the response.
+ *
+ * This is a simple 1 word command/token that `test_app_cb()` (in the
+ * daemon process) will understand.
+ */
+static int client__send_ipc(void)
+{
+	const char *command = "(no-command)";
+	struct strbuf buf = STRBUF_INIT;
+	struct ipc_client_connect_options options
+		= IPC_CLIENT_CONNECT_OPTIONS_INIT;
+
+	if (cl_args.token && *cl_args.token)
+		command = cl_args.token;
+
+	options.wait_if_busy = 1;
+	options.wait_if_not_found = 0;
+
+	if (!ipc_client_send_command(cl_args.path, &options, command, &buf)) {
+		if (buf.len) {
+			printf("%s\n", buf.buf);
+			fflush(stdout);
+		}
+		strbuf_release(&buf);
+
+		return 0;
+	}
+
+	return error("failed to send '%s' to '%s'", command, cl_args.path);
+}
+
+/*
+ * Send an IPC command to an already-running server and ask it to
+ * shutdown.  "send quit" is an async request and queues a shutdown
+ * event in the server, so we spin and wait here for it to actually
+ * shutdown to make the unit tests a little easier to write.
+ */
+static int client__stop_server(void)
+{
+	int ret;
+	time_t time_limit, now;
+	enum ipc_active_state s;
+
+	time(&time_limit);
+	time_limit += cl_args.max_wait_sec;
+
+	cl_args.token = "quit";
+
+	ret = client__send_ipc();
+	if (ret)
+		return ret;
+
+	for (;;) {
+		sleep_millisec(100);
+
+		s = ipc_get_active_state(cl_args.path);
+
+		if (s != IPC_STATE__LISTENING) {
+			/*
+			 * The socket/pipe is gone and/or has stopped
+			 * responding.  Lets assume that the daemon
+			 * process has exited too.
+			 */
+			return 0;
+		}
+
+		time(&now);
+		if (now > time_limit)
+			return error(_("daemon has not shutdown yet"));
+	}
+}
+
+/*
+ * Send an IPC command followed by ballast to confirm that a large
+ * message can be sent and that the kernel or pkt-line layers will
+ * properly chunk it and that the daemon receives the entire message.
+ */
+static int do_sendbytes(int bytecount, char byte, const char *path,
+			const struct ipc_client_connect_options *options)
+{
+	struct strbuf buf_send = STRBUF_INIT;
+	struct strbuf buf_resp = STRBUF_INIT;
+
+	strbuf_addstr(&buf_send, "sendbytes ");
+	strbuf_addchars(&buf_send, byte, bytecount);
+
+	if (!ipc_client_send_command(path, options, buf_send.buf, &buf_resp)) {
+		strbuf_rtrim(&buf_resp);
+		printf("sent:%c%08d %s\n", byte, bytecount, buf_resp.buf);
+		fflush(stdout);
+		strbuf_release(&buf_send);
+		strbuf_release(&buf_resp);
+
+		return 0;
+	}
+
+	return error("client failed to sendbytes(%d, '%c') to '%s'",
+		     bytecount, byte, path);
+}
+
+/*
+ * Send an IPC command with ballast to an already-running server daemon.
+ */
+static int client__sendbytes(void)
+{
+	struct ipc_client_connect_options options
+		= IPC_CLIENT_CONNECT_OPTIONS_INIT;
+
+	options.wait_if_busy = 1;
+	options.wait_if_not_found = 0;
+	options.uds_disallow_chdir = 0;
+
+	return do_sendbytes(cl_args.bytecount, cl_args.bytevalue, cl_args.path,
+			    &options);
+}
+
+struct multiple_thread_data {
+	pthread_t pthread_id;
+	struct multiple_thread_data *next;
+	const char *path;
+	int bytecount;
+	int batchsize;
+	int sum_errors;
+	int sum_good;
+	char letter;
+};
+
+static void *multiple_thread_proc(void *_multiple_thread_data)
+{
+	struct multiple_thread_data *d = _multiple_thread_data;
+	int k;
+	struct ipc_client_connect_options options
+		= IPC_CLIENT_CONNECT_OPTIONS_INIT;
+
+	options.wait_if_busy = 1;
+	options.wait_if_not_found = 0;
+	/*
+	 * A multi-threaded client should not be randomly calling chdir().
+	 * The test will pass without this restriction because the test is
+	 * not otherwise accessing the filesystem, but it makes us honest.
+	 */
+	options.uds_disallow_chdir = 1;
+
+	trace2_thread_start("multiple");
+
+	for (k = 0; k < d->batchsize; k++) {
+		if (do_sendbytes(d->bytecount + k, d->letter, d->path, &options))
+			d->sum_errors++;
+		else
+			d->sum_good++;
+	}
+
+	trace2_thread_exit();
+	return NULL;
+}
+
+/*
+ * Start a client-side thread pool.  Each thread sends a series of
+ * IPC requests.  Each request is on a new connection to the server.
+ */
+static int client__multiple(void)
+{
+	struct multiple_thread_data *list = NULL;
+	int k;
+	int sum_join_errors = 0;
+	int sum_thread_errors = 0;
+	int sum_good = 0;
+
+	for (k = 0; k < cl_args.nr_threads; k++) {
+		struct multiple_thread_data *d = xcalloc(1, sizeof(*d));
+		d->next = list;
+		d->path = cl_args.path;
+		d->bytecount = cl_args.bytecount + cl_args.batchsize*(k/26);
+		d->batchsize = cl_args.batchsize;
+		d->sum_errors = 0;
+		d->sum_good = 0;
+		d->letter = 'A' + (k % 26);
+
+		if (pthread_create(&d->pthread_id, NULL, multiple_thread_proc, d)) {
+			warning("failed to create thread[%d] skipping remainder", k);
+			free(d);
+			break;
+		}
+
+		list = d;
+	}
+
+	while (list) {
+		struct multiple_thread_data *d = list;
+
+		if (pthread_join(d->pthread_id, NULL))
+			sum_join_errors++;
+
+		sum_thread_errors += d->sum_errors;
+		sum_good += d->sum_good;
+
+		list = d->next;
+		free(d);
+	}
+
+	printf("client (good %d) (join %d), (errors %d)\n",
+	       sum_good, sum_join_errors, sum_thread_errors);
+
+	return (sum_join_errors + sum_thread_errors) ? 1 : 0;
+}
+
+int cmd__simple_ipc(int argc, const char **argv)
+{
+	const char * const simple_ipc_usage[] = {
+		N_("test-helper simple-ipc is-active    [<name>] [<options>]"),
+		N_("test-helper simple-ipc run-daemon   [<name>] [<threads>]"),
+		N_("test-helper simple-ipc start-daemon [<name>] [<threads>] [<max-wait>]"),
+		N_("test-helper simple-ipc stop-daemon  [<name>] [<max-wait>]"),
+		N_("test-helper simple-ipc send         [<name>] [<token>]"),
+		N_("test-helper simple-ipc sendbytes    [<name>] [<bytecount>] [<byte>]"),
+		N_("test-helper simple-ipc multiple     [<name>] [<threads>] [<bytecount>] [<batchsize>]"),
+		NULL
+	};
+
+	const char *bytevalue = NULL;
+
+	struct option options[] = {
+#ifndef GIT_WINDOWS_NATIVE
+		OPT_STRING(0, "name", &cl_args.path, N_("name"), N_("name or pathname of unix domain socket")),
+#else
+		OPT_STRING(0, "name", &cl_args.path, N_("name"), N_("named-pipe name")),
+#endif
+		OPT_INTEGER(0, "threads", &cl_args.nr_threads, N_("number of threads in server thread pool")),
+		OPT_INTEGER(0, "max-wait", &cl_args.max_wait_sec, N_("seconds to wait for daemon to start or stop")),
+
+		OPT_INTEGER(0, "bytecount", &cl_args.bytecount, N_("number of bytes")),
+		OPT_INTEGER(0, "batchsize", &cl_args.batchsize, N_("number of requests per thread")),
+
+		OPT_STRING(0, "byte", &bytevalue, N_("byte"), N_("ballast character")),
+		OPT_STRING(0, "token", &cl_args.token, N_("token"), N_("command token to send to the server")),
+
+		OPT_END()
+	};
+
+	if (argc < 2)
+		usage_with_options(simple_ipc_usage, options);
+
+	if (argc == 2 && !strcmp(argv[1], "-h"))
+		usage_with_options(simple_ipc_usage, options);
+
+	if (argc == 2 && !strcmp(argv[1], "SUPPORTS_SIMPLE_IPC"))
+		return 0;
+
+	cl_args.subcommand = argv[1];
+
+	argc--;
+	argv++;
+
+	argc = parse_options(argc, argv, NULL, options, simple_ipc_usage, 0);
+
+	if (cl_args.nr_threads < 1)
+		cl_args.nr_threads = 1;
+	if (cl_args.max_wait_sec < 0)
+		cl_args.max_wait_sec = 0;
+	if (cl_args.bytecount < 1)
+		cl_args.bytecount = 1;
+	if (cl_args.batchsize < 1)
+		cl_args.batchsize = 1;
+
+	if (bytevalue && *bytevalue)
+		cl_args.bytevalue = bytevalue[0];
+
+	/*
+	 * Use '!!' on all dispatch functions to map from `error()` style
+	 * (returns -1) style to `test_must_fail` style (expects 1).  This
+	 * makes shell error messages less confusing.
+	 */
+
+	if (!strcmp(cl_args.subcommand, "is-active"))
+		return !!client__probe_server();
+
+	if (!strcmp(cl_args.subcommand, "run-daemon"))
+		return !!daemon__run_server();
+
+	if (!strcmp(cl_args.subcommand, "start-daemon"))
+		return !!daemon__start_server();
+
+	/*
+	 * Client commands follow.  Ensure a server is running before
+	 * sending any data.  This might be overkill, but then again
+	 * this is a test harness.
+	 */
+
+	if (!strcmp(cl_args.subcommand, "stop-daemon")) {
+		if (client__probe_server())
+			return 1;
+		return !!client__stop_server();
+	}
+
+	if (!strcmp(cl_args.subcommand, "send")) {
+		if (client__probe_server())
+			return 1;
+		return !!client__send_ipc();
+	}
+
+	if (!strcmp(cl_args.subcommand, "sendbytes")) {
+		if (client__probe_server())
+			return 1;
+		return !!client__sendbytes();
+	}
+
+	if (!strcmp(cl_args.subcommand, "multiple")) {
+		if (client__probe_server())
+			return 1;
+		return !!client__multiple();
+	}
+
+	die("Unhandled subcommand: '%s'", cl_args.subcommand);
+}
+#endif
diff --git a/t/helper/test-tool.c b/t/helper/test-tool.c
index 9d6d14d9293753..a409655f03b5bc 100644
--- a/t/helper/test-tool.c
+++ b/t/helper/test-tool.c
@@ -64,6 +64,7 @@ static struct test_cmd cmds[] = {
 	{ "sha1", cmd__sha1 },
 	{ "sha256", cmd__sha256 },
 	{ "sigchain", cmd__sigchain },
+	{ "simple-ipc", cmd__simple_ipc },
 	{ "strcmp-offset", cmd__strcmp_offset },
 	{ "string-list", cmd__string_list },
 	{ "submodule-config", cmd__submodule_config },
diff --git a/t/helper/test-tool.h b/t/helper/test-tool.h
index a6470ff62c4252..564eb3c8e9112a 100644
--- a/t/helper/test-tool.h
+++ b/t/helper/test-tool.h
@@ -54,6 +54,7 @@ int cmd__sha1(int argc, const char **argv);
 int cmd__oid_array(int argc, const char **argv);
 int cmd__sha256(int argc, const char **argv);
 int cmd__sigchain(int argc, const char **argv);
+int cmd__simple_ipc(int argc, const char **argv);
 int cmd__strcmp_offset(int argc, const char **argv);
 int cmd__string_list(int argc, const char **argv);
 int cmd__submodule_config(int argc, const char **argv);
diff --git a/t/t0052-simple-ipc.sh b/t/t0052-simple-ipc.sh
new file mode 100755
index 00000000000000..ff98be31a51b36
--- /dev/null
+++ b/t/t0052-simple-ipc.sh
@@ -0,0 +1,122 @@
+#!/bin/sh
+
+test_description='simple command server'
+
+. ./test-lib.sh
+
+test-tool simple-ipc SUPPORTS_SIMPLE_IPC || {
+	skip_all='simple IPC not supported on this platform'
+	test_done
+}
+
+stop_simple_IPC_server () {
+	test-tool simple-ipc stop-daemon
+}
+
+test_expect_success 'start simple command server' '
+	test_atexit stop_simple_IPC_server &&
+	test-tool simple-ipc start-daemon --threads=8 &&
+	test-tool simple-ipc is-active
+'
+
+test_expect_success 'simple command server' '
+	test-tool simple-ipc send --token=ping >actual &&
+	echo pong >expect &&
+	test_cmp expect actual
+'
+
+test_expect_success 'servers cannot share the same path' '
+	test_must_fail test-tool simple-ipc run-daemon &&
+	test-tool simple-ipc is-active
+'
+
+test_expect_success 'big response' '
+	test-tool simple-ipc send --token=big >actual &&
+	test_line_count -ge 10000 actual &&
+	grep -q "big: [0]*9999\$" actual
+'
+
+test_expect_success 'chunk response' '
+	test-tool simple-ipc send --token=chunk >actual &&
+	test_line_count -ge 10000 actual &&
+	grep -q "big: [0]*9999\$" actual
+'
+
+test_expect_success 'slow response' '
+	test-tool simple-ipc send --token=slow >actual &&
+	test_line_count -ge 100 actual &&
+	grep -q "big: [0]*99\$" actual
+'
+
+# Send an IPC with n=100,000 bytes of ballast.  This should be large enough
+# to force both the kernel and the pkt-line layer to chunk the message to the
+# daemon and for the daemon to receive it in chunks.
+#
+test_expect_success 'sendbytes' '
+	test-tool simple-ipc sendbytes --bytecount=100000 --byte=A >actual &&
+	grep "sent:A00100000 rcvd:A00100000" actual
+'
+
+# Start a series of <threads> client threads that each make <batchsize>
+# IPC requests to the server.  Each (<threads> * <batchsize>) request
+# will open a new connection to the server and randomly bind to a server
+# thread.  Each client thread exits after completing its batch.  So the
+# total number of live client threads will be smaller than the total.
+# Each request will send a message containing at least <bytecount> bytes
+# of ballast.  (Responses are small.)
+#
+# The purpose here is to test threading in the server and responding to
+# many concurrent client requests (regardless of whether they come from
+# 1 client process or many).  And to test that the server side of the
+# named pipe/socket is stable.  (On Windows this means that the server
+# pipe is properly recycled.)
+#
+# On Windows it also lets us adjust the connection timeout in the
+# `ipc_client_send_command()`.
+#
+# Note it is easy to drive the system into failure by requesting an
+# insane number of threads on client or server and/or increasing the
+# per-thread batchsize or the per-request bytecount (ballast).
+# On Windows these failures look like "pipe is busy" errors.
+# So I've chosen fairly conservative values for now.
+#
+# We expect output of the form "sent:<letter><length> ..."
+# With terms (7, 19, 13) we expect:
+#   <letter> in [A-G]
+#   <length> in [19+0 .. 19+(13-1)]
+# and (7 * 13) successful responses.
+#
+test_expect_success 'stress test threads' '
+	test-tool simple-ipc multiple \
+		--threads=7 \
+		--bytecount=19 \
+		--batchsize=13 \
+		>actual &&
+	test_line_count = 92 actual &&
+	grep "good 91" actual &&
+	grep "sent:A" <actual >actual_a &&
+	cat >expect_a <<-EOF &&
+		sent:A00000019 rcvd:A00000019
+		sent:A00000020 rcvd:A00000020
+		sent:A00000021 rcvd:A00000021
+		sent:A00000022 rcvd:A00000022
+		sent:A00000023 rcvd:A00000023
+		sent:A00000024 rcvd:A00000024
+		sent:A00000025 rcvd:A00000025
+		sent:A00000026 rcvd:A00000026
+		sent:A00000027 rcvd:A00000027
+		sent:A00000028 rcvd:A00000028
+		sent:A00000029 rcvd:A00000029
+		sent:A00000030 rcvd:A00000030
+		sent:A00000031 rcvd:A00000031
+	EOF
+	test_cmp expect_a actual_a
+'
+
+test_expect_success 'stop-daemon works' '
+	test-tool simple-ipc stop-daemon &&
+	test_must_fail test-tool simple-ipc is-active &&
+	test_must_fail test-tool simple-ipc send --token=ping
+'
+
+test_done

From f783c65350dcd4662272a19d1b3f5466c0dcf8d5 Mon Sep 17 00:00:00 2001
From: Jeff Hostetler <jeffhost@microsoft.com>
Date: Mon, 14 Dec 2020 13:48:26 -0500
Subject: [PATCH 15/39] fsmonitor--daemon: man page and documentation

Create a manual page describing the `git fsmonitor--daemon` feature.

Update references to `core.fsmonitor`, `core.fsmonitorHookVersion` and
pointers to `watchman` to mention the built-in FSMonitor.

Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
---
 Documentation/config/core.txt           |  45 +++++++---
 Documentation/git-fsmonitor--daemon.txt | 104 ++++++++++++++++++++++++
 Documentation/git-update-index.txt      |   4 +-
 Documentation/githooks.txt              |   3 +-
 4 files changed, 144 insertions(+), 12 deletions(-)
 create mode 100644 Documentation/git-fsmonitor--daemon.txt

diff --git a/Documentation/config/core.txt b/Documentation/config/core.txt
index 160aacad84bae6..22c2c4456f8655 100644
--- a/Documentation/config/core.txt
+++ b/Documentation/config/core.txt
@@ -66,18 +66,43 @@ core.fsmonitor::
 	will identify all files that may have changed since the
 	requested date/time. This information is used to speed up git by
 	avoiding unnecessary processing of files that have not changed.
-	See the "fsmonitor-watchman" section of linkgit:githooks[5].
++
+See the "fsmonitor-watchman" section of linkgit:githooks[5].
++
+Note: FSMonitor hooks (and this config setting) are ignored if the
+built-in FSMonitor is enabled (see `core.useBuiltinFSMonitor`).
 
 core.fsmonitorHookVersion::
-	Sets the version of hook that is to be used when calling fsmonitor.
-	There are currently versions 1 and 2. When this is not set,
-	version 2 will be tried first and if it fails then version 1
-	will be tried. Version 1 uses a timestamp as input to determine
-	which files have changes since that time but some monitors
-	like watchman have race conditions when used with a timestamp.
-	Version 2 uses an opaque string so that the monitor can return
-	something that can be used to determine what files have changed
-	without race conditions.
+	Sets the version of hook that is to be used when calling the
+	FSMonitor hook (as configured via `core.fsmonitor`).
++
+There are currently versions 1 and 2. When this is not set,
+version 2 will be tried first and if it fails then version 1
+will be tried. Version 1 uses a timestamp as input to determine
+which files have changes since that time but some monitors
+like watchman have race conditions when used with a timestamp.
+Version 2 uses an opaque string so that the monitor can return
+something that can be used to determine what files have changed
+without race conditions.
++
+Note: FSMonitor hooks (and this config setting) are ignored if the
+built-in FSMonitor is enabled (see `core.useBuiltinFSMonitor`).
+
+core.useBuiltinFSMonitor::
+	If set to true, enable the built-in filesystem event watcher (for
+	technical details, see linkgit:git-fsmonitor--daemon[1]).
++
+Like external (hook-based) FSMonitors, the built-in FSMonitor can speed up
+Git commands that need to refresh the Git index (e.g. `git status`) in a
+worktree with many files. The built-in FSMonitor facility eliminates the
+need to install and maintain an external third-party monitoring tool.
++
+The built-in FSMonitor is currently available only on a limited set of
+supported platforms.
++
+Note: if this config setting is set to `true`, any FSMonitor hook
+configured via `core.fsmonitor` (and possibly `core.fsmonitorHookVersion`)
+is ignored.
 
 core.trustctime::
 	If false, the ctime differences between the index and the
diff --git a/Documentation/git-fsmonitor--daemon.txt b/Documentation/git-fsmonitor--daemon.txt
new file mode 100644
index 00000000000000..b94f57c97fe472
--- /dev/null
+++ b/Documentation/git-fsmonitor--daemon.txt
@@ -0,0 +1,104 @@
+git-fsmonitor--daemon(1)
+========================
+
+NAME
+----
+git-fsmonitor--daemon - Builtin file system monitor daemon
+
+SYNOPSIS
+--------
+[verse]
+'git fsmonitor--daemon' --start
+'git fsmonitor--daemon' --run
+'git fsmonitor--daemon' --stop
+'git fsmonitor--daemon' --is-running
+'git fsmonitor--daemon' --is-supported
+'git fsmonitor--daemon' --query <token>
+'git fsmonitor--daemon' --query-index
+'git fsmonitor--daemon' --flush
+
+DESCRIPTION
+-----------
+
+Monitors files and directories in the working directory for changes using
+platform-specific file system notification facilities.
+
+It communicates directly with commands like `git status` using the
+link:technical/api-simple-ipc.html[simple IPC] interface instead of
+the slower linkgit:githooks[5] interface.
+
+OPTIONS
+-------
+
+--start::
+	Starts the fsmonitor daemon in the background.
+
+--run::
+	Runs the fsmonitor daemon in the foreground.
+
+--stop::
+	Stops the fsmonitor daemon running for the current working
+	directory, if present.
+
+--is-running::
+	Exits with zero status if the fsmonitor daemon is watching the
+	current working directory.
+
+--is-supported::
+	Exits with zero status if the fsmonitor daemon feature is supported
+	on this platform.
+
+--query <token>::
+	Connects to the fsmonitor daemon (starting it if necessary) and
+	requests the list of changed files and directories since the
+	given token.
+	This is intended for testing purposes.
+
+--query-index::
+	Read the current `<token>` from the File System Monitor index
+	extension (if present) and use it to query the fsmonitor daemon.
+	This is intended for testing purposes.
+
+--flush::
+	Force the fsmonitor daemon to flush its in-memory cache and
+	re-sync with the file system.
+	This is intended for testing purposes.
+
+REMARKS
+-------
+The fsmonitor daemon is a long running process that will watch a single
+working directory.  Commands, such as `git status`, should automatically
+start it (if necessary) when `core.useBuiltinFSMonitor` is set to `true`
+(see linkgit:git-config[1]).
+
+Configure the built-in FSMonitor via `core.useBuiltinFSMonitor` in each
+working directory separately, or globally via `git config --global
+core.useBuiltinFSMonitor true`.
+
+Tokens are opaque strings.  They are used by the fsmonitor daemon to
+mark a point in time and the associated internal state.  Callers should
+make no assumptions about the content of the token.  In particular,
+the should not assume that it is a timestamp.
+
+Query commands send a request-token to the daemon and it responds with
+a summary of the changes that have occurred since that token was
+created.  The daemon also returns a response-token that the client can
+use in a future query.
+
+For more information see the "File System Monitor" section in
+linkgit:git-update-index[1].
+
+CAVEATS
+-------
+
+The fsmonitor daemon does not currently know about submodules and does
+not know to filter out file system events that happen within a
+submodule.  If fsmonitor daemon is watching a super repo and a file is
+modified within the working directory of a submodule, it will report
+the change (as happening against the super repo).  However, the client
+should properly ignore these extra events, so performance may be affected
+but it should not cause an incorrect result.
+
+GIT
+---
+Part of the linkgit:git[1] suite
diff --git a/Documentation/git-update-index.txt b/Documentation/git-update-index.txt
index 2853f168d97685..8169aad7ee9fcd 100644
--- a/Documentation/git-update-index.txt
+++ b/Documentation/git-update-index.txt
@@ -498,7 +498,9 @@ FILE SYSTEM MONITOR
 This feature is intended to speed up git operations for repos that have
 large working directories.
 
-It enables git to work together with a file system monitor (see the
+It enables git to work together with a file system monitor (see
+linkgit:git-fsmonitor--daemon[1]
+and the
 "fsmonitor-watchman" section of linkgit:githooks[5]) that can
 inform it as to what files have been modified. This enables git to avoid
 having to lstat() every file to find modified files.
diff --git a/Documentation/githooks.txt b/Documentation/githooks.txt
index 1f3b57d04db6ca..193156ebec2da5 100644
--- a/Documentation/githooks.txt
+++ b/Documentation/githooks.txt
@@ -584,7 +584,8 @@ fsmonitor-watchman
 
 This hook is invoked when the configuration option `core.fsmonitor` is
 set to `.git/hooks/fsmonitor-watchman` or `.git/hooks/fsmonitor-watchmanv2`
-depending on the version of the hook to use.
+depending on the version of the hook to use, unless overridden via
+`core.useBuiltinFSMonitor` (see linkgit:git-config[1]).
 
 Version 1 takes two arguments, a version (1) and the time in elapsed
 nanoseconds since midnight, January 1, 1970.

From 7c17e4f64acfb83ee230b2c12b4998197d8db666 Mon Sep 17 00:00:00 2001
From: Jeff Hostetler <jeffhost@microsoft.com>
Date: Mon, 14 Dec 2020 17:35:32 -0500
Subject: [PATCH 16/39] fsmonitor-ipc: create client routines for
 git-fsmonitor--daemon

Create client routines to spawn a fsmonitor daemon and send it an IPC
request using `simple-ipc`.

Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
---
 Makefile        |   1 +
 fsmonitor-ipc.c | 153 ++++++++++++++++++++++++++++++++++++++++++++++++
 fsmonitor-ipc.h |  48 +++++++++++++++
 help.c          |   4 ++
 4 files changed, 206 insertions(+)
 create mode 100644 fsmonitor-ipc.c
 create mode 100644 fsmonitor-ipc.h

diff --git a/Makefile b/Makefile
index dfc37b0480e481..4a23095d2db4eb 100644
--- a/Makefile
+++ b/Makefile
@@ -884,6 +884,7 @@ LIB_OBJS += fetch-pack.o
 LIB_OBJS += fmt-merge-msg.o
 LIB_OBJS += fsck.o
 LIB_OBJS += fsmonitor.o
+LIB_OBJS += fsmonitor-ipc.o
 LIB_OBJS += gettext.o
 LIB_OBJS += gpg-interface.o
 LIB_OBJS += graph.o
diff --git a/fsmonitor-ipc.c b/fsmonitor-ipc.c
new file mode 100644
index 00000000000000..b0dc334ff02d43
--- /dev/null
+++ b/fsmonitor-ipc.c
@@ -0,0 +1,153 @@
+#include "cache.h"
+#include "fsmonitor.h"
+#include "fsmonitor-ipc.h"
+#include "run-command.h"
+#include "strbuf.h"
+#include "trace2.h"
+
+#ifdef HAVE_FSMONITOR_DAEMON_BACKEND
+#define FSMONITOR_DAEMON_IS_SUPPORTED 1
+#else
+#define FSMONITOR_DAEMON_IS_SUPPORTED 0
+#endif
+
+/*
+ * A trivial function so that this source file always defines at least
+ * one symbol even when the feature is not supported.  This quiets an
+ * annoying compiler error.
+ */
+int fsmonitor_ipc__is_supported(void)
+{
+	return FSMONITOR_DAEMON_IS_SUPPORTED;
+}
+
+#ifdef HAVE_FSMONITOR_DAEMON_BACKEND
+
+GIT_PATH_FUNC(fsmonitor_ipc__get_path, "fsmonitor")
+
+enum ipc_active_state fsmonitor_ipc__get_state(void)
+{
+	return ipc_get_active_state(fsmonitor_ipc__get_path());
+}
+
+static int spawn_daemon(void)
+{
+	const char *args[] = { "fsmonitor--daemon", "--start", NULL };
+
+	return run_command_v_opt_tr2(args, RUN_COMMAND_NO_STDIN | RUN_GIT_CMD,
+				    "fsmonitor");
+}
+
+int fsmonitor_ipc__send_query(const char *since_token,
+			      struct strbuf *answer)
+{
+	int ret = -1;
+	int tried_to_spawn = 0;
+	enum ipc_active_state state = IPC_STATE__OTHER_ERROR;
+	struct ipc_client_connection *connection = NULL;
+	struct ipc_client_connect_options options
+		= IPC_CLIENT_CONNECT_OPTIONS_INIT;
+
+	options.wait_if_busy = 1;
+	options.wait_if_not_found = 0;
+
+	trace2_region_enter("fsm_client", "query", NULL);
+
+	trace2_data_string("fsm_client", NULL, "query/command",
+			   since_token);
+
+try_again:
+	state = ipc_client_try_connect(fsmonitor_ipc__get_path(), &options,
+				       &connection);
+
+	switch (state) {
+	case IPC_STATE__LISTENING:
+		ret = ipc_client_send_command_to_connection(
+			connection, since_token, answer);
+		ipc_client_close_connection(connection);
+
+		trace2_data_intmax("fsm_client", NULL,
+				   "query/response-length", answer->len);
+
+		if (fsmonitor_is_trivial_response(answer))
+			trace2_data_intmax("fsm_client", NULL,
+					   "query/trivial-response", 1);
+
+		goto done;
+
+	case IPC_STATE__NOT_LISTENING:
+		ret = error(_("fsmonitor_ipc__send_query: daemon not available"));
+		goto done;
+
+	case IPC_STATE__PATH_NOT_FOUND:
+		if (tried_to_spawn)
+			goto done;
+
+		tried_to_spawn++;
+		if (spawn_daemon())
+			goto done;
+
+		/*
+		 * Try again, but this time give the daemon a chance to
+		 * actually create the pipe/socket.
+		 *
+		 * Granted, the daemon just started so it can't possibly have
+		 * any FS cached yet, so we'll always get a trivial answer.
+		 * BUT the answer should include a new token that can serve
+		 * as the basis for subsequent requests.
+		 */
+		options.wait_if_not_found = 1;
+		goto try_again;
+
+	case IPC_STATE__INVALID_PATH:
+		ret = error(_("fsmonitor_ipc__send_query: invalid path '%s'"),
+			    fsmonitor_ipc__get_path());
+		goto done;
+
+	case IPC_STATE__OTHER_ERROR:
+	default:
+		ret = error(_("fsmonitor_ipc__send_query: unspecified error on '%s'"),
+			    fsmonitor_ipc__get_path());
+		goto done;
+	}
+
+done:
+	trace2_region_leave("fsm_client", "query", NULL);
+
+	return ret;
+}
+
+int fsmonitor_ipc__send_command(const char *command,
+				struct strbuf *answer)
+{
+	struct ipc_client_connection *connection = NULL;
+	struct ipc_client_connect_options options
+		= IPC_CLIENT_CONNECT_OPTIONS_INIT;
+	int ret;
+	enum ipc_active_state state;
+
+	strbuf_reset(answer);
+
+	options.wait_if_busy = 1;
+	options.wait_if_not_found = 0;
+
+	state = ipc_client_try_connect(fsmonitor_ipc__get_path(), &options,
+				       &connection);
+	if (state != IPC_STATE__LISTENING) {
+		die("fsmonitor--daemon is not running");
+		return -1;
+	}
+
+	ret = ipc_client_send_command_to_connection(connection, command, answer);
+	ipc_client_close_connection(connection);
+
+	if (ret == -1) {
+		die("could not send '%s' command to fsmonitor--daemon",
+		    command);
+		return -1;
+	}
+
+	return 0;
+}
+
+#endif
diff --git a/fsmonitor-ipc.h b/fsmonitor-ipc.h
new file mode 100644
index 00000000000000..7d21c1260151d2
--- /dev/null
+++ b/fsmonitor-ipc.h
@@ -0,0 +1,48 @@
+#ifndef FSMONITOR_IPC_H
+#define FSMONITOR_IPC_H
+
+/*
+ * Returns true if a filesystem notification backend is defined
+ * for this platform.  This symbol must always be visible and
+ * outside of the HAVE_ ifdef.
+ */
+int fsmonitor_ipc__is_supported(void);
+
+#ifdef HAVE_FSMONITOR_DAEMON_BACKEND
+#include "run-command.h"
+#include "simple-ipc.h"
+
+/*
+ * Returns the pathname to the IPC named pipe or Unix domain socket
+ * where a `git-fsmonitor--daemon` process will listen.  This is a
+ * per-worktree value.
+ */
+const char *fsmonitor_ipc__get_path(void);
+
+/*
+ * Try to determine whether there is a `git-fsmonitor--daemon` process
+ * listening on the IPC pipe/socket.
+ */
+enum ipc_active_state fsmonitor_ipc__get_state(void);
+
+/*
+ * Connect to a `git-fsmonitor--daemon` process via simple-ipc
+ * and ask for the set of changed files since the given token.
+ *
+ * This DOES NOT use the hook interface.
+ *
+ * Spawn a daemon process in the background if necessary.
+ */
+int fsmonitor_ipc__send_query(const char *since_token,
+			      struct strbuf *answer);
+
+/*
+ * Connect to a `git-fsmonitor--daemon` process via simple-ipc and
+ * send a command verb.  If no daemon is available, we DO NOT try to
+ * start one.
+ */
+int fsmonitor_ipc__send_command(const char *command,
+				struct strbuf *answer);
+
+#endif /* HAVE_FSMONITOR_DAEMON_BACKEND */
+#endif /* FSMONITOR_IPC_H */
diff --git a/help.c b/help.c
index 3c3bdec21356d9..e22ba1d246a5b0 100644
--- a/help.c
+++ b/help.c
@@ -11,6 +11,7 @@
 #include "version.h"
 #include "refs.h"
 #include "parse-options.h"
+#include "fsmonitor-ipc.h"
 
 struct category_description {
 	uint32_t category;
@@ -664,6 +665,9 @@ void get_version_info(struct strbuf *buf, int show_build_options)
 		strbuf_addf(buf, "sizeof-size_t: %d\n", (int)sizeof(size_t));
 		strbuf_addf(buf, "shell-path: %s\n", SHELL_PATH);
 		/* NEEDSWORK: also save and output GIT-BUILD_OPTIONS? */
+
+		if (fsmonitor_ipc__is_supported())
+			strbuf_addstr(buf, "feature: fsmonitor--daemon\n");
 	}
 }
 

From 88bfdc9094eaa3f6ba955931eac0407ddb55209d Mon Sep 17 00:00:00 2001
From: Johannes Schindelin <johannes.schindelin@gmx.de>
Date: Fri, 5 Mar 2021 21:20:26 +0100
Subject: [PATCH 17/39] config: FSMonitor is repository-specific

This commit refactors `git_config_get_fsmonitor()` into the `repo_*()`
form that takes a parameter `struct repository *r`.

That change prepares for the upcoming `core.useFSMonitorDaemon` flag which
will be stored in the `repo_settings` struct.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 builtin/update-index.c | 4 ++--
 config.c               | 4 ++--
 config.h               | 2 +-
 fsmonitor.c            | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/builtin/update-index.c b/builtin/update-index.c
index 79087bccea4b8b..84793df8b2b6a7 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -1214,14 +1214,14 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
 	}
 
 	if (fsmonitor > 0) {
-		if (git_config_get_fsmonitor() == 0)
+		if (repo_config_get_fsmonitor(r) == 0)
 			warning(_("core.fsmonitor is unset; "
 				"set it if you really want to "
 				"enable fsmonitor"));
 		add_fsmonitor(&the_index);
 		report(_("fsmonitor enabled"));
 	} else if (!fsmonitor) {
-		if (git_config_get_fsmonitor() == 1)
+		if (repo_config_get_fsmonitor(r) == 1)
 			warning(_("core.fsmonitor is set; "
 				"remove it if you really want to "
 				"disable fsmonitor"));
diff --git a/config.c b/config.c
index 1137bd73aff07c..90b7363ded0afd 100644
--- a/config.c
+++ b/config.c
@@ -2345,9 +2345,9 @@ int git_config_get_max_percent_split_change(void)
 	return -1; /* default value */
 }
 
-int git_config_get_fsmonitor(void)
+int repo_config_get_fsmonitor(struct repository *r)
 {
-	if (git_config_get_pathname("core.fsmonitor", &core_fsmonitor))
+	if (repo_config_get_pathname(r, "core.fsmonitor", &core_fsmonitor))
 		core_fsmonitor = getenv("GIT_TEST_FSMONITOR");
 
 	if (core_fsmonitor && !*core_fsmonitor)
diff --git a/config.h b/config.h
index c1449bb790b81e..44a8f45bb21bd1 100644
--- a/config.h
+++ b/config.h
@@ -606,7 +606,7 @@ int git_config_get_index_threads(int *dest);
 int git_config_get_untracked_cache(void);
 int git_config_get_split_index(void);
 int git_config_get_max_percent_split_change(void);
-int git_config_get_fsmonitor(void);
+int repo_config_get_fsmonitor(struct repository *r);
 
 /* This dies if the configured or default date is in the future */
 int git_config_get_expiry(const char *key, const char **output);
diff --git a/fsmonitor.c b/fsmonitor.c
index e12214b30071e7..a391b40894eee8 100644
--- a/fsmonitor.c
+++ b/fsmonitor.c
@@ -410,7 +410,7 @@ void remove_fsmonitor(struct index_state *istate)
 void tweak_fsmonitor(struct index_state *istate)
 {
 	unsigned int i;
-	int fsmonitor_enabled = git_config_get_fsmonitor();
+	int fsmonitor_enabled = repo_config_get_fsmonitor(istate->repo ? istate->repo : the_repository);
 
 	if (istate->fsmonitor_dirty) {
 		if (fsmonitor_enabled) {

From 4654a3e00abbbe581d5d4a5d0fd3a240e6603ace Mon Sep 17 00:00:00 2001
From: Johannes Schindelin <johannes.schindelin@gmx.de>
Date: Fri, 2 Aug 2019 19:17:25 +0200
Subject: [PATCH 18/39] fsmonitor: introduce `core.useBuiltinFSMonitor` to call
 the daemon via IPC

The `core.fsmonitor` setting is supposed to be set to a path pointing
to a script or executable that (via the Hook API) queries an fsmonitor
process such as watchman.

We are about to implement our own fsmonitor backend, and do not want
to spawn hook processes just to query it.  Let's use `Simple IPC` to
directly communicate with the daemon (and start it if necessary),
guarded by the brand-new `core.useBuiltinFSMonitor` toggle.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
---
 config.c        |  5 +++++
 fsmonitor.c     | 20 +++++++++++++++++---
 repo-settings.c |  3 +++
 repository.h    |  2 ++
 4 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/config.c b/config.c
index 90b7363ded0afd..287803f3cddf18 100644
--- a/config.c
+++ b/config.c
@@ -2347,6 +2347,11 @@ int git_config_get_max_percent_split_change(void)
 
 int repo_config_get_fsmonitor(struct repository *r)
 {
+	if (r->settings.use_builtin_fsmonitor > 0) {
+		core_fsmonitor = "(built-in daemon)";
+		return 1;
+	}
+
 	if (repo_config_get_pathname(r, "core.fsmonitor", &core_fsmonitor))
 		core_fsmonitor = getenv("GIT_TEST_FSMONITOR");
 
diff --git a/fsmonitor.c b/fsmonitor.c
index a391b40894eee8..e28a28315043f3 100644
--- a/fsmonitor.c
+++ b/fsmonitor.c
@@ -3,6 +3,7 @@
 #include "dir.h"
 #include "ewah/ewok.h"
 #include "fsmonitor.h"
+#include "fsmonitor-ipc.h"
 #include "run-command.h"
 #include "strbuf.h"
 
@@ -147,14 +148,27 @@ void write_fsmonitor_extension(struct strbuf *sb, struct index_state *istate)
 /*
  * Call the query-fsmonitor hook passing the last update token of the saved results.
  */
-static int query_fsmonitor(int version, const char *last_update, struct strbuf *query_result)
+static int query_fsmonitor(int version, struct index_state *istate, struct strbuf *query_result)
 {
+	struct repository *r = istate->repo ? istate->repo : the_repository;
+	const char *last_update = istate->fsmonitor_last_update;
 	struct child_process cp = CHILD_PROCESS_INIT;
 	int result;
 
 	if (!core_fsmonitor)
 		return -1;
 
+	if (r->settings.use_builtin_fsmonitor > 0) {
+#ifdef HAVE_FSMONITOR_DAEMON_BACKEND
+		return fsmonitor_ipc__send_query(last_update, query_result);
+#else
+		/* Fake a trivial response. */
+		warning(_("fsmonitor--daemon unavailable; falling back"));
+		strbuf_add(query_result, "/", 2);
+		return 0;
+#endif
+	}
+
 	strvec_push(&cp.args, core_fsmonitor);
 	strvec_pushf(&cp.args, "%d", version);
 	strvec_pushf(&cp.args, "%s", last_update);
@@ -262,7 +276,7 @@ void refresh_fsmonitor(struct index_state *istate)
 	if (istate->fsmonitor_last_update) {
 		if (hook_version == -1 || hook_version == HOOK_INTERFACE_VERSION2) {
 			query_success = !query_fsmonitor(HOOK_INTERFACE_VERSION2,
-				istate->fsmonitor_last_update, &query_result);
+				istate, &query_result);
 
 			if (query_success) {
 				if (hook_version < 0)
@@ -292,7 +306,7 @@ void refresh_fsmonitor(struct index_state *istate)
 
 		if (hook_version == HOOK_INTERFACE_VERSION1) {
 			query_success = !query_fsmonitor(HOOK_INTERFACE_VERSION1,
-				istate->fsmonitor_last_update, &query_result);
+				istate, &query_result);
 		}
 
 		trace_performance_since(last_update, "fsmonitor process '%s'", core_fsmonitor);
diff --git a/repo-settings.c b/repo-settings.c
index f7fff0f5ab837e..93aab92ff16436 100644
--- a/repo-settings.c
+++ b/repo-settings.c
@@ -58,6 +58,9 @@ void prepare_repo_settings(struct repository *r)
 		r->settings.core_multi_pack_index = value;
 	UPDATE_DEFAULT_BOOL(r->settings.core_multi_pack_index, 1);
 
+	if (!repo_config_get_bool(r, "core.usebuiltinfsmonitor", &value) && value)
+		r->settings.use_builtin_fsmonitor = 1;
+
 	if (!repo_config_get_bool(r, "feature.manyfiles", &value) && value) {
 		UPDATE_DEFAULT_BOOL(r->settings.index_version, 4);
 		UPDATE_DEFAULT_BOOL(r->settings.core_untracked_cache, UNTRACKED_CACHE_WRITE);
diff --git a/repository.h b/repository.h
index b385ca3c94b62b..7eeab871ac3efb 100644
--- a/repository.h
+++ b/repository.h
@@ -41,6 +41,8 @@ struct repo_settings {
 	enum fetch_negotiation_setting fetch_negotiation_algorithm;
 
 	int core_multi_pack_index;
+
+	int use_builtin_fsmonitor;
 };
 
 struct repository {

From 5c6a9f842246e2cee51830c9f46c04cead2d6755 Mon Sep 17 00:00:00 2001
From: Jeff Hostetler <jeffhost@microsoft.com>
Date: Wed, 16 Dec 2020 10:24:30 -0500
Subject: [PATCH 19/39] fsmonitor--daemon: add a built-in fsmonitor daemon

Create a built-in file system monitoring daemon that can be used by
the existing `fsmonitor` feature (protocol API and index extension)
to improve the performance of various Git commands, such as `status`.

The `fsmonitor--daemon` feature builds upon the `Simple IPC` API and
provides an alternative to hook access to existing fsmonitors such
as `watchman`.

This commit merely adds the new command without any functionality.

Co-authored-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
---
 .gitignore                  |  1 +
 Makefile                    |  1 +
 builtin.h                   |  1 +
 builtin/fsmonitor--daemon.c | 52 +++++++++++++++++++++++++++++++++++++
 git.c                       |  1 +
 5 files changed, 56 insertions(+)
 create mode 100644 builtin/fsmonitor--daemon.c

diff --git a/.gitignore b/.gitignore
index 3dcdb6bb5ab8d1..beccf34abe9ee0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -71,6 +71,7 @@
 /git-format-patch
 /git-fsck
 /git-fsck-objects
+/git-fsmonitor--daemon
 /git-gc
 /git-get-tar-commit-id
 /git-grep
diff --git a/Makefile b/Makefile
index 4a23095d2db4eb..7ac3ee3e370711 100644
--- a/Makefile
+++ b/Makefile
@@ -1084,6 +1084,7 @@ BUILTIN_OBJS += builtin/fmt-merge-msg.o
 BUILTIN_OBJS += builtin/for-each-ref.o
 BUILTIN_OBJS += builtin/for-each-repo.o
 BUILTIN_OBJS += builtin/fsck.o
+BUILTIN_OBJS += builtin/fsmonitor--daemon.o
 BUILTIN_OBJS += builtin/gc.o
 BUILTIN_OBJS += builtin/get-tar-commit-id.o
 BUILTIN_OBJS += builtin/grep.o
diff --git a/builtin.h b/builtin.h
index b6ce981b737735..7554476f90a4bc 100644
--- a/builtin.h
+++ b/builtin.h
@@ -158,6 +158,7 @@ int cmd_for_each_ref(int argc, const char **argv, const char *prefix);
 int cmd_for_each_repo(int argc, const char **argv, const char *prefix);
 int cmd_format_patch(int argc, const char **argv, const char *prefix);
 int cmd_fsck(int argc, const char **argv, const char *prefix);
+int cmd_fsmonitor__daemon(int argc, const char **argv, const char *prefix);
 int cmd_gc(int argc, const char **argv, const char *prefix);
 int cmd_get_tar_commit_id(int argc, const char **argv, const char *prefix);
 int cmd_grep(int argc, const char **argv, const char *prefix);
diff --git a/builtin/fsmonitor--daemon.c b/builtin/fsmonitor--daemon.c
new file mode 100644
index 00000000000000..6700bac92c7d97
--- /dev/null
+++ b/builtin/fsmonitor--daemon.c
@@ -0,0 +1,52 @@
+#include "builtin.h"
+#include "config.h"
+#include "parse-options.h"
+#include "fsmonitor.h"
+#include "fsmonitor-ipc.h"
+#include "simple-ipc.h"
+#include "khash.h"
+
+static const char * const builtin_fsmonitor__daemon_usage[] = {
+	NULL
+};
+
+#ifdef HAVE_FSMONITOR_DAEMON_BACKEND
+
+int cmd_fsmonitor__daemon(int argc, const char **argv, const char *prefix)
+{
+	enum daemon_mode {
+		UNDEFINED_MODE,
+	} mode = UNDEFINED_MODE;
+
+	struct option options[] = {
+		OPT_END()
+	};
+
+	if (argc == 2 && !strcmp(argv[1], "-h"))
+		usage_with_options(builtin_fsmonitor__daemon_usage, options);
+
+	git_config(git_default_config, NULL);
+
+	argc = parse_options(argc, argv, prefix, options,
+			     builtin_fsmonitor__daemon_usage, 0);
+
+	switch (mode) {
+	case UNDEFINED_MODE:
+	default:
+		die(_("Unhandled command mode %d"), mode);
+	}
+}
+
+#else
+int cmd_fsmonitor__daemon(int argc, const char **argv, const char *prefix)
+{
+	struct option options[] = {
+		OPT_END()
+	};
+
+	if (argc == 2 && !strcmp(argv[1], "-h"))
+		usage_with_options(builtin_fsmonitor__daemon_usage, options);
+
+	die(_("fsmonitor--daemon not supported on this platform"));
+}
+#endif
diff --git a/git.c b/git.c
index a00a0a4d941261..ea84e4c1274c77 100644
--- a/git.c
+++ b/git.c
@@ -520,6 +520,7 @@ static struct cmd_struct commands[] = {
 	{ "format-patch", cmd_format_patch, RUN_SETUP },
 	{ "fsck", cmd_fsck, RUN_SETUP },
 	{ "fsck-objects", cmd_fsck, RUN_SETUP },
+	{ "fsmonitor--daemon", cmd_fsmonitor__daemon, RUN_SETUP },
 	{ "gc", cmd_gc, RUN_SETUP },
 	{ "get-tar-commit-id", cmd_get_tar_commit_id, NO_PARSEOPT },
 	{ "grep", cmd_grep, RUN_SETUP_GENTLY },

From 1c9f8ecb7e7649d661efd8a5d9464fbbd9319d24 Mon Sep 17 00:00:00 2001
From: Jeff Hostetler <jeffhost@microsoft.com>
Date: Wed, 16 Dec 2020 11:02:46 -0500
Subject: [PATCH 20/39] fsmonitor--daemon: implement client command options

Implement command options `--stop`, `--is-running`, `--query`,
`--query-index`, and `--flush` to control and query the status of a
`fsmonitor--daemon` server process (and implicitly start a server
process if necessary).

Later commits will implement the actual server and monitor
the file system.

Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
---
 builtin/fsmonitor--daemon.c | 144 ++++++++++++++++++++++++++++++++++++
 1 file changed, 144 insertions(+)

diff --git a/builtin/fsmonitor--daemon.c b/builtin/fsmonitor--daemon.c
index 6700bac92c7d97..10434bce4b646e 100644
--- a/builtin/fsmonitor--daemon.c
+++ b/builtin/fsmonitor--daemon.c
@@ -7,18 +7,144 @@
 #include "khash.h"
 
 static const char * const builtin_fsmonitor__daemon_usage[] = {
+	N_("git fsmonitor--daemon --stop"),
+	N_("git fsmonitor--daemon --is-running"),
+	N_("git fsmonitor--daemon --query <token>"),
+	N_("git fsmonitor--daemon --query-index"),
+	N_("git fsmonitor--daemon --flush"),
 	NULL
 };
 
 #ifdef HAVE_FSMONITOR_DAEMON_BACKEND
+/*
+ * Acting as a CLIENT.
+ *
+ * Send an IPC query to a `git-fsmonitor--daemon` SERVER process and
+ * ask for the changes since the given token.  This will implicitly
+ * start a daemon process if necessary.  The daemon process will
+ * persist after we exit.
+ *
+ * This feature is primarily used by the test suite.
+ */
+static int do_as_client__query_token(const char *token)
+{
+	struct strbuf answer = STRBUF_INIT;
+	int ret;
+
+	ret = fsmonitor_ipc__send_query(token, &answer);
+	if (ret < 0)
+		die(_("could not query fsmonitor--daemon"));
+
+	write_in_full(1, answer.buf, answer.len);
+	strbuf_release(&answer);
+
+	return 0;
+}
+
+/*
+ * Acting as a CLIENT.
+ *
+ * Read the `.git/index` to get the last token written to the FSMonitor index
+ * extension and use that to make a query.
+ *
+ * This feature is primarily used by the test suite.
+ */
+static int do_as_client__query_from_index(void)
+{
+	struct index_state *istate = the_repository->index;
+
+	setup_git_directory();
+	if (do_read_index(istate, the_repository->index_file, 0) < 0)
+		die("unable to read index file");
+	if (!istate->fsmonitor_last_update)
+		die("index file does not have fsmonitor extension");
+
+	return do_as_client__query_token(istate->fsmonitor_last_update);
+}
+
+/*
+ * Acting as a CLIENT.
+ *
+ * Send a "quit" command to the `git-fsmonitor--daemon` (if running)
+ * and wait for it to shutdown.
+ */
+static int do_as_client__send_stop(void)
+{
+	struct strbuf answer = STRBUF_INIT;
+	int ret;
+
+	ret = fsmonitor_ipc__send_command("quit", &answer);
+
+	/* The quit command does not return any response data. */
+	strbuf_release(&answer);
+
+	if (ret)
+		return ret;
+
+	trace2_region_enter("fsm_client", "polling-for-daemon-exit", NULL);
+	while (fsmonitor_ipc__get_state() == IPC_STATE__LISTENING)
+		sleep_millisec(50);
+	trace2_region_leave("fsm_client", "polling-for-daemon-exit", NULL);
+
+	return 0;
+}
+
+/*
+ * Acting as a CLIENT.
+ *
+ * Send a "flush" command to the `git-fsmonitor--daemon` (if running)
+ * and tell it to flush its cache.
+ *
+ * This feature is primarily used by the test suite to simulate a loss of
+ * sync with the filesystem where we miss kernel events.
+ */
+static int do_as_client__send_flush(void)
+{
+	struct strbuf answer = STRBUF_INIT;
+	int ret;
+
+	ret = fsmonitor_ipc__send_command("flush", &answer);
+	if (ret)
+		return ret;
+
+	write_in_full(1, answer.buf, answer.len);
+	strbuf_release(&answer);
+
+	return 0;
+}
+
+static int is_ipc_daemon_listening(void)
+{
+	return fsmonitor_ipc__get_state() == IPC_STATE__LISTENING;
+}
 
 int cmd_fsmonitor__daemon(int argc, const char **argv, const char *prefix)
 {
 	enum daemon_mode {
 		UNDEFINED_MODE,
+		STOP,
+		IS_RUNNING,
+		QUERY,
+		QUERY_INDEX,
+		FLUSH,
 	} mode = UNDEFINED_MODE;
 
 	struct option options[] = {
+		OPT_CMDMODE(0, "stop", &mode, N_("stop the running daemon"),
+			    STOP),
+
+		OPT_CMDMODE(0, "is-running", &mode,
+			    N_("test whether the daemon is running"),
+			    IS_RUNNING),
+
+		OPT_CMDMODE(0, "query", &mode,
+			    N_("query the daemon (starting if necessary)"),
+			    QUERY),
+		OPT_CMDMODE(0, "query-index", &mode,
+			    N_("query the daemon (starting if necessary) using token from index"),
+			    QUERY_INDEX),
+		OPT_CMDMODE(0, "flush", &mode, N_("flush cached filesystem events"),
+			    FLUSH),
 		OPT_END()
 	};
 
@@ -31,6 +157,24 @@ int cmd_fsmonitor__daemon(int argc, const char **argv, const char *prefix)
 			     builtin_fsmonitor__daemon_usage, 0);
 
 	switch (mode) {
+	case STOP:
+		return !!do_as_client__send_stop();
+
+	case IS_RUNNING:
+		return !is_ipc_daemon_listening();
+
+	case QUERY:
+		if (argc != 1)
+			usage_with_options(builtin_fsmonitor__daemon_usage,
+					   options);
+		return !!do_as_client__query_token(argv[0]);
+
+	case QUERY_INDEX:
+		return !!do_as_client__query_from_index();
+
+	case FLUSH:
+		return !!do_as_client__send_flush();
+
 	case UNDEFINED_MODE:
 	default:
 		die(_("Unhandled command mode %d"), mode);

From c1cf3064958655c3302171f01ddc03e9cdbe7a12 Mon Sep 17 00:00:00 2001
From: Jeff Hostetler <jeffhost@microsoft.com>
Date: Thu, 17 Dec 2020 10:29:17 -0500
Subject: [PATCH 21/39] fsmonitor-fs-listen-win32: stub in backend for Windows

Stub in empty backend for fsmonitor--daemon on Windows.

Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
---
 Makefile                                     | 13 ++++++
 compat/fsmonitor/fsmonitor-fs-listen-win32.c | 21 +++++++++
 compat/fsmonitor/fsmonitor-fs-listen.h       | 49 ++++++++++++++++++++
 config.mak.uname                             |  2 +
 contrib/buildsystems/CMakeLists.txt          |  5 ++
 5 files changed, 90 insertions(+)
 create mode 100644 compat/fsmonitor/fsmonitor-fs-listen-win32.c
 create mode 100644 compat/fsmonitor/fsmonitor-fs-listen.h

diff --git a/Makefile b/Makefile
index 7ac3ee3e370711..18e8a1ce0c476d 100644
--- a/Makefile
+++ b/Makefile
@@ -471,6 +471,11 @@ all::
 # directory, and the JSON compilation database 'compile_commands.json' will be
 # created at the root of the repository.
 #
+# If your platform supports an built-in fsmonitor backend, set
+# FSMONITOR_DAEMON_BACKEND to the name of the corresponding
+# `compat/fsmonitor/fsmonitor-fs-listen-<name>.c` that implements the
+# `fsmonitor_fs_listen__*()` routines.
+#
 # Define DEVELOPER to enable more compiler warnings. Compiler version
 # and family are auto detected, but could be overridden by defining
 # COMPILER_FEATURES (see config.mak.dev). You can still set
@@ -1902,6 +1907,11 @@ ifdef NEED_ACCESS_ROOT_HANDLER
 	COMPAT_OBJS += compat/access.o
 endif
 
+ifdef FSMONITOR_DAEMON_BACKEND
+	COMPAT_CFLAGS += -DHAVE_FSMONITOR_DAEMON_BACKEND
+	COMPAT_OBJS += compat/fsmonitor/fsmonitor-fs-listen-$(FSMONITOR_DAEMON_BACKEND).o
+endif
+
 ifeq ($(TCLTK_PATH),)
 NO_TCLTK = NoThanks
 endif
@@ -2747,6 +2757,9 @@ GIT-BUILD-OPTIONS: FORCE
 	@echo PAGER_ENV=\''$(subst ','\'',$(subst ','\'',$(PAGER_ENV)))'\' >>$@+
 	@echo DC_SHA1=\''$(subst ','\'',$(subst ','\'',$(DC_SHA1)))'\' >>$@+
 	@echo X=\'$(X)\' >>$@+
+ifdef FSMONITOR_DAEMON_BACKEND
+	@echo FSMONITOR_DAEMON_BACKEND=\''$(subst ','\'',$(subst ','\'',$(FSMONITOR_DAEMON_BACKEND)))'\' >>$@+
+endif
 ifdef TEST_OUTPUT_DIRECTORY
 	@echo TEST_OUTPUT_DIRECTORY=\''$(subst ','\'',$(subst ','\'',$(TEST_OUTPUT_DIRECTORY)))'\' >>$@+
 endif
diff --git a/compat/fsmonitor/fsmonitor-fs-listen-win32.c b/compat/fsmonitor/fsmonitor-fs-listen-win32.c
new file mode 100644
index 00000000000000..880446b49e35fb
--- /dev/null
+++ b/compat/fsmonitor/fsmonitor-fs-listen-win32.c
@@ -0,0 +1,21 @@
+#include "cache.h"
+#include "config.h"
+#include "fsmonitor.h"
+#include "fsmonitor-fs-listen.h"
+
+void fsmonitor_fs_listen__stop_async(struct fsmonitor_daemon_state *state)
+{
+}
+
+void fsmonitor_fs_listen__loop(struct fsmonitor_daemon_state *state)
+{
+}
+
+int fsmonitor_fs_listen__ctor(struct fsmonitor_daemon_state *state)
+{
+	return -1;
+}
+
+void fsmonitor_fs_listen__dtor(struct fsmonitor_daemon_state *state)
+{
+}
diff --git a/compat/fsmonitor/fsmonitor-fs-listen.h b/compat/fsmonitor/fsmonitor-fs-listen.h
new file mode 100644
index 00000000000000..c7b5776b3b60b6
--- /dev/null
+++ b/compat/fsmonitor/fsmonitor-fs-listen.h
@@ -0,0 +1,49 @@
+#ifndef FSMONITOR_FS_LISTEN_H
+#define FSMONITOR_FS_LISTEN_H
+
+/* This needs to be implemented by each backend */
+
+#ifdef HAVE_FSMONITOR_DAEMON_BACKEND
+
+struct fsmonitor_daemon_state;
+
+/*
+ * Initialize platform-specific data for the fsmonitor listener thread.
+ * This will be called from the main thread PRIOR to staring the
+ * fsmonitor_fs_listener thread.
+ *
+ * Returns 0 if successful.
+ * Returns -1 otherwise.
+ */
+int fsmonitor_fs_listen__ctor(struct fsmonitor_daemon_state *state);
+
+/*
+ * Cleanup platform-specific data for the fsmonitor listener thread.
+ * This will be called from the main thread AFTER joining the listener.
+ */
+void fsmonitor_fs_listen__dtor(struct fsmonitor_daemon_state *state);
+
+/*
+ * The main body of the platform-specific event loop to watch for
+ * filesystem events.  This will run in the fsmonitor_fs_listen thread.
+ *
+ * It should call `ipc_server_stop_async()` if the listener thread
+ * prematurely terminates (because of a filesystem error or if it
+ * detects that the .git directory has been deleted).  (It should NOT
+ * do so if the listener thread receives a normal shutdown signal from
+ * the IPC layer.)
+ *
+ * It should set `state->error_code` to -1 if the daemon should exit
+ * with an error.
+ */
+void fsmonitor_fs_listen__loop(struct fsmonitor_daemon_state *state);
+
+/*
+ * Gently request that the fsmonitor listener thread shutdown.
+ * It does not wait for it to stop.  The caller should do a JOIN
+ * to wait for it.
+ */
+void fsmonitor_fs_listen__stop_async(struct fsmonitor_daemon_state *state);
+
+#endif /* HAVE_FSMONITOR_DAEMON_BACKEND */
+#endif /* FSMONITOR_FS_LISTEN_H */
diff --git a/config.mak.uname b/config.mak.uname
index 76087cff678905..df082c996045d6 100644
--- a/config.mak.uname
+++ b/config.mak.uname
@@ -417,6 +417,7 @@ ifeq ($(uname_S),Windows)
 	# so we don't need this:
 	#
 	#   SNPRINTF_RETURNS_BOGUS = YesPlease
+	FSMONITOR_DAEMON_BACKEND = win32
 	NO_SVN_TESTS = YesPlease
 	RUNTIME_PREFIX = YesPlease
 	HAVE_WPGMPTR = YesWeDo
@@ -595,6 +596,7 @@ ifneq (,$(findstring MINGW,$(uname_S)))
 	NO_STRTOUMAX = YesPlease
 	NO_MKDTEMP = YesPlease
 	NO_SVN_TESTS = YesPlease
+	FSMONITOR_DAEMON_BACKEND = win32
 	RUNTIME_PREFIX = YesPlease
 	HAVE_WPGMPTR = YesWeDo
 	NO_ST_BLOCKS_IN_STRUCT_STAT = YesPlease
diff --git a/contrib/buildsystems/CMakeLists.txt b/contrib/buildsystems/CMakeLists.txt
index 629a1817459b8c..fe3e25f71de6e2 100644
--- a/contrib/buildsystems/CMakeLists.txt
+++ b/contrib/buildsystems/CMakeLists.txt
@@ -252,6 +252,11 @@ else()
 	list(APPEND compat_SOURCES compat/simple-ipc/ipc-shared.c compat/simple-ipc/ipc-unix-socket.c)
 endif()
 
+if(CMAKE_SYSTEM_NAME STREQUAL "Windows")
+	add_compile_definitions(HAVE_FSMONITOR_DAEMON_BACKEND)
+	list(APPEND compat_SOURCES compat/fsmonitor/fsmonitor-fs-listen-win32.c)
+endif()
+
 set(EXE_EXTENSION ${CMAKE_EXECUTABLE_SUFFIX})
 
 #header checks

From 3a7659bd8c0a523074fa6aab3d8add9a15422e57 Mon Sep 17 00:00:00 2001
From: Jeff Hostetler <jeffhost@microsoft.com>
Date: Thu, 17 Dec 2020 10:41:07 -0500
Subject: [PATCH 22/39] fsmonitor-fs-listen-macos: stub in backend for MacOS

Stub in empty implementation of fsmonitor--daemon
backend for MacOS.

Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
---
 compat/fsmonitor/fsmonitor-fs-listen-macos.c | 20 ++++++++++++++++++++
 config.mak.uname                             |  2 ++
 contrib/buildsystems/CMakeLists.txt          |  3 +++
 3 files changed, 25 insertions(+)
 create mode 100644 compat/fsmonitor/fsmonitor-fs-listen-macos.c

diff --git a/compat/fsmonitor/fsmonitor-fs-listen-macos.c b/compat/fsmonitor/fsmonitor-fs-listen-macos.c
new file mode 100644
index 00000000000000..b91058d1c4f820
--- /dev/null
+++ b/compat/fsmonitor/fsmonitor-fs-listen-macos.c
@@ -0,0 +1,20 @@
+#include "cache.h"
+#include "fsmonitor.h"
+#include "fsmonitor-fs-listen.h"
+
+int fsmonitor_fs_listen__ctor(struct fsmonitor_daemon_state *state)
+{
+	return -1;
+}
+
+void fsmonitor_fs_listen__dtor(struct fsmonitor_daemon_state *state)
+{
+}
+
+void fsmonitor_fs_listen__stop_async(struct fsmonitor_daemon_state *state)
+{
+}
+
+void fsmonitor_fs_listen__loop(struct fsmonitor_daemon_state *state)
+{
+}
diff --git a/config.mak.uname b/config.mak.uname
index df082c996045d6..8679b154690008 100644
--- a/config.mak.uname
+++ b/config.mak.uname
@@ -144,6 +144,8 @@ ifeq ($(uname_S),Darwin)
 			MSGFMT = /usr/local/opt/gettext/bin/msgfmt
 		endif
 	endif
+	FSMONITOR_DAEMON_BACKEND = macos
+	BASIC_LDFLAGS += -framework CoreServices
 endif
 ifeq ($(uname_S),SunOS)
 	NEEDS_SOCKET = YesPlease
diff --git a/contrib/buildsystems/CMakeLists.txt b/contrib/buildsystems/CMakeLists.txt
index fe3e25f71de6e2..73745126739b4e 100644
--- a/contrib/buildsystems/CMakeLists.txt
+++ b/contrib/buildsystems/CMakeLists.txt
@@ -255,6 +255,9 @@ endif()
 if(CMAKE_SYSTEM_NAME STREQUAL "Windows")
 	add_compile_definitions(HAVE_FSMONITOR_DAEMON_BACKEND)
 	list(APPEND compat_SOURCES compat/fsmonitor/fsmonitor-fs-listen-win32.c)
+elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
+	add_compile_definitions(HAVE_FSMONITOR_DAEMON_BACKEND)
+	list(APPEND compat_SOURCES compat/fsmonitor/fsmonitor-fs-listen-macos.c)
 endif()
 
 set(EXE_EXTENSION ${CMAKE_EXECUTABLE_SUFFIX})

From fce149c845f79923d481fdcec23ddafd599824cd Mon Sep 17 00:00:00 2001
From: Jeff Hostetler <jeffhost@microsoft.com>
Date: Thu, 17 Dec 2020 11:34:13 -0500
Subject: [PATCH 23/39] fsmonitor--daemon: implement daemon command options

Implement command options `--run` and `--start` to try to
begin listening for file system events.

This version defines the thread structure with a single
fsmonitor_fs_listen thread to watch for file system events
and a simple IPC thread pool to wait for connections from
Git clients over a well-known named pipe or Unix domain
socket.

This version does not actually do anything yet because the
backends are still just stubs.

Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
---
 builtin/fsmonitor--daemon.c | 395 +++++++++++++++++++++++++++++++++++-
 fsmonitor--daemon.h         |  36 ++++
 2 files changed, 430 insertions(+), 1 deletion(-)
 create mode 100644 fsmonitor--daemon.h

diff --git a/builtin/fsmonitor--daemon.c b/builtin/fsmonitor--daemon.c
index 10434bce4b646e..23a06370797202 100644
--- a/builtin/fsmonitor--daemon.c
+++ b/builtin/fsmonitor--daemon.c
@@ -3,10 +3,14 @@
 #include "parse-options.h"
 #include "fsmonitor.h"
 #include "fsmonitor-ipc.h"
+#include "compat/fsmonitor/fsmonitor-fs-listen.h"
+#include "fsmonitor--daemon.h"
 #include "simple-ipc.h"
 #include "khash.h"
 
 static const char * const builtin_fsmonitor__daemon_usage[] = {
+	N_("git fsmonitor--daemon --start [<options>]"),
+	N_("git fsmonitor--daemon --run [<options>]"),
 	N_("git fsmonitor--daemon --stop"),
 	N_("git fsmonitor--daemon --is-running"),
 	N_("git fsmonitor--daemon --query <token>"),
@@ -16,6 +20,38 @@ static const char * const builtin_fsmonitor__daemon_usage[] = {
 };
 
 #ifdef HAVE_FSMONITOR_DAEMON_BACKEND
+/*
+ * Global state loaded from config.
+ */
+#define FSMONITOR__IPC_THREADS "fsmonitor.ipcthreads"
+static int fsmonitor__ipc_threads = 8;
+
+#define FSMONITOR__START_TIMEOUT "fsmonitor.starttimeout"
+static int fsmonitor__start_timeout_sec = 60;
+
+static int fsmonitor_config(const char *var, const char *value, void *cb)
+{
+	if (!strcmp(var, FSMONITOR__IPC_THREADS)) {
+		int i = git_config_int(var, value);
+		if (i < 1)
+			return error(_("value of '%s' out of range: %d"),
+				     FSMONITOR__IPC_THREADS, i);
+		fsmonitor__ipc_threads = i;
+		return 0;
+	}
+
+	if (!strcmp(var, FSMONITOR__START_TIMEOUT)) {
+		int i = git_config_int(var, value);
+		if (i < 0)
+			return error(_("value of '%s' out of range: %d"),
+				     FSMONITOR__START_TIMEOUT, i);
+		fsmonitor__start_timeout_sec = i;
+		return 0;
+	}
+
+	return git_default_config(var, value, cb);
+}
+
 /*
  * Acting as a CLIENT.
  *
@@ -113,15 +149,350 @@ static int do_as_client__send_flush(void)
 	return 0;
 }
 
+static ipc_server_application_cb handle_client;
+
+static int handle_client(void *data, const char *command,
+			 ipc_server_reply_cb *reply,
+			 struct ipc_server_reply_data *reply_data)
+{
+	/* struct fsmonitor_daemon_state *state = data; */
+	int result;
+
+	trace2_region_enter("fsmonitor", "handle_client", the_repository);
+	trace2_data_string("fsmonitor", the_repository, "request", command);
+
+	result = 0; /* TODO Do something here. */
+
+	trace2_region_leave("fsmonitor", "handle_client", the_repository);
+
+	return result;
+}
+
+static void *fsmonitor_fs_listen__thread_proc(void *_state)
+{
+	struct fsmonitor_daemon_state *state = _state;
+
+	trace2_thread_start("fsm-listen");
+
+	trace_printf_key(&trace_fsmonitor, "Watching: worktree '%s'",
+			 state->path_worktree_watch.buf);
+	if (state->nr_paths_watching > 1)
+		trace_printf_key(&trace_fsmonitor, "Watching: gitdir '%s'",
+				 state->path_gitdir_watch.buf);
+
+	fsmonitor_fs_listen__loop(state);
+
+	trace2_thread_exit();
+	return NULL;
+}
+
+static int fsmonitor_run_daemon_1(struct fsmonitor_daemon_state *state)
+{
+	struct ipc_server_opts ipc_opts = {
+		.nr_threads = fsmonitor__ipc_threads,
+
+		/*
+		 * We know that there are no other active threads yet,
+		 * so we can let the IPC layer temporarily chdir() if
+		 * it needs to when creating the server side of the
+		 * Unix domain socket.
+		 */
+		.uds_disallow_chdir = 0
+	};
+
+	/*
+	 * Start the IPC thread pool before the we've started the file
+	 * system event listener thread so that we have the IPC handle
+	 * before we need it.
+	 */
+	if (ipc_server_run_async(&state->ipc_server_data,
+				 fsmonitor_ipc__get_path(), &ipc_opts,
+				 handle_client, state))
+		return error(_("could not start IPC thread pool"));
+
+	/*
+	 * Start the fsmonitor listener thread to collect filesystem
+	 * events.
+	 */
+	if (pthread_create(&state->listener_thread, NULL,
+			   fsmonitor_fs_listen__thread_proc, state) < 0) {
+		ipc_server_stop_async(state->ipc_server_data);
+		ipc_server_await(state->ipc_server_data);
+
+		return error(_("could not start fsmonitor listener thread"));
+	}
+
+	/*
+	 * The daemon is now fully functional in background threads.
+	 * Wait for the IPC thread pool to shutdown (whether by client
+	 * request or from filesystem activity).
+	 */
+	ipc_server_await(state->ipc_server_data);
+
+	/*
+	 * The fsmonitor listener thread may have received a shutdown
+	 * event from the IPC thread pool, but it doesn't hurt to tell
+	 * it again.  And wait for it to shutdown.
+	 */
+	fsmonitor_fs_listen__stop_async(state);
+	pthread_join(state->listener_thread, NULL);
+
+	return state->error_code;
+}
+
+static int fsmonitor_run_daemon(void)
+{
+	struct fsmonitor_daemon_state state;
+	int err;
+
+	memset(&state, 0, sizeof(state));
+
+	pthread_mutex_init(&state.main_lock, NULL);
+	state.error_code = 0;
+	state.current_token_data = NULL;
+	state.test_client_delay_ms = 0;
+
+	/* Prepare to (recursively) watch the <worktree-root> directory. */
+	strbuf_init(&state.path_worktree_watch, 0);
+	strbuf_addstr(&state.path_worktree_watch, absolute_path(get_git_work_tree()));
+	state.nr_paths_watching = 1;
+
+	/*
+	 * If ".git" is not a directory, then <gitdir> is not inside the
+	 * cone of <worktree-root>, so set up a second watch for it.
+	 */
+	strbuf_init(&state.path_gitdir_watch, 0);
+	strbuf_addbuf(&state.path_gitdir_watch, &state.path_worktree_watch);
+	strbuf_addstr(&state.path_gitdir_watch, "/.git");
+	if (!is_directory(state.path_gitdir_watch.buf)) {
+		strbuf_reset(&state.path_gitdir_watch);
+		strbuf_addstr(&state.path_gitdir_watch, absolute_path(get_git_dir()));
+		state.nr_paths_watching = 2;
+	}
+
+	/*
+	 * Confirm that we can create platform-specific resources for the
+	 * filesystem listener before we bother starting all the threads.
+	 */
+	if (fsmonitor_fs_listen__ctor(&state)) {
+		err = error(_("could not initialize listener thread"));
+		goto done;
+	}
+
+	err = fsmonitor_run_daemon_1(&state);
+
+done:
+	pthread_mutex_destroy(&state.main_lock);
+	fsmonitor_fs_listen__dtor(&state);
+
+	ipc_server_free(state.ipc_server_data);
+
+	strbuf_release(&state.path_worktree_watch);
+	strbuf_release(&state.path_gitdir_watch);
+
+	return err;
+}
+
 static int is_ipc_daemon_listening(void)
 {
 	return fsmonitor_ipc__get_state() == IPC_STATE__LISTENING;
 }
 
+static int try_to_run_foreground_daemon(void)
+{
+	/*
+	 * Technically, we don't need to probe for an existing daemon
+	 * process, since we could just call `fsmonitor_run_daemon()`
+	 * and let it fail if the pipe/socket is busy.
+	 *
+	 * However, this method gives us a nicer error message for a
+	 * common error case.
+	 */
+	if (is_ipc_daemon_listening())
+		die("fsmonitor--daemon is already running.");
+
+	return !!fsmonitor_run_daemon();
+}
+
+#ifndef GIT_WINDOWS_NATIVE
+/*
+ * This is adapted from `daemonize()`.  Use `fork()` to directly create
+ * and run the daemon in a child process.  The fork-parent returns the
+ * child PID so that we can wait for the child to startup before exiting.
+ */
+static int spawn_background_fsmonitor_daemon(pid_t *pid)
+{
+	*pid = fork();
+
+	switch (*pid) {
+	case 0:
+		if (setsid() == -1)
+			error_errno(_("setsid failed"));
+		close(0);
+		close(1);
+		close(2);
+		sanitize_stdfds();
+
+		return !!fsmonitor_run_daemon();
+
+	case -1:
+		return error_errno(_("could not spawn fsmonitor--daemon in the background"));
+
+	default:
+		return 0;
+	}
+}
+#else
+/*
+ * Conceptually like `daemonize()` but different because Windows does not
+ * have `fork(2)`.  Spawn a normal Windows child process but without the
+ * limitations of `start_command()` and `finish_command()`.
+ */
+static int spawn_background_fsmonitor_daemon(pid_t *pid)
+{
+	char git_exe[MAX_PATH];
+	struct strvec args = STRVEC_INIT;
+	int in, out;
+
+	GetModuleFileNameA(NULL, git_exe, MAX_PATH);
+
+	in = open("/dev/null", O_RDONLY);
+	out = open("/dev/null", O_WRONLY);
+
+	strvec_push(&args, git_exe);
+	strvec_push(&args, "fsmonitor--daemon");
+	strvec_push(&args, "--run");
+
+	*pid = mingw_spawnvpe(args.v[0], args.v, NULL, NULL, in, out, out);
+	close(in);
+	close(out);
+
+	strvec_clear(&args);
+
+	if (*pid < 0)
+		return error(_("could not spawn fsmonitor--daemon in the background"));
+
+	return 0;
+}
+#endif
+
+/*
+ * This is adapted from `wait_or_whine()`.  Watch the child process and
+ * let it get started and begin listening for requests on the socket
+ * before reporting our success.
+ */
+static int wait_for_background_startup(pid_t pid_child)
+{
+	int status;
+	pid_t pid_seen;
+	enum ipc_active_state s;
+	time_t time_limit, now;
+
+	time(&time_limit);
+	time_limit += fsmonitor__start_timeout_sec;
+
+	for (;;) {
+		pid_seen = waitpid(pid_child, &status, WNOHANG);
+
+		if (pid_seen == -1)
+			return error_errno(_("waitpid failed"));
+
+		else if (pid_seen == 0) {
+			/*
+			 * The child is still running (this should be
+			 * the normal case).  Try to connect to it on
+			 * the socket and see if it is ready for
+			 * business.
+			 *
+			 * If there is another daemon already running,
+			 * our child will fail to start (possibly
+			 * after a timeout on the lock), but we don't
+			 * care (who responds) if the socket is live.
+			 */
+			s = fsmonitor_ipc__get_state();
+			if (s == IPC_STATE__LISTENING)
+				return 0;
+
+			time(&now);
+			if (now > time_limit)
+				return error(_("fsmonitor--daemon not online yet"));
+
+			continue;
+		}
+
+		else if (pid_seen == pid_child) {
+			/*
+			 * The new child daemon process shutdown while
+			 * it was starting up, so it is not listening
+			 * on the socket.
+			 *
+			 * Try to ping the socket in the odd chance
+			 * that another daemon started (or was already
+			 * running) while our child was starting.
+			 *
+			 * Again, we don't care who services the socket.
+			 */
+			s = fsmonitor_ipc__get_state();
+			if (s == IPC_STATE__LISTENING)
+				return 0;
+
+			/*
+			 * We don't care about the WEXITSTATUS() nor
+			 * any of the WIF*(status) values because
+			 * `cmd_fsmonitor__daemon()` does the `!!result`
+			 * trick on all function return values.
+			 *
+			 * So it is sufficient to just report the
+			 * early shutdown as an error.
+			 */
+			return error(_("fsmonitor--daemon failed to start"));
+		}
+
+		else
+			return error(_("waitpid is confused"));
+	}
+}
+
+static int try_to_start_background_daemon(void)
+{
+	pid_t pid_child;
+	int ret;
+
+	/*
+	 * Before we try to create a background daemon process, see
+	 * if a daemon process is already listening.  This makes it
+	 * easier for us to report an already-listening error to the
+	 * console, since our spawn/daemon can only report the success
+	 * of creating the background process (and not whether it
+	 * immediately exited).
+	 */
+	if (is_ipc_daemon_listening())
+		die("fsmonitor--daemon is already running.");
+
+	/*
+	 * Run the actual daemon in a background process.
+	 */
+	ret = spawn_background_fsmonitor_daemon(&pid_child);
+	if (pid_child <= 0)
+		return ret;
+
+	/*
+	 * Wait (with timeout) for the background child process get
+	 * started and begin listening on the socket/pipe.  This makes
+	 * the "start" command more synchronous and more reliable in
+	 * tests.
+	 */
+	ret = wait_for_background_startup(pid_child);
+
+	return ret;
+}
+
 int cmd_fsmonitor__daemon(int argc, const char **argv, const char *prefix)
 {
 	enum daemon_mode {
 		UNDEFINED_MODE,
+		START,
+		RUN,
 		STOP,
 		IS_RUNNING,
 		QUERY,
@@ -130,6 +501,11 @@ int cmd_fsmonitor__daemon(int argc, const char **argv, const char *prefix)
 	} mode = UNDEFINED_MODE;
 
 	struct option options[] = {
+		OPT_CMDMODE(0, "start", &mode,
+			    N_("run the daemon in the background"),
+			    START),
+		OPT_CMDMODE(0, "run", &mode,
+			    N_("run the daemon in the foreground"), RUN),
 		OPT_CMDMODE(0, "stop", &mode, N_("stop the running daemon"),
 			    STOP),
 
@@ -145,18 +521,35 @@ int cmd_fsmonitor__daemon(int argc, const char **argv, const char *prefix)
 			    QUERY_INDEX),
 		OPT_CMDMODE(0, "flush", &mode, N_("flush cached filesystem events"),
 			    FLUSH),
+
+		OPT_GROUP(N_("Daemon options")),
+		OPT_INTEGER(0, "ipc-threads",
+			    &fsmonitor__ipc_threads,
+			    N_("use <n> ipc worker threads")),
+		OPT_INTEGER(0, "start-timeout",
+			    &fsmonitor__start_timeout_sec,
+			    N_("Max seconds to wait for background daemon startup")),
 		OPT_END()
 	};
 
 	if (argc == 2 && !strcmp(argv[1], "-h"))
 		usage_with_options(builtin_fsmonitor__daemon_usage, options);
 
-	git_config(git_default_config, NULL);
+	git_config(fsmonitor_config, NULL);
 
 	argc = parse_options(argc, argv, prefix, options,
 			     builtin_fsmonitor__daemon_usage, 0);
+	if (fsmonitor__ipc_threads < 1)
+		die(_("invalid 'ipc-threads' value (%d)"),
+		    fsmonitor__ipc_threads);
 
 	switch (mode) {
+	case START:
+		return !!try_to_start_background_daemon();
+
+	case RUN:
+		return !!try_to_run_foreground_daemon();
+
 	case STOP:
 		return !!do_as_client__send_stop();
 
diff --git a/fsmonitor--daemon.h b/fsmonitor--daemon.h
new file mode 100644
index 00000000000000..09e4a6fb6675d3
--- /dev/null
+++ b/fsmonitor--daemon.h
@@ -0,0 +1,36 @@
+#ifndef FSMONITOR_DAEMON_H
+#define FSMONITOR_DAEMON_H
+
+#ifdef HAVE_FSMONITOR_DAEMON_BACKEND
+
+#include "cache.h"
+#include "dir.h"
+#include "run-command.h"
+#include "simple-ipc.h"
+#include "thread-utils.h"
+
+struct fsmonitor_batch;
+struct fsmonitor_token_data;
+
+struct fsmonitor_daemon_backend_data; /* opaque platform-specific data */
+
+struct fsmonitor_daemon_state {
+	pthread_t listener_thread;
+	pthread_mutex_t main_lock;
+
+	struct strbuf path_worktree_watch;
+	struct strbuf path_gitdir_watch;
+	int nr_paths_watching;
+
+	struct fsmonitor_token_data *current_token_data;
+
+	int error_code;
+	struct fsmonitor_daemon_backend_data *backend_data;
+
+	struct ipc_server_data *ipc_server_data;
+
+	int test_client_delay_ms;
+};
+
+#endif /* HAVE_FSMONITOR_DAEMON_BACKEND */
+#endif /* FSMONITOR_DAEMON_H */

From d49f16408b5cdae653dadf047527e9d73221b6a1 Mon Sep 17 00:00:00 2001
From: Jeff Hostetler <jeffhost@microsoft.com>
Date: Thu, 17 Dec 2020 12:37:15 -0500
Subject: [PATCH 24/39] fsmonitor--daemon: add pathname classification

Teach fsmonitor--daemon to classify relative and absolute
pathnames and decide how they should be handled.  This will
be used by the platform-specific backend to respond to each
filesystem event.

When we register for filesystem notifications on a directory,
we get events for everything (recursively) in the directory.
We want to report to clients changes to tracked and untracked
paths within the working directory.  We do not want to report
changes within the .git directory, for example.

This classification will be used in a later commit by the
different backends to classify paths as events are received.

Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
---
 builtin/fsmonitor--daemon.c | 81 +++++++++++++++++++++++++++++++++++++
 fsmonitor--daemon.h         | 61 ++++++++++++++++++++++++++++
 2 files changed, 142 insertions(+)

diff --git a/builtin/fsmonitor--daemon.c b/builtin/fsmonitor--daemon.c
index 23a06370797202..16252487240a3c 100644
--- a/builtin/fsmonitor--daemon.c
+++ b/builtin/fsmonitor--daemon.c
@@ -168,6 +168,87 @@ static int handle_client(void *data, const char *command,
 	return result;
 }
 
+#define FSMONITOR_COOKIE_PREFIX ".fsmonitor-daemon-"
+
+enum fsmonitor_path_type fsmonitor_classify_path_workdir_relative(
+	const char *rel)
+{
+	if (fspathncmp(rel, ".git", 4))
+		return IS_WORKDIR_PATH;
+	rel += 4;
+
+	if (!*rel)
+		return IS_DOT_GIT;
+	if (*rel != '/')
+		return IS_WORKDIR_PATH; /* e.g. .gitignore */
+	rel++;
+
+	if (!fspathncmp(rel, FSMONITOR_COOKIE_PREFIX,
+			strlen(FSMONITOR_COOKIE_PREFIX)))
+		return IS_INSIDE_DOT_GIT_WITH_COOKIE_PREFIX;
+
+	return IS_INSIDE_DOT_GIT;
+}
+
+enum fsmonitor_path_type fsmonitor_classify_path_gitdir_relative(
+	const char *rel)
+{
+	if (!fspathncmp(rel, FSMONITOR_COOKIE_PREFIX,
+			strlen(FSMONITOR_COOKIE_PREFIX)))
+		return IS_INSIDE_GITDIR_WITH_COOKIE_PREFIX;
+
+	return IS_INSIDE_GITDIR;
+}
+
+static enum fsmonitor_path_type try_classify_workdir_abs_path(
+	struct fsmonitor_daemon_state *state,
+	const char *path)
+{
+	const char *rel;
+
+	if (fspathncmp(path, state->path_worktree_watch.buf,
+		       state->path_worktree_watch.len))
+		return IS_OUTSIDE_CONE;
+
+	rel = path + state->path_worktree_watch.len;
+
+	if (!*rel)
+		return IS_WORKDIR_PATH; /* it is the root dir exactly */
+	if (*rel != '/')
+		return IS_OUTSIDE_CONE;
+	rel++;
+
+	return fsmonitor_classify_path_workdir_relative(rel);
+}
+
+enum fsmonitor_path_type fsmonitor_classify_path_absolute(
+	struct fsmonitor_daemon_state *state,
+	const char *path)
+{
+	const char *rel;
+	enum fsmonitor_path_type t;
+
+	t = try_classify_workdir_abs_path(state, path);
+	if (state->nr_paths_watching == 1)
+		return t;
+	if (t != IS_OUTSIDE_CONE)
+		return t;
+
+	if (fspathncmp(path, state->path_gitdir_watch.buf,
+		       state->path_gitdir_watch.len))
+		return IS_OUTSIDE_CONE;
+
+	rel = path + state->path_gitdir_watch.len;
+
+	if (!*rel)
+		return IS_GITDIR; /* it is the <gitdir> exactly */
+	if (*rel != '/')
+		return IS_OUTSIDE_CONE;
+	rel++;
+
+	return fsmonitor_classify_path_gitdir_relative(rel);
+}
+
 static void *fsmonitor_fs_listen__thread_proc(void *_state)
 {
 	struct fsmonitor_daemon_state *state = _state;
diff --git a/fsmonitor--daemon.h b/fsmonitor--daemon.h
index 09e4a6fb6675d3..97ea3766e900a4 100644
--- a/fsmonitor--daemon.h
+++ b/fsmonitor--daemon.h
@@ -32,5 +32,66 @@ struct fsmonitor_daemon_state {
 	int test_client_delay_ms;
 };
 
+/*
+ * Pathname classifications.
+ *
+ * The daemon classifies the pathnames that it receives from file
+ * system notification events into the following categories and uses
+ * that to decide whether clients are told about them.  (And to watch
+ * for file system synchronization events.)
+ *
+ * The client should only care about paths within the working
+ * directory proper (inside the working directory and not ".git" nor
+ * inside of ".git/").  That is, the client has read the index and is
+ * asking for a list of any paths in the working directory that have
+ * been modified since the last token.  The client does not care about
+ * file system changes within the .git directory (such as new loose
+ * objects or packfiles).  So the client will only receive paths that
+ * are classified as IS_WORKDIR_PATH.
+ *
+ * The daemon uses the IS_DOT_GIT and IS_GITDIR internally to mean the
+ * exact ".git" directory or GITDIR.  If the daemon receives a delete
+ * event for either of these directories, it will automatically
+ * shutdown, for example.
+ *
+ * Note that the daemon DOES NOT explicitly watch nor special case the
+ * ".git/index" file.  The daemon does not read the index and does not
+ * have any internal index-relative state.  The daemon only collects
+ * the set of modified paths within the working directory.
+ */
+enum fsmonitor_path_type {
+	IS_WORKDIR_PATH = 0,
+
+	IS_DOT_GIT,
+	IS_INSIDE_DOT_GIT,
+	IS_INSIDE_DOT_GIT_WITH_COOKIE_PREFIX,
+
+	IS_GITDIR,
+	IS_INSIDE_GITDIR,
+	IS_INSIDE_GITDIR_WITH_COOKIE_PREFIX,
+
+	IS_OUTSIDE_CONE,
+};
+
+/*
+ * Classify a pathname relative to the root of the working directory.
+ */
+enum fsmonitor_path_type fsmonitor_classify_path_workdir_relative(
+	const char *relative_path);
+
+/*
+ * Classify a pathname relative to a <gitdir> that is external to the
+ * worktree directory.
+ */
+enum fsmonitor_path_type fsmonitor_classify_path_gitdir_relative(
+	const char *relative_path);
+
+/*
+ * Classify an absolute pathname received from a filesystem event.
+ */
+enum fsmonitor_path_type fsmonitor_classify_path_absolute(
+	struct fsmonitor_daemon_state *state,
+	const char *path);
+
 #endif /* HAVE_FSMONITOR_DAEMON_BACKEND */
 #endif /* FSMONITOR_DAEMON_H */

From 6b4070e4ee28e89e46a6030e43a6b34aefbdfee8 Mon Sep 17 00:00:00 2001
From: Jeff Hostetler <jeffhost@microsoft.com>
Date: Thu, 17 Dec 2020 12:58:14 -0500
Subject: [PATCH 25/39] fsmonitor--daemon: define token-ids

Teach fsmonitor--daemon to create token-ids and define the
overall token naming scheme.

Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
---
 builtin/fsmonitor--daemon.c | 108 +++++++++++++++++++++++++++++++++++-
 1 file changed, 107 insertions(+), 1 deletion(-)

diff --git a/builtin/fsmonitor--daemon.c b/builtin/fsmonitor--daemon.c
index 16252487240a3c..2d25e36601fedf 100644
--- a/builtin/fsmonitor--daemon.c
+++ b/builtin/fsmonitor--daemon.c
@@ -149,6 +149,112 @@ static int do_as_client__send_flush(void)
 	return 0;
 }
 
+/*
+ * Requests to and from a FSMonitor Protocol V2 provider use an opaque
+ * "token" as a virtual timestamp.  Clients can request a summary of all
+ * created/deleted/modified files relative to a token.  In the response,
+ * clients receive a new token for the next (relative) request.
+ *
+ *
+ * Token Format
+ * ============
+ *
+ * The contents of the token are private and provider-specific.
+ *
+ * For the built-in fsmonitor--daemon, we define a token as follows:
+ *
+ *     "builtin" ":" <token_id> ":" <sequence_nr>
+ *
+ * The <token_id> is an arbitrary OPAQUE string, such as a GUID,
+ * UUID, or {timestamp,pid}.  It is used to group all filesystem
+ * events that happened while the daemon was monitoring (and in-sync
+ * with the filesystem).
+ *
+ *     Unlike FSMonitor Protocol V1, it is not defined as a timestamp
+ *     and does not define less-than/greater-than relationships.
+ *     (There are too many race conditions to rely on file system
+ *     event timestamps.)
+ *
+ * The <sequence_nr> is a simple integer incremented for each event
+ * received.  When a new <token_id> is created, the <sequence_nr> is
+ * reset to zero.
+ *
+ *
+ * About Token Ids
+ * ===============
+ *
+ * A new token_id is created:
+ *
+ * [1] each time the daemon is started.
+ *
+ * [2] any time that the daemon must re-sync with the filesystem
+ *     (such as when the kernel drops or we miss events on a very
+ *     active volume).
+ *
+ * [3] in response to a client "flush" command (for dropped event
+ *     testing).
+ *
+ * [4] MAYBE We might want to change the token_id after very complex
+ *     filesystem operations are performed, such as a directory move
+ *     sequence that affects many files within.  It might be simpler
+ *     to just give up and fake a re-sync (and let the client do a
+ *     full scan) than try to enumerate the effects of such a change.
+ *
+ * When a new token_id is created, the daemon is free to discard all
+ * cached filesystem events associated with any previous token_ids.
+ * Events associated with a non-current token_id will never be sent
+ * to a client.  A token_id change implicitly means that the daemon
+ * has gap in its event history.
+ *
+ * Therefore, clients that present a token with a stale (non-current)
+ * token_id will always be given a trivial response.
+ */
+struct fsmonitor_token_data {
+	struct strbuf token_id;
+	struct fsmonitor_batch *batch_head;
+	struct fsmonitor_batch *batch_tail;
+	uint64_t client_ref_count;
+};
+
+static struct fsmonitor_token_data *fsmonitor_new_token_data(void)
+{
+	static int test_env_value = -1;
+	static uint64_t flush_count = 0;
+	struct fsmonitor_token_data *token;
+
+	token = (struct fsmonitor_token_data *)xcalloc(1, sizeof(*token));
+
+	strbuf_init(&token->token_id, 0);
+	token->batch_head = NULL;
+	token->batch_tail = NULL;
+	token->client_ref_count = 0;
+
+	if (test_env_value < 0)
+		test_env_value = git_env_bool("GIT_TEST_FSMONITOR_TOKEN", 0);
+
+	if (!test_env_value) {
+		struct timeval tv;
+		struct tm tm;
+		time_t secs;
+
+		gettimeofday(&tv, NULL);
+		secs = tv.tv_sec;
+		gmtime_r(&secs, &tm);
+
+		strbuf_addf(&token->token_id,
+			    "%"PRIu64".%d.%4d%02d%02dT%02d%02d%02d.%06ldZ",
+			    flush_count++,
+			    getpid(),
+			    tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
+			    tm.tm_hour, tm.tm_min, tm.tm_sec,
+			    (long)tv.tv_usec);
+	} else {
+		strbuf_addf(&token->token_id, "test_%08x", test_env_value++);
+	}
+
+	return token;
+}
+
 static ipc_server_application_cb handle_client;
 
 static int handle_client(void *data, const char *command,
@@ -330,7 +436,7 @@ static int fsmonitor_run_daemon(void)
 
 	pthread_mutex_init(&state.main_lock, NULL);
 	state.error_code = 0;
-	state.current_token_data = NULL;
+	state.current_token_data = fsmonitor_new_token_data();
 	state.test_client_delay_ms = 0;
 
 	/* Prepare to (recursively) watch the <worktree-root> directory. */

From 3ffa3f6679cc2dee3b025269fd354835555da964 Mon Sep 17 00:00:00 2001
From: Jeff Hostetler <jeffhost@microsoft.com>
Date: Thu, 17 Dec 2020 13:17:33 -0500
Subject: [PATCH 26/39] fsmonitor--daemon: create token-based changed path
 cache

Teach fsmonitor--daemon to build lists of changed paths and associate
them with a token-id.  This will be used by the platform-specific
backends to accumulate changed paths in response to filesystem events.

The platform-specific event loops receive batches containing one or
more changed paths.  Their fs listener thread will accumulate them in
a `fsmonitor_batch` (and without locking) and then "publish" them to
associate them with the current token and to make them visible to the
client worker threads.

Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
---
 builtin/fsmonitor--daemon.c | 192 ++++++++++++++++++++++++++++++++++++
 fsmonitor--daemon.h         |  40 ++++++++
 2 files changed, 232 insertions(+)

diff --git a/builtin/fsmonitor--daemon.c b/builtin/fsmonitor--daemon.c
index 2d25e36601fedf..48071d445c4922 100644
--- a/builtin/fsmonitor--daemon.c
+++ b/builtin/fsmonitor--daemon.c
@@ -255,6 +255,120 @@ static struct fsmonitor_token_data *fsmonitor_new_token_data(void)
 	return token;
 }
 
+struct fsmonitor_batch {
+	struct fsmonitor_batch *next;
+	uint64_t batch_seq_nr;
+	const char **interned_paths;
+	size_t nr, alloc;
+	time_t pinned_time;
+};
+
+struct fsmonitor_batch *fsmonitor_batch__new(void)
+{
+	struct fsmonitor_batch *batch = xcalloc(1, sizeof(*batch));
+
+	return batch;
+}
+
+struct fsmonitor_batch *fsmonitor_batch__free(struct fsmonitor_batch *batch)
+{
+	struct fsmonitor_batch *next;
+
+	if (!batch)
+		return NULL;
+
+	next = batch->next;
+
+	/*
+	 * The actual strings within the array are interned, so we don't
+	 * own them.
+	 */
+	free(batch->interned_paths);
+
+	return next;
+}
+
+void fsmonitor_batch__add_path(struct fsmonitor_batch *batch,
+			       const char *path)
+{
+	const char *interned_path = strintern(path);
+
+	trace_printf_key(&trace_fsmonitor, "event: %s", interned_path);
+
+	ALLOC_GROW(batch->interned_paths, batch->nr + 1, batch->alloc);
+	batch->interned_paths[batch->nr++] = interned_path;
+}
+
+static void fsmonitor_batch__combine(struct fsmonitor_batch *batch_dest,
+				     const struct fsmonitor_batch *batch_src)
+{
+	/* assert state->main_lock */
+
+	size_t k;
+
+	ALLOC_GROW(batch_dest->interned_paths,
+		   batch_dest->nr + batch_src->nr + 1,
+		   batch_dest->alloc);
+
+	for (k = 0; k < batch_src->nr; k++)
+		batch_dest->interned_paths[batch_dest->nr++] =
+			batch_src->interned_paths[k];
+}
+
+static void fsmonitor_free_token_data(struct fsmonitor_token_data *token)
+{
+	struct fsmonitor_batch *p;
+
+	if (!token)
+		return;
+
+	assert(token->client_ref_count == 0);
+
+	strbuf_release(&token->token_id);
+
+	for (p = token->batch_head; p; p = fsmonitor_batch__free(p))
+		;
+
+	free(token);
+}
+
+/*
+ * Flush all of our cached data about the filesystem.  Call this if we
+ * lose sync with the filesystem and miss some notification events.
+ *
+ * [1] If we are missing events, then we no longer have a complete
+ *     history of the directory (relative to our current start token).
+ *     We should create a new token and start fresh (as if we just
+ *     booted up).
+ *
+ * If there are no readers of the the current token data series, we
+ * can free it now.  Otherwise, let the last reader free it.  Either
+ * way, the old token data series is no longer associated with our
+ * state data.
+ */
+void fsmonitor_force_resync(struct fsmonitor_daemon_state *state)
+{
+	struct fsmonitor_token_data *free_me = NULL;
+	struct fsmonitor_token_data *new_one = NULL;
+
+	new_one = fsmonitor_new_token_data();
+
+	pthread_mutex_lock(&state->main_lock);
+
+	trace_printf_key(&trace_fsmonitor,
+			 "force resync [old '%s'][new '%s']",
+			 state->current_token_data->token_id.buf,
+			 new_one->token_id.buf);
+
+	if (state->current_token_data->client_ref_count == 0)
+		free_me = state->current_token_data;
+	state->current_token_data = new_one;
+
+	pthread_mutex_unlock(&state->main_lock);
+
+	fsmonitor_free_token_data(free_me);
+}
+
 static ipc_server_application_cb handle_client;
 
 static int handle_client(void *data, const char *command,
@@ -355,6 +469,77 @@ enum fsmonitor_path_type fsmonitor_classify_path_absolute(
 	return fsmonitor_classify_path_gitdir_relative(rel);
 }
 
+/*
+ * We try to combine small batches at the front of the batch-list to avoid
+ * having a long list.  This hopefully makes it a little easier when we want
+ * to truncate and maintain the list.  However, we don't want the paths array
+ * to just keep growing and growing with realloc, so we insert an arbitrary
+ * limit.
+ */
+#define MY_COMBINE_LIMIT (1024)
+
+void fsmonitor_publish(struct fsmonitor_daemon_state *state,
+		       struct fsmonitor_batch *batch,
+		       const struct string_list *cookie_names)
+{
+	if (!batch && !cookie_names->nr)
+		return;
+
+	pthread_mutex_lock(&state->main_lock);
+
+	if (batch) {
+		struct fsmonitor_batch *head;
+
+		head = state->current_token_data->batch_head;
+		if (!head) {
+			batch->batch_seq_nr = 0;
+			batch->next = NULL;
+			state->current_token_data->batch_head = batch;
+			state->current_token_data->batch_tail = batch;
+		} else if (head->pinned_time) {
+			/*
+			 * We cannot alter the current batch list
+			 * because:
+			 *
+			 * [a] it is being transmitted to at least one
+			 * client and the handle_client() thread has a
+			 * ref-count, but not a lock on the batch list
+			 * starting with this item.
+			 *
+			 * [b] it has been transmitted in the past to
+			 * at least one client such that future
+			 * requests are relative to this head batch.
+			 *
+			 * So, we can only prepend a new batch onto
+			 * the front of the list.
+			 */
+			batch->batch_seq_nr = head->batch_seq_nr + 1;
+			batch->next = head;
+			state->current_token_data->batch_head = batch;
+		} else if (head->nr + batch->nr > MY_COMBINE_LIMIT) {
+			/*
+			 * The head batch in the list has never been
+			 * transmitted to a client, but folding the
+			 * contents of the new batch onto it would
+			 * exceed our arbitrary limit, so just prepend
+			 * the new batch onto the list.
+			 */
+			batch->batch_seq_nr = head->batch_seq_nr + 1;
+			batch->next = head;
+			state->current_token_data->batch_head = batch;
+		} else {
+			/*
+			 * We are free to append the paths in the given
+			 * batch onto the end of the current head batch.
+			 */
+			fsmonitor_batch__combine(head, batch);
+			fsmonitor_batch__free(batch);
+		}
+	}
+
+	pthread_mutex_unlock(&state->main_lock);
+}
+
 static void *fsmonitor_fs_listen__thread_proc(void *_state)
 {
 	struct fsmonitor_daemon_state *state = _state;
@@ -369,6 +554,13 @@ static void *fsmonitor_fs_listen__thread_proc(void *_state)
 
 	fsmonitor_fs_listen__loop(state);
 
+	pthread_mutex_lock(&state->main_lock);
+	if (state->current_token_data &&
+	    state->current_token_data->client_ref_count == 0)
+		fsmonitor_free_token_data(state->current_token_data);
+	state->current_token_data = NULL;
+	pthread_mutex_unlock(&state->main_lock);
+
 	trace2_thread_exit();
 	return NULL;
 }
diff --git a/fsmonitor--daemon.h b/fsmonitor--daemon.h
index 97ea3766e900a4..06563b6ed56c0d 100644
--- a/fsmonitor--daemon.h
+++ b/fsmonitor--daemon.h
@@ -12,6 +12,27 @@
 struct fsmonitor_batch;
 struct fsmonitor_token_data;
 
+/*
+ * Create a new batch of path(s).  The returned batch is considered
+ * private and not linked into the fsmonitor daemon state.  The caller
+ * should fill this batch with one or more paths and then publish it.
+ */
+struct fsmonitor_batch *fsmonitor_batch__new(void);
+
+/*
+ * Free this batch and return the value of the batch->next field.
+ */
+struct fsmonitor_batch *fsmonitor_batch__free(struct fsmonitor_batch *batch);
+
+/*
+ * Add this path to this batch of modified files.
+ *
+ * The batch should be private and NOT (yet) linked into the fsmonitor
+ * daemon state and therefore not yet visible to worker threads and so
+ * no locking is required.
+ */
+void fsmonitor_batch__add_path(struct fsmonitor_batch *batch, const char *path);
+
 struct fsmonitor_daemon_backend_data; /* opaque platform-specific data */
 
 struct fsmonitor_daemon_state {
@@ -93,5 +114,24 @@ enum fsmonitor_path_type fsmonitor_classify_path_absolute(
 	struct fsmonitor_daemon_state *state,
 	const char *path);
 
+/*
+ * Prepend the this batch of path(s) onto the list of batches associated
+ * with the current token.  This makes the batch visible to worker threads.
+ *
+ * The caller no longer owns the batch and must not free it.
+ *
+ * Wake up the client threads waiting on these cookies.
+ */
+void fsmonitor_publish(struct fsmonitor_daemon_state *state,
+		       struct fsmonitor_batch *batch,
+		       const struct string_list *cookie_names);
+
+/*
+ * If the platform-specific layer loses sync with the filesystem,
+ * it should call this to invalidate cached data and abort waiting
+ * threads.
+ */
+void fsmonitor_force_resync(struct fsmonitor_daemon_state *state);
+
 #endif /* HAVE_FSMONITOR_DAEMON_BACKEND */
 #endif /* FSMONITOR_DAEMON_H */

From 7952e187e40a0c7c5e159a2251ace39333cbd7a8 Mon Sep 17 00:00:00 2001
From: Jeff Hostetler <jeffhost@microsoft.com>
Date: Thu, 17 Dec 2020 13:34:30 -0500
Subject: [PATCH 27/39] fsmonitor-fs-listen-win32: implement FSMonitor backend
 on Windows

Teach the win32 backend to register a watch on the working tree
root directory (recursively).  Also watch the <gitdir> if it is
not inside the working tree.  And to collect path change notifications
into batches and publish.

Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
---
 compat/fsmonitor/fsmonitor-fs-listen-win32.c | 493 +++++++++++++++++++
 1 file changed, 493 insertions(+)

diff --git a/compat/fsmonitor/fsmonitor-fs-listen-win32.c b/compat/fsmonitor/fsmonitor-fs-listen-win32.c
index 880446b49e35fb..ecc8acdd5df4ed 100644
--- a/compat/fsmonitor/fsmonitor-fs-listen-win32.c
+++ b/compat/fsmonitor/fsmonitor-fs-listen-win32.c
@@ -2,20 +2,513 @@
 #include "config.h"
 #include "fsmonitor.h"
 #include "fsmonitor-fs-listen.h"
+#include "fsmonitor--daemon.h"
+
+/*
+ * The documentation of ReadDirectoryChangesW() states that the maximum
+ * buffer size is 64K when the monitored directory is remote.
+ *
+ * Larger buffers may be used when the monitored directory is local and
+ * will help us receive events faster from the kernel and avoid dropped
+ * events.
+ *
+ * So we try to use a very large buffer and silently fallback to 64K if
+ * we get an error.
+ */
+#define MAX_RDCW_BUF_FALLBACK (65536)
+#define MAX_RDCW_BUF          (65536 * 8)
+
+struct one_watch
+{
+	char buffer[MAX_RDCW_BUF];
+	DWORD buf_len;
+	DWORD count;
+
+	struct strbuf path;
+	HANDLE hDir;
+	HANDLE hEvent;
+	OVERLAPPED overlapped;
+
+	/*
+	 * Is there an active ReadDirectoryChangesW() call pending.  If so, we
+	 * need to later call GetOverlappedResult() and possibly CancelIoEx().
+	 */
+	BOOL is_active;
+};
+
+struct fsmonitor_daemon_backend_data
+{
+	struct one_watch *watch_worktree;
+	struct one_watch *watch_gitdir;
+
+	HANDLE hEventShutdown;
+
+	HANDLE hListener[3]; /* we don't own these handles */
+#define LISTENER_SHUTDOWN 0
+#define LISTENER_HAVE_DATA_WORKTREE 1
+#define LISTENER_HAVE_DATA_GITDIR 2
+	int nr_listener_handles;
+};
+
+/*
+ * Convert the WCHAR path from the notification into UTF8 and
+ * then normalize it.
+ */
+static int normalize_path_in_utf8(FILE_NOTIFY_INFORMATION *info,
+				  struct strbuf *normalized_path)
+{
+	int reserve;
+	int len = 0;
+
+	strbuf_reset(normalized_path);
+	if (!info->FileNameLength)
+		goto normalize;
+
+	/*
+	 * Pre-reserve enough space in the UTF8 buffer for
+	 * each Unicode WCHAR character to be mapped into a
+	 * sequence of 2 UTF8 characters.  That should let us
+	 * avoid ERROR_INSUFFICIENT_BUFFER 99.9+% of the time.
+	 */
+	reserve = info->FileNameLength + 1;
+	strbuf_grow(normalized_path, reserve);
+
+	for (;;) {
+		len = WideCharToMultiByte(CP_UTF8, 0, info->FileName,
+					  info->FileNameLength / sizeof(WCHAR),
+					  normalized_path->buf,
+					  strbuf_avail(normalized_path) - 1,
+					  NULL, NULL);
+		if (len > 0)
+			goto normalize;
+		if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
+			error("[GLE %ld] could not convert path to UTF-8: '%.*S'",
+			      GetLastError(),
+			      (int)(info->FileNameLength / sizeof(WCHAR)),
+			      info->FileName);
+			return -1;
+		}
+
+		strbuf_grow(normalized_path,
+			    strbuf_avail(normalized_path) + reserve);
+	}
+
+normalize:
+	strbuf_setlen(normalized_path, len);
+	return strbuf_normalize_path(normalized_path);
+}
 
 void fsmonitor_fs_listen__stop_async(struct fsmonitor_daemon_state *state)
 {
+	SetEvent(state->backend_data->hListener[LISTENER_SHUTDOWN]);
+}
+
+static struct one_watch *create_watch(struct fsmonitor_daemon_state *state,
+				      const char *path)
+{
+	struct one_watch *watch = NULL;
+	DWORD desired_access = FILE_LIST_DIRECTORY;
+	DWORD share_mode =
+		FILE_SHARE_WRITE | FILE_SHARE_READ | FILE_SHARE_DELETE;
+	HANDLE hDir;
+
+	hDir = CreateFileA(path,
+			   desired_access, share_mode, NULL, OPEN_EXISTING,
+			   FILE_FLAG_BACKUP_SEMANTICS | FILE_FLAG_OVERLAPPED,
+			   NULL);
+	if (hDir == INVALID_HANDLE_VALUE) {
+		error(_("[GLE %ld] could not watch '%s'"),
+		      GetLastError(), path);
+		return NULL;
+	}
+
+	watch = xcalloc(1, sizeof(*watch));
+
+	watch->buf_len = sizeof(watch->buffer); /* assume full MAX_RDCW_BUF */
+
+	strbuf_init(&watch->path, 0);
+	strbuf_addstr(&watch->path, path);
+
+	watch->hDir = hDir;
+	watch->hEvent = CreateEvent(NULL, TRUE, FALSE, NULL);
+
+	return watch;
+}
+
+static void destroy_watch(struct one_watch *watch)
+{
+	if (!watch)
+		return;
+
+	strbuf_release(&watch->path);
+	if (watch->hDir != INVALID_HANDLE_VALUE)
+		CloseHandle(watch->hDir);
+	if (watch->hEvent != INVALID_HANDLE_VALUE)
+		CloseHandle(watch->hEvent);
+
+	free(watch);
+}
+
+static int start_rdcw_watch(struct fsmonitor_daemon_backend_data *data,
+			    struct one_watch *watch)
+{
+	DWORD dwNotifyFilter =
+		FILE_NOTIFY_CHANGE_FILE_NAME |
+		FILE_NOTIFY_CHANGE_DIR_NAME |
+		FILE_NOTIFY_CHANGE_ATTRIBUTES |
+		FILE_NOTIFY_CHANGE_SIZE |
+		FILE_NOTIFY_CHANGE_LAST_WRITE |
+		FILE_NOTIFY_CHANGE_CREATION;
+
+	ResetEvent(watch->hEvent);
+
+	memset(&watch->overlapped, 0, sizeof(watch->overlapped));
+	watch->overlapped.hEvent = watch->hEvent;
+
+start_watch:
+	watch->is_active = ReadDirectoryChangesW(
+		watch->hDir, watch->buffer, watch->buf_len, TRUE,
+		dwNotifyFilter, &watch->count, &watch->overlapped, NULL);
+
+	if (!watch->is_active &&
+	    GetLastError() == ERROR_INVALID_PARAMETER &&
+	    watch->buf_len > MAX_RDCW_BUF_FALLBACK) {
+		watch->buf_len = MAX_RDCW_BUF_FALLBACK;
+		goto start_watch;
+	}
+
+	if (watch->is_active)
+		return 0;
+
+	error("ReadDirectoryChangedW failed on '%s' [GLE %ld]",
+	      watch->path.buf, GetLastError());
+	return -1;
+}
+
+static int recv_rdcw_watch(struct one_watch *watch)
+{
+	watch->is_active = FALSE;
+
+	if (GetOverlappedResult(watch->hDir, &watch->overlapped, &watch->count,
+				TRUE))
+		return 0;
+
+	// TODO If an external <gitdir> is deleted, the above returns an error.
+	// TODO I'm not sure that there's anything that we can do here other
+	// TODO than failing -- the <worktree>/.git link file would be broken
+	// TODO anyway.  We might try to check for that and return a better
+	// TODO error message.
+
+	error("GetOverlappedResult failed on '%s' [GLE %ld]",
+	      watch->path.buf, GetLastError());
+	return -1;
+}
+
+static void cancel_rdcw_watch(struct one_watch *watch)
+{
+	DWORD count;
+
+	if (!watch || !watch->is_active)
+		return;
+
+	CancelIoEx(watch->hDir, &watch->overlapped);
+	GetOverlappedResult(watch->hDir, &watch->overlapped, &count, TRUE);
+	watch->is_active = FALSE;
+}
+
+/*
+ * Process filesystem events that happen anywhere (recursively) under the
+ * <worktree> root directory.  For a normal working directory, this includes
+ * both version controlled files and the contents of the .git/ directory.
+ *
+ * If <worktree>/.git is a file, then we only see events for the file
+ * itself.
+ */
+static int process_worktree_events(struct fsmonitor_daemon_state *state)
+{
+	struct fsmonitor_daemon_backend_data *data = state->backend_data;
+	struct one_watch *watch = data->watch_worktree;
+	struct strbuf path = STRBUF_INIT;
+	struct string_list cookie_list = STRING_LIST_INIT_DUP;
+	struct fsmonitor_batch *batch = NULL;
+	const char *p = watch->buffer;
+
+	/*
+	 * If the kernel gets more events than will fit in the kernel
+	 * buffer associated with our RDCW handle, it drops them and
+	 * returns a count of zero.  (A successful call, but with
+	 * length zero.)
+	 */
+	if (!watch->count) {
+		trace2_data_string("fsmonitor", NULL, "fsm-listen/kernel",
+				   "overflow");
+		fsmonitor_force_resync(state);
+		return LISTENER_HAVE_DATA_WORKTREE;
+	}
+
+	/*
+	 * On Windows, `info` contains an "array" of paths that are
+	 * relative to the root of whichever directory handle received
+	 * the event.
+	 */
+	for (;;) {
+		FILE_NOTIFY_INFORMATION *info = (void *)p;
+		const char *slash;
+		enum fsmonitor_path_type t;
+
+		strbuf_reset(&path);
+		if (normalize_path_in_utf8(info, &path) == -1)
+			goto skip_this_path;
+
+		t = fsmonitor_classify_path_workdir_relative(path.buf);
+
+		switch (t) {
+		case IS_INSIDE_DOT_GIT_WITH_COOKIE_PREFIX:
+			/* special case cookie files within .git */
+
+			/* Use just the filename of the cookie file. */
+			slash = find_last_dir_sep(path.buf);
+			string_list_append(&cookie_list,
+					   slash ? slash + 1 : path.buf);
+			break;
+
+		case IS_INSIDE_DOT_GIT:
+			/* ignore everything inside of "<worktree>/.git/" */
+			break;
+
+		case IS_DOT_GIT:
+			/* "<worktree>/.git" was deleted (or renamed away) */
+			if ((info->Action == FILE_ACTION_REMOVED) ||
+			    (info->Action == FILE_ACTION_RENAMED_OLD_NAME)) {
+				trace2_data_string("fsmonitor", NULL,
+						   "fsm-listen/dotgit",
+						   "removed");
+				goto force_shutdown;
+			}
+			break;
+
+		case IS_WORKDIR_PATH:
+			/* queue normal pathname */
+			if (!batch)
+				batch = fsmonitor_batch__new();
+			fsmonitor_batch__add_path(batch, path.buf);
+			break;
+
+		case IS_GITDIR:
+		case IS_INSIDE_GITDIR:
+		case IS_INSIDE_GITDIR_WITH_COOKIE_PREFIX:
+		default:
+			BUG("unexpected path classification '%d' for '%s'",
+			    t, path.buf);
+			goto skip_this_path;
+		}
+
+skip_this_path:
+		if (!info->NextEntryOffset)
+			break;
+		p += info->NextEntryOffset;
+	}
+
+	fsmonitor_publish(state, batch, &cookie_list);
+	batch = NULL;
+	string_list_clear(&cookie_list, 0);
+	strbuf_release(&path);
+	return LISTENER_HAVE_DATA_WORKTREE;
+
+force_shutdown:
+	fsmonitor_batch__free(batch);
+	string_list_clear(&cookie_list, 0);
+	strbuf_release(&path);
+	return LISTENER_SHUTDOWN;
+}
+
+/*
+ * Process filesystem events that happend anywhere (recursively) under the
+ * external <gitdir> (such as non-primary worktrees or submodules).
+ * We only care about cookie files that our client threads created here.
+ *
+ * Note that we DO NOT get filesystem events on the external <gitdir>
+ * itself (it is not inside something that we are watching).  In particular,
+ * we do not get an event if the external <gitdir> is deleted.
+ */
+static int process_gitdir_events(struct fsmonitor_daemon_state *state)
+{
+	struct fsmonitor_daemon_backend_data *data = state->backend_data;
+	struct one_watch *watch = data->watch_gitdir;
+	struct strbuf path = STRBUF_INIT;
+	struct string_list cookie_list = STRING_LIST_INIT_DUP;
+	const char *p = watch->buffer;
+
+	if (!watch->count) {
+		trace2_data_string("fsmonitor", NULL, "fsm-listen/kernel",
+				   "overflow");
+		fsmonitor_force_resync(state);
+		return LISTENER_HAVE_DATA_GITDIR;
+	}
+
+	for (;;) {
+		FILE_NOTIFY_INFORMATION *info = (void *)p;
+		const char *slash;
+		enum fsmonitor_path_type t;
+
+		strbuf_reset(&path);
+		if (normalize_path_in_utf8(info, &path) == -1)
+			goto skip_this_path;
+
+		t = fsmonitor_classify_path_gitdir_relative(path.buf);
+
+		trace_printf_key(&trace_fsmonitor, "BBB: %s", path.buf);
+
+		switch (t) {
+		case IS_INSIDE_GITDIR_WITH_COOKIE_PREFIX:
+			/* special case cookie files within gitdir */
+
+			/* Use just the filename of the cookie file. */
+			slash = find_last_dir_sep(path.buf);
+			string_list_append(&cookie_list,
+					   slash ? slash + 1 : path.buf);
+			break;
+
+		case IS_INSIDE_GITDIR:
+			goto skip_this_path;
+
+		default:
+			BUG("unexpected path classification '%d' for '%s'",
+			    t, path.buf);
+			goto skip_this_path;
+		}
+
+skip_this_path:
+		if (!info->NextEntryOffset)
+			break;
+		p += info->NextEntryOffset;
+	}
+
+	fsmonitor_publish(state, NULL, &cookie_list);
+	string_list_clear(&cookie_list, 0);
+	strbuf_release(&path);
+	return LISTENER_HAVE_DATA_GITDIR;
 }
 
 void fsmonitor_fs_listen__loop(struct fsmonitor_daemon_state *state)
 {
+	struct fsmonitor_daemon_backend_data *data = state->backend_data;
+	DWORD dwWait;
+
+	state->error_code = 0;
+
+	if (start_rdcw_watch(data, data->watch_worktree) == -1)
+		goto force_error_stop;
+
+	if (data->watch_gitdir &&
+	    start_rdcw_watch(data, data->watch_gitdir) == -1)
+		goto force_error_stop;
+
+	for (;;) {
+		dwWait = WaitForMultipleObjects(data->nr_listener_handles,
+						data->hListener,
+						FALSE, INFINITE);
+
+		if (dwWait == WAIT_OBJECT_0 + LISTENER_HAVE_DATA_WORKTREE) {
+			if (recv_rdcw_watch(data->watch_worktree) == -1)
+				goto force_error_stop;
+			if (process_worktree_events(state) == LISTENER_SHUTDOWN)
+				goto force_shutdown;
+			if (start_rdcw_watch(data, data->watch_worktree) == -1)
+				goto force_error_stop;
+			continue;
+		}
+
+		if (dwWait == WAIT_OBJECT_0 + LISTENER_HAVE_DATA_GITDIR) {
+			if (recv_rdcw_watch(data->watch_gitdir) == -1)
+				goto force_error_stop;
+			if (process_gitdir_events(state) == LISTENER_SHUTDOWN)
+				goto force_shutdown;
+			if (start_rdcw_watch(data, data->watch_gitdir) == -1)
+				goto force_error_stop;
+			continue;
+		}
+
+		if (dwWait == WAIT_OBJECT_0 + LISTENER_SHUTDOWN)
+			goto clean_shutdown;
+
+		error(_("could not read directory changes [GLE %ld]"),
+		      GetLastError());
+		goto force_error_stop;
+	}
+
+force_error_stop:
+	state->error_code = -1;
+
+force_shutdown:
+	/*
+	 * Tell the IPC thead pool to stop (which completes the await
+	 * in the main thread (which will also signal this thread (if
+	 * we are still alive))).
+	 */
+	ipc_server_stop_async(state->ipc_server_data);
+
+clean_shutdown:
+	cancel_rdcw_watch(data->watch_worktree);
+	cancel_rdcw_watch(data->watch_gitdir);
 }
 
 int fsmonitor_fs_listen__ctor(struct fsmonitor_daemon_state *state)
 {
+	struct fsmonitor_daemon_backend_data *data;
+
+	data = xcalloc(1, sizeof(*data));
+
+	data->hEventShutdown = CreateEvent(NULL, TRUE, FALSE, NULL);
+
+	data->watch_worktree = create_watch(state,
+					    state->path_worktree_watch.buf);
+	if (!data->watch_worktree)
+		goto failed;
+
+	if (state->nr_paths_watching > 1) {
+		data->watch_gitdir = create_watch(state,
+						  state->path_gitdir_watch.buf);
+		if (!data->watch_gitdir)
+			goto failed;
+	}
+
+	data->hListener[LISTENER_SHUTDOWN] = data->hEventShutdown;
+	data->nr_listener_handles++;
+
+	data->hListener[LISTENER_HAVE_DATA_WORKTREE] =
+		data->watch_worktree->hEvent;
+	data->nr_listener_handles++;
+
+	if (data->watch_gitdir) {
+		data->hListener[LISTENER_HAVE_DATA_GITDIR] =
+			data->watch_gitdir->hEvent;
+		data->nr_listener_handles++;
+	}
+
+	state->backend_data = data;
+	return 0;
+
+failed:
+	CloseHandle(data->hEventShutdown);
+	destroy_watch(data->watch_worktree);
+	destroy_watch(data->watch_gitdir);
+
 	return -1;
 }
 
 void fsmonitor_fs_listen__dtor(struct fsmonitor_daemon_state *state)
 {
+	struct fsmonitor_daemon_backend_data *data;
+
+	if (!state || !state->backend_data)
+		return;
+
+	data = state->backend_data;
+
+	CloseHandle(data->hEventShutdown);
+	destroy_watch(data->watch_worktree);
+	destroy_watch(data->watch_gitdir);
+
+	FREE_AND_NULL(state->backend_data);
 }

From 581ab48b71b368cbad7fcd286a6fa24dddd54cb0 Mon Sep 17 00:00:00 2001
From: Jeff Hostetler <jeffhost@microsoft.com>
Date: Fri, 18 Dec 2020 11:09:58 -0500
Subject: [PATCH 28/39] fsmonitor-fs-listen-macos: add macos header files for
 FSEvent

Include MacOS system declarations to allow us to use FSEvent and
CoreFoundation APIs.  We need GCC and clang versions because of
compiler and header file conflicts.

While it is quite possible to #include Apple's CoreServices.h when
compiling C source code with clang, trying to build it with GCC
currently fails with this error:

In file included
   from /Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/System/Library/Frameworks/Security.framework/Headers/AuthSession.h:32,
   from /Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/System/Library/Frameworks/Security.framework/Headers/Security.h:42,
   from /Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/System/Library/Frameworks/CoreServices.framework/Frameworks/OSServices.framework/Headers/CSIdentity.h:43,
   from /Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/System/Library/Frameworks/CoreServices.framework/Frameworks/OSServices.framework/Headers/OSServices.h:29,
   from /Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/System/Library/Frameworks/CoreServices.framework/Frameworks/LaunchServices.framework/Headers/IconsCore.h:23,
   from /Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/System/Library/Frameworks/CoreServices.framework/Frameworks/LaunchServices.framework/Headers/LaunchServices.h:23,
   from /Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/System/Library/Frameworks/CoreServices.framework/Headers/CoreServices.h:45,
     /Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/System/Library/Frameworks/Security.framework/Headers/Authorization.h:193:7: error: variably modified 'bytes' at file scope
       193 | char bytes[kAuthorizationExternalFormLength];
           |      ^~~~~

The underlying reason is that GCC (rightfully) objects that an `enum`
value such as `kAuthorizationExternalFormLength` is not a constant
(because it is not, the preprocessor has no knowledge of it, only the
actual C compiler does) and can therefore not be used to define the size
of a C array.

This is a known problem and tracked in GCC's bug tracker:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93082

In the meantime, let's not block things and go the slightly ugly route
of declaring/defining the FSEvents constants, data structures and
functions that we need, so that we can avoid above-mentioned issue.

Let's do this _only_ for GCC, though, so that the CI/PR builds (which
build both with clang and with GCC) can guarantee that we _are_ using
the correct data types.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
---
 compat/fsmonitor/fsmonitor-fs-listen-macos.c | 96 ++++++++++++++++++++
 1 file changed, 96 insertions(+)

diff --git a/compat/fsmonitor/fsmonitor-fs-listen-macos.c b/compat/fsmonitor/fsmonitor-fs-listen-macos.c
index b91058d1c4f820..bec5130d9e1d8a 100644
--- a/compat/fsmonitor/fsmonitor-fs-listen-macos.c
+++ b/compat/fsmonitor/fsmonitor-fs-listen-macos.c
@@ -1,3 +1,99 @@
+#if defined(__GNUC__)
+/*
+ * It is possible to #include CoreFoundation/CoreFoundation.h when compiling
+ * with clang, but not with GCC as of time of writing.
+ *
+ * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93082 for details.
+ */
+typedef unsigned int FSEventStreamCreateFlags;
+#define kFSEventStreamEventFlagNone               0x00000000
+#define kFSEventStreamEventFlagMustScanSubDirs    0x00000001
+#define kFSEventStreamEventFlagUserDropped        0x00000002
+#define kFSEventStreamEventFlagKernelDropped      0x00000004
+#define kFSEventStreamEventFlagEventIdsWrapped    0x00000008
+#define kFSEventStreamEventFlagHistoryDone        0x00000010
+#define kFSEventStreamEventFlagRootChanged        0x00000020
+#define kFSEventStreamEventFlagMount              0x00000040
+#define kFSEventStreamEventFlagUnmount            0x00000080
+#define kFSEventStreamEventFlagItemCreated        0x00000100
+#define kFSEventStreamEventFlagItemRemoved        0x00000200
+#define kFSEventStreamEventFlagItemInodeMetaMod   0x00000400
+#define kFSEventStreamEventFlagItemRenamed        0x00000800
+#define kFSEventStreamEventFlagItemModified       0x00001000
+#define kFSEventStreamEventFlagItemFinderInfoMod  0x00002000
+#define kFSEventStreamEventFlagItemChangeOwner    0x00004000
+#define kFSEventStreamEventFlagItemXattrMod       0x00008000
+#define kFSEventStreamEventFlagItemIsFile         0x00010000
+#define kFSEventStreamEventFlagItemIsDir          0x00020000
+#define kFSEventStreamEventFlagItemIsSymlink      0x00040000
+#define kFSEventStreamEventFlagOwnEvent           0x00080000
+#define kFSEventStreamEventFlagItemIsHardlink     0x00100000
+#define kFSEventStreamEventFlagItemIsLastHardlink 0x00200000
+#define kFSEventStreamEventFlagItemCloned         0x00400000
+
+typedef struct __FSEventStream *FSEventStreamRef;
+typedef const FSEventStreamRef ConstFSEventStreamRef;
+
+typedef unsigned int CFStringEncoding;
+#define kCFStringEncodingUTF8 0x08000100
+
+typedef const struct __CFString *CFStringRef;
+typedef const struct __CFArray *CFArrayRef;
+typedef const struct __CFRunLoop *CFRunLoopRef;
+
+struct FSEventStreamContext {
+    long long version;
+    void *cb_data, *retain, *release, *copy_description;
+};
+
+typedef struct FSEventStreamContext FSEventStreamContext;
+typedef unsigned int FSEventStreamEventFlags;
+#define kFSEventStreamCreateFlagNoDefer 0x02
+#define kFSEventStreamCreateFlagWatchRoot 0x04
+#define kFSEventStreamCreateFlagFileEvents 0x10
+
+typedef unsigned long long FSEventStreamEventId;
+#define kFSEventStreamEventIdSinceNow 0xFFFFFFFFFFFFFFFFULL
+
+typedef void (*FSEventStreamCallback)(ConstFSEventStreamRef streamRef,
+				      void *context,
+				      __SIZE_TYPE__ num_of_events,
+				      void *event_paths,
+				      const FSEventStreamEventFlags event_flags[],
+				      const FSEventStreamEventId event_ids[]);
+typedef double CFTimeInterval;
+FSEventStreamRef FSEventStreamCreate(void *allocator,
+				     FSEventStreamCallback callback,
+				     FSEventStreamContext *context,
+				     CFArrayRef paths_to_watch,
+				     FSEventStreamEventId since_when,
+				     CFTimeInterval latency,
+				     FSEventStreamCreateFlags flags);
+CFStringRef CFStringCreateWithCString(void *allocator, const char *string,
+				      CFStringEncoding encoding);
+CFArrayRef CFArrayCreate(void *allocator, const void **items, long long count,
+			 void *callbacks);
+void CFRunLoopRun(void);
+void CFRunLoopStop(CFRunLoopRef run_loop);
+CFRunLoopRef CFRunLoopGetCurrent(void);
+extern CFStringRef kCFRunLoopDefaultMode;
+void FSEventStreamScheduleWithRunLoop(FSEventStreamRef stream,
+				      CFRunLoopRef run_loop,
+				      CFStringRef run_loop_mode);
+unsigned char FSEventStreamStart(FSEventStreamRef stream);
+void FSEventStreamStop(FSEventStreamRef stream);
+void FSEventStreamInvalidate(FSEventStreamRef stream);
+void FSEventStreamRelease(FSEventStreamRef stream);
+#else
+/*
+ * Let Apple's headers declare `isalnum()` first, before
+ * Git's headers override it via a constant
+ */
+#include <string.h>
+#include <CoreFoundation/CoreFoundation.h>
+#include <CoreServices/CoreServices.h>
+#endif
+
 #include "cache.h"
 #include "fsmonitor.h"
 #include "fsmonitor-fs-listen.h"

From 4d728d19c2344a8e28b3ec2cfca5fd74d56cecfd Mon Sep 17 00:00:00 2001
From: Jeff Hostetler <jeffhost@microsoft.com>
Date: Fri, 18 Dec 2020 11:22:25 -0500
Subject: [PATCH 29/39] fsmonitor-fs-listen-macos: implement FSEvent listener
 on MacOS

Implement file system event listener on MacOS using FSEvent,
CoreFoundation, and CoreServices.

Co-authored-by: Kevin Willford <Kevin.Willford@microsoft.com>
Co-authored-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
---
 compat/fsmonitor/fsmonitor-fs-listen-macos.c | 368 +++++++++++++++++++
 1 file changed, 368 insertions(+)

diff --git a/compat/fsmonitor/fsmonitor-fs-listen-macos.c b/compat/fsmonitor/fsmonitor-fs-listen-macos.c
index bec5130d9e1d8a..e055fb579cc49a 100644
--- a/compat/fsmonitor/fsmonitor-fs-listen-macos.c
+++ b/compat/fsmonitor/fsmonitor-fs-listen-macos.c
@@ -97,20 +97,388 @@ void FSEventStreamRelease(FSEventStreamRef stream);
 #include "cache.h"
 #include "fsmonitor.h"
 #include "fsmonitor-fs-listen.h"
+#include "fsmonitor--daemon.h"
+
+struct fsmonitor_daemon_backend_data
+{
+	CFStringRef cfsr_worktree_path;
+	CFStringRef cfsr_gitdir_path;
+
+	CFArrayRef cfar_paths_to_watch;
+	int nr_paths_watching;
+
+	FSEventStreamRef stream;
+
+	CFRunLoopRef rl;
+
+	enum shutdown_style {
+		SHUTDOWN_EVENT = 0,
+		FORCE_SHUTDOWN,
+		FORCE_ERROR_STOP,
+	} shutdown_style;
+
+	unsigned int stream_scheduled:1;
+	unsigned int stream_started:1;
+};
+
+static void log_flags_set(const char *path, const FSEventStreamEventFlags flag)
+{
+	struct strbuf msg = STRBUF_INIT;
+
+	if (flag & kFSEventStreamEventFlagMustScanSubDirs)
+		strbuf_addstr(&msg, "MustScanSubDirs|");
+	if (flag & kFSEventStreamEventFlagUserDropped)
+		strbuf_addstr(&msg, "UserDropped|");
+	if (flag & kFSEventStreamEventFlagKernelDropped)
+		strbuf_addstr(&msg, "KernelDropped|");
+	if (flag & kFSEventStreamEventFlagEventIdsWrapped)
+		strbuf_addstr(&msg, "EventIdsWrapped|");
+	if (flag & kFSEventStreamEventFlagHistoryDone)
+		strbuf_addstr(&msg, "HistoryDone|");
+	if (flag & kFSEventStreamEventFlagRootChanged)
+		strbuf_addstr(&msg, "RootChanged|");
+	if (flag & kFSEventStreamEventFlagMount)
+		strbuf_addstr(&msg, "Mount|");
+	if (flag & kFSEventStreamEventFlagUnmount)
+		strbuf_addstr(&msg, "Unmount|");
+	if (flag & kFSEventStreamEventFlagItemChangeOwner)
+		strbuf_addstr(&msg, "ItemChangeOwner|");
+	if (flag & kFSEventStreamEventFlagItemCreated)
+		strbuf_addstr(&msg, "ItemCreated|");
+	if (flag & kFSEventStreamEventFlagItemFinderInfoMod)
+		strbuf_addstr(&msg, "ItemFinderInfoMod|");
+	if (flag & kFSEventStreamEventFlagItemInodeMetaMod)
+		strbuf_addstr(&msg, "ItemInodeMetaMod|");
+	if (flag & kFSEventStreamEventFlagItemIsDir)
+		strbuf_addstr(&msg, "ItemIsDir|");
+	if (flag & kFSEventStreamEventFlagItemIsFile)
+		strbuf_addstr(&msg, "ItemIsFile|");
+	if (flag & kFSEventStreamEventFlagItemIsHardlink)
+		strbuf_addstr(&msg, "ItemIsHardlink|");
+	if (flag & kFSEventStreamEventFlagItemIsLastHardlink)
+		strbuf_addstr(&msg, "ItemIsLastHardlink|");
+	if (flag & kFSEventStreamEventFlagItemIsSymlink)
+		strbuf_addstr(&msg, "ItemIsSymlink|");
+	if (flag & kFSEventStreamEventFlagItemModified)
+		strbuf_addstr(&msg, "ItemModified|");
+	if (flag & kFSEventStreamEventFlagItemRemoved)
+		strbuf_addstr(&msg, "ItemRemoved|");
+	if (flag & kFSEventStreamEventFlagItemRenamed)
+		strbuf_addstr(&msg, "ItemRenamed|");
+	if (flag & kFSEventStreamEventFlagItemXattrMod)
+		strbuf_addstr(&msg, "ItemXattrMod|");
+	if (flag & kFSEventStreamEventFlagOwnEvent)
+		strbuf_addstr(&msg, "OwnEvent|");
+	if (flag & kFSEventStreamEventFlagItemCloned)
+		strbuf_addstr(&msg, "ItemCloned|");
+
+	trace_printf_key(&trace_fsmonitor, "fsevent: '%s', flags=%u %s",
+			 path, flag, msg.buf);
+
+	strbuf_release(&msg);
+}
+
+static int ef_is_root_delete(const FSEventStreamEventFlags ef)
+{
+	return (ef & kFSEventStreamEventFlagItemIsDir &&
+		ef & kFSEventStreamEventFlagItemRemoved);
+}
+
+static int ef_is_root_renamed(const FSEventStreamEventFlags ef)
+{
+	return (ef & kFSEventStreamEventFlagItemIsDir &&
+		ef & kFSEventStreamEventFlagItemRenamed);
+}
+
+static void fsevent_callback(ConstFSEventStreamRef streamRef,
+			     void *ctx,
+			     size_t num_of_events,
+			     void *event_paths,
+			     const FSEventStreamEventFlags event_flags[],
+			     const FSEventStreamEventId event_ids[])
+{
+	struct fsmonitor_daemon_state *state = ctx;
+	struct fsmonitor_daemon_backend_data *data = state->backend_data;
+	char **paths = (char **)event_paths;
+	struct fsmonitor_batch *batch = NULL;
+	struct string_list cookie_list = STRING_LIST_INIT_DUP;
+	const char *path_k;
+	const char *slash;
+	int k;
+
+	/*
+	 * Build a list of all filesystem changes into a private/local
+	 * list and without holding any locks.
+	 */
+	for (k = 0; k < num_of_events; k++) {
+		/*
+		 * On Mac, we receive an array of absolute paths.
+		 */
+		path_k = paths[k];
+
+		/*
+		 * If you want to debug FSEvents, log them to GIT_TRACE_FSMONITOR.
+		 * Please don't log them to Trace2.
+		 *
+		 * trace_printf_key(&trace_fsmonitor, "XXX '%s'", path_k);
+		 */
+
+		/*
+		 * If event[k] is marked as dropped, we assume that we have
+		 * lost sync with the filesystem and should flush our cached
+		 * data.  We need to:
+		 *
+		 * [1] Abort/wake any client threads waiting for a cookie and
+		 *     flush the cached state data (the current token), and
+		 *     create a new token.
+		 *
+		 * [2] Discard the batch that we were locally building (since
+		 *     they are conceptually relative to the just flushed
+		 *     token).
+		 */
+		if ((event_flags[k] & kFSEventStreamEventFlagKernelDropped) ||
+		    (event_flags[k] & kFSEventStreamEventFlagUserDropped)) {
+			/*
+			 * see also kFSEventStreamEventFlagMustScanSubDirs
+			 */
+			trace2_data_string("fsmonitor", NULL,
+					   "fsm-listen/kernel", "dropped");
+
+			fsmonitor_force_resync(state);
+
+			if (fsmonitor_batch__free(batch))
+				BUG("batch should not have a next");
+			string_list_clear(&cookie_list, 0);
+
+			/*
+			 * We assume that any events that we received
+			 * in this callback after this dropped event
+			 * may still be valid, so we continue rather
+			 * than break.  (And just in case there is a
+			 * delete of ".git" hiding in there.)
+			 */
+			continue;
+		}
+
+		switch (fsmonitor_classify_path_absolute(state, path_k)) {
+
+		case IS_INSIDE_DOT_GIT_WITH_COOKIE_PREFIX:
+		case IS_INSIDE_GITDIR_WITH_COOKIE_PREFIX:
+			/* special case cookie files within .git or gitdir */
+
+			/* Use just the filename of the cookie file. */
+			slash = find_last_dir_sep(path_k);
+			string_list_append(&cookie_list,
+					   slash ? slash + 1 : path_k);
+			break;
+
+		case IS_INSIDE_DOT_GIT:
+		case IS_INSIDE_GITDIR:
+			/* ignore all other paths inside of .git or gitdir */
+			break;
+
+		case IS_DOT_GIT:
+		case IS_GITDIR:
+			/*
+			 * If .git directory is deleted or renamed away,
+			 * we have to quit.
+			 */
+			if (ef_is_root_delete(event_flags[k])) {
+				trace2_data_string("fsmonitor", NULL,
+						   "fsm-listen/gitdir",
+						   "removed");
+				goto force_shutdown;
+			}
+			if (ef_is_root_renamed(event_flags[k])) {
+				trace2_data_string("fsmonitor", NULL,
+						   "fsm-listen/gitdir",
+						   "renamed");
+				goto force_shutdown;
+			}
+			break;
+
+		case IS_WORKDIR_PATH:
+			/* try to queue normal pathnames */
+
+			if (trace_pass_fl(&trace_fsmonitor))
+				log_flags_set(path_k, event_flags[k]);
+
+			/* fsevent could be marked as both a file and directory */
+
+			if (event_flags[k] & kFSEventStreamEventFlagItemIsFile) {
+				const char *rel = path_k +
+					state->path_worktree_watch.len + 1;
+
+				if (!batch)
+					batch = fsmonitor_batch__new();
+				fsmonitor_batch__add_path(batch, rel);
+			}
+
+			if (event_flags[k] & kFSEventStreamEventFlagItemIsDir) {
+				const char *rel = path_k +
+					state->path_worktree_watch.len + 1;
+				char *p = xstrfmt("%s/", rel);
+
+				if (!batch)
+					batch = fsmonitor_batch__new();
+				fsmonitor_batch__add_path(batch, p);
+
+				free(p);
+			}
+
+			break;
+
+		case IS_OUTSIDE_CONE:
+		default:
+			trace_printf_key(&trace_fsmonitor,
+					 "ignoring '%s'", path_k);
+			break;
+		}
+	}
+
+	fsmonitor_publish(state, batch, &cookie_list);
+	string_list_clear(&cookie_list, 0);
+	return;
+
+force_shutdown:
+	if (fsmonitor_batch__free(batch))
+		BUG("batch should not have a next");
+	string_list_clear(&cookie_list, 0);
+
+	data->shutdown_style = FORCE_SHUTDOWN;
+	CFRunLoopStop(data->rl);
+	return;
+}
+
+/*
+ * TODO Investigate the proper value for the `latency` argument in the call
+ * TODO to `FSEventStreamCreate()`.  I'm not sure that this needs to be a
+ * TODO config setting or just something that we tune after some testing.
+ * TODO
+ * TODO With a latency of 0.1, I was seeing lots of dropped events during
+ * TODO the "touch 100000" files test within t/perf/p7519, but with a
+ * TODO latency of 0.001 I did not see any dropped events.  So the "correct"
+ * TODO value may be somewhere in between.
+ * TODO
+ * TODO https://developer.apple.com/documentation/coreservices/1443980-fseventstreamcreate
+ */
 
 int fsmonitor_fs_listen__ctor(struct fsmonitor_daemon_state *state)
 {
+	FSEventStreamCreateFlags flags = kFSEventStreamCreateFlagNoDefer |
+		kFSEventStreamCreateFlagWatchRoot |
+		kFSEventStreamCreateFlagFileEvents;
+	FSEventStreamContext ctx = {
+		0,
+		state,
+		NULL,
+		NULL,
+		NULL
+	};
+	struct fsmonitor_daemon_backend_data *data;
+	const void *dir_array[2];
+
+	data = xcalloc(1, sizeof(*data));
+	state->backend_data = data;
+
+	data->cfsr_worktree_path = CFStringCreateWithCString(
+		NULL, state->path_worktree_watch.buf, kCFStringEncodingUTF8);
+	dir_array[data->nr_paths_watching++] = data->cfsr_worktree_path;
+
+	if (state->nr_paths_watching > 1) {
+		data->cfsr_gitdir_path = CFStringCreateWithCString(
+			NULL, state->path_gitdir_watch.buf,
+			kCFStringEncodingUTF8);
+		dir_array[data->nr_paths_watching++] = data->cfsr_gitdir_path;
+	}
+
+	data->cfar_paths_to_watch = CFArrayCreate(NULL, dir_array,
+						  data->nr_paths_watching,
+						  NULL);
+	data->stream = FSEventStreamCreate(NULL, fsevent_callback, &ctx,
+					   data->cfar_paths_to_watch,
+					   kFSEventStreamEventIdSinceNow,
+					   0.001, flags);
+	if (data->stream == NULL)
+		goto failed;
+
+	/*
+	 * `data->rl` needs to be set inside the listener thread.
+	 */
+
+	return 0;
+
+failed:
+	error("Unable to create FSEventStream.");
+
+	FREE_AND_NULL(state->backend_data);
 	return -1;
 }
 
 void fsmonitor_fs_listen__dtor(struct fsmonitor_daemon_state *state)
 {
+	struct fsmonitor_daemon_backend_data *data;
+
+	if (!state || !state->backend_data)
+		return;
+
+	data = state->backend_data;
+
+	if (data->stream) {
+		if (data->stream_started)
+			FSEventStreamStop(data->stream);
+		if (data->stream_scheduled)
+			FSEventStreamInvalidate(data->stream);
+		FSEventStreamRelease(data->stream);
+	}
+
+	FREE_AND_NULL(state->backend_data);
 }
 
 void fsmonitor_fs_listen__stop_async(struct fsmonitor_daemon_state *state)
 {
+	struct fsmonitor_daemon_backend_data *data;
+
+	data = state->backend_data;
+	data->shutdown_style = SHUTDOWN_EVENT;
+
+	CFRunLoopStop(data->rl);
 }
 
 void fsmonitor_fs_listen__loop(struct fsmonitor_daemon_state *state)
 {
+	struct fsmonitor_daemon_backend_data *data;
+
+	data = state->backend_data;
+
+	data->rl = CFRunLoopGetCurrent();
+
+	FSEventStreamScheduleWithRunLoop(data->stream, data->rl, kCFRunLoopDefaultMode);
+	data->stream_scheduled = 1;
+
+	if (!FSEventStreamStart(data->stream)) {
+		error("Failed to start the FSEventStream");
+		goto force_error_stop_without_loop;
+	}
+	data->stream_started = 1;
+
+	CFRunLoopRun();
+
+	switch (data->shutdown_style) {
+	case FORCE_ERROR_STOP:
+		state->error_code = -1;
+		/* fall thru */
+	case FORCE_SHUTDOWN:
+		ipc_server_stop_async(state->ipc_server_data);
+		/* fall thru */
+	case SHUTDOWN_EVENT:
+	default:
+		break;
+	}
+	return;
+
+force_error_stop_without_loop:
+	state->error_code = -1;
+	ipc_server_stop_async(state->ipc_server_data);
+	return;
 }

From bd9d11cf3a035e39ceef1dfd35eb3ad7ce2f23fb Mon Sep 17 00:00:00 2001
From: Jeff Hostetler <jeffhost@microsoft.com>
Date: Fri, 18 Dec 2020 11:43:16 -0500
Subject: [PATCH 30/39] fsmonitor--daemon: implement handle_client callback

Teach fsmonitor--daemon to respond to IPC requests from client
Git processes and respond with a list of modified pathnames
relative to the provided token.

Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
---
 builtin/fsmonitor--daemon.c | 335 +++++++++++++++++++++++++++++++++++-
 1 file changed, 333 insertions(+), 2 deletions(-)

diff --git a/builtin/fsmonitor--daemon.c b/builtin/fsmonitor--daemon.c
index 48071d445c4922..32df392b25d35d 100644
--- a/builtin/fsmonitor--daemon.c
+++ b/builtin/fsmonitor--daemon.c
@@ -7,6 +7,7 @@
 #include "fsmonitor--daemon.h"
 #include "simple-ipc.h"
 #include "khash.h"
+#include "pkt-line.h"
 
 static const char * const builtin_fsmonitor__daemon_usage[] = {
 	N_("git fsmonitor--daemon --start [<options>]"),
@@ -369,19 +370,349 @@ void fsmonitor_force_resync(struct fsmonitor_daemon_state *state)
 	fsmonitor_free_token_data(free_me);
 }
 
+/*
+ * Format an opaque token string to send to the client.
+ */
+static void fsmonitor_format_response_token(
+	struct strbuf *response_token,
+	const struct strbuf *response_token_id,
+	const struct fsmonitor_batch *batch)
+{
+	uint64_t seq_nr = (batch) ? batch->batch_seq_nr + 1 : 0;
+
+	strbuf_reset(response_token);
+	strbuf_addf(response_token, "builtin:%s:%"PRIu64,
+		    response_token_id->buf, seq_nr);
+}
+
+/*
+ * Parse an opaque token from the client.
+ */
+static int fsmonitor_parse_client_token(const char *buf_token,
+					struct strbuf *requested_token_id,
+					uint64_t *seq_nr)
+{
+	const char *p;
+	char *p_end;
+
+	strbuf_reset(requested_token_id);
+	*seq_nr = 0;
+
+	if (!skip_prefix(buf_token, "builtin:", &p))
+		return 1;
+
+	while (*p && *p != ':')
+		strbuf_addch(requested_token_id, *p++);
+	if (!*p++)
+		return 1;
+
+	*seq_nr = (uint64_t)strtoumax(p, &p_end, 10);
+	if (*p_end)
+		return 1;
+
+	return 0;
+}
+
+KHASH_INIT(str, const char *, int, 0, kh_str_hash_func, kh_str_hash_equal);
+
+static int do_handle_client(struct fsmonitor_daemon_state *state,
+			    const char *command,
+			    ipc_server_reply_cb *reply,
+			    struct ipc_server_reply_data *reply_data)
+{
+	struct fsmonitor_token_data *token_data = NULL;
+	struct strbuf response_token = STRBUF_INIT;
+	struct strbuf requested_token_id = STRBUF_INIT;
+	struct strbuf payload = STRBUF_INIT;
+	uint64_t requested_oldest_seq_nr = 0;
+	uint64_t total_response_len = 0;
+	const char *p;
+	const struct fsmonitor_batch *batch_head;
+	const struct fsmonitor_batch *batch;
+	intmax_t count = 0, duplicates = 0;
+	kh_str_t *shown;
+	int hash_ret;
+	int result;
+
+	/*
+	 * We expect `command` to be of the form:
+	 *
+	 * <command> := quit NUL
+	 *            | flush NUL
+	 *            | <V1-time-since-epoch-ns> NUL
+	 *            | <V2-opaque-fsmonitor-token> NUL
+	 */
+
+	if (!strcmp(command, "quit")) {
+		/*
+		 * A client has requested over the socket/pipe that the
+		 * daemon shutdown.
+		 *
+		 * Tell the IPC thread pool to shutdown (which completes
+		 * the await in the main thread (which can stop the
+		 * fsmonitor listener thread)).
+		 *
+		 * There is no reply to the client.
+		 */
+		return SIMPLE_IPC_QUIT;
+	}
+
+	if (!strcmp(command, "flush")) {
+		/*
+		 * Flush all of our cached data and generate a new token
+		 * just like if we lost sync with the filesystem.
+		 *
+		 * Then send a trivial response using the new token.
+		 */
+		fsmonitor_force_resync(state);
+		result = 0;
+		goto send_trivial_response;
+	}
+
+	if (!skip_prefix(command, "builtin:", &p)) {
+		/* assume V1 timestamp or garbage */
+
+		char *p_end;
+
+		strtoumax(command, &p_end, 10);
+		trace_printf_key(&trace_fsmonitor,
+				 ((*p_end) ?
+				  "fsmonitor: invalid command line '%s'" :
+				  "fsmonitor: unsupported V1 protocol '%s'"),
+				 command);
+		result = -1;
+		goto send_trivial_response;
+	}
+
+	/* try V2 token */
+
+	if (fsmonitor_parse_client_token(command, &requested_token_id,
+					 &requested_oldest_seq_nr)) {
+		trace_printf_key(&trace_fsmonitor,
+				 "fsmonitor: invalid V2 protocol token '%s'",
+				 command);
+		result = -1;
+		goto send_trivial_response;
+	}
+
+	pthread_mutex_lock(&state->main_lock);
+
+	if (!state->current_token_data) {
+		/*
+		 * We don't have a current token.  This may mean that
+		 * the listener thread has not yet started.
+		 */
+		pthread_mutex_unlock(&state->main_lock);
+		result = 0;
+		goto send_trivial_response;
+	}
+	if (strcmp(requested_token_id.buf,
+		   state->current_token_data->token_id.buf)) {
+		/*
+		 * The client last spoke to a different daemon
+		 * instance -OR- the daemon had to resync with
+		 * the filesystem (and lost events), so reject.
+		 */
+		pthread_mutex_unlock(&state->main_lock);
+		result = 0;
+		trace2_data_string("fsmonitor", the_repository,
+				   "response/token", "different");
+		goto send_trivial_response;
+	}
+	if (!state->current_token_data->batch_tail) {
+		/*
+		 * The listener has not received any filesystem
+		 * events yet since we created the current token.
+		 * We can respond with an empty list, since the
+		 * client has already seen the current token and
+		 * we have nothing new to report.  (This is
+		 * instead of sending a trivial response.)
+		 */
+		pthread_mutex_unlock(&state->main_lock);
+		result = 0;
+		goto send_empty_response;
+	}
+	if (requested_oldest_seq_nr <
+	    state->current_token_data->batch_tail->batch_seq_nr) {
+		/*
+		 * The client wants older events than we have for
+		 * this token_id.  This means that the end of our
+		 * batch list was truncated and we cannot give the
+		 * client a complete snapshot relative to their
+		 * request.
+		 */
+		pthread_mutex_unlock(&state->main_lock);
+
+		trace_printf_key(&trace_fsmonitor,
+				 "client requested truncated data");
+		result = 0;
+		goto send_trivial_response;
+	}
+
+	/*
+	 * We're going to hold onto a pointer to the current
+	 * token-data while we walk the list of batches of files.
+	 * During this time, we will NOT be under the lock.
+	 * So we ref-count it.
+	 *
+	 * This allows the listener thread to continue prepending
+	 * new batches of items to the token-data (which we'll ignore).
+	 *
+	 * AND it allows the listener thread to do a token-reset
+	 * (and install a new `current_token_data`).
+	 *
+	 * We mark the current head of the batch list as "pinned" so
+	 * that the listener thread will treat this item as read-only
+	 * (and prevent any more paths from being added to it) from
+	 * now on.
+	 */
+	token_data = state->current_token_data;
+	token_data->client_ref_count++;
+
+	batch_head = token_data->batch_head;
+	((struct fsmonitor_batch *)batch_head)->pinned_time = time(NULL);
+
+	pthread_mutex_unlock(&state->main_lock);
+
+	/*
+	 * FSMonitor Protocol V2 requires that we send a response header
+	 * with a "new current token" and then all of the paths that changed
+	 * since the "requested token".
+	 */
+	fsmonitor_format_response_token(&response_token,
+					&token_data->token_id,
+					batch_head);
+
+	reply(reply_data, response_token.buf, response_token.len + 1);
+	total_response_len += response_token.len + 1;
+
+	trace2_data_string("fsmonitor", the_repository, "response/token",
+			   response_token.buf);
+	trace_printf_key(&trace_fsmonitor, "response token: %s", response_token.buf);
+
+	shown = kh_init_str();
+	for (batch = batch_head;
+	     batch && batch->batch_seq_nr >= requested_oldest_seq_nr;
+	     batch = batch->next) {
+		size_t k;
+
+		for (k = 0; k < batch->nr; k++) {
+			const char *s = batch->interned_paths[k];
+			size_t s_len;
+
+			if (kh_get_str(shown, s) != kh_end(shown))
+				duplicates++;
+			else {
+				kh_put_str(shown, s, &hash_ret);
+
+				trace_printf_key(&trace_fsmonitor,
+						 "send[%"PRIuMAX"]: %s",
+						 count, s);
+
+				/* Each path gets written with a trailing NUL */
+				s_len = strlen(s) + 1;
+
+				if (payload.len + s_len >=
+				    LARGE_PACKET_DATA_MAX) {
+					reply(reply_data, payload.buf,
+					      payload.len);
+					total_response_len += payload.len;
+					strbuf_reset(&payload);
+				}
+
+				strbuf_add(&payload, s, s_len);
+				count++;
+			}
+		}
+	}
+
+	if (payload.len) {
+		reply(reply_data, payload.buf, payload.len);
+		total_response_len += payload.len;
+	}
+
+	kh_release_str(shown);
+
+	pthread_mutex_lock(&state->main_lock);
+	if (token_data->client_ref_count > 0)
+		token_data->client_ref_count--;
+
+	if (token_data->client_ref_count == 0) {
+		if (token_data != state->current_token_data) {
+			/*
+			 * The listener thread did a token-reset while we were
+			 * walking the batch list.  Therefore, this token is
+			 * stale and can be discarded completely.  If we are
+			 * the last reader thread using this token, we own
+			 * that work.
+			 */
+			fsmonitor_free_token_data(token_data);
+		}
+	}
+
+	pthread_mutex_unlock(&state->main_lock);
+
+	trace2_data_intmax("fsmonitor", the_repository, "response/length", total_response_len);
+	trace2_data_intmax("fsmonitor", the_repository, "response/count/files", count);
+	trace2_data_intmax("fsmonitor", the_repository, "response/count/duplicates", duplicates);
+
+	strbuf_release(&response_token);
+	strbuf_release(&requested_token_id);
+	strbuf_release(&payload);
+
+	return 0;
+
+send_trivial_response:
+	pthread_mutex_lock(&state->main_lock);
+	fsmonitor_format_response_token(&response_token,
+					&state->current_token_data->token_id,
+					state->current_token_data->batch_head);
+	pthread_mutex_unlock(&state->main_lock);
+
+	reply(reply_data, response_token.buf, response_token.len + 1);
+	trace2_data_string("fsmonitor", the_repository, "response/token",
+			   response_token.buf);
+	reply(reply_data, "/", 2);
+	trace2_data_intmax("fsmonitor", the_repository, "response/trivial", 1);
+
+	strbuf_release(&response_token);
+	strbuf_release(&requested_token_id);
+
+	return result;
+
+send_empty_response:
+	pthread_mutex_lock(&state->main_lock);
+	fsmonitor_format_response_token(&response_token,
+					&state->current_token_data->token_id,
+					NULL);
+	pthread_mutex_unlock(&state->main_lock);
+
+	reply(reply_data, response_token.buf, response_token.len + 1);
+	trace2_data_string("fsmonitor", the_repository, "response/token",
+			   response_token.buf);
+	trace2_data_intmax("fsmonitor", the_repository, "response/empty", 1);
+
+	strbuf_release(&response_token);
+	strbuf_release(&requested_token_id);
+
+	return 0;
+}
+
 static ipc_server_application_cb handle_client;
 
 static int handle_client(void *data, const char *command,
 			 ipc_server_reply_cb *reply,
 			 struct ipc_server_reply_data *reply_data)
 {
-	/* struct fsmonitor_daemon_state *state = data; */
+	struct fsmonitor_daemon_state *state = data;
 	int result;
 
+	trace_printf_key(&trace_fsmonitor, "requested token: %s", command);
+
 	trace2_region_enter("fsmonitor", "handle_client", the_repository);
 	trace2_data_string("fsmonitor", the_repository, "request", command);
 
-	result = 0; /* TODO Do something here. */
+	result = do_handle_client(state, command, reply, reply_data);
 
 	trace2_region_leave("fsmonitor", "handle_client", the_repository);
 

From 6cc4f89d0f4e06ff217b4e0e3b646dbd391f9310 Mon Sep 17 00:00:00 2001
From: Jeff Hostetler <jeffhost@microsoft.com>
Date: Fri, 18 Dec 2020 11:46:06 -0500
Subject: [PATCH 31/39] fsmonitor--daemon: periodically truncate list of
 modified files

Teach fsmonitor--daemon to periodically truncate the list of
modified files to save some memory.

Clients will ask for the set of changes relative to a token that they
found in the FSMN index extension in the index.  (This token is like a
point in time, but different).  Clients will then update the index to
contain the response token (so that subsequent commands will be
relative to this new token).

Therefore, the daemon can gradually truncate the in-memory list of
changed paths as they become obsolete (older that the previous token).
Since we may have multiple clients making concurrent requests with a
skew of tokens and clients may be racing to the talk to the daemon,
we lazily truncate the list.

We introduce a 5 minute delay and truncate batches 5 minutes after
they are considered obsolete.

Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
---
 builtin/fsmonitor--daemon.c | 78 +++++++++++++++++++++++++++++++++++++
 1 file changed, 78 insertions(+)

diff --git a/builtin/fsmonitor--daemon.c b/builtin/fsmonitor--daemon.c
index 32df392b25d35d..e9a9aea59ad678 100644
--- a/builtin/fsmonitor--daemon.c
+++ b/builtin/fsmonitor--daemon.c
@@ -316,6 +316,75 @@ static void fsmonitor_batch__combine(struct fsmonitor_batch *batch_dest,
 			batch_src->interned_paths[k];
 }
 
+/*
+ * To keep the batch list from growing unbounded in response to filesystem
+ * activity, we try to truncate old batches from the end of the list as
+ * they become irrelevant.
+ *
+ * We assume that the .git/index will be updated with the most recent token
+ * any time the index is updated.  And future commands will only ask for
+ * recent changes *since* that new token.  So as tokens advance into the
+ * future, older batch items will never be requested/needed.  So we can
+ * truncate them without loss of functionality.
+ *
+ * However, multiple commands may be talking to the daemon concurrently
+ * or perform a slow command, so a little "token skew" is possible.
+ * Therefore, we want this to be a little bit lazy and have a generous
+ * delay.
+ *
+ * The current reader thread walked backwards in time from `token->batch_head`
+ * back to `batch_marker` somewhere in the middle of the batch list.
+ *
+ * Let's walk backwards in time from that marker an arbitrary delay
+ * and truncate the list there.  Note that these timestamps are completely
+ * artificial (based on when we pinned the batch item) and not on any
+ * filesystem activity.
+ */
+#define MY_TIME_DELAY (5 * 60) /* seconds */
+
+static void fsmonitor_batch__truncate(struct fsmonitor_daemon_state *state,
+				      const struct fsmonitor_batch *batch_marker)
+{
+	/* assert state->main_lock */
+
+	const struct fsmonitor_batch *batch;
+	struct fsmonitor_batch *rest;
+	struct fsmonitor_batch *p;
+	time_t t;
+
+	if (!batch_marker)
+		return;
+
+	trace_printf_key(&trace_fsmonitor, "TRNC mark (%"PRIu64",%"PRIu64")",
+			 batch_marker->batch_seq_nr,
+			 (uint64_t)batch_marker->pinned_time);
+
+	for (batch = batch_marker; batch; batch = batch->next) {
+		if (!batch->pinned_time) /* an overflow batch */
+			continue;
+
+		t = batch->pinned_time + MY_TIME_DELAY;
+		if (t > batch_marker->pinned_time) /* too close to marker */
+			continue;
+
+		goto truncate_past_here;
+	}
+
+	return;
+
+truncate_past_here:
+	state->current_token_data->batch_tail = (struct fsmonitor_batch *)batch;
+
+	rest = ((struct fsmonitor_batch *)batch)->next;
+	((struct fsmonitor_batch *)batch)->next = NULL;
+
+	for (p = rest; p; p = fsmonitor_batch__free(p)) {
+		trace_printf_key(&trace_fsmonitor,
+				 "TRNC kill (%"PRIu64",%"PRIu64")",
+				 p->batch_seq_nr, (uint64_t)p->pinned_time);
+	}
+}
+
 static void fsmonitor_free_token_data(struct fsmonitor_token_data *token)
 {
 	struct fsmonitor_batch *p;
@@ -647,6 +716,15 @@ static int do_handle_client(struct fsmonitor_daemon_state *state,
 			 * that work.
 			 */
 			fsmonitor_free_token_data(token_data);
+		} else if (batch) {
+			/*
+			 * This batch is the first item in the list
+			 * that is older than the requested sequence
+			 * number and might be considered to be
+			 * obsolete.  See if we can truncate the list
+			 * and save some memory.
+			 */
+			fsmonitor_batch__truncate(state, batch);
 		}
 	}
 

From bbf2a4f276311787d59f232cc5918794bc89f267 Mon Sep 17 00:00:00 2001
From: Jeff Hostetler <jeffhost@microsoft.com>
Date: Fri, 18 Dec 2020 11:57:05 -0500
Subject: [PATCH 32/39] fsmonitor--daemon:: introduce client delay for testing

Define GIT_TEST_FSMONITOR_CLIENT_DELAY as a millisecond delay.

Introduce an artificial delay when processing client requests.
This make the CI/PR test suite a little more stable and avoids
the need to load up test scripts with sleep statements to avoid
racy failures.  This was mostly seen on 1 or 2 core CI build
machines where the test script would create a file and quickly
try to confirm that the daemon had seen it *before* the daemon
had received the kernel event and causing a test failure.

Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
---
 builtin/fsmonitor--daemon.c | 38 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 37 insertions(+), 1 deletion(-)

diff --git a/builtin/fsmonitor--daemon.c b/builtin/fsmonitor--daemon.c
index e9a9aea59ad678..0cb09ef0b98486 100644
--- a/builtin/fsmonitor--daemon.c
+++ b/builtin/fsmonitor--daemon.c
@@ -150,6 +150,30 @@ static int do_as_client__send_flush(void)
 	return 0;
 }
 
+static int lookup_client_test_delay(void)
+{
+	static int delay_ms = -1;
+
+	const char *s;
+	int ms;
+
+	if (delay_ms >= 0)
+		return delay_ms;
+
+	delay_ms = 0;
+
+	s = getenv("GIT_TEST_FSMONITOR_CLIENT_DELAY");
+	if (!s)
+		return delay_ms;
+
+	ms = atoi(s);
+	if (ms < 0)
+		return delay_ms;
+
+	delay_ms = ms;
+	return delay_ms;
+}
+
 /*
  * Requests to and from a FSMonitor Protocol V2 provider use an opaque
  * "token" as a virtual timestamp.  Clients can request a summary of all
@@ -526,6 +550,18 @@ static int do_handle_client(struct fsmonitor_daemon_state *state,
 		return SIMPLE_IPC_QUIT;
 	}
 
+	/*
+	 * For testing purposes, introduce an artificial delay in this
+	 * worker to allow the filesystem listener thread to receive
+	 * any fs events that may have been generated by the client
+	 * process on the other end of the pipe/socket.  This helps
+	 * make the CI/PR test suite runs a little more predictable
+	 * and hopefully eliminates the need to introduce `sleep`
+	 * commands in the test scripts.
+	 */
+	if (state->test_client_delay_ms)
+		sleep_millisec(state->test_client_delay_ms);
+
 	if (!strcmp(command, "flush")) {
 		/*
 		 * Flush all of our cached data and generate a new token
@@ -1038,7 +1074,7 @@ static int fsmonitor_run_daemon(void)
 	pthread_mutex_init(&state.main_lock, NULL);
 	state.error_code = 0;
 	state.current_token_data = fsmonitor_new_token_data();
-	state.test_client_delay_ms = 0;
+	state.test_client_delay_ms = lookup_client_test_delay();
 
 	/* Prepare to (recursively) watch the <worktree-root> directory. */
 	strbuf_init(&state.path_worktree_watch, 0);

From df6bf080e5ab00f32414d552cff73c1b914c0e80 Mon Sep 17 00:00:00 2001
From: Jeff Hostetler <jeffhost@microsoft.com>
Date: Fri, 18 Dec 2020 12:11:29 -0500
Subject: [PATCH 33/39] fsmonitor--daemon: use a cookie file to sync with file
 system

Teach fsmonitor--daemon client threads to create a cookie file
inside the .git directory and then wait until FS events for the
cookie are observed by the FS listener thread.

This helps address the racy nature of file system events by
blocking the client response until the kernel has drained any
event backlog.

This is especially important on MacOS where kernel events are
only issued with a limited frequency.  See the `latency` argument
of `FSeventStreamCreate()`.  The kernel only signals every `latency`
seconds, but does not guarantee that the kernel queue is completely
drained, so we may have to wait more than one interval.  If we
increase the frequency, the system is more likely to drop events.
We avoid these issues by having each client thread create a unique
cookie file and then wait until it is seen in the event stream.

Co-authored-by: Kevin Willford <Kevin.Willford@microsoft.com>
Co-authored-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
---
 builtin/fsmonitor--daemon.c | 198 ++++++++++++++++++++++++++++++++++++
 fsmonitor--daemon.h         |   5 +
 2 files changed, 203 insertions(+)

diff --git a/builtin/fsmonitor--daemon.c b/builtin/fsmonitor--daemon.c
index 0cb09ef0b98486..d6b59a98ceddd5 100644
--- a/builtin/fsmonitor--daemon.c
+++ b/builtin/fsmonitor--daemon.c
@@ -150,6 +150,149 @@ static int do_as_client__send_flush(void)
 	return 0;
 }
 
+enum fsmonitor_cookie_item_result {
+	FCIR_ERROR = -1, /* could not create cookie file ? */
+	FCIR_INIT = 0,
+	FCIR_SEEN,
+	FCIR_ABORT,
+};
+
+struct fsmonitor_cookie_item {
+	struct hashmap_entry entry;
+	const char *name;
+	enum fsmonitor_cookie_item_result result;
+};
+
+static int cookies_cmp(const void *data, const struct hashmap_entry *he1,
+		     const struct hashmap_entry *he2, const void *keydata)
+{
+	const struct fsmonitor_cookie_item *a =
+		container_of(he1, const struct fsmonitor_cookie_item, entry);
+	const struct fsmonitor_cookie_item *b =
+		container_of(he2, const struct fsmonitor_cookie_item, entry);
+
+	return strcmp(a->name, keydata ? keydata : b->name);
+}
+
+static enum fsmonitor_cookie_item_result fsmonitor_wait_for_cookie(
+	struct fsmonitor_daemon_state *state)
+{
+	int fd;
+	struct fsmonitor_cookie_item cookie;
+	struct strbuf cookie_pathname = STRBUF_INIT;
+	struct strbuf cookie_filename = STRBUF_INIT;
+	const char *slash;
+	int my_cookie_seq;
+
+	pthread_mutex_lock(&state->main_lock);
+
+	my_cookie_seq = state->cookie_seq++;
+
+	strbuf_addbuf(&cookie_pathname, &state->path_cookie_prefix);
+	strbuf_addf(&cookie_pathname, "%i-%i", getpid(), my_cookie_seq);
+
+	slash = find_last_dir_sep(cookie_pathname.buf);
+	if (slash)
+		strbuf_addstr(&cookie_filename, slash + 1);
+	else
+		strbuf_addbuf(&cookie_filename, &cookie_pathname);
+	cookie.name = strbuf_detach(&cookie_filename, NULL);
+	cookie.result = FCIR_INIT;
+	// TODO should we have case-insenstive hash (and in cookie_cmp()) ??
+	hashmap_entry_init(&cookie.entry, strhash(cookie.name));
+
+	/*
+	 * Warning: we are putting the address of a stack variable into a
+	 * global hashmap.  This feels dodgy.  We must ensure that we remove
+	 * it before this thread and stack frame returns.
+	 */
+	hashmap_add(&state->cookies, &cookie.entry);
+
+	trace_printf_key(&trace_fsmonitor, "cookie-wait: '%s' '%s'",
+			 cookie.name, cookie_pathname.buf);
+
+	/*
+	 * Create the cookie file on disk and then wait for a notification
+	 * that the listener thread has seen it.
+	 */
+	fd = open(cookie_pathname.buf, O_WRONLY | O_CREAT | O_EXCL, 0600);
+	if (fd >= 0) {
+		close(fd);
+		unlink_or_warn(cookie_pathname.buf);
+
+		while (cookie.result == FCIR_INIT)
+			pthread_cond_wait(&state->cookies_cond,
+					  &state->main_lock);
+
+		hashmap_remove(&state->cookies, &cookie.entry, NULL);
+	} else {
+		error_errno(_("could not create fsmonitor cookie '%s'"),
+			    cookie.name);
+
+		cookie.result = FCIR_ERROR;
+		hashmap_remove(&state->cookies, &cookie.entry, NULL);
+	}
+
+	pthread_mutex_unlock(&state->main_lock);
+
+	free((char*)cookie.name);
+	strbuf_release(&cookie_pathname);
+	return cookie.result;
+}
+
+/*
+ * Mark these cookies as _SEEN and wake up the corresponding client threads.
+ */
+static void fsmonitor_cookie_mark_seen(struct fsmonitor_daemon_state *state,
+				       const struct string_list *cookie_names)
+{
+	/* assert state->main_lock */
+
+	int k;
+	int nr_seen = 0;
+
+	for (k = 0; k < cookie_names->nr; k++) {
+		struct fsmonitor_cookie_item key;
+		struct fsmonitor_cookie_item *cookie;
+
+		key.name = cookie_names->items[k].string;
+		hashmap_entry_init(&key.entry, strhash(key.name));
+
+		cookie = hashmap_get_entry(&state->cookies, &key, entry, NULL);
+		if (cookie) {
+			trace_printf_key(&trace_fsmonitor, "cookie-seen: '%s'",
+					 cookie->name);
+			cookie->result = FCIR_SEEN;
+			nr_seen++;
+		}
+	}
+
+	if (nr_seen)
+		pthread_cond_broadcast(&state->cookies_cond);
+}
+
+/*
+ * Set _ABORT on all pending cookies and wake up all client threads.
+ */
+static void fsmonitor_cookie_abort_all(struct fsmonitor_daemon_state *state)
+{
+	/* assert state->main_lock */
+
+	struct hashmap_iter iter;
+	struct fsmonitor_cookie_item *cookie;
+	int nr_aborted = 0;
+
+	hashmap_for_each_entry(&state->cookies, &iter, cookie, entry) {
+		trace_printf_key(&trace_fsmonitor, "cookie-abort: '%s'",
+				 cookie->name);
+		cookie->result = FCIR_ABORT;
+		nr_aborted++;
+	}
+
+	if (nr_aborted)
+		pthread_cond_broadcast(&state->cookies_cond);
+}
+
 static int lookup_client_test_delay(void)
 {
 	static int delay_ms = -1;
@@ -435,6 +578,9 @@ static void fsmonitor_free_token_data(struct fsmonitor_token_data *token)
  *     We should create a new token and start fresh (as if we just
  *     booted up).
  *
+ * [2] Some of those lost events may have been for cookie files.  We
+ *     should assume the worst and abort them rather letting them starve.
+ *
  * If there are no readers of the the current token data series, we
  * can free it now.  Otherwise, let the last reader free it.  Either
  * way, the old token data series is no longer associated with our
@@ -454,6 +600,8 @@ void fsmonitor_force_resync(struct fsmonitor_daemon_state *state)
 			 state->current_token_data->token_id.buf,
 			 new_one->token_id.buf);
 
+	fsmonitor_cookie_abort_all(state);
+
 	if (state->current_token_data->client_ref_count == 0)
 		free_me = state->current_token_data;
 	state->current_token_data = new_one;
@@ -526,6 +674,7 @@ static int do_handle_client(struct fsmonitor_daemon_state *state,
 	kh_str_t *shown;
 	int hash_ret;
 	int result;
+	enum fsmonitor_cookie_item_result cookie_result;
 
 	/*
 	 * We expect `command` to be of the form:
@@ -654,6 +803,39 @@ static int do_handle_client(struct fsmonitor_daemon_state *state,
 		goto send_trivial_response;
 	}
 
+	pthread_mutex_unlock(&state->main_lock);
+
+	/*
+	 * Write a cookie file inside the directory being watched in an
+	 * effort to flush out existing filesystem events that we actually
+	 * care about.  Suspend this client thread until we see the filesystem
+	 * events for this cookie file.
+	 */
+	cookie_result = fsmonitor_wait_for_cookie(state);
+	if (cookie_result != FCIR_SEEN) {
+		error(_("fsmonitor: cookie_result '%d' != SEEN"),
+		      cookie_result);
+		result = 0;
+		goto send_trivial_response;
+	}
+
+	pthread_mutex_lock(&state->main_lock);
+
+	if (strcmp(requested_token_id.buf,
+		   state->current_token_data->token_id.buf)) {
+		/*
+		 * Ack! The listener thread lost sync with the filesystem
+		 * and created a new token while we were waiting for the
+		 * cookie file to be created!  Just give up.
+		 */
+		pthread_mutex_unlock(&state->main_lock);
+
+		trace_printf_key(&trace_fsmonitor,
+				 "lost filesystem sync");
+		result = 0;
+		goto send_trivial_response;
+	}
+
 	/*
 	 * We're going to hold onto a pointer to the current
 	 * token-data while we walk the list of batches of files.
@@ -982,6 +1164,9 @@ void fsmonitor_publish(struct fsmonitor_daemon_state *state,
 		}
 	}
 
+	if (cookie_names->nr)
+		fsmonitor_cookie_mark_seen(state, cookie_names);
+
 	pthread_mutex_unlock(&state->main_lock);
 }
 
@@ -1071,7 +1256,9 @@ static int fsmonitor_run_daemon(void)
 
 	memset(&state, 0, sizeof(state));
 
+	hashmap_init(&state.cookies, cookies_cmp, NULL, 0);
 	pthread_mutex_init(&state.main_lock, NULL);
+	pthread_cond_init(&state.cookies_cond, NULL);
 	state.error_code = 0;
 	state.current_token_data = fsmonitor_new_token_data();
 	state.test_client_delay_ms = lookup_client_test_delay();
@@ -1094,6 +1281,15 @@ static int fsmonitor_run_daemon(void)
 		state.nr_paths_watching = 2;
 	}
 
+	/*
+	 * We will write filesystem syncing cookie files into
+	 * <gitdir>/<cookie-prefix><pid>-<seq>.
+	 */
+	strbuf_init(&state.path_cookie_prefix, 0);
+	strbuf_addbuf(&state.path_cookie_prefix, &state.path_gitdir_watch);
+	strbuf_addch(&state.path_cookie_prefix, '/');
+	strbuf_addstr(&state.path_cookie_prefix, FSMONITOR_COOKIE_PREFIX);
+
 	/*
 	 * Confirm that we can create platform-specific resources for the
 	 * filesystem listener before we bother starting all the threads.
@@ -1106,6 +1302,7 @@ static int fsmonitor_run_daemon(void)
 	err = fsmonitor_run_daemon_1(&state);
 
 done:
+	pthread_cond_destroy(&state.cookies_cond);
 	pthread_mutex_destroy(&state.main_lock);
 	fsmonitor_fs_listen__dtor(&state);
 
@@ -1113,6 +1310,7 @@ static int fsmonitor_run_daemon(void)
 
 	strbuf_release(&state.path_worktree_watch);
 	strbuf_release(&state.path_gitdir_watch);
+	strbuf_release(&state.path_cookie_prefix);
 
 	return err;
 }
diff --git a/fsmonitor--daemon.h b/fsmonitor--daemon.h
index 06563b6ed56c0d..4e580e285ed6f0 100644
--- a/fsmonitor--daemon.h
+++ b/fsmonitor--daemon.h
@@ -45,6 +45,11 @@ struct fsmonitor_daemon_state {
 
 	struct fsmonitor_token_data *current_token_data;
 
+	struct strbuf path_cookie_prefix;
+	pthread_cond_t cookies_cond;
+	int cookie_seq;
+	struct hashmap cookies;
+
 	int error_code;
 	struct fsmonitor_daemon_backend_data *backend_data;
 

From 5fa93cf80e82a6efcc284f558755bb5816e14211 Mon Sep 17 00:00:00 2001
From: Jeff Hostetler <jeffhost@microsoft.com>
Date: Thu, 7 Jan 2021 16:45:13 -0500
Subject: [PATCH 34/39] fsmonitor: force update index when fsmonitor token
 advances

Set the `FSMONITOR_CHANGED` bit on `istate->cache_changed` when the
fsmonitor response contains a different token to ensure that the index
is written to disk.

Normally, when the fsmonitor response includes a tracked file, the
index is always updated.  Similarly, the index might be updated when
the response alters the untracked-cache (when enabled).  However, in
cases where neither of those cause the index to be considered changed,
the fsmonitor response is wasted.  And subsequent commands will
continue to make requests with the same token and if there have not
been any changes in the working directory, they will receive the same
response.

This was observed on Windows after a large checkout.  On Windows, the
kernel emits events for the files that are changed as they are
changed.  However, it might delay events for the containing
directories until the system is more idle (or someone scans the
directory (so it seems)).  The first status following a checkout would
get the list of files.  The subsequent status commands would get the
list of directories as the events trickled out.  But they would never
catch up because the token was not advanced because the index wasn't
updated.

This list of directories caused `wt_status_collect_untracked()` to
unnecessarily spend time actually scanning them during each command.

Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
---
 fsmonitor.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/fsmonitor.c b/fsmonitor.c
index e28a28315043f3..cf8c675eff65bb 100644
--- a/fsmonitor.c
+++ b/fsmonitor.c
@@ -352,6 +352,16 @@ void refresh_fsmonitor(struct index_state *istate)
 	}
 	strbuf_release(&query_result);
 
+	/*
+	 * If the fsmonitor response and the subsequent scan of the disk
+	 * did not cause the in-memory index to be marked dirty, then force
+	 * it so that we advance the fsmonitor token in our extension, so
+	 * that future requests don't keep re-requesting the same range.
+	 */
+	if (istate->fsmonitor_last_update &&
+	    strcmp(istate->fsmonitor_last_update, last_update_token.buf))
+		istate->cache_changed |= FSMONITOR_CHANGED;
+
 	/* Now that we've updated istate, save the last_update_token */
 	FREE_AND_NULL(istate->fsmonitor_last_update);
 	istate->fsmonitor_last_update = strbuf_detach(&last_update_token, NULL);

From f9046e537d0b2897a1e62debd1dc51936547c0e2 Mon Sep 17 00:00:00 2001
From: Jeff Hostetler <jeffhost@microsoft.com>
Date: Fri, 18 Dec 2020 12:31:37 -0500
Subject: [PATCH 35/39] t7527: create test for fsmonitor--daemon

Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
---
 t/t7527-builtin-fsmonitor.sh | 485 +++++++++++++++++++++++++++++++++++
 1 file changed, 485 insertions(+)
 create mode 100755 t/t7527-builtin-fsmonitor.sh

diff --git a/t/t7527-builtin-fsmonitor.sh b/t/t7527-builtin-fsmonitor.sh
new file mode 100755
index 00000000000000..1fd230f1d4c6c7
--- /dev/null
+++ b/t/t7527-builtin-fsmonitor.sh
@@ -0,0 +1,485 @@
+#!/bin/sh
+
+test_description='built-in file system watcher'
+
+. ./test-lib.sh
+
+# Ask the fsmonitor daemon to insert a little delay before responding to
+# client commands like `git status` and `git fsmonitor--daemon --query` to
+# allow recent filesystem events to be received by the daemon.  This helps
+# the CI/PR builds be more stable.
+#
+# An arbitrary millisecond value.
+#
+GIT_TEST_FSMONITOR_CLIENT_DELAY=1000
+export GIT_TEST_FSMONITOR_CLIENT_DELAY
+
+git version --build-options | grep "feature:" | grep "fsmonitor--daemon" || {
+	skip_all="The built-in FSMonitor is not supported on this platform"
+	test_done
+}
+
+kill_repo () {
+	r=$1
+	git -C $r fsmonitor--daemon --stop >/dev/null 2>/dev/null
+	rm -rf $1
+	return 0
+}
+
+start_daemon () {
+	case "$#" in
+		1) r="-C $1";;
+		*) r="";
+	esac
+
+	git $r fsmonitor--daemon --start || return $?
+	git $r fsmonitor--daemon --is-running || return $?
+
+	return 0
+}
+
+test_expect_success 'explicit daemon start and stop' '
+	test_when_finished "kill_repo test_explicit" &&
+
+	git init test_explicit &&
+	start_daemon test_explicit &&
+
+	git -C test_explicit fsmonitor--daemon --stop &&
+	test_must_fail git -C test_explicit fsmonitor--daemon --is-running
+'
+
+test_expect_success 'implicit daemon start' '
+	test_when_finished "kill_repo test_implicit" &&
+
+	git init test_implicit &&
+	test_must_fail git -C test_implicit fsmonitor--daemon --is-running &&
+
+	# query will implicitly start the daemon.
+	#
+	# for test-script simplicity, we send a V1 timestamp rather than
+	# a V2 token.  either way, the daemon response to any query contains
+	# a new V2 token.  (the daemon may complain that we sent a V1 request,
+	# but this test case is only concerned with whether the daemon was
+	# implicitly started.)
+
+	GIT_TRACE2_EVENT="$PWD/.git/trace" \
+		git -C test_implicit fsmonitor--daemon --query 0 >actual &&
+	nul_to_q <actual >actual.filtered &&
+	grep "builtin:" actual.filtered &&
+
+	# confirm that a daemon was started in the background.
+	#
+	# since the mechanism for starting the background daemon is platform
+	# dependent, just confirm that the foreground command received a
+	# response from the daemon.
+
+	grep :\"query/response-length\" .git/trace &&
+
+	git -C test_implicit fsmonitor--daemon --is-running &&
+	git -C test_implicit fsmonitor--daemon --stop &&
+	test_must_fail git -C test_implicit fsmonitor--daemon --is-running
+'
+
+test_expect_success 'implicit daemon stop (delete .git)' '
+	test_when_finished "kill_repo test_implicit_1" &&
+
+	git init test_implicit_1 &&
+
+	start_daemon test_implicit_1 &&
+
+	# deleting the .git directory will implicitly stop the daemon.
+	rm -rf test_implicit_1/.git &&
+
+	# Create an empty .git directory so that the following Git command
+	# will stay relative to the `-C` directory.  Without this, the Git
+	# command will (override the requested -C argument) and crawl out
+	# to the containing Git source tree.  This would make the test
+	# result dependent upon whether we were using fsmonitor on our
+	# development worktree.
+
+	sleep 1 &&
+	mkdir test_implicit_1/.git &&
+
+	test_must_fail git -C test_implicit_1 fsmonitor--daemon --is-running
+'
+
+test_expect_success 'implicit daemon stop (rename .git)' '
+	test_when_finished "kill_repo test_implicit_2" &&
+
+	git init test_implicit_2 &&
+
+	start_daemon test_implicit_2 &&
+
+	# renaming the .git directory will implicitly stop the daemon.
+	mv test_implicit_2/.git test_implicit_2/.xxx &&
+
+	# Create an empty .git directory so that the following Git command
+	# will stay relative to the `-C` directory.  Without this, the Git
+	# command will (override the requested -C argument) and crawl out
+	# to the containing Git source tree.  This would make the test
+	# result dependent upon whether we were using fsmonitor on our
+	# development worktree.
+
+	sleep 1 &&
+	mkdir test_implicit_2/.git &&
+
+	test_must_fail git -C test_implicit_2 fsmonitor--daemon --is-running
+'
+
+test_expect_success 'cannot start multiple daemons' '
+	test_when_finished "kill_repo test_multiple" &&
+
+	git init test_multiple &&
+
+	start_daemon test_multiple &&
+
+	test_must_fail git -C test_multiple fsmonitor--daemon --start 2>actual &&
+	grep "fsmonitor--daemon is already running" actual &&
+
+	git -C test_multiple fsmonitor--daemon --stop &&
+	test_must_fail git -C test_multiple fsmonitor--daemon --is-running
+'
+
+test_expect_success 'setup' '
+	>tracked &&
+	>modified &&
+	>delete &&
+	>rename &&
+	mkdir dir1 &&
+	>dir1/tracked &&
+	>dir1/modified &&
+	>dir1/delete &&
+	>dir1/rename &&
+	mkdir dir2 &&
+	>dir2/tracked &&
+	>dir2/modified &&
+	>dir2/delete &&
+	>dir2/rename &&
+	mkdir dirtorename &&
+	>dirtorename/a &&
+	>dirtorename/b &&
+
+	cat >.gitignore <<-\EOF &&
+	.gitignore
+	expect*
+	actual*
+	EOF
+
+	git -c core.useBuiltinFSMonitor= add . &&
+	test_tick &&
+	git -c core.useBuiltinFSMonitor= commit -m initial &&
+
+	git config core.useBuiltinFSMonitor true
+'
+
+test_expect_success 'update-index implicitly starts daemon' '
+	test_must_fail git fsmonitor--daemon --is-running &&
+
+	GIT_TRACE2_EVENT="$PWD/.git/trace_implicit_1" \
+		git update-index --fsmonitor &&
+
+	git fsmonitor--daemon --is-running &&
+	test_might_fail git fsmonitor--daemon --stop &&
+
+	grep \"event\":\"start\".*\"fsmonitor--daemon\" .git/trace_implicit_1
+'
+
+test_expect_success 'status implicitly starts daemon' '
+	test_must_fail git fsmonitor--daemon --is-running &&
+
+	GIT_TRACE2_EVENT="$PWD/.git/trace_implicit_2" \
+		git status >actual &&
+
+	git fsmonitor--daemon --is-running &&
+	test_might_fail git fsmonitor--daemon --stop &&
+
+	grep \"event\":\"start\".*\"fsmonitor--daemon\" .git/trace_implicit_2
+'
+
+edit_files() {
+	echo 1 >modified
+	echo 2 >dir1/modified
+	echo 3 >dir2/modified
+	>dir1/untracked
+}
+
+delete_files() {
+	rm -f delete
+	rm -f dir1/delete
+	rm -f dir2/delete
+}
+
+create_files() {
+	echo 1 >new
+	echo 2 >dir1/new
+	echo 3 >dir2/new
+}
+
+rename_files() {
+	mv rename renamed
+	mv dir1/rename dir1/renamed
+	mv dir2/rename dir2/renamed
+}
+
+file_to_directory() {
+	rm -f delete
+	mkdir delete
+	echo 1 >delete/new
+}
+
+directory_to_file() {
+	rm -rf dir1
+	echo 1 >dir1
+}
+
+verify_status() {
+	git status >actual &&
+	GIT_INDEX_FILE=.git/fresh-index git read-tree master &&
+	GIT_INDEX_FILE=.git/fresh-index git -c core.useBuiltinFSMonitor= status >expect &&
+	test_cmp expect actual &&
+	echo HELLO AFTER &&
+	cat .git/trace &&
+	echo HELLO AFTER
+}
+
+# The next few test cases confirm that our fsmonitor daemon sees each type
+# of OS filesystem notification that we care about.  At this layer we just
+# ensure we are getting the OS notifications and do not try to confirm what
+# is reported by `git status`.
+#
+# We run a simple query after modifying the filesystem just to introduce
+# a bit of a delay so that the trace logging from the daemon has time to
+# get flushed to disk.
+#
+# We `reset` and `clean` at the bottom of each test (and before stopping the
+# daemon) because these commands might implicitly restart the daemon.
+
+clean_up_repo_and_stop_daemon () {
+	git reset --hard HEAD
+	git clean -fd
+	git fsmonitor--daemon --stop
+	rm -f .git/trace
+}
+
+test_expect_success 'edit some files' '
+	test_when_finished "clean_up_repo_and_stop_daemon" &&
+
+	(
+		GIT_TRACE_FSMONITOR="$PWD/.git/trace" &&
+		export GIT_TRACE_FSMONITOR &&
+
+		start_daemon
+	) &&
+
+	edit_files &&
+
+	git fsmonitor--daemon --query 0 >/dev/null 2>&1 &&
+
+	grep "^event: dir1/modified$"  .git/trace &&
+	grep "^event: dir2/modified$"  .git/trace &&
+	grep "^event: modified$"       .git/trace &&
+	grep "^event: dir1/untracked$" .git/trace
+'
+
+test_expect_success 'create some files' '
+	test_when_finished "clean_up_repo_and_stop_daemon" &&
+
+	(
+		GIT_TRACE_FSMONITOR="$PWD/.git/trace" &&
+		export GIT_TRACE_FSMONITOR &&
+
+		start_daemon
+	) &&
+
+	create_files &&
+
+	git fsmonitor--daemon --query 0 >/dev/null 2>&1 &&
+
+	grep "^event: dir1/new$" .git/trace &&
+	grep "^event: dir2/new$" .git/trace &&
+	grep "^event: new$"      .git/trace
+'
+
+test_expect_success 'delete some files' '
+	test_when_finished "clean_up_repo_and_stop_daemon" &&
+
+	(
+		GIT_TRACE_FSMONITOR="$PWD/.git/trace" &&
+		export GIT_TRACE_FSMONITOR &&
+
+		start_daemon
+	) &&
+
+	delete_files &&
+
+	git fsmonitor--daemon --query 0 >/dev/null 2>&1 &&
+
+	grep "^event: dir1/delete$" .git/trace &&
+	grep "^event: dir2/delete$" .git/trace &&
+	grep "^event: delete$"      .git/trace
+'
+
+test_expect_success 'rename some files' '
+	test_when_finished "clean_up_repo_and_stop_daemon" &&
+
+	(
+		GIT_TRACE_FSMONITOR="$PWD/.git/trace" &&
+		export GIT_TRACE_FSMONITOR &&
+
+		start_daemon
+	) &&
+
+	rename_files &&
+
+	git fsmonitor--daemon --query 0 >/dev/null 2>&1 &&
+
+	grep "^event: dir1/rename$"  .git/trace &&
+	grep "^event: dir2/rename$"  .git/trace &&
+	grep "^event: rename$"       .git/trace &&
+	grep "^event: dir1/renamed$" .git/trace &&
+	grep "^event: dir2/renamed$" .git/trace &&
+	grep "^event: renamed$"      .git/trace
+'
+
+test_expect_success 'rename directory' '
+	test_when_finished "clean_up_repo_and_stop_daemon" &&
+
+	(
+		GIT_TRACE_FSMONITOR="$PWD/.git/trace" &&
+		export GIT_TRACE_FSMONITOR &&
+
+		start_daemon
+	) &&
+
+	mv dirtorename dirrenamed &&
+
+	git fsmonitor--daemon --query 0 >/dev/null 2>&1 &&
+
+	grep "^event: dirtorename/*$" .git/trace &&
+	grep "^event: dirrenamed/*$"  .git/trace
+'
+
+test_expect_success 'file changes to directory' '
+	test_when_finished "clean_up_repo_and_stop_daemon" &&
+
+	(
+		GIT_TRACE_FSMONITOR="$PWD/.git/trace" &&
+		export GIT_TRACE_FSMONITOR &&
+
+		start_daemon
+	) &&
+
+	file_to_directory &&
+
+	git fsmonitor--daemon --query 0 >/dev/null 2>&1 &&
+
+	grep "^event: delete$"     .git/trace &&
+	grep "^event: delete/new$" .git/trace
+'
+
+test_expect_success 'directory changes to a file' '
+	test_when_finished "clean_up_repo_and_stop_daemon" &&
+
+	(
+		GIT_TRACE_FSMONITOR="$PWD/.git/trace" &&
+		export GIT_TRACE_FSMONITOR &&
+
+		start_daemon
+	) &&
+
+	directory_to_file &&
+
+	git fsmonitor--daemon --query 0 >/dev/null 2>&1 &&
+
+	grep "^event: dir1$" .git/trace
+'
+
+# The next few test cases exercise the token-resync code.  When filesystem
+# drops events (because of filesystem velocity or because the daemon isn't
+# polling fast enough), we need to discard the cached data (relative to the
+# current token) and start collecting events under a new token.
+#
+# the 'git fsmonitor--daemon --flush' command can be used to send a "flush"
+# message to a running daemon and ask it to do a flush/resync.
+
+test_expect_success 'flush cached data' '
+	test_when_finished "kill_repo test_flush" &&
+
+	git init test_flush &&
+
+	(
+		GIT_TEST_FSMONITOR_TOKEN=true &&
+		export GIT_TEST_FSMONITOR_TOKEN &&
+
+		GIT_TRACE_FSMONITOR="$PWD/.git/trace_daemon" &&
+		export GIT_TRACE_FSMONITOR &&
+
+		start_daemon test_flush
+	) &&
+
+	# The daemon should have an initial token with no events in _0 and
+	# then a few (probably platform-specific number of) events in _1.
+	# These should both have the same <token_id>.
+
+	git -C test_flush fsmonitor--daemon --query "builtin:test_00000001:0" >actual_0 &&
+	nul_to_q <actual_0 >actual_q0 &&
+
+	touch test_flush/file_1 &&
+	touch test_flush/file_2 &&
+
+	git -C test_flush fsmonitor--daemon --query "builtin:test_00000001:0" >actual_1 &&
+	nul_to_q <actual_1 >actual_q1 &&
+
+	grep "file_1" actual_q1 &&
+
+	# Force a flush.  This will change the <token_id>, reset the <seq_nr>, and
+	# flush the file data.  Then create some events and ensure that the file
+	# again appears in the cache.  It should have the new <token_id>.
+
+	git -C test_flush fsmonitor--daemon --flush >flush_0 &&
+	nul_to_q <flush_0 >flush_q0 &&
+	grep "^builtin:test_00000002:0Q/Q$" flush_q0 &&
+
+	git -C test_flush fsmonitor--daemon --query "builtin:test_00000002:0" >actual_2 &&
+	nul_to_q <actual_2 >actual_q2 &&
+
+	grep "^builtin:test_00000002:0Q$" actual_q2 &&
+
+	touch test_flush/file_3 &&
+
+	git -C test_flush fsmonitor--daemon --query "builtin:test_00000002:0" >actual_3 &&
+	nul_to_q <actual_3 >actual_q3 &&
+
+	grep "file_3" actual_q3
+'
+
+# The next few test cases create repos where the .git directory is NOT
+# inside the one of the working directory.  That is, where .git is a file
+# that points to a directory elsewhere.  This happens for submodules and
+# non-primary worktrees.
+
+test_expect_success 'setup worktree base' '
+	git init wt-base &&
+	echo 1 >wt-base/file1 &&
+	git -C wt-base add file1 &&
+	git -C wt-base commit -m "c1"
+'
+
+test_expect_success 'worktree with .git file' '
+	git -C wt-base worktree add ../wt-secondary &&
+
+	(
+		GIT_TRACE2_PERF="$PWD/trace2_wt_secondary" &&
+		export GIT_TRACE2_PERF &&
+
+		GIT_TRACE_FSMONITOR="$PWD/trace_wt_secondary" &&
+		export GIT_TRACE_FSMONITOR &&
+
+		start_daemon wt-secondary
+	) &&
+
+	git -C wt-secondary fsmonitor--daemon --stop &&
+	test_must_fail git -C wt-secondary fsmonitor--daemon --is-running
+'
+
+test_done

From fe048bfa5e8893b0db5b61b284fcd7a35cbd7994 Mon Sep 17 00:00:00 2001
From: Jeff Hostetler <jeffhost@microsoft.com>
Date: Fri, 15 Jan 2021 15:38:42 -0500
Subject: [PATCH 36/39] p7519: add fsmonitor--daemon

Repeat all of the fsmonitor perf tests using `git fsmonitor--daemon` and
the "Simple IPC" interface.

Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
---
 t/perf/p7519-fsmonitor.sh | 37 +++++++++++++++++++++++++++++++++++--
 1 file changed, 35 insertions(+), 2 deletions(-)

diff --git a/t/perf/p7519-fsmonitor.sh b/t/perf/p7519-fsmonitor.sh
index b657564aed6035..32ed494fc199e3 100755
--- a/t/perf/p7519-fsmonitor.sh
+++ b/t/perf/p7519-fsmonitor.sh
@@ -24,7 +24,8 @@ test_description="Test core.fsmonitor"
 # GIT_PERF_7519_SPLIT_INDEX: used to configure core.splitIndex
 # GIT_PERF_7519_FSMONITOR: used to configure core.fsMonitor. May be an
 #   absolute path to an integration. May be a space delimited list of
-#   absolute paths to integrations.
+#   absolute paths to integrations.  (This hook or list of hooks does not
+#   include the built-in fsmonitor--daemon.)
 #
 # The big win for using fsmonitor is the elimination of the need to scan the
 # working directory looking for changed and untracked files. If the file
@@ -135,10 +136,16 @@ test_expect_success "one time repo setup" '
 
 setup_for_fsmonitor() {
 	# set INTEGRATION_SCRIPT depending on the environment
-	if test -n "$INTEGRATION_PATH"
+	if test -n "$USE_FSMONITOR_DAEMON"
 	then
+		git config core.useBuiltinFSMonitor true &&
+		INTEGRATION_SCRIPT=false
+	elif test -n "$INTEGRATION_PATH"
+	then
+		git config core.useBuiltinFSMonitor false &&
 		INTEGRATION_SCRIPT="$INTEGRATION_PATH"
 	else
+		git config core.useBuiltinFSMonitor false &&
 		#
 		# Choose integration script based on existence of Watchman.
 		# Fall back to an empty integration script.
@@ -281,4 +288,30 @@ test_expect_success "setup without fsmonitor" '
 test_fsmonitor_suite
 trace_stop
 
+#
+# Run a full set of perf tests using the built-in fsmonitor--daemon.
+# It does not use the Hook API, so it has a different setup.
+# Explicitly start the daemon here and before we start client commands
+# so that we can later add custom tracing.
+#
+
+test_lazy_prereq HAVE_FSMONITOR_DAEMON '
+	git version --build-options | grep "feature:" | grep "fsmonitor--daemon"
+'
+
+if test_have_prereq HAVE_FSMONITOR_DAEMON
+then
+	USE_FSMONITOR_DAEMON=t
+
+	trace_start fsmonitor--daemon--server
+	git fsmonitor--daemon --start
+
+	trace_start fsmonitor--daemon--client
+	test_expect_success "setup for fsmonitor--daemon" 'setup_for_fsmonitor'
+	test_fsmonitor_suite
+
+	git fsmonitor--daemon --stop
+	trace_stop
+fi
+
 test_done

From e666cc2520689ec6b6e863171a490db41820d411 Mon Sep 17 00:00:00 2001
From: Jeff Hostetler <jeffhost@microsoft.com>
Date: Mon, 1 Mar 2021 17:48:39 -0500
Subject: [PATCH 37/39] t7527: test status with untracked-cache and
 fsmonitor--daemon

Create 2x2 test matrix with the untracked-cache and fsmonitor--daemon
features and a series of edits and verify that status output is
identical.

Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
---
 t/t7527-builtin-fsmonitor.sh | 97 ++++++++++++++++++++++++++++++++++++
 1 file changed, 97 insertions(+)

diff --git a/t/t7527-builtin-fsmonitor.sh b/t/t7527-builtin-fsmonitor.sh
index 1fd230f1d4c6c7..ad2188169db730 100755
--- a/t/t7527-builtin-fsmonitor.sh
+++ b/t/t7527-builtin-fsmonitor.sh
@@ -163,6 +163,8 @@ test_expect_success 'setup' '
 	.gitignore
 	expect*
 	actual*
+	flush*
+	trace*
 	EOF
 
 	git -c core.useBuiltinFSMonitor= add . &&
@@ -482,4 +484,99 @@ test_expect_success 'worktree with .git file' '
 	test_must_fail git -C wt-secondary fsmonitor--daemon --is-running
 '
 
+# TODO Repeat one of the "edit" tests on wt-secondary and confirm that
+# TODO we get the same events and behavior -- that is, that fsmonitor--daemon
+# TODO correctly listens to events on both the working directory and to the
+# TODO referenced GITDIR.
+
+test_expect_success 'cleanup worktrees' '
+	kill_repo wt-secondary &&
+	kill_repo wt-base
+'
+
+# The next few tests perform arbitrary/contrived file operations and
+# confirm that status is correct.  That is, that the data (or lack of
+# data) from fsmonitor doesn't cause incorrect results.  And doesn't
+# cause incorrect results when the untracked-cache is enabled.
+
+test_lazy_prereq UNTRACKED_CACHE '
+	{ git update-index --test-untracked-cache; ret=$?; } &&
+	test $ret -ne 1
+'
+
+test_expect_success 'Matrix: setup for untracked-cache,fsmonitor matrix' '
+	test_might_fail git config --unset core.useBuiltinFSMonitor &&
+	git update-index --no-fsmonitor &&
+	test_might_fail git fsmonitor--daemon --stop
+'
+
+matrix_clean_up_repo () {
+	git reset --hard HEAD
+	git clean -fd
+}
+
+matrix_try () {
+	uc=$1
+	fsm=$2
+	fn=$3
+
+	test_expect_success "Matrix[uc:$uc][fsm:$fsm] $fn" '
+		matrix_clean_up_repo &&
+		$fn &&
+		if test $uc = false -a $fsm = false
+		then
+			git status --porcelain=v1 >.git/expect.$fn
+		else
+			git status --porcelain=v1 >.git/actual.$fn
+			test_cmp .git/expect.$fn .git/actual.$fn
+		fi
+	'
+
+	return $?
+}
+
+uc_values="false"
+test_have_prereq UNTRACKED_CACHE && uc_values="false true"
+for uc_val in $uc_values
+do
+	if test $uc_val = false
+	then
+		test_expect_success "Matrix[uc:$uc_val] disable untracked cache" '
+			git config core.untrackedcache false &&
+			git update-index --no-untracked-cache
+		'
+	else
+		test_expect_success "Matrix[uc:$uc_val] enable untracked cache" '
+			git config core.untrackedcache true &&
+			git update-index --untracked-cache
+		'
+	fi
+
+	fsm_values="false true"
+	for fsm_val in $fsm_values
+	do
+		if test $fsm_val = false
+		then
+			test_expect_success "Matrix[uc:$uc_val][fsm:$fsm_val] disable fsmonitor" '
+				test_might_fail git config --unset core.useBuiltinFSMonitor &&
+				git update-index --no-fsmonitor &&
+				test_might_fail git fsmonitor--daemon --stop 2>/dev/null
+			'
+		else
+			test_expect_success "Matrix[uc:$uc_val][fsm:$fsm_val] enable fsmonitor" '
+				git config core.useBuiltinFSMonitor true &&
+				git fsmonitor--daemon --start &&
+				git update-index --fsmonitor
+			'
+		fi
+
+		matrix_try $uc_val $fsm_val edit_files
+		matrix_try $uc_val $fsm_val delete_files
+		matrix_try $uc_val $fsm_val create_files
+		matrix_try $uc_val $fsm_val rename_files
+		matrix_try $uc_val $fsm_val file_to_directory
+		matrix_try $uc_val $fsm_val directory_to_file
+	done
+done
+
 test_done

From 44234a2ff86030d08212bb9c9cc11e55ee11d800 Mon Sep 17 00:00:00 2001
From: Johannes Schindelin <johannes.schindelin@gmx.de>
Date: Fri, 5 Mar 2021 23:07:26 +0100
Subject: [PATCH 38/39] fsmonitor: mark the built-in FSMonitor as experimental

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 Documentation/config/core.txt           | 8 +++++---
 Documentation/git-fsmonitor--daemon.txt | 5 ++++-
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/Documentation/config/core.txt b/Documentation/config/core.txt
index eac0d04ac105d5..4532a6baea5c7e 100644
--- a/Documentation/config/core.txt
+++ b/Documentation/config/core.txt
@@ -70,7 +70,8 @@ core.fsmonitor::
 See the "fsmonitor-watchman" section of linkgit:githooks[5].
 +
 Note: FSMonitor hooks (and this config setting) are ignored if the
-built-in FSMonitor is enabled (see `core.useBuiltinFSMonitor`).
+(experimental) built-in FSMonitor is enabled (see
+`core.useBuiltinFSMonitor`).
 
 core.fsmonitorHookVersion::
 	Sets the version of hook that is to be used when calling the
@@ -89,8 +90,9 @@ Note: FSMonitor hooks (and this config setting) are ignored if the
 built-in FSMonitor is enabled (see `core.useBuiltinFSMonitor`).
 
 core.useBuiltinFSMonitor::
-	If set to true, enable the built-in filesystem event watcher (for
-	technical details, see linkgit:git-fsmonitor--daemon[1]).
+	(EXPERIMENTAL) If set to true, enable the built-in filesystem
+	event watcher (for technical details, see
+	linkgit:git-fsmonitor--daemon[1]).
 +
 Like external (hook-based) FSMonitors, the built-in FSMonitor can speed up
 Git commands that need to refresh the Git index (e.g. `git status`) in a
diff --git a/Documentation/git-fsmonitor--daemon.txt b/Documentation/git-fsmonitor--daemon.txt
index b94f57c97fe472..ec0012b3454d6c 100644
--- a/Documentation/git-fsmonitor--daemon.txt
+++ b/Documentation/git-fsmonitor--daemon.txt
@@ -3,7 +3,7 @@ git-fsmonitor--daemon(1)
 
 NAME
 ----
-git-fsmonitor--daemon - Builtin file system monitor daemon
+git-fsmonitor--daemon - (EXPERIMENTAL) Builtin file system monitor daemon
 
 SYNOPSIS
 --------
@@ -20,6 +20,9 @@ SYNOPSIS
 DESCRIPTION
 -----------
 
+NOTE! This command is still only an experiment, subject to change dramatically
+(or even to be abandoned).
+
 Monitors files and directories in the working directory for changes using
 platform-specific file system notification facilities.
 

From 60deac24df3c37080eaa71052d35b179931f2137 Mon Sep 17 00:00:00 2001
From: Johannes Schindelin <johannes.schindelin@gmx.de>
Date: Fri, 5 Mar 2021 23:12:11 +0100
Subject: [PATCH 39/39] Enable the built-in FSMonitor as an experimental
 feature

If `feature.experimental` and `feature.manyFiles` are set, we now start
the built-in FSMonitor by default.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 repo-settings.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/repo-settings.c b/repo-settings.c
index 93aab92ff16436..df27452b565731 100644
--- a/repo-settings.c
+++ b/repo-settings.c
@@ -7,7 +7,7 @@
 
 void prepare_repo_settings(struct repository *r)
 {
-	int value;
+	int value, feature_many_files = 0;
 	char *strval;
 
 	if (r->settings.initialized)
@@ -62,6 +62,7 @@ void prepare_repo_settings(struct repository *r)
 		r->settings.use_builtin_fsmonitor = 1;
 
 	if (!repo_config_get_bool(r, "feature.manyfiles", &value) && value) {
+		feature_many_files = 1;
 		UPDATE_DEFAULT_BOOL(r->settings.index_version, 4);
 		UPDATE_DEFAULT_BOOL(r->settings.core_untracked_cache, UNTRACKED_CACHE_WRITE);
 	}
@@ -70,8 +71,16 @@ void prepare_repo_settings(struct repository *r)
 		r->settings.fetch_write_commit_graph = value;
 	UPDATE_DEFAULT_BOOL(r->settings.fetch_write_commit_graph, 0);
 
-	if (!repo_config_get_bool(r, "feature.experimental", &value) && value)
+	if (!repo_config_get_bool(r, "feature.experimental", &value) && value) {
 		UPDATE_DEFAULT_BOOL(r->settings.fetch_negotiation_algorithm, FETCH_NEGOTIATION_SKIPPING);
+#ifdef HAVE_FSMONITOR_DAEMON_BACKEND
+		if (feature_many_files)
+			UPDATE_DEFAULT_BOOL(r->settings.use_builtin_fsmonitor,
+					    1);
+#else
+		(void)feature_many_files;
+#endif
+	}
 
 	/* Hack for test programs like test-dump-untracked-cache */
 	if (ignore_untracked_cache_config)