diff --git a/Makefile b/Makefile
new file mode 100644
index 00000000..d665af1f
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,46 @@
+APPNAME ?= gitstatusd
+OBJDIR ?= obj
+
+CXX ?= g++
+
+# Version string embedded into the binary; taken from gitstatus_version in ./build.info.
+VERSION ?= $(shell . ./build.info && printf "%s" "$$gitstatus_version")
+
+# Note: -fsized-deallocation is not used to avoid binary compatibility issues on macOS.
+#
+# Sized delete is implemented as __ZdlPvm in /usr/lib/libc++.1.dylib but this symbol is
+# missing in macOS prior to 10.13.
+CXXFLAGS += -std=c++14 -funsigned-char -O3 -DNDEBUG -DGITSTATUS_VERSION=$(VERSION) -Wall -Werror # -g -fsanitize=thread
+LDFLAGS += -pthread # -fsanitize=thread
+LDLIBS += -lgit2 # -lprofiler -lunwind
+
+# Every src/<name>.cc is compiled to $(OBJDIR)/<name>.o.
+SRCS := $(shell find src -name "*.cc")
+OBJS := $(patsubst src/%.cc, $(OBJDIR)/%.o, $(SRCS))
+
+# These targets are commands, not files; keep them working even if such files exist.
+.PHONY: all clean
+
+all: $(APPNAME)
+
+# Convenience alias: `make $(APPNAME)` builds the binary under usrbin/.
+$(APPNAME): usrbin/$(APPNAME)
+
+usrbin/$(APPNAME): $(OBJS)
+	$(CXX) $(OBJS) $(LDFLAGS) $(LDLIBS) -o $@
+
+$(OBJDIR):
+	mkdir -p -- $(OBJDIR)
+
+# Compile one translation unit. The first command records its header dependencies in a
+# .dep file (included at the bottom of this Makefile). Objects are also rebuilt when
+# Makefile or build.info changes because compiler flags and the version come from them.
+# $(OBJDIR) is an order-only prerequisite: it must exist but its timestamp is ignored.
+$(OBJDIR)/%.o: src/%.cc Makefile build.info | $(OBJDIR)
+	$(CXX) $(CXXFLAGS) -MM -MT $@ src/$*.cc >$(OBJDIR)/$*.dep
+	$(CXX) $(CXXFLAGS) -Wall -c -o $@ src/$*.cc
+
+clean:
+	rm -rf -- $(OBJDIR)
+
+-include $(OBJS:.o=.dep) Its primary use +case is to enable fast git prompt in interactive shells. + +Heavy lifting is done by **gitstatusd** -- a custom binary written in C++. It comes with Zsh and +Bash bindings for integration with shell. + +## Table of Contents + +1. [Using from Zsh](#using-from-zsh) +1. [Using from Bash](#using-from-bash) +2. [Using from other shells](#using-from-other-shells) +1. [How it works](#how-it-works) +1. [Benchmarks](#benchmarks) +1. [Why fast](#why-fast) +1. [Requirements](#requirements) +1. [Compiling](#compiling) +1. [License](#license) + +## Using from Zsh + +The easiest way to take advantage of gitstatus from Zsh is to use a theme that's already integrated +with it. For example, [Powerlevel10k]( is a flexible and +fast theme with first-class gitstatus integration. + +![Powerlevel10k Zsh Theme]( + + +For those who wish to use gitstatus without a theme, there is +[gitstatus.prompt.zsh](gitstatus.prompt.zsh). Install it as follows: + +```zsh +git clone --depth=1 ~/gitstatus +echo 'source ~/gitstatus/gitstatus.prompt.zsh' >>! ~/.zshrc +``` + +_Make sure to disable your current theme if you have one._ + +This will give you a basic yet functional prompt with git status in it. It's +[over 10x faster](#benchmarks) than any alternative that can give you comparable prompt. In order +to customize it, set `PROMPT` and/or `RPROMPT` at the end of `~/.zshrc` after sourcing +`gitstatus.prompt.zsh`. Insert `${GITSTATUS_PROMPT}` where you want git status to go. 
For example: + +```zsh +source ~/gitstatus/gitstatus.prompt.zsh + +PROMPT='%~%# ' # left prompt: directory followed by %/# (normal/root) +RPROMPT='$GITSTATUS_PROMPT' # right prompt: git status +``` + +The expansion of `${GITSTATUS_PROMPT}` can contain the following bits: + +| segment | meaning | +|-------------|-------------------------------------------------------| +| `master` | current branch | +| `#v1` | HEAD is tagged with `v1`; not shown when on a branch | +| `@5fc6fca4` | current commit; not shown when on a branch or tag | +| `⇣1` | local branch is behind the remote by 1 commit | +| `⇡2` | local branch is ahead of the remote by 2 commits | +| `⇠3` | local branch is behind the push remote by 3 commits | +| `⇢4` | local branch is ahead of the push remote by 4 commits | +| `*5` | there are 5 stashes | +| `merge` | merge is in progress (could be some other action) | +| `~6` | there are 6 merge conflicts | +| `+7` | there are 7 staged changes | +| `!8` | there are 8 unstaged changes | +| `?9` | there are 9 untracked files | + +`$GITSTATUS_PROMPT_LEN` tells you how long `$GITSTATUS_PROMPT` is when printed to the console. +[gitstatus.prompt.zsh](gitstatus.prompt.zsh) has an example of using it to truncate the current +directory. + +If you'd like to change the format of git status, or want to have greater control over the +process of assembling `PROMPT`, you can copy and modify parts of +[gitstatus.prompt.zsh](gitstatus.prompt.zsh) instead of sourcing the script. Your `~/.zshrc` +might look something like this: + +```zsh +source ~/gitstatus/gitstatus.plugin.zsh + +function my_set_prompt() { + PROMPT='%~%# ' + RPROMPT='' + + if gitstatus_query MY && [[ $VCS_STATUS_RESULT == ok-sync ]]; then + RPROMPT=${${VCS_STATUS_LOCAL_BRANCH:-@${VCS_STATUS_COMMIT}}//\%/%%} # escape % + (( $VCS_STATUS_NUM_STAGED )) && RPROMPT+='+' + (( $VCS_STATUS_NUM_UNSTAGED )) && RPROMPT+='!' + (( $VCS_STATUS_NUM_UNTRACKED )) && RPROMPT+='?' 
+ fi + + setopt no_prompt_{bang,subst} prompt_percent # enable/disable correct prompt expansions +} + +gitstatus_stop 'MY' && gitstatus_start -s -1 -u -1 -c -1 -d -1 'MY' +autoload -Uz add-zsh-hook +add-zsh-hook precmd my_set_prompt +``` + +This snippet is sourcing `gitstatus.plugin.zsh` rather than `gitstatus.prompt.zsh`. The former +defines low-level bindings that communicate with gitstatusd over pipes. The latter is a simple +script that uses these bindings to assemble git prompt. + +Unlike [Powerlevel10k](, code based on +[gitstatus.prompt.zsh](gitstatus.prompt.zsh) is communicating with gitstatusd synchronously. This +can make your prompt slow when working in a large git repository or on a slow machine. To avoid +this problem, call `gitstatus_query` asynchronously as documented in +[gitstatus.plugin.zsh](gitstatus.plugin.zsh). This can be quite challenging. + +## Using from Bash + +The easiest way to take advantage of gitstatus from Bash is via +[]( Install it as follows: + +```bash +git clone --depth=1 ~/gitstatus +echo 'source ~/gitstatus/' >> ~/.bashrc +``` + +This will give you a basic yet functional prompt with git status in it. It's +[over 10x faster](#benchmarks) than any alternative that can give you comparable prompt. + +![Bash Prompt with GitStatus]( + + +In order to customize your prompt, set `PS1` at the end of `~/.bashrc` after sourcing +``. Insert `${GITSTATUS_PROMPT}` where you want git status to go. 
For example: + +```bash +source ~/gitstatus/ + +PS1='\w ${GITSTATUS_PROMPT}\n\$ ' # directory followed by git status and $/# (normal/root) +``` + +The expansion of `${GITSTATUS_PROMPT}` can contain the following bits: + +| segment | meaning | +|-------------|-------------------------------------------------------| +| `master` | current branch | +| `#v1` | HEAD is tagged with `v1`; not shown when on a branch | +| `@5fc6fca4` | current commit; not shown when on a branch or tag | +| `⇣1` | local branch is behind the remote by 1 commit | +| `⇡2` | local branch is ahead of the remote by 2 commits | +| `⇠3` | local branch is behind the push remote by 3 commits | +| `⇢4` | local branch is ahead of the push remote by 4 commits | +| `*5` | there are 5 stashes | +| `merge` | merge is in progress (could be some other action) | +| `~6` | there are 6 merge conflicts | +| `+7` | there are 7 staged changes | +| `!8` | there are 8 unstaged changes | +| `?9` | there are 9 untracked files | + +If you'd like to change the format of git status, or want to have greater control over the +process of assembling `PS1`, you can copy and modify parts of +[]( instead of sourcing the script. Your `~/.bashrc` might +look something like this: + +```bash +source ~/gitstatus/ + +function my_set_prompt() { + PS1='\w' + + if gitstatus_query && [[ "$VCS_STATUS_RESULT" == ok-sync ]]; then + if [[ -n "$VCS_STATUS_LOCAL_BRANCH" ]]; then + PS1+=" ${VCS_STATUS_LOCAL_BRANCH//\\/\\\\}" # escape backslash + else + PS1+=" @${VCS_STATUS_COMMIT//\\/\\\\}" # escape backslash + fi + [[ "$VCS_STATUS_HAS_STAGED" == 1 ]] && PS1+='+' + [[ "$VCS_STATUS_HAS_UNSTAGED" == 1 ]] && PS1+='!' + [[ "$VCS_STATUS_HAS_UNTRACKED" == 1 ]] && PS1+='?' + fi + + PS1+='\n\$ ' + + shopt -u promptvars # disable expansion of '$(...)' and the like +} + +gitstatus_stop && gitstatus_start +PROMPT_COMMAND=my_set_prompt +``` + +This snippet is sourcing `` rather than ``. 
The former +defines low-level bindings that communicate with gitstatusd over pipes. The latter is a simple +script that uses these bindings to assemble git prompt. + +Note: Bash bindings, unlike Zsh bindings, don't support asynchronous calls. + +## Using from other shells + +If there are no gitstatusd bindings for your shell, you'll need to get your hands dirty. +Use the existing bindings for inspiration; run `gitstatusd --help` or read the same thing in +[](src/ + +## How it works + +gitstatusd reads requests from stdin and prints responses to stdout. Requests contain an ID and +a directory. Responses contain the same ID and machine-readable git status for the directory. +gitstatusd keeps some state in memory for the directories it has seen in order to serve future +requests faster. + +[Zsh bindings](gitstatus.plugin.zsh) and [Bash bindings]( start gitstatusd in +the background and communicate with it via pipes. Themes such as +[Powerlevel10k]( use these bindings to put git status in +`PROMPT`. + +Note that gitstatus cannot be used as a drop-in replacement for `git status` command as it doesn't +produce output in the same format. It does perform the same computation though. + +## Benchmarks + +The following benchmark results were obtained on Intel i9-7900X running Ubuntu 18.04 in +a clean [chromium]( repository synced to `9394e49a`. The +repository was checked out to an ext4 filesystem on M.2 SSD. + +Three functionally equivalent tools for computing git status were benchmarked: + +* `gitstatusd` +* `git` with untracked cache enabled +* `lg2` -- a demo/example executable from [libgit2]( that + implements a subset of `git` functionality on top of libgit2 API; for the purposes of this + benchmark the subset is sufficient to generate the same data as the other tools + +Every tool was benchmarked in cold and hot conditions. For `git` the first run in a repository was +considered cold, with the following runs considered hot. 
`lg2` was patched to compute results twice +in a single invocation without freeing the repository in between; the second run was considered hot. +The same patching was not done for `git` because `git` cannot be easily modified to refresh in-memory +index state between invocations; in fact, this limitation is one of the primary reasons developers +use libgit2. `gitstatusd` was benchmarked similarly to `lg2` with two result computations in the +same invocation. + +Two commands were benchmarked: `status` and `describe`. + +### Status + +In this benchmark all tools were computing the equivalent of `git status`. Lower numbers are better. + +| Tool | Cold | Hot | +|---------------|-----------:|------------:| +| **gitstatus** | **291 ms** | **30.9 ms** | +| git | 876 ms | 295 ms | +| lg2 | 1730 ms | 1310 ms | + +gitstatusd is substantially faster than the alternatives, especially on hot runs. Note that hot runs +are of primary importance to the main use case of gitstatus in interactive shells. + +The performance of `git status` fluctuated wildly in this benchmark for reasons unknown to the +author. Moreover, performance is sticky -- once `git status` settles around a number, it stays +there for a long time. Numbers as diverse as 295, 352, 663 and 730 had been observed on hot runs on +the same repository. The number in the table is the lowest (fastest or best) that `git status` had +shown. + +### Describe + +In this benchmark all tools were computing the equivalent of `git describe --tags --exact-match` +to find tags that resolve to the same commit as `HEAD`. Lower numbers are better. + +| Tool | Cold | Hot | +|---------------|------------:|--------------:| +| **gitstatus** | **4.04 ms** | **0.0345 ms** | +| git | 18.0 ms | 14.5 ms | +| lg2 | 185 ms | 45.2 ms | + +gitstatusd is once again faster than the alternatives, more so on hot runs. 
+ +## Why fast + +Since gitstatusd doesn't have to print all staged/unstaged/untracked files but only report +whether there are any, it can terminate repository scan early. It can also remember which files +were dirty on the previous run and check them first on the next run to avoid the scan entirely if +the files are still dirty. However, the benchmarks above were performed in a clean repository where +these shortcuts do not trigger. All benchmarked tools had to do the same work -- check the status +of every file in the index to see if it has changed, check every directory for newly created files, +etc. And yet, gitstatusd came ahead by a large margin. This section describes what it does that +makes it so fast. + +Most of the following comparisons are done against libgit2 rather than git because of the author's +familiarity with the former but not with the latter. libgit2 has clean, well-documented APIs and an +elegant implementation, which makes it so much easier to work with and to analyze performance +bottlenecks. + +### Summary for the impatient + +Under the benchmark conditions described above, the equivalent of libgit2's +`git_diff_index_to_workdir` (the most expensive part of `status` command) is 46.3 times faster in +gitstatusd. The speedup comes from the following sources. + +* gitstatusd uses more efficient data structures and algorithms and employs performance-conscious +coding style throughout the codebase. This reduces CPU time in userspace by 32x compared to libgit2. +* gitstatusd uses less expensive system calls and makes fewer of them. This reduces CPU time spent +in kernel by 1.9x. +* gitstatusd can utilize multiple cores to scan index and workdir in parallel with almost perfect +scaling. This reduces total run time by 12.4x while having virtually no effect on total CPU time. 
+ +### Problem statement + +The most resource-intensive part of the `status` command is finding the difference between _index_ +and _workdir_ (`git_diff_index_to_workdir` in libgit2). Index is a list of all files in the git +repository with their last modification times. This is an obvious simplification but it suffices for +this exposition. On disk, index is stored sorted by file path. Here's an example of git index: + +| File | Last modification time | +|-------------|-----------------------:| +| Makefile | 2019-04-01T14:12:32Z | +| src/hello.c | 2019-04-01T14:12:00Z | +| src/hello.h | 2019-04-01T14:12:32Z | + +This list needs to be compared to the list of files in the working directory. If any of the files +listed in the index are missing from the workdir or have different last modification time, they are +"unstaged" in gitstatusd parlance. If you run `git status`, they'll be shown as "changes not staged +for commit". Thus, any implementation of `status` command has to call `stat()` or one of its +variants on every file in the index. + +In addition, all files in the working directory for which there is no entry in the index at all are +"untracked". `git status` will show them as "untracked files". Finding untracked files requires some +form of work directory traversal. + +### Single-threaded scan + +Let's see how `git_diff_index_to_workdir` from libgit2 accomplishes these tasks. Here's its CPU +profile from 200 hot runs over chromium repository. + +![libgit2 CPU profile (hot)]( + + +(The CPU profile was created with [gperftools]( and +rendered with [pprof]( + +We can see `__GI__lxstat` taking a lot of time. This is the `stat()` call for every file in the +index. We can also identify `__opendir`, `__readdir` and `__GI___close_nocancel` -- glibc wrappers +for reading the contents of a directory. This is for finding untracked files. Out of the total 232 +seconds, 111 seconds -- or 47.7% -- was spent on these calls. 
The rest is computation -- comparing +strings, sorting arrays, etc. + +Now let's take a look at the CPU profile of gitstatusd on the same task. + +![gitstatusd CPU profile (hot)]( + + +The first impression is that this profile looks pruned. This isn't an artifact. The profile was +generated with the same tools and the same flags as the profile of libgit2. + +Since both profiles were generated from the same workload, absolute numbers can be compared. We can +see that gitstatusd took 62 seconds in total compared to libgit2's 232 seconds. System calls at the +core of the algorithm are clearly visible. `__GI___fxstatat` is a flavor of `stat()`, and the other +three calls -- `__libc_openat64`, `__libc_close` and `__GI___fxstat` are responsible for opening +directories and finding untracked files. Notice that there is almost nothing else in the profile +apart from these calls. The rest of the code accounts for 3.77 seconds of CPU time -- 32 times less +than in libgit2. + +So, one reason gitstatusd is fast is that it has efficient diffing code -- very little time is spent +outside of kernel. However, if we look closely, we can notice that system calls in gitstatusd are +_also_ faster than in libgit2. For example, libgit2 spent 72.07 seconds in `__GI__lxstat` while +gitstatusd spent only 48.82 seconds in `__GI___fxstatat`. There are two reasons for this difference. +First, libgit2 makes more `stat()` calls than is strictly required. It's not necessary to stat +directories because index only has files. There are 25k directories in chromium repository (and 300k +files) -- that's 25k `stat()` calls that could be avoided. The second reason is that libgit2 and +gitstatusd use different flavors of `stat()`. libgit2 uses `lstat()`, which takes a path to the file +as input. Its performance is linear in the number of subdirectories in the path because it needs to +perform a lookup for every one of them and to check permissions. 
gitstatusd uses `fstatat()`, which +takes a file descriptor to the parent directory and a name of the file. Just a single lookup, less +CPU time. + +Similarly to `lstat()` vs `fstatat()`, it's faster to open files and directories with `openat()` +from the parent directory file descriptor than with regular `open()` that accepts full file path. +gitstatusd takes advantage of `openat()` to open directories as fast as possible. It opens about 90% +of the directories (this depends on the actual directory structure of the repository) from the +immediate parent -- the most efficient way -- and the remaining 10% it opens from the repository's +root directory. The reason it's done this way is to keep the maximum number of simultaneously open +file descriptors bounded. libgit2 can have O(repository depth) simultaneously open file descriptors, +which may be OK for a single-threaded application but can balloon to a large number when scans are +done by many threads simultaneously, like in gitstatusd. + +There is no equivalent to `__opendir` or `__readdir` in the gitstatusd profile because it uses the +equivalent of [untracked cache]( from +git. On the first scan of the workdir gitstatusd lists all files just like libgit2. But, unlike +libgit2, it remembers the last modification time of every directory along with the list of +untracked files under it. On the next scan, gitstatusd can skip listing files in directories whose +last modification time hasn't changed. + +To summarize, here's what gitstatusd was doing when the CPU profile was captured: + +1. `__libc_openat64`: Open every directory for which there are files in the index. +2. `__GI___fxstat`: Check last modification time of the directory. Since it's the same as on the + last scan, this directory has the same list of untracked files as before, which is empty (the + repository is clean). +3. `__GI___fxstatat`: Check last modification time for every file in the index that belongs to this + directory. +4. 
`__libc_close`: Close the file descriptor to the directory. + +Here's what the very first scan of a repository looks like in gitstatusd: + +![gitstatusd CPU profile (cold)]( + + +(Some glibc functions are mislabeled on this profile. `explicit_bzero` and `__nss_passwd_lookup` are +in reality `strcmp` and `memcmp`.) + +This is a superset of the previous -- hot -- profile, with an extra `syscall` and string sorting for +directory listing. gitstatusd uses `getdents64` Linux system call directly, bypassing the glibc +wrapper that libgit2 uses. This is 23% faster. The details of this optimization can be found in a +[separate document](docs/ + +### Multithreading + +The diffing algorithm in gitstatusd was designed from the ground up with the intention of using it +concurrently from multiple threads. With a fast SSD, `status` is CPU bound, so taking advantage of +all available CPU cores is an obvious way to yield results faster. + +gitstatusd exhibits almost perfect scaling from multithreading. Engaging all cores allows it to +produce results 12.4 times faster than in single-threaded execution. This is on Intel i9-7900X with +10 cores (20 with hyperthreading) with single-core frequency of 4.3GHz and all-core frequency of +4.0GHz. + +Note: `git status` also uses all available cores in some parts of its algorithm while `lg2` does +everything in a single thread. + +### Postprocessing + +Once the difference between the index and the workdir is found, we have a list of _candidates_ -- +files that may be unstaged or untracked. To make the final judgement, these files need to be checked +against `.gitignore` rules and a few other things. + +gitstatusd uses [patched libgit2]( for this step. This fork +adds several optimizations that make libgit2 faster. The patched libgit2 performs more than twice +as fast in the benchmark as the original even without changes in the user code (that is, in the +code that uses the libgit2 APIs). 
The fork also adds several API extensions, most notable of which +is the support for multi-threaded scans. If `lg2 status` is modified to take advantage of these +extensions, it outperforms the original libgit2 by a factor of 18. Lastly, the fork fixes a score of +bugs, most of which become apparent only when using libgit2 from multiple threads. + +_WARNING: Changes to libgit2 are extensive but the testing they underwent isn't. It is +**not recommended** to use the patched libgit2 in production._ + +## Requirements + +* To compile: binutils, cmake, gcc, g++, git and GNU make. +* To run: Linux, macOS, FreeBSD, Android, WSL, Cygwin or MSYS2. + +## Compiling + +There are prebuilt `gitstatusd` binaries in [releases]( + When using the official shell bindings +provided by gitstatus, the right binary for your architecture gets downloaded automatically. + +If prebuilt binaries don't work for you, you'll need to get your hands dirty. + +### Compiling for personal use + +```zsh +git clone --depth=1 +cd gitstatus +./build -w -s -d docker +``` + +- If it says that `-d docker` is not supported on your OS, remove this flag. +- If it says that `-s` is not supported on your OS, remove this flag. +- If it tells you to install docker but you cannot or don't want to, remove `-d docker`. +- If it says that some command is missing, install it. + +If everything goes well, the newly built binary will appear in `./usrbin`. It'll be picked up +by shell bindings automatically. + +When you update shell bindings, they may refuse to work with the binary you've built earlier. In +this case you'll need to rebuild. + +### Compiling for distribution + +If you want to package gitstatus, it's best to do it based off releases. You also probably don't +want to build in docker (`-d docker`) or to allow automatic downloading of libgit2 tarballs (`-w`). + +The following code should work. If it doesn't, please open an issue. + +```zsh +curl -fsSLO +tar -xzf v1.0.0.tar.gz +cd gitstatus-1.0.0 +( + . 
./ + curl -fsSLo \ + deps/libgit2-"$libgit2_version".tar.gz \ +"$libgit2_version".tar.gz +) +./build +rm deps/libgit2-*.tar.gz +for file in gitstatus.plugin.zsh gitstatus.prompt.zsh install; do + zsh -fc "zcompile -R -- $file.zwc $file" +done +``` + +This needs binutils, cmake, gcc, g++, git, GNU make and zsh. + +Depending on your workflow, it might be easier to store the URL to the libgit2 tarball in the +same place where you are going to put the main gitstatus tarball URL. You'll need to update both +URLs at the same time when bumping package version. + +Once build completes, *do not delete or move any files*. Package the whole directory as is. Don't +add it (or any of its subdirectories) to `PATH`. + +Note that Powerlevel10k has an embedded version of gitstatus. It must stay that way. The embedded +gitstatus won't conflict with the standalone version. They can have different versions and can +coexist within the same Zsh process. Do not attempt to surgically remove gitstatus from +Powerlevel10k, package the result and then somehow force Powerlevel10k to use a separately packaged +gitstatus. + +## License + +GNU General Public License v3.0. See [LICENSE](LICENSE). Contributions are covered by the same +license. diff --git a/build b/build new file mode 100755 index 00000000..1a591a8c --- /dev/null +++ b/build @@ -0,0 +1,442 @@ +#!/bin/sh +# +# Type `build -h` for help and see +# for full documentation. 
+ +set -ue + +if [ -n "${ZSH_VERSION:-}" ]; then + emulate sh -o err_exit -o no_unset +fi + +usage="$(cat <<\END +Usage: build [-m ARCH] [-c CPU] [-d CMD] [-i IMAGE] [-s] [-w] + +Options: + + -m ARCH `uname -m` from the target machine; defaults to `uname -m` + from the local machine + -c CPU generate machine instructions for CPU of this type; this + value gets passed as `-march` to gcc; inferred from ARCH + if not set explicitly + -d CMD build in a Docker container and use CMD as the `docker` + command; e.g., `-d docker` or `-d podman` + -i IMAGE build in this Docker image; inferred from ARCH if not set + explicitly + -s install whatever software is necessary for build to + succeed; on some operating systems this option is not + supported; on others it can have partial effect + -w automatically download tarballs for dependencies if they + don't already exist in ./deps; dependencies are described + in ./ +END +)" + +build="$(cat <<\END +outdir="$(pwd)" + +if command -v mktemp >/dev/null 2>&1; then + workdir="$(mktemp -d "${TMPDIR:-/tmp}"/gitstatus-build.XXXXXXXXXX)" +else + workdir="${TMPDIR:-/tmp}/gitstatus-build.tmp.$$" + mkdir -- "$workdir" +fi + +cd -- "$workdir" +workdir="$(pwd)" + +narg() { echo $#; } + +if [ "$(narg $workdir)" != 1 -o -z "${workdir##*:*}" ]; then + >&2 echo "[error] cannot build in this directory: $workdir" + exit 1 +fi + +appname=gitstatusd-"$gitstatus_kernel"-"$gitstatus_arch" +libgit2_tmp="$outdir"/deps/"$appname".libgit2.tmp + +cleanup() { + cd / + rm -rf -- "$workdir" "$outdir"/usrbin/"$appname".tmp "$libgit2_tmp" + trap - INT QUIT TERM EXIT ILL PIPE +} +trap cleanup INT QUIT TERM EXIT ILL PIPE + +if [ -n "$gitstatus_install_tools" ]; then + case "$gitstatus_kernel" in + linux) + apk update + apk add binutils cmake gcc g++ git make musl-dev + ;; + freebsd) + pkg install -y cmake gmake binutils gcc git + ;; + netbsd) + pkgin -y install cmake gmake binutils git + ;; + darwin) + if ! command -v make >/dev/null 2>&1 || ! 
command -v gcc >/dev/null 2>&1; then + >&2 echo "[error] please run 'xcode-select --install' and retry" + exit 1 + fi + if ! command -v brew >/dev/null 2>&1; then + >&2 echo "[error] please install homebrew from and retry" + exit 1 + fi + for formula in libiconv cmake git wget; do + if brew list "$formula" &>/dev/null; then + brew upgrade "$formula" + else + brew install "$formula" + fi + done + ;; + msys*|mingw*) + pacman -Syu --noconfirm + pacman -S --needed --noconfirm binutils cmake gcc git make + ;; + *) + >&2 echo "[internal error] unhandled kernel: $gitstatus_kernel" + exit 1 + ;; + esac +fi + +cpus="$(getconf _NPROCESSORS_ONLN)" || cpus="$(sysctl -n hw.ncpu)" || cpus=8 + +libgit2_cmake_flags= +libgit2_cflags="-march=$gitstatus_cpu" + +gitstatus_cxx=g++ +gitstatus_cxxflags="-I${workdir}/libgit2/include -DGITSTATUS_ZERO_NSEC -D_GNU_SOURCE -march=$gitstatus_cpu" +gitstatus_ldflags="-L${workdir}/libgit2/build" +gitstatus_ldlibs= +gitstatus_make=make + +case "$gitstatus_kernel" in + linux) + gitstatus_ldflags="$gitstatus_ldflags -static" + ;; + freebsd) + gitstatus_make=gmake + gitstatus_ldflags="$gitstatus_ldflags -static" + ;; + netbsd) + gitstatus_make=gmake + gitstatus_ldflags="$gitstatus_ldflags -static" + ;; + darwin) + mkdir -- "$workdir"/lib + ln -s -- /usr/local/opt/libiconv/lib/libiconv.a "$workdir"/lib + libgit2_cmake_flags="$libgit2_cmake_flags -DUSE_ICONV=ON" + libgit2_cflags="$libgit2_cflags -I/usr/local/opt/libiconv/include" + gitstatus_cxxflags="$gitstatus_cxxflags -I/usr/local/opt/libiconv/include" + gitstatus_ldlibs="$gitstatus_ldlibs -liconv" + gitstatus_ldflags="$gitstatus_ldflags -L${workdir}/lib" + ;; + msys*|mingw*) + gitstatus_ldflags="$gitstatus_ldflags -static" + ;; + cygwin*) + gitstatus_ldflags="$gitstatus_ldflags -static" + ;; + *) + >&2 echo "[internal error] unhandled kernel: $gitstatus_kernel" + exit 1 + ;; +esac + +for cmd in cmake gcc g++ git ld "$gitstatus_make" wget; do + if ! 
command -v "$cmd" >/dev/null 2>&1; then + if [ -n "$gitstatus_install_tools" ]; then + >&2 echo "[internal error] $cmd not found" + exit 1 + else + >&2 echo "[error] command not found: $cmd" + exit 1 + fi + fi +done + +. "$outdir"/ +if [ -z "$libgit2_version" ]; then + >&2 echo "[internal error] libgit2_version not set" + exit 1 +fi +libgit2_tarball="$outdir"/deps/libgit2-"$libgit2_version".tar.gz +if [ ! -e "$libgit2_tarball" ]; then + if [ -n "$gitstatus_download_deps" ]; then + libgit2_url="$libgit2_version".tar.gz + wget -O "$libgit2_tmp" -- "$libgit2_url" + mv -f -- "$libgit2_tmp" "$libgit2_tarball" + else + >&2 echo "[error] file not found: deps/libgit2-"$libgit2_version".tar.gz" + exit 1 + fi +fi + +cd -- "$workdir" +tar -xzf "$libgit2_tarball" +mv -- libgit2-"$libgit2_version" libgit2 +mkdir libgit2/build +cd libgit2/build + +CFLAGS="$libgit2_cflags" cmake \ + -DCMAKE_BUILD_TYPE=Release \ + -DZERO_NSEC=ON \ + -DTHREADSAFE=ON \ + -DUSE_BUNDLED_ZLIB=ON \ + -DREGEX_BACKEND=builtin \ + -DUSE_HTTP_PARSER=builtin \ + -DUSE_SSH=OFF \ + -DUSE_HTTPS=OFF \ + -DBUILD_CLAR=OFF \ + -DUSE_GSSAPI=OFF \ + -DUSE_NTLMCLIENT=OFF \ + -DBUILD_SHARED_LIBS=OFF \ + -DENABLE_REPRODUCIBLE_BUILDS=OFF \ + $libgit2_cmake_flags \ + .. 
+make -j "$cpus" VERBOSE=1 + +APPNAME="$appname".tmp \ + OBJDIR="$workdir"/gitstatus \ + CXX="$gitstatus_cxx" \ + CXXFLAGS="$gitstatus_cxxflags" \ + LDFLAGS="$gitstatus_ldflags" \ + LDLIBS="$gitstatus_ldlibs" \ + "$gitstatus_make" -C "$outdir" -j "$cpus" + +app="$outdir"/usrbin/"$appname" + +strip "$app".tmp + +mkdir -- "$workdir"/repo +git -C "$workdir"/repo init -- +git -C "$workdir"/repo config "" +git -C "$workdir"/repo commit --allow-empty --allow-empty-message -m '' + +resp="$(printf "hello\037$workdir/repo\036" | "$app".tmp)" +[ -n "$resp" -a -z "${resp##hello*1*$workdir/repo*master*}" ] + +resp="$(printf 'hello\037\036' | "$app".tmp)" +[ -n "$resp" -a -z "${resp##hello*0*}" ] + +mv -f -- "$app".tmp "$app" + +cleanup + +cat >&2 <<-END + ------------------------------------------------- + SUCCESS: created usrbin/$appname + END +END +)" + +docker_image= +docker_cmd= + +gitstatus_arch= +gitstatus_cpu= +gitstatus_install_tools= +gitstatus_download_deps= + +while getopts ':m:c:i:d:swh' opt "$@"; do + case "$opt" in + h) + printf '%s\n' "$usage" + exit + ;; + m) + if [ -n "$gitstatus_arch" ]; then + >&2 echo "[error] duplicate option: -$opt" + exit 1 + fi + if [ -z "$OPTARG" ]; then + >&2 echo "[error] incorrect value of -$opt: $OPTARG" + exit 1 + fi + gitstatus_arch="$OPTARG" + ;; + c) + if [ -n "$gitstatus_cpu" ]; then + >&2 echo "[error] duplicate option: -$opt" + exit 1 + fi + if [ -z "$OPTARG" ]; then + >&2 echo "[error] incorrect value of -$opt: $OPTARG" + exit 1 + fi + gitstatus_cpu="$OPTARG" + ;; + i) + if [ -n "$docker_image" ]; then + >&2 echo "[error] duplicate option: -$opt" + exit 1 + fi + if [ -z "$OPTARG" ]; then + >&2 echo "[error] incorrect value of -$opt: $OPTARG" + exit 1 + fi + docker_image="$OPTARG" + ;; + d) + if [ -n "$docker_cmd" ]; then + >&2 echo "[error] duplicate option: -$opt" + exit 1 + fi + if [ -z "$OPTARG" ]; then + >&2 echo "[error] incorrect value of -$opt: $OPTARG" + exit 1 + fi + docker_cmd="$OPTARG" + ;; + s) + if [ -n 
"$gitstatus_install_tools" ]; then + >&2 echo "[error] duplicate option: -$opt" + exit 1 + fi + gitstatus_install_tools=1 + ;; + w) + if [ -n "$gitstatus_download_deps" ]; then + >&2 echo "[error] duplicate option: -$opt" + exit 1 + fi + gitstatus_download_deps=1 + ;; + \?) >&2 echo "[error] invalid option: -$OPTARG" ; exit 1;; + :) >&2 echo "[error] missing required argument: -$OPTARG"; exit 1;; + *) >&2 echo "[internal error] unhandled option: -$opt" ; exit 1;; + esac +done + +if [ "$OPTIND" -le $# ]; then + >&2 echo "[error] unexpected positional argument" + exit 1 +fi + +if [ -n "$docker_image" -a -z "$docker_cmd" ]; then + >&2 echo "[error] cannot use -i without -d" + exit 1 +fi + +if [ -z "$gitstatus_arch" ]; then + gitstatus_arch="$(uname -m)" + gitstatus_arch="$(printf '%s' "$gitstatus_arch" | tr '[A-Z]' '[a-z]')" +fi + +if [ -z "$gitstatus_cpu" ]; then + case "$gitstatus_arch" in + armv6l) gitstatus_cpu=armv6;; + armv7l) gitstatus_cpu=armv7;; + aarch64) gitstatus_cpu=armv8-a;; + x86_64|amd64) gitstatus_cpu=x86-64;; + i386|i586|i686) gitstatus_cpu="$gitstatus_arch";; + *) + >&2 echo '[error] unable to infer target CPU architecture' + >&2 echo 'Please specify explicitly with `-c CPU`.' + exit 1 + ;; + esac +fi + +gitstatus_kernel="$(uname -s)" +gitstatus_kernel="$(printf '%s' "$gitstatus_kernel" | tr '[A-Z]' '[a-z]')" + +case "$gitstatus_kernel" in + linux) + if [ -n "$docker_cmd" ]; then + if [ -z "${docker_cmd##*/*}" ]; then + if [ ! -x "$docker_cmd" ]; then + >&2 echo "[error] not an executable file: $docker_cmd" + exit 1 + fi + else + if ! 
command -v "$docker_cmd" >/dev/null 2>&1; then + >&2 echo "[error] command not found: $docker_cmd" + exit 1 + fi + fi + if [ -z "$docker_image" ]; then + case "$gitstatus_arch" in + x86_64) docker_image=alpine:3.11.6;; + i386|i586|i686) docker_image=i386/alpine:3.11.6;; + armv6l) docker_image=arm32v6/alpine:3.11.6;; + armv7l) docker_image=arm32v7/alpine:3.11.6;; + aarch64) docker_image=arm64v8/alpine:3.11.6;; + *) + >&2 echo '[error] unable to infer docker image' + >&2 echo 'Please specify explicitly with `-i IMAGE`.' + exit 1 + ;; + esac + fi + elif [ -n "$gitstatus_install_tools" ]; then + >&2 echo '[error] -s without -d is not supported on linux' + exit 1 + fi + ;; + freebsd|netbsd|darwin) + if [ -n "$docker_cmd" ]; then + >&2 echo "[error] docker (-d) is not supported on $gitstatus_kernel" + exit 1 + fi + ;; + msys_nt-*|mingw32_nt-*|mingw64_nt-*|cygwin_nt-*) + if ! printf '%s' "$gitstatus_kernel" | grep -Eqx '[^-]+-[0-9]+\.[0-9]+(-.*)?'; then + >&2 echo '[error] unsupported kernel, sorry!' + exit 1 + fi + gitstatus_kernel="$(printf '%s' "$gitstatus_kernel" | sed 's/^\([^-]*-[0-9]*\.[0-9]*\).*/\1/')" + if [ -n "$docker_cmd" ]; then + >&2 echo '[error] docker (-d) is not supported on windows' + exit 1 + fi + if [ -n "$gitstatus_install_tools" -a -z "${gitstatus_kernel##cygwin_nt-*}" ]; then + >&2 echo '[error] -s is not supported on cygwin' + exit 1 + fi + ;; + *) + >&2 echo '[error] unsupported kernel, sorry!' + exit 1 + ;; +esac + +dir="$(dirname -- "$0")" +cd -- "$dir" +dir="$(pwd)" + +>&2 echo "Building gitstatusd..." 
+>&2 echo "" +>&2 echo " kernel := $gitstatus_kernel" +>&2 echo " arch := $gitstatus_arch" +>&2 echo " cpu := $gitstatus_cpu" +[ -z "$docker_cmd" ] || >&2 echo " docker command := $docker_cmd" +[ -z "$docker_image" ] || >&2 echo " docker image := $docker_image" +if [ -n "$gitstatus_install_tools" ]; then + >&2 echo " install tools := yes" +else + >&2 echo " install tools := no" +fi +if [ -n "$gitstatus_download_deps" ]; then + >&2 echo " download deps := yes" +else + >&2 echo " download deps := no" +fi + +if [ -n "$docker_cmd" ]; then + "$docker_cmd" run \ + -e gitstatus_kernel="$gitstatus_kernel" \ + -e gitstatus_arch="$gitstatus_arch" \ + -e gitstatus_cpu="$gitstatus_cpu" \ + -e gitstatus_install_tools="$gitstatus_install_tools" \ + -e gitstatus_download_deps="$gitstatus_download_deps" \ + -v "$dir":/out \ + -w /out \ + --rm \ + -- "$docker_image" /bin/sh -uexc "$build" +else + eval "$build" +fi diff --git a/ b/ new file mode 100644 index 00000000..b38192c5 --- /dev/null +++ b/ @@ -0,0 +1,18 @@ +# This value gets embedded in gitstatusd at build time. It is +# read by ./Makefile. `gitstatusd --version` reports it back. +# +# This value is also read by shell bindings (indirectly, through +# ./install) when gitstatusd is from ./usrbin. +gitstatus_version="v1.0.0" + +# libgit2 is a build time dependency of gitstatusd. The value of +# libgit2_version is read by ./build. +# +# If ./deps/libgit2-${libgit2_version}.tar.gz doesn't exist, build +# downloads it from the following location: +# +#${libgit2_version}.tar.gz +# +# Once downloaded, the tarball is stored at the path indicated +# above so that repeated builds don't consume network bandwidth. 
+libgit2_version="tag-005f77dca6dbe8788e55139fa1199fc94cc04f9a" diff --git a/deps/.gitkeep b/deps/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/docs/ b/docs/ new file mode 100644 index 00000000..0939cc18 --- /dev/null +++ b/docs/ @@ -0,0 +1,330 @@ +# Fast directory listing + +In order to find untracked files in a git repository, [gitstatusd](../README.md) needs to list the +contents of every directory. gitstatusd does it 27% faster than a reasonable implementation that a +seasoned C/C++ practitioner might write. This document explains the optimizations that went into it. +As directory listing is a common operation, many other projects can benefit from applying these +optimizations. + +## v1 + +Given a path to a directory, `ListDir()` must produce the list of files in that directory. Moreover, +the list must be sorted lexicographically to enable fast comparison with Git index. + +The following C++ implementation gets the job done. For simplicity, it returns an empty list on +error. + +```c++ +vector<string> ListDir(const char* dirname) { + vector<string> entries; + if (DIR* dir = opendir(dirname)) { + while (struct dirent* ent = (errno = 0, readdir(dir))) { + if (!Dots(ent->d_name)) entries.push_back(ent->d_name); + } + if (errno) entries.clear(); + sort(entries.begin(), entries.end()); + closedir(dir); + } + return entries; +} +``` + +Every directory has entries `"."` and `".."`, which we aren't interested in. We filter them out with +a helper function `Dots()`. + +```c++ +bool Dots(const char* s) { return s[0] == '.' && (!s[1] || (s[1] == '.' && !s[2])); } +``` + +To check how fast `ListDir()` performs, we can run it many times on a typical directory. One million +runs on a directory with 32 files with 16-character names takes 12.7 seconds. + +## v2 + +Experienced C++ practitioners will scoff at our implementation of `ListDir()`. If it's meant to be +efficient, returning `vector<string>` is an unaffordable convenience. 
To avoid heap allocations we +can use a simple arena that will allow us to reuse memory between different `ListDir()` calls. + +(Changed and added lines are marked with comments.) + +```c++ +void ListDir(const char* dirname, string& arena, vector<char*>& entries) { // + + entries.clear(); // + + if (DIR* dir = opendir(dirname)) { + arena.clear(); // + + while (struct dirent* ent = (errno = 0, readdir(dir))) { + if (!Dots(ent->d_name)) { + entries.push_back(reinterpret_cast<char*>(arena.size())); // + + arena.append(ent->d_name, strlen(ent->d_name) + 1); // + + } + } + if (errno) entries.clear(); + for (char*& p : entries) p = &arena[reinterpret_cast<size_t>(p)]; // + + sort(entries.begin(), entries.end(), // + + [](const char* a, const char* b) { return strcmp(a, b) < 0; }); // + + closedir(dir); + } +} +``` + +To make performance comparison easier, we can normalize them relative to the baseline. v1 will get +performance score of 100. A twice-as-fast alternative will be 200. + +| version | optimization | score | +|---------|----------------------------|----------:| +| v1 | baseline | 100.0 | +| **v2** | **avoid heap allocations** | **112.7** | + +Avoiding heap allocations makes `ListDir()` 12.7% faster. Not bad. As an added bonus, those casts +will fend off the occasional frontend developer who accidentally wanders into the codebase. + +## v3 + +`opendir()` is an expensive call whose performance is linear in the number of subdirectories in the +path because it needs to perform a lookup for every one of them. We can replace it with `openat()`, +which takes a file descriptor to the parent directory and a name of the subdirectory. Just a single +lookup, less CPU time. This optimization assumes that callers already have a descriptor to the +parent directory, which is indeed the case for gitstatusd, and is often the case in other +applications that traverse filesystem. 
+ +```c++ +void ListDir(int parent_fd, const char* dirname, string& arena, vector<char*>& entries) { // + + entries.clear(); + int dir_fd = openat(parent_fd, dirname, O_NOATIME | O_RDONLY | O_DIRECTORY | O_CLOEXEC); // + + if (dir_fd < 0) return; // + + if (DIR* dir = fdopendir(dir_fd)) { + arena.clear(); + while (struct dirent* ent = (errno = 0, readdir(dir))) { + if (!Dots(ent->d_name)) { + entries.push_back(reinterpret_cast<char*>(arena.size())); + arena.append(ent->d_name, strlen(ent->d_name) + 1); + } + } + if (errno) entries.clear(); + for (char*& p : entries) p = &arena[reinterpret_cast<size_t>(p)]; + sort(entries.begin(), entries.end(), + [](const char* a, const char* b) { return strcmp(a, b) < 0; }); + closedir(dir); + } else { // + + close(dir_fd); // + + } // + +} +``` + +This is worth about 3.5% in speed. + +| version | optimization | score | +|---------|--------------------------------------|----------:| +| v1 | baseline | 100.0 | +| v2 | avoid heap allocations | 112.7 | +| **v3** | **open directories with `openat()`** | **116.2** | + +## v4 + +Copying file names to the arena isn't free but it doesn't seem like we can avoid it. Poking around +we can see that the POSIX API we are using is implemented on Linux on top of `getdents64` system +call. Its documentation isn't very encouraging: + +```text +These are not the interfaces you are interested in. Look at +readdir(3) for the POSIX-conforming C library interface. This page +documents the bare kernel system call interfaces. + +Note: There are no glibc wrappers for these system calls. +``` + +Hmm... The API looks like something we can take advantage of, so let's try it anyway. + +First, we'll need a simple `Arena` class that can allocate 8KB blocks of memory. 
+ +```c++ +class Arena { + public: + enum { kBlockSize = 8 << 10 }; + + char* Alloc() { + if (cur_ == blocks_.size()) blocks_.emplace_back(kBlockSize, 0); + return blocks_[cur_++].data(); + } + + void Clear() { cur_ = 0; } + + private: + size_t cur_ = 0; + vector<string> blocks_; +}; +``` + +Next, we need to define `struct dirent64_t` ourselves because there is no wrapper for the system +call we are about to use. + +```c++ +struct dirent64_t { + ino64_t d_ino; + off64_t d_off; + unsigned short d_reclen; + unsigned char d_type; + char d_name[]; +}; +``` + +Finally we can get to the implementation of `ListDir()`. + +```c++ +void ListDir(int parent_fd, const char* dirname, Arena& arena, vector<char*>& entries) { // + + entries.clear(); + int dir_fd = openat(parent_fd, dirname, O_NOATIME | O_RDONLY | O_DIRECTORY | O_CLOEXEC); + if (dir_fd < 0) return; + arena.Clear(); // + + while (true) { // + + char* buf = arena.Alloc(); // + + int n = syscall(SYS_getdents64, dir_fd, buf, Arena::kBlockSize); // + + if (n <= 0) { // + + if (n) entries.clear(); // + + break; // + + } // + + for (int pos = 0; pos < n;) { // + + auto* ent = reinterpret_cast<dirent64_t*>(buf + pos); // + + if (!Dots(ent->d_name)) entries.push_back(ent->d_name); // + + pos += ent->d_reclen; // + + } // + + } // + + sort(entries.begin(), entries.end(), + [](const char* a, const char* b) { return strcmp(a, b) < 0; }); + close(dir_fd); +} +``` + +How are we doing with this one? + +| version | optimization | score | +|---------|----------------------------------|----------:| +| v1 | baseline | 100.0 | +| v2 | avoid heap allocations | 112.7 | +| v3 | open directories with `openat()` | 116.2 | +| **v4** | **call `getdents64()` directly** | **137.8** | + +Solid 20% speedup. Worth the trouble. Unfortunately, we now have just one `reinterpret_cast` instead +of two, and it's not nearly as scary-looking. Hopefully with the next iteration we can get back some +of that evil vibe of low-level code. + +As a bonus, every element in `entries` has `d_type` at offset -1. 
This can be useful to the callers +that need to distinguish between regular files and directories (gitstatusd, in fact, needs this). +Note how `ListDir()` implements this feature at zero cost, as a lucky accident of `dirent64_t` +memory layout. + +## v5 + +The CPU profile of `ListDir()` reveals that almost all userspace CPU time is spent in `strcmp()`. +Digging into the source code of `std::sort()` we can see that it uses Insertion Sort for short +collections. Our 32-element vector falls under the threshold. Insertion Sort makes `O(N^2)` +comparisons, hence a lot of CPU time in `strcmp()`. Switching to `qsort()` or +[Timsort]( is of no use as all good sorting algorithms fall +back to Insertion Sort. + +If we cannot make fewer comparisons, perhaps we can make each of them faster? `strcmp()` compares +characters one at a time. It cannot read ahead as it can be illegal to touch memory past the first +null byte. But _we_ know that it's safe to read a few extra bytes past the end of `d_name` for every +entry except the last in the buffer. And since we own the buffer, we can overallocate it so that +reading past the end of the last entry is also safe. + +Combining these ideas with the fact that file names on Linux are at most 255 bytes long, we can +invoke `getdents64()` like this: + +```c++ +int n = syscall(SYS_getdents64, dir_fd, buf, Arena::kBlockSize - 256); +``` + +And then compare entries like this: + +```c++ +[](const char* a, const char* b) { return memcmp(a, b, 255) < 0; } +``` + +This version doesn't give any speedup compared to the previous but it opens an avenue for another +optimization. The pointers we pass to `memcmp()` aren't aligned. To be more specific, their +numerical values are `N * 8 + 3` for some `N`. When given such a pointer, `memcmp()` will check the +first 5 bytes one by one, and only then switch to comparing 8 bytes at a time. 
If we can handle the +first 5 bytes ourselves, we can pass aligned memory to `memcmp()` and take full advantage of its +vectorized loop. + +Here's the implementation: + +```c++ +uint64_t Read64(const void* p) { // + + uint64_t x; // + + memcpy(&x, p, sizeof(x)); // + + return x; // + +} // + + +void ByteSwap64(void* p) { // + + uint64_t x = __builtin_bswap64(Read64(p)); // + + memcpy(p, &x, sizeof(x)); // + +} // + + +void ListDir(int parent_fd, const char* dirname, Arena& arena, vector<char*>& entries) { + entries.clear(); + int dir_fd = openat(parent_fd, dirname, O_NOATIME | O_RDONLY | O_DIRECTORY | O_CLOEXEC); + if (dir_fd < 0) return; + arena.Clear(); + while (true) { + char* buf = arena.Alloc(); + int n = syscall(SYS_getdents64, dir_fd, buf, Arena::kBlockSize - 256); // + + if (n <= 0) { + if (n) entries.clear(); + break; + } + for (int pos = 0; pos < n;) { + auto* ent = reinterpret_cast<dirent64_t*>(buf + pos); + if (!Dots(ent->d_name)) { + ByteSwap64(ent->d_name); // + + entries.push_back(ent->d_name); + } + pos += ent->d_reclen; + } + } + sort(entries.begin(), entries.end(), [](const char* a, const char* b) { + uint64_t x = Read64(a); // + + uint64_t y = Read64(b); // + + return x < y || (x == y && a != b && memcmp(a + 5, b + 5, 256) < 0); // + + }); + for (char* p : entries) ByteSwap64(p); // + + close(dir_fd); +} +``` + +This is for Little Endian architecture. Big Endian doesn't need `ByteSwap64()`, so it'll be a bit +faster. + +| version | optimization | score | +|---------|----------------------------------|----------:| +| v1 | baseline | 100.0 | +| v2 | avoid heap allocations | 112.7 | +| v3 | open directories with `openat()` | 116.2 | +| v4 | call `getdents64()` directly | 137.8 | +| **v5** | **hand-optimize `strcmp()`** | **143.3** | + +Fast and respectably arcane. 
+ +## Conclusion + +Through a series of incremental improvements we've sped up directory listing by 43.3% compared to a +naive implementation (v1) and 27.2% compared to a reasonable implementation that a seasoned C/C++ +practitioner might write (v2). + +However, these numbers are based on an artificial benchmark while the real judge is always the real +code. Our goal was to speed up gitstatusd. Benchmark was just a tool. Thankfully, the different +versions of `ListDir()` have the same comparative performance within gitstatusd as in the benchmark. +In truth, the directory chosen for the benchmark wasn't arbitrary. It was picked by sampling +gitstatusd when it runs on [chromium]( git repository. + +The final version of `ListDir()` spends 97% of its CPU time in the kernel. If we assume that it +makes the minimum possible number of system calls and these calls are optimal (true to the best +of my knowledge), it puts the upper bound on possible future performance improvements at just 3%. +There is almost nothing left in `ListDir()` to optimize. + +![ListDir() CPU profile]( + + +(The CPU profile was created with [gperftools]( and +rendered with [pprof]( diff --git a/ b/ new file mode 100644 index 00000000..7385defa --- /dev/null +++ b/ @@ -0,0 +1,427 @@ +# Bash bindings for gitstatus. + +[[ $- == *i* ]] || return # non-interactive shell + +# Starts gitstatusd in the background. Does nothing and succeeds if gitstatusd +# is already running. +# +# Usage: gitstatus_start [OPTION]... +# +# -t FLOAT Fail the self-check on initialization if not getting a response from +# gitstatusd for this many seconds. Defaults to 5. +# +# -s INT Report at most this many staged changes; negative value means infinity. +# Defaults to 1. +# +# -u INT Report at most this many unstaged changes; negative value means infinity. +# Defaults to 1. +# +# -c INT Report at most this many conflicted changes; negative value means infinity. +# Defaults to 1. 
+# +# -d INT Report at most this many untracked files; negative value means infinity. +# Defaults to 1. +# +# -m INT Report -1 unstaged, untracked and conflicted if there are more than this many +# files in the index. Negative value means infinity. Defaults to -1. +# +# -e Count files within untracked directories like `git status --untracked-files`. +# +# -U Unless this option is specified, report zero untracked files for repositories +# with status.showUntrackedFiles = false. +# +# -W Unless this option is specified, report zero untracked files for repositories +# with bash.showUntrackedFiles = false. +# +# -D Unless this option is specified, report zero staged, unstaged and conflicted +# changes for repositories with bash.showDirtyState = false. +function gitstatus_start() { + unset OPTIND + local opt timeout=5 max_dirty=-1 extra_flags + local max_num_staged=1 max_num_unstaged=1 max_num_conflicted=1 max_num_untracked=1 + local ignore_status_show_untracked_files + while getopts "t:s:u:c:d:m:eUWD" opt; do + case "$opt" in + t) timeout=$OPTARG;; + s) max_num_staged=$OPTARG;; + u) max_num_unstaged=$OPTARG;; + c) max_num_conflicted=$OPTARG;; + d) max_num_untracked=$OPTARG;; + m) max_dirty=$OPTARG;; + e) extra_flags+='--recurse-untracked-dirs ';; + U) extra_flags+='--ignore-status-show-untracked-files ';; + W) extra_flags+='--ignore-bash-show-untracked-files ';; + D) extra_flags+='--ignore-bash-show-dirty-state ';; + *) return 1;; + esac + done + + (( OPTIND == $# + 1 )) || { echo "usage: gitstatus_start [OPTION]..." 
>&2; return 1; } + + [[ -z "${GITSTATUS_DAEMON_PID:-}" ]] || return 0 # already started + + if [[ "${BASH_SOURCE[0]}" == */* ]]; then + local gitstatus_plugin_dir="${BASH_SOURCE[0]%/*}" + if [[ "$gitstatus_plugin_dir" != /* ]]; then + gitstatus_plugin_dir="$PWD"/"$gitstatus_plugin_dir" + fi + else + local gitstatus_plugin_dir="$PWD" + fi + + local req_fifo resp_fifo + + function gitstatus_start_impl() { + local log_level="${GITSTATUS_LOG_LEVEL:-}" + [[ -n "$log_level" || "${GITSTATUS_ENABLE_LOGGING:-0}" != 1 ]] || log_level=INFO + + local uname_sm + uname_sm="$(uname -sm)" || return + uname_sm="${uname_sm,,}" + local uname_s="${uname_sm% *}" + local uname_m="${uname_sm#* }" + + if [[ "${GITSTATUS_NUM_THREADS:-0}" -gt 0 ]]; then + local threads="$GITSTATUS_NUM_THREADS" + else + local cpus + if ! command -v sysctl &>/dev/null || [[ "$uname_s" == linux ]] || + ! cpus="$(sysctl -n hw.ncpu)"; then + if ! command -v getconf &>/dev/null || ! cpus="$(getconf _NPROCESSORS_ONLN)"; then + cpus=8 + fi + fi + local threads=$((cpus > 16 ? 32 : cpus > 0 ? 
2 * cpus : 16)) + fi + + local daemon_args=( + --parent-pid="$$" + --num-threads="$threads" + --max-num-staged="$max_num_staged" + --max-num-unstaged="$max_num_unstaged" + --max-num-conflicted="$max_num_conflicted" + --max-num-untracked="$max_num_untracked" + --dirty-max-index-size="$max_dirty" + $extra_flags) + + if [[ -n "$log_level" ]]; then + GITSTATUS_DAEMON_LOG=$(mktemp "${TMPDIR:-/tmp}"/gitstatus.$$.log.XXXXXXXXXX) || return + [[ "$log_level" == INFO ]] || daemon_args+=(--log-level="$log_level") + else + GITSTATUS_DAEMON_LOG=/dev/null + fi + + req_fifo=$(mktemp -u "${TMPDIR:-/tmp}"/gitstatus.$$.pipe.req.XXXXXXXXXX) || return + resp_fifo=$(mktemp -u "${TMPDIR:-/tmp}"/gitstatus.$$.pipe.resp.XXXXXXXXXX) || return + mkfifo "$req_fifo" "$resp_fifo" || return + + { + ( + builtin cd / + ( + local fd_in fd_out + exec {fd_in}<"$req_fifo" {fd_out}>"$resp_fifo" || exit + echo "$BASHPID" >&"$fd_out" + + local _gitstatus_bash_daemon _gitstatus_bash_version _gitstatus_bash_downloaded + + function _gitstatus_set_daemon() { + _gitstatus_bash_daemon="$1" + _gitstatus_bash_version="$2" + _gitstatus_bash_downloaded="$3" + } + + set -- -d "$gitstatus_plugin_dir" -s "$uname_s" -m "$uname_m" -- _gitstatus_set_daemon + [[ "${GITSTATUS_AUTO_INSTALL:-1}" -ne 0 ]] || set -- -n "$@" + source "$gitstatus_plugin_dir"/install || return + [[ -n "$_gitstatus_bash_daemon" ]] || return + [[ -n "$_gitstatus_bash_version" ]] || return + [[ "$_gitstatus_bash_downloaded" == [01] ]] || return + + local sig=(INT QUIT TERM EXIT ILL PIPE) + + if [[ -x "$_gitstatus_bash_daemon" ]]; then + "$_gitstatus_bash_daemon" \ + -G "$_gitstatus_bash_version" "${daemon_args[@]}" <&"$fd_in" >&"$fd_out" & + local pid=$! + trap "trap - ${sig[*]}; kill $pid &>/dev/null" ${sig[@]} + wait "$pid" + local ret=$? + trap - ${sig[@]} + case "$ret" in + 0|129|130|131|137|141|143) + echo -nE $'bye\x1f0\x1e' >&"$fd_out" + exit "$ret" + ;; + esac + fi + + (( ! 
_gitstatus_bash_downloaded )) || return + [[ "${GITSTATUS_AUTO_INSTALL:-1}" -ne 0 ]] || return + set -- -f "$@" + _gitstatus_bash_daemon= + _gitstatus_bash_version= + _gitstatus_bash_downloaded= + source "$gitstatus_plugin_dir"/install || return + [[ -n "$_gitstatus_bash_daemon" ]] || return + [[ -n "$_gitstatus_bash_version" ]] || return + [[ "$_gitstatus_bash_downloaded" == 1 ]] || return + + "$_gitstatus_bash_daemon" \ + -G "$_gitstatus_bash_version" "${daemon_args[@]}" <&"$fd_in" >&"$fd_out" & + local pid=$! + trap "trap - ${sig[*]}; kill $pid &>/dev/null" ${sig[@]} + wait "$pid" + trap - ${sig[@]} + echo -nE $'bye\x1f0\x1e' >&"$fd_out" + ) & + ) & disown + } 0/dev/null + + exec {_GITSTATUS_REQ_FD}>"$req_fifo" {_GITSTATUS_RESP_FD}<"$resp_fifo" || return + command rm "$req_fifo" "$resp_fifo" || return + + IFS='' read -r -u $_GITSTATUS_RESP_FD GITSTATUS_DAEMON_PID || return + [[ $GITSTATUS_DAEMON_PID == [1-9]* ]] || return + + local reply + echo -nE $'hello\x1f\x1e' >&$_GITSTATUS_REQ_FD || return + IFS='' read -rd $'\x1e' -u $_GITSTATUS_RESP_FD -t "$timeout" reply || return + [[ "$reply" == $'hello\x1f0' ]] || return + + _GITSTATUS_DIRTY_MAX_INDEX_SIZE=$max_dirty + _GITSTATUS_CLIENT_PID="$BASHPID" + } + + if ! gitstatus_start_impl; then + echo "gitstatus_start: failed to start gitstatusd" >&2 + [[ -z "${req_fifo:-}" ]] || command rm -f "$req_fifo" + [[ -z "${resp_fifo:-}" ]] || command rm -f "$resp_fifo" + unset -f gitstatus_start_impl + gitstatus_stop + return 1 + fi + + unset -f gitstatus_start_impl + + if [[ "${GITSTATUS_STOP_ON_EXEC:-1}" == 1 ]]; then + type -t _gitstatus_exec &>/dev/null || function _gitstatus_exec() { exec "$@"; } + type -t _gitstatus_builtin &>/dev/null || function _gitstatus_builtin() { builtin "$@"; } + + function _gitstatus_exec_wrapper() { + (( ! $# )) || gitstatus_stop + local ret=0 + _gitstatus_exec "$@" || ret=$? 
+ [[ -n "${GITSTATUS_DAEMON_PID:-}" ]] || gitstatus_start || true + return $ret + } + + function _gitstatus_builtin_wrapper() { + while [[ "${1:-}" == builtin ]]; do shift; done + if [[ "${1:-}" == exec ]]; then + _gitstatus_exec_wrapper "${@:2}" + else + _gitstatus_builtin "$@" + fi + } + + alias exec=_gitstatus_exec_wrapper + alias builtin=_gitstatus_builtin_wrapper + + _GITSTATUS_EXEC_HOOK=1 + else + unset _GITSTATUS_EXEC_HOOK + fi +} + +# Stops gitstatusd if it's running. +function gitstatus_stop() { + [[ "${_GITSTATUS_CLIENT_PID:-$BASHPID}" == "$BASHPID" ]] || return 0 + [[ -z "${_GITSTATUS_REQ_FD:-}" ]] || exec {_GITSTATUS_REQ_FD}>&- || true + [[ -z "${_GITSTATUS_RESP_FD:-}" ]] || exec {_GITSTATUS_RESP_FD}>&- || true + [[ -z "${GITSTATUS_DAEMON_PID:-}" ]] || kill "$GITSTATUS_DAEMON_PID" &>/dev/null || true + if [[ -n "${_GITSTATUS_EXEC_HOOK:-}" ]]; then + unalias exec builtin &>/dev/null || true + function _gitstatus_exec_wrapper() { _gitstatus_exec "$@"; } + function _gitstatus_builtin_wrapper() { _gitstatus_builtin "$@"; } + fi + unset _GITSTATUS_REQ_FD _GITSTATUS_RESP_FD GITSTATUS_DAEMON_PID _GITSTATUS_EXEC_HOOK + unset _GITSTATUS_DIRTY_MAX_INDEX_SIZE _GITSTATUS_CLIENT_PID +} + +# Retrieves status of a git repository from a directory under its working tree. +# +# Usage: gitstatus_query [OPTION]... +# +# -d STR Directory to query. Defaults to $PWD. Has no effect if GIT_DIR is set. +# -t FLOAT Timeout in seconds. Will block for at most this long. If no results +# are available by then, will return error. +# -p Don't compute anything that requires reading Git index. If this option is used, +# the following parameters will be 0: VCS_STATUS_INDEX_SIZE, +# VCS_STATUS_{NUM,HAS}_{STAGED,UNSTAGED,UNTRACKED,CONFLICTED}. +# +# On success sets VCS_STATUS_RESULT to one of the following values: +# +# norepo-sync The directory doesn't belong to a git repository. +# ok-sync The directory belongs to a git repository. 
+# +# If VCS_STATUS_RESULT is ok-sync, additional variables are set: +# +# VCS_STATUS_WORKDIR Git repo working directory. Not empty. +# VCS_STATUS_COMMIT Commit hash that HEAD is pointing to. Either 40 hex digits or +# empty if there is no HEAD (empty repo). +# VCS_STATUS_LOCAL_BRANCH Local branch name or empty if not on a branch. +# VCS_STATUS_REMOTE_NAME The remote name, e.g. "upstream" or "origin". +# VCS_STATUS_REMOTE_BRANCH Upstream branch name. Can be empty. +# VCS_STATUS_REMOTE_URL Remote URL. Can be empty. +# VCS_STATUS_ACTION Repository state, A.K.A. action. Can be empty. +# VCS_STATUS_INDEX_SIZE The number of files in the index. +# VCS_STATUS_NUM_STAGED The number of staged changes. +# VCS_STATUS_NUM_CONFLICTED The number of conflicted changes. +# VCS_STATUS_NUM_UNSTAGED The number of unstaged changes. +# VCS_STATUS_NUM_UNTRACKED The number of untracked files. +# VCS_STATUS_HAS_STAGED 1 if there are staged changes, 0 otherwise. +# VCS_STATUS_HAS_CONFLICTED 1 if there are conflicted changes, 0 otherwise. +# VCS_STATUS_HAS_UNSTAGED 1 if there are unstaged changes, 0 if there aren't, -1 if +# unknown. +# VCS_STATUS_NUM_STAGED_NEW The number of staged new files. Note that renamed files +# are reported as deleted plus new. +# VCS_STATUS_NUM_STAGED_DELETED The number of staged deleted files. Note that renamed files +# are reported as deleted plus new. +# VCS_STATUS_NUM_UNSTAGED_DELETED The number of unstaged deleted files. Note that renamed files +# are reported as deleted plus new. +# VCS_STATUS_HAS_UNTRACKED 1 if there are untracked files, 0 if there aren't, -1 if +# unknown. +# VCS_STATUS_COMMITS_AHEAD Number of commits the current branch is ahead of upstream. +# Non-negative integer. +# VCS_STATUS_COMMITS_BEHIND Number of commits the current branch is behind upstream. +# Non-negative integer. +# VCS_STATUS_STASHES Number of stashes. Non-negative integer. +# VCS_STATUS_TAG The last tag (in lexicographical order) that points to the same +# commit as HEAD. 
+# VCS_STATUS_PUSH_REMOTE_NAME The push remote name, e.g. "upstream" or "origin". +# VCS_STATUS_PUSH_REMOTE_URL Push remote URL. Can be empty. +# VCS_STATUS_PUSH_COMMITS_AHEAD Number of commits the current branch is ahead of push remote. +# Non-negative integer. +# VCS_STATUS_PUSH_COMMITS_BEHIND Number of commits the current branch is behind push remote. +# Non-negative integer. +# VCS_STATUS_NUM_SKIP_WORKTREE The number of files in the index with skip-worktree bit set. +# Non-negative integer. +# VCS_STATUS_NUM_ASSUME_UNCHANGED The number of files in the index with assume-unchanged bit set. +# Non-negative integer. +# +# The point of reporting -1 via VCS_STATUS_HAS_* is to allow the command to skip scanning files in +# large repos. See -m flag of gitstatus_start. +# +# gitstatus_query returns an error if gitstatus_start hasn't been called in the same +# shell or the call had failed. +function gitstatus_query() { + unset OPTIND + local opt dir timeout=() no_diff=0 + while getopts "d:c:t:p" opt "$@"; do + case "$opt" in + d) dir=$OPTARG;; + t) timeout=(-t "$OPTARG");; + p) no_diff=1;; + *) return 1;; + esac + done + (( OPTIND == $# + 1 )) || { echo "usage: gitstatus_query [OPTION]..." 
>&2; return 1; } + + [[ -n "$GITSTATUS_DAEMON_PID" ]] || return # not started + + local req_id="$RANDOM.$RANDOM.$RANDOM.$RANDOM" + if [[ -z "${GIT_DIR:-}" ]]; then + [[ "$dir" == /* ]] || dir="$(pwd -P)/$dir" || return + elif [[ "$GIT_DIR" == /* ]]; then + dir=:"$GIT_DIR" + else + dir=:"$(pwd -P)/$GIT_DIR" || return + fi + echo -nE "$req_id"$'\x1f'"$dir"$'\x1f'"$no_diff"$'\x1e' >&$_GITSTATUS_REQ_FD || return + + local -a resp + while true; do + IFS=$'\x1f' read -rd $'\x1e' -a resp -u $_GITSTATUS_RESP_FD "${timeout[@]}" || return + [[ "${resp[0]}" == "$req_id" ]] && break + done + + if [[ "${resp[1]}" == 1 ]]; then + VCS_STATUS_RESULT=ok-sync + VCS_STATUS_WORKDIR="${resp[2]}" + VCS_STATUS_COMMIT="${resp[3]}" + VCS_STATUS_LOCAL_BRANCH="${resp[4]}" + VCS_STATUS_REMOTE_BRANCH="${resp[5]}" + VCS_STATUS_REMOTE_NAME="${resp[6]}" + VCS_STATUS_REMOTE_URL="${resp[7]}" + VCS_STATUS_ACTION="${resp[8]}" + VCS_STATUS_INDEX_SIZE="${resp[9]}" + VCS_STATUS_NUM_STAGED="${resp[10]}" + VCS_STATUS_NUM_UNSTAGED="${resp[11]}" + VCS_STATUS_NUM_CONFLICTED="${resp[12]}" + VCS_STATUS_NUM_UNTRACKED="${resp[13]}" + VCS_STATUS_COMMITS_AHEAD="${resp[14]}" + VCS_STATUS_COMMITS_BEHIND="${resp[15]}" + VCS_STATUS_STASHES="${resp[16]}" + VCS_STATUS_TAG="${resp[17]}" + VCS_STATUS_NUM_UNSTAGED_DELETED="${resp[18]}" + VCS_STATUS_NUM_STAGED_NEW="${resp[19]:-0}" + VCS_STATUS_NUM_STAGED_DELETED="${resp[20]:-0}" + VCS_STATUS_PUSH_REMOTE_NAME="${resp[21]:-}" + VCS_STATUS_PUSH_REMOTE_URL="${resp[22]:-}" + VCS_STATUS_PUSH_COMMITS_AHEAD="${resp[23]:-0}" + VCS_STATUS_PUSH_COMMITS_BEHIND="${resp[24]:-0}" + VCS_STATUS_NUM_SKIP_WORKTREE="${resp[25]:-0}" + VCS_STATUS_NUM_ASSUME_UNCHANGED="${resp[26]:-0}" + VCS_STATUS_HAS_STAGED=$((VCS_STATUS_NUM_STAGED > 0)) + if (( _GITSTATUS_DIRTY_MAX_INDEX_SIZE >= 0 && + VCS_STATUS_INDEX_SIZE > _GITSTATUS_DIRTY_MAX_INDEX_SIZE )); then + VCS_STATUS_HAS_UNSTAGED=-1 + VCS_STATUS_HAS_CONFLICTED=-1 + VCS_STATUS_HAS_UNTRACKED=-1 + else 
+ VCS_STATUS_HAS_UNSTAGED=$((VCS_STATUS_NUM_UNSTAGED > 0)) + VCS_STATUS_HAS_CONFLICTED=$((VCS_STATUS_NUM_CONFLICTED > 0)) + VCS_STATUS_HAS_UNTRACKED=$((VCS_STATUS_NUM_UNTRACKED > 0)) + fi + else + VCS_STATUS_RESULT=norepo-sync + unset VCS_STATUS_WORKDIR + unset VCS_STATUS_COMMIT + unset VCS_STATUS_LOCAL_BRANCH + unset VCS_STATUS_REMOTE_BRANCH + unset VCS_STATUS_REMOTE_NAME + unset VCS_STATUS_REMOTE_URL + unset VCS_STATUS_ACTION + unset VCS_STATUS_INDEX_SIZE + unset VCS_STATUS_NUM_STAGED + unset VCS_STATUS_NUM_UNSTAGED + unset VCS_STATUS_NUM_CONFLICTED + unset VCS_STATUS_NUM_UNTRACKED + unset VCS_STATUS_HAS_STAGED + unset VCS_STATUS_HAS_UNSTAGED + unset VCS_STATUS_HAS_CONFLICTED + unset VCS_STATUS_HAS_UNTRACKED + unset VCS_STATUS_COMMITS_AHEAD + unset VCS_STATUS_COMMITS_BEHIND + unset VCS_STATUS_STASHES + unset VCS_STATUS_TAG + unset VCS_STATUS_NUM_UNSTAGED_DELETED + unset VCS_STATUS_NUM_STAGED_NEW + unset VCS_STATUS_NUM_STAGED_DELETED + unset VCS_STATUS_PUSH_REMOTE_NAME + unset VCS_STATUS_PUSH_REMOTE_URL + unset VCS_STATUS_PUSH_COMMITS_AHEAD + unset VCS_STATUS_PUSH_COMMITS_BEHIND + unset VCS_STATUS_NUM_SKIP_WORKTREE + unset VCS_STATUS_NUM_ASSUME_UNCHANGED + fi +} + +# Usage: gitstatus_check. +# +# Returns 0 if and only if gitstatus_start has succeeded previously. +# If it returns non-zero, gitstatus_query is guaranteed to return non-zero. +function gitstatus_check() { + [[ -n "$GITSTATUS_DAEMON_PID" ]] +} diff --git a/gitstatus.plugin.zsh b/gitstatus.plugin.zsh new file mode 100644 index 00000000..5dc8d737 --- /dev/null +++ b/gitstatus.plugin.zsh @@ -0,0 +1,816 @@ +# Zsh bindings for gitstatus. +# +# ------------------------------------------------------------------ +# +# Example: Start gitstatusd, send it a request, wait for response and print it. 
+# +# source ~/gitstatus/gitstatus.plugin.zsh +# gitstatus_start MY +# gitstatus_query -d $PWD MY +# typeset -m 'VCS_STATUS_*' +# +# Output: +# +# VCS_STATUS_ACTION='' +# VCS_STATUS_COMMIT=c000eddcff0fb38df2d0137efe24d9d2d900f209 +# VCS_STATUS_COMMITS_AHEAD=0 +# VCS_STATUS_COMMITS_BEHIND=0 +# VCS_STATUS_HAS_CONFLICTED=0 +# VCS_STATUS_HAS_STAGED=0 +# VCS_STATUS_HAS_UNSTAGED=1 +# VCS_STATUS_HAS_UNTRACKED=1 +# VCS_STATUS_INDEX_SIZE=33 +# VCS_STATUS_LOCAL_BRANCH=master +# VCS_STATUS_NUM_ASSUME_UNCHANGED=0 +# VCS_STATUS_NUM_CONFLICTED=0 +# VCS_STATUS_NUM_STAGED=0 +# VCS_STATUS_NUM_UNSTAGED=1 +# VCS_STATUS_NUM_SKIP_WORKTREE=0 +# VCS_STATUS_NUM_STAGED_NEW=0 +# VCS_STATUS_NUM_STAGED_DELETED=0 +# VCS_STATUS_NUM_UNSTAGED_DELETED=0 +# VCS_STATUS_NUM_UNTRACKED=1 +# VCS_STATUS_PUSH_COMMITS_AHEAD=0 +# VCS_STATUS_PUSH_COMMITS_BEHIND=0 +# VCS_STATUS_PUSH_REMOTE_NAME='' +# VCS_STATUS_PUSH_REMOTE_URL='' +# VCS_STATUS_REMOTE_BRANCH=master +# VCS_STATUS_REMOTE_NAME=origin +# +# VCS_STATUS_RESULT=ok-sync +# VCS_STATUS_STASHES=0 +# VCS_STATUS_TAG='' +# VCS_STATUS_WORKDIR=/home/romka/powerlevel10k + +[[ -o 'interactive' ]] || 'return' + +# Temporarily change options. +'builtin' 'local' '-a' '_gitstatus_opts' +[[ ! -o 'aliases' ]] || _gitstatus_opts+=('aliases') +[[ ! -o 'sh_glob' ]] || _gitstatus_opts+=('sh_glob') +[[ ! -o 'no_brace_expand' ]] || _gitstatus_opts+=('no_brace_expand') +'builtin' 'setopt' 'no_aliases' 'no_sh_glob' 'brace_expand' + +autoload -Uz add-zsh-hook || return +zmodload zsh/datetime zsh/system || return +zmodload -F zsh/files b:zf_rm || return + +typeset -g _gitstatus_plugin_dir"${1:-}"="${${(%):-%x}:A:h}" + +# Retrieves status of a git repo from a directory under its working tree. +# +# Usage: gitstatus_query [OPTION]... NAME +# +# -d STR Directory to query. Defaults to the current directory. Has no effect if GIT_DIR +# is set. +# -c STR Callback function to call once the results are available. 
Called only after +# gitstatus_query returns 0 with VCS_STATUS_RESULT=tout. +# -t FLOAT Timeout in seconds. Negative value means infinity. Will block for at most this long. +# If no results are available by then: if -c isn't specified, will return 1; otherwise +# will set VCS_STATUS_RESULT=tout and return 0. +# -p Don't compute anything that requires reading Git index. If this option is used, +# the following parameters will be 0: VCS_STATUS_INDEX_SIZE, +# VCS_STATUS_{NUM,HAS}_{STAGED,UNSTAGED,UNTRACKED,CONFLICTED}. +# +# On success sets VCS_STATUS_RESULT to one of the following values: +# +# tout Timed out waiting for data; will call the user-specified callback later. +# norepo-sync The directory isn't a git repo. +# ok-sync The directory is a git repo. +# +# When the callback is called, VCS_STATUS_RESULT is set to one of the following values: +# +# norepo-async The directory isn't a git repo. +# ok-async The directory is a git repo. +# +# If VCS_STATUS_RESULT is ok-sync or ok-async, additional variables are set: +# +# VCS_STATUS_WORKDIR Git repo working directory. Not empty. +# VCS_STATUS_COMMIT Commit hash that HEAD is pointing to. Either 40 hex digits or +# empty if there is no HEAD (empty repo). +# VCS_STATUS_LOCAL_BRANCH Local branch name or empty if not on a branch. +# VCS_STATUS_REMOTE_NAME The remote name, e.g. "upstream" or "origin". +# VCS_STATUS_REMOTE_BRANCH Upstream branch name. Can be empty. +# VCS_STATUS_REMOTE_URL Remote URL. Can be empty. +# VCS_STATUS_ACTION Repository state, A.K.A. action. Can be empty. +# VCS_STATUS_INDEX_SIZE The number of files in the index. +# VCS_STATUS_NUM_STAGED The number of staged changes. +# VCS_STATUS_NUM_CONFLICTED The number of conflicted changes. +# VCS_STATUS_NUM_UNSTAGED The number of unstaged changes. +# VCS_STATUS_NUM_UNTRACKED The number of untracked files. +# VCS_STATUS_HAS_STAGED 1 if there are staged changes, 0 otherwise. +# VCS_STATUS_HAS_CONFLICTED 1 if there are conflicted changes, 0 if there aren't, -1 if unknown. 
+# VCS_STATUS_HAS_UNSTAGED 1 if there are unstaged changes, 0 if there aren't, -1 if +# unknown. +# VCS_STATUS_NUM_STAGED_NEW The number of staged new files. Note that renamed files +# are reported as deleted plus new. +# VCS_STATUS_NUM_STAGED_DELETED The number of staged deleted files. Note that renamed files +# are reported as deleted plus new. +# VCS_STATUS_NUM_UNSTAGED_DELETED The number of unstaged deleted files. Note that renamed files +# are reported as deleted plus new. +# VCS_STATUS_HAS_UNTRACKED 1 if there are untracked files, 0 if there aren't, -1 if +# unknown. +# VCS_STATUS_COMMITS_AHEAD Number of commits the current branch is ahead of upstream. +# Non-negative integer. +# VCS_STATUS_COMMITS_BEHIND Number of commits the current branch is behind upstream. +# Non-negative integer. +# VCS_STATUS_STASHES Number of stashes. Non-negative integer. +# VCS_STATUS_TAG The last tag (in lexicographical order) that points to the same +# commit as HEAD. +# VCS_STATUS_PUSH_REMOTE_NAME The push remote name, e.g. "upstream" or "origin". +# VCS_STATUS_PUSH_REMOTE_URL Push remote URL. Can be empty. +# VCS_STATUS_PUSH_COMMITS_AHEAD Number of commits the current branch is ahead of push remote. +# Non-negative integer. +# VCS_STATUS_PUSH_COMMITS_BEHIND Number of commits the current branch is behind push remote. +# Non-negative integer. +# VCS_STATUS_NUM_SKIP_WORKTREE The number of files in the index with skip-worktree bit set. +# Non-negative integer. +# VCS_STATUS_NUM_ASSUME_UNCHANGED The number of files in the index with assume-unchanged bit set. +# Non-negative integer. +# +# The point of reporting -1 via VCS_STATUS_HAS_* is to allow the command to skip scanning files in +# large repos. See -m flag of gitstatus_start. +# +# gitstatus_query returns an error if gitstatus_start hasn't been called in the same shell or +# the call had failed. +# +# !!!!! WARNING: CONCURRENT CALLS WITH THE SAME NAME ARE NOT ALLOWED !!!!! 
+# +# It's illegal to call gitstatus_query if the last asynchronous call with the same NAME hasn't +# completed yet. If you need to issue concurrent requests, use different NAME arguments. +function gitstatus_query"${1:-}"() { + emulate -L zsh -o no_aliases -o extended_glob -o typeset_silent + + # Whatever follows "gitstatus_query" in this function's name is appended to helper names below. + local fsuf=${${(%):-%N}#gitstatus_query} + + unset VCS_STATUS_RESULT + + local opt dir callback OPTARG + local -i no_diff OPTIND + local -F timeout=-1 + while getopts ":d:c:t:p" opt; do + case $opt in + +p) no_diff=0;; + p) no_diff=1;; + d) dir=$OPTARG;; + c) callback=$OPTARG;; + t) + if [[ $OPTARG != (|+|-)<->(|.<->)(|[eE](|-|+)<->) ]]; then + print -ru2 -- "gitstatus_query: invalid -t argument: $OPTARG" + return 1 + fi + timeout=OPTARG + ;; + \?) print -ru2 -- "gitstatus_query: invalid option: $OPTARG" ; return 1;; + :) print -ru2 -- "gitstatus_query: missing required argument: $OPTARG"; return 1;; + *) print -ru2 -- "gitstatus_query: invalid option: $opt" ; return 1;; + esac + done + + if (( OPTIND != ARGC )); then + print -ru2 -- "gitstatus_query: exactly one positional argument is required" + return 1 + fi + + local name=$*[OPTIND] + if [[ $name != [[:IDENT:]]## ]]; then + print -ru2 -- "gitstatus_query: invalid positional argument: $name" + return 1 + fi + + # Proceed only if gitstatus_start has fully initialized this instance (state 2). + (( _GITSTATUS_STATE_$name == 2 )) || return + + # Relative paths are resolved against the current directory. When GIT_DIR is set, it is sent + # instead of the -d value, prefixed with ':'. + if [[ -z $GIT_DIR ]]; then + [[ $dir == /* ]] || dir=${(%):-%/}/$dir + else + [[ $GIT_DIR == /* ]] && dir=:$GIT_DIR || dir=:${(%):-%/}/$GIT_DIR + fi + + local -i req_fd=${(P)${:-_GITSTATUS_REQ_FD_$name}} + local req_id=$EPOCHREALTIME + # Request: ID ' ' CALLBACK 0x1f DIR 0x1f NO_DIFF 0x1e. + print -rnu $req_fd -- $req_id' '$callback$'\x1f'$dir$'\x1f'$no_diff$'\x1e' || return + + (( ++_GITSTATUS_NUM_INFLIGHT_$name )) + + if (( timeout == 0 )); then + # Zero timeout: report tout right away without reading the response. + typeset -g VCS_STATUS_RESULT=tout + _gitstatus_clear$fsuf + else + while true; do + _gitstatus_process_response$fsuf $name $timeout $req_id || return + [[ $VCS_STATUS_RESULT == *-async ]] || break + done + fi + + # Timing out is a success only if a callback was supplied. + [[ $VCS_STATUS_RESULT != tout || -n $callback ]] +} + +# If the 
last call to gitstatus_query timed out (VCS_STATUS_RESULT=tout), wait for the callback +# to be called. Otherwise do nothing. +# +# Usage: gitstatus_process_results [OPTION]... NAME +# +# -t FLOAT Timeout in seconds. Negative value means infinity. Will block for at most this long. +# +# Returns an error only when invoked with incorrect arguments and when gitstatusd isn't running or +# broken. +# +# If a callback gets called, VCS_STATUS_* parameters are set as in gitstatus_query. +# VCS_STATUS_RESULT is either norepo-async or ok-async. +function gitstatus_process_results"${1:-}"() { + emulate -L zsh -o no_aliases -o extended_glob -o typeset_silent + + local fsuf=${${(%):-%N}#gitstatus_process_results} + + local opt OPTARG + local -i OPTIND + local -F timeout=-1 + while getopts ":t:" opt; do + case $opt in + t) + if [[ $OPTARG != (|+|-)<->(|.<->)(|[eE](|-|+)<->) ]]; then + print -ru2 -- "gitstatus_process_results: invalid -t argument: $OPTARG" + return 1 + fi + timeout=OPTARG + ;; + \?) print -ru2 -- "gitstatus_process_results: invalid option: $OPTARG" ; return 1;; + :) print -ru2 -- "gitstatus_process_results: missing required argument: $OPTARG"; return 1;; + *) print -ru2 -- "gitstatus_process_results: invalid option: $opt" ; return 1;; + esac + done + + if (( OPTIND != ARGC )); then + print -ru2 -- "gitstatus_process_results: exactly one positional argument is required" + return 1 + fi + + local name=$*[OPTIND] + if [[ $name != [[:IDENT:]]## ]]; then + print -ru2 -- "gitstatus_process_results: invalid positional argument: $name" + return 1 + fi + + (( _GITSTATUS_STATE_$name == 2 )) || return + + while (( _GITSTATUS_NUM_INFLIGHT_$name )); do + _gitstatus_process_response$fsuf $name $timeout '' || return + [[ $VCS_STATUS_RESULT == *-async ]] || break + done + + return 0 +} + +function _gitstatus_clear"${1:-}"() { + unset 
VCS_STATUS_{WORKDIR,COMMIT,LOCAL_BRANCH,REMOTE_BRANCH,REMOTE_NAME,REMOTE_URL,ACTION,INDEX_SIZE,NUM_STAGED,NUM_UNSTAGED,NUM_CONFLICTED,NUM_UNTRACKED,HAS_STAGED,HAS_UNSTAGED,HAS_CONFLICTED,HAS_UNTRACKED,COMMITS_AHEAD,COMMITS_BEHIND,STASHES,TAG,NUM_UNSTAGED_DELETED,NUM_STAGED_NEW,NUM_STAGED_DELETED,PUSH_REMOTE_NAME,PUSH_REMOTE_URL,PUSH_COMMITS_AHEAD,PUSH_COMMITS_BEHIND,NUM_SKIP_WORKTREE,NUM_ASSUME_UNCHANGED} +} + +function _gitstatus_process_response"${1:-}"() { + local name=$1 timeout req_id=$3 buf + local -i resp_fd=_GITSTATUS_RESP_FD_$name + local -i dirty_max_index_size=_GITSTATUS_DIRTY_MAX_INDEX_SIZE_$name + + (( $2 >= 0 )) && timeout=-t$2 && [[ -t $resp_fd ]] + sysread $timeout -i $resp_fd 'buf[$#buf+1]' || { + if (( $? == 4 )); then + if [[ -n $req_id ]]; then + typeset -g VCS_STATUS_RESULT=tout + _gitstatus_clear$fsuf + fi + return 0 + else + gitstatus_stop$fsuf $name + return 1 + fi + } + while [[ $buf != *$'\x1e' ]]; do + if ! sysread -i $resp_fd 'buf[$#buf+1]'; then + gitstatus_stop$fsuf $name + return 1 + fi + done + + local s + for s in ${(ps:\x1e:)buf}; do + local -a resp=("${(@ps:\x1f:)s}") + if (( resp[2] )); then + if [[ $resp[1] == $req_id' '* ]]; then + typeset -g VCS_STATUS_RESULT=ok-sync + else + typeset -g VCS_STATUS_RESULT=ok-async + fi + for VCS_STATUS_WORKDIR \ + VCS_STATUS_COMMIT \ + VCS_STATUS_LOCAL_BRANCH \ + VCS_STATUS_REMOTE_BRANCH \ + VCS_STATUS_REMOTE_NAME \ + VCS_STATUS_REMOTE_URL \ + VCS_STATUS_ACTION \ + VCS_STATUS_INDEX_SIZE \ + VCS_STATUS_NUM_STAGED \ + VCS_STATUS_NUM_UNSTAGED \ + VCS_STATUS_NUM_CONFLICTED \ + VCS_STATUS_NUM_UNTRACKED \ + VCS_STATUS_COMMITS_AHEAD \ + VCS_STATUS_COMMITS_BEHIND \ + VCS_STATUS_STASHES \ + VCS_STATUS_TAG \ + VCS_STATUS_NUM_UNSTAGED_DELETED \ + VCS_STATUS_NUM_STAGED_NEW \ + VCS_STATUS_NUM_STAGED_DELETED \ + VCS_STATUS_PUSH_REMOTE_NAME \ + VCS_STATUS_PUSH_REMOTE_URL \ + VCS_STATUS_PUSH_COMMITS_AHEAD \ + VCS_STATUS_PUSH_COMMITS_BEHIND \ + VCS_STATUS_NUM_SKIP_WORKTREE \ + VCS_STATUS_NUM_ASSUME_UNCHANGED in 
"${(@)resp[3,27]}"; do + done + typeset -gi VCS_STATUS_{INDEX_SIZE,NUM_STAGED,NUM_UNSTAGED,NUM_CONFLICTED,NUM_UNTRACKED,COMMITS_AHEAD,COMMITS_BEHIND,STASHES,NUM_UNSTAGED_DELETED,NUM_STAGED_NEW,NUM_STAGED_DELETED,PUSH_COMMITS_AHEAD,PUSH_COMMITS_BEHIND,NUM_SKIP_WORKTREE,NUM_ASSUME_UNCHANGED} + typeset -gi VCS_STATUS_HAS_STAGED=$((VCS_STATUS_NUM_STAGED > 0)) + if (( dirty_max_index_size >= 0 && VCS_STATUS_INDEX_SIZE > dirty_max_index_size )); then + typeset -gi \ + VCS_STATUS_HAS_UNSTAGED=-1 \ + VCS_STATUS_HAS_CONFLICTED=-1 \ + VCS_STATUS_HAS_UNTRACKED=-1 + else + typeset -gi \ + VCS_STATUS_HAS_UNSTAGED=$((VCS_STATUS_NUM_UNSTAGED > 0)) \ + VCS_STATUS_HAS_CONFLICTED=$((VCS_STATUS_NUM_CONFLICTED > 0)) \ + VCS_STATUS_HAS_UNTRACKED=$((VCS_STATUS_NUM_UNTRACKED > 0)) + fi + else + if [[ $resp[1] == $req_id' '* ]]; then + typeset -g VCS_STATUS_RESULT=norepo-sync + else + typeset -g VCS_STATUS_RESULT=norepo-async + fi + _gitstatus_clear$fsuf + fi + (( --_GITSTATUS_NUM_INFLIGHT_$name )) + [[ $VCS_STATUS_RESULT == *-async ]] && emulate zsh -c "${resp[1]#* }" + done + + return 0 +} + +function _gitstatus_daemon"${1:-}"() { + local -i pipe_fd + exec 0<&- {pipe_fd}>&1 1>>$daemon_log 2>&1 || return + local pgid=$sysparams[pid] + [[ $pgid == <1-> ]] || return + builtin cd -q / || return + + { + { + trap '' PIPE + + local uname_sm + uname_sm="${(L)$(uname -sm)}" || return + [[ $uname_sm == [^' ']##' '[^' ']## ]] || return + local uname_s=${uname_sm% *} + local uname_m=${uname_sm#* } + + if [[ $GITSTATUS_NUM_THREADS == <1-> ]]; then + args+=(-t $GITSTATUS_NUM_THREADS) + else + local cpus + if (( ! $+commands[sysctl] )) || [[ $uname_s == linux ]] || + ! cpus="$(sysctl -n hw.ncpu)"; then + if (( ! $+commands[getconf] )) || ! cpus="$(getconf _NPROCESSORS_ONLN)"; then + cpus=8 + fi + fi + args+=(-t $((cpus > 16 ? 32 : cpus > 0 ? 
2 * cpus : 16))) + fi + + local _gitstatus_zsh_daemon _gitstatus_zsh_version _gitstatus_zsh_downloaded + + function _gitstatus_set_daemon$fsuf() { + _gitstatus_zsh_daemon="$1" + _gitstatus_zsh_version="$2" + _gitstatus_zsh_downloaded="$3" + } + + local gitstatus_plugin_dir_var=_gitstatus_plugin_dir$fsuf + local gitstatus_plugin_dir=${(P)gitstatus_plugin_dir_var} + set -- -d $gitstatus_plugin_dir -s $uname_s -m $uname_m -- _gitstatus_set_daemon$fsuf + [[ ${GITSTATUS_AUTO_INSTALL:-1} == (|-|+)<1-> ]] || set -- -n "$@" + source $gitstatus_plugin_dir/install || return + [[ -n $_gitstatus_zsh_daemon ]] || return + [[ -n $_gitstatus_zsh_version ]] || return + [[ $_gitstatus_zsh_downloaded == [01] ]] || return + + mkfifo -- $file_prefix.fifo || return + print -rnu $pipe_fd -- ${(l:20:)pgid} || return + exec <$file_prefix.fifo || return + zf_rm -- $file_prefix.fifo || return + + if [[ -x $_gitstatus_zsh_daemon ]]; then + $_gitstatus_zsh_daemon -G $_gitstatus_zsh_version "${(@)args}" >&$pipe_fd + local -i ret=$? + [[ $ret == (0|129|130|131|137|141|143) ]] && return ret + fi + + (( ! _gitstatus_zsh_downloaded )) || return + [[ ${GITSTATUS_AUTO_INSTALL:-1} == (|-|+)<1-> ]] || return + set -- -f "$@" + _gitstatus_zsh_daemon= + _gitstatus_zsh_version= + _gitstatus_zsh_downloaded= + source $gitstatus_plugin_dir/install || return + [[ -n $_gitstatus_zsh_daemon ]] || return + [[ -n $_gitstatus_zsh_version ]] || return + [[ $_gitstatus_zsh_downloaded == 1 ]] || return + + $_gitstatus_zsh_daemon -G $_gitstatus_zsh_version "${(@)args}" >&$pipe_fd + } always { + local -i ret=$? + zf_rm -f -- $file_prefix.lock $file_prefix.fifo + kill -- -$pgid + } + } &! + + (( lock_fd == -1 )) && return + + { + if zsystem flock -- $file_prefix.lock && [[ -e $file_prefix.lock ]]; then + zf_rm -f -- $file_prefix.lock $file_prefix.fifo + kill -- -$pgid + fi + } &! +} + +# Starts gitstatusd in the background. Does nothing and succeeds if gitstatusd is already running. 
+# +# Usage: gitstatus_start [OPTION]... NAME +# +# -t FLOAT Fail the self-check on initialization if not getting a response from gitstatusd for +# this many seconds. Defaults to 5. +# +# -s INT Report at most this many staged changes; negative value means infinity. +# Defaults to 1. +# +# -u INT Report at most this many unstaged changes; negative value means infinity. +# Defaults to 1. +# +# -c INT Report at most this many conflicted changes; negative value means infinity. +# Defaults to 1. +# +# -d INT Report at most this many untracked files; negative value means infinity. +# Defaults to 1. +# +# -m INT Report -1 unstaged, untracked and conflicted if there are more than this many +# files in the index. Negative value means infinity. Defaults to -1. +# +# -e Count files within untracked directories like `git status --untracked-files`. +# +# -U Unless this option is specified, report zero untracked files for repositories +# with status.showUntrackedFiles = false. +# +# -W Unless this option is specified, report zero untracked files for repositories +# with bash.showUntrackedFiles = false. +# +# -D Unless this option is specified, report zero staged, unstaged and conflicted +# changes for repositories with bash.showDirtyState = false. 
+function gitstatus_start"${1:-}"() { + emulate -L zsh -o no_aliases -o no_bg_nice -o extended_glob -o typeset_silent || return + print -rnu2 || return + + local fsuf=${${(%):-%N}#gitstatus_start} + + local opt OPTARG + local -i OPTIND + local -F timeout=5 + local -i async=0 + local -a args=() + local -i dirty_max_index_size=-1 + + while getopts ":t:s:u:c:d:m:eaUWD" opt; do + case $opt in + a) async=1;; + +a) async=0;; + t) + if [[ $OPTARG != (|+)<->(|.<->)(|[eE](|-|+)<->) ]] || (( ${timeout::=OPTARG} <= 0 )); then + print -ru2 -- "gitstatus_start: invalid -t argument: $OPTARG" + return 1 + fi + ;; + s|u|c|d|m) + if [[ $OPTARG != (|-|+)<-> ]]; then + print -ru2 -- "gitstatus_start: invalid -$opt argument: $OPTARG" + return 1 + fi + args+=(-$opt $OPTARG) + [[ $opt == m ]] && dirty_max_index_size=OPTARG + ;; + e|U|W|D) args+=$opt;; + +(e|U|W|D)) args=(${(@)args:#-$opt});; + \?) print -ru2 -- "gitstatus_start: invalid option: $OPTARG" ; return 1;; + :) print -ru2 -- "gitstatus_start: missing required argument: $OPTARG"; return 1;; + *) print -ru2 -- "gitstatus_start: invalid option: $opt" ; return 1;; + esac + done + + if (( OPTIND != ARGC )); then + print -ru2 -- "gitstatus_start: exactly one positional argument is required" + return 1 + fi + + local name=$*[OPTIND] + if [[ $name != [[:IDENT:]]## ]]; then + print -ru2 -- "gitstatus_start: invalid positional argument: $name" + return 1 + fi + + local -i lock_fd resp_fd stderr_fd + local file_prefix xtrace=/dev/null daemon_log=/dev/null + if (( _GITSTATUS_STATE_$name )); then + (( async )) && return + (( _GITSTATUS_STATE_$name == 2 )) && return + lock_fd=_GITSTATUS_LOCK_FD_$name + resp_fd=_GITSTATUS_RESP_FD_$name + xtrace=${(P)${:-GITSTATUS_XTRACE_$name}} + daemon_log=${(P)${:-GITSTATUS_DAEMON_LOG_$name}} + file_prefix=${(P)${:-_GITSTATUS_FILE_PREFIX_$name}} + else + typeset -gi _GITSTATUS_START_COUNTER + local log_level=$GITSTATUS_LOG_LEVEL + local file_prefix=${${TMPDIR:-/tmp}:A}/gitstatus.$name.$EUID + 
file_prefix+=.$sysparams[pid].$EPOCHSECONDS.$((++_GITSTATUS_START_COUNTER)) + (( GITSTATUS_ENABLE_LOGGING )) && : ${log_level:=INFO} + if [[ -n $log_level ]]; then + xtrace=$file_prefix.xtrace.log + daemon_log=$file_prefix.daemon.log + fi + args+=(-v ${log_level:-FATAL}) + typeset -g GITSTATUS_XTRACE_$name=$xtrace + typeset -g GITSTATUS_DAEMON_LOG_$name=$daemon_log + typeset -g _GITSTATUS_FILE_PREFIX_$name=$file_prefix + typeset -gi _GITSTATUS_CLIENT_PID_$name="sysparams[pid]" + typeset -gi _GITSTATUS_DIRTY_MAX_INDEX_SIZE_$name=dirty_max_index_size + fi + + () { + if [[ $xtrace != /dev/null && -o no_xtrace ]]; then + exec {stderr_fd}>&2 || return + exec 2>>$xtrace || return + setopt xtrace + fi + + setopt monitor || return + + if (( ! _GITSTATUS_STATE_$name )); then + if [[ -r /proc/version && "$($file_prefix.lock || return + zsystem flock -f lock_fd $file_prefix.lock || return + [[ $lock_fd == <1-> ]] || return + fi + + typeset -gi _GITSTATUS_LOCK_FD_$name=lock_fd + + if [[ -n $USERPROFILE && -d /cygdrive && -d /proc/self/fd ]]; then + # Work around bugs in Cygwin 32-bit. + # + # This hangs: + # + # emulate -L zsh + # () { exec {fd}< $1 } <(:) + # =true # hangs here + # + # This hangs: + # + # sysopen -r -u fd <(:) + local -i fd + exec {fd}< <(_gitstatus_daemon$fsuf) || return + { + [[ -r /proc/self/fd/$fd ]] || return + sysopen -r -o cloexec -u resp_fd /proc/self/fd/$fd || return + } always { + exec {fd} >&- || return + } + else + sysopen -r -o cloexec -u resp_fd <(_gitstatus_daemon$fsuf) || return + fi + + typeset -gi GITSTATUS_DAEMON_PID_$name="${sysparams[procsubstpid]:--1}" + + [[ $resp_fd == <1-> ]] || return + typeset -gi _GITSTATUS_RESP_FD_$name=resp_fd + typeset -gi _GITSTATUS_STATE_$name=1 + fi + + if (( ! 
async )); then + (( _GITSTATUS_CLIENT_PID_$name == sysparams[pid] )) || return + + local pgid + while (( $#pgid < 20 )); do + [[ -t $resp_fd ]] + sysread -s $((20 - $#pgid)) -t $timeout -i $resp_fd 'pgid[$#pgid+1]' || return + done + [[ $pgid == ' '#<1-> ]] || return + typeset -gi GITSTATUS_DAEMON_PID_$name=pgid + + sysopen -w -o cloexec -u req_fd -- $file_prefix.fifo || return + [[ $req_fd == <1-> ]] || return + typeset -gi _GITSTATUS_REQ_FD_$name=req_fd + + function _gitstatus_process_response_$name-$fsuf() { + emulate -L zsh -o no_aliases -o extended_glob -o typeset_silent + local pair=${${(%):-%N}#_gitstatus_process_response_} + local name=${pair%%-*} + local fsuf=${pair#*-} + if (( ARGC == 1 )); then + _gitstatus_process_response$fsuf $name 0 '' + else + gitstatus_stop$fsuf $name + fi + } + if ! zle -F $resp_fd _gitstatus_process_response_$name-$fsuf; then + unfunction _gitstatus_process_response_$name-$fsuf + return 1 + fi + + function _gitstatus_cleanup_$name-$fsuf() { + emulate -L zsh -o no_aliases -o extended_glob -o typeset_silent + local pair=${${(%):-%N}#_gitstatus_cleanup_} + local name=${pair%%-*} + local fsuf=${pair#*-} + (( _GITSTATUS_CLIENT_PID_$name == sysparams[pid] )) || return + gitstatus_stop$fsuf $name + } + if ! add-zsh-hook zshexit _gitstatus_cleanup_$name-$fsuf; then + unfunction _gitstatus_cleanup_$name-$fsuf + return 1 + fi + + print -nru $req_fd -- $'hello\x1f\x1e' || return + local expected=$'hello\x1f0\x1e' actual + while (( $#actual < $#expected )); do + [[ -t $resp_fd ]] + sysread -s $(($#expected - $#actual)) -t $timeout -i $resp_fd 'actual[$#actual+1]' || return + done + [[ $actual == $expected ]] || return + + if (( lock_fd != -1 )); then + zf_rm -- $file_prefix.lock || return + zsystem flock -u $lock_fd || return + fi + unset _GITSTATUS_LOCK_FD_$name + + typeset -gi _GITSTATUS_STATE_$name=2 + fi + } + + local -i err=$? 
+ (( stderr_fd )) && exec 2>&$stderr_fd {stderr_fd}>&- + (( err == 0 )) && return + + gitstatus_stop$fsuf $name + + setopt prompt_percent no_prompt_subst no_prompt_bang + print -Pru2 -- '[%F{red}ERROR%f]: gitstatus failed to initialize.' + print -ru2 -- '' + print -ru2 -- ' Your Git prompt may disappear or become slow.' + if [[ -s $xtrace ]]; then + print -ru2 -- '' + print -ru2 -- " The content of ${(q-)xtrace} (gitstatus_start xtrace):" + print -Pru2 -- '%F{yellow}' + >&2 awk '{print " " $0}' <$xtrace + print -Pru2 -- "%F{red} ^ this command failed ($err)%f" + fi + if [[ -s $daemon_log ]]; then + print -ru2 -- '' + print -ru2 -- " The content of ${(q-)daemon_log} (gitstatus daemon log):" + print -Pru2 -- '%F{yellow}' + >&2 awk '{print " " $0}' <$daemon_log + print -Pnru2 -- '%f' + fi + if [[ $GITSTATUS_LOG_LEVEL == DEBUG ]]; then + print -ru2 -- '' + print -ru2 -- ' Your system information:' + print -Pru2 -- '%F{yellow}' + print -ru2 -- " zsh: $ZSH_VERSION" + print -ru2 -- " uname -a: $(uname -a)" + print -Pru2 -- '%f' + print -ru2 -- ' If you need help, open an issue and attach this whole error message to it:' + print -ru2 -- '' + print -Pru2 -- ' %F{green}' + else + print -ru2 -- '' + print -ru2 -- ' Run the following command to retry with extra diagnostics:' + print -Pru2 -- '%F{green}' + local env="GITSTATUS_LOG_LEVEL=DEBUG" + if [[ -n $GITSTATUS_NUM_THREADS ]]; then + env+=" GITSTATUS_NUM_THREADS=${(q)GITSTATUS_NUM_THREADS}" + fi + if [[ -n $GITSTATUS_DAEMON ]]; then + env+=" GITSTATUS_DAEMON=${(q)GITSTATUS_DAEMON}" + fi + if [[ -n $GITSTATUS_AUTO_INSTALL ]]; then + env+=" GITSTATUS_AUTO_INSTALL=${(q)GITSTATUS_AUTO_INSTALL}" + fi + if [[ -n $GITSTATUS_CACHE_DIR ]]; then + env+=" GITSTATUS_CACHE_DIR=${(q)GITSTATUS_CACHE_DIR}" + fi + print -nru2 -- " ${env} gitstatus_start ${(@q-)*}" + print -Pru2 -- '%f' + print -ru2 -- '' + local zshrc=${(D)ZDOTDIR:-~}/.zshrc + print -ru2 -- " If this command produces no output, add the following parameter to $zshrc:" + 
print -ru2 -- '' + print -Pru2 -- '%F{green} GITSTATUS_LOG_LEVEL=DEBUG%f' + print -ru2 -- '' + print -ru2 -- ' With this parameter gitstatus will print additional information on error.' + fi + + return err +} + +# Stops gitstatusd if it's running. +# +# Usage: gitstatus_stop NAME. +function gitstatus_stop"${1:-}"() { + emulate -L zsh -o no_aliases -o extended_glob -o typeset_silent + + local fsuf=${${(%):-%N}#gitstatus_stop} + + if (( ARGC != 1 )); then + print -ru2 -- "gitstatus_stop: exactly one positional argument is required" + return 1 + fi + + local name=$1 + if [[ $name != [[:IDENT:]]## ]]; then + print -ru2 -- "gitstatus_stop: invalid positional argument: $name" + return 1 + fi + + local state_var=_GITSTATUS_STATE_$name + local req_fd_var=_GITSTATUS_REQ_FD_$name + local resp_fd_var=_GITSTATUS_RESP_FD_$name + local lock_fd_var=_GITSTATUS_LOCK_FD_$name + local client_pid_var=_GITSTATUS_CLIENT_PID_$name + local daemon_pid_var=GITSTATUS_DAEMON_PID_$name + local inflight_var=_GITSTATUS_NUM_INFLIGHT_$name + local file_prefix_var=_GITSTATUS_FILE_PREFIX_$name + local dirty_max_index_size_var=_GITSTATUS_DIRTY_MAX_INDEX_SIZE_$name + + local req_fd=${(P)req_fd_var} + local resp_fd=${(P)resp_fd_var} + local lock_fd=${(P)lock_fd_var} + local daemon_pid=${(P)daemon_pid_var} + local file_prefix=${(P)file_prefix_var} + + local cleanup=_gitstatus_cleanup_$name-$fsuf + local process=_gitstatus_process_response_$name-$fsuf + + if (( $+functions[$cleanup] )); then + add-zsh-hook -d zshexit $cleanup + unfunction -- $cleanup + fi + + if (( $+functions[$process] )); then + [[ -n $resp_fd ]] && zle -F $resp_fd + unfunction -- $process + fi + + [[ $daemon_pid == <1-> ]] && kill -- -$daemon_pid 2>/dev/null + [[ $file_prefix == /* ]] && zf_rm -f -- $file_prefix.lock $file_prefix.fifo + [[ $lock_fd == <1-> ]] && zsystem flock -u $lock_fd + [[ $req_fd == <1-> ]] && exec {req_fd}>&- + [[ $resp_fd == <1-> ]] && exec {resp_fd}>&- + + unset $state_var $req_fd_var $lock_fd_var $resp_fd_var 
$client_pid_var $daemon_pid_var + unset $inflight_var $file_prefix_var $dirty_max_index_size_var + + unset VCS_STATUS_RESULT + _gitstatus_clear$fsuf +} + +# Usage: gitstatus_check NAME. +# +# Returns 0 if and only if `gitstatus_start NAME` has succeeded previously. +# If it returns non-zero, gitstatus_query NAME is guaranteed to return non-zero. +function gitstatus_check"${1:-}"() { + emulate -L zsh -o no_aliases -o extended_glob -o typeset_silent + + local fsuf=${${(%):-%N}#gitstatus_check} + + if (( ARGC != 1 )); then + print -ru2 -- "gitstatus_check: exactly one positional argument is required" + return 1 + fi + + local name=$1 + if [[ $name != [[:IDENT:]]## ]]; then + print -ru2 -- "gitstatus_check: invalid positional argument: $name" + return 1 + fi + + (( _GITSTATUS_STATE_$name == 2 )) +} + +(( ${#_gitstatus_opts} )) && setopt ${_gitstatus_opts[@]} +'builtin' 'unset' '_gitstatus_opts' diff --git a/ b/ new file mode 100644 index 00000000..b2c67c8d --- /dev/null +++ b/ @@ -0,0 +1,103 @@ +# Simple Bash prompt with Git status. + +# Source from $GITSTATUS_DIR or from the same directory +# in which the current script resides if the variable isn't set. +if [[ -n "${GITSTATUS_DIR:-}" ]]; then + source "$GITSTATUS_DIR" || return +elif [[ "${BASH_SOURCE[0]}" == */* ]]; then + source "${BASH_SOURCE[0]%/*}/" || return +else + source || return +fi + +# Sets GITSTATUS_PROMPT to reflect the state of the current git repository. +# The value is empty if not in a git repository. Forwards all arguments to +# gitstatus_query. 
+# +# Example value of GITSTATUS_PROMPT: master ⇣42⇡42 ⇠42⇢42 *42 merge ~42 +42 !42 ?42 +# +# master current branch +# ⇣42 local branch is 42 commits behind the remote +# ⇡42 local branch is 42 commits ahead of the remote +# ⇠42 local branch is 42 commits behind the push remote +# ⇢42 local branch is 42 commits ahead of the push remote +# *42 42 stashes +# merge merge in progress +# ~42 42 merge conflicts +# +42 42 staged changes +# !42 42 unstaged changes +# ?42 42 untracked files +function gitstatus_prompt_update() { + GITSTATUS_PROMPT="" + + gitstatus_query "$@" || return 1 # error + [[ "$VCS_STATUS_RESULT" == ok-sync ]] || return 0 # not a git repo + + local reset=$'\e[0m' # no color + local clean=$'\e[38;5;076m' # green foreground + local untracked=$'\e[38;5;014m' # teal foreground + local modified=$'\e[38;5;011m' # yellow foreground + local conflicted=$'\e[38;5;196m' # red foreground + + local p + + local where # branch name, tag or commit + if [[ -n "$VCS_STATUS_LOCAL_BRANCH" ]]; then + where="$VCS_STATUS_LOCAL_BRANCH" + elif [[ -n "$VCS_STATUS_TAG" ]]; then + p+="${reset}#" + where="$VCS_STATUS_TAG" + else + p+="${reset}@" + where="${VCS_STATUS_COMMIT:0:8}" + fi + + (( ${#where} > 32 )) && where="${where:0:12}…${where: -12}" # truncate long branch names and tags + p+="${clean}${where}" + + # ⇣42 if behind the remote. + (( VCS_STATUS_COMMITS_BEHIND )) && p+=" ${clean}⇣${VCS_STATUS_COMMITS_BEHIND}" + # ⇡42 if ahead of the remote; no leading space if also behind the remote: ⇣42⇡42. + (( VCS_STATUS_COMMITS_AHEAD && !VCS_STATUS_COMMITS_BEHIND )) && p+=" " + (( VCS_STATUS_COMMITS_AHEAD )) && p+="${clean}⇡${VCS_STATUS_COMMITS_AHEAD}" + # ⇠42 if behind the push remote. + (( VCS_STATUS_PUSH_COMMITS_BEHIND )) && p+=" ${clean}⇠${VCS_STATUS_PUSH_COMMITS_BEHIND}" + (( VCS_STATUS_PUSH_COMMITS_AHEAD && !VCS_STATUS_PUSH_COMMITS_BEHIND )) && p+=" " + # ⇢42 if ahead of the push remote; no leading space if also behind: ⇠42⇢42. 
+ (( VCS_STATUS_PUSH_COMMITS_AHEAD )) && p+="${clean}⇢${VCS_STATUS_PUSH_COMMITS_AHEAD}" + # *42 if have stashes. + (( VCS_STATUS_STASHES )) && p+=" ${clean}*${VCS_STATUS_STASHES}" + # 'merge' if the repo is in an unusual state. + [[ -n "$VCS_STATUS_ACTION" ]] && p+=" ${conflicted}${VCS_STATUS_ACTION}" + # ~42 if have merge conflicts. + (( VCS_STATUS_NUM_CONFLICTED )) && p+=" ${conflicted}~${VCS_STATUS_NUM_CONFLICTED}" + # +42 if have staged changes. + (( VCS_STATUS_NUM_STAGED )) && p+=" ${modified}+${VCS_STATUS_NUM_STAGED}" + # !42 if have unstaged changes. + (( VCS_STATUS_NUM_UNSTAGED )) && p+=" ${modified}!${VCS_STATUS_NUM_UNSTAGED}" + # ?42 if have untracked files. It's really a question mark, your font isn't broken. + (( VCS_STATUS_NUM_UNTRACKED )) && p+=" ${untracked}?${VCS_STATUS_NUM_UNTRACKED}" + + GITSTATUS_PROMPT="${p}${reset}" +} + +# Start gitstatusd in the background. +gitstatus_stop && gitstatus_start -s -1 -u -1 -c -1 -d -1 + +# On every prompt, fetch git status and set GITSTATUS_PROMPT. +PROMPT_COMMAND=gitstatus_prompt_update + +# Enable promptvars so that ${GITSTATUS_PROMPT} in PS1 is expanded. +shopt -s promptvars + +# Customize prompt. Put $GITSTATUS_PROMPT in it reflect git status. +# +# Example: +# +# user@host ~/projects/skynet master+! +# $ █ +PS1='\[\033[01;32m\]\u@\h\[\033[00m\] ' # green user@host +PS1+='\[\033[01;34m\]\w\[\033[00m\]' # blue current working directory +PS1+='${GITSTATUS_PROMPT:+ $GITSTATUS_PROMPT}' # git status (requires promptvars option) +PS1+='\n\[\033[01;$((31+!$?))m\]\$\[\033[00m\] ' # green/red (success/error) $/# (normal/root) +PS1+='\[\e]0;\u@\h: \w\a\]' # terminal title: user@host: dir diff --git a/gitstatus.prompt.zsh b/gitstatus.prompt.zsh new file mode 100644 index 00000000..6ad64856 --- /dev/null +++ b/gitstatus.prompt.zsh @@ -0,0 +1,111 @@ +# Simple Zsh prompt with Git status. 
+ +# Source gitstatus.plugin.zsh from $GITSTATUS_DIR or from the same directory +# in which the current script resides if the variable isn't set. +source "${GITSTATUS_DIR:-${${(%):-%x}:h}}/gitstatus.plugin.zsh" || return + +# Sets GITSTATUS_PROMPT to reflect the state of the current git repository. Empty if not +# in a git repository. In addition, sets GITSTATUS_PROMPT_LEN to the number of columns +# $GITSTATUS_PROMPT will occupy when printed. +# +# Example: +# +# GITSTATUS_PROMPT='master ⇣42⇡42 ⇠42⇢42 *42 merge ~42 +42 !42 ?42' +# GITSTATUS_PROMPT_LEN=39 +# +# master current branch +# ⇣42 local branch is 42 commits behind the remote +# ⇡42 local branch is 42 commits ahead of the remote +# ⇠42 local branch is 42 commits behind the push remote +# ⇢42 local branch is 42 commits ahead of the push remote +# *42 42 stashes +# merge merge in progress +# ~42 42 merge conflicts +# +42 42 staged changes +# !42 42 unstaged changes +# ?42 42 untracked files +function gitstatus_prompt_update() { + emulate -L zsh + typeset -g GITSTATUS_PROMPT='' + typeset -gi GITSTATUS_PROMPT_LEN=0 + + # Call gitstatus_query synchronously. Note that gitstatus_query can also be called + # asynchronously; see documentation in gitstatus.plugin.zsh. + gitstatus_query 'MY' || return 1 # error + [[ $VCS_STATUS_RESULT == 'ok-sync' ]] || return 0 # not a git repo + + local clean='%76F' # green foreground + local modified='%178F' # yellow foreground + local untracked='%39F' # blue foreground + local conflicted='%196F' # red foreground + + local p + + local where # branch name, tag or commit + if [[ -n $VCS_STATUS_LOCAL_BRANCH ]]; then + where=$VCS_STATUS_LOCAL_BRANCH + elif [[ -n $VCS_STATUS_TAG ]]; then + p+='%f#' + where=$VCS_STATUS_TAG + else + p+='%f@' + where=${VCS_STATUS_COMMIT[1,8]} + fi + + (( $#where > 32 )) && where[13,-13]="…" # truncate long branch names and tags + p+="${clean}${where//\%/%%}" # escape % + + # ⇣42 if behind the remote. 
+ (( VCS_STATUS_COMMITS_BEHIND )) && p+=" ${clean}⇣${VCS_STATUS_COMMITS_BEHIND}" + # ⇡42 if ahead of the remote; no leading space if also behind the remote: ⇣42⇡42. + (( VCS_STATUS_COMMITS_AHEAD && !VCS_STATUS_COMMITS_BEHIND )) && p+=" " + (( VCS_STATUS_COMMITS_AHEAD )) && p+="${clean}⇡${VCS_STATUS_COMMITS_AHEAD}" + # ⇠42 if behind the push remote. + (( VCS_STATUS_PUSH_COMMITS_BEHIND )) && p+=" ${clean}⇠${VCS_STATUS_PUSH_COMMITS_BEHIND}" + (( VCS_STATUS_PUSH_COMMITS_AHEAD && !VCS_STATUS_PUSH_COMMITS_BEHIND )) && p+=" " + # ⇢42 if ahead of the push remote; no leading space if also behind: ⇠42⇢42. + (( VCS_STATUS_PUSH_COMMITS_AHEAD )) && p+="${clean}⇢${VCS_STATUS_PUSH_COMMITS_AHEAD}" + # *42 if have stashes. + (( VCS_STATUS_STASHES )) && p+=" ${clean}*${VCS_STATUS_STASHES}" + # 'merge' if the repo is in an unusual state. + [[ -n $VCS_STATUS_ACTION ]] && p+=" ${conflicted}${VCS_STATUS_ACTION}" + # ~42 if have merge conflicts. + (( VCS_STATUS_NUM_CONFLICTED )) && p+=" ${conflicted}~${VCS_STATUS_NUM_CONFLICTED}" + # +42 if have staged changes. + (( VCS_STATUS_NUM_STAGED )) && p+=" ${modified}+${VCS_STATUS_NUM_STAGED}" + # !42 if have unstaged changes. + (( VCS_STATUS_NUM_UNSTAGED )) && p+=" ${modified}!${VCS_STATUS_NUM_UNSTAGED}" + # ?42 if have untracked files. It's really a question mark, your font isn't broken. + (( VCS_STATUS_NUM_UNTRACKED )) && p+=" ${untracked}?${VCS_STATUS_NUM_UNTRACKED}" + + GITSTATUS_PROMPT="${p}%f" + + # The length of GITSTATUS_PROMPT after removing %f and %F. + GITSTATUS_PROMPT_LEN="${(m)#${${GITSTATUS_PROMPT//\%\%/x}//\%(f|<->F)}}" +} + +# Start gitstatusd instance with name "MY". The same name is passed to +# gitstatus_query in gitstatus_prompt_update. The flags with -1 as values +# enable staged, unstaged, conflicted and untracked counters. +gitstatus_stop 'MY' && gitstatus_start -s -1 -u -1 -c -1 -d -1 'MY' + +# On every prompt, fetch git status and set GITSTATUS_PROMPT. 
+autoload -Uz add-zsh-hook +add-zsh-hook precmd gitstatus_prompt_update + +# Enable/disable the right prompt options. +setopt no_prompt_bang prompt_percent prompt_subst + +# Customize prompt. Put $GITSTATUS_PROMPT in it to reflect git status. +# +# Example: +# +# user@host ~/projects/skynet master ⇡42 +# % █ +# +# The current directory gets truncated from the left if the whole prompt doesn't fit on the line. +PROMPT='%70F%n@%m%f ' # green user@host +PROMPT+='%39F%$((-GITSTATUS_PROMPT_LEN-1))<…<%~%<<%f' # blue current working directory +PROMPT+='${GITSTATUS_PROMPT:+ $GITSTATUS_PROMPT}' # git status +PROMPT+=$'\n' # new line +PROMPT+='%F{%(?.76.196)}%#%f ' # %/# (normal/root); green/red (ok/error) diff --git a/install b/install new file mode 100755 index 00000000..00070540 --- /dev/null +++ b/install @@ -0,0 +1,269 @@ +#!/bin/sh +# +# This script does not have a stable API. + +_gitstatus_install_main() { + if [ -n "${ZSH_VERSION:-}" ]; then + emulate -L sh -o no_unset + else + set -u + fi + + local argv1=$1 + shift + + local no_check= no_install= uname_s= uname_m= gitstatus_dir= + local opt= OPTARG= OPTIND=1 + + while getopts ':s:m:d:fnh' opt "$@"; do + case "$opt" in + h) + command cat <<\END +Usage: install [-s KERNEL] [-m ARCH] [-d DIR] [-f|-n] [-- CMD [ARG]...] + +If positional arguments are specified, call this on success: + + CMD [ARG]... DAEMON VERSION INSTALLED + +DAEMON is path to gitstatusd. VERSION is a glob pattern for the +version this daemon should support; it's supposed to be passed as +-G to gitstatusd. INSTALLED is 1 if gitstatusd has just been +downloaded and 0 otherwise. 
+ +Options: + + -s KERNEL use this instead of lowercase `uname -s` + -m ARCH use this instead of lowercase `uname -m` + -d DIR use this instead of `dirname "$0"` + -f download gitstatusd even if there is one locally + -n do not download gitstatusd (fail instead) +END + return + ;; + n) + if [ -n "$no_install" ]; then + >&2 echo "[gitstatus] error: duplicate option: -$opt" + return 1 + fi + no_install=1 + ;; + f) + if [ -n "$no_check" ]; then + >&2 echo "[gitstatus] error: duplicate option: -$opt" + return 1 + fi + no_check=1 + ;; + d) + if [ -n "$gitstatus_dir" ]; then + >&2 echo "[gitstatus] error: duplicate option: -$opt" + return 1 + fi + if [ -z "$OPTARG" ]; then + >&2 echo "[error] incorrect value of -$opt: $OPTARG" + return 1 + fi + gitstatus_dir="$OPTARG" + ;; + m) + if [ -n "$uname_m" ]; then + >&2 echo "[gitstatus] error: duplicate option: -$opt" + return 1 + fi + if [ -z "$OPTARG" ]; then + >&2 echo "[error] incorrect value of -$opt: $OPTARG" + return 1 + fi + uname_m="$OPTARG" + ;; + s) + if [ -n "$uname_s" ]; then + >&2 echo "[gitstatus] error: duplicate option: -$opt" + return 1 + fi + if [ -z "$OPTARG" ]; then + >&2 echo "[error] incorrect value of -$opt: $OPTARG" + return 1 + fi + uname_s="$OPTARG" + ;; + \?) 
>&2 echo "[gitstatus] error: invalid option: -$OPTARG" ; return 1;; + :) >&2 echo "[gitstatus] error: missing required argument: -$OPTARG"; return 1;; + *) >&2 echo "[gitstatus] internal error: unhandled option: -$opt" ; return 1;; + esac + done + + shift "$((OPTIND - 1))" + + : "${gitstatus_dir:=$argv1}" + + if [ -n "$no_check" -a -n "$no_install" ]; then + >&2 echo "[gitstatus] error: incompatible options: -f, -n" + return 1 + fi + + if [ -z "$uname_s" ]; then + uname_s="$(command uname -s)" || return + uname_s="$(printf '%s' "$uname_s" | command tr '[A-Z]' '[a-z]')" || return + fi + if [ -z "$uname_m" ]; then + uname_m="$(command uname -s)" || return + uname_m="$(printf '%s' "$uname_m" | command tr '[A-Z]' '[a-z]')" || return + fi + + local daemon="${GITSTATUS_DAEMON:-}" + local cache_dir="${GITSTATUS_CACHE_DIR:-${XDG_CACHE_HOME:-$HOME/.cache}/gitstatus}" + + if [ -z "$no_check" ]; then + if [ -n "${daemon##/*}" ]; then + >&2 echo "[gitstatus] error: GITSTATUS_DAEMON is not absolute path: $daemon" + return 1 + fi + if [ -z "$daemon" ]; then + daemon="$gitstatus_dir"/usrbin/gitstatusd + if [ ! -e "$daemon" ]; then + daemon="$daemon"-"$uname_s"-"$uname_m" + if [ ! -e "$daemon" ]; then + daemon= + fi + fi + fi + if [ -n "$daemon" ]; then + local gitstatus_version= libgit2_version= + if ! . 
"$gitstatus_dir"/; then + >&2 echo "[gitstatus] internal error: failed to source" + return 1 + fi + if [ -z "$gitstatus_version" ]; then + >&2 echo "[gitstatus] internal error: empty gitstatus_version in" + return 1 + fi + [ $# = 0 ] || "$@" "$daemon" "$gitstatus_version" 0 + return + fi + fi + + while IFS= read -r line; do + line="${line###*}" + [ -n "$line" ] || continue + + local uname_s_glob= uname_m_glob= file= version= + eval "$line" || return + + if [ -z "$uname_s_glob" -o -z "$uname_m_glob" -o -z "$file" -o -z "$version" ]; then + >&2 echo "[gitstatus] internal error: invalid line: $line" + return 1 + fi + + case "$uname_s" in + $uname_s_glob) ;; + *) continue;; + esac + case "$uname_m" in + $uname_m_glob) ;; + *) continue;; + esac + + # Found a match. The while loop will terminate during this iteration. + + if [ -z "$no_check" ]; then + # Check if a suitable gitstatusd already exists. + local daemon="$cache_dir"/"$file" + if [ -e "$daemon" ]; then + [ $# = 0 ] || "$@" "$daemon" "$version" 0 + return + fi + daemon="$daemon"-"$uname_s"-"$uname_m" + if [ -e "$daemon" ]; then + local gitstatus_version= libgit2_version= + if ! . "$gitstatus_dir"/; then + >&2 echo "[gitstatus] internal error: failed to source" + return 1 + fi + if [ -z "$gitstatus_version" ]; then + >&2 echo "[gitstatus] internal error: empty gitstatus_version in" + return 1 + fi + [ $# = 0 ] || "$@" "$daemon" "$gitstatus_version" 0 + return + fi + fi + + # No suitable gitstatusd exists. Need to download. 
+ + if [ -n "$no_install" ]; then + >&2 echo "[gitstatus] error: no gitstatusd found and installation is disabled" + return 1 + fi + + local daemon="$cache_dir"/"$file" + + if [ -n "${cache_dir##/*}" ]; then + >&2 echo "[gitstatus] error: GITSTATUS_CACHE_DIR is not absolute: $cache_dir" + return 1 + fi + [ -d "$cache_dir" ] || mkdir -p -- "$cache_dir" || return + + local url="$version/$file.tar.gz" + local archive="$cache_dir"/"$file".tmp.$$.tar.gz + if command -v curl >/dev/null 2>&1; then + if ! err="$(command curl -fsSLo "$archive" -- "$url" 2>&1)"; then + >&2 echo "[gitstatus] error: failed to download gitstatusd: $url" + >&2 printf "%s" "$err" + return 1 + fi + elif command -v wget >/dev/null 2>&1; then + if ! err="$(command wget -O "$archive" -- "$url" 2>&1)"; then + >&2 echo "[gitstatus] error: failed to download gitstatusd: $url" + >&2 printf "%s" "$err" + return 1 + fi + else + >&2 echo "[gitstatus] error: please install curl or wget" + return 1 + fi + + ( + if [ -n "${ZSH_VERSION:-}" ]; then + builtin cd -q -- "$cache_dir" || return + else + cd -- "$cache_dir" || return + fi + + local old= + if [ -e "$daemon" ]; then + local i=1 + while :; do + old="$daemon"."$i" + [ -e "$old" ] || break + i="$((i+1))" + done + command mv -f -- "$daemon" "$old" || return + fi + + command tar -xzf "$archive" + local ret=$? + command rm -f -- "$archive" + if [ -n "$old" ]; then + if [ "$ret" = 0 ]; then + command rm -f -- "$old" 2>/dev/null + else + command mv -f -- "$old" "$daemon" + fi + fi + exit "$ret" + ) || return + + [ $# = 0 ] || "$@" "$daemon" "$version" 1 + return + done <"$gitstatus_dir"/ + + >&2 echo "[gitstatus] error: no gitstatusd found for $uname_s $uname_m" + return 1 +} + +if [ -z "${0##*/*}" ]; then + _gitstatus_install_main "${0%/*}" "$@" +else + _gitstatus_install_main . "$@" +fi diff --git a/ b/ new file mode 100644 index 00000000..853d4056 --- /dev/null +++ b/ @@ -0,0 +1,24 @@ +# This file is used by ./install and indirectly by shell bindings. 
+ +# Official gitstatusd binaries. +uname_s_glob="cygwin_nt-10.0"; uname_m_glob="i686"; file="gitstatusd-${uname_s}-${uname_m}"; version="v1.0.0"; +uname_s_glob="cygwin_nt-10.0"; uname_m_glob="x86_64"; file="gitstatusd-${uname_s}-${uname_m}"; version="v1.0.0"; +uname_s_glob="darwin"; uname_m_glob="x86_64"; file="gitstatusd-${uname_s}-${uname_m}"; version="v1.0.0"; +uname_s_glob="freebsd"; uname_m_glob="amd64"; file="gitstatusd-${uname_s}-${uname_m}"; version="v1.0.0"; +uname_s_glob="linux"; uname_m_glob="aarch64"; file="gitstatusd-${uname_s}-${uname_m}"; version="v1.0.0"; +uname_s_glob="linux"; uname_m_glob="armv6l"; file="gitstatusd-${uname_s}-${uname_m}"; version="v1.0.0"; +uname_s_glob="linux"; uname_m_glob="armv7l"; file="gitstatusd-${uname_s}-${uname_m}"; version="v1.0.0"; +uname_s_glob="linux"; uname_m_glob="i686"; file="gitstatusd-${uname_s}-${uname_m}"; version="v1.0.0"; +uname_s_glob="linux"; uname_m_glob="x86_64"; file="gitstatusd-${uname_s}-${uname_m}"; version="v1.0.0"; +uname_s_glob="msys_nt-10.0"; uname_m_glob="i686"; file="gitstatusd-${uname_s}-${uname_m}"; version="v1.0.0"; +uname_s_glob="msys_nt-10.0"; uname_m_glob="x86_64"; file="gitstatusd-${uname_s}-${uname_m}"; version="v1.0.0"; + +# Fallbacks to official gitstatusd binaries. 
+uname_s_glob="cygwin_nt-*"; uname_m_glob="i686"; file="gitstatusd-cygwin_nt-10.0-${uname_m}"; version="v1.0.0"; +uname_s_glob="cygwin_nt-*"; uname_m_glob="x86_64"; file="gitstatusd-cygwin_nt-10.0-${uname_m}"; version="v1.0.0"; +uname_s_glob="mingw32_nt-*"; uname_m_glob="i686"; file="gitstatusd-msys_nt-10.0-${uname_m}"; version="v1.0.0"; +uname_s_glob="mingw32_nt-*"; uname_m_glob="x86_64"; file="gitstatusd-msys_nt-10.0-${uname_m}"; version="v1.0.0"; +uname_s_glob="mingw64_nt-*"; uname_m_glob="i686"; file="gitstatusd-msys_nt-10.0-${uname_m}"; version="v1.0.0"; +uname_s_glob="mingw64_nt-*"; uname_m_glob="x86_64"; file="gitstatusd-msys_nt-10.0-${uname_m}"; version="v1.0.0"; +uname_s_glob="msys_nt-*"; uname_m_glob="i686"; file="gitstatusd-msys_nt-10.0-${uname_m}"; version="v1.0.0"; +uname_s_glob="msys_nt-*"; uname_m_glob="x86_64"; file="gitstatusd-msys_nt-10.0-${uname_m}"; version="v1.0.0"; diff --git a/mbuild b/mbuild new file mode 100755 index 00000000..a1a8cda8 --- /dev/null +++ b/mbuild @@ -0,0 +1,352 @@ +#!/usr/bin/env zsh +# +# This script does not have a stable API. +# +# Usage: mbuild [-b git-ref] [kernel-arch]... +# +# Builds a bunch of gitstatusd-* binaries. Without arguments builds binaries +# for all platforms. git-ref defaults to src. +# +# Before using this script you need to set up build servers and list them +# in ~/.ssh/config. There should be a Host entry for every value of `assets` +# association defined below. VMs and cloud instances work as well as physical +# machines, including localhost. As long as the machine has been set up as +# described below and you can SSH to it without password, it should work. +# +# ===[ Build Server Setup ]=== +# +# Linux +# +# - Install docker. +# $ apt install # adjust appropriately if there is no `apt` +# $ usermod -aG docker $USER # not needed if going to build as root +# - Install git. 
+#     $ apt install git          # adjust appropriately if there is no `apt`
+#
+# macOS
+#
+#   - Install compiler tools:
+#     $ xcode-select --install
+#   - Install homebrew:
+#     $ bash -c "$(curl -fsSL"
+#
+# FreeBSD
+#
+#   - Install git.
+#     $ pkg install git
+#
+# Windows
+#
+#   - Disable Windows Defender (optional).
+#     ps> Set-MpPreference -DisableRealtimeMonitoring $true
+#   - Install 64-bit and 32-bit msys2:
+#     - Open each of them after installation, type `pacman -Syu --noconfirm` and close the window.
+#     - Then run in powershell while having no msys2 or cygwin windows open:
+#       ps> C:\msys32\autorebase.bat
+#       ps> C:\msys64\autorebase.bat
+#   - Install 64-bit and 32-bit cygwin:
+#     - Choose to install 32-bit to c:/cygwin32 instead of the default c:/cygwin.
+#     - Select these packages: binutils, cmake, gcc-core, gcc-g++, git, make, wget.
+#
+# IMPORTANT: Install msys2 and cygwin one at a time.
+#
+# IMPORTANT: msys2 builder can reboot the build machine.
+#
+# Option 1: OpenSSH for Windows
+#
+#   - Install OpenSSH:
+#     ps> Add-WindowsCapability -Online -Name OpenSSH.Server~~~~
+#     ps> Start-Service sshd
+#     ps> Set-Service -Name sshd -StartupType 'Automatic'
+#   - Enable publickey authentication:
+#     ps> cd $env:USERPROFILE
+#     ps> mkdir .ssh
+#     ps> notepad.exe .ssh/authorized_keys
+#     - Paste your public key, save, close.
+#     ps> icacls .ssh/authorized_keys /inheritance:r
+#     ps> notepad.exe C:\ProgramData\ssh\sshd_config
+#     - Comment out these two lines, save, close:
+#       # Match Group administrators
+#       # AuthorizedKeysFile __PROGRAMDATA__/ssh/administrators_authorized_keys
+#     ps> Restart-Service sshd
+#
+# Option 2: OpenSSH from WSL
+#
+#   - Install WSL.
+#   - Install Ubuntu.
+#   - Install sshd.
+#     $ apt install openssh-server
+#     $ dpkg-reconfigure openssh-server
+#     $ cat >/etc/ssh/sshd_config <<\END
+#     ClientAliveInterval 60
+#     AcceptEnv TERM LANG LC_*
+#     PermitRootLogin no
+#     AllowTcpForwarding no
+#     AllowAgentForwarding no
+#     AllowStreamLocalForwarding no
+#     AuthenticationMethods publickey
+#     END
+#     service ssh --full-restart
+#   - Add your public ssh key to ~/.ssh/authorized_keys.
+#   - Make `sshd` start when Windows boots.
+
+'emulate' '-L' 'zsh' '-o' 'no_aliases' '-o' 'err_return'
+setopt no_unset extended_glob pipe_fail prompt_percent typeset_silent \
+  no_prompt_subst no_prompt_bang pushd_silent warn_create_global
+
+autoload -Uz is-at-least
+
+if ! is-at-least 5.1 || [[ $ZSH_VERSION == 5.4.* ]]; then
+  print -ru2 -- "[error] unsupported zsh version: $ZSH_VERSION"
+  return 1
+fi
+
+zmodload zsh/system
+
+local -r git_url=''
+
+local -rA assets=(
+  # target kernel-arch   hostname of the build machine
+  cygwin_nt-10.0-i686 build-windows-x86_64
+  cygwin_nt-10.0-x86_64 build-windows-x86_64
+  msys_nt-10.0-i686 build-windows-x86_64
+  msys_nt-10.0-x86_64 build-windows-x86_64
+  darwin-x86_64 build-macos-x86_64
+  freebsd-amd64 build-freebsd-amd64
+  linux-aarch64 build-linux-aarch64
+  linux-armv6l build-linux-armv7l
+  linux-armv7l build-linux-armv7l
+  linux-i686 build-linux-x86_64
+  linux-x86_64 build-linux-x86_64
+)
+
+local -rA protocol=(
+  'cygwin_nt-10.0-*' windows
+  'msys_nt-10.0-*' windows
+  'darwin-*' unix
+  'freebsd-*' unix
+  'linux-*' unix
+)
+
+local -r rootdir=${ZSH_SCRIPT:h}
+local -r logs=$rootdir/logs
+local -r locks=$rootdir/locks
+local -r binaries=$rootdir/usrbin
+
+function usage() {
+  print -r -- 'usage: mbuild [-b REF] [KERNEL-ARCH]...'
+}
+
+local OPTARG opt git_ref=src
+local -i OPTIND
+while getopts ":b:h" opt; do
+  case $opt in
+    h) usage; return 0;;
+    b) [[ -n $OPTARG ]]; git_ref=$OPTARG;;  # err_return is set above, so an empty REF stops the script here
+    \?) print -ru2 -- "mbuild: invalid option: -$OPTARG" ; return 1;;
+    :) print -ru2 -- "mbuild: missing required argument: -$OPTARG"; return 1;;
+    *) print -ru2 -- "mbuild: invalid option: -$opt" ; return 1;;
+  esac
+done
+
+shift $((OPTIND - 1))
+
+(( $# )) || set -- ${(k)assets}  # no platforms given: build every key of `assets`
+set -- ${(u)@}  # drop duplicate platforms
+
+local platform
+for platform; do
+  if (( ! $+assets[$platform] )); then
+    print -ru2 -- "mbuild: invalid platform: $platform"
+    return 1
+  fi
+done
+
+local build='
+  rm -rf gitstatus
+  git clone --recursive --shallow-submodules --depth=1 -b '$git_ref' '$git_url'
+  cd gitstatus
+  if command -v zsh >/dev/null 2>&1; then
+    sh=zsh
+  elif command -v dash >/dev/null 2>&1; then
+    sh=dash
+  elif command -v ash >/dev/null 2>&1; then
+    sh=ash
+  else
+    sh=sh
+  fi
+  $sh -x ./build -m '
+
+function build-unix() {
+  local intro flags=(-sw)
+  case $2 in
+    darwin-*) intro='PATH="/usr/local/bin:$PATH"';;
+    linux-*) flags+=(-d docker);;
+  esac
+  ssh $1 -- /bin/sh -uex <<<"
+    $intro
+    cd /tmp
+    $build ${2##*-} ${(j: :)${(@q)flags}}"
+  scp $1:/tmp/gitstatus/usrbin/gitstatusd-$2 $binaries/
+}
+
+function build-windows() {
+  local shell=$(ssh $1 'echo $0')
+  if [[ $shell == '$0'* ]]; then
+    local c='c:'
+  else
+    local c='/mnt/c'
+  fi
+
+  local tmp env bin intro flags=(-w)
+  case $2 in
+    msys_nt-10.0-i686) bin='msys32/usr/bin';|
+    msys_nt-10.0-x86_64) bin='msys64/usr/bin';|
+    cygwin_nt-10.0-i686) bin='cygwin32/bin' ;|
+    cygwin_nt-10.0-x86_64) bin='cygwin64/bin' ;|
+    msys_nt-10.0-*)
+      flags+=(-s)
+      tmp='/c/tmp'
+      env='MSYSTEM=MSYS'
+      while true; do
+        local out
+        out="$(ssh $1 cmd.exe "$c/${bin%%/*}/autorebase.bat" 2>&1)"
+        [[ $out == *"The following DLLs couldn't be rebased"* ]] || break
+        # Reboot to get rid of whatever is using those DLLs.
+        ssh $1 powershell.exe <<<'Restart-Computer -Force' || true
+        sleep 30
+        while ! ssh $1 <<<''; do sleep 5; done
+      done
+      () {
+        while true; do
+          local -i fd
+          exec {fd}< <(
+            ssh $1 $c/$bin/env.exe $env c:/$bin/bash.exe -l 2>&1 <<<"
+              pacman -Syu --noconfirm
+              exit")
+          {
+            local line
+            while true; do
+              IFS= read -u $fd -r line || return 0
+              if [[ $line == *"warning: terminate MSYS2"* ]]; then
+                # At this point the machine is hosed. Rogue process with corrupted name
+                # is eating all CPU. The top SSH connection won't terminate on its own.
+                ssh $1 powershell.exe <<<'Restart-Computer -Force' || true
+                sleep 30
+                while ! ssh $1 <<<''; do sleep 5; done
+                break
+              fi
+            done
+          } always {
+            exec {fd}<&-
+            kill -- -$sysparams[procsubstpid] 2>/dev/null || true
+          }
+        done
+      } "$@"
+      intro='pacman -Syu --noconfirm; pacman -S --needed --noconfirm git; '
+      intro+='PATH="$PATH:/usr/bin/site_perl:/usr/bin/vendor_perl:/usr/bin/core_perl"'
+      ;;
+    cygwin_nt-10.0-*)
+      tmp='/cygdrive/c/tmp'
+      ;;
+  esac
+
+  ssh $1 $c/$bin/env.exe $env c:/$bin/bash.exe -l <<<"
+    set -uex
+    $intro
+    mkdir -p -- $tmp
+    cd -- $tmp
+    $build ${2##*-} ${(j: :)${(@q)flags}}
+    exit"
+  scp $1:$c/tmp/gitstatus/usrbin/gitstatusd-$2 $binaries/
+  chmod +x $binaries/gitstatusd-$2
+}
+
+function build() (
+  setopt xtrace
+  local platform=$1
+  local machine=$assets[$platform]
+  print -n >>$locks/$machine  # make sure the per-machine lock file exists
+  zsystem flock $locks/$machine  # one build per machine at a time (this function body is a subshell)
+  build-${protocol[(k)$platform]} $machine $platform
+  local tmp=gitstatusd-$platform.tmp.$$.tar.gz
+  ( cd -q -- $binaries; GZIP=-9 tar -czf $tmp gitstatusd-$platform )
+  mv -f -- $binaries/$tmp $binaries/gitstatusd-$platform.tar.gz
+)
+
+function mbuild() {
+  local platform pid pids=()
+  for platform; do
+    build $platform &>$logs/$platform &
+    print -r -- "starting build for $platform on $assets[$platform] (pid $!)"
+    pids+=($platform $!)
+  done
+  for platform pid in $pids; do
+    print -rn -- "$platform => "
+    if wait $pid; then
+      print -r -- "ok"
+    else
+      print -r -- "error $?"
+      print -r -- "---------------------"
+      >&2 cat $logs/$platform
+      return 1
+    fi
+  done
+}
+
+# Copied from
+function run-process-tree() {
+  zmodload zsh/parameter zsh/param/private || return
+  local -P opt=(${(kv)options[@]}) || return
+  local -P pat=(${patchars[@]}) || return
+  local -P dis_pat=(${dis_patchars[@]}) || return
+  emulate -L zsh -o err_return || return
+  setopt monitor traps_async pipe_fail no_unset
+  zmodload zsh/system
+
+  if (( $# == 0 )); then
+    print -ru2 -- 'usage: run-process-tree command [arg]...'
+    return 1
+  fi
+
+  local -P stdout REPLY
+  exec {stdout}>&1
+  {
+    {
+      local -Pi pipe
+      local -P gid=$sysparams[pid]
+      local -P sig=(ABRT EXIT HUP ILL INT PIPE QUIT TERM ZERR)
+      local -P trap=(trap "trap - $sig; kill -- -$sysparams[pid]" $sig)
+
+      exec {pipe}>&1 1>&$stdout
+      $trap
+
+      {
+        $trap
+        while sleep 1 && print -u $pipe .; do; done
+      } 2>/dev/null &
+      local -Pi watchdog=$!
+
+      {
+        trap - ZERR
+        exec {pipe}>&-
+        enable -p -- $pat
+        disable -p -- $dis_pat
+        options=($opt zle off monitor off)
+        "$@"
+      } &
+      local -Pi ret
+      wait $! || ret=$?
+
+      trap "exit $ret" TERM
+      kill $watchdog
+      wait $watchdog
+      return ret
+    } | while read; do; done || return
+  } always {
+    exec {stdout}>&-
+  }
+}
+
+mkdir -p -- $logs $locks $binaries
+run-process-tree mbuild $@
diff --git a/src/algorithm.h b/src/algorithm.h
new file mode 100644
index 00000000..b87b13f0
--- /dev/null
+++ b/src/algorithm.h
@@ -0,0 +1,37 @@
+// Copyright 2019 Roman Perepelitsa.
+//
+// This file is part of GitStatus.
+//
+// GitStatus is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// GitStatus is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with GitStatus. If not, see .
+
+#ifndef ROMKATV_GITSTATUS_ALGORITHM_H_
+#define ROMKATV_GITSTATUS_ALGORITHM_H_
+
+#include
+
+namespace gitstatus {
+
+// Requires: Iter is a BidirectionalIterator.
+//
+// Returns an iterator to the last element in [begin, end) that compares equal to val, or begin if
+// none compares equal; a result of begin is therefore ambiguous unless the caller checks *begin.
+template
+Iter FindLast(Iter begin, Iter end, const T& val) {
+  while (begin != end && !(*--end == val)) {}
+  return end;
+}
+
+}  // namespace gitstatus
+
+#endif  // ROMKATV_GITSTATUS_ALGORITHM_H_
diff --git a/src/ b/src/
new file mode 100644
index 00000000..4c137639
--- /dev/null
+++ b/src/
@@ -0,0 +1,118 @@
+// Copyright 2019 Roman Perepelitsa.
+//
+// This file is part of GitStatus.
+//
+// GitStatus is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// GitStatus is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with GitStatus. If not, see .
+
+#include "arena.h"
+
+#include
+#include
+
+#include "bits.h"
+#include "check.h"
+
+namespace gitstatus {
+
+namespace {
+
+size_t Clamp(size_t min, size_t val, size_t max) { return std::min(max, std::max(min, val)); }
+
+static const uintptr_t kSingularity = reinterpret_cast(&kSingularity);
+
+}  // namespace
+
+// start, tip and end all hold the same sentinel address: the shared empty block has zero size.
+Arena::Block Arena::g_empty_block = {kSingularity, kSingularity, kSingularity};
+
+Arena::Arena(Arena::Options opt) : opt_(std::move(opt)), top_(&g_empty_block) {
+  CHECK(opt_.min_block_size <= opt_.max_block_size);
+}
+
+Arena::Arena(Arena&& other) : Arena() { *this = std::move(other); }
+
+Arena::~Arena() {
+  // See comments in Makefile for the reason sized deallocation is not used.
+  for (const Block& b : blocks_) ::operator delete(reinterpret_cast(b.start));
+}
+
+Arena& Arena::operator=(Arena&& other) {
+  if (this != &other) {
+    // In case std::vector ever gets small object optimization.
+    size_t idx = other.reusable_ ? other.top_ - : 0;
+    opt_ = other.opt_;
+    blocks_ = std::move(other.blocks_);  // NOTE(review): blocks already owned by *this are not freed here; confirm the target is always empty.
+    reusable_ = other.reusable_;
+    top_ = reusable_ ? + idx : &g_empty_block;
+    other.blocks_.clear();
+    other.reusable_ = 0;
+    other.top_ = &g_empty_block;
+  }
+  return *this;
+}
+
+void Arena::Reuse(size_t num_blocks) {
+  reusable_ = std::min(reusable_, num_blocks);
+  for (size_t i = reusable_; i != blocks_.size(); ++i) {
+    const Block& b = blocks_[i];
+    // See comments in Makefile for the reason sized deallocation is not used.
+    ::operator delete(reinterpret_cast(b.start));
+  }
+  blocks_.resize(reusable_);
+  if (reusable_) {
+    top_ =;
+    top_->tip = top_->start;
+  } else {
+    top_ = &g_empty_block;
+  }
+}
+
+void Arena::AddBlock(size_t size, size_t alignment) {
+  if (alignment > alignof(std::max_align_t)) {
+    size += alignment - 1;  // leave room to align the first allocation within the new block
+  } else {
+    size = std::max(size, alignment);
+  }
+  if (size <= top_->size() && top_ < + reusable_ - 1) {
+    assert(blocks_.front().size() == top_->size());
+    ++top_;
+    top_->tip = top_->start;
+    return;
+  }
+  if (size <= opt_.max_alloc_threshold) {
+    size =
+        std::max(size, Clamp(opt_.min_block_size, NextPow2(top_->size() + 1), opt_.max_block_size));
+  }
+
+  auto p = reinterpret_cast(::operator new(size));
+  blocks_.push_back(Block{p, p, p + size});
+  if (reusable_) {
+    if (size < blocks_.front().size()) {
+      top_ = &blocks_.back();
+      return;
+    }
+    if (size > blocks_.front().size()) reusable_ = 0;
+  }
+  std::swap(blocks_.back(), blocks_[reusable_]);
+  top_ = &blocks_[reusable_++];
+}
+
+void* Arena::AllocateSlow(size_t size, size_t alignment) {
+  assert(alignment && !(alignment & (alignment - 1)));
+  AddBlock(size, alignment);
+  assert(Align(top_->tip, alignment) + size <= top_->end);
+  return Allocate(size, alignment);
+}
+
+}  // namespace gitstatus
diff --git a/src/arena.h b/src/arena.h
new file mode 100644
index 00000000..0bad0bfa
--- /dev/null
+++ b/src/arena.h
@@ -0,0 +1,273 @@
+// Copyright 2019 Roman Perepelitsa.
+//
+// This file is part of GitStatus.
+//
+// GitStatus is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// GitStatus is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with GitStatus. If not, see .
+
+#ifndef ROMKATV_GITSTATUS_ARENA_H_
+#define ROMKATV_GITSTATUS_ARENA_H_
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "string_view.h"
+
+namespace gitstatus {
+
+// Thread-compatible. Very fast and very flexible w.r.t. allocation size and alignment.
+//
+// Natural API extensions:
+//
+//   // Donates a block to the arena. When the time comes, it'll be freed with
+//   // free(p, size, userdata).
+//   void Donate(void* p, size_t size, void* userdata, void(*free)(void*, void*));
+class Arena {
+ public:
+  struct Options {
+    // The first call to Allocate() will allocate a block of this size. There is one exception when
+    // the first requested allocation size is larger than this limit. Subsequent blocks will be
+    // twice as large as the last until they saturate at max_block_size.
+    size_t min_block_size = 64;
+
+    // Allocate blocks at most this large. There is one exception when the requested allocation
+    // size is larger than this limit.
+    size_t max_block_size = 8 << 10;
+
+    // When the size of the first allocation in a block is larger than this threshold, the block
+    // size will be equal to the allocation size. This is meant to reduce memory waste when making
+    // many allocations with sizes slightly over max_block_size / 2. With max_alloc_threshold equal
+    // to max_block_size / N, the upper bound on wasted memory when making many equally-sized
+    // allocations is 100.0 / (N + 1) percent. When making allocations of different sizes, the upper
+    // bound on wasted memory is 50%.
+    size_t max_alloc_threshold = 1 << 10;
+
+    // Natural extensions:
+    //
+    //   void* userdata;
+    //   void (*alloc)(size_t size, size_t alignment, void* userdata);
+    //   void (*free)(size_t size, void* userdata);
+  };
+
+  // Requires: opt.min_block_size <= opt.max_block_size.
+  //
+  // Doesn't allocate any memory.
+  Arena(Options opt);
+  Arena() : Arena(Options()) {}
+  Arena(Arena&&);
+  ~Arena();
+
+  Arena& operator=(Arena&& other);
+
+  // Requires: alignment is a power of 2.
+  //
+  // Result is never null and always aligned. If size is zero, the result may be equal to the last.
+  // Alignment above alignof(std::max_align_t) is supported. There is no requirement for alignment
+  // to be less than size or to divide it.
+  inline void* Allocate(size_t size, size_t alignment) {
+    assert(alignment && !(alignment & (alignment - 1)));
+    uintptr_t p = Align(top_->tip, alignment);
+    uintptr_t e = p + size;
+    if (e <= top_->end) {
+      top_->tip = e;
+      return reinterpret_cast(p);
+    }
+    return AllocateSlow(size, alignment);
+  }
+
+  template
+  inline T* Allocate(size_t n) {
+    static_assert(!std::is_reference(), "");
+    return static_cast(Allocate(n * sizeof(T), alignof(T)));
+  }
+
+  template
+  inline T* Allocate() {
+    return Allocate(1);
+  }
+
+  inline char* MemDup(const char* p, size_t len) {
+    char* res = Allocate(len);
+    std::memcpy(res, p, len);
+    return res;
+  }
+
+  // Copies the null-terminated string (including the trailing null character) to the arena and
+  // returns a pointer to the copy.
+  inline char* StrDup(const char* s) {
+    size_t len = std::strlen(s);
+    return MemDup(s, len + 1);
+  }
+
+  // Guarantees: !StrDup(p, len)[len].
+  inline char* StrDup(const char* p, size_t len) {
+    char* res = Allocate(len + 1);
+    std::memcpy(res, p, len);
+    res[len] = 0;
+    return res;
+  }
+
+  // Guarantees: !StrDup(s)[s.len].
+  inline char* StrDup(StringView s) {
+    return StrDup(s.ptr, s.len);
+  }
+
+  template
+  inline char* StrCat(const Ts&... ts) {
+    return [&](std::initializer_list ss) {
+      size_t len = 0;
+      for (StringView s : ss) len += s.len;
+      char* p = Allocate(len + 1);
+      for (StringView s : ss) {
+        std::memcpy(p, s.ptr, s.len);
+        p += s.len;
+      }
+      *p = 0;
+      return p - len;
+    }({ts...});
+  }
+
+  // Copies/moves `val` to the arena and returns a pointer to it.
+  template
+  inline std::remove_const_t>* Dup(T&& val) {
+    return DirectInit>>(std::forward(val));
+  }
+
+  // The same as `new T(args...)` but on the arena.
+  template
+  inline T* DirectInit(Args&&... args) {
+    T* res = Allocate();
+    ::new (const_cast(static_cast(res))) T(std::forward(args)...);
+    return res;
+  }
+
+  // The same as `new T{args...}` but on the arena.
+  template
+  inline T* BraceInit(Args&&... args) {
+    T* res = Allocate();
+    ::new (const_cast(static_cast(res))) T{std::forward(args)...};
+    return res;
+  }
+
+  // Tip() and TipSize() allow you to allocate the remainder of the current block. They can be
+  // useful if you are flexible w.r.t. the allocation size.
+  //
+  // Invariant:
+  //
+  //   const void* tip = Tip();
+  //   void* p = Allocate(TipSize(), 1);  // grab the remainder of the current block
+  //   assert(p == tip);
+  const void* Tip() const { return reinterpret_cast(top_->tip); }
+  size_t TipSize() const { return top_->end - top_->tip; }
+
+  // Invalidates all allocations (without running destructors of allocated objects) and frees all
+  // blocks except at most the specified number of blocks. The retained blocks will be used to
+  // fulfil future allocation requests.
+  void Reuse(size_t num_blocks = std::numeric_limits::max());
+
+ private:
+  struct Block {
+    size_t size() const { return end - start; }
+    uintptr_t start;
+    uintptr_t tip;
+    uintptr_t end;
+  };
+
+  inline static size_t Align(size_t n, size_t m) { return (n + m - 1) & ~(m - 1); };
+
+  void AddBlock(size_t size, size_t alignment);
+  bool ReuseBlock(size_t size, size_t alignment);
+
+  __attribute__((noinline)) void* AllocateSlow(size_t size, size_t alignment);
+
+  Options opt_;
+  std::vector blocks_;
+  // Invariant: !blocks_.empty() <= reusable_ && reusable_ <= blocks_.size().
+  size_t reusable_ = 0;
+  // Invariant: (top_ == &g_empty_block) == blocks_.empty().
+  // Invariant: blocks_.empty() || top_ == &blocks_.back() || top_ < + reusable_.
+  Block* top_;
+
+  static Block g_empty_block;
+};
+
+// Copies of ArenaAllocator use the same thread-compatible Arena without synchronization.
+template
+class ArenaAllocator {
+ public:
+  using value_type = T;
+  using pointer = T*;
+  using const_pointer = const T*;
+  using reference = T&;
+  using const_reference = const T&;
+  using size_type = size_t;
+  using difference_type = ptrdiff_t;
+  using propagate_on_container_move_assignment = std::true_type;
+  template
+  struct rebind {
+    using other = ArenaAllocator;
+  };
+  using is_always_equal = std::false_type;
+
+  ArenaAllocator(Arena* arena = nullptr) : arena_(*arena) {}  // NOTE(review): the nullptr default is dereferenced here; callers need to pass a valid Arena.
+
+  Arena& arena() const { return arena_; }
+
+  pointer address(reference x) const { return &x; }
+  const_pointer address(const_reference x) const { return &x; }
+  pointer allocate(size_type n, const void* hint = nullptr) { return arena_.Allocate(n); }
+  void deallocate(T* p, std::size_t n) {}  // no-op: this allocator never releases individual allocations
+  size_type max_size() const { return std::numeric_limits::max() / sizeof(value_type); }
+
+  template
+  void construct(U* p, Args&&... args) {
+    ::new (const_cast(static_cast(p))) U(std::forward(args)...);
+  }
+
+  template
+  void destroy(U* p) {
+    p->~U();
+  }
+
+  bool operator==(const ArenaAllocator& other) const { return &arena_ == &other.arena_; }
+  bool operator!=(const ArenaAllocator& other) const { return &arena_ != &other.arena_; }
+
+ private:
+  Arena& arena_;
+};
+
+template
+struct LazyWithArena;
+
+template