diff --git a/23-rseq/Makefile b/23-rseq/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..10c97b3a9251fe6ee9f88d8567fcd7a5d2e916c1
--- /dev/null
+++ b/23-rseq/Makefile
@@ -0,0 +1,23 @@
+PROG = rseq
+
+${PROG}: ${PROG}.o rseq-asm.o Makefile
+	gcc ${PROG}.o rseq-asm.o -o $@ -Wall -g -Wno-unused-function -O3 -lpthread
+
+%.o: %.c
+	gcc -c $< -o $@ -Wall -g -Wno-unused-function -O3
+
+%.o: %.S
+	gcc -c $< -o $@
+
+run: ${PROG}
+	./${PROG} 32 regular
+	./${PROG} 32 lock
+
+clean:
+	rm -f ./${PROG} *.o
+
+man:
+	man ./rseq.2
+
+plot:
+	./plot.py data
diff --git a/23-rseq/benchmark.sh b/23-rseq/benchmark.sh
new file mode 100644
index 0000000000000000000000000000000000000000..937608a39201423e1e754108fb482415d7994be4
--- /dev/null
+++ b/23-rseq/benchmark.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+rm -f data
+
+for mode in rseq-atomic rseq regular lock getcpu-atomic ; do
+    for threads in $(seq 1 32); do
+        ./rseq $threads $mode 8 >> data
+    done
+done
+
+echo "Run: ./plot.py data"
diff --git a/23-rseq/data.Ryzen7_PRO_5850U b/23-rseq/data.Ryzen7_PRO_5850U
new file mode 100644
index 0000000000000000000000000000000000000000..d0a00babc3ebd7242f040bb49f4415602201fb99
--- /dev/null
+++ b/23-rseq/data.Ryzen7_PRO_5850U
@@ -0,0 +1,160 @@
+mode=rseq-atomic threads=1 sum=400000000 state=ok aborts=0 cputime=2.199005s per_increment=5.497513ns
+mode=rseq-atomic threads=2 sum=800000000 state=ok aborts=0 cputime=3.284306s per_increment=4.105382ns
+mode=rseq-atomic threads=3 sum=1200000000 state=ok aborts=0 cputime=4.448196s per_increment=3.706830ns
+mode=rseq-atomic threads=4 sum=1600000000 state=ok aborts=0 cputime=8.446127s per_increment=5.278830ns
+mode=rseq-atomic threads=5 sum=2000000000 state=ok aborts=0 cputime=9.053520s per_increment=4.526760ns
+mode=rseq-atomic threads=6 sum=2400000000 state=ok aborts=0 cputime=12.195459s per_increment=5.081441ns
+mode=rseq-atomic threads=7 sum=2800000000 state=ok aborts=0 cputime=11.748222s per_increment=4.195793ns
+mode=rseq-atomic threads=8 sum=3200000000 state=ok aborts=0 cputime=15.032143s per_increment=4.697545ns
+mode=rseq-atomic threads=9 sum=3600000000 state=ok aborts=0 cputime=16.116472s per_increment=4.476798ns
+mode=rseq-atomic threads=10 sum=4000000000 state=ok aborts=0 cputime=20.672133s per_increment=5.168033ns
+mode=rseq-atomic threads=11 sum=4400000000 state=ok aborts=0 cputime=23.798913s per_increment=5.408844ns
+mode=rseq-atomic threads=12 sum=4800000000 state=ok aborts=0 cputime=26.946868s per_increment=5.613931ns
+mode=rseq-atomic threads=13 sum=5200000000 state=ok aborts=0 cputime=29.666975s per_increment=5.705187ns
+mode=rseq-atomic threads=14 sum=5600000000 state=ok aborts=0 cputime=33.483891s per_increment=5.979266ns
+mode=rseq-atomic threads=15 sum=6000000000 state=ok aborts=0 cputime=37.403459s per_increment=6.233910ns
+mode=rseq-atomic threads=16 sum=6400000000 state=ok aborts=0 cputime=40.505500s per_increment=6.328984ns
+mode=rseq-atomic threads=17 sum=6800000000 state=ok aborts=0 cputime=42.792953s per_increment=6.293081ns
+mode=rseq-atomic threads=18 sum=7200000000 state=ok aborts=0 cputime=45.171370s per_increment=6.273801ns
+mode=rseq-atomic threads=19 sum=7600000000 state=ok aborts=0 cputime=47.448919s per_increment=6.243279ns
+mode=rseq-atomic threads=20 sum=8000000000 state=ok aborts=0 cputime=49.511613s per_increment=6.188952ns
+mode=rseq-atomic threads=21 sum=8400000000 state=ok aborts=0 cputime=52.517915s per_increment=6.252133ns
+mode=rseq-atomic threads=22 sum=8800000000 state=ok aborts=0 cputime=55.319380s per_increment=6.286293ns
+mode=rseq-atomic threads=23 sum=9200000000 state=ok aborts=0 cputime=57.941721s per_increment=6.298013ns
+mode=rseq-atomic threads=24 sum=9600000000 state=ok aborts=0 cputime=60.357880s per_increment=6.287279ns
+mode=rseq-atomic threads=25 sum=10000000000 state=ok aborts=0 cputime=62.908038s per_increment=6.290804ns
+mode=rseq-atomic threads=26 sum=10400000000 state=ok aborts=0 cputime=65.316597s per_increment=6.280442ns
+mode=rseq-atomic threads=27 sum=10800000000 state=ok aborts=0 cputime=68.210331s per_increment=6.315771ns
+mode=rseq-atomic threads=28 sum=11200000000 state=ok aborts=0 cputime=71.145485s per_increment=6.352275ns
+mode=rseq-atomic threads=29 sum=11600000000 state=ok aborts=0 cputime=74.014644s per_increment=6.380573ns
+mode=rseq-atomic threads=30 sum=12000000000 state=ok aborts=0 cputime=80.516616s per_increment=6.709718ns
+mode=rseq-atomic threads=31 sum=12400000000 state=ok aborts=0 cputime=78.501505s per_increment=6.330767ns
+mode=rseq-atomic threads=32 sum=12800000000 state=ok aborts=0 cputime=81.190299s per_increment=6.342992ns
+mode=rseq threads=1 sum=400000000 state=ok aborts=0 cputime=2.483927s per_increment=6.209818ns
+mode=rseq threads=2 sum=800000000 state=ok aborts=1 cputime=2.927913s per_increment=3.659891ns
+mode=rseq threads=3 sum=1200000000 state=ok aborts=0 cputime=4.631658s per_increment=3.859715ns
+mode=rseq threads=4 sum=1600000000 state=ok aborts=2 cputime=9.087390s per_increment=5.679619ns
+mode=rseq threads=5 sum=2000000000 state=ok aborts=1 cputime=7.195553s per_increment=3.597776ns
+mode=rseq threads=6 sum=2400000000 state=ok aborts=0 cputime=9.132219s per_increment=3.805091ns
+mode=rseq threads=7 sum=2800000000 state=ok aborts=0 cputime=10.669462s per_increment=3.810522ns
+mode=rseq threads=8 sum=3200000000 state=ok aborts=2 cputime=14.527707s per_increment=4.539909ns
+mode=rseq threads=9 sum=3600000000 state=ok aborts=0 cputime=15.470640s per_increment=4.297400ns
+mode=rseq threads=10 sum=4000000000 state=ok aborts=3 cputime=18.031536s per_increment=4.507884ns
+mode=rseq threads=11 sum=4400000000 state=ok aborts=1 cputime=20.358866s per_increment=4.627015ns
+mode=rseq threads=12 sum=4800000000 state=ok aborts=4 cputime=23.474872s per_increment=4.890598ns
+mode=rseq threads=13 sum=5200000000 state=ok aborts=5 cputime=29.528908s per_increment=5.678636ns
+mode=rseq threads=14 sum=5600000000 state=ok aborts=8 cputime=30.623857s per_increment=5.468546ns
+mode=rseq threads=15 sum=6000000000 state=ok aborts=11 cputime=33.960129s per_increment=5.660021ns
+mode=rseq threads=16 sum=6400000000 state=ok aborts=14 cputime=36.712009s per_increment=5.736251ns
+mode=rseq threads=17 sum=6800000000 state=ok aborts=18 cputime=38.662291s per_increment=5.685631ns
+mode=rseq threads=18 sum=7200000000 state=ok aborts=22 cputime=40.966459s per_increment=5.689786ns
+mode=rseq threads=19 sum=7600000000 state=ok aborts=24 cputime=42.702562s per_increment=5.618758ns
+mode=rseq threads=20 sum=8000000000 state=ok aborts=17 cputime=45.202435s per_increment=5.650304ns
+mode=rseq threads=21 sum=8400000000 state=ok aborts=38 cputime=48.130558s per_increment=5.729828ns
+mode=rseq threads=22 sum=8800000000 state=ok aborts=48 cputime=50.154982s per_increment=5.699430ns
+mode=rseq threads=23 sum=9200000000 state=ok aborts=61 cputime=53.364385s per_increment=5.800477ns
+mode=rseq threads=24 sum=9600000000 state=ok aborts=57 cputime=54.841167s per_increment=5.712622ns
+mode=rseq threads=25 sum=10000000000 state=ok aborts=21 cputime=56.911656s per_increment=5.691166ns
+mode=rseq threads=26 sum=10400000000 state=ok aborts=39 cputime=59.314805s per_increment=5.703347ns
+mode=rseq threads=27 sum=10800000000 state=ok aborts=58 cputime=61.900339s per_increment=5.731513ns
+mode=rseq threads=28 sum=11200000000 state=ok aborts=42 cputime=63.666270s per_increment=5.684488ns
+mode=rseq threads=29 sum=11600000000 state=ok aborts=56 cputime=66.452641s per_increment=5.728676ns
+mode=rseq threads=30 sum=12000000000 state=ok aborts=87 cputime=68.713723s per_increment=5.726144ns
+mode=rseq threads=31 sum=12400000000 state=ok aborts=48 cputime=70.797445s per_increment=5.709471ns
+mode=rseq threads=32 sum=12800000000 state=ok aborts=69 cputime=73.319152s per_increment=5.728059ns
+mode=regular threads=1 sum=400000000 state=ok aborts=0 cputime=4.445797s per_increment=11.114493ns
+mode=regular threads=2 sum=800000000 state=ok aborts=0 cputime=10.181069s per_increment=12.726336ns
+mode=regular threads=3 sum=1200000000 state=ok aborts=0 cputime=15.207685s per_increment=12.673070ns
+mode=regular threads=4 sum=1600000000 state=ok aborts=0 cputime=20.394730s per_increment=12.746707ns
+mode=regular threads=5 sum=2000000000 state=ok aborts=0 cputime=25.606271s per_increment=12.803136ns
+mode=regular threads=6 sum=2400000000 state=ok aborts=0 cputime=30.550001s per_increment=12.729167ns
+mode=regular threads=7 sum=2800000000 state=ok aborts=0 cputime=31.396464s per_increment=11.213023ns
+mode=regular threads=8 sum=3200000000 state=ok aborts=0 cputime=40.884241s per_increment=12.776325ns
+mode=regular threads=9 sum=3600000000 state=ok aborts=0 cputime=48.727290s per_increment=13.535358ns
+mode=regular threads=10 sum=4000000000 state=ok aborts=0 cputime=57.116025s per_increment=14.279006ns
+mode=regular threads=11 sum=4400000000 state=ok aborts=0 cputime=65.007191s per_increment=14.774362ns
+mode=regular threads=12 sum=4800000000 state=ok aborts=0 cputime=72.886369s per_increment=15.184660ns
+mode=regular threads=13 sum=5200000000 state=ok aborts=0 cputime=82.631048s per_increment=15.890586ns
+mode=regular threads=14 sum=5600000000 state=ok aborts=0 cputime=83.803839s per_increment=14.964971ns
+mode=regular threads=15 sum=6000000000 state=ok aborts=0 cputime=101.826165s per_increment=16.971027ns
+mode=regular threads=16 sum=6399999999 state=fail aborts=0 cputime=103.622771s per_increment=16.191058ns
+mode=regular threads=17 sum=6799999980 state=fail aborts=0 cputime=117.516558s per_increment=17.281847ns
+mode=regular threads=18 sum=7199999984 state=fail aborts=0 cputime=124.609096s per_increment=17.306819ns
+mode=regular threads=19 sum=7599999959 state=fail aborts=0 cputime=131.441945s per_increment=17.294993ns
+mode=regular threads=20 sum=7999999972 state=fail aborts=0 cputime=127.853660s per_increment=15.981708ns
+mode=regular threads=21 sum=8399999947 state=fail aborts=0 cputime=145.638661s per_increment=17.337936ns
+mode=regular threads=22 sum=8799999943 state=fail aborts=0 cputime=152.054029s per_increment=17.278867ns
+mode=regular threads=23 sum=9199999933 state=fail aborts=0 cputime=159.989756s per_increment=17.390191ns
+mode=regular threads=24 sum=9599999946 state=fail aborts=0 cputime=167.602853s per_increment=17.458631ns
+mode=regular threads=25 sum=9999999946 state=fail aborts=0 cputime=173.688899s per_increment=17.368890ns
+mode=regular threads=26 sum=10399999956 state=fail aborts=0 cputime=180.728688s per_increment=17.377759ns
+mode=regular threads=27 sum=10799999959 state=fail aborts=0 cputime=187.942425s per_increment=17.402076ns
+mode=regular threads=28 sum=11199999958 state=fail aborts=0 cputime=194.921763s per_increment=17.403729ns
+mode=regular threads=29 sum=11599999957 state=fail aborts=0 cputime=201.910554s per_increment=17.406082ns
+mode=regular threads=30 sum=11999999966 state=fail aborts=0 cputime=208.667108s per_increment=17.388926ns
+mode=regular threads=31 sum=12399999964 state=fail aborts=0 cputime=218.638442s per_increment=17.632132ns
+mode=regular threads=32 sum=12799999939 state=fail aborts=0 cputime=222.786872s per_increment=17.405224ns
+mode=lock threads=1 sum=400000000 state=ok aborts=0 cputime=9.739219s per_increment=24.348048ns
+mode=lock threads=2 sum=800000000 state=ok aborts=0 cputime=19.477206s per_increment=24.346508ns
+mode=lock threads=3 sum=1200000000 state=ok aborts=0 cputime=29.639653s per_increment=24.699711ns
+mode=lock threads=4 sum=1600000000 state=ok aborts=0 cputime=39.447133s per_increment=24.654458ns
+mode=lock threads=5 sum=2000000000 state=ok aborts=0 cputime=49.416199s per_increment=24.708099ns
+mode=lock threads=6 sum=2400000000 state=ok aborts=0 cputime=58.912073s per_increment=24.546697ns
+mode=lock threads=7 sum=2800000000 state=ok aborts=0 cputime=69.373970s per_increment=24.776418ns
+mode=lock threads=8 sum=3200000000 state=ok aborts=0 cputime=80.045758s per_increment=25.014299ns
+mode=lock threads=9 sum=3600000000 state=ok aborts=0 cputime=95.506874s per_increment=26.529687ns
+mode=lock threads=10 sum=4000000000 state=ok aborts=0 cputime=105.647129s per_increment=26.411782ns
+mode=lock threads=11 sum=4400000000 state=ok aborts=0 cputime=119.966161s per_increment=27.265037ns
+mode=lock threads=12 sum=4800000000 state=ok aborts=0 cputime=144.790100s per_increment=30.164604ns
+mode=lock threads=13 sum=5200000000 state=ok aborts=0 cputime=169.275312s per_increment=32.552945ns
+mode=lock threads=14 sum=5600000000 state=ok aborts=0 cputime=184.008284s per_increment=32.858622ns
+mode=lock threads=15 sum=6000000000 state=ok aborts=0 cputime=206.639565s per_increment=34.439927ns
+mode=lock threads=16 sum=6400000000 state=ok aborts=0 cputime=220.186430s per_increment=34.404130ns
+mode=lock threads=17 sum=6800000000 state=ok aborts=0 cputime=243.215361s per_increment=35.766965ns
+mode=lock threads=18 sum=7200000000 state=ok aborts=0 cputime=256.791891s per_increment=35.665540ns
+mode=lock threads=19 sum=7600000000 state=ok aborts=0 cputime=270.841824s per_increment=35.637082ns
+mode=lock threads=20 sum=8000000000 state=ok aborts=0 cputime=286.147054s per_increment=35.768382ns
+mode=lock threads=21 sum=8400000000 state=ok aborts=0 cputime=303.063539s per_increment=36.078993ns
+mode=lock threads=22 sum=8800000000 state=ok aborts=0 cputime=334.728624s per_increment=38.037344ns
+mode=lock threads=23 sum=9200000000 state=ok aborts=0 cputime=365.422783s per_increment=39.719868ns
+mode=lock threads=24 sum=9600000000 state=ok aborts=0 cputime=383.564521s per_increment=39.954638ns
+mode=lock threads=25 sum=10000000000 state=ok aborts=0 cputime=400.088520s per_increment=40.008852ns
+mode=lock threads=26 sum=10400000000 state=ok aborts=0 cputime=413.845634s per_increment=39.792849ns
+mode=lock threads=27 sum=10800000000 state=ok aborts=0 cputime=432.694245s per_increment=40.064282ns
+mode=lock threads=28 sum=11200000000 state=ok aborts=0 cputime=448.437452s per_increment=40.039058ns
+mode=lock threads=29 sum=11600000000 state=ok aborts=0 cputime=479.398819s per_increment=41.327484ns
+mode=lock threads=30 sum=12000000000 state=ok aborts=0 cputime=485.327195s per_increment=40.443933ns
+mode=lock threads=31 sum=12400000000 state=ok aborts=0 cputime=498.999020s per_increment=40.241856ns
+mode=lock threads=32 sum=12800000000 state=ok aborts=0 cputime=518.804851s per_increment=40.531629ns
+mode=getcpu-atomic threads=1 sum=400000000 state=ok aborts=0 cputime=4.590758s per_increment=11.476895ns
+mode=getcpu-atomic threads=2 sum=800000000 state=ok aborts=0 cputime=10.373209s per_increment=12.966511ns
+mode=getcpu-atomic threads=3 sum=1200000000 state=ok aborts=0 cputime=15.454717s per_increment=12.878931ns
+mode=getcpu-atomic threads=4 sum=1600000000 state=ok aborts=0 cputime=18.276674s per_increment=11.422921ns
+mode=getcpu-atomic threads=5 sum=2000000000 state=ok aborts=0 cputime=25.616891s per_increment=12.808445ns
+mode=getcpu-atomic threads=6 sum=2400000000 state=ok aborts=0 cputime=30.558029s per_increment=12.732512ns
+mode=getcpu-atomic threads=7 sum=2800000000 state=ok aborts=0 cputime=35.695183s per_increment=12.748280ns
+mode=getcpu-atomic threads=8 sum=3200000000 state=ok aborts=0 cputime=41.202231s per_increment=12.875697ns
+mode=getcpu-atomic threads=9 sum=3600000000 state=ok aborts=0 cputime=48.857853s per_increment=13.571626ns
+mode=getcpu-atomic threads=10 sum=4000000000 state=ok aborts=0 cputime=57.148052s per_increment=14.287013ns
+mode=getcpu-atomic threads=11 sum=4400000000 state=ok aborts=0 cputime=65.426838s per_increment=14.869736ns
+mode=getcpu-atomic threads=12 sum=4800000000 state=ok aborts=0 cputime=74.198416s per_increment=15.458003ns
+mode=getcpu-atomic threads=13 sum=5200000000 state=ok aborts=0 cputime=88.584374s per_increment=17.035457ns
+mode=getcpu-atomic threads=14 sum=5600000000 state=ok aborts=0 cputime=97.566401s per_increment=17.422572ns
+mode=getcpu-atomic threads=15 sum=6000000000 state=ok aborts=0 cputime=109.718405s per_increment=18.286401ns
+mode=getcpu-atomic threads=16 sum=6400000000 state=ok aborts=0 cputime=122.916752s per_increment=19.205742ns
+mode=getcpu-atomic threads=17 sum=6800000000 state=ok aborts=0 cputime=134.455988s per_increment=19.772939ns
+mode=getcpu-atomic threads=18 sum=7200000000 state=ok aborts=0 cputime=140.480629s per_increment=19.511198ns
+mode=getcpu-atomic threads=19 sum=7600000000 state=ok aborts=0 cputime=146.157256s per_increment=19.231218ns
+mode=getcpu-atomic threads=20 sum=8000000000 state=ok aborts=0 cputime=155.691843s per_increment=19.461480ns
+mode=getcpu-atomic threads=21 sum=8400000000 state=ok aborts=0 cputime=163.030484s per_increment=19.408391ns
+mode=getcpu-atomic threads=22 sum=8800000000 state=ok aborts=0 cputime=170.584687s per_increment=19.384623ns
+mode=getcpu-atomic threads=23 sum=9200000000 state=ok aborts=0 cputime=176.381227s per_increment=19.171873ns
+mode=getcpu-atomic threads=24 sum=9600000000 state=ok aborts=0 cputime=185.423582s per_increment=19.314956ns
+mode=getcpu-atomic threads=25 sum=10000000000 state=ok aborts=0 cputime=192.877967s per_increment=19.287797ns
+mode=getcpu-atomic threads=26 sum=10400000000 state=ok aborts=0 cputime=199.917576s per_increment=19.222844ns
+mode=getcpu-atomic threads=27 sum=10800000000 state=ok aborts=0 cputime=207.822609s per_increment=19.242834ns
+mode=getcpu-atomic threads=28 sum=11200000000 state=ok aborts=0 cputime=216.261564s per_increment=19.309068ns
+mode=getcpu-atomic threads=29 sum=11600000000 state=ok aborts=0 cputime=213.773868s per_increment=18.428782ns
+mode=getcpu-atomic threads=30 sum=12000000000 state=ok aborts=0 cputime=221.522895s per_increment=18.460241ns
+mode=getcpu-atomic threads=31 sum=12400000000 state=ok aborts=0 cputime=240.809263s per_increment=19.420102ns
+mode=getcpu-atomic threads=32 sum=12800000000 state=ok aborts=0 cputime=251.500147s per_increment=19.648449ns
diff --git a/23-rseq/plot.Ryzen7_PRO_5850U.png b/23-rseq/plot.Ryzen7_PRO_5850U.png
new file mode 100644
index 0000000000000000000000000000000000000000..3314476504cef82c5c861ef8a39bd5f0d2c3ed5a
Binary files /dev/null and b/23-rseq/plot.Ryzen7_PRO_5850U.png differ
diff --git a/23-rseq/plot.py b/23-rseq/plot.py
new file mode 100644
index 0000000000000000000000000000000000000000..c1681ecdd201846760fd9b4a98de15100f59fbef
--- /dev/null
+++ b/23-rseq/plot.py
@@ -0,0 +1,35 @@
+#!/usr/bin/python3
+
+import pandas as pd
+import matplotlib.pyplot as plt
+
+def read_log(fn):
+    # Each line looks like:
+    #   mode=rseq threads=1 sum=... state=ok aborts=0 cputime=2.48s per_increment=6.21ns
+    # Build one DataFrame column per key.
+    with open(fn) as fd:
+        rows = []
+        for line in fd:
+            fields = line.strip().split(" ")
+            header = [x.split("=")[0] for x in fields]
+            # rstrip("ns") drops the trailing "s"/"ns" unit suffixes
+            data = [x.split("=")[1].rstrip("ns") for x in fields]
+            rows.append(data)
+        df = pd.DataFrame(columns=header, data=rows)
+        for x in "threads sum aborts cputime per_increment".split():
+            df[x] = df[x].astype(float)
+        return df
+
+if __name__ == "__main__":
+    import sys
+    if len(sys.argv) < 2:
+        sys.exit("usage: %s LOGFILE" % sys.argv[0])
+    df = read_log(sys.argv[1])
+
+    # Pivot: one column per mode, indexed by thread count
+    per_inc = df.set_index(["mode", "threads"]).per_increment.unstack().T
+
+    ax = per_inc.plot(marker='x', grid=True, figsize=(10, 10))
+    ax.set_ylim((0, None))
+    ax.set_ylabel("Per Increment [ns]")
+    ax.get_figure().savefig('plot.png')
+
diff --git a/23-rseq/rseq-asm.S b/23-rseq/rseq-asm.S
new file mode 100644
index 0000000000000000000000000000000000000000..6a15529e4dfe8ab71e1baf8a2263ff71020c7ab8
--- /dev/null
+++ b/23-rseq/rseq-asm.S
@@ -0,0 +1,58 @@
+// int operation_rseq(struct rseq * rseq, struct cacheline *counters) {
+// According to the calling convention, the arguments come in registers:
+//     rseq:    %rdi
+//     counter: %rsi
+.p2align 4
+.globl	operation_rseq
+.type	operation_rseq, @function
+operation_rseq:
+.cfi_startproc
+        // We will return the number of aborts in %eax. Initialize
+        // eax with zero
+        xor	%eax, %eax    // %eax = 0
+
+        // We inform the kernel that we are now within a restartable
+        // sequence by moving a pointer to operation_rseq_cs (see below)
+        // to the kernel-registered rseq object.
+        // After an abort, we also jump to this label (restart_ip)
+.restart_ip:
+        // FIXME: Update rseq->rseq_cs
+
+        // The restartable sequence
+        // Implements: counters[rseq->cpu_id].counter++;
+.start_ip: // Start of restartable sequence
+        // HINT: Structure of rseq is documented in /usr/include/linux/rseq.h
+        // HINT: rseq->cpu_id == 4(%rdi)
+        // HINT: Each counter cache line is 64 bytes long
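+        //
+        // One possible sketch (commented out so the FIXMEs above remain
+        // the exercise; an illustration, not necessarily the intended
+        // solution):
+        //
+        //   after .restart_ip, before .start_ip:
+        //     leaq operation_rseq_cs(%rip), %rcx
+        //     movq %rcx, 8(%rdi)       // rseq->rseq_cs = &operation_rseq_cs
+        //   between .start_ip and .end_ip:
+        //     movl 4(%rdi), %ecx       // ecx = rseq->cpu_id
+        //     shlq $6, %rcx            // rcx *= sizeof(struct cacheline)
+        //     incq (%rsi,%rcx)         // counters[cpu_id].counter++ (commit)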
+.end_ip: // End of restartable sequence
+	ret
+
+        // The abort trampoline
+
+        // Before the abort label, the kernel will check if a specific
+        // signature is present. We hide this signature in a
+        // well-crafted assembler instruction.
+        // ud1 <sig>(%rip),%edi
+	.byte 0x0f, 0xb9, 0x3d
+	.long 0x53053053       // RSEQ_SIG
+.abort_ip: // On abort, the kernel will jump here
+        // FIXME: count aborts in %eax
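+        // (one possible sketch: a single "inc %eax" here counts the abort)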
+	jmp .restart_ip
+
+// } End of operation_rseq()
+.cfi_endproc
+.size	operation_rseq, .-operation_rseq
+
+// struct rseq_cs operation_rseq_cs -- descriptor for our rseq
+.section        .data.rel.local,"aw"
+.align 32
+.type   operation_rseq_cs, @object
+.size   operation_rseq_cs, 32
+operation_rseq_cs:
+        .long   0   // __u32 version
+        .long   0   // __u32 flags
+        .quad   .start_ip            // __u64 start_ip
+        .quad   .end_ip - .start_ip  // __u64 post_commit_offset
+        .quad   .abort_ip            // __u64 abort_ip
+
+.section        .note.GNU-stack,"",@progbits
diff --git a/23-rseq/rseq.2 b/23-rseq/rseq.2
new file mode 100644
index 0000000000000000000000000000000000000000..8882e1c701a286ca5591e1d1e2324b3128f78b74
--- /dev/null
+++ b/23-rseq/rseq.2
@@ -0,0 +1,371 @@
+.\" Copyright 2015-2020 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+.\"
+.\" %%%LICENSE_START(VERBATIM)
+.\" Permission is granted to make and distribute verbatim copies of this
+.\" manual provided the copyright notice and this permission notice are
+.\" preserved on all copies.
+.\"
+.\" Permission is granted to copy and distribute modified versions of this
+.\" manual under the conditions for verbatim copying, provided that the
+.\" entire resulting derived work is distributed under the terms of a
+.\" permission notice identical to this one.
+.\"
+.\" Since the Linux kernel and libraries are constantly changing, this
+.\" manual page may be incorrect or out-of-date.  The author(s) assume no
+.\" responsibility for errors or omissions, or for damages resulting from
+.\" the use of the information contained herein.  The author(s) may not
+.\" have taken the same level of care in the production of this manual,
+.\" which is licensed free of charge, as they might when working
+.\" professionally.
+.\"
+.\" Formatted or processed versions of this manual, if unaccompanied by
+.\" the source, must acknowledge the copyright and authors of this work.
+.\" %%%LICENSE_END
+.\"
+.TH RSEQ 2 2020-06-05 "Linux" "Linux Programmer's Manual"
+.SH NAME
+rseq \- Restartable sequences and cpu number cache
+.SH SYNOPSIS
+.nf
+.B #include <linux/rseq.h>
+.sp
+.BI "int rseq(struct rseq * " rseq ", uint32_t " rseq_len ", int " flags ", uint32_t " sig ");
+.sp
+.SH DESCRIPTION
+
+A restartable sequence is a sequence of instructions guaranteed to be executed
+atomically with respect to other threads and signal handlers on the current
+CPU. If its execution does not complete atomically, the kernel changes the
+execution flow by jumping to an abort handler defined by user-space for that
+restartable sequence.
+
+Using restartable sequences requires registering a
+.BR __rseq_abi
+thread-local storage data structure (struct rseq) through the
+.BR rseq ()
+system call. Only one
+.BR __rseq_abi
+can be registered per thread, so user-space libraries and applications must
+follow a user-space ABI defining how to share this resource.  The ABI defining
+how to share this resource between applications and libraries is defined by the
+C library.
+
+The
+.BR __rseq_abi
+contains a
+.I rseq_cs
+field which points to the currently executing critical section. For each
+thread, a single rseq critical section can run at any given point. Each
+critical section needs to be implemented in assembly.
+
+The
+.BR rseq ()
+ABI accelerates user-space operations on per-cpu data by defining a
+shared data structure ABI between each user-space thread and the kernel.
+
+It allows user-space to perform update operations on per-cpu data
+without requiring heavy-weight atomic operations.
+
+The term CPU used in this documentation refers to a hardware execution
+context. For instance, each CPU number returned by
+.BR sched_getcpu ()
+is a CPU. The current CPU refers to the CPU on which the registered thread is
+running.
+
+Restartable sequences are atomic with respect to preemption (making it
+atomic with respect to other threads running on the same CPU), as well
+as signal delivery (user-space execution contexts nested over the same
+thread). They either complete atomically with respect to preemption on
+the current CPU and signal delivery, or they are aborted.
+
+Restartable sequences are suited for update operations on per-cpu data.
+
+Restartable sequences can be used on data structures shared between threads
+within a process, and on data structures shared between threads across
+different processes.
+
+.PP
+Some examples of operations that can be accelerated or improved
+by this ABI:
+.IP \[bu] 2
+Memory allocator per-cpu free-lists,
+.IP \[bu] 2
+Querying the current CPU number,
+.IP \[bu] 2
+Incrementing per-CPU counters,
+.IP \[bu] 2
+Modifying data protected by per-CPU spinlocks,
+.IP \[bu] 2
+Inserting/removing elements in per-CPU linked-lists,
+.IP \[bu] 2
+Writing/reading per-CPU ring buffers content.
+.IP \[bu] 2
+Accurately reading performance monitoring unit counters
+with respect to thread migration.
+
+.PP
+Restartable sequences must not perform system calls. Doing so may result
+in termination of the process by a segmentation fault.
+
+.PP
+The
+.I rseq
+argument is a pointer to the thread-local rseq structure to be shared
+between kernel and user-space.
+
+.PP
+The layout of
+.B struct rseq
+is as follows:
+.TP
+.B Structure alignment
+This structure is aligned on a 32-byte boundary.
+.TP
+.B Structure size
+This structure is fixed-size (32 bytes). Its size is passed as a parameter to the
+rseq system call.
+.PP
+.in +8n
+.EX
+struct rseq {
+    __u32 cpu_id_start;
+    __u32 cpu_id;
+    union {
+        /* Edited out for conciseness. [...] */
+    } rseq_cs;
+    __u32 flags;
+} __attribute__((aligned(32)));
+.EE
+.TP
+.B Fields
+
+.TP
+.in +4n
+.I cpu_id_start
+Optimistic cache of the CPU number on which the registered thread is
+running. Its value is guaranteed to always be a possible CPU number,
+even when rseq is not registered. Its value should always be confirmed by
+reading the cpu_id field before user-space performs any side-effect (e.g.
+storing to memory).
+
+This field is an optimistic cache in the sense that it is always
+guaranteed to hold a valid CPU number in the range [ 0 ..
+nr_possible_cpus - 1 ]. It can therefore be loaded by user-space and
+used as an offset in per-cpu data structures without having to
+check whether its value is within the valid bounds compared to the
+number of possible CPUs in the system.
+
+Initialized by user-space to a possible CPU number (e.g., 0), updated
+by the kernel for threads registered with rseq.
+
+For user-space applications executed on a kernel without rseq support,
+the cpu_id_start field stays initialized at 0, which is indeed a valid
+CPU number. It is therefore valid to use it as an offset in per-cpu data
+structures, and only validate whether it's actually the current CPU
+number by comparing it with the cpu_id field within the rseq critical
+section. If the kernel does not provide rseq support, that cpu_id field
+stays initialized at -1, so the comparison always fails, as intended.
+
+It is up to user-space to implement a fall-back mechanism for scenarios where
+rseq is not available.
+.in
+.TP
+.in +4n
+.I cpu_id
+Cache of the CPU number on which the registered thread is running. Initialized
+by user-space to -1, updated by the kernel for threads registered with rseq.
+.in
+.TP
+.in +4n
+.I rseq_cs
+The rseq_cs field is a pointer to a struct rseq_cs. It is NULL when no
+rseq assembly block critical section is active for the registered thread.
+Setting it to point to a critical section descriptor (struct rseq_cs)
+marks the beginning of the critical section.
+
+Initialized by user-space to NULL.
+
+Updated by user-space, which sets the address of the currently
+active rseq_cs at the beginning of an assembly instruction sequence
+block. It is set to NULL by the kernel when it restarts an assembly
+instruction sequence block, as well as when the kernel detects that
+it is preempting or delivering a signal outside of the range
+targeted by the rseq_cs. Also needs to be set to NULL by user-space
+before reclaiming memory that contains the targeted struct rseq_cs.
+
+Read and set by the kernel.
+.in
+.TP
+.in +4n
+.I flags
+Flags indicating the restart behavior for the registered thread. This is
+mainly used for debugging purposes. Can be a combination of:
+.IP \[bu]
+RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT: Inhibit instruction sequence block restart
+on preemption for this thread.
+.IP \[bu]
+RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL: Inhibit instruction sequence block restart
+on signal delivery for this thread.
+.IP \[bu]
+RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE: Inhibit instruction sequence block restart
+on migration for this thread.
+.in
+
+Initialized by user-space, used by the kernel.
+
+.PP
+The layout of
+.B struct rseq_cs
+version 0 is as follows:
+.TP
+.B Structure alignment
+This structure is aligned on a 32-byte boundary.
+.TP
+.B Structure size
+This structure has a fixed size of 32 bytes.
+.PP
+.in +8n
+.EX
+struct rseq_cs {
+    __u32   version;
+    __u32   flags;
+    __u64   start_ip;
+    __u64   post_commit_offset;
+    __u64   abort_ip;
+} __attribute__((aligned(32)));
+.EE
+.TP
+.B Fields
+
+.TP
+.in +4n
+.I version
+Version of this structure. Should be initialized to 0.
+.in
+.TP
+.in +4n
+.I flags
+Flags indicating the restart behavior of this structure. Can be a combination
+of:
+.IP \[bu]
+RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT: Inhibit instruction sequence block restart
+on preemption for this critical section.
+.IP \[bu]
+RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL: Inhibit instruction sequence block restart
+on signal delivery for this critical section.
+.IP \[bu]
+RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE: Inhibit instruction sequence block restart
+on migration for this critical section.
+.TP
+.in +4n
+.I start_ip
+Instruction pointer address of the first instruction of the sequence of
+consecutive assembly instructions.
+.in
+.TP
+.in +4n
+.I post_commit_offset
+Offset (from start_ip address) of the address after the last instruction
+of the sequence of consecutive assembly instructions.
+.in
+.TP
+.in +4n
+.I abort_ip
+Instruction pointer address to which the execution flow is moved in
+case of an abort of the sequence of consecutive assembly instructions.
+.in
+
+.PP
+The
+.I rseq_len
+argument is the size of the
+.I struct rseq
+to register.
+
+.PP
+The
+.I flags
+argument is 0 for registration, and
+.IR RSEQ_FLAG_UNREGISTER
+for unregistration.
+
+.PP
+The
+.I sig
+argument is the 32-bit signature to be expected before the abort
+handler code.
+
+.PP
+A single library per process should keep the rseq structure in a
+thread-local storage variable.
+The
+.I cpu_id
+field should be initialized to -1, and the
+.I cpu_id_start
+field should be initialized to a possible CPU value (typically 0).
+
+.PP
+Each thread is responsible for registering and unregistering its rseq
+structure. No more than one rseq structure address can be registered
+per thread at a given time.
+
+.PP
+Reclaim of the rseq object's memory must only be done after an
+explicit rseq unregistration or after the thread exits.
+
+.PP
+In a typical usage scenario, the thread registering the rseq
+structure will be performing loads and stores from/to that structure. It
+is however also allowed to read that structure from other threads.
+The rseq field updates performed by the kernel provide relaxed atomicity
+semantics (atomic store, without memory ordering), which guarantee that other
+threads performing relaxed atomic reads (atomic load, without memory ordering)
+of the cpu number cache will always observe a consistent value.
+
+.SH RETURN VALUE
+A return value of 0 indicates success. On error, \-1 is returned, and
+.I errno
+is set appropriately.
+
+.SH ERRORS
+.TP
+.B EINVAL
+Either
+.I flags
+contains an invalid value, or
+.I rseq
+contains an address which is not appropriately aligned, or
+.I rseq_len
+contains an incorrect size.
+.TP
+.B ENOSYS
+The
+.BR rseq ()
+system call is not implemented by this kernel.
+.TP
+.B EFAULT
+.I rseq
+is an invalid address.
+.TP
+.B EBUSY
+Restartable sequence is already registered for this thread.
+.TP
+.B EPERM
+The
+.I sig
+argument on unregistration does not match the signature received
+on registration.
+
+.SH VERSIONS
+The
+.BR rseq ()
+system call was added in Linux 4.18.
+
+.SH CONFORMING TO
+.BR rseq ()
+is Linux-specific.
+
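+.SH EXAMPLES
+A minimal registration sketch (for illustration only; it assumes a
+libc that does not already register an rseq area on its own):
+.PP
+.in +4n
+.EX
+#define _GNU_SOURCE
+#include <linux/rseq.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#define RSEQ_SIG 0x53053053
+
+static __thread struct rseq rs;  /* aligned(32) via linux/rseq.h */
+
+static int register_rseq(void)
+{
+    rs.cpu_id_start = 0;  /* any possible CPU number */
+    rs.cpu_id = -1;       /* updated by the kernel */
+    return syscall(SYS_rseq, &rs, sizeof(rs), 0, RSEQ_SIG);
+}
+.EE
+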
+.in
+.SH SEE ALSO
+.BR sched_getcpu (3),
+.BR membarrier (2)
\ No newline at end of file
diff --git a/23-rseq/rseq.c b/23-rseq/rseq.c
new file mode 100644
index 0000000000000000000000000000000000000000..18ab5607de04b39daf92885f0f187a2e76c04e33
--- /dev/null
+++ b/23-rseq/rseq.c
@@ -0,0 +1,243 @@
+#define _GNU_SOURCE
+#include <pthread.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <sys/syscall.h>
+#include <stdatomic.h>
+#include <string.h>
+#include <sys/sysinfo.h>
+#include <stdbool.h>
+#include <malloc.h>
+
+#define die(msg) do { perror(msg); exit(EXIT_FAILURE); } while(0)
+
+// With modern glibc versions (2.35 and newer), glibc already registers
+// an rseq area for each thread, including threads started with
+// pthread_create. In that case, we derive the pointer from
+// __builtin_thread_pointer() and the exported __rseq_offset. For
+// details, see
+// https://www.gnu.org/software/libc/manual/html_node/Restartable-Sequences.html
+#if __has_include ("sys/rseq.h")
+#include <sys/rseq.h>
+
+#if RSEQ_SIG != 0x53053053
+#error "glibc defined RSEQ_SIG differently"
+#endif
+
+static struct rseq * rseq_register() {
+    // __rseq_offset is the offset of the rseq area from the thread pointer
+    return (struct rseq *)((char *)__builtin_thread_pointer() + __rseq_offset);
+}
+
+#else
+#include <linux/rseq.h>
+#define RSEQ_SIG   0x53053053
+
+// The rseq(2) syscall has no glibc wrapper. Therefore, we define our
+// own. Please run `make man` to see the man page rseq(2).
+int sys_rseq(struct rseq * rseq, uint32_t rseq_len, int flags, uint32_t sig) {
+    return syscall(SYS_rseq, rseq, rseq_len, flags, sig);
+}
+
+struct rseq *rseq_register() {
+    struct rseq *ret = memalign(32, sizeof(struct rseq));
+    if (!ret) die("memalign");
+    memset(ret, 0, sizeof(struct rseq));
+    ret->cpu_id_start = 0;   // any possible CPU number (see rseq.2)
+    ret->cpu_id       = -1;  // updated by the kernel on registration
+    if (sys_rseq(ret, sizeof(struct rseq), 0, RSEQ_SIG) < 0)
+        die("rseq");
+    return ret;
+}
+
+#endif
+
+
+
+// This data structure is exactly one cache-line wide (assuming that a
+// cache line is 64 bytes). Thereby, we can allocate an array of
+// cpu-local counters, where each CPU only operates on a single cache
+// line. Thereby, we can avoid most side effects of cache-line
+// transfers.
+struct cacheline {
+    union {
+        char data[64];
+        struct {
+            uint64_t        counter;
+            pthread_mutex_t mutex;   // Used in the lock variant
+        };
+    };
+};
+
+// We will define multiple operation_t functions that all implement
+// the same behavior: They increment a cpu-local counter by 1.
+typedef int (*operation_t)(struct rseq *_, struct cacheline *);
+
+// The simplest variant of a CPU-local counter is to get the CPU id
+// with getcpu() and increment the counter. However, due to the
+// read-update-write cycle, this variant is racy and will produce
+// incorrect results: if the thread is preempted or migrated between
+// the load and the store, another thread may update the same counter
+// in between, and one of the two increments is lost.
+int operation_regular(struct rseq*_, struct cacheline *counters) {
+    unsigned int cpu_id;
+
+    getcpu(&cpu_id, NULL);
+    counters[cpu_id].counter += 1;
+
+    return 0;
+}
+
+// A correct, but slow variant uses the per-cache-line pthread
+// mutex to lock the counter for the duration of the operation.
+int operation_lock(struct rseq*_, struct cacheline *counters) {
+    unsigned int cpu_id;
+
+    getcpu(&cpu_id, NULL);
+
+    pthread_mutex_lock(&counters[cpu_id].mutex);
+    counters[cpu_id].counter += 1;
+    pthread_mutex_unlock(&counters[cpu_id].mutex);
+
+    return 0;
+}
+
+// Variant that uses getcpu() + atomic_fetch_add
+int operation_atomic(struct rseq* _, struct cacheline *counters) {
+    // FIXME: Implement variant
+
+    return 0;
+}
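+
+// A possible sketch of the getcpu-atomic variant (an illustration, not
+// necessarily the intended solution; the FIXME above is left as the
+// exercise): query the CPU number, then perform the increment as one
+// atomic read-modify-write so that concurrent updates cannot be lost.
+static int operation_atomic_sketch(struct rseq* _, struct cacheline *counters) {
+    unsigned int cpu_id;
+    getcpu(&cpu_id, NULL);
+    // GCC/Clang builtin: atomic add on a plain uint64_t, relaxed ordering
+    __atomic_fetch_add(&counters[cpu_id].counter, 1, __ATOMIC_RELAXED);
+    return 0;
+}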
+
+// Variant without getcpu: Like operation_atomic, but uses the
+// restartable sequence to retrieve the cpu id.
+// Please look at /usr/include/linux/rseq.h for the documentation of struct rseq
+int operation_rseq_atomic(struct rseq* rs, struct cacheline *counters) {
+    // FIXME: Implement variant
+
+    return 0;
+}
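+
+// A hedged sketch of the rseq-atomic variant: instead of the getcpu()
+// call, read the CPU number from the kernel-maintained rseq area. A
+// migration between the load and the add is harmless here because the
+// increment itself is still a single atomic operation.
+static int operation_rseq_atomic_sketch(struct rseq* rs, struct cacheline *counters) {
+    uint32_t cpu_id = __atomic_load_n(&rs->cpu_id, __ATOMIC_RELAXED);
+    __atomic_fetch_add(&counters[cpu_id].counter, 1, __ATOMIC_RELAXED);
+    return 0;
+}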
+
+
+// Variant that uses no atomic operations and fully relies on rseq.
+// This variant is implemented in assembly (see rseq-asm.S).
+extern int operation_rseq(struct rseq *, struct cacheline*);
+// FIXME: Implement operation_rseq in rseq-asm.S
+
+
+////////////////////////////////////////////////////////////////
+// The Benchmarking code
+//
+// We start NTHREADS threads and each thread executes
+// ROUNDS_PER_THREAD cpu-local increments
+
+int  ROUNDS_PER_THREAD = 50000000;
+
+struct thread_args {
+    operation_t         operation;
+    struct cacheline    *counters;
+};
+
+
+void* thread_handler(void* data) {
+    struct thread_args *args = data;
+
+    // Register the rseq area (or reuse glibc's registration)
+    struct rseq *rseq = rseq_register();
+    // Report to stderr so the parseable result line on stdout stays clean
+    fprintf(stderr, "rseq: %p\n", (void *)rseq);
+
+    // Execute the given operation ROUNDS_PER_THREAD times and count
+    // the number of aborts (only != 0 for rseq)
+    uint64_t aborts = 0;
+    for (uint64_t i = 0; i < ROUNDS_PER_THREAD; i++) {
+        aborts += args->operation(rseq, args->counters);
+    }
+
+    // Return the number of rseq aborts
+    return (void*) aborts;
+}
+
+// Print usage and exit.
+static void usage(char *argv0) {
+    fprintf(stderr, "usage: %s <threads> <regular|lock|getcpu-atomic|rseq-atomic|rseq> [rounds]\n", argv0);
+    exit(EXIT_FAILURE);
+}
+
+int main(int argc, char *argv[]) {
+    // Parameter Parsing. This is boring
+    if (argc < 3) usage(argv[0]);
+    if (argc == 4)
+        ROUNDS_PER_THREAD *= atoi(argv[3]);
+
+    int CPUS     = get_nprocs();
+    int NTHREADS = atoi(argv[1]);
+    char *MODE   = argv[2];
+
+    struct thread_args args;
+    if      (!strcmp(MODE, "rseq"))          args.operation = operation_rseq;
+    else if (!strcmp(MODE, "getcpu-atomic")) args.operation = operation_atomic;
+    else if (!strcmp(MODE, "rseq-atomic"))   args.operation = operation_rseq_atomic;
+    else if (!strcmp(MODE, "regular"))       args.operation = operation_regular;
+    else if (!strcmp(MODE, "lock"))          args.operation = operation_lock;
+    else      usage(argv[0]);
+
+    // Initialize the CPU-local counters. Each CPU gets a struct
+    // cacheline of its own. We use aligned_alloc(3) to get
+    // cache-line-aligned memory; unlike calloc(3), it does not zero
+    // the memory, so we clear it explicitly.
+
+    args.counters = aligned_alloc(sizeof(struct cacheline), CPUS * sizeof(struct cacheline));
+    if (!args.counters) die("aligned_alloc");
+    memset(args.counters, 0, CPUS * sizeof(struct cacheline));
+
+
+    // Initialize locks for the lock variant
+    for (uint32_t i = 0; i < CPUS; i++) {
+        pthread_mutex_init(&args.counters[i].mutex, NULL);
+    }
+
+
+    // The actual benchmarking code
+    ////////////////////////////////////////////////////////////////
+    struct timespec start, end;
+    // Start Time. We use the CLOCK_PROCESS_CPUTIME_ID to get the
+    // number of CPU-seconds spent. 
+    if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start) < 0)
+        die("clock_gettime");
+
+    // Create NTHREADS threads
+    pthread_t threads[NTHREADS];
+    for (uint32_t i = 0; i < NTHREADS; i++) {
+        pthread_create(&threads[i], NULL, thread_handler, (void*)&args);
+    }
+
+    // Wait for all threads to complete and accumulate the number of aborts
+    uint64_t aborts = 0;
+    for (uint32_t i = 0; i < NTHREADS; i++) {
+        uint64_t thread_aborts;
+        pthread_join(threads[i], (void**)&thread_aborts);
+        aborts += thread_aborts;
+    }
+
+    // End Time
+    if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end) < 0)
+        die("clock_gettime");
+
+    // Calculate the time delta between both points in time.
+    double delta = end.tv_sec - start.tv_sec;
+    delta += (end.tv_nsec - start.tv_nsec) / 1e9;
+
+    // Print out the cpu-local counters. With this output and a low
+    // number of threads you can see the thread migration.
+    uint64_t sum = 0;
+    for (uint32_t i = 0; i < CPUS; i++) {
+        fprintf(stderr, "counter[cpu=%u] = %lu\n", i, args.counters[i].counter);
+        sum += args.counters[i].counter;
+    }
+
+    // Print out the result. We also check that the threads actually
+    // counted correctly (state)
+    printf("mode=%s threads=%d sum=%lu state=%s aborts=%lu cputime=%fs per_increment=%fns\n",
+           MODE, NTHREADS,
+           sum, (sum % ROUNDS_PER_THREAD) == 0 ? "ok" : "fail",
+           aborts,
+           delta,            // total cpu time that was spent
+           delta * 1e9 / sum // nanoseconds per increment
+        );
+}