Performance introspection

The performance of DispatchedTuples should scale similarly to that of ordinary tuples (good for small tuples, but expensive for larger ones).

using DispatchedTuples
using InteractiveUtils

# Field-less marker types used purely as dispatch keys in the examples below;
# they carry no data.
struct Foo end;
struct Bar end;
struct Baz end;

# Key/value pairs for the DispatchedTuple example. The key Foo() appears twice
# (entries 1 and 3), so indexing with Foo() yields two values.
tup = (
   Pair(Foo(), 1),
   Pair(Bar(), 3),
   Pair(Foo(), 2),
)

# Key/value pairs for the DispatchedSet example. Each key type appears exactly
# once here, so indexing with Foo() yields a single value.
tupset = (
   Pair(Foo(), 1),
   Pair(Bar(), 3),
   Pair(Baz(), 2),
)
dtup = DispatchedTuple(tup);
dset = DispatchedSet(tupset);
# NOTE(review): presumably present so the setup block displays no value — confirm.
nothing

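Using dtup[key] on a DispatchedTuple is equivalent to hard-coding the intended indexes ahead of time, which means that the generated code is concise. Below, each indexing call is compared against a hand-written equivalent (get_foo_magic) using @code_typed and @code_native: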

DispatchedTuple

# Hand-written baseline for dtup[Foo()]: reads the values stored at positions
# 1 and 3 of dtup.tup, which are the positions of the two Foo() entries in tup.
get_foo_magic(dtup) = (dtup.tup[1][2], dtup.tup[3][2])
@code_typed get_foo_magic(dtup)
CodeInfo(
1 ─ %1 = Base.getfield(dtup, :tup)::Tuple{Tuple{Main.ex-perf.Foo,Int64},Tuple{Main.ex-perf.Bar,Int64},Tuple{Main.ex-perf.Foo,Int64}}
│   %2 = Base.getfield(%1, 1, true)::Tuple{Main.ex-perf.Foo,Int64}
│   %3 = Base.getfield(%2, 2, true)::Int64
│   %4 = Base.getfield(dtup, :tup)::Tuple{Tuple{Main.ex-perf.Foo,Int64},Tuple{Main.ex-perf.Bar,Int64},Tuple{Main.ex-perf.Foo,Int64}}
│   %5 = Base.getfield(%4, 3, true)::Tuple{Main.ex-perf.Foo,Int64}
│   %6 = Base.getfield(%5, 2, true)::Int64
│   %7 = Core.tuple(%3, %6)::Tuple{Int64,Int64}
└──      return %7
) => Tuple{Int64,Int64}
@code_typed dtup[Foo()]
CodeInfo(
1 ─ %1 = Base.getfield(dt, :tup)::Tuple{Tuple{Main.ex-perf.Foo,Int64},Tuple{Main.ex-perf.Bar,Int64},Tuple{Main.ex-perf.Foo,Int64}}
│   %2 = Base.getfield(%1, 1, true)::Tuple{Main.ex-perf.Foo,Int64}
│   %3 = Base.getfield(%2, 2, true)::Int64
│   %4 = Base.getfield(dt, :tup)::Tuple{Tuple{Main.ex-perf.Foo,Int64},Tuple{Main.ex-perf.Bar,Int64},Tuple{Main.ex-perf.Foo,Int64}}
│   %5 = Base.getfield(%4, 3, true)::Tuple{Main.ex-perf.Foo,Int64}
│   %6 = Base.getfield(%5, 2, true)::Int64
│   %7 = DispatchedTuples.tuple(%3, %6)::Tuple{Int64,Int64}
└──      return %7
) => Tuple{Int64,Int64}
@code_native get_foo_magic(dtup)
	.text
; ┌ @ none:1 within `get_foo_magic'
	movq	%rdi, %rax
	movq	(%rsi), %rcx
	movq	16(%rsi), %rdx
	movq	%rcx, (%rdi)
	movq	%rdx, 8(%rdi)
	retq
	nopw	%cs:(%rax,%rax)
	nopl	(%rax)
; └
@code_native dtup[Foo()]
	.text
; ┌ @ DispatchedTuples.jl:202 within `getindex'
	movq	%rdi, %rax
; │┌ @ DispatchedTuples.jl:114 within `dispatch'
; ││┌ @ DispatchedTuples.jl:24 within `macro expansion'
	movq	(%rsi), %rcx
	movq	16(%rsi), %rdx
; │└└
	movq	%rcx, (%rdi)
	movq	%rdx, 8(%rdi)
	retq
	nopw	%cs:(%rax,%rax)
	nopl	(%rax)
; └

DispatchedSet

# Hand-written baseline for dset[Foo()]: reads the value stored at position 1
# of dset.tup, the position of the single Foo() entry in tupset.
# NOTE: this has the same name and untyped one-argument signature as the
# earlier get_foo_magic(dtup) definition, so it replaces that method. The
# earlier tuple comparisons appear before this point, so they are unaffected.
get_foo_magic(dset) = dset.tup[1][2]
@code_typed get_foo_magic(dset)
CodeInfo(
1 ─ %1 = Base.getfield(dset, :tup)::Tuple{Tuple{Main.ex-perf.Foo,Int64},Tuple{Main.ex-perf.Bar,Int64},Tuple{Main.ex-perf.Baz,Int64}}
│   %2 = Base.getfield(%1, 1, true)::Tuple{Main.ex-perf.Foo,Int64}
│   %3 = Base.getfield(%2, 2, true)::Int64
└──      return %3
) => Int64
@code_typed dset[Foo()]
CodeInfo(
1 ─ %1 = Base.getfield(dt, :tup)::Tuple{Tuple{Main.ex-perf.Foo,Int64},Tuple{Main.ex-perf.Bar,Int64},Tuple{Main.ex-perf.Baz,Int64}}
│   %2 = Base.getfield(%1, 1, true)::Tuple{Main.ex-perf.Foo,Int64}
│   %3 = Base.getfield(%2, 2, true)::Int64
└──      return %3
) => Int64
@code_native get_foo_magic(dset)
	.text
; ┌ @ none:1 within `get_foo_magic'
	movq	(%rdi), %rax
	retq
	nopw	%cs:(%rax,%rax)
	nop
; └
@code_native dset[Foo()]
	.text
; ┌ @ DispatchedTuples.jl:202 within `getindex'
	movq	(%rdi), %rax
	retq
	nopw	%cs:(%rax,%rax)
	nop
; └