Сферические факториалы
TL;DR
Lang    avg ns of 1M calls
------- ------------------
Rust    0
Java    3
PyPy    8
Python  537
K       756
O-CPS   7702
Erlang  10699/1806/436/9
LuaJIT  33856
    Java HotSpot(TM) 64-Bit Server VM (build 25.111-b14, mixed mode)
Джава как эталон считающий факториал пятерочки за 4нс, которые уходят на разогрев ВМ.
public class fac {
    public static void main(String[] args) {
        long a = System.nanoTime();
        long i = 10000000;
        while (i > 0) {
         calc(5);
         i--;
        }
        long b = System.nanoTime();
        System.out.println("Fac: " + (b-a)/10000000 + "ns");
    }
    public static long calc(long n) {
        if (n == 1)
            return 1;
        else
            return n * calc(n - 1);
    }
}
$ javac fac.java
$ java -cp . fac
Fac: 3ns
rustc 1.14.0-nightly (cae6ab1c4 2016-11-05)
На расте вообще zero-cost факториал получился :-) Даже factoria 20 не выходит из 0 наносекунд. Так и должно быть!
#[bench]
fn fac_rust(b: &mut Bencher) {
    let mut x: i64 = 0;
    b.iter(|| { x = factorial(5); });
}
fn factorial(value: i64) -> i64 { if value==1 { 1 } else { return value *
factorial(value-1); } }
test fac_rust ... bench:           0 ns/iter (+/- 0)
0ns
Теперь переходим к CPS интерпретаторам:
KDB+ 3.4 2016.10.10 Copyright (C) 1993-2016 Kx Systems
q)f:{$[x=1;1;x*.z.s x-1]}
q)\t f'[1000000#5]
756
756ns
LuaJIT 2.0.4 -- Copyright (C) 2005-2015 Mike Pall. http://luajit.org/
function factorial(n)
  if n == 1 then return 1
  else return n * factorial(n - 1) end end
local x = os.clock()
local f = 0
for i=0,1000000,1 do f = factorial(5) end
print(f,(os.clock()-x)*1000000)
$ luajit fac.lua
120     33856
33us
Erlang/OTP 19 [erts-8.1] [64-bit] [smp:8:8]
> Fac = fun Factorial(X) -> case X of 1 -> 1; _ -> X * Factorial(X-1) end end.
#Fun
> lists:sum([element(1,timer:tc(fun() -> Fac(5)
end))||_<-lists:seq(1,1000000)])/1000000.
10.699388
10us
fac2(X) -> case X of 1 -> 1; _ -> X * fac2(X-1) end.
fac(1,N) -> N;
fac(I,N) -> fac(I-1,N*I).
fac(N) -> fac(N-1,N).
test(F) ->
    A = list_to_integer(os:getenv("FAC")),
    lists:sum([element(1,timer:tc(fun() -> fac(A)
end))||_<-lists:seq(1,1000000)])/1000000.
$ FAC=5 erl
> c(fac), l(fac).
> fac:test().
0.009811
9ns
test(F) ->
    A = list_to_integer(os:getenv("FAC")),
    lists:sum([element(1,timer:tc(fun() -> fac2(A)
end))||_<-lists:seq(1,1000000)])/1000000.
> fac:test().
0.436004
436ns
test() ->
    F = fun Fac(1,N) -> N;
            Fac(I,N) -> Fac(I-1,N*I) end,
    A = list_to_integer(os:getenv("FAC")),
    lists:sum([element(1,timer:tc(fun() -> F(A-1,A)
end))||_<-lists:seq(1,1000000)])/1000000.
1.806826
1806ns
Python 2.7.12
#!/usr/bin/python
import timeit
def fac(x):
   if x == 1:
       return 1
   else:
       return x * fac(x - 1)
n = 1000000
r = timeit.timeit(lambda: fac(5), number=n)
print("{0:.6f}ns".format(r*1000))
542.725086ns
PyPy 5.1.2 with GCC 5.3.1 20160413
#!/usr/bin/env pypy
import timeit
def fac(x):
   if x == 1:
       return 1
   else:
       return x * fac(x - 1)
n = 1000000
r = timeit.timeit(lambda: fac(5), number=n)
print("{0:.6f}ns".format(r*1000))
8.576870ns
    Welcome to O-CPS Interpreter v0.11.0!
$ cargo build ; rlwrap ./target/debug/console 2>/dev/null
    Finished debug [unoptimized + debuginfo] target(s) in 0.0 secs
Welcome to O-CPS Interpreter v0.11.0!
> fac:{$[x=1;1;x*fac[x-1]]};fac 5
120
#[bench]
fn fac(b: &mut Bencher) {
    let mut i = Interpreter::new().unwrap();
    let ref mut code = ast::parse(&"fac:{$[x=1;1;x*fac[x-1]]}".to_string());
    i.run(code).unwrap();
    let ref mut f = ast::parse(&"fac[5]".to_string());
    b.iter(|| {
        i.run(f);
    })
}
test fac      ... bench:      7,702 ns/iter (+/- 376)
7us