5

Github Optimize jumps in PartialOrd le by AngelicosPhosphoros · Pull Request #83...

 3 years ago
source link: https://github.com/rust-lang/rust/pull/83819
Go to the source link to view the article. You can view the picture content, updated content and better typesetting reading experience. If the link is broken, please click the button below to view the snapshot at that time.

ASM changes

Codegen difference for code:

#[repr(u32)]
#[derive(Copy, Clone, Eq, PartialEq, PartialOrd)]
pub enum Foo {
    Zero,
    One,
    Two,
}

pub fn compare(a: Foo, b: Foo)->bool{
    a <= b
}

Before:

_ZN8test_cmp7compare17h5c31fd9ed0a26eefE:
	xorl	%eax, %eax
	xorl	%r8d, %r8d
	cmpl	%edx, %ecx
	setne	%r8b
	movq	$-1, %rcx
	cmovaeq	%r8, %rcx
	movl	$0, %edx
	cmovneq	%rcx, %rdx
	addq	$1, %rdx
	cmpq	$1, %rdx
	ja	.LBB0_2
	movb	$1, %al
.LBB0_2:
	retq
_ZN8test_cmp7compare17h5c31fd9ed0a26eefE:
	cmpl	%edx, %ecx
	setbe	%al
	retq

Benchmark results

Code:

use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};

#[repr(u32)]
#[derive(Copy, Clone, Eq, PartialEq, PartialOrd)]
enum Foo {
    Zero = 0,
    One = 1,
    Two = 2,
}

struct Data {
    sorted: Vec<Foo>,
    unordered: Vec<Foo>,
}

fn generate_data(n: usize) -> Data {
    use rand::prelude::{Rng, SeedableRng};
    use rand_chacha::ChaCha8Rng;
    let mut rng = ChaCha8Rng::seed_from_u64(6464u64);
    let distribution = rand::distributions::Uniform::new(0, 3);
    let mut data = Vec::with_capacity(n);
    for _ in 0..n {
        let num: usize = rng.sample(distribution);
        data.push(num);
    }
    fn convert(num: usize) -> Foo {
        [Foo::Zero, Foo::One, Foo::Two][num]
    }
    let mut sorted = data.clone();
    sorted.sort_unstable();

    Data {
        sorted: sorted.into_iter().map(convert).collect(),
        unordered: data.into_iter().map(convert).collect(),
    }
}

pub fn criterion_benchmark(c: &mut Criterion) {
    let Data { sorted, unordered } = generate_data(1000);

    let mut group = c.benchmark_group("cmp");

    let pairs = [("Sorted", sorted), ("Unordered", unordered)];
    for (name, data) in pairs.iter().cloned() {
        group.bench_with_input(BenchmarkId::new("comparisons", name), &data, |b, data| {
            b.iter_batched(
                || -> (Vec<bool>, Vec<Foo>) {
                    let buffer = Vec::with_capacity(data.len());
                    let data = data.clone();
                    (buffer, data)
                },
                |(mut out_buff, data)| {
                    let comparisons = data.windows(2).map(|x| {
                        assert_eq!(x.len(), 2);
                        x[0] <= x[1]
                    });
                    out_buff.extend(comparisons);
                    (out_buff, data)
                },
                criterion::BatchSize::LargeInput,
            );
        });
    }

    group.finish();
}

criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);

Results:

cmp/comparisons/Sorted  time:   [520.30 ns 520.94 ns 521.63 ns]
                        change: [-47.523% -47.229% -46.966%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 8 outliers among 100 measurements (8.00%)
  4 (4.00%) high mild
  4 (4.00%) high severe
cmp/comparisons/Unordered
                        time:   [517.17 ns 518.03 ns 518.94 ns]
                        change: [-52.738% -52.573% -52.410%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 5 outliers among 100 measurements (5.00%)
  4 (4.00%) high mild
  1 (1.00%) high severe

About Joyk


Aggregate valuable and interesting links.
Joyk means Joy of geeK