7-Jun-2017: Eight queens problem in 93 bytes

Go to the source link to view the article. You can view the picture content, updated content and better typesetting reading experience. If the link is broken, please click the button below to view the snapshot at that time.
Eight queens problem in 93 bytes

7-Jun-2017: Eight queens problem in 93 bytes

There is a well known eight queens problem: place 8 queens on chess board so they will not attack each other; also, count all possible positions. There are 92 possible positions for 8*8 chess board (let's leave aside all symmetries).

Here is my piece of code to count them:

```#include <stdio.h>
#include <assert.h>

// https://en.wikipedia.org/wiki/Eight_queens_puzzle
// https://oeis.org/A000170/b000170.txt

int try_row(int row, int attack_vertical, int attack_RL, int attack_LR, int queens, int start_bit)
{
if (row==queens)
{
// all rows are filled, so we have a correct position
return 1;
};

int total=0;
// squares attacked in current row:
int attack=attack_vertical | attack_RL | attack_LR;

// enumerate all squares in current row:
for (int new_queen=start_bit; new_queen; new_queen=new_queen>>1)
{
// square is not attacked by previous/upper row(s),
// so place the queen here:
if ((new_queen & attack)==0)
{
// add new queen to "attack" variables:
// "vertical" is squares attacked vertically
// "RL" is squares attacked diagonally R-to-L
// "LR" is squares attacked diagonally L-to-R

total+=try_row(row+1,
attack_vertical | new_queen,
(attack_RL | new_queen)<<1,
(attack_LR | new_queen)>>1,
queens, start_bit);
};
};

// no more space for queen(s) in current row, stop:
};

int main()
{
for (int i=1; i<=14; i++)
printf ("%d*%d total=%d\n", i, i, try_row(0, 0, 0, 0, i, 1<<(i-1)));
/*
assert (try_row(0, 0, 0, 0, 1, 1<<(1-1))==1);
assert (try_row(0, 0, 0, 0, 2, 1<<(2-1))==0);
assert (try_row(0, 0, 0, 0, 3, 1<<(3-1))==0);
assert (try_row(0, 0, 0, 0, 4, 1<<(4-1))==2);
assert (try_row(0, 0, 0, 0, 5, 1<<(5-1))==10);
assert (try_row(0, 0, 0, 0, 6, 1<<(6-1))==4);
assert (try_row(0, 0, 0, 0, 7, 1<<(7-1))==40);
assert (try_row(0, 0, 0, 0, 8, 1<<(8-1))==92);
assert (try_row(0, 0, 0, 0, 9, 1<<(9-1))==352);
assert (try_row(0, 0, 0, 0, 10, 1<<(10-1))==724);
assert (try_row(0, 0, 0, 0, 11, 1<<(11-1))==2680);
assert (try_row(0, 0, 0, 0, 12, 1<<(12-1))==14200);
assert (try_row(0, 0, 0, 0, 13, 1<<(13-1))==73712);
assert (try_row(0, 0, 0, 0, 14, 1<<(14-1))==365596);
*/
};
```

We can try to rewrite it to assembly. It's very hard to compete with modern compilers in efficient code generation, but we can still compete with small code generation.

This is what GCC generated, but I optimized it manually:

```	.intel_syntax noprefix
.globl	try_row

try_row:
cmp	edi, r8d		# row==queens?
je	.L7			# exit

push	r14
push	r12
push	r13
push	rbp
push	rbx

# "queens" is always in R8 and will be used in subsequent calls
# precomputed "1<<(queens-1)" is always in R9

# registers we use as local variables:
mov	r12d, edx		# r12=edx=attack_RL
mov	r13d, ecx		# r13=ecx=attack_LR
mov	r14d, esi		# r14=esi=attack_vertical
mov	ebx, r9d		# ebx=r9=1<<(queens-1) (precomputed)
xor	ebp, ebp		# ebp=total=0

.L3:
# we check all 3 variables subsequently
# we could OR all them and check once,
# but then we would have to use additional register, which is also would be saved in stack
test	ebx, r12d		# new_queen & attack_RL
jnz	.L4			# not zero? skip it: this queen can be attacked from previous/upper row(s)
test	ebx, r13d		# new_queen & attack_LR
jnz	.L4			# not zero? skip it: this queen can be attacked from previous/upper row(s)
test	ebx, r14d		# new_queen & attack_vertical
jnz	.L4			# not zero? skip it: this queen can be attacked from previous/upper row(s)

# preparing arguments for subsequent call:

inc	edi			# (1st arg) row++ for subsequent call

mov	esi, ebx		# new_queen
or	esi, r14d		# (2nd arg) esi=attack_vertical | new_queen

mov	edx, ebx		# new_queen
or	edx, r12d		# edx=attack_RL | new_queen
shl	edx, 1			# (3rd arg) edx=(attack_RL | new_queen)<<1

mov	ecx, ebx		# new_queen
or	ecx, r13d		# ecx=attack_LR | new_queen
shr	ecx, 1			# (4th arg) ecx=(attack_LR | new_queen)>>1

call	try_row
dec	edi			# row-- (restore it to the previous value, we will use it)
.L4:
shr	ebx, 1			# shift new_queen
jnz	.L3			# bit is still present in new_queen, continue
# if the shifting queen has disappeared, exit:
mov	eax, ebp		# ebp=total

pop	rbx
pop	rbp
pop	r13
pop	r12
pop	r14
ret
.L7:
mov	eax, 1
ret

```

It works slightly slower, but, judging by objdump (objdump -d 8queens_asm), occupies 93 bytes. It still can be optimized further.

There is no point to write assembly code these days, but it can be good exercise, to understand it better. Surprisingly, it also helps during SIMD coding, since you work with CPU instructions rather than functions.