18

This is What Peak Hello World Looks Like

 3 years ago
source link: https://h313.info/blog/cpp/2020/05/17/this-is-what-peak-hello-world-looks-like.html
Go to the source link to view the article. You can view the picture content, updated content and better typesetting reading experience. If the link is broken, please click the button below to view the snapshot at that time.

Everybody’s done a Hello World program before. But now that I’ve got a few years of experience with the language, I set out to ask one of the most pressing questions out there - how do we make Hello World in C as convoluted and hard to understand as possible? This post documents the final result of a sleep-deprived me trying to do exactly that.

I quickly realized I had to set a few ground rules first so there would be a sensible limit for lines of code that are “useless”:

  • Since we’re using pure C, we won’t have classes (so no HelloWorldFactoryFactoryFactorySingleton s)
  • All #include directives should be essential to the program (so no chaining a billion .h files)
  • Each function must do something essential to the program (so no functions that just call another or useless #defines )
  • The program takes no input, and writes Hello World! exactly to the terminal (so no other calculations are done)

With that, let’s start by building our string in a separate function with malloc instead:

#include <stdio.h>
#include <stdlib.h>

char* generate_words() {
  char* ret = malloc(13);

  ret[0] = 'H';
  ret[1] = 'e';
  ret[2] = 'l';
  ret[3] = 'l';
  ret[4] = 'o';
  ret[5] = ' ';
  ret[6] = 'W';
  ret[7] = 'o';
  ret[8] = 'r';
  ret[9] = 'l';
  ret[10] = 'd';
  ret[11] = '!';
  ret[12] = '\0'; // Can't forget our null operator!

  return ret;
}

int main() {
  char *ret = generate_words();
  printf(ret);
  free(ret);
  return 0;
}

That’s already much more complicated than before, but we can make it even worse! Replacing our direct assignments of letters to bitwise operations gets us this monstrosity:

#include <stdio.h>
#include <stdlib.h>

char* generate_words() {
  char* ret = malloc(12);
  char c = 0x01;

  ret[0] = (c << 6) | (c << 3); // H is 0x48
  ret[1] = c | (c << 2) | (c << 5) | (c << 6); // 'e' is 101
  ret[2] = ((c | (c << 1)) << 2) | ((c | (c << 1)) << 5); // 'l' is 108
  ret[3] = ((c | (c << 1)) << 2) | ((c | (c << 1)) << 5); // 'l' is 108
  ret[4] = (c | (c << 1)) | ((c | (c << 1)) << 2) | ((c | (c << 1)) << 5); // 'o' is 111
  ret[5] = (c << 5); // ' ' is 32
  ret[6] = c | (c << 2) | (c << 1) | (c << 1) << 3 | (c << 1) << 5; // 'W' is 87
  ret[7] = (c | (c << 1)) | ((c | (c << 1)) << 2) | ((c | (c << 1)) << 5); // 'o' is 111
  ret[8] = (c << 1) | ((c << 1) | (c << 2) | c) << 4; // 'r' is 114
  ret[9] = ((c | (c << 1)) << 2) | ((c | (c << 1)) << 5); // 'l' is 108
  ret[10] = (c << 2) | (c << 5) | (c << 6); // 'd' is 100
  ret[11] = (c << 5) | c; // '!' is 33

  return ret;
}

int main() {
  char *ret = generate_words();
  printf(ret);
  free(ret);
  return 0;
}

As you can see, I’ve added comments and some extra brackets for readability’s sake. However, what if we remove our c variable to save space and only depend on previously defined values in ret for our calculations?

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

volatile u_int8_t a = 0 - 1;
volatile u_int8_t * z = NULL;

int main() {
    a == 0 - 1 ? z = aligned_alloc(sysconf(_SC_LEVEL1_DCACHE_LINESIZE), 1 * 10 + 3) : a != 1 * 10 + 3 ? z[a] = a == 0 ? z[a] = (1 << (a + 6)) | (1 << (a + 3)) : a == 1 ? (z[a - 1] & (1 << (a + 5))) | (z[a - 1] >> a) | (z[a - 1] >> (a + 5))  : a == 2 ? z[a - 2] | z[1] & ~(z[a - 2] >> (a + 4)) : a == 4 ? z[a - 2] | z[a - 3] | (z[a - 4] >> (a + 1)) : a == 5 ? (z[a - 4] & z[a - 3] & z[a - 5]) >> (a - 4) : a == 6 ? (z[a - 2] & ~z[a - 1] | (z[a - 6] >> (a - 4))) & ~(z[a - 6] >> (a - 2) << (a - 5)) : a == 8 ? (~z[a - 7] & z[a - 2]) | z[a - 3] | z[a - 3] << (a - 7) : a == 10 ? z[a - 9] & ~(z[a - 5] >> (a - 5)) : a == 11 ? z[a - 6] | ((z[a - 1] & z[a - 3]) >> (a - 5)) : a - a : 0; if(a == 1 * 10 + 3) {memcpy(z + 3, z + 2, 1);memcpy(z + 9, z + 3, 1);memcpy(z + 7, z + 4, 1);printf("%s", z);free(z);return 0;}a = a + 1;main();
}
u_int8_t* generate_words() {
  u_int8_t * ret = malloc(13);

  ret[0] = (1 << 6) | (1 << 3);
  ret[1] = (ret[0] & (1 << 6)) | (ret[0] >> 1) | (ret[0] >> 6);
  ret[2] = ret[0] | ret[1] & ~(ret[0] >> 6);
  ret[4] = ret[2] | ret[1] | (ret[0] >> 5);
  ret[5] = (ret[1] & ret[2] & ret[0]) >> 1;
  ret[6] = (ret[4] & ~ret[5] | (ret[0] >> 2)) & ~(ret[0] >> 4 << 1);
  ret[8] = (~ret[1] & ret[6]) | ret[5] | ret[5] << 1;
  ret[10] = ret[1] & ~(ret[5] >> 5);
  ret[11] = ret[5] | ((ret[10] & ret[8]) >> 6);
  ret[12] = 0;

  memcpy(ret + 3, ret + 2, 1);
  memcpy(ret + 9, ret + 3, 1);
  memcpy(ret + 7, ret + 4, 1);

  ret[12] = 0;

  return ret;
}

int main() {
  u_int8_t *ret = generate_words();
  printf("%s", ret);
  free(ret);
  return 0;
}

Now our code’s starting to look good (bad?). The only problem is that it takes way too long to run - my super-powerful laptop processor from two years ago took nearly 5 milliseconds to run it! Well, we could also optimize it to be cache-aligned for maximum performance :

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <zconf.h>

u_int8_t* generate_words() {
    u_int8_t * ret = aligned_alloc(sysconf(_SC_LEVEL1_DCACHE_LINESIZE), 13);

    ret[0] = (1 << 6) | (1 << 3);
    ret[1] = (ret[0] & (1 << 6)) | (ret[0] >> 1) | (ret[0] >> 6);
    ret[2] = ret[0] | ret[1] & ~(ret[0] >> 6);
    ret[4] = ret[2] | ret[1] | (ret[0] >> 5);
    ret[5] = (ret[1] & ret[2] & ret[0]) >> 1;
    ret[6] = (ret[4] & ~ret[5] | (ret[0] >> 2)) & ~(ret[0] >> 4 << 1);
    ret[8] = (~ret[1] & ret[6]) | ret[5] | ret[5] << 1;
    ret[10] = ret[1] & ~(ret[5] >> 5);
    ret[11] = ret[5] | ((ret[10] & ret[8]) >> 6);
    ret[12] = 0;

    memcpy(ret + 3, ret + 2, 1);
    memcpy(ret + 9, ret + 3, 1);
    memcpy(ret + 7, ret + 4, 1);

    ret[12] = 0;

    return ret;
}

int main() {
    u_int8_t *ret = generate_words();
    printf("%s", ret);
    free(ret);
    return 0;
}

Well, now we’ve got way too many lines. Let’s trim down on it by getting rid of the function and putting our code into a for loop:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int main() {
    u_int8_t * ret = aligned_alloc(sysconf(_SC_LEVEL1_DCACHE_LINESIZE), 13);

    for (__auto_type i = 0; i < 13; i++) {
        if (i == 0)
            ret[i] = (1 << (i + 6)) | (1 << (i + 3));
        else if (i == 1)
            ret[i] = (ret[i - 1] & (1 << (i + 5))) | (ret[i - 1] >> i) | (ret[i - 1] >> (i + 5));
        else if (i == 2)
            ret[i] = ret[i - 2] | ret[1] & ~(ret[i - 2] >> (i + 4));
        else if (i == 4)
            ret[i] = ret[i - 2] | ret[i - 3] | (ret[i - 4] >> (i + 1));
        else if (i == 5)
            ret[i] = (ret[i - 4] & ret[i - 3] & ret[i - 5]) >> (i - 4);
        else if (i == 6)
            ret[i] = (ret[i - 2] & ~ret[i - 1] | (ret[i - 6] >> (i - 4))) & ~(ret[i - 6] >> (i - 2) << (i - 5));
        else if (i == 8)
            ret[i] = (~ret[i - 7] & ret[i - 2]) | ret[i - 3] | ret[i - 3] << (i - 7);
        else if (i == 10)
            ret[i] = ret[i - 9] & ~(ret[i - 5] >> (i - 5));
        else if (i == 11)
            ret[i] = ret[i - 6] | ((ret[i - 1] & ret[i - 3]) >> (i - 5));
        else
            ret[i] = i - i;
    }

    memcpy(ret + 3, ret + 2, 1);
    memcpy(ret + 9, ret + 3, 1);
    memcpy(ret + 7, ret + 4, 1);

    ret[12] = 0;
    printf("%s", ret);
    free(ret);
    return 0;
}

Wait, that for loop we added there could be written in an even more convoluted way! Let’s do so:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

volatile unsigned int a = 0 - 1;
volatile u_int8_t * z = NULL;

int main() {
    switch(a) {
        case 0 - 1:
            z = aligned_alloc(sysconf(_SC_LEVEL1_DCACHE_LINESIZE), 1 * 10 + 3);
            break;
        case 1 * 10 + 3:
            memcpy(z + 3, z + 2, 1);
            memcpy(z + 9, z + 3, 1);
            memcpy(z + 7, z + 4, 1);
            printf("%s", z);
            free(z);
            return 0;
        default:
            z[a] = a == 0 ? z[a] = (1 << (a + 6)) | (1 << (a + 3)) : a == 1 ? (z[a - 1] & (1 << (a + 5))) | (z[a - 1] >> a) | (z[a - 1] >> (a + 5))  : a == 2 ? z[a - 2] | z[1] & ~(z[a - 2] >> (a + 4)) : a == 4 ? z[a - 2] | z[a - 3] | (z[a - 4] >> (a + 1)) : a == 5 ? (z[a - 4] & z[a - 3] & z[a - 5]) >> (a - 4) : a == 6 ? (z[a - 2] & ~z[a - 1] | (z[a - 6] >> (a - 4))) & ~(z[a - 6] >> (a - 2) << (a - 5)) : a == 8 ? (~z[a - 7] & z[a - 2]) | z[a - 3] | z[a - 3] << (a - 7) : a == 10 ? z[a - 9] & ~(z[a - 5] >> (a - 5)) : a == 11 ? z[a - 6] | ((z[a - 1] & z[a - 3]) >> (a - 5)) : a - a;
    }

    a = a + 1;
    main();
}

Well, our switch there is taking quite a bit of space, isn’t it? Let’s fix that and clean up the code a bit:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

volatile u_int8_t a = 0 - 1;
volatile u_int8_t * z = NULL;

int main() {
    a==0-1?z=aligned_alloc(sysconf(_SC_LEVEL1_DCACHE_LINESIZE),1*10+3):a!=1*10+3?z[a]=a==0?z[a]=(1<<(a+6))|(1<<(a+3)):a==1?(z[a-1]&(1<<(a+5)))|(z[a-1]>>a)|(z[a-1]>>(a+5)):a==2?z[a-2]|z[1]&~(z[a-2]>>(a+4)):a==4?z[a-2]|z[a-3]|(z[a-4]>>(a+1)):a==5?(z[a-4]&z[a-3]&z[a-5])>>(a-4):a==6?(z[a-2]&~z[a-1]|(z[a-6]>>(a-4)))&~(z[a-6]>>(a-2)<<(a-5)):a==8?(~z[a-7]&z[a-2])|z[a-3]|z[a-3]<<(a-7):a==10?z[a-9]&~(z[a-5]>>(a-5)):a==11?z[a-6]|((z[a-1]&z[a-3])>>(a-5)):a-a:0;if(a==1*10+3){memcpy(z+3,z+2,1);memcpy(z+9,z+3,1);memcpy(z+7,z+4,1);printf("%s",z);free(z);return 0;}a=a+1;main();
}

And there we go - a program that prints Hello World , in just a single line of logic!

I’m sure there’s more ways to improve the unreadability of the project, so if you see something that could be improved, please let me know!


About Joyk


Aggregate valuable and interesting links.
Joyk means Joy of geeK