9

MVT: Improve performance and memory usage · postgis/postgis@99c50d4 · GitHub

 3 years ago
source link: https://github.com/postgis/postgis/commit/99c50d4602a6e1d94f65932cbcbee933af998ea1
Go to the source link to view the article. You can view the picture content, updated content and better typesetting reading experience. If the link is broken, please click the button below to view the snapshot at that time.

Improve performance and memory usage · postgis/postgis@99c50d4 · GitHubPermalink

Browse files

MVT: Improve performance and memory usage

- Reduce memory usage (raises protobuf-c requirement to 1.1).
- Improves performance when merging tiles (from parallel queries).
- Uses protobuf values to reduce the number of transformations and
  memory usage.
- Uses a temporal context when transforming data (reading rows) to
  have a fast way of cleaning up memory before the parallel process
  starts.
- Processes texts and cstrings with each known reading functions
  instead of the `OidOutputFunctionCall` fallback.
- Avoids multiple hash function calls when a value is not found in
  the hash table (once for the lookup and once for the insertion).

Closes #576
Closes #4737

Algunenano

committed on Aug 18

1 parent 9ef9130 commit 99c50d4602a6e1d94f65932cbcbee933af998ea1
Showing with 235 additions and 278 deletions.

5

NEWS

@@ -2,6 +2,9 @@ PostGIS 3.1.0beta1

2020/xx/xx

Only tickets not included in 3.1.0alpha2

* Breaking changes *

- #4737, Bump minimum protobuf-c requirement to 1.1.0 (Raúl Marín)

* New features *

- #4698, Add a precision parameter to ST_AsEWKT (Raúl Marín)

@@ -23,6 +26,8 @@ Only tickets not included in 3.1.0alpha2

ST_BoundingDiagonal.

- #4741, Don't use ST_PointInsideCircle if you need indexes, use ST_DWithin instead.

Documentation adjusted (Darafei Praliaskouski)

- #4737, Improve performance and reduce memory usage in ST_AsMVT, especially in

queries involving parallelism (Raúl Marín)

* Bug fixes *

- #4691, Fix segfault during gist index creation with empty geometries (Raúl Marín)

@@ -998,7 +998,7 @@ if test "$CHECK_PROTOBUF" != "no"; then

dnl Try pkgconfig first

if test -n "$PKG_CONFIG"; then

dnl Ensure libprotobuf-c is of minimum required version

PKG_CHECK_MODULES([PROTOBUFC], [libprotobuf-c >= 1.0.0], [

PKG_CHECK_MODULES([PROTOBUFC], [libprotobuf-c >= 1.1.0], [

PROTOBUF_CPPFLAGS="$PROTOBUFC_CFLAGS";

PROTOBUF_LDFLAGS="$PROTOBUFC_LIBS";

], [

@@ -1069,10 +1069,8 @@ if test "$CHECK_PROTOBUF" != "no"; then

if test "$HAVE_PROTOBUF" = "yes"; then

AC_DEFINE([HAVE_LIBPROTOBUF], [1], [Define to 1 if libprotobuf-c is present])

AC_DEFINE_UNQUOTED([LIBPROTOBUF_VERSION], [$PROTOC_VERSION], [Numeric version number for libprotobuf-c])

if test $PROTOC_VERSION -ge 1001000; then

AC_DEFINE([HAVE_GEOBUF], [1], [Define to 1 if libprotobuf is >= 1.1])

HAVE_GEOBUF=yes

fi

AC_DEFINE([HAVE_GEOBUF], [1], [Define to 1 if geobuf is present])

HAVE_GEOBUF=yes

fi

AC_SUBST([HAVE_PROTOBUF])

@@ -129,14 +129,14 @@ Datum pgis_asmvt_transfn(PG_FUNCTION_ARGS)

elog(ERROR, "Missing libprotobuf-c");

PG_RETURN_NULL();

#else

MemoryContext aggcontext;

MemoryContext aggcontext, old_context;

mvt_agg_context *ctx;

if (!AggCheckCallContext(fcinfo, &aggcontext))

elog(ERROR, "%s called in non-aggregate context", __func__);

MemoryContextSwitchTo(aggcontext);

if (PG_ARGISNULL(0)) {

old_context = MemoryContextSwitchTo(aggcontext);

ctx = palloc(sizeof(*ctx));

ctx->name = "default";

if (PG_NARGS() > 2 && !PG_ARGISNULL(2))

@@ -151,7 +151,12 @@ Datum pgis_asmvt_transfn(PG_FUNCTION_ARGS)

ctx->id_name = text_to_cstring(PG_GETARG_TEXT_P(5));

else

ctx->id_name = NULL;

ctx->trans_context = AllocSetContextCreate(aggcontext, "MVT transfn", ALLOCSET_DEFAULT_SIZES);

MemoryContextSwitchTo(ctx->trans_context);

mvt_agg_init_context(ctx);

MemoryContextSwitchTo(old_context);

} else {

ctx = (mvt_agg_context *) PG_GETARG_POINTER(0);

}

@@ -160,7 +165,10 @@ Datum pgis_asmvt_transfn(PG_FUNCTION_ARGS)

elog(ERROR, "%s: parameter row cannot be other than a rowtype", __func__);

ctx->row = PG_GETARG_HEAPTUPLEHEADER(1);

old_context = MemoryContextSwitchTo(ctx->trans_context);

mvt_agg_transfn(ctx);

MemoryContextSwitchTo(old_context);

PG_FREE_IF_COPY(ctx->row, 1);

PG_RETURN_POINTER(ctx);

#endif

@@ -203,6 +211,7 @@ Datum pgis_asmvt_serialfn(PG_FUNCTION_ARGS)

PG_RETURN_NULL();

#else

mvt_agg_context *ctx;

bytea *result;

elog(DEBUG2, "%s called", __func__);

if (!AggCheckCallContext(fcinfo, NULL))

elog(ERROR, "%s called in non-aggregate context", __func__);

@@ -215,7 +224,11 @@ Datum pgis_asmvt_serialfn(PG_FUNCTION_ARGS)

}

ctx = (mvt_agg_context *) PG_GETARG_POINTER(0);

PG_RETURN_BYTEA_P(mvt_ctx_serialize(ctx));

result = mvt_ctx_serialize(ctx);

if (ctx->trans_context)

MemoryContextDelete(ctx->trans_context);

ctx->trans_context = NULL;

PG_RETURN_BYTEA_P(result);

#endif

}

Large diffs are not rendered by default.

@@ -56,26 +56,43 @@ typedef struct mvt_column_cache

typedef struct mvt_agg_context

{

/* Temporal memory context using during during pgis_asmvt_transfn and deleted after

* pgis_asmvt_serialfn. This is to have a faster and simpler way to delete the temporal

* objects in parallel runs */

MemoryContext trans_context;

char *name;

uint32_t extent;

/* Name and position of the property that sets the feature id */

char *id_name;

uint32_t id_index;

/* Name and position of the property that sets the feature geometry */

char *geom_name;

uint32_t geom_index;

HeapTupleHeader row;

VectorTile__Tile__Feature *feature;

VectorTile__Tile__Layer *layer;

VectorTile__Tile *tile;

size_t features_capacity;

/* Hash table holding the feature keys */

struct mvt_kv_key *keys_hash;

struct mvt_kv_string_value *string_values_hash;

struct mvt_kv_float_value *float_values_hash;

struct mvt_kv_double_value *double_values_hash;

struct mvt_kv_uint_value *uint_values_hash;

struct mvt_kv_sint_value *sint_values_hash;

struct mvt_kv_bool_value *bool_values_hash;

/* Hash tables holding the feature values, one per type */

struct mvt_kv_value *string_values_hash;

struct mvt_kv_value *float_values_hash;

struct mvt_kv_value *double_values_hash;

struct mvt_kv_value *uint_values_hash;

struct mvt_kv_value *sint_values_hash;

struct mvt_kv_value *bool_values_hash;

/* Total number of values stored (for all combined value hash tables) */

uint32_t values_hash_i;

/* Total number of keys stored */

uint32_t keys_hash_i;

uint32_t row_columns;

mvt_column_cache column_cache;

} mvt_agg_context;

@@ -16,17 +16,17 @@ message Tile {

// Variant type encoding

// The use of values is described in section 4.1 of the specification

// PostGIS: Made oneof (protobuf-c 1.1+ required) to reduce memory usage

message Value {

// Exactly one of these values must be present in a valid message

optional string string_value = 1;

optional float float_value = 2;

optional double double_value = 3;

optional int64 int_value = 4;

optional uint64 uint_value = 5;

optional sint64 sint_value = 6;

optional bool bool_value = 7;

extensions 8 to max;

oneof test_oneof {

pramsey on Sep 19

Member

Do we want to use oneof? Do we need to? It makes building under older systems (RHEL7) hard because their version of protobuf is older.

Algunenano on Sep 19

Author

Member

I don' t have the numbers at hand right now, but I think it reduces memory usage in around 20%. It only increased the minimum protobuf-c release to match the requirements that were already in place for geobuf.

If we want to reduce it and keep the improvement, we could push the *.[c|h] instead, and even remove the dependency completely by adding protobuf-c to deps/ as it's literally a couple of files.

pramsey on Sep 19

Member

Vendoring protobuf would sidestep issues with older platforms, nicely, I kind of lean toward that...

pramsey on Sep 19

Member

wait, doesn't protobuf-c depend on protobuf, a c++ library, quite a bit larger?

string string_value = 1;

float float_value = 2;

double double_value = 3;

int64 int_value = 4;

uint64 uint_value = 5;

sint64 sint_value = 6;

bool bool_value = 7;

}

}

// Features are described in section 4.2 of the specification

@@ -40,7 +40,8 @@ message Tile {

repeated uint32 tags = 2 [ packed = true ];

// The type of geometry stored in this feature.

optional GeomType type = 3 [ default = UNKNOWN ];

// PostGIS: Made mandatory

required GeomType type = 3 [ default = UNKNOWN ];

// Contains a stream of commands and parameters (vertices).

// A detailed description on geometry encoding is located in

@@ -67,9 +68,8 @@ message Tile {

// Dictionary encoding for values

repeated Value values = 4;

// Although this is an "optional" field it is required by the specification.

// See https://github.com/mapbox/vector-tile-spec/issues/47

optional uint32 extent = 5 [ default = 4096 ];

// PostGIS: Made mandatory (see https://github.com/mapbox/vector-tile-spec/issues/47)

required uint32 extent = 5 [ default = 4096 ];

extensions 16 to max;

}

0 comments on commit 99c50d4

Please sign in to comment.


About Joyk


Aggregate valuable and interesting links.
Joyk means Joy of geeK