Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 71 additions & 2 deletions include/nat20/cbor.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,15 @@ extern "C" {
* data items as specified in RFC 8949. Each type corresponds to a specific kind of data
* that can be encoded in CBOR.
*
* In addition to the CBOR major types, this enumeration defines a set of
* synthetic types that represent indefinite length items. These are not CBOR
* major types. They encode the indefinite length variant of a major type by
* setting the 0x100 bit, i.e. their value is `(major_type | 0x100)`. The read
* and write functions use this bit to convert between the synthetic type and
* its wire encoding, which uses the additional info value 31. The break stop
* code that terminates indefinite length items is likewise represented as a
* synthetic type (@ref n20_cbor_type_indefinite_break_e).
*
* @sa https://tools.ietf.org/html/rfc8949
*/
typedef enum n20_cbor_type_s {
Expand Down Expand Up @@ -112,6 +121,41 @@ typedef enum n20_cbor_type_s {
* Represents simple values (e.g., true, false, null) or floating-point numbers.
*/
n20_cbor_type_simple_float_e = 7,
/**
* @brief Indefinite length byte string.
*
* Synthetic type for the byte string major type with the indefinite
* length bit (0x100) set.
*/
n20_cbor_type_indefinite_bytes_e = 0x102,
/**
* @brief Indefinite length text string.
*
* Synthetic type for the text string major type with the indefinite
* length bit (0x100) set.
*/
n20_cbor_type_indefinite_string_e = 0x103,
/**
* @brief Indefinite length array.
*
* Synthetic type for the array major type with the indefinite length
* bit (0x100) set.
*/
n20_cbor_type_indefinite_array_e = 0x104,
/**
* @brief Indefinite length map.
*
* Synthetic type for the map major type with the indefinite length
* bit (0x100) set.
*/
n20_cbor_type_indefinite_map_e = 0x105,
/**
* @brief Break stop code for indefinite length items.
*
* Synthetic type for the simple/float major type with the indefinite
* length bit (0x100) set. It marks the end of an indefinite length item.
*/
n20_cbor_type_indefinite_break_e = 0x107,
} n20_cbor_type_t;

/**
Expand Down Expand Up @@ -142,6 +186,12 @@ typedef enum n20_cbor_type_s {
* it writes the special value 0xf7 to the stream, and @p value is ignored.
* 0xf7 is the encoding of the special value "undefined" in CBOR.
*
* If @p type is one of the indefinite length types
* (@ref n20_cbor_type_indefinite_bytes_e, @ref n20_cbor_type_indefinite_string_e,
* @ref n20_cbor_type_indefinite_array_e, @ref n20_cbor_type_indefinite_map_e)
* or @ref n20_cbor_type_indefinite_break_e, an indefinite length header
* (additional info value 31) is written and @p value is ignored.
*
* @param s The stream to write to.
* @param type The CBOR type (see @ref n20_cbor_type_t).
* @param value The value associated with the CBOR type.
Expand Down Expand Up @@ -249,6 +299,16 @@ extern void n20_cbor_write_map_header(n20_stream_t *s, size_t size);
*
* This function reads a CBOR header from the stream and reports the decoded
* type and its associated value through @p type and @p n.
*
* Indefinite length headers (additional info value 31) for byte strings, text
* strings, arrays, and maps are reported via the corresponding synthetic types
* (@ref n20_cbor_type_indefinite_bytes_e, @ref n20_cbor_type_indefinite_string_e,
* @ref n20_cbor_type_indefinite_array_e, @ref n20_cbor_type_indefinite_map_e)
* with @p n set to 0. The break stop code is reported as
* @ref n20_cbor_type_indefinite_break_e. Additional info value 31 with any
* other major type (unsigned integer, negative integer, or tag) is rejected
* and the function returns false, as are the reserved additional info values
* 28 to 30.
*
* @param s The stream to read from.
* @param type The CBOR type (see @ref n20_cbor_type_t).
* @param n The value associated with the CBOR type.
Expand All @@ -263,8 +323,17 @@ extern bool n20_cbor_read_header(n20_istream_t *s, n20_cbor_type_t *type, uint64
* past the item. If the item has tags or has a nested structure, like
* an array or map, it will also advance past those structures.
*
* This function will skip past any CBOR structure, however, it does not
* support indefinite length items.
* This function will skip past any CBOR structure, including indefinite length
* byte strings, text strings, arrays, and maps. For indefinite length items it
* consumes the contained chunks or elements up to and including the break stop
* code.
*
* A break stop code encountered where a data item is expected (for example as
* the top-level item, in the value position of a map, or in place of a chunk
* of an indefinite length string) is treated as an error and the function
* returns false. The chunks of an indefinite length byte or text string must
* be definite length byte or text strings respectively; anything else is an
* error.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you can mention that it's possible to have nested indefinite length arrays/maps, and if they're there, they also need to have the break stop codes.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added tests accordingly.

*
* @param s The stream to read from.
* @return true if the item was skipped successfully, false otherwise.
Expand Down
165 changes: 145 additions & 20 deletions src/core/cbor.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,33 @@
#include <nat20/types.h>

void n20_cbor_write_header(n20_stream_t *const s, n20_cbor_type_t cbor_type, uint64_t n) {
if ((unsigned int)cbor_type > 7) {
/* 0xf7 is the encoding of the special value "undefined". */
cbor_type = n20_cbor_type_simple_float_e;
n = N20_SIMPLE_UNDEFINED;
switch (cbor_type) {
case n20_cbor_type_uint_e:
case n20_cbor_type_nint_e:
case n20_cbor_type_bytes_e:
case n20_cbor_type_string_e:
case n20_cbor_type_array_e:
case n20_cbor_type_map_e:
case n20_cbor_type_tag_e:
case n20_cbor_type_simple_float_e:
break;
case n20_cbor_type_indefinite_bytes_e:
case n20_cbor_type_indefinite_string_e:
case n20_cbor_type_indefinite_array_e:
case n20_cbor_type_indefinite_map_e:
case n20_cbor_type_indefinite_break_e: {
cbor_type = (n20_cbor_type_t)(cbor_type - 0x100);
uint8_t header = (uint8_t)(cbor_type << 5) | 31;
n20_stream_prepend(s, &header, /*src_len=*/1);
return;
}
default:
/* Invalid types are encoded as "undefined". */
cbor_type = n20_cbor_type_simple_float_e;
n = N20_SIMPLE_UNDEFINED;
break;
}

uint8_t header = (uint8_t)(cbor_type << 5);

size_t value_size = 0;
Expand Down Expand Up @@ -137,9 +159,28 @@ bool n20_cbor_read_header(n20_istream_t *const s, n20_cbor_type_t *const type, u
*type = (n20_cbor_type_t)(header >> 5);
uint8_t additional_info = header & 0x1f;

if (additional_info == 31) {
switch (*type) {
case n20_cbor_type_array_e:
case n20_cbor_type_map_e:
case n20_cbor_type_bytes_e:
case n20_cbor_type_string_e:
case n20_cbor_type_simple_float_e:
/* Indefinite length encoding is encoded in the ninth bit of the type. */
*type = (n20_cbor_type_t)(*type + 0x100);
*n = 0;
return true;
default:
/* Additional info 31 is only valid for arrays, maps, byte strings, and text
* strings, and in the simple/float type denoting the end of indefinite length
* items. */
return false;
Comment thread
werwurm marked this conversation as resolved.
}
}

if (additional_info > 27) {
/* Reserved additional info value. And this code does not
* support indefinite length encoding (31). */
/* Reserved additional info values (28-30). Indefinite length
* encoding (31) is handled above. */
return false;
}

Expand All @@ -163,57 +204,141 @@ bool n20_cbor_read_header(n20_istream_t *const s, n20_cbor_type_t *const type, u
return true;
}

bool n20_cbor_read_skip_item(n20_istream_t *const s) {
/**
 * @brief Result of skipping a single CBOR item.
 *
 * Tri-state result used by the internal skip helpers. In addition to
 * success and failure it distinguishes the case where a break stop code
 * was read in place of a data item, so that loops over indefinite length
 * items know when to terminate.
 */
typedef enum n20_cbor_read_skip_item_result_s {
    /** A complete data item was skipped. */
    n20_cbor_read_skip_item_ok_e = 0,
    /** The item could not be skipped. */
    n20_cbor_read_skip_item_error_e = 1,
    /** A break stop code was read instead of a data item. */
    n20_cbor_read_skip_item_break_e = 2,
} n20_cbor_read_skip_item_result_t;

/* Forward declaration: n20_cbor_read_skip_item_map_element_internal calls this
 * function and is in turn called by it, so the prototype has to be visible
 * before either definition. */
static n20_cbor_read_skip_item_result_t n20_cbor_read_skip_item_internal(n20_istream_t *const s);

/**
 * @brief Skip one key/value pair of a map.
 *
 * The key is skipped first. If skipping the key does not succeed, its
 * result is passed through unchanged, so a break stop code in key position
 * is reported as such to the caller. The value must then be a complete
 * data item; any other outcome, including a break stop code in value
 * position, is an error.
 *
 * @param s The stream to read from.
 * @return The result of skipping the key if that was not ok; otherwise ok
 *         if the value was skipped and error if it was not.
 */
static n20_cbor_read_skip_item_result_t n20_cbor_read_skip_item_map_element_internal(
    n20_istream_t *const s) {
    const n20_cbor_read_skip_item_result_t key_result = n20_cbor_read_skip_item_internal(s);
    if (key_result == n20_cbor_read_skip_item_ok_e) {
        /* A key without a value is malformed: a break or a read failure
         * in value position both collapse to an error. */
        const n20_cbor_read_skip_item_result_t value_result = n20_cbor_read_skip_item_internal(s);
        return (value_result == n20_cbor_read_skip_item_ok_e) ? n20_cbor_read_skip_item_ok_e
                                                              : n20_cbor_read_skip_item_error_e;
    }
    /* Error or break in key position is handed to the caller as is. */
    return key_result;
}

/**
 * @brief Skip one chunk of an indefinite length byte or text string.
 *
 * Reads the next header and accepts either a break stop code or a definite
 * length string of the expected kind, whose payload is then skipped.
 *
 * @param s The stream to read from.
 * @param string If true the chunk must be a text string, otherwise it must
 *               be a byte string.
 * @return break if a break stop code was read, ok if a chunk of the expected
 *         kind was skipped, error otherwise.
 */
static n20_cbor_read_skip_item_result_t n20_cbor_read_skip_item_stringish_chunk_internal(
    n20_istream_t *const s, bool string) {
    n20_cbor_type_t chunk_type;
    uint64_t chunk_len;

    if (!n20_cbor_read_header(s, &chunk_type, &chunk_len)) {
        return n20_cbor_read_skip_item_error_e;
    }

    /* The break check comes first so that the end of the chunk sequence is
     * not mistaken for a chunk of the wrong type. */
    if (chunk_type == n20_cbor_type_indefinite_break_e) {
        return n20_cbor_read_skip_item_break_e;
    }

    const n20_cbor_type_t expected_type = string ? n20_cbor_type_string_e : n20_cbor_type_bytes_e;
    if (chunk_type != expected_type) {
        /* Not a valid expected stringish chunk. */
        return n20_cbor_read_skip_item_error_e;
    }

    /* The size check guards against uncaught truncation when the length is
     * passed on; the slice is only requested if the length fits. */
    if (chunk_len > SIZE_MAX || !n20_istream_get_slice(s, NULL, chunk_len)) {
        return n20_cbor_read_skip_item_error_e;
    }

    return n20_cbor_read_skip_item_ok_e;
}

static n20_cbor_read_skip_item_result_t n20_cbor_read_skip_item_internal(n20_istream_t *const s) {
n20_cbor_type_t type = n20_cbor_type_none_e;
uint64_t n = 0;
if (!n20_cbor_read_header(s, &type, &n)) {
return false;
return n20_cbor_read_skip_item_error_e;
}

switch (type) {
case n20_cbor_type_array_e:
if (n > SIZE_MAX) {
/* Prevent overflow in the loop counter. */
return false;
return n20_cbor_read_skip_item_error_e;
}
for (size_t i = 0; i < n; i++) {
if (!n20_cbor_read_skip_item(s)) {
return false;
if (n20_cbor_read_skip_item_internal(s) != n20_cbor_read_skip_item_ok_e) {
return n20_cbor_read_skip_item_error_e;
}
}
break;
case n20_cbor_type_map_e:
if (n > SIZE_MAX) {
/* Prevent overflow in the loop counter. */
return false;
return n20_cbor_read_skip_item_error_e;
}
for (size_t i = 0; i < n; i++) {
if (!n20_cbor_read_skip_item(s)) {
return false;
if (n20_cbor_read_skip_item_internal(s) != n20_cbor_read_skip_item_ok_e) {
return n20_cbor_read_skip_item_error_e;
}
if (!n20_cbor_read_skip_item(s)) {
return false;
if (n20_cbor_read_skip_item_internal(s) != n20_cbor_read_skip_item_ok_e) {
return n20_cbor_read_skip_item_error_e;
}
}
break;
case n20_cbor_type_bytes_e:
case n20_cbor_type_string_e: {
if (n > SIZE_MAX) {
/* Prevent uncaught truncation. */
return false;
return n20_cbor_read_skip_item_error_e;
}
if (!n20_istream_get_slice(s, NULL, n)) {
return false;
return n20_cbor_read_skip_item_error_e;
}
break;
}
case n20_cbor_type_tag_e:
/* Skip the tag and the item it refers to. */
return n20_cbor_read_skip_item(s);
return n20_cbor_read_skip_item(s) ? n20_cbor_read_skip_item_ok_e

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this call the internal function like other cases?

Suggested change
return n20_cbor_read_skip_item(s) ? n20_cbor_read_skip_item_ok_e
return n20_cbor_read_skip_item_internal(s) ? n20_cbor_read_skip_item_ok_e

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, this is correct. If this returns ..._break_e, it needs to be an error because a break stop code cannot be tagged. In terms of the internal semantics it would be _internal(s) == _ok_e ? _ok_e : _error_e, which is exactly what n20_cbor_read_skip_item does.

: n20_cbor_read_skip_item_error_e;
case n20_cbor_type_indefinite_bytes_e:
case n20_cbor_type_indefinite_string_e: {
n20_cbor_read_skip_item_result_t result;
do {
result = n20_cbor_read_skip_item_stringish_chunk_internal(
s, type == n20_cbor_type_indefinite_string_e);
if (result == n20_cbor_read_skip_item_error_e) {
return n20_cbor_read_skip_item_error_e;
}
} while (result != n20_cbor_read_skip_item_break_e);
break;
}
case n20_cbor_type_indefinite_array_e: {
n20_cbor_read_skip_item_result_t result;
do {
result = n20_cbor_read_skip_item_internal(s);
if (result == n20_cbor_read_skip_item_error_e) {
return n20_cbor_read_skip_item_error_e;
}
} while (result != n20_cbor_read_skip_item_break_e);

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we have a malformed indef length array, wouldn't it be possible for the recursion to overflow?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The read functions make sure not to read past the end of the buffer. Even a definite length byte string can be larger than the buffer. In this case the read functions return false and the internal skip function returns ..._error_e.

break;
}
case n20_cbor_type_indefinite_map_e: {
n20_cbor_read_skip_item_result_t result;
do {
result = n20_cbor_read_skip_item_map_element_internal(s);
if (result == n20_cbor_read_skip_item_error_e) {
return n20_cbor_read_skip_item_error_e;
}
} while (result != n20_cbor_read_skip_item_break_e);
break;
}
case n20_cbor_type_indefinite_break_e:
return n20_cbor_read_skip_item_break_e;
default:
/* Simple values and integers have no additional data to skip. */
break;
}

return true;
return n20_cbor_read_skip_item_ok_e;
}

bool n20_cbor_read_skip_item(n20_istream_t *const s) {
    const n20_cbor_read_skip_item_result_t outcome = n20_cbor_read_skip_item_internal(s);
    /* Only a fully skipped item counts as success; an error and a break
     * stop code in item position both yield false. */
    return outcome == n20_cbor_read_skip_item_ok_e;
}
Loading
Loading