Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 106 additions & 0 deletions src/car.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
//! CAR (Content Addressable aRchive) v1 container decoding. Encoding is not
//! implemented yet; when it lands this becomes `car/{de,ser}.rs`.

use cbor4ii::core::dec::Read;
use pyo3::prelude::*;
use pyo3::types::*;

use crate::dag_cbor::de::to_pyobject;
use crate::error::value_error;
use crate::ffi::recursion::current_recursion_limit;
use crate::io::leb128::read_u64;
use crate::io::SliceReader;

// Decode an in-memory CAR v1 archive.
//
// Returns `(header, blocks)`: `header` is the decoded DAG-CBOR header object
// and `blocks` is a dict keyed by each block's raw CID bytes, holding the
// decoded DAG-CBOR payload of that block.
//
// Fails with a `value_error` when the header length prefix or header body
// cannot be decoded, when `version` is missing or is not 1, when `roots` is
// missing or empty, when a block CID cannot be parsed or uses a codec other
// than DAG-CBOR (0x71), or when a block body fails to decode. Type-cast and
// extraction failures on the header fields are propagated as-is.
#[pyfunction]
pub fn decode_car<'py>(py: Python<'py>, data: &[u8]) -> PyResult<(Py<PyAny>, Bound<'py, PyDict>)> {
    let reader = &mut SliceReader::new(data);
    let depth_limit = current_recursion_limit();
    let header_error = |detail: &str| value_error("Failed to read CAR header", detail.to_string());

    // The header's length prefix has to be consumed, but its value is not used:
    // the DAG-CBOR decoder finds the end of the header on its own.
    read_u64(reader).map_err(|_| header_error("Invalid uvarint"))?;
    let header_obj =
        to_pyobject(py, reader, 0, depth_limit).map_err(|_| header_error("Invalid DAG-CBOR"))?;

    let header = header_obj.cast_bound::<PyDict>(py)?;

    let version = header
        .get_item("version")?
        .ok_or_else(|| header_error("Version is None"))?;
    if version.cast::<PyInt>()?.extract::<u64>()? != 1 {
        return Err(header_error("Unsupported version. Version must be 1"));
    }

    let roots = header
        .get_item("roots")?
        .ok_or_else(|| header_error("Roots is None"))?;
    if roots.cast::<PyList>()?.len() == 0 {
        return Err(header_error("Roots is empty. Must be at least one"));
    }

    // FIXME (MarshalX): we are not verifying if the roots are valid CIDs

    let blocks = PyDict::new(py);

    // FIXME (MarshalX): a failed length-prefix read simply ends the loop instead
    // of raising, because it may just be EOF. The prefix value itself is unused.
    while read_u64(reader).is_ok() {
        // `&[u8]` is itself an `io::Read`: parse the CID from a copy of the
        // remaining slice and measure how far that copy shrank to learn how
        // many bytes the CID occupied.
        let remaining = reader.buf;
        let mut cursor: &[u8] = remaining;
        let cid = ::cid::Cid::read_bytes(&mut cursor)
            .map_err(|err| value_error("Failed to read CID of block", err.to_string()))?;

        if cid.codec() != 0x71 {
            return Err(value_error(
                "Failed to read CAR block",
                "Unsupported codec. For now we support only DAG-CBOR (0x71)".to_string(),
            ));
        }

        // Move the real reader past the CID so it points at the block body.
        let cid_len = remaining.len() - cursor.len();
        reader.advance(cid_len);

        let block = to_pyobject(py, reader, 0, depth_limit)
            .map_err(|err| value_error("Failed to read CAR block", err.to_string()))?;

        // Blocks are keyed by the CID's raw bytes exactly as they appear in the archive.
        blocks.set_item(PyBytes::new(py, &remaining[..cid_len]), block)?;
    }

    Ok((header_obj, blocks))
}
45 changes: 45 additions & 0 deletions src/cid.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
//! CID (Content IDentifier) codec plus the shared CID helpers used across
//! codecs: extraction from arbitrary Python objects and the O(1) shape check.

pub(crate) mod de;
pub(crate) mod ser;

pub(crate) use de::decode_cid;
pub(crate) use ser::encode_cid;

use pyo3::prelude::*;
use pyo3::types::*;

use crate::convert::extract_bytes;
use crate::error::value_error;

// Cheap pre-filter run before `Cid::try_from`, which has to parse two varints
// and a multihash on every call. Only the leading bytes and the length are
// inspected, so a `true` result is necessary but not sufficient for a valid CID.
// Accepted shapes: at least 4 bytes starting with `0x01` (CIDv1), or exactly
// 34 bytes starting with `0x12 0x20` (CIDv0).
#[inline]
pub(crate) fn looks_like_cid(bytes: &[u8]) -> bool {
    match bytes {
        // CIDv1 prefix; anything shorter than four bytes is rejected outright.
        [0x01, _, _, _, ..] => true,
        // CIDv0: two prefix bytes followed by exactly 32 more bytes.
        [0x12, 0x20, tail @ ..] => tail.len() == 32,
        _ => false,
    }
}

/// Parse a CID out of a Python object.
///
/// A `str` is parsed through `Cid::try_from(&str)`; any other object is first
/// turned into a byte view with `extract_bytes` and parsed from those bytes.
///
/// # Errors
///
/// Propagates the error from `to_str` or `extract_bytes` unchanged, and wraps
/// a CID parse failure in a "Failed to decode CID" value error.
pub(crate) fn extract_cid(data: &Bound<PyAny>) -> PyResult<::cid::Cid> {
    let parsed = match data.cast::<PyString>() {
        Ok(text) => ::cid::Cid::try_from(text.to_str()?),
        Err(_) => ::cid::Cid::try_from(extract_bytes(data)?),
    };

    parsed.map_err(|err| value_error("Failed to decode CID", err.to_string()))
}
29 changes: 29 additions & 0 deletions src/cid/de.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
use pyo3::prelude::*;
use pyo3::types::*;

use crate::cid::extract_cid;

/// Build a Python dict describing the multihash of `cid`, with the keys
/// `code`, `size` and `digest` (the digest as `bytes`).
fn hash_to_pydict<'py>(py: Python<'py>, cid: &::cid::Cid) -> PyResult<Bound<'py, PyDict>> {
    let multihash = cid.hash();

    let out = PyDict::new(py);
    out.set_item("code", multihash.code())?;
    out.set_item("size", multihash.size())?;
    out.set_item("digest", PyBytes::new(py, multihash.digest()))?;
    Ok(out)
}

/// Build a Python dict describing `cid`, with the keys `version`, `codec`
/// and `hash` (the nested dict produced by `hash_to_pydict`).
fn to_pydict<'py>(py: Python<'py>, cid: &::cid::Cid) -> PyResult<Bound<'py, PyDict>> {
    let hash_dict = hash_to_pydict(py, cid)?;
    let version_number = cid.version() as u64;

    // Keys are inserted in the same order as before; Python dicts keep insertion order.
    let out = PyDict::new(py);
    out.set_item("version", version_number)?;
    out.set_item("codec", cid.codec())?;
    out.set_item("hash", hash_dict)?;
    Ok(out)
}

// Python entry point: parse `data` with `extract_cid` and return the CID as a
// dict with the keys `version`, `codec` and `hash`.
#[pyfunction]
pub fn decode_cid<'py>(py: Python<'py>, data: &Bound<PyAny>) -> PyResult<Bound<'py, PyDict>> {
    let cid = extract_cid(data)?;
    to_pydict(py, &cid)
}
9 changes: 9 additions & 0 deletions src/cid/ser.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
use pyo3::prelude::*;
use pyo3::types::*;

use crate::cid::extract_cid;

// Python entry point: parse `data` with `extract_cid` and return the CID's
// string form (its `to_string()` output) as a Python `str`.
#[pyfunction]
pub fn encode_cid<'py>(py: Python<'py>, data: &Bound<PyAny>) -> PyResult<Bound<'py, PyString>> {
    let cid = extract_cid(data)?;
    let text = cid.to_string();
    Ok(PyString::new(py, &text))
}
20 changes: 20 additions & 0 deletions src/convert.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
use pyo3::prelude::*;
use pyo3::types::*;

use crate::error::value_error;

/// Borrow the raw bytes behind a Python `bytes`, `bytearray`, or `str` object
/// (a `str` is viewed as its UTF-8 encoding). No copy is made.
///
/// # Errors
///
/// Returns a value error for any other object type, and propagates the error
/// from `to_str` if the string cannot be viewed as UTF-8.
pub(crate) fn extract_bytes<'py>(obj: &'py Bound<'py, PyAny>) -> PyResult<&'py [u8]> {
    if let Ok(bytes) = obj.cast::<PyBytes>() {
        return Ok(bytes.as_bytes());
    }

    if let Ok(byte_array) = obj.cast::<PyByteArray>() {
        // SAFETY: the returned slice aliases the bytearray's storage, which
        // Python code is able to mutate or resize.
        // NOTE(review): soundness relies on callers not running Python code
        // that touches `obj` while the slice is alive — confirm at call sites.
        return Ok(unsafe { byte_array.as_bytes() });
    }

    if let Ok(text) = obj.cast::<PyString>() {
        return Ok(text.to_str()?.as_bytes());
    }

    Err(value_error(
        "Failed to encode multibase",
        "Unsupported data type".to_string(),
    ))
}
8 changes: 8 additions & 0 deletions src/dag_cbor.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
//! DAG-CBOR codec: decode (`de`) and encode (`ser`) of the IPLD data model
//! to and from native Python objects.

pub(crate) mod de;
pub(crate) mod ser;

pub(crate) use de::{decode_dag_cbor, decode_dag_cbor_multi};
pub(crate) use ser::encode_dag_cbor;
Loading