// Snapshot metadata: commit 910e62b5, created Jan 15 (from the commit history view).
// Copyright 2025 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

//! Defines functions for parsing encoded data into Mojom values.
//!
//! Because Mojom's wire format is not self-describing, the functions in this
//! module need to know the type of the data they're parsing. Their job is to
//! take an encoded value and produce a MojomValue of the corresponding type.
//!
//! These functions encode knowledge of Mojom's wire format, and are independent
//! of any particular message.

// IMPORTANT! These functions require the input MojomTypes to have
// their fields in wire order. Use pack_mojom_type in pack.rs to ensure this.
// In the future, we'll probably need a separate type for wire order, since
// (at the very least) MojomType can't handle bitfields.

use crate::ast::*;
use crate::errors::*;
use crate::parse_primitives::*;
use std::collections::HashMap;

/// Parse one leaf value, i.e. a value of any type that carries no nested
/// data (anything but a struct or array).
///
/// Integer types are decoded by the primitive parser of the same name. Enums
/// are read via `parse_u32` and then checked with the type's `is_valid`
/// predicate; a rejected value produces an invalid-discriminant error whose
/// position is 4 bytes before the current parse position.
fn parse_leaf_element(data: &mut ParserData, ty: &PackedLeafType) -> ParsingResult<MojomValue> {
    let _ = (parse_f32, parse_f64); // Silence dead code warning until this is used
    let parsed = match ty {
        PackedLeafType::UInt8 => MojomValue::UInt8(parse_u8(data)?),
        PackedLeafType::UInt16 => MojomValue::UInt16(parse_u16(data)?),
        PackedLeafType::UInt32 => MojomValue::UInt32(parse_u32(data)?),
        PackedLeafType::UInt64 => MojomValue::UInt64(parse_u64(data)?),
        PackedLeafType::Int8 => MojomValue::Int8(parse_i8(data)?),
        PackedLeafType::Int16 => MojomValue::Int16(parse_i16(data)?),
        PackedLeafType::Int32 => MojomValue::Int32(parse_i32(data)?),
        PackedLeafType::Int64 => MojomValue::Int64(parse_i64(data)?),
        PackedLeafType::Enum { is_valid } => {
            let raw = parse_u32(data)?;
            if !is_valid.call(raw) {
                // Point the error at the first byte of the value we just consumed.
                return Err(ParsingError::invalid_discriminant(data.bytes_parsed() - 4, raw));
            }
            MojomValue::Enum(raw)
        }
    };
    Ok(parsed)
}

/// Consume padding until the number of bytes parsed so far is a multiple of
/// `alignment`. Consumes nothing if the position is already aligned.
///
/// The skipped bytes are deliberately not checked for being 0: the message
/// may come from a version newer than the one our mojom file describes.
fn skip_to_alignment(data: &mut ParserData, alignment: usize) -> ParsingResult<()> {
    match data.bytes_parsed() % alignment {
        0 => Ok(()),
        // `overshoot` is how far past the previous aligned position we are.
        overshoot => parse_padding(data, alignment - overshoot),
    }
}

// Mojom structs and arrays are never nested inside each other. Instead, they
// are represented by a pointer with an offset to the actual data which appears
// later in the message.
// The actual nested structs/arrays will appear, in declaration order, after all
// the other fields. This struct holds the information we need to parse and
// validate them, when we reach them.

/// Information about a nested struct/array, which we expect to see later.
///
/// One of these is recorded for every pointer field met while parsing a body;
/// the records are consumed once the body's own fields have been parsed.
struct NestedDataInfo<'a> {
    /// The packed type of the pointed-to data, used to pick the right parser.
    ty: &'a PackedStructuredType,
    /// Name of the field that held the pointer. Reported in wrong-pointer
    /// errors and stored as the name of the parsed value.
    field_name: String,
    /// Index at which the parsed value is stored in the enclosing body's
    /// result vectors.
    ordinal: Ordinal,
    /// The expected location of the nested data, as an offset in bytes from the
    /// start of the enclosing struct
    expected_offset: usize,
    /// Tracks whether this pointer was contained in a union, and if so what
    /// its discriminant was.
    union_discriminant: Option<u32>,
}

/// Return the highest ordinal that appears in a packed struct.
///
/// Every ordinal is considered, including all populated slots of a bitfield
/// even if unpopulated (`None`) slots sit between them. Returns `None` when no
/// ordinal appears at all, e.g. for an empty field list.
// FOR_RELEASE: It would be nicer to store this in the wire type so we
// don't need to compute it each time.
fn get_max_ordinal(fields: &[MojomWireType]) -> Option<Ordinal> {
    // `Option` orders `None` below every `Some(_)`, so taking the `max` of two
    // options yields the larger ordinal, or `None` if neither side has one.
    let mut highest: Option<Ordinal> = None;
    for wire_type in fields {
        match wire_type {
            MojomWireType::Leaf { ordinal, .. }
            | MojomWireType::Pointer { ordinal, .. }
            | MojomWireType::Union { ordinal, .. } => highest = highest.max(Some(*ordinal)),
            MojomWireType::Bitfield { ordinals } => {
                // `flatten` skips empty slots instead of stopping at the first one.
                highest = highest.max(ordinals.iter().flatten().copied().max());
            }
        }
    }
    highest
}

/// Parse a 32-bit size as part of a struct or array header.
///
/// The value is accepted only if it fits in a `usize`, is at least 8, and is
/// a multiple of 8. Anything else yields an invalid-size error positioned at
/// the start of the 4 bytes that were read.
pub fn parse_size(data: &mut ParserData) -> ParsingResult<usize> {
    let raw = parse_u32(data)?;
    // All errors below point at the first byte of the field we just consumed.
    let field_start = data.bytes_parsed() - 4;
    let size: usize = raw.try_into().map_err(|_| ParsingError::invalid_size(field_start, raw))?;
    if size >= 8 && size % 8 == 0 {
        Ok(size)
    } else {
        Err(ParsingError::invalid_size(field_start, raw))
    }
}

/// Parse a 64-bit pointer value and return it as a `usize`.
///
/// A value of 0 is accepted only when `may_be_null` is true. Any other value
/// must fit in a `usize`, be at least 8, and be a multiple of 8. Anything else
/// yields an invalid-pointer error positioned at the start of the 8 bytes that
/// were read.
pub fn parse_pointer(data: &mut ParserData, may_be_null: bool) -> ParsingResult<usize> {
    let raw = parse_u64(data)?;
    // All errors below point at the first byte of the field we just consumed.
    let field_start = data.bytes_parsed() - 8;
    let pointer: usize =
        raw.try_into().map_err(|_| ParsingError::invalid_pointer(field_start, raw))?;

    let is_permitted_null = may_be_null && pointer == 0;
    let is_well_formed = pointer >= 8 && pointer % 8 == 0;
    if is_permitted_null || is_well_formed {
        Ok(pointer)
    } else {
        Err(ParsingError::invalid_pointer(field_start, raw))
    }
}

/// Parse a struct: its header (a size followed by a 32-bit version number)
/// and then its body.
///
/// `fields` describes the struct's fields and `field_names` gives the name of
/// each one, index for index. On success the result is a `MojomValue::Struct`
/// holding the names and values produced by `parse_structured_body`.
pub fn parse_struct(
    data: &mut ParserData,
    field_names: &[String],
    fields: &[MojomWireType],
) -> ParsingResult<MojomValue> {
    let declared_size = parse_size(data)?;
    // We're ignoring versioning for now, but the field still has to be consumed.
    parse_u32(data)?;
    parse_structured_body(data, None, declared_size, field_names, fields)
        .map(|(names, values)| MojomValue::Struct(names, values))
}

/// Parse a union value and return its discriminant together with the value,
/// if the value could be built right away.
///
/// Although unions are structured data, a union value is sometimes packed
/// directly into the body of a struct or array rather than living behind a
/// pointer. That matters when the union itself holds a pointer: the
/// pointed-to data then appears at the end of the *enclosing* body, not at
/// the end of the union.
///
/// `enclosing_nested_data_list` says which situation we are in:
///
/// * `Some(list)`: the union sits inside an enclosing body. If the selected
///   variant is a pointer, its information is appended to `list` and the
///   returned value is `None`; the caller has to construct the union value
///   later, once the nested data has been parsed.
/// * `None`: the union stands alone. The returned value is always `Some`.
///
/// In every other case (a non-pointer variant) the returned value is `Some`
/// and nothing further needs to be done.
///
/// Fails with an invalid-discriminant error if the tag is not a key of
/// `variants`.
fn parse_union<'a>(
    data: &mut ParserData,
    enclosing_nested_data_list: Option<&mut Vec<NestedDataInfo<'a>>>,
    variants: &'a HashMap<u32, MojomWireType>,
) -> ParsingResult<(u32, Option<MojomValue>)> {
    // A union is parsed as a body with exactly one field whose type depends on
    // the tag. That field needs a name, so invent one for debugging purposes.
    static UNION_FIELD_NAME: std::sync::LazyLock<String> =
        std::sync::LazyLock::new(|| "Union_Field".to_string());

    // Union header: size, then the tag selecting the active variant.
    let size_in_bytes = parse_size(data)?;
    let tag = parse_u32(data)?;
    let Some(field_ty) = variants.get(&tag) else {
        return Err(ParsingError::invalid_discriminant(data.bytes_parsed() - 4, tag));
    };

    // Has to be recorded now: the list is handed off to the body parser below.
    let in_enclosing_body = enclosing_nested_data_list.is_some();

    let (_, mut parsed_fields) = parse_structured_body(
        data,
        enclosing_nested_data_list,
        size_in_bytes,
        std::slice::from_ref(&*UNION_FIELD_NAME),
        std::slice::from_ref(field_ty),
    )?;
    assert_eq!(parsed_fields.len(), 1);

    let value = match field_ty {
        MojomWireType::Union { .. } => {
            // Sanity check because unions are really complicated
            unreachable!("Unions should never be packed directly in other unions")
        }
        // A pointer inside an enclosed union was recorded in the enclosing
        // list instead of being turned into a value.
        MojomWireType::Pointer { .. } if in_enclosing_body => None,
        // Either the variant is not a pointer or the union is standalone; in
        // both cases the single parsed field is the finished payload.
        _ => Some(MojomValue::Union(tag, Box::new(parsed_fields.pop().unwrap()))),
    };

    Ok((tag, value))
}

/// Parse the body of a struct, array, or union, having already consumed its
/// header to figure out its expected size and what fields it has.
///
/// `expected_size_in_bytes` is the size announced by the header; it is
/// measured from the start of that (8-byte) header. `fields` lists the body's
/// fields in wire order and `field_names` gives the name of each, index for
/// index.
///
/// The enclosing_nested_data_list argument is only used for a special case
/// involving unions. See the documentation of parse_union for details. When it
/// is `Some`, pointers found in this body are appended to it and no nested
/// data is parsed here.
///
/// Returns two vectors indexed by ordinal: the field names and the parsed
/// values. Slots that are never written keep an empty name and a dummy
/// `MojomValue::Int8(0)`.
///
/// # Errors
///
/// Propagates any error from the primitive parsers. Additionally fails if the
/// fields occupy more bytes than `expected_size_in_bytes`, or if nested data
/// does not start at the offset announced by its pointer.
///
/// # Panics
///
/// Panics if `field_names` is shorter than `fields`, or if a pointer refers
/// to an array (arrays are not implemented yet).
fn parse_structured_body<'a>(
    data: &mut ParserData,
    enclosing_nested_data_list: Option<&mut Vec<NestedDataInfo<'a>>>,
    expected_size_in_bytes: usize,
    field_names: &'a [String],
    fields: &'a [MojomWireType],
) -> ParsingResult<(Vec<String>, Vec<MojomValue>)> {
    // Start counting from the beginning of the header which we already parsed
    let initial_bytes_parsed = data.bytes_parsed() - 8;

    // Pointers are collected either in the caller's list (union packed into an
    // enclosing body) or in our own list, which we work through at the end.
    let mut local_nested_data_list: Vec<NestedDataInfo> = vec![];
    let nested_data_list = match enclosing_nested_data_list {
        Some(list) => list,
        None => &mut local_nested_data_list,
    };

    // Pre-allocate space for the parsed values, so we can write directly into them
    // by index. We have to provide dummy values since rust won't allow
    // uninitialized memory.
    let num_fields = get_max_ordinal(fields).map_or(0, |o| o + 1);
    let mut ret_names: Vec<String> = vec![String::new(); num_fields];
    let mut ret_values: Vec<MojomValue> = vec![MojomValue::Int8(0); num_fields];

    for (index, mojom_wire_type) in fields.iter().enumerate() {
        // Make sure we're at the right alignment for this field
        skip_to_alignment(data, mojom_wire_type.alignment())?;

        let name = field_names
            .get(index)
            .expect("parse_structured_body: field_names should have the same length as fields");

        match mojom_wire_type {
            // Nested structured data, record for later
            MojomWireType::Pointer { ordinal, nested_data_type } => {
                // We don't currently support null pointers
                let pointer_value = parse_pointer(data, false)?;
                // Offset of the pointer itself within this body, i.e. not
                // counting the 8 bytes we just parsed.
                let pointer_offset = data.bytes_parsed() - initial_bytes_parsed - 8;
                let nested_info = NestedDataInfo {
                    ty: nested_data_type,
                    ordinal: *ordinal,
                    field_name: name.clone(),
                    // The pointer value comes straight from the message and may
                    // be arbitrarily large. Saturate instead of overflowing: a
                    // saturated offset can never match a real position, so it is
                    // rejected by the offset validation further down.
                    expected_offset: pointer_offset.saturating_add(pointer_value),
                    // We'll fill this in later if necessary (in the Union branch below)
                    union_discriminant: None,
                };
                nested_data_list.push(nested_info);
            }
            // Nested leaf data, just parse it
            MojomWireType::Leaf { ordinal, leaf_type } => {
                let parsed_value = parse_leaf_element(data, leaf_type)?;
                ret_names[*ordinal] = name.clone();
                ret_values[*ordinal] = parsed_value;
            }
            // One byte holding up to one boolean per bit, lowest bit first.
            MojomWireType::Bitfield { ordinals } => {
                let mut iter = ordinals.into_iter().enumerate();
                let parsed_bits = parse_u8(data)?;
                // NOTE(review): this stops at the first slot without an ordinal,
                // so it presumably relies on used bits being contiguous from
                // bit 0 -- confirm that packing guarantees this.
                while let Some((idx, Some(ordinal))) = iter.next() {
                    let bit = (parsed_bits >> idx) & 1;
                    ret_names[*ordinal] = name.clone();
                    ret_values[*ordinal] = MojomValue::Bool(bit == 1);
                }
            }
            MojomWireType::Union { ordinal, variants } => {
                let bytes_parsed_at_union_start = data.bytes_parsed() - initial_bytes_parsed;
                match parse_union(data, Some(nested_data_list), variants)? {
                    // If we have a complete value, we can just store it as usual.
                    (_, Some(parsed_value)) => {
                        ret_names[*ordinal] = name.clone();
                        ret_values[*ordinal] = parsed_value;
                    }
                    // Otherwise, the union contained a pointer, and its info was appended to
                    // nested_data_list. We need to adjust the appended info so that it's
                    // relative to the enclosing struct, instead of this union.
                    (tag, None) => {
                        // We just pushed an entry, so we know it exists.
                        // We know no elements are later than it because unions cannot nest
                        // directly in other unions, so we'll never recurse more than once.
                        let nested_data_info = nested_data_list.last_mut().unwrap();
                        // This was previously None
                        nested_data_info.union_discriminant = Some(tag);
                        // This was previously the ordinal in the _union_ (i.e. 0)
                        nested_data_info.ordinal = *ordinal;
                        // This was previously the distance from the start of the _union_ body
                        // We need it to be the distance from the start of _this_ body.
                        // Saturating for the same reason as in the Pointer branch above.
                        nested_data_info.expected_offset = nested_data_info
                            .expected_offset
                            .saturating_add(bytes_parsed_at_union_start);
                    }
                }
            }
        };
    }

    // We've reached the end of the struct (not including nested data!)
    // Make sure we parsed the expected number of bytes.
    let bytes_parsed_so_far = data.bytes_parsed() - initial_bytes_parsed;
    if bytes_parsed_so_far > expected_size_in_bytes {
        return Err(ParsingError::wrong_size(
            data.bytes_parsed(),
            expected_size_in_bytes,
            bytes_parsed_so_far,
        ));
    } else if bytes_parsed_so_far < expected_size_in_bytes {
        // The header announced more bytes than our fields account for; consume
        // the remainder as padding.
        parse_padding(data, expected_size_in_bytes - bytes_parsed_so_far)?
    }

    // At this point, we'll only parse nested things if we didn't have an enclosing
    // nested data list; if we did, then the nested things will be at the end of the
    // enclosing object instead (and our local list is empty).
    for nested_data in local_nested_data_list {
        // Nested data is required to appear in the same order as the (packed) fields
        // of the struct. So the expected offset is only useful for validation.
        let bytes_parsed_so_far = data.bytes_parsed() - initial_bytes_parsed;
        if nested_data.expected_offset != bytes_parsed_so_far {
            return Err(ParsingError::wrong_pointer(
                data.bytes_parsed(),
                nested_data.field_name,
                nested_data.expected_offset,
                bytes_parsed_so_far,
            ));
        }
        let mut parsed_data = match nested_data.ty {
            PackedStructuredType::Struct { packed_field_names, packed_field_types } => {
                parse_struct(data, packed_field_names, packed_field_types)?
            }
            PackedStructuredType::Array { .. } => {
                panic!("Arrays are not yet implemented");
            }
            PackedStructuredType::Union { variants } => parse_union(data, None, variants)?
                .1
                .expect("Parsing nested union should always return a value"),
        };
        // If the pointer lived inside a union packed into this body, wrap the
        // nested value in that union now that it is available.
        if let Some(tag) = nested_data.union_discriminant {
            parsed_data = MojomValue::Union(tag, Box::new(parsed_data))
        };
        ret_names[nested_data.ordinal] = nested_data.field_name;
        ret_values[nested_data.ordinal] = parsed_data;
    }
    Ok((ret_names, ret_values))
}

/// Parse one mojom value of the given wire type directly from `data`, without
/// an enclosing struct.
///
/// Only useful for unit tests, because in practice every mojom value is a
/// member of a struct. Not every type is supported: booleans, for example,
/// cannot be parsed on their own.
///
/// # Panics
///
/// Panics for bitfields, for pointers to arrays (not implemented yet), and
/// for pointers to unions (a standalone union is never behind a pointer).
pub fn parse_single_value_for_testing(
    data: &[u8],
    wire_type: &MojomWireType,
) -> ParsingResult<MojomValue> {
    let mut parser_data = ParserData::new(data);
    match wire_type {
        MojomWireType::Leaf { leaf_type, .. } => parse_leaf_element(&mut parser_data, leaf_type),
        MojomWireType::Bitfield { .. } => unimplemented!("Bitfields cannot be parsed individually"),
        MojomWireType::Pointer {
            nested_data_type: PackedStructuredType::Struct { packed_field_names, packed_field_types },
            ..
        } => parse_struct(&mut parser_data, packed_field_names, packed_field_types),
        MojomWireType::Pointer { nested_data_type: PackedStructuredType::Array { .. }, .. } => {
            panic!("Arrays are not yet implemented");
        }
        MojomWireType::Pointer { nested_data_type: PackedStructuredType::Union { .. }, .. } => {
            panic!("Standalone unions are never behind pointers")
        }
        MojomWireType::Union { variants, .. } => {
            // No enclosing body, so the union value is always fully constructed.
            let (_tag, value) = parse_union(&mut parser_data, None, variants)?;
            Ok(value.expect("Parsing standalone union should always return a value"))
        }
    }
}