# coding=utf-8

# Copyright 2025 The Qwen Team and The HuggingFace Inc. team. All rights reserved.



from dataclasses import dataclass

from typing import Optional

import torch



from transformers.modeling_outputs import ModelOutput

from transformers.utils import auto_docstring

from transformers.cache_utils import Cache





@dataclass

@auto_docstring(

    custom_intro="""

    Base class for Qwen3VL causal language model (or autoregressive) outputs.

    """

)

class Qwen3VLCausalLMOutputWithPast(ModelOutput):

    r"""

    loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `labels` is provided):

        Language modeling loss (for next-token prediction).

    logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.vocab_size)`):

        Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).

    past_key_values (`Cache`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):

        It is a [`~cache_utils.Cache`] instance. For more details, see our [kv cache guide](https://huggingface.co/docs/transformers/en/kv_cache).



        Contains pre-computed hidden-states (key and values in the self-attention blocks) that can be used (see

        `past_key_values` input) to speed up sequential decoding.

    rope_deltas (`torch.LongTensor` of shape `(batch_size, )`, *optional*):

        The rope index difference between sequence length and multimodal rope.

    """



    # Every field defaults to `None`, so an instance can be built with any subset of them.
    # NOTE(review): `ModelOutput` presumably exposes fields positionally (tuple-style access),
    # in which case the declaration order below is part of the public interface -- confirm
    # before reordering.

    loss: Optional[torch.FloatTensor] = None

    logits: Optional[torch.FloatTensor] = None

    past_key_values: Optional[Cache] = None

    # `hidden_states` and `attentions` have no entry in the class docstring above.
    # NOTE(review): presumably `auto_docstring` supplies their descriptions -- confirm.

    hidden_states: Optional[tuple[torch.FloatTensor]] = None

    attentions: Optional[tuple[torch.FloatTensor]] = None

    rope_deltas: Optional[torch.LongTensor] = None





@dataclass

@auto_docstring(

    custom_intro="""

    Base class for Qwen3VL model outputs, with hidden states and attentions.

    """

)

class Qwen3VLModelOutputWithPast(ModelOutput):

    r"""

    past_key_values (`Cache`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):

        It is a [`~cache_utils.Cache`] instance. For more details, see our [kv cache guide](https://huggingface.co/docs/transformers/en/kv_cache).



        Contains pre-computed hidden-states (key and values in the self-attention blocks) that can be used (see

        `past_key_values` input) to speed up sequential decoding.

    rope_deltas (`torch.LongTensor` of shape `(batch_size, )`, *optional*):

        The rope index difference between sequence length and multimodal rope.

    """



    # Every field defaults to `None`, so an instance can be built with any subset of them.
    # NOTE(review): `ModelOutput` presumably exposes fields positionally (tuple-style access),
    # in which case the declaration order below is part of the public interface -- confirm
    # before reordering.

    # `last_hidden_state`, `hidden_states` and `attentions` have no entry in the class
    # docstring above.
    # NOTE(review): presumably `auto_docstring` supplies their descriptions -- confirm.

    last_hidden_state: Optional[torch.FloatTensor] = None

    past_key_values: Optional[Cache] = None

    hidden_states: Optional[tuple[torch.FloatTensor]] = None

    attentions: Optional[tuple[torch.FloatTensor]] = None

    rope_deltas: Optional[torch.LongTensor] = None





@dataclass

@auto_docstring(

    custom_intro="""

    Base class for Qwen3VLMoe causal language model (or autoregressive) outputs.

    """

)

class Qwen3VLMoeCausalLMOutputWithPast(ModelOutput):

    r"""

    loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `labels` is provided):

        Language modeling loss (for next-token prediction).

    logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.vocab_size)`):

        Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).

    past_key_values (`Cache`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):

        It is a [`~cache_utils.Cache`] instance. For more details, see our [kv cache guide](https://huggingface.co/docs/transformers/en/kv_cache).



        Contains pre-computed hidden-states (key and values in the self-attention blocks) that can be used (see

        `past_key_values` input) to speed up sequential decoding.

    rope_deltas (`torch.LongTensor` of shape `(batch_size, )`, *optional*):

        The rope index difference between sequence length and multimodal rope.

    aux_loss (`torch.FloatTensor`, *optional*):

        Auxiliary loss term, stored separately from `loss`.

    """



    # Every field defaults to `None`, so an instance can be built with any subset of them.
    # NOTE(review): `ModelOutput` presumably exposes fields positionally (tuple-style access),
    # in which case the declaration order below is part of the public interface -- confirm
    # before reordering.

    loss: Optional[torch.FloatTensor] = None

    logits: Optional[torch.FloatTensor] = None

    past_key_values: Optional[Cache] = None

    # `hidden_states` and `attentions` have no entry in the class docstring above.
    # NOTE(review): presumably `auto_docstring` supplies their descriptions -- confirm.

    hidden_states: Optional[tuple[torch.FloatTensor]] = None

    attentions: Optional[tuple[torch.FloatTensor]] = None

    rope_deltas: Optional[torch.LongTensor] = None

    # The only field this class has beyond `Qwen3VLCausalLMOutputWithPast`; declared last.
    # NOTE(review): presumably the mixture-of-experts router (load-balancing) loss -- confirm
    # against the model's forward pass.

    aux_loss: Optional[torch.FloatTensor] = None