from typing import List, Dict, Any, Optional, TypeVar, Generic, Union, TYPE_CHECKING
from dataiku.generated_sources._typing import TypedDict, Literal, Final, NotRequired
if TYPE_CHECKING:
    from .chat_template_settings import ChatTemplateSettings, _ChatTemplateSettings
    from .device_strategy import DeviceStrategy
    from .inference_engine import InferenceEngine
    from .quantization_mode import QuantizationMode
    from .reasoning_settings import ReasoningSettings, _ReasoningSettings
    from .tool_settings import ToolSettings, _ToolSettings

'''
Translated from class com.dataiku.dip.connections.HuggingFaceLocalConnection$InferenceSettings
Via: PyModel annotation in the class hierarchy
'''


class _InferenceSettings(TypedDict):
    chatTemplateSettings: NotRequired[Optional['ChatTemplateSettings']]
    configFormat: NotRequired[Optional[str]]
    defaultGuidanceScale: NotRequired[Optional[float]]
    defaultHeight: NotRequired[Optional[int]]
    defaultNumInferenceSteps: NotRequired[Optional[int]]
    defaultStrength: NotRequired[Optional[float]]
    defaultWidth: NotRequired[Optional[int]]
    deviceStrategy: NotRequired[Optional['DeviceStrategy']]
    dtype: NotRequired[Optional[str]]
    enableChunkedPrefill: NotRequired[Optional[bool]]
    enableExpertParallelism: NotRequired[Optional[bool]]
    enableJsonConstraintsInPrompt: NotRequired[Optional[bool]]
    enablePrefixCaching: NotRequired[Optional[bool]]
    enableVaeSlicing: NotRequired[Optional[bool]]
    enableVaeTiling: NotRequired[Optional[bool]]
    enforceEager: NotRequired[Optional[bool]]
    engine: 'InferenceEngine'
    gpuMemoryUtilization: NotRequired[Optional[float]]
    guidedDecodingBackend: NotRequired[Optional[str]]
    hfRefinerPath: NotRequired[Optional[str]]
    ignorePatterns: NotRequired[Optional[List[str]]]
    inlineToolsJsonSchemaRefs: NotRequired[Optional[bool]]
    kvCacheDType: NotRequired[Optional[str]]
    limitImagesPerPrompt: NotRequired[Optional[int]]
    loadFormat: NotRequired[Optional[str]]
    maxModelLen: NotRequired[Optional[int]]
    maxNumSeqs: NotRequired[Optional[int]]
    maxSequenceLength: NotRequired[Optional[int]]
    overriddenSettings: NotRequired[Optional[str]]
    pipelineParallelSize: NotRequired[Optional[int]]
    quantizationMode: 'QuantizationMode'
    reasoningSettings: NotRequired[Optional['ReasoningSettings']]
    refinerId: NotRequired[Optional[str]]
    tensorParallelSize: NotRequired[Optional[int]]
    tokenizerMode: NotRequired[Optional[str]]
    toolSettings: NotRequired[Optional['ToolSettings']]
    trustRemoteCode: NotRequired[Optional[bool]]
    vllmEngine: NotRequired[Optional[str]]


# Public name bound to the very same class object as `_InferenceSettings`
# (a plain alias, not a subclass or a copy).
InferenceSettings = _InferenceSettings


# EOF
