feat(api-nodes): network client v2: async ops, cancellation, downloads, refactor (#10390)

* feat(api-nodes): implement new API client for V3 nodes * feat(api-nodes): implement new API client for V3 nodes * feat(api-nodes): implement new API client for V3 nodes * converted WAN nodes to use new client; polishing * fix(auth): do not leak authentification for the absolute urls * convert BFL API nodes to use new API client; remove deprecated BFL nodes * converted Google Veo nodes * fix(Veo3.1 model): take into account "generate_audio" parameter
2025-10-24 08:37:16 +03:00
parent 24188b3141
commit 388b306a2b
29 changed files with 2935 additions and 2298 deletions
--- a/comfy_api_nodes/nodes_kling.py
+++ b/comfy_api_nodes/nodes_kling.py
@@ -5,8 +5,7 @@ For source of truth on the allowed permutations of request fields, please refere
 """

 from __future__ import annotations
-from typing import Optional, TypeVar, Any
-from collections.abc import Callable
+from typing import Optional, TypeVar
 import math
 import logging

@@ -15,7 +14,6 @@ from typing_extensions import override
 import torch

 from comfy_api_nodes.apis import (
-    KlingTaskStatus,
    KlingCameraControl,
    KlingCameraConfig,
    KlingCameraControlType,
@@ -52,26 +50,20 @@ from comfy_api_nodes.apis import (
    KlingCharacterEffectModelName,
    KlingSingleImageEffectModelName,
 )
-from comfy_api_nodes.apis.client import (
-    ApiEndpoint,
-    HttpMethod,
-    SynchronousOperation,
-    PollingOperation,
-    EmptyRequest,
-)
-from comfy_api_nodes.apinode_utils import (
-    tensor_to_base64_string,
-    download_url_to_video_output,
-    upload_video_to_comfyapi,
-    upload_audio_to_comfyapi,
-    download_url_to_image_tensor,
-    validate_string,
-)
-from comfy_api_nodes.util.validation_utils import (
+from comfy_api_nodes.util import (
    validate_image_dimensions,
    validate_image_aspect_ratio,
    validate_video_dimensions,
    validate_video_duration,
+    tensor_to_base64_string,
+    validate_string,
+    upload_audio_to_comfyapi,
+    download_url_to_image_tensor,
+    upload_video_to_comfyapi,
+    download_url_to_video_output,
+    sync_op,
+    ApiEndpoint,
+    poll_op,
 )
 from comfy_api.input_impl import VideoFromFile
 from comfy_api.input.basic_types import AudioInput
@@ -214,34 +206,6 @@ VOICES_CONFIG = {
 }


-async def poll_until_finished(
-    auth_kwargs: dict[str, str],
-    api_endpoint: ApiEndpoint[Any, R],
-    result_url_extractor: Optional[Callable[[R], str]] = None,
-    estimated_duration: Optional[int] = None,
-    node_id: Optional[str] = None,
-) -> R:
-    """Polls the Kling API endpoint until the task reaches a terminal state, then returns the response."""
-    return await PollingOperation(
-        poll_endpoint=api_endpoint,
-        completed_statuses=[
-            KlingTaskStatus.succeed.value,
-        ],
-        failed_statuses=[KlingTaskStatus.failed.value],
-        status_extractor=lambda response: (
-            response.data.task_status.value
-            if response.data and response.data.task_status
-            else None
-        ),
-        auth_kwargs=auth_kwargs,
-        result_url_extractor=result_url_extractor,
-        estimated_duration=estimated_duration,
-        node_id=node_id,
-        poll_interval=16.0,
-        max_poll_attempts=256,
-    ).execute()
-
-
 def is_valid_camera_control_configs(configs: list[float]) -> bool:
    """Verifies that at least one camera control configuration is non-zero."""
    return any(not math.isclose(value, 0.0) for value in configs)
@@ -377,8 +341,7 @@ async def image_result_to_node_output(


 async def execute_text2video(
-    auth_kwargs: dict[str, str],
-    node_id: str,
+    cls: type[IO.ComfyNode],
    prompt: str,
    negative_prompt: str,
    cfg_scale: float,
@@ -389,14 +352,11 @@ async def execute_text2video(
    camera_control: Optional[KlingCameraControl] = None,
 ) -> IO.NodeOutput:
    validate_prompts(prompt, negative_prompt, MAX_PROMPT_LENGTH_T2V)
-    initial_operation = SynchronousOperation(
-        endpoint=ApiEndpoint(
-            path=PATH_TEXT_TO_VIDEO,
-            method=HttpMethod.POST,
-            request_model=KlingText2VideoRequest,
-            response_model=KlingText2VideoResponse,
-        ),
-        request=KlingText2VideoRequest(
+    task_creation_response = await sync_op(
+        cls,
+        ApiEndpoint(path=PATH_TEXT_TO_VIDEO, method="POST"),
+        response_model=KlingText2VideoResponse,
+        data=KlingText2VideoRequest(
            prompt=prompt if prompt else None,
            negative_prompt=negative_prompt if negative_prompt else None,
            duration=KlingVideoGenDuration(duration),
@@ -406,24 +366,17 @@ async def execute_text2video(
            aspect_ratio=KlingVideoGenAspectRatio(aspect_ratio),
            camera_control=camera_control,
        ),
-        auth_kwargs=auth_kwargs,
    )

-    task_creation_response = await initial_operation.execute()
    validate_task_creation_response(task_creation_response)

    task_id = task_creation_response.data.task_id
-    final_response = await poll_until_finished(
-        auth_kwargs,
-        ApiEndpoint(
-            path=f"{PATH_TEXT_TO_VIDEO}/{task_id}",
-            method=HttpMethod.GET,
-            request_model=EmptyRequest,
-            response_model=KlingText2VideoResponse,
-        ),
-        result_url_extractor=get_video_url_from_response,
+    final_response = await poll_op(
+        cls,
+        ApiEndpoint(path=f"{PATH_TEXT_TO_VIDEO}/{task_id}"),
+        response_model=KlingText2VideoResponse,
        estimated_duration=AVERAGE_DURATION_T2V,
-        node_id=node_id,
+        status_extractor=lambda r: (r.data.task_status.value if r.data and r.data.task_status else None),
    )
    validate_video_result_response(final_response)

@@ -432,8 +385,7 @@ async def execute_text2video(


 async def execute_image2video(
-    auth_kwargs: dict[str, str],
-    node_id: str,
+    cls: type[IO.ComfyNode],
    start_frame: torch.Tensor,
    prompt: str,
    negative_prompt: str,
@@ -455,14 +407,11 @@ async def execute_image2video(
    if model_mode == "std" and model_name == KlingVideoGenModelName.kling_v2_5_turbo.value:
        model_mode = "pro"  # October 5: currently "std" mode is not supported for this model

-    initial_operation = SynchronousOperation(
-        endpoint=ApiEndpoint(
-            path=PATH_IMAGE_TO_VIDEO,
-            method=HttpMethod.POST,
-            request_model=KlingImage2VideoRequest,
-            response_model=KlingImage2VideoResponse,
-        ),
-        request=KlingImage2VideoRequest(
+    task_creation_response = await sync_op(
+        cls,
+        ApiEndpoint(path=PATH_IMAGE_TO_VIDEO, method="POST"),
+        response_model=KlingImage2VideoResponse,
+        data=KlingImage2VideoRequest(
            model_name=KlingVideoGenModelName(model_name),
            image=tensor_to_base64_string(start_frame),
            image_tail=(
@@ -477,24 +426,17 @@ async def execute_image2video(
            duration=KlingVideoGenDuration(duration),
            camera_control=camera_control,
        ),
-        auth_kwargs=auth_kwargs,
    )

-    task_creation_response = await initial_operation.execute()
    validate_task_creation_response(task_creation_response)
    task_id = task_creation_response.data.task_id

-    final_response = await poll_until_finished(
-            auth_kwargs,
-            ApiEndpoint(
-                path=f"{PATH_IMAGE_TO_VIDEO}/{task_id}",
-                method=HttpMethod.GET,
-                request_model=KlingImage2VideoRequest,
-                response_model=KlingImage2VideoResponse,
-            ),
-            result_url_extractor=get_video_url_from_response,
+    final_response = await poll_op(
+            cls,
+            ApiEndpoint(path=f"{PATH_IMAGE_TO_VIDEO}/{task_id}"),
+            response_model=KlingImage2VideoResponse,
            estimated_duration=AVERAGE_DURATION_I2V,
-            node_id=node_id,
+            status_extractor=lambda r: (r.data.task_status.value if r.data and r.data.task_status else None),
        )
    validate_video_result_response(final_response)

@@ -503,8 +445,7 @@ async def execute_image2video(


 async def execute_video_effect(
-    auth_kwargs: dict[str, str],
-    node_id: str,
+    cls: type[IO.ComfyNode],
    dual_character: bool,
    effect_scene: KlingDualCharacterEffectsScene | KlingSingleImageEffectsScene,
    model_name: str,
@@ -530,35 +471,25 @@ async def execute_video_effect(
            duration=duration,
        )

-    initial_operation = SynchronousOperation(
-        endpoint=ApiEndpoint(
-            path=PATH_VIDEO_EFFECTS,
-            method=HttpMethod.POST,
-            request_model=KlingVideoEffectsRequest,
-            response_model=KlingVideoEffectsResponse,
-        ),
-        request=KlingVideoEffectsRequest(
+    task_creation_response = await sync_op(
+        cls,
+        endpoint=ApiEndpoint(path=PATH_VIDEO_EFFECTS, method="POST"),
+        response_model=KlingVideoEffectsResponse,
+        data=KlingVideoEffectsRequest(
            effect_scene=effect_scene,
            input=request_input_field,
        ),
-        auth_kwargs=auth_kwargs,
    )

-    task_creation_response = await initial_operation.execute()
    validate_task_creation_response(task_creation_response)
    task_id = task_creation_response.data.task_id

-    final_response = await poll_until_finished(
-        auth_kwargs,
-        ApiEndpoint(
-            path=f"{PATH_VIDEO_EFFECTS}/{task_id}",
-            method=HttpMethod.GET,
-            request_model=EmptyRequest,
-            response_model=KlingVideoEffectsResponse,
-        ),
-        result_url_extractor=get_video_url_from_response,
+    final_response = await poll_op(
+        cls,
+        ApiEndpoint(path=f"{PATH_VIDEO_EFFECTS}/{task_id}"),
+        response_model=KlingVideoEffectsResponse,
        estimated_duration=AVERAGE_DURATION_VIDEO_EFFECTS,
-        node_id=node_id,
+        status_extractor=lambda r: (r.data.task_status.value if r.data and r.data.task_status else None),
    )
    validate_video_result_response(final_response)

@@ -567,8 +498,7 @@ async def execute_video_effect(


 async def execute_lipsync(
-    auth_kwargs: dict[str, str],
-    node_id: str,
+    cls: type[IO.ComfyNode],
    video: VideoInput,
    audio: Optional[AudioInput] = None,
    voice_language: Optional[str] = None,
@@ -583,24 +513,21 @@ async def execute_lipsync(
    validate_video_duration(video, 2, 10)

    # Upload video to Comfy API and get download URL
-    video_url = await upload_video_to_comfyapi(video, auth_kwargs=auth_kwargs)
+    video_url = await upload_video_to_comfyapi(cls, video)
    logging.info("Uploaded video to Comfy API. URL: %s", video_url)

    # Upload the audio file to Comfy API and get download URL
    if audio:
-        audio_url = await upload_audio_to_comfyapi(audio, auth_kwargs=auth_kwargs)
+        audio_url = await upload_audio_to_comfyapi(cls, audio)
        logging.info("Uploaded audio to Comfy API. URL: %s", audio_url)
    else:
        audio_url = None

-    initial_operation = SynchronousOperation(
-        endpoint=ApiEndpoint(
-            path=PATH_LIP_SYNC,
-            method=HttpMethod.POST,
-            request_model=KlingLipSyncRequest,
-            response_model=KlingLipSyncResponse,
-        ),
-        request=KlingLipSyncRequest(
+    task_creation_response = await sync_op(
+        cls,
+        ApiEndpoint(PATH_LIP_SYNC, "POST"),
+        response_model=KlingLipSyncResponse,
+        data=KlingLipSyncRequest(
            input=KlingLipSyncInputObject(
                video_url=video_url,
                mode=model_mode,
@@ -612,24 +539,17 @@ async def execute_lipsync(
                voice_id=voice_id,
            ),
        ),
-        auth_kwargs=auth_kwargs,
    )

-    task_creation_response = await initial_operation.execute()
    validate_task_creation_response(task_creation_response)
    task_id = task_creation_response.data.task_id

-    final_response = await poll_until_finished(
-        auth_kwargs,
-        ApiEndpoint(
-            path=f"{PATH_LIP_SYNC}/{task_id}",
-            method=HttpMethod.GET,
-            request_model=EmptyRequest,
-            response_model=KlingLipSyncResponse,
-        ),
-        result_url_extractor=get_video_url_from_response,
+    final_response = await poll_op(
+        cls,
+        ApiEndpoint(path=f"{PATH_LIP_SYNC}/{task_id}"),
+        response_model=KlingLipSyncResponse,
        estimated_duration=AVERAGE_DURATION_LIP_SYNC,
-        node_id=node_id,
+        status_extractor=lambda r: (r.data.task_status.value if r.data and r.data.task_status else None),
    )
    validate_video_result_response(final_response)

@@ -807,11 +727,7 @@ class KlingTextToVideoNode(IO.ComfyNode):
    ) -> IO.NodeOutput:
        model_mode, duration, model_name = MODE_TEXT2VIDEO[mode]
        return await execute_text2video(
-            auth_kwargs={
-                "auth_token": cls.hidden.auth_token_comfy_org,
-                "comfy_api_key": cls.hidden.api_key_comfy_org,
-            },
-            node_id=cls.hidden.unique_id,
+            cls,
            prompt=prompt,
            negative_prompt=negative_prompt,
            cfg_scale=cfg_scale,
@@ -872,11 +788,7 @@ class KlingCameraControlT2VNode(IO.ComfyNode):
        camera_control: Optional[KlingCameraControl] = None,
    ) -> IO.NodeOutput:
        return await execute_text2video(
-            auth_kwargs={
-                "auth_token": cls.hidden.auth_token_comfy_org,
-                "comfy_api_key": cls.hidden.api_key_comfy_org,
-            },
-            node_id=cls.hidden.unique_id,
+            cls,
            model_name=KlingVideoGenModelName.kling_v1,
            cfg_scale=cfg_scale,
            model_mode=KlingVideoGenMode.std,
@@ -944,11 +856,7 @@ class KlingImage2VideoNode(IO.ComfyNode):
        end_frame: Optional[torch.Tensor] = None,
    ) -> IO.NodeOutput:
        return await execute_image2video(
-            auth_kwargs={
-                "auth_token": cls.hidden.auth_token_comfy_org,
-                "comfy_api_key": cls.hidden.api_key_comfy_org,
-            },
-            node_id=cls.hidden.unique_id,
+            cls,
            start_frame=start_frame,
            prompt=prompt,
            negative_prompt=negative_prompt,
@@ -1017,11 +925,7 @@ class KlingCameraControlI2VNode(IO.ComfyNode):
        camera_control: KlingCameraControl,
    ) -> IO.NodeOutput:
        return await execute_image2video(
-            auth_kwargs={
-                "auth_token": cls.hidden.auth_token_comfy_org,
-                "comfy_api_key": cls.hidden.api_key_comfy_org,
-            },
-            node_id=cls.hidden.unique_id,
+            cls,
            model_name=KlingVideoGenModelName.kling_v1_5,
            start_frame=start_frame,
            cfg_scale=cfg_scale,
@@ -1097,11 +1001,7 @@ class KlingStartEndFrameNode(IO.ComfyNode):
    ) -> IO.NodeOutput:
        mode, duration, model_name = MODE_START_END_FRAME[mode]
        return await execute_image2video(
-            auth_kwargs={
-                "auth_token": cls.hidden.auth_token_comfy_org,
-                "comfy_api_key": cls.hidden.api_key_comfy_org,
-            },
-            node_id=cls.hidden.unique_id,
+            cls,
            prompt=prompt,
            negative_prompt=negative_prompt,
            model_name=model_name,
@@ -1162,41 +1062,27 @@ class KlingVideoExtendNode(IO.ComfyNode):
        video_id: str,
    ) -> IO.NodeOutput:
        validate_prompts(prompt, negative_prompt, MAX_PROMPT_LENGTH_T2V)
-        auth = {
-            "auth_token": cls.hidden.auth_token_comfy_org,
-            "comfy_api_key": cls.hidden.api_key_comfy_org,
-        }
-        initial_operation = SynchronousOperation(
-            endpoint=ApiEndpoint(
-                path=PATH_VIDEO_EXTEND,
-                method=HttpMethod.POST,
-                request_model=KlingVideoExtendRequest,
-                response_model=KlingVideoExtendResponse,
-            ),
-            request=KlingVideoExtendRequest(
+        task_creation_response = await sync_op(
+            cls,
+            ApiEndpoint(path=PATH_VIDEO_EXTEND, method="POST"),
+            response_model=KlingVideoExtendResponse,
+            data=KlingVideoExtendRequest(
                prompt=prompt if prompt else None,
                negative_prompt=negative_prompt if negative_prompt else None,
                cfg_scale=cfg_scale,
                video_id=video_id,
            ),
-            auth_kwargs=auth,
        )

-        task_creation_response = await initial_operation.execute()
        validate_task_creation_response(task_creation_response)
        task_id = task_creation_response.data.task_id

-        final_response = await poll_until_finished(
-            auth,
-            ApiEndpoint(
-                path=f"{PATH_VIDEO_EXTEND}/{task_id}",
-                method=HttpMethod.GET,
-                request_model=EmptyRequest,
-                response_model=KlingVideoExtendResponse,
-            ),
-            result_url_extractor=get_video_url_from_response,
+        final_response = await poll_op(
+            cls,
+            ApiEndpoint(path=f"{PATH_VIDEO_EXTEND}/{task_id}"),
+            response_model=KlingVideoExtendResponse,
            estimated_duration=AVERAGE_DURATION_VIDEO_EXTEND,
-            node_id=cls.hidden.unique_id,
+            status_extractor=lambda r: (r.data.task_status.value if r.data and r.data.task_status else None),
        )
        validate_video_result_response(final_response)

@@ -1259,11 +1145,7 @@ class KlingDualCharacterVideoEffectNode(IO.ComfyNode):
        duration: KlingVideoGenDuration,
    ) -> IO.NodeOutput:
        video, _, duration = await execute_video_effect(
-            auth_kwargs={
-                "auth_token": cls.hidden.auth_token_comfy_org,
-                "comfy_api_key": cls.hidden.api_key_comfy_org,
-            },
-            node_id=cls.hidden.unique_id,
+            cls,
            dual_character=True,
            effect_scene=effect_scene,
            model_name=model_name,
@@ -1324,11 +1206,7 @@ class KlingSingleImageVideoEffectNode(IO.ComfyNode):
        return IO.NodeOutput(
            *(
                await execute_video_effect(
-                    auth_kwargs={
-                        "auth_token": cls.hidden.auth_token_comfy_org,
-                        "comfy_api_key": cls.hidden.api_key_comfy_org,
-                    },
-                    node_id=cls.hidden.unique_id,
+                    cls,
                    dual_character=False,
                    effect_scene=effect_scene,
                    model_name=model_name,
@@ -1379,11 +1257,7 @@ class KlingLipSyncAudioToVideoNode(IO.ComfyNode):
        voice_language: str,
    ) -> IO.NodeOutput:
        return await execute_lipsync(
-            auth_kwargs={
-                "auth_token": cls.hidden.auth_token_comfy_org,
-                "comfy_api_key": cls.hidden.api_key_comfy_org,
-            },
-            node_id=cls.hidden.unique_id,
+            cls,
            video=video,
            audio=audio,
            voice_language=voice_language,
@@ -1445,11 +1319,7 @@ class KlingLipSyncTextToVideoNode(IO.ComfyNode):
    ) -> IO.NodeOutput:
        voice_id, voice_language = VOICES_CONFIG[voice]
        return await execute_lipsync(
-            auth_kwargs={
-                "auth_token": cls.hidden.auth_token_comfy_org,
-                "comfy_api_key": cls.hidden.api_key_comfy_org,
-            },
-            node_id=cls.hidden.unique_id,
+            cls,
            video=video,
            text=text,
            voice_language=voice_language,
@@ -1496,40 +1366,26 @@ class KlingVirtualTryOnNode(IO.ComfyNode):
        cloth_image: torch.Tensor,
        model_name: KlingVirtualTryOnModelName,
    ) -> IO.NodeOutput:
-        auth = {
-            "auth_token": cls.hidden.auth_token_comfy_org,
-            "comfy_api_key": cls.hidden.api_key_comfy_org,
-        }
-        initial_operation = SynchronousOperation(
-            endpoint=ApiEndpoint(
-                path=PATH_VIRTUAL_TRY_ON,
-                method=HttpMethod.POST,
-                request_model=KlingVirtualTryOnRequest,
-                response_model=KlingVirtualTryOnResponse,
-            ),
-            request=KlingVirtualTryOnRequest(
+        task_creation_response = await sync_op(
+            cls,
+            ApiEndpoint(path=PATH_VIRTUAL_TRY_ON, method="POST"),
+            response_model=KlingVirtualTryOnResponse,
+            data=KlingVirtualTryOnRequest(
                human_image=tensor_to_base64_string(human_image),
                cloth_image=tensor_to_base64_string(cloth_image),
                model_name=model_name,
            ),
-            auth_kwargs=auth,
        )

-        task_creation_response = await initial_operation.execute()
        validate_task_creation_response(task_creation_response)
        task_id = task_creation_response.data.task_id

-        final_response = await poll_until_finished(
-            auth,
-            ApiEndpoint(
-                path=f"{PATH_VIRTUAL_TRY_ON}/{task_id}",
-                method=HttpMethod.GET,
-                request_model=EmptyRequest,
-                response_model=KlingVirtualTryOnResponse,
-            ),
-            result_url_extractor=get_images_urls_from_response,
+        final_response = await poll_op(
+            cls,
+            ApiEndpoint(path=f"{PATH_VIRTUAL_TRY_ON}/{task_id}"),
+            response_model=KlingVirtualTryOnResponse,
            estimated_duration=AVERAGE_DURATION_VIRTUAL_TRY_ON,
-            node_id=cls.hidden.unique_id,
+            status_extractor=lambda r: (r.data.task_status.value if r.data and r.data.task_status else None),
        )
        validate_image_result_response(final_response)

@@ -1625,18 +1481,11 @@ class KlingImageGenerationNode(IO.ComfyNode):
        else:
            image = tensor_to_base64_string(image)

-        auth = {
-            "auth_token": cls.hidden.auth_token_comfy_org,
-            "comfy_api_key": cls.hidden.api_key_comfy_org,
-        }
-        initial_operation = SynchronousOperation(
-            endpoint=ApiEndpoint(
-                path=PATH_IMAGE_GENERATIONS,
-                method=HttpMethod.POST,
-                request_model=KlingImageGenerationsRequest,
-                response_model=KlingImageGenerationsResponse,
-            ),
-            request=KlingImageGenerationsRequest(
+        task_creation_response = await sync_op(
+            cls,
+            ApiEndpoint(path=PATH_IMAGE_GENERATIONS, method="POST"),
+            response_model=KlingImageGenerationsResponse,
+            data=KlingImageGenerationsRequest(
                model_name=model_name,
                prompt=prompt,
                negative_prompt=negative_prompt,
@@ -1647,24 +1496,17 @@ class KlingImageGenerationNode(IO.ComfyNode):
                n=n,
                aspect_ratio=aspect_ratio,
            ),
-            auth_kwargs=auth,
        )

-        task_creation_response = await initial_operation.execute()
        validate_task_creation_response(task_creation_response)
        task_id = task_creation_response.data.task_id

-        final_response = await poll_until_finished(
-            auth,
-            ApiEndpoint(
-                path=f"{PATH_IMAGE_GENERATIONS}/{task_id}",
-                method=HttpMethod.GET,
-                request_model=EmptyRequest,
-                response_model=KlingImageGenerationsResponse,
-            ),
-            result_url_extractor=get_images_urls_from_response,
+        final_response = await poll_op(
+            cls,
+            ApiEndpoint(path=f"{PATH_IMAGE_GENERATIONS}/{task_id}"),
+            response_model=KlingImageGenerationsResponse,
            estimated_duration=AVERAGE_DURATION_IMAGE_GEN,
-            node_id=cls.hidden.unique_id,
+            status_extractor=lambda r: (r.data.task_status.value if r.data and r.data.task_status else None),
        )
        validate_image_result_response(final_response)