feat(api-nodes): network client v2: async ops, cancellation, downloads, refactor (#10390)

* feat(api-nodes): implement new API client for V3 nodes

* feat(api-nodes): implement new API client for V3 nodes

* feat(api-nodes): implement new API client for V3 nodes

* converted WAN nodes to use new client; polishing

* fix(auth): do not leak authentification for the absolute urls

* convert BFL API nodes to use new API client; remove deprecated BFL nodes

* converted Google Veo nodes

* fix(Veo3.1 model): take into account "generate_audio" parameter
This commit is contained in:
Alexander Piskun
2025-10-24 08:37:16 +03:00
committed by GitHub
parent 24188b3141
commit 388b306a2b
29 changed files with 2935 additions and 2298 deletions

View File

@@ -5,8 +5,7 @@ For source of truth on the allowed permutations of request fields, please refere
"""
from __future__ import annotations
from typing import Optional, TypeVar, Any
from collections.abc import Callable
from typing import Optional, TypeVar
import math
import logging
@@ -15,7 +14,6 @@ from typing_extensions import override
import torch
from comfy_api_nodes.apis import (
KlingTaskStatus,
KlingCameraControl,
KlingCameraConfig,
KlingCameraControlType,
@@ -52,26 +50,20 @@ from comfy_api_nodes.apis import (
KlingCharacterEffectModelName,
KlingSingleImageEffectModelName,
)
from comfy_api_nodes.apis.client import (
ApiEndpoint,
HttpMethod,
SynchronousOperation,
PollingOperation,
EmptyRequest,
)
from comfy_api_nodes.apinode_utils import (
tensor_to_base64_string,
download_url_to_video_output,
upload_video_to_comfyapi,
upload_audio_to_comfyapi,
download_url_to_image_tensor,
validate_string,
)
from comfy_api_nodes.util.validation_utils import (
from comfy_api_nodes.util import (
validate_image_dimensions,
validate_image_aspect_ratio,
validate_video_dimensions,
validate_video_duration,
tensor_to_base64_string,
validate_string,
upload_audio_to_comfyapi,
download_url_to_image_tensor,
upload_video_to_comfyapi,
download_url_to_video_output,
sync_op,
ApiEndpoint,
poll_op,
)
from comfy_api.input_impl import VideoFromFile
from comfy_api.input.basic_types import AudioInput
@@ -214,34 +206,6 @@ VOICES_CONFIG = {
}
async def poll_until_finished(
auth_kwargs: dict[str, str],
api_endpoint: ApiEndpoint[Any, R],
result_url_extractor: Optional[Callable[[R], str]] = None,
estimated_duration: Optional[int] = None,
node_id: Optional[str] = None,
) -> R:
"""Polls the Kling API endpoint until the task reaches a terminal state, then returns the response."""
return await PollingOperation(
poll_endpoint=api_endpoint,
completed_statuses=[
KlingTaskStatus.succeed.value,
],
failed_statuses=[KlingTaskStatus.failed.value],
status_extractor=lambda response: (
response.data.task_status.value
if response.data and response.data.task_status
else None
),
auth_kwargs=auth_kwargs,
result_url_extractor=result_url_extractor,
estimated_duration=estimated_duration,
node_id=node_id,
poll_interval=16.0,
max_poll_attempts=256,
).execute()
def is_valid_camera_control_configs(configs: list[float]) -> bool:
"""Verifies that at least one camera control configuration is non-zero."""
return any(not math.isclose(value, 0.0) for value in configs)
@@ -377,8 +341,7 @@ async def image_result_to_node_output(
async def execute_text2video(
auth_kwargs: dict[str, str],
node_id: str,
cls: type[IO.ComfyNode],
prompt: str,
negative_prompt: str,
cfg_scale: float,
@@ -389,14 +352,11 @@ async def execute_text2video(
camera_control: Optional[KlingCameraControl] = None,
) -> IO.NodeOutput:
validate_prompts(prompt, negative_prompt, MAX_PROMPT_LENGTH_T2V)
initial_operation = SynchronousOperation(
endpoint=ApiEndpoint(
path=PATH_TEXT_TO_VIDEO,
method=HttpMethod.POST,
request_model=KlingText2VideoRequest,
response_model=KlingText2VideoResponse,
),
request=KlingText2VideoRequest(
task_creation_response = await sync_op(
cls,
ApiEndpoint(path=PATH_TEXT_TO_VIDEO, method="POST"),
response_model=KlingText2VideoResponse,
data=KlingText2VideoRequest(
prompt=prompt if prompt else None,
negative_prompt=negative_prompt if negative_prompt else None,
duration=KlingVideoGenDuration(duration),
@@ -406,24 +366,17 @@ async def execute_text2video(
aspect_ratio=KlingVideoGenAspectRatio(aspect_ratio),
camera_control=camera_control,
),
auth_kwargs=auth_kwargs,
)
task_creation_response = await initial_operation.execute()
validate_task_creation_response(task_creation_response)
task_id = task_creation_response.data.task_id
final_response = await poll_until_finished(
auth_kwargs,
ApiEndpoint(
path=f"{PATH_TEXT_TO_VIDEO}/{task_id}",
method=HttpMethod.GET,
request_model=EmptyRequest,
response_model=KlingText2VideoResponse,
),
result_url_extractor=get_video_url_from_response,
final_response = await poll_op(
cls,
ApiEndpoint(path=f"{PATH_TEXT_TO_VIDEO}/{task_id}"),
response_model=KlingText2VideoResponse,
estimated_duration=AVERAGE_DURATION_T2V,
node_id=node_id,
status_extractor=lambda r: (r.data.task_status.value if r.data and r.data.task_status else None),
)
validate_video_result_response(final_response)
@@ -432,8 +385,7 @@ async def execute_text2video(
async def execute_image2video(
auth_kwargs: dict[str, str],
node_id: str,
cls: type[IO.ComfyNode],
start_frame: torch.Tensor,
prompt: str,
negative_prompt: str,
@@ -455,14 +407,11 @@ async def execute_image2video(
if model_mode == "std" and model_name == KlingVideoGenModelName.kling_v2_5_turbo.value:
model_mode = "pro" # October 5: currently "std" mode is not supported for this model
initial_operation = SynchronousOperation(
endpoint=ApiEndpoint(
path=PATH_IMAGE_TO_VIDEO,
method=HttpMethod.POST,
request_model=KlingImage2VideoRequest,
response_model=KlingImage2VideoResponse,
),
request=KlingImage2VideoRequest(
task_creation_response = await sync_op(
cls,
ApiEndpoint(path=PATH_IMAGE_TO_VIDEO, method="POST"),
response_model=KlingImage2VideoResponse,
data=KlingImage2VideoRequest(
model_name=KlingVideoGenModelName(model_name),
image=tensor_to_base64_string(start_frame),
image_tail=(
@@ -477,24 +426,17 @@ async def execute_image2video(
duration=KlingVideoGenDuration(duration),
camera_control=camera_control,
),
auth_kwargs=auth_kwargs,
)
task_creation_response = await initial_operation.execute()
validate_task_creation_response(task_creation_response)
task_id = task_creation_response.data.task_id
final_response = await poll_until_finished(
auth_kwargs,
ApiEndpoint(
path=f"{PATH_IMAGE_TO_VIDEO}/{task_id}",
method=HttpMethod.GET,
request_model=KlingImage2VideoRequest,
response_model=KlingImage2VideoResponse,
),
result_url_extractor=get_video_url_from_response,
final_response = await poll_op(
cls,
ApiEndpoint(path=f"{PATH_IMAGE_TO_VIDEO}/{task_id}"),
response_model=KlingImage2VideoResponse,
estimated_duration=AVERAGE_DURATION_I2V,
node_id=node_id,
status_extractor=lambda r: (r.data.task_status.value if r.data and r.data.task_status else None),
)
validate_video_result_response(final_response)
@@ -503,8 +445,7 @@ async def execute_image2video(
async def execute_video_effect(
auth_kwargs: dict[str, str],
node_id: str,
cls: type[IO.ComfyNode],
dual_character: bool,
effect_scene: KlingDualCharacterEffectsScene | KlingSingleImageEffectsScene,
model_name: str,
@@ -530,35 +471,25 @@ async def execute_video_effect(
duration=duration,
)
initial_operation = SynchronousOperation(
endpoint=ApiEndpoint(
path=PATH_VIDEO_EFFECTS,
method=HttpMethod.POST,
request_model=KlingVideoEffectsRequest,
response_model=KlingVideoEffectsResponse,
),
request=KlingVideoEffectsRequest(
task_creation_response = await sync_op(
cls,
endpoint=ApiEndpoint(path=PATH_VIDEO_EFFECTS, method="POST"),
response_model=KlingVideoEffectsResponse,
data=KlingVideoEffectsRequest(
effect_scene=effect_scene,
input=request_input_field,
),
auth_kwargs=auth_kwargs,
)
task_creation_response = await initial_operation.execute()
validate_task_creation_response(task_creation_response)
task_id = task_creation_response.data.task_id
final_response = await poll_until_finished(
auth_kwargs,
ApiEndpoint(
path=f"{PATH_VIDEO_EFFECTS}/{task_id}",
method=HttpMethod.GET,
request_model=EmptyRequest,
response_model=KlingVideoEffectsResponse,
),
result_url_extractor=get_video_url_from_response,
final_response = await poll_op(
cls,
ApiEndpoint(path=f"{PATH_VIDEO_EFFECTS}/{task_id}"),
response_model=KlingVideoEffectsResponse,
estimated_duration=AVERAGE_DURATION_VIDEO_EFFECTS,
node_id=node_id,
status_extractor=lambda r: (r.data.task_status.value if r.data and r.data.task_status else None),
)
validate_video_result_response(final_response)
@@ -567,8 +498,7 @@ async def execute_video_effect(
async def execute_lipsync(
auth_kwargs: dict[str, str],
node_id: str,
cls: type[IO.ComfyNode],
video: VideoInput,
audio: Optional[AudioInput] = None,
voice_language: Optional[str] = None,
@@ -583,24 +513,21 @@ async def execute_lipsync(
validate_video_duration(video, 2, 10)
# Upload video to Comfy API and get download URL
video_url = await upload_video_to_comfyapi(video, auth_kwargs=auth_kwargs)
video_url = await upload_video_to_comfyapi(cls, video)
logging.info("Uploaded video to Comfy API. URL: %s", video_url)
# Upload the audio file to Comfy API and get download URL
if audio:
audio_url = await upload_audio_to_comfyapi(audio, auth_kwargs=auth_kwargs)
audio_url = await upload_audio_to_comfyapi(cls, audio)
logging.info("Uploaded audio to Comfy API. URL: %s", audio_url)
else:
audio_url = None
initial_operation = SynchronousOperation(
endpoint=ApiEndpoint(
path=PATH_LIP_SYNC,
method=HttpMethod.POST,
request_model=KlingLipSyncRequest,
response_model=KlingLipSyncResponse,
),
request=KlingLipSyncRequest(
task_creation_response = await sync_op(
cls,
ApiEndpoint(PATH_LIP_SYNC, "POST"),
response_model=KlingLipSyncResponse,
data=KlingLipSyncRequest(
input=KlingLipSyncInputObject(
video_url=video_url,
mode=model_mode,
@@ -612,24 +539,17 @@ async def execute_lipsync(
voice_id=voice_id,
),
),
auth_kwargs=auth_kwargs,
)
task_creation_response = await initial_operation.execute()
validate_task_creation_response(task_creation_response)
task_id = task_creation_response.data.task_id
final_response = await poll_until_finished(
auth_kwargs,
ApiEndpoint(
path=f"{PATH_LIP_SYNC}/{task_id}",
method=HttpMethod.GET,
request_model=EmptyRequest,
response_model=KlingLipSyncResponse,
),
result_url_extractor=get_video_url_from_response,
final_response = await poll_op(
cls,
ApiEndpoint(path=f"{PATH_LIP_SYNC}/{task_id}"),
response_model=KlingLipSyncResponse,
estimated_duration=AVERAGE_DURATION_LIP_SYNC,
node_id=node_id,
status_extractor=lambda r: (r.data.task_status.value if r.data and r.data.task_status else None),
)
validate_video_result_response(final_response)
@@ -807,11 +727,7 @@ class KlingTextToVideoNode(IO.ComfyNode):
) -> IO.NodeOutput:
model_mode, duration, model_name = MODE_TEXT2VIDEO[mode]
return await execute_text2video(
auth_kwargs={
"auth_token": cls.hidden.auth_token_comfy_org,
"comfy_api_key": cls.hidden.api_key_comfy_org,
},
node_id=cls.hidden.unique_id,
cls,
prompt=prompt,
negative_prompt=negative_prompt,
cfg_scale=cfg_scale,
@@ -872,11 +788,7 @@ class KlingCameraControlT2VNode(IO.ComfyNode):
camera_control: Optional[KlingCameraControl] = None,
) -> IO.NodeOutput:
return await execute_text2video(
auth_kwargs={
"auth_token": cls.hidden.auth_token_comfy_org,
"comfy_api_key": cls.hidden.api_key_comfy_org,
},
node_id=cls.hidden.unique_id,
cls,
model_name=KlingVideoGenModelName.kling_v1,
cfg_scale=cfg_scale,
model_mode=KlingVideoGenMode.std,
@@ -944,11 +856,7 @@ class KlingImage2VideoNode(IO.ComfyNode):
end_frame: Optional[torch.Tensor] = None,
) -> IO.NodeOutput:
return await execute_image2video(
auth_kwargs={
"auth_token": cls.hidden.auth_token_comfy_org,
"comfy_api_key": cls.hidden.api_key_comfy_org,
},
node_id=cls.hidden.unique_id,
cls,
start_frame=start_frame,
prompt=prompt,
negative_prompt=negative_prompt,
@@ -1017,11 +925,7 @@ class KlingCameraControlI2VNode(IO.ComfyNode):
camera_control: KlingCameraControl,
) -> IO.NodeOutput:
return await execute_image2video(
auth_kwargs={
"auth_token": cls.hidden.auth_token_comfy_org,
"comfy_api_key": cls.hidden.api_key_comfy_org,
},
node_id=cls.hidden.unique_id,
cls,
model_name=KlingVideoGenModelName.kling_v1_5,
start_frame=start_frame,
cfg_scale=cfg_scale,
@@ -1097,11 +1001,7 @@ class KlingStartEndFrameNode(IO.ComfyNode):
) -> IO.NodeOutput:
mode, duration, model_name = MODE_START_END_FRAME[mode]
return await execute_image2video(
auth_kwargs={
"auth_token": cls.hidden.auth_token_comfy_org,
"comfy_api_key": cls.hidden.api_key_comfy_org,
},
node_id=cls.hidden.unique_id,
cls,
prompt=prompt,
negative_prompt=negative_prompt,
model_name=model_name,
@@ -1162,41 +1062,27 @@ class KlingVideoExtendNode(IO.ComfyNode):
video_id: str,
) -> IO.NodeOutput:
validate_prompts(prompt, negative_prompt, MAX_PROMPT_LENGTH_T2V)
auth = {
"auth_token": cls.hidden.auth_token_comfy_org,
"comfy_api_key": cls.hidden.api_key_comfy_org,
}
initial_operation = SynchronousOperation(
endpoint=ApiEndpoint(
path=PATH_VIDEO_EXTEND,
method=HttpMethod.POST,
request_model=KlingVideoExtendRequest,
response_model=KlingVideoExtendResponse,
),
request=KlingVideoExtendRequest(
task_creation_response = await sync_op(
cls,
ApiEndpoint(path=PATH_VIDEO_EXTEND, method="POST"),
response_model=KlingVideoExtendResponse,
data=KlingVideoExtendRequest(
prompt=prompt if prompt else None,
negative_prompt=negative_prompt if negative_prompt else None,
cfg_scale=cfg_scale,
video_id=video_id,
),
auth_kwargs=auth,
)
task_creation_response = await initial_operation.execute()
validate_task_creation_response(task_creation_response)
task_id = task_creation_response.data.task_id
final_response = await poll_until_finished(
auth,
ApiEndpoint(
path=f"{PATH_VIDEO_EXTEND}/{task_id}",
method=HttpMethod.GET,
request_model=EmptyRequest,
response_model=KlingVideoExtendResponse,
),
result_url_extractor=get_video_url_from_response,
final_response = await poll_op(
cls,
ApiEndpoint(path=f"{PATH_VIDEO_EXTEND}/{task_id}"),
response_model=KlingVideoExtendResponse,
estimated_duration=AVERAGE_DURATION_VIDEO_EXTEND,
node_id=cls.hidden.unique_id,
status_extractor=lambda r: (r.data.task_status.value if r.data and r.data.task_status else None),
)
validate_video_result_response(final_response)
@@ -1259,11 +1145,7 @@ class KlingDualCharacterVideoEffectNode(IO.ComfyNode):
duration: KlingVideoGenDuration,
) -> IO.NodeOutput:
video, _, duration = await execute_video_effect(
auth_kwargs={
"auth_token": cls.hidden.auth_token_comfy_org,
"comfy_api_key": cls.hidden.api_key_comfy_org,
},
node_id=cls.hidden.unique_id,
cls,
dual_character=True,
effect_scene=effect_scene,
model_name=model_name,
@@ -1324,11 +1206,7 @@ class KlingSingleImageVideoEffectNode(IO.ComfyNode):
return IO.NodeOutput(
*(
await execute_video_effect(
auth_kwargs={
"auth_token": cls.hidden.auth_token_comfy_org,
"comfy_api_key": cls.hidden.api_key_comfy_org,
},
node_id=cls.hidden.unique_id,
cls,
dual_character=False,
effect_scene=effect_scene,
model_name=model_name,
@@ -1379,11 +1257,7 @@ class KlingLipSyncAudioToVideoNode(IO.ComfyNode):
voice_language: str,
) -> IO.NodeOutput:
return await execute_lipsync(
auth_kwargs={
"auth_token": cls.hidden.auth_token_comfy_org,
"comfy_api_key": cls.hidden.api_key_comfy_org,
},
node_id=cls.hidden.unique_id,
cls,
video=video,
audio=audio,
voice_language=voice_language,
@@ -1445,11 +1319,7 @@ class KlingLipSyncTextToVideoNode(IO.ComfyNode):
) -> IO.NodeOutput:
voice_id, voice_language = VOICES_CONFIG[voice]
return await execute_lipsync(
auth_kwargs={
"auth_token": cls.hidden.auth_token_comfy_org,
"comfy_api_key": cls.hidden.api_key_comfy_org,
},
node_id=cls.hidden.unique_id,
cls,
video=video,
text=text,
voice_language=voice_language,
@@ -1496,40 +1366,26 @@ class KlingVirtualTryOnNode(IO.ComfyNode):
cloth_image: torch.Tensor,
model_name: KlingVirtualTryOnModelName,
) -> IO.NodeOutput:
auth = {
"auth_token": cls.hidden.auth_token_comfy_org,
"comfy_api_key": cls.hidden.api_key_comfy_org,
}
initial_operation = SynchronousOperation(
endpoint=ApiEndpoint(
path=PATH_VIRTUAL_TRY_ON,
method=HttpMethod.POST,
request_model=KlingVirtualTryOnRequest,
response_model=KlingVirtualTryOnResponse,
),
request=KlingVirtualTryOnRequest(
task_creation_response = await sync_op(
cls,
ApiEndpoint(path=PATH_VIRTUAL_TRY_ON, method="POST"),
response_model=KlingVirtualTryOnResponse,
data=KlingVirtualTryOnRequest(
human_image=tensor_to_base64_string(human_image),
cloth_image=tensor_to_base64_string(cloth_image),
model_name=model_name,
),
auth_kwargs=auth,
)
task_creation_response = await initial_operation.execute()
validate_task_creation_response(task_creation_response)
task_id = task_creation_response.data.task_id
final_response = await poll_until_finished(
auth,
ApiEndpoint(
path=f"{PATH_VIRTUAL_TRY_ON}/{task_id}",
method=HttpMethod.GET,
request_model=EmptyRequest,
response_model=KlingVirtualTryOnResponse,
),
result_url_extractor=get_images_urls_from_response,
final_response = await poll_op(
cls,
ApiEndpoint(path=f"{PATH_VIRTUAL_TRY_ON}/{task_id}"),
response_model=KlingVirtualTryOnResponse,
estimated_duration=AVERAGE_DURATION_VIRTUAL_TRY_ON,
node_id=cls.hidden.unique_id,
status_extractor=lambda r: (r.data.task_status.value if r.data and r.data.task_status else None),
)
validate_image_result_response(final_response)
@@ -1625,18 +1481,11 @@ class KlingImageGenerationNode(IO.ComfyNode):
else:
image = tensor_to_base64_string(image)
auth = {
"auth_token": cls.hidden.auth_token_comfy_org,
"comfy_api_key": cls.hidden.api_key_comfy_org,
}
initial_operation = SynchronousOperation(
endpoint=ApiEndpoint(
path=PATH_IMAGE_GENERATIONS,
method=HttpMethod.POST,
request_model=KlingImageGenerationsRequest,
response_model=KlingImageGenerationsResponse,
),
request=KlingImageGenerationsRequest(
task_creation_response = await sync_op(
cls,
ApiEndpoint(path=PATH_IMAGE_GENERATIONS, method="POST"),
response_model=KlingImageGenerationsResponse,
data=KlingImageGenerationsRequest(
model_name=model_name,
prompt=prompt,
negative_prompt=negative_prompt,
@@ -1647,24 +1496,17 @@ class KlingImageGenerationNode(IO.ComfyNode):
n=n,
aspect_ratio=aspect_ratio,
),
auth_kwargs=auth,
)
task_creation_response = await initial_operation.execute()
validate_task_creation_response(task_creation_response)
task_id = task_creation_response.data.task_id
final_response = await poll_until_finished(
auth,
ApiEndpoint(
path=f"{PATH_IMAGE_GENERATIONS}/{task_id}",
method=HttpMethod.GET,
request_model=EmptyRequest,
response_model=KlingImageGenerationsResponse,
),
result_url_extractor=get_images_urls_from_response,
final_response = await poll_op(
cls,
ApiEndpoint(path=f"{PATH_IMAGE_GENERATIONS}/{task_id}"),
response_model=KlingImageGenerationsResponse,
estimated_duration=AVERAGE_DURATION_IMAGE_GEN,
node_id=cls.hidden.unique_id,
status_extractor=lambda r: (r.data.task_status.value if r.data and r.data.task_status else None),
)
validate_image_result_response(final_response)