Commit a00b5d2

Improve typing of OutputIterator
1 parent 050798e commit a00b5d2
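
The substantive change here: OutputIterator becomes generic over its element type, written with PEP 695 type-parameter syntax (class OutputIterator[T]:), which requires Python 3.12+. For readers unfamiliar with that syntax, a minimal stand-alone sketch; Box is an illustrative class, not part of replicate:

from typing import reveal_type

# PEP 695 syntax: the type parameter T is declared inline on the class.
class Box[T]:
    def __init__(self, item: T) -> None:
        self.item = item

    def get(self) -> T:
        return self.item

b = Box("hello")      # T is inferred as str
reveal_type(b.get())  # type checkers report str; at runtime this prints the type

The same mechanism lets OutputIterator[T] carry the chunk type through __iter__, __aiter__, and __await__ in the diff below.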

2 files changed: +255 −28 lines changed


replicate/use.py

Lines changed: 34 additions & 27 deletions
@@ -13,15 +13,16 @@
     Any,
     AsyncIterator,
     Callable,
+    Generator,
     Generic,
     Iterator,
+    List,
     Literal,
     Optional,
     ParamSpec,
     Protocol,
     Tuple,
     TypeVar,
-    Union,
     cast,
     overload,
 )
@@ -210,38 +211,38 @@ def _process_output_with_schema(output: Any, openapi_schema: dict) -> Any:
     return output


-class OutputIterator:
+class OutputIterator[T]:
     """
     An iterator wrapper that handles both regular iteration and string conversion.
     Supports both sync and async iteration patterns.
     """

     def __init__(
-        self,
-        iterator_factory: Callable[[], Iterator[Any]],
-        async_iterator_factory: Callable[[], AsyncIterator[Any]],
+        self,
+        iterator_factory: Callable[[], Iterator[T]],
+        async_iterator_factory: Callable[[], AsyncIterator[T]],
         schema: dict,
-        *,
-        is_concatenate: bool
+        *,
+        is_concatenate: bool,
     ) -> None:
         self.iterator_factory = iterator_factory
         self.async_iterator_factory = async_iterator_factory
         self.schema = schema
         self.is_concatenate = is_concatenate

-    def __iter__(self) -> Iterator[Any]:
+    def __iter__(self) -> Iterator[T]:
         """Iterate over output items synchronously."""
         for chunk in self.iterator_factory():
             if self.is_concatenate:
-                yield str(chunk)
+                yield chunk
             else:
                 yield _process_iterator_item(chunk, self.schema)

-    async def __aiter__(self) -> AsyncIterator[Any]:
+    async def __aiter__(self) -> AsyncIterator[T]:
         """Iterate over output items asynchronously."""
         async for chunk in self.async_iterator_factory():
             if self.is_concatenate:
-                yield str(chunk)
+                yield chunk
             else:
                 yield _process_iterator_item(chunk, self.schema)

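Taken together, the hunk above means a concatenate iterator built from string-chunk factories is an OutputIterator[str], and both iteration paths yield T rather than Any (note the is_concatenate branch now yields the chunk as-is instead of coercing with str()). A hedged sketch of the effect, using stand-in factories and an empty schema, which the concatenate path never consults:

from replicate.use import OutputIterator

def chunks():
    yield from ["Hello", " ", "world"]

async def chunks_async():
    for c in ["Hello", " ", "world"]:
        yield c

it = OutputIterator(
    iterator_factory=chunks,
    async_iterator_factory=chunks_async,
    schema={},  # stand-in; unused when is_concatenate=True
    is_concatenate=True,
)  # inferred as OutputIterator[str]

for token in it:  # token is typed as str, not Any
    print(token, end="")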
@@ -252,9 +253,10 @@ def __str__(self) -> str:
         else:
             return str(list(self.iterator_factory()))

-    def __await__(self):
+    def __await__(self) -> Generator[Any, None, List[T] | str]:
         """Make OutputIterator awaitable, returning appropriate result based on concatenate mode."""
-        async def _collect_result():
+
+        async def _collect_result() -> List[T] | str:
             if self.is_concatenate:
                 # For concatenate iterators, return the joined string
                 segments = []
@@ -267,6 +269,7 @@ async def _collect_result():
             async for item in self:
                 items.append(item)
             return items
+
         return _collect_result().__await__()

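With the annotation on __await__, awaiting the iterator now has a checkable result type, List[T] | str: a joined string in concatenate mode, a list of processed items otherwise. A small sketch reusing the illustrative it from the previous note (the factories return fresh iterators on each call, so re-consumption is fine):

import asyncio

async def main() -> None:
    text = await it  # concatenate mode: chunks are collected and joined
    assert text == "Hello world"

asyncio.run(main())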
@@ -341,14 +344,10 @@ class Run[O]:

     def output(self) -> O:
         """
-        Wait for the prediction to complete and return its output.
+        Return the output. For iterator types, returns immediately without waiting.
+        For non-iterator types, waits for completion.
         """
-        self.prediction.wait()
-
-        if self.prediction.status == "failed":
-            raise ModelError(self.prediction)
-
-        # Return an OutputIterator for iterator output types (including concatenate iterators)
+        # Return an OutputIterator immediately for iterator output types
         if _has_iterator_output_type(self.schema):
             is_concatenate = _has_concatenate_iterator_output_type(self.schema)
             return cast(
@@ -361,6 +360,12 @@ def output(self) -> O:
             ),
         )

+        # For non-iterator types, wait for completion and process output
+        self.prediction.wait()
+
+        if self.prediction.status == "failed":
+            raise ModelError(self.prediction)
+
         # Process output for file downloads based on schema
         return _process_output_with_schema(self.prediction.output, self.schema)

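Behaviorally, Run.output() no longer blocks when the schema declares an iterator output: the OutputIterator comes back at once, polling happens as it is consumed, and the failed-status check now runs only on the non-iterator path, so failures are no longer raised before the iterator is handed back. A sketch of the sync call pattern, borrowing the acme/hotdog-detector fixture model from the tests below:

import replicate

hotdog_detector = replicate.use("acme/hotdog-detector")
run = hotdog_detector.create(prompt="hello world")

tokens = run.output()  # returns an OutputIterator immediately, no wait
for token in tokens:   # iteration drives polling; blocking happens here
    print(token, end="")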
@@ -483,14 +488,10 @@ class AsyncRun[O]:

     async def output(self) -> O:
         """
-        Wait for the prediction to complete and return its output asynchronously.
+        Return the output. For iterator types, returns immediately without waiting.
+        For non-iterator types, waits for completion.
         """
-        await self.prediction.async_wait()
-
-        if self.prediction.status == "failed":
-            raise ModelError(self.prediction)
-
-        # Return an OutputIterator for iterator output types (including concatenate iterators)
+        # Return an OutputIterator immediately for iterator output types
         if _has_iterator_output_type(self.schema):
             is_concatenate = _has_concatenate_iterator_output_type(self.schema)
             return cast(
@@ -503,6 +504,12 @@ async def output(self) -> O:
             ),
         )

+        # For non-iterator types, wait for completion and process output
+        await self.prediction.async_wait()
+
+        if self.prediction.status == "failed":
+            raise ModelError(self.prediction)
+
         # Process output for file downloads based on schema
         return _process_output_with_schema(self.prediction.output, self.schema)

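AsyncRun.output() mirrors this: the await resolves to the OutputIterator without waiting on the prediction, and async for consumes it. Same assumptions as the sync sketch above:

import asyncio
import replicate

async def main() -> None:
    hotdog_detector = replicate.use("acme/hotdog-detector", use_async=True)
    run = await hotdog_detector.create(prompt="hello world")
    tokens = await run.output()  # OutputIterator, returned immediately
    async for token in tokens:   # async iteration polls as needed
        print(token, end="")

asyncio.run(main())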
tests/test_use.py

Lines changed: 221 additions & 1 deletion
@@ -345,7 +345,7 @@ async def test_use_function_create_method(client_mode):
     run = hotdog_detector.create(prompt="hello world")

     # Assert that run is a Run object with a prediction
-    from replicate.use import Run, AsyncRun
+    from replicate.use import AsyncRun, Run

     if client_mode == ClientMode.ASYNC:
         assert isinstance(run, AsyncRun)
@@ -621,6 +621,226 @@ async def async_iterator():
     assert str(result) == "['Hello', ' ', 'World']"  # str() gives list representation


+@pytest.mark.asyncio
+@pytest.mark.parametrize("client_mode", [ClientMode.DEFAULT, ClientMode.ASYNC])
+@respx.mock
+async def test_iterator_output_returns_immediately(client_mode):
+    """Test that OutputIterator is returned immediately without waiting for completion."""
+    mock_model_endpoints(
+        versions=[
+            create_mock_version(
+                {
+                    "openapi_schema": {
+                        "components": {
+                            "schemas": {
+                                "Output": {
+                                    "type": "array",
+                                    "items": {"type": "string"},
+                                    "x-cog-array-type": "iterator",
+                                    "x-cog-array-display": "concatenate",
+                                }
+                            }
+                        }
+                    }
+                }
+            )
+        ]
+    )
+
+    # Mock prediction that starts as processing (not completed)
+    mock_prediction_endpoints(
+        predictions=[
+            create_mock_prediction({"status": "processing", "output": []}),
+            create_mock_prediction({"status": "processing", "output": ["Hello"]}),
+            create_mock_prediction(
+                {"status": "succeeded", "output": ["Hello", " ", "World"]}
+            ),
+        ]
+    )
+
+    # Call use with "acme/hotdog-detector"
+    hotdog_detector = replicate.use(
+        "acme/hotdog-detector", use_async=client_mode == ClientMode.ASYNC
+    )
+
+    # Get the output iterator - this should return immediately even though prediction is processing
+    if client_mode == ClientMode.ASYNC:
+        run = await hotdog_detector.create(prompt="hello world")
+        output_iterator = await run.output()
+    else:
+        run = hotdog_detector.create(prompt="hello world")
+        output_iterator = run.output()
+
+    # Assert that we get an OutputIterator immediately (without waiting for completion)
+    from replicate.use import OutputIterator
+
+    assert isinstance(output_iterator, OutputIterator)
+
+    # Verify the prediction is still processing when we get the iterator
+    assert run.prediction.status == "processing"
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("client_mode", [ClientMode.DEFAULT, ClientMode.ASYNC])
+@respx.mock
+async def test_streaming_output_yields_incrementally(client_mode):
+    """Test that OutputIterator yields results incrementally during polling."""
+    mock_model_endpoints(
+        versions=[
+            create_mock_version(
+                {
+                    "openapi_schema": {
+                        "components": {
+                            "schemas": {
+                                "Output": {
+                                    "type": "array",
+                                    "items": {"type": "string"},
+                                    "x-cog-array-type": "iterator",
+                                    "x-cog-array-display": "concatenate",
+                                }
+                            }
+                        }
+                    }
+                }
+            )
+        ]
+    )
+
+    # Create a prediction that will be polled multiple times
+    prediction_id = "pred123"
+
+    # Mock the initial prediction creation
+    initial_prediction = create_mock_prediction(
+        {"id": prediction_id, "status": "processing", "output": []},
+        prediction_id=prediction_id,
+    )
+
+    if client_mode == ClientMode.ASYNC:
+        respx.post("https://api.replicate.com/v1/predictions").mock(
+            return_value=httpx.Response(201, json=initial_prediction)
+        )
+    else:
+        respx.post("https://api.replicate.com/v1/predictions").mock(
+            return_value=httpx.Response(201, json=initial_prediction)
+        )
+
+    # Mock incremental polling responses - each poll returns more data
+    poll_responses = [
+        create_mock_prediction(
+            {"status": "processing", "output": ["Hello"]}, prediction_id=prediction_id
+        ),
+        create_mock_prediction(
+            {"status": "processing", "output": ["Hello", " "]},
+            prediction_id=prediction_id,
+        ),
+        create_mock_prediction(
+            {"status": "processing", "output": ["Hello", " ", "streaming"]},
+            prediction_id=prediction_id,
+        ),
+        create_mock_prediction(
+            {"status": "processing", "output": ["Hello", " ", "streaming", " "]},
+            prediction_id=prediction_id,
+        ),
+        create_mock_prediction(
+            {
+                "status": "succeeded",
+                "output": ["Hello", " ", "streaming", " ", "world!"],
+            },
+            prediction_id=prediction_id,
+        ),
+    ]
+
+    # Mock the polling endpoint to return different responses in sequence
+    respx.get(f"https://api.replicate.com/v1/predictions/{prediction_id}").mock(
+        side_effect=[httpx.Response(200, json=resp) for resp in poll_responses]
+    )
+
+    # Call use with "acme/hotdog-detector"
+    hotdog_detector = replicate.use(
+        "acme/hotdog-detector", use_async=client_mode == ClientMode.ASYNC
+    )
+
+    # Get the output iterator immediately
+    if client_mode == ClientMode.ASYNC:
+        run = await hotdog_detector.create(prompt="hello world", use_async=True)
+        output_iterator = await run.output()
+    else:
+        run = hotdog_detector.create(prompt="hello world")
+        output_iterator = run.output()
+
+    # Assert that we get an OutputIterator immediately
+    from replicate.use import OutputIterator
+
+    assert isinstance(output_iterator, OutputIterator)
+
+    # Track when we receive each item to verify incremental delivery
+    collected_items = []
+
+    if client_mode == ClientMode.ASYNC:
+        async for item in output_iterator:
+            collected_items.append(item)
+            # Break after we get some incremental results to verify polling works
+            if len(collected_items) >= 3:
+                break
+    else:
+        for item in output_iterator:
+            collected_items.append(item)
+            # Break after we get some incremental results to verify polling works
+            if len(collected_items) >= 3:
+                break
+
+    # Verify we got incremental streaming results
+    assert len(collected_items) >= 3
+    # The items should be the concatenated string parts from the incremental output
+    result = "".join(collected_items)
+    assert "Hello" in result  # Should contain the first part we streamed
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("client_mode", [ClientMode.DEFAULT, ClientMode.ASYNC])
+@respx.mock
+async def test_non_streaming_output_waits_for_completion(client_mode):
+    """Test that non-iterator outputs still wait for completion."""
+    mock_model_endpoints(
+        versions=[
+            create_mock_version(
+                {
+                    "openapi_schema": {
+                        "components": {
+                            "schemas": {
+                                "Output": {"type": "string"}  # Non-iterator output
+                            }
+                        }
+                    }
+                }
+            )
+        ]
+    )
+
+    mock_prediction_endpoints(
+        predictions=[
+            create_mock_prediction({"status": "processing", "output": None}),
+            create_mock_prediction({"status": "succeeded", "output": "Final result"}),
+        ]
+    )
+
+    # Call use with "acme/hotdog-detector"
+    hotdog_detector = replicate.use(
+        "acme/hotdog-detector", use_async=client_mode == ClientMode.ASYNC
+    )
+
+    # For non-iterator output, this should wait for completion
+    if client_mode == ClientMode.ASYNC:
+        run = await hotdog_detector.create(prompt="hello world")
+        output = await run.output()
+    else:
+        run = hotdog_detector.create(prompt="hello world")
+        output = run.output()
+
+    # Should get the final result directly
+    assert output == "Final result"
+
+
 @pytest.mark.asyncio
 @pytest.mark.parametrize("client_mode", [ClientMode.DEFAULT, ClientMode.ASYNC])
 @respx.mock
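
To exercise just the three new tests locally, something like the following should work, assuming the repo's test dependencies (pytest, pytest-asyncio, respx, httpx) are installed; the -k expression matches the new test names:

import pytest

# Equivalent to: pytest tests/test_use.py -k "<expression>"
pytest.main([
    "tests/test_use.py",
    "-k",
    "returns_immediately or yields_incrementally or waits_for_completion",
])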
