@@ -103,6 +103,12 @@ def submit(
103103 gpu: int = 0 ,
104104 cpu: int = 1 ,
105105 memory: int = 512 ,
106+ target_worker: Optional[str ] = None ,
107+ gpu_indices: Optional[List[int ]] = None ,
108+ exclusive: bool = True ,
109+ labels: Optional[Dict[str , str ]] = None ,
110+ env: Optional[Dict[str , str ]] = None ,
111+ venv: Optional[str ] = None ,
106112) -> Instance
107113```
108114
@@ -111,21 +117,51 @@ Submit a new instance.
111117** Args:**
112118- `command`: Shell command string, or list of args (auto shell-escaped)
113119- `name` : Optional instance name for service discovery
114- - `gpu` : GPU units required (default 0 )
120+ - `gpu`: GPU units required (default 0; ignored if `gpu_indices` is specified)
115121- `cpu` : CPU cores required (default 1 )
116122- `memory` : Memory in MB required (default 512 )
123+ - `target_worker`: Place the instance on a specific worker node
124+ - `gpu_indices`: Request specific physical GPU indices
125+ - `exclusive`: If `False`, the GPUs don't block the allocation pool (default `True`)
126+ - `labels`: Custom metadata dict
127+ - `env`: Environment variables to set
128+ - `venv`: Path to a pre-existing virtualenv (must be an absolute path)
117129
118- ** Returns:** `Instance` handle
130+ ** Returns:**
131+ - `Instance` handle for the submitted instance.
119132
120133** Raises:**
121134- `NotInitializedError` : `init()` not called
122135- `ValueError ` : Invalid command or resources
123136
124137** Example:**
125138```python
139+ # Basic usage
126140instance = pylet.submit(" echo hello" , cpu = 1 )
127141instance = pylet.submit(" vllm serve model --port $PORT" , name = " vllm" , gpu = 1 , memory = 4096 )
128142instance = pylet.submit([" python" , " -c" , " print('hello')" ], cpu = 1 )
143+
144+ # Target specific worker and GPU indices
145+ instance = pylet.submit(
146+ " sllm-store start" ,
147+ target_worker = " gpu-0" ,
148+ gpu_indices = [0 , 1 , 2 , 3 ],
149+ exclusive = False ,
150+ labels = {" type" : " sllm-store" },
151+ )
152+
153+ # Use a virtualenv
154+ instance = pylet.submit(
155+ " python train.py" ,
156+ venv = " /home/user/my-venv" ,
157+ gpu = 1 ,
158+ )
159+
160+ # Deploy multiple instances (use a loop)
161+ instances = []
162+ for i in range(3):
163+     inst = pylet.submit("python worker.py", name=f"worker-{i}", gpu=1)
164+     instances.append(inst)
129165```
130166
131167-- -
@@ -167,13 +203,15 @@ instance = pylet.get(id="abc-123-def")
167203def instances(
168204 * ,
169205 status: Optional[str ] = None ,
206+ labels: Optional[Dict[str , str ]] = None ,
170207) -> List[Instance]
171208```
172209
173210List all instances.
174211
175212** Args:**
176213- `status` : Filter by status (e.g., `" RUNNING" ` , `" PENDING" ` )
214+ - `labels` : Filter by labels (all specified labels must match)
177215
178216** Returns:** List of `Instance` handles
179217
@@ -184,6 +222,7 @@ List all instances.
184222```python
185223all_instances = pylet.instances()
186224running = pylet.instances(status = " RUNNING" )
225+ gpu_instances = pylet.instances(labels = {" type" : " gpu-worker" })
187226```
188227
189228-- -
@@ -203,6 +242,107 @@ List all registered workers.
203242
204243---
205244
245+ ### `pylet.delete`
246+
247+ ```python
248+ def delete(
249+ name: Optional[str ] = None ,
250+ * ,
251+ id : Optional[str ] = None ,
252+ ) -> None
253+ ```
254+
255+ Delete an instance by name or ID.
256+
257+ **Args:**
258+ - `name`: Instance name (positional or keyword)
259+ - `id`: Instance ID (keyword-only)
260+
261+ **Raises:**
262+ - `NotInitializedError`: `init()` not called
263+ - `NotFoundError`: Instance not found
264+ - `ValueError`: Neither `name` nor `id` provided
265+
266+ ** Example:**
267+ ```python
268+ pylet.delete(" my-instance" )
269+ pylet.delete(id = " abc-123-def" )
270+ ```
271+
272+ ---
273+
274+ ### `pylet.delete_all`
275+
276+ ```python
277+ def delete_all(* , status: Optional[str ] = None ) -> int
278+ ```
279+
280+ Delete all instances, optionally filtered by status.
281+
282+ ** Args:**
283+ - `status`: Only delete instances with this status (e.g., `"COMPLETED"`, `"FAILED"`, `"CANCELLED"`)
284+
285+ ** Returns:** Number of instances deleted
286+
287+ ** Raises:**
288+ - `NotInitializedError` : `init()` not called
289+
290+ ** Example:**
291+ ```python
292+ # Delete all completed instances
293+ count = pylet.delete_all(status = " COMPLETED" )
294+ print(f"Deleted {count} instances")
295+
296+ # Delete all instances (use with caution!)
297+ count = pylet.delete_all()
298+ ```
299+
300+ ---
301+
302+ ### `pylet.delete_worker`
303+
304+ ```python
305+ def delete_worker(worker_id: str ) -> None
306+ ```
307+
308+ Delete a worker by ID. **Only OFFLINE workers can be deleted.**
309+
310+ ** Args:**
311+ - `worker_id` : Worker ID to delete
312+
313+ ** Raises:**
314+ - `NotInitializedError` : `init()` not called
315+ - `NotFoundError` : Worker not found
316+ - `ValueError`: Worker is not OFFLINE (only OFFLINE workers can be deleted)
317+
318+ ** Example:**
319+ ```python
320+ pylet.delete_worker(" worker-123" )
321+ ```
322+
323+ ---
324+
325+ ### `pylet.delete_all_offline_workers`
326+
327+ ```python
328+ def delete_all_offline_workers() -> int
329+ ```
330+
331+ Delete all workers with OFFLINE status.
332+
333+ ** Returns:** Number of workers deleted
334+
335+ ** Raises:**
336+ - `NotInitializedError` : `init()` not called
337+
338+ ** Example:**
339+ ```python
340+ count = pylet.delete_all_offline_workers()
341+ print(f"Deleted {count} offline workers")
342+ ```
343+
344+ ---
345+
206346## Class: `Instance`
207347
208348Returned by `pylet.submit()` and `pylet.get()` . Represents a handle to an instance.
@@ -239,6 +379,42 @@ def exit_code(self) -> Optional[int]
239379```
240380Process exit code when terminal, `None ` otherwise.
241381
382+ ```python
383+ @ property
384+ def display_status(self ) -> str
385+ ```
386+ User-facing status. Returns `"CANCELLING"` while cancellation is in progress; otherwise the same as `status`.
387+
388+ ```python
389+ @ property
390+ def gpu_indices (self ) -> Optional[List[int ]]
391+ ```
392+ Allocated GPU indices when assigned/running, `None` otherwise.
393+
394+ ```python
395+ @ property
396+ def exclusive(self ) -> bool
397+ ```
398+ Whether the instance has exclusive GPU access. Defaults to `True`.
399+
400+ ```python
401+ @ property
402+ def labels(self ) -> Dict[str , str ]
403+ ```
404+ User-defined labels. Returns an empty dict if none are set.
405+
406+ ```python
407+ @ property
408+ def env(self ) -> Dict[str , str ]
409+ ```
410+ User-defined environment variables. Returns an empty dict if none are set.
411+
412+ ```python
413+ @ property
414+ def target_worker(self ) -> Optional[str ]
415+ ```
416+ Target worker constraint if set, `None` otherwise.
417+
242418# ## Methods
243419
244420# ### `Instance.wait_running`
@@ -277,11 +453,14 @@ Block until instance reaches terminal state (`COMPLETED`, `FAILED`, `CANCELLED`)
277453# ### `Instance.cancel`
278454
279455```python
280- def cancel(self ) -> None
456+ def cancel(self , delete: bool = False ) -> None
281457```
282458
283459Request instance cancellation. Returns immediately (cancellation is asynchronous).
284460
461+ ** Args:**
462+ - `delete`: If `True`, delete the instance after cancellation completes (default `False`)
463+
285464** Raises:**
286465- `InstanceTerminatedError` : Instance already in terminal state
287466
@@ -382,6 +561,12 @@ def memory_available(self) -> int
382561```
383562Available memory in MB .
384563
564+ ```python
565+ @ property
566+ def gpu_indices_available(self ) -> List[int ]
567+ ```
568+ List of available GPU indices.
569+
385570-- -
386571
387572# # Cluster Management
@@ -571,16 +756,20 @@ async def main():
571756- ` async pylet.aio.init(address: str = "http://localhost:8000") -> None `
572757- ` async pylet.aio.shutdown() -> None `
573758- ` pylet.aio.is_initialized() -> bool ` (sync, no I/O)
574- - ` async pylet.aio.submit(...) -> Instance `
759+ - ` async pylet.aio.submit(...) -> Instance ` - Same parameters as sync version
575760- ` async pylet.aio.get(...) -> Instance `
576- - ` async pylet.aio.instances(... ) -> List[Instance] `
761+ - `async pylet.aio.instances(*, status: Optional[str] = None) -> List[Instance]` - Note: unlike the sync version, this does not support the `labels` parameter
577762- ` async pylet.aio.workers() -> List[WorkerInfo] `
763+ - ` async pylet.aio.delete(name=None, *, id=None) -> None `
764+ - ` async pylet.aio.delete_all(*, status=None) -> int `
765+ - ` async pylet.aio.delete_worker(worker_id) -> None `
766+ - ` async pylet.aio.delete_all_offline_workers() -> int `
578767
579768### Async Instance Methods
580769
581770- ` async Instance.wait_running(timeout: float = 300) -> None `
582771- ` async Instance.wait(timeout: Optional[float] = None) -> None `
583- - ` async Instance.cancel() -> None `
772+ - ` async Instance.cancel(delete: bool = False ) -> None `
584773- ` async Instance.logs(tail: Optional[int] = None) -> str `
585774- ` async Instance.refresh() -> None `
586775
@@ -657,17 +846,35 @@ with pylet.local_cluster(workers=2, cpu_per_worker=2) as cluster:
657846| ` pylet.init(address) ` | Connect to head |
658847| ` pylet.shutdown() ` | Disconnect (optional) |
659848| ` pylet.is_initialized() ` | Check if connected |
660- | ` pylet.submit(command, *, name, gpu, cpu, memory) ` | Submit instance |
849+ | ` pylet.submit(command, *, name, gpu, cpu, memory, ... ) ` | Submit instance |
661850| ` pylet.get(name, *, id) ` | Get instance |
662- | ` pylet.instances(*, status) ` | List instances |
851+ | ` pylet.instances(*, status, labels ) ` | List instances |
663852| ` pylet.workers() ` | List workers |
853+ | ` pylet.delete(name, *, id) ` | Delete instance |
854+ | ` pylet.delete_all(*, status) ` | Delete all instances |
855+ | ` pylet.delete_worker(worker_id) ` | Delete OFFLINE worker |
856+ | ` pylet.delete_all_offline_workers() ` | Delete all OFFLINE workers |
664857| ` pylet.start(*, address, port, gpu, cpu, memory, block) ` | Start head/worker |
665858| ` pylet.local_cluster(workers, *, ...) ` | Test cluster |
666859
860+ | Instance Property | Purpose |
861+ | -------------------| ---------|
862+ | ` instance.id ` | Instance UUID |
863+ | ` instance.name ` | User-provided name |
864+ | ` instance.status ` | Current status |
865+ | ` instance.display_status ` | User-facing status (shows CANCELLING) |
866+ | `instance.endpoint` | `host:port` when running |
867+ | ` instance.exit_code ` | Exit code when terminal |
868+ | ` instance.gpu_indices ` | Allocated GPU indices |
869+ | ` instance.exclusive ` | Exclusive GPU access |
870+ | ` instance.labels ` | User-defined labels |
871+ | ` instance.env ` | Environment variables |
872+ | ` instance.target_worker ` | Target worker constraint |
873+
667874| Instance Method | Purpose |
668875| -----------------| ---------|
669876| ` instance.wait_running(timeout) ` | Block until RUNNING |
670877| ` instance.wait(timeout) ` | Block until terminal |
671- | ` instance.cancel() ` | Request cancellation |
878+ | ` instance.cancel(delete ) ` | Request cancellation |
672879| ` instance.logs(tail) ` | Get logs |
673880| ` instance.refresh() ` | Update from server |
0 commit comments