diff --git a/README.md b/README.md index ec32e3d..bd3e246 100644 --- a/README.md +++ b/README.md @@ -158,6 +158,12 @@ With the job or batch id, you can get the job result or batch status with: client.get_batch_status("BATCH_ID") # Batches client.get_job_result("JOB_ID", "JOB_ID") # Simple jobs client.get_job_result("BATCH_ID", "JOB_ID") # Jobs belonging to batches +client.get_batch_result("BATCH_ID") # Get batch jobs result as array +client.get_batch_result_storage("BATCH_ID", params=params) # Get batch jobs result in a file + +# More details about job and batch +client.get_batch_info("BATCH_ID") # Batches info (without jobs info) +client.get_job_info("JOB_ID") # Jobs info (single jobs only) ``` Alternatively, you can use a utily `wait_for_job_done` or `wait_for_batch_done`: diff --git a/tests/test_functions.py b/tests/test_functions.py index a02ded5..4b7ed43 100644 --- a/tests/test_functions.py +++ b/tests/test_functions.py @@ -1007,3 +1007,158 @@ def test_create_and_wait_job_timeout(): unittest.TestCase().assertRaises( TimeoutException, c.create_and_wait_job, "rg", "./requirements.txt" ) + +@responses.activate +def test_get_batch_info(): + responses.add( + responses.GET, + f"{BASE_URL}/ocr/batch/info/123", + json={ + "company_id": "1234", + "client_id": "12345", + "batch_id": "123", + "created_at": "2022-06-22T20:58:09Z", + "service": "rg", + "status": "processing", + "source": "API", + "total_jobs": 3, + "total_processed": 2, + }, + status=200, + ) + + c = Client() + res = c.get_batch_info("123") + + assert res.get("batch_id") + assert res.get("service") + assert res.get("client_id") + assert res.get("status") + + assert res.get("batch_id") == "123" + assert res.get("client_id") == "12345" + assert res.get("service") == "rg" + assert res.get("status") == "processing" + + +@responses.activate +def test_get_batch_info_unauthorized(): + responses.add( + responses.GET, + f"{BASE_URL}/ocr/batch/info/123", + status=401, + ) + + c = Client() + 
unittest.TestCase().assertRaises( + InvalidStatusCodeException, c.get_batch_info, "123" + ) + +@responses.activate +def test_get_job_info(): + responses.add( + responses.GET, + f"{BASE_URL}/ocr/job/info/123", + json={ + "client_id": "12345", + "job_id": "123", + "service": "rg", + "status": "processing", + }, + status=200, + ) + + c = Client() + res = c.get_job_info("123") + + assert res.get("job_id") + assert res.get("service") + assert res.get("client_id") + assert res.get("status") + + assert res.get("job_id") == "123" + assert res.get("client_id") == "12345" + assert res.get("service") == "rg" + assert res.get("status") == "processing" + + +@responses.activate +def test_get_job_info_unauthorized(): + responses.add( + responses.GET, + f"{BASE_URL}/ocr/job/info/123", + status=401, + ) + + c = Client() + unittest.TestCase().assertRaises( + InvalidStatusCodeException, c.get_job_info, "123" + ) + +@responses.activate +def test_get_batch_result(): + responses.add( + responses.GET, + f"{BASE_URL}/ocr/batch/result/123", + json=[ + { + "job_ksuid": "123", + "service": "rg", + "status": "processing", + }, + ], + status=200, + ) + + c = Client() + res = c.get_batch_result("123") + + assert res + assert len(res) == 1 + +@responses.activate +def test_get_batch_result_unauthorized(): + responses.add( + responses.GET, + f"{BASE_URL}/ocr/batch/result/123", + status=401, + ) + + c = Client() + unittest.TestCase().assertRaises( + InvalidStatusCodeException, c.get_batch_result, "123" + ) + +@responses.activate +def test_get_batch_result_storage(): + responses.add( + responses.GET, + f"{BASE_URL}/ocr/batch/result/123", + json={ + "exp": "60000", + "url": "https://presignedurldemo.s3.eu-west-2.amazonaws.com/image.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAJJWZ7B6WCRGMKFGQ%2F20180210%2Feu-west-2%2Fs3%2Faws4_request&X-Amz-Date=20180210T171315Z&X-Amz-Expires=1800&X-Amz-Signature=12b74b0788aa036bc7c3d03b3f20c61f1f91cc9ad8873e3314255dc479a25351&X-Amz-SignedHeaders=host" + 
}, + status=200, + ) + + c = Client() + res = c.get_batch_result_storage("123") + + assert res.get("exp") + assert res.get("url") + + assert res.get("exp") == "60000" + + +@responses.activate +def test_get_batch_result_storage_unauthorized(): + responses.add( + responses.GET, + f"{BASE_URL}/ocr/batch/result/123", + status=401, + ) + + c = Client() + unittest.TestCase().assertRaises( + InvalidStatusCodeException, c.get_batch_result_storage, "123" + ) \ No newline at end of file diff --git a/ultraocr/constants.py b/ultraocr/constants.py index 48b6ebc..bc4a39e 100644 --- a/ultraocr/constants.py +++ b/ultraocr/constants.py @@ -13,6 +13,8 @@ KEY_FACEMATCH = "facematch" KEY_EXTRA = "extra-document" FLAG_TRUE = "true" +RETURN_REQUEST = "request" +RETURN_STORAGE = "storage" class Resource(Enum): diff --git a/ultraocr/functions.py b/ultraocr/functions.py index 3dc3757..47f9e66 100644 --- a/ultraocr/functions.py +++ b/ultraocr/functions.py @@ -25,6 +25,8 @@ FLAG_TRUE, KEY_EXTRA, KEY_FACEMATCH, + RETURN_REQUEST, + RETURN_STORAGE, ) @@ -121,6 +123,14 @@ def _auto_authenticate(self) -> None: if self.auto_refresh and datetime.now() > self.expires_at: self.authenticate(self.client_id, self.client_secret, self.expires) + def _get_batch_result(self, batch_id: str, params: dict = None): + url = f"{self.base_url}/ocr/batch/result/{batch_id}" + + resp = self._get(url, params=params) + validate_status_code(resp.status_code, HTTPStatus.OK) + + return resp.json() + def authenticate( self, client_id: str, client_secret: str, expires: int = DEFAULT_EXPIRATION_TIME ) -> None: @@ -785,3 +795,161 @@ def create_and_wait_batch( batch_id = res.get("id") return self.wait_for_batch_done(batch_id, wait_jobs) + + def get_job_info(self, job_id: str): + """Get job info. + + Get the info with more details. + + Args: + job_id: The id of the job, given on job creation or on batch status. 
+ + Returns: + A json response containing the client data (if given on job creation), the metadata (if + given on job creation), job id, company id, client id, creation time, service, source, + status (may be "waiting", "error", "processing", "validating" or "done") and the result + or error depending on the status. For example: + { + "client_data": { }, + "metadata": { }, + "created_at": "2022-06-22T20:58:09Z", + "company_id": "123", + "client_id": "1234", + "job_id": "2AwrSd7bxEMbPrQ5jZHGDzQ4qL3", + "source": "API", + "result": { + "Time": "7.45", + "Document": [ + { + "Page": 1, + "Data": { + "DocumentType": { + "conf": 99, + "value": "CNH" + } + } + } + ] + }, + "service": "idtypification", + "status": "done" + } + + Raises: + InvalidStatusCodeException: If status code is not 200. + """ + url = f"{self.base_url}/ocr/job/info/{job_id}" + + resp = self._get(url) + validate_status_code(resp.status_code, HTTPStatus.OK) + + return resp.json() + + def get_batch_info(self, batch_id: str): + """Get document batch info. + + Get the info of the batch with more details, checking whether it was processed or not. + + Args: + batch_id: The id of the batch, given on batch creation. + + Returns: + A json response containing the id, company id, client id, creation time, service, + source, number of jobs, number of processed jobs and status (may be "waiting", "error", + "processing" or "done"). For example: + { + "company_id": "123", + "client_id": "1234", + "batch_id": "2AwrSd7bxEMbPrQ5jZHGDzQ4qL3", + "created_at": "2022-06-22T20:58:09Z", + "service": "cnh", + "status": "done", + "source": "API", + "total_jobs": 3, + "total_processed": 2 + } + + Raises: + InvalidStatusCodeException: If status code is not 200. + """ + url = f"{self.base_url}/ocr/batch/info/{batch_id}" + + resp = self._get(url) + validate_status_code(resp.status_code, HTTPStatus.OK) + + return resp.json() + + def get_batch_result(self, batch_id: str): + """Get batch jobs results. 
+ + Get the batch jobs results as an array. + + Args: + batch_id: The id of the batch, given on batch creation. + + Returns: + A json array containing one entry per job of the batch, each with its data and result. + For example: + [ + { + "client_data": { }, + "created_at": "2022-06-22T20:58:09Z", + "job_ksuid": "2AwrSd7bxEMbPrQ5jZHGDzQ4qL3", + "result": { + "Time": "7.45", + "Document": [ + { + "Page": 1, + "Data": { + "DocumentType": { + "conf": 99, + "value": "CNH" + } + } + } + ] + }, + "service": "idtypification", + "status": "done", + "filename": "123.jpg" + } + ] + + Raises: + InvalidStatusCodeException: If status code is not 200. + """ + params = { + "return": RETURN_REQUEST, + } + + return self._get_batch_result(batch_id, params) + + def get_batch_result_storage(self, batch_id: str, params: dict = None): + """Get batch jobs results as file. + + Generate url to download a file containing the batch jobs results. + + Args: + batch_id: The id of the batch, given on batch creation. + params: The query parameters based on UltraOCR Docs. + + Returns: + A json response containing the url to download and the expiration time (1 minute). + For example: + { + "exp": "60000", + "url": "https://presignedurldemo.s3.eu-west-2.amazonaws.com/image.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAJJWZ7B6WCRGMKFGQ%2F20180210%2Feu-west-2%2Fs3%2Faws4_request&X-Amz-Date=20180210T171315Z&X-Amz-Expires=1800&X-Amz-Signature=12b74b0788aa036bc7c3d03b3f20c61f1f91cc9ad8873e3314255dc479a25351&X-Amz-SignedHeaders=host" + } + + Raises: + InvalidStatusCodeException: If status code is not 200. + """ + if params is None: + params = {} + + params = { + **params, + "return": RETURN_STORAGE, + } + + return self._get_batch_result(batch_id, params) \ No newline at end of file