Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,12 @@ With the job or batch id, you can get the job result or batch status with:
client.get_batch_status("BATCH_ID") # Batches
client.get_job_result("JOB_ID", "JOB_ID") # Simple jobs
client.get_job_result("BATCH_ID", "JOB_ID") # Jobs belonging to batches
client.get_batch_result("BATCH_ID") # Get batch jobs result as array
client.get_batch_result_storage("BATCH_ID", params=params) # Get batch jobs result in a file

# More details about job and batch
client.get_batch_info("BATCH_ID") # Batches info (without jobs info)
client.get_job_info("JOB_ID") # Jobs info (single jobs only)
```

Alternatively, you can use the utility methods `wait_for_job_done` or `wait_for_batch_done`:
Expand Down
155 changes: 155 additions & 0 deletions tests/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1007,3 +1007,158 @@ def test_create_and_wait_job_timeout():
unittest.TestCase().assertRaises(
TimeoutException, c.create_and_wait_job, "rg", "./requirements.txt"
)

@responses.activate
def test_get_batch_info():
    """get_batch_info returns the mocked batch info payload on HTTP 200."""
    payload = {
        "company_id": "1234",
        "client_id": "12345",
        "batch_id": "123",
        "created_at": "2022-06-22T20:58:09Z",
        "service": "rg",
        "status": "processing",
        "source": "API",
        "total_jobs": 3,
        "total_processed": 2,
    }
    responses.add(
        responses.GET,
        f"{BASE_URL}/ocr/batch/info/123",
        json=payload,
        status=200,
    )

    client = Client()
    info = client.get_batch_info("123")

    # Presence checks first, exact values afterwards.
    for field in ("batch_id", "service", "client_id", "status"):
        assert info.get(field)

    assert info.get("batch_id") == "123"
    assert info.get("client_id") == "12345"
    assert info.get("service") == "rg"
    assert info.get("status") == "processing"


@responses.activate
def test_get_batch_info_unauthorized():
    """get_batch_info raises InvalidStatusCodeException on HTTP 401."""
    responses.add(responses.GET, f"{BASE_URL}/ocr/batch/info/123", status=401)

    client = Client()
    with unittest.TestCase().assertRaises(InvalidStatusCodeException):
        client.get_batch_info("123")

@responses.activate
def test_get_job_info():
    """get_job_info returns the mocked job info payload on HTTP 200."""
    payload = {
        "client_id": "12345",
        "job_id": "123",
        "service": "rg",
        "status": "processing",
    }
    responses.add(
        responses.GET,
        f"{BASE_URL}/ocr/job/info/123",
        json=payload,
        status=200,
    )

    client = Client()
    info = client.get_job_info("123")

    # Presence checks first, exact values afterwards.
    for field in ("job_id", "service", "client_id", "status"):
        assert info.get(field)

    assert info.get("job_id") == "123"
    assert info.get("client_id") == "12345"
    assert info.get("service") == "rg"
    assert info.get("status") == "processing"


@responses.activate
def test_get_job_info_unauthorized():
    """get_job_info raises InvalidStatusCodeException on HTTP 401."""
    responses.add(responses.GET, f"{BASE_URL}/ocr/job/info/123", status=401)

    client = Client()
    with unittest.TestCase().assertRaises(InvalidStatusCodeException):
        client.get_job_info("123")

@responses.activate
def test_get_batch_result():
    """get_batch_result returns the mocked list of job results unchanged."""
    jobs = [
        {
            "job_ksuid": "123",
            "service": "rg",
            "status": "processing",
        },
    ]
    responses.add(
        responses.GET,
        f"{BASE_URL}/ocr/batch/result/123",
        json=jobs,
        status=200,
    )

    c = Client()
    res = c.get_batch_result("123")

    assert res
    assert len(res) == 1
    # Checking only the length would let a mangled payload slip through;
    # the client is expected to hand back the decoded JSON as-is.
    assert res == jobs

@responses.activate
def test_get_batch_result_unauthorized():
    """get_batch_result raises InvalidStatusCodeException on HTTP 401."""
    responses.add(responses.GET, f"{BASE_URL}/ocr/batch/result/123", status=401)

    client = Client()
    with unittest.TestCase().assertRaises(InvalidStatusCodeException):
        client.get_batch_result("123")

@responses.activate
def test_get_batch_result_storage():
    """get_batch_result_storage returns the mocked download url and expiration."""
    payload = {
        "exp": "60000",
        "url": "https://presignedurldemo.s3.eu-west-2.amazonaws.com/image.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAJJWZ7B6WCRGMKFGQ%2F20180210%2Feu-west-2%2Fs3%2Faws4_request&X-Amz-Date=20180210T171315Z&X-Amz-Expires=1800&X-Amz-Signature=12b74b0788aa036bc7c3d03b3f20c61f1f91cc9ad8873e3314255dc479a25351&X-Amz-SignedHeaders=host",
    }
    responses.add(
        responses.GET,
        f"{BASE_URL}/ocr/batch/result/123",
        json=payload,
        status=200,
    )

    c = Client()
    res = c.get_batch_result_storage("123")

    assert res.get("exp")
    assert res.get("url")

    assert res.get("exp") == "60000"
    # A truthiness check alone would accept any non-empty string; the
    # pre-signed URL must come back exactly as the API returned it.
    assert res.get("url") == payload["url"]


@responses.activate
def test_get_batch_result_storage_unauthorized():
    """get_batch_result_storage raises InvalidStatusCodeException on HTTP 401."""
    responses.add(responses.GET, f"{BASE_URL}/ocr/batch/result/123", status=401)

    client = Client()
    with unittest.TestCase().assertRaises(InvalidStatusCodeException):
        client.get_batch_result_storage("123")
2 changes: 2 additions & 0 deletions ultraocr/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
KEY_FACEMATCH = "facematch"
KEY_EXTRA = "extra-document"
FLAG_TRUE = "true"
# Values for the "return" query parameter of the batch result request:
# "request" is sent by get_batch_result, "storage" by get_batch_result_storage.
RETURN_REQUEST = "request"
RETURN_STORAGE = "storage"


class Resource(Enum):
Expand Down
168 changes: 168 additions & 0 deletions ultraocr/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
FLAG_TRUE,
KEY_EXTRA,
KEY_FACEMATCH,
RETURN_REQUEST,
RETURN_STORAGE,
)


Expand Down Expand Up @@ -121,6 +123,14 @@ def _auto_authenticate(self) -> None:
if self.auto_refresh and datetime.now() > self.expires_at:
self.authenticate(self.client_id, self.client_secret, self.expires)

def _get_batch_result(self, batch_id: str, params: dict = None):
    """Request the batch result endpoint and return the decoded body.

    Shared by the public batch result methods, which differ only in the
    query parameters they send.

    Args:
        batch_id: The id of the batch, given on batch creation.
        params: The query parameters sent with the GET request.

    Returns:
        The json-decoded response body.

    Raises:
        InvalidStatusCodeException: If status code is not 200.
    """
    response = self._get(
        f"{self.base_url}/ocr/batch/result/{batch_id}",
        params=params,
    )
    validate_status_code(response.status_code, HTTPStatus.OK)
    return response.json()

def authenticate(
self, client_id: str, client_secret: str, expires: int = DEFAULT_EXPIRATION_TIME
) -> None:
Expand Down Expand Up @@ -785,3 +795,161 @@ def create_and_wait_batch(
batch_id = res.get("id")

return self.wait_for_batch_done(batch_id, wait_jobs)

def get_job_info(self, job_id: str):
    """Get job info.

    Fetch the detailed info of a single job.

    Args:
        job_id: The id of the job, given on job creation or on batch status.

    Returns:
        A json response containing the client data (if given on job creation), the metadata
        (if given on job creation), job id, company id, client id, creation time, service,
        source, status (may be "waiting", "error", "processing", "validating" or "done") and
        the result or error depending on the status. For example:
        {
            "client_data": { },
            "metadata": { },
            "created_at": "2022-06-22T20:58:09Z",
            "company_id": "123",
            "client_id": "1234",
            "job_id": "2AwrSd7bxEMbPrQ5jZHGDzQ4qL3",
            "source": "API",
            "result": {
                "Time": "7.45",
                "Document": [
                    {
                        "Page": 1,
                        "Data": {
                            "DocumentType": {
                                "conf": 99,
                                "value": "CNH"
                            }
                        }
                    }
                ]
            },
            "service": "idtypification",
            "status": "done"
        }

    Raises:
        InvalidStatusCodeException: If status code is not 200.
    """
    response = self._get(f"{self.base_url}/ocr/job/info/{job_id}")
    validate_status_code(response.status_code, HTTPStatus.OK)
    return response.json()

def get_batch_info(self, batch_id: str):
    """Get document batch info.

    Fetch the detailed info of a batch, including how many of its jobs were processed.

    Args:
        batch_id: The id of the batch, given on batch creation.

    Returns:
        A json response containing the id, company id, client id, creation time, service,
        source, number of jobs, number of processed jobs and status (may be "waiting",
        "error", "processing" or "done"). For example:
        {
            "company_id": "123",
            "client_id": "1234",
            "batch_id": "2AwrSd7bxEMbPrQ5jZHGDzQ4qL3",
            "created_at": "2022-06-22T20:58:09Z",
            "service": "cnh",
            "status": "done",
            "source": "API",
            "total_jobs": 3,
            "total_processed": 2,
        }

    Raises:
        InvalidStatusCodeException: If status code is not 200.
    """
    response = self._get(f"{self.base_url}/ocr/batch/info/{batch_id}")
    validate_status_code(response.status_code, HTTPStatus.OK)
    return response.json()

def get_batch_result(self, batch_id: str):
    """Get batch jobs results.

    Get the batch jobs results as array, returned directly in the response body
    (the "return" query parameter is set to RETURN_REQUEST).

    Args:
        batch_id: The id of the batch, given on batch creation.

    Returns:
        A json response containing an array with one entry per job of the batch. As shown
        in the example, each entry carries the client data, creation time, job id
        ("job_ksuid"), result, service, status and filename. For example:
        [
            {
                "client_data": { },
                "created_at": "2022-06-22T20:58:09Z",
                "job_ksuid": "2AwrSd7bxEMbPrQ5jZHGDzQ4qL3",
                "result": {
                    "Time": "7.45",
                    "Document": [
                        {
                            "Page": 1,
                            "Data": {
                                "DocumentType": {
                                    "conf": 99,
                                    "value": "CNH"
                                }
                            }
                        }
                    ]
                },
                "service": "idtypification",
                "status": "done",
                "filename": "123.jpg"
            }
        ]

    Raises:
        InvalidStatusCodeException: If status code is not 200.
    """
    params = {
        "return": RETURN_REQUEST,
    }

    return self._get_batch_result(batch_id, params)

def get_batch_result_storage(self, batch_id: str, params: dict = None):
    """Get batch jobs results as file.

    Generate a url to download a file containing the batch jobs results.

    Args:
        batch_id: The id of the batch, given on batch creation.
        params: The query parameters based on UltraOCR Docs. A "return" entry, if
            present, is overridden with RETURN_STORAGE; the given dict is not modified.

    Returns:
        A json response containing the url to download and the expiration time (1 minute).
        For example:
        {
            "exp": "60000",
            "url": "https://presignedurldemo.s3.eu-west-2.amazonaws.com/image.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAJJWZ7B6WCRGMKFGQ%2F20180210%2Feu-west-2%2Fs3%2Faws4_request&X-Amz-Date=20180210T171315Z&X-Amz-Expires=1800&X-Amz-Signature=12b74b0788aa036bc7c3d03b3f20c61f1f91cc9ad8873e3314255dc479a25351&X-Amz-SignedHeaders=host"
        }

    Raises:
        InvalidStatusCodeException: If status code is not 200.
    """
    caller_params = {} if params is None else params

    # "return" is placed last so it always wins over a caller-supplied value.
    query = {**caller_params, "return": RETURN_STORAGE}

    return self._get_batch_result(batch_id, query)