[AvroTensorDataset] Add more py test to cover various scenarios#1795
[AvroTensorDataset] Add more py test to cover various scenarios#1795lijuanzhang78 wants to merge 6 commits intotensorflow:masterfrom
Conversation
fix linter
|
@kvignesh1420 Hi! Could you help review this PR adding more py tests? Thanks! |
|
Thanks @lijuanzhang78. Will take a look soon! |
| BATCH_SIZES = [8, 16, 32, 64, 128, 256, 512, 1024] | ||
| PARALLELISM = [1, 2, 3, 4, 5, 6, tf.data.AUTOTUNE] | ||
| PARAMS = [ | ||
| (batch_size, 1024, "deflate", parallelism) | ||
| for batch_size in BATCH_SIZES | ||
| for parallelism in PARALLELISM | ||
| ] | ||
|
|
||
|
|
||
| @pytest.mark.benchmark( | ||
| group="autotuning", | ||
| ) | ||
| @pytest.mark.parametrize( | ||
| ["batch_size", "shuffle_buffer_size", "codec", "parallelism"], PARAMS | ||
| ) | ||
| def test_autotuning(batch_size, shuffle_buffer_size, codec, parallelism, benchmark): | ||
| data_source = DataSource( | ||
| scenario=MIXED_TYPES_SCENARIO, num_records=LARGE_NUM_RECORDS | ||
| ) | ||
| run_atds_benchmark_from_data_source( | ||
| data_source, | ||
| batch_size, | ||
| benchmark, | ||
| parallelism=parallelism, | ||
| codec=codec, | ||
| shuffle_buffer_size=shuffle_buffer_size, | ||
| rounds=10, | ||
| ) |
There was a problem hiding this comment.
The file name says that you are testing the autotune functionality. However, I see similar tests in tests/test_atds_avro/benchmark/test_atds_parallelism_benchmark.py as well. Maybe the tests can be combined ?
| import pytest | ||
|
|
||
| from tests.test_atds_avro.utils.data_source import DataSource | ||
| from tests.test_atds_avro.utils.data_source_registry import SMALL_NUM_RECORDS | ||
| from tests.test_atds_avro.utils.atds_benchmark_utils import ( | ||
| run_atds_benchmark_from_data_source, | ||
| ) | ||
| from tests.test_atds_avro.utils.benchmark_utils import MIXED_TYPES_SCENARIO | ||
|
|
||
|
|
||
| @pytest.mark.benchmark( | ||
| group="codec", | ||
| ) | ||
| @pytest.mark.parametrize( | ||
| ["batch_size", "codec"], [(128, "null"), (128, "deflate"), (128, "snappy")] | ||
| ) | ||
| def test_codec(batch_size, codec, benchmark): | ||
| data_source = DataSource( | ||
| scenario=MIXED_TYPES_SCENARIO, num_records=SMALL_NUM_RECORDS | ||
| ) | ||
| run_atds_benchmark_from_data_source(data_source, batch_size, benchmark, codec=codec) |
There was a problem hiding this comment.
same issue as before, I see similar tests in tests/test_atds_avro/benchmark/test_atds_parallelism_benchmark.py as well. Maybe the tests can be combined?
| @pytest.mark.skipif( | ||
| os.getenv("ATDS_MEM_LEAK_CHECK") != "1", | ||
| reason="This benchmark test is only used in memory leak check.", | ||
| ) | ||
| @pytest.mark.benchmark( | ||
| group="all_types_of_tensors", | ||
| ) | ||
| @pytest.mark.parametrize("batch_size", [(16)]) | ||
| def test_all_types_of_tensors_for_memory_leak_check(batch_size, benchmark): | ||
| data_source = get_data_source_from_registry(ALL_TYPES_DATA_SOURCE_NAME) | ||
| shuffle_buffer_size = batch_size * 8 | ||
| run_atds_benchmark_from_data_source( | ||
| data_source, | ||
| batch_size, | ||
| benchmark, | ||
| codec="deflate", | ||
| shuffle_buffer_size=shuffle_buffer_size, | ||
| rounds=1, | ||
| ) |
There was a problem hiding this comment.
I might be missing something but how exactly is the leak detected (if there is one)?
|
@lijuanzhang78 gentle ping as the review comments might have been missed 🙂 . |
This pr add more tests for shuffle, autotune, parallelism for various data types.