[pre-commit.ci] auto fixes from pre-commit.com hooks

pre-commit-ci[bot] · pre-commit-ci[bot] · commit e6bf02a17427 · 2026-01-14T23:27:59.000Z
for more information, see https://pre-commit.ci
diff --git a/tests/cpp/operator/test_cast_mxfp8_grouped.cu b/tests/cpp/operator/test_cast_mxfp8_grouped.cu
@@ -263,7 +263,7 @@ void performTest_x1(const ProcessingMethod processing_method,
     std::vector<OutputType> out_data_colwise_h(colwise ? elts_num : 0);
     std::vector<fp8e8m0> out_scales_rowwise_h(rowwise ? sfs_num : 0);
     std::vector<fp8e8m0> out_scales_colwise_h(colwise ? sfs_num : 0);
-    
+
     std::vector<OutputType> out_data_rowwise_ref(rowwise ? elts_num : 0);
     std::vector<OutputType> out_data_colwise_ref(colwise ? elts_num : 0);
     std::vector<fp8e8m0> out_scales_rowwise_ref(rowwise ? sfs_num : 0);
@@ -310,7 +310,7 @@ void performTest_x1(const ProcessingMethod processing_method,
     cudaMalloc((void**)&first_dims_d, first_dims_size);
     cudaMalloc((void**)&last_dims_d, last_dims_size);
     cudaMalloc((void**)&offsets_d, offsets_size);
-    
+
     cudaMemcpy(in_data_d, in_data.data(), in_data_size, cudaMemcpyHostToDevice);
     cudaMemcpy(first_dims_d, first_dims_h.data(), first_dims_size, cudaMemcpyHostToDevice);
     cudaMemcpy(last_dims_d, last_dims_h.data(), last_dims_size, cudaMemcpyHostToDevice);
@@ -393,7 +393,7 @@ void performTest_x1(const ProcessingMethod processing_method,
         OutputType* const out_data_colwise_ptr = out_data_colwise_ref.data() + data_offset;
         fp8e8m0* const out_scales_rowwise_ptr = out_scales_rowwise_ref.data() + sfs_offset;
         fp8e8m0* const out_scales_colwise_ptr = out_scales_colwise_ref.data() + sfs_offset;
-    
+
         compute_ref<InputType, OutputType>(
             processing_method, OP, rowwise, colwise, in_ptr, /*grad=*/ nullptr,
             out_data_rowwise_ptr, out_data_colwise_ptr,
@@ -516,7 +516,7 @@ TEST_P(GroupedFusedCastMXFP8TestSuite, Test) {
 
     using namespace transformer_engine;
     using namespace test;
-   
+
     const ProcessingMethod processing_method = std::get<0>(GetParam());
     const ActivationKind activation = std::get<1>(GetParam());
     const ScalingDirection scaling_direction = std::get<2>(GetParam());
@@ -532,7 +532,7 @@ TEST_P(GroupedFusedCastMXFP8TestSuite, Test) {
         switch (shape_rep) {
             case SAME_BOTH_DIMS: {
                 first_dims[t] = logical_shape[0] / num_tensors;
-                last_dims[t] = logical_shape[1]; 
+                last_dims[t] = logical_shape[1];
                 break;
             }
             case VARYING_FIRST_DIM: {
diff --git a/transformer_engine/common/cast/cast.cu b/transformer_engine/common/cast/cast.cu
@@ -27,7 +27,8 @@ void nvte_quantize(const NVTETensor input, NVTETensor output, cudaStream_t strea
   // dispatch::quantize_fwd_helper<IS_ACT, Empty, nullptr>(input, output, nullptr, stream);
 }
 
-void nvte_quantize_grouped(const NVTEGroupedTensor input, NVTEGroupedTensor output, cudaStream_t stream) {
+void nvte_quantize_grouped(const NVTEGroupedTensor input, NVTEGroupedTensor output,
+                           cudaStream_t stream) {
   NVTE_API_CALL(nvte_quantize_grouped);
   using namespace transformer_engine;
 
diff --git a/transformer_engine/common/cast/dispatch/quantize_grouped.cuh b/transformer_engine/common/cast/dispatch/quantize_grouped.cuh
@@ -59,7 +59,8 @@ void quantize_grouped_fwd_helper(const NVTEGroupedTensor input, NVTEGroupedTenso
       GroupedTensor *workspace_tensor = convertNVTEGroupedTensor(workspace);
 
       mxfp8::quantize_grouped</*IS_DBIAS=*/false, /*IS_DACT=*/false, IS_ACT, ParamOP, OP>(
-          input_tensor, activations_tensor, noop_tensor, output_tensor, dbias_tensor, workspace_tensor, stream);
+          input_tensor, activations_tensor, noop_tensor, output_tensor, dbias_tensor,
+          workspace_tensor, stream);
       break;
     }
     default:
diff --git a/transformer_engine/common/cast/mxfp8/quantize_grouped_mxfp8.cuh b/transformer_engine/common/cast/mxfp8/quantize_grouped_mxfp8.cuh

Original file line number	Diff line number	Diff line change
`@@ -27,7 +27,8 @@ void nvte_quantize(const NVTETensor input, NVTETensor output, cudaStream_t strea`
`27`	`27`	`// dispatch::quantize_fwd_helper<IS_ACT, Empty, nullptr>(input, output, nullptr, stream);`
`28`	`28`	`}`
`29`	`29`
`30`		`-void nvte_quantize_grouped(const NVTEGroupedTensor input, NVTEGroupedTensor output, cudaStream_t stream) {`
	`30`	`+void nvte_quantize_grouped(const NVTEGroupedTensor input, NVTEGroupedTensor output,`
	`31`	`+ cudaStream_t stream) {`
`31`	`32`	`NVTE_API_CALL(nvte_quantize_grouped);`
`32`	`33`	`using namespace transformer_engine;`
`33`	`34`
Original file line number	Diff line number	Diff line change
`@@ -59,7 +59,8 @@ void quantize_grouped_fwd_helper(const NVTEGroupedTensor input, NVTEGroupedTenso`
`59`	`59`	`GroupedTensor *workspace_tensor = convertNVTEGroupedTensor(workspace);`
`60`	`60`
`61`	`61`	`mxfp8::quantize_grouped</*IS_DBIAS=*/false, /*IS_DACT=*/false, IS_ACT, ParamOP, OP>(`
`62`		`- input_tensor, activations_tensor, noop_tensor, output_tensor, dbias_tensor, workspace_tensor, stream);`
	`62`	`+ input_tensor, activations_tensor, noop_tensor, output_tensor, dbias_tensor,`
	`63`	`+ workspace_tensor, stream);`
`63`	`64`	`break;`
`64`	`65`	`}`
`65`	`66`	`default:`