29 changes: 29 additions & 0 deletions backends/cadence/aot/BUCK
@@ -31,12 +31,23 @@ fbcode_target(_kind = runtime.python_library,
],
)

fbcode_target(_kind = runtime.python_library,
name = "fold_qat_conv_bn",
srcs = [
"fold_qat_conv_bn.py",
],
deps = [
"//caffe2:torch",
],
)

fbcode_target(_kind = runtime.python_library,
name = "compiler",
srcs = [
"compiler.py",
],
deps = [
":fold_qat_conv_bn",
":memory_planning",
":ops_registrations",
":passes",
@@ -46,6 +57,7 @@ fbcode_target(_kind = runtime.python_library,
"//caffe2:torch",
"//executorch/backends/cadence/aot/quantizer:fusion_pass",
"//executorch/backends/cadence/aot/quantizer:quantizer",
"//executorch/backends/transforms:quantize_fused_convbn_bias_pass",
"//executorch/backends/transforms:decompose_sdpa",
"//executorch/backends/transforms:remove_clone_ops",
"//executorch/devtools:lib",
@@ -512,6 +524,23 @@ fbcode_target(_kind = python_unittest,
],
)

fbcode_target(_kind = python_unittest,
name = "test_fold_qat_conv_bn",
srcs = [
"tests/test_fold_qat_conv_bn.py",
],
supports_static_listing = False,
typing = True,
deps = [
":compiler",
":fold_qat_conv_bn",
"//caffe2:torch",
"//executorch/backends/cadence/aot:ops_registrations",
"//executorch/backends/cadence/aot/quantizer:quantizer",
"//executorch/backends/transforms:quantize_fused_convbn_bias_pass",
],
)

fbcode_target(_kind = python_unittest,
name = "test_remove_ops_passes",
srcs = [
23 changes: 23 additions & 0 deletions backends/cadence/aot/compiler.py
@@ -17,6 +17,7 @@
QuantizedInputWrapper,
trace as trace_fn,
)
from executorch.backends.cadence.aot.fold_qat_conv_bn import FoldQATConvBNPass
from executorch.backends.cadence.aot.memory_planning import (
CadenceMemoryPlanning,
print_memory_planning_info,
@@ -30,6 +31,9 @@
get_default_memory_config,
MemoryConfig,
)
from executorch.backends.transforms.quantize_fused_convbn_bias_pass import (
QuantizeFusedConvBnBiasAtenPass,
)
from executorch.devtools import generate_etrecord
from executorch.exir import (
EdgeCompileConfig,
@@ -162,6 +166,18 @@ def apply_pre_edge_transform_passes(
which will instantiate a default quantizer for you if needed.
Returns an ExportedProgram with the fused model.
"""
# Create zero biases for convs without one, and quantize any float biases that exist
converted_program = _transform(
converted_program,
QuantizeFusedConvBnBiasAtenPass(
exported_program=converted_program, default_zero_bias=True
),
)

# Fold QAT Conv-BN simulated fusion patterns
# Removes the div(scale) → add(bias) → batch_norm chain and absorbs the correction into the conv bias
FoldQATConvBNPass(converted_program)(converted_program.graph_module)

# Get patterns and apply fusion of dq -> op -> q to qop
# pyre-ignore[16]: no attribute
patterns = [q.pattern for q in quantizer.quantizers]
@@ -205,6 +221,13 @@ def get_fake_quant_model(

# Get converted graph module
converted_gm = convert_pt2(prepared_gm, dump_graphs=dump_graphs)

# Create zero biases for convs without one, and quantize any float biases
QuantizeFusedConvBnBiasAtenPass(default_zero_bias=True)(converted_gm)

# Fold QAT Conv-BN simulated fusion patterns (now all convs have a bias to fold into)
FoldQATConvBNPass()(converted_gm)

return converted_gm


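A note on the bias step: the diff runs QuantizeFusedConvBnBiasAtenPass first so that every conv is guaranteed to have a (quantized) bias before folding. Below is a minimal sketch of that bias handling, assuming the conventional int32 scheme where bias_scale = input_scale * weight_scale; quantize_conv_bias is an illustrative helper for this page, not the pass's actual API.

from typing import Optional

import torch

def quantize_conv_bias(
    bias: Optional[torch.Tensor],
    out_channels: int,
    input_scale: float,
    weight_scale: torch.Tensor,  # per-channel weight scales, shape (out_channels,)
) -> torch.Tensor:
    # Convs without a bias get an explicit all-zero one, so a later
    # folding pass always has a bias tensor to absorb corrections into.
    if bias is None:
        bias = torch.zeros(out_channels)
    # Conventionally, a quantized conv bias is int32 with
    # scale = input_scale * weight_scale and zero_point = 0.
    bias_scale = input_scale * weight_scale
    return torch.clamp(
        torch.round(bias / bias_scale), -(2**31), 2**31 - 1
    ).to(torch.int32)

Running this pass before the fold is what makes the later comment "now all convs have a bias to fold into" hold.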
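For context on what the fold itself computes: below is a minimal sketch of the standard Conv-BN folding arithmetic that a pass like FoldQATConvBNPass builds on. fold_conv_bn is an illustrative helper; the actual pass additionally pattern-matches the QAT-specific div(scale) → add(bias) → batch_norm chain in the graph, which this sketch does not reproduce.

import torch

def fold_conv_bn(
    conv_weight: torch.Tensor,  # (out_ch, in_ch, kh, kw)
    conv_bias: torch.Tensor,    # (out_ch,) -- guaranteed by the bias pass above
    bn_mean: torch.Tensor,
    bn_var: torch.Tensor,
    bn_gamma: torch.Tensor,
    bn_beta: torch.Tensor,
    eps: float = 1e-5,
) -> tuple[torch.Tensor, torch.Tensor]:
    # BN(y) = gamma * (y - mean) / sqrt(var + eps) + beta. Substituting
    # y = conv(x; w, b) lets the affine transform be absorbed into the
    # conv parameters, removing the batch_norm node entirely.
    scale = bn_gamma / torch.sqrt(bn_var + eps)
    folded_weight = conv_weight * scale.reshape(-1, 1, 1, 1)
    folded_bias = (conv_bias - bn_mean) * scale + bn_beta
    return folded_weight, folded_bias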