From 70acb640083c1502b946ecd14c90f19b1bb20158 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Tue, 5 May 2026 10:56:14 +0100 Subject: [PATCH 1/2] Set OPTE maximum port MTU during creation This allows opteadm/sled-agent to define what the maximum MTU on a link will be on a per-port basis, which should be governed by some sense of control plane policy. I think that create-time is really the only time we can do this: assignment of max MTU (SDU in MAC parlance) happens as part of the call to `mac_register`, and in turn communication of MTU to a guest by viona is basically in a write-once register in the config space. I think all that seems to need checking is that this is visible off the bat to viona via propolis, and that visibility holds whether or not the propolis is in a non-global zone. Also closes #767. --- bin/opteadm/src/bin/opteadm.rs | 13 +++++++----- lib/opte-ioctl/src/lib.rs | 4 ++-- lib/oxide-vpc/src/api.rs | 7 +++---- xde-tests/src/lib.rs | 4 ++-- xde/src/xde.rs | 37 ++++------------------------------ 5 files changed, 19 insertions(+), 46 deletions(-) diff --git a/bin/opteadm/src/bin/opteadm.rs b/bin/opteadm/src/bin/opteadm.rs index 52b40f7f..39398ca8 100644 --- a/bin/opteadm/src/bin/opteadm.rs +++ b/bin/opteadm/src/bin/opteadm.rs @@ -206,14 +206,17 @@ enum Command { #[arg(long)] src_underlay_addr: Ipv6Addr, + /// The MTU that should be assigned to the newly created OPTE port. + /// + /// If unset, this will default to the standard Ethernet MTU of 1500. 
+ #[arg(long)] + mtu: Option<u32>, + #[command(flatten)] external_net: ExternalNetConfig, #[command(flatten)] dhcp: DhcpConfig, - - #[arg(long)] - passthrough: bool, }, /// Delete an xde device @@ -823,7 +826,7 @@ fn main() -> anyhow::Result<()> { src_underlay_addr, dhcp, external_net, - passthrough, + mtu, } => { let ip_cfg = match private_ip { IpAddr::Ip4(private_ip) => { @@ -877,7 +880,7 @@ fn main() -> anyhow::Result<()> { dhcp: dhcp.into(), }; - hdl.create_xde(&name, cfg, passthrough)?; + hdl.create_xde(&name, cfg, mtu)?; } Command::DeleteXde { name } => { diff --git a/lib/opte-ioctl/src/lib.rs b/lib/opte-ioctl/src/lib.rs index 5bcb3553..ffb5a92c 100644 --- a/lib/opte-ioctl/src/lib.rs +++ b/lib/opte-ioctl/src/lib.rs @@ -127,7 +127,7 @@ impl OpteHdl { &self, name: &str, cfg: VpcCfg, - passthrough: bool, + mtu: Option<u32>, ) -> Result { use libnet::link; @@ -139,7 +139,7 @@ impl OpteHdl { let xde_devname = name.into(); let cmd = OpteCmd::CreateXde; - let req = CreateXdeReq { xde_devname, linkid, cfg, passthrough }; + let req = CreateXdeReq { xde_devname, linkid, cfg, mtu }; let res = run_cmd_ioctl(self.device.as_raw_fd(), cmd, Some(&req)); diff --git a/lib/oxide-vpc/src/api.rs b/lib/oxide-vpc/src/api.rs index 7a396a72..7fc91d62 100644 --- a/lib/oxide-vpc/src/api.rs +++ b/lib/oxide-vpc/src/api.rs @@ -618,11 +618,10 @@ pub struct CreateXdeReq { /// details. pub cfg: VpcCfg, - /// This is a development tool for completely bypassing OPTE processing. + /// The MTU we should assign to the newly created OPTE port. /// - /// XXX Pretty sure we aren't making much use of this anymore, and - /// should go away before v1. - pub passthrough: bool, + /// If unset, this will default to the standard Ethernet MTU of 1500. 
+ pub mtu: Option<u32>, } pub type SNat4Cfg = SNatCfg; diff --git a/xde-tests/src/lib.rs b/xde-tests/src/lib.rs index e389a497..a3772df9 100644 --- a/xde-tests/src/lib.rs +++ b/xde-tests/src/lib.rs @@ -316,7 +316,7 @@ impl OptePort { dhcp: DhcpCfg::default(), }; let adm = OpteHdl::open()?; - adm.create_xde(name, cfg.clone(), false)?; + adm.create_xde(name, cfg.clone(), None)?; Ok(OptePort { name: name.into(), cfg, @@ -372,7 +372,7 @@ impl OptePort { dhcp: DhcpCfg::default(), }; let adm = OpteHdl::open()?; - adm.create_xde(name, cfg.clone(), false)?; + adm.create_xde(name, cfg.clone(), None)?; Ok(OptePort { name: name.into(), cfg, diff --git a/xde/src/xde.rs b/xde/src/xde.rs index 5c682595..48326b7b 100644 --- a/xde/src/xde.rs +++ b/xde/src/xde.rs @@ -616,6 +616,7 @@ pub struct XdeDev { linkid: datalink_id_t, mh: *mut mac::mac_handle, link_state: mac::link_state_t, + mtu: u32, // The OPTE port associated with this xde device. // @@ -626,10 +627,6 @@ pub struct XdeDev { port_v2p: Arc, port_igw_map: KMutex>, - // Pass the packets through to the underlay devices, skipping - // opte-core processing. 
- passthrough: bool, - pub vni: Vni, // These are clones of the underlay ports initialized by the @@ -1195,12 +1192,14 @@ fn create_xde(req: &CreateXdeReq) -> Result { }; let mut guest_addr = cfg.guest_mac.bytes(); + let mtu = req.mtu.unwrap_or(u32::from(ETHERNET_MTU)); let mut xde = Arc::new(XdeDev { devname: req.xde_devname.clone(), linkid: req.linkid, mh: ptr::null_mut(), link_state: mac::link_state_t::Down, + mtu, port: new_port( req.xde_devname.clone(), &cfg, @@ -1213,7 +1212,6 @@ fn create_xde(req: &CreateXdeReq) -> Result { port_v2p, vni: cfg.vni, port_igw_map: KMutex::new(None), - passthrough: req.passthrough, u1, u2, underlay_capab, @@ -1241,7 +1239,7 @@ fn create_xde(req: &CreateXdeReq) -> Result { mreg.m_priv_props = core::ptr::null_mut(); mreg.m_instance = c_uint::MAX; // let mac handle this mreg.m_min_sdu = 1; - mreg.m_max_sdu = u32::from(ETHERNET_MTU); // TODO hardcode + mreg.m_max_sdu = mtu; mreg.m_multicast_sdu = 0; mreg.m_margin = crate::sys::VLAN_TAGSZ; mreg.m_v12n = mac::MAC_VIRT_NONE as u32; @@ -2842,19 +2840,6 @@ fn xde_mc_tx_one<'a>( } }; - // Send straight to underlay in passthrough mode. - if src_dev.passthrough { - // TODO We need to deal with flow control. This could actually - // get weird, this is the first provider to use mac_tx(). Is - // there something we can learn from aggr here? I need to - // refresh my memory on all of this. - // - // TODO Is there way to set mac_tx to must use result? - drop(parsed_pkt); - postbox.post_underlay(UnderlayIndex::U1, TxHint::NoneOrMixed, pkt); - return; - } - let port = &src_dev.port; // The port processing code will fire a probe that describes what @@ -3538,13 +3523,6 @@ fn xde_rx_one( None }; - // We are in passthrough mode, skip OPTE processing. 
- if dev.passthrough { - drop(parsed_pkt); - postbox.post(port_key, pkt); - return None; - } - let port = &dev.port; let res = port.process(Direction::In, parsed_pkt); @@ -3651,13 +3629,6 @@ fn xde_rx_one_direct( None }; - // We are in passthrough mode, skip OPTE processing. - if dev.passthrough { - drop(parsed_pkt); - postbox.post(port_key, pkt); - return; - } - let port = &dev.port; let res = port.process(Direction::In, parsed_pkt); From 84e91b62621406f38e4d0870a92bafacad96536e Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Tue, 5 May 2026 17:08:38 +0100 Subject: [PATCH 2/2] Bump API_VERSION. This is necessary, but CI won't catch it as these types are not part of `opte_api`. --- crates/opte-api/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/opte-api/src/lib.rs b/crates/opte-api/src/lib.rs index 5c319116..0ea97614 100644 --- a/crates/opte-api/src/lib.rs +++ b/crates/opte-api/src/lib.rs @@ -51,7 +51,7 @@ pub use ulp::*; /// /// We rely on CI and the check-api-version.sh script to verify that /// this number is incremented anytime the oxide-api code changes. -pub const API_VERSION: u64 = 40; +pub const API_VERSION: u64 = 41; /// Major version of the OPTE package. pub const MAJOR_VERSION: u64 = 0;