diff --git a/manifests/crds/node.harvesterhci.io_nodeconfigs.yaml b/manifests/crds/node.harvesterhci.io_nodeconfigs.yaml index e0777fd0..8f2f81f3 100644 --- a/manifests/crds/node.harvesterhci.io_nodeconfigs.yaml +++ b/manifests/crds/node.harvesterhci.io_nodeconfigs.yaml @@ -43,6 +43,8 @@ spec: properties: enableV2DataEngine: type: boolean + hugepagesToAllocate: + type: integer type: object ntpConfigs: properties: diff --git a/pkg/apis/node.harvesterhci.io/v1beta1/nodeconfig.go b/pkg/apis/node.harvesterhci.io/v1beta1/nodeconfig.go index 67d9f92c..8871fedc 100644 --- a/pkg/apis/node.harvesterhci.io/v1beta1/nodeconfig.go +++ b/pkg/apis/node.harvesterhci.io/v1beta1/nodeconfig.go @@ -31,7 +31,8 @@ type NTPConfig struct { } type LonghornConfig struct { - EnableV2DataEngine bool `json:"enableV2DataEngine,omitempty"` + EnableV2DataEngine bool `json:"enableV2DataEngine,omitempty"` + HugepagesToAllocate uint `json:"hugepagesToAllocate,omitempty"` } type NodeConfigStatus struct { diff --git a/pkg/controller/nodeconfig/config/config_test.go b/pkg/controller/nodeconfig/config/config_test.go index b8bbf5e7..64e47ee9 100644 --- a/pkg/controller/nodeconfig/config/config_test.go +++ b/pkg/controller/nodeconfig/config/config_test.go @@ -2,6 +2,7 @@ package config import ( "os" + "strconv" "testing" "github.com/harvester/node-manager/pkg/apis/node.harvesterhci.io/v1beta1" @@ -183,3 +184,48 @@ func TestExtraConfigPersistence(t *testing.T) { _, err = os.Stat(settingsOEMPathBackupPath) assert.False(t, os.IsNotExist(err)) } + +func TestLonghornConfigPersistence(t *testing.T) { + tmpDir := t.TempDir() + oemPath = tmpDir + "/host/oem" + settingsOEMPath = tmpDir + "/host/oem/99_settings.yaml" + settingsOEMPathBackupPath = tmpDir + "/host/oem/99_settings.yaml.bak" + if os.MkdirAll(oemPath, 0777) != nil { + t.Errorf("Unable to create %s", oemPath) + } + + testLonghornConfigPersistence := func(hugepagesToAllocate uint64) { + err := updateLonghornConfigPersistence(hugepagesToAllocate) + 
assert.Nil(t, err) + + // Should be able to load config + yipConfig, err := utils.LoadYipConfig(settingsOEMPath) + assert.Nil(t, err) + + // Config should be valid + assert.Equal(t, "oem_settings", yipConfig.Name) + // ...one top level stage ("initramfs"): + assert.Equal(t, 1, len(yipConfig.Stages)) + assert.Contains(t, yipConfig.Stages, yipStageInitramfs) + // ...which in turn has one stage inside ("Runtime SPDK Prerequisites"): + assert.Equal(t, 1, len(yipConfig.Stages[yipStageInitramfs])) + assert.Equal(t, "Runtime SPDK Prerequisites", yipConfig.Stages[yipStageInitramfs][0].Name) + if hugepagesToAllocate > 0 { + // ...which might have a sysctl to allocate the requested number of hugepages + assert.Equal(t, strconv.FormatUint(hugepagesToAllocate, 10), yipConfig.Stages[yipStageInitramfs][0].Sysctl["vm.nr_hugepages"]) + } else { + // (or it might not) + assert.Equal(t, 0, len(yipConfig.Stages[yipStageInitramfs][0].Sysctl)) + } + // ...and three modprobe commands + assert.Equal(t, 3, len(yipConfig.Stages[yipStageInitramfs][0].Commands)) + assert.Equal(t, "modprobe vfio_pci", yipConfig.Stages[yipStageInitramfs][0].Commands[0]) + assert.Equal(t, "modprobe uio_pci_generic", yipConfig.Stages[yipStageInitramfs][0].Commands[1]) + assert.Equal(t, "modprobe nvme_tcp", yipConfig.Stages[yipStageInitramfs][0].Commands[2]) + } + + // Write longhorn config for V2 data engine enabled with various numbers of hugepages + testLonghornConfigPersistence(1024) + testLonghornConfigPersistence(512) + testLonghornConfigPersistence(0) +} diff --git a/pkg/controller/nodeconfig/config/longhorn.go b/pkg/controller/nodeconfig/config/longhorn.go index 966038b5..177880ce 100644 --- a/pkg/controller/nodeconfig/config/longhorn.go +++ b/pkg/controller/nodeconfig/config/longhorn.go @@ -13,9 +13,8 @@ import ( ) const ( - spdkStageName = "Runtime SPDK Prerequisites" - hugepagesPath = "/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages" - hugepagesToAllocate = 1024 + spdkStageName = "Runtime SPDK Prerequisites" + 
hugepagesPath = "/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages" ) var ( @@ -72,24 +71,26 @@ func restartKubelet() error { return nil } -func EnableV2DataEngine() error { - origHugepages, err := getNrHugepages() - if err != nil { - return err +func updateLonghornConfigPersistence(hugepagesToAllocate uint64) error { + stage := schema.Stage{ + Name: spdkStageName, + Commands: []string{}, + } + + for _, module := range modulesToLoad { + stage.Commands = append(stage.Commands, fmt.Sprintf("modprobe %s", module)) + } + + if hugepagesToAllocate > 0 { + stage.Sysctl = map[string]string{"vm.nr_hugepages": fmt.Sprintf("%d", hugepagesToAllocate)} } + return UpdatePersistentOEMSettings(stage) +} + +func EnableV2DataEngine(hugepagesToAllocate uint64) error { // Write the persistent config first, so we know it's saved... - if err := UpdatePersistentOEMSettings(schema.Stage{ - Name: spdkStageName, - Sysctl: map[string]string{ - "vm.nr_hugepages": fmt.Sprintf("%d", hugepagesToAllocate), - }, - Commands: []string{ - "modprobe vfio_pci", - "modprobe uio_pci_generic", - "modprobe nvme_tcp", - }, - }); err != nil { + if err := updateLonghornConfigPersistence(hugepagesToAllocate); err != nil { return err } @@ -98,9 +99,18 @@ func EnableV2DataEngine() error { return fmt.Errorf("unable to load kernel modules %v: %v", modulesToLoad, err) } + origHugepages, err := getNrHugepages() + if err != nil { + return err + } + if origHugepages >= hugepagesToAllocate { // We've already got enough hugepages, and don't want to unnecessarily // restart the kubelet, so no further action required + // (this also handles the zero case, kinda - at least, if hugepages + // are disabled, we won't bother trying to allocate any, but if + // hugepages were previously enabled, and later disabled, they will + // remain allocated until next reboot). 
return nil } diff --git a/pkg/controller/nodeconfig/controller.go b/pkg/controller/nodeconfig/controller.go index fdabd2f8..0d22ade9 100644 --- a/pkg/controller/nodeconfig/controller.go +++ b/pkg/controller/nodeconfig/controller.go @@ -70,7 +70,7 @@ func (c *Controller) OnNodeConfigChange(key string, nodecfg *nodeconfigv1.NodeCo // query that value when lhs/v2-data-engine is set to true. This restart // logic is handled inside EnableV2DataEngine() and DisableV2DataEngine(). if nodecfg.Spec.LonghornConfig != nil && nodecfg.Spec.LonghornConfig.EnableV2DataEngine { - if err := config.EnableV2DataEngine(); err != nil { + if err := config.EnableV2DataEngine(uint64(nodecfg.Spec.LonghornConfig.HugepagesToAllocate)); err != nil { logrus.WithFields(logrus.Fields{ "err": err.Error(), }).Error("Failed to enable V2 Data Engine")