Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions manifests/crds/node.harvesterhci.io_nodeconfigs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ spec:
properties:
enableV2DataEngine:
type: boolean
hugepagesToAllocate:
type: integer
type: object
ntpConfigs:
properties:
Expand Down
3 changes: 2 additions & 1 deletion pkg/apis/node.harvesterhci.io/v1beta1/nodeconfig.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ type NTPConfig struct {
}

// LonghornConfig holds the Longhorn-related settings of a node config.
type LonghornConfig struct {
// EnableV2DataEngine is serialized as "enableV2DataEngine" and omitted from
// JSON when false.
// NOTE(review): the field line below appears twice in this view; this looks
// like the before/after pair of a re-alignment in the diff rather than two
// real declarations - confirm against the actual file.
EnableV2DataEngine bool `json:"enableV2DataEngine,omitempty"`
EnableV2DataEngine bool `json:"enableV2DataEngine,omitempty"`
// HugepagesToAllocate is serialized as "hugepagesToAllocate" and omitted from
// JSON when zero. The controller converts it to uint64 and passes it to
// config.EnableV2DataEngine when EnableV2DataEngine is true.
HugepagesToAllocate uint `json:"hugepagesToAllocate,omitempty"`
}

type NodeConfigStatus struct {
Expand Down
46 changes: 46 additions & 0 deletions pkg/controller/nodeconfig/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package config

import (
"os"
"strconv"
"testing"

"github.com/harvester/node-manager/pkg/apis/node.harvesterhci.io/v1beta1"
Expand Down Expand Up @@ -183,3 +184,48 @@ func TestExtraConfigPersistence(t *testing.T) {
_, err = os.Stat(settingsOEMPathBackupPath)
assert.False(t, os.IsNotExist(err))
}

// TestLonghornConfigPersistence verifies that updateLonghornConfigPersistence
// writes a yip config to settingsOEMPath that can be loaded back, for several
// hugepage counts. For every count the config must contain exactly one
// initramfs stage named "Runtime SPDK Prerequisites" with the three modprobe
// commands, and a vm.nr_hugepages sysctl only when the count is non-zero.
func TestLonghornConfigPersistence(t *testing.T) {
	// Point the package-level OEM paths at a per-test temporary directory.
	tmpDir := t.TempDir()
	oemPath = tmpDir + "/host/oem"
	settingsOEMPath = tmpDir + "/host/oem/99_settings.yaml"
	settingsOEMPathBackupPath = tmpDir + "/host/oem/99_settings.yaml.bak"
	if err := os.MkdirAll(oemPath, 0777); err != nil {
		// Nothing below can succeed without this directory, so abort here
		// and report why the creation failed.
		t.Fatalf("Unable to create %s: %v", oemPath, err)
	}

	// testLonghornConfigPersistence writes the config for the given number of
	// hugepages and checks what was persisted. It returns early whenever a
	// failed assertion would make the following dereference or index panic.
	testLonghornConfigPersistence := func(hugepagesToAllocate uint64) {
		err := updateLonghornConfigPersistence(hugepagesToAllocate)
		if !assert.NoError(t, err) {
			return
		}

		// Should be able to load config
		yipConfig, err := utils.LoadYipConfig(settingsOEMPath)
		if !assert.NoError(t, err) {
			return
		}

		// Config should be valid
		assert.Equal(t, "oem_settings", yipConfig.Name)
		// ...one top level stage ("initramfs"):
		assert.Equal(t, 1, len(yipConfig.Stages))
		assert.Contains(t, yipConfig.Stages, yipStageInitramfs)
		// ...which in turn has one stage inside ("Runtime SPDK Prerequisites"):
		stages := yipConfig.Stages[yipStageInitramfs]
		if !assert.Equal(t, 1, len(stages)) {
			return
		}
		stage := stages[0]
		assert.Equal(t, "Runtime SPDK Prerequisites", stage.Name)
		if hugepagesToAllocate > 0 {
			// ...which has a sysctl to allocate the requested number of hugepages
			assert.Equal(t, strconv.FormatUint(hugepagesToAllocate, 10), stage.Sysctl["vm.nr_hugepages"])
		} else {
			// ...or no sysctl at all when no hugepages were requested
			assert.Equal(t, 0, len(stage.Sysctl))
		}
		// ...and three modprobe commands
		if !assert.Equal(t, 3, len(stage.Commands)) {
			return
		}
		assert.Equal(t, "modprobe vfio_pci", stage.Commands[0])
		assert.Equal(t, "modprobe uio_pci_generic", stage.Commands[1])
		assert.Equal(t, "modprobe nvme_tcp", stage.Commands[2])
	}

	// Write longhorn config for V2 data engine enabled with various numbers of hugepages
	testLonghornConfigPersistence(1024)
	testLonghornConfigPersistence(512)
	testLonghornConfigPersistence(0)
}
46 changes: 28 additions & 18 deletions pkg/controller/nodeconfig/config/longhorn.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,8 @@ import (
)

const (
spdkStageName = "Runtime SPDK Prerequisites"
hugepagesPath = "/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages"
hugepagesToAllocate = 1024
spdkStageName = "Runtime SPDK Prerequisites"
hugepagesPath = "/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages"
)

var (
Expand Down Expand Up @@ -72,24 +71,26 @@ func restartKubelet() error {
return nil
}

func EnableV2DataEngine() error {
origHugepages, err := getNrHugepages()
if err != nil {
return err
func updateLonghornConfigPersistence(hugepagesToAllocate uint64) error {
stage := schema.Stage{
Name: spdkStageName,
Commands: []string{},
}

for _, module := range modulesToLoad {
stage.Commands = append(stage.Commands, fmt.Sprintf("modprobe %s", module))
}

if hugepagesToAllocate > 0 {
stage.Sysctl = map[string]string{"vm.nr_hugepages": fmt.Sprintf("%d", hugepagesToAllocate)}
}

return UpdatePersistentOEMSettings(stage)
}

func EnableV2DataEngine(hugepagesToAllocate uint64) error {
// Write the persistent config first, so we know it's saved...
if err := UpdatePersistentOEMSettings(schema.Stage{
Name: spdkStageName,
Sysctl: map[string]string{
"vm.nr_hugepages": fmt.Sprintf("%d", hugepagesToAllocate),
},
Commands: []string{
"modprobe vfio_pci",
"modprobe uio_pci_generic",
"modprobe nvme_tcp",
},
}); err != nil {
if err := updateLonghornConfigPersistence(hugepagesToAllocate); err != nil {
return err
}

Expand All @@ -98,9 +99,18 @@ func EnableV2DataEngine() error {
return fmt.Errorf("unable to load kernel modules %v: %v", modulesToLoad, err)
}

origHugepages, err := getNrHugepages()
if err != nil {
return err
}

if origHugepages >= hugepagesToAllocate {
// We've already got enough hugepages, and don't want to unnecessarily
// restart the kubelet, so no further action required
// (this also handles the zero case, kinda - at least, if hugepages
// are disabled, we won't bother trying to allocate any, but if
// hugepages were previously enabled, and later disabled, they will
// remain allocated until next reboot).
return nil
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/controller/nodeconfig/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ func (c *Controller) OnNodeConfigChange(key string, nodecfg *nodeconfigv1.NodeCo
// query that value when lhs/v2-data-engine is set to true. This restart
// logic is handled inside EnableV2DataEngine() and DisableV2DataEngine().
if nodecfg.Spec.LonghornConfig != nil && nodecfg.Spec.LonghornConfig.EnableV2DataEngine {
if err := config.EnableV2DataEngine(); err != nil {
if err := config.EnableV2DataEngine(uint64(nodecfg.Spec.LonghornConfig.HugepagesToAllocate)); err != nil {
logrus.WithFields(logrus.Fields{
"err": err.Error(),
}).Error("Failed to enable V2 Data Engine")
Expand Down
Loading