package installation
import (
"fmt"
"os"
"path/filepath"
"time"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"gitcode.com/openFuyao/e2e-auto-test/e2e/framework/executor"
config "gitcode.com/openFuyao/e2e-auto-test/e2e/installation/bke-config"
"gitcode.com/openFuyao/e2e-auto-test/e2e/installation/utils"
"k8s.io/client-go/dynamic"
"k8s.io/client-go/tools/clientcmd"
"k8s.io/client-go/util/homedir"
)
var _ = SIGDescribe("BKE Pre-Script Installation", func() {
// Fixtures shared by every spec in this container. They are (re)assigned by
// the suite-level BeforeEach below.
var (
// sshExecutor is the SSH executor created from the guide node settings.
sshExecutor *executor.SSHExecutor
// localExecutor is created with a 30s duration argument
// (presumably a per-command timeout — TODO confirm against NewLocalExecutor).
localExecutor *executor.LocalExecutor
// guideConfig holds the guide node settings loaded from the environment.
guideConfig *config.GuideNodeConfig
// dynamicClient is built from the kubeconfig at $HOME/.kube/config.
dynamicClient dynamic.Interface
// clusterManager is constructed from sshExecutor and dynamicClient.
clusterManager *utils.ClusterManager
// preScriptManager is constructed from sshExecutor and localExecutor.
preScriptManager *utils.PreScriptManager
)
// Rebuild all shared fixtures before each spec: load the guide node settings,
// open the SSH executor, create a dynamic client from $HOME/.kube/config and
// wire up the cluster and pre-script managers.
BeforeEach(func() {
	guideConfig = config.LoadGuideNodeFromEnv()
	Expect(guideConfig.Host).NotTo(BeEmpty(), "GUIDE_NODE_HOST 环境变量必须设置")
	Expect(guideConfig.Password).NotTo(BeEmpty(), "GUIDE_NODE_PASSWORD 环境变量必须设置")

	// NOTE(review): a new SSH executor is created for every spec and nothing in
	// this file releases it — confirm whether SSHExecutor needs an explicit close.
	var sshErr error
	sshExecutor, sshErr = executor.NewSSHExecutor(
		guideConfig.Host,
		guideConfig.Port,
		guideConfig.Username,
		guideConfig.Password,
	)
	Expect(sshErr).NotTo(HaveOccurred(), "应该成功连接到引导节点")

	kubeconfigPath := filepath.Join(homedir.HomeDir(), ".kube", "config")
	restCfg, loadErr := clientcmd.BuildConfigFromFlags("", kubeconfigPath)
	Expect(loadErr).NotTo(HaveOccurred(), "应该成功加载 kubeconfig")

	var clientErr error
	dynamicClient, clientErr = dynamic.NewForConfig(restCfg)
	Expect(clientErr).NotTo(HaveOccurred(), "应该成功创建动态客户端")

	clusterManager = utils.NewClusterManager(sshExecutor, dynamicClient)
	localExecutor = executor.NewLocalExecutor(30 * time.Second)
	preScriptManager = utils.NewPreScriptManager(sshExecutor, localExecutor)
})
// Scenario: online install of a single-master cluster with one pre-script
// (init-os.sh) configured with a single HTTP_REPO parameter.
Describe("单master在线安装1个脚本1个全局参数", Label("prescript", "1master", "P0", "post-init"), func() {
	var (
		clusterName    string          // unique per spec, derived from the current Unix time
		configPath     string          // first path returned by GenerateAndUpload
		nodeConfigPath string          // second path returned by GenerateAndUpload
		masterNode     config.NodeInfo // first node from the environment
		scriptNames    = []string{"init-os.sh"}
	)

	// Select the master node and deploy the pre-script resources before the
	// cluster is created.
	BeforeEach(func() {
		clusterName = fmt.Sprintf("pre-1m1s-%d", time.Now().Unix())

		nodes := config.LoadTestNodesFromEnv()
		Expect(len(nodes)).To(BeNumerically(">=", 1), "至少需要 1 个测试节点")
		masterNode = nodes[0]
		masterNode.Role = []string{"master/node", "etcd"}

		By("在引导集群创建 user-system 命名空间 + 脚本 ConfigMap + 全局配置")
		scripts := []utils.ScriptContentDef{utils.TestScriptInitOS()}
		entries := []utils.ScriptConfigEntry{
			{ScriptName: "init-os.sh", Order: 10, Params: map[string]string{"HTTP_REPO": "http://repo.example.com"}},
		}
		// NOTE(review): the empty last argument presumably selects the default
		// kubeconfig — confirm against SetupPreScripts.
		err := preScriptManager.SetupPreScripts(scripts, entries, "")
		Expect(err).NotTo(HaveOccurred(), "应该成功部署前置脚本资源")
	})

	// Remove the script resources, the cluster (when it exists) and the
	// uploaded config files.
	AfterEach(func() {
		preScriptManager.Cleanup(scriptNames, "")
		preScriptManager.CleanupScriptsOnNode(masterNode.IP, masterNode.Username, masterNode.Password)

		if clusterManager.ClusterExists(clusterName) {
			clusterManager.DeleteCluster(clusterName)
			Eventually(func() bool {
				return !clusterManager.ClusterExists(clusterName)
			}, uninstallTimeout, 60*time.Second).Should(BeTrue())
		}
		for _, path := range []string{configPath, nodeConfigPath} {
			if path != "" {
				clusterManager.CleanupConfig(path)
			}
		}
	})

	// ctx is passed to every Eventually so that polling stops as soon as the
	// SpecTimeout cancels the spec context instead of running on in the
	// background.
	It("应该成功创建集群并在节点上部署和执行前置脚本", SpecTimeout(InstallationItTimeout), func(ctx SpecContext) {
		clusterConfig := config.NewDefaultBKEClusterConfig(clusterName, []config.NodeInfo{masterNode})

		By("生成集群配置文件")
		var err error
		configPath, nodeConfigPath, err = clusterManager.GetConfigGenerator().GenerateAndUpload(clusterConfig)
		Expect(err).NotTo(HaveOccurred())

		By("创建集群")
		err = clusterManager.CreateClusterInBackgroundWithKubeconfig(configPath, nodeConfigPath, utils.DefaultBootstraoClusterKubeconfig)
		Expect(err).NotTo(HaveOccurred())

		By("等待集群状态变为 Healthy")
		Eventually(ctx, func() string {
			state, _ := clusterManager.GetClusterStatus(clusterName)
			phase, _, cs, _ := clusterManager.GetClusterFullStatus(clusterName)
			GinkgoWriter.Printf("集群状态: phase=%s, state=%s, clusterStatus=%s\n", phase, state, cs)
			failOnClusterFailure(state, cs)
			return state
		}, installTimeout, pollInterval).Should(Equal("Healthy"))

		// The log check is informational only; its result never fails the spec.
		By("检查日志中是否有前置处理的日志(非阻塞)")
		found, log, _ := preScriptManager.VerifyPreProcessingLogs(clusterName, "")
		if found {
			GinkgoWriter.Printf("前置处理日志:\n%s\n", log)
		} else {
			GinkgoWriter.Printf("未找到前置处理日志(不影响测试结果)\n")
		}

		By("验证 /etc/openFuyao/bkeagent/scripts 中有对应的脚本")
		Eventually(ctx, func() bool {
			ok, detail, _ := preScriptManager.VerifyScriptLanded(masterNode.IP, masterNode.Username, masterNode.Password, scriptNames)
			GinkgoWriter.Printf("脚本落盘: %s\n", detail)
			return ok
		}, 5*time.Minute, 15*time.Second).Should(BeTrue())

		By("验证前置脚本在节点上正确执行")
		Eventually(ctx, func() bool {
			ok, detail, _ := preScriptManager.VerifyScriptExecuted(masterNode.IP, masterNode.Username, masterNode.Password, scriptNames)
			GinkgoWriter.Printf("脚本执行: %s\n", detail)
			return ok
		}, 5*time.Minute, 15*time.Second).Should(BeTrue())
	})
})
// Scenario: online install of a single-master cluster with three pre-scripts
// (orders 10, 20, 30), each configured with the same HTTP_REPO parameter.
Describe("单master在线安装多个脚本1个全局参数", Label("prescript", "1master", "multi-script", "P0", "post-init"), func() {
	var (
		clusterName    string
		configPath     string
		nodeConfigPath string
		masterNode     config.NodeInfo
		scriptNames    = []string{"init-os.sh", "setup-env.sh", "install-agent.sh"}
	)

	// Select the master node and deploy the three scripts with their config.
	BeforeEach(func() {
		clusterName = fmt.Sprintf("pre-1mms-%d", time.Now().Unix())

		nodes := config.LoadTestNodesFromEnv()
		Expect(len(nodes)).To(BeNumerically(">=", 1))
		masterNode = nodes[0]
		masterNode.Role = []string{"master/node", "etcd"}

		By("部署 3 个脚本 + 全局配置")
		scripts := []utils.ScriptContentDef{
			utils.TestScriptInitOS(),
			utils.TestScriptSetupEnv(),
			utils.TestScriptInstallAgent(),
		}
		entries := []utils.ScriptConfigEntry{
			{ScriptName: "init-os.sh", Order: 10, Params: map[string]string{"HTTP_REPO": "http://repo.example.com"}},
			{ScriptName: "setup-env.sh", Order: 20, Params: map[string]string{"HTTP_REPO": "http://repo.example.com"}},
			{ScriptName: "install-agent.sh", Order: 30, Params: map[string]string{"HTTP_REPO": "http://repo.example.com"}},
		}
		err := preScriptManager.SetupPreScripts(scripts, entries, "")
		Expect(err).NotTo(HaveOccurred())
	})

	// Remove the script resources, the cluster (when it exists) and the
	// uploaded config files.
	AfterEach(func() {
		preScriptManager.Cleanup(scriptNames, "")
		preScriptManager.CleanupScriptsOnNode(masterNode.IP, masterNode.Username, masterNode.Password)

		if clusterManager.ClusterExists(clusterName) {
			clusterManager.DeleteCluster(clusterName)
			Eventually(func() bool {
				return !clusterManager.ClusterExists(clusterName)
			}, uninstallTimeout, 60*time.Second).Should(BeTrue())
		}
		for _, path := range []string{configPath, nodeConfigPath} {
			if path != "" {
				clusterManager.CleanupConfig(path)
			}
		}
	})

	// ctx is passed to every Eventually so that polling stops as soon as the
	// SpecTimeout cancels the spec context.
	It("应该成功创建集群并部署执行所有前置脚本", SpecTimeout(InstallationItTimeout), func(ctx SpecContext) {
		clusterConfig := config.NewDefaultBKEClusterConfig(clusterName, []config.NodeInfo{masterNode})

		var err error
		configPath, nodeConfigPath, err = clusterManager.GetConfigGenerator().GenerateAndUpload(clusterConfig)
		Expect(err).NotTo(HaveOccurred())

		err = clusterManager.CreateClusterInBackgroundWithKubeconfig(configPath, nodeConfigPath, utils.DefaultBootstraoClusterKubeconfig)
		Expect(err).NotTo(HaveOccurred())

		Eventually(ctx, func() string {
			state, _ := clusterManager.GetClusterStatus(clusterName)
			phase, _, cs, _ := clusterManager.GetClusterFullStatus(clusterName)
			GinkgoWriter.Printf("集群状态: phase=%s, state=%s, clusterStatus=%s\n", phase, state, cs)
			failOnClusterFailure(state, cs)
			return state
		}, installTimeout, pollInterval).Should(Equal("Healthy"))

		// The log check is informational only; its result never fails the spec.
		By("检查日志(非阻塞)")
		if f, l, _ := preScriptManager.VerifyPreProcessingLogs(clusterName, ""); f {
			GinkgoWriter.Printf("前置处理日志:\n%s\n", l)
		} else {
			GinkgoWriter.Printf("未找到前置处理日志(不影响测试结果)\n")
		}

		By("验证所有脚本落盘")
		Eventually(ctx, func() bool {
			ok, detail, _ := preScriptManager.VerifyScriptLanded(masterNode.IP, masterNode.Username, masterNode.Password, scriptNames)
			GinkgoWriter.Printf("脚本落盘: %s\n", detail)
			return ok
		}, 5*time.Minute, 15*time.Second).Should(BeTrue())

		By("验证所有脚本执行")
		Eventually(ctx, func() bool {
			ok, detail, _ := preScriptManager.VerifyScriptExecuted(masterNode.IP, masterNode.Username, masterNode.Password, scriptNames)
			GinkgoWriter.Printf("脚本执行: %s\n", detail)
			return ok
		}, 5*time.Minute, 15*time.Second).Should(BeTrue())
	})
})
// Scenario: online install of a three-master cluster with one pre-script
// (init-os.sh); the script must land and run on each of the three masters.
Describe("多master(3M)在线安装1个脚本1个全局参数", Label("prescript", "3master", "P0", "post-init"), func() {
	var (
		clusterName    string
		configPath     string
		nodeConfigPath string
		masterNodes    []config.NodeInfo // first three nodes from the environment
		scriptNames    = []string{"init-os.sh"}
	)

	// Select three master nodes and deploy the pre-script resources.
	BeforeEach(func() {
		clusterName = fmt.Sprintf("pre-3m1s-%d", time.Now().Unix())

		nodes := config.LoadTestNodesFromEnv()
		Expect(len(nodes)).To(BeNumerically(">=", 3), "至少需要 3 个测试节点")
		masterNodes = make([]config.NodeInfo, 3)
		for i := range masterNodes {
			masterNodes[i] = nodes[i]
			masterNodes[i].Role = []string{"master/node", "etcd"}
		}

		scripts := []utils.ScriptContentDef{utils.TestScriptInitOS()}
		entries := []utils.ScriptConfigEntry{
			{ScriptName: "init-os.sh", Order: 10, Params: map[string]string{"HTTP_REPO": "http://repo.example.com"}},
		}
		err := preScriptManager.SetupPreScripts(scripts, entries, "")
		Expect(err).NotTo(HaveOccurred())
	})

	// Remove the script resources from the cluster and every master, then
	// delete the cluster (when it exists) and the uploaded config files.
	AfterEach(func() {
		preScriptManager.Cleanup(scriptNames, "")
		for _, n := range masterNodes {
			preScriptManager.CleanupScriptsOnNode(n.IP, n.Username, n.Password)
		}

		if clusterManager.ClusterExists(clusterName) {
			clusterManager.DeleteCluster(clusterName)
			Eventually(func() bool {
				return !clusterManager.ClusterExists(clusterName)
			}, uninstallTimeout, 60*time.Second).Should(BeTrue())
		}
		for _, path := range []string{configPath, nodeConfigPath} {
			if path != "" {
				clusterManager.CleanupConfig(path)
			}
		}
	})

	// ctx is passed to every Eventually so that polling stops as soon as the
	// SpecTimeout cancels the spec context.
	It("应该在所有3个master节点上部署和执行前置脚本", SpecTimeout(InstallationItTimeout), func(ctx SpecContext) {
		clusterConfig := config.NewDefaultBKEClusterConfig(clusterName, masterNodes)
		// A control-plane endpoint is only set when a VIP host is configured.
		if vipHost, vipPort := config.LoadManagementClusterVIPFromEnv(); vipHost != "" {
			clusterConfig.ControlPlaneEndpoint = &config.ControlPlaneEndpoint{Host: vipHost, Port: vipPort}
		}

		var err error
		configPath, nodeConfigPath, err = clusterManager.GetConfigGenerator().GenerateAndUpload(clusterConfig)
		Expect(err).NotTo(HaveOccurred())

		err = clusterManager.CreateClusterInBackgroundWithKubeconfig(configPath, nodeConfigPath, utils.DefaultBootstraoClusterKubeconfig)
		Expect(err).NotTo(HaveOccurred())

		Eventually(ctx, func() string {
			state, _ := clusterManager.GetClusterStatus(clusterName)
			phase, _, cs, _ := clusterManager.GetClusterFullStatus(clusterName)
			GinkgoWriter.Printf("集群状态: phase=%s, state=%s, clusterStatus=%s\n", phase, state, cs)
			failOnClusterFailure(state, cs)
			return state
		}, installTimeout, pollInterval).Should(Equal("Healthy"))

		// The log check is informational only; its result never fails the spec.
		if f, l, _ := preScriptManager.VerifyPreProcessingLogs(clusterName, ""); f {
			GinkgoWriter.Printf("前置处理日志:\n%s\n", l)
		} else {
			GinkgoWriter.Printf("未找到前置处理日志(不影响测试结果)\n")
		}

		for i, node := range masterNodes {
			By(fmt.Sprintf("验证 Master 节点 %d (%s)", i+1, node.IP))
			Eventually(ctx, func() bool {
				ok, detail, _ := preScriptManager.VerifyScriptLanded(node.IP, node.Username, node.Password, scriptNames)
				GinkgoWriter.Printf("节点 %s 落盘: %s\n", node.IP, detail)
				return ok
			}, 5*time.Minute, 15*time.Second).Should(BeTrue())
			Eventually(ctx, func() bool {
				ok, detail, _ := preScriptManager.VerifyScriptExecuted(node.IP, node.Username, node.Password, scriptNames)
				GinkgoWriter.Printf("节点 %s 执行: %s\n", node.IP, detail)
				return ok
			}, 5*time.Minute, 15*time.Second).Should(BeTrue())
		}
	})
})
// Scenario: online install of a three-master cluster with three pre-scripts
// (orders 10, 20, 30); all scripts must land and run on each master.
Describe("多master(3M)在线安装多个脚本1个全局参数", Label("prescript", "3master", "multi-script", "P0", "post-init"), func() {
	var (
		clusterName    string
		configPath     string
		nodeConfigPath string
		masterNodes    []config.NodeInfo // first three nodes from the environment
		scriptNames    = []string{"init-os.sh", "setup-env.sh", "install-agent.sh"}
	)

	// Select three master nodes and deploy the three scripts with their config.
	BeforeEach(func() {
		clusterName = fmt.Sprintf("pre-3mms-%d", time.Now().Unix())

		nodes := config.LoadTestNodesFromEnv()
		Expect(len(nodes)).To(BeNumerically(">=", 3))
		masterNodes = make([]config.NodeInfo, 3)
		for i := range masterNodes {
			masterNodes[i] = nodes[i]
			masterNodes[i].Role = []string{"master/node", "etcd"}
		}

		scripts := []utils.ScriptContentDef{
			utils.TestScriptInitOS(),
			utils.TestScriptSetupEnv(),
			utils.TestScriptInstallAgent(),
		}
		entries := []utils.ScriptConfigEntry{
			{ScriptName: "init-os.sh", Order: 10, Params: map[string]string{"HTTP_REPO": "http://repo.example.com"}},
			{ScriptName: "setup-env.sh", Order: 20, Params: map[string]string{"HTTP_REPO": "http://repo.example.com"}},
			{ScriptName: "install-agent.sh", Order: 30, Params: map[string]string{"HTTP_REPO": "http://repo.example.com"}},
		}
		err := preScriptManager.SetupPreScripts(scripts, entries, "")
		Expect(err).NotTo(HaveOccurred())
	})

	// Remove the script resources from the cluster and every master, then
	// delete the cluster (when it exists) and the uploaded config files.
	AfterEach(func() {
		preScriptManager.Cleanup(scriptNames, "")
		for _, n := range masterNodes {
			preScriptManager.CleanupScriptsOnNode(n.IP, n.Username, n.Password)
		}

		if clusterManager.ClusterExists(clusterName) {
			clusterManager.DeleteCluster(clusterName)
			Eventually(func() bool {
				return !clusterManager.ClusterExists(clusterName)
			}, uninstallTimeout, 60*time.Second).Should(BeTrue())
		}
		for _, path := range []string{configPath, nodeConfigPath} {
			if path != "" {
				clusterManager.CleanupConfig(path)
			}
		}
	})

	// ctx is passed to every Eventually so that polling stops as soon as the
	// SpecTimeout cancels the spec context.
	It("应该在所有3个master节点上部署和执行所有前置脚本", SpecTimeout(InstallationItTimeout), func(ctx SpecContext) {
		clusterConfig := config.NewDefaultBKEClusterConfig(clusterName, masterNodes)
		// A control-plane endpoint is only set when a VIP host is configured.
		if vipHost, vipPort := config.LoadManagementClusterVIPFromEnv(); vipHost != "" {
			clusterConfig.ControlPlaneEndpoint = &config.ControlPlaneEndpoint{Host: vipHost, Port: vipPort}
		}

		var err error
		configPath, nodeConfigPath, err = clusterManager.GetConfigGenerator().GenerateAndUpload(clusterConfig)
		Expect(err).NotTo(HaveOccurred())

		err = clusterManager.CreateClusterInBackgroundWithKubeconfig(configPath, nodeConfigPath, utils.DefaultBootstraoClusterKubeconfig)
		Expect(err).NotTo(HaveOccurred())

		Eventually(ctx, func() string {
			state, _ := clusterManager.GetClusterStatus(clusterName)
			phase, _, cs, _ := clusterManager.GetClusterFullStatus(clusterName)
			GinkgoWriter.Printf("集群状态: phase=%s, state=%s, clusterStatus=%s\n", phase, state, cs)
			failOnClusterFailure(state, cs)
			return state
		}, installTimeout, pollInterval).Should(Equal("Healthy"))

		// The log check is informational only; its result never fails the spec.
		if f, l, _ := preScriptManager.VerifyPreProcessingLogs(clusterName, ""); f {
			GinkgoWriter.Printf("前置处理日志:\n%s\n", l)
		} else {
			GinkgoWriter.Printf("未找到前置处理日志(不影响测试结果)\n")
		}

		for i, node := range masterNodes {
			By(fmt.Sprintf("验证 Master 节点 %d (%s)", i+1, node.IP))
			Eventually(ctx, func() bool {
				ok, detail, _ := preScriptManager.VerifyScriptLanded(node.IP, node.Username, node.Password, scriptNames)
				GinkgoWriter.Printf("节点 %s 落盘: %s\n", node.IP, detail)
				return ok
			}, 5*time.Minute, 15*time.Second).Should(BeTrue())
			Eventually(ctx, func() bool {
				ok, detail, _ := preScriptManager.VerifyScriptExecuted(node.IP, node.Username, node.Password, scriptNames)
				GinkgoWriter.Printf("节点 %s 执行: %s\n", node.IP, detail)
				return ok
			}, 5*time.Minute, 15*time.Second).Should(BeTrue())
		}
	})
})
// Scenario: the HTTP_REPO parameter carries a value containing ';', which the
// test expects to be rejected so that init-os.sh is never executed.
Describe("单master在线安装1脚本1全局参数-参数校验失败", Label("prescript", "1master", "param-validation", "P1", "post-init"), func() {
	var (
		clusterName    string
		configPath     string
		nodeConfigPath string
		masterNode     config.NodeInfo
		scriptNames    = []string{"init-os.sh"}
	)

	// Select the master node and deploy the script with the invalid parameter.
	BeforeEach(func() {
		clusterName = fmt.Sprintf("pre-1mpf-%d", time.Now().Unix())

		nodes := config.LoadTestNodesFromEnv()
		Expect(len(nodes)).To(BeNumerically(">=", 1))
		masterNode = nodes[0]
		masterNode.Role = []string{"master/node", "etcd"}

		By("部署脚本 + 包含非法参数的全局配置(分号不在白名单内)")
		scripts := []utils.ScriptContentDef{utils.TestScriptInitOS()}
		entries := []utils.ScriptConfigEntry{
			{ScriptName: "init-os.sh", Order: 10, Params: map[string]string{
				"HTTP_REPO": "http://repo.example.com;rm -rf /",
			}},
		}
		err := preScriptManager.SetupPreScripts(scripts, entries, "")
		Expect(err).NotTo(HaveOccurred())
	})

	// Remove the script resources, the cluster (when it exists) and the
	// uploaded config files.
	AfterEach(func() {
		preScriptManager.Cleanup(scriptNames, "")
		preScriptManager.CleanupScriptsOnNode(masterNode.IP, masterNode.Username, masterNode.Password)

		if clusterManager.ClusterExists(clusterName) {
			clusterManager.DeleteCluster(clusterName)
			Eventually(func() bool {
				return !clusterManager.ClusterExists(clusterName)
			}, uninstallTimeout, 60*time.Second).Should(BeTrue())
		}
		for _, path := range []string{configPath, nodeConfigPath} {
			if path != "" {
				clusterManager.CleanupConfig(path)
			}
		}
	})

	// ctx is passed to Eventually so that polling stops as soon as the
	// SpecTimeout cancels the spec context.
	It("脚本参数校验不通过时脚本无法正常执行", SpecTimeout(InstallationItTimeout), func(ctx SpecContext) {
		clusterConfig := config.NewDefaultBKEClusterConfig(clusterName, []config.NodeInfo{masterNode})

		var err error
		configPath, nodeConfigPath, err = clusterManager.GetConfigGenerator().GenerateAndUpload(clusterConfig)
		Expect(err).NotTo(HaveOccurred())

		err = clusterManager.CreateClusterInBackgroundWithKubeconfig(configPath, nodeConfigPath, utils.DefaultBootstraoClusterKubeconfig)
		Expect(err).NotTo(HaveOccurred())

		// Either terminal state ends the wait; this spec only asserts on the
		// script marker afterwards, not on the cluster state itself.
		By("等待集群安装完成(集群本身应该可以正常创建)")
		Eventually(ctx, func() bool {
			_, state, cs, _ := clusterManager.GetClusterFullStatus(clusterName)
			GinkgoWriter.Printf("集群状态: state=%s, clusterStatus=%s\n", state, cs)
			return state == "Healthy" || state == "DeployFailed"
		}, installTimeout, pollInterval).Should(BeTrue())

		By("验证脚本标记文件不存在(参数校验失败导致脚本被跳过)")
		notExecuted, err := preScriptManager.VerifyScriptNotExecuted(
			masterNode.IP, masterNode.Username, masterNode.Password, "init-os.sh",
		)
		Expect(err).NotTo(HaveOccurred())
		Expect(notExecuted).To(BeTrue(), "参数校验失败时脚本不应该成功执行")
	})
})
// Scenario: a deliberately failing pre-script (fail-script.sh) is deployed on
// a single-master cluster; the cluster must still reach Healthy.
Describe("单master在线安装1脚本1全局参数-脚本失败不阻塞集群", Label("prescript", "1master", "script-failure", "P1", "post-init"), func() {
	var (
		clusterName    string
		configPath     string
		nodeConfigPath string
		masterNode     config.NodeInfo
		scriptNames    = []string{"fail-script.sh"}
	)

	// Select the master node and deploy the failing script.
	BeforeEach(func() {
		clusterName = fmt.Sprintf("pre-1msf-%d", time.Now().Unix())

		nodes := config.LoadTestNodesFromEnv()
		Expect(len(nodes)).To(BeNumerically(">=", 1))
		masterNode = nodes[0]
		masterNode.Role = []string{"master/node", "etcd"}

		By("部署一个会故意失败的脚本(exit 1)")
		scripts := []utils.ScriptContentDef{utils.TestScriptFail()}
		entries := []utils.ScriptConfigEntry{
			{ScriptName: "fail-script.sh", Order: 10, Params: map[string]string{"PKG_VERSION": "99.99.99"}},
		}
		err := preScriptManager.SetupPreScripts(scripts, entries, "")
		Expect(err).NotTo(HaveOccurred())
	})

	// Remove the script resources, the cluster (when it exists) and the
	// uploaded config files.
	AfterEach(func() {
		preScriptManager.Cleanup(scriptNames, "")
		preScriptManager.CleanupScriptsOnNode(masterNode.IP, masterNode.Username, masterNode.Password)

		if clusterManager.ClusterExists(clusterName) {
			clusterManager.DeleteCluster(clusterName)
			Eventually(func() bool {
				return !clusterManager.ClusterExists(clusterName)
			}, uninstallTimeout, 60*time.Second).Should(BeTrue())
		}
		for _, path := range []string{configPath, nodeConfigPath} {
			if path != "" {
				clusterManager.CleanupConfig(path)
			}
		}
	})

	// ctx is passed to Eventually so that polling stops as soon as the
	// SpecTimeout cancels the spec context.
	It("脚本安装失败但集群应该正常创建", SpecTimeout(InstallationItTimeout), func(ctx SpecContext) {
		clusterConfig := config.NewDefaultBKEClusterConfig(clusterName, []config.NodeInfo{masterNode})

		var err error
		configPath, nodeConfigPath, err = clusterManager.GetConfigGenerator().GenerateAndUpload(clusterConfig)
		Expect(err).NotTo(HaveOccurred())

		err = clusterManager.CreateClusterInBackgroundWithKubeconfig(configPath, nodeConfigPath, utils.DefaultBootstraoClusterKubeconfig)
		Expect(err).NotTo(HaveOccurred())

		By("等待集群状态变为 Healthy(脚本失败不应阻塞集群创建)")
		Eventually(ctx, func() string {
			state, _ := clusterManager.GetClusterStatus(clusterName)
			phase, _, cs, _ := clusterManager.GetClusterFullStatus(clusterName)
			GinkgoWriter.Printf("集群状态: phase=%s, state=%s, clusterStatus=%s\n", phase, state, cs)
			failOnClusterFailure(state, cs)
			return state
		}, installTimeout, pollInterval).Should(Equal("Healthy"), "即使脚本失败,集群也应该正常创建")
	})
})
// Scenario: a deliberately failing pre-script (fail-script.sh) is deployed on
// a three-master cluster; the cluster must still reach Healthy.
Describe("多集群(3M)在线安装1脚本1全局参数-脚本失败不阻塞集群", Label("prescript", "3master", "script-failure", "P1", "post-init"), func() {
	var (
		clusterName    string
		configPath     string
		nodeConfigPath string
		masterNodes    []config.NodeInfo // first three nodes from the environment
		scriptNames    = []string{"fail-script.sh"}
	)

	// Select three master nodes and deploy the failing script.
	BeforeEach(func() {
		clusterName = fmt.Sprintf("pre-3msf-%d", time.Now().Unix())

		nodes := config.LoadTestNodesFromEnv()
		Expect(len(nodes)).To(BeNumerically(">=", 3))
		masterNodes = make([]config.NodeInfo, 3)
		for i := range masterNodes {
			masterNodes[i] = nodes[i]
			masterNodes[i].Role = []string{"master/node", "etcd"}
		}

		scripts := []utils.ScriptContentDef{utils.TestScriptFail()}
		entries := []utils.ScriptConfigEntry{
			{ScriptName: "fail-script.sh", Order: 10, Params: map[string]string{"PKG_VERSION": "99.99.99"}},
		}
		err := preScriptManager.SetupPreScripts(scripts, entries, "")
		Expect(err).NotTo(HaveOccurred())
	})

	// Remove the script resources from the cluster and every master, then
	// delete the cluster (when it exists) and the uploaded config files.
	AfterEach(func() {
		preScriptManager.Cleanup(scriptNames, "")
		for _, n := range masterNodes {
			preScriptManager.CleanupScriptsOnNode(n.IP, n.Username, n.Password)
		}

		if clusterManager.ClusterExists(clusterName) {
			clusterManager.DeleteCluster(clusterName)
			Eventually(func() bool {
				return !clusterManager.ClusterExists(clusterName)
			}, uninstallTimeout, 60*time.Second).Should(BeTrue())
		}
		for _, path := range []string{configPath, nodeConfigPath} {
			if path != "" {
				clusterManager.CleanupConfig(path)
			}
		}
	})

	// ctx is passed to Eventually so that polling stops as soon as the
	// SpecTimeout cancels the spec context.
	It("脚本安装失败但3master管理集群应该正常创建", SpecTimeout(InstallationItTimeout), func(ctx SpecContext) {
		clusterConfig := config.NewDefaultBKEClusterConfig(clusterName, masterNodes)
		// A control-plane endpoint is only set when a VIP host is configured.
		if vipHost, vipPort := config.LoadManagementClusterVIPFromEnv(); vipHost != "" {
			clusterConfig.ControlPlaneEndpoint = &config.ControlPlaneEndpoint{Host: vipHost, Port: vipPort}
		}

		var err error
		configPath, nodeConfigPath, err = clusterManager.GetConfigGenerator().GenerateAndUpload(clusterConfig)
		Expect(err).NotTo(HaveOccurred())

		err = clusterManager.CreateClusterInBackgroundWithKubeconfig(configPath, nodeConfigPath, utils.DefaultBootstraoClusterKubeconfig)
		Expect(err).NotTo(HaveOccurred())

		By("等待集群状态变为 Healthy(脚本失败不应阻塞集群创建)")
		Eventually(ctx, func() string {
			state, _ := clusterManager.GetClusterStatus(clusterName)
			phase, _, cs, _ := clusterManager.GetClusterFullStatus(clusterName)
			GinkgoWriter.Printf("集群状态: phase=%s, state=%s, clusterStatus=%s\n", phase, state, cs)
			failOnClusterFailure(state, cs)
			return state
		}, installTimeout, pollInterval).Should(Equal("Healthy"), "即使脚本失败,集群也应该正常创建")
	})
})
// Scenario: a single-master cluster is created in BeforeEach, then a second
// node is added; the pre-script must land and run on the newly added node.
Describe("扩容时执行前置脚本安装", Label("prescript", "scale-out", "P1", "post-init"), func() {
	var (
		clusterName    string
		configPath     string
		nodeConfigPath string
		masterNode     config.NodeInfo // nodes[0], role master/node + etcd
		scaleOutNode   config.NodeInfo // nodes[1], role node
		scriptNames    = []string{"init-os.sh"}
	)

	// Deploy the pre-script resources, create the initial one-master cluster
	// and wait until it reports Healthy.
	BeforeEach(func() {
		clusterName = fmt.Sprintf("pre-scale-%d", time.Now().Unix())

		nodes := config.LoadTestNodesFromEnv()
		Expect(len(nodes)).To(BeNumerically(">=", 2), "扩容测试至少需要 2 个节点")
		masterNode = nodes[0]
		masterNode.Role = []string{"master/node", "etcd"}
		scaleOutNode = nodes[1]
		scaleOutNode.Role = []string{"node"}

		scripts := []utils.ScriptContentDef{utils.TestScriptInitOS()}
		entries := []utils.ScriptConfigEntry{
			{ScriptName: "init-os.sh", Order: 10, Params: map[string]string{"HTTP_REPO": "http://repo.example.com"}},
		}
		err := preScriptManager.SetupPreScripts(scripts, entries, "")
		Expect(err).NotTo(HaveOccurred())

		By("创建初始 1Master 集群")
		clusterConfig := config.NewDefaultBKEClusterConfig(clusterName, []config.NodeInfo{masterNode})
		var createErr error
		configPath, nodeConfigPath, createErr = clusterManager.GetConfigGenerator().GenerateAndUpload(clusterConfig)
		Expect(createErr).NotTo(HaveOccurred())
		createErr = clusterManager.CreateClusterInBackgroundWithKubeconfig(configPath, nodeConfigPath, utils.DefaultBootstraoClusterKubeconfig)
		Expect(createErr).NotTo(HaveOccurred())

		By("等待初始集群变为 Healthy")
		Eventually(func() string {
			state, _ := clusterManager.GetClusterStatus(clusterName)
			_, _, cs, _ := clusterManager.GetClusterFullStatus(clusterName)
			failOnClusterFailure(state, cs)
			return state
		}, installTimeout, pollInterval).Should(Equal("Healthy"))
	})

	// Remove the script resources from the cluster and both nodes, then delete
	// the cluster (when it exists) and the uploaded config files.
	AfterEach(func() {
		preScriptManager.Cleanup(scriptNames, "")
		preScriptManager.CleanupScriptsOnNode(masterNode.IP, masterNode.Username, masterNode.Password)
		preScriptManager.CleanupScriptsOnNode(scaleOutNode.IP, scaleOutNode.Username, scaleOutNode.Password)

		if clusterManager.ClusterExists(clusterName) {
			clusterManager.DeleteCluster(clusterName)
			Eventually(func() bool {
				return !clusterManager.ClusterExists(clusterName)
			}, uninstallTimeout, 60*time.Second).Should(BeTrue())
		}
		for _, path := range []string{configPath, nodeConfigPath} {
			if path != "" {
				clusterManager.CleanupConfig(path)
			}
		}
	})

	// ctx is passed to every Eventually so that polling stops as soon as the
	// SpecTimeout cancels the spec context.
	It("扩容节点时应该在新节点上执行前置脚本", SpecTimeout(InstallationItTimeout), func(ctx SpecContext) {
		By("验证 Master 节点上脚本已执行")
		Eventually(ctx, func() bool {
			ok, _, _ := preScriptManager.VerifyScriptExecuted(masterNode.IP, masterNode.Username, masterNode.Password, scriptNames)
			return ok
		}, 5*time.Minute, 15*time.Second).Should(BeTrue())

		By("执行扩容操作")
		err := clusterManager.ScaleOutNode(clusterName, scaleOutNode)
		Expect(err).NotTo(HaveOccurred())

		By("等待扩容后集群恢复 Healthy")
		Eventually(ctx, func() bool {
			_, state, cs, _ := clusterManager.GetClusterFullStatus(clusterName)
			GinkgoWriter.Printf("扩容后: state=%s, clusterStatus=%s\n", state, cs)
			return state == "Healthy" && cs == "Ready"
		}, installTimeout, pollInterval).Should(BeTrue())

		By("验证扩容节点上脚本已落盘")
		Eventually(ctx, func() bool {
			ok, detail, _ := preScriptManager.VerifyScriptLanded(scaleOutNode.IP, scaleOutNode.Username, scaleOutNode.Password, scriptNames)
			GinkgoWriter.Printf("扩容节点落盘: %s\n", detail)
			return ok
		}, 5*time.Minute, 15*time.Second).Should(BeTrue())

		By("验证扩容节点上脚本已执行")
		Eventually(ctx, func() bool {
			ok, detail, _ := preScriptManager.VerifyScriptExecuted(scaleOutNode.IP, scaleOutNode.Username, scaleOutNode.Password, scriptNames)
			GinkgoWriter.Printf("扩容节点执行: %s\n", detail)
			return ok
		}, 5*time.Minute, 15*time.Second).Should(BeTrue())
	})
})
// Scenario (Ordered): create a 3-master/1-worker management cluster, deploy two
// pre-scripts against it, then create a 3-master workload cluster through the
// management kubeconfig and verify the scripts on every workload node.
Describe("管理集群多master在线安装多个脚本创建业务集群", Label("prescript", "3master", "mgmt-workload", "P2", "post-init", "skip-temporarily"), Ordered, func() {
	var (
		mgmtClusterName        string
		mgmtConfigPath         string
		mgmtNodeConfigPath     string
		mgmtKubeconfigPath     string
		workloadClusterName    string
		workloadConfigPath     string
		workloadNodeConfigPath string
		workloadNodes          []config.NodeInfo // nodes[4..6]
		scriptNames            = []string{"init-os.sh", "setup-env.sh"}
	)

	// Build the management cluster and prepare the workload cluster inputs.
	BeforeAll(func() {
		nodes := config.LoadTestNodesFromEnv()
		if len(nodes) < 8 {
			Skip(fmt.Sprintf("跳过: 管理集群+业务集群场景需要至少 8 个节点,当前只有 %d 个", len(nodes)))
		}

		By("创建管理集群 (3Master1Worker)")
		mgmtCluster, err := Create3M1WManagementCluster(clusterManager, localExecutor, "pre-mgmt", true)
		// Record whatever was returned before asserting on err, so AfterAll can
		// still clean up when creation fails but a result is returned.
		if mgmtCluster != nil {
			mgmtClusterName = mgmtCluster.ClusterName
			mgmtConfigPath = mgmtCluster.ConfigPath
			mgmtNodeConfigPath = mgmtCluster.NodeConfigPath
			mgmtKubeconfigPath = mgmtCluster.KubeconfigPath
		}
		Expect(err).NotTo(HaveOccurred())

		By("在管理集群上部署前置脚本资源")
		scripts := []utils.ScriptContentDef{utils.TestScriptInitOS(), utils.TestScriptSetupEnv()}
		entries := []utils.ScriptConfigEntry{
			{ScriptName: "init-os.sh", Order: 10, Params: map[string]string{"HTTP_REPO": "http://repo.example.com"}},
			{ScriptName: "setup-env.sh", Order: 20, Params: map[string]string{"HTTP_REPO": "http://repo.example.com"}},
		}
		err = preScriptManager.SetupPreScripts(scripts, entries, mgmtKubeconfigPath)
		Expect(err).NotTo(HaveOccurred())

		// Workload masters are taken from nodes[4], nodes[5] and nodes[6].
		workloadNodes = make([]config.NodeInfo, 3)
		for i := range workloadNodes {
			workloadNodes[i] = nodes[4+i]
			workloadNodes[i].Role = []string{"master/node", "etcd"}
		}
		workloadClusterName = fmt.Sprintf("pre-wl-%d", time.Now().Unix())
	})

	// ctx is passed to every Eventually so that polling stops as soon as the
	// SpecTimeout cancels the spec context.
	It("应该在业务集群所有节点上部署和执行前置脚本", SpecTimeout(InstallationItTimeout), func(ctx SpecContext) {
		workloadConfig := config.NewDefaultBKEClusterConfig(workloadClusterName, workloadNodes)
		// A control-plane endpoint is only set when a VIP host is configured.
		if vipHost, vipPort := config.LoadWorkloadClusterVIPFromEnv(); vipHost != "" {
			workloadConfig.ControlPlaneEndpoint = &config.ControlPlaneEndpoint{Host: vipHost, Port: vipPort}
		}

		var err error
		workloadConfigPath, workloadNodeConfigPath, err = clusterManager.GetConfigGenerator().GenerateAndUpload(workloadConfig)
		Expect(err).NotTo(HaveOccurred())

		err = clusterManager.CreateClusterInBackgroundWithKubeconfig(workloadConfigPath, workloadNodeConfigPath, mgmtKubeconfigPath)
		Expect(err).NotTo(HaveOccurred())

		By("等待业务集群状态变为 Healthy")
		Eventually(ctx, func() bool {
			phase, state, cs, _ := clusterManager.GetClusterFullStatusWithKubeconfig(workloadClusterName, mgmtKubeconfigPath)
			GinkgoWriter.Printf("业务集群: phase=%s, state=%s, clusterStatus=%s\n", phase, state, cs)
			// Abort immediately on a failed deployment instead of polling until the timeout.
			if state == "DeployFailed" {
				Fail(fmt.Sprintf("业务集群创建失败: phase=%s", phase))
			}
			return state == "Healthy" && cs == "Ready"
		}, installTimeout, pollInterval).Should(BeTrue())

		// The log check is informational only; its result never fails the spec.
		By("检查日志(非阻塞)")
		if f, l, _ := preScriptManager.VerifyPreProcessingLogs(workloadClusterName, mgmtKubeconfigPath); f {
			GinkgoWriter.Printf("前置处理日志:\n%s\n", l)
		} else {
			GinkgoWriter.Printf("未找到前置处理日志(不影响测试结果)\n")
		}

		for i, node := range workloadNodes {
			By(fmt.Sprintf("验证业务集群 Master 节点 %d (%s)", i+1, node.IP))
			Eventually(ctx, func() bool {
				ok, detail, _ := preScriptManager.VerifyScriptLanded(node.IP, node.Username, node.Password, scriptNames)
				GinkgoWriter.Printf("节点 %s 落盘: %s\n", node.IP, detail)
				return ok
			}, 5*time.Minute, 15*time.Second).Should(BeTrue())
			Eventually(ctx, func() bool {
				ok, detail, _ := preScriptManager.VerifyScriptExecuted(node.IP, node.Username, node.Password, scriptNames)
				GinkgoWriter.Printf("节点 %s 执行: %s\n", node.IP, detail)
				return ok
			}, 5*time.Minute, 15*time.Second).Should(BeTrue())
		}
	})

	// Tear down in dependency order: script resources, workload cluster and its
	// configs, then the management cluster and its configs. Every step is
	// guarded so it is skipped when BeforeAll never produced the value.
	AfterAll(func() {
		if mgmtKubeconfigPath != "" {
			preScriptManager.Cleanup(scriptNames, mgmtKubeconfigPath)
		}
		for _, n := range workloadNodes {
			preScriptManager.CleanupScriptsOnNode(n.IP, n.Username, n.Password)
		}

		if workloadClusterName != "" && mgmtKubeconfigPath != "" {
			Delete3M1WWorkloadCluster(clusterManager, localExecutor, workloadClusterName, mgmtKubeconfigPath)
		}
		for _, path := range []string{workloadConfigPath, workloadNodeConfigPath} {
			if path != "" {
				clusterManager.CleanupConfig(path)
			}
		}

		if mgmtClusterName != "" {
			Delete3M1WManagementCluster(clusterManager, localExecutor, mgmtClusterName, mgmtKubeconfigPath)
		}
		for _, path := range []string{mgmtConfigPath, mgmtNodeConfigPath} {
			if path != "" {
				clusterManager.CleanupConfig(path)
			}
		}
	})
})
// Scenario (Ordered): install a three-master cluster with two pre-scripts,
// using the offline cluster config when IMAGE_REPO_DOMAIN is set and the
// default (online) config otherwise.
Describe("离线多master安装多个脚本创建业务集群", Label("prescript", "3master", "offline", "P2", "post-init"), Ordered, func() {
	var (
		clusterName    string
		configPath     string
		nodeConfigPath string
		masterNodes    []config.NodeInfo // first three nodes from the environment
		scriptNames    = []string{"init-os.sh", "setup-env.sh"}
	)

	// Select three master nodes and deploy the two scripts with their config.
	BeforeAll(func() {
		nodes := config.LoadTestNodesFromEnv()
		Expect(len(nodes)).To(BeNumerically(">=", 3))

		clusterName = fmt.Sprintf("pre-off3m-%d", time.Now().Unix())
		masterNodes = make([]config.NodeInfo, 3)
		for i := range masterNodes {
			masterNodes[i] = nodes[i]
			masterNodes[i].Role = []string{"master/node", "etcd"}
		}

		scripts := []utils.ScriptContentDef{utils.TestScriptInitOS(), utils.TestScriptSetupEnv()}
		entries := []utils.ScriptConfigEntry{
			{ScriptName: "init-os.sh", Order: 10, Params: map[string]string{"HTTP_REPO": "http://repo.example.com"}},
			{ScriptName: "setup-env.sh", Order: 20, Params: map[string]string{"HTTP_REPO": "http://repo.example.com"}},
		}
		err := preScriptManager.SetupPreScripts(scripts, entries, "")
		Expect(err).NotTo(HaveOccurred())
	})

	// ctx is passed to every Eventually so that polling stops as soon as the
	// SpecTimeout cancels the spec context.
	It("离线模式下应该在所有节点上部署和执行前置脚本", SpecTimeout(InstallationItTimeout), func(ctx SpecContext) {
		imageRepoDomain := os.Getenv("IMAGE_REPO_DOMAIN")
		imageRepoIP := os.Getenv("IMAGE_REPO_IP")

		// NOTE(review): without IMAGE_REPO_DOMAIN this "offline" spec silently
		// exercises the online path — confirm that fallback is intended.
		var clusterConfig *config.BKEClusterConfig
		if imageRepoDomain != "" {
			clusterConfig = config.NewBKEClusterConfigOffline(clusterName, masterNodes, imageRepoDomain, imageRepoIP)
		} else {
			GinkgoWriter.Printf("未配置离线镜像仓库,使用在线模式\n")
			clusterConfig = config.NewDefaultBKEClusterConfig(clusterName, masterNodes)
		}
		// A control-plane endpoint is only set when a VIP host is configured.
		if vipHost, vipPort := config.LoadManagementClusterVIPFromEnv(); vipHost != "" {
			clusterConfig.ControlPlaneEndpoint = &config.ControlPlaneEndpoint{Host: vipHost, Port: vipPort}
		}

		var err error
		configPath, nodeConfigPath, err = clusterManager.GetConfigGenerator().GenerateAndUpload(clusterConfig)
		Expect(err).NotTo(HaveOccurred())

		err = clusterManager.CreateClusterInBackgroundWithKubeconfig(configPath, nodeConfigPath, utils.DefaultBootstraoClusterKubeconfig)
		Expect(err).NotTo(HaveOccurred())

		Eventually(ctx, func() string {
			state, _ := clusterManager.GetClusterStatus(clusterName)
			phase, _, cs, _ := clusterManager.GetClusterFullStatus(clusterName)
			GinkgoWriter.Printf("集群状态: phase=%s, state=%s, clusterStatus=%s\n", phase, state, cs)
			failOnClusterFailure(state, cs)
			return state
		}, installTimeout, pollInterval).Should(Equal("Healthy"))

		// The log check is informational only; its result never fails the spec.
		if f, l, _ := preScriptManager.VerifyPreProcessingLogs(clusterName, ""); f {
			GinkgoWriter.Printf("前置处理日志:\n%s\n", l)
		} else {
			GinkgoWriter.Printf("未找到前置处理日志(不影响测试结果)\n")
		}

		for i, node := range masterNodes {
			By(fmt.Sprintf("验证 Master 节点 %d (%s)", i+1, node.IP))
			Eventually(ctx, func() bool {
				ok, detail, _ := preScriptManager.VerifyScriptLanded(node.IP, node.Username, node.Password, scriptNames)
				GinkgoWriter.Printf("节点 %s 落盘: %s\n", node.IP, detail)
				return ok
			}, 5*time.Minute, 15*time.Second).Should(BeTrue())
			Eventually(ctx, func() bool {
				ok, detail, _ := preScriptManager.VerifyScriptExecuted(node.IP, node.Username, node.Password, scriptNames)
				GinkgoWriter.Printf("节点 %s 执行: %s\n", node.IP, detail)
				return ok
			}, 5*time.Minute, 15*time.Second).Should(BeTrue())
		}
	})

	// Remove the script resources from the cluster and every master, then
	// delete the cluster (when it exists) and the uploaded config files.
	AfterAll(func() {
		preScriptManager.Cleanup(scriptNames, "")
		for _, n := range masterNodes {
			preScriptManager.CleanupScriptsOnNode(n.IP, n.Username, n.Password)
		}

		if clusterManager.ClusterExists(clusterName) {
			clusterManager.DeleteCluster(clusterName)
			Eventually(func() bool {
				return !clusterManager.ClusterExists(clusterName)
			}, uninstallTimeout, 60*time.Second).Should(BeTrue())
		}
		for _, path := range []string{configPath, nodeConfigPath} {
			if path != "" {
				clusterManager.CleanupConfig(path)
			}
		}
	})
})
})