# ConfigMap holding one policy file, manually_separate_policy.conf, as a literal string.
apiVersion: v1
kind: ConfigMap
metadata:
  # NOTE(review): this object is in cluster-system while the other clusterd
  # resources in this file are in mindx-dl -- confirm that is intentional.
  namespace: cluster-system
  name: clusterd-config-cm
data:
  # The value is an embedded document passed to the consumer verbatim ("|" keeps
  # newlines and one trailing newline).
  # NOTE(review): the nesting below assumes the fault_* keys belong to
  # `separate` / `release` respectively -- confirm against the consumer.
  manually_separate_policy.conf: |
    enabled: true
    separate:
      fault_window_hours: 24
      fault_threshold: 3
    release:
      fault_free_hours: 48
---
# Service account referenced by the clusterd Deployment (serviceAccountName)
# and by the ClusterRoleBinding subject in this file.
apiVersion: v1
kind: ServiceAccount
metadata:
  namespace: mindx-dl
  name: clusterd
---
# Cluster-scoped permission set; it is bound to the clusterd service account by
# the ClusterRoleBinding named pods-clusterd-rolebinding.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: pods-clusterd-role
rules:
  # --- Core API group ("") ---
  - apiGroups: [""]
    resources: [pods]
    verbs: [get, list, update, watch, patch]
  - apiGroups: [""]
    resources: [services]
    verbs: [get]
  - apiGroups: [""]
    resources: [configmaps]
    verbs: [get, create, update, list, watch, delete, patch]
  - apiGroups: [""]
    resources: [nodes]
    verbs: [get, list, patch, watch]
  # The status subresource needs its own rule; access to "nodes" does not cover it.
  - apiGroups: [""]
    resources: [nodes/status]
    verbs: [get, patch]
  # --- Job / scheduling resources ---
  - apiGroups: [batch.volcano.sh]
    resources: [jobs]
    verbs: [get, list, watch, update, delete]
  # PodGroups are matched in either of the two scheduling API groups.
  - apiGroups: [scheduling.incubator.k8s.io, scheduling.volcano.sh]
    resources: [podgroups]
    verbs: [list, watch, update, get, patch]
  - apiGroups: [batch]
    resources: [jobs]
    verbs: [get]
  - apiGroups: [mindxdl.gitee.com]
    resources: [ascendjobs]
    verbs: [list, watch, get, update]
  # --- Events (core group): list only ---
  - apiGroups: [""]
    resources: [events]
    verbs: [list]
---
# Grants the ClusterRole pods-clusterd-role to the clusterd service account
# in the mindx-dl namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: pods-clusterd-rolebinding
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: pods-clusterd-role
subjects:
  - kind: ServiceAccount
    namespace: mindx-dl
    name: clusterd
---
# Deployment running one clusterd replica, scheduled only onto nodes labelled
# masterselector=dls-master-node, under the clusterd service account.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: clusterd
  namespace: mindx-dl
spec:
  replicas: 1
  selector:
    matchLabels:
      app: clusterd
  template:
    metadata:
      labels:
        # Must match spec.selector.matchLabels above and the Service selector.
        app: clusterd
      annotations:
        # Legacy seccomp annotation: deprecated since Kubernetes v1.19 and no
        # longer honoured from v1.27. Kept so older clusters still apply the
        # default profile; the pod-level securityContext.seccompProfile below is
        # the supported equivalent and must stay in sync with this value.
        seccomp.security.alpha.kubernetes.io/pod: runtime/default
    spec:
      nodeSelector:
        masterselector: dls-master-node
      serviceAccountName: clusterd
      # Field-based seccomp setting (available since Kubernetes v1.19). Without
      # it, clusters that ignore the annotation above run the pod with no
      # explicitly requested seccomp profile.
      # NOTE(review): clusters older than v1.19 do not know this field; drop it
      # (or apply with validation disabled) if such clusters must be supported.
      securityContext:
        seccompProfile:
          type: RuntimeDefault
      containers:
        - name: clusterd
          image: clusterd:v6.0.RC2
          # Never pull: the image has to be present on the node already.
          imagePullPolicy: Never
          # bash -c executes the single string in args as a shell command line.
          command: ["/bin/bash", "-c", "--"]
          args:
            - "/usr/local/bin/clusterd -logFile=/var/log/mindx-dl/clusterd/clusterd.log -logLevel=0"
          # Requests equal limits for both CPU and memory.
          resources:
            requests:
              memory: 1Gi
              cpu: 1000m
            limits:
              memory: 1Gi
              cpu: 1000m
          env:
            # Downward API: name of the node the pod is scheduled on.
            - name: NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            # Downward API: IP address assigned to the pod.
            - name: POD_IP
              valueFrom:
                fieldRef:
                  fieldPath: status.podIP
          securityContext:
            allowPrivilegeEscalation: false
            # Root filesystem is read-only; the volume mounts below are the
            # writable locations (except localtime, which is mounted readOnly).
            readOnlyRootFilesystem: true
            capabilities:
              drop: ["ALL"]
          volumeMounts:
            # Same directory the -logFile argument points into.
            - name: log-clusterd
              mountPath: /var/log/mindx-dl/clusterd
            - name: config-clusterd
              mountPath: /user1/mindx-dl/clusterd
            - name: localtime
              mountPath: /etc/localtime
              readOnly: true
            - name: slownode
              mountPath: /user/slownode-cluster
      volumes:
        # type Directory: the host path must already exist as a directory.
        - name: log-clusterd
          hostPath:
            path: /var/log/mindx-dl/clusterd
            type: Directory
        # No type given, so no existence/type check is done before mounting.
        - name: localtime
          hostPath:
            path: /etc/localtime
        # type DirectoryOrCreate: an empty directory is created if missing.
        - name: config-clusterd
          hostPath:
            path: /user1/mindx-dl/clusterd
            type: DirectoryOrCreate
        - name: slownode
          hostPath:
            path: /user/slownode-cluster
            type: DirectoryOrCreate
---
# Service routing TCP port 8899 to port 8899 on pods labelled app=clusterd.
# No spec.type is set, so the Kubernetes default service type applies.
apiVersion: v1
kind: Service
metadata:
  namespace: mindx-dl
  name: clusterd-grpc-svc
spec:
  selector:
    app: clusterd
  ports:
    - port: 8899
      targetPort: 8899
      protocol: TCP