Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package device
import (
"encoding/json"
"strconv"
"sync"
"Ascend-device-plugin/pkg/common"
"Ascend-device-plugin/pkg/kubeclient"
"ascend-common/common-utils/hwlog"
)
// ResetInfoMgr holds the reset bookkeeping of this package: the cached reset
// info that is mirrored to the node annotation, the set of busy devices and
// the per-device reset counters.
type ResetInfoMgr struct {
// client is the k8s client used to read the node and to write the reset
// info annotation.
client *kubeclient.ClientK8s
// resetInfo is the cached reset info; WriteResetInfo and ReadResetInfo
// access it while holding mu.
resetInfo *ResetInfo
// busyDevs is used as a set: keys are the decimal string of a logic ID,
// values are empty structs.
busyDevs sync.Map
// resetCnt maps the decimal string of a logic ID to an int reset counter.
resetCnt sync.Map
// mu guards resetInfo.
mu sync.RWMutex
}
// ResetInfo is the reset state that WriteResetInfo JSON-encodes into the node
// annotation and that readAnnotation decodes from it.
type ResetInfo struct {
// ThirdPartyResetDevs lists devices by reset category.
// NOTE(review): presumably devices a third party is expected to reset - confirm.
ThirdPartyResetDevs []ResetDevice
// ManualResetDevs lists devices by reset category.
// NOTE(review): presumably devices that require a manual reset - confirm.
ManualResetDevs []ResetDevice
}
// ResetDevice identifies one device inside a ResetInfo list. The merge helpers
// in this file (mergeAndDeduplicate, deduplicate, excludeArray) treat PhyID as
// the identity of a device; the other fields are carried along unchanged.
type ResetDevice struct {
CardId int32
DeviceId int32
AssociatedCardId int32
// PhyID is the key used for deduplication and exclusion.
PhyID int32
LogicID int32
// shouldCheckNet is unexported, so encoding/json skips it and it is never
// written to or read from the node annotation.
shouldCheckNet bool
}
// WriteMode selects how mergeFailDevs combines the cached device list with the
// list passed to WriteResetInfo.
type WriteMode int
const (
// WMOverwrite replaces the cached list with the deduplicated new list.
WMOverwrite WriteMode = iota
// WMAppend keeps the cached list and appends new devices whose PhyID is not
// present yet.
WMAppend
// WMDelete removes from the cached list every device whose PhyID appears in
// the new list.
WMDelete
)
var (
	// mgr is the package-wide reset info manager. It starts with an empty,
	// non-nil resetInfo because ReadResetInfo and WriteResetInfo dereference
	// mgr.resetInfo and would otherwise panic when called before
	// InitResetInfoMgr has replaced this default instance.
	mgr = &ResetInfoMgr{resetInfo: &ResetInfo{}}
	// once ensures the body of InitResetInfoMgr runs at most one time.
	once sync.Once
)
// InitResetInfoMgr builds the package-level manager around client and seeds its
// reset info from the current node's annotation stored under
// common.ResetInfoAnnotationKey.
//
// The work is wrapped in a sync.Once, so only the first call has any effect.
// If the node cannot be fetched the failure is logged and the manager is still
// installed with empty reset info; the same happens, silently, when the node
// has no annotations.
func InitResetInfoMgr(client *kubeclient.ClientK8s) {
	once.Do(func() {
		fresh := &ResetInfoMgr{
			client:    client,
			resetInfo: &ResetInfo{},
		}

		node, err := client.GetNode()
		switch {
		case err != nil:
			hwlog.RunLog.Errorf("fail to get node from k8s, err: %v", err)
		case node.Annotations != nil:
			fresh.resetInfo = readAnnotation(node.Annotations, common.ResetInfoAnnotationKey)
		}

		// Single publication point for every outcome above.
		mgr = fresh
	})
}
// GetResetInfoMgr returns the package-level reset info manager: the instance
// installed by InitResetInfoMgr once it has run, otherwise the default instance
// the package variable is initialized with.
func GetResetInfoMgr() *ResetInfoMgr {
return mgr
}
// WriteResetInfo merges resetInfo into the manager's cached reset info according
// to writeMode and, when updateNode is true, writes the JSON-encoded result to
// the node annotation through writeNodeAnnotation.
//
// If the merged state cannot be marshalled the error is logged and nothing is
// written to the node; the in-memory merge is kept in that case.
//
// NOTE(review): the annotation is written after the lock has been released, so
// two concurrent callers may publish their snapshots to the node in a different
// order than they were merged - confirm this is acceptable.
func WriteResetInfo(resetInfo ResetInfo, writeMode WriteMode, updateNode bool) {
	dataBytes, ok := mergeAndMarshalResetInfo(resetInfo, writeMode)
	if !ok || !updateNode {
		return
	}
	writeNodeAnnotation(string(dataBytes))
}

// mergeAndMarshalResetInfo applies resetInfo to the cached state under the write
// lock and returns the JSON encoding of the merged state. The boolean is false
// when marshalling failed.
func mergeAndMarshalResetInfo(resetInfo ResetInfo, writeMode WriteMode) ([]byte, bool) {
	mgr.mu.Lock()
	// Deferred so the lock is released on every path, including a panic raised
	// while merging or logging.
	defer mgr.mu.Unlock()

	// The package-level default manager may carry no reset info yet; start from
	// an empty one instead of dereferencing nil.
	if mgr.resetInfo == nil {
		mgr.resetInfo = &ResetInfo{}
	}
	cached := mgr.resetInfo

	hwlog.RunLog.Infof("write reset info, current: %v, new: %v, mode: %v", *cached, resetInfo, writeMode)
	cached.ThirdPartyResetDevs = mergeFailDevs(cached.ThirdPartyResetDevs,
		resetInfo.ThirdPartyResetDevs, writeMode)
	cached.ManualResetDevs = mergeFailDevs(cached.ManualResetDevs,
		resetInfo.ManualResetDevs, writeMode)
	hwlog.RunLog.Infof("reset info change: %v", *cached)

	dataBytes, err := json.Marshal(*cached)
	if err != nil {
		hwlog.RunLog.Errorf("marshal reset info error, data: %v, err: %v", *cached, err)
		return nil, false
	}
	return dataBytes, true
}
// ReadResetInfo returns a copy of the cached reset info, taken under the read
// lock. The copy is shallow: the returned slices share their backing arrays
// with the manager's state, so callers should treat them as read-only.
//
// An empty ResetInfo is returned when the manager holds no reset info, instead
// of dereferencing a nil pointer.
func ReadResetInfo() ResetInfo {
	mgr.mu.RLock()
	defer mgr.mu.RUnlock()
	if mgr.resetInfo == nil {
		return ResetInfo{}
	}
	return *mgr.resetInfo
}
// IsDevBusy reports whether logicID is currently recorded in the manager's
// busy-device set.
func IsDevBusy(logicID int32) bool {
	key := strconv.Itoa(int(logicID))
	_, busy := mgr.busyDevs.Load(key)
	return busy
}
// AddBusyDev records logicID in the manager's busy-device set. Adding a device
// that is already present leaves the set unchanged.
func AddBusyDev(logicID int32) {
	key := strconv.Itoa(int(logicID))
	mgr.busyDevs.Store(key, struct{}{})
}
// FreeBusyDev removes logicID from the manager's busy-device set and logs the
// removal. It does nothing, and logs nothing, when the device is not in the set.
//
// The membership check and the removal are one atomic LoadAndDelete, so when
// several goroutines free the same device only the one that actually removed
// the entry logs it.
func FreeBusyDev(logicID int32) {
	if _, loaded := mgr.busyDevs.LoadAndDelete(strconv.Itoa(int(logicID))); !loaded {
		return
	}
	hwlog.RunLog.Infof("free busy device logicID %v", logicID)
}
// GetResetCnt returns the reset counter stored for logicID, or 0 when no
// counter has been stored. If the stored value is not an int, a warning is
// logged, the entry is reset to 0 and 0 is returned.
func GetResetCnt(logicID int32) int {
	key := strconv.Itoa(int(logicID))
	stored, found := mgr.resetCnt.Load(key)
	if !found {
		return 0
	}
	if count, isInt := stored.(int); isInt {
		return count
	}
	hwlog.RunLog.Warnf("reset cnt map invalid value, val: %v", stored)
	mgr.resetCnt.Store(key, 0)
	return 0
}
func AddResetCnt(logicID int32) {
cnt := GetResetCnt(logicID)
SetResetCnt(logicID, cnt+1)
}
// SetResetCnt stores cnt as the reset counter of logicID, replacing any value
// stored before.
func SetResetCnt(logicID int32, cnt int) {
	key := strconv.Itoa(int(logicID))
	mgr.resetCnt.Store(key, cnt)
}
// writeNodeAnnotation stores resetStr in the node annotation keyed by
// common.ResetInfoAnnotationKey. Failures are logged and not returned.
func writeNodeAnnotation(resetStr string) {
	// The package-level default manager is created without a client, so the
	// client is nil until InitResetInfoMgr has installed one. Skip the call
	// rather than invoking AddAnnotation on a nil client.
	if mgr.client == nil {
		hwlog.RunLog.Errorf("fail to write reset info to node annotation, err: k8s client is not initialized")
		return
	}
	if err := mgr.client.AddAnnotation(common.ResetInfoAnnotationKey, resetStr); err != nil {
		hwlog.RunLog.Errorf("fail to write reset info to node annotation, err: %v", err)
	}
}
// mergeFailDevs combines curDevs and newDevs as requested by writeMode:
//   - WMOverwrite: the deduplicated newDevs
//   - WMAppend:    curDevs followed by the devices of newDevs not yet present
//   - WMDelete:    curDevs without the devices listed in newDevs
//
// Any other mode is logged as an error and curDevs is returned unchanged.
func mergeFailDevs(curDevs, newDevs []ResetDevice, writeMode WriteMode) []ResetDevice {
	if writeMode == WMOverwrite {
		return deduplicate(newDevs)
	}
	if writeMode == WMAppend {
		return mergeAndDeduplicate(curDevs, newDevs)
	}
	if writeMode == WMDelete {
		return excludeArray(curDevs, newDevs)
	}
	hwlog.RunLog.Errorf("write mode %v is invalid", writeMode)
	return curDevs
}
// mergeAndDeduplicate returns the devices of curArr followed by those of newArr,
// keeping only the first device seen for each PhyID. Input order is preserved
// and the result is always a freshly allocated, non-nil slice.
func mergeAndDeduplicate(curArr, newArr []ResetDevice) []ResetDevice {
	merged := make([]ResetDevice, 0, len(curArr)+len(newArr))
	known := make(map[int32]struct{})
	for _, batch := range [][]ResetDevice{curArr, newArr} {
		for _, dev := range batch {
			if _, dup := known[dev.PhyID]; dup {
				continue
			}
			known[dev.PhyID] = struct{}{}
			merged = append(merged, dev)
		}
	}
	return merged
}
// deduplicate returns the devices of arr in their original order, keeping only
// the first device seen for each PhyID. The result is always a freshly
// allocated, non-nil slice.
func deduplicate(arr []ResetDevice) []ResetDevice {
	unique := make([]ResetDevice, 0, len(arr))
	visited := make(map[int32]bool, len(arr))
	for i := range arr {
		id := arr[i].PhyID
		if visited[id] {
			continue
		}
		visited[id] = true
		unique = append(unique, arr[i])
	}
	return unique
}
// excludeArray returns the devices of curArr whose PhyID does not occur in
// delArr, in their original order. Duplicates inside curArr are kept. The
// result is always a freshly allocated, non-nil slice.
func excludeArray(curArr, delArr []ResetDevice) []ResetDevice {
	removed := make(map[int32]struct{}, len(delArr))
	for i := range delArr {
		removed[delArr[i].PhyID] = struct{}{}
	}

	kept := make([]ResetDevice, 0, len(curArr))
	for _, candidate := range curArr {
		if _, drop := removed[candidate.PhyID]; drop {
			continue
		}
		kept = append(kept, candidate)
	}
	return kept
}
// readAnnotation decodes the JSON stored under key in annotation into a
// ResetInfo. It never returns nil: when the key is absent, or when the value
// cannot be unmarshalled (which is logged), an empty ResetInfo is returned.
func readAnnotation(annotation map[string]string, key string) *ResetInfo {
	raw, found := annotation[key]
	if !found {
		return &ResetInfo{}
	}

	parsed := &ResetInfo{}
	if err := json.Unmarshal([]byte(raw), parsed); err != nil {
		hwlog.RunLog.Errorf("unmarshal node annotation failed, err: %v", err)
		// Hand back a fresh value so a partially decoded result is not exposed.
		return &ResetInfo{}
	}
	return parsed
}
// combineToString returns the decimal representations of a and b joined by
// common.UnderLine.
func combineToString(a, b int32) string {
	left := strconv.FormatInt(int64(a), 10)
	right := strconv.FormatInt(int64(b), 10)
	return left + common.UnderLine + right
}