import openpyxl
import csv
import os
import argparse
import sys
def find_sheet_by_substring(wb, substring):
"""Find worksheet containing specified substring in its name"""
for sheet_name in wb.sheetnames:
if substring in sheet_name:
return wb[sheet_name]
return None
def find_header_cell(sheet, header_text):
"""Find cell position containing specified header text"""
for row in sheet.iter_rows(min_row=1, max_row=100):
for cell in row:
if cell.value and str(cell.value).strip() == header_text:
return cell.row, cell.column
return None, None
def extract_column_data(sheet, header_row, header_col, max_rows=20000):
"""Extract data below specified header column"""
data = []
for row in range(header_row + 1, header_row + max_rows + 1):
cell_value = sheet.cell(row=row, column=header_col).value
if cell_value is None or (isinstance(cell_value, str) and cell_value.strip() == ""):
break
data.append(cell_value)
return data
def process_excel_to_csv(input_file, output_file, topotree_name, group_per_rack):
"""Process Excel file and generate CSV file"""
if not os.path.exists(input_file):
print(f"Error: Input file '{input_file}' does not exist")
return False
try:
wb = openpyxl.load_workbook(input_file, data_only=True)
target_sheet = find_sheet_by_substring(wb, "超节点规划")
if target_sheet is None:
print("Error: Worksheet containing '超节点规划' not found")
return False
print(f"Target worksheet found: {target_sheet.title}")
hostname_row, hostname_col = find_header_cell(target_sheet, "主机名称")
frame_row, frame_col = find_header_cell(target_sheet, "机框编号")
missing_headers = []
if not hostname_row or not hostname_col:
missing_headers.append("主机名称")
if not frame_row or not frame_col:
missing_headers.append("机框编号")
if missing_headers:
print(f"Error: The following headers are not found: {', '.join(missing_headers)}")
return False
node_names = extract_column_data(target_sheet, hostname_row, hostname_col, max_rows=20000)
frame_numbers = extract_column_data(target_sheet, frame_row, frame_col, max_rows=20000)
min_length = min(len(node_names), len(frame_numbers))
if len(node_names) != len(frame_numbers):
print(
f"Warning: Data column lengths inconsistent - Hostnames: {len(node_names)}, Frame Numbers: {len(frame_numbers)}")
print(f"Using minimum length: {min_length}")
group_ids = []
for i in range(min_length):
try:
frame_num = float(frame_numbers[i]) if frame_numbers[i] is not None else 0
group_id = int((frame_num - 1) // group_per_rack)
group_ids.append(group_id)
except (ValueError, TypeError) as e:
print(
f"Warning: Frame number '{frame_numbers[i]}' at row {i + 1} cannot be converted to numeric, using default 0")
group_ids.append(0)
with open(output_file, 'w', newline='', encoding='utf-8-sig') as csvfile:
writer = csv.writer(csvfile)
writer.writerow(['nodeName', 'huawei.com/topotree.groupid', 'huawei.com/topotree'])
for i in range(min_length):
writer.writerow([
node_names[i],
group_ids[i],
topotree_name
])
print(f"CSV file successfully generated: {output_file}")
print(f"Processed {min_length} rows of data")
print(f"Topotree name: {topotree_name}")
print(f"Group per rack (divisor): {group_per_rack}")
return True
except Exception as e:
print(f"Error during processing: {str(e)}")
return False
def main():
"""Main function: Handle command line arguments and interactive input"""
parser = argparse.ArgumentParser(description='Extract data from Excel file and generate CSV file')
parser.add_argument('--input', '-i', help='Input Excel file path')
parser.add_argument('--output', '-o', help='Output CSV file path')
parser.add_argument('--topotree-name', '-t', help='Topotree name')
parser.add_argument('--group-per-rack', '-g', type=int, default=12,
help='Number of frames per group (default: 12). This is used as divisor for calculating groupid from frame number.')
args = parser.parse_args()
input_file = args.input
if not input_file:
input_file = input("Please enter Excel file path: ").strip()
if not input_file:
print("Error: Input file path is required")
sys.exit(1)
output_file = args.output
if not output_file:
base_name = os.path.splitext(os.path.basename(input_file))[0]
output_file = f"{base_name}_output.csv"
print(f"Output file not specified, using default: {output_file}")
topotree_name = args.topotree_name
if not topotree_name:
topotree_name = input("Please enter topotree name: ").strip()
if not topotree_name:
print("Error: Topotree name is required")
sys.exit(1)
group_per_rack = args.group_per_rack
if group_per_rack <= 0:
print(f"Error: group-per-rack must be a positive integer, got {group_per_rack}")
sys.exit(1)
success = process_excel_to_csv(input_file, output_file, topotree_name, group_per_rack)
if success:
print("Script execution completed")
else:
print("Script execution failed")
sys.exit(1)
if __name__ == "__main__":
main()