From e14ef324514163925e1f4e520e12611b0183c338 Mon Sep 17 00:00:00 2001
From: Yui5427 <785369607@qq.com>
Date: Mon, 1 Sep 2025 19:48:02 +0800
Subject: [PATCH 1/2] Clang-GCC-Bench

---
 clang-tools-extra/clang-gcc-bench/README.md   | 184 +++++
 clang-tools-extra/clang-gcc-bench/compare.py  | 139 ++++
 clang-tools-extra/clang-gcc-bench/config.json |  32 +
 .../clang-gcc-bench/csvToXlsx.py              | 148 ++++
 clang-tools-extra/clang-gcc-bench/extract.py  | 105 +++
 clang-tools-extra/clang-gcc-bench/utils.py    | 655 ++++++++++++++++++
 6 files changed, 1263 insertions(+)
 create mode 100644 clang-tools-extra/clang-gcc-bench/README.md
 create mode 100644 clang-tools-extra/clang-gcc-bench/compare.py
 create mode 100644 clang-tools-extra/clang-gcc-bench/config.json
 create mode 100644 clang-tools-extra/clang-gcc-bench/csvToXlsx.py
 create mode 100644 clang-tools-extra/clang-gcc-bench/extract.py
 create mode 100644 clang-tools-extra/clang-gcc-bench/utils.py

diff --git a/clang-tools-extra/clang-gcc-bench/README.md b/clang-tools-extra/clang-gcc-bench/README.md
new file mode 100644
index 000000000000..32b7ae868043
--- /dev/null
+++ b/clang-tools-extra/clang-gcc-bench/README.md
@@ -0,0 +1,184 @@
+# Clang-GCC-Bench
+
+基于LLVM的GCC 15与Clang 14编译器性能对比工具，通过汇编指令分析比较编译器优化效果。
+
+## 工作流程
+
+```
+LLVM IR文件
+    |
+    ▼
+llvm-nm 提取函数名
+    |
+    ▼
+llvm-extract 提取单函数
+    |
+    ▼
+llvm-dis 生成可读IR
+    |
+    ▼
+llvm-as 重新编译 (CBE需要干净的.bc格式)
+    |
+    ▼
+llvm-cbe 生成C代码
+    |
+    ▼
+    |————————————————————————————————|
+    |                                |
+    ▼                                ▼
+GCC编译                        Clang编译
+    |                                |
+    ▼                                ▼
+GCC汇编文件                    Clang汇编文件
+    |                                |
+    |————————————————————————————————|
+                    |
+                    ▼
+                指令统计对比
+                    |
+                    ▼
+                CSV结果报告
+                    |
+                    ▼
+                Excel转换
+```
+
+## 使用方法
+
+### 批量对比 (compare.py)
+
+**使用配置文件中的路径进行批量对比：**
+
+```bash
+python3 compare.py -c --all
+```
+
+**参数说明：**
+
+- `-c, --use-config`: 使用config.json中配置的输入路径
+- `--all`: 输出所有函数结果（包括失败的）
+- 不加 `--all`: 只输出GCC表现更好的函数（指令数更少）
+
+**指定单个文件或目录：**
+
+```bash
+python3 compare.py input.ll -o results.csv
+python3 compare.py /path/to/directory -o output_dir
+```
+
+### 单文件详细分析 (extract.py)
+
+**完整功能分析：**
+
+```bash
+python3 extract.py input.ll -o output_dir
+```
+
+**指定单个函数：**
+
+```bash
+python3 extract.py input.ll -f function_name -o output_dir
+```
+
+**extract.py 特有输出：**
+
+- **详细处理日志**: `function_name_processing.log` - 记录每个编译步骤的详细信息和错误
+- **优化管道日志**: `function_name_opt_steps.log` - 记录LLVM优化过程的每一步
+- **中间文件保留**: 保留所有中间文件(.ll, .c, .s)用于调试
+- **实时状态输出**: 显示每个函数的处理状态和错误信息
+
+**处理日志示例：**
+
+```
+*** Processing Function Start ***
+*** Function Name: test_fdiv ***
+*** Step 1: llvm-extract Function Extraction ***
+*** Command: /path/to/llvm-extract -func=test_fdiv input.ll -o temp.bc ***
+*** llvm-extract Success ***
+*** Step 4: CBE C Code Generation ***
+*** CBE Failed (exit code: 1) ***
+*** stderr: error: undefined struct 'l_vector_2_bool' ***
+```
+
+### Excel转换 (csvToXlsx.py)
+
+**自动转换配置目录下所有CSV文件：**
+
+```bash
+python3 csvToXlsx.py
+```
+
+**转换指定目录：**
+
+```bash
+python3 csvToXlsx.py -d /path/to/csv/directory
+```
+
+**转换单个文件：**
+
+```bash
+python3 csvToXlsx.py input.csv output.xlsx
+```
+
+## 配置文件
+
+编辑 `config.json` 设置工具路径和编译参数（`compile_flags` 与 `timeout` 为必填项；下面的示例省略了 `compile_flags`，完整示例见同目录下的 `config.json`）：
+
+```json
+{
+  "paths": {
+    "llvm_bin": "/path/to/llvm/build/bin",
+    "gcc_bin": "/path/to/gcc"
+  },
+  "input_paths": ["/path/to/llvm/tests"],
+  "output_dir": "comparison_results",
+  "timeout": 5
+}
+```
+
+## 输出格式
+
+| Function    | GCC Instructions | Clang Instructions | Diff % | Status | LL File          |
+| ----------- | ---------------- | ------------------ | ------ | ------ | ---------------- |
+| test_func   | 15               | 18                 | 20.0%  |        | /path/to/test.ll |
+| failed_func | -                | -                  | -      | CBE    | /path/to/test.ll |
+
+## 依赖工具
+
+- LLVM工具链 (llvm-extract, llvm-nm, llvm-cbe等)
+- GCC & Clang 编译器
+- Python 3
+
+## LLVM编译流程
+
+```bash
+$ git clone https://gitee.com/openeuler/llvm-project.git
+$ cd llvm-project
+$ git checkout dev_19.1.7
+$ mkdir llvm/build
+$ cd llvm/build
+$ cmake .. -G "Ninja" -DCMAKE_BUILD_TYPE=Debug -DLLVM_PARALLEL_LINK_JOBS=1 -DLLVM_TARGETS_TO_BUILD="AArch64"
+$ ninja
+```
+
+## LLVM-CBE编译
+
+```bash
+$ cd llvm-project/llvm/projects
+$ git clone https://github.com/JuliaHubOSS/llvm-cbe
+$ cd ../build
+$ cmake -S .. -G "Ninja"
+$ ninja llvm-cbe
+```
+
+## 错误状态说明
+
+### CSV输出中的Status字段
+
+- **空白**: 处理成功
+- `llvm-extract`: 函数提取失败
+- `llvm-dis`: IR反汇编失败
+- `llvm-as`: IR汇编失败
+- `CBE` / `cbe超时`: C代码生成失败/超时
+- `GCC`: GCC编译失败
+- `Clang`: Clang编译失败
diff --git a/clang-tools-extra/clang-gcc-bench/compare.py b/clang-tools-extra/clang-gcc-bench/compare.py
new file mode 100644
index 000000000000..9fbdf37f95fd
--- /dev/null
+++ b/clang-tools-extra/clang-gcc-bench/compare.py
@@ -0,0 +1,139 @@
+import os
+import re
+import subprocess
+import argparse
+import csv
+from pathlib import Path
+from utils import load_config, strip_asm, process_function, collect_ll_files, extract_function_names
+
+config = load_config()  # read once at import time; load_config() re-raises if config.json is missing or invalid
+
+def generate_filename_from_path(path):
+    """Build a short CSV filename: trailing path components plus an MD5 tag.
+
+    The stem is the last two components of ``path`` joined by ``_`` (only the
+    last one when there is a single component, ``"unknown"`` when there are
+    none). The first 8 hex digits of the MD5 of ``str(path)`` are appended
+    to reduce the chance of two inputs mapping to the same filename.
+    """
+    import hashlib
+    text = str(path)
+    tail = Path(text).parts[-2:]
+    stem = "_".join(tail) if tail else "unknown"
+    digest = hashlib.md5(text.encode()).hexdigest()
+    return f"{stem}_{digest[:8]}.csv"
+
+def main():
+    """Compare GCC and Clang instruction counts per function and write one
+    CSV per input path; without --all only successful functions for which
+    GCC emitted fewer instructions than Clang are written."""
+    import shutil  # local import: only the scratch-directory cleanup needs it
+
+    # Parse command line arguments
+    parser = argparse.ArgumentParser(description="Compare assembly instruction counts generated by GCC and Clang")
+    parser.add_argument("-c", "--use-config", action="store_true", help="Use paths from configuration file")
+    parser.add_argument("input", nargs="?", help="Input .ll file or directory containing .ll files (optional if using -c parameter)")
+    parser.add_argument("-o", "--output", help="Output directory or CSV file path")
+    parser.add_argument("--all", action="store_true", help="Output all function results, default only outputs functions where GCC performs better")
+
+    args = parser.parse_args()
+
+    # Determine input paths and output directory
+    if args.use_config:
+        if "input_paths" not in config:
+            print("Error: Missing 'input_paths' field in configuration file")
+            print(f"Fields contained in configuration file: {list(config.keys())}")
+            return
+        input_paths = [Path(p) for p in config["input_paths"]]
+        output_dir = Path(config.get("output_dir", "comparison_results"))
+    else:
+        if not args.input:
+            parser.error("Must provide input path or use -c parameter")
+        input_paths = [Path(args.input)]
+        if args.output:
+            output_dir = Path(args.output)
+        else:
+            output_dir = Path("comparison_results")
+
+    # Create output directory
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    # Temporary working directory
+    work_dir = Path("temp_workdir")
+    work_dir.mkdir(exist_ok=True)
+
+    try:
+        # Process each input path
+        for input_path in input_paths:
+            # Collect .ll files to process
+            ll_files = collect_ll_files(input_path)
+            if not ll_files:
+                continue
+
+            # Generate output filename
+            output_filename = generate_filename_from_path(input_path)
+            output_file = output_dir / output_filename
+
+            print(f"\nProcessing input path: {input_path}")
+            print(f"Output file: {output_file}")
+
+            # Prepare CSV output
+            with open(output_file, 'w', newline='') as csvfile:
+                csv_writer = csv.writer(csvfile)
+                csv_writer.writerow(["Function", "GCC Instructions", "Clang Instructions", "Diff %", "Status", "LL File"])
+
+                total_functions = 0
+                processed_functions = 0
+
+                print(f"Found {len(ll_files)} .ll files")
+
+                for ll_path in ll_files:
+                    # Extract function names
+                    func_names = extract_function_names(ll_path, config)
+                    if not func_names:
+                        continue
+
+                    total_functions += len(func_names)
+                    print(f"Processing {ll_path.name} ({len(func_names)} functions)")
+
+                    for func in func_names:
+                        result = process_function(ll_path, func, work_dir, config,
+                                                  keep_files=False, enable_timeout=True)
+
+                        processed_functions += 1
+
+                        # An empty status means success; failed rows are only written with --all
+                        is_success = result.get('status') == ''
+                        should_output = args.all or (is_success and result['gcc_lines'] < result['clang_lines'])
+
+                        if should_output:
+                            if is_success:
+                                print(f"  Processing function: {func} gcc: {result['gcc_lines']}, clang: {result['clang_lines']}, diff: {result['diff_percent']:.1f}%")
+                            else:
+                                print(f"  Processing function: {func} {result['status']}")
+
+                            # Write to CSV
+                            csv_writer.writerow([
+                                result["func"],
+                                result["gcc_lines"] if is_success else "-",
+                                result["clang_lines"] if is_success else "-",
+                                f"{result['diff_percent']:.1f}%" if is_success else "-",
+                                result["status"],
+                                str(ll_path)
+                            ])
+
+                        # Display progress in real-time
+                        if processed_functions % 10 == 0:
+                            print(f"Processed {processed_functions}/{total_functions} functions...")
+
+                print(f"Completed processing {input_path}: {processed_functions}/{total_functions} functions")
+
+    finally:
+        # Remove the scratch directory, including any sub-directories left in it
+        shutil.rmtree(work_dir, ignore_errors=True)
+
+    # Reached only when no exception escaped the processing loop
+    print(f"\nAll paths processing completed!")
+    print(f"  Results saved to directory: {output_dir}")
+
+if __name__ == "__main__":
+    main()
diff --git a/clang-tools-extra/clang-gcc-bench/config.json b/clang-tools-extra/clang-gcc-bench/config.json
new file mode 100644
index 000000000000..026549322541
--- /dev/null
+++ b/clang-tools-extra/clang-gcc-bench/config.json
@@ -0,0 +1,32 @@
+{
+  "paths": {
+    "llvm_bin": "/path/to/build/bin",
+    "gcc_bin": "/path/to/gcc"
+  },
+  "input_paths": [
+    "/path/to/ll1",
+    "/path/to/ll2"
+  ],
+  "output_dir": "comparison_results",
+  "compile_flags": {
+    "gcc": [
+      "-std=c11",
+      "-march=armv8.6-a+sve",
+      "-O3",
+      "-fomit-frame-pointer",
+      "-S"
+    ],
+    "clang": [
+      "-march=armv8.6-a+sve",
+      "-O3",
+      "-fomit-frame-pointer",
+      "-S"
+    ]
+  },
+  "opt_flags": [
+    "-O3",
+    "-print-after-all",
+    "-disable-output"
+  ],
+  "timeout": 5
+}
\ No newline at end of file
diff --git a/clang-tools-extra/clang-gcc-bench/csvToXlsx.py b/clang-tools-extra/clang-gcc-bench/csvToXlsx.py
new file mode 100644
index 000000000000..dd537e9b1278
--- /dev/null
+++ b/clang-tools-extra/clang-gcc-bench/csvToXlsx.py
@@ -0,0 +1,148 @@
+#!/usr/bin/env python3
+"""
+Convert CSV files to Excel format (XLSX)
+Auto-converts all CSV files from the output directory specified in config.json
+Usage: python3 csvToXlsx.py
+"""
+
+import argparse
+import pandas as pd
+import json
+from pathlib import Path
+import sys
+
+def csv_to_xlsx(csv_file, xlsx_file=None):
+    """Convert one CSV file to an Excel workbook with basic formatting.
+
+    Args:
+        csv_file: Path of the CSV file to read.
+        xlsx_file: Destination path; defaults to csv_file with a .xlsx suffix.
+
+    Returns:
+        True on success. False when the CSV does not exist or the conversion
+        raises; the error is printed rather than propagated.
+    """
+    csv_path = Path(csv_file)
+
+    if not csv_path.exists():
+        print(f"Error: CSV file {csv_path} does not exist")
+        return False
+
+    # Generate output filename if not provided
+    if xlsx_file is None:
+        xlsx_path = csv_path.with_suffix('.xlsx')
+    else:
+        xlsx_path = Path(xlsx_file)
+
+    try:
+        from openpyxl.styles import Font, PatternFill
+
+        # Read CSV file
+        df = pd.read_csv(csv_path)
+
+        # Write to Excel with formatting
+        with pd.ExcelWriter(xlsx_path, engine='openpyxl') as writer:
+            df.to_excel(writer, index=False, sheet_name='Results')
+            worksheet = writer.sheets['Results']
+
+            # Size each column to its longest cell text plus padding, capped
+            # at 50 characters. Errors are left to the handler below rather
+            # than a bare "except:", which would also hide KeyboardInterrupt.
+            for column in worksheet.columns:
+                max_length = max((len(str(cell.value)) for cell in column), default=0)
+                column_letter = column[0].column_letter
+                worksheet.column_dimensions[column_letter].width = min(max_length + 2, 50)
+
+            # Format header row: bold text on a light-grey fill
+            header_font = Font(bold=True)
+            header_fill = PatternFill(start_color='D3D3D3', end_color='D3D3D3', fill_type='solid')
+            for cell in worksheet[1]:
+                cell.font = header_font
+                cell.fill = header_fill
+
+        print(f"Successfully converted {csv_path} to {xlsx_path}")
+        return True
+
+    except Exception as e:
+        print(f"Error converting CSV to Excel: {e}")
+        return False
+
+def load_config():
+    """Parse the config.json stored beside this script.
+
+    Prints an error and returns None if it is missing or invalid."""
+    location = Path(__file__).parent / "config.json"
+    try:
+        return json.loads(location.read_text())
+    except FileNotFoundError:
+        print(f"Error: Configuration file {location} not found")
+    except json.JSONDecodeError as e:
+        print(f"Error: Invalid configuration file format: {e}")
+    return None
+
+def convert_output_directory(output_dir):
+    """Convert every CSV file under output_dir (searched recursively) to XLSX.
+
+    Each workbook is written next to its CSV with the suffix changed to .xlsx.
+
+    Returns:
+        Number of files converted successfully; 0 when the directory is
+        missing or contains no CSV files.
+    """
+    root = Path(output_dir)
+    if not root.exists():
+        print(f"Output directory {root} does not exist")
+        return 0
+
+    sources = list(root.rglob("*.csv"))
+    if not sources:
+        print(f"No CSV files found in {root}")
+        return 0
+
+    print(f"Found {len(sources)} CSV files to convert...")
+    converted = 0
+    for source in sources:
+        print(f"Converting: {source.relative_to(root)}")
+        converted += bool(csv_to_xlsx(source, source.with_suffix('.xlsx')))
+    print(f"Successfully converted {converted}/{len(sources)} files")
+    return converted
+
+def main():
+    """Command-line entry point: convert a single CSV, a directory, or the configured output."""
+    parser = argparse.ArgumentParser(description="Convert CSV files to Excel format")
+    parser.add_argument("csv_file", nargs="?", help="Input CSV file path (optional)")
+    parser.add_argument("xlsx_file", nargs="?", help="Output Excel file path (optional)")
+    parser.add_argument("-d", "--directory", help="Convert all CSV files in specified directory")
+    args = parser.parse_args()
+
+    if args.directory:
+        # Convert all CSV files in specified directory
+        convert_output_directory(args.directory)
+    elif args.csv_file:
+        # Convert single file; the exit status reports success or failure
+        input_path = Path(args.csv_file)
+        if not input_path.exists():
+            print(f"Error: File {input_path} does not exist")
+            sys.exit(1)
+
+        if input_path.suffix.lower() != '.csv':
+            print(f"Error: {input_path} is not a CSV file")
+            sys.exit(1)
+
+        success = csv_to_xlsx(args.csv_file, args.xlsx_file)
+        sys.exit(0 if success else 1)
+    else:
+        # Auto-convert from config.json output directory
+        config = load_config()
+        if not config:
+            sys.exit(1)
+
+        # Same fallback as compare.py ("comparison_results"), so a config
+        # without "output_dir" still points at the CSVs that compare.py wrote
+        output_dir = config.get("output_dir", "comparison_results")
+        print(f"Auto-converting CSV files from configured output directory: {output_dir}")
+        if convert_output_directory(output_dir) == 0:
+            sys.exit(1)
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/clang-tools-extra/clang-gcc-bench/extract.py b/clang-tools-extra/clang-gcc-bench/extract.py
new file mode 100644
index 000000000000..fd0e91d4278c
--- /dev/null
+++ b/clang-tools-extra/clang-gcc-bench/extract.py
@@ -0,0 +1,105 @@
+import os
+import re
+import subprocess
+import argparse
+import shutil
+from pathlib import Path
+from utils import load_config, extract_function_names, process_function_with_detailed_logging
+
+config = load_config()  # read once at import time; load_config() re-raises if config.json is missing or invalid
+
+def main():
+    """Run the per-function GCC/Clang comparison for one .ll file.
+
+    Results go to <output>/<ll stem>/, which receives a copy of the input
+    file and one sub-directory per function; directories that stay empty
+    after a failed function are removed. A summary is printed at the end.
+    """
+    parser = argparse.ArgumentParser(description="Generate function assembly based on input LL file")
+    parser.add_argument("ll_file", help="Input .ll file path")
+    parser.add_argument("-o", "--output", default="out", help="Output directory path")
+    parser.add_argument("-f", "--function", help="Specify function name to process (optional, processes all functions if not specified)")
+    # NOTE(review): --keep-bc is accepted but never read in this function
+    parser.add_argument("--keep-bc", action="store_true", help="Keep intermediate .bc files")
+    args = parser.parse_args()
+
+    ll_path = Path(args.ll_file)
+    if not (ll_path.exists() and ll_path.is_file()):
+        print(f"Error: File {ll_path} does not exist or is not a file")
+        return
+
+    # The output root is created before function discovery, so it exists
+    # even when the run stops early below
+    output_root = Path(args.output)
+    output_root.mkdir(parents=True, exist_ok=True)
+
+    all_func_names = extract_function_names(ll_path, config)
+    if not all_func_names:
+        return
+
+    # Narrow the work list to the requested function, if one was given
+    if not args.function:
+        func_names = all_func_names
+        print(f"Processing {ll_path.name} ({len(func_names)} functions)")
+    elif args.function in all_func_names:
+        func_names = [args.function]
+        print(f"Processing specified function in {ll_path.name}: {args.function}")
+    else:
+        print(f"Error: Function '{args.function}' not found in {ll_path.name}")
+        print(f"Available functions: {', '.join(all_func_names)}")
+        return
+
+    # Per-file directory named after the .ll stem, holding a copy of the input
+    ll_filename = ll_path.stem
+    ll_output_dir = output_root / ll_filename
+    ll_output_dir.mkdir(parents=True, exist_ok=True)
+    full_ll_copy = ll_output_dir / f"{ll_filename}.ll"
+    shutil.copy(ll_path, full_ll_copy)
+    print(f"Copied complete LL file to: {full_ll_copy}")
+
+    success_count = 0
+    opt_success_count = 0
+
+    for func in func_names:
+        # One sub-directory per function; non-word characters become '_'
+        clean_func_name = re.sub(r'[^\w]', '_', func)
+        func_output_dir = ll_output_dir / clean_func_name
+        func_output_dir.mkdir(parents=True, exist_ok=True)
+        print(f"Processing function: {func}")
+
+        # Process the function with the detailed-logging helper from utils
+        result = process_function_with_detailed_logging(ll_path, func, func_output_dir, config)
+
+        # An empty status string means success; anything else is reported as the failure
+        if result.get('status') != '':
+            print(f"  Processing failed: {result['status']}")
+            # Drop the function directory again if nothing was written to it
+            if func_output_dir.exists() and not any(func_output_dir.iterdir()):
+                func_output_dir.rmdir()
+            continue
+
+        success_count += 1
+        opt_logged = result["opt_log_created"]
+        if opt_logged:
+            opt_success_count += 1
+        print(f"  Successfully generated files! gcc instructions: {result['gcc_lines']}, clang instructions: {result['clang_lines']}, difference: {result['diff_percent']:.1f}%")
+        print(f"  Generated files:")
+        print(f"    - {clean_func_name}.ll (LLVM IR copy)")
+        print(f"    - {clean_func_name}.c (Generated C program)")
+        print(f"    - {clean_func_name}_gcc.s (GCC assembly)")
+        print(f"    - {clean_func_name}_clang.s (Clang assembly)")
+        if opt_logged:
+            print(f"    - {clean_func_name}_opt_steps.log (Optimization steps log)")
+        print(f"  Files saved in: {func_output_dir}")
+
+    # Summary: every entry of the work list was attempted exactly once
+    total_count = len(func_names)
+    print(f"\nProcessing completed!")
+    print(f"  LL file: {ll_path}")
+    print(f"  Total attempted to process {total_count} functions")
+    print(f"  Successfully processed {success_count} functions")
+    print(f"  Successfully recorded optimization steps for {opt_success_count} functions")
+    print(f"  Output directory: {ll_output_dir.absolute()}")
+
+if __name__ == "__main__":
+    main()
diff --git a/clang-tools-extra/clang-gcc-bench/utils.py b/clang-tools-extra/clang-gcc-bench/utils.py
new file mode 100644
index 000000000000..aab5177a5542
--- /dev/null
+++ b/clang-tools-extra/clang-gcc-bench/utils.py
@@ -0,0 +1,655 @@
+import json
+import re
+import subprocess
+import shutil
+from pathlib import Path
+
+
+def load_config():
+    """Load config.json from this module's directory and derive tool paths.
+
+    Returns a dict with Path values for LLVM_BIN, GCC_BIN, CLANG_BIN,
+    EXTRACT, CBE and OPT (the last four live inside LLVM_BIN) plus
+    compile_flags, opt_flags, timeout, input_paths and output_dir.
+
+    Raises FileNotFoundError / json.JSONDecodeError (after printing a
+    message) and KeyError when "paths" (with llvm_bin and gcc_bin),
+    "compile_flags" or "timeout" is missing.
+    """
+    config_file = Path(__file__).parent / "config.json"
+    try:
+        with open(config_file, 'r') as f:
+            raw = json.load(f)
+        print(f"Successfully loaded configuration file: {config_file}")
+    except FileNotFoundError:
+        print(f"Error: Configuration file {config_file} does not exist")
+        raise
+    except json.JSONDecodeError as e:
+        print(f"Error: Invalid configuration file format: {e}")
+        raise
+
+    llvm_bin = Path(raw["paths"]["llvm_bin"])
+    tool_names = (("CLANG_BIN", "clang"), ("EXTRACT", "llvm-extract"), ("CBE", "llvm-cbe"), ("OPT", "opt"))
+    return {
+        "LLVM_BIN": llvm_bin,
+        "GCC_BIN": Path(raw["paths"]["gcc_bin"]),
+        **{key: llvm_bin / exe for key, exe in tool_names},
+        "compile_flags": raw["compile_flags"],
+        "opt_flags": raw.get("opt_flags", []),
+        "timeout": raw["timeout"],
+        "input_paths": raw.get("input_paths", []),
+        "output_dir": raw.get("output_dir", "comparison_results"),
+    }
+
+
+def strip_asm(asm_path):
+    """Count instruction lines in an assembly file.
+
+    Text after '//' is dropped; blank lines, lines starting with '.'
+    (directives) or '#', and lines ending in ':' (labels) are not counted.
+    Returns 0 when the file cannot be read or decoded.
+    """
+    try:
+        with open(asm_path, 'r') as f:
+            # partition() keeps only the text before the first '//' comment
+            lines = (line.partition('//')[0].strip() for line in f)
+            return sum(
+                1 for text in lines
+                if text and not text.startswith(('.', '#')) and not text.endswith(':')
+            )
+    except (OSError, UnicodeDecodeError):
+        # Best-effort: an unreadable file counts as zero instructions
+        return 0
+
+
+def collect_ll_files(input_path):
+    """Return the .ll files denoted by input_path as a list of Paths.
+
+    A .ll file yields a one-element list; a directory is searched
+    recursively. Anything else prints a warning, and a directory without
+    .ll files prints a notice; both cases return an empty list.
+    """
+    if input_path.is_file() and input_path.suffix == ".ll":
+        return [input_path]
+    if not input_path.is_dir():
+        print(f"Warning: Skipping invalid path {input_path}")
+        return []
+
+    found = list(input_path.rglob("*.ll"))
+    if not found:
+        print(f"No .ll files found in {input_path}")
+    return found
+
+
+def extract_function_names(ll_path, config):
+    """List the functions defined in an .ll file, using llvm-as and llvm-nm.
+
+    Args:
+        ll_path: Path of the LLVM IR (.ll) file.
+        config: Configuration dict; config["LLVM_BIN"] is the tool directory.
+
+    Returns:
+        Names of the symbols llvm-nm reports with type T or t, skipping
+        llvm.* names. An empty list, after printing a message, when none
+        are found or when any step fails.
+    """
+    import tempfile  # local import: only this function needs it
+
+    try:
+        # Per the LLVM LangRef, one '_' is prepended to global names only under
+        # Mach-O ("m:o") or 32-bit Windows COFF ("m:x") mangling. Only then is a
+        # single '_' removed; stripping every leading underscore would turn
+        # names like _Z3foov or __init into ones llvm-extract cannot find.
+        text = ll_path.read_text(errors="replace")
+        layout = re.search(r'^target datalayout\s*=\s*"([^"]*)"', text, re.MULTILINE)
+        specs = layout.group(1).split("-") if layout else []
+        has_prefix = "m:o" in specs or "m:x" in specs
+
+        # A private scratch directory cannot collide with other runs and is
+        # removed on every exit path, including errors
+        with tempfile.TemporaryDirectory(prefix="function_discovery_") as tmp:
+            # 1. Compile .ll file to .bc file
+            bc_file = Path(tmp) / f"{ll_path.stem}.bc"
+            as_cmd = [str(config["LLVM_BIN"] / "llvm-as"), str(ll_path), "-o", str(bc_file)]
+            subprocess.run(as_cmd, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+
+            # 2. Use llvm-nm to get all symbols
+            nm_cmd = [str(config["LLVM_BIN"] / "llvm-nm"), str(bc_file)]
+            result = subprocess.run(nm_cmd, capture_output=True, text=True, check=True)
+    except subprocess.CalledProcessError as e:
+        print(f"LLVM tool error when processing file {ll_path}: {e}")
+        return []
+    except Exception as e:
+        print(f"Failed to process file {ll_path}: {e}")
+        return []
+
+    # 3. Keep defined text-section symbols; lines look like "address type name"
+    func_names = []
+    for line in result.stdout.splitlines():
+        parts = line.split()
+        if len(parts) < 3 or parts[1] not in ('T', 't') or parts[2].startswith('llvm.'):
+            continue
+        name = parts[2][1:] if has_prefix and parts[2].startswith('_') else parts[2]
+        if name:
+            func_names.append(name)
+
+    if not func_names:
+        print(f"No function definitions found in {ll_path}")
+    return func_names
+
+
+def run_optimization_pipeline(bc_file, output_dir, func_name, config):
+    """Run opt on bc_file and capture its stdout and stderr in a log file.
+
+    The log is <output_dir>/<func_name>_opt_steps.log and the opt arguments
+    come from config["opt_flags"]. Returns the log path, or None (after
+    printing a message and deleting any partial log) when opt exits with an
+    error or anything else goes wrong.
+    """
+    opt_log_file = output_dir / f"{func_name}_opt_steps.log"
+
+    try:
+        command = [str(config["OPT"]), str(bc_file)]
+        command.extend(config["opt_flags"])
+        with open(opt_log_file, 'w') as log_file:
+            subprocess.run(command, stdout=log_file, stderr=subprocess.STDOUT, check=True)
+    except Exception as e:
+        if isinstance(e, subprocess.CalledProcessError):
+            print(f"Failed to record optimization steps: {e}")
+        else:
+            print(f"Unexpected error occurred during optimization step recording: {e}")
+        # Do not leave a partial log behind
+        if opt_log_file.exists():
+            opt_log_file.unlink()
+        return None
+    return opt_log_file
+
+
+def extract_clean_function_ll(ll_path, func, output_dir, config, enable_timeout=False):
+    """
+    Extract a single function from original LL file to clean LL file
+
+    llvm-extract writes the function to a temporary .bc file, llvm-dis turns
+    that into <output_dir>/<sanitised func>.ll, and the .bc file is removed.
+
+    Args:
+        ll_path: Original LLVM IR file path
+        func: Function name
+        output_dir: Output directory
+        config: Configuration dictionary
+        enable_timeout: Whether to apply config["timeout"] to each tool run
+
+    Returns:
+        Path to the extracted clean LL file on success. On failure a dict
+        {"error": <text>}: <text> is "llvm-extract" or "llvm-dis" when that
+        tool fails or times out, otherwise the message of the unexpected
+        exception. Partial output files are deleted before returning.
+    """
+    # Clean special characters in function name
+    clean_func = re.sub(r'[^\w]', '_', func)
+
+    # Define file paths
+    bc_file = output_dir / f"{clean_func}.bc"
+    clean_ll_file = output_dir / f"{clean_func}.ll"
+
+    timeout = config.get("timeout") if enable_timeout else None
+
+    try:
+        # (step name, command) pairs run in order; the step name doubles as
+        # the error text returned to the caller
+        steps = [
+            ("llvm-extract",
+             [str(config["EXTRACT"]), "-func=" + func, str(ll_path), "-o", str(bc_file)]),
+            ("llvm-dis",
+             [str(config["LLVM_BIN"] / "llvm-dis"), str(bc_file), "-o", str(clean_ll_file)]),
+        ]
+        for step, command in steps:
+            try:
+                subprocess.run(command, check=True, stdout=subprocess.DEVNULL,
+                               stderr=subprocess.DEVNULL, timeout=timeout)
+            except (subprocess.TimeoutExpired, subprocess.CalledProcessError):
+                raise Exception(step) from None
+
+        # The intermediate bitcode is no longer needed
+        if bc_file.exists():
+            bc_file.unlink()
+
+        return clean_ll_file
+
+    except Exception as e:
+        # Clean up potentially remaining files
+        for f in [bc_file, clean_ll_file]:
+            if f.exists():
+                f.unlink()
+        return {"error": str(e)}
+
+
+def process_clean_function_ll(clean_ll_path, func, output_dir, config, 
+                             keep_files=False, enable_timeout=False, 
+                             enable_opt_logging=False):
+    """
+    Compile a clean single-function LL file through llvm-as, CBE, GCC and Clang (optionally recording an optimization log) and return a result dict whose "status" is "" on success, otherwise the error message (the failing tool's name for tool failures)
+    """
+    # Replace every non-word character with "_" so the name can be used in file names
+    clean_func = re.sub(r'[^\w]', '_', func)
+    
+    # Define file paths
+    bc_file = output_dir / f"{clean_func}.bc"
+    c_file = output_dir / f"{clean_func}.c"
+    s_gcc = output_dir / f"{clean_func}_gcc.s"
+    s_clang = output_dir / f"{clean_func}_clang.s"
+    
+    timeout = config.get("timeout") if enable_timeout else None  # None: subprocesses run without a time limit
+    
+    try:
+        # 1. Compile clean ll file to bc file
+        try:
+            as_cmd = [str(config["LLVM_BIN"] / "llvm-as"), str(clean_ll_path), "-o", str(bc_file)]
+            subprocess.run(as_cmd, check=True, stdout=subprocess.DEVNULL, 
+                          stderr=subprocess.DEVNULL, timeout=timeout)
+        except subprocess.TimeoutExpired:
+            raise Exception("llvm-as")
+        except subprocess.CalledProcessError:
+            raise Exception("llvm-as")
+        
+        # 2. Run optimization pipeline and record (if needed)
+        opt_log_file = None
+        if enable_opt_logging:
+            opt_log_file = run_optimization_pipeline(bc_file, output_dir, clean_func, config)
+        
+        # 3. Generate C code
+        try:
+            cbe_cmd = [str(config["CBE"]), str(bc_file), "-o", str(c_file)]
+            subprocess.run(cbe_cmd, check=True, stdout=subprocess.DEVNULL, 
+                          stderr=subprocess.DEVNULL, timeout=timeout)
+        except subprocess.TimeoutExpired:
+            raise Exception("CBE")
+        except subprocess.CalledProcessError:
+            raise Exception("CBE")
+        
+        # 4. GCC compilation
+        try:
+            gcc_cmd = [str(config["GCC_BIN"]), *config["compile_flags"]["gcc"], str(c_file), "-o", str(s_gcc)]
+            subprocess.run(gcc_cmd, check=True, stdout=subprocess.DEVNULL, 
+                          stderr=subprocess.DEVNULL, timeout=timeout)
+        except subprocess.TimeoutExpired:
+            raise Exception("GCC")
+        except subprocess.CalledProcessError:
+            raise Exception("GCC")
+        
+        # 5. Clang compilation
+        try:
+            clang_cmd = [str(config["CLANG_BIN"]), *config["compile_flags"]["clang"], str(c_file), "-o", str(s_clang)]
+            subprocess.run(clang_cmd, check=True, stdout=subprocess.DEVNULL, 
+                          stderr=subprocess.DEVNULL, timeout=timeout)
+        except subprocess.TimeoutExpired:
+            raise Exception("Clang")
+        except subprocess.CalledProcessError:
+            raise Exception("Clang")
+        
+        # 6. Count instruction lines
+        gcc_count = strip_asm(s_gcc)
+        clang_count = strip_asm(s_clang)
+        
+        # 7. Calculate difference percentage
+        if gcc_count > 0:
+            diff_percent = ((clang_count - gcc_count) / gcc_count) * 100
+        else:
+            diff_percent = 0 if clang_count == 0 else float('inf')  # GCC count is zero: report inf instead of dividing by zero
+        
+        # 8. File cleanup strategy
+        if not keep_files:
+            # compare mode: keep only assembly files, delete intermediate files
+            for f in [bc_file, c_file]:
+                if f.exists():
+                    f.unlink()
+            # Delete clean ll file (temporary file)
+            if clean_ll_path.exists():
+                clean_ll_path.unlink()
+        else:
+            # extract mode: delete bc files but keep others
+            if bc_file.exists():
+                bc_file.unlink()
+        
+        return {
+            "func": func,
+            "gcc_lines": gcc_count,
+            "clang_lines": clang_count,
+            "diff_percent": diff_percent,
+            "opt_log_created": opt_log_file is not None if enable_opt_logging else False,
+            "status": ""
+        }
+    
+    except Exception as e:
+        # The exception text becomes the "status" of the returned result
+        error_msg = str(e)
+        if enable_timeout:
+            # compare mode (recognised here by enable_timeout): stay silent, the status field carries the error
+            pass
+        else:
+            # extract mode: print error information
+            print(f"Error processing function {func}: {error_msg}")
+        
+        # Remove every artifact of the failed run, including the input LL file
+        cleanup_files = [bc_file, c_file, s_gcc, s_clang, clean_ll_path]
+        for f in cleanup_files:
+            if f.exists():
+                f.unlink()
+        if enable_opt_logging:
+            opt_log = output_dir / f"{clean_func}_opt_steps.log"
+            if opt_log.exists():
+                opt_log.unlink()
+        return {
+            "func": func,
+            "gcc_lines": 0,
+            "clang_lines": 0,
+            "diff_percent": 0,
+            "opt_log_created": False,
+            "status": error_msg
+        }
+
+
+def process_function(ll_path, func, output_dir, config, 
+                    keep_files=False, enable_timeout=False, 
+                    enable_opt_logging=False, copy_ll=False):
+    """
+    Run the two-stage workflow for one function: extract a clean LL file, then compile and compare it
+    
+    Args:
+        ll_path: LLVM IR file path
+        func: Function name
+        output_dir: Output directory
+        config: Configuration dictionary
+        keep_files: Whether to keep intermediate files (default False, for compare mode)
+        enable_timeout: Whether to enable timeout mechanism (default False)
+        enable_opt_logging: Whether to enable optimization step recording (default False)
+        copy_ll: Together with keep_files, rename the extracted LL file to <clean_func>.ll in output_dir (default False)
+    
+    Returns:
+        Result dictionary (func, gcc_lines, clang_lines, diff_percent, opt_log_created, status); a failure is reported as a non-empty "status", not as None
+    """
+    
+    # Stage 1: Extract clean single-function LL file
+    clean_ll_result = extract_clean_function_ll(ll_path, func, output_dir, config, enable_timeout)
+    if isinstance(clean_ll_result, dict):
+        # Stage 1 signals failure by returning a dict with an "error" entry instead of a path
+        return {
+            "func": func,
+            "gcc_lines": 0,
+            "clang_lines": 0,
+            "diff_percent": 0,
+            "opt_log_created": False,
+            "status": clean_ll_result["error"]
+        }
+    
+    clean_ll_path = clean_ll_result
+    
+    # Stage 2: Process clean LL file
+    result = process_clean_function_ll(clean_ll_path, func, output_dir, config, 
+                                       keep_files, enable_timeout, enable_opt_logging)
+    
+    # Extract mode: keep the LL file by moving it to its final name
+    if keep_files and copy_ll:
+        # Rename only when the file is not already at the final path and still exists
+        clean_func = re.sub(r'[^\w]', '_', func)
+        final_ll_path = output_dir / f"{clean_func}.ll"
+        if clean_ll_path != final_ll_path and clean_ll_path.exists():
+            clean_ll_path.rename(final_ll_path)
+    
+    return result
+
+
+def process_function_with_detailed_logging(ll_path, func, output_dir, config):
+    """
+    Function processing with detailed logging, used by extract.py: each step's command and outcome is appended to <clean_func>_processing.log
+    The LL, C and assembly files are left in output_dir; the returned dict's "status" is "" on success, otherwise it identifies the failure
+    """
+    
+    # Clean special characters in function name
+    clean_func = re.sub(r'[^\w]', '_', func)
+    
+    # Create log file
+    log_file = output_dir / f"{clean_func}_processing.log"
+    
+    def log_message(message, also_print=True, add_newline=False):  # append "*** message ***" to the log file and optionally echo it
+        if add_newline:
+            log_entry = f"\n*** {message} ***"
+        else:
+            log_entry = f"*** {message} ***"
+        with open(log_file, 'a', encoding='utf-8') as f:
+            f.write(log_entry + '\n')
+        if also_print:
+            print(f"  {message.replace('*** ', '').replace(' ***', '')}")
+    
+    # Start processing log
+    log_message("Processing Function Start")
+    log_message(f"Function Name: {func}")
+    log_message(f"Clean Name: {clean_func}")
+    log_message(f"Source File: {ll_path}")
+    log_message(f"Output Directory: {output_dir}")
+    
+    # Define all file paths
+    bc_temp = output_dir / f"{clean_func}_temp.bc"
+    clean_ll = output_dir / f"{clean_func}_extracted.ll"
+    bc_final = output_dir / f"{clean_func}_final.bc"
+    c_file = output_dir / f"{clean_func}.c"
+    s_gcc = output_dir / f"{clean_func}_gcc.s"
+    s_clang = output_dir / f"{clean_func}_clang.s"
+    opt_log = output_dir / f"{clean_func}_opt_steps.log"
+    
+    try:
+        # Step 1: llvm-extract Function Extraction
+        log_message("Step 1: llvm-extract Function Extraction", add_newline=True)
+        extract_cmd = [str(config["EXTRACT"]), "-func="+func, str(ll_path), "-o", str(bc_temp)]
+        log_message(f"Command: {' '.join(extract_cmd)}")
+        
+        result = subprocess.run(extract_cmd, capture_output=True, text=True, timeout=config.get("timeout"))
+        if result.returncode != 0:
+            log_message(f"llvm-extract Failed (exit code: {result.returncode})")
+            if result.stderr.strip():
+                log_message(f"stderr: {result.stderr}")
+            if result.stdout.strip():
+                log_message(f"stdout: {result.stdout}")
+            return {
+                "func": func,
+                "gcc_lines": 0,
+                "clang_lines": 0, 
+                "diff_percent": 0,
+                "opt_log_created": False,
+                "status": "llvm-extract"
+            }
+        
+        log_message("llvm-extract Success")
+        log_message(f"Generated File: {bc_temp} (size: {bc_temp.stat().st_size} bytes)")
+        
+        # Step 2: llvm-dis Generate Readable LL
+        log_message("Step 2: llvm-dis Generate Readable LL", add_newline=True)
+        dis_cmd = [str(config["LLVM_BIN"] / "llvm-dis"), str(bc_temp), "-o", str(clean_ll)]
+        log_message(f"Command: {' '.join(dis_cmd)}")
+        
+        result = subprocess.run(dis_cmd, capture_output=True, text=True, timeout=config.get("timeout"))
+        if result.returncode != 0:
+            log_message(f"llvm-dis Failed (exit code: {result.returncode})")
+            if result.stderr.strip():
+                log_message(f"stderr: {result.stderr}")
+            if result.stdout.strip():
+                log_message(f"stdout: {result.stdout}")
+            return {
+                "func": func,
+                "gcc_lines": 0,
+                "clang_lines": 0,
+                "diff_percent": 0,
+                "opt_log_created": False,
+                "status": "llvm-dis"
+            }
+        
+        log_message("llvm-dis Success")
+        log_message(f"Generated File: {clean_ll} (size: {clean_ll.stat().st_size} bytes)")
+        
+        # Step 3: llvm-as Recompile BC File
+        log_message("Step 3: llvm-as Recompile BC File", add_newline=True)
+        as_cmd = [str(config["LLVM_BIN"] / "llvm-as"), str(clean_ll), "-o", str(bc_final)]
+        log_message(f"Command: {' '.join(as_cmd)}")
+        
+        result = subprocess.run(as_cmd, capture_output=True, text=True, timeout=config.get("timeout"))
+        if result.returncode != 0:
+            log_message(f"llvm-as Failed (exit code: {result.returncode})")
+            if result.stderr.strip():
+                log_message(f"stderr: {result.stderr}")
+            if result.stdout.strip():
+                log_message(f"stdout: {result.stdout}")
+            return {
+                "func": func,
+                "gcc_lines": 0,
+                "clang_lines": 0,
+                "diff_percent": 0,
+                "opt_log_created": False,
+                "status": "llvm-as"
+            }
+        
+        log_message("llvm-as Success")
+        log_message(f"Generated File: {bc_final} (size: {bc_final.stat().st_size} bytes)")
+        
+        # opt step has been removed, continue with optimization recording but not logged to main log
+        opt_log_created = False
+        try:
+            opt_cmd = [
+                str(config["OPT"]),
+                str(bc_final),
+                *config["opt_flags"]
+            ]
+            
+            with open(opt_log, 'w') as f:
+                result = subprocess.run(opt_cmd, stdout=f, stderr=subprocess.STDOUT, timeout=config.get("timeout"))
+            
+            if result.returncode == 0:
+                opt_log_created = True
+            else:
+                if opt_log.exists():
+                    opt_log.unlink()
+        except Exception:
+            if opt_log.exists():
+                opt_log.unlink()
+        
+        # Step 4: CBE C Code Generation
+        log_message("Step 4: CBE C Code Generation", add_newline=True)
+        cbe_cmd = [str(config["CBE"]), str(bc_final), "-o", str(c_file)]
+        log_message(f"Command: {' '.join(cbe_cmd)}")
+        
+        result = subprocess.run(cbe_cmd, capture_output=True, text=True, timeout=config.get("timeout"))
+        if result.returncode != 0:
+            log_message(f"CBE Failed (exit code: {result.returncode})")
+            log_message(f"stderr: {result.stderr}")
+            log_message(f"stdout: {result.stdout}")
+            return {
+                "func": func,
+                "gcc_lines": 0,
+                "clang_lines": 0,
+                "diff_percent": 0,
+                "opt_log_created": opt_log_created,
+                "status": "cbe timeout"
+            }
+        
+        log_message("CBE Success")
+        log_message(f"Generated File: {c_file} (size: {c_file.stat().st_size} bytes)")
+        
+        # Step 5: GCC Compile Assembly
+        log_message("Step 5: GCC Compile Assembly", add_newline=True)
+        gcc_cmd = [str(config["GCC_BIN"]), *config["compile_flags"]["gcc"], str(c_file), "-o", str(s_gcc)]
+        log_message(f"Command: {' '.join(gcc_cmd)}")
+        
+        result = subprocess.run(gcc_cmd, capture_output=True, text=True, timeout=config.get("timeout"))
+        if result.returncode != 0:
+            log_message(f"GCC Failed (exit code: {result.returncode})")
+            if result.stderr.strip():
+                log_message(f"stderr: {result.stderr}")
+            if result.stdout.strip():
+                log_message(f"stdout: {result.stdout}")
+            return {
+                "func": func,
+                "gcc_lines": 0,
+                "clang_lines": 0,
+                "diff_percent": 0,
+                "opt_log_created": opt_log_created,
+                "status": "GCC"
+            }
+        
+        log_message("GCC Success")
+        log_message(f"Generated File: {s_gcc} (size: {s_gcc.stat().st_size} bytes)")
+        
+        # Step 6: Clang Compile Assembly
+        log_message("Step 6: Clang Compile Assembly", add_newline=True)
+        clang_cmd = [str(config["CLANG_BIN"]), *config["compile_flags"]["clang"], str(c_file), "-o", str(s_clang)]
+        log_message(f"Command: {' '.join(clang_cmd)}")
+        
+        result = subprocess.run(clang_cmd, capture_output=True, text=True, timeout=config.get("timeout"))
+        if result.returncode != 0:
+            log_message(f"Clang Failed (exit code: {result.returncode})")
+            if result.stderr.strip():
+                log_message(f"stderr: {result.stderr}")
+            if result.stdout.strip():
+                log_message(f"stdout: {result.stdout}")
+            return {
+                "func": func,
+                "gcc_lines": 0,
+                "clang_lines": 0,
+                "diff_percent": 0,
+                "opt_log_created": opt_log_created,
+                "status": "Clang"
+            }
+        
+        log_message("Clang Success")
+        log_message(f"Generated File: {s_clang} (size: {s_clang.stat().st_size} bytes)")
+        
+        # Step 7: Assembly Instruction Counting
+        log_message("Step 7: Assembly Instruction Counting", add_newline=True)
+        gcc_count = strip_asm(s_gcc)
+        clang_count = strip_asm(s_clang)
+        
+        if gcc_count > 0:
+            diff_percent = ((clang_count - gcc_count) / gcc_count) * 100
+        else:
+            diff_percent = 0 if clang_count == 0 else float('inf')
+        
+        log_message(f"GCC Instructions: {gcc_count}")
+        log_message(f"Clang Instructions: {clang_count}")
+        log_message(f"Difference: {diff_percent:.1f}%")
+        
+        # Remove the intermediate bitcode files; the LL, C and assembly outputs are kept
+        if bc_temp.exists():
+            bc_temp.unlink()
+        if bc_final.exists():
+            bc_final.unlink()
+            
+        log_message("Processing Complete")
+        log_message("Final Status: Success")
+        
+        return {
+            "func": func,
+            "gcc_lines": gcc_count,
+            "clang_lines": clang_count,
+            "diff_percent": diff_percent,
+            "opt_log_created": opt_log_created,
+            "status": ""
+        }
+        
+    except subprocess.TimeoutExpired as e:
+        log_message(f"Processing Timeout: {e}")
+        return {
+            "func": func,
+            "gcc_lines": 0,
+            "clang_lines": 0,
+            "diff_percent": 0,
+            "opt_log_created": False,
+            "status": "timeout"
+        }
+    except Exception as e:
+        log_message(f"Processing Exception: {e}")
+        return {
+            "func": func,
+            "gcc_lines": 0,
+            "clang_lines": 0,
+            "diff_percent": 0,
+            "opt_log_created": False,
+            "status": f"exception: {str(e)}"
+        }
\ No newline at end of file
-- 
Gitee


From daf44e0278b0b26f5f7e712ee500f4a4d2e0d604 Mon Sep 17 00:00:00 2001
From: Yui5427 <785369607@qq.com>
Date: Wed, 3 Sep 2025 17:09:14 +0800
Subject: [PATCH 2/2] fix bug in xlsx

---
 clang-tools-extra/clang-gcc-bench/config.json |  12 +-
 .../clang-gcc-bench/csvToXlsx.py              |  21 ++-
 clang-tools-extra/clang-gcc-bench/extract.py  |  17 +-
 clang-tools-extra/clang-gcc-bench/utils.py    | 159 ++++++++++++++----
 4 files changed, 165 insertions(+), 44 deletions(-)

diff --git a/clang-tools-extra/clang-gcc-bench/config.json b/clang-tools-extra/clang-gcc-bench/config.json
index 026549322541..e24d23bb2f0c 100644
--- a/clang-tools-extra/clang-gcc-bench/config.json
+++ b/clang-tools-extra/clang-gcc-bench/config.json
@@ -17,16 +17,22 @@
       "-S"
     ],
     "clang": [
+      "--target=aarch64-linux-gnu",
       "-march=armv8.6-a+sve",
       "-O3",
       "-fomit-frame-pointer",
       "-S"
     ]
   },
-  "opt_flags": [
+  "clang_opt_flags": [
+    "--target=aarch64-linux-gnu",
+    "-march=armv8.6-a+sve",
     "-O3",
-    "-print-after-all",
-    "-disable-output"
+    "-fomit-frame-pointer",
+    "-S",
+    "-emit-llvm",
+    "-mllvm",
+    "-print-after-all"
   ],
   "timeout": 5
 }
\ No newline at end of file
diff --git a/clang-tools-extra/clang-gcc-bench/csvToXlsx.py b/clang-tools-extra/clang-gcc-bench/csvToXlsx.py
index dd537e9b1278..b2eb3b04b754 100644
--- a/clang-tools-extra/clang-gcc-bench/csvToXlsx.py
+++ b/clang-tools-extra/clang-gcc-bench/csvToXlsx.py
@@ -29,6 +29,12 @@ def csv_to_xlsx(csv_file, xlsx_file=None):
         # Read CSV file
         df = pd.read_csv(csv_path)
         
+        # Convert percentage columns to numeric
+        for col in df.columns:
+            if 'diff' in col.lower() and '%' in col:
+                # Remove % sign and convert to float
+                df[col] = pd.to_numeric(df[col].astype(str).str.replace('%', ''), errors='coerce')
+        
         # Write to Excel with formatting
         with pd.ExcelWriter(xlsx_path, engine='openpyxl') as writer:
             df.to_excel(writer, index=False, sheet_name='Results')
@@ -51,8 +57,21 @@ def csv_to_xlsx(csv_file, xlsx_file=None):
                 adjusted_width = min(max_length + 2, 50)  # Cap at 50 characters
                 worksheet.column_dimensions[column_letter].width = adjusted_width
             
-            # Format header row
+            # Format percentage columns
             from openpyxl.styles import Font, PatternFill
+            
+            # Find percentage columns and format them
+            for col_idx, col_name in enumerate(df.columns, 1):
+                if 'diff' in col_name.lower() and '%' in col_name:
+                    col_letter = worksheet.cell(row=1, column=col_idx).column_letter
+                    for row in range(2, worksheet.max_row + 1):
+                        cell = worksheet[f"{col_letter}{row}"]
+                        if cell.value is not None:
+                            cell.number_format = '0.00%'
+                            # Convert the value to decimal for percentage display
+                            cell.value = cell.value / 100 if isinstance(cell.value, (int, float)) else cell.value
+            
+            # Format header row
             header_font = Font(bold=True)
             header_fill = PatternFill(start_color='D3D3D3', end_color='D3D3D3', fill_type='solid')
             
diff --git a/clang-tools-extra/clang-gcc-bench/extract.py b/clang-tools-extra/clang-gcc-bench/extract.py
index fd0e91d4278c..29c02b3e6638 100644
--- a/clang-tools-extra/clang-gcc-bench/extract.py
+++ b/clang-tools-extra/clang-gcc-bench/extract.py
@@ -44,11 +44,20 @@ def main():
     else:
         func_names = all_func_names
         print(f"Processing {ll_path.name} ({len(func_names)} functions)")
+        
+        # Clean up entire output directory for this ll file if processing all functions
+        ll_filename = ll_path.stem
+        ll_output_dir = output_root / ll_filename
+        if ll_output_dir.exists():
+            shutil.rmtree(ll_output_dir)
+            print(f"Cleaned up existing output directory: {ll_output_dir}")
+        ll_output_dir.mkdir(parents=True, exist_ok=True)
     
-    # Create output directory based on LL filename
-    ll_filename = ll_path.stem
-    ll_output_dir = output_root / ll_filename
-    ll_output_dir.mkdir(parents=True, exist_ok=True)
+    # Create output directory based on LL filename (if not already created)
+    if not 'll_output_dir' in locals():
+        ll_filename = ll_path.stem
+        ll_output_dir = output_root / ll_filename
+        ll_output_dir.mkdir(parents=True, exist_ok=True)
     
     # Copy complete LL file to parent directory
     full_ll_copy = ll_output_dir / f"{ll_filename}.ll"
diff --git a/clang-tools-extra/clang-gcc-bench/utils.py b/clang-tools-extra/clang-gcc-bench/utils.py
index aab5177a5542..a1c865ba2ac5 100644
--- a/clang-tools-extra/clang-gcc-bench/utils.py
+++ b/clang-tools-extra/clang-gcc-bench/utils.py
@@ -35,7 +35,7 @@ def load_config():
         "CBE": CBE,
         "OPT": OPT,
         "compile_flags": config["compile_flags"],
-        "opt_flags": config.get("opt_flags", []),
+        "clang_opt_flags": config.get("clang_opt_flags", []),
         "timeout": config["timeout"],
         "input_paths": config.get("input_paths", []),
         "output_dir": config.get("output_dir", "comparison_results")
@@ -138,19 +138,29 @@ def extract_function_names(ll_path, config):
         return []
 
 
-def run_optimization_pipeline(bc_file, output_dir, func_name, config):
-    """Run optimization pipeline and record results of each optimization step"""
-    opt_log_file = output_dir / f"{func_name}_opt_steps.log"
+def run_optimization_pipeline(output_dir, func_name, config):
+    """Run clang optimization pipeline and record results of each optimization step"""
+    # Create subdirectories for logs
+    logs_dir = output_dir / "logs"
+    if not logs_dir.exists():
+        logs_dir.mkdir(exist_ok=True)
+    
+    opt_log_file = logs_dir / f"{func_name}_clang_opt_steps.log"
+    c_file = output_dir / f"{func_name}.c"
     
     try:
-        opt_cmd = [
-            str(config["OPT"]),
-            str(bc_file),
-            *config["opt_flags"]
+        clang_cmd = [
+            str(config["LLVM_BIN"] / "clang"),
+            *config["clang_opt_flags"],
+            str(c_file),
+            "-o", "/dev/null"
         ]
         
         with open(opt_log_file, 'w') as log_file:
-            subprocess.run(opt_cmd, stdout=log_file, stderr=subprocess.STDOUT, check=True)
+            result = subprocess.run(clang_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=True)
+            # Write both stdout and stderr to log file  
+            log_file.write(result.stdout)
+            log_file.write(result.stderr)
         
         return opt_log_file
     except subprocess.CalledProcessError as e:
@@ -165,6 +175,66 @@ def run_optimization_pipeline(bc_file, output_dir, func_name, config):
         return None
 
 
+def run_gcc_optimization_pipeline(output_dir, func_name, config):
+    """Recompile <func_name>.c with GCC dump flags and write logs/<func_name>_gcc_opt_steps.log; return the log path, or None on failure"""
+    # Create subdirectories for logs
+    logs_dir = output_dir / "logs"
+    logs_dir.mkdir(exist_ok=True)
+    
+    # Start from an empty GCC dumps subdirectory so dumps of an earlier run are not listed in the log
+    gcc_dumps_dir = logs_dir / "gcc_dumps"
+    if gcc_dumps_dir.exists():
+        shutil.rmtree(gcc_dumps_dir)
+    gcc_dumps_dir.mkdir(exist_ok=True)
+    
+    opt_log_file = logs_dir / f"{func_name}_gcc_opt_steps.log"
+    c_file = output_dir / f"{func_name}.c"
+    
+    try:
+        # Run GCC with dumps in the gcc_dumps directory
+        gcc_cmd = [
+            str(config["GCC_BIN"]),
+            *config["compile_flags"]["gcc"],
+            "-fdump-tree-all",
+            "-fdump-rtl-all",
+            "-dumpdir", str(gcc_dumps_dir) + "/",
+            str(c_file),
+            "-o", "/dev/null"
+        ]
+        # Bound the run with the configured timeout like the other pipeline steps; a timeout lands in the generic handler below
+        result = subprocess.run(gcc_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=True, timeout=config.get("timeout"))
+        
+        # Collect all dump files
+        dump_files = list(gcc_dumps_dir.glob("*"))
+        
+        with open(opt_log_file, 'w') as log_file:
+            log_file.write("=== GCC Optimization Pipeline Log ===\n")
+            log_file.write(f"Command: {' '.join(gcc_cmd)}\n")
+            log_file.write(f"Dump Directory: {gcc_dumps_dir}\n")
+            log_file.write(f"Generated {len(dump_files)} dump files\n\n")
+            
+            log_file.write("=== STDOUT ===\n")
+            log_file.write(result.stdout if result.stdout else "(empty)\n")
+            log_file.write("\n=== STDERR ===\n")
+            log_file.write(result.stderr if result.stderr else "(empty)\n")
+            
+            log_file.write(f"\n=== Generated Dump Files ({len(dump_files)}) ===\n")
+            for dump_file in sorted(dump_files):
+                log_file.write(f"- {dump_file.name}\n")
+        
+        return opt_log_file
+    except subprocess.CalledProcessError as e:
+        print(f"Failed to record GCC optimization steps: {e}")
+        if opt_log_file.exists():
+            opt_log_file.unlink()
+        return None
+    except Exception as e:
+        print(f"Unexpected error occurred during GCC optimization step recording: {e}")
+        if opt_log_file.exists():
+            opt_log_file.unlink()
+        return None
+
+
 def extract_clean_function_ll(ll_path, func, output_dir, config, enable_timeout=False):
     """
     Extract a single function from original LL file to clean LL file
@@ -252,21 +322,21 @@ def process_clean_function_ll(clean_ll_path, func, output_dir, config,
         except subprocess.CalledProcessError:
             raise Exception("llvm-as")
         
-        # 2. Run optimization pipeline and record (if needed)
-        opt_log_file = None
-        if enable_opt_logging:
-            opt_log_file = run_optimization_pipeline(bc_file, output_dir, clean_func, config)
-        
-        # 3. Generate C code
+        # 2. Generate C code
         try:
             cbe_cmd = [str(config["CBE"]), str(bc_file), "-o", str(c_file)]
             subprocess.run(cbe_cmd, check=True, stdout=subprocess.DEVNULL, 
                           stderr=subprocess.DEVNULL, timeout=timeout)
         except subprocess.TimeoutExpired:
             raise Exception("CBE")
         except subprocess.CalledProcessError:
             raise Exception("CBE")
         
+        # 3. Record the clang optimization pipeline (if needed); it compiles the C file generated in step 2, so it must run after CBE
+        opt_log_file = None
+        if enable_opt_logging:
+            opt_log_file = run_optimization_pipeline(output_dir, clean_func, config)
+        
         # 4. GCC compilation
         try:
             gcc_cmd = [str(config["GCC_BIN"]), *config["compile_flags"]["gcc"], str(c_file), "-o", str(s_gcc)]
@@ -408,6 +478,11 @@ def process_function_with_detailed_logging(ll_path, func, output_dir, config):
     # Clean special characters in function name
     clean_func = re.sub(r'[^\w]', '_', func)
     
+    # Clean up function output directory if it exists
+    if output_dir.exists():
+        shutil.rmtree(output_dir)
+    output_dir.mkdir(parents=True, exist_ok=True)
+    
     # Create log file
     log_file = output_dir / f"{clean_func}_processing.log"
     
@@ -430,12 +505,11 @@ def process_function_with_detailed_logging(ll_path, func, output_dir, config):
     
     # Define all file paths
     bc_temp = output_dir / f"{clean_func}_temp.bc"
-    clean_ll = output_dir / f"{clean_func}_extracted.ll"
+    clean_ll = output_dir / f"{clean_func}.ll"
     bc_final = output_dir / f"{clean_func}_final.bc"
     c_file = output_dir / f"{clean_func}.c"
     s_gcc = output_dir / f"{clean_func}_gcc.s"
     s_clang = output_dir / f"{clean_func}_clang.s"
-    opt_log = output_dir / f"{clean_func}_opt_steps.log"
     
     try:
         # Step 1: llvm-extract Function Extraction
@@ -510,27 +584,6 @@ def process_function_with_detailed_logging(ll_path, func, output_dir, config):
         log_message("llvm-as Success")
         log_message(f"Generated File: {bc_final} (size: {bc_final.stat().st_size} bytes)")
         
-        # opt step has been removed, continue with optimization recording but not logged to main log
-        opt_log_created = False
-        try:
-            opt_cmd = [
-                str(config["OPT"]),
-                str(bc_final),
-                *config["opt_flags"]
-            ]
-            
-            with open(opt_log, 'w') as f:
-                result = subprocess.run(opt_cmd, stdout=f, stderr=subprocess.STDOUT, timeout=config.get("timeout"))
-            
-            if result.returncode == 0:
-                opt_log_created = True
-            else:
-                if opt_log.exists():
-                    opt_log.unlink()
-        except Exception:
-            if opt_log.exists():
-                opt_log.unlink()
-        
         # Step 4: CBE C Code Generation
         log_message("Step 4: CBE C Code Generation", add_newline=True)
         cbe_cmd = [str(config["CBE"]), str(bc_final), "-o", str(c_file)]
@@ -546,13 +599,44 @@ def process_function_with_detailed_logging(ll_path, func, output_dir, config):
                 "gcc_lines": 0,
                 "clang_lines": 0,
                 "diff_percent": 0,
-                "opt_log_created": opt_log_created,
-                "status": "cbe timeout"
+                "opt_log_created": False,
+                "gcc_opt_log_created": False,
+                "status": "CBE"
             }
         
         log_message("CBE Success")
         log_message(f"Generated File: {c_file} (size: {c_file.stat().st_size} bytes)")
         
+        # Generate clang optimization log
+        log_message("Step 4.5: Clang Optimization Log Generation", add_newline=True)
+        opt_log_created = False
+        try:
+            opt_result = run_optimization_pipeline(output_dir, clean_func, config)
+            if opt_result and opt_result.exists():
+                opt_log_created = True
+                log_message("Clang Optimization Log Success")
+                log_message(f"Generated File: {opt_result} (size: {opt_result.stat().st_size} bytes)")
+            else:
+                log_message("Clang Optimization Log Failed - No file created")
+        except Exception as e:
+            log_message(f"Clang Optimization Log Failed: {e}")
+            pass
+        
+        # Generate GCC optimization log  
+        log_message("Step 4.6: GCC Optimization Log Generation", add_newline=True)
+        gcc_opt_log_created = False
+        try:
+            gcc_opt_result = run_gcc_optimization_pipeline(output_dir, clean_func, config)
+            if gcc_opt_result and gcc_opt_result.exists():
+                gcc_opt_log_created = True
+                log_message("GCC Optimization Log Success")
+                log_message(f"Generated File: {gcc_opt_result} (size: {gcc_opt_result.stat().st_size} bytes)")
+            else:
+                log_message("GCC Optimization Log Failed - No file created")
+        except Exception as e:
+            log_message(f"GCC Optimization Log Failed: {e}")
+            pass
+        
         # Step 5: GCC Compile Assembly
         log_message("Step 5: GCC Compile Assembly", add_newline=True)
         gcc_cmd = [str(config["GCC_BIN"]), *config["compile_flags"]["gcc"], str(c_file), "-o", str(s_gcc)]
         # Step 5: GCC Compile Assembly
         log_message("Step 5: GCC Compile Assembly", add_newline=True)
         gcc_cmd = [str(config["GCC_BIN"]), *config["compile_flags"]["gcc"], str(c_file), "-o", str(s_gcc)]
@@ -570,7 +654,8 @@ def process_function_with_detailed_logging(ll_path, func, output_dir, config):
                 "gcc_lines": 0,
                 "clang_lines": 0,
                 "diff_percent": 0,
-                "opt_log_created": opt_log_created,
+                "opt_log_created": False,
+                "gcc_opt_log_created": False,
                 "status": "GCC"
             }
         
@@ -594,7 +679,8 @@ def process_function_with_detailed_logging(ll_path, func, output_dir, config):
                 "gcc_lines": 0,
                 "clang_lines": 0,
                 "diff_percent": 0,
-                "opt_log_created": opt_log_created,
+                "opt_log_created": False,
+                "gcc_opt_log_created": False,
                 "status": "Clang"
             }
         
@@ -630,6 +716,7 @@ def process_function_with_detailed_logging(ll_path, func, output_dir, config):
             "clang_lines": clang_count,
             "diff_percent": diff_percent,
             "opt_log_created": opt_log_created,
+            "gcc_opt_log_created": gcc_opt_log_created,
             "status": ""
         }
         
-- 
Gitee