From e14ef324514163925e1f4e520e12611b0183c338 Mon Sep 17 00:00:00 2001 From: Yui5427 <785369607@qq.com> Date: Mon, 1 Sep 2025 19:48:02 +0800 Subject: [PATCH 1/2] Clang-GCC-Bench --- clang-tools-extra/clang-gcc-bench/README.md | 184 +++++ clang-tools-extra/clang-gcc-bench/compare.py | 139 ++++ clang-tools-extra/clang-gcc-bench/config.json | 32 + .../clang-gcc-bench/csvToXlsx.py | 148 ++++ clang-tools-extra/clang-gcc-bench/extract.py | 105 +++ clang-tools-extra/clang-gcc-bench/utils.py | 655 ++++++++++++++++++ 6 files changed, 1263 insertions(+) create mode 100644 clang-tools-extra/clang-gcc-bench/README.md create mode 100644 clang-tools-extra/clang-gcc-bench/compare.py create mode 100644 clang-tools-extra/clang-gcc-bench/config.json create mode 100644 clang-tools-extra/clang-gcc-bench/csvToXlsx.py create mode 100644 clang-tools-extra/clang-gcc-bench/extract.py create mode 100644 clang-tools-extra/clang-gcc-bench/utils.py diff --git a/clang-tools-extra/clang-gcc-bench/README.md b/clang-tools-extra/clang-gcc-bench/README.md new file mode 100644 index 000000000000..32b7ae868043 --- /dev/null +++ b/clang-tools-extra/clang-gcc-bench/README.md @@ -0,0 +1,184 @@ +# Clang-GCC-Bench + +基于LLVM的GCC 15与Clang 14编译器性能对比工具,通过汇编指令分析比较编译器优化效果。 + +## 工作流程 + +``` +LLVM IR文件 + | + ▼ +llvm-nm 提取函数名 + | + ▼ +llvm-extract 提取单函数 + | + ▼ +llvm-dis 生成可读IR + | + ▼ +llvm-as 重新编译 (CBE需要干净的.bc格式) + | + ▼ +llvm-cbe 生成C代码 + | + ▼ + |————————————————————————————————| + | | + ▼ ▼ +GCC编译 Clang编译 + | | + ▼ ▼ +GCC汇编文件 Clang汇编文件 + | | + |————————————————————————————————| + | + ▼ + 指令统计对比 + | + ▼ + CSV结果报告 + | + ▼ + Excel转换 +``` + +## 使用方法 + +### 批量对比 (compare.py) + +**使用配置文件中的路径进行批量对比:** + +```bash +python3 compare.py -c --all +``` + +**参数说明:** + +- `-c, --use-config`: 使用config.json中配置的输入路径 +- `--all`: 输出所有函数结果(包括失败的) +- 不加 `--all`: 只输出GCC表现更好的函数(指令数更少) + +**指定单个文件或目录:** + +```bash +python3 compare.py input.ll -o results.csv +python3 compare.py /path/to/directory -o output_dir +``` + +### 单文件详细分析 
(extract.py) + +**完整功能分析:** + +```bash +python3 extract.py input.ll -o output_dir +``` + +**指定单个函数:** + +```bash +python3 extract.py input.ll -f function_name -o output_dir +``` + +**extract.py 特有输出:** + +- **详细处理日志**: `function_name_processing.log` - 记录每个编译步骤的详细信息和错误 +- **优化管道日志**: `function_name_opt_steps.log` - 记录LLVM优化过程的每一步 +- **中间文件保留**: 保留所有中间文件(.ll, .c, .s)用于调试 +- **实时状态输出**: 显示每个函数的处理状态和错误信息 + +**处理日志示例:** + +``` +*** Processing Function Start *** +*** Function Name: test_fdiv *** +*** Step 1: llvm-extract Function Extraction *** +*** Command: /path/to/llvm-extract -func=test_fdiv input.ll -o temp.bc *** +*** llvm-extract Success *** +*** Step 4: CBE C Code Generation *** +*** CBE Failed (exit code: 1) *** +*** stderr: error: undefined struct 'l_vector_2_bool' *** +``` + +### Excel转换 (csvToXlsx.py) + +**自动转换配置目录下所有CSV文件:** + +```bash +python3 csvToXlsx.py +``` + +**转换指定目录:** + +```bash +python3 csvToXlsx.py -d /path/to/csv/directory +``` + +**转换单个文件:** + +```bash +python3 csvToXlsx.py input.csv output.xlsx +``` + +## 配置文件 + +编辑 `config.json` 设置工具路径和编译参数: + +```json +{ + "paths": { + "llvm_bin": "/path/to/llvm/build/bin", + "gcc_bin": "/path/to/gcc" + }, + "input_paths": ["/path/to/llvm/tests"], + "output_dir": "comparison_results", + "timeout": 5 +} +``` + +## 输出格式 + +| Function | GCC Instructions | Clang Instructions | Diff % | Status | +| ----------- | ---------------- | ------------------ | ------ | ------ | +| test_func | 15 | 18 | 20.0 | | +| failed_func | - | - | - | CBE | + +## 依赖工具 + +- LLVM工具链 (llvm-extract, llvm-nm, llvm-cbe等) +- GCC & Clang 编译器 +- Python 3 + +## LLVM编译流程 + +```bash +$ git clone https://gitee.com/openeuler/llvm-project.git +$ cd llvm-project +$ git checkout dev_19.1.7 +$ mkdir llvm/build +$ cd llvm/build +$ cmake .. 
-G "Ninja" -DCMAKE_BUILD_TYPE=Debug -DLLVM_PARALLEL_LINK_JOBS=1 -DLLVM_TARGETS_TO_BUILD="AArch64" +$ ninja +``` + +## LLVM-CBE编译 + +```bash +$ cd llvm-project/llvm/projects +$ git clone https://github.com/JuliaHubOSS/llvm-cbe +$ cd ../build +$ cmake -S .. -G "Ninja" +$ ninja llvm-cbe +``` + +## 错误状态说明 + +### CSV输出中的Status字段 + +- **空白**: 处理成功 +- `llvm-extract`: 函数提取失败 +- `llvm-dis`: IR反汇编失败 +- `llvm-as`: IR汇编失败 +- `CBE` / `cbe超时`: C代码生成失败/超时 +- `GCC`: GCC编译失败 +- `Clang`: Clang编译失败 diff --git a/clang-tools-extra/clang-gcc-bench/compare.py b/clang-tools-extra/clang-gcc-bench/compare.py new file mode 100644 index 000000000000..9fbdf37f95fd --- /dev/null +++ b/clang-tools-extra/clang-gcc-bench/compare.py @@ -0,0 +1,139 @@ +import os +import re +import subprocess +import argparse +import csv +from pathlib import Path +from utils import load_config, strip_asm, process_function, collect_ll_files, extract_function_names + +config = load_config() + +def generate_filename_from_path(path): + """Generate short filename from path""" + import hashlib + path_str = str(path) + # Take last two directory levels of path + first 8 characters of hash + parts = Path(path_str).parts + if len(parts) >= 2: + short_name = f"{parts[-2]}_{parts[-1]}" + else: + short_name = parts[-1] if parts else "unknown" + + # Add hash to avoid conflicts + hash_short = hashlib.md5(path_str.encode()).hexdigest()[:8] + return f"{short_name}_{hash_short}.csv" + +def main(): + # Parse command line arguments + parser = argparse.ArgumentParser(description="Compare assembly instruction counts generated by GCC and Clang") + parser.add_argument("-c", "--use-config", action="store_true", help="Use paths from configuration file") + parser.add_argument("input", nargs="?", help="Input .ll file or directory containing .ll files (optional if using -c parameter)") + parser.add_argument("-o", "--output", help="Output directory or CSV file path") + parser.add_argument("--all", action="store_true", help="Output all function 
results, default only outputs functions where GCC performs better") + + args = parser.parse_args() + + # Determine input paths and output directory + if args.use_config: + if "input_paths" not in config: + print("Error: Missing 'input_paths' field in configuration file") + print(f"Fields contained in configuration file: {list(config.keys())}") + return + input_paths = [Path(p) for p in config["input_paths"]] + output_dir = Path(config.get("output_dir", "comparison_results")) + else: + if not args.input: + parser.error("Must provide input path or use -c parameter") + input_paths = [Path(args.input)] + if args.output: + output_dir = Path(args.output) + else: + output_dir = Path("comparison_results") + + # Create output directory + output_dir.mkdir(parents=True, exist_ok=True) + + # Temporary working directory + work_dir = Path("temp_workdir") + work_dir.mkdir(exist_ok=True) + + try: + # Process each input path + for input_path in input_paths: + # Collect .ll files to process + ll_files = collect_ll_files(input_path) + if not ll_files: + continue + + # Generate output filename + output_filename = generate_filename_from_path(input_path) + output_file = output_dir / output_filename + + print(f"\nProcessing input path: {input_path}") + print(f"Output file: {output_file}") + + # Prepare CSV output + with open(output_file, 'w', newline='') as csvfile: + csv_writer = csv.writer(csvfile) + csv_writer.writerow(["Function", "GCC Instructions", "Clang Instructions", "Diff %", "Status", "LL File"]) + + total_functions = 0 + processed_functions = 0 + + print(f"Found {len(ll_files)} .ll files") + + for ll_path in ll_files: + # Extract function names + func_names = extract_function_names(ll_path, config) + if not func_names: + continue + + total_functions += len(func_names) + print(f"Processing {ll_path.name} ({len(func_names)} functions)") + + for func in func_names: + # Clean special characters in function name + clean_func = re.sub(r'[^\w]', '_', func) + + result = 
process_function(ll_path, func, work_dir, config, + keep_files=False, enable_timeout=True) + + processed_functions += 1 + + # Decide whether to output and write based on parameters + is_success = result.get('status') == '' + should_output = args.all or (is_success and result['gcc_lines'] < result['clang_lines']) + + if should_output: + if is_success: + print(f" Processing function: {func} gcc: {result['gcc_lines']}, clang: {result['clang_lines']}, diff: {result['diff_percent']:.1f}%") + else: + print(f" Processing function: {func} {result['status']}") + + # Write to CSV + csv_writer.writerow([ + result["func"], + result["gcc_lines"] if is_success else "-", + result["clang_lines"] if is_success else "-", + f"{result['diff_percent']:.1f}%" if is_success else "-", + result["status"], + str(ll_path) + ]) + + # Display progress in real-time + if processed_functions % 10 == 0: + print(f"Processed {processed_functions}/{total_functions} functions...") + + print(f"Completed processing {input_path}: {processed_functions}/{total_functions} functions") + + finally: + # Clean up temporary directory + for file in work_dir.glob("*"): + file.unlink() + work_dir.rmdir() + + print(f"\nAll paths processing completed!") + print(f" Results saved to directory: {output_dir}") + +if __name__ == "__main__": + main() diff --git a/clang-tools-extra/clang-gcc-bench/config.json b/clang-tools-extra/clang-gcc-bench/config.json new file mode 100644 index 000000000000..026549322541 --- /dev/null +++ b/clang-tools-extra/clang-gcc-bench/config.json @@ -0,0 +1,32 @@ +{ + "paths": { + "llvm_bin": "/path/to/build/bin", + "gcc_bin": "/path/to/gcc" + }, + "input_paths": [ + "/path/to/ll1", + "/path/to/ll2" + ], + "output_dir": "comparison_results", + "compile_flags": { + "gcc": [ + "-std=c11", + "-march=armv8.6-a+sve", + "-O3", + "-fomit-frame-pointer", + "-S" + ], + "clang": [ + "-march=armv8.6-a+sve", + "-O3", + "-fomit-frame-pointer", + "-S" + ] + }, + "opt_flags": [ + "-O3", + "-print-after-all", + 
"-disable-output" + ], + "timeout": 5 +} \ No newline at end of file diff --git a/clang-tools-extra/clang-gcc-bench/csvToXlsx.py b/clang-tools-extra/clang-gcc-bench/csvToXlsx.py new file mode 100644 index 000000000000..dd537e9b1278 --- /dev/null +++ b/clang-tools-extra/clang-gcc-bench/csvToXlsx.py @@ -0,0 +1,148 @@ +#!/usr/bin/env python3 +""" +Convert CSV files to Excel format (XLSX) +Auto-converts all CSV files from the output directory specified in config.json +Usage: python3 csv_to_xlsx.py +""" + +import argparse +import pandas as pd +import json +from pathlib import Path +import sys + +def csv_to_xlsx(csv_file, xlsx_file=None): + """Convert CSV file to Excel format""" + csv_path = Path(csv_file) + + if not csv_path.exists(): + print(f"Error: CSV file {csv_path} does not exist") + return False + + # Generate output filename if not provided + if xlsx_file is None: + xlsx_path = csv_path.with_suffix('.xlsx') + else: + xlsx_path = Path(xlsx_file) + + try: + # Read CSV file + df = pd.read_csv(csv_path) + + # Write to Excel with formatting + with pd.ExcelWriter(xlsx_path, engine='openpyxl') as writer: + df.to_excel(writer, index=False, sheet_name='Results') + + # Get the worksheet + worksheet = writer.sheets['Results'] + + # Auto-adjust column widths + for column in worksheet.columns: + max_length = 0 + column_letter = column[0].column_letter + + for cell in column: + try: + if len(str(cell.value)) > max_length: + max_length = len(str(cell.value)) + except: + pass + + adjusted_width = min(max_length + 2, 50) # Cap at 50 characters + worksheet.column_dimensions[column_letter].width = adjusted_width + + # Format header row + from openpyxl.styles import Font, PatternFill + header_font = Font(bold=True) + header_fill = PatternFill(start_color='D3D3D3', end_color='D3D3D3', fill_type='solid') + + for cell in worksheet[1]: + cell.font = header_font + cell.fill = header_fill + + print(f"Successfully converted {csv_path} to {xlsx_path}") + return True + + except Exception as 
e: + print(f"Error converting CSV to Excel: {e}") + return False + +def load_config(): + """Load configuration from config.json""" + config_file = Path(__file__).parent / "config.json" + try: + with open(config_file, 'r') as f: + return json.load(f) + except FileNotFoundError: + print(f"Error: Configuration file {config_file} not found") + return None + except json.JSONDecodeError as e: + print(f"Error: Invalid configuration file format: {e}") + return None + +def convert_output_directory(output_dir): + """Convert all CSV files in the output directory to Excel format""" + output_path = Path(output_dir) + + if not output_path.exists(): + print(f"Output directory {output_path} does not exist") + return 0 + + # Find all CSV files recursively + csv_files = list(output_path.rglob("*.csv")) + + if not csv_files: + print(f"No CSV files found in {output_path}") + return 0 + + print(f"Found {len(csv_files)} CSV files to convert...") + success_count = 0 + + for csv_file in csv_files: + xlsx_file = csv_file.with_suffix('.xlsx') + print(f"Converting: {csv_file.relative_to(output_path)}") + if csv_to_xlsx(csv_file, xlsx_file): + success_count += 1 + + print(f"Successfully converted {success_count}/{len(csv_files)} files") + return success_count + +def main(): + parser = argparse.ArgumentParser(description="Convert CSV files to Excel format") + parser.add_argument("csv_file", nargs="?", help="Input CSV file path (optional)") + parser.add_argument("xlsx_file", nargs="?", help="Output Excel file path (optional)") + parser.add_argument("-d", "--directory", help="Convert all CSV files in specified directory") + + args = parser.parse_args() + + if args.directory: + # Convert all CSV files in specified directory + convert_output_directory(args.directory) + elif args.csv_file: + # Convert single file + input_path = Path(args.csv_file) + if not input_path.exists(): + print(f"Error: File {input_path} does not exist") + sys.exit(1) + + if not input_path.suffix.lower() == '.csv': + 
print(f"Error: {input_path} is not a CSV file") + sys.exit(1) + + success = csv_to_xlsx(args.csv_file, args.xlsx_file) + sys.exit(0 if success else 1) + else: + # Auto-convert from config.json output directory + config = load_config() + if not config: + sys.exit(1) + + output_dir = config.get("output_dir", "out") + print(f"Auto-converting CSV files from configured output directory: {output_dir}") + + success_count = convert_output_directory(output_dir) + if success_count == 0: + sys.exit(1) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/clang-tools-extra/clang-gcc-bench/extract.py b/clang-tools-extra/clang-gcc-bench/extract.py new file mode 100644 index 000000000000..fd0e91d4278c --- /dev/null +++ b/clang-tools-extra/clang-gcc-bench/extract.py @@ -0,0 +1,105 @@ +import os +import re +import subprocess +import argparse +import shutil +from pathlib import Path +from utils import load_config, extract_function_names, process_function_with_detailed_logging + +config = load_config() + +def main(): + # Parse command line arguments + parser = argparse.ArgumentParser(description="Generate function assembly based on input LL file") + parser.add_argument("ll_file", help="Input .ll file path") + parser.add_argument("-o", "--output", default="out", help="Output directory path") + parser.add_argument("-f", "--function", help="Specify function name to process (optional, processes all functions if not specified)") + parser.add_argument("--keep-bc", action="store_true", help="Keep intermediate .bc files") + + args = parser.parse_args() + + ll_path = Path(args.ll_file) + if not ll_path.exists() or not ll_path.is_file(): + print(f"Error: File {ll_path} does not exist or is not a file") + return + + # Prepare output directory + output_root = Path(args.output) + output_root.mkdir(parents=True, exist_ok=True) + + # Extract function names + all_func_names = extract_function_names(ll_path, config) + if not all_func_names: + return + + # Decide which 
functions to process based on parameters + if args.function: + if args.function in all_func_names: + func_names = [args.function] + print(f"Processing specified function in {ll_path.name}: {args.function}") + else: + print(f"Error: Function '{args.function}' not found in {ll_path.name}") + print(f"Available functions: {', '.join(all_func_names)}") + return + else: + func_names = all_func_names + print(f"Processing {ll_path.name} ({len(func_names)} functions)") + + # Create output directory based on LL filename + ll_filename = ll_path.stem + ll_output_dir = output_root / ll_filename + ll_output_dir.mkdir(parents=True, exist_ok=True) + + # Copy complete LL file to parent directory + full_ll_copy = ll_output_dir / f"{ll_filename}.ll" + shutil.copy(ll_path, full_ll_copy) + print(f"Copied complete LL file to: {full_ll_copy}") + + # Process all functions + total_count = 0 + success_count = 0 + opt_success_count = 0 + + for func in func_names: + # Create subdirectory for each function + clean_func_name = re.sub(r'[^\w]', '_', func) + func_output_dir = ll_output_dir / clean_func_name + func_output_dir.mkdir(parents=True, exist_ok=True) + + total_count += 1 + print(f"Processing function: {func}") + + # Process function - using detailed logging version + result = process_function_with_detailed_logging(ll_path, func, func_output_dir, config) + # Check if processing was successful (empty status string indicates success) + is_success = result.get('status') == '' + + if is_success: + success_count += 1 + if result["opt_log_created"]: + opt_success_count += 1 + print(f" Successfully generated files! 
gcc instructions: {result['gcc_lines']}, clang instructions: {result['clang_lines']}, difference: {result['diff_percent']:.1f}%") + # List generated files + print(f" Generated files:") + print(f" - {clean_func_name}.ll (LLVM IR copy)") + print(f" - {clean_func_name}.c (Generated C program)") + print(f" - {clean_func_name}_gcc.s (GCC assembly)") + print(f" - {clean_func_name}_clang.s (Clang assembly)") + if result["opt_log_created"]: + print(f" - {clean_func_name}_opt_steps.log (Optimization steps log)") + print(f" Files saved in: {func_output_dir}") + else: + print(f" Processing failed: {result['status']}") + # If processing failed, delete empty directory + if func_output_dir.exists() and not any(func_output_dir.iterdir()): + func_output_dir.rmdir() + + print(f"\nProcessing completed!") + print(f" LL file: {ll_path}") + print(f" Total attempted to process {total_count} functions") + print(f" Successfully processed {success_count} functions") + print(f" Successfully recorded optimization steps for {opt_success_count} functions") + print(f" Output directory: {ll_output_dir.absolute()}") + +if __name__ == "__main__": + main() diff --git a/clang-tools-extra/clang-gcc-bench/utils.py b/clang-tools-extra/clang-gcc-bench/utils.py new file mode 100644 index 000000000000..aab5177a5542 --- /dev/null +++ b/clang-tools-extra/clang-gcc-bench/utils.py @@ -0,0 +1,655 @@ +import json +import re +import subprocess +import shutil +from pathlib import Path + + +def load_config(): + """Load configuration from JSON file""" + config_file = Path(__file__).parent / "config.json" + try: + with open(config_file, 'r') as f: + config = json.load(f) + print(f"Successfully loaded configuration file: {config_file}") + except FileNotFoundError: + print(f"Error: Configuration file {config_file} does not exist") + raise + except json.JSONDecodeError as e: + print(f"Error: Invalid configuration file format: {e}") + raise + + # Set paths + LLVM_BIN = Path(config["paths"]["llvm_bin"]) + GCC_BIN = 
Path(config["paths"]["gcc_bin"]) + CLANG_BIN = LLVM_BIN / "clang" + EXTRACT = LLVM_BIN / "llvm-extract" + CBE = LLVM_BIN / "llvm-cbe" + OPT = LLVM_BIN / "opt" + + return { + "LLVM_BIN": LLVM_BIN, + "GCC_BIN": GCC_BIN, + "CLANG_BIN": CLANG_BIN, + "EXTRACT": EXTRACT, + "CBE": CBE, + "OPT": OPT, + "compile_flags": config["compile_flags"], + "opt_flags": config.get("opt_flags", []), + "timeout": config["timeout"], + "input_paths": config.get("input_paths", []), + "output_dir": config.get("output_dir", "comparison_results") + } + + +def strip_asm(asm_path): + """Count assembly instruction lines (filter out irrelevant content)""" + try: + count = 0 + with open(asm_path, 'r') as f: + for line in f: + # Remove comments first + if '//' in line: + line = line[:line.index('//')] + stripped = line.strip() + if not stripped: continue # Skip empty lines + if stripped.startswith('.'): continue # Skip pseudo-instructions + if stripped.startswith('#'): continue # Skip comments + if stripped.endswith(':'): continue # Skip labels + count += 1 + return count + except: + return 0 + + +def collect_ll_files(input_path): + """Collect .ll files to process""" + ll_files = [] + + if input_path.is_file() and input_path.suffix == ".ll": + ll_files.append(input_path) + elif input_path.is_dir(): + ll_files = list(input_path.rglob("*.ll")) + else: + print(f"Warning: Skipping invalid path {input_path}") + return [] + + if not ll_files: + print(f"No .ll files found in {input_path}") + return [] + + return ll_files + + +def extract_function_names(ll_path, config): + """Extract function names from .ll file using LLVM toolchain for accuracy""" + try: + # Create temporary working directory + temp_dir = Path("temp_function_discovery") + temp_dir.mkdir(exist_ok=True) + + # 1. 
Compile .ll file to .bc file + bc_file = temp_dir / f"{ll_path.stem}.bc" + as_cmd = [str(config["LLVM_BIN"] / "llvm-as"), str(ll_path), "-o", str(bc_file)] + subprocess.run(as_cmd, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + + # 2. Use llvm-nm to get all symbols + nm_cmd = [str(config["LLVM_BIN"] / "llvm-nm"), str(bc_file)] + result = subprocess.run(nm_cmd, capture_output=True, text=True, check=True) + + # 3. Parse llvm-nm output and extract function symbols + func_names = [] + for line in result.stdout.strip().split('\n'): + if not line.strip(): + continue + parts = line.strip().split() + if len(parts) >= 3: + # llvm-nm output format: address type symbol_name + symbol_type = parts[1] + symbol_name = parts[2] + # T indicates defined function symbols in text section, exclude U (undefined) and other types + if symbol_type in ['T', 't'] and not symbol_name.startswith('llvm.'): + # Remove platform-specific symbol prefixes (like underscore prefix on macOS) + clean_name = symbol_name.lstrip('_') + if clean_name: # Ensure there's still content after removing prefix + func_names.append(clean_name) + + # Clean up temporary files + if bc_file.exists(): + bc_file.unlink() + temp_dir.rmdir() + + if not func_names: + print(f"No function definitions found in {ll_path}") + return [] + + return func_names + + except subprocess.CalledProcessError as e: + print(f"LLVM tool error when processing file {ll_path}: {e}") + # Clean up potentially remaining files + if 'bc_file' in locals() and bc_file.exists(): + bc_file.unlink() + if 'temp_dir' in locals() and temp_dir.exists(): + temp_dir.rmdir() + return [] + except Exception as e: + print(f"Failed to process file {ll_path}: {e}") + return [] + + +def run_optimization_pipeline(bc_file, output_dir, func_name, config): + """Run optimization pipeline and record results of each optimization step""" + opt_log_file = output_dir / f"{func_name}_opt_steps.log" + + try: + opt_cmd = [ + str(config["OPT"]), + str(bc_file), 
+ *config["opt_flags"] + ] + + with open(opt_log_file, 'w') as log_file: + subprocess.run(opt_cmd, stdout=log_file, stderr=subprocess.STDOUT, check=True) + + return opt_log_file + except subprocess.CalledProcessError as e: + print(f"Failed to record optimization steps: {e}") + if opt_log_file.exists(): + opt_log_file.unlink() + return None + except Exception as e: + print(f"Unexpected error occurred during optimization step recording: {e}") + if opt_log_file.exists(): + opt_log_file.unlink() + return None + + +def extract_clean_function_ll(ll_path, func, output_dir, config, enable_timeout=False): + """ + Extract a single function from original LL file to clean LL file + + Args: + ll_path: Original LLVM IR file path + func: Function name + output_dir: Output directory + config: Configuration dictionary + enable_timeout: Whether to enable timeout mechanism + + Returns: + Path to extracted clean LL file, returns None on failure + """ + # Clean special characters in function name + clean_func = re.sub(r'[^\w]', '_', func) + + # Define file paths + bc_file = output_dir / f"{clean_func}.bc" + clean_ll_file = output_dir / f"{clean_func}.ll" + + timeout = config.get("timeout") if enable_timeout else None + + try: + # 1. Extract function to bc file + try: + extract_cmd = [str(config["EXTRACT"]), "-func="+func, str(ll_path), "-o", str(bc_file)] + subprocess.run(extract_cmd, check=True, stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, timeout=timeout) + except subprocess.TimeoutExpired: + raise Exception("llvm-extract") + except subprocess.CalledProcessError: + raise Exception("llvm-extract") + + # 2. 
Convert bc file to clean ll file + try: + dis_cmd = [str(config["LLVM_BIN"] / "llvm-dis"), str(bc_file), "-o", str(clean_ll_file)] + subprocess.run(dis_cmd, check=True, stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, timeout=timeout) + except subprocess.TimeoutExpired: + raise Exception("llvm-dis") + except subprocess.CalledProcessError: + raise Exception("llvm-dis") + + # 3. Clean up bc file + if bc_file.exists(): + bc_file.unlink() + + return clean_ll_file + + except Exception as e: + # Clean up potentially remaining files + for f in [bc_file, clean_ll_file]: + if f.exists(): + f.unlink() + # Return error information instead of None + return {"error": str(e)} + + +def process_clean_function_ll(clean_ll_path, func, output_dir, config, + keep_files=False, enable_timeout=False, + enable_opt_logging=False): + """ + Process already extracted clean single-function LL file + """ + # Clean special characters in function name + clean_func = re.sub(r'[^\w]', '_', func) + + # Define file paths + bc_file = output_dir / f"{clean_func}.bc" + c_file = output_dir / f"{clean_func}.c" + s_gcc = output_dir / f"{clean_func}_gcc.s" + s_clang = output_dir / f"{clean_func}_clang.s" + + timeout = config.get("timeout") if enable_timeout else None + + try: + # 1. Compile clean ll file to bc file + try: + as_cmd = [str(config["LLVM_BIN"] / "llvm-as"), str(clean_ll_path), "-o", str(bc_file)] + subprocess.run(as_cmd, check=True, stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, timeout=timeout) + except subprocess.TimeoutExpired: + raise Exception("llvm-as") + except subprocess.CalledProcessError: + raise Exception("llvm-as") + + # 2. Run optimization pipeline and record (if needed) + opt_log_file = None + if enable_opt_logging: + opt_log_file = run_optimization_pipeline(bc_file, output_dir, clean_func, config) + + # 3. 
Generate C code + try: + cbe_cmd = [str(config["CBE"]), str(bc_file), "-o", str(c_file)] + subprocess.run(cbe_cmd, check=True, stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, timeout=timeout) + except subprocess.TimeoutExpired: + raise Exception("CBE") + except subprocess.CalledProcessError: + raise Exception("CBE") + + # 4. GCC compilation + try: + gcc_cmd = [str(config["GCC_BIN"]), *config["compile_flags"]["gcc"], str(c_file), "-o", str(s_gcc)] + subprocess.run(gcc_cmd, check=True, stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, timeout=timeout) + except subprocess.TimeoutExpired: + raise Exception("GCC") + except subprocess.CalledProcessError: + raise Exception("GCC") + + # 5. Clang compilation + try: + clang_cmd = [str(config["CLANG_BIN"]), *config["compile_flags"]["clang"], str(c_file), "-o", str(s_clang)] + subprocess.run(clang_cmd, check=True, stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, timeout=timeout) + except subprocess.TimeoutExpired: + raise Exception("Clang") + except subprocess.CalledProcessError: + raise Exception("Clang") + + # 6. Count instruction lines + gcc_count = strip_asm(s_gcc) + clang_count = strip_asm(s_clang) + + # 7. Calculate difference percentage + if gcc_count > 0: + diff_percent = ((clang_count - gcc_count) / gcc_count) * 100 + else: + diff_percent = 0 if clang_count == 0 else float('inf') + + # 8. 
File cleanup strategy + if not keep_files: + # compare mode: keep only assembly files, delete intermediate files + for f in [bc_file, c_file]: + if f.exists(): + f.unlink() + # Delete clean ll file (temporary file) + if clean_ll_path.exists(): + clean_ll_path.unlink() + else: + # extract mode: delete bc files but keep others + if bc_file.exists(): + bc_file.unlink() + + return { + "func": func, + "gcc_lines": gcc_count, + "clang_lines": clang_count, + "diff_percent": diff_percent, + "opt_log_created": opt_log_file is not None if enable_opt_logging else False, + "status": "" + } + + except Exception as e: + # Get specific error information + error_msg = str(e) + if enable_timeout: + # compare mode: handle errors silently + pass + else: + # extract mode: print error information + print(f"Error processing function {func}: {error_msg}") + + # Clean up files + cleanup_files = [bc_file, c_file, s_gcc, s_clang, clean_ll_path] + for f in cleanup_files: + if f.exists(): + f.unlink() + if enable_opt_logging: + opt_log = output_dir / f"{clean_func}_opt_steps.log" + if opt_log.exists(): + opt_log.unlink() + return { + "func": func, + "gcc_lines": 0, + "clang_lines": 0, + "diff_percent": 0, + "opt_log_created": False, + "status": error_msg + } + + +def process_function(ll_path, func, output_dir, config, + keep_files=False, enable_timeout=False, + enable_opt_logging=False, copy_ll=False): + """ + Generic function processing function using new two-stage workflow + + Args: + ll_path: LLVM IR file path + func: Function name + output_dir: Output directory + config: Configuration dictionary + keep_files: Whether to keep intermediate files (default False, for compare mode) + enable_timeout: Whether to enable timeout mechanism (default False) + enable_opt_logging: Whether to enable optimization step recording (default False) + copy_ll: Whether to copy original LL file (default False, now handled automatically by two-stage workflow) + + Returns: + Processing result dictionary or None 
def _detailed_result(func, status, gcc_lines=0, clang_lines=0, diff_percent=0,
                     opt_log_created=False):
    """Build the result dict shared by every exit path of the detailed pipeline."""
    return {
        "func": func,
        "gcc_lines": gcc_lines,
        "clang_lines": clang_lines,
        "diff_percent": diff_percent,
        "opt_log_created": opt_log_created,
        "status": status,
    }


def _record_opt_steps(bc_file, opt_log, config):
    """Best-effort capture of the opt output into opt_log; return True if the log was kept."""
    try:
        opt_cmd = [
            str(config["OPT"]),
            str(bc_file),
            *config["opt_flags"]
        ]
        with open(opt_log, 'w') as f:
            result = subprocess.run(opt_cmd, stdout=f, stderr=subprocess.STDOUT,
                                    timeout=config.get("timeout"))
        if result.returncode == 0:
            return True
    except Exception:
        # Deliberate best-effort: the opt log is a debugging aid only, so a
        # failure here must not abort (or show up in) the main pipeline.
        pass

    # Never leave a partial or failed log behind.
    if opt_log.exists():
        opt_log.unlink()
    return False


def process_function_with_detailed_logging(ll_path, func, output_dir, config):
    """
    Function processing with detailed logging, used by extract.py

    Runs llvm-extract -> llvm-dis -> llvm-as -> (opt log) -> CBE -> GCC -> Clang
    for one function, appending every command and its outcome to
    "<clean_func>_processing.log" inside output_dir. The extracted .ll, the
    generated .c and both .s files are left in output_dir; the two temporary
    .bc files are removed only after a fully successful run.

    Args:
        ll_path: Path of the LL file that contains the function.
        func: Function name as it appears in the LL file.
        output_dir: Existing directory (Path) receiving logs and generated files.
        config: Dict providing "EXTRACT", "LLVM_BIN", "OPT", "opt_flags", "CBE",
            "GCC_BIN", "CLANG_BIN", "compile_flags" and optionally "timeout"
            (seconds, applied to every tool invocation).

    Returns:
        Dict with keys "func", "gcc_lines", "clang_lines", "diff_percent",
        "opt_log_created" and "status". "status" is "" on success, the name of
        the failing step ("llvm-extract", "llvm-dis", "llvm-as", "CBE", "GCC",
        "Clang"), "timeout", or "exception: <message>". This function does not
        raise for tool failures; they are reported through "status".
    """

    # Clean special characters in function name
    clean_func = re.sub(r'[^\w]', '_', func)

    # Create log file
    log_file = output_dir / f"{clean_func}_processing.log"

    def log_message(message, also_print=True, add_newline=False):
        prefix = "\n" if add_newline else ""
        with open(log_file, 'a', encoding='utf-8') as f:
            f.write(f"{prefix}*** {message} ***\n")
        if also_print:
            print(f" {message}")

    def run_step(tool, cmd, produced):
        """Run one tool, log command/outcome, return True when it exited with 0."""
        log_message(f"Command: {' '.join(cmd)}")
        proc = subprocess.run(cmd, capture_output=True, text=True,
                              timeout=config.get("timeout"))
        if proc.returncode != 0:
            log_message(f"{tool} Failed (exit code: {proc.returncode})")
            if proc.stderr.strip():
                log_message(f"stderr: {proc.stderr}")
            if proc.stdout.strip():
                log_message(f"stdout: {proc.stdout}")
            return False
        log_message(f"{tool} Success")
        log_message(f"Generated File: {produced} (size: {produced.stat().st_size} bytes)")
        return True

    # Start processing log
    log_message("Processing Function Start")
    log_message(f"Function Name: {func}")
    log_message(f"Clean Name: {clean_func}")
    log_message(f"Source File: {ll_path}")
    log_message(f"Output Directory: {output_dir}")

    # Define all file paths
    bc_temp = output_dir / f"{clean_func}_temp.bc"
    clean_ll = output_dir / f"{clean_func}_extracted.ll"
    bc_final = output_dir / f"{clean_func}_final.bc"
    c_file = output_dir / f"{clean_func}.c"
    s_gcc = output_dir / f"{clean_func}_gcc.s"
    s_clang = output_dir / f"{clean_func}_clang.s"
    opt_log = output_dir / f"{clean_func}_opt_steps.log"

    # Tracked outside the try block so that the timeout/exception handlers
    # report whether the opt log really exists instead of always saying False.
    opt_log_created = False

    try:
        # Step 1: llvm-extract Function Extraction
        log_message("Step 1: llvm-extract Function Extraction", add_newline=True)
        extract_cmd = [str(config["EXTRACT"]), "-func=" + func, str(ll_path), "-o", str(bc_temp)]
        if not run_step("llvm-extract", extract_cmd, bc_temp):
            return _detailed_result(func, "llvm-extract")

        # Step 2: llvm-dis Generate Readable LL
        log_message("Step 2: llvm-dis Generate Readable LL", add_newline=True)
        dis_cmd = [str(config["LLVM_BIN"] / "llvm-dis"), str(bc_temp), "-o", str(clean_ll)]
        if not run_step("llvm-dis", dis_cmd, clean_ll):
            return _detailed_result(func, "llvm-dis")

        # Step 3: llvm-as Recompile BC File
        log_message("Step 3: llvm-as Recompile BC File", add_newline=True)
        as_cmd = [str(config["LLVM_BIN"] / "llvm-as"), str(clean_ll), "-o", str(bc_final)]
        if not run_step("llvm-as", as_cmd, bc_final):
            return _detailed_result(func, "llvm-as")

        # Optimization recording is a side channel: it is intentionally not
        # written to the main log and never fails the pipeline.
        opt_log_created = _record_opt_steps(bc_final, opt_log, config)

        # Step 4: CBE C Code Generation
        log_message("Step 4: CBE C Code Generation", add_newline=True)
        cbe_cmd = [str(config["CBE"]), str(bc_final), "-o", str(c_file)]
        if not run_step("CBE", cbe_cmd, c_file):
            # A non-zero exit is a CBE failure, not a timeout; real timeouts
            # are reported by the TimeoutExpired handler below.
            return _detailed_result(func, "CBE", opt_log_created=opt_log_created)

        # Step 5: GCC Compile Assembly
        log_message("Step 5: GCC Compile Assembly", add_newline=True)
        gcc_cmd = [str(config["GCC_BIN"]), *config["compile_flags"]["gcc"],
                   str(c_file), "-o", str(s_gcc)]
        if not run_step("GCC", gcc_cmd, s_gcc):
            return _detailed_result(func, "GCC", opt_log_created=opt_log_created)

        # Step 6: Clang Compile Assembly
        log_message("Step 6: Clang Compile Assembly", add_newline=True)
        clang_cmd = [str(config["CLANG_BIN"]), *config["compile_flags"]["clang"],
                     str(c_file), "-o", str(s_clang)]
        if not run_step("Clang", clang_cmd, s_clang):
            return _detailed_result(func, "Clang", opt_log_created=opt_log_created)

        # Step 7: Assembly Instruction Counting
        log_message("Step 7: Assembly Instruction Counting", add_newline=True)
        gcc_count = strip_asm(s_gcc)
        clang_count = strip_asm(s_clang)

        if gcc_count > 0:
            diff_percent = ((clang_count - gcc_count) / gcc_count) * 100
        else:
            # No GCC instructions: equal if Clang is empty too, otherwise unbounded.
            diff_percent = 0 if clang_count == 0 else float('inf')

        log_message(f"GCC Instructions: {gcc_count}")
        log_message(f"Clang Instructions: {clang_count}")
        log_message(f"Difference: {diff_percent:.1f}%")

        # Clean up temporary files
        if bc_temp.exists():
            bc_temp.unlink()
        if bc_final.exists():
            bc_final.unlink()

        log_message("Processing Complete")
        log_message("Final Status: Success")

        return _detailed_result(func, "", gcc_lines=gcc_count, clang_lines=clang_count,
                                diff_percent=diff_percent, opt_log_created=opt_log_created)

    except subprocess.TimeoutExpired as e:
        log_message(f"Processing Timeout: {e}")
        return _detailed_result(func, "timeout", opt_log_created=opt_log_created)
    except Exception as e:
        log_message(f"Processing Exception: {e}")
        return _detailed_result(func, f"exception: {str(e)}", opt_log_created=opt_log_created)
"-fomit-frame-pointer", "-S" ] }, - "opt_flags": [ + "clang_opt_flags": [ + "--target=aarch64-linux-gnu", + "-march=armv8.6-a+sve", "-O3", - "-print-after-all", - "-disable-output" + "-fomit-frame-pointer", + "-S", + "-emit-llvm", + "-mllvm", + "-print-after-all" ], "timeout": 5 } \ No newline at end of file diff --git a/clang-tools-extra/clang-gcc-bench/csvToXlsx.py b/clang-tools-extra/clang-gcc-bench/csvToXlsx.py index dd537e9b1278..b2eb3b04b754 100644 --- a/clang-tools-extra/clang-gcc-bench/csvToXlsx.py +++ b/clang-tools-extra/clang-gcc-bench/csvToXlsx.py @@ -29,6 +29,12 @@ def csv_to_xlsx(csv_file, xlsx_file=None): # Read CSV file df = pd.read_csv(csv_path) + # Convert percentage columns to numeric + for col in df.columns: + if 'diff' in col.lower() and '%' in col: + # Remove % sign and convert to float + df[col] = pd.to_numeric(df[col].astype(str).str.replace('%', ''), errors='coerce') + # Write to Excel with formatting with pd.ExcelWriter(xlsx_path, engine='openpyxl') as writer: df.to_excel(writer, index=False, sheet_name='Results') @@ -51,8 +57,21 @@ def csv_to_xlsx(csv_file, xlsx_file=None): adjusted_width = min(max_length + 2, 50) # Cap at 50 characters worksheet.column_dimensions[column_letter].width = adjusted_width - # Format header row + # Format percentage columns from openpyxl.styles import Font, PatternFill + + # Find percentage columns and format them + for col_idx, col_name in enumerate(df.columns, 1): + if 'diff' in col_name.lower() and '%' in col_name: + col_letter = worksheet.cell(row=1, column=col_idx).column_letter + for row in range(2, worksheet.max_row + 1): + cell = worksheet[f"{col_letter}{row}"] + if cell.value is not None: + cell.number_format = '0.00%' + # Convert the value to decimal for percentage display + cell.value = cell.value / 100 if isinstance(cell.value, (int, float)) else cell.value + + # Format header row header_font = Font(bold=True) header_fill = PatternFill(start_color='D3D3D3', end_color='D3D3D3', fill_type='solid') 
diff --git a/clang-tools-extra/clang-gcc-bench/extract.py b/clang-tools-extra/clang-gcc-bench/extract.py index fd0e91d4278c..29c02b3e6638 100644 --- a/clang-tools-extra/clang-gcc-bench/extract.py +++ b/clang-tools-extra/clang-gcc-bench/extract.py @@ -44,11 +44,20 @@ def main(): else: func_names = all_func_names print(f"Processing {ll_path.name} ({len(func_names)} functions)") + + # Clean up entire output directory for this ll file if processing all functions + ll_filename = ll_path.stem + ll_output_dir = output_root / ll_filename + if ll_output_dir.exists(): + shutil.rmtree(ll_output_dir) + print(f"Cleaned up existing output directory: {ll_output_dir}") + ll_output_dir.mkdir(parents=True, exist_ok=True) - # Create output directory based on LL filename - ll_filename = ll_path.stem - ll_output_dir = output_root / ll_filename - ll_output_dir.mkdir(parents=True, exist_ok=True) + # Create output directory based on LL filename (if not already created) + if not 'll_output_dir' in locals(): + ll_filename = ll_path.stem + ll_output_dir = output_root / ll_filename + ll_output_dir.mkdir(parents=True, exist_ok=True) # Copy complete LL file to parent directory full_ll_copy = ll_output_dir / f"{ll_filename}.ll" diff --git a/clang-tools-extra/clang-gcc-bench/utils.py b/clang-tools-extra/clang-gcc-bench/utils.py index aab5177a5542..a1c865ba2ac5 100644 --- a/clang-tools-extra/clang-gcc-bench/utils.py +++ b/clang-tools-extra/clang-gcc-bench/utils.py @@ -35,7 +35,7 @@ def load_config(): "CBE": CBE, "OPT": OPT, "compile_flags": config["compile_flags"], - "opt_flags": config.get("opt_flags", []), + "clang_opt_flags": config.get("clang_opt_flags", []), "timeout": config["timeout"], "input_paths": config.get("input_paths", []), "output_dir": config.get("output_dir", "comparison_results") @@ -138,19 +138,29 @@ def extract_function_names(ll_path, config): return [] -def run_optimization_pipeline(bc_file, output_dir, func_name, config): - """Run optimization pipeline and record results of 
def run_gcc_optimization_pipeline(output_dir, func_name, config):
    """
    Run GCC optimization pipeline and record results

    Compiles "<func_name>.c" from output_dir with the configured GCC flags plus
    -fdump-tree-all / -fdump-rtl-all, directing the dumps to
    "<output_dir>/logs/gcc_dumps/" (emptied first), and writes a summary log
    (command, stdout, stderr, list of dump files) to
    "<output_dir>/logs/<func_name>_gcc_opt_steps.log".

    Args:
        output_dir: Directory (Path) holding the C file; "logs/" is created below it.
        func_name: Base name of the C file and of the log file.
        config: Dict providing "GCC_BIN", "compile_flags"["gcc"] and optionally
            "timeout" (seconds) for the compiler invocation.

    Returns:
        Path of the summary log on success, None on any failure (a message is
        printed and a partially written log is removed).
    """
    logs_dir = output_dir / "logs"
    gcc_dumps_dir = logs_dir / "gcc_dumps"
    opt_log_file = logs_dir / f"{func_name}_gcc_opt_steps.log"
    c_file = output_dir / f"{func_name}.c"

    try:
        # Directory setup lives inside the try block so that a filesystem
        # problem yields None like every other failure instead of escaping.
        logs_dir.mkdir(parents=True, exist_ok=True)

        # Start from an empty dump directory so stale dumps are not listed.
        if gcc_dumps_dir.exists():
            shutil.rmtree(gcc_dumps_dir)
        gcc_dumps_dir.mkdir(exist_ok=True)

        # Run GCC with dumps in the gcc_dumps directory
        gcc_cmd = [
            str(config["GCC_BIN"]),
            *config["compile_flags"]["gcc"],
            "-fdump-tree-all",
            "-fdump-rtl-all",
            "-dumpdir", str(gcc_dumps_dir) + "/",
            str(c_file),
            "-o", "/dev/null"
        ]

        # Bounded like the other tool invocations in this file; a missing
        # "timeout" entry means no limit.
        result = subprocess.run(gcc_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                                text=True, check=True, timeout=config.get("timeout"))

        # Collect all dump files
        dump_files = sorted(gcc_dumps_dir.glob("*"))

        with open(opt_log_file, 'w', encoding='utf-8') as log_file:
            log_file.write("=== GCC Optimization Pipeline Log ===\n")
            log_file.write(f"Command: {' '.join(gcc_cmd)}\n")
            log_file.write(f"Working Directory: {gcc_dumps_dir}\n")
            log_file.write(f"Generated {len(dump_files)} dump files\n\n")

            log_file.write("=== STDOUT ===\n")
            log_file.write(result.stdout if result.stdout else "(empty)\n")
            log_file.write("\n=== STDERR ===\n")
            log_file.write(result.stderr if result.stderr else "(empty)\n")

            log_file.write(f"\n=== Generated Dump Files ({len(dump_files)}) ===\n")
            log_file.writelines(f"- {dump_file.name}\n" for dump_file in dump_files)

        return opt_log_file
    except subprocess.CalledProcessError as e:
        print(f"Failed to record GCC optimization steps: {e}")
        # Surface the compiler diagnostic; the exception text only has the exit code.
        if e.stderr and e.stderr.strip():
            print(e.stderr.strip())
    except subprocess.TimeoutExpired as e:
        print(f"Timed out while recording GCC optimization steps: {e}")
    except Exception as e:
        print(f"Unexpected error occurred during GCC optimization step recording: {e}")

    # Failure path: do not leave a stale or partial log behind.
    if opt_log_file.exists():
        opt_log_file.unlink()
    return None
Generate C code try: @@ -408,6 +478,11 @@ def process_function_with_detailed_logging(ll_path, func, output_dir, config): # Clean special characters in function name clean_func = re.sub(r'[^\w]', '_', func) + # Clean up function output directory if it exists + if output_dir.exists(): + shutil.rmtree(output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + # Create log file log_file = output_dir / f"{clean_func}_processing.log" @@ -430,12 +505,11 @@ def process_function_with_detailed_logging(ll_path, func, output_dir, config): # Define all file paths bc_temp = output_dir / f"{clean_func}_temp.bc" - clean_ll = output_dir / f"{clean_func}_extracted.ll" + clean_ll = output_dir / f"{clean_func}.ll" bc_final = output_dir / f"{clean_func}_final.bc" c_file = output_dir / f"{clean_func}.c" s_gcc = output_dir / f"{clean_func}_gcc.s" s_clang = output_dir / f"{clean_func}_clang.s" - opt_log = output_dir / f"{clean_func}_opt_steps.log" try: # Step 1: llvm-extract Function Extraction @@ -510,27 +584,6 @@ def process_function_with_detailed_logging(ll_path, func, output_dir, config): log_message("llvm-as Success") log_message(f"Generated File: {bc_final} (size: {bc_final.stat().st_size} bytes)") - # opt step has been removed, continue with optimization recording but not logged to main log - opt_log_created = False - try: - opt_cmd = [ - str(config["OPT"]), - str(bc_final), - *config["opt_flags"] - ] - - with open(opt_log, 'w') as f: - result = subprocess.run(opt_cmd, stdout=f, stderr=subprocess.STDOUT, timeout=config.get("timeout")) - - if result.returncode == 0: - opt_log_created = True - else: - if opt_log.exists(): - opt_log.unlink() - except Exception: - if opt_log.exists(): - opt_log.unlink() - # Step 4: CBE C Code Generation log_message("Step 4: CBE C Code Generation", add_newline=True) cbe_cmd = [str(config["CBE"]), str(bc_final), "-o", str(c_file)] @@ -546,13 +599,44 @@ def process_function_with_detailed_logging(ll_path, func, output_dir, config): "gcc_lines": 0, 
"clang_lines": 0, "diff_percent": 0, - "opt_log_created": opt_log_created, + "opt_log_created": False, + "gcc_opt_log_created": False, "status": "cbe timeout" } log_message("CBE Success") log_message(f"Generated File: {c_file} (size: {c_file.stat().st_size} bytes)") + # Generate clang optimization log + log_message("Step 4.5: Clang Optimization Log Generation", add_newline=True) + opt_log_created = False + try: + opt_result = run_optimization_pipeline(output_dir, clean_func, config) + if opt_result and opt_result.exists(): + opt_log_created = True + log_message("Clang Optimization Log Success") + log_message(f"Generated File: {opt_result} (size: {opt_result.stat().st_size} bytes)") + else: + log_message("Clang Optimization Log Failed - No file created") + except Exception as e: + log_message(f"Clang Optimization Log Failed: {e}") + pass + + # Generate GCC optimization log + log_message("Step 4.6: GCC Optimization Log Generation", add_newline=True) + gcc_opt_log_created = False + try: + gcc_opt_result = run_gcc_optimization_pipeline(output_dir, clean_func, config) + if gcc_opt_result and gcc_opt_result.exists(): + gcc_opt_log_created = True + log_message("GCC Optimization Log Success") + log_message(f"Generated File: {gcc_opt_result} (size: {gcc_opt_result.stat().st_size} bytes)") + else: + log_message("GCC Optimization Log Failed - No file created") + except Exception as e: + log_message(f"GCC Optimization Log Failed: {e}") + pass + # Step 5: GCC Compile Assembly log_message("Step 5: GCC Compile Assembly", add_newline=True) gcc_cmd = [str(config["GCC_BIN"]), *config["compile_flags"]["gcc"], str(c_file), "-o", str(s_gcc)] @@ -570,7 +654,8 @@ def process_function_with_detailed_logging(ll_path, func, output_dir, config): "gcc_lines": 0, "clang_lines": 0, "diff_percent": 0, - "opt_log_created": opt_log_created, + "opt_log_created": False, + "gcc_opt_log_created": False, "status": "GCC" } @@ -594,7 +679,8 @@ def process_function_with_detailed_logging(ll_path, func, 
output_dir, config): "gcc_lines": 0, "clang_lines": 0, "diff_percent": 0, - "opt_log_created": opt_log_created, + "opt_log_created": False, + "gcc_opt_log_created": False, "status": "Clang" } @@ -630,6 +716,7 @@ def process_function_with_detailed_logging(ll_path, func, output_dir, config): "clang_lines": clang_count, "diff_percent": diff_percent, "opt_log_created": opt_log_created, + "gcc_opt_log_created": gcc_opt_log_created, "status": "" } -- Gitee