commit e20fee18f01166a719fbf6046ec2e70ed7d739e5 Author: unknown Date: Thu Jun 12 12:13:46 2025 +0530 code diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..00fafbd --- /dev/null +++ b/.gitignore @@ -0,0 +1,195 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# UV +# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. 
+# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +#uv.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock +#poetry.toml + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +# Abstra +# Abstra is an AI-powered process automation framework. +# Ignore directories containing user credentials, local state, and settings. 
# ---------------------------------------------------------------------------
# Residue of the enclosing git patch, preserved verbatim as comments: the tail
# of the .gitignore hunk followed by the README.md and app.py diff headers.
# ---------------------------------------------------------------------------
# +# Learn more at https://abstra.io/docs
# +.abstra/
# +
# +# Visual Studio Code
# +# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
# +# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
# +# and can be added to the global gitignore or merged into this file. However, if you prefer,
# +# you could uncomment the following to ignore the entire vscode folder
# +# .vscode/
# +
# +# Ruff stuff:
# +.ruff_cache/
# +
# +# PyPI configuration file
# +.pypirc
# +
# +# Cursor
# +# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
# +# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
# +# refer to https://docs.cursor.com/context/ignore-files
# +.cursorignore
# +.cursorindexingignore
# \ No newline at end of file
# diff --git a/README.md b/README.md
# new file mode 100644
# index 0000000..e69de29
# diff --git a/app.py b/app.py
# new file mode 100644
# index 0000000..3653535
# --- /dev/null
# +++ b/app.py
# @@ -0,0 +1,238 @@
# ---------------------------------------------------------------------------

# app.py

import contextlib
import functools
import io
import json
import os
import platform
import subprocess
import sys
import tempfile
import threading
import time

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
from huggingface_hub import login
from dotenv import load_dotenv

# System prompt
system_message = (
    "You are an assistant that reimplements Python code in high performance C++ for an arduino. "
    "Respond only with C++ code; use comments sparingly and do not provide any explanation other than occasional comments. "
    "The C++ response needs to produce an identical output in the fastest possible time. "
    "Keep implementations of random number generators identical so that results match exactly."
)


# Helper functions
def user_prompt_for(python):
    """Return the user-turn prompt asking for a C++ rewrite of ``python``.

    Args:
        python: Python source code, appended verbatim after the instructions.
    """
    return (
        "Rewrite this Python code in C++ with the fastest possible implementation that produces identical output in the least time. "
        "Respond only with C++ code; do not explain your work other than a few comments. "
        "Pay attention to number types to ensure no int overflows. Remember to #include all necessary C++ packages such as iomanip.\n\n"
        + python
    )


def messages_for(python):
    """Return the two-message chat (system + user) for converting ``python``."""
    return [
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_prompt_for(python)},
    ]


def write_output(cpp):
    """Strip Markdown code-fence markers from ``cpp`` and write it to ``optimized.cpp``.

    The file is written in the current working directory and overwritten on
    every call.
    """
    code = cpp.replace("```cpp", "").replace("```", "")
    # Explicit encoding: generated code may contain non-ASCII comments and the
    # platform default encoding is not guaranteed to handle them.
    with open("optimized.cpp", "w", encoding="utf-8") as f:
        f.write(code)


def execute_python(code):
    """Execute ``code`` and return everything it printed to stdout.

    SECURITY: this runs arbitrary code from the UI text box inside the server
    process via ``exec``. Only expose the app to trusted users.

    Args:
        code: Python source to run as a top-level script.

    Returns:
        The captured stdout text.

    Raises:
        Any exception raised by the executed code propagates to the caller.
    """
    # A single dict is used as both globals and locals. With the implicit
    # function scope, exec() behaves like a class body: functions defined by
    # the snippet cannot see each other (e.g. `max_subarray_sum` calling `lcg`
    # raised NameError).
    namespace = {"__name__": "__main__"}
    buffer = io.StringIO()
    # redirect_stdout restores whatever stdout was active before the call,
    # rather than unconditionally resetting to sys.__stdout__.
    with contextlib.redirect_stdout(buffer):
        exec(code, namespace)
    return buffer.getvalue()


def _arch_flags():
    """Return extra architecture flags that are safe for the current machine."""
    # The original hard-coded -march=armv8.3-a, which only makes sense on
    # Apple Silicon; on other hosts the flag is rejected by the compiler.
    if platform.system() == "Darwin" and platform.machine() == "arm64":
        return ["-march=armv8.3-a"]
    return []


def execute_cpp(code):
    """Write ``code`` to ``optimized.cpp``, compile it, run it and return its stdout.

    The compiler is the one found by ``c_compiler_cmd`` so that execution
    agrees with what the UI reports as available.

    Returns:
        The program's stdout on success, otherwise a string starting with
        ``"An error occurred:"`` followed by the compiler/program stderr or
        the OS error text.
    """
    write_output(code)
    _, _, base_cmd = c_compiler_cmd("optimized")
    if not base_cmd:
        return "An error occurred:\nNo working C++ compiler was detected on this machine."

    # base_cmd is [compiler, "optimized.cpp", "-o", "optimized"]; splice the
    # optimisation flags in right after the compiler executable.
    build_cmd = [base_cmd[0], "-O3", "-std=c++17", *_arch_flags(), *base_cmd[1:]]
    try:
        subprocess.run(build_cmd, check=True, text=True, capture_output=True)
        run_result = subprocess.run(["./optimized"], check=True, text=True, capture_output=True)
        return run_result.stdout
    except subprocess.CalledProcessError as e:
        return f"An error occurred:\n{e.stderr}"
    except OSError as e:
        # Compiler or binary could not be started at all.
        return f"An error occurred:\n{e}"


def run_cmd(command):
    """Run ``command`` and return its stripped stdout.

    Returns:
        The stripped stdout, ``"SUCCESS"`` when the command succeeded without
        producing output, or ``""`` when it failed or could not be started.
    """
    try:
        result = subprocess.run(command, check=True, text=True, capture_output=True)
    except (subprocess.CalledProcessError, OSError):
        return ""
    return result.stdout.strip() if result.stdout else "SUCCESS"


# Minimal program used to check that a compiler really produces runnable
# binaries. The header name was missing after `#include`, so it never compiled.
simple_cpp = """
#include <iostream>
int main() {
    std::cout << "Hello";
    return 0;
}
"""

# platform.system() value -> (label shown in the UI, compiler executable)
_COMPILERS = {
    "Linux": ("GCC", "g++"),
    "Darwin": ("Clang++", "clang++"),
}


@functools.lru_cache(maxsize=None)
def _probe_compiler():
    """Return ``(platform, label, executable)``; ``executable`` is None if unusable.

    The probe compiles and runs ``simple_cpp`` in a temporary directory and is
    performed once per process.
    """
    my_platform = platform.system()
    if my_platform == "Windows":
        return my_platform, "Unsupported", None
    if my_platform not in _COMPILERS:
        return my_platform, "Unavailable", None

    label, executable = _COMPILERS[my_platform]
    try:
        with tempfile.TemporaryDirectory() as workdir:
            source_path = os.path.join(workdir, "simple.cpp")
            binary_path = os.path.join(workdir, "simple")
            with open(source_path, "w", encoding="utf-8") as f:
                f.write(simple_cpp)
            # Compiling prints nothing, so success is only established by
            # running the produced binary and checking what it prints.
            compiled = run_cmd([executable, source_path, "-o", binary_path])
            if compiled and run_cmd([binary_path]) == "Hello":
                return my_platform, label, executable
    except OSError:
        pass
    return my_platform, "Unavailable", None


def c_compiler_cmd(filename_base):
    """Detect a working C++ compiler and return the command to build ``filename_base``.

    Args:
        filename_base: Base name; the command compiles ``<filename_base>.cpp``
            into ``<filename_base>``.

    Returns:
        ``[platform_name, compiler_label, command]`` where ``command`` is an
        argument list, or ``[]`` when the label is ``"Unsupported"``
        (Windows) or ``"Unavailable"`` (no working compiler).
    """
    my_platform, label, executable = _probe_compiler()
    if executable is None:
        return [my_platform, label, []]
    return [my_platform, label, [executable, f"{filename_base}.cpp", "-o", filename_base]]


# HF Model Config
load_dotenv()
# The token is optional: only log in when one is configured instead of
# crashing with KeyError at import time.
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    login(hf_token)

code_qwen = "Qwen/Qwen3-0.6B"
tokenizer = AutoTokenizer.from_pretrained(code_qwen)


@functools.lru_cache(maxsize=1)
def _load_model():
    """Load the causal LM once and reuse it for every request."""
    return AutoModelForCausalLM.from_pretrained(code_qwen, device_map="auto", torch_dtype="auto")


def stream_code_qwen(python):
    """Yield the progressively growing model response for converting ``python``.

    Generation runs in a background thread; each yielded value is the full
    text produced so far (not just the newest fragment).
    """
    messages = messages_for(python)
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    model = _load_model()
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
    # skip_prompt=True keeps the echoed system/user prompt out of the output.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    thread = threading.Thread(
        target=model.generate,
        # Passing the whole encoding forwards the attention mask as well.
        kwargs=dict(model_inputs, streamer=streamer, max_new_tokens=1024, do_sample=False),
    )
    thread.start()

    output = ""
    for token in streamer:
        output += token
        yield output
    thread.join()


def optimize(python, model):
    """Return the complete generated C++ for ``python`` using ``model``.

    Only ``"CodeQwen"`` is supported; any other value returns an explanatory
    message instead of code.
    """
    if model != "CodeQwen":
        return "Only CodeQwen is supported."
    result = ""
    for result in stream_code_qwen(python):
        pass  # keep only the final, complete text
    return result

# Sample Programs
pi = """
import time

def calculate(iterations, param1, param2):
    result = 1.0
    for i in range(1, iterations+1):
        j = i * param1 - param2
        result -= (1/j)
        j = i * param1 + param2
        result += (1/j)
    return result

start_time = time.time()
result = calculate(100_000_000, 4, 1) * 4
end_time = time.time()

print(f"Result: {result:.12f}")
print(f"Execution Time: {(end_time - start_time):.6f} seconds")
"""

python_hard = """
def lcg(seed, a=1664525, c=1013904223, m=2**32):
    value = seed
    while True:
        value = (a * value + c) % m
        yield value

def max_subarray_sum(n, seed, min_val, max_val):
    lcg_gen = lcg(seed)
    random_numbers = [next(lcg_gen) % (max_val - min_val + 1) + min_val for _ in range(n)]
    max_sum = float('-inf')
    for i in range(n):
        current_sum = 0
        for j in range(i, n):
            current_sum += random_numbers[j]
            if current_sum > max_sum:
                max_sum = current_sum
    return max_sum

def total_max_subarray_sum(n, initial_seed, min_val, max_val):
    total_sum = 0
    lcg_gen = lcg(initial_seed)
    for _ in range(20):
        seed = next(lcg_gen)
        total_sum += max_subarray_sum(n, seed, min_val, max_val)
    return total_sum

import time
start_time = time.time()
result = total_max_subarray_sum(10000, 42, -10, 10)
end_time = time.time()
print("Total Maximum Subarray Sum (20 runs):", result)
print("Execution Time: {:.6f} seconds".format(end_time - start_time))
"""


def select_sample_program(sample_program):
    """Return the source of the chosen sample program.

    ``"pi"`` selects the pi sample; any other value selects ``python_hard``.
    """
    return pi if sample_program == "pi" else python_hard


# Gradio UI
# [platform name, compiler label, build command]; the command is an empty
# list when no usable compiler was found.
compiler_cmd = c_compiler_cmd("optimized")

css = """
.python {background-color: #306998;}
.cpp {background-color: #050;}
"""

with gr.Blocks(css=css) as ui:
    gr.Markdown("## Convert Python Code to High-Performance C++")

    with gr.Row():
        python = gr.Textbox(label="Python code", value=python_hard, lines=12)
        cpp = gr.Textbox(label="Generated C++ code", lines=12)

    with gr.Row():
        with gr.Column():
            sample_program = gr.Radio(["pi", "python_hard"], label="Sample program", value="python_hard")
            model = gr.Dropdown(["CodeQwen"], label="Select model", value="CodeQwen")
        with gr.Column():
            # Read-only indicators: preselect the single choice so the
            # detected value is actually shown instead of an empty radio.
            architecture = gr.Radio(
                [compiler_cmd[0]], label="Architecture", value=compiler_cmd[0], interactive=False
            )
            compiler = gr.Radio(
                [compiler_cmd[1]], label="Compiler", value=compiler_cmd[1], interactive=False
            )

    with gr.Row():
        convert = gr.Button("Convert to C++")

    with gr.Row():
        python_run = gr.Button("Run Python")
        # Enable only when there is a build command. Checking the label
        # against "Unavailable" left the button active for "Unsupported".
        cpp_run = gr.Button("Run C++", interactive=bool(compiler_cmd[2]))

    with gr.Row():
        python_out = gr.TextArea(label="Python output", elem_classes=["python"])
        cpp_out = gr.TextArea(label="C++ output", elem_classes=["cpp"])

    sample_program.change(select_sample_program, inputs=[sample_program], outputs=[python])
    convert.click(optimize, inputs=[python, model], outputs=[cpp])
    python_run.click(execute_python, inputs=[python], outputs=[python_out])
    cpp_run.click(execute_cpp, inputs=[cpp], outputs=[cpp_out])

ui.launch(inbrowser=True)

# ---------------------------------------------------------------------------
# Residue of the enclosing git patch, preserved as comments: the
# requirements.txt hunk. `dotenv` is corrected to `python-dotenv`, the PyPI
# distribution that provides `from dotenv import load_dotenv`.
# ---------------------------------------------------------------------------
# diff --git a/requirements.txt b/requirements.txt
# new file mode 100644
# index 0000000..cccbd8c
# --- /dev/null
# +++ b/requirements.txt
# @@ -0,0 +1,5 @@
# +gradio
# +torch
# +transformers
# +huggingface_hub
# +python-dotenv
# \ No newline at end of file