基于GPT-3.5-Instruct编写的JSON翻译

梦汐 · 发表于 2023-9-24 16:16

本帖最后由 梦汐 于 2023-9-24 16:18 编辑


/**
 * Translate one string or an array of strings through an OpenAI-compatible
 * completions endpoint, using the userscript GM_xmlhttpRequest API.
 *
 * The prompt asks gpt-3.5-turbo-instruct to answer with JSON key-value pairs
 * (source text -> translation).
 *
 * @param {string|string[]} str Text to translate; a lone value is wrapped in an array.
 * @param {string} [tl="Chinese"] Target language name inserted into the prompt.
 * @returns {Promise<string>} Resolves with the raw completion text (expected,
 *     but not guaranteed, to be JSON). Rejects on network error, timeout,
 *     abort, or when the response cannot be parsed or carries no completion.
 */
function Transl(str, tl) {
    const items = Array.isArray(str) ? str : [str]
    // Compare against undefined explicitly: a bare `undefined ? a : b`
    // always evaluates to b, so the default would never be applied.
    const target = tl === undefined ? "Chinese" : tl
    // Build the prompt once so the logged text is exactly what is sent.
    const prompt = `Translate to ${target}, save as JSON, with translation results as key-value pairs:${JSON.stringify(items, null, 0)}`
    console.log(prompt);
    return new Promise(
        (complete, fail) => {
            // Turn a transport-level failure into a rejection so callers
            // awaiting the promise do not hang forever.
            const failWith = reason => () => fail(new Error(`Transl request ${reason}`))
            GM_xmlhttpRequest({
                method: "POST",
                url: "https://填你的代{过}{滤}理域名/v1/completions",
                headers: {
                    "Content-Type": "application/json",
                    "Authorization": "Bearer 填你的key"
                },
                data: JSON.stringify({
                    "model": "gpt-3.5-turbo-instruct",
                    "prompt": prompt,
                    "max_tokens": 1024,//max 4096
                    "temperature": 0.2
                }),
                onload: function (response) {
                    try {
                        var data = JSON.parse(response.responseText);
                        console.log(data);
                        // Throws when the body has no `choices` (e.g. an API
                        // error payload); the catch below rejects in that case.
                        complete(data.choices[0].text)
                    } catch (err) {
                        fail(err)
                    }
                },
                onerror: failWith("failed"),
                ontimeout: failWith("timed out"),
                onabort: failWith("was aborted")
            });
        }
    )

}
// Demo: send three short sample phrases to Transl and print the raw
// completion text it resolves with.
let sl = ["hi", "save uit", "4u"]
console.log(await Transl(sl, "Chinese"));

import json
import openai
import tiktoken
import tkinter as tk
from tkinter import filedialog

class TranslationArray:
    """Translate the keys of a dictionary with an OpenAI completion model.

    The keys are packed into chunks by token count, each chunk is sent as one
    completion request, and the JSON objects returned by the model are merged
    into a single result dictionary.
    """

    def __init__(self, base_url: str, api_key: str):
        """Configure the ``openai`` module with *base_url* and *api_key*.

        These are assigned on the ``openai`` module itself, so the settings
        are shared by everything in the process that uses that module.
        """
        openai.api_base = base_url
        openai.api_key = api_key

    @staticmethod
    def _chunk_keys(keys: list, threshold: int, count_tokens) -> list:
        """Greedily pack *keys* into lists whose JSON form fits *threshold*.

        Args:
            keys: Items to pack, kept in their original order.
            threshold: Maximum token count of one JSON-serialized chunk.
            count_tokens: Callable mapping a string to its token count.

        Returns:
            A list of chunks (lists of keys). A single key that exceeds
            *threshold* on its own still gets a chunk to itself; empty input
            yields an empty list.
        """
        chunks = []
        current = []
        for key in keys:
            # Close the running chunk when adding this key would overflow it.
            # Serialize without ASCII escaping so the measurement matches the
            # text that is actually placed in the prompt.
            if current and count_tokens(
                    json.dumps(current + [key], ensure_ascii=False)) > threshold:
                chunks.append(current)
                current = []
            current.append(key)
        if current:
            chunks.append(current)
        return chunks

    @staticmethod
    def _build_prompt(chunk: list, target_language: str) -> str:
        """Return the completion prompt for one chunk of keys.

        Falls back to ``'Chinese'`` when *target_language* is empty or None.
        The chunk is serialized with ``ensure_ascii=False`` so non-ASCII keys
        are sent as-is instead of as ``\\uXXXX`` escapes, which would make
        the prompt much longer than the size the chunk was budgeted with.
        """
        return (
            f"Translate to {target_language or 'Chinese'}, save as JSON, "
            "with translation results as key-value pairs:"
            + json.dumps(chunk, ensure_ascii=False)
        )

    def translate(self, dictionary: dict, target_language: str):
        """Translate the keys of *dictionary* into *target_language*.

        Only the keys are sent to the model; the values of *dictionary* are
        not used.

        Args:
            dictionary: Mapping whose keys are the texts to translate.
            target_language: Language name inserted into the prompt.

        Returns:
            A dict built by merging the JSON object the model returns for
            each chunk (expected to map source text to its translation).

        Raises:
            ValueError: If a prompt leaves no room for a completion, or the
                model's reply is not a JSON object (``json.JSONDecodeError``
                is a ``ValueError`` subclass).
        """
        if not dictionary:
            return {}

        # Look the encoding up once instead of on every count.
        encoding = tiktoken.encoding_for_model("gpt-3.5-turbo-instruct")

        def get_token_count(text: str) -> int:
            return len(encoding.encode(text))

        export = {}
        for chunk in self._chunk_keys(list(dictionary), 1200, get_token_count):
            body = self._build_prompt(chunk, target_language)
            # Budget 4096 tokens for prompt plus completion and give the
            # completion whatever the prompt leaves over.
            tokens = 4096 - get_token_count(body)
            if tokens <= 0:
                raise ValueError(
                    "prompt uses the whole 4096-token budget; "
                    "a key is too long to translate in one request")
            complete = openai.Completion.create(
                model="gpt-3.5-turbo-instruct",
                prompt=body,
                max_tokens=tokens,
                temperature=0.2
            )
            translated = json.loads(complete["choices"][0]["text"])
            if not isinstance(translated, dict):
                raise ValueError(
                    "expected a JSON object from the model, got "
                    + type(translated).__name__)
            # Add the translation result to the output dictionary
            export.update(translated)
        return export

# Select the JSON file to be translated
root = tk.Tk()
root.withdraw()  # hide the empty main window so only the file dialog shows
file_path = filedialog.askopenfilename(filetypes=[("JSON Files", "*.json")])

# The dialog returns an empty value when it is cancelled; stop cleanly
# instead of letting open() fail on an empty path.
if not file_path:
    raise SystemExit("No file selected")

# Read the data from the JSON file
with open(file_path, encoding="utf-8") as f:
    data = json.load(f)

# Only the keys of a JSON object are translated, so reject other roots
# before spending any API calls.
if not isinstance(data, dict):
    raise SystemExit("The selected file must contain a JSON object")

# Translate the data to Chinese
translator = TranslationArray("your base", "your key")
data = translator.translate(data, "Chinese")

# Write the translation result to the original file (this overwrites it)
with open(file_path, "w", encoding="utf-8") as outfile:
    json.dump(data, outfile, ensure_ascii=False)

print("save", file_path)

marco527 · 发表于 2023-9-24 16:30

GPT-3.5的训练数据截止到2021年9月，翻译出来总感觉有点生硬。不过还是要支持一下楼主。

waxxy · 发表于 2023-9-24 23:21

感谢分享，这个跟谷歌翻译或DeepL相比，效果会好些吗？

帐号		自动登录	找回密码
密码			注册[Register]

[其他原创] 基于GPT-3.5-Instruct编写的JSON翻译

免费评分

个人中心