用R语言翻译文本

zpjtge22  于 7个月前  发布在  R语言
关注(0)|答案(6)|浏览(162)

在寻找R中翻译文本的解决方案时,我得到了很多很老的答案,建议使用translateR包。我找到的最好的答案是this one
该答案发布于6年前,而在此期间translateR已不再是CRAN仓库的一部分(有人知道为什么吗?)。我想知道现在是否有更好的选择,即使用CRAN仓库中的软件包。
我的示例数据如下:

# Example data: three English sentences, stored in a single `sentences` column.
translate <- data.frame(
  sentences = c(
    "This needs to be translated to Dutch",
    "This also needs to be translated to Dutch",
    "Just as this one has to"
  )
)

字符串
目前在R中翻译文本的最佳选择是什么?

kdfy810k

kdfy810k1#

你可以使用deeplr包,它调用DeepL API。DeepL的翻译应该比谷歌翻译准确得多。

library(deeplr)

# Send every sentence of the example data to DeepL in one call, translating
# from English ("EN") to Dutch ("NL"). Replace "your_key" with a real
# authentication key before running.
deeplr::translate2(
  text = translate$sentences,
  source_lang = "EN",
  target_lang = "NL",
  auth_key = "your_key"
)

#[1] "Dit moet vertaald worden naar het Nederlands"    
#[2] "Dit moet ook vertaald worden naar het Nederlands"
#[3] "Net als deze moet"

字符串

wgeznvg7

wgeznvg72#

这里有一个方法,它本质上是从R中调用一个Python库:

library(reticulate)

# Data frame of the conda environments reticulate can see; the code below
# treats its first column as the environment names.
conda_Env <- conda_list()

# Create the "traduction" environment only when it does not exist yet.
# `%in%` never yields NA, so the condition is always a single TRUE/FALSE
# (unlike `any(x == "traduction") == FALSE`, which is NA if a name is NA).
if (!("traduction" %in% conda_Env[[1]])) {
  reticulate::conda_create(envname = "traduction", packages = c("transformers"))
}

reticulate::use_condaenv("traduction")

# Everything below runs on the Python side; the translation is printed by
# Python itself rather than returned to R.
py_run_string("from transformers import pipeline")
py_run_string("translator = pipeline('translation_en_to_fr')")
py_run_string("print(translator('It is easy to translate languages with transformers', max_length=40))")

[{'translation_text': "Il est facile de traduire des langues à l'aide de transformateurs"}]

字符串
这种方法不需要API,并且在本地运行。您还可以考虑以下方法,该方法也在本地运行:

library(reticulate)

# Data frame of the conda environments reticulate can see; the code below
# treats its first column as the environment names.
conda_Env <- conda_list()

# Create the "traduction" environment only when it does not exist yet.
# `%in%` never yields NA, so the condition is always a single TRUE/FALSE.
if (!("traduction" %in% conda_Env[[1]])) {
  reticulate::conda_create(envname = "traduction", packages = c("transformers"))
}

reticulate::use_condaenv("traduction")

# Import the Python module as an R object so the pipeline can be called like
# an ordinary R function and its result comes back as an R list.
transformers <- import("transformers")
translator <- transformers$pipeline('translation_en_to_fr')

# `40L` is passed to Python as an int; a bare `40` is an R double and would
# arrive as the float 40.0.
translator('It is easy to translate languages with transformers', max_length = 40L)

[[1]]
[[1]]$translation_text
[1] "Il est facile de traduire des langues à l'aide de transformateurs"

gab6jxml

gab6jxml3#

以下是另一种基于谷歌翻译的方法:

library(stringr)
library(pagedown)
library(pdftools)

text_To_Translate <- "La tutela de Vieux-la-Romaine est une "

# Percent-encode the whole query value instead of only replacing whitespace,
# so characters such as "&", "#", "+" or accented letters cannot corrupt the
# URL. `repeated = TRUE` forces encoding even if the text happens to contain
# something that looks like an existing "%xx" escape.
text_To_Translate <- utils::URLencode(enc2utf8(text_To_Translate),
                                      reserved = TRUE, repeated = TRUE)
url <- paste0('https://translate.google.com/?hl=fr&sl=fr&tl=en&text=', text_To_Translate, '&op=translate')

# Print the rendered translation page to a temporary PDF file.
temp_PDF <- tempfile(fileext = ".pdf")
tryCatch(
  pagedown::chrome_print(input = url, output = temp_PDF, wait = 2),
  error = function(e) {
    # Report the failure instead of hiding it; NA is still returned so the
    # original best-effort flow is kept.
    warning("chrome_print() failed: ", conditionMessage(e), call. = FALSE)
    NA
  }
)

# Extract the PDF text and split the first page into lines.
translated_Text <- pdf_text(temp_PDF)
translated_Text <- strsplit(translated_Text, split = "\r\n|\n")[[1]]

# NOTE(review): lines 12 and 13 are hard-coded positions of the source and
# translated text in the printed page; they depend on the page layout and on
# the length of the input -- verify for other inputs.
translated_Text <- translated_Text[c(12, 13)]
translated_Text[1] <- str_remove(string = translated_Text[1], pattern = "         clear")

# Each kept line holds "source <wide gap> translation"; split on the gap.
str_split(translated_Text, "[:space:]{20,100}")

[[1]]
[1] "La tutela de Vieux-la-"    "The guardian of Vieux-la-"

[[2]]
[1] "Romaine est une" "Romaine is a"

字符串

vohkndzv

vohkndzv4#

这里有一个方法,可以用来从英语翻译成荷兰语:

library(reticulate)

# Data frame of the conda environments reticulate can see; the code below
# treats its first column as the environment names.
conda_Env <- conda_list()

# Create and populate the "traduction" environment only when it is missing.
# `%in%` never yields NA, so the condition is always a single TRUE/FALSE
# (unlike `any(x == "traduction") == FALSE`, which is NA if a name is NA).
if (!("traduction" %in% conda_Env[[1]])) {
  reticulate::conda_create(envname = "traduction", packages = c("transformers", "SentencePiece"), python_version = "3.9.16")
  # torch is installed with pip rather than through conda.
  reticulate::conda_install(envname = "traduction", packages = "torch", pip = TRUE)
}

reticulate::use_condaenv(condaenv = "traduction")
transformers <- import(module = "transformers")

# Load the tokenizer and the seq2seq model from the same pretrained checkpoint.
tokenizer <- transformers$AutoTokenizer$from_pretrained("yhavinga/t5-small-24L-ccmatrix-multi")
model <- transformers$AutoModelForSeq2SeqLM$from_pretrained("yhavinga/t5-small-24L-ccmatrix-multi")

# Build an English -> Dutch translation pipeline and apply it to all
# sentences in one call; the result is a list with one `translation_text`
# entry per input sentence.
translator <- transformers$pipeline("translation_en_to_nl", tokenizer = tokenizer, model = model)
vec_Text <- c("This needs to be translated to Dutch", "This also needs to be translated to Dutch", "Just as this one has to")
translator(vec_Text)

[[1]]
[[1]]$translation_text
[1] "Dit moet vertaald worden naar het Nederlands"

[[2]]
[[2]]$translation_text
[1] "Dit moet ook vertaald worden naar het Nederlands"

[[3]]
[[3]]$translation_text
[1] "Net zoals deze moet"

字符串
使用这种方法,您不需要API密钥,它可以在您的计算机上本地运行。

vqlkdk9b

vqlkdk9b5#

下面是一个基于ChatGPT的方法,它需要一个API密钥:

library(chatgpt)

# Provide the OpenAI API key through the environment (replace "xxx" with a
# real key) -- presumably this is where the chatgpt package reads it from.
Sys.setenv(OPENAI_API_KEY = "xxx")

# The translation request is phrased as a plain-language question.
question <- "Can you translate the following sentence from English to Dutch : This also needs to be translated to Dutch"

# Start from a clean chat session, then send the question.
chatgpt::reset_chat_session()
chatgpt::ask_chatgpt(question)

*** ChatGPT input:

Can you translate the following sentence from English to Dutch : This also needs to be translated to Dutch

[1] "Dit moet ook vertaald worden naar het Nederlands."

字符串

pepwfjgg

pepwfjgg6#

下面是另一种方法,它使用微软Azure提供的ChatGPT:

library(reticulate)

# Data frame of the conda environments reticulate can see; the code below
# treats its first column as the environment names.
conda_Env <- conda_list()

# Create the "azureGPT" environment only when it is missing. `%in%` never
# yields NA, so the condition is always a single TRUE/FALSE.
if (!("azureGPT" %in% conda_Env[[1]])) {
  # NOTE(review): `openai$ChatCompletion` used below is the pre-1.0 interface
  # of the openai Python package; an unpinned install may pull a newer release
  # that no longer provides it -- confirm and pin the version if needed.
  reticulate::conda_create(envname = "azureGPT", packages = c("openai"), python_version = "3.9.16")
}

reticulate::use_condaenv(condaenv = "azureGPT")
openai <- import(module = "openai")

# Point the client at an Azure OpenAI resource; replace "yyy" and "xxx" with
# the real resource name and API key.
openai$api_type <- "azure"
openai$api_base <- "https://yyy.openai.azure.com/"
openai$api_version <- "2023-07-01-preview"
openai$api_key <- "xxx"

# Conversation sent to the model: a system instruction followed by the user
# request. (The system prompt originally read "You will me to translate",
# which was missing its verb.)
messages <- list(list(role = 'system',
                      content = 'You will help me to translate from english to dutch.'),
                 list(role = 'user',
                      content = 'Translate from english to dutch the following sentence : This needs to be translated to Dutch'))

model <- openai$ChatCompletion

# `engine` is the deployment name passed to the service; `max_tokens` uses an
# integer literal so Python receives an int rather than a float.
response <- model$create(engine = "GPT35",
                         messages = messages,
                         temperature = 0,
                         max_tokens = 350L,
                         top_p = 0.95,
                         frequency_penalty = 0,
                         presence_penalty = 0,
                         stop = NULL)

# The translated sentence is in the `message` -> `content` field of the
# returned choice.
response$choices

{
  "index": 0,
  "finish_reason": "stop",
  "message": {
    "role": "assistant",
    "content": "Dit moet vertaald worden naar het Nederlands."
  },
  "content_filter_results": {
    "hate": {
      "filtered": false,
      "severity": "safe"
    },
    "self_harm": {
      "filtered": false,
      "severity": "safe"
    },
    "sexual": {
      "filtered": false,
      "severity": "safe"
    },
    "violence": {
      "filtered": false,
      "severity": "safe"
    }
  }
}

字符串

相关问题