
Tidy pipelines and structured output

knitr::opts_chunk$set(
  collapse = TRUE, comment = "#>",
  # evaluate chunks only when LLMR_RUN_VIGNETTES is set to "true"
  eval = identical(tolower(Sys.getenv("LLMR_RUN_VIGNETTES", "false")), "true")
)

We’ll show both unstructured and structured pipelines, using four model names:

- gpt-5-nano (OpenAI)
- claude-sonnet-4-20250514 (Anthropic)
- gemini-2.5-flash (Gemini)
- openai/gpt-oss-20b (Groq)

You will need the environment variables OPENAI_API_KEY, ANTHROPIC_API_KEY, GEMINI_API_KEY, and GROQ_API_KEY set.
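
Before running anything, you can check that all four keys are actually set; this is a small sketch using base R, not part of the package itself:

keys <- c("OPENAI_API_KEY", "ANTHROPIC_API_KEY", "GEMINI_API_KEY", "GROQ_API_KEY")
# returns a named logical vector; every entry must be TRUE for the chunks below to run
vapply(keys, function(k) nzchar(Sys.getenv(k)), logical(1))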

library(LLMR)
library(dplyr)

cfg_openai <- llm_config("openai",   "gpt-5-nano")
cfg_cld    <- llm_config("anthropic","claude-sonnet-4-20250514", max_tokens = 512)  # avoid warnings; Anthropic requires max_tokens
cfg_gemini <- llm_config("gemini",   "gemini-2.5-flash")
cfg_groq   <- llm_config("groq",     "openai/gpt-oss-20b")

llm_fn: unstructured (OpenAI)

words <- c("excellent", "awful", "fine")
out <- llm_fn(
  words,
  prompt  = "Classify '{x}' as Positive, Negative, or Neutral.",
  .config = cfg_openai,
  .return = "columns"
)
out
#> # A tibble: 3 × 14
#>   response_text finish_reason sent_tokens rec_tokens total_tokens
#>   <chr>         <chr>               <int>      <int>        <int>
#> 1 Positive      stop                   19        138          157
#> 2 Negative.     stop                   20        204          224
#> 3 Neutral       stop                   19        266          285
#> # ℹ 9 more variables: reasoning_tokens <int>, success <lgl>,
#> #   error_message <chr>, status_code <int>, error_code <chr>, bad_param <chr>,
#> #   response_id <chr>, duration <dbl>, raw_response_json <chr>

llm_fn: unstructured (Groq)

out_groq <- llm_fn(
  words,
  prompt  = "Classify '{x}' as Positive, Negative, or Neutral.",
  .config = cfg_groq,
  .return = "columns"
)
out_groq
#> # A tibble: 3 × 14
#>   response_text finish_reason sent_tokens rec_tokens total_tokens
#>   <chr>         <chr>               <int>      <int>        <int>
#> 1 Positive      stop                   84         50          134
#> 2 Negative      stop                   85         89          174
#> 3 Positive      stop                   84        103          187
#> # ℹ 9 more variables: reasoning_tokens <int>, success <lgl>,
#> #   error_message <chr>, status_code <int>, error_code <chr>, bad_param <chr>,
#> #   response_id <chr>, duration <dbl>, raw_response_json <chr>
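
Both calls return one row per input word, in the original order, so the two providers can be lined up for a quick comparison. The snippet below is an illustrative sketch, not output from the vignette:

comparison <- dplyr::bind_rows(openai = out, groq = out_groq, .id = "provider") |>
  dplyr::mutate(word = rep(words, 2)) |>   # re-attach the inputs; rows keep their original order
  dplyr::select(provider, word, response_text)
comparison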

llm_fn_structured: schema-first (OpenAI)

schema <- list(
  type = "object",
  properties = list(
    label = list(type = "string", description = "Sentiment label"),
    score = list(type = "number", description = "Confidence 0..1")
  ),
  required = list("label", "score"),
  additionalProperties = FALSE
)

out_s <- llm_fn_structured(
  x = words,
  prompt  = "Classify '{x}' as Positive, Negative, or Neutral with confidence.",
  .config = cfg_openai,
  .schema = schema,
  .fields = c("label", "score")
)
out_s
#> # A tibble: 3 × 20
#>   response_text                finish_reason sent_tokens rec_tokens total_tokens
#>   <chr>                        <chr>               <int>      <int>        <int>
#> 1 "{\"label\":\"Positive\",\"… stop                   69        409          478
#> 2 "{\"label\":\"Negative\",\"… stop                   70        410          480
#> 3 "{\"label\":\"Neutral\",\"s… stop                   69        473          542
#> # ℹ 15 more variables: reasoning_tokens <int>, success <lgl>,
#> #   error_message <chr>, status_code <int>, error_code <chr>, bad_param <chr>,
#> #   response_id <chr>, duration <dbl>, raw_response_json <chr>,
#> #   structured_ok <lgl>, structured_data <list>, label <chr>, score <dbl>,
#> #   structured_valid <lgl>, structured_error <chr>
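
Because .fields was supplied, label and score are hoisted into their own columns and can be used directly in a tidy pipeline. A minimal sketch, keeping only rows flagged as valid structured output:

out_s |>
  dplyr::mutate(word = words) |>        # re-attach the inputs (same row order)
  dplyr::filter(structured_valid) |>    # keep rows flagged valid
  dplyr::select(word, label, score)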

llm_mutate: unstructured (Anthropic)

df <- tibble::tibble(
  id   = 1:3,
  text = c("Cats are great pets", "The weather is bad", "I like tea")
)

df_u <- df |>
  llm_mutate(
    answer,
    prompt  = "Give a short category for: {text}",
    .config = cfg_cld,
    .return = "columns"
  )

df_u
#> # A tibble: 3 × 15
#>   answer answer_finish answer_sent answer_rec answer_tot answer_reason answer_ok
#>   <chr>  <chr>               <int>      <int>      <int>         <int> <lgl>    
#> 1 **Pet… stop                   18         14         32            NA TRUE     
#> 2 **Wea… stop                   17         14         31            NA TRUE     
#> 3 **Bev… stop                   16         10         26            NA TRUE     
#> # ℹ 8 more variables: answer_err <chr>, answer_id <chr>, answer_status <int>,
#> #   answer_ecode <chr>, answer_param <chr>, answer_t <dbl>, id <int>,
#> #   text <chr>
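
For downstream analysis you typically keep only a handful of these columns; for example:

df_u |>
  dplyr::select(id, text, answer, answer_ok)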

llm_mutate_structured: structured (Gemini)

schema2 <- list(
  type = "object",
  properties = list(
    category  = list(type = "string"),
    rationale = list(type = "string")
  ),
  required = list("category", "rationale"),
  additionalProperties = FALSE
)

df_s <- df |>
  llm_mutate_structured(
    annot,
    prompt  = "Extract category and a one-sentence rationale for: {text}",
    .config = cfg_gemini,
    .schema = schema2
    # Because a schema is present, fields auto-hoist; you can also pass:
    # .fields = c("category", "rationale")
  )

df_s
#> # A tibble: 3 × 19
#>   annot        annot_finish annot_sent annot_rec annot_tot annot_reason annot_ok
#>   <chr>        <chr>             <int>     <int>     <int>        <int> <lgl>   
#> 1 "{\n  \"cat… stop                 15        31        46          923 TRUE    
#> 2 "{\n  \"cat… stop                 15        30        45          129 TRUE    
#> 3 "{\n  \"cat… stop                 14        36        50          129 TRUE    
#> # ℹ 12 more variables: annot_err <chr>, annot_id <chr>, annot_status <int>,
#> #   annot_ecode <chr>, annot_param <chr>, annot_t <dbl>, id <int>, text <chr>,
#> #   structured_ok <lgl>, structured_data <list>, category <chr>,
#> #   rationale <chr>
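
The hoisted category column can then be summarised like any other; a short sketch:

df_s |>
  dplyr::filter(structured_ok) |>   # keep rows where structured parsing succeeded
  dplyr::count(category)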
