Class: OmniAI::Anthropic::Computer

Inherits:
Object
  • Object
show all
Defined in:
lib/omniai/anthropic/computer.rb

Overview

A reference implementation of an OmniAI computer tool using xdotool for mouse / keyboard: docs.anthropic.com/en/docs/build-with-claude/computer-use#computer-tool

Usage:

computer = OmniAI::Anthropic::Computer.new(display_width_px: 1024, display_height_px: 768)

Defined Under Namespace

Modules: Action, Button

Constant Summary collapse

TYPE =
"computer_20241022"
SCREENSHOT_DELAY =

seconds

2.0
TYPING_DELAY =

milliseconds

20

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(display_width_px:, display_height_px:, display_number: 1, name: "computer") ⇒ Computer

Returns a new instance of Computer.

Parameters:

  • name (String) (defaults to: "computer")

    optional

  • display_width_px (Integer)
  • display_height_px (Integer)
  • display_number (Integer) (defaults to: 1)

    optional



48
49
50
51
52
53
# File 'lib/omniai/anthropic/computer.rb', line 48

# Stores the display geometry and tool name used when the tool is serialized.
#
# @param display_width_px [Integer]
# @param display_height_px [Integer]
# @param display_number [Integer] optional
# @param name [String] optional
def initialize(display_width_px:, display_height_px:, display_number: 1, name: "computer")
  @display_width_px = display_width_px
  @display_height_px = display_height_px
  @name = name
  @display_number = display_number
end

Instance Attribute Details

#name ⇒ String

Returns:

  • (String)


42
43
44
# File 'lib/omniai/anthropic/computer.rb', line 42

# Reader for the tool name stored in @name.
#
# @return [String]
def name = @name

Instance Method Details

#call(args = {}) ⇒ String

Examples:

computer.call({ "action" => 'type', "text" => 'Hello' })

Parameters:

  • args (Hash) (defaults to: {})

Returns:

  • (String)


81
82
83
84
85
86
87
# File 'lib/omniai/anthropic/computer.rb', line 81

# Unpacks a string-keyed argument hash and hands it to #perform.
#
# @example
#   computer.call({ "action" => 'type', "text" => 'Hello' })
#
# @param args [Hash] reads the "action", "text" and "coordinate" keys
#
# @return [Object] whatever #perform returns for the given action
def call(args = {})
  action, text, coordinate = args.values_at("action", "text", "coordinate")
  perform(action:, text:, coordinate:)
end

#click(button:) ⇒ String

Parameters:

  • button (Integer)

Returns:

  • (String)


130
131
132
# File 'lib/omniai/anthropic/computer.rb', line 130

# Issues `xdotool click <button>`.
#
# @param button [Integer]
#
# @return [String] the result of #xdotool
def click(button:) = xdotool("click", button)

#double_click(button:) ⇒ String

Parameters:

  • button (Integer)

Returns:

  • (String)


137
138
139
# File 'lib/omniai/anthropic/computer.rb', line 137

# Issues `xdotool click --repeat 2 <button>`.
#
# xdotool's usage is `click [options] button`: options must come before the
# button, because anything after the positional argument is read as the next
# chained xdotool command rather than as an option of `click`.
#
# @param button [Integer]
#
# @return [String] the result of #xdotool
def double_click(button:)
  xdotool("click", "--repeat", 2, button)
end

#key(text:) ⇒ String

Parameters:

  • text (String)

Returns:

  • (String)


174
175
176
# File 'lib/omniai/anthropic/computer.rb', line 174

# Issues `xdotool key -- <text>`; the `--` keeps the text from being parsed as an option.
#
# @param text [String]
#
# @return [String] the result of #xdotool
def key(text:) = xdotool("key", "--", text)

#mouse_down_move_up(coordinate:, button:) ⇒ String

Parameters:

  • coordinate (Array)
    x, y
  • button (Integer)

Returns:

  • (String)


153
154
155
156
# File 'lib/omniai/anthropic/computer.rb', line 153

# Chains three xdotool commands in one invocation: press the button, move the
# pointer to the coordinate (with `--sync`), then release the button.
#
# @param coordinate [Array] x, y
# @param button [Integer]
#
# @return [String] the result of #xdotool
def mouse_down_move_up(coordinate:, button:)
  target_x, target_y = coordinate
  press = ["mousedown", button]
  move = ["mousemove", "--sync", target_x, target_y]
  release = ["mouseup", button]

  xdotool(*press, *move, *release)
end

#mouse_location ⇒ String

Returns:

  • (String)


159
160
161
# File 'lib/omniai/anthropic/computer.rb', line 159

# Issues `xdotool getmouselocation`.
#
# @return [String] the result of #xdotool
def mouse_location = xdotool("getmouselocation")

#mouse_move(coordinate:) ⇒ String

Parameters:

  • coordinate (Array)
    x, y

Returns:

  • (String)


144
145
146
147
# File 'lib/omniai/anthropic/computer.rb', line 144

# Issues `xdotool mousemove --sync <x> <y>`.
#
# @param coordinate [Array] x, y
#
# @return [String] the result of #xdotool
def mouse_move(coordinate:)
  target_x, target_y = coordinate
  move = ["mousemove", "--sync", target_x, target_y]

  xdotool(*move)
end

#perform(action:, text: nil, coordinate: nil) ⇒ String, Hash, nil

Parameters:

  • action (String)
  • coordinate (Array) (defaults to: nil)
    x, y

    optional

  • text (String) (defaults to: nil)

    optional

Returns:

  • (String, Hash, nil)


94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# File 'lib/omniai/anthropic/computer.rb', line 94

# Dispatches an action name to the matching keyboard / mouse / screenshot helper.
#
# Click actions act on the current pointer position (the coordinate is not
# used); drag and move actions use the coordinate as their target.
#
# @param action [String] compared against the Action constants
# @param text [String] optional, used by the key and type actions
# @param coordinate [Array] x, y; optional, used by the drag and move actions
#
# @return [String, Hash, nil] the helper's result (a Hash for the screenshot
#   action), or nil when the action matches no branch
def perform(action:, text: nil, coordinate: nil) # rubocop:disable Metrics/CyclomaticComplexity
  case action
  when Action::KEY
    key(text:)
  when Action::TYPE
    type(text:)
  when Action::CURSOR_POSITION
    mouse_location
  when Action::LEFT_CLICK
    click(button: Button::LEFT)
  when Action::MIDDLE_CLICK
    click(button: Button::MIDDLE)
  when Action::RIGHT_CLICK
    click(button: Button::RIGHT)
  when Action::LEFT_CLICK_DRAG
    mouse_down_move_up(coordinate:, button: Button::LEFT)
  when Action::MIDDLE_CLICK_DRAG
    mouse_down_move_up(coordinate:, button: Button::MIDDLE)
  when Action::RIGHT_CLICK_DRAG
    mouse_down_move_up(coordinate:, button: Button::RIGHT)
  when Action::MOUSE_MOVE
    mouse_move(coordinate:)
  when Action::DOUBLE_CLICK
    double_click(button: Button::LEFT)
  when Action::SCREENSHOT
    screenshot
  end
end

#screenshot ⇒ Hash

Returns:

  • (Hash)


179
180
181
182
183
184
185
186
187
188
189
# File 'lib/omniai/anthropic/computer.rb', line 179

# Captures a PNG with `gnome-screenshot -w -f <tempfile>` and returns it as a
# base64 image source hash. The temporary file is always removed afterwards.
#
# The exit status of `gnome-screenshot` is not checked; if nothing was written
# to the file, `data` is an empty string.
#
# @return [Hash] `{ type: "base64", media_type: "image/png", data: String }`
def screenshot
  tempfile = Tempfile.new(["screenshot", ".png"])
  tempfile.binmode # PNG bytes must be read back untranslated
  Kernel.system("gnome-screenshot", "-w", "-f", tempfile.path)
  tempfile.rewind
  data = Base64.encode64(tempfile.read)

  { type: "base64", media_type: "image/png", data: data }
ensure
  # tempfile is nil when Tempfile.new itself raised; a plain `tempfile.close`
  # would then raise NoMethodError here and hide the original error.
  tempfile&.close!
end

#serialize ⇒ Hash

Examples:

tool.serialize # =>
# {
#  "type": "computer_20241022",
#  "name": "computer",
#  "display_width_px": 1024,
#  "display_height_px": 768,
#  "display_number": 1,
# }

Returns:

  • (Hash)


66
67
68
69
70
71
72
73
74
# File 'lib/omniai/anthropic/computer.rb', line 66

# Builds the tool definition hash; any positional arguments are accepted and ignored.
#
# @example
#   tool.serialize # =>
#   # {
#   #  "type": "computer_20241022",
#   #  "name": "computer",
#   #  "display_width_px": 1024,
#   #  "display_height_px": 768,
#   #  "display_number": 1,
#   # }
#
# @return [Hash]
def serialize(*)
  display = {
    display_width_px: @display_width_px,
    display_height_px: @display_height_px,
    display_number: @display_number,
  }

  { type: TYPE, name: @name, **display }
end

#shell(cmd) ⇒ String

Parameters:

  • cmd (String)

Returns:

  • (String)


114
115
116
117
118
# File 'lib/omniai/anthropic/computer.rb', line 114

# Runs a command with Open3.capture3 and summarises the outcome as one string.
#
# Numeric arguments (button numbers, pixel coordinates, delays) are converted
# to strings first: process spawning only accepts String arguments and raises
# TypeError for an Integer. Other argument types are passed through untouched.
#
# @param cmd [String]
# @param args [Array<String, Numeric>] further command-line arguments
# @param options [Hash] forwarded to Open3.capture3 (e.g. `stdin_data:`)
#
# @return [String] `stdout=… stderr=… status=…`, with stdout / stderr inspected
def shell(cmd, *args, **options)
  argv = args.map { |arg| arg.is_a?(Numeric) ? arg.to_s : arg }
  stdout, stderr, status = Open3.capture3(cmd, *argv, **options)

  "stdout=#{stdout.inspect} stderr=#{stderr.inspect} status=#{status}"
end

#type(text:, delay: TYPING_DELAY) ⇒ String

Parameters:

  • text (String)
  • delay (Integer) (defaults to: TYPING_DELAY)

    milliseconds

Returns:

  • (String)


167
168
169
# File 'lib/omniai/anthropic/computer.rb', line 167

# Issues `xdotool type --delay <delay> -- <text>`; the `--` keeps the text from
# being parsed as an option.
#
# @param text [String]
# @param delay [Integer] milliseconds
#
# @return [String] the result of #xdotool
def type(text:, delay: TYPING_DELAY)
  delay_option = ["--delay", delay]

  xdotool("type", *delay_option, "--", text)
end

#xdotool(...) ⇒ String

Parameters:

  • cmd (String)

Returns:

  • (String)


123
124
125
# File 'lib/omniai/anthropic/computer.rb', line 123

# Runs `xdotool` through #shell, forwarding every argument unchanged.
#
# @return [String] the result of #shell
def xdotool(...) = shell("xdotool", ...)