diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..cd74318 --- /dev/null +++ b/.env.example @@ -0,0 +1,3 @@ +BROWSERBASE_API_KEY=bb_live_your_api_key_here +BROWSERBASE_PROJECT_ID=your-bb-project-uuid-here +MODEL_API_KEY=sk-proj-your-llm-api-key-here diff --git a/.gitignore b/.gitignore index b1346e6..a65171e 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ build/ codegen.log kls_database.db +.env diff --git a/README.md b/README.md index abb13e3..35c6e6a 100644 --- a/README.md +++ b/README.md @@ -41,25 +41,181 @@ implementation("com.browserbase.api:stagehand-java:0.4.0") ## Requirements -This library requires Java 8 or later. +This library requires Java 8 through Java 21. Java 22+ is not currently supported. + +## Running the Example + +A complete working example is available at [`stagehand-java-example/src/main/java/com/stagehand/api/example/Main.java`](stagehand-java-example/src/main/java/com/stagehand/api/example/Main.java). + +To run it, first export the required environment variables, then use Gradle: + +```bash +export BROWSERBASE_API_KEY="your-bb-api-key" +export BROWSERBASE_PROJECT_ID="your-bb-project-uuid" +export MODEL_API_KEY="sk-proj-your-llm-api-key" + +./gradlew :stagehand-java-example:run +``` ## Usage +This example demonstrates the full Stagehand workflow: starting a session, navigating to a page, observing possible actions, acting on elements, extracting data, and running an autonomous agent. 
+ ```java import com.browserbase.api.client.StagehandClient; import com.browserbase.api.client.okhttp.StagehandOkHttpClient; -import com.browserbase.api.models.sessions.SessionActParams; -import com.browserbase.api.models.sessions.SessionActResponse; +import com.browserbase.api.core.JsonValue; +import com.browserbase.api.models.sessions.*; -// Configures using the `stagehand.browserbaseApiKey`, `stagehand.browserbaseProjectId`, `stagehand.modelApiKey` and `stagehand.baseUrl` system properties -// Or configures using the `BROWSERBASE_API_KEY`, `BROWSERBASE_PROJECT_ID`, `MODEL_API_KEY` and `STAGEHAND_BASE_URL` environment variables -StagehandClient client = StagehandOkHttpClient.fromEnv(); +import java.util.List; +import java.util.Map; +import java.util.Optional; -SessionActParams params = SessionActParams.builder() - .id("00000000-your-session-id-000000000000") - .input("click the first link on the page") - .build(); -SessionActResponse response = client.sessions().act(params); +public class Main { + private static final String SDK_VERSION = "3.0.6"; + + public static void main(String[] args) { + // Create client using environment variables: + // BROWSERBASE_API_KEY, BROWSERBASE_PROJECT_ID, MODEL_API_KEY + StagehandClient client = StagehandOkHttpClient.fromEnv(); + + // Start a new browser session + SessionStartResponse startResponse = client.sessions().start( + SessionStartParams.builder() + .modelName("openai/gpt-5-nano") + .xLanguage(SessionStartParams.XLanguage.TYPESCRIPT) + .xSdkVersion(SDK_VERSION) + .build() + ); + + String sessionId = startResponse.data().sessionId(); + System.out.println("Session started: " + sessionId); + + try { + // Navigate to a webpage + // frameId is required - use empty string for the main frame + client.sessions().navigate( + SessionNavigateParams.builder() + .id(sessionId) + .url("https://news.ycombinator.com") + .frameId("") + .xLanguage(SessionNavigateParams.XLanguage.TYPESCRIPT) + .xSdkVersion(SDK_VERSION) + .build() + ); + 
System.out.println("Navigated to Hacker News"); + + // Observe to find possible actions on the page + SessionObserveResponse observeResponse = client.sessions().observe( + SessionObserveParams.builder() + .id(sessionId) + .instruction("find the link to view comments for the top post") + .xLanguage(SessionObserveParams.XLanguage.TYPESCRIPT) + .xSdkVersion(SDK_VERSION) + .build() + ); + + List<SessionObserveResponse.Data.Result> results = observeResponse.data().result(); + System.out.println("Found " + results.size() + " possible actions"); + + if (results.isEmpty()) { + System.out.println("No actions found"); + return; + } + + // Take the first action returned by Observe + // Convert the result to an Action to pass to Act + SessionObserveResponse.Data.Result result = results.get(0); + Action action = JsonValue.from(result).convert(Action.class); + System.out.println("Acting on: " + action.description()); + + // Pass the structured action to Act + SessionActResponse actResponse = client.sessions().act( + SessionActParams.builder() + .id(sessionId) + .input(action) + .xLanguage(SessionActParams.XLanguage.TYPESCRIPT) + .xSdkVersion(SDK_VERSION) + .build() + ); + System.out.println("Act completed: " + actResponse.data().result().message()); + + // Extract structured data from the page using a JSON schema + SessionExtractResponse extractResponse = client.sessions().extract( + SessionExtractParams.builder() + .id(sessionId) + .instruction("extract the text of the top comment on this page") + .schema(SessionExtractParams.Schema.builder() + .putAdditionalProperty("type", JsonValue.from("object")) + .putAdditionalProperty("properties", JsonValue.from(Map.of( + "commentText", Map.of( + "type", "string", + "description", "The text content of the top comment" + ), + "author", Map.of( + "type", "string", + "description", "The username of the comment author" + ) + ))) + .putAdditionalProperty("required", JsonValue.from(List.of("commentText"))) + .build()) + .xLanguage(SessionExtractParams.XLanguage.TYPESCRIPT) + 
.xSdkVersion(SDK_VERSION) + .build() + ); + + JsonValue extractedResult = extractResponse.data()._result(); + System.out.println("Extracted data: " + extractedResult); + + // Get the author from the extracted data + String author = extractedResult.asObject() + .flatMap(obj -> Optional.ofNullable(obj.get("author"))) + .flatMap(JsonValue::asString) + .orElse("unknown"); + System.out.println("Looking up profile for author: " + author); + + // Run an autonomous agent to accomplish a complex task + SessionExecuteResponse executeResponse = client.sessions().execute( + SessionExecuteParams.builder() + .id(sessionId) + .executeOptions(SessionExecuteParams.ExecuteOptions.builder() + .instruction(String.format( + "Find any personal website, GitHub, or LinkedIn profile for user '%s'. " + + "Click on their username to view their profile page.", + author + )) + .maxSteps(10.0) + .build()) + .agentConfig(SessionExecuteParams.AgentConfig.builder() + .model(ModelConfig.ofModelConfigObject( + ModelConfig.ModelConfigObject.builder() + .modelName("openai/gpt-5-nano") + .apiKey(System.getenv("MODEL_API_KEY")) + .build() + )) + .cua(false) + .build()) + .xLanguage(SessionExecuteParams.XLanguage.TYPESCRIPT) + .xSdkVersion(SDK_VERSION) + .build() + ); + + System.out.println("Agent completed: " + executeResponse.data().result().message()); + System.out.println("Agent success: " + executeResponse.data().result().success()); + + } finally { + // End the browser session to clean up resources + client.sessions().end( + SessionEndParams.builder() + .id(sessionId) + .xLanguage(SessionEndParams.XLanguage.TYPESCRIPT) + .xSdkVersion(SDK_VERSION) + .build() + ); + System.out.println("Session ended"); + } + } +} ``` ## Client configuration @@ -125,7 +281,7 @@ To temporarily use a modified client configuration, while reusing the same conne import com.browserbase.api.client.StagehandClient; StagehandClient clientWithOptions = client.withOptions(optionsBuilder -> { - 
optionsBuilder.baseUrl("https://example.com"); + optionsBuilder.modelApiKey("sk-your-llm-api-key-here"); optionsBuilder.maxRetries(42); }); ``` diff --git a/stagehand-java-example/src/main/java/com/stagehand/api/example/Main.java b/stagehand-java-example/src/main/java/com/stagehand/api/example/Main.java new file mode 100644 index 0000000..f5b98ae --- /dev/null +++ b/stagehand-java-example/src/main/java/com/stagehand/api/example/Main.java @@ -0,0 +1,186 @@ +package com.stagehand.api.example; + +import com.browserbase.api.client.StagehandClient; +import com.browserbase.api.client.okhttp.StagehandOkHttpClient; +import com.browserbase.api.core.JsonValue; +import com.browserbase.api.core.RequestOptions; +import com.browserbase.api.models.sessions.*; + +import java.time.Duration; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +/** + * Basic example demonstrating the Stagehand Java SDK. + * + * This example shows the full flow of: + * 1. Starting a browser session + * 2. Navigating to a webpage + * 3. Observing to find possible actions + * 4. Acting on an element + * 5. Extracting structured data + * 6. Running an autonomous agent + * 7. 
Ending the session + * + * Required environment variables: + * - BROWSERBASE_API_KEY: Your Browserbase API key + * - BROWSERBASE_PROJECT_ID: Your Browserbase project ID + * - MODEL_API_KEY: Your OpenAI API key + */ +public class Main { + + // SDK version for API compatibility (matches TypeScript SDK v3) + private static final String SDK_VERSION = "3.0.6"; + + public static void main(String[] args) { + // Create client using environment variables + // BROWSERBASE_API_KEY, BROWSERBASE_PROJECT_ID, MODEL_API_KEY + StagehandClient client = StagehandOkHttpClient.fromEnv(); + + // Start a new browser session + SessionStartResponse startResponse = client.sessions().start( + SessionStartParams.builder() + .modelName("openai/gpt-5-nano") + .xLanguage(SessionStartParams.XLanguage.TYPESCRIPT) + .xSdkVersion(SDK_VERSION) + .build() + ); + + String sessionId = startResponse.data().sessionId(); + System.out.println("Session started: " + sessionId); + + try { + // Navigate to Hacker News + client.sessions().navigate( + SessionNavigateParams.builder() + .id(sessionId) + .url("https://news.ycombinator.com") + .frameId("") // Empty string for main frame + .xLanguage(SessionNavigateParams.XLanguage.TYPESCRIPT) + .xSdkVersion(SDK_VERSION) + .build() + ); + System.out.println("Navigated to Hacker News"); + + // Observe to find possible actions - looking for the comments link + SessionObserveResponse observeResponse = client.sessions().observe( + SessionObserveParams.builder() + .id(sessionId) + .instruction("find the link to view comments for the top post") + .xLanguage(SessionObserveParams.XLanguage.TYPESCRIPT) + .xSdkVersion(SDK_VERSION) + .build() + ); + + List<SessionObserveResponse.Data.Result> results = observeResponse.data().result(); + System.out.println("Found " + results.size() + " possible actions"); + + if (results.isEmpty()) { + System.out.println("No actions found"); + return; + } + + // Use the first result - convert to Action since they share the same shape + SessionObserveResponse.Data.Result result = 
results.get(0); + Action action = JsonValue.from(result).convert(Action.class); + System.out.println("Acting on: " + action.description()); + + // Pass the structured action to Act + SessionActResponse actResponse = client.sessions().act( + SessionActParams.builder() + .id(sessionId) + .input(action) + .xLanguage(SessionActParams.XLanguage.TYPESCRIPT) + .xSdkVersion(SDK_VERSION) + .build() + ); + System.out.println("Act completed: " + actResponse.data().result().message()); + + // Extract data from the page + // We're now on the comments page, so extract the top comment text + SessionExtractResponse extractResponse = client.sessions().extract( + SessionExtractParams.builder() + .id(sessionId) + .instruction("extract the text of the top comment on this page") + .schema(SessionExtractParams.Schema.builder() + .putAdditionalProperty("type", JsonValue.from("object")) + .putAdditionalProperty("properties", JsonValue.from(Map.of( + "commentText", Map.of( + "type", "string", + "description", "The text content of the top comment" + ), + "author", Map.of( + "type", "string", + "description", "The username of the comment author" + ) + ))) + .putAdditionalProperty("required", JsonValue.from(List.of("commentText"))) + .build()) + .xLanguage(SessionExtractParams.XLanguage.TYPESCRIPT) + .xSdkVersion(SDK_VERSION) + .build() + ); + + // Get the extracted result + JsonValue extractedResult = extractResponse.data()._result(); + System.out.println("Extracted data: " + extractedResult); + + // Get the author from the extracted data + String author = "unknown"; + Optional<Map<String, JsonValue>> extractedObject = extractedResult.asObject(); + if (extractedObject.isPresent()) { + JsonValue authorValue = extractedObject.get().get("author"); + if (authorValue != null) { + author = (String) authorValue.asString().orElse("unknown"); + } + } + System.out.println("Looking up profile for author: " + author); + + // Use the Agent to find the author's profile + // Execute runs an autonomous agent that can navigate and 
interact with pages + // Use a longer timeout (5 minutes) since agent execution can take a while + SessionExecuteResponse executeResponse = client.sessions().execute( + SessionExecuteParams.builder() + .id(sessionId) + .executeOptions(SessionExecuteParams.ExecuteOptions.builder() + .instruction(String.format( + "Find any personal website, GitHub, LinkedIn, or other best profile URL for the Hacker News user '%s'. " + + "Click on their username to go to their profile page and look for any links they have shared. " + + "Use Google Search with their username or other details from their profile if you dont find any direct links.", + author + )) + .maxSteps(15.0) + .build()) + .agentConfig(SessionExecuteParams.AgentConfig.builder() + .model(ModelConfig.ofModelConfigObject( + ModelConfig.ModelConfigObject.builder() + .modelName("openai/gpt-5-nano") + .apiKey(System.getenv("MODEL_API_KEY")) + .build() + )) + .cua(false) + .build()) + .xLanguage(SessionExecuteParams.XLanguage.TYPESCRIPT) + .xSdkVersion(SDK_VERSION) + .build(), + RequestOptions.builder().timeout(Duration.ofMinutes(5)).build() + ); + + System.out.println("Agent completed: " + executeResponse.data().result().message()); + System.out.println("Agent success: " + executeResponse.data().result().success()); + System.out.println("Agent actions taken: " + executeResponse.data().result().actions().size()); + + } finally { + // End the session to clean up resources + client.sessions().end( + SessionEndParams.builder() + .id(sessionId) + .xLanguage(SessionEndParams.XLanguage.TYPESCRIPT) + .xSdkVersion(SDK_VERSION) + .build() + ); + System.out.println("Session ended"); + } + } +}