WIP prompt engineering

2025-06-08 17:56:36 +02:00 · 2025-06-08 17:56:36 +02:00 · 0bb5b9f876
commit 0bb5b9f876
parent 128ad5ee1f
7 changed files with 201 additions and 82 deletions
--- a/src/functions/prompts-to-test-spec/src/services/project-service.ts
+++ b/src/functions/prompts-to-test-spec/src/services/project-service.ts
@ -200,12 +200,12 @@ export class ProjectService {
        let logMessage = `${timestamp} - Gemini updates`;

        response.stepOutcomes.forEach(outcome => {
-            logMessage += `\n- ${outcome.decision}: ${outcome.reason}`;
+            logMessage += `\n- ${outcome.outcomes}: ${outcome.reason}`;
        })
-        response.fileDeleted.forEach(file => {
+        response.filesDeleted.forEach(file => {
            logMessage += `\n-  Delete file ${file}`;
        })
-        response.fileWritten.forEach(file => {
+        response.filesWritten.forEach(file => {
            logMessage += `\n-  Added file ${file}`;
        })

--- a/src/functions/prompts-to-test-spec/src/services/project-workitems-service.ts
+++ b/src/functions/prompts-to-test-spec/src/services/project-workitems-service.ts
@ -117,7 +117,7 @@ export class ProjectWorkitemsService {
                relevantFiles
            );

-            const hasChanges = result.fileWritten.length > 0 || result.fileDeleted.length > 0;
+            const hasChanges = result.filesWritten.length > 0 || result.filesDeleted.length > 0;
            // Update the workitem file with implementation log
            if (hasChanges) {
                try {
@ -133,12 +133,12 @@ export class ProjectWorkitemsService {
                }
            }

-            console.log(`ProjectWorkitemsService: Completed processing workitem: ${workitem.name} (Files written: ${result.fileWritten.length})`);
+            console.log(`ProjectWorkitemsService: Completed processing workitem: ${workitem.name} (Files written: ${result.filesWritten.length})`);
            return {
                success: true,
                workitem,
-                filesWritten: result.fileWritten,
-                filesRemoved: result.fileDeleted,
+                filesWritten: result.filesWritten,
+                filesRemoved: result.filesDeleted,
            };
        } catch (error) {
            console.error(`Error processing workitem ${workitem.name}:`, error);
@ -203,8 +203,8 @@ export class ProjectWorkitemsService {
        if (DRY_RUN_SKIP_GEMINI) {
            console.log(`[DRY RUN] Skipping Gemini API call for generating feature file for ${workitemName}`);
            return {
-                fileWritten: [],
-                fileDeleted: [],
+                filesWritten: [],
+                filesDeleted: [],
                stepOutcomes: [],
                modelResponses: []
            };
--- a/src/functions/shared/src/services/gemini-file-system-service.ts
+++ b/src/functions/shared/src/services/gemini-file-system-service.ts
@ -23,18 +23,23 @@ export interface FunctionArgs {
    dirPath?: string;
    searchString?: string;
    filePattern?: string;
-    outcome?: 'create' | 'update' | 'delete' | 'skip';
-    reason?: string;
+    step?: string;
+    outcome?: string | 'end';
+    description?: string;
 }

 export interface GeminiResponse {
-    fileWritten: string[];
-    fileDeleted: string[];
+    filesWritten: string[];
+    filesDeleted: string[];
    stepOutcomes: {
-        decision: 'create' | 'update' | 'delete' | 'skip';
+        step?: string;
+        outcomes: string;
        reason: string;
    }[];
    modelResponses: string[];
+    inputCost?: number;
+    outputCost?: number;
+    totalCost?: number;
 }

 /**
@ -164,21 +169,25 @@ export class GeminiFileSystemService {
                    },
                    {
                        name: "reportStepOutcome",
-                        description: "Submit the outcome for a step in compliance with guidelines. Can be called multiple times.",
+                        description: "Submit the status/outcome for a step in your workplan",
                        parameters: {
                            type: FunctionDeclarationSchemaType.OBJECT,
                            properties: {
+                                step: {
+                                    type: FunctionDeclarationSchemaType.STRING,
+                                    description: "The step identifier",
+                                },
                                outcome: {
                                    type: FunctionDeclarationSchemaType.STRING,
-                                    description: "The step outcome: 'create', 'update', 'delete', or 'skip'",
-                                    enum: ["create", "update", "delete", "skip"]
+                                    description: "The step outcome. Use the special value 'end' to abort/complete the session",
+                                    enum: ["started", "done", "partially-done", "skip", "end", "end-confirmed"]
                                },
-                                reason: {
+                                description: {
                                    type: FunctionDeclarationSchemaType.STRING,
-                                    description: "Reason for this outcome. For instance, 'create' when files have been created, 'skip' when no files has been created, or 'update' when files have been updated."
+                                    description: "Description for this outcome. A short paragraph at most"
                                }
                            },
-                            required: ["outcome", "reason"]
+                            required: ["outcome", "description"]
                        }
                    }
                ]
@ -404,23 +413,26 @@ export class GeminiFileSystemService {
            console.log(`[DRY RUN] Skipping Gemini API call for processing`);
            return {
                stepOutcomes: [],
-                fileDeleted: [],
+                filesDeleted: [],
                modelResponses: [],
-                fileWritten: []
+                filesWritten: []
            };
        }

        // Create the prompt
-        const prompt = `
-Here is your guideline:
-
-${guidelines}
-
-Additional content:
-
-${additionalContent}
-
-You have access to the following function calls to help you understand the project structure and create implementations:
+        const prompts: string[] = [
+            `Here is your guidelines:
+${guidelines}`,
+            `Additional content:
+${additionalContent}`,
+            `Make a work plan:
+- create steps to comply with the guidelines
+- report each step outcome as you start them: use the reportStepOutcome(step, outcome, description) function
+- start each step by considering creating substeps based on the outcome of the preceding steps
+- keep track of step hierarchy by their identifiers. Dont create substeps at a depth higher than 5
+- report each step outcome as you complete them: use the reportStepOutcome(step, outcome, description) function            
+            `,
+            `Access the filesystem: You have access to the following function calls to interact with the project repository:
 - getFileContent(filePath): Get the content of a file in the project repository
 - writeFileContent(filePath, content): Write content to a file in the project repository (create or update)
 - fileExists(filePath): Check if a file exists in the project repository
@ -428,13 +440,17 @@ You have access to the following function calls to help you understand the proje
 - grepFiles(searchString, filePattern): Search for a string in project files, optionally filtered by a file pattern (glob)
   use filePattern='path/**' to search recursively in all files under path.
 - deleteFile(filePath): Delete a file from the project repository
-
-IMPORTANT: First use the function calls above to comply with the guidelines. Create, update, or delete all required files.
-
-You can use this function to report the outcome of each step as you work through the guidelines:
- reportStepOutcome(outcome, reason): Outcome must be one of: 'create', 'update', 'delete', 'skip'
-
-`;
+            `,
+            `Be throughout:
+Ensure each file you create is entirely implemented, and that you changes are fully compliant with the guidelines.
+Create a new work list is additional scanning / editing is required.
+`,
+            `Complete the session:
+Once you have completed all steps, call reportStepOutcome with outcome 'end'`,
+        ];
+        const promptContents: Content[] = prompts.map(promptPart => {
+            return {role: 'user', parts: [{text: promptPart}]}
+        })

        // Instantiate the model with our file operation tools
        const generativeModel = this.vertexAI.getGenerativeModel({
@ -447,9 +463,7 @@ You can use this function to report the outcome of each step as you work through

        // Create the initial request
        const request: GenerateContentRequest = {
-            contents: [
-                {role: 'user', parts: [{text: prompt}]}
-            ],
+            contents: promptContents,
            tools: this.fileOperationTools,
        };
        const geminiResponse = await this.handleGeminiStream(generativeModel, request, rootPath);
@ -463,6 +477,22 @@ You can use this function to report the outcome of each step as you work through
        return geminiResponse;
    }

+
+    private createReevaluationContrent(): Content [] {
+        return [
+            {
+                role: 'USER',
+                parts: [
+                    {
+                        text: `Re-evaluate compliance with all guidelines. 
+                        Create a new work list to comply if needed.
+                        Report a step with outcome 'end-confirmed' and a description detailling your confidence if you are completely done`
+                    }
+                ]
+            }
+        ];
+    }
+
    private createFunctionExchangeContents(
        functionCall: FunctionCall,
        responseData: any,
@ -497,7 +527,7 @@ You can use this function to report the outcome of each step as you work through
    private processFunctionCall(functionCall: FunctionCall, rootPath: string, callbacks: {
        onFileWritten: (file: string) => any;
        onFileDelete: (file: string) => any;
-        onStepOutcome: (outcome: 'create' | 'update' | 'delete' | 'skip', reason: string) => any
+        onStepOutcome: (step: string | undefined, outcome: string | 'end' | 'end-confirmed', reason: string) => any
    }): string | string[] | boolean | any {
        const functionName = functionCall.name;
        try {
@ -531,9 +561,13 @@ You can use this function to report the outcome of each step as you work through
                    callbacks.onFileDelete(functionArgs.filePath!);
                    break;
                case 'reportStepOutcome':
-                    console.debug(` - received reportStepOutcome function call: ${functionArgs.outcome} - ${functionArgs.reason}`);
-                    callbacks.onStepOutcome(functionArgs.outcome!, functionArgs.reason!);
-                    functionResponse = `Step outcome recorded: ${functionArgs.outcome} - ${functionArgs.reason}`;
+                    console.debug(` - received reportStepOutcome: ${functionArgs.step} -  ${functionArgs.outcome} - ${functionArgs.description}`);
+                    callbacks.onStepOutcome(functionArgs.step, functionArgs.outcome!, functionArgs.description!);
+                    functionResponse = {
+                        step: functionArgs.step,
+                        outcome: functionArgs.outcome,
+                        reason: functionArgs.description,
+                    };
                    break;
                default:
                    throw new Error(`Unknown function: ${functionName}`);
@ -551,17 +585,26 @@ You can use this function to report the outcome of each step as you work through
                                     rootPath: string,
                                     geminiResponse: GeminiResponse = {
                                         stepOutcomes: [],
-                                         fileDeleted: [],
-                                         fileWritten: [],
+                                         filesDeleted: [],
+                                         filesWritten: [],
                                         modelResponses: []
                                     }): Promise<GeminiResponse> {
        // Generate content in a streaming fashion
        const streamGenerateContentResult = await generativeModel.generateContentStream(request);

        const pendingFunctionCalls = [];
+        let endReceived = false;

        // Process the streaming response
        for await (const item of streamGenerateContentResult.stream) {
+            const inputTokens = item.usageMetadata?.promptTokenCount ?? 0;
+            const outputTokens = item.usageMetadata?.candidatesTokenCount ?? 0;
+            const totalTokens = item.usageMetadata?.totalTokenCount ?? 0;
+            geminiResponse.inputCost = (geminiResponse.inputCost ?? 0) + inputTokens;
+            geminiResponse.outputCost = (geminiResponse.outputCost ?? 0) + outputTokens;
+            geminiResponse.totalCost = (geminiResponse.totalCost ?? 0) + totalTokens;
+
+
            // Iterate over every part in the response
            let generateContentCandidates = item.candidates ?? [];
            if (generateContentCandidates.length === 0) {
@ -591,33 +634,59 @@ You can use this function to report the outcome of each step as you work through
            }
        }

-        // Process any function calls that were detected
-        if (pendingFunctionCalls.length > 0) {
        // TODO: drop old content above 1M tokens
        const updatedRequestContents = [
            ...request.contents,
        ];
+
+        // Process any function calls that were detected
+        if (pendingFunctionCalls.length > 0) {
            for (const functionCall of pendingFunctionCalls) {
                const responseData = this.processFunctionCall(functionCall, rootPath, {
-                    onFileWritten: (f) => geminiResponse.fileWritten.push(f),
-                    onFileDelete: (f) => geminiResponse.fileDeleted.push(f),
-                    onStepOutcome: (outcome, reason) => geminiResponse.stepOutcomes.push({
-                        decision: outcome,
+                    onFileWritten: (f) => {
+                        if (!geminiResponse.filesWritten.includes(f)) {
+                            geminiResponse.filesWritten.push(f);
+                        }
+                    },
+                    onFileDelete: (f) => {
+                        if (!geminiResponse.filesDeleted.includes(f)) {
+                            geminiResponse.filesDeleted.push(f)
+                        }
+                    },
+                    onStepOutcome: (step, outcome, reason) => {
+                        if (outcome === 'end') {
+                            const updatedContent = this.createReevaluationContrent();
+                            updatedRequestContents.push(...updatedContent);
+                        } else if (outcome === 'end-confirmed') {
+                            console.log('End confirmed');
+                            endReceived = true;
+                        } else {
+                            geminiResponse.stepOutcomes.push({
+                                step: step,
+                                outcomes: outcome,
                                reason: reason
-                    })
+                            });
+                        }
+                    }
                });
                const contents = this.createFunctionExchangeContents(functionCall, responseData);
                updatedRequestContents.push(...contents);
            }

+        } else {
+            console.debug("No function calls detected in response.")
+            const updatedContent = this.createReevaluationContrent();
+            updatedRequestContents.push(...updatedContent);
+        }
+
+        if (endReceived) {
+            return geminiResponse;
+        }
        // Submit a new request
        const updatedRequest: GenerateContentRequest = {
            contents: updatedRequestContents,
            tools: this.fileOperationTools,
        };
        return this.handleGeminiStream(generativeModel, updatedRequest, rootPath, geminiResponse);
-        } else {
-            return geminiResponse;
-        }
    }
 }
--- a/src/functions/test-spec-to-test-implementation/src/services/processor-service.ts
+++ b/src/functions/test-spec-to-test-implementation/src/services/processor-service.ts
@ -5,24 +5,25 @@ import * as path from 'path';
 import * as os from 'os';
 import {ProcessResult, RepoCredentials} from '../types';
 import {
-    RepositoryService as SharedRepositoryService,
+    GeminiService,
+    Project,
    PullRequestService as SharedPullRequestService,
-    GeminiService, Project
+    RepositoryService as SharedRepositoryService
 } from 'shared-functions';
 import {ProjectService} from './project-service';
 import {ProjectTestSpecsService} from './project-test-specs-service';
 import {
    DRY_RUN_SKIP_COMMITS,
+    DRY_RUN_SKIP_GEMINI,
+    GEMINI_MODEL,
    getGiteaCredentials,
    getGithubCredentials,
    getMainRepoCredentials,
+    GOOGLE_CLOUD_LOCATION,
+    GOOGLE_CLOUD_PROJECT_ID,
    MAIN_REPO_URL,
    USE_LOCAL_REPO,
-    validateConfig,
-    GOOGLE_CLOUD_PROJECT_ID,
-    GOOGLE_CLOUD_LOCATION,
-    GEMINI_MODEL,
-    DRY_RUN_SKIP_GEMINI
+    validateConfig
 } from '../config';

 export class ProcessorService {
@ -231,8 +232,18 @@ export class ProcessorService {
            await this.sharedRepositoryService.pushChanges(projectRepoPath, branchName, credentials);

            // Generate PR description using Gemini
-            const description = await this.geminiService.generatePullRequestDescription(
-                "Test spec implementation",
+            const modelResponses = result.modelResponses ?? [];
+            const lastModelResponse = modelResponses.slice(Math.max(modelResponses.length - 10, 0), modelResponses.length);
+            const changeDescription = `
+            feature spec implementation.
+            
+            ${result.totalCost} tokens consumed to write ${result.filesWritten?.length ?? 0} files`;
+            `last model responses:
+            ${lastModelResponse.join('\n')}
+            `;
+
+            const prDescription = await this.geminiService.generatePullRequestDescription(
+                changeDescription,
                result.gitPatch
            );

@ -245,7 +256,7 @@ export class ProcessorService {
                branchName,
                credentials,
                title,
-                description
+                prDescription
            );

            console.log(`Created pull request: ${pullRequestUrl}`);
--- a/src/functions/test-spec-to-test-implementation/src/services/project-test-specs-service.ts
+++ b/src/functions/test-spec-to-test-implementation/src/services/project-test-specs-service.ts
@ -41,7 +41,7 @@ export class ProjectTestSpecsService {

            if ((result.filesWritten?.length ?? 0) > 0 || (result.filesRemoved?.length ?? 0) > 0) {
                try {
-                    console.log(`Generating git patch for project ${project.name} with ${result.filesWritten} files written`);
+                    console.log(`Generating git patch for project ${project.name} with ${result.filesWritten?.length} files written`);
                    gitPatch = await this.sharedRepositoryService.generateGitPatch(projectRepoPath);

                } catch (error) {
@ -87,12 +87,13 @@ export class ProjectTestSpecsService {
                relevantFiles
            );

-            console.log(`ProjectTestSpecsService: Completed processing project (Files written: ${result.fileWritten.length})`);
+            console.log(`ProjectTestSpecsService: Completed processing project (Files written: ${result.filesWritten.length})`);
            return {
                project: project,
                success: true,
-                filesWritten: result.fileWritten,
-                filesRemoved: result.fileDeleted,
+                filesWritten: result.filesWritten,
+                filesRemoved: result.filesDeleted,
+                totalCost: result.totalCost
            };
        } catch (error) {
            console.error(`Error processing project ${project.name}:`, error);
@ -155,8 +156,8 @@ export class ProjectTestSpecsService {
            return {
                modelResponses: [],
                stepOutcomes: [],
-                fileDeleted: [],
-                fileWritten: []
+                filesDeleted: [],
+                filesWritten: []
            };
        }

--- a/src/functions/test-spec-to-test-implementation/src/types.ts
+++ b/src/functions/test-spec-to-test-implementation/src/types.ts
@ -36,6 +36,8 @@ export interface ProcessResult {
    gitPatch?: string;
    filesWritten?: string[];
    filesRemoved?: string[];
+    totalCost?: number;
+    modelResponses?: string[];
 }

 /**
--- a/src/prompts/test-spec-to-test-implementation/nitro-back/AI.md
+++ b/src/prompts/test-spec-to-test-implementation/nitro-back/AI.md
@ -1,4 +1,40 @@
-This is your guideline for the implementation of the feature file:
+Implement tests according to the cucumber ".feature" files.

 - Iterate over cucumber ".feature" definition files in the `nitro-it/src/test/resources/workitems/` directory.
- For each of them create all required files to implement the feature.
+- For each of them create all required files to implement the tests.
+- Use quarkus apis and best practices
+- All files and all their methods must be fully implemented, without any TODO, stub, or placeholder.
+- The code produced must be ready for test driven development without any adaptation required.
+- The tests are business-driven integration tests: A real api must be accessed to ensure proper application
+  behavior.
+
+- Scan the existing api in nitro-domain-api/src/main/java to implement http requests to the api endpoints.
+- Use the following techniques to identify the relevant resources:
+    - search for patterns like 'class Ws*<resource-name-camel-case>*' to identify api models file names
+    - search for patterns like 'interface Ws*<resource-name-camel-case>*Controller' to identify api controller file
+      names
+    - Retrieve files content to inspect their structure and interactions
+    - Grep a class name to discover where it's used across the codebase
+    - fetch the pom.xml files to inspect the dependencies and their versions
+- Get a complete understanding of the relevant resources, how they relate to each other, and the available operations.
+- Get a complete understanding of the various entities composing the business resources
+
+- Create missing global configuration in nitro-it/src/test/resources/application-bdd.properties
+- Create or update @ApplicationScoped services in nitro-it/src/test/java/be/fiscalteam/nitro/bdd/services/
+  to implement the test logic
+- Those services must be fully implemented and make actual http requests to the api endpoints when called.
+
+For each feature file, create or update the implementation in
+nitro-it/src/test/java/be/fiscalteam/nitro/bdd/features/<feature-name>/
+
+- Create or update a "ScenarioState.java" service annotated @ScenarioScope.
+  This service contains the state for each scenario execution.
+- Create or update a "FeatureSteps.java" class to implement the step definitions from the feature file.
+  This class injects the ScenarioState and other services. Add javadoc referencing the feature file.
+  Use Given/When/Then/And annotations from io.cucumber.java.en to implement each step in the feature file.
+- Step definition implementations must be short, passing data between @ApplicationScoped services and the @ScenarioScope
+  state. Implement or reuse services in nitro-it/src/test/java/be/fiscalteam/nitro/bdd/services/ if needed.
+- No hardcoded values should be present - use constant files or obtain data from services.
+
+- Supporting data and constants can be defined in resource files in the
+  nitro-it/src/test/resources/be/fiscalteam/nitro/bdd/features/<feature-name>/ directory when required