WIP prompt engineering

2025-06-08 17:56:36 +02:00 · 2025-06-08 17:56:36 +02:00 · 0bb5b9f876
commit 0bb5b9f876
parent 128ad5ee1f
7 changed files with 201 additions and 82 deletions
--- a/src/functions/prompts-to-test-spec/src/services/project-service.ts
+++ b/src/functions/prompts-to-test-spec/src/services/project-service.ts
@ -200,12 +200,12 @@ export class ProjectService {
        let logMessage = `${timestamp} - Gemini updates`;
        response.stepOutcomes.forEach(outcome => {
-            logMessage += `\n- ${outcome.decision}: ${outcome.reason}`;
+            logMessage += `\n- ${outcome.outcomes}: ${outcome.reason}`;
        })
-        response.fileDeleted.forEach(file => {
+        response.filesDeleted.forEach(file => {
            logMessage += `\n-  Delete file ${file}`;
        })
-        response.fileWritten.forEach(file => {
+        response.filesWritten.forEach(file => {
            logMessage += `\n-  Added file ${file}`;
        })
--- a/src/functions/prompts-to-test-spec/src/services/project-workitems-service.ts
+++ b/src/functions/prompts-to-test-spec/src/services/project-workitems-service.ts
@ -117,7 +117,7 @@ export class ProjectWorkitemsService {
                relevantFiles
            );
-            const hasChanges = result.fileWritten.length > 0 || result.fileDeleted.length > 0;
+            const hasChanges = result.filesWritten.length > 0 || result.filesDeleted.length > 0;
            // Update the workitem file with implementation log
            if (hasChanges) {
                try {
@ -133,12 +133,12 @@ export class ProjectWorkitemsService {
                }
            }
-            console.log(`ProjectWorkitemsService: Completed processing workitem: ${workitem.name} (Files written: ${result.fileWritten.length})`);
+            console.log(`ProjectWorkitemsService: Completed processing workitem: ${workitem.name} (Files written: ${result.filesWritten.length})`);
            return {
                success: true,
                workitem,
-                filesWritten: result.fileWritten,
+                filesWritten: result.filesWritten,
-                filesRemoved: result.fileDeleted,
+                filesRemoved: result.filesDeleted,
            };
        } catch (error) {
            console.error(`Error processing workitem ${workitem.name}:`, error);
@ -203,8 +203,8 @@ export class ProjectWorkitemsService {
        if (DRY_RUN_SKIP_GEMINI) {
            console.log(`[DRY RUN] Skipping Gemini API call for generating feature file for ${workitemName}`);
            return {
-                fileWritten: [],
+                filesWritten: [],
-                fileDeleted: [],
+                filesDeleted: [],
                stepOutcomes: [],
                modelResponses: []
            };
--- a/src/functions/shared/src/services/gemini-file-system-service.ts
+++ b/src/functions/shared/src/services/gemini-file-system-service.ts
@ -23,18 +23,23 @@ export interface FunctionArgs {
    dirPath?: string;
    searchString?: string;
    filePattern?: string;
-    outcome?: 'create' | 'update' | 'delete' | 'skip';
+    step?: string;
-    reason?: string;
+    outcome?: string | 'end';
    description?: string;
 }
 export interface GeminiResponse {
-    fileWritten: string[];
+    filesWritten: string[];
-    fileDeleted: string[];
+    filesDeleted: string[];
    stepOutcomes: {
-        decision: 'create' | 'update' | 'delete' | 'skip';
+        step?: string;
        outcomes: string;
        reason: string;
    }[];
    modelResponses: string[];
    inputCost?: number;
    outputCost?: number;
    totalCost?: number;
 }
 /**
@ -164,21 +169,25 @@ export class GeminiFileSystemService {
                    },
                    {
                        name: "reportStepOutcome",
-                        description: "Submit the outcome for a step in compliance with guidelines. Can be called multiple times.",
+                        description: "Submit the status/outcome for a step in your workplan",
                        parameters: {
                            type: FunctionDeclarationSchemaType.OBJECT,
                            properties: {
                                step: {
                                    type: FunctionDeclarationSchemaType.STRING,
                                    description: "The step identifier",
                                },
                                outcome: {
                                    type: FunctionDeclarationSchemaType.STRING,
-                                    description: "The step outcome: 'create', 'update', 'delete', or 'skip'",
+                                    description: "The step outcome. Use the special value 'end' to abort/complete the session",
-                                    enum: ["create", "update", "delete", "skip"]
+                                    enum: ["started", "done", "partially-done", "skip", "end", "end-confirmed"]
                                },
-                                reason: {
+                                description: {
                                    type: FunctionDeclarationSchemaType.STRING,
-                                    description: "Reason for this outcome. For instance, 'create' when files have been created, 'skip' when no files has been created, or 'update' when files have been updated."
+                                    description: "Description for this outcome. A short paragraph at most"
                                }
                            },
-                            required: ["outcome", "reason"]
+                            required: ["outcome", "description"]
                        }
                    }
                ]
@ -404,23 +413,26 @@ export class GeminiFileSystemService {
            console.log(`[DRY RUN] Skipping Gemini API call for processing`);
            return {
                stepOutcomes: [],
-                fileDeleted: [],
+                filesDeleted: [],
                modelResponses: [],
-                fileWritten: []
+                filesWritten: []
            };
        }
        // Create the prompt
-        const prompt = `
+        const prompts: string[] = [
-Here is your guideline:
+            `Here is your guidelines:
-
+${guidelines}`,
-${guidelines}
+            `Additional content:
-
+${additionalContent}`,
-Additional content:
+            `Make a work plan:
-
+- create steps to comply with the guidelines
-${additionalContent}
+- report each step outcome as you start them: use the reportStepOutcome(step, outcome, description) function
-
+- start each step by considering creating substeps based on the outcome of the preceding steps
-You have access to the following function calls to help you understand the project structure and create implementations:
+- keep track of step hierarchy by their identifiers. Dont create substeps at a depth higher than 5
 - report each step outcome as you complete them: use the reportStepOutcome(step, outcome, description) function            
            `,
            `Access the filesystem: You have access to the following function calls to interact with the project repository:
 - getFileContent(filePath): Get the content of a file in the project repository
 - writeFileContent(filePath, content): Write content to a file in the project repository (create or update)
 - fileExists(filePath): Check if a file exists in the project repository
@ -428,13 +440,17 @@ You have access to the following function calls to help you understand the proje
 - grepFiles(searchString, filePattern): Search for a string in project files, optionally filtered by a file pattern (glob)
   use filePattern='path/**' to search recursively in all files under path.
 - deleteFile(filePath): Delete a file from the project repository
-
+            `,
-IMPORTANT: First use the function calls above to comply with the guidelines. Create, update, or delete all required files.
+            `Be throughout:
-
+Ensure each file you create is entirely implemented, and that you changes are fully compliant with the guidelines.
-You can use this function to report the outcome of each step as you work through the guidelines:
+Create a new work list is additional scanning / editing is required.
- reportStepOutcome(outcome, reason): Outcome must be one of: 'create', 'update', 'delete', 'skip'
+`,
-
+            `Complete the session:
-`;
+Once you have completed all steps, call reportStepOutcome with outcome 'end'`,
        ];
        const promptContents: Content[] = prompts.map(promptPart => {
            return {role: 'user', parts: [{text: promptPart}]}
        })
        // Instantiate the model with our file operation tools
        const generativeModel = this.vertexAI.getGenerativeModel({
@ -447,9 +463,7 @@ You can use this function to report the outcome of each step as you work through
        // Create the initial request
        const request: GenerateContentRequest = {
-            contents: [
+            contents: promptContents,
                {role: 'user', parts: [{text: prompt}]}
            ],
            tools: this.fileOperationTools,
        };
        const geminiResponse = await this.handleGeminiStream(generativeModel, request, rootPath);
@ -463,6 +477,22 @@ You can use this function to report the outcome of each step as you work through
        return geminiResponse;
    }
    private createReevaluationContrent(): Content [] {
        return [
            {
                role: 'USER',
                parts: [
                    {
                        text: `Re-evaluate compliance with all guidelines. 
                        Create a new work list to comply if needed.
                        Report a step with outcome 'end-confirmed' and a description detailling your confidence if you are completely done`
                    }
                ]
            }
        ];
    }
    private createFunctionExchangeContents(
        functionCall: FunctionCall,
        responseData: any,
@ -497,7 +527,7 @@ You can use this function to report the outcome of each step as you work through
    private processFunctionCall(functionCall: FunctionCall, rootPath: string, callbacks: {
        onFileWritten: (file: string) => any;
        onFileDelete: (file: string) => any;
-        onStepOutcome: (outcome: 'create' | 'update' | 'delete' | 'skip', reason: string) => any
+        onStepOutcome: (step: string | undefined, outcome: string | 'end' | 'end-confirmed', reason: string) => any
    }): string | string[] | boolean | any {
        const functionName = functionCall.name;
        try {
@ -531,9 +561,13 @@ You can use this function to report the outcome of each step as you work through
                    callbacks.onFileDelete(functionArgs.filePath!);
                    break;
                case 'reportStepOutcome':
-                    console.debug(` - received reportStepOutcome function call: ${functionArgs.outcome} - ${functionArgs.reason}`);
+                    console.debug(` - received reportStepOutcome: ${functionArgs.step} -  ${functionArgs.outcome} - ${functionArgs.description}`);
-                    callbacks.onStepOutcome(functionArgs.outcome!, functionArgs.reason!);
+                    callbacks.onStepOutcome(functionArgs.step, functionArgs.outcome!, functionArgs.description!);
-                    functionResponse = `Step outcome recorded: ${functionArgs.outcome} - ${functionArgs.reason}`;
+                    functionResponse = {
                        step: functionArgs.step,
                        outcome: functionArgs.outcome,
                        reason: functionArgs.description,
                    };
                    break;
                default:
                    throw new Error(`Unknown function: ${functionName}`);
@ -551,17 +585,26 @@ You can use this function to report the outcome of each step as you work through
                                     rootPath: string,
                                     geminiResponse: GeminiResponse = {
                                         stepOutcomes: [],
-                                         fileDeleted: [],
+                                         filesDeleted: [],
-                                         fileWritten: [],
+                                         filesWritten: [],
                                         modelResponses: []
                                     }): Promise<GeminiResponse> {
        // Generate content in a streaming fashion
        const streamGenerateContentResult = await generativeModel.generateContentStream(request);
        const pendingFunctionCalls = [];
        let endReceived = false;
        // Process the streaming response
        for await (const item of streamGenerateContentResult.stream) {
            const inputTokens = item.usageMetadata?.promptTokenCount ?? 0;
            const outputTokens = item.usageMetadata?.candidatesTokenCount ?? 0;
            const totalTokens = item.usageMetadata?.totalTokenCount ?? 0;
            geminiResponse.inputCost = (geminiResponse.inputCost ?? 0) + inputTokens;
            geminiResponse.outputCost = (geminiResponse.outputCost ?? 0) + outputTokens;
            geminiResponse.totalCost = (geminiResponse.totalCost ?? 0) + totalTokens;
            // Iterate over every part in the response
            let generateContentCandidates = item.candidates ?? [];
            if (generateContentCandidates.length === 0) {
@ -591,33 +634,59 @@ You can use this function to report the outcome of each step as you work through
            }
        }
        // TODO: drop old content above 1M tokens
        const updatedRequestContents = [
            ...request.contents,
        ];
        // Process any function calls that were detected
        if (pendingFunctionCalls.length > 0) {
            // TODO: drop old content above 1M tokens
            const updatedRequestContents = [
                ...request.contents,
            ];
            for (const functionCall of pendingFunctionCalls) {
                const responseData = this.processFunctionCall(functionCall, rootPath, {
-                    onFileWritten: (f) => geminiResponse.fileWritten.push(f),
+                    onFileWritten: (f) => {
-                    onFileDelete: (f) => geminiResponse.fileDeleted.push(f),
+                        if (!geminiResponse.filesWritten.includes(f)) {
-                    onStepOutcome: (outcome, reason) => geminiResponse.stepOutcomes.push({
+                            geminiResponse.filesWritten.push(f);
-                        decision: outcome,
+                        }
-                        reason: reason
+                    },
-                    })
+                    onFileDelete: (f) => {
                        if (!geminiResponse.filesDeleted.includes(f)) {
                            geminiResponse.filesDeleted.push(f)
                        }
                    },
                    onStepOutcome: (step, outcome, reason) => {
                        if (outcome === 'end') {
                            const updatedContent = this.createReevaluationContrent();
                            updatedRequestContents.push(...updatedContent);
                        } else if (outcome === 'end-confirmed') {
                            console.log('End confirmed');
                            endReceived = true;
                        } else {
                            geminiResponse.stepOutcomes.push({
                                step: step,
                                outcomes: outcome,
                                reason: reason
                            });
                        }
                    }
                });
                const contents = this.createFunctionExchangeContents(functionCall, responseData);
                updatedRequestContents.push(...contents);
            }
            // Submit a new request
            const updatedRequest: GenerateContentRequest = {
                contents: updatedRequestContents,
                tools: this.fileOperationTools,
            };
            return this.handleGeminiStream(generativeModel, updatedRequest, rootPath, geminiResponse);
        } else {
            console.debug("No function calls detected in response.")
            const updatedContent = this.createReevaluationContrent();
            updatedRequestContents.push(...updatedContent);
        }
        if (endReceived) {
            return geminiResponse;
        }
        // Submit a new request
        const updatedRequest: GenerateContentRequest = {
            contents: updatedRequestContents,
            tools: this.fileOperationTools,
        };
        return this.handleGeminiStream(generativeModel, updatedRequest, rootPath, geminiResponse);
    }
 }
--- a/src/functions/test-spec-to-test-implementation/src/services/processor-service.ts
+++ b/src/functions/test-spec-to-test-implementation/src/services/processor-service.ts
@ -5,24 +5,25 @@ import * as path from 'path';
 import * as os from 'os';
 import {ProcessResult, RepoCredentials} from '../types';
 import {
-    RepositoryService as SharedRepositoryService,
+    GeminiService,
    Project,
    PullRequestService as SharedPullRequestService,
-    GeminiService, Project
+    RepositoryService as SharedRepositoryService
 } from 'shared-functions';
 import {ProjectService} from './project-service';
 import {ProjectTestSpecsService} from './project-test-specs-service';
 import {
    DRY_RUN_SKIP_COMMITS,
    DRY_RUN_SKIP_GEMINI,
    GEMINI_MODEL,
    getGiteaCredentials,
    getGithubCredentials,
    getMainRepoCredentials,
    GOOGLE_CLOUD_LOCATION,
    GOOGLE_CLOUD_PROJECT_ID,
    MAIN_REPO_URL,
    USE_LOCAL_REPO,
-    validateConfig,
+    validateConfig
    GOOGLE_CLOUD_PROJECT_ID,
    GOOGLE_CLOUD_LOCATION,
    GEMINI_MODEL,
    DRY_RUN_SKIP_GEMINI
 } from '../config';
 export class ProcessorService {
@ -231,8 +232,18 @@ export class ProcessorService {
            await this.sharedRepositoryService.pushChanges(projectRepoPath, branchName, credentials);
            // Generate PR description using Gemini
-            const description = await this.geminiService.generatePullRequestDescription(
+            const modelResponses = result.modelResponses ?? [];
-                "Test spec implementation",
+            const lastModelResponse = modelResponses.slice(Math.max(modelResponses.length - 10, 0), modelResponses.length);
            const changeDescription = `
            feature spec implementation.
            ${result.totalCost} tokens consumed to write ${result.filesWritten?.length ?? 0} files`;
            `last model responses:
            ${lastModelResponse.join('\n')}
            `;
            const prDescription = await this.geminiService.generatePullRequestDescription(
                changeDescription,
                result.gitPatch
            );
@ -245,7 +256,7 @@ export class ProcessorService {
                branchName,
                credentials,
                title,
-                description
+                prDescription
            );
            console.log(`Created pull request: ${pullRequestUrl}`);
--- a/src/functions/test-spec-to-test-implementation/src/services/project-test-specs-service.ts
+++ b/src/functions/test-spec-to-test-implementation/src/services/project-test-specs-service.ts
@ -41,7 +41,7 @@ export class ProjectTestSpecsService {
            if ((result.filesWritten?.length ?? 0) > 0 || (result.filesRemoved?.length ?? 0) > 0) {
                try {
-                    console.log(`Generating git patch for project ${project.name} with ${result.filesWritten} files written`);
+                    console.log(`Generating git patch for project ${project.name} with ${result.filesWritten?.length} files written`);
                    gitPatch = await this.sharedRepositoryService.generateGitPatch(projectRepoPath);
                } catch (error) {
@ -87,12 +87,13 @@ export class ProjectTestSpecsService {
                relevantFiles
            );
-            console.log(`ProjectTestSpecsService: Completed processing project (Files written: ${result.fileWritten.length})`);
+            console.log(`ProjectTestSpecsService: Completed processing project (Files written: ${result.filesWritten.length})`);
            return {
                project: project,
                success: true,
-                filesWritten: result.fileWritten,
+                filesWritten: result.filesWritten,
-                filesRemoved: result.fileDeleted,
+                filesRemoved: result.filesDeleted,
                totalCost: result.totalCost
            };
        } catch (error) {
            console.error(`Error processing project ${project.name}:`, error);
@ -155,8 +156,8 @@ export class ProjectTestSpecsService {
            return {
                modelResponses: [],
                stepOutcomes: [],
-                fileDeleted: [],
+                filesDeleted: [],
-                fileWritten: []
+                filesWritten: []
            };
        }
--- a/src/functions/test-spec-to-test-implementation/src/types.ts
+++ b/src/functions/test-spec-to-test-implementation/src/types.ts
@ -36,6 +36,8 @@ export interface ProcessResult {
    gitPatch?: string;
    filesWritten?: string[];
    filesRemoved?: string[];
    totalCost?: number;
    modelResponses?: string[];
 }
 /**
--- a/src/prompts/test-spec-to-test-implementation/nitro-back/AI.md
+++ b/src/prompts/test-spec-to-test-implementation/nitro-back/AI.md
@ -1,4 +1,40 @@
-This is your guideline for the implementation of the feature file:
+Implement tests according to the cucumber ".feature" files.
 - Iterate over cucumber ".feature" definition files in the `nitro-it/src/test/resources/workitems/` directory.
- For each of them create all required files to implement the feature.
+- For each of them create all required files to implement the tests.
 - Use quarkus apis and best practices
 - All files and all their methods must be correctly implemented, without any TODO, stub, or placeholder.
 - The code produced must be ready for test driven development without any adaptation required.
 - The tests are business-driven integration tests: A real api must be accessed to ensure proper application
  behavior.
 - Scan the existing api in nitro-domain-api/src/main/java to implement http requests to the api endpoints.
 - Use the following techniques to identify the relevant resources:
    - search for patterns like 'class Ws*<resource-name-camel-case>*' to identify api models file names
    - search for patterns like 'interface Ws*<resource-name-camel-case>*Controller' to identify api controller file
      names
    - Retrieve files content to inspect their structure and interactions
    - Grep a class name to discover where it's used across the codebase
    - Fetch the pom.xml files to inspect the dependencies and their versions
 - Get a complete understanding of the relevant resources, how they relate to each other, and the available operations.
 - Get a complete understanding of the various entities composing the business resources
 - Create missing global configuration in nitro-it/src/test/resources/application-bdd.properties
 - Create or update @ApplicationScoped services in nitro-it/src/test/java/be/fiscalteam/nitro/bdd/services/
  to implement the test logic
 - Those services must be fully implemented and make actual http requests to the api endpoints when called.
 For each feature file, create or update the implementation in nitro-it/src/test/java/be/fiscalteam/nitro/bdd/features/<
 feature-name>/
 - Create or update a "ScenarioState.java" service annotated @ScenarioScope.
  This service contains the state for each scenario execution.
 - Create or update a "FeatureSteps.java" class to implement the step definitions from the feature file.
  This class injects the ScenarioState and other services. Add javadoc referencing the feature file.
  Use Given/When/Then/And annotations from io.cucumber.java.en to implement each step in the feature file.
 - Step definition implementations must be short, passing data between @ApplicationScoped services and the @ScenarioScope
  state. Implement or reuse services in nitro-it/src/test/java/be/fiscalteam/nitro/bdd/services/ if needed.
 - No hardcoded values should be present - use constant files or obtain data from services.
 - Supporting data and constants can be defined in resource files in the
  nitro-it/src/test/resources/be/fiscalteam/nitro/bdd/features/<feature-name>/ directory when required