{
  "version": 1,
  "suiteId": "phase7-first-pilot",
  "modes": [
    {
      "id": "no-cache",
      "description": "Task prompt only; no CacheSphere data attached."
    },
    {
      "id": "raw-record",
      "description": "Task prompt plus full relevant CacheSphere records."
    },
    {
      "id": "compact-pack",
      "description": "Task prompt plus selected context pack(s) and examples."
    }
  ],
  "tasks": [
    {
      "id": "cli-export-filter",
      "category": "CLI implementation",
      "goal": "Build a small CLI that exports language records with a filter flag.",
      "prompt": "Create a CLI that reads the CacheSphere language catalogue, filters by a user-provided language ID, and writes the result to JSON with a short help message.",
      "expectedArtifacts": [
        "src/cli/export.mjs",
        "test/cli/export.test.mjs"
      ],
      "selectionHints": {
        "languageIds": [
          "javascript",
          "go"
        ],
        "stackIds": [
          "solo-agentic-coding"
        ],
        "projectTypes": [
          "cli-tool"
        ],
        "tags": [
          "cli",
          "systems",
          "agent-memory"
        ]
      },
      "acceptanceChecks": [
        "supports a language filter flag",
        "writes deterministic JSON",
        "includes a short usage note"
      ]
    },
    {
      "id": "crud-api-create-read",
      "category": "CRUD/API implementation",
      "goal": "Implement a simple CRUD endpoint set with validation and tests.",
      "prompt": "Build a minimal CRUD API for notes with create and read routes, basic validation, and a clear error response shape.",
      "expectedArtifacts": [
        "src/api/notes.mjs",
        "test/api/notes.test.mjs"
      ],
      "selectionHints": {
        "languageIds": [
          "typescript",
          "javascript",
          "python"
        ],
        "stackIds": [
          "solo-agentic-coding"
        ],
        "projectTypes": [
          "web-api"
        ],
        "tags": [
          "api",
          "crud",
          "validation"
        ]
      },
      "acceptanceChecks": [
        "create route validates input",
        "read route returns stable JSON",
        "tests cover the happy path and one failure path"
      ]
    },
    {
      "id": "bug-repair-array-dedup",
      "category": "bug repair",
      "goal": "Repair a small data bug without changing the public shape.",
      "prompt": "Fix a bug where duplicate items appear in a filtered array result while preserving the original output format.",
      "expectedArtifacts": [
        "src/utils/dedup.mjs",
        "test/utils/dedup.test.mjs"
      ],
      "selectionHints": {
        "languageIds": [
          "javascript",
          "typescript"
        ],
        "projectTypes": [
          "bug-fix"
        ],
        "tags": [
          "repair",
          "dedup",
          "regression"
        ]
      },
      "acceptanceChecks": [
        "duplicates are removed once",
        "ordering stays stable",
        "regression test fails before the fix"
      ]
    },
    {
      "id": "cross-language-ported-parser",
      "category": "cross-language port",
      "goal": "Port a tiny parser or transform across language boundaries.",
      "prompt": "Port a small line-based parser from Python to TypeScript while keeping the same output contract and error behavior.",
      "expectedArtifacts": [
        "src/parser.ts",
        "test/parser.test.ts"
      ],
      "selectionHints": {
        "languageIds": [
          "python",
          "typescript"
        ],
        "projectTypes": [
          "porting"
        ],
        "tags": [
          "port",
          "parser",
          "cross-language"
        ]
      },
      "acceptanceChecks": [
        "matches the source output contract",
        "preserves error handling shape",
        "calls out any semantic differences"
      ]
    },
    {
      "id": "data-transform-csv-json",
      "category": "data/file transform",
      "goal": "Transform a small file format without pulling in heavy dependencies.",
      "prompt": "Convert a small CSV-like dataset into grouped JSON records and note any assumptions about headers, quoting, and empty rows.",
      "expectedArtifacts": [
        "src/transform/group-json.mjs",
        "test/transform/group-json.test.mjs"
      ],
      "selectionHints": {
        "languageIds": [
          "python",
          "javascript",
          "typescript"
        ],
        "projectTypes": [
          "data-transform"
        ],
        "tags": [
          "data",
          "etl",
          "csv",
          "json"
        ]
      },
      "acceptanceChecks": [
        "handles blank rows predictably",
        "groups records deterministically",
        "documents assumptions"
      ]
    },
    {
      "id": "frontend-search-interaction",
      "category": "frontend/browser interaction",
      "goal": "Wire a small browser interaction with accessible state updates.",
      "prompt": "Implement a browser search widget that filters a local list, updates results on input, and keeps keyboard and aria semantics simple.",
      "expectedArtifacts": [
        "src/ui/search-widget.mjs",
        "test/ui/search-widget.test.mjs"
      ],
      "selectionHints": {
        "languageIds": [
          "javascript",
          "typescript"
        ],
        "projectTypes": [
          "frontend"
        ],
        "tags": [
          "browser",
          "ui",
          "accessibility"
        ]
      },
      "acceptanceChecks": [
        "filters on input",
        "supports keyboard focus",
        "mentions a11y or aria basics"
      ]
    },
    {
      "id": "language-choice-before-build",
      "category": "language/tool choice before implementation",
      "goal": "Force the model to choose a stack before writing code.",
      "prompt": "Recommend the best language and stack for a tiny internal CRUD tool, explain why that choice beats the default, and list the first implementation files.",
      "expectedArtifacts": [
        "decision-kit response",
        "top recommendation and alternatives"
      ],
      "selectionHints": {
        "languageIds": [
          "javascript",
          "typescript",
          "python",
          "go"
        ],
        "stackIds": [
          "solo-agentic-coding"
        ],
        "projectTypes": [
          "decision-making"
        ],
        "tags": [
          "language-choice",
          "decision-kit",
          "planning"
        ]
      },
      "acceptanceChecks": [
        "includes a recommendation",
        "names at least one alternative",
        "explains why the default is not enough"
      ]
    },
    {
      "id": "ambiguous-stack-security-api",
      "category": "Security-sensitive API decision",
      "goal": "Force the model to choose between convenience and security, not default to the easiest option.",
      "prompt": "Design an API endpoint that accepts user-generated content with file attachments. The default stack is Node/Express with multer. Explain why that default may be unsafe for untrusted uploads, recommend a safer architecture, and sketch the validation + sandboxing flow.",
      "expectedArtifacts": [
        "decision-kit response",
        "recommended architecture with security rationale",
        "validation flow sketch"
      ],
      "selectionHints": {
        "languageIds": [
          "typescript",
          "javascript",
          "python",
          "go"
        ],
        "stackIds": [
          "boring-saas-api",
          "solo-agentic-coding"
        ],
        "projectTypes": [
          "web-api",
          "decision-making"
        ],
        "tags": [
          "security",
          "api",
          "file-upload",
          "validation",
          "sandbox"
        ]
      },
      "acceptanceChecks": [
        "calls out at least one security risk in the default stack",
        "recommends an alternative with concrete reasoning",
        "includes a validation or sandboxing step, not just 'use a library'"
      ]
    },
    {
      "id": "default-js-is-wrong-task",
      "category": "Default-avoidance implementation",
      "goal": "Prevent the model from auto-selecting JavaScript/Node when another language is clearly better.",
      "prompt": "Build a small background job processor that handles CPU-bound tasks (image resizing, PDF generation) with queue semantics. The model typically defaults to Node.js + Bull. Explain why that default performs poorly for CPU-bound work, pick a better stack, and implement the worker + queue contract.",
      "expectedArtifacts": [
        "src/worker/main.{ext}",
        "src/worker/queue.{ext}",
        "decision rationale"
      ],
      "selectionHints": {
        "languageIds": [
          "go",
          "python",
          "rust",
          "typescript"
        ],
        "stackIds": [
          "solo-agentic-coding",
          "boring-saas-api"
        ],
        "projectTypes": [
          "systems",
          "decision-making"
        ],
        "tags": [
          "worker",
          "queue",
          "cpu-bound",
          "performance",
          "default-avoidance"
        ]
      },
      "acceptanceChecks": [
        "explicitly explains why Node.js/Bull is suboptimal for CPU-bound work",
        "chooses a language with better CPU parallelism (Go, Python multiprocessing, or Rust)",
        "implements queue contract with at least push and pop semantics"
      ]
    },
    {
      "id": "low-token-constrained-refactor",
      "category": "Low-token constrained implementation",
      "goal": "Force the model to produce correct output within a strict token budget, testing context efficiency directly.",
      "prompt": "Refactor a 200-line Express route file into a clean 40-line Fastify equivalent. You have a 1200-token output budget. Preserve all route behavior, validation, and error handling. Do not exceed the token limit.",
      "expectedArtifacts": [
        "src/routes/refactored.mjs",
        "behavior parity notes"
      ],
      "selectionHints": {
        "languageIds": [
          "typescript",
          "javascript"
        ],
        "stackIds": [
          "solo-agentic-coding",
          "boring-saas-api"
        ],
        "projectTypes": [
          "refactor",
          "api"
        ],
        "tags": [
          "fastify",
          "express",
          "refactor",
          "token-budget",
          "compact"
        ]
      },
      "acceptanceChecks": [
        "all original routes are preserved",
        "validation logic is not dropped",
        "output is under 1200 tokens"
      ]
    },
    {
      "id": "dockerize-node-service",
      "category": "Containerization",
      "goal": "Test whether the model can build a production Dockerfile with proper layer caching, non-root user, and healthcheck.",
      "prompt": "Containerize an existing Node.js Express API for production deployment. Create a multi-stage Dockerfile that builds dependencies separately from source, runs as a non-root user, includes a healthcheck, and keeps the image under 200MB. Also create a docker-compose.yml for local development.",
      "expectedArtifacts": [
        "Dockerfile",
        "docker-compose.yml",
        ".dockerignore"
      ],
      "selectionHints": {
        "languageIds": [
          "javascript",
          "typescript",
          "dockerfile"
        ],
        "stackIds": [
          "boring-saas-api",
          "solo-agentic-coding"
        ],
        "projectTypes": [
          "deployment",
          "devops"
        ],
        "tags": [
          "docker",
          "container",
          "deployment",
          "production"
        ]
      },
      "acceptanceChecks": [
        "uses multi-stage build",
        "runs as non-root user",
        "includes healthcheck directive or equivalent",
        ".dockerignore excludes node_modules and .git"
      ]
    },
    {
      "id": "add-test-coverage-to-api",
      "category": "Testing coverage",
      "goal": "Test whether the model can design a test strategy with unit, integration, and property-based tests for an existing API.",
      "prompt": "An existing Express API has zero tests. Add comprehensive test coverage: unit tests for route handlers using in-memory fakes, integration tests for the full request/response cycle with a test database, and at least one parameterized test for input validation boundaries. Target 80% coverage.",
      "expectedArtifacts": [
        "test/unit/routes.test.js",
        "test/integration/api.test.js",
        "vitest.config.ts"
      ],
      "selectionHints": {
        "languageIds": [
          "javascript",
          "typescript"
        ],
        "stackIds": [
          "solo-agentic-coding",
          "boring-saas-api"
        ],
        "projectTypes": [
          "testing",
          "api"
        ],
        "tags": [
          "testing",
          "coverage",
          "vitest",
          "jest",
          "fakes"
        ]
      },
      "acceptanceChecks": [
        "unit tests use fakes or stubs, not real database",
        "integration tests verify end-to-end request/response",
        "parameterized tests cover boundary values",
        "tests are deterministic and independent"
      ]
    },
    {
      "id": "no-match-niche-stack",
      "category": "Niche-stack fallback",
      "goal": "Force the advisor to admit no relevant pack exists and fall back gracefully to raw-record mode.",
      "prompt": "Build a real-time dashboard using Elixir and Phoenix LiveView. The dashboard must display live updating metrics from a WebSocket feed, handle 10,000 concurrent connections, and include a fallback UI for connection drops. No JavaScript framework on the frontend—only LiveView templates.",
      "expectedArtifacts": [
        "lib/my_app_web/live/dashboard_live.ex",
        "lib/my_app_web/channels/metrics_channel.ex",
        "decision-kit response"
      ],
      "selectionHints": {
        "languageIds": [
          "elixir"
        ],
        "stackIds": [
          "phoenix"
        ],
        "projectTypes": [
          "web-api",
          "realtime"
        ],
        "tags": [
          "liveview",
          "realtime",
          "dashboard",
          "websocket",
          "concurrent"
        ]
      },
      "acceptanceChecks": [
        "acknowledges no exact CacheSphere pack match for Elixir/Phoenix",
        "falls back to general systems patterns without hallucinating Elixir-specific advice",
        "recommends raw-record or general-service packs rather than irrelevant frontend packs"
      ]
    },
    {
      "id": "multi-domain-split",
      "category": "Multi-domain implementation",
      "goal": "Require the advisor to select packs from two unrelated domains (frontend + backend data pipeline) in a single repo.",
      "prompt": "Build a fullstack app in a monorepo: a React TypeScript frontend for visualizing CSV uploads, and a Go backend that ingests the CSV, validates schema, and streams transformed rows into a SQLite database. Both must share a single repo with separate package managers and a unified build script.",
      "expectedArtifacts": [
        "frontend/src/App.tsx",
        "backend/cmd/ingest/main.go",
        "package.json",
        "go.mod",
        "Makefile or build script"
      ],
      "selectionHints": {
        "languageIds": [
          "typescript",
          "go"
        ],
        "stackIds": [
          "boring-saas-api",
          "solo-agentic-coding"
        ],
        "projectTypes": [
          "fullstack",
          "data-pipeline"
        ],
        "tags": [
          "react",
          "frontend",
          "csv",
          "go",
          "pipeline",
          "monorepo",
          "sqlite"
        ]
      },
      "acceptanceChecks": [
        "selects context packs from both frontend and backend domains",
        "does not omit either the React or Go implementation guidance",
        "addresses monorepo organization or cross-domain communication"
      ]
    },
    {
      "id": "contradictory-signals",
      "category": "Conflict resolution",
      "goal": "Force the model to prioritize security/compliance over speed when the description says 'fast prototype' but requirements demand HIPAA audit logging.",
      "prompt": "We need a fast prototype of a patient data API. Speed to demo is important, but it must include HIPAA-compliant audit logging, field-level encryption for PHI, and role-based access control. Build the smallest possible working API that does not skip any compliance requirement for the sake of prototyping speed.",
      "expectedArtifacts": [
        "src/api/patients.ts",
        "src/audit/logger.ts",
        "src/auth/rbac.ts",
        "decision rationale"
      ],
      "selectionHints": {
        "languageIds": [
          "typescript"
        ],
        "stackIds": [
          "boring-saas-api",
          "solo-agentic-coding"
        ],
        "projectTypes": [
          "prototype",
          "web-api"
        ],
        "tags": [
          "prototype",
          "hipaa",
          "audit",
          "compliance",
          "encryption",
          "rbac"
        ]
      },
      "acceptanceChecks": [
        "prioritizes security/compliance over prototyping speed in design decisions",
        "includes concrete audit logging implementation, not just 'add logging later'",
        "does not omit validation or encryption to save time"
      ]
    },
    {
      "id": "python-etl-pipeline",
      "category": "Data pipeline implementation",
      "goal": "Build a data pipeline that ingests CSV, transforms with validation, and emits clean parquet output.",
      "prompt": "Build a Python ETL pipeline that reads a raw CSV of transaction records, validates each row against a schema (rejecting malformed rows to a dead-letter file), transforms amounts to a normalized currency, deduplicates by transaction ID, and writes the clean output to partitioned parquet files. Include data quality metrics logging.",
      "expectedArtifacts": [
        "src/pipeline/extract.py",
        "src/pipeline/transform.py",
        "src/pipeline/load.py",
        "tests/test_pipeline.py"
      ],
      "selectionHints": {
        "languageIds": [
          "python",
          "sql"
        ],
        "stackIds": [
          "data-insights-dashboard"
        ],
        "projectTypes": [
          "data-pipeline",
          "etl"
        ],
        "tags": [
          "python",
          "etl",
          "pandas",
          "polars",
          "data-quality",
          "parquet"
        ]
      },
      "acceptanceChecks": [
        "validates schema at ingestion and rejects bad rows to a separate output",
        "deduplicates by a key field without data loss",
        "writes partitioned parquet output",
        "logs row counts and quality metrics at each stage"
      ]
    },
    {
      "id": "react-component-library",
      "category": "Frontend component library",
      "goal": "Build reusable, accessible React components with proper TypeScript types and tests.",
      "prompt": "Create a small React component library with a Button, Modal, and Toast notification component. Each must be fully typed with TypeScript, accessible (keyboard navigable, proper ARIA), and tested with React Testing Library. Include a Storybook-style usage example for each component.",
      "expectedArtifacts": [
        "src/components/Button.tsx",
        "src/components/Modal.tsx",
        "src/components/Toast.tsx",
        "tests/components/"
      ],
      "selectionHints": {
        "languageIds": [
          "typescript"
        ],
        "stackIds": [
          "solo-agentic-coding",
          "teaching-prototype"
        ],
        "projectTypes": [
          "frontend",
          "component-library"
        ],
        "tags": [
          "react",
          "typescript",
          "accessibility",
          "testing",
          "components"
        ]
      },
      "acceptanceChecks": [
        "all components are keyboard accessible",
        "proper ARIA attributes are present (roles, labels, live regions for Toast)",
        "TypeScript types are explicit, not inferred as any",
        "tests cover render, interaction, and accessibility basics"
      ]
    },
    {
      "id": "android-offline-crud",
      "category": "Mobile offline-first implementation",
      "goal": "Build a Kotlin/Compose CRUD app with local-first persistence and background sync.",
      "prompt": "Build a Kotlin Android app using Jetpack Compose that manages a todo list with full CRUD. Data must persist locally in Room, display immediately from the local cache, and sync to a remote API when connectivity is available. Handle conflict resolution with last-write-wins. Show sync status in the UI.",
      "expectedArtifacts": [
        "app/src/main/java/com/example/ui/TodoScreen.kt",
        "app/src/main/java/com/example/data/TodoRepository.kt",
        "app/src/main/java/com/example/data/local/TodoDao.kt",
        "app/src/main/java/com/example/sync/SyncWorker.kt"
      ],
      "selectionHints": {
        "languageIds": [
          "kotlin"
        ],
        "stackIds": [
          "solo-agentic-coding"
        ],
        "projectTypes": [
          "mobile",
          "android"
        ],
        "tags": [
          "kotlin",
          "compose",
          "room",
          "offline-first",
          "sync"
        ]
      },
      "acceptanceChecks": [
        "CRUD operations work entirely offline using Room",
        "sync worker runs when connectivity is restored",
        "conflict resolution strategy is explicit (last-write-wins or similar)",
        "UI reflects current sync status (synced, pending, error)"
      ]
    },
    {
      "id": "serverless-api-cold-start",
      "category": "Serverless deployment",
      "goal": "Deploy a serverless function with explicit cold-start mitigation and stateless design.",
      "prompt": "Create a Cloudflare Worker (or AWS Lambda) function that serves a JSON API endpoint for user lookup. The function must initialize a database connection pool outside the handler for warm reuse, validate input with early returns, keep the bundle under 1MB, and include a health endpoint. Document cold-start latency trade-offs in comments.",
      "expectedArtifacts": [
        "src/handlers/lookup.ts",
        "src/handlers/health.ts",
        "wrangler.toml",
        "tests/handlers.test.ts"
      ],
      "selectionHints": {
        "languageIds": [
          "typescript"
        ],
        "stackIds": [
          "boring-saas-api",
          "solo-agentic-coding"
        ],
        "projectTypes": [
          "serverless",
          "api"
        ],
        "tags": [
          "serverless",
          "cold-start",
          "cloudflare-workers",
          "lambda",
          "stateless"
        ]
      },
      "acceptanceChecks": [
        "shared resources initialized outside handler for warm invocation reuse",
        "input validation returns early with structured error responses",
        "bundle size is minimized (no heavy unused dependencies)",
        "documents cold-start trade-offs explicitly"
      ]
    },
    {
      "id": "terraform-module",
      "category": "Infrastructure as code",
      "goal": "Write a reusable Terraform module with inputs, outputs, and validation.",
      "prompt": "Write a reusable Terraform module that provisions a VPC with public and private subnets across multiple availability zones. The module must accept configurable CIDR blocks with validation, output subnet IDs and VPC ID, support tagging, and include a README with usage examples. Ensure the module is idempotent and handles plan/apply lifecycle correctly.",
      "expectedArtifacts": [
        "modules/vpc/main.tf",
        "modules/vpc/variables.tf",
        "modules/vpc/outputs.tf",
        "modules/vpc/README.md"
      ],
      "selectionHints": {
        "languageIds": [
          "terraform"
        ],
        "stackIds": [
          "solo-agentic-coding"
        ],
        "projectTypes": [
          "infrastructure",
          "devops"
        ],
        "tags": [
          "terraform",
          "module",
          "vpc",
          "infrastructure",
          "idempotent"
        ]
      },
      "acceptanceChecks": [
        "variables have type constraints and validation blocks",
        "outputs expose VPC ID and subnet IDs for downstream consumption",
        "resources are tagged consistently",
        "module is self-contained with no hard-coded values"
      ]
    },
    {
      "id": "graphql-schema-design",
      "category": "GraphQL API design",
      "goal": "Design a GraphQL schema with N+1 mitigation, subscriptions, and proper type boundaries.",
      "prompt": "Design a GraphQL API for a blog platform with Posts, Authors, and Comments. Implement resolvers that use DataLoader to batch author lookups (preventing N+1), add a subscription for new comments on a post, validate mutation inputs, and enforce query depth limiting. Include codegen configuration for TypeScript types.",
      "expectedArtifacts": [
        "src/schema.graphql",
        "src/resolvers/post.ts",
        "src/resolvers/comment.ts",
        "src/dataloaders/author.ts",
        "codegen.ts"
      ],
      "selectionHints": {
        "languageIds": [
          "typescript"
        ],
        "stackIds": [
          "boring-saas-api"
        ],
        "projectTypes": [
          "api",
          "graphql"
        ],
        "tags": [
          "graphql",
          "dataloader",
          "subscriptions",
          "n-plus-one",
          "schema-first"
        ]
      },
      "acceptanceChecks": [
        "DataLoader batches author resolution for post lists",
        "subscription resolves new comments in real-time via PubSub",
        "query depth or complexity limiting is configured",
        "mutation inputs are validated before database writes"
      ]
    },
    {
      "id": "elixir-liveview-dashboard",
      "category": "Real-time web implementation",
      "goal": "Build a Phoenix LiveView dashboard with real-time updates and OTP supervision.",
      "prompt": "Build a Phoenix LiveView dashboard that displays live-updating server metrics (CPU, memory, request count). Metrics should be collected by a supervised GenServer that broadcasts via PubSub every second. The LiveView must handle reconnection gracefully, display a fallback when disconnected, and support at least 100 concurrent viewers without per-connection polling.",
      "expectedArtifacts": [
        "lib/my_app_web/live/dashboard_live.ex",
        "lib/my_app/metrics/collector.ex",
        "lib/my_app/application.ex",
        "test/my_app_web/live/dashboard_live_test.exs"
      ],
      "selectionHints": {
        "languageIds": [
          "elixir"
        ],
        "stackIds": [
          "solo-agentic-coding"
        ],
        "projectTypes": [
          "web-app",
          "realtime"
        ],
        "tags": [
          "elixir",
          "phoenix",
          "liveview",
          "otp",
          "realtime",
          "dashboard"
        ]
      },
      "acceptanceChecks": [
        "metrics collector runs under OTP supervision and broadcasts via PubSub",
        "LiveView subscribes to PubSub topic on mount, not via polling",
        "handles socket disconnection with a fallback UI",
        "no per-connection polling or timer; relies on server-push"
      ]
    }
  ]
}