在 Day 20 完成架構盤點後，我們發現部署流程仍然是手動的。今天我們將建立完整的 CI/CD Pipeline，使用 GitHub Actions 實現從程式碼提交到 AWS ECS 部署的全自動化流程。
┌──────────────┐
│  Git Push    │
│  to GitHub   │
└──────┬───────┘
       │
       ▼
┌──────────────────────┐
│  GitHub Actions      │
│  ────────────────────│
│  1. Checkout Code    │
│  2. Run Tests        │
│  3. Build TypeScript │
│  4. Build Docker     │
│  5. Push to ECR      │
│  6. Update ECS       │
└──────┬───────────────┘
       │
       ▼
┌──────────────────────┐
│  AWS ECR             │
│  (Docker Registry)   │
└──────┬───────────────┘
       │
       ▼
┌──────────────────────┐
│  AWS ECS Fargate     │
│  (Container Service) │
└──────┬───────────────┘
       │
       ▼
┌──────────────────────┐
│  Production          │
│  Application         │
└──────────────────────┘
首先在 GitHub Repository 設定必要的 Secrets:
Settings → Secrets and variables → Actions → New repository secret
# AWS 認證
AWS_ACCESS_KEY_ID=AKIA...
AWS_SECRET_ACCESS_KEY=...
AWS_REGION=ap-northeast-1
# ECR
ECR_REPOSITORY=kyo-otp-image
ECR_REGISTRY=123456789012.dkr.ecr.ap-northeast-1.amazonaws.com
# ECS
ECS_CLUSTER=kyo-system-cluster
ECS_SERVICE=kyo-otp-service
ECS_TASK_DEFINITION=kyo-otp-task
# 環境變數（生產環境）
REDIS_URL=redis://production-redis:6379
DATABASE_URL=postgresql://...
JWT_SECRET=...
MITAKE_USERNAME=...
MITAKE_PASSWORD=...
# Workflow 會用到的第三方服務
CODECOV_TOKEN=...
SLACK_WEBHOOK_URL=https://hooks.slack.com/services/...
# .github/workflows/deploy.yml
# CI/CD pipeline: run the test suite on every push and pull request, then (for
# pushes to main/develop only) build the Docker image, push it to Amazon ECR
# and roll it out to the ECS service.
name: Deploy to AWS ECS

on:
  push:
    branches:
      - main
      - develop
  pull_request:
    branches:
      - main

env:
  AWS_REGION: ${{ secrets.AWS_REGION }}
  ECR_REPOSITORY: ${{ secrets.ECR_REPOSITORY }}
  ECS_SERVICE: ${{ secrets.ECS_SERVICE }}
  ECS_CLUSTER: ${{ secrets.ECS_CLUSTER }}
  # Read by the "Download task definition" step. Without this entry the
  # expression `env.ECS_TASK_DEFINITION` expands to an empty string.
  ECS_TASK_DEFINITION: ${{ secrets.ECS_TASK_DEFINITION }}
  CONTAINER_NAME: kyo-otp-service

jobs:
  # Job 1: run the test suite against a throwaway Redis service container
  test:
    name: Run Tests
    runs-on: ubuntu-latest

    services:
      redis:
        image: redis:7-alpine
        ports:
          - 6379:6379
        options: >-
          --health-cmd "redis-cli ping"
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Setup pnpm
        uses: pnpm/action-setup@v2
        with:
          version: 9

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '20'
          cache: 'pnpm'

      - name: Install dependencies
        run: pnpm install --frozen-lockfile

      - name: Build packages
        run: pnpm run build

      - name: Run tests
        run: pnpm test
        env:
          REDIS_URL: redis://localhost:6379
          NODE_ENV: test

      - name: Generate coverage report
        run: pnpm test:coverage
        # The coverage run executes the tests again, so it needs the same
        # environment as the plain test step.
        env:
          REDIS_URL: redis://localhost:6379
          NODE_ENV: test

      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v3
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          files: ./coverage/coverage-final.json

  # Job 2: build the image and deploy it
  # NOTE(review): both main and develop deploy to the same ECS_CLUSTER /
  # ECS_SERVICE secrets and both overwrite the `latest` tag, although the Slack
  # message labels them Production vs Staging - confirm this is intended.
  deploy:
    name: Build and Deploy
    needs: test
    runs-on: ubuntu-latest
    # Never deploy from pull_request runs; only from pushes to the two branches.
    if: >-
      github.event_name == 'push' &&
      (github.ref == 'refs/heads/main' || github.ref == 'refs/heads/develop')

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ env.AWS_REGION }}

      - name: Login to Amazon ECR
        id: login-ecr
        uses: aws-actions/amazon-ecr-login@v2

      - name: Setup pnpm
        uses: pnpm/action-setup@v2
        with:
          version: 9

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '20'
          cache: 'pnpm'

      - name: Install dependencies
        run: pnpm install --frozen-lockfile

      # The Dockerfile runs its own `pnpm run build` and .dockerignore excludes
      # `dist`, so this step acts as an early compile gate rather than
      # producing the artifacts that end up in the image.
      - name: Build TypeScript
        run: pnpm run build

      - name: Build Docker image
        id: build-image
        env:
          ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }}
          IMAGE_TAG: ${{ github.sha }}
        run: |
          docker build \
            -f apps/kyo-otp-service/Dockerfile \
            -t "$ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG" \
            -t "$ECR_REGISTRY/$ECR_REPOSITORY:latest" \
            .
          # Expose the immutable (commit-tagged) image URI to later steps
          echo "image=$ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG" >> "$GITHUB_OUTPUT"

      - name: Push image to Amazon ECR
        env:
          ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }}
          IMAGE_TAG: ${{ github.sha }}
        run: |
          docker push "$ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG"
          docker push "$ECR_REGISTRY/$ECR_REPOSITORY:latest"

      - name: Download task definition
        run: |
          aws ecs describe-task-definition \
            --task-definition "$ECS_TASK_DEFINITION" \
            --query taskDefinition \
            > task-definition.json

      - name: Fill in the new image ID in the task definition
        id: task-def
        uses: aws-actions/amazon-ecs-render-task-definition@v1
        with:
          task-definition: task-definition.json
          container-name: ${{ env.CONTAINER_NAME }}
          image: ${{ steps.build-image.outputs.image }}

      - name: Deploy to Amazon ECS
        uses: aws-actions/amazon-ecs-deploy-task-definition@v1
        with:
          task-definition: ${{ steps.task-def.outputs.task-definition }}
          service: ${{ env.ECS_SERVICE }}
          cluster: ${{ env.ECS_CLUSTER }}
          wait-for-service-stability: true

      - name: Verify deployment
        run: |
          # Wait for the service to reach a steady state
          aws ecs wait services-stable \
            --cluster "$ECS_CLUSTER" \
            --services "$ECS_SERVICE"

          # Look up the first task of the service
          TASK_ARN=$(aws ecs list-tasks \
            --cluster "$ECS_CLUSTER" \
            --service-name "$ECS_SERVICE" \
            --query 'taskArns[0]' \
            --output text)

          # With --output text an empty list is printed as the literal "None"
          if [ -z "$TASK_ARN" ] || [ "$TASK_ARN" = "None" ]; then
            echo "::error::No tasks found for service $ECS_SERVICE"
            exit 1
          fi

          # Print the task details (status / health) into the job log
          aws ecs describe-tasks \
            --cluster "$ECS_CLUSTER" \
            --tasks "$TASK_ARN"

      - name: Notify deployment success
        if: success()
        uses: 8398a7/action-slack@v3
        with:
          status: ${{ job.status }}
          text: |
            🚀 Deployment to ECS successful!
            Environment: ${{ github.ref == 'refs/heads/main' && 'Production' || 'Staging' }}
            Commit: ${{ github.sha }}
          webhook_url: ${{ secrets.SLACK_WEBHOOK_URL }}
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
# apps/kyo-otp-service/Dockerfile
# Multi-stage build for the kyo-otp-service workspace app:
#   deps    -> install workspace dependencies from the lockfile
#   builder -> compile TypeScript, then reinstall production deps only
#   runner  -> minimal runtime image running as a non-root user
# The build context is expected to be the repository root.

# ============================================
# Stage 1: Dependencies
# ============================================
FROM node:20-alpine AS deps

# Install pnpm
RUN corepack enable && corepack prepare pnpm@9.0.0 --activate

WORKDIR /app

# Copy only the manifests and lockfile so this layer stays cached until
# dependencies actually change
COPY package.json pnpm-lock.yaml pnpm-workspace.yaml ./
COPY apps/kyo-otp-service/package.json ./apps/kyo-otp-service/
COPY packages/kyo-core/package.json ./packages/kyo-core/
COPY packages/kyo-types/package.json ./packages/kyo-types/

# Install dependencies (including devDependencies, needed for the build)
RUN pnpm install --frozen-lockfile

# ============================================
# Stage 2: Builder
# ============================================
FROM node:20-alpine AS builder

RUN corepack enable && corepack prepare pnpm@9.0.0 --activate

WORKDIR /app

# Bring over the installed dependencies (root and per-project node_modules)
COPY --from=deps /app/node_modules ./node_modules
COPY --from=deps /app/apps ./apps
COPY --from=deps /app/packages ./packages

# Copy the source code
COPY . .

# Compile TypeScript
RUN pnpm run build

# Drop devDependencies
RUN pnpm install --prod --frozen-lockfile

# ============================================
# Stage 3: Runner
# ============================================
FROM node:20-alpine AS runner

# Install dumb-init (signal forwarding for PID 1)
RUN apk add --no-cache dumb-init

# Create a non-root user
RUN addgroup -g 1001 -S nodejs && \
    adduser -S nodejs -u 1001

WORKDIR /app

# Runtime environment
ENV NODE_ENV=production \
    PORT=3000

# Copy the files needed at runtime.
# pnpm links each workspace project's dependencies into that project's own
# node_modules (symlinks into the root node_modules/.pnpm store), so the app's
# node_modules and package.json must be copied next to its dist output or the
# app's imports cannot be resolved.
COPY --from=builder --chown=nodejs:nodejs /app/node_modules ./node_modules
COPY --from=builder --chown=nodejs:nodejs /app/apps/kyo-otp-service/package.json ./apps/kyo-otp-service/package.json
COPY --from=builder --chown=nodejs:nodejs /app/apps/kyo-otp-service/node_modules ./apps/kyo-otp-service/node_modules
COPY --from=builder --chown=nodejs:nodejs /app/apps/kyo-otp-service/dist ./apps/kyo-otp-service/dist
COPY --from=builder --chown=nodejs:nodejs /app/packages ./packages
COPY --from=builder --chown=nodejs:nodejs /app/package.json ./

# Switch to the non-root user
USER nodejs

# Health check: exit 0 only on HTTP 200; a connection error is reported as
# unhealthy explicitly instead of via an unhandled 'error' event.
HEALTHCHECK --interval=30s --timeout=3s --start-period=40s \
  CMD node -e "require('http').get('http://localhost:3000/api/health', (r) => process.exit(r.statusCode === 200 ? 0 : 1)).on('error', () => process.exit(1))"

EXPOSE 3000

# Start through dumb-init
ENTRYPOINT ["dumb-init", "--"]
CMD ["node", "apps/kyo-otp-service/dist/index.js"]
# .dockerignore
# Patterns are matched relative to the build-context root, so a bare name such
# as `node_modules` only excludes the top-level entry. The `**/` prefix matches
# at any depth (including the root), which also covers the workspace projects
# under apps/ and packages/.
**/node_modules
**/dist
**/coverage
.git
.github
**/*.md
**/.env*
**/.DS_Store
**/*.log
// infra/cdk/lib/ecs-service-stack.ts
import * as cdk from 'aws-cdk-lib';
import * as ec2 from 'aws-cdk-lib/aws-ec2';
import * as ecs from 'aws-cdk-lib/aws-ecs';
import * as ecr from 'aws-cdk-lib/aws-ecr';
import * as elbv2 from 'aws-cdk-lib/aws-elasticloadbalancingv2';
import * as logs from 'aws-cdk-lib/aws-logs';
import * as secretsmanager from 'aws-cdk-lib/aws-secretsmanager';
import { Construct } from 'constructs';
/**
 * Provisions the ECS Fargate runtime for kyo-otp-service: cluster, task
 * definition, internet-facing ALB, service and auto scaling.
 *
 * The VPC, the ECR repository and the Secrets Manager secret are looked up by
 * name rather than created here, so they must already exist.
 */
export class KyoOtpServiceStack extends cdk.Stack {
  constructor(scope: Construct, id: string, props?: cdk.StackProps) {
    super(scope, id, props);

    // VPC (assumed to exist already)
    const vpc = ec2.Vpc.fromLookup(this, 'VPC', {
      vpcName: 'kyo-system-vpc',
    });

    // ECS Cluster
    const cluster = new ecs.Cluster(this, 'Cluster', {
      vpc,
      clusterName: 'kyo-system-cluster',
      containerInsights: true,
      // Associates the FARGATE and FARGATE_SPOT capacity providers with the
      // cluster; required by the service's capacityProviderStrategies below.
      enableFargateCapacityProviders: true,
    });

    // ECR Repository
    const repository = ecr.Repository.fromRepositoryName(
      this,
      'Repository',
      'kyo-otp-image'
    );

    // Secrets Manager
    // NOTE(review): fromSecretNameV2 yields a partial ARN (no random suffix);
    // confirm ECS can resolve the per-key references built from it below, or
    // switch to Secret.fromSecretCompleteArn().
    const secrets = secretsmanager.Secret.fromSecretNameV2(
      this,
      'AppSecrets',
      'kyo-otp-service/production'
    );

    // CloudWatch Logs
    const logGroup = new logs.LogGroup(this, 'LogGroup', {
      logGroupName: '/ecs/kyo-otp-service',
      retention: logs.RetentionDays.TWO_WEEKS,
      removalPolicy: cdk.RemovalPolicy.DESTROY,
    });

    // Task Definition
    const taskDefinition = new ecs.FargateTaskDefinition(this, 'TaskDef', {
      family: 'kyo-otp-task',
      cpu: 256, // 0.25 vCPU
      memoryLimitMiB: 512, // 512 MB
      runtimePlatform: {
        cpuArchitecture: ecs.CpuArchitecture.X86_64,
        operatingSystemFamily: ecs.OperatingSystemFamily.LINUX,
      },
    });

    // Container
    const container = taskDefinition.addContainer('AppContainer', {
      containerName: 'kyo-otp-service',
      image: ecs.ContainerImage.fromEcrRepository(repository, 'latest'),
      logging: ecs.LogDrivers.awsLogs({
        streamPrefix: 'kyo-otp',
        logGroup,
      }),
      environment: {
        NODE_ENV: 'production',
        PORT: '3000',
      },
      // Each entry is injected as an env var from one JSON key of the secret
      secrets: {
        REDIS_URL: ecs.Secret.fromSecretsManager(secrets, 'REDIS_URL'),
        DATABASE_URL: ecs.Secret.fromSecretsManager(secrets, 'DATABASE_URL'),
        JWT_SECRET: ecs.Secret.fromSecretsManager(secrets, 'JWT_SECRET'),
        MITAKE_USERNAME: ecs.Secret.fromSecretsManager(secrets, 'MITAKE_USERNAME'),
        MITAKE_PASSWORD: ecs.Secret.fromSecretsManager(secrets, 'MITAKE_PASSWORD'),
      },
      healthCheck: {
        // The command runs inside the container. The service image is built
        // from node:20-alpine with only dumb-init added, so curl is not
        // available; BusyBox wget is. `-O /dev/null` issues a plain GET and
        // discards the body.
        command: [
          'CMD-SHELL',
          'wget -q -O /dev/null http://localhost:3000/api/health || exit 1',
        ],
        interval: cdk.Duration.seconds(30),
        timeout: cdk.Duration.seconds(5),
        retries: 3,
        startPeriod: cdk.Duration.seconds(60),
      },
    });

    container.addPortMappings({
      containerPort: 3000,
      protocol: ecs.Protocol.TCP,
    });

    // Application Load Balancer
    const alb = new elbv2.ApplicationLoadBalancer(this, 'ALB', {
      vpc,
      internetFacing: true,
      loadBalancerName: 'kyo-system-alb',
    });

    const targetGroup = new elbv2.ApplicationTargetGroup(this, 'TargetGroup', {
      vpc,
      port: 3000,
      protocol: elbv2.ApplicationProtocol.HTTP,
      targetType: elbv2.TargetType.IP,
      healthCheck: {
        path: '/api/health',
        interval: cdk.Duration.seconds(30),
        timeout: cdk.Duration.seconds(5),
        healthyThresholdCount: 2,
        unhealthyThresholdCount: 3,
      },
      deregistrationDelay: cdk.Duration.seconds(30),
    });

    alb.addListener('HttpListener', {
      port: 80,
      defaultTargetGroups: [targetGroup],
    });

    // ECS Service
    const service = new ecs.FargateService(this, 'Service', {
      cluster,
      serviceName: 'kyo-otp-service',
      taskDefinition,
      desiredCount: 2,
      minHealthyPercent: 50,
      maxHealthyPercent: 200,
      healthCheckGracePeriod: cdk.Duration.seconds(60),
      // Roll back automatically when a deployment fails to stabilise
      circuitBreaker: {
        rollback: true,
      },
      // Weighted 2:1 in favour of Spot capacity
      capacityProviderStrategies: [
        {
          capacityProvider: 'FARGATE_SPOT',
          weight: 2,
        },
        {
          capacityProvider: 'FARGATE',
          weight: 1,
        },
      ],
    });

    service.attachToApplicationTargetGroup(targetGroup);

    // Auto Scaling
    const scaling = service.autoScaleTaskCount({
      minCapacity: 2,
      maxCapacity: 10,
    });

    scaling.scaleOnCpuUtilization('CpuScaling', {
      targetUtilizationPercent: 70,
      scaleInCooldown: cdk.Duration.seconds(300),
      scaleOutCooldown: cdk.Duration.seconds(60),
    });

    scaling.scaleOnMemoryUtilization('MemoryScaling', {
      targetUtilizationPercent: 80,
    });

    // Outputs
    new cdk.CfnOutput(this, 'LoadBalancerDNS', {
      value: alb.loadBalancerDnsName,
      description: 'ALB DNS Name',
    });

    new cdk.CfnOutput(this, 'ServiceName', {
      value: service.serviceName,
      description: 'ECS Service Name',
    });
  }
}
#!/bin/bash
# scripts/pre-deploy-check.sh
# Pre-deployment gate: runs the tests and the build, then confirms that the
# required credentials and tools are available. Stops at the first failure.
set -e

echo "🔍 執行部署前檢查..."

# Step 1: the test suite must pass
echo "1. 執行測試..."
pnpm test

# Step 2: the TypeScript build must succeed
echo "2. 檢查 TypeScript..."
pnpm run build

# Step 3: every required environment variable must be non-empty
echo "3. 檢查環境變數..."
for name in AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY ECR_REPOSITORY; do
  # Indirect expansion: read the variable whose name is stored in $name
  value="${!name}"
  if [ -n "$value" ]; then
    continue
  fi
  echo "❌ 缺少環境變數: $name"
  exit 1
done

# Step 4: the AWS credentials must be accepted by STS
echo "4. 檢查 AWS 認證..."
aws sts get-caller-identity

# Step 5: the Docker CLI must be installed
echo "5. 檢查 Docker..."
docker --version

echo "✅ 所有檢查通過!可以開始部署。"
#!/bin/bash
# scripts/post-deploy-verify.sh
# Post-deployment verification: prints the ECS service and task status, probes
# the health endpoint through the load balancer and tails recent logs.
#
# Optional environment overrides (defaults shown below):
#   CLUSTER, SERVICE, ALB_DNS
set -euo pipefail

CLUSTER="${CLUSTER:-kyo-system-cluster}"
SERVICE="${SERVICE:-kyo-otp-service}"
# Placeholder default - set ALB_DNS to the real load balancer DNS name
ALB_DNS="${ALB_DNS:-your-alb-dns-name.ap-northeast-1.elb.amazonaws.com}"

echo "🔍 驗證部署狀態..."

# 1. Service status
echo "1. 檢查 ECS 服務狀態..."
aws ecs describe-services \
  --cluster "$CLUSTER" \
  --services "$SERVICE" \
  --query 'services[0].{Status:status,Running:runningCount,Desired:desiredCount}' \
  --output table

# 2. Task health
echo "2. 檢查任務健康狀態..."
TASK_ARN=$(aws ecs list-tasks \
  --cluster "$CLUSTER" \
  --service-name "$SERVICE" \
  --query 'taskArns[0]' \
  --output text)

# With --output text an empty task list is printed as the literal "None"
if [ -z "$TASK_ARN" ] || [ "$TASK_ARN" = "None" ]; then
  echo "❌ 找不到執行中的任務 (service: $SERVICE)"
  exit 1
fi

aws ecs describe-tasks \
  --cluster "$CLUSTER" \
  --tasks "$TASK_ARN" \
  --query 'tasks[0].{LastStatus:lastStatus,HealthStatus:healthStatus}' \
  --output table

# 3. Health endpoint
echo "3. 測試健康檢查端點..."
# `|| true` keeps `set -e` from aborting on a connection error; curl still
# prints "000" as the status code, so the failure branch below reports it.
response=$(curl -s -o /dev/null -w "%{http_code}" --max-time 10 \
  "http://$ALB_DNS/api/health" || true)

if [ "$response" = "200" ]; then
  echo "✅ 健康檢查通過 (HTTP $response)"
else
  echo "❌ 健康檢查失敗 (HTTP $response)"
  exit 1
fi

# 4. Recent logs
echo "4. 檢查最新日誌..."
aws logs tail /ecs/kyo-otp-service --since 5m --format short

echo "✅ 部署驗證完成!"
#!/bin/bash
# scripts/rollback.sh
# Rolls the ECS service back to an earlier task definition revision, waits for
# the service to stabilise and then runs the post-deploy verification.
#
# Usage: scripts/rollback.sh [family:revision]
#   e.g. scripts/rollback.sh kyo-otp-task:7
set -e

CLUSTER="kyo-system-cluster"
SERVICE="kyo-otp-service"
# Taken from the first argument; falls back to the previous hard-coded value
# so that calling the script without arguments behaves as before.
PREVIOUS_TASK_DEF="${1:-kyo-otp-task:5}"

# Resolve the sibling script relative to this file, not the caller's cwd
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

echo "⚠️ 準備回滾到 $PREVIOUS_TASK_DEF..."

# Point the service at the older task definition
aws ecs update-service \
  --cluster "$CLUSTER" \
  --service "$SERVICE" \
  --task-definition "$PREVIOUS_TASK_DEF" \
  --force-new-deployment

# Wait for the service to stabilise
echo "等待服務穩定..."
aws ecs wait services-stable \
  --cluster "$CLUSTER" \
  --services "$SERVICE"

echo "✅ 回滾完成!"

# Verify
"$SCRIPT_DIR/post-deploy-verify.sh"
# .github/workflows/rollback.yml
# Manually triggered workflow that points the ECS service back at an earlier
# task definition revision and waits for the service to stabilise.
name: Rollback Deployment

on:
  workflow_dispatch:
    inputs:
      task_definition_revision:
        description: 'Task Definition Revision to rollback to'
        required: true
        type: string

jobs:
  rollback:
    runs-on: ubuntu-latest
    env:
      # Pass the user-supplied input through an environment variable instead
      # of interpolating it into the script text, so it cannot be executed as
      # shell code.
      REVISION: ${{ inputs.task_definition_revision }}

    steps:
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ secrets.AWS_REGION }}

      - name: Rollback ECS Service
        run: |
          # A task definition revision is a positive integer
          if ! [[ "$REVISION" =~ ^[0-9]+$ ]]; then
            echo "::error::task_definition_revision must be a positive integer, got '$REVISION'"
            exit 1
          fi

          aws ecs update-service \
            --cluster kyo-system-cluster \
            --service kyo-otp-service \
            --task-definition "kyo-otp-task:$REVISION" \
            --force-new-deployment

      - name: Wait for stability
        run: |
          aws ecs wait services-stable \
            --cluster kyo-system-cluster \
            --services kyo-otp-service

      - name: Verify rollback
        run: |
          echo "Rollback to revision $REVISION completed"
// infra/cdk/lib/monitoring-stack.ts
import * as cdk from 'aws-cdk-lib';
import * as cloudwatch from 'aws-cdk-lib/aws-cloudwatch';
import * as cloudwatch_actions from 'aws-cdk-lib/aws-cloudwatch-actions';
import * as sns from 'aws-cdk-lib/aws-sns';
import * as subscriptions from 'aws-cdk-lib/aws-sns-subscriptions';
import { Construct } from 'constructs';
/**
 * CloudWatch alarms for kyo-otp-service (CPU, memory and target 4xx count),
 * all publishing to a single SNS topic with an e-mail subscription.
 */
export class MonitoringStack extends cdk.Stack {
  constructor(scope: Construct, id: string, props?: cdk.StackProps) {
    super(scope, id, props);

    // Every alarm below notifies this topic
    const alertTopic = new sns.Topic(this, 'AlertTopic', {
      displayName: 'Kyo System Alerts',
    });
    alertTopic.addSubscription(
      new subscriptions.EmailSubscription('ops@example.com')
    );

    const fiveMinutes = cdk.Duration.minutes(5);

    // Builds a 5-minute average AWS/ECS metric scoped to the OTP service
    const ecsServiceAverage = (metricName: string): cloudwatch.Metric =>
      new cloudwatch.Metric({
        namespace: 'AWS/ECS',
        metricName,
        dimensionsMap: {
          ServiceName: 'kyo-otp-service',
          ClusterName: 'kyo-system-cluster',
        },
        statistic: 'Average',
        period: fiveMinutes,
      });

    // CPU: threshold 80 over two evaluation periods
    const cpuAlarm = new cloudwatch.Alarm(this, 'HighCPUAlarm', {
      alarmName: 'kyo-otp-service-high-cpu',
      metric: ecsServiceAverage('CPUUtilization'),
      threshold: 80,
      evaluationPeriods: 2,
      actionsEnabled: true,
    });
    cpuAlarm.addAlarmAction(new cloudwatch_actions.SnsAction(alertTopic));

    // Memory: threshold 85 over two evaluation periods
    const memoryAlarm = new cloudwatch.Alarm(this, 'HighMemoryAlarm', {
      alarmName: 'kyo-otp-service-high-memory',
      metric: ecsServiceAverage('MemoryUtilization'),
      threshold: 85,
      evaluationPeriods: 2,
    });
    memoryAlarm.addAlarmAction(new cloudwatch_actions.SnsAction(alertTopic));

    // Target 4xx responses: 5-minute sum, threshold 100, one evaluation period
    const target4xxSum = new cloudwatch.Metric({
      namespace: 'AWS/ApplicationELB',
      metricName: 'HTTPCode_Target_4XX_Count',
      dimensionsMap: {
        // Placeholder value - replace 'xxx' with the real load balancer ID
        LoadBalancer: 'app/kyo-system-alb/xxx',
      },
      statistic: 'Sum',
      period: fiveMinutes,
    });
    const http4xxAlarm = new cloudwatch.Alarm(this, 'High4xxAlarm', {
      alarmName: 'kyo-otp-service-high-4xx',
      metric: target4xxSum,
      threshold: 100,
      evaluationPeriods: 1,
    });
    http4xxAlarm.addAlarmAction(new cloudwatch_actions.SnsAction(alertTopic));
  }
}
我們建立了完整的 CI/CD Pipeline:
✅ GitHub Actions Workflow
✅ Docker 優化
✅ ECS 部署
✅ 監控與告警
✅ 回滾機制
明天我們將: