在 Day 15 我們完成了基礎的 AWS 部署配置,今天我們將實作完整的容器化部署策略。我們將針對前端和後端提供兩種不同的架構方案,聊聊各自的優缺點和適用場景,並重點實作後端的 ECS Fargate 自動擴容系統。
優點:
缺點:
優點:
缺點:
考慮到我們的 SaaS 產品特性:
這樣的混合架構能最大化各自優勢,保持設計一致性。
# apps/kyo-otp-service/Dockerfile
# ============================================
# Multi-stage build - OTP service
# ============================================

# Stage 1: shared base (non-root user, workdir, pnpm)
FROM node:18-alpine AS deps-base
LABEL maintainer="kyong-saas@example.com"
LABEL service="kyo-otp-service"

# Security: create a non-root user. `-G nodejs` is required; without it the
# BusyBox `adduser -S` does not place the user in the nodejs group, so files
# copied with --chown=nodejs:nodejs would not be group-owned by the runtime user.
RUN addgroup -g 1001 -S nodejs \
    && adduser -S nodejs -u 1001 -G nodejs

WORKDIR /app

# Install pnpm
RUN npm install -g pnpm@8

# Stage 2: production dependencies only
FROM deps-base AS deps
COPY package.json pnpm-lock.yaml ./
COPY packages/ packages/
# NOTE(review): neither pnpm-workspace.yaml nor apps/kyo-otp-service/package.json
# is copied into this stage, so the service's own production dependencies may
# not be installed here -- confirm against the repository layout.
RUN pnpm install --frozen-lockfile --prod

# Stage 3: build
FROM deps-base AS builder
COPY package.json pnpm-lock.yaml ./
COPY packages/ packages/
COPY apps/kyo-otp-service/ apps/kyo-otp-service/

# Install all dependencies (including dev)
RUN pnpm install --frozen-lockfile

# Build shared packages. The pattern is quoted so the shell passes it to pnpm
# verbatim instead of attempting filename expansion on `*`.
RUN pnpm --filter "@kyong/kyo-*" build

# Build the OTP service
RUN pnpm --filter kyo-otp-service build

# Stage 4: runtime
FROM node:18-alpine AS runtime

# Same non-root user as the build stages (see note on -G above)
RUN addgroup -g 1001 -S nodejs \
    && adduser -S nodejs -u 1001 -G nodejs

# Patch OS packages and install runtime tools (curl is used by HEALTHCHECK)
RUN apk update && apk upgrade && apk add --no-cache \
      dumb-init \
      curl \
      ca-certificates \
    && rm -rf /var/cache/apk/*

WORKDIR /app

# Production dependencies
COPY --from=deps --chown=nodejs:nodejs /app/node_modules ./node_modules
COPY --from=deps --chown=nodejs:nodejs /app/packages ./packages

# Built application
COPY --from=builder --chown=nodejs:nodejs /app/apps/kyo-otp-service/dist ./apps/kyo-otp-service/dist
COPY --from=builder --chown=nodejs:nodejs /app/apps/kyo-otp-service/package.json ./apps/kyo-otp-service/

# Health check
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
  CMD curl -f http://localhost:3000/health || exit 1

EXPOSE 3000

# Drop privileges
USER nodejs

# dumb-init runs as PID 1 and forwards signals to node
ENTRYPOINT ["dumb-init", "--"]
CMD ["node", "apps/kyo-otp-service/dist/index.js"]
前端繼續使用 S3 + CloudFront 的靜態託管方式,與後端 ECS 服務整合:
#!/bin/bash
# Frontend build-and-deploy script: builds the dashboard, uploads it to S3 and
# invalidates the CloudFront cache.

# Abort on the first failing command so a broken build is never uploaded.
set -euo pipefail

cd apps/kyo-dashboard

# Write the API endpoint BEFORE building: Vite inlines VITE_* variables from
# .env.production at build time, so writing the file after `pnpm build` (as the
# script previously did) has no effect on the uploaded bundle.
echo "VITE_API_URL=https://api.kyong-saas.com" > .env.production

# Build the frontend
pnpm build

# Sync to S3 (removes objects that no longer exist locally)
aws s3 sync dist/ s3://kyo-saas-frontend-bucket/ --delete

# Invalidate the CloudFront cache
aws cloudfront create-invalidation \
  --distribution-id E1234567890123 \
  --paths "/*"
{
  "family": "production-kyo-otp-service",
  "networkMode": "awsvpc",
  "requiresCompatibilities": ["FARGATE"],
  "cpu": "512",
  "memory": "1024",
  "executionRoleArn": "arn:aws:iam::ACCOUNT_ID:role/ecsTaskExecutionRole",
  "taskRoleArn": "arn:aws:iam::ACCOUNT_ID:role/ecsTaskRole",
  "containerDefinitions": [
    {
      "name": "kyo-otp-service",
      "image": "ACCOUNT_ID.dkr.ecr.REGION.amazonaws.com/kyo-otp-service:latest",
      "essential": true,
      "portMappings": [
        {
          "containerPort": 3000,
          "protocol": "tcp"
        }
      ],
      "healthCheck": {
        "command": [
          "CMD-SHELL",
          "curl -f http://localhost:3000/health || exit 1"
        ],
        "interval": 30,
        "timeout": 5,
        "retries": 3,
        "startPeriod": 60
      },
      "environment": [
        {
          "name": "NODE_ENV",
          "value": "production"
        },
        {
          "name": "PORT",
          "value": "3000"
        }
      ],
      "secrets": [
        {
          "name": "DATABASE_URL",
          "valueFrom": "/kyo-saas/production/database-url"
        },
        {
          "name": "REDIS_URL",
          "valueFrom": "/kyo-saas/production/redis-url"
        },
        {
          "name": "JWT_SECRET",
          "valueFrom": "/kyo-saas/production/jwt-secret"
        },
        {
          "name": "MITAKE_USERNAME",
          "valueFrom": "/kyo-saas/production/mitake-username"
        },
        {
          "name": "MITAKE_PASSWORD",
          "valueFrom": "/kyo-saas/production/mitake-password"
        }
      ],
      "logConfiguration": {
        "logDriver": "awslogs",
        "options": {
          "awslogs-group": "/ecs/production-kyo-otp-service",
          "awslogs-region": "ap-southeast-1",
          "awslogs-stream-prefix": "ecs"
        }
      },
      "ulimits": [
        {
          "name": "nofile",
          "softLimit": 65536,
          "hardLimit": 65536
        }
      ],
      "mountPoints": [],
      "volumesFrom": [],
      "linuxParameters": {
        "initProcessEnabled": true
      }
    }
  ],
  "placementConstraints": [],
  "tags": [
    {
      "key": "Environment",
      "value": "production"
    },
    {
      "key": "Service",
      "value": "kyo-otp-service"
    },
    {
      "key": "CostCenter",
      "value": "engineering"
    }
  ]
}
# infrastructure/aws/ecs-cluster.tf

# ECS cluster shared by the Kyo SaaS services, with Container Insights enabled.
resource "aws_ecs_cluster" "kyo_saas" {
  name = "${var.environment}-kyo-saas-cluster"

  setting {
    name  = "containerInsights"
    value = "enabled"
  }

  tags = local.common_tags
}

# Capacity providers are attached through a dedicated resource: the inline
# `capacity_providers` / `default_capacity_provider_strategy` arguments on
# aws_ecs_cluster were deprecated in AWS provider v4 and removed in v5.
# Services that declare their own capacity_provider_strategy should be created
# after this association exists.
resource "aws_ecs_cluster_capacity_providers" "kyo_saas" {
  cluster_name       = aws_ecs_cluster.kyo_saas.name
  capacity_providers = ["FARGATE", "FARGATE_SPOT"]

  # The first 2 tasks always run on on-demand Fargate ...
  default_capacity_provider_strategy {
    capacity_provider = "FARGATE"
    weight            = 1
    base              = 2
  }

  # ... and additional tasks are split 1:4 in favour of Spot.
  default_capacity_provider_strategy {
    capacity_provider = "FARGATE_SPOT"
    weight            = 4
    base              = 0
  }
}
# Container Insights log group.
# Container Insights writes performance events to
# /aws/ecs/containerinsights/<cluster-name>/performance; managing that exact
# group here is what makes the 30-day retention apply. The previous name
# (/aws/ecs/cluster/<cluster-name>) is not written to by Container Insights.
resource "aws_cloudwatch_log_group" "ecs_cluster" {
  name              = "/aws/ecs/containerinsights/${aws_ecs_cluster.kyo_saas.name}/performance"
  retention_in_days = 30

  tags = local.common_tags
}
# ============================================
# OTP service
# ============================================

# ECS service running the OTP backend on Fargate / Fargate Spot behind the ALB.
resource "aws_ecs_service" "otp_service" {
  name            = "${var.environment}-kyo-otp-service"
  cluster         = aws_ecs_cluster.kyo_saas.id
  task_definition = aws_ecs_task_definition.otp_service.arn
  desired_count   = var.otp_service_desired_count

  # At least one task on on-demand Fargate; the rest split 1:3 towards Spot.
  capacity_provider_strategy {
    capacity_provider = "FARGATE"
    weight            = 1
    base              = 1
  }

  capacity_provider_strategy {
    capacity_provider = "FARGATE_SPOT"
    weight            = 3
    base              = 0
  }

  network_configuration {
    subnets         = var.private_subnet_ids
    security_groups = [aws_security_group.ecs_service.id]
  }

  load_balancer {
    target_group_arn = aws_lb_target_group.otp_service.arn
    container_name   = "kyo-otp-service"
    container_port   = 3000
  }

  # Rolling-deployment settings. These are top-level arguments of
  # aws_ecs_service; there is no `deployment_configuration` block.
  deployment_maximum_percent         = 200
  deployment_minimum_healthy_percent = 100

  deployment_circuit_breaker {
    enable   = true
    rollback = true
  }

  # NOTE(review): the CodeDeploy deployment group for this service implies
  # `deployment_controller { type = "CODE_DEPLOY" }`, which is not compatible
  # with the circuit breaker above -- decide which deployment mechanism owns
  # this service.

  # Service discovery
  service_registries {
    registry_arn = aws_service_discovery_service.otp_service.arn
  }

  # Allow `aws ecs execute-command` into running tasks
  enable_execute_command = true

  tags = local.common_tags

  # Application Auto Scaling adjusts desired_count at runtime; ignoring it
  # stops every `terraform apply` from resetting the service to the initial size.
  lifecycle {
    ignore_changes = [desired_count]
  }

  # The target group must be attached to the load balancer (via the listener
  # rule) before the service can register with it. The previously referenced
  # aws_lb_listener.app_lb_listener is not defined alongside the listeners
  # in load-balancer.tf.
  depends_on = [
    aws_lb_listener_rule.api_routing,
    aws_iam_role_policy_attachment.ecs_task_execution_role,
  ]
}
# Auto scaling target: registers the OTP service's DesiredCount as a scalable
# dimension bounded by the configured min/max capacity.
resource "aws_appautoscaling_target" "otp_service" {
  service_namespace  = "ecs"
  scalable_dimension = "ecs:service:DesiredCount"
  resource_id        = "service/${aws_ecs_cluster.kyo_saas.name}/${aws_ecs_service.otp_service.name}"

  min_capacity = var.otp_service_min_capacity
  max_capacity = var.otp_service_max_capacity

  tags = local.common_tags
}
# CPU scaling policy: target tracking that keeps average service CPU near 70%.
resource "aws_appautoscaling_policy" "otp_service_cpu" {
  name        = "${var.environment}-kyo-otp-cpu-scaling"
  policy_type = "TargetTrackingScaling"

  service_namespace  = aws_appautoscaling_target.otp_service.service_namespace
  scalable_dimension = aws_appautoscaling_target.otp_service.scalable_dimension
  resource_id        = aws_appautoscaling_target.otp_service.resource_id

  target_tracking_scaling_policy_configuration {
    target_value = 70.0

    # Cooldowns in seconds: scale out after 60, scale in after 300.
    scale_out_cooldown = 60
    scale_in_cooldown  = 300

    predefined_metric_specification {
      predefined_metric_type = "ECSServiceAverageCPUUtilization"
    }
  }
}
# Memory scaling policy: target tracking that keeps average service memory near 80%.
resource "aws_appautoscaling_policy" "otp_service_memory" {
  name        = "${var.environment}-kyo-otp-memory-scaling"
  policy_type = "TargetTrackingScaling"

  service_namespace  = aws_appautoscaling_target.otp_service.service_namespace
  scalable_dimension = aws_appautoscaling_target.otp_service.scalable_dimension
  resource_id        = aws_appautoscaling_target.otp_service.resource_id

  target_tracking_scaling_policy_configuration {
    target_value = 80.0

    # Cooldowns in seconds: scale out after 60, scale in after 300.
    scale_out_cooldown = 60
    scale_in_cooldown  = 300

    predefined_metric_specification {
      predefined_metric_type = "ECSServiceAverageMemoryUtilization"
    }
  }
}
# Request-based scaling policy.
#
# Uses the predefined ALBRequestCountPerTarget metric instead of a customized
# `RequestCount` specification, for two reasons:
#   * `dimensions` in customized_metric_specification is a repeated
#     `dimensions { name, value }` block, not a map, so the previous form did
#     not validate;
#   * target tracking needs a metric that falls as capacity is added. The
#     load balancer's total RequestCount does not, so tracking it would keep
#     scaling out towards max_capacity.
resource "aws_appautoscaling_policy" "otp_service_requests" {
  name               = "${var.environment}-kyo-otp-requests-scaling"
  policy_type        = "TargetTrackingScaling"
  resource_id        = aws_appautoscaling_target.otp_service.resource_id
  scalable_dimension = aws_appautoscaling_target.otp_service.scalable_dimension
  service_namespace  = aws_appautoscaling_target.otp_service.service_namespace

  target_tracking_scaling_policy_configuration {
    predefined_metric_specification {
      predefined_metric_type = "ALBRequestCountPerTarget"
      # Format: <load-balancer arn_suffix>/<target-group arn_suffix>
      resource_label = "${aws_lb.app_lb.arn_suffix}/${aws_lb_target_group.otp_service.arn_suffix}"
    }

    target_value       = 1000.0 # requests per target per minute
    scale_in_cooldown  = 300
    scale_out_cooldown = 60
  }
}
# 注意:前端使用 S3 + CloudFront 部署(參考 Day 15)
# 此處不需要前端的 ECS 服務配置
# infrastructure/aws/service-discovery.tf

# Private DNS namespace (<environment>.kyo-saas.local) in the VPC, used for
# service discovery.
resource "aws_service_discovery_private_dns_namespace" "kyo_saas" {
  name        = "${var.environment}.kyo-saas.local"
  vpc         = var.vpc_id
  description = "Service discovery for Kyo SaaS"

  tags = local.common_tags
}
# Cloud Map service for the OTP backend: multivalue A records with a 10s TTL.
resource "aws_service_discovery_service" "otp_service" {
  name = "otp-service"

  dns_config {
    namespace_id   = aws_service_discovery_private_dns_namespace.kyo_saas.id
    routing_policy = "MULTIVALUE"

    dns_records {
      ttl  = 10
      type = "A"
    }
  }

  # `health_check_grace_period_seconds` is not an argument of this resource
  # (it belongs to aws_ecs_service), so it has been removed. Instance health
  # for ECS-registered tasks is reported through a custom health check.
  health_check_custom_config {
    failure_threshold = 1
  }

  tags = local.common_tags
}
# infrastructure/aws/load-balancer.tf

# Internet-facing Application Load Balancer in the public subnets.
resource "aws_lb" "app_lb" {
  name               = "${var.environment}-kyo-saas-alb"
  load_balancer_type = "application"
  internal           = false

  subnets         = var.public_subnet_ids
  security_groups = [aws_security_group.alb.id]

  enable_deletion_protection = var.enable_deletion_protection

  # Access logs are delivered to the ALB log bucket under the "alb" prefix.
  access_logs {
    enabled = true
    bucket  = aws_s3_bucket.alb_logs.bucket
    prefix  = "alb"
  }

  tags = local.common_tags
}
# Target group for the OTP service (IP targets on port 3000).
resource "aws_lb_target_group" "otp_service" {
  name        = "${var.environment}-kyo-otp-tg"
  vpc_id      = var.vpc_id
  target_type = "ip"
  protocol    = "HTTP"
  port        = 3000

  # Connection draining window, in seconds
  deregistration_delay = 30

  # Health check: GET /health on the traffic port must return 200
  health_check {
    enabled             = true
    protocol            = "HTTP"
    port                = "traffic-port"
    path                = "/health"
    matcher             = "200"
    interval            = 30
    timeout             = 5
    healthy_threshold   = 2
    unhealthy_threshold = 3
  }

  # Sticky sessions are configured but switched off (enable if needed)
  stickiness {
    enabled         = false
    type            = "lb_cookie"
    cookie_duration = 86400
  }

  lifecycle {
    create_before_destroy = true
  }

  tags = local.common_tags
}
# 注意:前端使用 S3 + CloudFront,不需要 ALB 目標群組
# HTTPS listener - dedicated to API traffic.
# Requests that match no listener rule receive a fixed 404 response.
resource "aws_lb_listener" "app_lb_listener_https" {
  load_balancer_arn = aws_lb.app_lb.arn
  port              = "443"
  protocol          = "HTTPS"
  certificate_arn   = aws_acm_certificate_validation.cert_validation.certificate_arn

  # TLS 1.3 with TLS 1.2 fallback. Replaces ELBSecurityPolicy-TLS-1-2-2017-01,
  # which predates TLS 1.3 support on ALB.
  ssl_policy = "ELBSecurityPolicy-TLS13-1-2-2021-06"

  default_action {
    type = "fixed-response"

    fixed_response {
      content_type = "text/plain"
      message_body = "API endpoint not found"
      status_code  = "404"
    }
  }

  tags = local.common_tags
}
# HTTP listener: permanently (301) redirects all port-80 traffic to HTTPS on 443.
resource "aws_lb_listener" "app_lb_listener_http" {
  load_balancer_arn = aws_lb.app_lb.arn
  protocol          = "HTTP"
  port              = "80"

  default_action {
    type = "redirect"

    redirect {
      protocol    = "HTTPS"
      port        = "443"
      status_code = "HTTP_301"
    }
  }

  tags = local.common_tags
}
# API routing rule: forwards /api/* and /ws/* on the HTTPS listener to the
# OTP service target group.
resource "aws_lb_listener_rule" "api_routing" {
  listener_arn = aws_lb_listener.app_lb_listener_https.arn
  priority     = 100

  condition {
    path_pattern {
      values = ["/api/*", "/ws/*"]
    }
  }

  action {
    type             = "forward"
    target_group_arn = aws_lb_target_group.otp_service.arn
  }

  tags = local.common_tags
}
# infrastructure/aws/codedeploy.tf

# CodeDeploy application for the ECS compute platform.
resource "aws_codedeploy_application" "kyo_saas" {
  name             = "${var.environment}-kyo-saas-app"
  compute_platform = "ECS"

  tags = local.common_tags
}
# Replacement ("green") target group. An ECS blue/green deployment group needs
# a pair of target groups to shift traffic between; it mirrors the settings of
# aws_lb_target_group.otp_service.
resource "aws_lb_target_group" "otp_service_green" {
  name        = "${var.environment}-kyo-otp-tg-green"
  port        = 3000
  protocol    = "HTTP"
  vpc_id      = var.vpc_id
  target_type = "ip"

  health_check {
    enabled             = true
    healthy_threshold   = 2
    interval            = 30
    matcher             = "200"
    path                = "/health"
    port                = "traffic-port"
    protocol            = "HTTP"
    timeout             = 5
    unhealthy_threshold = 3
  }

  deregistration_delay = 30

  tags = local.common_tags
}

# OTP service deployment group (ECS blue/green).
#
# NOTE(review): CodeDeploy can only drive an ECS service whose
# deployment_controller type is CODE_DEPLOY -- confirm the service definition.
resource "aws_codedeploy_deployment_group" "otp_service" {
  app_name              = aws_codedeploy_application.kyo_saas.name
  deployment_group_name = "${var.environment}-otp-service-dg"
  service_role_arn      = aws_iam_role.codedeploy_service_role.arn

  # Without this the group falls back to CodeDeployDefault.OneAtATime, which
  # is an EC2/on-premises configuration.
  deployment_config_name = aws_codedeploy_deployment_config.blue_green_ecs.id

  # Required for the ECS compute platform.
  deployment_style {
    deployment_option = "WITH_TRAFFIC_CONTROL"
    deployment_type   = "BLUE_GREEN"
  }

  auto_rollback_configuration {
    enabled = true
    events  = ["DEPLOYMENT_FAILURE", "DEPLOYMENT_STOP_ON_ALARM"]
  }

  # Blue/green settings. `green_fleet_provisioning_option`
  # (COPY_AUTO_SCALING_GROUP) was removed: it applies to EC2 Auto Scaling
  # groups, not to ECS services.
  blue_green_deployment_config {
    terminate_blue_instances_on_deployment_success {
      action                           = "TERMINATE"
      termination_wait_time_in_minutes = 5
    }

    deployment_ready_option {
      action_on_timeout = "CONTINUE_DEPLOYMENT"
    }
  }

  # ECS service being deployed
  ecs_service {
    cluster_name = aws_ecs_cluster.kyo_saas.name
    service_name = aws_ecs_service.otp_service.name
  }

  # ECS deployments take a target-group pair plus the production listener,
  # not a single target_group_info.
  load_balancer_info {
    target_group_pair_info {
      prod_traffic_route {
        listener_arns = [aws_lb_listener.app_lb_listener_https.arn]
      }

      target_group {
        name = aws_lb_target_group.otp_service.name
      }

      target_group {
        name = aws_lb_target_group.otp_service_green.name
      }
    }
  }

  # Alarms that stop (and, via auto_rollback_configuration, roll back) a deployment
  alarm_configuration {
    enabled = true
    alarms = [
      aws_cloudwatch_metric_alarm.deployment_errors.alarm_name,
      aws_cloudwatch_metric_alarm.high_response_time.alarm_name
    ]
  }

  tags = local.common_tags
}
# Deployment configuration: time-based canary that shifts 20% of traffic first
# and the remainder 10 minutes later.
resource "aws_codedeploy_deployment_config" "blue_green_ecs" {
  deployment_config_name = "${var.environment}-BlueGreenECS"
  compute_platform       = "ECS"

  traffic_routing_config {
    type = "TimeBasedCanary"

    # The arguments are named `interval` (minutes) and `percentage`;
    # `canary_interval` / `canary_percentage` are not recognised.
    time_based_canary {
      interval   = 10
      percentage = 20
    }
  }

  # No `tags` argument: aws_codedeploy_deployment_config does not support tagging.
}
# .github/workflows/deploy-production.yml
#
# Pipeline: security scan -> build & push image + build frontend ->
# deploy to staging -> blue/green deploy to production -> notifications.
name: Deploy to Production

on:
  push:
    branches: [main]
  workflow_dispatch:
    inputs:
      deployment_type:
        description: 'Deployment Type'
        required: true
        default: 'blue-green'
        type: choice
        options:
          - blue-green
          - rolling
          - immediate

# Least-privilege default for GITHUB_TOKEN; jobs that need more ask for it.
permissions:
  contents: read

# Never run two production deployments concurrently.
concurrency:
  group: deploy-production
  cancel-in-progress: false

env:
  AWS_REGION: ap-southeast-1
  ECR_REPOSITORY_OTP: kyo-otp-service
  ECR_REPOSITORY_DASHBOARD: kyo-dashboard
  ECS_CLUSTER: production-kyo-saas-cluster
  ENVIRONMENT: production

jobs:
  security-scan:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      statuses: write         # super-linter reports commit statuses
      security-events: write  # upload-sarif writes code-scanning results
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Run security scan
        uses: github/super-linter@v4
        env:
          VALIDATE_DOCKERFILE_HADOLINT: true
          VALIDATE_TYPESCRIPT_ES: true
          DEFAULT_BRANCH: main
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      # Pinned to a release tag instead of the moving `master` branch.
      - name: Container security scan
        uses: aquasecurity/trivy-action@0.28.0
        with:
          scan-type: 'fs'
          scan-ref: '.'
          format: 'sarif'
          output: 'trivy-results.sarif'

      # upload-sarif v2 is retired; v3 is the supported major version.
      - name: Upload security scan results
        uses: github/codeql-action/upload-sarif@v3
        with:
          sarif_file: 'trivy-results.sarif'

  build-and-push:
    needs: security-scan
    runs-on: ubuntu-latest
    outputs:
      # docker/build-push-action exposes imageid/digest/metadata but no `image`
      # output, so the URI is produced by the dedicated `image` step below.
      # (The former `dashboard-image` output referenced a step that does not
      # exist and was never consumed, so it has been dropped.)
      otp-image: ${{ steps.image.outputs.uri }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ env.AWS_REGION }}
          # The registry host contains the account id; if it were masked,
          # the job output carrying the image URI would be dropped.
          mask-aws-account-id: false

      - name: Login to Amazon ECR
        id: login-ecr
        uses: aws-actions/amazon-ecr-login@v2

      - name: Compute image URI
        id: image
        run: |
          echo "uri=${{ steps.login-ecr.outputs.registry }}/${{ env.ECR_REPOSITORY_OTP }}:${{ github.sha }}" >> "$GITHUB_OUTPUT"

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Build and push OTP service image
        id: build-otp
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./apps/kyo-otp-service/Dockerfile
          push: true
          tags: |
            ${{ steps.login-ecr.outputs.registry }}/${{ env.ECR_REPOSITORY_OTP }}:latest
            ${{ steps.image.outputs.uri }}
          cache-from: type=gha
          cache-to: type=gha,mode=max
          platforms: linux/amd64
          build-args: |
            BUILDKIT_INLINE_CACHE=1

      - name: Image vulnerability scan
        env:
          IMAGE: ${{ steps.image.outputs.uri }}
        run: |
          # Buildx pushes without loading the image into the local daemon, and
          # the Trivy container has no ECR credentials, so pull it first.
          docker pull "$IMAGE"
          # Fail the job on fixable HIGH/CRITICAL findings (--exit-code 0
          # could never block a release).
          docker run --rm -v /var/run/docker.sock:/var/run/docker.sock \
            aquasec/trivy image --exit-code 1 --ignore-unfixed --severity HIGH,CRITICAL \
            "$IMAGE"

      # Tool versions match the Dockerfile (node 18, pnpm 8).
      - name: Set up pnpm
        uses: pnpm/action-setup@v4
        with:
          version: 8

      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          node-version: 18
          cache: pnpm

      # The frontend is built once here and shipped to each environment from
      # its own deploy job; previously this job pushed straight to the
      # production bucket before staging had run.
      - name: Build frontend
        run: |
          pnpm install --frozen-lockfile
          cd apps/kyo-dashboard
          pnpm build

      - name: Upload frontend build
        uses: actions/upload-artifact@v4
        with:
          name: dashboard-dist
          path: apps/kyo-dashboard/dist
          if-no-files-found: error

  deploy-staging:
    needs: build-and-push
    runs-on: ubuntu-latest
    environment: staging
    steps:
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ env.AWS_REGION }}

      # This job has no checkout, so dist/ only exists once the artifact is
      # downloaded.
      - name: Download frontend build
        uses: actions/download-artifact@v4
        with:
          name: dashboard-dist
          path: apps/kyo-dashboard/dist

      - name: Deploy to Staging
        run: |
          set -euo pipefail

          # Deploy the frontend to S3
          aws s3 sync apps/kyo-dashboard/dist/ s3://staging-kyo-saas-frontend/ --delete

          aws cloudfront create-invalidation \
            --distribution-id ${{ secrets.STAGING_CLOUDFRONT_DISTRIBUTION_ID }} \
            --paths "/*"

          # Roll the backend ECS service
          aws ecs update-service \
            --cluster staging-kyo-saas-cluster \
            --service staging-kyo-otp-service \
            --force-new-deployment

      - name: Wait for deployment
        run: |
          aws ecs wait services-stable \
            --cluster staging-kyo-saas-cluster \
            --services staging-kyo-otp-service

      - name: Run smoke tests
        run: |
          # Basic health checks
          curl -f https://staging.kyong-saas.com/api/health
          curl -f https://staging.kyong-saas.com/health

  deploy-production:
    needs: [build-and-push, deploy-staging]
    runs-on: ubuntu-latest
    environment: production
    steps:
      # Only the blue-green path is implemented below; refuse the other
      # choices instead of silently skipping the backend deployment.
      - name: Reject unsupported deployment types
        if: ${{ github.event.inputs.deployment_type != '' && github.event.inputs.deployment_type != 'blue-green' }}
        run: |
          echo "::error::deployment_type '${{ github.event.inputs.deployment_type }}' is not implemented; only blue-green is supported"
          exit 1

      - name: Checkout code
        uses: actions/checkout@v4

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ env.AWS_REGION }}

      - name: Create new task definitions
        env:
          OTP_IMAGE: ${{ needs.build-and-push.outputs.otp-image }}
        run: |
          set -euo pipefail

          if [ -z "$OTP_IMAGE" ]; then
            echo "::error::build-and-push did not provide an image URI"
            exit 1
          fi

          # Fetch the current task definition
          aws ecs describe-task-definition \
            --task-definition production-kyo-otp-service \
            --query taskDefinition > otp-task-def.json

          # Point it at the new image and strip the read-only fields that
          # register-task-definition rejects
          jq --arg IMAGE "$OTP_IMAGE" '
            .containerDefinitions[0].image = $IMAGE
            | del(.taskDefinitionArn, .revision, .status, .requiresAttributes, .placementConstraints, .compatibilities, .registeredAt, .registeredBy)
          ' otp-task-def.json > otp-task-def-final.json

      - name: Deploy with Blue-Green strategy
        if: ${{ github.event.inputs.deployment_type == 'blue-green' || github.event.inputs.deployment_type == '' }}
        run: |
          set -euo pipefail

          # Register the new task definition
          OTP_TASK_DEF_ARN=$(aws ecs register-task-definition \
            --cli-input-json file://otp-task-def-final.json \
            --query 'taskDefinition.taskDefinitionArn' --output text)

          # CodeDeploy AppSpec. For ECS, lifecycle hooks must name Lambda
          # functions; the script-style hooks (Location/Timeout/RunAs) are an
          # EC2/on-premises construct, so none are declared here.
          cat > appspec.yml << EOF
          version: 0.0
          Resources:
            - TargetService:
                Type: AWS::ECS::Service
                Properties:
                  TaskDefinition: "$OTP_TASK_DEF_ARN"
                  LoadBalancerInfo:
                    ContainerName: "kyo-otp-service"
                    ContainerPort: 3000
                  PlatformVersion: "LATEST"
          EOF

          # appSpecContent.content carries the raw AppSpec text (not base64);
          # jq takes care of the JSON string escaping.
          jq -n --arg content "$(cat appspec.yml)" \
            '{revisionType: "AppSpecContent", appSpecContent: {content: $content}}' \
            > revision.json

          # Start the CodeDeploy deployment
          DEPLOYMENT_ID=$(aws deploy create-deployment \
            --application-name production-kyo-saas-app \
            --deployment-group-name production-otp-service-dg \
            --revision file://revision.json \
            --deployment-config-name CodeDeployDefault.ECSCanary10Percent5Minutes \
            --query 'deploymentId' --output text)

          # Shell variables do not survive across steps; export the id so the
          # rollback step can see it.
          echo "DEPLOYMENT_ID=$DEPLOYMENT_ID" >> "$GITHUB_ENV"
          echo "Deployment ID: $DEPLOYMENT_ID"

          # Wait for the deployment to finish
          aws deploy wait deployment-successful --deployment-id "$DEPLOYMENT_ID"

      - name: Verify deployment
        run: |
          set -euo pipefail

          # Health check: fail the step if no attempt succeeds
          healthy=false
          for i in {1..10}; do
            if curl -f https://kyong-saas.com/api/health; then
              echo "Health check passed"
              healthy=true
              break
            fi
            echo "Health check failed, retrying in 30 seconds..."
            sleep 30
          done

          if [ "$healthy" != "true" ]; then
            echo "::error::Health check did not pass after 10 attempts"
            exit 1
          fi

          # Key metrics. ECS service metrics are published per
          # ClusterName + ServiceName, so both dimensions are required.
          aws cloudwatch get-metric-statistics \
            --namespace AWS/ECS \
            --metric-name CPUUtilization \
            --dimensions Name=ClusterName,Value="$ECS_CLUSTER" Name=ServiceName,Value=production-kyo-otp-service \
            --statistics Average \
            --start-time "$(date -u -d '5 minutes ago' +%Y-%m-%dT%H:%M:%S)" \
            --end-time "$(date -u +%Y-%m-%dT%H:%M:%S)" \
            --period 300

      - name: Download frontend build
        uses: actions/download-artifact@v4
        with:
          name: dashboard-dist
          path: apps/kyo-dashboard/dist

      # The production frontend ships only after the backend is verified.
      - name: Deploy frontend to S3
        run: |
          set -euo pipefail

          aws s3 sync apps/kyo-dashboard/dist/ s3://production-kyo-saas-frontend/ --delete

          aws cloudfront create-invalidation \
            --distribution-id ${{ secrets.CLOUDFRONT_DISTRIBUTION_ID }} \
            --paths "/*"

      - name: Rollback on failure
        if: failure()
        run: |
          echo "Deployment failed, initiating rollback..."

          # Stop the in-flight deployment, if one was started. A deployment
          # that already finished cannot be stopped; report that and continue
          # so the alert below is still sent.
          if [ -n "${DEPLOYMENT_ID:-}" ]; then
            aws deploy stop-deployment --deployment-id "$DEPLOYMENT_ID" --auto-rollback-enabled \
              || echo "::warning::Could not stop deployment $DEPLOYMENT_ID"
          else
            echo "No CodeDeploy deployment was started; nothing to stop"
          fi

          # Send the alert
          aws sns publish \
            --topic-arn ${{ secrets.SNS_ALERT_TOPIC }} \
            --message "Production deployment failed and rolled back. SHA: ${{ github.sha }}"

  post-deployment:
    needs: deploy-production
    runs-on: ubuntu-latest
    if: always()
    steps:
      # Report the result of deploy-production; `job.status` here would be
      # the status of this notification job itself. The action reads the
      # webhook from the SLACK_WEBHOOK_URL environment variable.
      - name: Deployment notification
        uses: 8398a7/action-slack@v3
        with:
          status: ${{ needs.deploy-production.result }}
          channel: '#deployments'
          fields: repo,message,commit,author,action,eventName,ref,workflow
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK }}

      - name: Update deployment database
        env:
          INTERNAL_API_TOKEN: ${{ secrets.INTERNAL_API_TOKEN }}
          DEPLOY_STATUS: ${{ needs.deploy-production.result }}
        run: |
          # Record the deployment in the internal system
          jq -n \
            --arg version "$GITHUB_SHA" \
            --arg status "$DEPLOY_STATUS" \
            --arg timestamp "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
            '{environment: "production", version: $version, status: $status, timestamp: $timestamp}' \
          | curl --fail -X POST https://internal-api.kyong-saas.com/deployments \
              -H "Authorization: Bearer $INTERNAL_API_TOKEN" \
              -H "Content-Type: application/json" \
              --data @-
# infrastructure/aws/monitoring.tf

# Overview dashboard: ECS service utilisation, ALB traffic, RDS and Redis.
#
# Changes from the previous definition:
#   * no `tags` argument -- aws_cloudwatch_dashboard does not support tagging;
#   * the `<env>-kyo-dashboard` ECS metrics were dropped: the frontend is
#     served from S3 + CloudFront, so no ECS service by that name is defined;
#   * TargetResponseTime is charted as an average (summing latencies over the
#     period is not meaningful).
resource "aws_cloudwatch_dashboard" "kyo_saas_overview" {
  dashboard_name = "${var.environment}-kyo-saas-overview"

  dashboard_body = jsonencode({
    widgets = [
      # ECS service overview
      {
        type   = "metric"
        x      = 0
        y      = 0
        width  = 12
        height = 6
        properties = {
          metrics = [
            ["AWS/ECS", "CPUUtilization", "ServiceName", aws_ecs_service.otp_service.name, "ClusterName", aws_ecs_cluster.kyo_saas.name],
            ["AWS/ECS", "MemoryUtilization", "ServiceName", aws_ecs_service.otp_service.name, "ClusterName", aws_ecs_cluster.kyo_saas.name]
          ]
          period = 300
          stat   = "Average"
          region = var.aws_region
          title  = "ECS Service Resource Utilization"
          yAxis = {
            left = {
              min = 0
              max = 100
            }
          }
        }
      },
      # Load balancer metrics
      {
        type   = "metric"
        x      = 12
        y      = 0
        width  = 12
        height = 6
        properties = {
          metrics = [
            ["AWS/ApplicationELB", "RequestCount", "LoadBalancer", aws_lb.app_lb.arn_suffix],
            ["AWS/ApplicationELB", "TargetResponseTime", "LoadBalancer", aws_lb.app_lb.arn_suffix, { stat = "Average" }],
            ["AWS/ApplicationELB", "HTTPCode_Target_2XX_Count", "LoadBalancer", aws_lb.app_lb.arn_suffix],
            ["AWS/ApplicationELB", "HTTPCode_Target_4XX_Count", "LoadBalancer", aws_lb.app_lb.arn_suffix],
            ["AWS/ApplicationELB", "HTTPCode_Target_5XX_Count", "LoadBalancer", aws_lb.app_lb.arn_suffix]
          ]
          period = 300
          stat   = "Sum"
          region = var.aws_region
          title  = "Application Load Balancer Metrics"
        }
      },
      # Database performance
      {
        type   = "metric"
        x      = 0
        y      = 6
        width  = 12
        height = 6
        properties = {
          metrics = [
            ["AWS/RDS", "CPUUtilization", "DBInstanceIdentifier", "${var.environment}-kyo-saas-db"],
            ["AWS/RDS", "DatabaseConnections", "DBInstanceIdentifier", "${var.environment}-kyo-saas-db"],
            ["AWS/RDS", "FreeableMemory", "DBInstanceIdentifier", "${var.environment}-kyo-saas-db"],
            ["AWS/RDS", "ReadLatency", "DBInstanceIdentifier", "${var.environment}-kyo-saas-db"],
            ["AWS/RDS", "WriteLatency", "DBInstanceIdentifier", "${var.environment}-kyo-saas-db"]
          ]
          period = 300
          stat   = "Average"
          region = var.aws_region
          title  = "RDS Performance Metrics"
        }
      },
      # Redis cache
      {
        type   = "metric"
        x      = 12
        y      = 6
        width  = 12
        height = 6
        properties = {
          metrics = [
            ["AWS/ElastiCache", "CPUUtilization", "CacheClusterId", "${var.environment}-kyo-saas-redis"],
            ["AWS/ElastiCache", "DatabaseMemoryUsagePercentage", "CacheClusterId", "${var.environment}-kyo-saas-redis"],
            ["AWS/ElastiCache", "CacheHits", "CacheClusterId", "${var.environment}-kyo-saas-redis"],
            ["AWS/ElastiCache", "CacheMisses", "CacheClusterId", "${var.environment}-kyo-saas-redis"]
          ]
          period = 300
          stat   = "Average"
          region = var.aws_region
          title  = "ElastiCache Redis Metrics"
        }
      }
    ]
  })
}
# ============================================
# Alarms
# ============================================

# High CPU: average OTP service CPU above 80% for two consecutive 5-minute
# periods. Notifies the alerts topic on both ALARM and OK transitions.
resource "aws_cloudwatch_metric_alarm" "high_cpu_otp_service" {
  alarm_name        = "${var.environment}-kyo-otp-high-cpu"
  alarm_description = "This metric monitors ECS CPU utilization"

  namespace   = "AWS/ECS"
  metric_name = "CPUUtilization"
  dimensions = {
    ClusterName = aws_ecs_cluster.kyo_saas.name
    ServiceName = aws_ecs_service.otp_service.name
  }

  statistic           = "Average"
  period              = 300
  evaluation_periods  = 2
  comparison_operator = "GreaterThanThreshold"
  threshold           = 80

  alarm_actions = [aws_sns_topic.alerts.arn]
  ok_actions    = [aws_sns_topic.alerts.arn]

  tags = local.common_tags
}
# High memory: average OTP service memory above 85% for two consecutive
# 5-minute periods.
resource "aws_cloudwatch_metric_alarm" "high_memory_otp_service" {
  alarm_name        = "${var.environment}-kyo-otp-high-memory"
  alarm_description = "This metric monitors ECS memory utilization"

  namespace   = "AWS/ECS"
  metric_name = "MemoryUtilization"
  dimensions = {
    ClusterName = aws_ecs_cluster.kyo_saas.name
    ServiceName = aws_ecs_service.otp_service.name
  }

  statistic           = "Average"
  period              = 300
  evaluation_periods  = 2
  comparison_operator = "GreaterThanThreshold"
  threshold           = 85

  alarm_actions = [aws_sns_topic.alerts.arn]

  tags = local.common_tags
}
# Response time: average ALB target response time above 2 for two consecutive
# 5-minute periods.
resource "aws_cloudwatch_metric_alarm" "high_response_time" {
  alarm_name        = "${var.environment}-kyo-saas-high-response-time"
  alarm_description = "This metric monitors ALB target response time"

  namespace   = "AWS/ApplicationELB"
  metric_name = "TargetResponseTime"
  dimensions = {
    LoadBalancer = aws_lb.app_lb.arn_suffix
  }

  statistic           = "Average"
  period              = 300
  evaluation_periods  = 2
  comparison_operator = "GreaterThanThreshold"
  threshold           = 2

  alarm_actions = [aws_sns_topic.alerts.arn]

  tags = local.common_tags
}
# Error rate: more than 10 target 5XX responses per 5-minute period, three
# periods in a row. Periods with no data are treated as not breaching.
resource "aws_cloudwatch_metric_alarm" "high_error_rate" {
  alarm_name        = "${var.environment}-kyo-saas-high-error-rate"
  alarm_description = "This metric monitors 5XX error count"

  namespace   = "AWS/ApplicationELB"
  metric_name = "HTTPCode_Target_5XX_Count"
  dimensions = {
    LoadBalancer = aws_lb.app_lb.arn_suffix
  }

  statistic           = "Sum"
  period              = 300
  evaluation_periods  = 3
  comparison_operator = "GreaterThanThreshold"
  threshold           = 10
  treat_missing_data  = "notBreaching"

  alarm_actions = [aws_sns_topic.alerts.arn]

  tags = local.common_tags
}
# Database connections: average RDS connection count above 80 for two
# consecutive 5-minute periods.
resource "aws_cloudwatch_metric_alarm" "high_db_connections" {
  alarm_name        = "${var.environment}-kyo-saas-high-db-connections"
  alarm_description = "This metric monitors RDS connection count"

  namespace   = "AWS/RDS"
  metric_name = "DatabaseConnections"
  dimensions = {
    DBInstanceIdentifier = "${var.environment}-kyo-saas-db"
  }

  statistic           = "Average"
  period              = 300
  evaluation_periods  = 2
  comparison_operator = "GreaterThanThreshold"
  threshold           = 80

  alarm_actions = [aws_sns_topic.alerts.arn]

  tags = local.common_tags
}
# SNS topic that the CloudWatch alarms publish to.
resource "aws_sns_topic" "alerts" {
  name = "${var.environment}-kyo-saas-alerts"

  tags = local.common_tags
}
# Email subscriptions: one per entry in var.alert_email_addresses.
resource "aws_sns_topic_subscription" "email_alerts" {
  count = length(var.alert_email_addresses)

  topic_arn = aws_sns_topic.alerts.arn
  protocol  = "email"
  endpoint  = var.alert_email_addresses[count.index]
}
# Slack integration: created only when a webhook URL is configured.
# NOTE(review): an SNS "https" subscription must be confirmed by the endpoint
# and delivers SNS-formatted JSON; verify the configured URL handles that
# (a raw Slack incoming webhook may not).
resource "aws_sns_topic_subscription" "slack_alerts" {
  count = var.slack_webhook_url == "" ? 0 : 1

  topic_arn = aws_sns_topic.alerts.arn
  protocol  = "https"
  endpoint  = var.slack_webhook_url
}
# infrastructure/aws/logging.tf

# Log group for the OTP service (/ecs/<environment>-kyo-otp-service).
resource "aws_cloudwatch_log_group" "otp_service" {
  name              = "/ecs/${var.environment}-kyo-otp-service"
  retention_in_days = var.log_retention_days

  tags = local.common_tags
}
# Log group for the dashboard (/ecs/<environment>-kyo-dashboard).
# NOTE(review): the frontend is deployed to S3 + CloudFront in this article,
# so confirm something still writes to this group.
resource "aws_cloudwatch_log_group" "dashboard" {
  name              = "/ecs/${var.environment}-kyo-dashboard"
  retention_in_days = var.log_retention_days

  tags = local.common_tags
}
# Named log stream inside the OTP service log group.
resource "aws_cloudwatch_log_stream" "otp_service" {
  log_group_name = aws_cloudwatch_log_group.otp_service.name
  name           = "${var.environment}-otp-service-stream"
}
# Log metric filter - error detection.
# Emits KyoSaaS/Application:ErrorCount = 1 for every OTP service log event
# whose third space-delimited field equals "ERROR".
resource "aws_cloudwatch_log_metric_filter" "error_count" {
  name           = "${var.environment}-error-count"
  log_group_name = aws_cloudwatch_log_group.otp_service.name

  pattern = "[timestamp, request_id, level=\"ERROR\", ...]"

  metric_transformation {
    namespace = "KyoSaaS/Application"
    name      = "ErrorCount"
    value     = "1"
  }
}
# Log-based alarm: more than 5 application errors per 5-minute period, two
# periods in a row.
resource "aws_cloudwatch_metric_alarm" "application_errors" {
  alarm_name          = "${var.environment}-kyo-saas-application-errors"
  comparison_operator = "GreaterThanThreshold"
  evaluation_periods  = "2"
  metric_name         = "ErrorCount"
  namespace           = "KyoSaaS/Application"
  period              = "300"
  statistic           = "Sum"
  threshold           = "5"
  alarm_description   = "This metric monitors application error count from logs"
  alarm_actions       = [aws_sns_topic.alerts.arn]

  # The metric filter only emits a data point when an ERROR line is logged, so
  # quiet periods have no data at all. Treat them as healthy (as the 5XX alarm
  # does) instead of leaving the alarm in INSUFFICIENT_DATA.
  treat_missing_data = "notBreaching"

  tags = local.common_tags
}
# X-Ray tracing: catch-all sampling rule (every matcher is "*") with a
# reservoir of 1 and a fixed rate of 0.1.
resource "aws_xray_sampling_rule" "kyo_saas" {
  rule_name = "${var.environment}-kyo-saas-sampling"
  version   = 1
  priority  = 9000

  # Sampling volume
  reservoir_size = 1
  fixed_rate     = 0.1

  # Match criteria
  service_name = "*"
  service_type = "*"
  host         = "*"
  http_method  = "*"
  url_path     = "*"
  resource_arn = "*"

  tags = local.common_tags
}
今天我們完成了混合架構的部署策略,並實作了後端容器化自動擴容系統:
混合部署策略
後端容器化架構
自動擴容系統
部署自動化