在 Day 16 我們建立了 WebSocket 即時協作引擎的基礎架構,今天我們要將這個架構擴展為完整的微服務體系,並在 AWS 雲端環境中實現高可用的分散式部署。我們將關注服務間通訊、負載均衡、以及 Redis 叢集的配置。
# infrastructure/services-architecture.yml
# Service map for the platform. Each service entry states how it is hosted
# (`type`), what it is responsible for (`function`) and which shared resources
# it depends on (`dependencies`).
services:
  # Unified entry point and request routing.
  api-gateway:
    type: application-load-balancer
    function: 統一入口點與路由分發

  # Core gym management features.
  gym-management-service:
    type: ecs-fargate
    function: 健身房基礎管理功能
    dependencies:
      - postgres-rds
      - redis-cluster

  # WebSocket real-time collaboration.
  collaboration-service:
    type: ecs-fargate
    function: WebSocket 即時協作
    dependencies:
      - redis-cluster
      - postgres-rds

  # Push and notification management.
  # NOTE(review): `ses` and `sns` have no entry under shared-infrastructure.
  notification-service:
    type: ecs-fargate
    function: 推播與通知管理
    dependencies:
      - redis-cluster
      - ses
      - sns

  # Data analytics and reporting.
  # NOTE(review): `s3` and `cloudwatch` have no entry under
  # shared-infrastructure (the bucket is listed there as `s3-bucket`).
  analytics-service:
    type: ecs-fargate
    function: 數據分析與報表
    dependencies:
      - postgres-rds
      - s3
      - cloudwatch

# Resources shared by the services above.
shared-infrastructure:
  postgres-rds:
    engine: postgresql-15
    deployment: multi-az

  redis-cluster:
    engine: redis-7
    deployment: cluster-mode
    nodes: 3-primary-3-replica

  s3-bucket:
    purpose: static-assets-storage

  cloudfront:
    purpose: cdn-delivery
{
  "family": "collaboration-service",
  "networkMode": "awsvpc",
  "requiresCompatibilities": ["FARGATE"],
  "cpu": "1024",
  "memory": "2048",
  "executionRoleArn": "arn:aws:iam::ACCOUNT:role/ecsTaskExecutionRole",
  "taskRoleArn": "arn:aws:iam::ACCOUNT:role/ecsTaskRole",
  "containerDefinitions": [
    {
      "name": "collaboration-service",
      "image": "ACCOUNT.dkr.ecr.REGION.amazonaws.com/kyo-collaboration:latest",
      "essential": true,
      "portMappings": [
        {
          "containerPort": 3001,
          "protocol": "tcp"
        }
      ],
      "environment": [
        {
          "name": "NODE_ENV",
          "value": "production"
        },
        {
          "name": "REDIS_CLUSTER_ENDPOINT",
          "value": "kyo-redis-cluster.abc123.cache.amazonaws.com:6379"
        }
      ],
      "secrets": [
        {
          "name": "DATABASE_URL",
          "valueFrom": "arn:aws:ssm:REGION:ACCOUNT:parameter/kyo/database-url"
        },
        {
          "name": "JWT_SECRET",
          "valueFrom": "arn:aws:ssm:REGION:ACCOUNT:parameter/kyo/jwt-secret"
        }
      ],
      "logConfiguration": {
        "logDriver": "awslogs",
        "options": {
          "awslogs-group": "/ecs/collaboration-service",
          "awslogs-region": "REGION",
          "awslogs-stream-prefix": "ecs"
        }
      },
      "healthCheck": {
        "command": [
          "CMD-SHELL",
          "wget -q -O /dev/null http://localhost:3001/health || exit 1"
        ],
        "interval": 30,
        "timeout": 5,
        "retries": 3,
        "startPeriod": 60
      }
    }
  ]
}
# infrastructure/alb-config.yml
# Load balancer settings stored as two JSON documents inside a ConfigMap.
apiVersion: v1
kind: ConfigMap
metadata:
  name: alb-configuration
data:
  # One target group per backend service: listening port, protocol and the
  # health-check path and thresholds.
  target-groups.json: |
    {
      "gym-management": {
        "port": 3000,
        "protocol": "HTTP",
        "health_check": {
          "path": "/api/health",
          "healthy_threshold": 2,
          "unhealthy_threshold": 3,
          "timeout": 5,
          "interval": 30
        }
      },
      "collaboration": {
        "port": 3001,
        "protocol": "HTTP",
        "health_check": {
          "path": "/health",
          "healthy_threshold": 2,
          "unhealthy_threshold": 3,
          "timeout": 5,
          "interval": 30
        }
      }
    }
  # Path-based routing. The collaboration rule has the lower priority number
  # (100) so its paths are listed ahead of the generic /api/* rule (200).
  listener-rules.json: |
    {
      "rules": [
        {
          "priority": 100,
          "conditions": [
            {
              "field": "path-pattern",
              "values": ["/api/collaboration/*", "/socket.io/*"]
            }
          ],
          "actions": [
            {
              "type": "forward",
              "target_group": "collaboration"
            }
          ]
        },
        {
          "priority": 200,
          "conditions": [
            {
              "field": "path-pattern",
              "values": ["/api/*"]
            }
          ],
          "actions": [
            {
              "type": "forward",
              "target_group": "gym-management"
            }
          ]
        }
      ]
    }
{
  "serviceName": "collaboration-service",
  "cluster": "kyo-production-cluster",
  "taskDefinition": "collaboration-service",
  "desiredCount": 3,
  "launchType": "FARGATE",
  "networkConfiguration": {
    "awsvpcConfiguration": {
      "subnets": [
        "subnet-12345678",
        "subnet-87654321"
      ],
      "securityGroups": [
        "sg-collaboration-service"
      ],
      "assignPublicIp": "DISABLED"
    }
  },
  "loadBalancers": [
    {
      "targetGroupArn": "arn:aws:elasticloadbalancing:REGION:ACCOUNT:targetgroup/collaboration/1234567890123456",
      "containerName": "collaboration-service",
      "containerPort": 3001
    }
  ],
  "serviceRegistries": [
    {
      "registryArn": "arn:aws:servicediscovery:REGION:ACCOUNT:service/srv-collaboration",
      "containerName": "collaboration-service",
      "containerPort": 3001
    }
  ]
}
# infrastructure/redis-cluster.yml
# ElastiCache for Redis in cluster mode: 3 shards with 1 replica each
# (6 nodes in total), reachable only from the application security group.
Resources:
  # Subnets the cache nodes may be placed in.
  RedisSubnetGroup:
    Type: AWS::ElastiCache::SubnetGroup
    Properties:
      Description: Subnet group for Redis cluster
      SubnetIds:
        - !Ref PrivateSubnet1
        - !Ref PrivateSubnet2
        - !Ref PrivateSubnet3

  RedisCluster:
    Type: AWS::ElastiCache::ReplicationGroup
    Properties:
      ReplicationGroupId: kyo-redis-cluster
      # ReplicationGroup takes `ReplicationGroupDescription` (required);
      # plain `Description` is not a property of this resource type.
      ReplicationGroupDescription: Redis cluster for real-time collaboration
      NodeType: cache.r7g.large
      Engine: redis
      # Quoted so YAML keeps it a string instead of parsing a float.
      EngineVersion: '7.0'
      # Multiple node groups need a parameter group with cluster mode enabled.
      CacheParameterGroupName: default.redis7.cluster.on
      Port: 6379
      # Topology is expressed via shards and replicas per shard.
      # NumCacheClusters is not used when there is more than one node group,
      # so it is intentionally not set here.
      NumNodeGroups: 3
      ReplicasPerNodeGroup: 1
      CacheSubnetGroupName: !Ref RedisSubnetGroup
      SecurityGroupIds:
        - !Ref RedisSecurityGroup
      # Clients must connect over TLS while transit encryption is enabled.
      AtRestEncryptionEnabled: true
      TransitEncryptionEnabled: true
      AutomaticFailoverEnabled: true
      MultiAZEnabled: true
      PreferredMaintenanceWindow: sun:05:00-sun:06:00
      SnapshotWindow: '03:00-04:00'
      SnapshotRetentionLimit: 7

  # Allows Redis traffic (TCP 6379) only from the application security group.
  RedisSecurityGroup:
    Type: AWS::EC2::SecurityGroup
    Properties:
      GroupDescription: Security group for Redis cluster
      VpcId: !Ref VPC
      SecurityGroupIngress:
        - IpProtocol: tcp
          FromPort: 6379
          ToPort: 6379
          SourceSecurityGroupId: !Ref ApplicationSecurityGroup
          Description: Redis access from application services
// infrastructure/redis-config.ts

/**
 * Settings handed to a Redis Cluster client: the seed nodes used for the
 * initial connection plus client tuning options.
 */
export interface RedisClusterConfig {
  /** Seed nodes used to bootstrap the connection. */
  nodes: Array<{
    host: string;
    port: number;
  }>;
  options: {
    enableReadyCheck: boolean;
    /** Options applied to each underlying node connection. */
    redisOptions: {
      family: number;
      connectTimeout: number;
      commandTimeout: number;
      retryDelayOnFailover: number;
      enableOfflineQueue: boolean;
      maxRetriesPerRequest: number;
    };
    slotsRefreshTimeout: number;
    slotsRefreshInterval: number;
  };
}

/**
 * Builds the Redis Cluster configuration from `REDIS_CLUSTER_ENDPOINT`.
 *
 * The variable is expected as `host` or `host:port`. A missing or unusable
 * port falls back to 6379. Only the configured endpoint is returned as a seed
 * node: a cluster client discovers the remaining nodes from the cluster
 * itself, so per-node hostnames are not derived here.
 *
 * NOTE(review): no `tls` option is produced; if the target cluster enforces
 * in-transit encryption the caller has to add it — confirm against the
 * cluster settings.
 *
 * @returns The seed node list and client options.
 * @throws Error if `REDIS_CLUSTER_ENDPOINT` is unset, blank, or has no host.
 */
export const createRedisClusterConfig = (): RedisClusterConfig => {
  const defaultPort = 6379;

  const clusterEndpoint = process.env.REDIS_CLUSTER_ENDPOINT?.trim();
  if (!clusterEndpoint) {
    throw new Error('REDIS_CLUSTER_ENDPOINT environment variable is required');
  }

  // Split on the first colon: everything before it is the host, the rest the port.
  const separatorIndex = clusterEndpoint.indexOf(':');
  const host = separatorIndex === -1 ? clusterEndpoint : clusterEndpoint.slice(0, separatorIndex);
  const portText = separatorIndex === -1 ? '' : clusterEndpoint.slice(separatorIndex + 1);

  if (host === '') {
    throw new Error('REDIS_CLUSTER_ENDPOINT must include a host');
  }

  // Keep the lenient behaviour for the port: anything that is not a valid
  // TCP port number falls back to the Redis default instead of failing.
  const parsedPort = Number.parseInt(portText, 10);
  const isValidPort = Number.isInteger(parsedPort) && parsedPort >= 1 && parsedPort <= 65535;
  const port = isValidPort ? parsedPort : defaultPort;

  return {
    nodes: [{ host, port }],
    options: {
      enableReadyCheck: true,
      redisOptions: {
        family: 4,
        connectTimeout: 5000,
        commandTimeout: 5000,
        retryDelayOnFailover: 100,
        enableOfflineQueue: false,
        maxRetriesPerRequest: 3
      },
      slotsRefreshTimeout: 10000,
      slotsRefreshInterval: 30000
    }
  };
};
# collaboration-service/Dockerfile

# ---- Build stage: install dependencies and compile the service ----
FROM node:18-alpine AS builder
WORKDIR /app
# Copy the package manifests and lockfile first so the dependency layer
# can be reused when only source files change
COPY package*.json ./
COPY pnpm-lock.yaml ./
# Install pnpm, then install dependencies exactly as pinned in the lockfile
RUN npm install -g pnpm
RUN pnpm install --frozen-lockfile
# Copy the sources and build
COPY . .
RUN pnpm run build

# ---- Production stage: runtime image with only the build output ----
FROM node:18-alpine AS production
# Create a non-root user and group to run the service
RUN addgroup -g 1001 -S nodejs
RUN adduser -S nodejs -u 1001
WORKDIR /app
# Copy the build output, dependencies and manifest from the build stage
COPY --from=builder --chown=nodejs:nodejs /app/dist ./dist
COPY --from=builder --chown=nodejs:nodejs /app/node_modules ./node_modules
COPY --from=builder --chown=nodejs:nodejs /app/package.json ./package.json
# Health check. The Alpine-based image ships BusyBox wget but not curl, and
# nothing in this file installs curl, so the probe uses wget.
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
  CMD wget -q -O /dev/null http://localhost:3001/health || exit 1
# Drop root privileges before starting the process
USER nodejs
EXPOSE 3001
CMD ["node", "dist/index.js"]
# .github/workflows/deploy-collaboration-service.yml
# Builds the collaboration service image, pushes it to ECR and rolls it out to
# ECS whenever the service or the shared core package changes on main.
name: Deploy Collaboration Service

on:
  push:
    branches: [main]
    paths:
      - 'apps/collaboration-service/**'
      - 'packages/@kyong/kyo-core/**'

env:
  AWS_REGION: ap-northeast-1
  ECR_REPOSITORY: kyo-collaboration
  ECS_SERVICE: collaboration-service
  ECS_CLUSTER: kyo-production-cluster

jobs:
  build-and-deploy:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ env.AWS_REGION }}

      - name: Login to Amazon ECR
        id: login-ecr
        uses: aws-actions/amazon-ecr-login@v2

      # The image is tagged with the commit SHA so every deployment is traceable.
      - name: Build and push image
        id: build-image
        env:
          ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }}
          IMAGE_TAG: ${{ github.sha }}
        run: |
          cd apps/collaboration-service
          docker build -t "$ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG" .
          docker push "$ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG"
          echo "image=$ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG" >> "$GITHUB_OUTPUT"

      - name: Update ECS service
        env:
          IMAGE_URI: ${{ steps.build-image.outputs.image }}
        run: |
          set -euo pipefail

          # Fetch the current task definition
          aws ecs describe-task-definition \
            --task-definition "$ECS_SERVICE" \
            --query taskDefinition > task-def.json

          # Point the first container at the new image and drop the read-only
          # fields that describe-task-definition returns but
          # register-task-definition does not accept as input
          jq --arg IMAGE_URI "$IMAGE_URI" \
            '.containerDefinitions[0].image = $IMAGE_URI
             | del(.taskDefinitionArn, .revision, .status, .requiresAttributes,
                   .compatibilities, .registeredAt, .registeredBy)' \
            task-def.json > updated-task-def.json

          # Register the new revision and keep its ARN
          NEW_TASK_DEF_ARN=$(aws ecs register-task-definition \
            --cli-input-json file://updated-task-def.json \
            --query 'taskDefinition.taskDefinitionArn' \
            --output text)

          # Roll the service to exactly the revision registered above
          aws ecs update-service \
            --cluster "$ECS_CLUSTER" \
            --service "$ECS_SERVICE" \
            --task-definition "$NEW_TASK_DEF_ARN"

          # Wait for the deployment to finish
          aws ecs wait services-stable \
            --cluster "$ECS_CLUSTER" \
            --services "$ECS_SERVICE"
# infrastructure/monitoring.yml
# CloudWatch alarms for the collaboration service: individual metric alarms
# plus a composite alarm that notifies when any of them is in ALARM.
Resources:
  # Fires when any of the referenced alarms is in ALARM state.
  # NOTE(review): MemoryUtilizationAlarm and ResponseTimeAlarm are referenced
  # by the rule but not defined in this section — confirm they exist elsewhere
  # in the template.
  CollaborationServiceAlarms:
    Type: AWS::CloudWatch::CompositeAlarm
    Properties:
      AlarmName: CollaborationService-Health
      AlarmDescription: Composite alarm for collaboration service health
      ActionsEnabled: true
      AlarmActions:
        - !Ref SNSTopicArn
      AlarmRule: !Sub |
        ALARM(${CPUUtilizationAlarm}) OR
        ALARM(${MemoryUtilizationAlarm}) OR
        ALARM(${ErrorRateAlarm}) OR
        ALARM(${ResponseTimeAlarm})

  # Average service CPU above 80 for two consecutive 300-second periods.
  CPUUtilizationAlarm:
    Type: AWS::CloudWatch::Alarm
    Properties:
      AlarmName: CollaborationService-HighCPU
      AlarmDescription: High CPU utilization for collaboration service
      Namespace: AWS/ECS
      MetricName: CPUUtilization
      Dimensions:
        - Name: ServiceName
          Value: collaboration-service
        - Name: ClusterName
          Value: kyo-production-cluster
      Statistic: Average
      Period: 300
      EvaluationPeriods: 2
      ComparisonOperator: GreaterThanThreshold
      Threshold: 80

  # More than 10 target 5XX responses (summed) in each of two consecutive
  # 300-second periods; periods without data are treated as not breaching.
  # NOTE(review): no Dimensions are set, so the metric is not scoped to a
  # specific load balancer or target group — confirm this is intended.
  ErrorRateAlarm:
    Type: AWS::CloudWatch::Alarm
    Properties:
      AlarmName: CollaborationService-HighErrorRate
      AlarmDescription: High error rate for collaboration service
      Namespace: AWS/ApplicationELB
      MetricName: HTTPCode_Target_5XX_Count
      Statistic: Sum
      Period: 300
      EvaluationPeriods: 2
      ComparisonOperator: GreaterThanThreshold
      Threshold: 10
      TreatMissingData: notBreaching
// infrastructure/tracing.ts
import { NodeTracerProvider } from '@opentelemetry/sdk-node';
import { Resource } from '@opentelemetry/resources';
import { SemanticResourceAttributes } from '@opentelemetry/semantic-conventions';
import { AWSXRayIdGenerator } from '@opentelemetry/id-generator-aws-xray';
import { AWSXRayPropagator } from '@opentelemetry/propagator-aws-xray';
/**
 * Creates the tracer provider for the collaboration service, registers it
 * with the AWS X-Ray propagator and returns it.
 *
 * The resource carries the service name, the version from `SERVICE_VERSION`
 * (default '1.0.0') and the environment from `NODE_ENV` (default 'production').
 *
 * NOTE(review): confirm `NodeTracerProvider` is actually exported by the
 * package it is imported from; it is normally provided by
 * `@opentelemetry/sdk-trace-node`.
 * NOTE(review): no span processor or exporter is attached here — presumably
 * configured by the caller; verify.
 *
 * @returns The registered tracer provider.
 */
export const initializeTracing = () => {
  // Attributes that identify this service on every emitted span.
  const serviceResource = new Resource({
    [SemanticResourceAttributes.SERVICE_NAME]: 'collaboration-service',
    [SemanticResourceAttributes.SERVICE_VERSION]: process.env.SERVICE_VERSION || '1.0.0',
    [SemanticResourceAttributes.DEPLOYMENT_ENVIRONMENT]: process.env.NODE_ENV || 'production'
  });

  const tracerProvider = new NodeTracerProvider({
    resource: serviceResource,
    idGenerator: new AWSXRayIdGenerator()
  });

  // AWS X-Ray integration: register the provider with the X-Ray propagator.
  const xrayPropagator = new AWSXRayPropagator();
  tracerProvider.register({ propagator: xrayPropagator });

  return tracerProvider;
};
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Action": [
        "logs:CreateLogGroup",
        "logs:CreateLogStream",
        "logs:PutLogEvents"
      ],
      "Resource": "arn:aws:logs:*:*:*"
    },
    {
      "Effect": "Allow",
      "Action": [
        "ssm:GetParameter",
        "ssm:GetParameters"
      ],
      "Resource": "arn:aws:ssm:*:*:parameter/kyo/*"
    },
    {
      "Effect": "Allow",
      "Action": [
        "elasticache:Describe*",
        "elasticache:List*"
      ],
      "Resource": "*"
    },
    {
      "Effect": "Allow",
      "Action": [
        "xray:PutTraceSegments",
        "xray:PutTelemetryRecords"
      ],
      "Resource": "*"
    }
  ]
}
今天我們完成了 WebSocket 協作服務的雲端部署架構設計: