看似這樣的主題與 ai 無關,但相信有用過 aws 服務的都很清楚,人家 amazon 是營利機構,
在我們建置 AI 應用的時候,難免要向老闆或公司的會計交代目前的成本,以及可預期的未來成本
這樣可以幫助公司決定適不適合做 AI 應用;同時,如果方法正確,也可以用同樣的效果達到較低成本的應用,
所以了解理論外,『成本』也是理論很重要且值得思考的部分
首先先了解 Amazon Bedrock 的計費成本是如何組成的
- 模型推理費用(token 計費)
- 知識庫查詢費用 (根據請求計費)
- 模型微調費用 (fine tune -> 按訓練時間)
- 數據存儲費用 (按照存儲容量)
像是幾個比較常用的 Claude 3 Sonnet、Titan Text G1、Stable Diffusion
基本上我這裡就不說費用,因為可能會隨著時間有所改變
- instance 運行費用(按小時計算)
- 存儲費用 (EBS, S3)
- 數據處理費用
- 端點推理費用
- 模型訓練費用
思考成本的部分無非根據以上計費方式去評估和止損,可以參考以下思考去做調整
了解成本組成(上述提到) -> 監控成本(每日),且設置預警,超過閾值警報 -> 思考使用優化
AWS Cost Explorer 配置
這裡一樣用 python boto3 做為範例
import boto3
import json
from datetime import datetime, timedelta
class CostMonitor:
    """Track daily AWS service costs (Cost Explorer) and raise billing alarms (CloudWatch)."""

    # Default SNS topic notified when a cost alarm fires; previously hard-coded
    # inside create_cost_alarm, now overridable per call.
    DEFAULT_ALERT_TOPIC_ARN = 'arn:aws:sns:us-east-1:123456789012:cost-alerts'

    def __init__(self):
        self.cost_client = boto3.client('ce')
        self.cloudwatch = boto3.client('cloudwatch')

    def get_daily_costs(self, service_name, days=30):
        """Return daily blended costs for one service over the past ``days`` days.

        Args:
            service_name: Cost Explorer SERVICE dimension value
                (e.g. 'Amazon Bedrock').
            days: Size of the look-back window, ending today.

        Returns:
            The raw ``get_cost_and_usage`` response, grouped by service.
        """
        end_date = datetime.now().date()
        start_date = end_date - timedelta(days=days)
        response = self.cost_client.get_cost_and_usage(
            TimePeriod={
                # Cost Explorer expects ISO dates; 'End' is exclusive.
                'Start': start_date.strftime('%Y-%m-%d'),
                'End': end_date.strftime('%Y-%m-%d')
            },
            Granularity='DAILY',
            Metrics=['BlendedCost'],
            GroupBy=[
                {
                    'Type': 'DIMENSION',
                    'Key': 'SERVICE'
                }
            ],
            Filter={
                'Dimensions': {
                    'Key': 'SERVICE',
                    'Values': [service_name]
                }
            }
        )
        return response

    def create_cost_alarm(self, threshold_amount, service_name, sns_topic_arn=None):
        """Create a CloudWatch alarm that fires when estimated charges exceed a threshold.

        Args:
            threshold_amount: Alarm threshold in USD.
            service_name: Service label used in the alarm name and dimensions.
            sns_topic_arn: SNS topic to notify; defaults to
                ``DEFAULT_ALERT_TOPIC_ARN`` for backward compatibility.
        """
        alarm_name = f'AI-Service-Cost-Alarm-{service_name}'
        self.cloudwatch.put_metric_alarm(
            AlarmName=alarm_name,
            ComparisonOperator='GreaterThanThreshold',
            EvaluationPeriods=1,
            MetricName='EstimatedCharges',
            Namespace='AWS/Billing',
            Period=86400,  # evaluate once per 24 hours
            Statistic='Maximum',
            Threshold=threshold_amount,
            ActionsEnabled=True,
            AlarmActions=[
                sns_topic_arn or self.DEFAULT_ALERT_TOPIC_ARN
            ],
            AlarmDescription=f'Alarm when {service_name} costs exceed ${threshold_amount}',
            Dimensions=[
                {
                    'Name': 'ServiceName',
                    'Value': service_name
                },
                {
                    'Name': 'Currency',
                    'Value': 'USD'
                }
            ]
        )
# Usage example
monitor = CostMonitor()
# Send an alert when Amazon Bedrock costs exceed $100
monitor.create_cost_alarm(100, 'Amazon Bedrock')
我們這裡用 matplotlib 實現
import matplotlib.pyplot as plt
import pandas as pd
def create_cost_dashboard(cost_data, show=True):
    """Plot daily and cumulative AI service costs from a Cost Explorer response.

    Args:
        cost_data: ``get_cost_and_usage`` response containing 'ResultsByTime'.
        show: When True (default, matching previous behavior), display the
            figure with ``plt.show()``; pass False to build the chart and
            DataFrame without blocking (e.g. when saving to file instead).

    Returns:
        DataFrame with 'Date', 'Cost' and 'Cumulative_Cost' columns.
    """
    # Flatten the API response into (date, cost) rows.
    rows = [
        (result['TimePeriod']['Start'],
         float(result['Total']['BlendedCost']['Amount']))
        for result in cost_data['ResultsByTime']
    ]
    df = pd.DataFrame(rows, columns=['Date', 'Cost'])
    df['Date'] = pd.to_datetime(df['Date'])

    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8))

    # Daily cost trend
    ax1.plot(df['Date'], df['Cost'], marker='o')
    ax1.set_title('Daily AI Service Costs')
    ax1.set_ylabel('Cost (USD)')
    ax1.grid(True)

    # Cumulative spend
    df['Cumulative_Cost'] = df['Cost'].cumsum()
    ax2.plot(df['Date'], df['Cumulative_Cost'], marker='s', color='red')
    ax2.set_title('Cumulative AI Service Costs')
    ax2.set_ylabel('Cumulative Cost (USD)')
    ax2.set_xlabel('Date')
    ax2.grid(True)

    plt.tight_layout()
    if show:
        plt.show()
    return df
根據我們各自想要優化的目標,作法無非是
class BedrockCostOptimizer:
    """Reduce Bedrock token spend via prompt trimming, cost estimation and batching."""

    def __init__(self):
        self.bedrock = boto3.client('bedrock-runtime')
        # Approximate USD price per 1K tokens; prices change over time — verify
        # against the current Bedrock pricing page before relying on estimates.
        self.token_costs = {
            'anthropic.claude-3-sonnet': {'input': 0.003, 'output': 0.015},
            'amazon.titan-text-lite': {'input': 0.0005, 'output': 0.00065}
        }

    def optimize_prompt(self, original_prompt):
        """Shorten a prompt to cut input-token usage without changing its intent.

        Collapses whitespace and replaces common verbose phrasings with terse
        equivalents.
        """
        # Remove redundant spaces and newlines.
        optimized = ' '.join(original_prompt.split())
        # Terser instruction phrasings.
        replacements = {
            'Please provide a detailed explanation': 'Explain',
            'I would like you to': '',
            'Can you help me': '',
            'Could you please': 'Please'
        }
        for old, new in replacements.items():
            optimized = optimized.replace(old, new)
        # Fix: replacing a phrase with '' leaves leading/double spaces behind;
        # re-normalize so the result is clean (and never has stray padding).
        return ' '.join(optimized.split())

    def estimate_cost(self, text, model_id, is_input=True):
        """Estimate processing cost in USD for ``text`` on ``model_id``.

        Uses a rough word-based token count (~1.3 tokens per word); for exact
        counts use a real tokenizer such as tiktoken.

        Returns:
            (estimated_cost_usd, token_count) tuple.

        Raises:
            KeyError: If ``model_id`` is not in ``self.token_costs``.
        """
        token_count = len(text.split()) * 1.3  # rough estimate
        cost_per_1k = self.token_costs[model_id]['input' if is_input else 'output']
        estimated_cost = (token_count / 1000) * cost_per_1k
        return estimated_cost, token_count

    def batch_requests(self, prompts, model_id, batch_size=10):
        """Combine prompts into batched invocations to reduce API call count.

        Each API call carries up to ``batch_size`` numbered requests joined by
        '---' separators; the model is expected to answer each in turn.
        """
        results = []
        for i in range(0, len(prompts), batch_size):
            batch = prompts[i:i + batch_size]
            combined_prompt = '\n---\n'.join([
                f"Request {j+1}: {prompt}"
                for j, prompt in enumerate(batch)
            ])
            # One API call serves the whole batch.
            response = self.bedrock.invoke_model(
                modelId=model_id,
                body=json.dumps({
                    'anthropic_version': 'bedrock-2023-05-31',
                    'max_tokens': 1000,
                    'messages': [{
                        'role': 'user',
                        'content': combined_prompt
                    }]
                })
            )
            results.append(response)
        return results
def choose_optimal_model(task_complexity, performance_requirement):
    """Pick the cheapest model tier that still meets the performance need."""
    # Candidate tiers, cheapest first; cost_factor is relative to the lite tier.
    catalog = {
        'simple': {'model': 'amazon.titan-text-lite', 'cost_factor': 1.0, 'performance': 0.7},
        'medium': {'model': 'anthropic.claude-3-haiku', 'cost_factor': 2.5, 'performance': 0.85},
        'complex': {'model': 'anthropic.claude-3-sonnet', 'cost_factor': 6.0, 'performance': 0.95},
    }
    # Guard clauses: fall through to the strongest tier when neither cheap
    # option satisfies the requirement.
    if task_complexity == 'simple' and performance_requirement < 0.8:
        return catalog['simple']
    if task_complexity == 'medium' and performance_requirement < 0.9:
        return catalog['medium']
    return catalog['complex']
instance 依小時計費,因此我們可以選用比較經濟實惠的 instance 類型
也可以考慮 Spot instance 來降低成本
class SageMakerCostOptimizer:
    """Recommend cost-effective SageMaker instances and configure endpoint auto-scaling."""

    def __init__(self):
        self.sagemaker = boto3.client('sagemaker')
        # Hourly on-demand price reference (USD) for common instance types.
        self.instance_costs = {
            'ml.t3.medium': 0.05,
            'ml.m5.large': 0.115,
            'ml.c5.xlarge': 0.204,
            'ml.p3.2xlarge': 3.825,
            'ml.inf1.xlarge': 0.362
        }

    def recommend_instance(self, workload_type, expected_requests_per_hour):
        """Suggest an instance type for the given workload and request volume."""
        # High traffic pushes CPU inference workloads to the compute-optimized tier.
        if expected_requests_per_hour > 1000 and workload_type in ('light_inference', 'heavy_inference'):
            return 'ml.c5.xlarge'
        defaults = {
            'development': 'ml.t3.medium',
            'light_inference': 'ml.m5.large',
            'heavy_inference': 'ml.c5.xlarge',
            'gpu_inference': 'ml.p3.2xlarge',
            'optimized_inference': 'ml.inf1.xlarge'
        }
        # Unknown workload types fall back to a general-purpose instance.
        return defaults.get(workload_type, 'ml.m5.large')

    def setup_auto_scaling(self, endpoint_name, min_capacity=1, max_capacity=10):
        """Attach a target-tracking scaling policy to the endpoint's default variant."""
        autoscaling = boto3.client('application-autoscaling')
        resource_id = f'endpoint/{endpoint_name}/variant/variant-1'
        dimension = 'sagemaker:variant:DesiredInstanceCount'
        # The variant must be registered as a scalable target before a policy
        # can be attached to it.
        autoscaling.register_scalable_target(
            ServiceNamespace='sagemaker',
            ResourceId=resource_id,
            ScalableDimension=dimension,
            MinCapacity=min_capacity,
            MaxCapacity=max_capacity
        )
        # Track ~70 invocations per instance to balance cost against headroom.
        autoscaling.put_scaling_policy(
            PolicyName=f'{endpoint_name}-scaling-policy',
            ServiceNamespace='sagemaker',
            ResourceId=resource_id,
            ScalableDimension=dimension,
            PolicyType='TargetTrackingScaling',
            TargetTrackingScalingPolicyConfiguration={
                'TargetValue': 70.0,
                'PredefinedMetricSpecification': {
                    'PredefinedMetricType': 'SageMakerVariantInvocationsPerInstance'
                },
                'ScaleOutCooldown': 300,
                'ScaleInCooldown': 300
            }
        )
def create_training_job_with_spot(job_name, training_image, instance_type):
    """Launch a SageMaker training job on managed Spot instances (up to ~70% cheaper).

    Args:
        job_name: Unique training job name.
        training_image: ECR URI of the training container.
        instance_type: Training instance type, e.g. 'ml.p3.2xlarge'.

    Returns:
        The ``create_training_job`` API response.
    """
    sagemaker = boto3.client('sagemaker')
    response = sagemaker.create_training_job(
        TrainingJobName=job_name,
        RoleArn='arn:aws:iam::123456789012:role/SageMakerRole',
        AlgorithmSpecification={
            'TrainingImage': training_image,
            'TrainingInputMode': 'File'
        },
        InputDataConfig=[
            {
                'ChannelName': 'training',
                'DataSource': {
                    'S3DataSource': {
                        'S3DataType': 'S3Prefix',
                        'S3Uri': 's3://my-bucket/training-data/',
                        'S3DataDistributionType': 'FullyReplicated',
                    }
                }
            }
        ],
        OutputDataConfig={
            'S3OutputPath': 's3://my-bucket/model-artifacts/'
        },
        ResourceConfig={
            'InstanceType': instance_type,
            'InstanceCount': 1,
            'VolumeSizeInGB': 30
        },
        StoppingCondition={
            'MaxRuntimeInSeconds': 3600,
            # Fix: managed Spot training requires MaxWaitTimeInSeconds (and it
            # must be >= MaxRuntimeInSeconds); without it the API rejects the
            # request. Allow up to 2h total waiting for Spot capacity.
            'MaxWaitTimeInSeconds': 7200
        },
        # Enable Spot instances (saves up to ~70% of training cost); the
        # checkpoint config lets an interrupted job resume instead of restart.
        EnableManagedSpotTraining=True,
        CheckpointConfig={
            'S3Uri': 's3://my-bucket/checkpoints/'
        }
    )
    return response
import json
from datetime import datetime
class CostReporter:
    """Build and email weekly cost reports for AI services (Bedrock + SageMaker)."""

    def __init__(self):
        self.cost_client = boto3.client('ce')
        self.ses_client = boto3.client('ses')

    def _get_service_costs(self, service_name, days=7):
        """Return daily blended costs for ``service_name`` over the past ``days`` days.

        Extracted helper: the Bedrock and SageMaker queries were identical
        except for the service name.
        """
        end_date = datetime.now().date()
        start_date = end_date - timedelta(days=days)
        return self.cost_client.get_cost_and_usage(
            TimePeriod={
                'Start': start_date.strftime('%Y-%m-%d'),
                'End': end_date.strftime('%Y-%m-%d')
            },
            Granularity='DAILY',
            Metrics=['BlendedCost'],
            Filter={
                'Dimensions': {
                    'Key': 'SERVICE',
                    'Values': [service_name]
                }
            }
        )

    def generate_weekly_report(self):
        """Generate the weekly cost report covering the last 7 days."""
        bedrock_costs = self._get_service_costs('Amazon Bedrock')
        sagemaker_costs = self._get_service_costs('Amazon SageMaker')
        return self._format_report(bedrock_costs, sagemaker_costs)

    def _format_report(self, bedrock_costs, sagemaker_costs):
        """Format the report body from two ``get_cost_and_usage`` responses."""
        bedrock_total = sum(
            float(result['Total']['BlendedCost']['Amount'])
            for result in bedrock_costs['ResultsByTime']
        )
        sagemaker_total = sum(
            float(result['Total']['BlendedCost']['Amount'])
            for result in sagemaker_costs['ResultsByTime']
        )
        report = f"""
📊 AI 服務週度成本報告
🤖 Amazon Bedrock: ${bedrock_total:.2f}
🧠 Amazon SageMaker: ${sagemaker_total:.2f}
💰 總計: ${bedrock_total + sagemaker_total:.2f}
📈 優化建議:
- 考慮使用較小的模型處理簡單任務
- 實施批次處理減少API呼叫次數
- 設定實例自動擴展以降低閒置成本
"""
        return report

    def send_report_email(self, report_content, recipient_email):
        """Email the cost report to ``recipient_email`` via SES."""
        self.ses_client.send_email(
            Source='noreply@example.com',
            Destination={'ToAddresses': [recipient_email]},
            Message={
                'Subject': {'Data': 'AI 服務成本週報'},
                'Body': {'Text': {'Data': report_content}}
            }
        )
這次寫的東西比較多,先這樣吧