要使用AWS SageMaker SKLearn,您需要将Scikit-learn代码打包到SageMaker容器中运行。以下是一个示例解决方案:
import os
import subprocess
import sys
from sagemaker_containers import _env
if __name__ == '__main__':
    # Install the pinned scikit-learn release into the interpreter that is
    # running this script. check_call raises CalledProcessError if pip exits
    # with a non-zero status, so a failed install stops the script here.
    subprocess.check_call(
        [sys.executable, "-m", "pip", "install", "scikit-learn==0.24.2"]
    )

    # Imported only after the install above has finished.
    # NOTE(review): confirm that sagemaker_sklearn_container really exposes a
    # callable named entry_point in the target container image.
    from sagemaker_sklearn_container import entry_point

    entry_point()
import os

import pandas as pd
from sklearn import svm

try:
    # scikit-learn 0.23+ no longer ships sklearn.externals.joblib; prefer the
    # standalone joblib package and fall back only on old releases.
    import joblib
except ImportError:
    from sklearn.externals import joblib
def model_fn(model_dir):
    """Load the serialized model stored in ``model_dir``.

    Args:
        model_dir: Directory that contains the file ``model.joblib``.

    Returns:
        The object deserialized by ``joblib.load``.
    """
    model_path = os.path.join(model_dir, "model.joblib")
    # NOTE: joblib.load unpickles the file; only load artifacts you trust.
    return joblib.load(model_path)
def input_fn(input_data, content_type):
    """Parse a CSV request body into a DataFrame.

    Args:
        input_data: Header-less CSV payload, either ``str`` or UTF-8
            encoded ``bytes``/``bytearray``.
        content_type: MIME type of the payload; only ``'text/csv'`` is
            accepted.

    Returns:
        pandas.DataFrame parsed with ``header=None`` (integer column labels).

    Raises:
        ValueError: If ``content_type`` is anything other than ``'text/csv'``.
    """
    # Imported locally because the file never imports StringIO at the top.
    from io import StringIO

    if content_type != 'text/csv':
        raise ValueError(f"Invalid content type: {content_type}")

    # StringIO only accepts text, so decode raw byte payloads first.
    if isinstance(input_data, (bytes, bytearray)):
        input_data = input_data.decode('utf-8')

    return pd.read_csv(StringIO(input_data), header=None)
def predict_fn(input_data, model):
    """Return the model's predictions for ``input_data``.

    Args:
        input_data: Parsed request data, passed unchanged to ``model.predict``.
        model: Any object exposing a ``predict(input_data)`` method.

    Returns:
        Whatever ``model.predict(input_data)`` returns.
    """
    predictions = model.predict(input_data)
    return predictions
def output_fn(prediction, accept):
    """Serialize predictions as a JSON array string.

    Args:
        prediction: Sequence of predictions; it is wrapped in a
            ``pandas.Series`` before serialization.
        accept: Requested response MIME type; only ``'application/json'``
            is accepted.

    Returns:
        str: JSON array of the prediction values (``orient='values'``).

    Raises:
        ValueError: If ``accept`` is anything other than ``'application/json'``.
    """
    if accept != 'application/json':
        raise ValueError(f"Invalid accept type: {accept}")

    series = pd.Series(prediction)
    return series.to_json(orient='values')
def train(train_dir=None, model_dir=None):
    """Fit an SVC on the training CSVs and save it as ``model.joblib``.

    Args:
        train_dir: Directory containing ``train.csv`` and
            ``train_labels.csv`` (both without a header row). Defaults to the
            ``SM_CHANNEL_TRAIN`` environment variable, falling back to
            ``/opt/ml/input/data/train``.
        model_dir: Directory the fitted model is written to. Defaults to the
            ``SM_MODEL_DIR`` environment variable, falling back to
            ``/opt/ml/model``.
    """
    if train_dir is None:
        train_dir = os.environ.get(
            "SM_CHANNEL_TRAIN", os.path.join("/opt/ml/input/data", "train")
        )
    if model_dir is None:
        model_dir = os.environ.get("SM_MODEL_DIR", "/opt/ml/model")

    # Load training data
    train_data = pd.read_csv(os.path.join(train_dir, "train.csv"), header=None)
    train_labels = pd.read_csv(
        os.path.join(train_dir, "train_labels.csv"), header=None
    )

    # Train model
    clf = svm.SVC(gamma='scale')
    # SVC.fit expects a 1-D label vector; flatten the frame read from the
    # labels file. Assumes train_labels.csv holds a single column -- TODO confirm.
    clf.fit(train_data, train_labels.values.ravel())

    # Save model
    joblib.dump(clf, os.path.join(model_dir, "model.joblib"))
import sagemaker
from sagemaker.sklearn.estimator import SKLearn
sagemaker_session = sagemaker.Session()

# Configure the training job. The session created above is passed in
# explicitly; the original created it but never used it.
# instance_count / instance_type are the SageMaker Python SDK v2 names for
# the deprecated train_instance_count / train_instance_type arguments.
# NOTE(review): confirm that '0.24.2' is a framework_version the installed SDK
# accepts, and that the entry script actually consumes the 'epochs' value.
sklearn = SKLearn(
    entry_point='sklearn_container.py',
    source_dir='.',
    role=sagemaker.get_execution_role(),
    instance_count=1,
    instance_type='ml.c4.xlarge',
    framework_version='0.24.2',
    py_version='py3',
    hyperparameters={'epochs': 10},
    sagemaker_session=sagemaker_session,
)

# Start training with the 'train' channel pointing at the given S3 location.
sklearn.fit({'train': 's3://path/to/train/data'})
通过这种方式,您可以在AWS SageMaker上使用Scikit-learn的0.24.2版本。请确保将framework_version参数设置为正确的框架版本,并在容器中安装对应版本的Scikit-learn。