public class TrainingDataset extends Object
| Modifier and Type | Class and Description |
|---|---|
static class |
TrainingDataset.TrainingDatasetBuilder |
| Constructor and Description |
|---|
TrainingDataset() |
TrainingDataset(@NonNull String name,
Integer version,
String description,
DataFormat dataFormat,
Boolean coalesce,
StorageConnector storageConnector,
String location,
List<Split> splits,
String trainSplit,
Long seed,
FeatureStore featureStore,
StatisticsConfig statisticsConfig,
List<String> label,
String eventStartTime,
String eventEndTime,
TrainingDatasetType trainingDatasetType,
Float validationSize,
Float testSize,
String trainStart,
String trainEnd,
String validationStart,
String validationEnd,
String testStart,
String testEnd,
Integer timeSplitSize,
FilterLogic extraFilterLogic,
Filter extraFilter) |
| Modifier and Type | Method and Description |
|---|---|
void |
addTag(String name,
Object value)
Add name/value tag to the training dataset.
|
static TrainingDataset.TrainingDatasetBuilder |
builder() |
Statistics |
computeStatistics()
Recompute the statistics for the entire training dataset and save them to the feature store.
|
void |
delete()
Delete training dataset and all associated metadata.
|
void |
deleteTag(String name)
Delete a tag of the training dataset.
|
Boolean |
getCoalesce() |
DataFormat |
getDataFormat() |
String |
getDescription() |
Date |
getEventEndTime() |
Date |
getEventStartTime() |
FilterLogic |
getExtraFilter() |
List<TrainingDatasetFeature> |
getFeatures() |
FeatureStore |
getFeatureStore() |
Integer |
getId() |
List<String> |
getLabel() |
String |
getLocation() |
String |
getName() |
String |
getQuery() |
String |
getQuery(boolean withLabel) |
String |
getQuery(Storage storage) |
String |
getQuery(Storage storage,
boolean withLabel) |
Query |
getQueryInt() |
Long |
getSeed() |
HashSet<String> |
getServingKeys()
Set of primary key names that are used as keys in the input dict object for the `get_serving_vector` method.
|
List<Object> |
getServingVector(Map<String,Object> entry)
Retrieve feature vector from online feature store.
|
List<Object> |
getServingVector(Map<String,Object> entry,
boolean external)
Retrieve feature vector from online feature store.
|
List<List<Object>> |
getServingVectors(Map<String,List<Object>> entry) |
List<List<Object>> |
getServingVectors(Map<String,List<Object>> entry,
boolean external) |
List<Split> |
getSplits() |
Statistics |
getStatistics()
Get the last statistics commit for the training dataset.
|
Statistics |
getStatistics(String commitTime)
Get the statistics of a specific commit time for the training dataset.
|
StatisticsConfig |
getStatisticsConfig() |
StorageConnector |
getStorageConnector() |
Object |
getTag(String name)
Get a single tag value of the training dataset.
|
Map<String,Object> |
getTags()
Get all tags of the training dataset.
|
TrainingDatasetType |
getTrainingDatasetType() |
String |
getTrainSplit() |
String |
getType() |
Integer |
getVersion() |
void |
initPreparedStatement()
Initialise and cache parametrised prepared statement to retrieve feature vector from online feature store.
|
void |
initPreparedStatement(boolean external)
Initialise and cache parametrised prepared statement to retrieve feature vector from online feature store.
|
void |
initPreparedStatement(boolean external,
boolean batch)
Initialise and cache parametrised prepared statement to retrieve batch feature vectors from online feature store.
|
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
read()
Read the content of the training dataset.
|
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
read(Map<String,String> readOptions)
Read the content of the training dataset.
|
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
read(String split)
Read a single split from the training dataset.
|
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
read(String split,
Map<String,String> readOptions)
Read a single split from the training dataset.
|
void |
save(Query query)
Create the training dataset based on the content of the feature store query.
|
void |
save(Query query,
Map<String,String> writeOptions)
Create the training dataset based on the content of the feature store query.
|
void |
setCoalesce(Boolean coalesce) |
void |
setDataFormat(DataFormat dataFormat) |
void |
setDescription(String description) |
void |
setEventEndTime(Date eventEndTime) |
void |
setEventStartTime(Date eventStartTime) |
void |
setExtraFilter(FilterLogic extraFilter) |
void |
setFeatures(List<TrainingDatasetFeature> features) |
void |
setFeatureStore(FeatureStore featureStore) |
void |
setId(Integer id) |
void |
setLabel(List<String> label) |
void |
setLocation(String location) |
void |
setName(String name) |
void |
setQueryInt(Query queryInt) |
void |
setSeed(Long seed) |
void |
setSplits(List<Split> splits) |
void |
setStatisticsConfig(StatisticsConfig statisticsConfig) |
void |
setStorageConnector(StorageConnector storageConnector) |
void |
setTrainingDatasetType(TrainingDatasetType trainingDatasetType) |
void |
setTrainSplit(String trainSplit) |
void |
setType(String type) |
void |
setVersion(Integer version) |
void |
show(int numRows)
Show numRows from the training dataset (across all splits).
|
void |
updateStatisticsConfig()
Update the statistics configuration of the training dataset.
|
public TrainingDataset(@NonNull String name,
Integer version,
String description,
DataFormat dataFormat,
Boolean coalesce,
StorageConnector storageConnector,
String location,
List<Split> splits,
String trainSplit,
Long seed,
FeatureStore featureStore,
StatisticsConfig statisticsConfig,
List<String> label,
String eventStartTime,
String eventEndTime,
TrainingDatasetType trainingDatasetType,
Float validationSize,
Float testSize,
String trainStart,
String trainEnd,
String validationStart,
String validationEnd,
String testStart,
String testEnd,
Integer timeSplitSize,
FilterLogic extraFilterLogic,
Filter extraFilter)
throws FeatureStoreException,
ParseException
FeatureStoreException
ParseException
public TrainingDataset()
public void save(Query query) throws FeatureStoreException, IOException
query - the query to save as training datasetFeatureStoreException - FeatureStoreExceptionIOException - IOExceptionpublic void save(Query query, Map<String,String> writeOptions) throws FeatureStoreException, IOException
query - the query to save as training datasetwriteOptions - options to pass to the Spark write operationFeatureStoreException - FeatureStoreExceptionIOException - IOExceptionpublic org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> read()
throws FeatureStoreException,
IOException
FeatureStoreException - if the training dataset has splits and the split was not specifiedIOException - IOExceptionpublic org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> read(Map<String,String> readOptions) throws FeatureStoreException, IOException
readOptions - options to pass to the Spark read operationFeatureStoreException - if the training dataset has splits and the split was not specifiedIOException - IOExceptionpublic org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> read(String split) throws FeatureStoreException, IOException
split - the split nameFeatureStoreException - if the training dataset has splits and the split was not specifiedIOException - IOExceptionpublic org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> read(String split, Map<String,String> readOptions) throws FeatureStoreException, IOException
split - the split namereadOptions - options to pass to the Spark read operationFeatureStoreException - if the training dataset has splits and the split was not specifiedIOException - IOExceptionpublic void show(int numRows)
throws FeatureStoreException,
IOException
numRows - number of rows to displayFeatureStoreException - FeatureStoreExceptionIOException - IOExceptionpublic Statistics computeStatistics() throws FeatureStoreException, IOException
FeatureStoreException - FeatureStoreExceptionIOException - IOExceptionpublic void updateStatisticsConfig()
throws FeatureStoreException,
IOException
FeatureStoreException - FeatureStoreExceptionIOException - IOExceptionpublic Statistics getStatistics() throws FeatureStoreException, IOException
FeatureStoreException - FeatureStoreExceptionIOException - IOExceptionpublic Statistics getStatistics(String commitTime) throws FeatureStoreException, IOException
commitTime - commit time in the format "YYYYMMDDhhmmss"FeatureStoreException - FeatureStoreExceptionIOException - IOExceptionpublic void addTag(String name, Object value) throws FeatureStoreException, IOException
name - name of the tag
value - value of the tag. The value of a tag can be any valid JSON - primitives, arrays or JSON objects
FeatureStoreException - FeatureStoreException
IOException - IOException
public Map<String,Object> getTags() throws FeatureStoreException, IOException
FeatureStoreException - FeatureStoreExceptionIOException - IOExceptionpublic Object getTag(String name) throws FeatureStoreException, IOException
name - name of the tagFeatureStoreException - FeatureStoreExceptionIOException - IOExceptionpublic void deleteTag(String name) throws FeatureStoreException, IOException
name - name of the tag to be deletedFeatureStoreException - FeatureStoreExceptionIOException - IOExceptionpublic String getQuery() throws FeatureStoreException, IOException
FeatureStoreException
IOException
public String getQuery(boolean withLabel) throws FeatureStoreException, IOException
FeatureStoreException
IOException
public String getQuery(Storage storage) throws FeatureStoreException, IOException
FeatureStoreException
IOException
public String getQuery(Storage storage, boolean withLabel) throws FeatureStoreException, IOException
FeatureStoreException
IOException
public void initPreparedStatement()
throws SQLException,
IOException,
FeatureStoreException,
ClassNotFoundException
SQLException - SQLExceptionIOException - IOExceptionFeatureStoreException - FeatureStoreExceptionClassNotFoundException - ClassNotFoundExceptionpublic void initPreparedStatement(boolean external)
throws SQLException,
IOException,
FeatureStoreException,
ClassNotFoundException
external - whether the connection is made from an external client or not
SQLException - SQLException
IOException - IOException
FeatureStoreException - FeatureStoreException
ClassNotFoundException - ClassNotFoundException
public void initPreparedStatement(boolean external,
boolean batch)
throws SQLException,
IOException,
FeatureStoreException,
ClassNotFoundException
external - whether the connection is made from an external client or not
batch - whether to initialise feature vector for batch retrieval
SQLException - SQLException
IOException - IOException
FeatureStoreException - FeatureStoreException
ClassNotFoundException - ClassNotFoundException
public List<Object> getServingVector(Map<String,Object> entry) throws SQLException, FeatureStoreException, IOException, ClassNotFoundException
entry - Map object with keys as primary key names of the training dataset feature groups and values as
corresponding ids to retrieve feature vector from online feature store.
SQLException - SQLException
IOException - IOException
FeatureStoreException - FeatureStoreException
ClassNotFoundException - ClassNotFoundException
public List<Object> getServingVector(Map<String,Object> entry, boolean external) throws SQLException, FeatureStoreException, IOException, ClassNotFoundException
entry - Map object with keys as primary key names of the training dataset feature groups and values as
corresponding ids to retrieve feature vector from online feature store.
external - If true, the connection to the online feature store will be established using the hostname
provided in the hsfs.connection() setup.
SQLException - SQLException
IOException - IOException
FeatureStoreException - FeatureStoreException
ClassNotFoundException - ClassNotFoundException
public List<List<Object>> getServingVectors(Map<String,List<Object>> entry) throws SQLException, FeatureStoreException, IOException, ClassNotFoundException
public List<List<Object>> getServingVectors(Map<String,List<Object>> entry, boolean external) throws SQLException, FeatureStoreException, IOException, ClassNotFoundException
public void delete()
throws FeatureStoreException,
IOException
FeatureStoreException - FeatureStoreExceptionIOException - IOExceptionpublic HashSet<String> getServingKeys() throws SQLException, IOException, FeatureStoreException, ClassNotFoundException
SQLException - SQLExceptionIOException - IOExceptionFeatureStoreException - FeatureStoreExceptionClassNotFoundException - ClassNotFoundExceptionpublic static TrainingDataset.TrainingDatasetBuilder builder()
public Integer getId()
public void setId(Integer id)
public String getName()
public void setName(String name)
public Integer getVersion()
public void setVersion(Integer version)
public String getDescription()
public void setDescription(String description)
public DataFormat getDataFormat()
public void setDataFormat(DataFormat dataFormat)
public Boolean getCoalesce()
public void setCoalesce(Boolean coalesce)
public TrainingDatasetType getTrainingDatasetType()
public void setTrainingDatasetType(TrainingDatasetType trainingDatasetType)
public List<TrainingDatasetFeature> getFeatures()
public void setFeatures(List<TrainingDatasetFeature> features)
public FeatureStore getFeatureStore()
public void setFeatureStore(FeatureStore featureStore)
public StorageConnector getStorageConnector()
public void setStorageConnector(StorageConnector storageConnector)
public String getLocation()
public void setLocation(String location)
public Long getSeed()
public void setSeed(Long seed)
public String getTrainSplit()
public void setTrainSplit(String trainSplit)
public StatisticsConfig getStatisticsConfig()
public void setStatisticsConfig(StatisticsConfig statisticsConfig)
public Query getQueryInt()
public void setQueryInt(Query queryInt)
public Date getEventStartTime()
public void setEventStartTime(Date eventStartTime)
public Date getEventEndTime()
public void setEventEndTime(Date eventEndTime)
public FilterLogic getExtraFilter()
public void setExtraFilter(FilterLogic extraFilter)
public String getType()
public void setType(String type)
Copyright © 2023. All rights reserved.