public class FeatureGroup extends FeatureGroupBase
| Modifier and Type | Class and Description |
|---|---|
static class |
FeatureGroup.FeatureGroupBuilder |
created, creator, description, eventTime, expectationsNames, featureGroupBaseEngine, features, featureStore, id, location, name, primaryKeys, statisticsConfig, statisticsEngine, subject, timeTravelFormat, type, utils, version| Constructor and Description |
|---|
FeatureGroup() |
FeatureGroup(Boolean onlineEnabled,
List<String> statisticColumns,
List<String> partitionKeys,
String hudiPrecombineKey,
String onlineTopicName) |
FeatureGroup(FeatureStore featureStore,
int id) |
FeatureGroup(FeatureStore featureStore,
@NonNull String name,
Integer version,
String description,
List<String> primaryKeys,
List<String> partitionKeys,
String hudiPrecombineKey,
boolean onlineEnabled,
TimeTravelFormat timeTravelFormat,
List<Feature> features,
StatisticsConfig statisticsConfig,
String onlineTopicName,
String eventTime) |
FeatureGroup(Integer id,
String description,
List<Feature> features) |
| Modifier and Type | Method and Description |
|---|---|
Query |
asOf(String wallclockTime)
Get Query object to retrieve all features of the group at a point in the past.
|
Query |
asOf(String wallclockTime,
String excludeUntil)
Get Query object to retrieve all features of the group at a point in the past.
|
static FeatureGroup.FeatureGroupBuilder |
builder() |
void |
commitDeleteRecord(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData) |
void |
commitDeleteRecord(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
Map<String,String> writeOptions) |
Map<Long,Map<String,String>> |
commitDetails()
Return commit details.
|
Map<Long,Map<String,String>> |
commitDetails(Integer limit)
Return commit details.
|
Map<Long,Map<String,String>> |
commitDetails(String wallclockTime)
Return commit details.
|
Map<Long,Map<String,String>> |
commitDetails(String wallclockTime,
Integer limit)
Return commit details.
|
Statistics |
computeStatistics(String wallclockTime)
Recompute the statistics for the feature group and save them to the feature store.
|
String |
getAvroSchema() |
List<String> |
getComplexFeatures() |
org.apache.avro.Schema |
getDeserializedAvroSchema() |
String |
getEncodedAvroSchema() |
String |
getFeatureAvroSchema(String featureName) |
Boolean |
getOnlineEnabled() |
String |
getOnlineTopicName() |
List<String> |
getStatisticColumns() |
void |
insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData) |
void |
insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
boolean overwrite) |
void |
insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
boolean overwrite,
Map<String,String> writeOptions) |
void |
insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
HudiOperationType operation)
Commit insert or upsert to time travel enabled Feature group.
|
void |
insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
Map<String,String> writeOptions) |
void |
insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
Storage storage) |
void |
insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
Storage storage,
boolean overwrite) |
void |
insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
Storage storage,
boolean overwrite,
HudiOperationType operation,
Map<String,String> writeOptions) |
org.apache.spark.sql.streaming.StreamingQuery |
insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData)
Deprecated.
insertStream method is deprecated for FeatureGroups. Full capability insertStream is available for StreamFeatureGroups.
|
org.apache.spark.sql.streaming.StreamingQuery |
insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
String queryName)
Deprecated.
insertStream method is deprecated for FeatureGroups. Full capability insertStream is available for StreamFeatureGroups.
|
org.apache.spark.sql.streaming.StreamingQuery |
insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
String queryName,
String outputMode)
Deprecated.
insertStream method is deprecated for FeatureGroups. Full capability insertStream is available for StreamFeatureGroups.
|
org.apache.spark.sql.streaming.StreamingQuery |
insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
String queryName,
String outputMode,
boolean awaitTermination,
Long timeout)
Deprecated.
insertStream method is deprecated for FeatureGroups. Full capability insertStream is available for StreamFeatureGroups.
|
org.apache.spark.sql.streaming.StreamingQuery |
insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
String queryName,
String outputMode,
boolean awaitTermination,
Long timeout,
String checkpointLocation,
Map<String,String> writeOptions)
Deprecated.
insertStream method is deprecated for FeatureGroups. Full capability insertStream is available for StreamFeatureGroups.
|
org.apache.spark.sql.streaming.StreamingQuery |
insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
String queryName,
String outputMode,
boolean awaitTermination,
String checkpointLocation)
Deprecated.
insertStream method is deprecated for FeatureGroups. Full capability insertStream is available for StreamFeatureGroups.
|
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
read() |
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
read(boolean online) |
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
read(boolean online,
Map<String,String> readOptions) |
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
read(Map<String,String> readOptions) |
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
read(String wallclockTime)
Reads Feature group data at a specific point in time.
|
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
read(String wallclockTime,
Map<String,String> readOptions)
Reads Feature group data at a specific point in time.
|
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
readChanges(String wallclockStartTime,
String wallclockEndTime)
Deprecated.
|
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
readChanges(String wallclockStartTime,
String wallclockEndTime,
Map<String,String> readOptions)
Deprecated.
|
void |
save(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData)
Deprecated.
|
void |
save(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
Map<String,String> writeOptions)
Deprecated.
|
void |
setOnlineEnabled(Boolean onlineEnabled) |
void |
setOnlineTopicName(String onlineTopicName) |
void |
setStatisticColumns(List<String> statisticColumns) |
void |
show(int numRows) |
void |
show(int numRows,
boolean online) |
addTag, appendFeatures, appendFeatures, computeStatistics, delete, deleteTag, filter, filter, getCreated, getCreator, getDescription, getEventTime, getExpectationsNames, getFeature, getFeatures, getFeatureStore, getId, getLocation, getName, getPrimaryKeys, getStatistics, getStatistics, getStatisticsConfig, getSubject, getTag, getTags, getTimeTravelFormat, getType, getVersion, select, selectAll, selectAll, selectExcept, selectExceptFeatures, selectFeatures, setDescription, setEventTime, setExpectationsNames, setFeatures, setFeatureStore, setId, setLocation, setName, setStatisticsConfig, setTimeTravelFormat, setType, setVersion, unloadSubject, updateDescription, updateFeatureDescription, updateFeatures, updateFeatures, updateStatisticsConfig
public FeatureGroup(FeatureStore featureStore, @NonNull String name, Integer version, String description, List<String> primaryKeys, List<String> partitionKeys, String hudiPrecombineKey, boolean onlineEnabled, TimeTravelFormat timeTravelFormat, List<Feature> features, StatisticsConfig statisticsConfig, String onlineTopicName, String eventTime)
public FeatureGroup()
public FeatureGroup(FeatureStore featureStore, int id)
public org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> read()
throws FeatureStoreException,
IOException
read in class FeatureGroupBaseFeatureStoreExceptionIOExceptionpublic org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> read(boolean online)
throws FeatureStoreException,
IOException
FeatureStoreExceptionIOExceptionpublic org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> read(Map<String,String> readOptions) throws FeatureStoreException, IOException
FeatureStoreExceptionIOExceptionpublic org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> read(boolean online,
Map<String,String> readOptions)
throws FeatureStoreException,
IOException
FeatureStoreExceptionIOExceptionpublic org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> read(String wallclockTime) throws FeatureStoreException, IOException, ParseException
wallclockTime - point in timeFeatureStoreException - FeatureStoreExceptionIOException - IOExceptionParseException - ParseExceptionpublic org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> read(String wallclockTime, Map<String,String> readOptions) throws FeatureStoreException, IOException, ParseException
wallclockTime - point in timereadOptions - Additional read options as key-value pairs, defaults to empty Map.FeatureStoreException - FeatureStoreExceptionIOException - IOExceptionParseException - ParseExceptionpublic org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> readChanges(String wallclockStartTime, String wallclockEndTime) throws FeatureStoreException, IOException, ParseException
wallclockStartTime - start date.wallclockEndTime - end date.FeatureStoreException - FeatureStoreExceptionIOException - IOExceptionParseException - ParseExceptionpublic org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> readChanges(String wallclockStartTime, String wallclockEndTime, Map<String,String> readOptions) throws FeatureStoreException, IOException, ParseException
wallclockStartTime - start date.
wallclockEndTime - end date.
readOptions - Additional read options as key-value pairs, defaults to empty Map.
FeatureStoreException - FeatureStoreException
IOException - IOException
ParseException - ParseException
public Query asOf(String wallclockTime) throws FeatureStoreException, ParseException
wallclockTime - Datetime string. The String should be formatted in one of the
following formats `%Y%m%d`, `%Y%m%d%H`, `%Y%m%d%H%M`, or `%Y%m%d%H%M%S`.FeatureStoreException - FeatureStoreExceptionParseException - ParseExceptionpublic Query asOf(String wallclockTime, String excludeUntil) throws FeatureStoreException, ParseException
wallclockTime - Datetime string. The String should be formatted in one of the
following formats `%Y%m%d`, `%Y%m%d%H`, `%Y%m%d%H%M`, or `%Y%m%d%H%M%S`.excludeUntil - Datetime string. The String should be formatted in one of the
following formats `%Y%m%d`, `%Y%m%d%H`, `%Y%m%d%H%M`, or `%Y%m%d%H%M%S`.FeatureStoreException - FeatureStoreExceptionParseException - ParseExceptionpublic void show(int numRows)
throws FeatureStoreException,
IOException
FeatureStoreExceptionIOExceptionpublic void show(int numRows,
boolean online)
throws FeatureStoreException,
IOException
FeatureStoreExceptionIOException@Deprecated public void save(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData) throws FeatureStoreException, IOException, ParseException
@Deprecated public void save(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData, Map<String,String> writeOptions) throws FeatureStoreException, IOException, ParseException
public void insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData)
throws IOException,
FeatureStoreException,
ParseException
public void insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
Map<String,String> writeOptions)
throws FeatureStoreException,
IOException,
ParseException
public void insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
Storage storage)
throws IOException,
FeatureStoreException,
ParseException
public void insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
boolean overwrite)
throws IOException,
FeatureStoreException,
ParseException
public void insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
Storage storage,
boolean overwrite)
throws IOException,
FeatureStoreException,
ParseException
public void insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
boolean overwrite,
Map<String,String> writeOptions)
throws FeatureStoreException,
IOException,
ParseException
public void insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
HudiOperationType operation)
throws FeatureStoreException,
IOException,
ParseException
featureData - dataframe to be committed.operation - commit operation type, INSERT or UPSERT.FeatureStoreException - FeatureStoreExceptionIOException - IOExceptionParseException - ParseExceptionpublic void insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
Storage storage,
boolean overwrite,
HudiOperationType operation,
Map<String,String> writeOptions)
throws FeatureStoreException,
IOException,
ParseException
@Deprecated public org.apache.spark.sql.streaming.StreamingQuery insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData) throws org.apache.spark.sql.streaming.StreamingQueryException, IOException, FeatureStoreException, TimeoutException, ParseException
featureData - Spark dataframe containing feature dataorg.apache.spark.sql.streaming.StreamingQueryException - StreamingQueryExceptionIOException - IOExceptionFeatureStoreException - FeatureStoreExceptionTimeoutException - TimeoutExceptionParseException - ParseException@Deprecated public org.apache.spark.sql.streaming.StreamingQuery insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData, String queryName) throws org.apache.spark.sql.streaming.StreamingQueryException, IOException, FeatureStoreException, TimeoutException, ParseException
featureData - Spark dataframe containing feature dataqueryName - name of spark StreamingQueryorg.apache.spark.sql.streaming.StreamingQueryException - StreamingQueryExceptionIOException - IOExceptionFeatureStoreException - FeatureStoreExceptionTimeoutException - TimeoutExceptionParseException - ParseException@Deprecated public org.apache.spark.sql.streaming.StreamingQuery insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData, String queryName, String outputMode) throws org.apache.spark.sql.streaming.StreamingQueryException, IOException, FeatureStoreException, TimeoutException, ParseException
featureData - Spark dataframe containing feature dataqueryName - name of spark StreamingQueryoutputMode - outputModeorg.apache.spark.sql.streaming.StreamingQueryException - StreamingQueryExceptionIOException - IOExceptionFeatureStoreException - FeatureStoreExceptionTimeoutException - TimeoutExceptionParseException - ParseException@Deprecated public org.apache.spark.sql.streaming.StreamingQuery insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData, String queryName, String outputMode, boolean awaitTermination, Long timeout) throws org.apache.spark.sql.streaming.StreamingQueryException, IOException, FeatureStoreException, TimeoutException, ParseException
featureData - Spark dataframe containing feature dataqueryName - name of spark StreamingQueryoutputMode - outputModeawaitTermination - whether or not to wait for query Terminationtimeout - timeoutorg.apache.spark.sql.streaming.StreamingQueryException - StreamingQueryExceptionIOException - IOExceptionFeatureStoreException - FeatureStoreExceptionTimeoutException - TimeoutExceptionParseException - ParseException@Deprecated public org.apache.spark.sql.streaming.StreamingQuery insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData, String queryName, String outputMode, boolean awaitTermination, String checkpointLocation) throws org.apache.spark.sql.streaming.StreamingQueryException, IOException, FeatureStoreException, TimeoutException, ParseException
featureData - Spark dataframe containing feature dataqueryName - name of spark StreamingQueryoutputMode - outputModeawaitTermination - whether or not to wait for query TerminationcheckpointLocation - path to checkpoint location directoryorg.apache.spark.sql.streaming.StreamingQueryException - StreamingQueryExceptionIOException - IOExceptionFeatureStoreException - FeatureStoreExceptionTimeoutException - TimeoutExceptionParseException - ParseException@Deprecated public org.apache.spark.sql.streaming.StreamingQuery insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData, String queryName, String outputMode, boolean awaitTermination, Long timeout, String checkpointLocation, Map<String,String> writeOptions) throws FeatureStoreException, IOException, org.apache.spark.sql.streaming.StreamingQueryException, TimeoutException, ParseException
featureData - Spark dataframe containing feature dataqueryName - name of spark StreamingQueryoutputMode - outputModeawaitTermination - whether or not to wait for query Terminationtimeout - timeoutcheckpointLocation - path to checkpoint location directorywriteOptions - Additional write options as key-value pairs, defaults to empty Map.org.apache.spark.sql.streaming.StreamingQueryException - StreamingQueryExceptionIOException - IOExceptionFeatureStoreException - FeatureStoreExceptionTimeoutException - TimeoutExceptionParseException - ParseExceptionpublic void commitDeleteRecord(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData)
throws FeatureStoreException,
IOException,
ParseException
public void commitDeleteRecord(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
Map<String,String> writeOptions)
throws FeatureStoreException,
IOException,
ParseException
public Map<Long,Map<String,String>> commitDetails() throws IOException, FeatureStoreException, ParseException
FeatureStoreException - FeatureStoreExceptionIOException - IOExceptionParseException - ParseExceptionpublic Map<Long,Map<String,String>> commitDetails(Integer limit) throws IOException, FeatureStoreException, ParseException
limit - number of commits to return.FeatureStoreException - FeatureStoreExceptionIOException - IOExceptionParseException - ParseExceptionpublic Map<Long,Map<String,String>> commitDetails(String wallclockTime) throws IOException, FeatureStoreException, ParseException
wallclockTime - point in time.FeatureStoreException - FeatureStoreExceptionIOException - IOExceptionParseException - ParseExceptionpublic Map<Long,Map<String,String>> commitDetails(String wallclockTime, Integer limit) throws IOException, FeatureStoreException, ParseException
wallclockTime - point in time.limit - number of commits to return.FeatureStoreException - FeatureStoreExceptionIOException - IOExceptionParseException - ParseExceptionpublic String getAvroSchema() throws FeatureStoreException, IOException
FeatureStoreExceptionIOExceptionpublic List<String> getComplexFeatures()
getComplexFeatures in class FeatureGroupBasepublic String getFeatureAvroSchema(String featureName) throws FeatureStoreException, IOException
getFeatureAvroSchema in class FeatureGroupBaseFeatureStoreExceptionIOExceptionpublic String getEncodedAvroSchema() throws FeatureStoreException, IOException
getEncodedAvroSchema in class FeatureGroupBaseFeatureStoreExceptionIOExceptionpublic org.apache.avro.Schema getDeserializedAvroSchema()
throws FeatureStoreException,
IOException
getDeserializedAvroSchema in class FeatureGroupBaseFeatureStoreExceptionIOExceptionpublic Statistics computeStatistics(String wallclockTime) throws FeatureStoreException, IOException, ParseException
wallclockTime - point in time.
FeatureStoreException - FeatureStoreException
IOException - IOException
ParseException - ParseException
public static FeatureGroup.FeatureGroupBuilder builder()
public Boolean getOnlineEnabled()
public void setOnlineEnabled(Boolean onlineEnabled)
public String getOnlineTopicName()
getOnlineTopicName in class FeatureGroupBasepublic void setOnlineTopicName(String onlineTopicName)
Copyright © 2023. All rights reserved.