public class JavaRDD<T>
extends Object
Constructor and Description |
---|
JavaRDD(RDD<T> rdd, scala.reflect.ClassTag<T> classTag) |
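In practice a JavaRDD is usually obtained from a JavaSparkContext (for example via parallelize or textFile) rather than through this constructor. A minimal sketch, assuming a local master and illustrative data:

```java
import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class JavaRDDCreation {
  public static void main(String[] args) {
    SparkConf conf = new SparkConf().setAppName("javardd-example").setMaster("local[2]");
    JavaSparkContext sc = new JavaSparkContext(conf);

    // Distribute an in-memory collection; sc.textFile(path) builds a JavaRDD<String> similarly.
    JavaRDD<Integer> numbers = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));

    System.out.println(numbers.count()); // 5
    sc.stop();
  }
}
```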
Modifier and Type | Method and Description |
---|---|
static <U> U | aggregate(U zeroValue, Function2<U,T,U> seqOp, Function2<U,U,U> combOp) |
JavaRDD<T> | cache() Persist this RDD with the default storage level (MEMORY_ONLY). |
static <U> JavaPairRDD<T,U> | cartesian(JavaRDDLike<U,?> other) |
static void | checkpoint() |
scala.reflect.ClassTag<T> | classTag() |
JavaRDD<T> | coalesce(int numPartitions) Return a new RDD that is reduced into numPartitions partitions. |
JavaRDD<T> | coalesce(int numPartitions, boolean shuffle) Return a new RDD that is reduced into numPartitions partitions. |
static java.util.List<T> | collect() |
static JavaFutureAction<java.util.List<T>> | collectAsync() |
static java.util.List<T>[] | collectPartitions(int[] partitionIds) |
static SparkContext | context() |
static long | count() |
static PartialResult<BoundedDouble> | countApprox(long timeout) |
static PartialResult<BoundedDouble> | countApprox(long timeout, double confidence) |
static long | countApproxDistinct(double relativeSD) |
static JavaFutureAction<Long> | countAsync() |
static java.util.Map<T,Long> | countByValue() |
static PartialResult<java.util.Map<T,BoundedDouble>> | countByValueApprox(long timeout) |
static PartialResult<java.util.Map<T,BoundedDouble>> | countByValueApprox(long timeout, double confidence) |
JavaRDD<T> | distinct() Return a new RDD containing the distinct elements in this RDD. |
JavaRDD<T> | distinct(int numPartitions) Return a new RDD containing the distinct elements in this RDD. |
JavaRDD<T> | filter(Function<T,Boolean> f) Return a new RDD containing only the elements that satisfy a predicate. |
static T | first() |
static <U> JavaRDD<U> | flatMap(FlatMapFunction<T,U> f) |
static JavaDoubleRDD | flatMapToDouble(DoubleFlatMapFunction<T> f) |
static <K2,V2> JavaPairRDD<K2,V2> | flatMapToPair(PairFlatMapFunction<T,K2,V2> f) |
static T | fold(T zeroValue, Function2<T,T,T> f) |
static void | foreach(VoidFunction<T> f) |
static JavaFutureAction<Void> | foreachAsync(VoidFunction<T> f) |
static void | foreachPartition(VoidFunction<java.util.Iterator<T>> f) |
static JavaFutureAction<Void> | foreachPartitionAsync(VoidFunction<java.util.Iterator<T>> f) |
static <T> JavaRDD<T> | fromRDD(RDD<T> rdd, scala.reflect.ClassTag<T> evidence$1) |
static Optional<String> | getCheckpointFile() |
static int | getNumPartitions() |
static StorageLevel | getStorageLevel() |
static JavaRDD<java.util.List<T>> | glom() |
static <U> JavaPairRDD<U,Iterable<T>> | groupBy(Function<T,U> f) |
static <U> JavaPairRDD<U,Iterable<T>> | groupBy(Function<T,U> f, int numPartitions) |
static int | id() |
JavaRDD<T> | intersection(JavaRDD<T> other) Return the intersection of this RDD and another one. |
static boolean | isCheckpointed() |
static boolean | isEmpty() |
static java.util.Iterator<T> | iterator(Partition split, TaskContext taskContext) |
static <U> JavaPairRDD<U,T> | keyBy(Function<T,U> f) |
static <R> JavaRDD<R> | map(Function<T,R> f) |
static <U> JavaRDD<U> | mapPartitions(FlatMapFunction<java.util.Iterator<T>,U> f) |
static <U> JavaRDD<U> | mapPartitions(FlatMapFunction<java.util.Iterator<T>,U> f, boolean preservesPartitioning) |
static JavaDoubleRDD | mapPartitionsToDouble(DoubleFlatMapFunction<java.util.Iterator<T>> f) |
static JavaDoubleRDD | mapPartitionsToDouble(DoubleFlatMapFunction<java.util.Iterator<T>> f, boolean preservesPartitioning) |
static <K2,V2> JavaPairRDD<K2,V2> | mapPartitionsToPair(PairFlatMapFunction<java.util.Iterator<T>,K2,V2> f) |
static <K2,V2> JavaPairRDD<K2,V2> | mapPartitionsToPair(PairFlatMapFunction<java.util.Iterator<T>,K2,V2> f, boolean preservesPartitioning) |
static <R> JavaRDD<R> | mapPartitionsWithIndex(Function2<Integer,java.util.Iterator<T>,java.util.Iterator<R>> f, boolean preservesPartitioning) |
static <R> boolean | mapPartitionsWithIndex$default$2() |
static <R> JavaDoubleRDD | mapToDouble(DoubleFunction<T> f) |
static <K2,V2> JavaPairRDD<K2,V2> | mapToPair(PairFunction<T,K2,V2> f) |
static T | max(java.util.Comparator<T> comp) |
static T | min(java.util.Comparator<T> comp) |
static String | name() |
static Optional<Partitioner> | partitioner() |
static java.util.List<Partition> | partitions() |
JavaRDD<T> | persist(StorageLevel newLevel) Set this RDD's storage level to persist its values across operations after the first time it is computed. |
static JavaRDD<String> | pipe(java.util.List<String> command) |
static JavaRDD<String> | pipe(java.util.List<String> command, java.util.Map<String,String> env) |
static JavaRDD<String> | pipe(java.util.List<String> command, java.util.Map<String,String> env, boolean separateWorkingDir, int bufferSize) |
static JavaRDD<String> | pipe(java.util.List<String> command, java.util.Map<String,String> env, boolean separateWorkingDir, int bufferSize, String encoding) |
static JavaRDD<String> | pipe(String command) |
JavaRDD<T>[] | randomSplit(double[] weights) Randomly splits this RDD with the provided weights. |
JavaRDD<T>[] | randomSplit(double[] weights, long seed) Randomly splits this RDD with the provided weights. |
RDD<T> | rdd() |
static T | reduce(Function2<T,T,T> f) |
JavaRDD<T> | repartition(int numPartitions) Return a new RDD that has exactly numPartitions partitions. |
JavaRDD<T> | sample(boolean withReplacement, double fraction) Return a sampled subset of this RDD with a random seed. |
JavaRDD<T> | sample(boolean withReplacement, double fraction, long seed) Return a sampled subset of this RDD, with a user-supplied seed. |
static void | saveAsObjectFile(String path) |
static void | saveAsTextFile(String path) |
static void | saveAsTextFile(String path, Class<? extends org.apache.hadoop.io.compress.CompressionCodec> codec) |
JavaRDD<T> | setName(String name) Assign a name to this RDD. |
<S> JavaRDD<T> | sortBy(Function<T,S> f, boolean ascending, int numPartitions) Return this RDD sorted by the given key function. |
JavaRDD<T> | subtract(JavaRDD<T> other) Return an RDD with the elements from this that are not in other. |
JavaRDD<T> | subtract(JavaRDD<T> other, int numPartitions) Return an RDD with the elements from this that are not in other. |
JavaRDD<T> | subtract(JavaRDD<T> other, Partitioner p) Return an RDD with the elements from this that are not in other. |
static java.util.List<T> | take(int num) |
static JavaFutureAction<java.util.List<T>> | takeAsync(int num) |
static java.util.List<T> | takeOrdered(int num) |
static java.util.List<T> | takeOrdered(int num, java.util.Comparator<T> comp) |
static java.util.List<T> | takeSample(boolean withReplacement, int num) |
static java.util.List<T> | takeSample(boolean withReplacement, int num, long seed) |
static String | toDebugString() |
static java.util.Iterator<T> | toLocalIterator() |
static java.util.List<T> | top(int num) |
static java.util.List<T> | top(int num, java.util.Comparator<T> comp) |
static <T> RDD<T> | toRDD(JavaRDD<T> rdd) |
String | toString() |
static <U> U | treeAggregate(U zeroValue, Function2<U,T,U> seqOp, Function2<U,U,U> combOp) |
static <U> U | treeAggregate(U zeroValue, Function2<U,T,U> seqOp, Function2<U,U,U> combOp, int depth) |
static T | treeReduce(Function2<T,T,T> f) |
static T | treeReduce(Function2<T,T,T> f, int depth) |
JavaRDD<T> | union(JavaRDD<T> other) Return the union of this RDD and another one. |
JavaRDD<T> | unpersist() Mark the RDD as non-persistent, and remove all blocks for it from memory and disk. |
JavaRDD<T> | unpersist(boolean blocking) Mark the RDD as non-persistent, and remove all blocks for it from memory and disk. |
JavaRDD<T> | wrapRDD(RDD<T> rdd) |
static <U> JavaPairRDD<T,U> | zip(JavaRDDLike<U,?> other) |
static <U,V> JavaRDD<V> | zipPartitions(JavaRDDLike<U,?> other, FlatMapFunction2<java.util.Iterator<T>,java.util.Iterator<U>,V> f) |
static JavaPairRDD<T,Long> | zipWithIndex() |
static JavaPairRDD<T,Long> | zipWithUniqueId() |
Methods inherited from interface org.apache.spark.api.java.JavaRDDLike:
aggregate, cartesian, checkpoint, collect, collectAsync, collectPartitions, context, count, countApprox, countApprox, countApproxDistinct, countAsync, countByValue, countByValueApprox, countByValueApprox, first, flatMap, flatMapToDouble, flatMapToPair, fold, foreach, foreachAsync, foreachPartition, foreachPartitionAsync, getCheckpointFile, getNumPartitions, getStorageLevel, glom, groupBy, groupBy, id, isCheckpointed, isEmpty, iterator, keyBy, map, mapPartitions, mapPartitions, mapPartitionsToDouble, mapPartitionsToDouble, mapPartitionsToPair, mapPartitionsToPair, mapPartitionsWithIndex, mapToDouble, mapToPair, max, min, name, partitioner, partitions, pipe, pipe, pipe, pipe, pipe, reduce, saveAsObjectFile, saveAsTextFile, saveAsTextFile, take, takeAsync, takeOrdered, takeOrdered, takeSample, takeSample, toDebugString, toLocalIterator, top, top, treeAggregate, treeAggregate, treeReduce, treeReduce, zip, zipPartitions, zipWithIndex, zipWithUniqueId
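The sketch below illustrates the basic pattern behind the methods listed above: lazy transformations (map, filter) followed by an action (collect) that triggers the job. It assumes a JavaSparkContext named sc, as in the construction example earlier:

```java
import java.util.Arrays;
import java.util.List;

import org.apache.spark.api.java.JavaRDD;

// Assumes a JavaSparkContext named sc is already available.
JavaRDD<String> lines = sc.parallelize(Arrays.asList("a", "bb", "ccc", "dddd"));

// Transformations are lazy; nothing executes until an action is invoked.
JavaRDD<Integer> lengths = lines.map(String::length);
JavaRDD<Integer> longOnes = lengths.filter(len -> len >= 3);

// collect() is an action: it runs the job and returns the results to the driver.
List<Integer> result = longOnes.collect(); // [3, 4]
```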
public static java.util.List<Partition> partitions()
public static int getNumPartitions()
public static Optional<Partitioner> partitioner()
public static SparkContext context()
public static int id()
public static StorageLevel getStorageLevel()
public static java.util.Iterator<T> iterator(Partition split, TaskContext taskContext)
public static <R> JavaRDD<R> mapPartitionsWithIndex(Function2<Integer,java.util.Iterator<T>,java.util.Iterator<R>> f, boolean preservesPartitioning)
public static <R> JavaDoubleRDD mapToDouble(DoubleFunction<T> f)
public static <K2,V2> JavaPairRDD<K2,V2> mapToPair(PairFunction<T,K2,V2> f)
public static <U> JavaRDD<U> flatMap(FlatMapFunction<T,U> f)
public static JavaDoubleRDD flatMapToDouble(DoubleFlatMapFunction<T> f)
public static <K2,V2> JavaPairRDD<K2,V2> flatMapToPair(PairFlatMapFunction<T,K2,V2> f)
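The flatMap and mapToPair variants above are the usual bridge from a JavaRDD to a JavaPairRDD. A word-count style sketch, assuming a JavaSparkContext named sc and a Spark 2.x FlatMapFunction (whose call method returns a java.util.Iterator; in 1.x it returned an Iterable):

```java
import java.util.Arrays;

import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import scala.Tuple2;

// Assumes a JavaSparkContext named sc.
JavaRDD<String> lines = sc.parallelize(Arrays.asList("to be or", "not to be"));

// flatMap: one input line may produce many output words.
JavaRDD<String> words = lines.flatMap(line -> Arrays.asList(line.split(" ")).iterator());

// mapToPair: switch to a JavaPairRDD so key-based operations such as reduceByKey are available.
JavaPairRDD<String, Integer> counts = words
    .mapToPair(word -> new Tuple2<>(word, 1))
    .reduceByKey((a, b) -> a + b);

System.out.println(counts.collectAsMap()); // {not=1, be=2, or=1, to=2}
```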
public static <U> JavaRDD<U> mapPartitions(FlatMapFunction<java.util.Iterator<T>,U> f)
public static <U> JavaRDD<U> mapPartitions(FlatMapFunction<java.util.Iterator<T>,U> f, boolean preservesPartitioning)
public static JavaDoubleRDD mapPartitionsToDouble(DoubleFlatMapFunction<java.util.Iterator<T>> f)
public static <K2,V2> JavaPairRDD<K2,V2> mapPartitionsToPair(PairFlatMapFunction<java.util.Iterator<T>,K2,V2> f)
public static JavaDoubleRDD mapPartitionsToDouble(DoubleFlatMapFunction<java.util.Iterator<T>> f, boolean preservesPartitioning)
public static <K2,V2> JavaPairRDD<K2,V2> mapPartitionsToPair(PairFlatMapFunction<java.util.Iterator<T>,K2,V2> f, boolean preservesPartitioning)
public static void foreachPartition(VoidFunction<java.util.Iterator<T>> f)
public static JavaRDD<java.util.List<T>> glom()
public static <U> JavaPairRDD<T,U> cartesian(JavaRDDLike<U,?> other)
public static <U> JavaPairRDD<U,Iterable<T>> groupBy(Function<T,U> f)
public static <U> JavaPairRDD<U,Iterable<T>> groupBy(Function<T,U> f, int numPartitions)
public static JavaRDD<String> pipe(String command)
public static JavaRDD<String> pipe(java.util.List<String> command)
public static JavaRDD<String> pipe(java.util.List<String> command, java.util.Map<String,String> env)
public static JavaRDD<String> pipe(java.util.List<String> command, java.util.Map<String,String> env, boolean separateWorkingDir, int bufferSize)
public static JavaRDD<String> pipe(java.util.List<String> command, java.util.Map<String,String> env, boolean separateWorkingDir, int bufferSize, String encoding)
public static <U> JavaPairRDD<T,U> zip(JavaRDDLike<U,?> other)
public static <U,V> JavaRDD<V> zipPartitions(JavaRDDLike<U,?> other, FlatMapFunction2<java.util.Iterator<T>,java.util.Iterator<U>,V> f)
public static JavaPairRDD<T,Long> zipWithUniqueId()
public static JavaPairRDD<T,Long> zipWithIndex()
public static void foreach(VoidFunction<T> f)
public static java.util.List<T> collect()
public static java.util.Iterator<T> toLocalIterator()
public static java.util.List<T>[] collectPartitions(int[] partitionIds)
public static T reduce(Function2<T,T,T> f)
public static T treeReduce(Function2<T,T,T> f, int depth)
public static T treeReduce(Function2<T,T,T> f)
public static T fold(T zeroValue, Function2<T,T,T> f)
public static <U> U aggregate(U zeroValue, Function2<U,T,U> seqOp, Function2<U,U,U> combOp)
public static <U> U treeAggregate(U zeroValue, Function2<U,T,U> seqOp, Function2<U,U,U> combOp, int depth)
public static <U> U treeAggregate(U zeroValue, Function2<U,T,U> seqOp, Function2<U,U,U> combOp)
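A sketch of how the aggregate signature above is typically called from Java, computing a sum and a count in a single pass; the (sum, count) accumulator is illustrative, and a JavaSparkContext named sc is assumed:

```java
import java.util.Arrays;

import org.apache.spark.api.java.JavaRDD;
import scala.Tuple2;

// Assumes a JavaSparkContext named sc.
JavaRDD<Integer> values = sc.parallelize(Arrays.asList(1, 2, 3, 4));

// zeroValue is the neutral (sum, count) accumulator; seqOp folds one element into a
// partition-local accumulator; combOp merges accumulators across partitions.
Tuple2<Integer, Integer> sumAndCount = values.aggregate(
    new Tuple2<Integer, Integer>(0, 0),
    (acc, x) -> new Tuple2<>(acc._1() + x, acc._2() + 1),
    (a, b) -> new Tuple2<>(a._1() + b._1(), a._2() + b._2()));

double mean = (double) sumAndCount._1() / sumAndCount._2(); // 2.5
```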
public static long count()
public static PartialResult<BoundedDouble> countApprox(long timeout, double confidence)
public static PartialResult<BoundedDouble> countApprox(long timeout)
public static java.util.Map<T,Long> countByValue()
public static PartialResult<java.util.Map<T,BoundedDouble>> countByValueApprox(long timeout, double confidence)
public static PartialResult<java.util.Map<T,BoundedDouble>> countByValueApprox(long timeout)
public static java.util.List<T> take(int num)
public static java.util.List<T> takeSample(boolean withReplacement, int num)
public static java.util.List<T> takeSample(boolean withReplacement, int num, long seed)
public static T first()
public static boolean isEmpty()
public static void saveAsTextFile(String path)
public static void saveAsTextFile(String path, Class<? extends org.apache.hadoop.io.compress.CompressionCodec> codec)
public static void saveAsObjectFile(String path)
public static <U> JavaPairRDD<U,T> keyBy(Function<T,U> f)
public static void checkpoint()
public static boolean isCheckpointed()
public static Optional<String> getCheckpointFile()
public static String toDebugString()
public static java.util.List<T> top(int num, java.util.Comparator<T> comp)
public static java.util.List<T> top(int num)
public static java.util.List<T> takeOrdered(int num, java.util.Comparator<T> comp)
public static T max(java.util.Comparator<T> comp)
public static T min(java.util.Comparator<T> comp)
public static java.util.List<T> takeOrdered(int num)
public static long countApproxDistinct(double relativeSD)
public static String name()
public static JavaFutureAction<Long> countAsync()
public static JavaFutureAction<java.util.List<T>> collectAsync()
public static JavaFutureAction<java.util.List<T>> takeAsync(int num)
public static JavaFutureAction<Void> foreachAsync(VoidFunction<T> f)
public static JavaFutureAction<Void> foreachPartitionAsync(VoidFunction<java.util.Iterator<T>> f)
public static <R> boolean mapPartitionsWithIndex$default$2()
public scala.reflect.ClassTag<T> classTag()
public JavaRDD<T> cache()
Persist this RDD with the default storage level (MEMORY_ONLY).

public JavaRDD<T> persist(StorageLevel newLevel)
Set this RDD's storage level to persist its values across operations after the first time it is computed.
Parameters: newLevel - (undocumented)

public JavaRDD<T> unpersist()
Mark the RDD as non-persistent, and remove all blocks for it from memory and disk.

public JavaRDD<T> unpersist(boolean blocking)
Mark the RDD as non-persistent, and remove all blocks for it from memory and disk.
Parameters: blocking - Whether to block until all blocks are deleted.
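A sketch of the caching calls documented above, assuming a hypothetical JavaRDD<String> named parsed that is reused by more than one action:

```java
import org.apache.spark.storage.StorageLevel;

// Assumes an existing JavaRDD<String> named parsed.
parsed.cache();                        // shorthand for persist(StorageLevel.MEMORY_ONLY())
long total = parsed.count();           // first action materializes and caches the partitions
long nonEmpty = parsed.filter(s -> !s.isEmpty()).count(); // answered from the cached blocks

// For data that may not fit in memory, choose an explicit storage level instead of cache():
// parsed.persist(StorageLevel.MEMORY_AND_DISK());

parsed.unpersist(false);               // release the blocks without waiting for deletion to finish
```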
public JavaRDD<T> distinct()
Return a new RDD containing the distinct elements in this RDD.

public JavaRDD<T> distinct(int numPartitions)
Return a new RDD containing the distinct elements in this RDD.
Parameters: numPartitions - (undocumented)

public JavaRDD<T> filter(Function<T,Boolean> f)
Return a new RDD containing only the elements that satisfy a predicate.
Parameters: f - (undocumented)
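A sketch of filter and distinct, assuming a JavaSparkContext named sc; since Function<T,Boolean> is a functional interface, a plain lambda can serve as the predicate:

```java
import java.util.Arrays;

import org.apache.spark.api.java.JavaRDD;

// Assumes a JavaSparkContext named sc.
JavaRDD<Integer> nums = sc.parallelize(Arrays.asList(1, 2, 2, 3, 4, 4, 5));

// filter keeps only the elements for which the predicate returns true.
JavaRDD<Integer> evens = nums.filter(x -> x % 2 == 0);

// distinct removes duplicates; it shuffles, so it is more expensive than filter.
System.out.println(evens.distinct().collect()); // e.g. [4, 2] (ordering is not defined)
```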
public JavaRDD<T> coalesce(int numPartitions)
Return a new RDD that is reduced into numPartitions partitions.
Parameters: numPartitions - (undocumented)

public JavaRDD<T> coalesce(int numPartitions, boolean shuffle)
Return a new RDD that is reduced into numPartitions partitions.
Parameters: numPartitions - (undocumented), shuffle - (undocumented)
public JavaRDD<T> repartition(int numPartitions)
Return a new RDD that has exactly numPartitions partitions. Can increase or decrease the level of parallelism in this RDD. Internally, this uses a shuffle to redistribute data. If you are decreasing the number of partitions in this RDD, consider using coalesce, which can avoid performing a shuffle.
Parameters: numPartitions - (undocumented)
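A sketch of the practical difference between coalesce and repartition, assuming a hypothetical JavaRDD<String> named logs with many small partitions:

```java
import org.apache.spark.api.java.JavaRDD;

// Assumes an existing JavaRDD<String> named logs.
System.out.println(logs.getNumPartitions());

// Shrinking the partition count: coalesce can usually avoid a shuffle.
JavaRDD<String> fewer = logs.coalesce(4);

// Growing the partition count (or rebalancing skewed data) needs a shuffle,
// which is exactly what repartition always performs.
JavaRDD<String> more = logs.repartition(32);
```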
public JavaRDD<T> sample(boolean withReplacement, double fraction)
Return a sampled subset of this RDD with a random seed.
Parameters:
withReplacement - can elements be sampled multiple times (replaced when sampled out)
fraction - expected size of the sample as a fraction of this RDD's size; without replacement: probability that each element is chosen, fraction must be [0, 1]; with replacement: expected number of times each element is chosen, fraction must be greater than or equal to 0
Note: this is NOT guaranteed to provide exactly the fraction of the count of the given RDD.

public JavaRDD<T> sample(boolean withReplacement, double fraction, long seed)
Return a sampled subset of this RDD, with a user-supplied seed.
Parameters:
withReplacement - can elements be sampled multiple times (replaced when sampled out)
fraction - expected size of the sample as a fraction of this RDD's size; without replacement: probability that each element is chosen, fraction must be [0, 1]; with replacement: expected number of times each element is chosen, fraction must be greater than or equal to 0
seed - seed for the random number generator
Note: this is NOT guaranteed to provide exactly the fraction of the count of the given RDD.

public JavaRDD<T>[] randomSplit(double[] weights)
Randomly splits this RDD with the provided weights.
Parameters: weights - weights for splits, will be normalized if they don't sum to 1

public JavaRDD<T>[] randomSplit(double[] weights, long seed)
Randomly splits this RDD with the provided weights.
Parameters:
weights - weights for splits, will be normalized if they don't sum to 1
seed - random seed
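A sketch of sample and randomSplit, assuming a hypothetical JavaRDD<String> named events; the fraction, weights, and seed are illustrative:

```java
import org.apache.spark.api.java.JavaRDD;

// Assumes an existing JavaRDD<String> named events.

// Roughly 10% of the data, sampled without replacement; the resulting size is not exact.
JavaRDD<String> tenPercent = events.sample(false, 0.1, 42L);

// Weights are normalized if they do not sum to 1; each split is itself a JavaRDD.
JavaRDD<String>[] splits = events.randomSplit(new double[] {0.8, 0.2}, 42L);
JavaRDD<String> training = splits[0];
JavaRDD<String> test = splits[1];
```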
public JavaRDD<T> union(JavaRDD<T> other)
Return the union of this RDD and another one. Any identical elements will appear multiple times (use .distinct() to eliminate them).
Parameters: other - (undocumented)

public JavaRDD<T> intersection(JavaRDD<T> other)
Return the intersection of this RDD and another one.
Parameters: other - (undocumented)

public JavaRDD<T> subtract(JavaRDD<T> other)
Return an RDD with the elements from this that are not in other.
Uses this partitioner/partition size, because even if other is huge, the resulting RDD will be less than or equal to us.
Parameters: other - (undocumented)

public JavaRDD<T> subtract(JavaRDD<T> other, int numPartitions)
Return an RDD with the elements from this that are not in other.
Parameters: other - (undocumented), numPartitions - (undocumented)

public JavaRDD<T> subtract(JavaRDD<T> other, Partitioner p)
Return an RDD with the elements from this that are not in other.
Parameters: other - (undocumented), p - (undocumented)

public String toString()
Overrides: toString in class Object
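Finally, a sketch of the set-style operations documented above (union, intersection, and subtract), assuming a JavaSparkContext named sc:

```java
import java.util.Arrays;

import org.apache.spark.api.java.JavaRDD;

// Assumes a JavaSparkContext named sc.
JavaRDD<Integer> a = sc.parallelize(Arrays.asList(1, 2, 3, 4));
JavaRDD<Integer> b = sc.parallelize(Arrays.asList(3, 4, 5));

// union keeps duplicates; chain .distinct() to eliminate them.
System.out.println(a.union(b).collect());        // e.g. [1, 2, 3, 4, 3, 4, 5]
System.out.println(a.intersection(b).collect()); // [3, 4] in some order
System.out.println(a.subtract(b).collect());     // [1, 2] in some order
```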