public class PMIPairSort extends SequenceFileTextStage<org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.NullWritable,org.apache.hadoop.io.Text>
Modifier and Type | Field and Description |
---|---|
static String |
MINP_KEY
The minimum PMI
|
static String |
MINPAIRCOUNT_KEY
The minimum number of pairs
|
static String |
PAIRMI_LOC
The location of the pairmi
|
static String |
PMI_NAME
the output name
|
Constructor and Description |
---|
PMIPairSort(double minp,
int minPairCount,
org.apache.hadoop.fs.Path outpath) |
PMIPairSort(double minp,
org.apache.hadoop.fs.Path outpath) |
Modifier and Type | Method and Description |
---|---|
Class<? extends org.apache.hadoop.mapreduce.Mapper<org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable>> |
mapper()
By default this method returns the
IdentityMapper class. |
String |
outname() |
static IndependentPair<Long,Double> |
parseTimeBinary(byte[] bytes)
read time and pmi from a byte array.
|
static IndependentPair<Long,Double> |
parseTimeBinary(byte[] bytes,
int start,
int len)
Use a ByteArrayInputStream and a DataInputStream to read a byte[]. |
Class<? extends org.apache.hadoop.mapreduce.Reducer<org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.NullWritable,org.apache.hadoop.io.Text>> |
reducer()
By default this method returns the
IdentityReducer class. |
void |
setup(org.apache.hadoop.mapreduce.Job job)
Add any final adjustments to the job's config
|
static byte[] |
timePMIBinary(long timet,
double pmi)
Write time and pmi to a byte array.
|
combiner, finished, lzoCompress, setCombinerClass, setMapperClass, setReducerClass, stage
public static final String MINP_KEY
public static final String PMI_NAME
public static final String MINPAIRCOUNT_KEY
public static final String PAIRMI_LOC
public PMIPairSort(double minp, org.apache.hadoop.fs.Path outpath)
Parameters:
minp - the minimum PMI value
outpath - for loading the PMIStats file
public PMIPairSort(double minp, int minPairCount, org.apache.hadoop.fs.Path outpath)
Parameters:
minp - the minimum PMI value
minPairCount - the minimum number of pairs to emit
outpath - for loading the PMIStats file
public Class<? extends org.apache.hadoop.mapreduce.Mapper<org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable>> mapper()
Description copied from class: Stage
By default this method returns the IdentityMapper class. This mapper outputs the values handed as they are.
Overrides: mapper
in class Stage<org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat<org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable>,org.apache.hadoop.mapreduce.lib.output.TextOutputFormat<org.apache.hadoop.io.NullWritable,org.apache.hadoop.io.Text>,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.NullWritable,org.apache.hadoop.io.Text>
public Class<? extends org.apache.hadoop.mapreduce.Reducer<org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.NullWritable,org.apache.hadoop.io.Text>> reducer()
Description copied from class: Stage
By default this method returns the IdentityReducer class. This reducer outputs the values handed as they are.
Overrides: reducer
in class Stage<org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat<org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable>,org.apache.hadoop.mapreduce.lib.output.TextOutputFormat<org.apache.hadoop.io.NullWritable,org.apache.hadoop.io.Text>,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.NullWritable,org.apache.hadoop.io.Text>
public String outname()
Overrides: outname
in class Stage<org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat<org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable>,org.apache.hadoop.mapreduce.lib.output.TextOutputFormat<org.apache.hadoop.io.NullWritable,org.apache.hadoop.io.Text>,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.NullWritable,org.apache.hadoop.io.Text>
public void setup(org.apache.hadoop.mapreduce.Job job)
Description copied from class: Stage
Add any final adjustments to the job's config.
Overrides: setup
in class Stage<org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat<org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable>,org.apache.hadoop.mapreduce.lib.output.TextOutputFormat<org.apache.hadoop.io.NullWritable,org.apache.hadoop.io.Text>,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.NullWritable,org.apache.hadoop.io.Text>
public static byte[] timePMIBinary(long timet, double pmi) throws IOException
Write time and pmi to a byte array.
Parameters:
timet - the time to write
pmi - the pmi to write
Throws:
IOException
public static IndependentPair<Long,Double> parseTimeBinary(byte[] bytes) throws IOException
Read time and pmi from a byte array, using parseTimeBinary(byte[], int, int) with start = 0 and len = bytes.length.
Parameters:
bytes - the bytes to parse
Throws:
IOException
public static IndependentPair<Long,Double> parseTimeBinary(byte[] bytes, int start, int len) throws IOException
Use a ByteArrayInputStream and a DataInputStream to read a byte[].
Parameters:
bytes - the bytes to parse
start - offset into bytes
len - length to read
Throws:
IOException