????map????????????£?
private static void setMapCount(long totalBytes?? JobConf job)
throws IOException {
int numMaps =
(int)(totalBytes / job.getLong(BYTES_PER_MAP_LABEL?? BYTES_PER_MAP));
numMaps = Math.min(numMaps??
job.getInt(MAX_MAPS_LABEL?? MAX_MAPS_PER_NODE *
new JobClient(job).getClusterStatus().getTaskTrackers()));
job.setNumMapTasks(Math.max(numMaps?? 1));
}
????????????????DistCp??????ж????????tasktracker?????????map?????????????o??map????????????????
?????з???????£?
// Read the listing file record by record and cut it into FileSplits whose
// accumulated key values stay at or below targetsize where possible.
SequenceFile.Reader sl = null;
try {
  sl = new SequenceFile.Reader(fs, src, job);
  // `last` is updated to the reader position after each record is processed,
  // so inside the body it still marks the start of the current record.
  for (; sl.next(key, value); last = sl.getPosition()) {
    // if adding this split would put this split past the target size,
    // cut the last split and put this next file in the next split.
    if (acc + key.get() > targetsize && acc != 0) {
      long splitsize = last - pos;
      splits.add(new FileSplit(src, pos, splitsize, (String[])null));
      cbrem -= splitsize;
      pos = last;
      acc = 0L;
    }
    // The `acc != 0` guard above means a single record larger than
    // targetsize still gets a split of its own instead of an empty one.
    acc += key.get();
  }
}
finally {
  // sl is still null if the Reader constructor threw; checkAndClose is
  // presumably null-safe -- verify against its definition.
  checkAndClose(sl);
}
????split??????Mapper??н?Σ?map task??????????????????????????б??????е????????????????????Permission??Replication??Block Size????????????趨??-update??????????????update???ж??????趨??-overwrite?????????е??????????Owner?????б?????£?????????????????map????????????????????t??????????map???濽????????????????Owner??????????λ????????????????????????????????Tool??????Tool?????????Configurable?????????-D??????????DistCp???????????Ч????????趨“-Ddfs.replication=1”??????????????????replication?????????1????????????????????д??????????????????????????????????????????????????????????????Datanode????????????????????????????????????????????????????????????distcp?????????????????????