/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.io.nativeio;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeys;
import org.apache.hadoop.fs.HardLink;
import org.apache.hadoop.fs.PathIOException;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.SecureIOUtils.AlreadyExistsException;
import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.util.CleanerUtil;
import org.apache.hadoop.util.NativeCodeLoader;
import org.apache.hadoop.util.PerformanceAdvisory;
import org.apache.hadoop.util.Shell;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import sun.misc.Unsafe;
import java.io.*;
import java.lang.reflect.Field;
import java.nio.ByteBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
/**
* JNI wrappers for various native IO-related calls not available in Java.
* These functions should generally be used alongside a fallback to another
* more portable mechanism.u
*/
@InterfaceAudience.Private
@InterfaceStability.Unstable
public class NativeIO
{
public static class POSIX
{
// Flags for open() call from bits/fcntl.h - Set by JNI
public static int O_RDONLY = -1;
public static int O_WRONLY = -1;
public static int O_RDWR = -1;
public static int O_CREAT = -1;
public static int O_EXCL = -1;
public static int O_NOCTTY = -1;
public static int O_TRUNC = -1;
public static int O_APPEND = -1;
public static int O_NONBLOCK = -1;
public static int O_SYNC = -1;
// Flags for posix_fadvise() from bits/fcntl.h - Set by JNI
/* No further special treatment. */
public static int POSIX_FADV_NORMAL = -1;
/* Expect random page references. */
public static int POSIX_FADV_RANDOM = -1;
/* Expect sequential page references. */
public static int POSIX_FADV_SEQUENTIAL = -1;
/* Will need these pages. */
public static int POSIX_FADV_WILLNEED = -1;
/* Don't need these pages. */
public static int POSIX_FADV_DONTNEED = -1;
/* Data will be accessed once. */
public static int POSIX_FADV_NOREUSE = -1;
// Updated by JNI when supported by glibc. Leave defaults in case kernel
// supports sync_file_range, but glibc does not.
/* Wait upon writeout of all pages
in the range before performing the
write. */
public static int SYNC_FILE_RANGE_WAIT_BEFORE = 1;
/* Initiate writeout of all those
dirty pages in the range which are
not presently under writeback. */
public static int SYNC_FILE_RANGE_WRITE = 2;
/* Wait upon writeout of all pages in
the range after performing the
write. */
public static int SYNC_FILE_RANGE_WAIT_AFTER = 4;
/**
* Keeps the support state of PMDK.
*/
public enum SupportState
{
UNSUPPORTED(-1),PMDK_LIB_NOT_FOUND(1),SUPPORTED(0);
private byte stateCode;
SupportState(int stateCode)
{
this.stateCode = (byte)stateCode;
}
public int getStateCode()
{
return stateCode;
}
public String getMessage()
{
String msg;
switch(stateCode)
{
// -1 represents UNSUPPORTED.
case -1:
msg = "The native code was built without PMDK support.";
break;
// 1 represents PMDK_LIB_NOT_FOUND.
case 1:
msg = "The native code was built with PMDK support, but PMDK libs " + "were NOT found in execution environment or failed to be loaded.";
break;
// 0 represents SUPPORTED.
case 0:
msg = "The native code was built with PMDK support, and PMDK libs " + "were loaded successfully.";
break;
default:
msg = "The state code: " + stateCode + " is unrecognized!";
}
return msg;
}
}
// Denotes the state of supporting PMDK. The value is set by JNI.
private static SupportState pmdkSupportState = SupportState.UNSUPPORTED;
private static final Logger LOG = LoggerFactory.getLogger(NativeIO.class);
// Set to true via JNI if possible
public static boolean fadvisePossible = false;
private static boolean nativeLoaded = false;
private static boolean syncFileRangePossible = true;
static final String WORKAROUND_NON_THREADSAFE_CALLS_KEY = "hadoop.workaround.non.threadsafe.getpwuid";
static final boolean WORKAROUND_NON_THREADSAFE_CALLS_DEFAULT = true;
private static long cacheTimeout = -1;
private static CacheManipulator cacheManipulator = new CacheManipulator();
public static CacheManipulator getCacheManipulator()
{
return cacheManipulator;
}
public static void setCacheManipulator(CacheManipulator cacheManipulator)
{
POSIX.cacheManipulator = cacheManipulator;
}
// This method is invoked by JNI.
public static void setPmdkSupportState(int stateCode)
{
for(SupportState state : SupportState.values())
{
if(state.getStateCode() == stateCode)
{
pmdkSupportState = state;
return;
}
}
LOG.error("The state code: " + stateCode + " is unrecognized!");
}
public static String getPmdkSupportStateMessage()
{
if(getPmdkLibPath() != null)
{
return pmdkSupportState.getMessage() + " The pmdk lib path: " + getPmdkLibPath();
}
return pmdkSupportState.getMessage();
}
public static boolean isPmdkAvailable()
{
LOG.info(pmdkSupportState.getMessage());
return pmdkSupportState == SupportState.SUPPORTED;
}
/**
* Denote memory region for a file mapped.
*/
public static class PmemMappedRegion
{
private long address;
private long length;
private boolean isPmem;
public PmemMappedRegion(long address,long length,boolean isPmem)
{
this.address = address;
this.length = length;
this.isPmem = isPmem;
}
public boolean isPmem()
{
return this.isPmem;
}
public long getAddress()
{
return this.address;
}
public long getLength()
{
return this.length;
}
}
/**
* JNI wrapper of persist memory operations.
*/
public static class Pmem
{
// Check whether the address is a Pmem address or DIMM address
public static boolean isPmem(long address,long length)
{
return POSIX.isPmemCheck(address,length);
}
// Map a file in persistent memory, if the given file exists,
// directly map it. If not, create the named file on persistent memory
// and then map it.
public static PmemMappedRegion mapBlock(String path,long length,boolean isFileExist)
{
return POSIX.pmemMapFile(path,length,isFileExist);
}
// Unmap a pmem file
public static boolean unmapBlock(long address,long length)
{
return POSIX.pmemUnMap(address,length);
}
// Copy data from disk file(src) to pmem file(dest), without flush
public static void memCopy(byte[] src,long dest,boolean isPmem,long length)
{
POSIX.pmemCopy(src,dest,isPmem,length);
}
// Flush the memory content to persistent storage
public static void memSync(PmemMappedRegion region)
{
if(region.isPmem())
{
POSIX.pmemDrain();
}
else
{
POSIX.pmemSync(region.getAddress(),region.getLength());
}
}
public static String getPmdkLibPath()
{
return POSIX.getPmdkLibPath();
}
}
private static native String getPmdkLibPath();
private static native boolean isPmemCheck(long address,long length);
private static native PmemMappedRegion pmemMapFile(String path,long length,boolean isFileExist);
private static native boolean pmemUnMap(long address,long length);
private static native void pmemCopy(byte[] src,long dest,boolean isPmem,long length);
private static native void pmemDrain();
private static native void pmemSync(long address,long length);
/**
* Used to manipulate the operating system cache.
*/
@VisibleForTesting
public static class CacheManipulator
{
public void mlock(String identifier,ByteBuffer buffer,long len) throws IOException
{
POSIX.mlock(buffer,len);
}
public long getMemlockLimit()
{
return NativeIO.getMemlockLimit();
}
public long getOperatingSystemPageSize()
{
return NativeIO.getOperatingSystemPageSize();
}
public void posixFadviseIfPossible(String identifier,FileDescriptor fd,long offset,long len,int flags) throws NativeIOException
{
POSIX.posixFadviseIfPossible(identifier,fd,offset,len,flags);
}
public boolean verifyCanMlock()
{
return NativeIO.isAvailable();
}
}
/**
* A CacheManipulator used for testing which does not actually call mlock.
* This allows many tests to be run even when the operating system does not
* allow mlock, or only allows limited mlocking.
*/
@VisibleForTesting
public static class NoMlockCacheManipulator extends CacheManipulator
{
public void mlock(String identifier,ByteBuffer buffer,long len) throws IOException
{
LOG.info("mlocking " + identifier);
}
public long getMemlockLimit()
{
return 1125899906842624L;
}
public long getOperatingSystemPageSize()
{
return 4096;
}
public boolean verifyCanMlock()
{
return true;
}
}
static
{
if(NativeCodeLoader.isNativeCodeLoaded())
{
try
{
Configuration conf = new Configuration();
workaroundNonThreadSafePasswdCalls = conf.getBoolean(WORKAROUND_NON_THREADSAFE_CALLS_KEY,WORKAROUND_NON_THREADSAFE_CALLS_DEFAULT);
initNative();
nativeLoaded = true;
cacheTimeout = conf.getLong(CommonConfigurationKeys.HADOOP_SECURITY_UID_NAME_CACHE_TIMEOUT_KEY,CommonConfigurationKeys.HADOOP_SECURITY_UID_NAME_CACHE_TIMEOUT_DEFAULT) * 1000;
LOG.debug("Initialized cache for IDs to User/Group mapping with a " + " cache timeout of " + cacheTimeout / 1000 + " seconds.");
}
catch(Throwable t)
{
// This can happen if the user has an older version of libhadoop.so
// installed - in this case we can continue without native IO
// after warning
PerformanceAdvisory.LOG.debug("Unable to initialize NativeIO libraries",t);
}
}
}
/**
* @return Return true if the JNI-based native IO extensions are available.
*/
public static boolean isAvailable()
{
return NativeCodeLoader.isNativeCodeLoaded() && nativeLoaded;
}
private static void assertCodeLoaded() throws IOException
{
if(!isAvailable())
{
throw new IOException("NativeIO was not loaded");
}
}
/**
* Wrapper around open(2) .
*
* @param path input path.
* @param flags input flags.
* @param mode input mode.
* @return FileDescriptor.
* @throws IOException raised on errors performing I/O.
*/
public static native FileDescriptor open(String path,int flags,int mode) throws IOException;
/** Wrapper around fstat(2) */
private static native Stat fstat(FileDescriptor fd) throws IOException;
/** Wrapper around stat(2). */
private static native Stat stat(String path) throws IOException;
/** Native chmod implementation. On UNIX, it is a wrapper around chmod(2) */
private static native void chmodImpl(String path,int mode) throws IOException;
public static void chmod(String path,int mode) throws IOException
{
if(!Shell.WINDOWS)
{
chmodImpl(path,mode);
}
else
{
try
{
chmodImpl(path,mode);
}
catch(NativeIOException nioe)
{
if(nioe.getErrorCode() == 3)
{
throw new NativeIOException("No such file or directory",Errno.ENOENT);
}
else
{
LOG.warn(String.format("NativeIO.chmod error (%d): %s",nioe.getErrorCode(),nioe.getMessage()));
throw new NativeIOException("Unknown error",Errno.UNKNOWN);
}
}
}
}
/** Wrapper around posix_fadvise(2) */
static native void posix_fadvise(FileDescriptor fd,long offset,long len,int flags) throws NativeIOException;
/** Wrapper around sync_file_range(2) */
static native void sync_file_range(FileDescriptor fd,long offset,long nbytes,int flags) throws NativeIOException;
/**
* Call posix_fadvise on the given file descriptor. See the manpage
* for this syscall for more information. On systems where this
* call is not available, does nothing.
*
* @throws NativeIOException if there is an error with the syscall
*/
static void posixFadviseIfPossible(String identifier,FileDescriptor fd,long offset,long len,int flags) throws NativeIOException
{
if(nativeLoaded && fadvisePossible)
{
try
{
posix_fadvise(fd,offset,len,flags);
}
catch(UnsatisfiedLinkError ule)
{
fadvisePossible = false;
}
}
}
/**
* Call sync_file_range on the given file descriptor. See the manpage
* for this syscall for more information. On systems where this
* call is not available, does nothing.
*
* @param fd input fd.
* @param offset input offset.
* @param nbytes input nbytes.
* @param flags input flag.
* @throws NativeIOException if there is an error with the syscall
*/
public static void syncFileRangeIfPossible(FileDescriptor fd,long offset,long nbytes,int flags) throws NativeIOException
{
if(nativeLoaded && syncFileRangePossible)
{
try
{
sync_file_range(fd,offset,nbytes,flags);
}
catch(UnsupportedOperationException uoe)
{
syncFileRangePossible = false;
}
catch(UnsatisfiedLinkError ule)
{
syncFileRangePossible = false;
}
}
}
static native void mlock_native(ByteBuffer buffer,long len) throws NativeIOException;
/**
* Locks the provided direct ByteBuffer into memory, preventing it from
* swapping out. After a buffer is locked, future accesses will not incur
* a page fault.
* <p>
* See the mlock(2) man page for more information.
*
* @throws NativeIOException
*/
static void mlock(ByteBuffer buffer,long len) throws IOException
{
assertCodeLoaded();
if(!buffer.isDirect())
{
throw new IOException("Cannot mlock a non-direct ByteBuffer");
}
mlock_native(buffer,len);
}
/**
* Unmaps the block from memory. See munmap(2).
* <p>
* There isn't any portable way to unmap a memory region in Java.
* So we use the sun.nio method here.
* Note that unmapping a memory region could cause crashes if code
* continues to reference the unmapped code. However, if we don't
* manually unmap the memory, we are dependent on the finalizer to
* do it, and we have no idea when the finalizer will run.
*
* @param buffer The buffer to unmap.
*/
public static void munmap(MappedByteBuffer buffer)
{
if(CleanerUtil.UNMAP_SUPPORTED)
{
try
{
CleanerUtil.getCleaner().freeBuffer(buffer);
}
catch(IOException e)
{
LOG.info("Failed to unmap the buffer",e);
}
}
else
{
LOG.trace(CleanerUtil.UNMAP_NOT_SUPPORTED_REASON);
}
}
/** Linux only methods used for getOwner() implementation */
private static native long getUIDforFDOwnerforOwner(FileDescriptor fd) throws IOException;
private static native String getUserName(long uid) throws IOException;
/**
* Result type of the fstat call
*/
public static class Stat
{
private int ownerId, groupId;
private String owner, group;
private int mode;
// Mode constants - Set by JNI
public static int S_IFMT = -1; /* type of file */
public static int S_IFIFO = -1; /* named pipe (fifo) */
public static int S_IFCHR = -1; /* character special */
public static int S_IFDIR = -1; /* directory */
public static int S_IFBLK = -1; /* block special */
public static int S_IFREG = -1; /* regular */
public static int S_IFLNK = -1; /* symbolic link */
public static int S_IFSOCK = -1; /* socket */
public static int S_ISUID = -1; /* set user id on execution */
public static int S_ISGID = -1; /* set group id on execution */
public static int S_ISVTX = -1; /* save swapped text even after use */
public static int S_IRUSR = -1; /* read permission, owner */
public static int S_IWUSR = -1; /* write permission, owner */
public static int S_IXUSR = -1; /* execute/search permission, owner */
Stat(int ownerId,int groupId,int mode)
{
this.ownerId = ownerId;
this.groupId = groupId;
this.mode = mode;
}
Stat(String owner,String group,int mode)
{
if(!Shell.WINDOWS)
{
this.owner = owner;
}
else
{
this.owner = stripDomain(owner);
}
if(!Shell.WINDOWS)
{
this.group = group;
}
else
{
this.group = stripDomain(group);
}
this.mode = mode;
}
@Override
public String toString()
{
return "Stat(owner='" + owner + "', group='" + group + "'" + ", mode=" + mode + ")";
}
public String getOwner()
{
return owner;
}
public String getGroup()
{
return group;
}
public int getMode()
{
return mode;
}
}
/**
* Returns the file stat for a file descriptor.
*
* @param fd file descriptor.
* @return the file descriptor file stat.
* @throws IOException thrown if there was an IO error while obtaining the file stat.
*/
public static Stat getFstat(FileDescriptor fd) throws IOException
{
Stat stat = null;
if(!Shell.WINDOWS)
{
stat = fstat(fd);
stat.owner = getName(IdCache.USER,stat.ownerId);
stat.group = getName(IdCache.GROUP,stat.groupId);
}
else
{
try
{
stat = fstat(fd);
}
catch(NativeIOException nioe)
{
if(nioe.getErrorCode() == 6)
{
throw new NativeIOException("The handle is invalid.",Errno.EBADF);
}
else
{
LOG.warn(String.format("NativeIO.getFstat error (%d): %s",nioe.getErrorCode(),nioe.getMessage()));
throw new NativeIOException("Unknown error",Errno.UNKNOWN);
}
}
}
return stat;
}
/**
* Return the file stat for a file path.
*
* @param path file path
* @return the file stat
* @throws IOException thrown if there is an IO error while obtaining the
* file stat
*/
public static Stat getStat(String path) throws IOException
{
if(path == null)
{
String errMessage = "Path is null";
LOG.warn(errMessage);
throw new IOException(errMessage);
}
Stat stat = null;
try
{
if(!Shell.WINDOWS)
{
stat = stat(path);
stat.owner = getName(IdCache.USER,stat.ownerId);
stat.group = getName(IdCache.GROUP,stat.groupId);
}
else
{
stat = stat(path);
}
}
catch(NativeIOException nioe)
{
LOG.warn("NativeIO.getStat error ({}): {} -- file path: {}",nioe.getErrorCode(),nioe.getMessage(),path);
throw new PathIOException(path,nioe);
}
return stat;
}
private static String getName(IdCache domain,int id) throws IOException
{
Map<Integer,CachedName> idNameCache = (domain == IdCache.USER) ? USER_ID_NAME_CACHE : GROUP_ID_NAME_CACHE;
String name;
CachedName cachedName = idNameCache.get(id);
long now = System.currentTimeMillis();
if(cachedName != null && (cachedName.timestamp + cacheTimeout) > now)
{
name = cachedName.name;
}
else
{
name = (domain == IdCache.USER) ? getUserName(id) : getGroupName(id);
if(LOG.isDebugEnabled())
{
String type = (domain == IdCache.USER) ? "UserName" : "GroupName";
LOG.debug("Got " + type + " " + name + " for ID " + id + " from the native implementation");
}
cachedName = new CachedName(name,now);
idNameCache.put(id,cachedName);
}
return name;
}
static native String getUserName(int uid) throws IOException;
static native String getGroupName(int uid) throws IOException;
private static class CachedName
{
final long timestamp;
final String name;
public CachedName(String name,long timestamp)
{
this.name = name;
this.timestamp = timestamp;
}
}
private static final Map<Integer,CachedName> USER_ID_NAME_CACHE = new ConcurrentHashMap<Integer,CachedName>();
private static final Map<Integer,CachedName> GROUP_ID_NAME_CACHE = new ConcurrentHashMap<Integer,CachedName>();
private enum IdCache
{USER,GROUP}
public final static int MMAP_PROT_READ = 0x1;
public final static int MMAP_PROT_WRITE = 0x2;
public final static int MMAP_PROT_EXEC = 0x4;
public static native long mmap(FileDescriptor fd,int prot,boolean shared,long length) throws IOException;
public static native void munmap(long addr,long length) throws IOException;
}
private static boolean workaroundNonThreadSafePasswdCalls = false;
public static class Windows
{
// Flags for CreateFile() call on Windows
public static final long GENERIC_READ = 0x80000000L;
public static final long GENERIC_WRITE = 0x40000000L;
public static final long FILE_SHARE_READ = 0x00000001L;
public static final long FILE_SHARE_WRITE = 0x00000002L;
public static final long FILE_SHARE_DELETE = 0x00000004L;
public static final long CREATE_NEW = 1;
public static final long CREATE_ALWAYS = 2;
public static final long OPEN_EXISTING = 3;
public static final long OPEN_ALWAYS = 4;
public static final long TRUNCATE_EXISTING = 5;
public static final long FILE_BEGIN = 0;
public static final long FILE_CURRENT = 1;
public static final long FILE_END = 2;
public static final long FILE_ATTRIBUTE_NORMAL = 0x00000080L;
/**
* Create a directory with permissions set to the specified mode. By setting
* permissions at creation time, we avoid issues related to the user lacking
* WRITE_DAC rights on subsequent chmod calls. One example where this can
* occur is writing to an SMB share where the user does not have Full Control
* rights, and therefore WRITE_DAC is denied.
*
* @param path directory to create
* @param mode permissions of new directory
* @throws IOException if there is an I/O error
*/
public static void createDirectoryWithMode(File path,int mode) throws IOException
{
createDirectoryWithMode0(path.getAbsolutePath(),mode);
}
/** Wrapper around CreateDirectory() on Windows */
private static native void createDirectoryWithMode0(String path,int mode) throws NativeIOException;
/**
* @param path input path.
* @param desiredAccess input desiredAccess.
* @param shareMode input shareMode.
* @param creationDisposition input creationDisposition.
* @return Wrapper around CreateFile() on Windows.
* @throws IOException raised on errors performing I/O.
*/
public static native FileDescriptor createFile(String path,long desiredAccess,long shareMode,long creationDisposition) throws IOException;
/**
* Create a file for write with permissions set to the specified mode. By
* setting permissions at creation time, we avoid issues related to the user
* lacking WRITE_DAC rights on subsequent chmod calls. One example where
* this can occur is writing to an SMB share where the user does not have
* Full Control rights, and therefore WRITE_DAC is denied.
* <p>
* This method mimics the semantics implemented by the JDK in
* {@link FileOutputStream}. The file is opened for truncate or
* append, the sharing mode allows other readers and writers, and paths
* longer than MAX_PATH are supported. (See io_util_md.c in the JDK.)
*
* @param path file to create
* @param append if true, then open file for append
* @param mode permissions of new directory
* @return FileOutputStream of opened file
* @throws IOException if there is an I/O error
*/
public static FileOutputStream createFileOutputStreamWithMode(File path,boolean append,int mode) throws IOException
{
long desiredAccess = GENERIC_WRITE;
long shareMode = FILE_SHARE_READ | FILE_SHARE_WRITE;
long creationDisposition = append ? OPEN_ALWAYS : CREATE_ALWAYS;
return new FileOutputStream(createFileWithMode0(path.getAbsolutePath(),desiredAccess,shareMode,creationDisposition,mode));
}
/** Wrapper around CreateFile() with security descriptor on Windows */
private static native FileDescriptor createFileWithMode0(String path,long desiredAccess,long shareMode,long creationDisposition,int mode) throws NativeIOException;
/**
* @param fd input fd.
* @param distanceToMove input distanceToMove.
* @param moveMethod input moveMethod.
* @return Wrapper around SetFilePointer() on Windows.
* @throws IOException raised on errors performing I/O.
*/
public static native long setFilePointer(FileDescriptor fd,long distanceToMove,long moveMethod) throws IOException;
/** Windows only methods used for getOwner() implementation */
private static native String getOwner(FileDescriptor fd) throws IOException;
/** Supported list of Windows access right flags */
public enum AccessRight
{
ACCESS_READ(0x0001), // FILE_READ_DATA
ACCESS_WRITE(0x0002), // FILE_WRITE_DATA
ACCESS_EXECUTE(0x0020); // FILE_EXECUTE
private final int accessRight;
AccessRight(int access)
{
accessRight = access;
}
public int accessRight()
{
return accessRight;
}
}
;
/**
* Windows only method used to check if the current process has requested
* access rights on the given path.
*/
private static native boolean access0(String path,int requestedAccess);
/**
* Checks whether the current process has desired access rights on
* the given path.
* <p>
* Longer term this native function can be substituted with JDK7
* function Files#isReadable, isWritable, isExecutable.
*
* @param path input path
* @param desiredAccess ACCESS_READ, ACCESS_WRITE or ACCESS_EXECUTE
* @return true if access is allowed
* @throws IOException I/O exception on error
*/
public static boolean access(String path,AccessRight desiredAccess) throws IOException
{
//return access0(path,desiredAccess.accessRight());
return true;
}
/**
* Extends both the minimum and maximum working set size of the current
* process. This method gets the current minimum and maximum working set
* size, adds the requested amount to each and then sets the minimum and
* maximum working set size to the new values. Controlling the working set
* size of the process also controls the amount of memory it can lock.
*
* @param delta amount to increment minimum and maximum working set size
* @throws IOException for any error
* @see POSIX#mlock(ByteBuffer,long)
*/
public static native void extendWorkingSetSize(long delta) throws IOException;
static
{
if(NativeCodeLoader.isNativeCodeLoaded())
{
try
{
initNative();
nativeLoaded = true;
}
catch(Throwable t)
{
// This can happen if the user has an older version of libhadoop.so
// installed - in this case we can continue without native IO
// after warning
PerformanceAdvisory.LOG.debug("Unable to initialize NativeIO libraries",t);
}
}
}
}
private static final Logger LOG = LoggerFactory.getLogger(NativeIO.class);
private static boolean nativeLoaded = false;
static
{
if(NativeCodeLoader.isNativeCodeLoaded())
{
try
{
initNative();
nativeLoaded = true;
}
catch(Throwable t)
{
// This can happen if the user has an older version of libhadoop.so
// installed - in this case we can continue without native IO
// after warning
PerformanceAdvisory.LOG.debug("Unable to initialize NativeIO libraries",t);
}
}
}
/**
* @return Return true if the JNI-based native IO extensions are available.
*/
public static boolean isAvailable()
{
return NativeCodeLoader.isNativeCodeLoaded() && nativeLoaded;
}
/** Initialize the JNI method ID and class ID cache */
private static native void initNative();
/**
* Get the maximum number of bytes that can be locked into memory at any
* given point.
*
* @return 0 if no bytes can be locked into memory;
* Long.MAX_VALUE if there is no limit;
* The number of bytes that can be locked into memory otherwise.
*/
static long getMemlockLimit()
{
return isAvailable() ? getMemlockLimit0() : 0;
}
private static native long getMemlockLimit0();
/**
* @return the operating system's page size.
*/
static long getOperatingSystemPageSize()
{
try
{
Field f = Unsafe.class.getDeclaredField("theUnsafe");
f.setAccessible(true);
Unsafe unsafe = (Unsafe)f.get(null);
return unsafe.pageSize();
}
catch(Throwable e)
{
LOG.warn("Unable to get operating system page size. Guessing 4096.",e);
return 4096;
}
}
private static class CachedUid
{
final long timestamp;
final String username;
public CachedUid(String username,long timestamp)
{
this.timestamp = timestamp;
this.username = username;
}
}
private static final Map<Long,CachedUid> uidCache = new ConcurrentHashMap<Long,CachedUid>();
private static long cacheTimeout;
private static boolean initialized = false;
/**
* The Windows logon name has two part, NetBIOS domain name and
* user account name, of the format DOMAIN\UserName. This method
* will remove the domain part of the full logon name.
*
* @param name the full principal name containing the domain
* @return name with domain removed
* @throws IOException raised on errors performing I/O.
*/
private static String stripDomain(String name)
{
int i = name.indexOf('\\');
if(i != -1)
{
name = name.substring(i + 1);
}
return name;
}
public static String getOwner(FileDescriptor fd) throws IOException
{
ensureInitialized();
if(Shell.WINDOWS)
{
String owner = Windows.getOwner(fd);
owner = stripDomain(owner);
return owner;
}
else
{
long uid = POSIX.getUIDforFDOwnerforOwner(fd);
CachedUid cUid = uidCache.get(uid);
long now = System.currentTimeMillis();
if(cUid != null && (cUid.timestamp + cacheTimeout) > now)
{
return cUid.username;
}
String user = POSIX.getUserName(uid);
LOG.info("Got UserName " + user + " for UID " + uid + " from the native implementation");
cUid = new CachedUid(user,now);
uidCache.put(uid,cUid);
return user;
}
}
/**
* Create a FileDescriptor that shares delete permission on the
* file opened at a given offset, i.e. other process can delete
* the file the FileDescriptor is reading. Only Windows implementation
* uses the native interface.
*
* @param f input f.
* @param seekOffset input seekOffset.
* @return FileDescriptor.
* @throws IOException raised on errors performing I/O.
*/
public static FileDescriptor getShareDeleteFileDescriptor(File f,long seekOffset) throws IOException
{
if(!Shell.WINDOWS)
{
RandomAccessFile rf = new RandomAccessFile(f,"r");
if(seekOffset > 0)
{
rf.seek(seekOffset);
}
return rf.getFD();
}
else
{
// Use Windows native interface to create a FileInputStream that
// shares delete permission on the file opened, and set it to the
// given offset.
//
FileDescriptor fd = Windows.createFile(f.getAbsolutePath(),Windows.GENERIC_READ,Windows.FILE_SHARE_READ | Windows.FILE_SHARE_WRITE | Windows.FILE_SHARE_DELETE,Windows.OPEN_EXISTING);
if(seekOffset > 0)
{
Windows.setFilePointer(fd,seekOffset,Windows.FILE_BEGIN);
}
return fd;
}
}
/**
* @param f the file that we want to create
* @param permissions we want to have on the file (if security is enabled)
* @return Create the specified File for write access, ensuring that it does not exist.
* @throws AlreadyExistsException if the file already exists
* @throws IOException if any other error occurred
*/
public static FileOutputStream getCreateForWriteFileOutputStream(File f,int permissions) throws IOException
{
if(!Shell.WINDOWS)
{
// Use the native wrapper around open(2)
try
{
FileDescriptor fd = POSIX.open(f.getAbsolutePath(),POSIX.O_WRONLY | POSIX.O_CREAT | POSIX.O_EXCL,permissions);
return new FileOutputStream(fd);
}
catch(NativeIOException nioe)
{
if(nioe.getErrno() == Errno.EEXIST)
{
throw new AlreadyExistsException(nioe);
}
throw nioe;
}
}
else
{
// Use the Windows native APIs to create equivalent FileOutputStream
try
{
FileDescriptor fd = Windows.createFile(f.getCanonicalPath(),Windows.GENERIC_WRITE,Windows.FILE_SHARE_DELETE | Windows.FILE_SHARE_READ | Windows.FILE_SHARE_WRITE,Windows.CREATE_NEW);
POSIX.chmod(f.getCanonicalPath(),permissions);
return new FileOutputStream(fd);
}
catch(NativeIOException nioe)
{
if(nioe.getErrorCode() == 80)
{
// ERROR_FILE_EXISTS
// 80 (0x50)
// The file exists
throw new AlreadyExistsException(nioe);
}
throw nioe;
}
}
}
private synchronized static void ensureInitialized()
{
if(!initialized)
{
cacheTimeout = new Configuration().getLong("hadoop.security.uid.cache.secs",4 * 60 * 60) * 1000;
LOG.info("Initialized cache for UID to User mapping with a cache" + " timeout of " + cacheTimeout / 1000 + " seconds.");
initialized = true;
}
}
/**
* A version of renameTo that throws a descriptive exception when it fails.
*
* @param src The source path
* @param dst The destination path
* @throws NativeIOException On failure.
*/
public static void renameTo(File src,File dst) throws IOException
{
if(!nativeLoaded)
{
if(!src.renameTo(dst))
{
throw new IOException("renameTo(src=" + src + ", dst=" + dst + ") failed.");
}
}
else
{
renameTo0(src.getAbsolutePath(),dst.getAbsolutePath());
}
}
/**
* Creates a hardlink "dst" that points to "src".
* <p>
* This is deprecated since JDK7 NIO can create hardlinks via the
* {@link java.nio.file.Files} API.
*
* @param src source file
* @param dst hardlink location
* @throws IOException raised on errors performing I/O.
*/
@Deprecated
public static void link(File src,File dst) throws IOException
{
if(!nativeLoaded)
{
HardLink.createHardLink(src,dst);
}
else
{
link0(src.getAbsolutePath(),dst.getAbsolutePath());
}
}
/**
* A version of renameTo that throws a descriptive exception when it fails.
*
* @param src The source path
* @param dst The destination path
* @throws NativeIOException On failure.
*/
private static native void renameTo0(String src,String dst) throws NativeIOException;
private static native void link0(String src,String dst) throws NativeIOException;
/**
* Unbuffered file copy from src to dst without tainting OS buffer cache
* <p>
* In POSIX platform:
* It uses FileChannel#transferTo() which internally attempts
* unbuffered IO on OS with native sendfile64() support and falls back to
* buffered IO otherwise.
* <p>
* It minimizes the number of FileChannel#transferTo call by passing the the
* src file size directly instead of a smaller size as the 3rd parameter.
* This saves the number of sendfile64() system call when native sendfile64()
* is supported. In the two fall back cases where sendfile is not supported,
* FileChannle#transferTo already has its own batching of size 8 MB and 8 KB,
* respectively.
* <p>
* In Windows Platform:
* It uses its own native wrapper of CopyFileEx with COPY_FILE_NO_BUFFERING
* flag, which is supported on Windows Server 2008 and above.
* <p>
* Ideally, we should use FileChannel#transferTo() across both POSIX and Windows
* platform. Unfortunately, the wrapper(Java_sun_nio_ch_FileChannelImpl_transferTo0)
* used by FileChannel#transferTo for unbuffered IO is not implemented on Windows.
* Based on OpenJDK 6/7/8 source code, Java_sun_nio_ch_FileChannelImpl_transferTo0
* on Windows simply returns IOS_UNSUPPORTED.
* <p>
* Note: This simple native wrapper does minimal parameter checking before copy and
* consistency check (e.g., size) after copy.
* It is recommended to use wrapper function like
* the Storage#nativeCopyFileUnbuffered() function in hadoop-hdfs with pre/post copy
* checks.
*
* @param src The source path
* @param dst The destination path
* @throws IOException raised on errors performing I/O.
*/
public static void copyFileUnbuffered(File src,File dst) throws IOException
{
if(nativeLoaded && Shell.WINDOWS)
{
copyFileUnbuffered0(src.getAbsolutePath(),dst.getAbsolutePath());
}
else
{
FileInputStream fis = new FileInputStream(src);
FileChannel input = null;
try
{
input = fis.getChannel();
try(FileOutputStream fos = new FileOutputStream(dst); FileChannel output = fos.getChannel())
{
long remaining = input.size();
long position = 0;
long transferred = 0;
while(remaining > 0)
{
transferred = input.transferTo(position,remaining,output);
remaining -= transferred;
position += transferred;
}
}
}
finally
{
IOUtils.cleanupWithLogger(LOG,input,fis);
}
}
}
private static native void copyFileUnbuffered0(String src,String dst) throws NativeIOException;
}
最新发布