1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.util;
19  
20  import java.io.Closeable;
21  import java.io.FileNotFoundException;
22  import java.io.IOException;
23  import java.io.InterruptedIOException;
24  import java.io.PrintWriter;
25  import java.io.StringWriter;
26  import java.net.InetAddress;
27  import java.net.URI;
28  import java.util.ArrayList;
29  import java.util.Arrays;
30  import java.util.Collection;
31  import java.util.Collections;
32  import java.util.Comparator;
33  import java.util.HashMap;
34  import java.util.HashSet;
35  import java.util.Iterator;
36  import java.util.List;
37  import java.util.Map;
38  import java.util.Map.Entry;
39  import java.util.Set;
40  import java.util.SortedMap;
41  import java.util.SortedSet;
42  import java.util.TreeMap;
43  import java.util.TreeSet;
44  import java.util.concurrent.Callable;
45  import java.util.concurrent.ConcurrentSkipListMap;
46  import java.util.concurrent.ExecutionException;
47  import java.util.concurrent.ExecutorService;
48  import java.util.concurrent.Executors;
49  import java.util.concurrent.Future;
50  import java.util.concurrent.FutureTask;
51  import java.util.concurrent.ScheduledThreadPoolExecutor;
52  import java.util.concurrent.TimeUnit;
53  import java.util.concurrent.TimeoutException;
54  import java.util.concurrent.atomic.AtomicBoolean;
55  import java.util.concurrent.atomic.AtomicInteger;
56  
57  import com.google.common.base.Joiner;
58  import com.google.common.base.Preconditions;
59  import com.google.common.collect.ImmutableList;
60  import com.google.common.collect.Lists;
61  import com.google.common.collect.Multimap;
62  import com.google.common.collect.Ordering;
63  import com.google.common.collect.TreeMultimap;
64  import com.google.protobuf.ServiceException;
65  
66  import org.apache.commons.lang.StringUtils;
67  import org.apache.commons.logging.Log;
68  import org.apache.commons.logging.LogFactory;
69  import org.apache.hadoop.hbase.classification.InterfaceAudience;
70  import org.apache.hadoop.hbase.classification.InterfaceStability;
71  import org.apache.hadoop.conf.Configuration;
72  import org.apache.hadoop.conf.Configured;
73  import org.apache.hadoop.fs.FSDataOutputStream;
74  import org.apache.hadoop.fs.FileStatus;
75  import org.apache.hadoop.fs.FileSystem;
76  import org.apache.hadoop.fs.Path;
77  import org.apache.hadoop.fs.permission.FsAction;
78  import org.apache.hadoop.fs.permission.FsPermission;
79  import org.apache.hadoop.hbase.Abortable;
80  import org.apache.hadoop.hbase.Cell;
81  import org.apache.hadoop.hbase.ClusterStatus;
82  import org.apache.hadoop.hbase.CoordinatedStateException;
83  import org.apache.hadoop.hbase.HBaseConfiguration;
84  import org.apache.hadoop.hbase.HBaseInterfaceAudience;
85  import org.apache.hadoop.hbase.HColumnDescriptor;
86  import org.apache.hadoop.hbase.HConstants;
87  import org.apache.hadoop.hbase.HRegionInfo;
88  import org.apache.hadoop.hbase.HRegionLocation;
89  import org.apache.hadoop.hbase.HTableDescriptor;
90  import org.apache.hadoop.hbase.KeyValue;
91  import org.apache.hadoop.hbase.MasterNotRunningException;
92  import org.apache.hadoop.hbase.RegionLocations;
93  import org.apache.hadoop.hbase.ServerName;
94  import org.apache.hadoop.hbase.TableName;
95  import org.apache.hadoop.hbase.ZooKeeperConnectionException;
96  import org.apache.hadoop.hbase.MetaTableAccessor;
97  import org.apache.hadoop.hbase.classification.InterfaceAudience;
98  import org.apache.hadoop.hbase.classification.InterfaceStability;
99  import org.apache.hadoop.hbase.client.Admin;
100 import org.apache.hadoop.hbase.client.ClusterConnection;
101 import org.apache.hadoop.hbase.client.ConnectionFactory;
102 import org.apache.hadoop.hbase.client.Delete;
103 import org.apache.hadoop.hbase.client.Get;
104 import org.apache.hadoop.hbase.client.HBaseAdmin;
105 import org.apache.hadoop.hbase.client.HConnectable;
106 import org.apache.hadoop.hbase.client.HConnection;
107 import org.apache.hadoop.hbase.client.HConnectionManager;
108 import org.apache.hadoop.hbase.client.MetaScanner;
109 import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
110 import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitorBase;
111 import org.apache.hadoop.hbase.client.Put;
112 import org.apache.hadoop.hbase.client.RegionReplicaUtil;
113 import org.apache.hadoop.hbase.client.Result;
114 import org.apache.hadoop.hbase.client.RowMutations;
115 import org.apache.hadoop.hbase.client.Table;
116 import org.apache.hadoop.hbase.io.hfile.CacheConfig;
117 import org.apache.hadoop.hbase.io.hfile.HFile;
118 import org.apache.hadoop.hbase.master.MasterFileSystem;
119 import org.apache.hadoop.hbase.master.RegionState;
120 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
121 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService.BlockingInterface;
122 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
123 import org.apache.hadoop.hbase.regionserver.HRegion;
124 import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
125 import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
126 import org.apache.hadoop.hbase.security.AccessDeniedException;
127 import org.apache.hadoop.hbase.security.UserProvider;
128 import org.apache.hadoop.hbase.util.Bytes.ByteArrayComparator;
129 import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
130 import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
131 import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandler;
132 import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandlerImpl;
133 import org.apache.hadoop.hbase.util.hbck.TableLockChecker;
134 import org.apache.hadoop.hbase.wal.WALSplitter;
135 import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
136 import org.apache.hadoop.hbase.zookeeper.ZKTableStateClientSideReader;
137 import org.apache.hadoop.hbase.zookeeper.ZKTableStateManager;
138 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
139 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
140 import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
141 import org.apache.hadoop.io.IOUtils;
142 import org.apache.hadoop.ipc.RemoteException;
143 import org.apache.hadoop.security.UserGroupInformation;
144 import org.apache.hadoop.util.ReflectionUtils;
145 import org.apache.hadoop.util.Tool;
146 import org.apache.hadoop.util.ToolRunner;
147 import org.apache.zookeeper.KeeperException;
148 
149 /**
150  * HBaseFsck (hbck) is a tool for checking and repairing region consistency and
151  * table integrity problems in a corrupted HBase.
152  * <p>
153  * Region consistency checks verify that hbase:meta, region deployment on region
154  * servers and the state of data in HDFS (.regioninfo files) all are in
155  * accordance.
156  * <p>
157  * Table integrity checks verify that all possible row keys resolve to exactly
158  * one region of a table.  This means there are no individual degenerate
159  * or backwards regions; no holes between regions; and that there are no
160  * overlapping regions.
161  * <p>
162  * The general repair strategy works in two phases:
163  * <ol>
164  * <li> Repair Table Integrity on HDFS. (merge or fabricate regions)
165  * <li> Repair Region Consistency with hbase:meta and assignments
166  * </ol>
167  * <p>
168  * For table integrity repairs, the tables' region directories are scanned
169  * for .regioninfo files.  Each table's integrity is then verified.  If there
170  * are any orphan regions (regions with no .regioninfo files) or holes, new
171  * regions are fabricated.  Backwards regions are sidelined as well as empty
172  * degenerate (endkey==startkey) regions.  If there are any overlapping regions,
173  * a new region is created and all data is merged into the new region.
174  * <p>
175  * Table integrity repairs deal solely with HDFS and could potentially be done
176  * offline -- the hbase region servers or master do not need to be running.
177  * This phase can eventually be used to completely reconstruct the hbase:meta table in
178  * an offline fashion.
179  * <p>
180  * Region consistency requires three conditions -- 1) valid .regioninfo file
181  * present in an HDFS region dir,  2) valid row with .regioninfo data in META,
182  * and 3) a region is deployed only at the regionserver that was assigned to
183  * with proper state in the master.
184  * <p>
185  * Region consistency repairs require hbase to be online so that hbck can
186  * contact the HBase master and region servers.  The hbck#connect() method must
187  * first be called successfully.  Much of the region consistency information
188  * is transient and less risky to repair.
189  * <p>
190  * If hbck is run from the command line, there are a handful of arguments that
191  * can be used to limit the kinds of repairs hbck will do.  See the code in
192  * {@link #printUsageAndExit()} for more details.
193  */
194 @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
195 @InterfaceStability.Evolving
196 public class HBaseFsck extends Configured implements Closeable {
197   public static final long DEFAULT_TIME_LAG = 60000; // default value of 1 minute
198   public static final long DEFAULT_SLEEP_BEFORE_RERUN = 10000;
199   private static final int MAX_NUM_THREADS = 50; // #threads to contact regions
200   private static boolean rsSupportsOffline = true;
201   private static final int DEFAULT_OVERLAPS_TO_SIDELINE = 2;
202   private static final int DEFAULT_MAX_MERGE = 5;
203   private static final String TO_BE_LOADED = "to_be_loaded";
204   private static final String HBCK_LOCK_FILE = "hbase-hbck.lock";
205   private static final int DEFAULT_MAX_LOCK_FILE_ATTEMPTS = 5;
206   private static final int DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL = 200;
207 
208   /**********************
209    * Internal resources
210    **********************/
211   private static final Log LOG = LogFactory.getLog(HBaseFsck.class.getName());
212   private ClusterStatus status;
213   private ClusterConnection connection;
214   private Admin admin;
215   private Table meta;
216   // threads to do ||izable tasks: retrieve data from regionservers, handle overlapping regions
217   protected ExecutorService executor;
218   private long startMillis = System.currentTimeMillis();
219   private HFileCorruptionChecker hfcc;
220   private int retcode = 0;
221   private Path HBCK_LOCK_PATH;
222   private FSDataOutputStream hbckOutFd;
223   // This lock is to prevent cleanup of balancer resources twice between
224   // ShutdownHook and the main code. We cleanup only if the connect() is
225   // successful
226   private final AtomicBoolean hbckLockCleanup = new AtomicBoolean(false);
227 
228   /***********
229    * Options
230    ***********/
231   private static boolean details = false; // do we display the full report
232   private long timelag = DEFAULT_TIME_LAG; // tables whose modtime is older
233   private boolean fixAssignments = false; // fix assignment errors?
234   private boolean fixMeta = false; // fix meta errors?
235   private boolean checkHdfs = true; // load and check fs consistency?
236   private boolean fixHdfsHoles = false; // fix fs holes?
237   private boolean fixHdfsOverlaps = false; // fix fs overlaps (risky)
238   private boolean fixHdfsOrphans = false; // fix fs holes (missing .regioninfo)
239   private boolean fixTableOrphans = false; // fix fs holes (missing .tableinfo)
240   private boolean fixVersionFile = false; // fix missing hbase.version file in hdfs
241   private boolean fixSplitParents = false; // fix lingering split parents
242   private boolean fixReferenceFiles = false; // fix lingering reference store file
243   private boolean fixEmptyMetaCells = false; // fix (remove) empty REGIONINFO_QUALIFIER rows
244   private boolean fixTableLocks = false; // fix table locks which are expired
245   private boolean fixTableZNodes = false; // fix table Znodes which are orphaned
246   private boolean fixAny = false; // Set to true if any of the fix is required.
247 
248   // limit checking/fixes to listed tables, if empty attempt to check/fix all
249   // hbase:meta are always checked
250   private Set<TableName> tablesIncluded = new HashSet<TableName>();
251   private int maxMerge = DEFAULT_MAX_MERGE; // maximum number of overlapping regions to merge
252   private int maxOverlapsToSideline = DEFAULT_OVERLAPS_TO_SIDELINE; // maximum number of overlapping regions to sideline
253   private boolean sidelineBigOverlaps = false; // sideline overlaps with >maxMerge regions
254   private Path sidelineDir = null;
255 
256   private boolean rerun = false; // if we tried to fix something, rerun hbck
257   private static boolean summary = false; // if we want to print less output
258   private boolean checkMetaOnly = false;
259   private boolean checkRegionBoundaries = false;
260   private boolean ignorePreCheckPermission = false; // if pre-check permission
261 
262   /*********
263    * State
264    *********/
265   final private ErrorReporter errors;
266   int fixes = 0;
267 
268   /**
269    * This map contains the state of all hbck items.  It maps from encoded region
270    * name to HbckInfo structure.  The information contained in HbckInfo is used
271    * to detect and correct consistency (hdfs/meta/deployment) problems.
272    */
273   private TreeMap<String, HbckInfo> regionInfoMap = new TreeMap<String, HbckInfo>();
274   private TreeSet<TableName> disabledTables =
275     new TreeSet<TableName>();
276   // Empty regioninfo qualifiers in hbase:meta
277   private Set<Result> emptyRegionInfoQualifiers = new HashSet<Result>();
278 
279   /**
280    * This map from Tablename -> TableInfo contains the structures necessary to
281    * detect table consistency problems (holes, dupes, overlaps).  It is sorted
282    * to prevent dupes.
283    *
284    * If tablesIncluded is empty, this map contains all tables.
285    * Otherwise, it contains only meta tables and tables in tablesIncluded,
286    * unless checkMetaOnly is specified, in which case, it contains only
287    * the meta table
288    */
289   private SortedMap<TableName, TableInfo> tablesInfo =
290       new ConcurrentSkipListMap<TableName, TableInfo>();
291 
292   /**
293    * When initially looking at HDFS, we attempt to find any orphaned data.
294    */
295   private List<HbckInfo> orphanHdfsDirs = Collections.synchronizedList(new ArrayList<HbckInfo>());
296 
297   private Map<TableName, Set<String>> orphanTableDirs =
298       new HashMap<TableName, Set<String>>();
299 
300 
301   /**
302    * List of orphaned table ZNodes
303    */
304   private Set<TableName> orphanedTableZNodes = new HashSet<TableName>();
305   private final RetryCounterFactory lockFileRetryCounterFactory;
306   
307 
308   /**
309    * Constructor
310    *
311    * @param conf Configuration object
312    * @throws MasterNotRunningException if the master is not running
313    * @throws ZooKeeperConnectionException if unable to connect to ZooKeeper
314    */
315   public HBaseFsck(Configuration conf) throws MasterNotRunningException,
316       ZooKeeperConnectionException, IOException, ClassNotFoundException {
317     super(conf);
318     // make a copy, just to be sure we're not overriding someone else's config
319     setConf(HBaseConfiguration.create(getConf()));
320     // disable blockcache for tool invocation, see HBASE-10500
321     getConf().setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0);
322     // Disable usage of meta replicas in hbck
323     getConf().setBoolean(HConstants.USE_META_REPLICAS, false);
324     errors = getErrorReporter(conf);
325 
326     int numThreads = conf.getInt("hbasefsck.numthreads", MAX_NUM_THREADS);
327     executor = new ScheduledThreadPoolExecutor(numThreads, Threads.newDaemonThreadFactory("hbasefsck"));
328     lockFileRetryCounterFactory = new RetryCounterFactory(
329         getConf().getInt("hbase.hbck.lockfile.attempts", DEFAULT_MAX_LOCK_FILE_ATTEMPTS), 
330         getConf().getInt("hbase.hbck.lockfile.attempt.sleep.interval",
331             DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL));
332   }
333 
334   /**
335    * Constructor
336    *
337    * @param conf
338    *          Configuration object
339    * @throws MasterNotRunningException
340    *           if the master is not running
341    * @throws ZooKeeperConnectionException
342    *           if unable to connect to ZooKeeper
343    */
344   public HBaseFsck(Configuration conf, ExecutorService exec) throws MasterNotRunningException,
345       ZooKeeperConnectionException, IOException, ClassNotFoundException {
346     super(conf);
347     errors = getErrorReporter(getConf());
348     this.executor = exec;
349     lockFileRetryCounterFactory = new RetryCounterFactory(
350         getConf().getInt("hbase.hbck.lockfile.attempts", DEFAULT_MAX_LOCK_FILE_ATTEMPTS),
351         getConf().getInt("hbase.hbck.lockfile.attempt.sleep.interval", DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL));
352   }
353   
354   private class FileLockCallable implements Callable<FSDataOutputStream> {
355     RetryCounter retryCounter;
356 
357     public FileLockCallable(RetryCounter retryCounter) {
358       this.retryCounter = retryCounter;
359     }
360     @Override
361     public FSDataOutputStream call() throws IOException {
362       try {
363         FileSystem fs = FSUtils.getCurrentFileSystem(getConf());
364         FsPermission defaultPerms = FSUtils.getFilePermissions(fs, getConf(),
365             HConstants.DATA_FILE_UMASK_KEY);
366         Path tmpDir = new Path(FSUtils.getRootDir(getConf()), HConstants.HBASE_TEMP_DIRECTORY);
367         fs.mkdirs(tmpDir);
368         HBCK_LOCK_PATH = new Path(tmpDir, HBCK_LOCK_FILE);
369         final FSDataOutputStream out = createFileWithRetries(fs, HBCK_LOCK_PATH, defaultPerms);
370         out.writeBytes(InetAddress.getLocalHost().toString());
371         out.flush();
372         return out;
373       } catch(RemoteException e) {
374         if(AlreadyBeingCreatedException.class.getName().equals(e.getClassName())){
375           return null;
376         } else {
377           throw e;
378         }
379       }
380     }
381 
382     private FSDataOutputStream createFileWithRetries(final FileSystem fs,
383         final Path hbckLockFilePath, final FsPermission defaultPerms)
384         throws IOException {
385 
386       IOException exception = null;
387       do {
388         try {
389           return FSUtils.create(fs, hbckLockFilePath, defaultPerms, false);
390         } catch (IOException ioe) {
391           LOG.info("Failed to create lock file " + hbckLockFilePath.getName()
392               + ", try=" + (retryCounter.getAttemptTimes() + 1) + " of "
393               + retryCounter.getMaxAttempts());
394           LOG.debug("Failed to create lock file " + hbckLockFilePath.getName(), 
395               ioe);
396           try {
397             exception = ioe;
398             retryCounter.sleepUntilNextRetry();
399           } catch (InterruptedException ie) {
400             throw (InterruptedIOException) new InterruptedIOException(
401                 "Can't create lock file " + hbckLockFilePath.getName())
402             .initCause(ie);
403           }
404         }
405       } while (retryCounter.shouldRetry());
406 
407       throw exception;
408     }
409   }
410 
411   /**
412    * This method maintains a lock using a file. If the creation fails we return null
413    *
414    * @return FSDataOutputStream object corresponding to the newly opened lock file
415    * @throws IOException
416    */
417   private FSDataOutputStream checkAndMarkRunningHbck() throws IOException {
418     RetryCounter retryCounter = lockFileRetryCounterFactory.create();
419     FileLockCallable callable = new FileLockCallable(retryCounter);
420     ExecutorService executor = Executors.newFixedThreadPool(1);
421     FutureTask<FSDataOutputStream> futureTask = new FutureTask<FSDataOutputStream>(callable);
422     executor.execute(futureTask);
423     final int timeoutInSeconds = 30;
424     FSDataOutputStream stream = null;
425     try {
426       stream = futureTask.get(30, TimeUnit.SECONDS);
427     } catch (ExecutionException ee) {
428       LOG.warn("Encountered exception when opening lock file", ee);
429     } catch (InterruptedException ie) {
430       LOG.warn("Interrupted when opening lock file", ie);
431       Thread.currentThread().interrupt();
432     } catch (TimeoutException exception) {
433       // took too long to obtain lock
434       LOG.warn("Took more than " + timeoutInSeconds + " seconds in obtaining lock");
435       futureTask.cancel(true);
436     } finally {
437       executor.shutdownNow();
438     }
439     return stream;
440   }
441 
442   private void unlockHbck() {
443     if (hbckLockCleanup.compareAndSet(true, false)) {
444       RetryCounter retryCounter = lockFileRetryCounterFactory.create();
445       do {
446         try {
447           IOUtils.closeStream(hbckOutFd);
448           FSUtils.delete(FSUtils.getCurrentFileSystem(getConf()),
449               HBCK_LOCK_PATH, true);
450           return;
451         } catch (IOException ioe) {
452           LOG.info("Failed to delete " + HBCK_LOCK_PATH + ", try="
453               + (retryCounter.getAttemptTimes() + 1) + " of "
454               + retryCounter.getMaxAttempts());
455           LOG.debug("Failed to delete " + HBCK_LOCK_PATH, ioe);
456           try {
457             retryCounter.sleepUntilNextRetry();
458           } catch (InterruptedException ie) {
459             Thread.currentThread().interrupt();
460             LOG.warn("Interrupted while deleting lock file" +
461                 HBCK_LOCK_PATH);
462             return;
463           }
464         }
465       } while (retryCounter.shouldRetry());
466 
467     }
468   }
469 
470   /**
471    * To repair region consistency, one must call connect() in order to repair
472    * online state.
473    */
474   public void connect() throws IOException {
475 
476     // Check if another instance of balancer is running
477     hbckOutFd = checkAndMarkRunningHbck();
478     if (hbckOutFd == null) {
479       setRetCode(-1);
480       LOG.error("Another instance of hbck is running, exiting this instance.[If you are sure" +
481           " no other instance is running, delete the lock file " +
482           HBCK_LOCK_PATH + " and rerun the tool]");
483       throw new IOException("Duplicate hbck - Abort");
484     }
485 
486     // Make sure to cleanup the lock
487     hbckLockCleanup.set(true);
488 
489     // Add a shutdown hook to this thread, incase user tries to
490     // kill the hbck with a ctrl-c, we want to cleanup the lock so that
491     // it is available for further calls
492     Runtime.getRuntime().addShutdownHook(new Thread() {
493       @Override
494       public void run() {
495         IOUtils.closeStream(HBaseFsck.this);
496         unlockHbck();
497       }
498     });
499     LOG.debug("Launching hbck");
500 
501     connection = (ClusterConnection)ConnectionFactory.createConnection(getConf());
502     admin = connection.getAdmin();
503     meta = connection.getTable(TableName.META_TABLE_NAME);
504     status = admin.getClusterStatus();
505   }
506 
507   /**
508    * Get deployed regions according to the region servers.
509    */
510   private void loadDeployedRegions() throws IOException, InterruptedException {
511     // From the master, get a list of all known live region servers
512     Collection<ServerName> regionServers = status.getServers();
513     errors.print("Number of live region servers: " + regionServers.size());
514     if (details) {
515       for (ServerName rsinfo: regionServers) {
516         errors.print("  " + rsinfo.getServerName());
517       }
518     }
519 
520     // From the master, get a list of all dead region servers
521     Collection<ServerName> deadRegionServers = status.getDeadServerNames();
522     errors.print("Number of dead region servers: " + deadRegionServers.size());
523     if (details) {
524       for (ServerName name: deadRegionServers) {
525         errors.print("  " + name);
526       }
527     }
528 
529     // Print the current master name and state
530     errors.print("Master: " + status.getMaster());
531 
532     // Print the list of all backup masters
533     Collection<ServerName> backupMasters = status.getBackupMasters();
534     errors.print("Number of backup masters: " + backupMasters.size());
535     if (details) {
536       for (ServerName name: backupMasters) {
537         errors.print("  " + name);
538       }
539     }
540 
541     errors.print("Average load: " + status.getAverageLoad());
542     errors.print("Number of requests: " + status.getRequestsCount());
543     errors.print("Number of regions: " + status.getRegionsCount());
544 
545     Map<String, RegionState> rits = status.getRegionsInTransition();
546     errors.print("Number of regions in transition: " + rits.size());
547     if (details) {
548       for (RegionState state: rits.values()) {
549         errors.print("  " + state.toDescriptiveString());
550       }
551     }
552 
553     // Determine what's deployed
554     processRegionServers(regionServers);
555   }
556 
557   /**
558    * Clear the current state of hbck.
559    */
560   private void clearState() {
561     // Make sure regionInfo is empty before starting
562     fixes = 0;
563     regionInfoMap.clear();
564     emptyRegionInfoQualifiers.clear();
565     disabledTables.clear();
566     errors.clear();
567     tablesInfo.clear();
568     orphanHdfsDirs.clear();
569   }
570 
571   /**
572    * This repair method analyzes hbase data in hdfs and repairs it to satisfy
573    * the table integrity rules.  HBase doesn't need to be online for this
574    * operation to work.
575    */
576   public void offlineHdfsIntegrityRepair() throws IOException, InterruptedException {
577     // Initial pass to fix orphans.
578     if (shouldCheckHdfs() && (shouldFixHdfsOrphans() || shouldFixHdfsHoles()
579         || shouldFixHdfsOverlaps() || shouldFixTableOrphans())) {
580       LOG.info("Loading regioninfos HDFS");
581       // if nothing is happening this should always complete in two iterations.
582       int maxIterations = getConf().getInt("hbase.hbck.integrityrepair.iterations.max", 3);
583       int curIter = 0;
584       do {
585         clearState(); // clears hbck state and reset fixes to 0 and.
586         // repair what's on HDFS
587         restoreHdfsIntegrity();
588         curIter++;// limit the number of iterations.
589       } while (fixes > 0 && curIter <= maxIterations);
590 
591       // Repairs should be done in the first iteration and verification in the second.
592       // If there are more than 2 passes, something funny has happened.
593       if (curIter > 2) {
594         if (curIter == maxIterations) {
595           LOG.warn("Exiting integrity repairs after max " + curIter + " iterations. "
596               + "Tables integrity may not be fully repaired!");
597         } else {
598           LOG.info("Successfully exiting integrity repairs after " + curIter + " iterations");
599         }
600       }
601     }
602   }
603 
604   /**
605    * This repair method requires the cluster to be online since it contacts
606    * region servers and the masters.  It makes each region's state in HDFS, in
607    * hbase:meta, and deployments consistent.
608    *
609    * @return If > 0 , number of errors detected, if < 0 there was an unrecoverable
610    * error.  If 0, we have a clean hbase.
611    */
612   public int onlineConsistencyRepair() throws IOException, KeeperException,
613     InterruptedException {
614     clearState();
615 
616     // get regions according to what is online on each RegionServer
617     loadDeployedRegions();
618     // check whether hbase:meta is deployed and online
619     recordMetaRegion();
620     // Check if hbase:meta is found only once and in the right place
621     if (!checkMetaRegion()) {
622       String errorMsg = "hbase:meta table is not consistent. ";
623       if (shouldFixAssignments()) {
624         errorMsg += "HBCK will try fixing it. Rerun once hbase:meta is back to consistent state.";
625       } else {
626         errorMsg += "Run HBCK with proper fix options to fix hbase:meta inconsistency.";
627       }
628       errors.reportError(errorMsg + " Exiting...");
629       return -2;
630     }
631     // Not going with further consistency check for tables when hbase:meta itself is not consistent.
632     LOG.info("Loading regionsinfo from the hbase:meta table");
633     boolean success = loadMetaEntries();
634     if (!success) return -1;
635 
636     // Empty cells in hbase:meta?
637     reportEmptyMetaCells();
638 
639     // Check if we have to cleanup empty REGIONINFO_QUALIFIER rows from hbase:meta
640     if (shouldFixEmptyMetaCells()) {
641       fixEmptyMetaCells();
642     }
643 
644     // get a list of all tables that have not changed recently.
645     if (!checkMetaOnly) {
646       reportTablesInFlux();
647     }
648 
649     // load regiondirs and regioninfos from HDFS
650     if (shouldCheckHdfs()) {
651       LOG.info("Loading region directories from HDFS");
652       loadHdfsRegionDirs();
653       LOG.info("Loading region information from HDFS");
654       loadHdfsRegionInfos();
655     }
656 
657     // Get disabled tables from ZooKeeper
658     loadDisabledTables();
659 
660     // fix the orphan tables
661     fixOrphanTables();
662 
663     LOG.info("Checking and fixing region consistency");
664 
665     // Check and fix consistency
666     checkAndFixConsistency();
667 
668     // Check integrity (does not fix)
669     checkIntegrity();
670     return errors.getErrorList().size();
671   }
672 
673   /**
674    * Contacts the master and prints out cluster-wide information
675    * @return 0 on success, non-zero on failure
676    */
677   public int onlineHbck() throws IOException, KeeperException, InterruptedException, ServiceException {
678     // print hbase server version
679     errors.print("Version: " + status.getHBaseVersion());
680     offlineHdfsIntegrityRepair();
681 
682     // turn the balancer off
683     boolean oldBalancer = admin.setBalancerRunning(false, true);
684     try {
685       onlineConsistencyRepair();
686     }
687     finally {
688       admin.setBalancerRunning(oldBalancer, false);
689     }
690 
691     if (checkRegionBoundaries) {
692       checkRegionBoundaries();
693     }
694 
695     offlineReferenceFileRepair();
696 
697     checkAndFixTableLocks();
698 
699     // Check (and fix if requested) orphaned table ZNodes
700     checkAndFixOrphanedTableZNodes();
701 
702     // Remove the hbck lock
703     unlockHbck();
704 
705     // Print table summary
706     printTableSummary(tablesInfo);
707     return errors.summarize();
708   }
709 
710   public static byte[] keyOnly (byte[] b) {
711     if (b == null)
712       return b;
713     int rowlength = Bytes.toShort(b, 0);
714     byte[] result = new byte[rowlength];
715     System.arraycopy(b, Bytes.SIZEOF_SHORT, result, 0, rowlength);
716     return result;
717   }
718 
719   @Override
720   public void close() throws IOException {
721     IOUtils.cleanup(null, admin, meta, connection);
722   }
723 
724   private static class RegionBoundariesInformation {
725     public byte [] regionName;
726     public byte [] metaFirstKey;
727     public byte [] metaLastKey;
728     public byte [] storesFirstKey;
729     public byte [] storesLastKey;
730     @Override
731     public String toString () {
732       return "regionName=" + Bytes.toStringBinary(regionName) +
733              "\nmetaFirstKey=" + Bytes.toStringBinary(metaFirstKey) +
734              "\nmetaLastKey=" + Bytes.toStringBinary(metaLastKey) +
735              "\nstoresFirstKey=" + Bytes.toStringBinary(storesFirstKey) +
736              "\nstoresLastKey=" + Bytes.toStringBinary(storesLastKey);
737     }
738   }
739 
740   public void checkRegionBoundaries() {
741     try {
742       ByteArrayComparator comparator = new ByteArrayComparator();
743       List<HRegionInfo> regions = MetaScanner.listAllRegions(getConf(), connection, false);
744       final RegionBoundariesInformation currentRegionBoundariesInformation =
745           new RegionBoundariesInformation();
746       Path hbaseRoot = FSUtils.getRootDir(getConf());
747       for (HRegionInfo regionInfo : regions) {
748         Path tableDir = FSUtils.getTableDir(hbaseRoot, regionInfo.getTable());
749         currentRegionBoundariesInformation.regionName = regionInfo.getRegionName();
750         // For each region, get the start and stop key from the META and compare them to the
751         // same information from the Stores.
752         Path path = new Path(tableDir, regionInfo.getEncodedName());
753         FileSystem fs = path.getFileSystem(getConf());
754         FileStatus[] files = fs.listStatus(path);
755         // For all the column families in this region...
756         byte[] storeFirstKey = null;
757         byte[] storeLastKey = null;
758         for (FileStatus file : files) {
759           String fileName = file.getPath().toString();
760           fileName = fileName.substring(fileName.lastIndexOf("/") + 1);
761           if (!fileName.startsWith(".") && !fileName.endsWith("recovered.edits")) {
762             FileStatus[] storeFiles = fs.listStatus(file.getPath());
763             // For all the stores in this column family.
764             for (FileStatus storeFile : storeFiles) {
765               HFile.Reader reader = HFile.createReader(fs, storeFile.getPath(), new CacheConfig(
766                   getConf()), getConf());
767               if ((reader.getFirstKey() != null)
768                   && ((storeFirstKey == null) || (comparator.compare(storeFirstKey,
769                       reader.getFirstKey()) > 0))) {
770                 storeFirstKey = reader.getFirstKey();
771               }
772               if ((reader.getLastKey() != null)
773                   && ((storeLastKey == null) || (comparator.compare(storeLastKey,
774                       reader.getLastKey())) < 0)) {
775                 storeLastKey = reader.getLastKey();
776               }
777               reader.close();
778             }
779           }
780         }
781         currentRegionBoundariesInformation.metaFirstKey = regionInfo.getStartKey();
782         currentRegionBoundariesInformation.metaLastKey = regionInfo.getEndKey();
783         currentRegionBoundariesInformation.storesFirstKey = keyOnly(storeFirstKey);
784         currentRegionBoundariesInformation.storesLastKey = keyOnly(storeLastKey);
785         if (currentRegionBoundariesInformation.metaFirstKey.length == 0)
786           currentRegionBoundariesInformation.metaFirstKey = null;
787         if (currentRegionBoundariesInformation.metaLastKey.length == 0)
788           currentRegionBoundariesInformation.metaLastKey = null;
789 
790         // For a region to be correct, we need the META start key to be smaller or equal to the
791         // smallest start key from all the stores, and the start key from the next META entry to
792         // be bigger than the last key from all the current stores. First region start key is null;
793         // Last region end key is null; some regions can be empty and not have any store.
794 
795         boolean valid = true;
796         // Checking start key.
797         if ((currentRegionBoundariesInformation.storesFirstKey != null)
798             && (currentRegionBoundariesInformation.metaFirstKey != null)) {
799           valid = valid
800               && comparator.compare(currentRegionBoundariesInformation.storesFirstKey,
801                 currentRegionBoundariesInformation.metaFirstKey) >= 0;
802         }
803         // Checking stop key.
804         if ((currentRegionBoundariesInformation.storesLastKey != null)
805             && (currentRegionBoundariesInformation.metaLastKey != null)) {
806           valid = valid
807               && comparator.compare(currentRegionBoundariesInformation.storesLastKey,
808                 currentRegionBoundariesInformation.metaLastKey) < 0;
809         }
810         if (!valid) {
811           errors.reportError(ERROR_CODE.BOUNDARIES_ERROR, "Found issues with regions boundaries",
812             tablesInfo.get(regionInfo.getTable()));
813           LOG.warn("Region's boundaries not alligned between stores and META for:");
814           LOG.warn(currentRegionBoundariesInformation);
815         }
816       }
817     } catch (IOException e) {
818       LOG.error(e);
819     }
820   }
821 
822   /**
823    * Iterates through the list of all orphan/invalid regiondirs.
824    */
825   private void adoptHdfsOrphans(Collection<HbckInfo> orphanHdfsDirs) throws IOException {
826     for (HbckInfo hi : orphanHdfsDirs) {
827       LOG.info("Attempting to handle orphan hdfs dir: " + hi.getHdfsRegionDir());
828       adoptHdfsOrphan(hi);
829     }
830   }
831 
832   /**
833    * Orphaned regions are regions without a .regioninfo file in them.  We "adopt"
834    * these orphans by creating a new region, and moving the column families,
835    * recovered edits, WALs, into the new region dir.  We determine the region
836    * startkey and endkeys by looking at all of the hfiles inside the column
837    * families to identify the min and max keys. The resulting region will
838    * likely violate table integrity but will be dealt with by merging
839    * overlapping regions.
840    */
841   @SuppressWarnings("deprecation")
842   private void adoptHdfsOrphan(HbckInfo hi) throws IOException {
843     Path p = hi.getHdfsRegionDir();
844     FileSystem fs = p.getFileSystem(getConf());
845     FileStatus[] dirs = fs.listStatus(p);
846     if (dirs == null) {
847       LOG.warn("Attempt to adopt ophan hdfs region skipped becuase no files present in " +
848           p + ". This dir could probably be deleted.");
849       return ;
850     }
851 
852     TableName tableName = hi.getTableName();
853     TableInfo tableInfo = tablesInfo.get(tableName);
854     Preconditions.checkNotNull(tableInfo, "Table '" + tableName + "' not present!");
855     HTableDescriptor template = tableInfo.getHTD();
856 
857     // find min and max key values
858     Pair<byte[],byte[]> orphanRegionRange = null;
859     for (FileStatus cf : dirs) {
860       String cfName= cf.getPath().getName();
861       // TODO Figure out what the special dirs are
862       if (cfName.startsWith(".") || cfName.equals(HConstants.SPLIT_LOGDIR_NAME)) continue;
863 
864       FileStatus[] hfiles = fs.listStatus(cf.getPath());
865       for (FileStatus hfile : hfiles) {
866         byte[] start, end;
867         HFile.Reader hf = null;
868         try {
869           CacheConfig cacheConf = new CacheConfig(getConf());
870           hf = HFile.createReader(fs, hfile.getPath(), cacheConf, getConf());
871           hf.loadFileInfo();
872           KeyValue startKv = KeyValue.createKeyValueFromKey(hf.getFirstKey());
873           start = startKv.getRow();
874           KeyValue endKv = KeyValue.createKeyValueFromKey(hf.getLastKey());
875           end = endKv.getRow();
876         } catch (IOException ioe) {
877           LOG.warn("Problem reading orphan file " + hfile + ", skipping");
878           continue;
879         } catch (NullPointerException ioe) {
880           LOG.warn("Orphan file " + hfile + " is possibly corrupted HFile, skipping");
881           continue;
882         } finally {
883           if (hf != null) {
884             hf.close();
885           }
886         }
887 
888         // expand the range to include the range of all hfiles
889         if (orphanRegionRange == null) {
890           // first range
891           orphanRegionRange = new Pair<byte[], byte[]>(start, end);
892         } else {
893           // TODO add test
894 
895           // expand range only if the hfile is wider.
896           if (Bytes.compareTo(orphanRegionRange.getFirst(), start) > 0) {
897             orphanRegionRange.setFirst(start);
898           }
899           if (Bytes.compareTo(orphanRegionRange.getSecond(), end) < 0 ) {
900             orphanRegionRange.setSecond(end);
901           }
902         }
903       }
904     }
905     if (orphanRegionRange == null) {
906       LOG.warn("No data in dir " + p + ", sidelining data");
907       fixes++;
908       sidelineRegionDir(fs, hi);
909       return;
910     }
911     LOG.info("Min max keys are : [" + Bytes.toString(orphanRegionRange.getFirst()) + ", " +
912         Bytes.toString(orphanRegionRange.getSecond()) + ")");
913 
914     // create new region on hdfs.  move data into place.
915     HRegionInfo hri = new HRegionInfo(template.getTableName(), orphanRegionRange.getFirst(), orphanRegionRange.getSecond());
916     LOG.info("Creating new region : " + hri);
917     HRegion region = HBaseFsckRepair.createHDFSRegionDir(getConf(), hri, template);
918     Path target = region.getRegionFileSystem().getRegionDir();
919 
920     // rename all the data to new region
921     mergeRegionDirs(target, hi);
922     fixes++;
923   }
924 
925   /**
926    * This method determines if there are table integrity errors in HDFS.  If
927    * there are errors and the appropriate "fix" options are enabled, the method
928    * will first correct orphan regions making them into legit regiondirs, and
929    * then reload to merge potentially overlapping regions.
930    *
931    * @return number of table integrity errors found
932    */
933   private int restoreHdfsIntegrity() throws IOException, InterruptedException {
934     // Determine what's on HDFS
935     LOG.info("Loading HBase regioninfo from HDFS...");
936     loadHdfsRegionDirs(); // populating regioninfo table.
937 
938     int errs = errors.getErrorList().size();
939     // First time just get suggestions.
940     tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
941     checkHdfsIntegrity(false, false);
942 
943     if (errors.getErrorList().size() == errs) {
944       LOG.info("No integrity errors.  We are done with this phase. Glorious.");
945       return 0;
946     }
947 
948     if (shouldFixHdfsOrphans() && orphanHdfsDirs.size() > 0) {
949       adoptHdfsOrphans(orphanHdfsDirs);
950       // TODO optimize by incrementally adding instead of reloading.
951     }
952 
953     // Make sure there are no holes now.
954     if (shouldFixHdfsHoles()) {
955       clearState(); // this also resets # fixes.
956       loadHdfsRegionDirs();
957       tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
958       tablesInfo = checkHdfsIntegrity(shouldFixHdfsHoles(), false);
959     }
960 
961     // Now we fix overlaps
962     if (shouldFixHdfsOverlaps()) {
963       // second pass we fix overlaps.
964       clearState(); // this also resets # fixes.
965       loadHdfsRegionDirs();
966       tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
967       tablesInfo = checkHdfsIntegrity(false, shouldFixHdfsOverlaps());
968     }
969 
970     return errors.getErrorList().size();
971   }
972 
973   /**
974    * Scan all the store file names to find any lingering reference files,
975    * which refer to some none-exiting files. If "fix" option is enabled,
976    * any lingering reference file will be sidelined if found.
977    * <p>
978    * Lingering reference file prevents a region from opening. It has to
979    * be fixed before a cluster can start properly.
980    */
981   private void offlineReferenceFileRepair() throws IOException {
982     Configuration conf = getConf();
983     Path hbaseRoot = FSUtils.getRootDir(conf);
984     FileSystem fs = hbaseRoot.getFileSystem(conf);
985     LOG.info("Computing mapping of all store files");
986     Map<String, Path> allFiles = FSUtils.getTableStoreFilePathMap(fs, hbaseRoot, errors);
987     errors.print("");
988     LOG.info("Validating mapping using HDFS state");
989     for (Path path: allFiles.values()) {
990       boolean isReference = false;
991       try {
992         isReference = StoreFileInfo.isReference(path);
993       } catch (Throwable t) {
994         // Ignore. Some files may not be store files at all.
995         // For example, files under .oldlogs folder in hbase:meta
996         // Warning message is already logged by
997         // StoreFile#isReference.
998       }
999       if (!isReference) continue;
1000 
1001       Path referredToFile = StoreFileInfo.getReferredToFile(path);
1002       if (fs.exists(referredToFile)) continue;  // good, expected
1003 
1004       // Found a lingering reference file
1005       errors.reportError(ERROR_CODE.LINGERING_REFERENCE_HFILE,
1006         "Found lingering reference file " + path);
1007       if (!shouldFixReferenceFiles()) continue;
1008 
1009       // Now, trying to fix it since requested
1010       boolean success = false;
1011       String pathStr = path.toString();
1012 
1013       // A reference file path should be like
1014       // ${hbase.rootdir}/data/namespace/table_name/region_id/family_name/referred_file.region_name
1015       // Up 5 directories to get the root folder.
1016       // So the file will be sidelined to a similar folder structure.
1017       int index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR);
1018       for (int i = 0; index > 0 && i < 5; i++) {
1019         index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR, index - 1);
1020       }
1021       if (index > 0) {
1022         Path rootDir = getSidelineDir();
1023         Path dst = new Path(rootDir, pathStr.substring(index + 1));
1024         fs.mkdirs(dst.getParent());
1025         LOG.info("Trying to sildeline reference file "
1026           + path + " to " + dst);
1027         setShouldRerun();
1028 
1029         success = fs.rename(path, dst);
1030       }
1031       if (!success) {
1032         LOG.error("Failed to sideline reference file " + path);
1033       }
1034     }
1035   }
1036 
1037   /**
1038    * TODO -- need to add tests for this.
1039    */
1040   private void reportEmptyMetaCells() {
1041     errors.print("Number of empty REGIONINFO_QUALIFIER rows in hbase:meta: " +
1042       emptyRegionInfoQualifiers.size());
1043     if (details) {
1044       for (Result r: emptyRegionInfoQualifiers) {
1045         errors.print("  " + r);
1046       }
1047     }
1048   }
1049 
1050   /**
1051    * TODO -- need to add tests for this.
1052    */
1053   private void reportTablesInFlux() {
1054     AtomicInteger numSkipped = new AtomicInteger(0);
1055     HTableDescriptor[] allTables = getTables(numSkipped);
1056     errors.print("Number of Tables: " + allTables.length);
1057     if (details) {
1058       if (numSkipped.get() > 0) {
1059         errors.detail("Number of Tables in flux: " + numSkipped.get());
1060       }
1061       for (HTableDescriptor td : allTables) {
1062         errors.detail("  Table: " + td.getTableName() + "\t" +
1063                            (td.isReadOnly() ? "ro" : "rw") + "\t" +
1064                             (td.isMetaRegion() ? "META" : "    ") + "\t" +
1065                            " families: " + td.getFamilies().size());
1066       }
1067     }
1068   }
1069 
1070   public ErrorReporter getErrors() {
1071     return errors;
1072   }
1073 
1074   /**
1075    * Read the .regioninfo file from the file system.  If there is no
1076    * .regioninfo, add it to the orphan hdfs region list.
1077    */
1078   private void loadHdfsRegioninfo(HbckInfo hbi) throws IOException {
1079     Path regionDir = hbi.getHdfsRegionDir();
1080     if (regionDir == null) {
1081       LOG.warn("No HDFS region dir found: " + hbi + " meta=" + hbi.metaEntry);
1082       return;
1083     }
1084 
1085     if (hbi.hdfsEntry.hri != null) {
1086       // already loaded data
1087       return;
1088     }
1089 
1090     FileSystem fs = FileSystem.get(getConf());
1091     HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
1092     LOG.debug("HRegionInfo read: " + hri.toString());
1093     hbi.hdfsEntry.hri = hri;
1094   }
1095 
1096   /**
1097    * Exception thrown when a integrity repair operation fails in an
1098    * unresolvable way.
1099    */
1100   public static class RegionRepairException extends IOException {
1101     private static final long serialVersionUID = 1L;
1102     final IOException ioe;
1103     public RegionRepairException(String s, IOException ioe) {
1104       super(s);
1105       this.ioe = ioe;
1106     }
1107   }
1108 
1109   /**
1110    * Populate hbi's from regionInfos loaded from file system.
1111    */
1112   private SortedMap<TableName, TableInfo> loadHdfsRegionInfos()
1113       throws IOException, InterruptedException {
1114     tablesInfo.clear(); // regenerating the data
1115     // generate region split structure
1116     Collection<HbckInfo> hbckInfos = regionInfoMap.values();
1117 
1118     // Parallelized read of .regioninfo files.
1119     List<WorkItemHdfsRegionInfo> hbis = new ArrayList<WorkItemHdfsRegionInfo>(hbckInfos.size());
1120     List<Future<Void>> hbiFutures;
1121 
1122     for (HbckInfo hbi : hbckInfos) {
1123       WorkItemHdfsRegionInfo work = new WorkItemHdfsRegionInfo(hbi, this, errors);
1124       hbis.add(work);
1125     }
1126 
1127     // Submit and wait for completion
1128     hbiFutures = executor.invokeAll(hbis);
1129 
1130     for(int i=0; i<hbiFutures.size(); i++) {
1131       WorkItemHdfsRegionInfo work = hbis.get(i);
1132       Future<Void> f = hbiFutures.get(i);
1133       try {
1134         f.get();
1135       } catch(ExecutionException e) {
1136         LOG.warn("Failed to read .regioninfo file for region " +
1137               work.hbi.getRegionNameAsString(), e.getCause());
1138       }
1139     }
1140 
1141     Path hbaseRoot = FSUtils.getRootDir(getConf());
1142     FileSystem fs = hbaseRoot.getFileSystem(getConf());
1143     // serialized table info gathering.
1144     for (HbckInfo hbi: hbckInfos) {
1145 
1146       if (hbi.getHdfsHRI() == null) {
1147         // was an orphan
1148         continue;
1149       }
1150 
1151 
1152       // get table name from hdfs, populate various HBaseFsck tables.
1153       TableName tableName = hbi.getTableName();
1154       if (tableName == null) {
1155         // There was an entry in hbase:meta not in the HDFS?
1156         LOG.warn("tableName was null for: " + hbi);
1157         continue;
1158       }
1159 
1160       TableInfo modTInfo = tablesInfo.get(tableName);
1161       if (modTInfo == null) {
1162         // only executed once per table.
1163         modTInfo = new TableInfo(tableName);
1164         tablesInfo.put(tableName, modTInfo);
1165         try {
1166           HTableDescriptor htd =
1167               FSTableDescriptors.getTableDescriptorFromFs(fs, hbaseRoot, tableName);
1168           modTInfo.htds.add(htd);
1169         } catch (IOException ioe) {
1170           if (!orphanTableDirs.containsKey(tableName)) {
1171             LOG.warn("Unable to read .tableinfo from " + hbaseRoot, ioe);
1172             //should only report once for each table
1173             errors.reportError(ERROR_CODE.NO_TABLEINFO_FILE,
1174                 "Unable to read .tableinfo from " + hbaseRoot + "/" + tableName);
1175             Set<String> columns = new HashSet<String>();
1176             orphanTableDirs.put(tableName, getColumnFamilyList(columns, hbi));
1177           }
1178         }
1179       }
1180       if (!hbi.isSkipChecks()) {
1181         modTInfo.addRegionInfo(hbi);
1182       }
1183     }
1184 
1185     loadTableInfosForTablesWithNoRegion();
1186     errors.print("");
1187 
1188     return tablesInfo;
1189   }
1190 
1191   /**
1192    * To get the column family list according to the column family dirs
1193    * @param columns
1194    * @param hbi
1195    * @return a set of column families
1196    * @throws IOException
1197    */
1198   private Set<String> getColumnFamilyList(Set<String> columns, HbckInfo hbi) throws IOException {
1199     Path regionDir = hbi.getHdfsRegionDir();
1200     FileSystem fs = regionDir.getFileSystem(getConf());
1201     FileStatus[] subDirs = fs.listStatus(regionDir, new FSUtils.FamilyDirFilter(fs));
1202     for (FileStatus subdir : subDirs) {
1203       String columnfamily = subdir.getPath().getName();
1204       columns.add(columnfamily);
1205     }
1206     return columns;
1207   }
1208 
1209   /**
1210    * To fabricate a .tableinfo file with following contents<br>
1211    * 1. the correct tablename <br>
1212    * 2. the correct colfamily list<br>
1213    * 3. the default properties for both {@link HTableDescriptor} and {@link HColumnDescriptor}<br>
1214    * @throws IOException
1215    */
1216   private boolean fabricateTableInfo(FSTableDescriptors fstd, TableName tableName,
1217       Set<String> columns) throws IOException {
1218     if (columns ==null || columns.isEmpty()) return false;
1219     HTableDescriptor htd = new HTableDescriptor(tableName);
1220     for (String columnfamimly : columns) {
1221       htd.addFamily(new HColumnDescriptor(columnfamimly));
1222     }
1223     fstd.createTableDescriptor(htd, true);
1224     return true;
1225   }
1226 
1227   /**
1228    * To fix the empty REGIONINFO_QUALIFIER rows from hbase:meta <br>
1229    * @throws IOException
1230    */
1231   public void fixEmptyMetaCells() throws IOException {
1232     if (shouldFixEmptyMetaCells() && !emptyRegionInfoQualifiers.isEmpty()) {
1233       LOG.info("Trying to fix empty REGIONINFO_QUALIFIER hbase:meta rows.");
1234       for (Result region : emptyRegionInfoQualifiers) {
1235         deleteMetaRegion(region.getRow());
1236         errors.getErrorList().remove(ERROR_CODE.EMPTY_META_CELL);
1237       }
1238       emptyRegionInfoQualifiers.clear();
1239     }
1240   }
1241 
1242   /**
1243    * To fix orphan table by creating a .tableinfo file under tableDir <br>
1244    * 1. if TableInfo is cached, to recover the .tableinfo accordingly <br>
1245    * 2. else create a default .tableinfo file with following items<br>
1246    * &nbsp;2.1 the correct tablename <br>
1247    * &nbsp;2.2 the correct colfamily list<br>
1248    * &nbsp;2.3 the default properties for both {@link HTableDescriptor} and {@link HColumnDescriptor}<br>
1249    * @throws IOException
1250    */
1251   public void fixOrphanTables() throws IOException {
1252     if (shouldFixTableOrphans() && !orphanTableDirs.isEmpty()) {
1253 
1254       List<TableName> tmpList = new ArrayList<TableName>();
1255       tmpList.addAll(orphanTableDirs.keySet());
1256       HTableDescriptor[] htds = getHTableDescriptors(tmpList);
1257       Iterator<Entry<TableName, Set<String>>> iter =
1258           orphanTableDirs.entrySet().iterator();
1259       int j = 0;
1260       int numFailedCase = 0;
1261       FSTableDescriptors fstd = new FSTableDescriptors(getConf());
1262       while (iter.hasNext()) {
1263         Entry<TableName, Set<String>> entry =
1264             iter.next();
1265         TableName tableName = entry.getKey();
1266         LOG.info("Trying to fix orphan table error: " + tableName);
1267         if (j < htds.length) {
1268           if (tableName.equals(htds[j].getTableName())) {
1269             HTableDescriptor htd = htds[j];
1270             LOG.info("fixing orphan table: " + tableName + " from cache");
1271             fstd.createTableDescriptor(htd, true);
1272             j++;
1273             iter.remove();
1274           }
1275         } else {
1276           if (fabricateTableInfo(fstd, tableName, entry.getValue())) {
1277             LOG.warn("fixing orphan table: " + tableName + " with a default .tableinfo file");
1278             LOG.warn("Strongly recommend to modify the HTableDescriptor if necessary for: " + tableName);
1279             iter.remove();
1280           } else {
1281             LOG.error("Unable to create default .tableinfo for " + tableName + " while missing column family information");
1282             numFailedCase++;
1283           }
1284         }
1285         fixes++;
1286       }
1287 
1288       if (orphanTableDirs.isEmpty()) {
1289         // all orphanTableDirs are luckily recovered
1290         // re-run doFsck after recovering the .tableinfo file
1291         setShouldRerun();
1292         LOG.warn("Strongly recommend to re-run manually hfsck after all orphanTableDirs being fixed");
1293       } else if (numFailedCase > 0) {
1294         LOG.error("Failed to fix " + numFailedCase
1295             + " OrphanTables with default .tableinfo files");
1296       }
1297 
1298     }
1299     //cleanup the list
1300     orphanTableDirs.clear();
1301 
1302   }
1303 
1304   /**
1305    * This borrows code from MasterFileSystem.bootstrap()
1306    *
1307    * @return an open hbase:meta HRegion
1308    */
1309   private HRegion createNewMeta() throws IOException {
1310       Path rootdir = FSUtils.getRootDir(getConf());
1311     Configuration c = getConf();
1312     HRegionInfo metaHRI = new HRegionInfo(HRegionInfo.FIRST_META_REGIONINFO);
1313     HTableDescriptor metaDescriptor = new FSTableDescriptors(c).get(TableName.META_TABLE_NAME);
1314     MasterFileSystem.setInfoFamilyCachingForMeta(metaDescriptor, false);
1315     HRegion meta = HRegion.createHRegion(metaHRI, rootdir, c, metaDescriptor);
1316     MasterFileSystem.setInfoFamilyCachingForMeta(metaDescriptor, true);
1317     return meta;
1318   }
1319 
1320   /**
1321    * Generate set of puts to add to new meta.  This expects the tables to be
1322    * clean with no overlaps or holes.  If there are any problems it returns null.
1323    *
1324    * @return An array list of puts to do in bulk, null if tables have problems
1325    */
1326   private ArrayList<Put> generatePuts(
1327       SortedMap<TableName, TableInfo> tablesInfo) throws IOException {
1328     ArrayList<Put> puts = new ArrayList<Put>();
1329     boolean hasProblems = false;
1330     for (Entry<TableName, TableInfo> e : tablesInfo.entrySet()) {
1331       TableName name = e.getKey();
1332 
1333       // skip "hbase:meta"
1334       if (name.compareTo(TableName.META_TABLE_NAME) == 0) {
1335         continue;
1336       }
1337 
1338       TableInfo ti = e.getValue();
1339       for (Entry<byte[], Collection<HbckInfo>> spl : ti.sc.getStarts().asMap()
1340           .entrySet()) {
1341         Collection<HbckInfo> his = spl.getValue();
1342         int sz = his.size();
1343         if (sz != 1) {
1344           // problem
1345           LOG.error("Split starting at " + Bytes.toStringBinary(spl.getKey())
1346               + " had " +  sz + " regions instead of exactly 1." );
1347           hasProblems = true;
1348           continue;
1349         }
1350 
1351         // add the row directly to meta.
1352         HbckInfo hi = his.iterator().next();
1353         HRegionInfo hri = hi.getHdfsHRI(); // hi.metaEntry;
1354         Put p = MetaTableAccessor.makePutFromRegionInfo(hri);
1355         puts.add(p);
1356       }
1357     }
1358     return hasProblems ? null : puts;
1359   }
1360 
1361   /**
1362    * Suggest fixes for each table
1363    */
1364   private void suggestFixes(
1365       SortedMap<TableName, TableInfo> tablesInfo) throws IOException {
1366     logParallelMerge();
1367     for (TableInfo tInfo : tablesInfo.values()) {
1368       TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
1369       tInfo.checkRegionChain(handler);
1370     }
1371   }
1372 
1373   /**
1374    * Rebuilds meta from information in hdfs/fs.  Depends on configuration
1375    * settings passed into hbck constructor to point to a particular fs/dir.
1376    *
1377    * @param fix flag that determines if method should attempt to fix holes
1378    * @return true if successful, false if attempt failed.
1379    */
1380   public boolean rebuildMeta(boolean fix) throws IOException,
1381       InterruptedException {
1382 
1383     // TODO check to make sure hbase is offline. (or at least the table
1384     // currently being worked on is off line)
1385 
1386     // Determine what's on HDFS
1387     LOG.info("Loading HBase regioninfo from HDFS...");
1388     loadHdfsRegionDirs(); // populating regioninfo table.
1389 
1390     int errs = errors.getErrorList().size();
1391     tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1392     checkHdfsIntegrity(false, false);
1393 
1394     // make sure ok.
1395     if (errors.getErrorList().size() != errs) {
1396       // While in error state, iterate until no more fixes possible
1397       while(true) {
1398         fixes = 0;
1399         suggestFixes(tablesInfo);
1400         errors.clear();
1401         loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1402         checkHdfsIntegrity(shouldFixHdfsHoles(), shouldFixHdfsOverlaps());
1403 
1404         int errCount = errors.getErrorList().size();
1405 
1406         if (fixes == 0) {
1407           if (errCount > 0) {
1408             return false; // failed to fix problems.
1409           } else {
1410             break; // no fixes and no problems? drop out and fix stuff!
1411           }
1412         }
1413       }
1414     }
1415 
1416     // we can rebuild, move old meta out of the way and start
1417     LOG.info("HDFS regioninfo's seems good.  Sidelining old hbase:meta");
1418     Path backupDir = sidelineOldMeta();
1419 
1420     LOG.info("Creating new hbase:meta");
1421     HRegion meta = createNewMeta();
1422 
1423     // populate meta
1424     List<Put> puts = generatePuts(tablesInfo);
1425     if (puts == null) {
1426       LOG.fatal("Problem encountered when creating new hbase:meta entries.  " +
1427         "You may need to restore the previously sidelined hbase:meta");
1428       return false;
1429     }
1430     meta.batchMutate(puts.toArray(new Put[puts.size()]));
1431     HRegion.closeHRegion(meta);
1432     LOG.info("Success! hbase:meta table rebuilt.");
1433     LOG.info("Old hbase:meta is moved into " + backupDir);
1434     return true;
1435   }
1436 
1437   /**
1438    * Log an appropriate message about whether or not overlapping merges are computed in parallel.
1439    */
1440   private void logParallelMerge() {
1441     if (getConf().getBoolean("hbasefsck.overlap.merge.parallel", true)) {
1442       LOG.info("Handling overlap merges in parallel. set hbasefsck.overlap.merge.parallel to" +
1443           " false to run serially.");
1444     } else {
1445       LOG.info("Handling overlap merges serially.  set hbasefsck.overlap.merge.parallel to" +
1446           " true to run in parallel.");
1447     }
1448   }
1449 
1450   private SortedMap<TableName, TableInfo> checkHdfsIntegrity(boolean fixHoles,
1451       boolean fixOverlaps) throws IOException {
1452     LOG.info("Checking HBase region split map from HDFS data...");
1453     logParallelMerge();
1454     for (TableInfo tInfo : tablesInfo.values()) {
1455       TableIntegrityErrorHandler handler;
1456       if (fixHoles || fixOverlaps) {
1457         handler = tInfo.new HDFSIntegrityFixer(tInfo, errors, getConf(),
1458           fixHoles, fixOverlaps);
1459       } else {
1460         handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
1461       }
1462       if (!tInfo.checkRegionChain(handler)) {
1463         // should dump info as well.
1464         errors.report("Found inconsistency in table " + tInfo.getName());
1465       }
1466     }
1467     return tablesInfo;
1468   }
1469 
1470   private Path getSidelineDir() throws IOException {
1471     if (sidelineDir == null) {
1472       Path hbaseDir = FSUtils.getRootDir(getConf());
1473       Path hbckDir = new Path(hbaseDir, HConstants.HBCK_SIDELINEDIR_NAME);
1474       sidelineDir = new Path(hbckDir, hbaseDir.getName() + "-"
1475           + startMillis);
1476     }
1477     return sidelineDir;
1478   }
1479 
1480   /**
1481    * Sideline a region dir (instead of deleting it)
1482    */
1483   Path sidelineRegionDir(FileSystem fs, HbckInfo hi) throws IOException {
1484     return sidelineRegionDir(fs, null, hi);
1485   }
1486 
1487   /**
1488    * Sideline a region dir (instead of deleting it)
1489    *
1490    * @param parentDir if specified, the region will be sidelined to
1491    * folder like .../parentDir/<table name>/<region name>. The purpose
1492    * is to group together similar regions sidelined, for example, those
1493    * regions should be bulk loaded back later on. If null, it is ignored.
1494    */
1495   Path sidelineRegionDir(FileSystem fs,
1496       String parentDir, HbckInfo hi) throws IOException {
1497     TableName tableName = hi.getTableName();
1498     Path regionDir = hi.getHdfsRegionDir();
1499 
1500     if (!fs.exists(regionDir)) {
1501       LOG.warn("No previous " + regionDir + " exists.  Continuing.");
1502       return null;
1503     }
1504 
1505     Path rootDir = getSidelineDir();
1506     if (parentDir != null) {
1507       rootDir = new Path(rootDir, parentDir);
1508     }
1509     Path sidelineTableDir= FSUtils.getTableDir(rootDir, tableName);
1510     Path sidelineRegionDir = new Path(sidelineTableDir, regionDir.getName());
1511     fs.mkdirs(sidelineRegionDir);
1512     boolean success = false;
1513     FileStatus[] cfs =  fs.listStatus(regionDir);
1514     if (cfs == null) {
1515       LOG.info("Region dir is empty: " + regionDir);
1516     } else {
1517       for (FileStatus cf : cfs) {
1518         Path src = cf.getPath();
1519         Path dst =  new Path(sidelineRegionDir, src.getName());
1520         if (fs.isFile(src)) {
1521           // simple file
1522           success = fs.rename(src, dst);
1523           if (!success) {
1524             String msg = "Unable to rename file " + src +  " to " + dst;
1525             LOG.error(msg);
1526             throw new IOException(msg);
1527           }
1528           continue;
1529         }
1530 
1531         // is a directory.
1532         fs.mkdirs(dst);
1533 
1534         LOG.info("Sidelining files from " + src + " into containing region " + dst);
1535         // FileSystem.rename is inconsistent with directories -- if the
1536         // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
1537         // it moves the src into the dst dir resulting in (foo/a/b).  If
1538         // the dst does not exist, and the src a dir, src becomes dst. (foo/b)
1539         FileStatus[] hfiles = fs.listStatus(src);
1540         if (hfiles != null && hfiles.length > 0) {
1541           for (FileStatus hfile : hfiles) {
1542             success = fs.rename(hfile.getPath(), dst);
1543             if (!success) {
1544               String msg = "Unable to rename file " + src +  " to " + dst;
1545               LOG.error(msg);
1546               throw new IOException(msg);
1547             }
1548           }
1549         }
1550         LOG.debug("Sideline directory contents:");
1551         debugLsr(sidelineRegionDir);
1552       }
1553     }
1554 
1555     LOG.info("Removing old region dir: " + regionDir);
1556     success = fs.delete(regionDir, true);
1557     if (!success) {
1558       String msg = "Unable to delete dir " + regionDir;
1559       LOG.error(msg);
1560       throw new IOException(msg);
1561     }
1562     return sidelineRegionDir;
1563   }
1564 
1565   /**
1566    * Side line an entire table.
1567    */
1568   void sidelineTable(FileSystem fs, TableName tableName, Path hbaseDir,
1569       Path backupHbaseDir) throws IOException {
1570     Path tableDir = FSUtils.getTableDir(hbaseDir, tableName);
1571     if (fs.exists(tableDir)) {
1572       Path backupTableDir= FSUtils.getTableDir(backupHbaseDir, tableName);
1573       fs.mkdirs(backupTableDir.getParent());
1574       boolean success = fs.rename(tableDir, backupTableDir);
1575       if (!success) {
1576         throw new IOException("Failed to move  " + tableName + " from "
1577             +  tableDir + " to " + backupTableDir);
1578       }
1579     } else {
1580       LOG.info("No previous " + tableName +  " exists.  Continuing.");
1581     }
1582   }
1583 
1584   /**
1585    * @return Path to backup of original directory
1586    */
1587   Path sidelineOldMeta() throws IOException {
1588     // put current hbase:meta aside.
1589     Path hbaseDir = FSUtils.getRootDir(getConf());
1590     FileSystem fs = hbaseDir.getFileSystem(getConf());
1591     Path backupDir = getSidelineDir();
1592     fs.mkdirs(backupDir);
1593 
1594     try {
1595       sidelineTable(fs, TableName.META_TABLE_NAME, hbaseDir, backupDir);
1596     } catch (IOException e) {
1597         LOG.fatal("... failed to sideline meta. Currently in inconsistent state.  To restore "
1598             + "try to rename hbase:meta in " + backupDir.getName() + " to "
1599             + hbaseDir.getName() + ".", e);
1600       throw e; // throw original exception
1601     }
1602     return backupDir;
1603   }
1604 
1605   /**
1606    * Load the list of disabled tables in ZK into local set.
1607    * @throws ZooKeeperConnectionException
1608    * @throws IOException
1609    */
1610   private void loadDisabledTables()
1611   throws ZooKeeperConnectionException, IOException {
1612     HConnectionManager.execute(new HConnectable<Void>(getConf()) {
1613       @Override
1614       public Void connect(HConnection connection) throws IOException {
1615         ZooKeeperWatcher zkw = createZooKeeperWatcher();
1616         try {
1617           for (TableName tableName :
1618               ZKTableStateClientSideReader.getDisabledOrDisablingTables(zkw)) {
1619             disabledTables.add(tableName);
1620           }
1621         } catch (KeeperException ke) {
1622           throw new IOException(ke);
1623         } catch (InterruptedException e) {
1624           throw new InterruptedIOException();
1625         } finally {
1626           zkw.close();
1627         }
1628         return null;
1629       }
1630     });
1631   }
1632 
1633   /**
1634    * Check if the specified region's table is disabled.
1635    */
1636   private boolean isTableDisabled(HRegionInfo regionInfo) {
1637     return disabledTables.contains(regionInfo.getTable());
1638   }
1639 
1640   /**
1641    * Scan HDFS for all regions, recording their information into
1642    * regionInfoMap
1643    */
1644   public void loadHdfsRegionDirs() throws IOException, InterruptedException {
1645     Path rootDir = FSUtils.getRootDir(getConf());
1646     FileSystem fs = rootDir.getFileSystem(getConf());
1647 
1648     // list all tables from HDFS
1649     List<FileStatus> tableDirs = Lists.newArrayList();
1650 
1651     boolean foundVersionFile = fs.exists(new Path(rootDir, HConstants.VERSION_FILE_NAME));
1652 
1653     List<Path> paths = FSUtils.getTableDirs(fs, rootDir);
1654     for (Path path : paths) {
1655       TableName tableName = FSUtils.getTableName(path);
1656        if ((!checkMetaOnly &&
1657            isTableIncluded(tableName)) ||
1658            tableName.equals(TableName.META_TABLE_NAME)) {
1659          tableDirs.add(fs.getFileStatus(path));
1660        }
1661     }
1662 
1663     // verify that version file exists
1664     if (!foundVersionFile) {
1665       errors.reportError(ERROR_CODE.NO_VERSION_FILE,
1666           "Version file does not exist in root dir " + rootDir);
1667       if (shouldFixVersionFile()) {
1668         LOG.info("Trying to create a new " + HConstants.VERSION_FILE_NAME
1669             + " file.");
1670         setShouldRerun();
1671         FSUtils.setVersion(fs, rootDir, getConf().getInt(
1672             HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000), getConf().getInt(
1673             HConstants.VERSION_FILE_WRITE_ATTEMPTS,
1674             HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
1675       }
1676     }
1677 
1678     // level 1:  <HBASE_DIR>/*
1679     List<WorkItemHdfsDir> dirs = new ArrayList<WorkItemHdfsDir>(tableDirs.size());
1680     List<Future<Void>> dirsFutures;
1681 
1682     for (FileStatus tableDir : tableDirs) {
1683       LOG.debug("Loading region dirs from " +tableDir.getPath());
1684       dirs.add(new WorkItemHdfsDir(this, fs, errors, tableDir));
1685     }
1686 
1687     // Invoke and wait for Callables to complete
1688     dirsFutures = executor.invokeAll(dirs);
1689 
1690     for(Future<Void> f: dirsFutures) {
1691       try {
1692         f.get();
1693       } catch(ExecutionException e) {
1694         LOG.warn("Could not load region dir " , e.getCause());
1695       }
1696     }
1697     errors.print("");
1698   }
1699 
1700   /**
1701    * Record the location of the hbase:meta region as found in ZooKeeper.
1702    */
1703   private boolean recordMetaRegion() throws IOException {
1704     RegionLocations rl = ((ClusterConnection)connection).locateRegion(TableName.META_TABLE_NAME,
1705         HConstants.EMPTY_START_ROW, false, false);
1706     if (rl == null) {
1707       errors.reportError(ERROR_CODE.NULL_META_REGION,
1708           "META region or some of its attributes are null.");
1709       return false;
1710     }
1711     for (HRegionLocation metaLocation : rl.getRegionLocations()) {
1712       // Check if Meta region is valid and existing
1713       if (metaLocation == null || metaLocation.getRegionInfo() == null ||
1714           metaLocation.getHostname() == null) {
1715         errors.reportError(ERROR_CODE.NULL_META_REGION,
1716             "META region or some of its attributes are null.");
1717         return false;
1718       }
1719       ServerName sn = metaLocation.getServerName();
1720       MetaEntry m = new MetaEntry(metaLocation.getRegionInfo(), sn, System.currentTimeMillis());
1721       HbckInfo hbckInfo = regionInfoMap.get(metaLocation.getRegionInfo().getEncodedName());
1722       if (hbckInfo == null) {
1723         regionInfoMap.put(metaLocation.getRegionInfo().getEncodedName(), new HbckInfo(m));
1724       } else {
1725         hbckInfo.metaEntry = m;
1726       }
1727     }
1728     return true;
1729   }
1730 
1731   private ZooKeeperWatcher createZooKeeperWatcher() throws IOException {
1732     return new ZooKeeperWatcher(getConf(), "hbase Fsck", new Abortable() {
1733       @Override
1734       public void abort(String why, Throwable e) {
1735         LOG.error(why, e);
1736         System.exit(1);
1737       }
1738 
1739       @Override
1740       public boolean isAborted() {
1741         return false;
1742       }
1743 
1744     });
1745   }
1746 
1747   private ServerName getMetaRegionServerName(int replicaId)
1748   throws IOException, KeeperException {
1749     ZooKeeperWatcher zkw = createZooKeeperWatcher();
1750     ServerName sn = null;
1751     try {
1752       sn = new MetaTableLocator().getMetaRegionLocation(zkw, replicaId);
1753     } finally {
1754       zkw.close();
1755     }
1756     return sn;
1757   }
1758 
1759   /**
1760    * Contacts each regionserver and fetches metadata about regions.
1761    * @param regionServerList - the list of region servers to connect to
1762    * @throws IOException if a remote or network exception occurs
1763    */
1764   void processRegionServers(Collection<ServerName> regionServerList)
1765     throws IOException, InterruptedException {
1766 
1767     List<WorkItemRegion> workItems = new ArrayList<WorkItemRegion>(regionServerList.size());
1768     List<Future<Void>> workFutures;
1769 
1770     // loop to contact each region server in parallel
1771     for (ServerName rsinfo: regionServerList) {
1772       workItems.add(new WorkItemRegion(this, rsinfo, errors, connection));
1773     }
1774 
1775     workFutures = executor.invokeAll(workItems);
1776 
1777     for(int i=0; i<workFutures.size(); i++) {
1778       WorkItemRegion item = workItems.get(i);
1779       Future<Void> f = workFutures.get(i);
1780       try {
1781         f.get();
1782       } catch(ExecutionException e) {
1783         LOG.warn("Could not process regionserver " + item.rsinfo.getHostAndPort(),
1784             e.getCause());
1785       }
1786     }
1787   }
1788 
1789   /**
1790    * Check consistency of all regions that have been found in previous phases.
1791    */
1792   private void checkAndFixConsistency()
1793   throws IOException, KeeperException, InterruptedException {
1794 	  // Divide the checks in two phases. One for default/primary replicas and another
1795 	  // for the non-primary ones. Keeps code cleaner this way.
1796     List<CheckRegionConsistencyWorkItem> workItems =
1797         new ArrayList<CheckRegionConsistencyWorkItem>(regionInfoMap.size());
1798     for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {
1799       if (e.getValue().getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
1800         workItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue()));
1801       }
1802     }
1803     checkRegionConsistencyConcurrently(workItems);
1804 
1805     boolean prevHdfsCheck = shouldCheckHdfs();
1806     setCheckHdfs(false); //replicas don't have any hdfs data
1807     // Run a pass over the replicas and fix any assignment issues that exist on the currently
1808     // deployed/undeployed replicas.
1809     List<CheckRegionConsistencyWorkItem> replicaWorkItems =
1810         new ArrayList<CheckRegionConsistencyWorkItem>(regionInfoMap.size());
1811     for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {
1812       if (e.getValue().getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) {
1813         replicaWorkItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue()));
1814       }
1815     }
1816     checkRegionConsistencyConcurrently(replicaWorkItems);
1817     setCheckHdfs(prevHdfsCheck);
1818   }
1819 
1820   /**
1821    * Check consistency of all regions using mulitple threads concurrently.
1822    */
1823   private void checkRegionConsistencyConcurrently(
1824     final List<CheckRegionConsistencyWorkItem> workItems)
1825     throws IOException, KeeperException, InterruptedException {
1826     if (workItems.isEmpty()) {
1827       return;  // nothing to check
1828     }
1829 
1830     List<Future<Void>> workFutures = executor.invokeAll(workItems);
1831     for(Future<Void> f: workFutures) {
1832       try {
1833         f.get();
1834       } catch(ExecutionException e1) {
1835         LOG.warn("Could not check region consistency " , e1.getCause());
1836         if (e1.getCause() instanceof IOException) {
1837           throw (IOException)e1.getCause();
1838         } else if (e1.getCause() instanceof KeeperException) {
1839           throw (KeeperException)e1.getCause();
1840         } else if (e1.getCause() instanceof InterruptedException) {
1841           throw (InterruptedException)e1.getCause();
1842         } else {
1843           throw new IOException(e1.getCause());
1844         }
1845       }
1846     }
1847   }
1848 
1849   class CheckRegionConsistencyWorkItem implements Callable<Void> {
1850     private final String key;
1851     private final HbckInfo hbi;
1852 
1853     CheckRegionConsistencyWorkItem(String key, HbckInfo hbi) {
1854       this.key = key;
1855       this.hbi = hbi;
1856     }
1857 
1858     @Override
1859     public synchronized Void call() throws Exception {
1860       checkRegionConsistency(key, hbi);
1861       return null;
1862     }
1863   }
1864   
1865   private void preCheckPermission() throws IOException, AccessDeniedException {
1866     if (shouldIgnorePreCheckPermission()) {
1867       return;
1868     }
1869 
1870     Path hbaseDir = FSUtils.getRootDir(getConf());
1871     FileSystem fs = hbaseDir.getFileSystem(getConf());
1872     UserProvider userProvider = UserProvider.instantiate(getConf());
1873     UserGroupInformation ugi = userProvider.getCurrent().getUGI();
1874     FileStatus[] files = fs.listStatus(hbaseDir);
1875     for (FileStatus file : files) {
1876       try {
1877         FSUtils.checkAccess(ugi, file, FsAction.WRITE);
1878       } catch (AccessDeniedException ace) {
1879         LOG.warn("Got AccessDeniedException when preCheckPermission ", ace);
1880         errors.reportError(ERROR_CODE.WRONG_USAGE, "Current user " + ugi.getUserName()
1881           + " does not have write perms to " + file.getPath()
1882           + ". Please rerun hbck as hdfs user " + file.getOwner());
1883         throw ace;
1884       }
1885     }
1886   }
1887 
1888   /**
1889    * Deletes region from meta table
1890    */
1891   private void deleteMetaRegion(HbckInfo hi) throws IOException {
1892     deleteMetaRegion(hi.metaEntry.getRegionName());
1893   }
1894 
1895   /**
1896    * Deletes region from meta table
1897    */
1898   private void deleteMetaRegion(byte[] metaKey) throws IOException {
1899     Delete d = new Delete(metaKey);
1900     meta.delete(d);
1901     LOG.info("Deleted " + Bytes.toString(metaKey) + " from META" );
1902   }
1903 
1904   /**
1905    * Reset the split parent region info in meta table
1906    */
1907   private void resetSplitParent(HbckInfo hi) throws IOException {
1908     RowMutations mutations = new RowMutations(hi.metaEntry.getRegionName());
1909     Delete d = new Delete(hi.metaEntry.getRegionName());
1910     d.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER);
1911     d.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER);
1912     mutations.add(d);
1913 
1914     HRegionInfo hri = new HRegionInfo(hi.metaEntry);
1915     hri.setOffline(false);
1916     hri.setSplit(false);
1917     Put p = MetaTableAccessor.makePutFromRegionInfo(hri);
1918     mutations.add(p);
1919 
1920     meta.mutateRow(mutations);
1921     LOG.info("Reset split parent " + hi.metaEntry.getRegionNameAsString() + " in META" );
1922   }
1923 
1924   /**
1925    * This backwards-compatibility wrapper for permanently offlining a region
1926    * that should not be alive.  If the region server does not support the
1927    * "offline" method, it will use the closest unassign method instead.  This
1928    * will basically work until one attempts to disable or delete the affected
1929    * table.  The problem has to do with in-memory only master state, so
1930    * restarting the HMaster or failing over to another should fix this.
1931    */
1932   private void offline(byte[] regionName) throws IOException {
1933     String regionString = Bytes.toStringBinary(regionName);
1934     if (!rsSupportsOffline) {
1935       LOG.warn("Using unassign region " + regionString
1936           + " instead of using offline method, you should"
1937           + " restart HMaster after these repairs");
1938       admin.unassign(regionName, true);
1939       return;
1940     }
1941 
1942     // first time we assume the rs's supports #offline.
1943     try {
1944       LOG.info("Offlining region " + regionString);
1945       admin.offline(regionName);
1946     } catch (IOException ioe) {
1947       String notFoundMsg = "java.lang.NoSuchMethodException: " +
1948         "org.apache.hadoop.hbase.master.HMaster.offline([B)";
1949       if (ioe.getMessage().contains(notFoundMsg)) {
1950         LOG.warn("Using unassign region " + regionString
1951             + " instead of using offline method, you should"
1952             + " restart HMaster after these repairs");
1953         rsSupportsOffline = false; // in the future just use unassign
1954         admin.unassign(regionName, true);
1955         return;
1956       }
1957       throw ioe;
1958     }
1959   }
1960 
1961   private void undeployRegions(HbckInfo hi) throws IOException, InterruptedException {
1962     undeployRegionsForHbi(hi);
1963     // undeploy replicas of the region (but only if the method is invoked for the primary)
1964     if (hi.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) {
1965       return;
1966     }
1967     int numReplicas = admin.getTableDescriptor(hi.getTableName()).getRegionReplication();
1968     for (int i = 1; i < numReplicas; i++) {
1969       if (hi.getPrimaryHRIForDeployedReplica() == null) continue;
1970       HRegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(
1971           hi.getPrimaryHRIForDeployedReplica(), i);
1972       HbckInfo h = regionInfoMap.get(hri.getEncodedName());
1973       if (h != null) {
1974         undeployRegionsForHbi(h);
1975         //set skip checks; we undeployed it, and we don't want to evaluate this anymore
1976         //in consistency checks
1977         h.setSkipChecks(true);
1978       }
1979     }
1980   }
1981 
1982   private void undeployRegionsForHbi(HbckInfo hi) throws IOException, InterruptedException {
1983     for (OnlineEntry rse : hi.deployedEntries) {
1984       LOG.debug("Undeploy region "  + rse.hri + " from " + rse.hsa);
1985       try {
1986         HBaseFsckRepair.closeRegionSilentlyAndWait(connection, rse.hsa, rse.hri);
1987         offline(rse.hri.getRegionName());
1988       } catch (IOException ioe) {
1989         LOG.warn("Got exception when attempting to offline region "
1990             + Bytes.toString(rse.hri.getRegionName()), ioe);
1991       }
1992     }
1993   }
1994 
1995   /**
1996    * Attempts to undeploy a region from a region server based in information in
1997    * META.  Any operations that modify the file system should make sure that
1998    * its corresponding region is not deployed to prevent data races.
1999    *
2000    * A separate call is required to update the master in-memory region state
2001    * kept in the AssignementManager.  Because disable uses this state instead of
2002    * that found in META, we can't seem to cleanly disable/delete tables that
2003    * have been hbck fixed.  When used on a version of HBase that does not have
2004    * the offline ipc call exposed on the master (<0.90.5, <0.92.0) a master
2005    * restart or failover may be required.
2006    */
2007   private void closeRegion(HbckInfo hi) throws IOException, InterruptedException {
2008     if (hi.metaEntry == null && hi.hdfsEntry == null) {
2009       undeployRegions(hi);
2010       return;
2011     }
2012 
2013     // get assignment info and hregioninfo from meta.
2014     Get get = new Get(hi.getRegionName());
2015     get.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
2016     get.addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
2017     get.addColumn(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER);
2018     // also get the locations of the replicas to close if the primary region is being closed
2019     if (hi.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
2020       int numReplicas = admin.getTableDescriptor(hi.getTableName()).getRegionReplication();
2021       for (int i = 0; i < numReplicas; i++) {
2022         get.addColumn(HConstants.CATALOG_FAMILY, MetaTableAccessor.getServerColumn(i));
2023         get.addColumn(HConstants.CATALOG_FAMILY, MetaTableAccessor.getStartCodeColumn(i));
2024       }
2025     }
2026     Result r = meta.get(get);
2027     RegionLocations rl = MetaTableAccessor.getRegionLocations(r);
2028     if (rl == null) {
2029       LOG.warn("Unable to close region " + hi.getRegionNameAsString() +
2030           " since meta does not have handle to reach it");
2031       return;
2032     }
2033     for (HRegionLocation h : rl.getRegionLocations()) {
2034       ServerName serverName = h.getServerName();
2035       if (serverName == null) {
2036         errors.reportError("Unable to close region "
2037             + hi.getRegionNameAsString() +  " because meta does not "
2038             + "have handle to reach it.");
2039         continue;
2040       }
2041       HRegionInfo hri = h.getRegionInfo();
2042       if (hri == null) {
2043         LOG.warn("Unable to close region " + hi.getRegionNameAsString()
2044             + " because hbase:meta had invalid or missing "
2045             + HConstants.CATALOG_FAMILY_STR + ":"
2046             + Bytes.toString(HConstants.REGIONINFO_QUALIFIER)
2047             + " qualifier value.");
2048         continue;
2049       }
2050       // close the region -- close files and remove assignment
2051       HBaseFsckRepair.closeRegionSilentlyAndWait(connection, serverName, hri);
2052     }
2053   }
2054 
2055   private void tryAssignmentRepair(HbckInfo hbi, String msg) throws IOException,
2056     KeeperException, InterruptedException {
2057     // If we are trying to fix the errors
2058     if (shouldFixAssignments()) {
2059       errors.print(msg);
2060       undeployRegions(hbi);
2061       setShouldRerun();
2062       HRegionInfo hri = hbi.getHdfsHRI();
2063       if (hri == null) {
2064         hri = hbi.metaEntry;
2065       }
2066       HBaseFsckRepair.fixUnassigned(admin, hri);
2067       HBaseFsckRepair.waitUntilAssigned(admin, hri);
2068 
2069       // also assign replicas if needed (do it only when this call operates on a primary replica)
2070       if (hbi.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) return;
2071       int replicationCount = admin.getTableDescriptor(hri.getTable()).getRegionReplication();
2072       for (int i = 1; i < replicationCount; i++) {
2073         hri = RegionReplicaUtil.getRegionInfoForReplica(hri, i);
2074         HbckInfo h = regionInfoMap.get(hri.getEncodedName());
2075         if (h != null) {
2076           undeployRegions(h);
2077           //set skip checks; we undeploy & deploy it; we don't want to evaluate this hbi anymore
2078           //in consistency checks
2079           h.setSkipChecks(true);
2080         }
2081         HBaseFsckRepair.fixUnassigned(admin, hri);
2082         HBaseFsckRepair.waitUntilAssigned(admin, hri);
2083       }
2084 
2085     }
2086   }
2087 
2088   /**
2089    * Check a single region for consistency and correct deployment.
2090    */
2091   private void checkRegionConsistency(final String key, final HbckInfo hbi)
2092   throws IOException, KeeperException, InterruptedException {
2093 
2094 	if (hbi.isSkipChecks()) return;
2095 	String descriptiveName = hbi.toString();
2096     boolean inMeta = hbi.metaEntry != null;
2097     // In case not checking HDFS, assume the region is on HDFS
2098     boolean inHdfs = !shouldCheckHdfs() || hbi.getHdfsRegionDir() != null;
2099     boolean hasMetaAssignment = inMeta && hbi.metaEntry.regionServer != null;
2100     boolean isDeployed = !hbi.deployedOn.isEmpty();
2101     boolean isMultiplyDeployed = hbi.deployedOn.size() > 1;
2102     boolean deploymentMatchesMeta =
2103       hasMetaAssignment && isDeployed && !isMultiplyDeployed &&
2104       hbi.metaEntry.regionServer.equals(hbi.deployedOn.get(0));
2105     boolean splitParent =
2106       (hbi.metaEntry == null)? false: hbi.metaEntry.isSplit() && hbi.metaEntry.isOffline();
2107     boolean shouldBeDeployed = inMeta && !isTableDisabled(hbi.metaEntry);
2108     boolean recentlyModified = inHdfs &&
2109       hbi.getModTime() + timelag > System.currentTimeMillis();
2110 
2111     // ========== First the healthy cases =============
2112     if (hbi.containsOnlyHdfsEdits()) {
2113       return;
2114     }
2115     if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) {
2116       return;
2117     } else if (inMeta && inHdfs && !shouldBeDeployed && !isDeployed) {
2118       LOG.info("Region " + descriptiveName + " is in META, and in a disabled " +
2119         "tabled that is not deployed");
2120       return;
2121     } else if (recentlyModified) {
2122       LOG.warn("Region " + descriptiveName + " was recently modified -- skipping");
2123       return;
2124     }
2125     // ========== Cases where the region is not in hbase:meta =============
2126     else if (!inMeta && !inHdfs && !isDeployed) {
2127       // We shouldn't have record of this region at all then!
2128       assert false : "Entry for region with no data";
2129     } else if (!inMeta && !inHdfs && isDeployed) {
2130       errors.reportError(ERROR_CODE.NOT_IN_META_HDFS, "Region "
2131           + descriptiveName + ", key=" + key + ", not on HDFS or in hbase:meta but " +
2132           "deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2133       if (shouldFixAssignments()) {
2134         undeployRegions(hbi);
2135       }
2136 
2137     } else if (!inMeta && inHdfs && !isDeployed) {
2138       if (hbi.isMerged()) {
2139         // This region has already been merged, the remaining hdfs file will be
2140         // cleaned by CatalogJanitor later
2141         hbi.setSkipChecks(true);
2142         LOG.info("Region " + descriptiveName
2143             + " got merge recently, its file(s) will be cleaned by CatalogJanitor later");
2144         return;
2145       }
2146       errors.reportError(ERROR_CODE.NOT_IN_META_OR_DEPLOYED, "Region "
2147           + descriptiveName + " on HDFS, but not listed in hbase:meta " +
2148           "or deployed on any region server");
2149       // restore region consistency of an adopted orphan
2150       if (shouldFixMeta()) {
2151         if (!hbi.isHdfsRegioninfoPresent()) {
2152           LOG.error("Region " + hbi.getHdfsHRI() + " could have been repaired"
2153               +  " in table integrity repair phase if -fixHdfsOrphans was" +
2154               " used.");
2155           return;
2156         }
2157 
2158         HRegionInfo hri = hbi.getHdfsHRI();
2159         TableInfo tableInfo = tablesInfo.get(hri.getTable());
2160 
2161         for (HRegionInfo region : tableInfo.getRegionsFromMeta()) {
2162           if (Bytes.compareTo(region.getStartKey(), hri.getStartKey()) <= 0
2163               && (region.getEndKey().length == 0 || Bytes.compareTo(region.getEndKey(),
2164                 hri.getEndKey()) >= 0)
2165               && Bytes.compareTo(region.getStartKey(), hri.getEndKey()) <= 0) {
2166             if(region.isSplit() || region.isOffline()) continue;
2167             Path regionDir = hbi.getHdfsRegionDir();
2168             FileSystem fs = regionDir.getFileSystem(getConf());
2169             List<Path> familyDirs = FSUtils.getFamilyDirs(fs, regionDir);
2170             for (Path familyDir : familyDirs) {
2171               List<Path> referenceFilePaths = FSUtils.getReferenceFilePaths(fs, familyDir);
2172               for (Path referenceFilePath : referenceFilePaths) {
2173                 Path parentRegionDir =
2174                     StoreFileInfo.getReferredToFile(referenceFilePath).getParent().getParent();
2175                 if (parentRegionDir.toString().endsWith(region.getEncodedName())) {
2176                   LOG.warn(hri + " start and stop keys are in the range of " + region
2177                       + ". The region might not be cleaned up from hdfs when region " + region
2178                       + " split failed. Hence deleting from hdfs.");
2179                   HRegionFileSystem.deleteRegionFromFileSystem(getConf(), fs,
2180                     regionDir.getParent(), hri);
2181                   return;
2182                 }
2183               }
2184             }
2185           }
2186         }
2187 
2188         LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI());
2189         int numReplicas = admin.getTableDescriptor(hbi.getTableName()).getRegionReplication();
2190         HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(),
2191             admin.getClusterStatus().getServers(), numReplicas);
2192 
2193         tryAssignmentRepair(hbi, "Trying to reassign region...");
2194       }
2195 
2196     } else if (!inMeta && inHdfs && isDeployed) {
2197       errors.reportError(ERROR_CODE.NOT_IN_META, "Region " + descriptiveName
2198           + " not in META, but deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2199       debugLsr(hbi.getHdfsRegionDir());
2200       if (hbi.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) {
2201         // for replicas, this means that we should undeploy the region (we would have
2202         // gone over the primaries and fixed meta holes in first phase under
2203         // checkAndFixConsistency; we shouldn't get the condition !inMeta at
2204         // this stage unless unwanted replica)
2205         if (shouldFixAssignments()) {
2206           undeployRegionsForHbi(hbi);
2207         }
2208       }
2209       if (shouldFixMeta() && hbi.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
2210         if (!hbi.isHdfsRegioninfoPresent()) {
2211           LOG.error("This should have been repaired in table integrity repair phase");
2212           return;
2213         }
2214 
2215         LOG.info("Patching hbase:meta with with .regioninfo: " + hbi.getHdfsHRI());
2216         int numReplicas = admin.getTableDescriptor(hbi.getTableName()).getRegionReplication();
2217         HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(),
2218             admin.getClusterStatus().getServers(), numReplicas);
2219         tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
2220       }
2221 
2222     // ========== Cases where the region is in hbase:meta =============
2223     } else if (inMeta && inHdfs && !isDeployed && splitParent) {
2224       // check whether this is an actual error, or just transient state where parent
2225       // is not cleaned
2226       if (hbi.metaEntry.splitA != null && hbi.metaEntry.splitB != null) {
2227         // check that split daughters are there
2228         HbckInfo infoA = this.regionInfoMap.get(hbi.metaEntry.splitA.getEncodedName());
2229         HbckInfo infoB = this.regionInfoMap.get(hbi.metaEntry.splitB.getEncodedName());
2230         if (infoA != null && infoB != null) {
2231           // we already processed or will process daughters. Move on, nothing to see here.
2232           hbi.setSkipChecks(true);
2233           return;
2234         }
2235       }
2236       errors.reportError(ERROR_CODE.LINGERING_SPLIT_PARENT, "Region "
2237           + descriptiveName + " is a split parent in META, in HDFS, "
2238           + "and not deployed on any region server. This could be transient.");
2239       if (shouldFixSplitParents()) {
2240         setShouldRerun();
2241         resetSplitParent(hbi);
2242       }
2243     } else if (inMeta && !inHdfs && !isDeployed) {
2244       errors.reportError(ERROR_CODE.NOT_IN_HDFS_OR_DEPLOYED, "Region "
2245           + descriptiveName + " found in META, but not in HDFS "
2246           + "or deployed on any region server.");
2247       if (shouldFixMeta()) {
2248         deleteMetaRegion(hbi);
2249       }
2250     } else if (inMeta && !inHdfs && isDeployed) {
2251       errors.reportError(ERROR_CODE.NOT_IN_HDFS, "Region " + descriptiveName
2252           + " found in META, but not in HDFS, " +
2253           "and deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2254       // We treat HDFS as ground truth.  Any information in meta is transient
2255       // and equivalent data can be regenerated.  So, lets unassign and remove
2256       // these problems from META.
2257       if (shouldFixAssignments()) {
2258         errors.print("Trying to fix unassigned region...");
2259         undeployRegions(hbi);
2260       }
2261       if (shouldFixMeta()) {
2262         // wait for it to complete
2263         deleteMetaRegion(hbi);
2264       }
2265     } else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) {
2266       errors.reportError(ERROR_CODE.NOT_DEPLOYED, "Region " + descriptiveName
2267           + " not deployed on any region server.");
2268       tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
2269     } else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) {
2270       errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
2271           "Region " + descriptiveName + " should not be deployed according " +
2272           "to META, but is deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2273       if (shouldFixAssignments()) {
2274         errors.print("Trying to close the region " + descriptiveName);
2275         setShouldRerun();
2276         HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);
2277       }
2278     } else if (inMeta && inHdfs && isMultiplyDeployed) {
2279       errors.reportError(ERROR_CODE.MULTI_DEPLOYED, "Region " + descriptiveName
2280           + " is listed in hbase:meta on region server " + hbi.metaEntry.regionServer
2281           + " but is multiply assigned to region servers " +
2282           Joiner.on(", ").join(hbi.deployedOn));
2283       // If we are trying to fix the errors
2284       if (shouldFixAssignments()) {
2285         errors.print("Trying to fix assignment error...");
2286         setShouldRerun();
2287         HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);
2288       }
2289     } else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) {
2290       errors.reportError(ERROR_CODE.SERVER_DOES_NOT_MATCH_META, "Region "
2291           + descriptiveName + " listed in hbase:meta on region server " +
2292           hbi.metaEntry.regionServer + " but found on region server " +
2293           hbi.deployedOn.get(0));
2294       // If we are trying to fix the errors
2295       if (shouldFixAssignments()) {
2296         errors.print("Trying to fix assignment error...");
2297         setShouldRerun();
2298         HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);
2299         HBaseFsckRepair.waitUntilAssigned(admin, hbi.getHdfsHRI());
2300       }
2301     } else {
2302       errors.reportError(ERROR_CODE.UNKNOWN, "Region " + descriptiveName +
2303           " is in an unforeseen state:" +
2304           " inMeta=" + inMeta +
2305           " inHdfs=" + inHdfs +
2306           " isDeployed=" + isDeployed +
2307           " isMultiplyDeployed=" + isMultiplyDeployed +
2308           " deploymentMatchesMeta=" + deploymentMatchesMeta +
2309           " shouldBeDeployed=" + shouldBeDeployed);
2310     }
2311   }
2312 
2313   /**
2314    * Checks tables integrity. Goes over all regions and scans the tables.
2315    * Collects all the pieces for each table and checks if there are missing,
2316    * repeated or overlapping ones.
2317    * @throws IOException
2318    */
2319   SortedMap<TableName, TableInfo> checkIntegrity() throws IOException {
2320     tablesInfo = new TreeMap<TableName,TableInfo> ();
2321     LOG.debug("There are " + regionInfoMap.size() + " region info entries");
2322     for (HbckInfo hbi : regionInfoMap.values()) {
2323       // Check only valid, working regions
2324       if (hbi.metaEntry == null) {
2325         // this assumes that consistency check has run loadMetaEntry
2326         Path p = hbi.getHdfsRegionDir();
2327         if (p == null) {
2328           errors.report("No regioninfo in Meta or HDFS. " + hbi);
2329         }
2330 
2331         // TODO test.
2332         continue;
2333       }
2334       if (hbi.metaEntry.regionServer == null) {
2335         errors.detail("Skipping region because no region server: " + hbi);
2336         continue;
2337       }
2338       if (hbi.metaEntry.isOffline()) {
2339         errors.detail("Skipping region because it is offline: " + hbi);
2340         continue;
2341       }
2342       if (hbi.containsOnlyHdfsEdits()) {
2343         errors.detail("Skipping region because it only contains edits" + hbi);
2344         continue;
2345       }
2346 
2347       // Missing regionDir or over-deployment is checked elsewhere. Include
2348       // these cases in modTInfo, so we can evaluate those regions as part of
2349       // the region chain in META
2350       //if (hbi.foundRegionDir == null) continue;
2351       //if (hbi.deployedOn.size() != 1) continue;
2352       if (hbi.deployedOn.size() == 0) continue;
2353 
2354       // We should be safe here
2355       TableName tableName = hbi.metaEntry.getTable();
2356       TableInfo modTInfo = tablesInfo.get(tableName);
2357       if (modTInfo == null) {
2358         modTInfo = new TableInfo(tableName);
2359       }
2360       for (ServerName server : hbi.deployedOn) {
2361         modTInfo.addServer(server);
2362       }
2363 
2364       if (!hbi.isSkipChecks()) {
2365         modTInfo.addRegionInfo(hbi);
2366       }
2367 
2368       tablesInfo.put(tableName, modTInfo);
2369     }
2370 
2371     loadTableInfosForTablesWithNoRegion();
2372 
2373     logParallelMerge();
2374     for (TableInfo tInfo : tablesInfo.values()) {
2375       TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
2376       if (!tInfo.checkRegionChain(handler)) {
2377         errors.report("Found inconsistency in table " + tInfo.getName());
2378       }
2379     }
2380     return tablesInfo;
2381   }
2382 
2383   /** Loads table info's for tables that may not have been included, since there are no
2384    * regions reported for the table, but table dir is there in hdfs
2385    */
2386   private void loadTableInfosForTablesWithNoRegion() throws IOException {
2387     Map<String, HTableDescriptor> allTables = new FSTableDescriptors(getConf()).getAll();
2388     for (HTableDescriptor htd : allTables.values()) {
2389       if (checkMetaOnly && !htd.isMetaTable()) {
2390         continue;
2391       }
2392 
2393       TableName tableName = htd.getTableName();
2394       if (isTableIncluded(tableName) && !tablesInfo.containsKey(tableName)) {
2395         TableInfo tableInfo = new TableInfo(tableName);
2396         tableInfo.htds.add(htd);
2397         tablesInfo.put(htd.getTableName(), tableInfo);
2398       }
2399     }
2400   }
2401 
2402   /**
2403    * Merge hdfs data by moving from contained HbckInfo into targetRegionDir.
2404    * @return number of file move fixes done to merge regions.
2405    */
2406   public int mergeRegionDirs(Path targetRegionDir, HbckInfo contained) throws IOException {
2407     int fileMoves = 0;
2408     String thread = Thread.currentThread().getName();
2409     LOG.debug("[" + thread + "] Contained region dir after close and pause");
2410     debugLsr(contained.getHdfsRegionDir());
2411 
2412     // rename the contained into the container.
2413     FileSystem fs = targetRegionDir.getFileSystem(getConf());
2414     FileStatus[] dirs = null;
2415     try {
2416       dirs = fs.listStatus(contained.getHdfsRegionDir());
2417     } catch (FileNotFoundException fnfe) {
2418       // region we are attempting to merge in is not present!  Since this is a merge, there is
2419       // no harm skipping this region if it does not exist.
2420       if (!fs.exists(contained.getHdfsRegionDir())) {
2421         LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
2422             + " is missing. Assuming already sidelined or moved.");
2423       } else {
2424         sidelineRegionDir(fs, contained);
2425       }
2426       return fileMoves;
2427     }
2428 
2429     if (dirs == null) {
2430       if (!fs.exists(contained.getHdfsRegionDir())) {
2431         LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
2432             + " already sidelined.");
2433       } else {
2434         sidelineRegionDir(fs, contained);
2435       }
2436       return fileMoves;
2437     }
2438 
2439     for (FileStatus cf : dirs) {
2440       Path src = cf.getPath();
2441       Path dst =  new Path(targetRegionDir, src.getName());
2442 
2443       if (src.getName().equals(HRegionFileSystem.REGION_INFO_FILE)) {
2444         // do not copy the old .regioninfo file.
2445         continue;
2446       }
2447 
2448       if (src.getName().equals(HConstants.HREGION_OLDLOGDIR_NAME)) {
2449         // do not copy the .oldlogs files
2450         continue;
2451       }
2452 
2453       LOG.info("[" + thread + "] Moving files from " + src + " into containing region " + dst);
2454       // FileSystem.rename is inconsistent with directories -- if the
2455       // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
2456       // it moves the src into the dst dir resulting in (foo/a/b).  If
2457       // the dst does not exist, and the src a dir, src becomes dst. (foo/b)
2458       for (FileStatus hfile : fs.listStatus(src)) {
2459         boolean success = fs.rename(hfile.getPath(), dst);
2460         if (success) {
2461           fileMoves++;
2462         }
2463       }
2464       LOG.debug("[" + thread + "] Sideline directory contents:");
2465       debugLsr(targetRegionDir);
2466     }
2467 
2468     // if all success.
2469     sidelineRegionDir(fs, contained);
2470     LOG.info("[" + thread + "] Sidelined region dir "+ contained.getHdfsRegionDir() + " into " +
2471         getSidelineDir());
2472     debugLsr(contained.getHdfsRegionDir());
2473 
2474     return fileMoves;
2475   }
2476 
2477 
2478   static class WorkItemOverlapMerge implements Callable<Void> {
2479     private TableIntegrityErrorHandler handler;
2480     Collection<HbckInfo> overlapgroup;
2481 
2482     WorkItemOverlapMerge(Collection<HbckInfo> overlapgroup, TableIntegrityErrorHandler handler) {
2483       this.handler = handler;
2484       this.overlapgroup = overlapgroup;
2485     }
2486 
2487     @Override
2488     public Void call() throws Exception {
2489       handler.handleOverlapGroup(overlapgroup);
2490       return null;
2491     }
2492   };
2493 
2494 
2495   /**
2496    * Maintain information about a particular table.
2497    */
2498   public class TableInfo {
2499     TableName tableName;
2500     TreeSet <ServerName> deployedOn;
2501 
2502     // backwards regions
2503     final List<HbckInfo> backwards = new ArrayList<HbckInfo>();
2504 
2505     // sidelined big overlapped regions
2506     final Map<Path, HbckInfo> sidelinedRegions = new HashMap<Path, HbckInfo>();
2507 
2508     // region split calculator
2509     final RegionSplitCalculator<HbckInfo> sc = new RegionSplitCalculator<HbckInfo>(cmp);
2510 
2511     // Histogram of different HTableDescriptors found.  Ideally there is only one!
2512     final Set<HTableDescriptor> htds = new HashSet<HTableDescriptor>();
2513 
2514     // key = start split, values = set of splits in problem group
2515     final Multimap<byte[], HbckInfo> overlapGroups =
2516       TreeMultimap.create(RegionSplitCalculator.BYTES_COMPARATOR, cmp);
2517 
2518     // list of regions derived from meta entries.
2519     private ImmutableList<HRegionInfo> regionsFromMeta = null;
2520 
2521     TableInfo(TableName name) {
2522       this.tableName = name;
2523       deployedOn = new TreeSet <ServerName>();
2524     }
2525 
2526     /**
2527      * @return descriptor common to all regions.  null if are none or multiple!
2528      */
2529     private HTableDescriptor getHTD() {
2530       if (htds.size() == 1) {
2531         return (HTableDescriptor)htds.toArray()[0];
2532       } else {
2533         LOG.error("None/Multiple table descriptors found for table '"
2534           + tableName + "' regions: " + htds);
2535       }
2536       return null;
2537     }
2538 
2539     public void addRegionInfo(HbckInfo hir) {
2540       if (Bytes.equals(hir.getEndKey(), HConstants.EMPTY_END_ROW)) {
2541         // end key is absolute end key, just add it.
2542         // ignore replicas other than primary for these checks
2543         if (hir.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) sc.add(hir);
2544         return;
2545       }
2546 
2547       // if not the absolute end key, check for cycle
2548       if (Bytes.compareTo(hir.getStartKey(), hir.getEndKey()) > 0) {
2549         errors.reportError(
2550             ERROR_CODE.REGION_CYCLE,
2551             String.format("The endkey for this region comes before the "
2552                 + "startkey, startkey=%s, endkey=%s",
2553                 Bytes.toStringBinary(hir.getStartKey()),
2554                 Bytes.toStringBinary(hir.getEndKey())), this, hir);
2555         backwards.add(hir);
2556         return;
2557       }
2558 
2559       // main case, add to split calculator
2560       // ignore replicas other than primary for these checks
2561       if (hir.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) sc.add(hir);
2562     }
2563 
2564     public void addServer(ServerName server) {
2565       this.deployedOn.add(server);
2566     }
2567 
2568     public TableName getName() {
2569       return tableName;
2570     }
2571 
2572     public int getNumRegions() {
2573       return sc.getStarts().size() + backwards.size();
2574     }
2575 
2576     public synchronized ImmutableList<HRegionInfo> getRegionsFromMeta() {
2577       // lazy loaded, synchronized to ensure a single load
2578       if (regionsFromMeta == null) {
2579         List<HRegionInfo> regions = new ArrayList<HRegionInfo>();
2580         for (HbckInfo h : HBaseFsck.this.regionInfoMap.values()) {
2581           if (tableName.equals(h.getTableName())) {
2582             if (h.metaEntry != null) {
2583               regions.add((HRegionInfo) h.metaEntry);
2584             }
2585           }
2586         }
2587         regionsFromMeta = Ordering.natural().immutableSortedCopy(regions);
2588       }
2589       
2590       return regionsFromMeta;
2591     }
2592     
2593 
2594       private class IntegrityFixSuggester extends TableIntegrityErrorHandlerImpl {
2595       ErrorReporter errors;
2596 
2597       IntegrityFixSuggester(TableInfo ti, ErrorReporter errors) {
2598         this.errors = errors;
2599         setTableInfo(ti);
2600       }
2601 
2602       @Override
2603       public void handleRegionStartKeyNotEmpty(HbckInfo hi) throws IOException{
2604         errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
2605             "First region should start with an empty key.  You need to "
2606             + " create a new region and regioninfo in HDFS to plug the hole.",
2607             getTableInfo(), hi);
2608       }
2609 
2610       @Override
2611       public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
2612         errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
2613             "Last region should end with an empty key. You need to "
2614                 + "create a new region and regioninfo in HDFS to plug the hole.", getTableInfo());
2615       }
2616 
2617       @Override
2618       public void handleDegenerateRegion(HbckInfo hi) throws IOException{
2619         errors.reportError(ERROR_CODE.DEGENERATE_REGION,
2620             "Region has the same start and end key.", getTableInfo(), hi);
2621       }
2622 
2623       @Override
2624       public void handleDuplicateStartKeys(HbckInfo r1, HbckInfo r2) throws IOException{
2625         byte[] key = r1.getStartKey();
2626         // dup start key
2627         errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
2628             "Multiple regions have the same startkey: "
2629             + Bytes.toStringBinary(key), getTableInfo(), r1);
2630         errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
2631             "Multiple regions have the same startkey: "
2632             + Bytes.toStringBinary(key), getTableInfo(), r2);
2633       }
2634 
2635       @Override
2636       public void handleOverlapInRegionChain(HbckInfo hi1, HbckInfo hi2) throws IOException{
2637         errors.reportError(ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
2638             "There is an overlap in the region chain.",
2639             getTableInfo(), hi1, hi2);
2640       }
2641 
2642       @Override
2643       public void handleHoleInRegionChain(byte[] holeStart, byte[] holeStop) throws IOException{
2644         errors.reportError(
2645             ERROR_CODE.HOLE_IN_REGION_CHAIN,
2646             "There is a hole in the region chain between "
2647                 + Bytes.toStringBinary(holeStart) + " and "
2648                 + Bytes.toStringBinary(holeStop)
2649                 + ".  You need to create a new .regioninfo and region "
2650                 + "dir in hdfs to plug the hole.");
2651       }
2652     };
2653 
2654     /**
2655      * This handler fixes integrity errors from hdfs information.  There are
2656      * basically three classes of integrity problems 1) holes, 2) overlaps, and
2657      * 3) invalid regions.
2658      *
2659      * This class overrides methods that fix holes and the overlap group case.
2660      * Individual cases of particular overlaps are handled by the general
2661      * overlap group merge repair case.
2662      *
2663      * If hbase is online, this forces regions offline before doing merge
2664      * operations.
2665      */
2666     private class HDFSIntegrityFixer extends IntegrityFixSuggester {
2667       Configuration conf;
2668 
2669       boolean fixOverlaps = true;
2670 
2671       HDFSIntegrityFixer(TableInfo ti, ErrorReporter errors, Configuration conf,
2672           boolean fixHoles, boolean fixOverlaps) {
2673         super(ti, errors);
2674         this.conf = conf;
2675         this.fixOverlaps = fixOverlaps;
2676         // TODO properly use fixHoles
2677       }
2678 
2679       /**
2680        * This is a special case hole -- when the first region of a table is
2681        * missing from META, HBase doesn't acknowledge the existance of the
2682        * table.
2683        */
2684       @Override
2685       public void handleRegionStartKeyNotEmpty(HbckInfo next) throws IOException {
2686         errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
2687             "First region should start with an empty key.  Creating a new " +
2688             "region and regioninfo in HDFS to plug the hole.",
2689             getTableInfo(), next);
2690         HTableDescriptor htd = getTableInfo().getHTD();
2691         // from special EMPTY_START_ROW to next region's startKey
2692         HRegionInfo newRegion = new HRegionInfo(htd.getTableName(),
2693             HConstants.EMPTY_START_ROW, next.getStartKey());
2694 
2695         // TODO test
2696         HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2697         LOG.info("Table region start key was not empty.  Created new empty region: "
2698             + newRegion + " " +region);
2699         fixes++;
2700       }
2701 
2702       @Override
2703       public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
2704         errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
2705             "Last region should end with an empty key.  Creating a new "
2706                 + "region and regioninfo in HDFS to plug the hole.", getTableInfo());
2707         HTableDescriptor htd = getTableInfo().getHTD();
2708         // from curEndKey to EMPTY_START_ROW
2709         HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), curEndKey,
2710             HConstants.EMPTY_START_ROW);
2711 
2712         HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2713         LOG.info("Table region end key was not empty.  Created new empty region: " + newRegion
2714             + " " + region);
2715         fixes++;
2716       }
2717 
2718       /**
2719        * There is a hole in the hdfs regions that violates the table integrity
2720        * rules.  Create a new empty region that patches the hole.
2721        */
2722       @Override
2723       public void handleHoleInRegionChain(byte[] holeStartKey, byte[] holeStopKey) throws IOException {
2724         errors.reportError(
2725             ERROR_CODE.HOLE_IN_REGION_CHAIN,
2726             "There is a hole in the region chain between "
2727                 + Bytes.toStringBinary(holeStartKey) + " and "
2728                 + Bytes.toStringBinary(holeStopKey)
2729                 + ".  Creating a new regioninfo and region "
2730                 + "dir in hdfs to plug the hole.");
2731         HTableDescriptor htd = getTableInfo().getHTD();
2732         HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), holeStartKey, holeStopKey);
2733         HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2734         LOG.info("Plugged hole by creating new empty region: "+ newRegion + " " +region);
2735         fixes++;
2736       }
2737 
2738       /**
2739        * This takes set of overlapping regions and merges them into a single
2740        * region.  This covers cases like degenerate regions, shared start key,
2741        * general overlaps, duplicate ranges, and partial overlapping regions.
2742        *
2743        * Cases:
2744        * - Clean regions that overlap
2745        * - Only .oldlogs regions (can't find start/stop range, or figure out)
2746        *
2747        * This is basically threadsafe, except for the fixer increment in mergeOverlaps.
2748        */
2749       @Override
2750       public void handleOverlapGroup(Collection<HbckInfo> overlap)
2751           throws IOException {
2752         Preconditions.checkNotNull(overlap);
2753         Preconditions.checkArgument(overlap.size() >0);
2754 
2755         if (!this.fixOverlaps) {
2756           LOG.warn("Not attempting to repair overlaps.");
2757           return;
2758         }
2759 
2760         if (overlap.size() > maxMerge) {
2761           LOG.warn("Overlap group has " + overlap.size() + " overlapping " +
2762             "regions which is greater than " + maxMerge + ", the max number of regions to merge");
2763           if (sidelineBigOverlaps) {
2764             // we only sideline big overlapped groups that exceeds the max number of regions to merge
2765             sidelineBigOverlaps(overlap);
2766           }
2767           return;
2768         }
2769 
2770         mergeOverlaps(overlap);
2771       }
2772 
2773       void mergeOverlaps(Collection<HbckInfo> overlap)
2774           throws IOException {
2775         String thread = Thread.currentThread().getName();
2776         LOG.info("== [" + thread + "] Merging regions into one region: "
2777           + Joiner.on(",").join(overlap));
2778         // get the min / max range and close all concerned regions
2779         Pair<byte[], byte[]> range = null;
2780         for (HbckInfo hi : overlap) {
2781           if (range == null) {
2782             range = new Pair<byte[], byte[]>(hi.getStartKey(), hi.getEndKey());
2783           } else {
2784             if (RegionSplitCalculator.BYTES_COMPARATOR
2785                 .compare(hi.getStartKey(), range.getFirst()) < 0) {
2786               range.setFirst(hi.getStartKey());
2787             }
2788             if (RegionSplitCalculator.BYTES_COMPARATOR
2789                 .compare(hi.getEndKey(), range.getSecond()) > 0) {
2790               range.setSecond(hi.getEndKey());
2791             }
2792           }
2793           // need to close files so delete can happen.
2794           LOG.debug("[" + thread + "] Closing region before moving data around: " +  hi);
2795           LOG.debug("[" + thread + "] Contained region dir before close");
2796           debugLsr(hi.getHdfsRegionDir());
2797           try {
2798             LOG.info("[" + thread + "] Closing region: " + hi);
2799             closeRegion(hi);
2800           } catch (IOException ioe) {
2801             LOG.warn("[" + thread + "] Was unable to close region " + hi
2802               + ".  Just continuing... ", ioe);
2803           } catch (InterruptedException e) {
2804             LOG.warn("[" + thread + "] Was unable to close region " + hi
2805               + ".  Just continuing... ", e);
2806           }
2807 
2808           try {
2809             LOG.info("[" + thread + "] Offlining region: " + hi);
2810             offline(hi.getRegionName());
2811           } catch (IOException ioe) {
2812             LOG.warn("[" + thread + "] Unable to offline region from master: " + hi
2813               + ".  Just continuing... ", ioe);
2814           }
2815         }
2816 
2817         // create new empty container region.
2818         HTableDescriptor htd = getTableInfo().getHTD();
2819         // from start key to end Key
2820         HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), range.getFirst(),
2821             range.getSecond());
2822         HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2823         LOG.info("[" + thread + "] Created new empty container region: " +
2824             newRegion + " to contain regions: " + Joiner.on(",").join(overlap));
2825         debugLsr(region.getRegionFileSystem().getRegionDir());
2826 
2827         // all target regions are closed, should be able to safely cleanup.
2828         boolean didFix= false;
2829         Path target = region.getRegionFileSystem().getRegionDir();
2830         for (HbckInfo contained : overlap) {
2831           LOG.info("[" + thread + "] Merging " + contained  + " into " + target );
2832           int merges = mergeRegionDirs(target, contained);
2833           if (merges > 0) {
2834             didFix = true;
2835           }
2836         }
2837         if (didFix) {
2838           fixes++;
2839         }
2840       }
2841 
2842       /**
2843        * Sideline some regions in a big overlap group so that it
2844        * will have fewer regions, and it is easier to merge them later on.
2845        *
2846        * @param bigOverlap the overlapped group with regions more than maxMerge
2847        * @throws IOException
2848        */
2849       void sidelineBigOverlaps(
2850           Collection<HbckInfo> bigOverlap) throws IOException {
2851         int overlapsToSideline = bigOverlap.size() - maxMerge;
2852         if (overlapsToSideline > maxOverlapsToSideline) {
2853           overlapsToSideline = maxOverlapsToSideline;
2854         }
2855         List<HbckInfo> regionsToSideline =
2856           RegionSplitCalculator.findBigRanges(bigOverlap, overlapsToSideline);
2857         FileSystem fs = FileSystem.get(conf);
2858         for (HbckInfo regionToSideline: regionsToSideline) {
2859           try {
2860             LOG.info("Closing region: " + regionToSideline);
2861             closeRegion(regionToSideline);
2862           } catch (IOException ioe) {
2863             LOG.warn("Was unable to close region " + regionToSideline
2864               + ".  Just continuing... ", ioe);
2865           } catch (InterruptedException e) {
2866             LOG.warn("Was unable to close region " + regionToSideline
2867               + ".  Just continuing... ", e);
2868           }
2869 
2870           try {
2871             LOG.info("Offlining region: " + regionToSideline);
2872             offline(regionToSideline.getRegionName());
2873           } catch (IOException ioe) {
2874             LOG.warn("Unable to offline region from master: " + regionToSideline
2875               + ".  Just continuing... ", ioe);
2876           }
2877 
2878           LOG.info("Before sideline big overlapped region: " + regionToSideline.toString());
2879           Path sidelineRegionDir = sidelineRegionDir(fs, TO_BE_LOADED, regionToSideline);
2880           if (sidelineRegionDir != null) {
2881             sidelinedRegions.put(sidelineRegionDir, regionToSideline);
2882             LOG.info("After sidelined big overlapped region: "
2883               + regionToSideline.getRegionNameAsString()
2884               + " to " + sidelineRegionDir.toString());
2885             fixes++;
2886           }
2887         }
2888       }
2889     }
2890 
2891     /**
2892      * Check the region chain (from META) of this table.  We are looking for
2893      * holes, overlaps, and cycles.
2894      * @return false if there are errors
2895      * @throws IOException
2896      */
2897     public boolean checkRegionChain(TableIntegrityErrorHandler handler) throws IOException {
2898       // When table is disabled no need to check for the region chain. Some of the regions
2899       // accidently if deployed, this below code might report some issues like missing start
2900       // or end regions or region hole in chain and may try to fix which is unwanted.
2901       if (disabledTables.contains(this.tableName)) {
2902         return true;
2903       }
2904       int originalErrorsCount = errors.getErrorList().size();
2905       Multimap<byte[], HbckInfo> regions = sc.calcCoverage();
2906       SortedSet<byte[]> splits = sc.getSplits();
2907 
2908       byte[] prevKey = null;
2909       byte[] problemKey = null;
2910 
2911       if (splits.size() == 0) {
2912         // no region for this table
2913         handler.handleHoleInRegionChain(HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW);
2914       }
2915 
2916       for (byte[] key : splits) {
2917         Collection<HbckInfo> ranges = regions.get(key);
2918         if (prevKey == null && !Bytes.equals(key, HConstants.EMPTY_BYTE_ARRAY)) {
2919           for (HbckInfo rng : ranges) {
2920             handler.handleRegionStartKeyNotEmpty(rng);
2921           }
2922         }
2923 
2924         // check for degenerate ranges
2925         for (HbckInfo rng : ranges) {
2926           // special endkey case converts '' to null
2927           byte[] endKey = rng.getEndKey();
2928           endKey = (endKey.length == 0) ? null : endKey;
2929           if (Bytes.equals(rng.getStartKey(),endKey)) {
2930             handler.handleDegenerateRegion(rng);
2931           }
2932         }
2933 
2934         if (ranges.size() == 1) {
2935           // this split key is ok -- no overlap, not a hole.
2936           if (problemKey != null) {
2937             LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
2938           }
2939           problemKey = null; // fell through, no more problem.
2940         } else if (ranges.size() > 1) {
2941           // set the new problem key group name, if already have problem key, just
2942           // keep using it.
2943           if (problemKey == null) {
2944             // only for overlap regions.
2945             LOG.warn("Naming new problem group: " + Bytes.toStringBinary(key));
2946             problemKey = key;
2947           }
2948           overlapGroups.putAll(problemKey, ranges);
2949 
2950           // record errors
2951           ArrayList<HbckInfo> subRange = new ArrayList<HbckInfo>(ranges);
2952           //  this dumb and n^2 but this shouldn't happen often
2953           for (HbckInfo r1 : ranges) {
2954             if (r1.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) continue;
2955             subRange.remove(r1);
2956             for (HbckInfo r2 : subRange) {
2957               if (r2.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) continue;
2958               if (Bytes.compareTo(r1.getStartKey(), r2.getStartKey())==0) {
2959                 handler.handleDuplicateStartKeys(r1,r2);
2960               } else {
2961                 // overlap
2962                 handler.handleOverlapInRegionChain(r1, r2);
2963               }
2964             }
2965           }
2966 
2967         } else if (ranges.size() == 0) {
2968           if (problemKey != null) {
2969             LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
2970           }
2971           problemKey = null;
2972 
2973           byte[] holeStopKey = sc.getSplits().higher(key);
2974           // if higher key is null we reached the top.
2975           if (holeStopKey != null) {
2976             // hole
2977             handler.handleHoleInRegionChain(key, holeStopKey);
2978           }
2979         }
2980         prevKey = key;
2981       }
2982 
2983       // When the last region of a table is proper and having an empty end key, 'prevKey'
2984       // will be null.
2985       if (prevKey != null) {
2986         handler.handleRegionEndKeyNotEmpty(prevKey);
2987       }
2988 
2989       // TODO fold this into the TableIntegrityHandler
2990       if (getConf().getBoolean("hbasefsck.overlap.merge.parallel", true)) {
2991         boolean ok = handleOverlapsParallel(handler, prevKey);
2992         if (!ok) {
2993           return false;
2994         }
2995       } else {
2996         for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
2997           handler.handleOverlapGroup(overlap);
2998         }
2999       }
3000 
3001       if (details) {
3002         // do full region split map dump
3003         errors.print("---- Table '"  +  this.tableName
3004             + "': region split map");
3005         dump(splits, regions);
3006         errors.print("---- Table '"  +  this.tableName
3007             + "': overlap groups");
3008         dumpOverlapProblems(overlapGroups);
3009         errors.print("There are " + overlapGroups.keySet().size()
3010             + " overlap groups with " + overlapGroups.size()
3011             + " overlapping regions");
3012       }
3013       if (!sidelinedRegions.isEmpty()) {
3014         LOG.warn("Sidelined big overlapped regions, please bulk load them!");
3015         errors.print("---- Table '"  +  this.tableName
3016             + "': sidelined big overlapped regions");
3017         dumpSidelinedRegions(sidelinedRegions);
3018       }
3019       return errors.getErrorList().size() == originalErrorsCount;
3020     }
3021 
3022     private boolean handleOverlapsParallel(TableIntegrityErrorHandler handler, byte[] prevKey)
3023         throws IOException {
3024       // we parallelize overlap handler for the case we have lots of groups to fix.  We can
3025       // safely assume each group is independent.
3026       List<WorkItemOverlapMerge> merges = new ArrayList<WorkItemOverlapMerge>(overlapGroups.size());
3027       List<Future<Void>> rets;
3028       for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
3029         //
3030         merges.add(new WorkItemOverlapMerge(overlap, handler));
3031       }
3032       try {
3033         rets = executor.invokeAll(merges);
3034       } catch (InterruptedException e) {
3035         LOG.error("Overlap merges were interrupted", e);
3036         return false;
3037       }
3038       for(int i=0; i<merges.size(); i++) {
3039         WorkItemOverlapMerge work = merges.get(i);
3040         Future<Void> f = rets.get(i);
3041         try {
3042           f.get();
3043         } catch(ExecutionException e) {
3044           LOG.warn("Failed to merge overlap group" + work, e.getCause());
3045         } catch (InterruptedException e) {
3046           LOG.error("Waiting for overlap merges was interrupted", e);
3047           return false;
3048         }
3049       }
3050       return true;
3051     }
3052 
3053     /**
3054      * This dumps data in a visually reasonable way for visual debugging
3055      *
3056      * @param splits
3057      * @param regions
3058      */
3059     void dump(SortedSet<byte[]> splits, Multimap<byte[], HbckInfo> regions) {
3060       // we display this way because the last end key should be displayed as well.
3061       StringBuilder sb = new StringBuilder();
3062       for (byte[] k : splits) {
3063         sb.setLength(0); // clear out existing buffer, if any.
3064         sb.append(Bytes.toStringBinary(k) + ":\t");
3065         for (HbckInfo r : regions.get(k)) {
3066           sb.append("[ "+ r.toString() + ", "
3067               + Bytes.toStringBinary(r.getEndKey())+ "]\t");
3068         }
3069         errors.print(sb.toString());
3070       }
3071     }
3072   }
3073 
3074   public void dumpOverlapProblems(Multimap<byte[], HbckInfo> regions) {
3075     // we display this way because the last end key should be displayed as
3076     // well.
3077     for (byte[] k : regions.keySet()) {
3078       errors.print(Bytes.toStringBinary(k) + ":");
3079       for (HbckInfo r : regions.get(k)) {
3080         errors.print("[ " + r.toString() + ", "
3081             + Bytes.toStringBinary(r.getEndKey()) + "]");
3082       }
3083       errors.print("----");
3084     }
3085   }
3086 
3087   public void dumpSidelinedRegions(Map<Path, HbckInfo> regions) {
3088     for (Map.Entry<Path, HbckInfo> entry: regions.entrySet()) {
3089       TableName tableName = entry.getValue().getTableName();
3090       Path path = entry.getKey();
3091       errors.print("This sidelined region dir should be bulk loaded: "
3092         + path.toString());
3093       errors.print("Bulk load command looks like: "
3094         + "hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles "
3095         + path.toUri().getPath() + " "+ tableName);
3096     }
3097   }
3098 
3099   public Multimap<byte[], HbckInfo> getOverlapGroups(
3100       TableName table) {
3101     TableInfo ti = tablesInfo.get(table);
3102     return ti.overlapGroups;
3103   }
3104 
3105   /**
3106    * Return a list of user-space table names whose metadata have not been
3107    * modified in the last few milliseconds specified by timelag
3108    * if any of the REGIONINFO_QUALIFIER, SERVER_QUALIFIER, STARTCODE_QUALIFIER,
3109    * SPLITA_QUALIFIER, SPLITB_QUALIFIER have not changed in the last
3110    * milliseconds specified by timelag, then the table is a candidate to be returned.
3111    * @return tables that have not been modified recently
3112    * @throws IOException if an error is encountered
3113    */
3114   HTableDescriptor[] getTables(AtomicInteger numSkipped) {
3115     List<TableName> tableNames = new ArrayList<TableName>();
3116     long now = System.currentTimeMillis();
3117 
3118     for (HbckInfo hbi : regionInfoMap.values()) {
3119       MetaEntry info = hbi.metaEntry;
3120 
3121       // if the start key is zero, then we have found the first region of a table.
3122       // pick only those tables that were not modified in the last few milliseconds.
3123       if (info != null && info.getStartKey().length == 0 && !info.isMetaRegion()) {
3124         if (info.modTime + timelag < now) {
3125           tableNames.add(info.getTable());
3126         } else {
3127           numSkipped.incrementAndGet(); // one more in-flux table
3128         }
3129       }
3130     }
3131     return getHTableDescriptors(tableNames);
3132   }
3133 
3134   HTableDescriptor[] getHTableDescriptors(List<TableName> tableNames) {
3135     HTableDescriptor[] htd = new HTableDescriptor[0];
3136     Admin admin = null;
3137     try {
3138       LOG.info("getHTableDescriptors == tableNames => " + tableNames);
3139       admin = new HBaseAdmin(getConf());
3140       htd = admin.getTableDescriptorsByTableName(tableNames);
3141     } catch (IOException e) {
3142       LOG.debug("Exception getting table descriptors", e);
3143     } finally {
3144       if (admin != null) {
3145         try {
3146           admin.close();
3147         } catch (IOException e) {
3148           LOG.debug("Exception closing HBaseAdmin", e);
3149         }
3150       }
3151     }
3152     return htd;
3153   }
3154 
3155   /**
3156    * Gets the entry in regionInfo corresponding to the the given encoded
3157    * region name. If the region has not been seen yet, a new entry is added
3158    * and returned.
3159    */
3160   private synchronized HbckInfo getOrCreateInfo(String name) {
3161     HbckInfo hbi = regionInfoMap.get(name);
3162     if (hbi == null) {
3163       hbi = new HbckInfo(null);
3164       regionInfoMap.put(name, hbi);
3165     }
3166     return hbi;
3167   }
3168 
3169   private void checkAndFixTableLocks() throws IOException {
3170     ZooKeeperWatcher zkw = createZooKeeperWatcher();
3171 
3172     try {
3173       TableLockChecker checker = new TableLockChecker(zkw, errors);
3174       checker.checkTableLocks();
3175 
3176       if (this.fixTableLocks) {
3177         checker.fixExpiredTableLocks();
3178       }
3179     } finally {
3180       zkw.close();
3181     }
3182   }
3183 
3184   /**
3185    * Check whether a orphaned table ZNode exists and fix it if requested.
3186    * @throws IOException
3187    * @throws KeeperException
3188    * @throws InterruptedException
3189    */
3190   private void checkAndFixOrphanedTableZNodes()
3191       throws IOException, KeeperException, InterruptedException {
3192     ZooKeeperWatcher zkw = createZooKeeperWatcher();
3193 
3194     try {
3195       Set<TableName> enablingTables = ZKTableStateClientSideReader.getEnablingTables(zkw);
3196       String msg;
3197       TableInfo tableInfo;
3198 
3199       for (TableName tableName : enablingTables) {
3200         // Check whether the table exists in hbase
3201         tableInfo = tablesInfo.get(tableName);
3202         if (tableInfo != null) {
3203           // Table exists.  This table state is in transit.  No problem for this table.
3204           continue;
3205         }
3206 
3207         msg = "Table " + tableName + " not found in hbase:meta. Orphaned table ZNode found.";
3208         LOG.warn(msg);
3209         orphanedTableZNodes.add(tableName);
3210         errors.reportError(ERROR_CODE.ORPHANED_ZK_TABLE_ENTRY, msg);
3211       }
3212 
3213       if (orphanedTableZNodes.size() > 0 && this.fixTableZNodes) {
3214         ZKTableStateManager zkTableStateMgr = new ZKTableStateManager(zkw);
3215 
3216         for (TableName tableName : orphanedTableZNodes) {
3217           try {
3218             // Set the table state to be disabled so that if we made mistake, we can trace
3219             // the history and figure it out.
3220             // Another choice is to call checkAndRemoveTableState() to delete the orphaned ZNode.
3221             // Both approaches works.
3222             zkTableStateMgr.setTableState(tableName, ZooKeeperProtos.Table.State.DISABLED);
3223           } catch (CoordinatedStateException e) {
3224             // This exception should not happen here
3225             LOG.error(
3226               "Got a CoordinatedStateException while fixing the ENABLING table znode " + tableName,
3227               e);
3228           }
3229         }
3230       }
3231     } finally {
3232       zkw.close();
3233     }
3234   }
3235 
3236   /**
3237     * Check values in regionInfo for hbase:meta
3238     * Check if zero or more than one regions with hbase:meta are found.
3239     * If there are inconsistencies (i.e. zero or more than one regions
3240     * pretend to be holding the hbase:meta) try to fix that and report an error.
3241     * @throws IOException from HBaseFsckRepair functions
3242     * @throws KeeperException
3243     * @throws InterruptedException
3244     */
3245   boolean checkMetaRegion() throws IOException, KeeperException, InterruptedException {
3246     Map<Integer, HbckInfo> metaRegions = new HashMap<Integer, HbckInfo>();
3247     for (HbckInfo value : regionInfoMap.values()) {
3248       if (value.metaEntry != null && value.metaEntry.isMetaRegion()) {
3249         metaRegions.put(value.getReplicaId(), value);
3250       }
3251     }
3252     int metaReplication = admin.getTableDescriptor(TableName.META_TABLE_NAME)
3253         .getRegionReplication();
3254     boolean noProblem = true;
3255     // There will be always entries in regionInfoMap corresponding to hbase:meta & its replicas
3256     // Check the deployed servers. It should be exactly one server for each replica.
3257     for (int i = 0; i < metaReplication; i++) {
3258       HbckInfo metaHbckInfo = metaRegions.remove(i);
3259       List<ServerName> servers = new ArrayList<ServerName>();
3260       if (metaHbckInfo != null) {
3261         servers = metaHbckInfo.deployedOn;
3262       }
3263       if (servers.size() != 1) {
3264         noProblem = false;
3265         if (servers.size() == 0) {
3266           assignMetaReplica(i);
3267         } else if (servers.size() > 1) {
3268           errors
3269           .reportError(ERROR_CODE.MULTI_META_REGION, "hbase:meta, replicaId " +
3270                        metaHbckInfo.getReplicaId() + " is found on more than one region.");
3271           if (shouldFixAssignments()) {
3272             errors.print("Trying to fix a problem with hbase:meta, replicaId " +
3273                          metaHbckInfo.getReplicaId() +"..");
3274             setShouldRerun();
3275             // try fix it (treat is a dupe assignment)
3276             HBaseFsckRepair.fixMultiAssignment(connection, metaHbckInfo.metaEntry, servers);
3277           }
3278         }
3279       }
3280     }
3281     // unassign whatever is remaining in metaRegions. They are excess replicas.
3282     for (Map.Entry<Integer, HbckInfo> entry : metaRegions.entrySet()) {
3283       noProblem = false;
3284       errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
3285           "hbase:meta replicas are deployed in excess. Configured " + metaReplication +
3286           ", deployed " + metaRegions.size());
3287       if (shouldFixAssignments()) {
3288         errors.print("Trying to undeploy excess replica, replicaId: " + entry.getKey() +
3289             " of hbase:meta..");
3290         setShouldRerun();
3291         unassignMetaReplica(entry.getValue());
3292       }
3293     }
3294     // if noProblem is false, rerun hbck with hopefully fixed META
3295     // if noProblem is true, no errors, so continue normally
3296     return noProblem;
3297   }
3298 
3299   private void unassignMetaReplica(HbckInfo hi) throws IOException, InterruptedException,
3300   KeeperException {
3301     undeployRegions(hi);
3302     ZooKeeperWatcher zkw = createZooKeeperWatcher();
3303     ZKUtil.deleteNode(zkw, zkw.getZNodeForReplica(hi.metaEntry.getReplicaId()));
3304   }
3305 
3306   private void assignMetaReplica(int replicaId)
3307       throws IOException, KeeperException, InterruptedException {
3308     errors.reportError(ERROR_CODE.NO_META_REGION, "hbase:meta, replicaId " +
3309         replicaId +" is not found on any region.");
3310     if (shouldFixAssignments()) {
3311       errors.print("Trying to fix a problem with hbase:meta..");
3312       setShouldRerun();
3313       // try to fix it (treat it as unassigned region)
3314       HRegionInfo h = RegionReplicaUtil.getRegionInfoForReplica(
3315           HRegionInfo.FIRST_META_REGIONINFO, replicaId);
3316       HBaseFsckRepair.fixUnassigned(admin, h);
3317       HBaseFsckRepair.waitUntilAssigned(admin, h);
3318     }
3319   }
3320 
3321   /**
3322    * Scan hbase:meta, adding all regions found to the regionInfo map.
3323    * @throws IOException if an error is encountered
3324    */
3325   boolean loadMetaEntries() throws IOException {
3326     MetaScannerVisitor visitor = new MetaScannerVisitorBase() {
3327       int countRecord = 1;
3328 
3329       // comparator to sort KeyValues with latest modtime
3330       final Comparator<Cell> comp = new Comparator<Cell>() {
3331         @Override
3332         public int compare(Cell k1, Cell k2) {
3333           return (int)(k1.getTimestamp() - k2.getTimestamp());
3334         }
3335       };
3336 
3337       @Override
3338       public boolean processRow(Result result) throws IOException {
3339         try {
3340 
3341           // record the latest modification of this META record
3342           long ts =  Collections.max(result.listCells(), comp).getTimestamp();
3343           RegionLocations rl = MetaTableAccessor.getRegionLocations(result);
3344           if (rl == null) {
3345             emptyRegionInfoQualifiers.add(result);
3346             errors.reportError(ERROR_CODE.EMPTY_META_CELL,
3347               "Empty REGIONINFO_QUALIFIER found in hbase:meta");
3348             return true;
3349           }
3350           ServerName sn = null;
3351           if (rl.getRegionLocation(HRegionInfo.DEFAULT_REPLICA_ID) == null ||
3352               rl.getRegionLocation(HRegionInfo.DEFAULT_REPLICA_ID).getRegionInfo() == null) {
3353             emptyRegionInfoQualifiers.add(result);
3354             errors.reportError(ERROR_CODE.EMPTY_META_CELL,
3355               "Empty REGIONINFO_QUALIFIER found in hbase:meta");
3356             return true;
3357           }
3358           HRegionInfo hri = rl.getRegionLocation(HRegionInfo.DEFAULT_REPLICA_ID).getRegionInfo();
3359           if (!(isTableIncluded(hri.getTable())
3360               || hri.isMetaRegion())) {
3361             return true;
3362           }
3363           PairOfSameType<HRegionInfo> daughters = HRegionInfo.getDaughterRegions(result);
3364           for (HRegionLocation h : rl.getRegionLocations()) {
3365             if (h == null || h.getRegionInfo() == null) {
3366               continue;
3367             }
3368             sn = h.getServerName();
3369             hri = h.getRegionInfo();
3370 
3371             MetaEntry m = null;
3372             if (hri.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
3373               m = new MetaEntry(hri, sn, ts, daughters.getFirst(), daughters.getSecond());
3374             } else {
3375               m = new MetaEntry(hri, sn, ts, null, null);
3376             }
3377             HbckInfo previous = regionInfoMap.get(hri.getEncodedName());
3378             if (previous == null) {
3379               regionInfoMap.put(hri.getEncodedName(), new HbckInfo(m));
3380             } else if (previous.metaEntry == null) {
3381               previous.metaEntry = m;
3382             } else {
3383               throw new IOException("Two entries in hbase:meta are same " + previous);
3384             }
3385           }
3386           PairOfSameType<HRegionInfo> mergeRegions = HRegionInfo.getMergeRegions(result);
3387           for (HRegionInfo mergeRegion : new HRegionInfo[] {
3388               mergeRegions.getFirst(), mergeRegions.getSecond() }) {
3389             if (mergeRegion != null) {
3390               // This region is already been merged
3391               HbckInfo hbInfo = getOrCreateInfo(mergeRegion.getEncodedName());
3392               hbInfo.setMerged(true);
3393             }
3394           }
3395 
3396           // show proof of progress to the user, once for every 100 records.
3397           if (countRecord % 100 == 0) {
3398             errors.progress();
3399           }
3400           countRecord++;
3401           return true;
3402         } catch (RuntimeException e) {
3403           LOG.error("Result=" + result);
3404           throw e;
3405         }
3406       }
3407     };
3408     if (!checkMetaOnly) {
3409       // Scan hbase:meta to pick up user regions
3410       MetaScanner.metaScan(connection, visitor);
3411     }
3412 
3413     errors.print("");
3414     return true;
3415   }
3416 
3417   /**
3418    * Stores the regioninfo entries scanned from META
3419    */
3420   static class MetaEntry extends HRegionInfo {
3421     ServerName regionServer;   // server hosting this region
3422     long modTime;          // timestamp of most recent modification metadata
3423     HRegionInfo splitA, splitB; //split daughters
3424 
3425     public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime) {
3426       this(rinfo, regionServer, modTime, null, null);
3427     }
3428 
3429     public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime,
3430         HRegionInfo splitA, HRegionInfo splitB) {
3431       super(rinfo);
3432       this.regionServer = regionServer;
3433       this.modTime = modTime;
3434       this.splitA = splitA;
3435       this.splitB = splitB;
3436     }
3437 
3438     @Override
3439     public boolean equals(Object o) {
3440       boolean superEq = super.equals(o);
3441       if (!superEq) {
3442         return superEq;
3443       }
3444 
3445       MetaEntry me = (MetaEntry) o;
3446       if (!regionServer.equals(me.regionServer)) {
3447         return false;
3448       }
3449       return (modTime == me.modTime);
3450     }
3451 
3452     @Override
3453     public int hashCode() {
3454       int hash = Arrays.hashCode(getRegionName());
3455       hash ^= getRegionId();
3456       hash ^= Arrays.hashCode(getStartKey());
3457       hash ^= Arrays.hashCode(getEndKey());
3458       hash ^= Boolean.valueOf(isOffline()).hashCode();
3459       hash ^= getTable().hashCode();
3460       if (regionServer != null) {
3461         hash ^= regionServer.hashCode();
3462       }
3463       hash ^= modTime;
3464       return hash;
3465     }
3466   }
3467 
3468   /**
3469    * Stores the regioninfo entries from HDFS
3470    */
3471   static class HdfsEntry {
3472     HRegionInfo hri;
3473     Path hdfsRegionDir = null;
3474     long hdfsRegionDirModTime  = 0;
3475     boolean hdfsRegioninfoFilePresent = false;
3476     boolean hdfsOnlyEdits = false;
3477   }
3478 
3479   /**
3480    * Stores the regioninfo retrieved from Online region servers.
3481    */
3482   static class OnlineEntry {
3483     HRegionInfo hri;
3484     ServerName hsa;
3485 
3486     @Override
3487     public String toString() {
3488       return hsa.toString() + ";" + hri.getRegionNameAsString();
3489     }
3490   }
3491 
3492   /**
3493    * Maintain information about a particular region.  It gathers information
3494    * from three places -- HDFS, META, and region servers.
3495    */
3496   public static class HbckInfo implements KeyRange {
3497     private MetaEntry metaEntry = null; // info in META
3498     private HdfsEntry hdfsEntry = null; // info in HDFS
3499     private List<OnlineEntry> deployedEntries = Lists.newArrayList(); // on Region Server
3500     private List<ServerName> deployedOn = Lists.newArrayList(); // info on RS's
3501     private boolean skipChecks = false; // whether to skip further checks to this region info.
3502     private boolean isMerged = false;// whether this region has already been merged into another one
3503     private int deployedReplicaId = HRegionInfo.DEFAULT_REPLICA_ID;
3504     private HRegionInfo primaryHRIForDeployedReplica = null;
3505 
3506     HbckInfo(MetaEntry metaEntry) {
3507       this.metaEntry = metaEntry;
3508     }
3509 
3510     public int getReplicaId() {
3511       if (metaEntry != null) return metaEntry.getReplicaId();
3512       return deployedReplicaId;
3513     }
3514 
3515     public synchronized void addServer(HRegionInfo hri, ServerName server) {
3516       OnlineEntry rse = new OnlineEntry() ;
3517       rse.hri = hri;
3518       rse.hsa = server;
3519       this.deployedEntries.add(rse);
3520       this.deployedOn.add(server);
3521       // save the replicaId that we see deployed in the cluster
3522       this.deployedReplicaId = hri.getReplicaId();
3523       this.primaryHRIForDeployedReplica =
3524           RegionReplicaUtil.getRegionInfoForDefaultReplica(hri);
3525     }
3526 
3527     @Override
3528     public synchronized String toString() {
3529       StringBuilder sb = new StringBuilder();
3530       sb.append("{ meta => ");
3531       sb.append((metaEntry != null)? metaEntry.getRegionNameAsString() : "null");
3532       sb.append( ", hdfs => " + getHdfsRegionDir());
3533       sb.append( ", deployed => " + Joiner.on(", ").join(deployedEntries));
3534       sb.append( ", replicaId => " + getReplicaId());
3535       sb.append(" }");
3536       return sb.toString();
3537     }
3538 
3539     @Override
3540     public byte[] getStartKey() {
3541       if (this.metaEntry != null) {
3542         return this.metaEntry.getStartKey();
3543       } else if (this.hdfsEntry != null) {
3544         return this.hdfsEntry.hri.getStartKey();
3545       } else {
3546         LOG.error("Entry " + this + " has no meta or hdfs region start key.");
3547         return null;
3548       }
3549     }
3550 
3551     @Override
3552     public byte[] getEndKey() {
3553       if (this.metaEntry != null) {
3554         return this.metaEntry.getEndKey();
3555       } else if (this.hdfsEntry != null) {
3556         return this.hdfsEntry.hri.getEndKey();
3557       } else {
3558         LOG.error("Entry " + this + " has no meta or hdfs region start key.");
3559         return null;
3560       }
3561     }
3562 
3563     public TableName getTableName() {
3564       if (this.metaEntry != null) {
3565         return this.metaEntry.getTable();
3566       } else if (this.hdfsEntry != null) {
3567         // we are only guaranteed to have a path and not an HRI for hdfsEntry,
3568         // so we get the name from the Path
3569         Path tableDir = this.hdfsEntry.hdfsRegionDir.getParent();
3570         return FSUtils.getTableName(tableDir);
3571       } else {
3572         // return the info from the first online/deployed hri
3573         for (OnlineEntry e : deployedEntries) {
3574           return e.hri.getTable();
3575         }
3576         return null;
3577       }
3578     }
3579 
3580     public String getRegionNameAsString() {
3581       if (metaEntry != null) {
3582         return metaEntry.getRegionNameAsString();
3583       } else if (hdfsEntry != null) {
3584         if (hdfsEntry.hri != null) {
3585           return hdfsEntry.hri.getRegionNameAsString();
3586         }
3587       } else {
3588         // return the info from the first online/deployed hri
3589         for (OnlineEntry e : deployedEntries) {
3590           return e.hri.getRegionNameAsString();
3591         }
3592       }
3593       return null;
3594     }
3595 
3596     public byte[] getRegionName() {
3597       if (metaEntry != null) {
3598         return metaEntry.getRegionName();
3599       } else if (hdfsEntry != null) {
3600         return hdfsEntry.hri.getRegionName();
3601       } else {
3602         // return the info from the first online/deployed hri
3603         for (OnlineEntry e : deployedEntries) {
3604           return e.hri.getRegionName();
3605         }
3606         return null;
3607       }
3608     }
3609 
3610     public HRegionInfo getPrimaryHRIForDeployedReplica() {
3611       return primaryHRIForDeployedReplica;
3612     }
3613 
3614     Path getHdfsRegionDir() {
3615       if (hdfsEntry == null) {
3616         return null;
3617       }
3618       return hdfsEntry.hdfsRegionDir;
3619     }
3620 
3621     boolean containsOnlyHdfsEdits() {
3622       if (hdfsEntry == null) {
3623         return false;
3624       }
3625       return hdfsEntry.hdfsOnlyEdits;
3626     }
3627 
3628     boolean isHdfsRegioninfoPresent() {
3629       if (hdfsEntry == null) {
3630         return false;
3631       }
3632       return hdfsEntry.hdfsRegioninfoFilePresent;
3633     }
3634 
3635     long getModTime() {
3636       if (hdfsEntry == null) {
3637         return 0;
3638       }
3639       return hdfsEntry.hdfsRegionDirModTime;
3640     }
3641 
3642     HRegionInfo getHdfsHRI() {
3643       if (hdfsEntry == null) {
3644         return null;
3645       }
3646       return hdfsEntry.hri;
3647     }
3648 
3649     public void setSkipChecks(boolean skipChecks) {
3650       this.skipChecks = skipChecks;
3651     }
3652 
3653     public boolean isSkipChecks() {
3654       return skipChecks;
3655     }
3656 
3657     public void setMerged(boolean isMerged) {
3658       this.isMerged = isMerged;
3659     }
3660 
3661     public boolean isMerged() {
3662       return this.isMerged;
3663     }
3664   }
3665 
3666   final static Comparator<HbckInfo> cmp = new Comparator<HbckInfo>() {
3667     @Override
3668     public int compare(HbckInfo l, HbckInfo r) {
3669       if (l == r) {
3670         // same instance
3671         return 0;
3672       }
3673 
3674       int tableCompare = l.getTableName().compareTo(r.getTableName());
3675       if (tableCompare != 0) {
3676         return tableCompare;
3677       }
3678 
3679       int startComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
3680           l.getStartKey(), r.getStartKey());
3681       if (startComparison != 0) {
3682         return startComparison;
3683       }
3684 
3685       // Special case for absolute endkey
3686       byte[] endKey = r.getEndKey();
3687       endKey = (endKey.length == 0) ? null : endKey;
3688       byte[] endKey2 = l.getEndKey();
3689       endKey2 = (endKey2.length == 0) ? null : endKey2;
3690       int endComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
3691           endKey2,  endKey);
3692 
3693       if (endComparison != 0) {
3694         return endComparison;
3695       }
3696 
3697       // use regionId as tiebreaker.
3698       // Null is considered after all possible values so make it bigger.
3699       if (l.hdfsEntry == null && r.hdfsEntry == null) {
3700         return 0;
3701       }
3702       if (l.hdfsEntry == null && r.hdfsEntry != null) {
3703         return 1;
3704       }
3705       // l.hdfsEntry must not be null
3706       if (r.hdfsEntry == null) {
3707         return -1;
3708       }
3709       // both l.hdfsEntry and r.hdfsEntry must not be null.
3710       return (int) (l.hdfsEntry.hri.getRegionId()- r.hdfsEntry.hri.getRegionId());
3711     }
3712   };
3713 
3714   /**
3715    * Prints summary of all tables found on the system.
3716    */
3717   private void printTableSummary(SortedMap<TableName, TableInfo> tablesInfo) {
3718     StringBuilder sb = new StringBuilder();
3719     errors.print("Summary:");
3720     for (TableInfo tInfo : tablesInfo.values()) {
3721       if (errors.tableHasErrors(tInfo)) {
3722         errors.print("Table " + tInfo.getName() + " is inconsistent.");
3723       } else {
3724         errors.print("  " + tInfo.getName() + " is okay.");
3725       }
3726       errors.print("    Number of regions: " + tInfo.getNumRegions());
3727       sb.setLength(0); // clear out existing buffer, if any.
3728       sb.append("    Deployed on: ");
3729       for (ServerName server : tInfo.deployedOn) {
3730         sb.append(" " + server.toString());
3731       }
3732       errors.print(sb.toString());
3733     }
3734   }
3735 
3736   static ErrorReporter getErrorReporter(
3737       final Configuration conf) throws ClassNotFoundException {
3738     Class<? extends ErrorReporter> reporter = conf.getClass("hbasefsck.errorreporter", PrintingErrorReporter.class, ErrorReporter.class);
3739     return ReflectionUtils.newInstance(reporter, conf);
3740   }
3741 
3742   public interface ErrorReporter {
3743     enum ERROR_CODE {
3744       UNKNOWN, NO_META_REGION, NULL_META_REGION, NO_VERSION_FILE, NOT_IN_META_HDFS, NOT_IN_META,
3745       NOT_IN_META_OR_DEPLOYED, NOT_IN_HDFS_OR_DEPLOYED, NOT_IN_HDFS, SERVER_DOES_NOT_MATCH_META, NOT_DEPLOYED,
3746       MULTI_DEPLOYED, SHOULD_NOT_BE_DEPLOYED, MULTI_META_REGION, RS_CONNECT_FAILURE,
3747       FIRST_REGION_STARTKEY_NOT_EMPTY, LAST_REGION_ENDKEY_NOT_EMPTY, DUPE_STARTKEYS,
3748       HOLE_IN_REGION_CHAIN, OVERLAP_IN_REGION_CHAIN, REGION_CYCLE, DEGENERATE_REGION,
3749       ORPHAN_HDFS_REGION, LINGERING_SPLIT_PARENT, NO_TABLEINFO_FILE, LINGERING_REFERENCE_HFILE,
3750       WRONG_USAGE, EMPTY_META_CELL, EXPIRED_TABLE_LOCK, ORPHANED_ZK_TABLE_ENTRY, BOUNDARIES_ERROR
3751     }
3752     void clear();
3753     void report(String message);
3754     void reportError(String message);
3755     void reportError(ERROR_CODE errorCode, String message);
3756     void reportError(ERROR_CODE errorCode, String message, TableInfo table);
3757     void reportError(ERROR_CODE errorCode, String message, TableInfo table, HbckInfo info);
3758     void reportError(
3759       ERROR_CODE errorCode,
3760       String message,
3761       TableInfo table,
3762       HbckInfo info1,
3763       HbckInfo info2
3764     );
3765     int summarize();
3766     void detail(String details);
3767     ArrayList<ERROR_CODE> getErrorList();
3768     void progress();
3769     void print(String message);
3770     void resetErrors();
3771     boolean tableHasErrors(TableInfo table);
3772   }
3773 
3774   static class PrintingErrorReporter implements ErrorReporter {
3775     public int errorCount = 0;
3776     private int showProgress;
3777     // How frequently calls to progress() will create output
3778     private static final int progressThreshold = 100;
3779 
3780     Set<TableInfo> errorTables = new HashSet<TableInfo>();
3781 
3782     // for use by unit tests to verify which errors were discovered
3783     private ArrayList<ERROR_CODE> errorList = new ArrayList<ERROR_CODE>();
3784 
3785     @Override
3786     public void clear() {
3787       errorTables.clear();
3788       errorList.clear();
3789       errorCount = 0;
3790     }
3791 
3792     @Override
3793     public synchronized void reportError(ERROR_CODE errorCode, String message) {
3794       if (errorCode == ERROR_CODE.WRONG_USAGE) {
3795         System.err.println(message);
3796         return;
3797       }
3798 
3799       errorList.add(errorCode);
3800       if (!summary) {
3801         System.out.println("ERROR: " + message);
3802       }
3803       errorCount++;
3804       showProgress = 0;
3805     }
3806 
3807     @Override
3808     public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table) {
3809       errorTables.add(table);
3810       reportError(errorCode, message);
3811     }
3812 
3813     @Override
3814     public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
3815                                          HbckInfo info) {
3816       errorTables.add(table);
3817       String reference = "(region " + info.getRegionNameAsString() + ")";
3818       reportError(errorCode, reference + " " + message);
3819     }
3820 
3821     @Override
3822     public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
3823                                          HbckInfo info1, HbckInfo info2) {
3824       errorTables.add(table);
3825       String reference = "(regions " + info1.getRegionNameAsString()
3826           + " and " + info2.getRegionNameAsString() + ")";
3827       reportError(errorCode, reference + " " + message);
3828     }
3829 
3830     @Override
3831     public synchronized void reportError(String message) {
3832       reportError(ERROR_CODE.UNKNOWN, message);
3833     }
3834 
3835     /**
3836      * Report error information, but do not increment the error count.  Intended for cases
3837      * where the actual error would have been reported previously.
3838      * @param message
3839      */
3840     @Override
3841     public synchronized void report(String message) {
3842       if (! summary) {
3843         System.out.println("ERROR: " + message);
3844       }
3845       showProgress = 0;
3846     }
3847 
3848     @Override
3849     public synchronized int summarize() {
3850       System.out.println(Integer.toString(errorCount) +
3851                          " inconsistencies detected.");
3852       if (errorCount == 0) {
3853         System.out.println("Status: OK");
3854         return 0;
3855       } else {
3856         System.out.println("Status: INCONSISTENT");
3857         return -1;
3858       }
3859     }
3860 
3861     @Override
3862     public ArrayList<ERROR_CODE> getErrorList() {
3863       return errorList;
3864     }
3865 
3866     @Override
3867     public synchronized void print(String message) {
3868       if (!summary) {
3869         System.out.println(message);
3870       }
3871     }
3872 
3873     @Override
3874     public boolean tableHasErrors(TableInfo table) {
3875       return errorTables.contains(table);
3876     }
3877 
3878     @Override
3879     public void resetErrors() {
3880       errorCount = 0;
3881     }
3882 
3883     @Override
3884     public synchronized void detail(String message) {
3885       if (details) {
3886         System.out.println(message);
3887       }
3888       showProgress = 0;
3889     }
3890 
3891     @Override
3892     public synchronized void progress() {
3893       if (showProgress++ == progressThreshold) {
3894         if (!summary) {
3895           System.out.print(".");
3896         }
3897         showProgress = 0;
3898       }
3899     }
3900   }
3901 
3902   /**
3903    * Contact a region server and get all information from it
3904    */
3905   static class WorkItemRegion implements Callable<Void> {
3906     private HBaseFsck hbck;
3907     private ServerName rsinfo;
3908     private ErrorReporter errors;
3909     private HConnection connection;
3910 
3911     WorkItemRegion(HBaseFsck hbck, ServerName info,
3912                    ErrorReporter errors, HConnection connection) {
3913       this.hbck = hbck;
3914       this.rsinfo = info;
3915       this.errors = errors;
3916       this.connection = connection;
3917     }
3918 
3919     @Override
3920     public synchronized Void call() throws IOException {
3921       errors.progress();
3922       try {
3923         BlockingInterface server = connection.getAdmin(rsinfo);
3924 
3925         // list all online regions from this region server
3926         List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(server);
3927         regions = filterRegions(regions);
3928 
3929         if (details) {
3930           errors.detail("RegionServer: " + rsinfo.getServerName() +
3931                            " number of regions: " + regions.size());
3932           for (HRegionInfo rinfo: regions) {
3933             errors.detail("  " + rinfo.getRegionNameAsString() +
3934                              " id: " + rinfo.getRegionId() +
3935                              " encoded_name: " + rinfo.getEncodedName() +
3936                              " start: " + Bytes.toStringBinary(rinfo.getStartKey()) +
3937                              " end: " + Bytes.toStringBinary(rinfo.getEndKey()));
3938           }
3939         }
3940 
3941         // check to see if the existence of this region matches the region in META
3942         for (HRegionInfo r:regions) {
3943           HbckInfo hbi = hbck.getOrCreateInfo(r.getEncodedName());
3944           hbi.addServer(r, rsinfo);
3945         }
3946       } catch (IOException e) {          // unable to connect to the region server.
3947         errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "RegionServer: " + rsinfo.getServerName() +
3948           " Unable to fetch region information. " + e);
3949         throw e;
3950       }
3951       return null;
3952     }
3953 
3954     private List<HRegionInfo> filterRegions(List<HRegionInfo> regions) {
3955       List<HRegionInfo> ret = Lists.newArrayList();
3956       for (HRegionInfo hri : regions) {
3957         if (hri.isMetaTable() || (!hbck.checkMetaOnly
3958             && hbck.isTableIncluded(hri.getTable()))) {
3959           ret.add(hri);
3960         }
3961       }
3962       return ret;
3963     }
3964   }
3965 
3966   /**
3967    * Contact hdfs and get all information about specified table directory into
3968    * regioninfo list.
3969    */
3970   static class WorkItemHdfsDir implements Callable<Void> {
3971     private HBaseFsck hbck;
3972     private FileStatus tableDir;
3973     private ErrorReporter errors;
3974     private FileSystem fs;
3975 
3976     WorkItemHdfsDir(HBaseFsck hbck, FileSystem fs, ErrorReporter errors,
3977                     FileStatus status) {
3978       this.hbck = hbck;
3979       this.fs = fs;
3980       this.tableDir = status;
3981       this.errors = errors;
3982     }
3983 
3984     @Override
3985     public synchronized Void call() throws IOException {
3986       try {
3987         // level 2: <HBASE_DIR>/<table>/*
3988         FileStatus[] regionDirs = fs.listStatus(tableDir.getPath());
3989         for (FileStatus regionDir : regionDirs) {
3990           errors.progress();
3991           String encodedName = regionDir.getPath().getName();
3992           // ignore directories that aren't hexadecimal
3993           if (!encodedName.toLowerCase().matches("[0-9a-f]+")) {
3994             continue;
3995           }
3996 
3997           LOG.debug("Loading region info from hdfs:"+ regionDir.getPath());
3998           HbckInfo hbi = hbck.getOrCreateInfo(encodedName);
3999           HdfsEntry he = new HdfsEntry();
4000           synchronized (hbi) {
4001             if (hbi.getHdfsRegionDir() != null) {
4002               errors.print("Directory " + encodedName + " duplicate??" +
4003                            hbi.getHdfsRegionDir());
4004             }
4005 
4006             he.hdfsRegionDir = regionDir.getPath();
4007             he.hdfsRegionDirModTime = regionDir.getModificationTime();
4008             Path regioninfoFile = new Path(he.hdfsRegionDir, HRegionFileSystem.REGION_INFO_FILE);
4009             he.hdfsRegioninfoFilePresent = fs.exists(regioninfoFile);
4010             // we add to orphan list when we attempt to read .regioninfo
4011 
4012             // Set a flag if this region contains only edits
4013             // This is special case if a region is left after split
4014             he.hdfsOnlyEdits = true;
4015             FileStatus[] subDirs = fs.listStatus(regionDir.getPath());
4016             Path ePath = WALSplitter.getRegionDirRecoveredEditsDir(regionDir.getPath());
4017             for (FileStatus subDir : subDirs) {
4018               errors.progress();
4019               String sdName = subDir.getPath().getName();
4020               if (!sdName.startsWith(".") && !sdName.equals(ePath.getName())) {
4021                 he.hdfsOnlyEdits = false;
4022                 break;
4023               }
4024             }
4025             hbi.hdfsEntry = he;
4026           }
4027         }
4028       } catch (IOException e) {
4029         // unable to connect to the region server.
4030         errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "Table Directory: "
4031             + tableDir.getPath().getName()
4032             + " Unable to fetch region information. " + e);
4033         throw e;
4034       }
4035       return null;
4036     }
4037   }
4038 
4039   /**
4040    * Contact hdfs and get all information about specified table directory into
4041    * regioninfo list.
4042    */
4043   static class WorkItemHdfsRegionInfo implements Callable<Void> {
4044     private HbckInfo hbi;
4045     private HBaseFsck hbck;
4046     private ErrorReporter errors;
4047 
4048     WorkItemHdfsRegionInfo(HbckInfo hbi, HBaseFsck hbck, ErrorReporter errors) {
4049       this.hbi = hbi;
4050       this.hbck = hbck;
4051       this.errors = errors;
4052     }
4053 
4054     @Override
4055     public synchronized Void call() throws IOException {
4056       // only load entries that haven't been loaded yet.
4057       if (hbi.getHdfsHRI() == null) {
4058         try {
4059           errors.progress();
4060           hbck.loadHdfsRegioninfo(hbi);
4061         } catch (IOException ioe) {
4062           String msg = "Orphan region in HDFS: Unable to load .regioninfo from table "
4063               + hbi.getTableName() + " in hdfs dir "
4064               + hbi.getHdfsRegionDir()
4065               + "!  It may be an invalid format or version file.  Treating as "
4066               + "an orphaned regiondir.";
4067           errors.reportError(ERROR_CODE.ORPHAN_HDFS_REGION, msg);
4068           try {
4069             hbck.debugLsr(hbi.getHdfsRegionDir());
4070           } catch (IOException ioe2) {
4071             LOG.error("Unable to read directory " + hbi.getHdfsRegionDir(), ioe2);
4072             throw ioe2;
4073           }
4074           hbck.orphanHdfsDirs.add(hbi);
4075           throw ioe;
4076         }
4077       }
4078       return null;
4079     }
4080   };
4081 
4082   /**
4083    * Display the full report from fsck. This displays all live and dead region
4084    * servers, and all known regions.
4085    */
4086   public static void setDisplayFullReport() {
4087     details = true;
4088   }
4089 
4090   /**
4091    * Set summary mode.
4092    * Print only summary of the tables and status (OK or INCONSISTENT)
4093    */
4094   void setSummary() {
4095     summary = true;
4096   }
4097 
4098   /**
4099    * Set hbase:meta check mode.
4100    * Print only info about hbase:meta table deployment/state
4101    */
4102   void setCheckMetaOnly() {
4103     checkMetaOnly = true;
4104   }
4105 
4106   /**
4107    * Set region boundaries check mode.
4108    */
4109   void setRegionBoundariesCheck() {
4110     checkRegionBoundaries = true;
4111   }
4112 
4113   /**
4114    * Set table locks fix mode.
4115    * Delete table locks held for a long time
4116    */
4117   public void setFixTableLocks(boolean shouldFix) {
4118     fixTableLocks = shouldFix;
4119     fixAny |= shouldFix;
4120   }
4121 
4122   /**
4123    * Set orphaned table ZNodes fix mode.
4124    * Set the table state to disable in the orphaned table ZNode.
4125    */
4126   public void setFixTableZNodes(boolean shouldFix) {
4127     fixTableZNodes = shouldFix;
4128     fixAny |= shouldFix;
4129   }
4130 
4131   /**
4132    * Check if we should rerun fsck again. This checks if we've tried to
4133    * fix something and we should rerun fsck tool again.
4134    * Display the full report from fsck. This displays all live and dead
4135    * region servers, and all known regions.
4136    */
4137   void setShouldRerun() {
4138     rerun = true;
4139   }
4140 
4141   boolean shouldRerun() {
4142     return rerun;
4143   }
4144 
4145   /**
4146    * Fix inconsistencies found by fsck. This should try to fix errors (if any)
4147    * found by fsck utility.
4148    */
4149   public void setFixAssignments(boolean shouldFix) {
4150     fixAssignments = shouldFix;
4151     fixAny |= shouldFix;
4152   }
4153 
4154   boolean shouldFixAssignments() {
4155     return fixAssignments;
4156   }
4157 
4158   public void setFixMeta(boolean shouldFix) {
4159     fixMeta = shouldFix;
4160     fixAny |= shouldFix;
4161   }
4162 
4163   boolean shouldFixMeta() {
4164     return fixMeta;
4165   }
4166 
4167   public void setFixEmptyMetaCells(boolean shouldFix) {
4168     fixEmptyMetaCells = shouldFix;
4169     fixAny |= shouldFix;
4170   }
4171 
4172   boolean shouldFixEmptyMetaCells() {
4173     return fixEmptyMetaCells;
4174   }
4175 
4176   public void setCheckHdfs(boolean checking) {
4177     checkHdfs = checking;
4178   }
4179 
4180   boolean shouldCheckHdfs() {
4181     return checkHdfs;
4182   }
4183 
4184   public void setFixHdfsHoles(boolean shouldFix) {
4185     fixHdfsHoles = shouldFix;
4186     fixAny |= shouldFix;
4187   }
4188 
4189   boolean shouldFixHdfsHoles() {
4190     return fixHdfsHoles;
4191   }
4192 
4193   public void setFixTableOrphans(boolean shouldFix) {
4194     fixTableOrphans = shouldFix;
4195     fixAny |= shouldFix;
4196   }
4197 
4198   boolean shouldFixTableOrphans() {
4199     return fixTableOrphans;
4200   }
4201 
4202   public void setFixHdfsOverlaps(boolean shouldFix) {
4203     fixHdfsOverlaps = shouldFix;
4204     fixAny |= shouldFix;
4205   }
4206 
4207   boolean shouldFixHdfsOverlaps() {
4208     return fixHdfsOverlaps;
4209   }
4210 
4211   public void setFixHdfsOrphans(boolean shouldFix) {
4212     fixHdfsOrphans = shouldFix;
4213     fixAny |= shouldFix;
4214   }
4215 
4216   boolean shouldFixHdfsOrphans() {
4217     return fixHdfsOrphans;
4218   }
4219 
4220   public void setFixVersionFile(boolean shouldFix) {
4221     fixVersionFile = shouldFix;
4222     fixAny |= shouldFix;
4223   }
4224 
4225   public boolean shouldFixVersionFile() {
4226     return fixVersionFile;
4227   }
4228 
4229   public void setSidelineBigOverlaps(boolean sbo) {
4230     this.sidelineBigOverlaps = sbo;
4231   }
4232 
4233   public boolean shouldSidelineBigOverlaps() {
4234     return sidelineBigOverlaps;
4235   }
4236 
4237   public void setFixSplitParents(boolean shouldFix) {
4238     fixSplitParents = shouldFix;
4239     fixAny |= shouldFix;
4240   }
4241 
4242   boolean shouldFixSplitParents() {
4243     return fixSplitParents;
4244   }
4245 
4246   public void setFixReferenceFiles(boolean shouldFix) {
4247     fixReferenceFiles = shouldFix;
4248     fixAny |= shouldFix;
4249   }
4250 
4251   boolean shouldFixReferenceFiles() {
4252     return fixReferenceFiles;
4253   }
4254 
4255   public boolean shouldIgnorePreCheckPermission() {
4256     return !fixAny || ignorePreCheckPermission;
4257   }
4258 
4259   public void setIgnorePreCheckPermission(boolean ignorePreCheckPermission) {
4260     this.ignorePreCheckPermission = ignorePreCheckPermission;
4261   }
4262 
4263   /**
4264    * @param mm maximum number of regions to merge into a single region.
4265    */
4266   public void setMaxMerge(int mm) {
4267     this.maxMerge = mm;
4268   }
4269 
4270   public int getMaxMerge() {
4271     return maxMerge;
4272   }
4273 
4274   public void setMaxOverlapsToSideline(int mo) {
4275     this.maxOverlapsToSideline = mo;
4276   }
4277 
4278   public int getMaxOverlapsToSideline() {
4279     return maxOverlapsToSideline;
4280   }
4281 
4282   /**
4283    * Only check/fix tables specified by the list,
4284    * Empty list means all tables are included.
4285    */
4286   boolean isTableIncluded(TableName table) {
4287     return (tablesIncluded.size() == 0) || tablesIncluded.contains(table);
4288   }
4289 
4290   public void includeTable(TableName table) {
4291     tablesIncluded.add(table);
4292   }
4293 
4294   Set<TableName> getIncludedTables() {
4295     return new HashSet<TableName>(tablesIncluded);
4296   }
4297 
4298   /**
4299    * We are interested in only those tables that have not changed their state in
4300    * hbase:meta during the last few seconds specified by hbase.admin.fsck.timelag
4301    * @param seconds - the time in seconds
4302    */
4303   public void setTimeLag(long seconds) {
4304     timelag = seconds * 1000; // convert to milliseconds
4305   }
4306 
4307   /**
4308    *
4309    * @param sidelineDir - HDFS path to sideline data
4310    */
4311   public void setSidelineDir(String sidelineDir) {
4312     this.sidelineDir = new Path(sidelineDir);
4313   }
4314 
4315   protected HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
4316     return new HFileCorruptionChecker(getConf(), executor, sidelineCorruptHFiles);
4317   }
4318 
4319   public HFileCorruptionChecker getHFilecorruptionChecker() {
4320     return hfcc;
4321   }
4322 
4323   public void setHFileCorruptionChecker(HFileCorruptionChecker hfcc) {
4324     this.hfcc = hfcc;
4325   }
4326 
4327   public void setRetCode(int code) {
4328     this.retcode = code;
4329   }
4330 
4331   public int getRetCode() {
4332     return retcode;
4333   }
4334 
4335   protected HBaseFsck printUsageAndExit() {
4336     StringWriter sw = new StringWriter(2048);
4337     PrintWriter out = new PrintWriter(sw);
4338     out.println("Usage: fsck [opts] {only tables}");
4339     out.println(" where [opts] are:");
4340     out.println("   -help Display help options (this)");
4341     out.println("   -details Display full report of all regions.");
4342     out.println("   -timelag <timeInSeconds>  Process only regions that " +
4343                        " have not experienced any metadata updates in the last " +
4344                        " <timeInSeconds> seconds.");
4345     out.println("   -sleepBeforeRerun <timeInSeconds> Sleep this many seconds" +
4346         " before checking if the fix worked if run with -fix");
4347     out.println("   -summary Print only summary of the tables and status.");
4348     out.println("   -metaonly Only check the state of the hbase:meta table.");
4349     out.println("   -sidelineDir <hdfs://> HDFS path to backup existing meta.");
4350     out.println("   -boundaries Verify that regions boundaries are the same between META and store files.");
4351 
4352     out.println("");
4353     out.println("  Metadata Repair options: (expert features, use with caution!)");
4354     out.println("   -fix              Try to fix region assignments.  This is for backwards compatiblity");
4355     out.println("   -fixAssignments   Try to fix region assignments.  Replaces the old -fix");
4356     out.println("   -fixMeta          Try to fix meta problems.  This assumes HDFS region info is good.");
4357     out.println("   -noHdfsChecking   Don't load/check region info from HDFS."
4358         + " Assumes hbase:meta region info is good. Won't check/fix any HDFS issue, e.g. hole, orphan, or overlap");
4359     out.println("   -fixHdfsHoles     Try to fix region holes in hdfs.");
4360     out.println("   -fixHdfsOrphans   Try to fix region dirs with no .regioninfo file in hdfs");
4361     out.println("   -fixTableOrphans  Try to fix table dirs with no .tableinfo file in hdfs (online mode only)");
4362     out.println("   -fixHdfsOverlaps  Try to fix region overlaps in hdfs.");
4363     out.println("   -fixVersionFile   Try to fix missing hbase.version file in hdfs.");
4364     out.println("   -maxMerge <n>     When fixing region overlaps, allow at most <n> regions to merge. (n=" + DEFAULT_MAX_MERGE +" by default)");
4365     out.println("   -sidelineBigOverlaps  When fixing region overlaps, allow to sideline big overlaps");
4366     out.println("   -maxOverlapsToSideline <n>  When fixing region overlaps, allow at most <n> regions to sideline per group. (n=" + DEFAULT_OVERLAPS_TO_SIDELINE +" by default)");
4367     out.println("   -fixSplitParents  Try to force offline split parents to be online.");
4368     out.println("   -ignorePreCheckPermission  ignore filesystem permission pre-check");
4369     out.println("   -fixReferenceFiles  Try to offline lingering reference store files");
4370     out.println("   -fixEmptyMetaCells  Try to fix hbase:meta entries not referencing any region"
4371         + " (empty REGIONINFO_QUALIFIER rows)");
4372 
4373     out.println("");
4374     out.println("  Datafile Repair options: (expert features, use with caution!)");
4375     out.println("   -checkCorruptHFiles     Check all Hfiles by opening them to make sure they are valid");
4376     out.println("   -sidelineCorruptHFiles  Quarantine corrupted HFiles.  implies -checkCorruptHFiles");
4377 
4378     out.println("");
4379     out.println("  Metadata Repair shortcuts");
4380     out.println("   -repair           Shortcut for -fixAssignments -fixMeta -fixHdfsHoles " +
4381         "-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps " +
4382         "-fixReferenceFiles -fixTableLocks -fixOrphanedTableZnodes");
4383     out.println("   -repairHoles      Shortcut for -fixAssignments -fixMeta -fixHdfsHoles");
4384 
4385     out.println("");
4386     out.println("  Table lock options");
4387     out.println("   -fixTableLocks    Deletes table locks held for a long time (hbase.table.lock.expire.ms, 10min by default)");
4388 
4389     out.println("");
4390     out.println("  Table Znode options");
4391     out.println("   -fixOrphanedTableZnodes    Set table state in ZNode to disabled if table does not exists");
4392 
4393     out.flush();
4394     errors.reportError(ERROR_CODE.WRONG_USAGE, sw.toString());
4395 
4396     setRetCode(-2);
4397     return this;
4398   }
4399 
4400   /**
4401    * Main program
4402    *
4403    * @param args
4404    * @throws Exception
4405    */
4406   public static void main(String[] args) throws Exception {
4407     // create a fsck object
4408     Configuration conf = HBaseConfiguration.create();
4409     Path hbasedir = FSUtils.getRootDir(conf);
4410     URI defaultFs = hbasedir.getFileSystem(conf).getUri();
4411     FSUtils.setFsDefault(conf, new Path(defaultFs));
4412     int ret = ToolRunner.run(new HBaseFsckTool(conf), args);
4413     System.exit(ret);
4414   }
4415 
4416   /**
4417    * This is a Tool wrapper that gathers -Dxxx=yyy configuration settings from the command line.
4418    */
4419   static class HBaseFsckTool extends Configured implements Tool {
4420     HBaseFsckTool(Configuration conf) { super(conf); }
4421     @Override
4422     public int run(String[] args) throws Exception {
4423       HBaseFsck hbck = new HBaseFsck(getConf());
4424       hbck.exec(hbck.executor, args);
4425       hbck.close();
4426       return hbck.getRetCode();
4427     }
4428   };
4429 
4430 
4431   public HBaseFsck exec(ExecutorService exec, String[] args) throws KeeperException, IOException,
4432     ServiceException, InterruptedException {
4433     long sleepBeforeRerun = DEFAULT_SLEEP_BEFORE_RERUN;
4434 
4435     boolean checkCorruptHFiles = false;
4436     boolean sidelineCorruptHFiles = false;
4437 
4438     // Process command-line args.
4439     for (int i = 0; i < args.length; i++) {
4440       String cmd = args[i];
4441       if (cmd.equals("-help") || cmd.equals("-h")) {
4442         return printUsageAndExit();
4443       } else if (cmd.equals("-details")) {
4444         setDisplayFullReport();
4445       } else if (cmd.equals("-timelag")) {
4446         if (i == args.length - 1) {
4447           errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -timelag needs a value.");
4448           return printUsageAndExit();
4449         }
4450         try {
4451           long timelag = Long.parseLong(args[i+1]);
4452           setTimeLag(timelag);
4453         } catch (NumberFormatException e) {
4454           errors.reportError(ERROR_CODE.WRONG_USAGE, "-timelag needs a numeric value.");
4455           return printUsageAndExit();
4456         }
4457         i++;
4458       } else if (cmd.equals("-sleepBeforeRerun")) {
4459         if (i == args.length - 1) {
4460           errors.reportError(ERROR_CODE.WRONG_USAGE,
4461             "HBaseFsck: -sleepBeforeRerun needs a value.");
4462           return printUsageAndExit();
4463         }
4464         try {
4465           sleepBeforeRerun = Long.parseLong(args[i+1]);
4466         } catch (NumberFormatException e) {
4467           errors.reportError(ERROR_CODE.WRONG_USAGE, "-sleepBeforeRerun needs a numeric value.");
4468           return printUsageAndExit();
4469         }
4470         i++;
4471       } else if (cmd.equals("-sidelineDir")) {
4472         if (i == args.length - 1) {
4473           errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -sidelineDir needs a value.");
4474           return printUsageAndExit();
4475         }
4476         i++;
4477         setSidelineDir(args[i]);
4478       } else if (cmd.equals("-fix")) {
4479         errors.reportError(ERROR_CODE.WRONG_USAGE,
4480           "This option is deprecated, please use  -fixAssignments instead.");
4481         setFixAssignments(true);
4482       } else if (cmd.equals("-fixAssignments")) {
4483         setFixAssignments(true);
4484       } else if (cmd.equals("-fixMeta")) {
4485         setFixMeta(true);
4486       } else if (cmd.equals("-noHdfsChecking")) {
4487         setCheckHdfs(false);
4488       } else if (cmd.equals("-fixHdfsHoles")) {
4489         setFixHdfsHoles(true);
4490       } else if (cmd.equals("-fixHdfsOrphans")) {
4491         setFixHdfsOrphans(true);
4492       } else if (cmd.equals("-fixTableOrphans")) {
4493         setFixTableOrphans(true);
4494       } else if (cmd.equals("-fixHdfsOverlaps")) {
4495         setFixHdfsOverlaps(true);
4496       } else if (cmd.equals("-fixVersionFile")) {
4497         setFixVersionFile(true);
4498       } else if (cmd.equals("-sidelineBigOverlaps")) {
4499         setSidelineBigOverlaps(true);
4500       } else if (cmd.equals("-fixSplitParents")) {
4501         setFixSplitParents(true);
4502       } else if (cmd.equals("-ignorePreCheckPermission")) {
4503         setIgnorePreCheckPermission(true);
4504       } else if (cmd.equals("-checkCorruptHFiles")) {
4505         checkCorruptHFiles = true;
4506       } else if (cmd.equals("-sidelineCorruptHFiles")) {
4507         sidelineCorruptHFiles = true;
4508       } else if (cmd.equals("-fixReferenceFiles")) {
4509         setFixReferenceFiles(true);
4510       } else if (cmd.equals("-fixEmptyMetaCells")) {
4511         setFixEmptyMetaCells(true);
4512       } else if (cmd.equals("-repair")) {
4513         // this attempts to merge overlapping hdfs regions, needs testing
4514         // under load
4515         setFixHdfsHoles(true);
4516         setFixHdfsOrphans(true);
4517         setFixMeta(true);
4518         setFixAssignments(true);
4519         setFixHdfsOverlaps(true);
4520         setFixVersionFile(true);
4521         setSidelineBigOverlaps(true);
4522         setFixSplitParents(false);
4523         setCheckHdfs(true);
4524         setFixReferenceFiles(true);
4525         setFixTableLocks(true);
4526         setFixTableZNodes(true);
4527       } else if (cmd.equals("-repairHoles")) {
4528         // this will make all missing hdfs regions available but may lose data
4529         setFixHdfsHoles(true);
4530         setFixHdfsOrphans(false);
4531         setFixMeta(true);
4532         setFixAssignments(true);
4533         setFixHdfsOverlaps(false);
4534         setSidelineBigOverlaps(false);
4535         setFixSplitParents(false);
4536         setCheckHdfs(true);
4537       } else if (cmd.equals("-maxOverlapsToSideline")) {
4538         if (i == args.length - 1) {
4539           errors.reportError(ERROR_CODE.WRONG_USAGE,
4540             "-maxOverlapsToSideline needs a numeric value argument.");
4541           return printUsageAndExit();
4542         }
4543         try {
4544           int maxOverlapsToSideline = Integer.parseInt(args[i+1]);
4545           setMaxOverlapsToSideline(maxOverlapsToSideline);
4546         } catch (NumberFormatException e) {
4547           errors.reportError(ERROR_CODE.WRONG_USAGE,
4548             "-maxOverlapsToSideline needs a numeric value argument.");
4549           return printUsageAndExit();
4550         }
4551         i++;
4552       } else if (cmd.equals("-maxMerge")) {
4553         if (i == args.length - 1) {
4554           errors.reportError(ERROR_CODE.WRONG_USAGE,
4555             "-maxMerge needs a numeric value argument.");
4556           return printUsageAndExit();
4557         }
4558         try {
4559           int maxMerge = Integer.parseInt(args[i+1]);
4560           setMaxMerge(maxMerge);
4561         } catch (NumberFormatException e) {
4562           errors.reportError(ERROR_CODE.WRONG_USAGE,
4563             "-maxMerge needs a numeric value argument.");
4564           return printUsageAndExit();
4565         }
4566         i++;
4567       } else if (cmd.equals("-summary")) {
4568         setSummary();
4569       } else if (cmd.equals("-metaonly")) {
4570         setCheckMetaOnly();
4571       } else if (cmd.equals("-boundaries")) {
4572         setRegionBoundariesCheck();
4573       } else if (cmd.equals("-fixTableLocks")) {
4574         setFixTableLocks(true);
4575       } else if (cmd.equals("-fixOrphanedTableZnodes")) {
4576         setFixTableZNodes(true);
4577       } else if (cmd.startsWith("-")) {
4578         errors.reportError(ERROR_CODE.WRONG_USAGE, "Unrecognized option:" + cmd);
4579         return printUsageAndExit();
4580       } else {
4581         includeTable(TableName.valueOf(cmd));
4582         errors.print("Allow checking/fixes for table: " + cmd);
4583       }
4584     }
4585 
4586     errors.print("HBaseFsck command line options: " + StringUtils.join(args, " "));
4587 
4588     // pre-check current user has FS write permission or not
4589     try {
4590       preCheckPermission();
4591     } catch (AccessDeniedException ace) {
4592       Runtime.getRuntime().exit(-1);
4593     } catch (IOException ioe) {
4594       Runtime.getRuntime().exit(-1);
4595     }
4596 
4597     // do the real work of hbck
4598     connect();
4599 
4600     try {
4601       // if corrupt file mode is on, first fix them since they may be opened later
4602       if (checkCorruptHFiles || sidelineCorruptHFiles) {
4603         LOG.info("Checking all hfiles for corruption");
4604         HFileCorruptionChecker hfcc = createHFileCorruptionChecker(sidelineCorruptHFiles);
4605         setHFileCorruptionChecker(hfcc); // so we can get result
4606         Collection<TableName> tables = getIncludedTables();
4607         Collection<Path> tableDirs = new ArrayList<Path>();
4608         Path rootdir = FSUtils.getRootDir(getConf());
4609         if (tables.size() > 0) {
4610           for (TableName t : tables) {
4611             tableDirs.add(FSUtils.getTableDir(rootdir, t));
4612           }
4613         } else {
4614           tableDirs = FSUtils.getTableDirs(FSUtils.getCurrentFileSystem(getConf()), rootdir);
4615         }
4616         hfcc.checkTables(tableDirs);
4617         hfcc.report(errors);
4618       }
4619 
4620       // check and fix table integrity, region consistency.
4621       int code = onlineHbck();
4622       setRetCode(code);
4623       // If we have changed the HBase state it is better to run hbck again
4624       // to see if we haven't broken something else in the process.
4625       // We run it only once more because otherwise we can easily fall into
4626       // an infinite loop.
4627       if (shouldRerun()) {
4628         try {
4629           LOG.info("Sleeping " + sleepBeforeRerun + "ms before re-checking after fix...");
4630           Thread.sleep(sleepBeforeRerun);
4631         } catch (InterruptedException ie) {
4632           LOG.warn("Interrupted while sleeping");
4633           return this;
4634         }
4635         // Just report
4636         setFixAssignments(false);
4637         setFixMeta(false);
4638         setFixHdfsHoles(false);
4639         setFixHdfsOverlaps(false);
4640         setFixVersionFile(false);
4641         setFixTableOrphans(false);
4642         errors.resetErrors();
4643         code = onlineHbck();
4644         setRetCode(code);
4645       }
4646     } finally {
4647       IOUtils.cleanup(null, this);
4648     }
4649     return this;
4650   }
4651 
4652   /**
4653    * ls -r for debugging purposes
4654    */
4655   void debugLsr(Path p) throws IOException {
4656     debugLsr(getConf(), p, errors);
4657   }
4658 
4659   /**
4660    * ls -r for debugging purposes
4661    */
4662   public static void debugLsr(Configuration conf,
4663       Path p) throws IOException {
4664     debugLsr(conf, p, new PrintingErrorReporter());
4665   }
4666 
4667   /**
4668    * ls -r for debugging purposes
4669    */
4670   public static void debugLsr(Configuration conf,
4671       Path p, ErrorReporter errors) throws IOException {
4672     if (!LOG.isDebugEnabled() || p == null) {
4673       return;
4674     }
4675     FileSystem fs = p.getFileSystem(conf);
4676 
4677     if (!fs.exists(p)) {
4678       // nothing
4679       return;
4680     }
4681     errors.print(p.toString());
4682 
4683     if (fs.isFile(p)) {
4684       return;
4685     }
4686 
4687     if (fs.getFileStatus(p).isDirectory()) {
4688       FileStatus[] fss= fs.listStatus(p);
4689       for (FileStatus status : fss) {
4690         debugLsr(conf, status.getPath(), errors);
4691       }
4692     }
4693   }
4694 }