/**
 * Command-line utility that walks a directory tree and counts files, symbolic links and
 * directories, sums file sizes, and optionally buckets files by cumulative size thresholds.
 *
 * <p>An instance is single-use: the counters are never reset, so call {@link #call()} once.
 */
public class FileCount implements Callable<FileCount.Result> {

    /** Snapshot of the counters collected by one directory walk. */
    public static class Result {
        public final Path rootDirectory;
        public final long nbFiles;
        public final long nbFailedFiles;
        public final long nbFileSymlinks;
        public final long nbRegularFiles;
        public final long nbNonRegularFiles;
        public final long totalFileSize;
        public final long totalRegularFileSize;
        public final long totalNonRegularFileSize;
        public final long nbDirectories;
        public final long nbFailedDirectories;

        // Cumulative counts keyed by threshold (files with size <= key); null when no thresholds.
        private Map<Long, Long> _thresholdedCounts;

        /**
         * @param thresholdedCounts cumulative counts per threshold (number of files whose size is
         *                          {@code <=} the key); may be null or empty
         */
        public Result(Path rootDir, long nbFiles, long nbFailedFiles, long nbFileSymlinks, long nbRegularFiles,
                long nbNonRegularFiles, long totalFileSize, long totalRegularFileSize, long totalNonRegularFileSize,
                long nbDirectories, long nbFailedDirectories, Map<Long, Long> thresholdedCounts) {
            this.rootDirectory = rootDir;
            this.nbFiles = nbFiles;
            this.nbFailedFiles = nbFailedFiles;
            this.nbFileSymlinks = nbFileSymlinks;
            this.nbRegularFiles = nbRegularFiles;
            this.nbNonRegularFiles = nbNonRegularFiles;
            this.totalFileSize = totalFileSize;
            this.totalRegularFileSize = totalRegularFileSize;
            this.totalNonRegularFileSize = totalNonRegularFileSize;
            this.nbDirectories = nbDirectories;
            this.nbFailedDirectories = nbFailedDirectories;
            if (thresholdedCounts != null && !thresholdedCounts.isEmpty()) {
                // Sorted copy: print() relies on ascending threshold order.
                _thresholdedCounts = new TreeMap<Long, Long>(thresholdedCounts);
            }
        }

        /**
         * Writes a human-readable report of all counters to {@code o}.
         *
         * @param o destination stream
         */
        public void print(PrintStream o) {
            o.println();
            o.println(String.format("%44s: %32s", "Root directory", this.rootDirectory));

            o.println();
            printRow(o, "File count(failed)", this.nbFailedFiles);
            printRow(o, "File count(symlinks)", this.nbFileSymlinks);
            printRow(o, "File count(regular)", this.nbRegularFiles);
            printRow(o, "File count(non-regular)", this.nbNonRegularFiles);
            printRow(o, "File count(total)", this.nbFiles);

            if (_thresholdedCounts != null && !_thresholdedCounts.isEmpty()) {
                o.println();
                List<Long> ts = new ArrayList<Long>(_thresholdedCounts.keySet());
                for (int i = 0; i < ts.size(); i++) {
                    long t = ts.get(i);
                    long c = _thresholdedCounts.get(t);
                    if (i == 0) {
                        String label = (t == 0) ? "Files(size: 0)" : String.format("Files(size: [0, %d])", t);
                        printRow(o, label, c);
                    } else {
                        // Counts are cumulative, so a bucket is the difference to the previous one.
                        long pt = ts.get(i - 1);
                        long pc = _thresholdedCounts.get(pt);
                        printRow(o, String.format("Files(size: [%d,%d])", pt + 1, t), c - pc);
                    }
                    if (i == ts.size() - 1) {
                        // Everything not counted at or below the last threshold is strictly larger
                        // than it, so the open-ended bucket starts at t + 1 (the original printed t).
                        // \u221e is the infinity sign, escaped to keep this source ASCII-only.
                        printRow(o, String.format("Files(size: [%d,\u221e])", t + 1), this.nbFiles - c);
                    }
                }
            }

            o.println();
            printRow(o, "File size(regular)", this.totalRegularFileSize);
            printRow(o, "File size(non-regular)", this.totalNonRegularFileSize);
            printRow(o, "File size(total)", this.totalFileSize);

            o.println();
            printRow(o, "Directory count(failed)", this.nbFailedDirectories);
            printRow(o, "Directory count(total)", this.nbDirectories);
            o.println();
        }

        /** Prints one right-aligned "label: number" row of the report. */
        private static void printRow(PrintStream o, String label, long value) {
            o.println(String.format("%44s: %,32d", label, value));
        }
    }

    private static final Logger logger = Logger.getLogger(FileCount.class.getName());
    static {
        ConsoleHandler handler = new ConsoleHandler();
        handler.setFormatter(new SimpleFormatter());
        handler.setLevel(Level.ALL);
        logger.addHandler(handler);
        logger.setLevel(Level.ALL);
    }

    private final Path _rootDir;
    private final boolean _followLinks;
    // Cumulative per-threshold counters, sorted by threshold; null when no thresholds were given.
    private final Map<Long, AtomicLong> _thresholds;

    private final AtomicLong _nbFiles = new AtomicLong(0L);
    private final AtomicLong _nbFailedFiles = new AtomicLong(0L);
    private final AtomicLong _nbFileSymlinks = new AtomicLong(0L);
    private final AtomicLong _nbRegularFiles = new AtomicLong(0L);
    private final AtomicLong _nbNonRegularFiles = new AtomicLong(0L);
    private final AtomicLong _totalFileSize = new AtomicLong(0L);
    private final AtomicLong _totalRegularFileSize = new AtomicLong(0L);
    private final AtomicLong _totalNonRegularFileSize = new AtomicLong(0L);
    private final AtomicLong _nbDirs = new AtomicLong(0L);
    private final AtomicLong _nbFailedDirs = new AtomicLong(0L);

    /**
     * @param rootDir     directory to walk
     * @param followLinks whether the walk follows symbolic links
     * @param thresholds  optional size thresholds in bytes; duplicates collapse into one bucket
     */
    public FileCount(Path rootDir, boolean followLinks, long... thresholds) throws Throwable {
        _rootDir = rootDir;
        _followLinks = followLinks;
        if (thresholds != null && thresholds.length > 0) {
            Map<Long, AtomicLong> buckets = new TreeMap<Long, AtomicLong>();
            for (long threshold : thresholds) {
                buckets.put(threshold, new AtomicLong(0L));
            }
            _thresholds = buckets;
        } else {
            _thresholds = null;
        }
    }

    public Path rootDir() {
        return _rootDir;
    }

    public boolean followLinks() {
        return _followLinks;
    }

    /**
     * @return the configured thresholds in ascending order, or {@code null} when none were given
     */
    public long[] thresholds() {
        if (_thresholds != null && !_thresholds.isEmpty()) {
            Set<Long> ts = _thresholds.keySet();
            long[] thresholds = new long[ts.size()];
            int i = 0;
            for (Long t : ts) {
                thresholds[i++] = t;
            }
            return thresholds;
        }
        return null;
    }

    public long nbFiles() {
        return _nbFiles.get();
    }

    public long nbFailedFiles() {
        return _nbFailedFiles.get();
    }

    public long nbFileSymlinks() {
        return _nbFileSymlinks.get();
    }

    public long nbRegularFiles() {
        return _nbRegularFiles.get();
    }

    public long nbNonRegularFiles() {
        return _nbNonRegularFiles.get();
    }

    public long totalFileSize() {
        return _totalFileSize.get();
    }

    public long totalRegularFileSize() {
        return _totalRegularFileSize.get();
    }

    public long totalNonRegularFileSize() {
        return _totalNonRegularFileSize.get();
    }

    public long nbDirectories() {
        return _nbDirs.get();
    }

    public long nbFailedDirectories() {
        return _nbFailedDirs.get();
    }

    /** Increments every threshold counter whose threshold is {@code >= fileSize} (cumulative). */
    private void updateThresholdSize(long fileSize) {
        if (_thresholds == null || _thresholds.isEmpty()) {
            return;
        }
        _thresholds.forEach((t, s) -> {
            if (fileSize <= t) {
                s.getAndIncrement();
            }
        });
    }

    /**
     * Walks the root directory, printing each visited file to standard output and updating the
     * counters, then returns them as a {@link Result}.
     *
     * @throws Exception if the walk itself fails (for example the root cannot be read)
     */
    @Override
    public Result call() throws Exception {
        Set<FileVisitOption> visitOptions = _followLinks ? EnumSet.of(FileVisitOption.FOLLOW_LINKS)
                : EnumSet.noneOf(FileVisitOption.class);
        Files.walkFileTree(_rootDir, visitOptions, Integer.MAX_VALUE, new SimpleFileVisitor<Path>() {
            @Override
            public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
                long index = _nbFiles.incrementAndGet();
                System.out.println(String.format("%d: '%s'", index, file.normalize().toAbsolutePath()));
                if (Files.isSymbolicLink(file)) {
                    // Symbolic links are counted but never sized.
                    _nbFileSymlinks.getAndIncrement();
                    return FileVisitResult.CONTINUE;
                }
                long fileSize;
                try {
                    fileSize = Files.size(file);
                } catch (IOException e) {
                    // One unreadable file must not abort the whole walk; count it and move on.
                    _nbFailedFiles.getAndIncrement();
                    return FileVisitResult.CONTINUE;
                }
                _totalFileSize.getAndAdd(fileSize);
                updateThresholdSize(fileSize);
                if (Files.isRegularFile(file)) {
                    _nbRegularFiles.getAndIncrement();
                    _totalRegularFileSize.getAndAdd(fileSize);
                } else {
                    _nbNonRegularFiles.getAndIncrement();
                    _totalNonRegularFileSize.getAndAdd(fileSize);
                }
                return FileVisitResult.CONTINUE;
            }

            @Override
            public FileVisitResult visitFileFailed(Path file, IOException ioe) {
                if (Files.isDirectory(file)) {
                    _nbFailedDirs.getAndIncrement();
                } else {
                    _nbFailedFiles.getAndIncrement();
                }
                return FileVisitResult.CONTINUE;
            }

            @Override
            public FileVisitResult postVisitDirectory(Path dir, IOException ioe) {
                // Deliberately ignores ioe so that a failed listing does not stop the walk.
                return FileVisitResult.CONTINUE;
            }

            @Override
            public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) throws IOException {
                _nbDirs.getAndIncrement();
                return super.preVisitDirectory(dir, attrs);
            }
        });
        Map<Long, Long> thresholdedCounts = new TreeMap<Long, Long>();
        if (_thresholds != null && !_thresholds.isEmpty()) {
            _thresholds.forEach((t, c) -> thresholdedCounts.put(t, c.get()));
        }
        return new Result(_rootDir, nbFiles(), nbFailedFiles(), nbFileSymlinks(), nbRegularFiles(), nbNonRegularFiles(),
                totalFileSize(), totalRegularFileSize(), totalNonRegularFileSize(), nbDirectories(),
                nbFailedDirectories(), thresholdedCounts);
    }

    /**
     * Parses a comma-separated threshold list such as {@code 1k,1m,10m}.
     *
     * @return the parsed sizes in bytes, or {@code null} when {@code ts} is null or blank
     */
    private static long[] parseThresholds(String ts) {
        if (ts != null) {
            ts = ts.replace(" ", "").replaceAll(",$", "").replaceAll("^,", "");
            if (!ts.isEmpty()) {
                String[] ss = ts.split(",");
                long[] ls = new long[ss.length];
                for (int i = 0; i < ss.length; i++) {
                    ls[i] = parseSize(ss[i]);
                }
                return ls;
            }
        }
        return null;
    }

    /**
     * Parses a size with an optional decimal unit suffix: b, k (10^3), m (10^6), g (10^9), t (10^12).
     *
     * @throws IllegalArgumentException if the value is malformed, negative, or overflows a long
     */
    private static long parseSize(String s) {
        s = s.toLowerCase();
        long multiplier = 1L;
        String digits = s;
        if (!s.isEmpty()) {
            boolean hasUnit = true;
            switch (s.charAt(s.length() - 1)) {
            case 'b':
                multiplier = 1L;
                break;
            case 'k':
                multiplier = 1000L;
                break;
            case 'm':
                multiplier = 1000000L;
                break;
            case 'g':
                multiplier = 1000000000L;
                break;
            case 't':
                multiplier = 1000000000000L;
                break;
            default:
                hasUnit = false;
                break;
            }
            if (hasUnit) {
                digits = s.substring(0, s.length() - 1);
            }
        }
        long size;
        try {
            // multiplyExact turns a silent wrap-around into an error the caller can report.
            size = Math.multiplyExact(Long.parseLong(digits), multiplier);
        } catch (ArithmeticException e) {
            throw new IllegalArgumentException("File size threshold out of range: " + s, e);
        }
        if (size < 0) {
            throw new IllegalArgumentException("Invalid file size threshold: " + size);
        }
        return size;
    }

    /** Convenience wrapper: builds a {@code FileCount} and runs it once. */
    public static Result execute(Path rootDir, boolean followLinks, long... thresholds) throws Throwable {
        return new FileCount(rootDir, followLinks, thresholds).call();
    }

    /**
     * Parses the command-line arguments, runs the count and prints the report to standard output.
     *
     * @return 0 on success
     * @throws IllegalArgumentException if the arguments are invalid
     */
    public static int executeCommand(String[] args) throws Throwable {
        Boolean followLinks = null;
        long[] thresholds = null;
        Path rootDir = null;
        try {
            for (int i = 0; i < args.length;) {
                String arg = args[i];
                if (arg.equalsIgnoreCase("--follow-links")) {
                    if (followLinks != null) {
                        throw new IllegalArgumentException("Multiple --follow-links found!");
                    }
                    followLinks = true;
                    i++;
                } else if (arg.equalsIgnoreCase("--thresholds")) {
                    if (thresholds != null) {
                        throw new IllegalArgumentException("Multiple --thresholds found!");
                    }
                    if (i + 1 >= args.length) {
                        throw new IllegalArgumentException("Missing value for --thresholds");
                    }
                    thresholds = parseThresholds(args[i + 1]);
                    i += 2;
                } else {
                    if (arg.startsWith("-")) {
                        throw new IllegalArgumentException("Unknown option: " + arg);
                    }
                    if (rootDir != null) {
                        throw new IllegalArgumentException("Multiple root directory specified. Expects only one.");
                    }
                    rootDir = Paths.get(arg).normalize().toAbsolutePath();
                    i++;
                }
            }
            if (rootDir == null) {
                rootDir = Paths.get(System.getProperty("user.dir"));
            }
            if (followLinks == null) {
                followLinks = false;
            }
        } catch (IllegalArgumentException e) {
            // Already the right type; re-wrapping would only bury the message.
            throw e;
        } catch (Throwable e) {
            throw new IllegalArgumentException(e);
        }
        execute(rootDir, followLinks, thresholds).print(System.out);
        return 0;
    }

    /** Prints the command usage to standard output. */
    public static void printCommandUsage() {
        // @formatter:off
        System.out.println();
        System.out.println("Usage: file-count [--follow-links] [--thresholds size[b|k|m|g|t]] [directory]");
        System.out.println();
        System.out.println("Description: count the total number of files and sum up the total file sizes in the specified directory. If the directory is not specified, defaults to current work directory.");
        System.out.println();
        System.out.println("Options:");
        System.out.println(" --follow-links follow symbolic links.");
        System.out.println(" --thresholds size[b|k|m|g|t] count files by the specified thresholds. separated by comma. For example 1k,1m,10m,1g,5g");
        System.out.println();
        // @formatter:on
    }

    public static void main(String[] args) {
        try {
            executeCommand(args);
        } catch (Throwable e) {
            e.printStackTrace();
            if (e instanceof IllegalArgumentException) {
                printCommandUsage();
            }
            System.exit(1);
        }
    }

}
For example 1k,1m,10m,1g,5g"); - System.out.println(); - // @formatter:on - } - - public static void main(String[] args) { - try { - executeCommand(args); - } catch (Throwable e) { - e.printStackTrace(); - if (e instanceof IllegalArgumentException) { - printCommandUsage(); - } - System.exit(1); - } - } - -} diff --git a/src/main/java/unimelb/utils/FileList.java b/src/main/java/unimelb/utils/FileList.java new file mode 100644 index 0000000000000000000000000000000000000000..61bae53251b529e862f9f6fa355fd3dd48e50aac --- /dev/null +++ b/src/main/java/unimelb/utils/FileList.java @@ -0,0 +1,472 @@ +package unimelb.utils; + +import java.io.IOException; +import java.nio.file.FileVisitOption; +import java.nio.file.FileVisitResult; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.SimpleFileVisitor; +import java.nio.file.attribute.BasicFileAttributes; +import java.util.Collection; +import java.util.EnumSet; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; +import java.util.concurrent.Callable; +import java.util.concurrent.atomic.AtomicLong; +import java.util.logging.ConsoleHandler; +import java.util.logging.Level; +import java.util.logging.Logger; +import java.util.logging.SimpleFormatter; + +public class FileList implements Callable<FileListSummary>, FileListProgress { + + public static interface FileHandler { + void handleFile(FileListProgress progress, Path file, long fileSize); + } + + private static final Logger logger = Logger.getLogger(FileList.class.getName()); + static { + ConsoleHandler handler = new ConsoleHandler(); + handler.setFormatter(new SimpleFormatter()); + handler.setLevel(Level.ALL); + logger.addHandler(handler); + logger.setLevel(Level.ALL); + } + + private FileListOptions _options; + + private AtomicLong _matchedFileCount; + private Map<Long, AtomicLong> _matchedFileCountThresholded; + private AtomicLong _matchedFileSizeTotal; + private AtomicLong _matchedFileSizeMin; + 
private AtomicLong _matchedFileSizeMax; + private AtomicLong _fileCount; + private Map<Long, AtomicLong> _fileCountThresholded; + private AtomicLong _failedFileCount; + private AtomicLong _symlinkCount; + private AtomicLong _regularFileCount; + private AtomicLong _nonRegularFileCount; + private AtomicLong _fileSizeTotal; + private AtomicLong _fileSizeMin; + private AtomicLong _fileSizeMax; + private AtomicLong _dirCount; + private AtomicLong _failedDirCount; + + private FileHandler _fh; + + public FileList(FileListOptions options, FileHandler fh) throws Throwable { + _options = options; + _fh = fh == null ? new FileHandler() { + + @Override + public void handleFile(FileListProgress progress, Path file, long size) { + System.out.println(String.format("%-19d '%s'", size, file)); + } + } : fh; + + _matchedFileCount = new AtomicLong(0L); + _matchedFileSizeTotal = new AtomicLong(0L); + _matchedFileSizeMin = new AtomicLong(Long.MAX_VALUE); + _matchedFileSizeMax = new AtomicLong(Long.MIN_VALUE); + + _fileCount = new AtomicLong(0L); + _failedFileCount = new AtomicLong(0L); + _regularFileCount = new AtomicLong(0L); + _symlinkCount = new AtomicLong(0L); + _nonRegularFileCount = new AtomicLong(0L); + _fileSizeTotal = new AtomicLong(0L); + _fileSizeMin = new AtomicLong(Long.MAX_VALUE); + _fileSizeMax = new AtomicLong(Long.MIN_VALUE); + + _dirCount = new AtomicLong(0L); + _failedDirCount = new AtomicLong(0L); + + if (_options.hasThresholds()) { + _fileCountThresholded = new TreeMap<Long, AtomicLong>(); + if (_options.hasConstraints()) { + _matchedFileCountThresholded = new TreeMap<Long, AtomicLong>(); + } + List<Long> thresholds = _options.thresholds(); + for (long threshold : thresholds) { + _fileCountThresholded.put(threshold, new AtomicLong(0L)); + if (_matchedFileCountThresholded != null) { + if (!((_options.sizeFrom() != null && threshold < _options.sizeFrom()) + || (_options.sizeTo() != null && threshold > _options.sizeTo()))) { + 
_matchedFileCountThresholded.put(threshold, new AtomicLong(0L)); + } + } + } + } + } + + @Override + public FileListOptions options() { + return _options; + } + + @Override + public long matchedFileCount() { + return _matchedFileCount.get(); + } + + @Override + public Map<Long, Long> matchedFileCountThresholded() { + if (_matchedFileCountThresholded == null || _matchedFileCountThresholded.isEmpty()) { + return null; + } + final Map<Long, Long> matchedFileCountThresholded = new TreeMap<Long, Long>(); + _matchedFileCountThresholded.forEach((t, c) -> { + matchedFileCountThresholded.put(t, c.get()); + }); + return matchedFileCountThresholded; + } + + @Override + public long matchedFileSizeTotal() { + return _matchedFileSizeTotal.get(); + } + + @Override + public long matchedFileSizeMin() { + return _matchedFileSizeMin.get(); + } + + @Override + public long matchedFileSizeMax() { + return _matchedFileSizeMax.get(); + } + + @Override + public long regularFileCount() { + return _regularFileCount.get(); + } + + @Override + public long symlinkCount() { + return _symlinkCount.get(); + } + + @Override + public long nonRegularFileCount() { + return _nonRegularFileCount.get(); + } + + @Override + public long fileCount() { + return _fileCount.get(); + } + + @Override + public Map<Long, Long> fileCountThresholded() { + if (_fileCountThresholded == null || _fileCountThresholded.isEmpty()) { + return null; + } + final Map<Long, Long> fileCountThresholded = new TreeMap<Long, Long>(); + _fileCountThresholded.forEach((t, c) -> { + fileCountThresholded.put(t, c.get()); + }); + return fileCountThresholded; + } + + @Override + public long fileSizeTotal() { + return _fileSizeTotal.get(); + } + + @Override + public long failedFileCount() { + return _failedFileCount.get(); + } + + @Override + public long fileSizeMin() { + return _fileSizeMin.get(); + } + + @Override + public long fileSizeMax() { + return _fileSizeMax.get(); + } + + @Override + public long directoryCount() { + return 
_dirCount.get(); + } + + @Override + public long failedDirectoryCount() { + return _failedDirCount.get(); + } + + @Override + public FileListSummary call() throws Exception { + Files.walkFileTree(_options.rootDirectory(), _options.followLinks() ? EnumSet.of(FileVisitOption.FOLLOW_LINKS) + : EnumSet.noneOf(FileVisitOption.class), Integer.MAX_VALUE, new SimpleFileVisitor<Path>() { + @Override + public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException { + _fileCount.getAndIncrement(); + try { + if (Files.isSymbolicLink(file)) { + // ignore symbolic links + _symlinkCount.getAndIncrement(); + } else { + long fileSize = Files.size(file); + _fileSizeTotal.getAndAdd(fileSize); + if (fileSize < _fileSizeMin.get()) { + _fileSizeMin.set(fileSize); + } + if (fileSize > _fileSizeMax.get()) { + _fileSizeMax.set(fileSize); + } + if (!Files.isRegularFile(file)) { + // ignore non-regular files + _nonRegularFileCount.getAndIncrement(); + } else { + _regularFileCount.getAndIncrement(); + updateFileCountThresholded(fileSize); + boolean accept = true; + if (_options.sizeFrom() != null || _options.sizeTo() != null) { + if (_options.sizeFrom() != null && fileSize < _options.sizeFrom()) { + accept = false; + } + if (_options.sizeTo() != null && fileSize > _options.sizeTo()) { + accept = false; + } + } + if (_options.hasIncRegex()) { + accept = false; + String path = file.toString(); + List<String> incRegex = _options.incRegex(); + for (String regex : incRegex) { + if (path.matches(regex)) { + accept = true; + break; + } + } + } + if (_options.hasExcRegex()) { + String path = file.toString(); + List<String> excRegex = _options.excRegex(); + for (String regex : excRegex) { + if (path.matches(regex)) { + accept = false; + break; + } + } + } + if (accept) { + _matchedFileCount.getAndIncrement(); + _matchedFileSizeTotal.getAndAdd(fileSize); + updateMatchedFileCountThresholded(fileSize); + if (fileSize < _matchedFileSizeMin.get()) { + 
_matchedFileSizeMin.set(fileSize); + } + if (fileSize > _matchedFileSizeMax.get()) { + _matchedFileSizeMax.set(fileSize); + } + _fh.handleFile(FileList.this, file, fileSize); + } + } + } + } catch (Throwable e) { + _failedFileCount.getAndIncrement(); + System.err.println("Failed to read file: '" + file + "'"); + e.printStackTrace(System.err); + } + return FileVisitResult.CONTINUE; + } + + @Override + public FileVisitResult visitFileFailed(Path file, IOException ioe) { + if (Files.isDirectory(file)) { + _failedDirCount.getAndIncrement(); + } else { + _failedFileCount.getAndIncrement(); + } + return FileVisitResult.CONTINUE; + } + + @Override + public FileVisitResult postVisitDirectory(Path dir, IOException ioe) { + return FileVisitResult.CONTINUE; + } + + @Override + public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) throws IOException { + _dirCount.getAndIncrement(); + return super.preVisitDirectory(dir, attrs); + } + }); + return new FileListSummary(_options, matchedFileCount(), matchedFileCountThresholded(), matchedFileSizeTotal(), + matchedFileSizeMin(), matchedFileSizeMax(), fileCount(), fileCountThresholded(), failedFileCount(), + regularFileCount(), symlinkCount(), nonRegularFileCount(), fileSizeTotal(), fileSizeMin(), + fileSizeMax(), directoryCount(), failedDirectoryCount()); + } + + private void updateFileCountThresholded(long fileSize) { + if (_fileCountThresholded != null) { + _fileCountThresholded.forEach((t, s) -> { + if (fileSize <= t) { + s.getAndIncrement(); + } + }); + } + } + + private void updateMatchedFileCountThresholded(long fileSize) { + if (_matchedFileCountThresholded != null) { + _matchedFileCountThresholded.forEach((t, s) -> { + if (fileSize <= t) { + s.getAndIncrement(); + } + }); + } + } + + private static long parseSize(String s) { + long size; + s = s.toLowerCase(); + if (s.endsWith("b")) { + size = Long.parseLong(s.substring(0, s.length() - 1)); + } else if (s.endsWith("k")) { + size = 
Long.parseLong(s.substring(0, s.length() - 1)) * 1000L; + } else if (s.endsWith("m")) { + size = Long.parseLong(s.substring(0, s.length() - 1)) * 1000000L; + } else if (s.endsWith("g")) { + size = Long.parseLong(s.substring(0, s.length() - 1)) * 1000000000L; + } else if (s.endsWith("t")) { + size = Long.parseLong(s.substring(0, s.length() - 1)) * 1000000000000L; + } else { + size = Long.parseLong(s); + } + if (size < 0) { + throw new IllegalArgumentException("Invalid file size: " + s); + } + return size; + } + + public static FileListSummary execute(Path rootDir, boolean followLinks, Long sizeFrom, Long sizeTo, + Collection<String> incRegex, Collection<String> excRegex, FileHandler fh) throws Throwable { + FileListOptions options = new FileListOptions.Builder().setRootDirectory(rootDir).setFollowLinks(followLinks) + .setSizeFrom(sizeFrom).setSizeTo(sizeTo).setIncRegex(incRegex).setExcRegex(excRegex).build(); + return new FileList(options, fh).call(); + } + + public static FileListSummary execute(FileListOptions options, FileHandler fh) throws Throwable { + return new FileList(options, fh).call(); + } + + public static int executeCommand(String[] args) throws Throwable { + FileListOptions.Builder options = new FileListOptions.Builder(); + try { + for (int i = 0; i < args.length;) { + if ("--help".equalsIgnoreCase(args[i]) || "-h".equalsIgnoreCase(args[i])) { + printCommandUsage(); + return 0; + } + if ("--follow-links".equalsIgnoreCase(args[i]) || "-l".equalsIgnoreCase(args[i])) { + if (options.followLinks() != null) { + throw new IllegalArgumentException("Multiple --follow-links found!"); + } + options.setFollowLinks(true); + i += 1; + } else if ("--size-from".equalsIgnoreCase(args[i])) { + if (options.sizeFrom() != null) { + throw new IllegalArgumentException("Multiple --size-from found!"); + } + options.setSizeFrom(parseSize(args[i + 1])); + i += 2; + } else if ("--size-to".equalsIgnoreCase(args[i])) { + if (options.sizeTo() != null) { + throw new 
IllegalArgumentException("Multiple --size-to found!"); + } + options.setSizeTo(parseSize(args[i + 1])); + i += 2; + } else if ("--include".equalsIgnoreCase(args[i])) { + options.addIncRegex(args[i + 1]); + i += 2; + } else if ("--exclude".equalsIgnoreCase(args[i])) { + options.addExcRegex(args[i + 1]); + i += 2; + } else if ("--thresholds".equalsIgnoreCase(args[i])) { + options.setThresholds(parseThresholds(args[i + 1])); + i += 2; + } else { + if (args[i].startsWith("-")) { + throw new IllegalArgumentException("Unknown option: " + args[i]); + } + if (options.rootDirectory() != null) { + throw new IllegalArgumentException("Multiple root directory specified. Expects only one."); + } + options.setRootDirectory(Paths.get(args[i]).normalize().toAbsolutePath()); + i++; + } + } + if (options.rootDirectory() == null) { + options.setRootDirectory(Paths.get(System.getProperty("user.dir"))); + } + if (options.followLinks() == null) { + options.setFollowLinks(false); + } + } catch (Throwable e) { + throw new IllegalArgumentException(e); + } + execute(options.build(), new FileHandler() { + + @Override + public void handleFile(FileListProgress progress, Path file, long size) { + System.out.println(String.format("%-19d '%s'", size, file)); + } + }).print(System.out); + return 0; + } + + private static long[] parseThresholds(String ts) { + if (ts != null) { + ts = ts.replace(" ", "").replaceAll(",$", "").replaceAll("^,", ""); + if (!ts.isEmpty()) { + String[] ss = ts.split(","); + long[] ls = new long[ss.length]; + for (int i = 0; i < ss.length; i++) { + ls[i] = parseSize(ss[i]); + } + return ls; + } + } + throw new IllegalArgumentException("Failed to parse thresholds: " + ts); + } + + public static void printCommandUsage() { + // @formatter:off + System.out.println(); + System.out.println("Usage: file-list [Options] [directory]"); + System.out.println(); + System.out.println("Description: list files."); + System.out.println(); + System.out.println("Options:"); + 
/**
 * Immutable configuration for a file listing: root directory, symbolic-link policy, optional
 * size window, include/exclude path regexes and optional size thresholds.
 *
 * <p>List-valued getters return {@code null} (not an empty list) when nothing is configured;
 * use the matching {@code hasXxx()} method to test first.
 */
public class FileListOptions {

    private final Path _rootDirectory;
    private final boolean _followLinks;
    private final Long _sizeFrom;
    private final Long _sizeTo;
    private final List<String> _incRegex;
    private final List<String> _excRegex;
    private final List<Long> _thresholds;

    /**
     * Creates the options, taking defensive copies of every collection argument.
     *
     * @param sizeFrom   inclusive lower size bound in bytes, or null for none
     * @param sizeTo     inclusive upper size bound in bytes, or null for none
     * @param incRegex   include regexes, or null
     * @param excRegex   exclude regexes, or null
     * @param thresholds size thresholds in bytes, or null
     */
    public FileListOptions(Path rootDir, boolean followLinks, Long sizeFrom, Long sizeTo, Collection<String> incRegex,
            Collection<String> excRegex, List<Long> thresholds) {
        _rootDirectory = rootDir;
        _followLinks = followLinks;
        _sizeFrom = sizeFrom;
        _sizeTo = sizeTo;
        _incRegex = incRegex == null ? null : new ArrayList<String>(incRegex);
        _excRegex = excRegex == null ? null : new ArrayList<String>(excRegex);
        _thresholds = thresholds == null ? null : new ArrayList<Long>(thresholds);
    }

    public final Path rootDirectory() {
        return _rootDirectory;
    }

    public final boolean followLinks() {
        return _followLinks;
    }

    public final Long sizeFrom() {
        return _sizeFrom;
    }

    public final Long sizeTo() {
        return _sizeTo;
    }

    /** @return an unmodifiable view of the include regexes, or null if none were supplied */
    public final List<String> incRegex() {
        return _incRegex == null ? null : Collections.unmodifiableList(_incRegex);
    }

    public final boolean hasIncRegex() {
        return _incRegex != null && !_incRegex.isEmpty();
    }

    /** @return an unmodifiable view of the exclude regexes, or null if none were supplied */
    public final List<String> excRegex() {
        return _excRegex == null ? null : Collections.unmodifiableList(_excRegex);
    }

    public final boolean hasExcRegex() {
        return _excRegex != null && !_excRegex.isEmpty();
    }

    /** @return true when any size bound or regex filter is configured */
    public final boolean hasConstraints() {
        return _sizeFrom != null || _sizeTo != null || this.hasIncRegex() || this.hasExcRegex();
    }

    /** @return an unmodifiable view of the thresholds, or null when there are none */
    public final List<Long> thresholds() {
        return hasThresholds() ? Collections.unmodifiableList(_thresholds) : null;
    }

    public final boolean hasThresholds() {
        return _thresholds != null && !_thresholds.isEmpty();
    }

    /** Renders the configured options as right-aligned "name: value" lines. */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        sb.append(String.format("%50s: %26s", "root.directory", _rootDirectory)).append("\n");
        sb.append(String.format("%50s: %26b", "follow.symbolic.links", _followLinks)).append("\n");
        if (_sizeFrom != null && _sizeFrom >= 0) {
            sb.append(String.format("%50s: %,26d", "file.size >=", _sizeFrom)).append("\n");
        }
        if (_sizeTo != null && _sizeTo >= 0) {
            sb.append(String.format("%50s: %,26d", "file.size <=", _sizeTo)).append("\n");
        }
        // Regexes are Strings: the original used %,26d for them, which throws
        // IllegalFormatConversionException as soon as any regex is configured.
        if (this.hasIncRegex()) {
            for (String regex : _incRegex) {
                sb.append(String.format("%50s: %26s", "Include(regex)", regex)).append("\n");
            }
        }
        if (this.hasExcRegex()) {
            for (String regex : _excRegex) {
                sb.append(String.format("%50s: %26s", "Exclude(regex)", regex)).append("\n");
            }
        }
        return sb.toString();
    }

    /**
     * Mutable builder for {@link FileListOptions}.
     *
     * <p>Note that {@link #followLinks()} starts out as {@code false}, not {@code null}; it is
     * only null if a caller explicitly passes null to {@link #setFollowLinks(Boolean)}.
     */
    public static class Builder {
        private Path _rootDirectory;
        private Boolean _followLinks;
        private Long _sizeFrom;
        private Long _sizeTo;
        // LinkedHashSet: drops duplicate regexes while keeping the order they were added in.
        private Set<String> _incRegex;
        private Set<String> _excRegex;
        private List<Long> _thresholds;

        public Builder(Path rootDir) {
            _rootDirectory = rootDir;
            _followLinks = false;
            _sizeFrom = null;
            _sizeTo = null;
            _incRegex = new LinkedHashSet<String>();
            _excRegex = new LinkedHashSet<String>();
            _thresholds = new LinkedList<Long>();
        }

        public Builder() {
            this(null);
        }

        public Path rootDirectory() {
            return _rootDirectory;
        }

        public Boolean followLinks() {
            return _followLinks;
        }

        public Long sizeFrom() {
            return _sizeFrom;
        }

        public Long sizeTo() {
            return _sizeTo;
        }

        /** @return an unmodifiable view of the include regexes, or null when none were added */
        public Collection<String> incRegex() {
            return _incRegex.isEmpty() ? null : Collections.unmodifiableCollection(_incRegex);
        }

        /** @return an unmodifiable view of the exclude regexes, or null when none were added */
        public Collection<String> excRegex() {
            return _excRegex.isEmpty() ? null : Collections.unmodifiableCollection(_excRegex);
        }

        /** @return an unmodifiable view of the sorted thresholds, or null when none were set */
        public List<Long> thresholds() {
            return _thresholds.isEmpty() ? null : Collections.unmodifiableList(_thresholds);
        }

        public Builder setRootDirectory(Path rootDir) {
            _rootDirectory = rootDir;
            return this;
        }

        public Builder setFollowLinks(Boolean followLinks) {
            _followLinks = followLinks;
            return this;
        }

        public Builder setSizeFrom(Long sizeFrom) {
            _sizeFrom = sizeFrom;
            return this;
        }

        public Builder setSizeTo(Long sizeTo) {
            _sizeTo = sizeTo;
            return this;
        }

        public Builder addIncRegex(String regex) {
            _incRegex.add(regex);
            return this;
        }

        public Builder addExcRegex(String regex) {
            _excRegex.add(regex);
            return this;
        }

        /**
         * Replaces the thresholds with the given values, stored in ascending order.
         *
         * @param thresholds sizes in bytes; null or empty clears the thresholds
         * @throws IllegalArgumentException if any threshold is negative (nothing is kept then)
         */
        public Builder setThresholds(long... thresholds) throws IllegalArgumentException {
            _thresholds.clear();
            if (thresholds != null) {
                for (long threshold : thresholds) {
                    if (threshold < 0) {
                        // Do not leave a half-populated list behind on failure.
                        _thresholds.clear();
                        throw new IllegalArgumentException(
                                "File size threshold should be a non-negative number. Found: " + threshold);
                    }
                    _thresholds.add(threshold);
                }
                if (!_thresholds.isEmpty()) {
                    Collections.sort(_thresholds);
                }
            }
            return this;
        }

        /**
         * Builds the options, defaulting the root directory to the current working directory
         * and the link policy to "do not follow" when they are unset.
         */
        public FileListOptions build() {
            if (rootDirectory() == null) {
                setRootDirectory(Paths.get(System.getProperty("user.dir")));
            }
            if (followLinks() == null) {
                setFollowLinks(false);
            }
            return new FileListOptions(_rootDirectory, _followLinks, _sizeFrom, _sizeTo, _incRegex, _excRegex,
                    _thresholds);
        }

        /** Replaces all include regexes; null clears them. */
        public Builder setIncRegex(Collection<String> incRegex) {
            _incRegex.clear();
            if (incRegex != null) {
                _incRegex.addAll(incRegex);
            }
            return this;
        }

        /** Replaces all exclude regexes; null clears them. */
        public Builder setExcRegex(Collection<String> excRegex) {
            _excRegex.clear();
            if (excRegex != null) {
                _excRegex.addAll(excRegex);
            }
            return this;
        }

    }

}
Found: " + threshold); + } + _thresholds.add(threshold); + } + if (!_thresholds.isEmpty()) { + Collections.sort(_thresholds); + } + } + return this; + } + + public FileListOptions build() { + if (rootDirectory() == null) { + setRootDirectory(Paths.get(System.getProperty("user.dir"))); + } + if (followLinks() == null) { + setFollowLinks(false); + } + return new FileListOptions(_rootDirectory, _followLinks, _sizeFrom, _sizeTo, _incRegex, _excRegex, + _thresholds); + } + + public Builder setIncRegex(Collection<String> incRegex) { + _incRegex.clear(); + if (incRegex != null) { + _incRegex.addAll(incRegex); + } + return this; + } + + public Builder setExcRegex(Collection<String> excRegex) { + _excRegex.clear(); + if (excRegex != null) { + _excRegex.addAll(excRegex); + } + return this; + } + + } + +} diff --git a/src/main/java/unimelb/utils/FileListProgress.java b/src/main/java/unimelb/utils/FileListProgress.java new file mode 100644 index 0000000000000000000000000000000000000000..d7ca8f25332a16caf60bfb4172e29db57a557332 --- /dev/null +++ b/src/main/java/unimelb/utils/FileListProgress.java @@ -0,0 +1,41 @@ +package unimelb.utils; + +import java.util.Map; + +public interface FileListProgress { + + FileListOptions options(); + + long matchedFileCount(); + + Map<Long, Long> matchedFileCountThresholded(); + + long matchedFileSizeTotal(); + + long matchedFileSizeMin(); + + long matchedFileSizeMax(); + + long fileCount(); + + long regularFileCount(); + + long symlinkCount(); + + long nonRegularFileCount(); + + long failedFileCount(); + + Map<Long, Long> fileCountThresholded(); + + long fileSizeTotal(); + + long fileSizeMin(); + + long fileSizeMax(); + + long directoryCount(); + + long failedDirectoryCount(); + +} diff --git a/src/main/java/unimelb/utils/FileListSummary.java b/src/main/java/unimelb/utils/FileListSummary.java new file mode 100644 index 0000000000000000000000000000000000000000..cd0b3bcdf5c86603fc9b76a33db19fc2c7687501 --- /dev/null +++ 
b/src/main/java/unimelb/utils/FileListSummary.java @@ -0,0 +1,230 @@ +package unimelb.utils; + +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; + +public class FileListSummary implements FileListProgress { + + private final FileListOptions _options; + + private final long _matchedFileCount; + private final Map<Long, Long> _matchedFileCountThresholded; + private final long _matchedFileSizeTotal; + private final long _matchedFileSizeMin; + private final long _matchedFileSizeMax; + + private final long _fileCount; + private final long _failedFileCount; + private final long _regularFileCount; + private final long _symlinkCount; + private final long _nonRegularFileCount; + private final Map<Long, Long> _fileCountThresholded; + + private final long _fileSizeTotal; + private final long _fileSizeMin; + private final long _fileSizeMax; + + private final long _directoryCount; + private final long _failedDirectoryCount; + + public FileListSummary(FileListOptions options, long matchedFileCount, Map<Long, Long> matchedFileCountThresholded, + long matchedFileSizeTotal, long matchedFileSizeMin, long matchedFileSizeMax, long fileCount, + Map<Long, Long> fileCountThresholded, long failedFileCount, long regularFileCount, long symlinkCount, + long nonRegularFileCount, long fileSizeTotal, long fileSizeMin, long fileSizeMax, long dirCount, + long failedDirCount) { + _options = options; + _matchedFileCount = matchedFileCount; + _matchedFileCountThresholded = (matchedFileCountThresholded == null || matchedFileCountThresholded.isEmpty()) + ? 
null + : new TreeMap<Long, Long>(matchedFileCountThresholded); + _matchedFileSizeTotal = matchedFileSizeTotal; + _matchedFileSizeMin = matchedFileSizeMin; + _matchedFileSizeMax = matchedFileSizeMax; + _fileCount = fileCount; + _failedFileCount = failedFileCount; + _regularFileCount = regularFileCount; + _symlinkCount = symlinkCount; + _nonRegularFileCount = nonRegularFileCount; + _fileCountThresholded = (fileCountThresholded == null || fileCountThresholded.isEmpty()) ? null + : new TreeMap<Long, Long>(fileCountThresholded); + _fileSizeTotal = fileSizeTotal; + _fileSizeMin = fileSizeMin; + _fileSizeMax = fileSizeMax; + _directoryCount = dirCount; + _failedDirectoryCount = failedDirCount; + } + + @Override + public final FileListOptions options() { + return _options; + } + + @Override + public final long matchedFileCount() { + return _matchedFileCount; + } + + @Override + public final long matchedFileSizeTotal() { + return _matchedFileSizeTotal; + } + + @Override + public final long matchedFileSizeMin() { + return _matchedFileSizeMin; + } + + @Override + public final long matchedFileSizeMax() { + return _matchedFileSizeMax; + } + + @Override + public final long fileCount() { + return _fileCount; + } + + @Override + public final long failedFileCount() { + return _failedFileCount; + } + + @Override + public final long regularFileCount() { + return _regularFileCount; + } + + @Override + public long symlinkCount() { + return _symlinkCount; + } + + @Override + public long nonRegularFileCount() { + return _nonRegularFileCount; + } + + @Override + public final long fileSizeTotal() { + return _fileSizeTotal; + } + + @Override + public final long fileSizeMin() { + return _fileSizeMin; + } + + @Override + public final long fileSizeMax() { + return _fileSizeMax; + } + + @Override + public final long directoryCount() { + return _directoryCount; + } + + @Override + public final long failedDirectoryCount() { + return _failedDirectoryCount; + } + + @Override + public Map<Long, 
Long> matchedFileCountThresholded() { + if (_matchedFileCountThresholded != null && !_matchedFileCountThresholded.isEmpty()) { + return Collections.unmodifiableMap(_matchedFileCountThresholded); + } + return null; + } + + @Override + public Map<Long, Long> fileCountThresholded() { + if (_fileCountThresholded != null && !_fileCountThresholded.isEmpty()) { + return Collections.unmodifiableMap(_fileCountThresholded); + } + return null; + } + + public String toString() { // Renders the options followed by the result counters as right-aligned "name: value" rows. + StringBuilder sb = new StringBuilder(); + // Options + sb.append("\n"); + sb.append("Options:\n"); + sb.append(_options); + // Results + sb.append("\n"); + sb.append("Results:\n"); + if (_options.hasConstraints()) { + sb.append(String.format("%50s: %,26d", "matched.file.count", _matchedFileCount)).append("\n"); + sb.append(String.format("%50s: %,26d", "matched.file.size.total", _matchedFileSizeTotal)).append("\n"); + sb.append(String.format("%50s: %,26d", "matched.file.size.min", _matchedFileSizeMin)).append("\n"); + sb.append(String.format("%50s: %,26d", "matched.file.size.max", _matchedFileSizeMax)).append("\n"); + if (_matchedFileCountThresholded != null) { + saveThresholded(sb, "matched.file.count", _matchedFileCountThresholded, _matchedFileCount, + _matchedFileSizeMax); + } + } + sb.append(String.format("%50s: %,26d", "file.count", _fileCount)).append("\n"); + sb.append(String.format("%50s: %,26d", "symlink.count", _symlinkCount)).append("\n"); + sb.append(String.format("%50s: %,26d", "non-regular.file.count", _nonRegularFileCount)).append("\n"); + if (_fileCountThresholded != null) { + saveThresholded(sb, "file.count", _fileCountThresholded, _regularFileCount, _fileSizeMax); + } + sb.append(String.format("%50s: %,26d", "file.failed", _failedFileCount)).append("\n"); + sb.append(String.format("%50s: %,26d", "file.size.total", _fileSizeTotal)).append("\n"); + sb.append(String.format("%50s: %,26d", "file.size.min", _fileSizeMin)).append("\n"); + sb.append(String.format("%50s: %,26d", 
"file.size.max", _fileSizeMax)).append("\n"); + sb.append(String.format("%50s: %,26d", "directory.count", _directoryCount)).append("\n"); + sb.append(String.format("%50s: %,26d", "directory.failed", _failedDirectoryCount)).append("\n"); + return sb.toString(); + } + + private static void saveThresholded(StringBuilder sb, String titlePrefix, Map<Long, Long> thresholded, long total, + long maxSize) { // Appends one "title: count" row per non-empty size bucket; the map values are treated as cumulative counts of files with size <= key. + if (thresholded != null && !thresholded.isEmpty()) { + List<Long> ts = new ArrayList<Long>(thresholded.keySet()); + for (int i = 0; i < ts.size(); i++) { + long t = ts.get(i); + long c = thresholded.get(t); + if (i == 0) { + if (c > 0) { + if (t == 0) { + String title = String.format("%s(size = 0 B)", titlePrefix); + sb.append(String.format("%50s: %,26d", title, c)).append("\n"); + } else { + String title = String.format("%s(0 <= size <= %s)", titlePrefix, // inclusive lower bound: the cumulative count also covers zero-byte files + FileSizeUtils.toHumanReadable(t)); + sb.append(String.format("%50s: %,26d", title, c)).append("\n"); + } + } + } else { + long pt = ts.get(i - 1); + long pc = thresholded.get(pt); + long n = c - pc; + if (n > 0) { + String title = String.format("%s(%s < size <= %s)", titlePrefix, + FileSizeUtils.toHumanReadable(pt), FileSizeUtils.toHumanReadable(t)); // the lower bound is exclusive, so it is pt itself rather than pt plus one + sb.append(String.format("%50s: %,26d", title, n)).append("\n"); + } + } + if (i == ts.size() - 1) { + long n = total - c; + if (n > 0) { + String title = String.format("%s(%s < size <= %s)", titlePrefix, + FileSizeUtils.toHumanReadable(t), FileSizeUtils.toHumanReadable(maxSize)); + sb.append(String.format("%50s: %,26d", title, n)).append("\n"); + } + } + } + } + } + + public void print(PrintStream o) { + o.println(toString()); + } + +} diff --git a/src/main/java/unimelb/utils/FileSizeUtils.java b/src/main/java/unimelb/utils/FileSizeUtils.java new file mode 100644 index 0000000000000000000000000000000000000000..fdb90d8cbee8acd54fcdfb24f7a79eeb780421e9 --- /dev/null +++ b/src/main/java/unimelb/utils/FileSizeUtils.java @@ -0,0 +1,61 @@ +package unimelb.utils; 
+ +import java.text.DecimalFormat; + +public class FileSizeUtils { + + public static final long KB = 1000L; + public static final long MB = KB * 1000L; + public static final long GB = MB * 1000L; + public static final long TB = GB * 1000L; + public static final long PB = TB * 1000L; + + public static final long KiB = 1024L; + public static final long MiB = KiB * 1024L; + public static final long GiB = MiB * 1024L; + public static final long TiB = GiB * 1024L; + public static final long PiB = TiB * 1024L; + + public static final DecimalFormat FORMAT = new DecimalFormat("###.###"); + + public static String toHumanReadable(long nBytes, boolean bi) { + if (bi) { + if (nBytes >= PiB) { + return String.format("%s PiB", FORMAT.format(((double) nBytes) / ((double) PiB))); + } else if (nBytes >= TiB) { + return String.format("%s TiB", FORMAT.format(((double) nBytes) / ((double) TiB))); + } else if (nBytes >= GiB) { + return String.format("%s GiB", FORMAT.format(((double) nBytes) / ((double) GiB))); + } else if (nBytes >= MiB) { + return String.format("%s MiB", FORMAT.format(((double) nBytes) / ((double) MiB))); + } else if (nBytes >= KiB) { + return String.format("%s KiB", FORMAT.format(((double) nBytes) / ((double) KiB))); + } else { + return String.format("%d Bytes", nBytes); + } + } else { + if (nBytes >= PB) { + return String.format("%s PB", FORMAT.format(((double) nBytes) / ((double) PB))); + } else if (nBytes >= TB) { + return String.format("%s TB", FORMAT.format(((double) nBytes) / ((double) TB))); + } else if (nBytes >= GB) { + return String.format("%s GB", FORMAT.format(((double) nBytes) / ((double) GB))); + } else if (nBytes >= MB) { + return String.format("%s MB", FORMAT.format(((double) nBytes) / ((double) MB))); + } else if (nBytes >= KB) { + return String.format("%s KB", FORMAT.format(((double) nBytes) / ((double) KB))); + } else { + return String.format("%d Bytes", nBytes); + } + } + } + + public static String toHumanReadable(long nBytes) { + return 
toHumanReadable(nBytes, false); + } + + public static void main(String[] args) { +// System.out.println(toHumanReadable(12000000000L, false)); + } + +} diff --git a/src/main/scripts/unix/file-count b/src/main/scripts/unix/file-list similarity index 90% rename from src/main/scripts/unix/file-count rename to src/main/scripts/unix/file-list index 673abf8eed34981d10581af571aed87090c97beb..8c49adb836e6d38bdd957b177779459ea83f10f5 100644 --- a/src/main/scripts/unix/file-count +++ b/src/main/scripts/unix/file-list @@ -25,4 +25,4 @@ JAR=${LIB}/unimelb-mf-clients.jar [[ -z $(which java) ]] && echo "Java is not found." >&2 && exit 1 # execute the command -java -XX:+UseG1GC -XX:MaxGCPauseMillis=200 -XX:+UseStringDeduplication -Xmx1g -cp "${JAR}" unimelb.utils.FileCount ${1+"$@"} +java -XX:+UseG1GC -XX:MaxGCPauseMillis=200 -XX:+UseStringDeduplication -Xmx1g -cp "${JAR}" unimelb.utils.FileList ${1+"$@"} diff --git a/src/main/scripts/windows/file-count.cmd b/src/main/scripts/windows/file-list.cmd similarity index 78% rename from src/main/scripts/windows/file-count.cmd rename to src/main/scripts/windows/file-list.cmd index 5373dca16c1f6d0c0d01a3426501f12ab7a7bbc2..5bc8b78f533f7bb573f95475294f816c3a516f6a 100644 --- a/src/main/scripts/windows/file-count.cmd +++ b/src/main/scripts/windows/file-list.cmd @@ -9,4 +9,4 @@ popd set JAR=%ROOT%\lib\unimelb-mf-clients.jar -java -XX:+UseG1GC -XX:MaxGCPauseMillis=200 -XX:+UseStringDeduplication -Xmx1g -cp "%JAR%" unimelb.utils.FileCount %* +java -XX:+UseG1GC -XX:MaxGCPauseMillis=200 -XX:+UseStringDeduplication -Xmx1g -cp "%JAR%" unimelb.utils.FileList %*