root/net/ftp/ftp_directory_listing_parser_ls.cc

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. TwoColumnDateListingToTime
  2. DetectColumnOffsetSizeAndModificationTime
  3. ParseFtpDirectoryListingLs

// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "net/ftp/ftp_directory_listing_parser_ls.h"

#include <vector>

#include "base/strings/string_number_conversions.h"
#include "base/strings/string_split.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "base/time/time.h"
#include "net/ftp/ftp_directory_listing_parser.h"
#include "net/ftp/ftp_util.h"

namespace {

// Converts a two-column date listing into a base::Time.
//
// |date| is expected to look like "YYYY-MM-DD" and |time| like "HH:MM".
// On success the converted value is stored in |result| and true is returned.
// On any parse or validation failure false is returned and |result| is not
// written.
bool TwoColumnDateListingToTime(const base::string16& date,
                                const base::string16& time,
                                base::Time* result) {
  base::Time::Exploded exploded = { 0 };

  // The date column must split into exactly three numeric pieces separated
  // by '-': year, month and day of month (in that order).
  std::vector<base::string16> date_fields;
  base::SplitString(date, '-', &date_fields);
  if (date_fields.size() != 3)
    return false;
  const bool date_is_numeric =
      base::StringToInt(date_fields[0], &exploded.year) &&
      base::StringToInt(date_fields[1], &exploded.month) &&
      base::StringToInt(date_fields[2], &exploded.day_of_month);
  if (!date_is_numeric)
    return false;

  // The time column must be exactly five characters long and split into
  // exactly two numeric pieces separated by ':': hour and minute.
  if (time.length() != 5)
    return false;
  std::vector<base::string16> time_fields;
  base::SplitString(time, ':', &time_fields);
  if (time_fields.size() != 2)
    return false;
  const bool time_is_numeric =
      base::StringToInt(time_fields[0], &exploded.hour) &&
      base::StringToInt(time_fields[1], &exploded.minute);
  if (!time_is_numeric)
    return false;

  // Reject combinations of values that the exploded-time structure itself
  // reports as invalid.
  if (!exploded.HasValidValues())
    return false;

  // The server's time zone is unknown, so the values are interpreted as
  // local time.
  *result = base::Time::FromLocalExploded(exploded);
  return true;
}

// Locates the date portion of a listing line that has been split into
// |columns|.
//
// On success returns true and stores:
//   - in |offset| the index of the last column of the date listing,
//   - in |size| the text of the column directly preceding the date listing,
//   - in |modification_time| the time parsed from the date listing.
// Returns false when no candidate offset yields a parsable date.
bool DetectColumnOffsetSizeAndModificationTime(
    const std::vector<base::string16>& columns,
    const base::Time& current_time,
    size_t* offset,
    base::string16* size,
    base::Time* modification_time) {
  // Fields such as the owner or group name may contain spaces, so the date
  // can start at an arbitrarily large column. Candidate offsets are tried
  // from left to right and the first one that parses as a date wins.
  //
  // This is what a listing line should look like. A star ("*") marks
  // a required field:
  //
  //  * 1. permission listing
  //    2. number of links (optional)
  //  * 3. owner name (may contain spaces)
  //    4. group name (optional, may contain spaces)
  //  * 5. size in bytes
  //  * 6. month
  //  * 7. day of month
  //  * 8. year or time <-- column_offset will be the index of this column
  //    9. file name (optional, may contain spaces)
  const size_t kFirstCandidate = 5U;
  const size_t column_count = columns.size();

  // Pass 1: the regular "month, day of month, year-or-time" ordering.
  for (size_t last = kFirstCandidate; last < column_count; ++last) {
    const bool matched =
        net::FtpUtil::LsDateListingToTime(columns[last - 2],
                                          columns[last - 1],
                                          columns[last],
                                          current_time,
                                          modification_time);
    if (!matched)
      continue;

    *size = columns[last - 3];
    *offset = last;
    return true;
  }

  // Pass 2: some listings (Russian ones, for example) swap the "month" and
  // "day of month" columns. This is attempted only after every offset has
  // failed with the regular ordering, which keeps the matching stricter.
  for (size_t last = kFirstCandidate; last < column_count; ++last) {
    const bool matched =
        net::FtpUtil::LsDateListingToTime(columns[last - 1],
                                          columns[last - 2],
                                          columns[last],
                                          current_time,
                                          modification_time);
    if (!matched)
      continue;

    *size = columns[last - 3];
    *offset = last;
    return true;
  }

  // Pass 3: some listings use a two-column date format instead. The date
  // then spans only two columns, so the size sits two columns before |last|.
  for (size_t last = kFirstCandidate; last < column_count; ++last) {
    const bool matched = TwoColumnDateListingToTime(columns[last - 1],
                                                    columns[last],
                                                    modification_time);
    if (!matched)
      continue;

    *size = columns[last - 2];
    *offset = last;
    return true;
  }

  return false;
}

}  // namespace

namespace net {

// Parses the lines of an "ls -l" style FTP directory listing.
//
// Every successfully parsed line that carries a file name is appended to
// |entries|. |current_time| is forwarded to the date detection helper.
// Returns true when all lines were either parsed or deliberately skipped,
// and false as soon as a line cannot be handled; entries appended before
// that point remain in |entries|.
bool ParseFtpDirectoryListingLs(
    const std::vector<base::string16>& lines,
    const base::Time& current_time,
    std::vector<FtpDirectoryListingEntry>* entries) {
  // Becomes true once a two-column line has been consumed as the "total n"
  // listing header (n is an integer). At most one such header is accepted.
  bool received_total_line = false;

  for (size_t line_index = 0; line_index < lines.size(); ++line_index) {
    const base::string16& line = lines[line_index];
    if (line.empty())
      continue;

    std::vector<base::string16> columns;
    base::SplitString(base::CollapseWhitespace(line, false), ' ', &columns);

    // Some FTP servers emit a "total n" line (n is an integer). The first
    // two-column line encountered is treated as that header, and only once.
    // The leading word is not matched exactly, because it may be in
    // different languages (at least English and German have been seen in
    // the field); only the number in the second column is validated.
    if (!received_total_line && columns.size() == 2) {
      received_total_line = true;

      int64 total_number = 0;
      if (!base::StringToInt64(columns[1], &total_number) || total_number < 0)
        return false;

      continue;
    }

    FtpDirectoryListingEntry entry;

    size_t column_offset;
    base::string16 size;
    const bool date_found =
        DetectColumnOffsetSizeAndModificationTime(columns,
                                                  current_time,
                                                  &column_offset,
                                                  &size,
                                                  &entry.last_modified);
    if (!date_found) {
      // Some servers send a message in one of the first few lines.
      // The only thing those messages share is the string ".:",
      // where "." means the current directory, and ":" separates it
      // from the rest of the message, which may be empty. Such lines are
      // skipped; anything else makes the whole parse fail.
      if (line.find(base::ASCIIToUTF16(".:")) == base::string16::npos)
        return false;

      continue;
    }

    // The permission listing is deliberately not validated. It's quirky,
    // and some servers send garbage here while the rest of the line is OK.
    // Only its first character is inspected to classify the entry.
    const base::string16& permissions = columns[0];
    entry.type = FtpDirectoryListingEntry::FILE;
    if (!permissions.empty()) {
      if (permissions[0] == 'l')
        entry.type = FtpDirectoryListingEntry::SYMLINK;
      else if (permissions[0] == 'd')
        entry.type = FtpDirectoryListingEntry::DIRECTORY;
    }

    // Some FTP servers do not separate the owning group name from the file
    // size, like "group1234". The file name should still be displayed for
    // such an entry, but the size can't really be recovered (a group named
    // "group1" with size 234 is indistinguishable from "group" with size
    // 1234), so a dummy value is used.
    // TODO(phajdan.jr): Use a value that means "unknown" instead of 0 bytes.
    const bool size_parsed = base::StringToInt64(size, &entry.size);
    if (!size_parsed)
      entry.size = 0;

    // A size of -1 is reported in two situations:
    //  - the server displayed a negative size (a server bug, most likely
    //    triggered by big files such as DVD ISO images); the entry is still
    //    shown, with its real size treated as unknown;
    //  - the entry is not a regular file.
    if (entry.size < 0 || entry.type != FtpDirectoryListingEntry::FILE)
      entry.size = -1;

    // When the date listing ends at the last column there is no file name.
    // Some FTP servers send listing entries with empty names. It's not
    // obvious how to display such an entry, so it is dropped without
    // failing the parse; other entries can still be useful.
    if (column_offset + 1 == columns.size())
      continue;

    entry.name = FtpUtil::GetStringPartAfterColumns(line, column_offset + 1);

    if (entry.type == FtpDirectoryListingEntry::SYMLINK) {
      // The " -> " separator is not required. Some FTP servers don't send
      // the symlink target, possibly for security reasons. When it is
      // present, everything from its last occurrence onwards is cut off.
      const base::string16::size_type arrow_pos =
          entry.name.rfind(base::ASCIIToUTF16(" -> "));
      if (arrow_pos != base::string16::npos)
        entry.name.erase(arrow_pos);
    }

    entries->push_back(entry);
  }

  return true;
}

}  // namespace net

/* [<][>][^][v][top][bottom][index][help] */