/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

syntax = "proto2";
option java_package = "org.apache.hadoop.hdfs.server.namenode";
option java_outer_classname = "FsImageProto";

package hadoop.hdfs.fsimage;

import "hdfs.proto";
import "acl.proto";
import "xattr.proto";

/**
 * This file defines the on-disk layout of the file system image. The
 * layout is defined by the following EBNF grammar, in which angle
 * brackets mark protobuf definitions. (e.g., <FileSummary>)
 *
 * FILE := MAGIC SECTION* <FileSummary> FileSummaryLength
 * MAGIC := 'HDFSIMG1'
 * SECTION := <NameSystemSection> | ...
 * FileSummaryLength := 4 byte int
 *
 * Some notes:
 *
 * The codec field in FileSummary describes the compression codec used
 * for all sections. The fileheader is always uncompressed.
 *
 * All protobuf messages are serialized in delimited form, which means
 * that there will always be an integer that indicates the size of the
 * protobuf message.
 *
 */
message FileSummary {
  // The version of the above EBNF grammars.
  required uint32 ondiskVersion = 1;
  // layoutVersion describes which features are available in the
  // FSImage.
  required uint32 layoutVersion = 2;
  // Compression codec used for all sections (see the notes above).
  optional string codec = 3;

  // index for each section
  message Section {
    optional string name = 1;
    optional uint64 length = 2;
    optional uint64 offset = 3;
  }
  repeated Section sections = 4;
}

/**
 * Name: NS_INFO
 */
message NameSystemSection {
  optional uint32 namespaceId = 1;
  // legacy generation stamp
  optional uint64 genstampV1 = 2;
  // generation stamp of latest version
  optional uint64 genstampV2 = 3;
  optional uint64 genstampV1Limit = 4;
  optional uint64 lastAllocatedBlockId = 5;
  optional uint64 transactionId = 6;
  optional uint64 rollingUpgradeStartTime = 7;
  optional uint64 lastAllocatedStripedBlockId = 8;
}

/**
 * Permission is serialized as a 64-bit long. [0:24):[24:48):[48:64)
 * (in Big Endian).
 * The first and the second parts are the string ids of the user and
 * group name, and the last 16 bits are the permission bits.
 *
 * Name: INODE
 */
message INodeSection {
  /**
   * under-construction feature for INodeFile
   */
  message FileUnderConstructionFeature {
    optional string clientName = 1;
    optional string clientMachine = 2;
  }

  message AclFeatureProto {
    /**
     * An ACL entry is represented by a 32-bit integer in Big Endian
     * format. The bits can be divided in five segments:
     * [0:2) || [2:26) || [26:27) || [27:29) || [29:32)
     *
     * [0:2) -- reserved for future uses.
     * [2:26) -- the name of the entry, which is an ID that points to a
     * string in the StringTableSection.
     * [26:27) -- the scope of the entry (AclEntryScopeProto)
     * [27:29) -- the type of the entry (AclEntryTypeProto)
     * [29:32) -- the permission of the entry (FsActionProto)
     *
     */
    repeated fixed32 entries = 2 [packed = true];
  }

  message XAttrCompactProto {
    /**
     * Bit layout of the name field:
     *
     * [0:2) -- the namespace of XAttr (XAttrNamespaceProto)
     * [2:26) -- the name of the entry, which is an ID that points to a
     * string in the StringTableSection.
     * [26:27) -- namespace extension. Originally there were only 4 namespaces
     * so only 2 bits were needed. At that time, this bit was reserved. When a
     * 5th namespace was created (raw) this bit became used as a 3rd namespace
     * bit.
     * [27:32) -- reserved for future uses.
     */
    required fixed32 name = 1;
    optional bytes value = 2;
  }

  message XAttrFeatureProto {
    repeated XAttrCompactProto xAttrs = 1;
  }

  message INodeFile {
    optional uint32 replication = 1;
    optional uint64 modificationTime = 2;
    optional uint64 accessTime = 3;
    optional uint64 preferredBlockSize = 4;
    // Packed user / group / permission bits; see the INodeSection comment.
    optional fixed64 permission = 5;
    repeated BlockProto blocks = 6;
    optional FileUnderConstructionFeature fileUC = 7;
    optional AclFeatureProto acl = 8;
    optional XAttrFeatureProto xAttrs = 9;
    optional uint32 storagePolicyID = 10;
    optional BlockTypeProto blockType = 11;
    optional uint32 erasureCodingPolicyID = 12;
  }

  message QuotaByStorageTypeEntryProto {
    required StorageTypeProto storageType = 1;
    required uint64 quota = 2;
  }

  message QuotaByStorageTypeFeatureProto {
    repeated QuotaByStorageTypeEntryProto quotas = 1;
  }

  message INodeDirectory {
    optional uint64 modificationTime = 1;
    // namespace quota
    optional uint64 nsQuota = 2;
    // diskspace quota
    optional uint64 dsQuota = 3;
    // Packed user / group / permission bits; see the INodeSection comment.
    optional fixed64 permission = 4;
    optional AclFeatureProto acl = 5;
    optional XAttrFeatureProto xAttrs = 6;
    optional QuotaByStorageTypeFeatureProto typeQuotas = 7;
  }

  message INodeSymlink {
    // Packed user / group / permission bits; see the INodeSection comment.
    optional fixed64 permission = 1;
    optional bytes target = 2;
    optional uint64 modificationTime = 3;
    optional uint64 accessTime = 4;
  }

  message INode {
    enum Type {
      FILE = 1;
      DIRECTORY = 2;
      SYMLINK = 3;
    }
    required Type type = 1;
    required uint64 id = 2;
    optional bytes name = 3;

    optional INodeFile file = 4;
    optional INodeDirectory directory = 5;
    optional INodeSymlink symlink = 6;
  }

  optional uint64 lastInodeId = 1;
  optional uint64 numInodes = 2;
  // repeated INodes..
}

/**
 * This section records information about under-construction files for
 * reconstructing the lease map.
 * NAME: FILES_UNDERCONSTRUCTION
 */
message FilesUnderConstructionSection {
  message FileUnderConstructionEntry {
    optional uint64 inodeId = 1;
    optional string fullPath = 2;
  }
  // repeated FileUnderConstructionEntry...
}

/**
 * This section records the children of each directory
 * NAME: INODE_DIR
 */
message INodeDirectorySection {
  /**
   * A single DirEntry needs to fit in the default PB max message size of
   * 64MB. Please be careful when adding more fields to a DirEntry!
   */
  message DirEntry {
    optional uint64 parent = 1;
    // children that are not reference nodes
    repeated uint64 children = 2 [packed = true];
    // children that are reference nodes, each element is a reference node id
    repeated uint32 refChildren = 3 [packed = true];
  }
  // repeated DirEntry, ended at the boundary of the section.
}

/**
 * This section holds the INodeReference entries.
 */
message INodeReferenceSection {
  message INodeReference {
    // id of the referred inode
    optional uint64 referredId = 1;
    // local name recorded in WithName
    optional bytes name = 2;
    // recorded in DstReference
    optional uint32 dstSnapshotId = 3;
    // recorded in WithName
    optional uint32 lastSnapshotId = 4;
  }
  // repeated INodeReference...
}

/**
 * This section records the information about snapshot
 * NAME: SNAPSHOT
 */
message SnapshotSection {
  message Snapshot {
    optional uint32 snapshotId = 1;
    // Snapshot root
    optional INodeSection.INode root = 2;
  }

  optional uint32 snapshotCounter = 1;
  repeated uint64 snapshottableDir = 2 [packed = true];
  // total number of snapshots
  optional uint32 numSnapshots = 3;
  // repeated Snapshot...
}

/**
 * This section records information about snapshot diffs
 * NAME: SNAPSHOT_DIFF
 */
message SnapshotDiffSection {
  message CreatedListEntry {
    optional bytes name = 1;
  }

  message DirectoryDiff {
    optional uint32 snapshotId = 1;
    optional uint32 childrenSize = 2;
    optional bool isSnapshotRoot = 3;
    optional bytes name = 4;
    optional INodeSection.INodeDirectory snapshotCopy = 5;
    optional uint32 createdListSize = 6;
    // id of deleted inodes
    repeated uint64 deletedINode = 7 [packed = true];
    // id of reference nodes in the deleted list
    repeated uint32 deletedINodeRef = 8 [packed = true];
    // repeated CreatedListEntry (size is specified by createdListSize)
  }

  message FileDiff {
    optional uint32 snapshotId = 1;
    optional uint64 fileSize = 2;
    optional bytes name = 3;
    optional INodeSection.INodeFile snapshotCopy = 4;
    repeated BlockProto blocks = 5;
  }

  message DiffEntry {
    enum Type {
      FILEDIFF = 1;
      DIRECTORYDIFF = 2;
    }
    required Type type = 1;
    optional uint64 inodeId = 2;
    optional uint32 numOfDiff = 3;

    // repeated DirectoryDiff or FileDiff
  }

  // repeated DiffEntry
}

/**
 * This section maps string to id
 * NAME: STRING_TABLE
 */
message StringTableSection {
  message Entry {
    optional uint32 id = 1;
    optional string str = 2;
  }
  optional uint32 numEntry = 1;
  optional uint32 maskBits = 2 [default = 0];
  // repeated Entry
}

/**
 * Header fields of the secret manager section; the DelegationKey and
 * PersistToken messages follow it, as noted at the end of the message.
 */
message SecretManagerSection {
  message DelegationKey {
    optional uint32 id = 1;
    optional uint64 expiryDate = 2;
    optional bytes key = 3;
  }

  message PersistToken {
    optional uint32 version = 1;
    optional string owner = 2;
    optional string renewer = 3;
    optional string realUser = 4;
    optional uint64 issueDate = 5;
    optional uint64 maxDate = 6;
    optional uint32 sequenceNumber = 7;
    optional uint32 masterKeyId = 8;
    optional uint64 expiryDate = 9;
  }

  optional uint32 currentId = 1;
  optional uint32 tokenSequenceNumber = 2;
  optional uint32 numKeys = 3;
  optional uint32 numTokens = 4;
  // repeated DelegationKey keys
  // repeated PersistToken tokens
}

/**
 * Header fields of the cache manager section; the CachePoolInfoProto and
 * CacheDirectiveInfoProto messages follow it, as noted at the end of the
 * message.
 */
message CacheManagerSection {
  required uint64 nextDirectiveId = 1;
  required uint32 numPools = 2;
  required uint32 numDirectives = 3;
  // repeated CachePoolInfoProto pools
  // repeated CacheDirectiveInfoProto directives
}

/**
 * This section holds the list of erasure coding policies.
 */
message ErasureCodingSection {
  repeated ErasureCodingPolicyProto policies = 1;
}