Merge branch 'development' into cache-busting
edmc-ss authored Sep 20, 2019
2 parents 7bf30cd + 6374b03 commit 2f9c579
Showing 12 changed files with 157 additions and 39 deletions.
4 changes: 2 additions & 2 deletions ci/ansible/tasks/main.yml
@@ -6,8 +6,8 @@
pkg_ver:
etcd: "3.3.10"
golang_versions:
current: "go1.11.4"
candidate: "go1.12.9"
current: "go1.12.9"
candidate: "go1.13"
latest: "{{ lookup('url', 'https://golang.org/VERSION?m=text', split_lines=False) }}"
active_golang_version: "{{ golang_version | default('current') }}"
golang_versions_file: "/etc/golang_versions.json"
10 changes: 9 additions & 1 deletion fs/api_internal.go
@@ -521,6 +521,15 @@ func (mS *mountStruct) FetchExtentMapChunk(userID inode.InodeUserID, groupID ino
return
}

// doInlineCheckpointIfEnabled is called whenever we must guarantee that reported state changes
// are, indeed, persisted. Absent any sort of persistent transaction log, this means performing
// a checkpoint unfortunately.
//
// Currently, only explicitly invoked Flushes trigger this. But, actually, any Swift/S3 API call
// that modifies Objects or (what the client thinks are) Containers should also.
//
// TODO is to determine where else a call to this func should also be made.
//
func (mS *mountStruct) doInlineCheckpointIfEnabled() {
var (
err error
@@ -4187,7 +4196,6 @@ func (mS *mountStruct) Wrote(userID inode.InodeUserID, groupID inode.InodeGroupI

err = mS.volStruct.inodeVolumeHandle.Flush(inodeNumber, false)
mS.volStruct.untrackInFlightFileInodeData(inodeNumber, false)
mS.doInlineCheckpointIfEnabled()

err = mS.volStruct.inodeVolumeHandle.Wrote(inodeNumber, objectPath, fileOffset, objectOffset, length, true)

25 changes: 9 additions & 16 deletions liveness/config.go
@@ -153,11 +153,9 @@ type globalsStruct struct {
currentTerm uint64
nextState func()
stateMachineStopChan chan struct{}
stateMachineStopped bool
stateMachineDone sync.WaitGroup
livenessCheckerControlChan chan bool // Send true to trigger livenessChecker() to recompute polling schedule
// Send false to trigger livenessChecker() to exit
livenessCheckerActive bool
livenessCheckerWG sync.WaitGroup
volumeToCheckList []*volumeStruct
emptyVolumeGroupToCheckSet map[string]string // List (in "set" form) of VolumeGroups (by name) with no Volumes (Value == ServingPeer)
@@ -205,7 +203,6 @@ func (dummy *globalsStruct) Up(confMap conf.ConfMap) (err error) {
go requestExpirer()

globals.livenessCheckerControlChan = make(chan bool, 1)
globals.livenessCheckerActive = false

err = nil
return
@@ -247,18 +244,14 @@ func (dummy *globalsStruct) SignaledStart(confMap conf.ConfMap) (err error) {

globals.active = false

// Ensure livenessChecker() is stopped
// Stop livenessChecker()

if globals.livenessCheckerActive {
globals.livenessCheckerActive = false
globals.livenessCheckerControlChan <- false
globals.livenessCheckerWG.Wait()
}
globals.livenessCheckerControlChan <- false
globals.livenessCheckerWG.Wait()

// Stop state machine

globals.stateMachineStopChan <- struct{}{}

globals.stateMachineDone.Wait()

// Shut off recvMsgs()
@@ -708,9 +701,9 @@ func (dummy *globalsStruct) SignaledFinish(confMap conf.ConfMap) (err error) {

globals.recvMsgQueue = list.New()

globals.recvMsgChan = make(chan struct{})
globals.recvMsgChan = make(chan struct{}, 1)

globals.recvMsgsDoneChan = make(chan struct{})
globals.recvMsgsDoneChan = make(chan struct{}, 1)
go recvMsgs()

globals.currentLeader = nil
@@ -719,9 +712,7 @@ func (dummy *globalsStruct) SignaledFinish(confMap conf.ConfMap) (err error) {

globals.nextState = doFollower

globals.stateMachineStopChan = make(chan struct{})

globals.stateMachineStopped = false
globals.stateMachineStopChan = make(chan struct{}, 1)

// Initialize internal Liveness Report data as being empty

@@ -730,7 +721,6 @@ func (dummy *globalsStruct) SignaledFinish(confMap conf.ConfMap) (err error) {

// Start up livenessChecker()

globals.livenessCheckerActive = true
globals.livenessCheckerWG.Add(1)
go livenessChecker()

@@ -748,5 +738,8 @@ func (dummy *globalsStruct) SignaledFinish(confMap conf.ConfMap) (err error) {
}

func (dummy *globalsStruct) Down(confMap conf.ConfMap) (err error) {
globals.requestExpirerStopChan <- struct{}{}
globals.requestExpirerDone.Wait()

return nil
}
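The livenessCheckerControlChan comment in globalsStruct above documents a small control protocol: sending true asks livenessChecker() to recompute its polling schedule, while sending false asks it to exit, after which SignaledStart() waits on livenessCheckerWG. Here is a minimal, self-contained sketch of that protocol — the names echo the globals above, but this is illustrative code, not the ProxyFS implementation:
```
package main

import (
	"fmt"
	"sync"
	"time"
)

var (
	livenessCheckerControlChan = make(chan bool, 1) // true: recompute polling schedule; false: exit
	livenessCheckerWG          sync.WaitGroup
)

func livenessCheckerSketch() {
	defer livenessCheckerWG.Done()
	for {
		select {
		case recompute := <-livenessCheckerControlChan:
			if !recompute {
				return // false asks the checker to exit
			}
			fmt.Println("recomputing polling schedule") // true asks for a schedule recompute
		case <-time.After(250 * time.Millisecond):
			fmt.Println("performing one liveness check")
		}
	}
}

func main() {
	livenessCheckerWG.Add(1)
	go livenessCheckerSketch()

	livenessCheckerControlChan <- true // e.g. the set of volumes to check changed
	time.Sleep(time.Second)

	livenessCheckerControlChan <- false // shutting down: ask the checker to exit...
	livenessCheckerWG.Wait()            // ...and wait for it, as SignaledStart() does above
}
```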
6 changes: 3 additions & 3 deletions liveness/messages.go
@@ -293,7 +293,7 @@ func recvMsgs() {
_ = peer.incompleteRecvMsgQueue.Remove(recvMsgQueueElement.peerRecvMsgQueueElement)
}

// Contstruct a new recvMsgQueueElement
// Construct a new recvMsgQueueElement

recvMsgQueueElement = &recvMsgQueueElementStruct{
peer: peer,
@@ -495,7 +495,7 @@ func requestExpirer() {
if nil == frontRequestElement {
select {
case <-globals.requestExpirerStartChan:
// Go look again... there is likely somethin in globals.requestsByExpirationTime now
// Go look again... there is likely something in globals.requestsByExpirationTime now
case <-globals.requestExpirerStopChan:
globals.requestExpirerDone.Done()
return
@@ -566,7 +566,7 @@ func sendRequest(peer *peerStruct, msgTag uint64, requestContext interface{}, re
globals.Unlock()
}

return // err return from sendMsg() is suffient status
return // err return from sendMsg() is sufficient status
}

func deliverResponse(msgTag uint64, responseMsg interface{}) {
4 changes: 2 additions & 2 deletions liveness/polling.go
@@ -125,7 +125,7 @@ func livenessChecker() {
panic(err)
}

// Delay before next entityToChexk
// Delay before next entityToCheck

select {
case livenessCheckerControlChanValue = <-globals.livenessCheckerControlChan:
@@ -296,7 +296,7 @@ func livenessCheckVolume(volume *internalVolumeReportStruct) {
// the Volumes to ObervingPeers with the required livenessCheckRedundancy. That said,
// it is a bit misleading for an ObservingPeer to report that a VolumeGroup is "alive"
// when not all of that VolumeGroup's Volumes have been checked. Similarly, it is a
// bit mislesading for an ObservingPeer to report that a ServingPeer is "alive" when
// bit misleading for an ObservingPeer to report that a ServingPeer is "alive" when
// not all of that ServingPeer's VolumeGroups have been checked. Therefore, to get an
// accurate picture of that state of a VolumeGroup or ServingPeer, all results from
// all ObservingPeers should be consulted as a set when making any availability
4 changes: 2 additions & 2 deletions liveness/polling_test.go
@@ -280,7 +280,7 @@ func TestComputeLivenessCheckAssignments(t *testing.T) {
}
fmt.Println(string(externalLivenessReportForJustPeerAAsJSON))

// TODO: Actually validate it programatically
// TODO: Actually validate it programmatically

// Validate for observingPeerNameList == []{"PeerA", "PeerB", "PeerC", "PeerD"}

@@ -297,7 +297,7 @@ func TestComputeLivenessCheckAssignments(t *testing.T) {
}
fmt.Println(string(externalLivenessReportForAllPeersAsJSON))

// TODO: Actually validate it programatically
// TODO: Actually validate it programmatically

// All done
}
19 changes: 8 additions & 11 deletions liveness/states.go
@@ -4,6 +4,7 @@ import (
"crypto/rand"
"fmt"
"reflect"
"runtime"
"time"

"github.com/swiftstack/ProxyFS/logger"
@@ -12,10 +13,6 @@
func stateMachine() {
for {
globals.nextState()
if globals.stateMachineStopped {
globals.stateMachineDone.Done()
return
}
}
}

@@ -48,7 +45,7 @@ func doCandidate() {
logger.Infof("%s entered Candidate state", globals.myUDPAddr)
}

// Point all LivenessChackAssignments at globals.whoAmI
// Point all LivenessCheckAssignments at globals.whoAmI

globals.Lock()
livenessReportWhileCandidate = computeLivenessCheckAssignments([]string{globals.whoAmI})
@@ -119,8 +116,8 @@ func doCandidate() {

select {
case <-globals.stateMachineStopChan:
globals.stateMachineStopped = true
return
globals.stateMachineDone.Done()
runtime.Goexit()
case <-globals.recvMsgChan:
recvMsgQueueElement = popGlobalMsg()
if nil != recvMsgQueueElement {
@@ -335,8 +332,8 @@ func doFollower() {

select {
case <-globals.stateMachineStopChan:
globals.stateMachineStopped = true
return
globals.stateMachineDone.Done()
runtime.Goexit()
case <-globals.recvMsgChan:
recvMsgQueueElement = popGlobalMsg()
if nil != recvMsgQueueElement {
@@ -635,8 +632,8 @@ func doLeader() {

select {
case <-globals.stateMachineStopChan:
globals.stateMachineStopped = true
return
globals.stateMachineDone.Done()
runtime.Goexit()
case <-globals.recvMsgChan:
recvMsgQueueElement = popGlobalMsg()
if nil != recvMsgQueueElement {
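As the hunks above show, doCandidate(), doFollower(), and doLeader() now stop by signaling stateMachineDone and calling runtime.Goexit() from inside their select, instead of setting a stateMachineStopped flag for stateMachine() to test. The following is a minimal standalone sketch of that pattern — the names echo the globals in this diff, but it is not the ProxyFS code itself:
```
package main

import (
	"runtime"
	"sync"
)

// Illustrative stand-ins for the liveness globals used by the state machine.
var (
	stateMachineStopChan = make(chan struct{}, 1) // buffered: the stop send never blocks
	stateMachineDone     sync.WaitGroup
	recvMsgChan          = make(chan struct{}, 1)
)

// stateMachineSketch mirrors stateMachine(): it just keeps invoking the current
// state handler; no "stopped" flag is needed because a handler that receives the
// stop signal exits the goroutine itself.
func stateMachineSketch() {
	for {
		doStateSketch()
	}
}

// doStateSketch stands in for doCandidate()/doFollower()/doLeader().
func doStateSketch() {
	select {
	case <-stateMachineStopChan:
		stateMachineDone.Done()
		runtime.Goexit() // unwinds this goroutine; the loop in stateMachineSketch() never resumes
	case <-recvMsgChan:
		// a real handler would pop and process a received message here
	}
}

func main() {
	stateMachineDone.Add(1)
	go stateMachineSketch()

	stateMachineStopChan <- struct{}{} // request the state machine to stop
	stateMachineDone.Wait()            // returns once the handler has called Done() and exited
}
```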
107 changes: 107 additions & 0 deletions pfsagentd/README.md
@@ -0,0 +1,107 @@
# PFSAgent

FUSE Driver for presenting ProxyFS Volumes as locally mounted file systems

## Synopsis

PFSAgent is a program that remotely accesses a ProxyFS Volume through
a new `PROXYFS` HTTP Method targeted at any Swift Proxy Server in a
Swift cluster that contains the `pfs_middleware` filter. All metadata
operations tunnel through `pfs_middleware` on their way to the `ProxyFS`
instance currently managing the specified Volume (Swift Account). To
allow file reads and writes to scale out, however, PFSAgent employs
a `bypass_mode` instructing `pfs_middleware` to pass through an Object
GET or PUT HTTP Method rather than redirect it over to `ProxyFS`.

## Setup

Each Swift Proxy Server configuration file (`proxy-server.conf`) is assumed
to already contain a `[filter:pfs]` section pointing the `pfs_middleware` at
one or more ProxyFS instances. This is what enables Swift API and S3 API
"BiModal" access to ProxyFS Volumes. A new Key:Value that is one of:
* bypass_mode = off
* bypass_mode = read-only
* bypass_mode = read-write
The Value `off` is assumed if the `bypass_mode` Key is not specified.
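For illustration, the change to an existing `[filter:pfs]` section might look like the following; the pre-existing `pfs_middleware` keys are assumed and elided here, and only the `bypass_mode` line is the addition:
```
[filter:pfs]
# ... existing pfs_middleware settings (entry point, ProxyFS endpoint, etc.) remain unchanged ...
bypass_mode = read-write
```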

The program file to execute is `pfsagentd`, which might typically be installed
as `/opt/ss/bin/pfsagentd`. The program is supplied a configuration file
in .INI format. Here is an example:
```
[Agent]
FUSEVolumeName: CommonVolume
FUSEMountPointPath: AgentMountPoint # Unless starting with '/', relative to $CWD
FUSEUnMountRetryDelay: 100ms
FUSEUnMountRetryCap: 100
SwiftAuthURL: http://localhost:8080/auth/v1.0 # If domain name is used, round-robin among all will be used
# SwiftAuthURL: https://localhost:8443/auth/v1.0 # If domain name is used, round-robin among all will be used
SwiftAuthUser: test:tester
SwiftAuthKey: testing
SwiftAccountName: AUTH_test # Must be a bi-modal account
SwiftTimeout: 20s # Should be significantly higher than MaxFlushTime
SwiftRetryLimit: 10
SwiftRetryDelay: 1s
SwiftRetryExpBackoff: 1.4
SwiftConnectionPoolSize: 200
FetchExtentsFromFileOffset: 32
FetchExtentsBeforeFileOffset: 0
ReadCacheLineSize: 1048576 # Aligned chunk of a LogSegment
ReadCacheLineCount: 1000
SharedFileLimit: 1000
ExclusiveFileLimit: 100
DirtyFileLimit: 50
MaxFlushSize: 10485760
MaxFlushTime: 10s
ReadOnly: false
LogFilePath: # Unless starting with '/', relative to $CWD; Blank to disable
LogToConsole: true
TraceEnabled: false
HTTPServerIPAddr: 127.0.0.1
HTTPServerTCPPort: 9090
AttrDuration: 10s
AttrBlockSize: 65536
LookupEntryDuration: 10s
ReaddirMaxEntries: 1024
```

In the above example, some important fields are as follows:
* FUSEVolumeName should be set to the ProxyFS Volume being mounted
* FUSEMountPointPath should be set to where the FUSE presentation of the Volume should appear (must pre-exist)
* SwiftAuthURL should be set to the AuthURL of your Swift Cluster
* SwiftAuthUser should be set to the Swift User that "owns" the Swift Account for the ProxyFS Volume
* SwiftAccountName should be set to the corresponding Swift Account for the ProxyFS Volume
* HTTPServerIPAddr should be set to the IP Address where PFSAgent should present its embedded HTTP Server
* HTTPServerTCPPort should be set to the TCP Port upon which the PFSAgent should present its embedded HTTP Server

The balance of the settings are more related to tuning choices. Among those, the most pertinent are:
* ReadCacheLineSize specifies how much of a Swift Object is read when a read cache miss occurs
* ReadCacheLineCount specifies how many such read cache lines will be used
* MaxFlushSize specifies how frequently in terms of byte count writes are sent to new Swift Objects
* MaxFlushTime specifies how frequently in terms of time writes are sent to new Swift Objects
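For example, with the `ReadCacheLineSize` (1048576) and `ReadCacheLineCount` (1000) values shown above, a fully populated read cache would be expected to occupy roughly 1048576 × 1000 bytes, i.e. about 1 GiB of memory, in that PFSAgent instance.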

Each mounted ProxyFS Volume requires an instance of PFSAgent (`pfsagentd`) to run.
Hence, each ProxyFS Volume must be described by a unique configuration file as described above.

It is convenient to employ `systemd` to launch each PFSAgent instance via unique service files.
An example service file that can be used to allow launching multiple such PFSAgent instances
(by specifying the configuration file name in the `systemd` invocation) is:
```
[Unit]
Description=ProxyFS Agent %i
Wants=network-online.target
After=network-online.target
ConditionFileNotEmpty=/opt/ss/etc/pfsagent/%i.conf
[Service]
ExecStart=/opt/ss/bin/pfsagentd /opt/ss/etc/pfsagent/%i.conf
Type=simple
# Restart the pfsagentd daemon after a 2-second delay, in case it crashes.
Restart=always
RestartSec=2
StartLimitInterval=11s
[Install]
WantedBy=network-online.target
```
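Assuming the unit above is installed as a `systemd` template unit named `pfsagent@.service` (the file name itself is an assumption, not part of the example), one instance can be enabled per configuration file, with `%i` expanding to the instance name:
```
cp pfsagent@.service /etc/systemd/system/
systemctl daemon-reload
systemctl enable --now pfsagent@CommonVolume   # uses /opt/ss/etc/pfsagent/CommonVolume.conf
```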
12 changes: 12 additions & 0 deletions release_notes.md
@@ -1,5 +1,17 @@
# ProxyFS Release Notes

## 1.12.2 (September 19, 2019)

### Bug Fixes:

Removed an unnecessary checkpoint performed before each PFSAgent
RpcWrote operation that is generated as each LogSegment is PUT
to Swift. The prior behavior put a strain on the checkpointing
system when a large set of small files was uploaded via PFSAgent-exposed
FUSE mount points. Note that explicit flushes (fsync()
or fdatasync() calls) will still trigger a checkpoint so that
ProxyFS/PFSAgent can honor the request faithfully.

## 1.12.1 (September 12, 2019)

### Bug Fixes:
1 change: 1 addition & 0 deletions saio/Vagrantfile
@@ -29,5 +29,6 @@ Vagrant.configure(2) do |config|
config.vm.network "forwarded_port", guest: 15346, host: 25346
config.vm.network "forwarded_port", guest: 8080, host: 8080
config.vm.network "forwarded_port", guest: 8443, host: 8443
config.vm.network "private_network", ip: "192.168.22.113", :name => 'vboxnet1', :adapter => 3
config.vm.provision "shell", path: "vagrant_provision.sh"
end
2 changes: 1 addition & 1 deletion saio/vagrant_provision.sh
@@ -48,7 +48,7 @@ yum -y install wget git nfs-utils vim lsof

yum -y --disableexcludes=all install gcc
cd /tmp
TARFILE_NAME=go1.11.4.linux-amd64.tar.gz
TARFILE_NAME=go1.12.9.linux-amd64.tar.gz
wget -q https://dl.google.com/go/$TARFILE_NAME
tar -C /usr/local -xf $TARFILE_NAME
rm $TARFILE_NAME
2 changes: 1 addition & 1 deletion test/container/Dockerfile
@@ -100,7 +100,7 @@ RUN pip install --upgrade pip
RUN pip install requests tox==3.5.3

# Install Golang
ENV GOLANG_TARFILE_NAME=go1.11.4.linux-amd64.tar.gz
ENV GOLANG_TARFILE_NAME=go1.12.9.linux-amd64.tar.gz
RUN cd /tmp && wget -q https://dl.google.com/go/${GOLANG_TARFILE_NAME}
RUN tar -C /usr/local -xf /tmp/${GOLANG_TARFILE_NAME}
RUN rm -rf /tmp/${GOLANG_TARFILE_NAME}
