Skip to content

Commit

Permalink
Merge branch 'JGRP-2786'
Browse files Browse the repository at this point in the history
  • Loading branch information
belaban committed Apr 17, 2024
2 parents 7f2499a + 1e7b497 commit b527db9
Show file tree
Hide file tree
Showing 23 changed files with 3 additions and 4,601 deletions.
4 changes: 2 additions & 2 deletions conf/fork-stacks.xml
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@
</config>
</fork-stack>

<fork-stack id="locking">
<fork-stack id="stats">
<config>
<CENTRAL_LOCK num_backups="2"/>
<STATS/>
</config>
</fork-stack>

Expand Down
2 changes: 1 addition & 1 deletion conf/fork.xml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
xsi:schemaLocation="fork fork-stacks-4.2.xsd">
<fork-stack id="lock">
<config>
<CENTRAL_LOCK2/>
<STATS/>
</config>
</fork-stack>
</fork-stacks>
Expand Down
1 change: 0 additions & 1 deletion conf/jg-magic-map.xml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@
<class id="68" name="org.jgroups.blocks.RequestCorrelator$MultiDestinationHeader"/>
<class id="69" name="org.jgroups.protocols.DAISYCHAIN$DaisyHeader"/>
<class id="71" name="org.jgroups.protocols.STOMP$StompHeader"/>
<class id="72" name="org.jgroups.protocols.Locking$LockingHeader"/>
<class id="74" name="org.jgroups.protocols.COUNTER$CounterHeader"/>
<class id="75" name="org.jgroups.protocols.MERGE3$MergeHeader"/>
<class id="76" name="org.jgroups.protocols.RSVP$RsvpHeader"/>
Expand Down
2 changes: 0 additions & 2 deletions conf/jg-messages.properties
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,6 @@ CouldNotOpenConnectionToDatabase = JGRP000115: Could not open connection to data
DefaultMembershipChangePolicyFailed = JGRP000119: default membership change policy failed
DidnTFindPhysicalAddressFor = JGRP000121: didn't find physical address for
DigestOrSenderIsNull = JGRP000122: digest or sender is null
DiscardedLOCKDENIEDResponseWithLockId = JGRP000123: discarded LOCK-DENIED response with lock-id=
DiscardedLOCKGRANTEDResponseWithLockId = JGRP000124: discarded LOCK-GRANTED response with lock-id=
ErrorBuildingURL = JGRP000126: Error building URL
ErrorCallingService = JGRP000127: Error calling service
ErrorClearingTable = JGRP000128: Error clearing table
Expand Down
2 changes: 0 additions & 2 deletions conf/jg-protocol-ids.xml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
<class id="32" name="org.jgroups.protocols.JDBC_PING"/>
<class id="33" name="org.jgroups.protocols.STOMP"/>
<class id="34" name="org.jgroups.protocols.BPING"/>
<class id="35" name="org.jgroups.protocols.CENTRAL_LOCK"/>
<class id="37" name="org.jgroups.protocols.COUNTER"/>
<class id="38" name="org.jgroups.protocols.MERGE3"/>
<class id="39" name="org.jgroups.protocols.RSVP"/>
Expand All @@ -55,7 +54,6 @@
<class id="63" name="org.jgroups.protocols.UFC_NB"/>
<class id="64" name="org.jgroups.protocols.MFC_NB"/>
<class id="65" name="org.jgroups.protocols.DH_KEY_EXCHANGE"/>
<class id="67" name="org.jgroups.protocols.CENTRAL_LOCK2"/>
<class id="68" name="org.jgroups.protocols.LOCAL_PING"/>
<class id="69" name="org.jgroups.protocols.FRAG4"/>
<class id="70" name="org.jgroups.protocols.FD_ALL3"/>
Expand Down
135 changes: 0 additions & 135 deletions doc/manual/blocks.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -718,141 +718,6 @@ Note that this class was written as a demo of how state can be shared between no
never been heavily tested and is therefore not meant to be used in production.


[[LockService]]
=== Cluster wide locking

`LockService` can be used to acquire locks on a cluster-wide basis, i.e. only one node can acquire a given lock. E.g. if
member B acquires lock L, and member C also tries to acquire L, then C will block until B releases L
(or leaves/crashes).

The new service is implemented as a building block (`org.jgroups.blocks.locking.LockService`) and a protocol
(`CENTRAL_LOCK` or `CENTRAL_LOCK2`). `LockService` looks up the protocol and talks to it via events. If no locking
protocol is found, `LockService` won't start and will throw an exception.

The main abstraction of a distributed lock is an implementation of `java.util.concurrent.locks.Lock`.

Below is an example of how LockService is typically used:

[source,java]
----
// locking.xml needs to contain a locking protocol, e.g. CENTRAL_LOCK
JChannel ch=new JChannel("/home/bela/locking.xml");
LockService lock_service=new LockService(ch);
ch.connect("lock-cluster");
Lock lock=lock_service.getLock("mylock"); // gets a cluster-wide lock
lock.lock();
try {
// do something with the locked resource
}
finally {
lock.unlock();
}
----

In the example, we create a channel, then a `LockService`, then connect the channel. If the channel's
configuration doesn't include a locking protocol, an exception will be thrown.
Then we grab a lock named `"mylock"`, which we lock and subsequently unlock. If another member P had already
acquired `"mylock"`, we'd block until P released the lock, or P left the cluster or crashed.

Note that the owner of a lock is always a given thread in a cluster, so the owner is the JGroups address and
the thread ID. *This means that different threads inside the same JVM trying to access the same named lock
will compete for it.* If `thread-22` grabs the lock first, then `thread-5` will block until `thread-22`
releases the lock.

NOTE: If we want the lock owner to only be the address (and not the thread-id), then property
`use_thread_id_for_lock_owner` can be set to `false`. This means that all threads in a given node can lock or unlock
a given lock. Example: thread T1 locks "lock", but thread T2 can unlock it. This is _not_ the same semantics as
`java.util.concurrent.locks.Lock`, but nevertheless useful in some scenarios. (Introduced in 3.6)

JGroups includes a demo (`org.jgroups.demos.LockServiceDemo`), which can be used to interactively experiment
with distributed locks. `LockServiceDemo -h` dumps all command line options.

There are two protocols which provide locking: <<CENTRAL_LOCK>> and <<CENTRAL_LOCK2>>.

Note that the locking protocol has to be placed at or towards the top of the stack (close to the channel), because it
requires reliable unicasts and multicasts (e.g. provided by `UNICAST3` and `NAKACK2`).


[[LockingAndMerges]]
==== Locking and merges

The following scenario is susceptible to network partitioning and subsequent merging: we have a cluster
view of `{A,B,C,D}` and then the cluster splits into `{A,B}` and `{C,D}`. Assume that B and D now acquire a
lock `"mylock"`. This is what happens (with the locking protocol being `CENTRAL_LOCK`):

* There are 2 coordinators: A for `{A,B}` and C for `{C,D}`
* B successfully acquires `"mylock"` from A
* D successfully acquires `"mylock"` from C
* The partitions merge back into `{A,B,C,D}`. Now only A is the coordinator, and C ceases
to be a coordinator
* Problem: D still holds a lock which should actually be invalid!
There is no easy way (via the Lock API) to 'remove' the lock from D. We could for example simply release
D's lock on `"mylock"`, but then there's no way of telling D that the lock it holds is actually stale!

Therefore the recommended solution here is for nodes to listen to `MergeView` changes if they expect
merging to occur, and re-acquire all of their locks after a merge, e.g.:

[source,java]
----
Lock l1, l2, l3;
LockService lock_service;
...
public void viewAccepted(View view) {
if(view instanceof MergeView) {
new Thread() {
public void run() {
lock_service.unlockAll();
// stop all access to resources protected by l1, l2 or l3
// every thread needs to re-acquire the locks it holds
}
}.start();
}
}
----

==== Locking and merges (updated)
With <<CENTRAL_LOCK2>>, merging of partitions is handled differently. Contrary to CENTRAL_LOCK, which has the coordinator
back up its lock tables to one or more backup members, CENTRAL_LOCK2 doesn't do this.

Instead, when the current coordinator leaves or crashes, the new coordinator fetches information about locks and pending
lock/unlock requests from all members, and then builds its lock table based on this information.

In the above scenario with both B and D holding `mylock`, in case of a merge (say A becomes the new coordinator), D
will be told that its lock `mylock` has been *revoked*. This means that D needs to force-unlock `mylock`. This can be done
in the `lockRevoked()` callback, e.g.:

[source,java]
----
LockService lock_service;
...
public void lockRevoked(String lock_name, Owner current_owner) {
lock_service.unlockForce(lock_name);
}
----

This is marginally better than CENTRAL_LOCK, but admittedly less than ideal. Given the following code:

[source,java]
----
Lock lock=lock_service.getLock("mylock");
lock.lock();
try {
// do something while the lock is held
longRunningAction();
}
finally {
lock.unlock();
}
----

When `mylock` is revoked, `longRunningAction()` should be stopped immediately, or - even better - its changes should be
undone (like in a transaction). However, this isn't feasible and would unnecessarily complicate the code.

Here, we see that the `Lock` abstraction, as easy as it is and as often as it is used *locally* (inside the same JVM),
may not be the best abstraction for a distributed setting!



[[CounterService]]
=== Cluster wide atomic counters
Expand Down
35 changes: 0 additions & 35 deletions doc/manual/protocols.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -2161,41 +2161,6 @@ JIRA: https://issues.redhat.com/browse/JGRP-2402
${SOS}


[[LockingProtocols]]
==== Locking protocols

The locking protocol is org.jgroups.protocols.CENTRAL_LOCK:

${Locking}

[[CENTRAL_LOCK]]
===== CENTRAL_LOCK

CENTRAL_LOCK has the current coordinator of a cluster grant locks, so every node has to communicate with the
coordinator to acquire or release a lock. Lock requests by different nodes for the same lock are processed
in the order in which they are received.

A coordinator maintains a lock table. To prevent losing the knowledge of who holds which locks, the coordinator can push
lock information to a number of backups defined by num_backups. If num_backups is 0, no replication of lock information
happens. If num_backups is greater than 0, then the coordinator pushes information about acquired and released locks to
all backup nodes. Topology changes might create new backup nodes, and lock information is pushed to those on
becoming a new backup node.

The advantage of CENTRAL_LOCK is that all lock requests are granted in the same order across the cluster.

${CENTRAL_LOCK}


[[CENTRAL_LOCK2]]
===== CENTRAL_LOCK2

In CENTRAL_LOCK2, the coordinator (= lock issuer) does not back up its lock table to other member(s), but instead a new
coordinator fetches information about held locks and pending lock/unlock requests from existing members, before it
starts processing lock requests. See <<LockService>> for details.

${CENTRAL_LOCK2}



[[COUNTER]]

Expand Down
12 changes: 0 additions & 12 deletions src/org/jgroups/Event.java
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,6 @@ public class Event {
public static final int ADD_PHYSICAL_ADDRESS = 89; // arg = Tuple<Address,PhysicalAddress> --> boolean
public static final int REMOVE_ADDRESS = 90; // arg = Address
public static final int GET_LOCAL_ADDRESS = 91; // arg = null --> UUID (local_addr)
public static final int LOCK = 95; // arg = LockInfo
public static final int UNLOCK = 96; // arg = LockInfo
public static final int UNLOCK_ALL = 97; // arg = null
public static final int LOCK_AWAIT = 98; // arg = LockInfo
public static final int LOCK_SIGNAL = 99; // arg = AwaitInfo
public static final int IS_MERGE_IN_PROGRESS = 100; // returns true or false
public static final int GET_PHYSICAL_ADDRESSES = 102; // arg = null (returns all physical addresses)
public static final int SITE_UNREACHABLE = 104; // arg = SiteMaster (RELAY2/RELAY3)
Expand All @@ -56,7 +51,6 @@ public class Event {
public static final int GET_PING_DATA = 109; // arg = cluster_name
public static final int GET_SECRET_KEY = 111; // arg = null -> Tuple<SecretKey,byte[]> // PK+version
public static final int SET_SECRET_KEY = 112; // arg = Tuple<SecretKey,byte[]> // PK+version
public static final int UNLOCK_FORCE = 113; // arg = lock name
public static final int INSTALL_MERGE_VIEW = 114; // arg = MergeView
public static final int IS_LOCAL_SITEMASTER = 115; // arg = SiteMaster(site), returns true / false
public static final int IS_LOCAL = 116; // arg = SiteAddress(site), returns true / false
Expand Down Expand Up @@ -124,11 +118,6 @@ public static String type2String(int t) {
case ADD_PHYSICAL_ADDRESS: return "ADD_PHYSICAL_ADDRESS";
case REMOVE_ADDRESS: return "REMOVE_ADDRESS";
case GET_LOCAL_ADDRESS: return "GET_LOCAL_ADDRESS";
case LOCK: return "LOCK";
case UNLOCK: return "UNLOCK";
case UNLOCK_ALL: return "UNLOCK_ALL";
case LOCK_AWAIT: return "LOCK_AWAIT";
case LOCK_SIGNAL: return "LOCK_SIGNAL";
case IS_MERGE_IN_PROGRESS: return "IS_MERGE_IN_PROGRESS";
case GET_PHYSICAL_ADDRESSES: return "GET_PHYSICAL_ADDRESSES";
case SITE_UNREACHABLE: return "SITE_UNREACHABLE";
Expand All @@ -139,7 +128,6 @@ public static String type2String(int t) {
case GET_PING_DATA: return "GET_PING_DATA";
case GET_SECRET_KEY: return "GET_SECRET_KEY";
case SET_SECRET_KEY: return "SET_SECRET_KEY";
case UNLOCK_FORCE: return "UNLOCK_FORCE";
case INSTALL_MERGE_VIEW: return "INSTALL_MERGE_VIEW";
case IS_LOCAL_SITEMASTER: return "IS_LOCAL_SITEMASTER";
case IS_LOCAL: return "IS_LOCAL";
Expand Down
30 changes: 0 additions & 30 deletions src/org/jgroups/blocks/locking/AwaitInfo.java

This file was deleted.

55 changes: 0 additions & 55 deletions src/org/jgroups/blocks/locking/LockInfo.java

This file was deleted.

16 changes: 0 additions & 16 deletions src/org/jgroups/blocks/locking/LockNotification.java

This file was deleted.

0 comments on commit b527db9

Please sign in to comment.