这是indexloc提供的服务,不要输入任何密码
Skip to content
This repository was archived by the owner on Mar 4, 2021. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions src/main/java/com/netflix/simianarmy/MonkeyRecorder.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@
*/
package com.netflix.simianarmy;

import java.util.Map;
import java.util.List;
import java.util.Date;
import java.util.List;
import java.util.Map;

/**
* The Interface MonkeyRecorder. This is use to store and find events in some datastore.
Expand Down Expand Up @@ -102,7 +102,7 @@ public interface Event {
* @param eventType
* the event type
* @param region
* the region the event occured
* the region the event occurred
* @param id
* the id
* @return the event
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,21 +17,22 @@
*/
package com.netflix.simianarmy.basic.chaos;

import com.netflix.simianarmy.chaos.ChaosMonkey;
import com.netflix.simianarmy.MonkeyConfiguration;
import com.netflix.simianarmy.MonkeyRecorder.Event;
import com.netflix.simianarmy.NotFoundException;
import com.netflix.simianarmy.chaos.ChaosCrawler.InstanceGroup;

import java.util.Map;
import java.util.Calendar;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Calendar;
import java.util.Map;
import java.util.concurrent.TimeUnit;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.netflix.simianarmy.MonkeyConfiguration;
import com.netflix.simianarmy.MonkeyRecorder.Event;
import com.netflix.simianarmy.NotFoundException;
import com.netflix.simianarmy.chaos.ChaosCrawler.InstanceGroup;
import com.netflix.simianarmy.chaos.ChaosMonkey;

/**
* The Class BasicChaosMonkey.
*/
Expand All @@ -44,10 +45,13 @@ public class BasicChaosMonkey extends ChaosMonkey {
private static final String NS = "simianarmy.chaos.";

/** The cfg. */
private MonkeyConfiguration cfg;
private final MonkeyConfiguration cfg;

/** The runs per day. */
private long runsPerDay;
private final long runsPerDay;

/** The minimum value of the maxTerminationsPerDay property to be considered non-zero. */
private static final double MIN_MAX_TERMINATION_COUNT_PER_DAY = 0.001;

/**
* Instantiates a new basic chaos monkey.
Expand All @@ -71,6 +75,7 @@ public BasicChaosMonkey(ChaosMonkey.Context ctx) {
}

/** {@inheritDoc} */
@Override
public void doMonkeyBusiness() {
cfg.reload();
String prop = NS + "enabled";
Expand All @@ -83,6 +88,9 @@ public void doMonkeyBusiness() {
prop = NS + group.type() + "." + group.name() + ".enabled";
String defaultProp = NS + group.type();
if (cfg.getBoolOrElse(prop, cfg.getBool(defaultProp + ".enabled"))) {
if (isMaxTerminationCountExceeded(group)) {
continue;
}
String probProp = NS + group.type() + "." + group.name() + ".probability";
double prob = cfg.getNumOrElse(probProp, cfg.getNumOrElse(defaultProp + ".probability", 1.0));
LOGGER.info("Group {} [type {}] enabled [prob {}]", new Object[] {group.name(), group.type(), prob});
Expand All @@ -93,11 +101,6 @@ public void doMonkeyBusiness() {
LOGGER.info("leashed ChaosMonkey prevented from killing {} from group {} [{}], set {}=false",
new Object[] {inst, group.name(), group.type(), prop});
} else {
if (hasPreviousTerminations(group)) {
LOGGER.info("ChaosMonkey takes pity on group {} [{}] since it was attacked ealier today",
group.name(), group.type());
continue;
}
try {
recordTermination(group, inst);
context().cloudClient().terminateInstance(inst);
Expand Down Expand Up @@ -132,26 +135,52 @@ protected void handleTerminationError(String instance, Throwable e) {
}

/**
 * {@inheritDoc}
 *
 * Reports whether at least one CHAOS_TERMINATION event has been recorded for the given group since
 * midnight of the current day, as computed from {@link Calendar#getInstance()}.
 */
public boolean hasPreviousTerminations(InstanceGroup group) {
    // Only events tagged with this group's type and name are of interest.
    final Map<String, String> groupFilter = new HashMap<String, String>();
    groupFilter.put("groupType", group.type().name());
    groupFilter.put("groupName", group.name());

    // Roll "now" back to the start of the day by zeroing every time-of-day field.
    final Calendar startOfDay = Calendar.getInstance();
    startOfDay.set(Calendar.HOUR_OF_DAY, 0);
    startOfDay.set(Calendar.MINUTE, 0);
    startOfDay.set(Calendar.SECOND, 0);
    startOfDay.set(Calendar.MILLISECOND, 0);

    final List<Event> todaysTerminations = context().recorder().findEvents(
            Type.CHAOS, EventTypes.CHAOS_TERMINATION, groupFilter, startOfDay.getTime());
    return !todaysTerminations.isEmpty();
}

/**
 * {@inheritDoc}
 *
 * Creates a CHAOS_TERMINATION event for the instance in the group's region, tags it with the group's
 * type and name, and passes it to the recorder.
 */
@Override
public void recordTermination(InstanceGroup group, String instance) {
    final Event termination = context().recorder().newEvent(
            Type.CHAOS, EventTypes.CHAOS_TERMINATION, group.region(), instance);
    // These two fields are what identify the group on the stored event.
    termination.addField("groupType", group.type().name());
    termination.addField("groupName", group.name());
    context().recorder().recordEvent(termination);
}

/**
 * {@inheritDoc}
 *
 * Asks the recorder for CHAOS_TERMINATION events whose groupType and groupName fields match the group,
 * passing {@code after} as the time bound, and returns how many events came back.
 */
@Override
public int getPreviousTerminationCount(InstanceGroup group, Date after) {
    final Map<String, String> groupFilter = new HashMap<String, String>();
    groupFilter.put("groupType", group.type().name());
    groupFilter.put("groupName", group.name());
    return context().recorder()
            .findEvents(Type.CHAOS, EventTypes.CHAOS_TERMINATION, groupFilter, after)
            .size();
}

/**
 * Decides whether the group has used up its termination allowance and must be skipped.
 *
 * The allowance is the {@code maxTerminationsPerDay} property, looked up first for the specific group
 * name, then for the group type, and defaulting to 1.0:
 * <ul>
 * <li>a value at or below {@code MIN_MAX_TERMINATION_COUNT_PER_DAY} disables terminations entirely;</li>
 * <li>a value below 1.0 allows one termination per {@code ceil(1 / value)} days;</li>
 * <li>a value of 1.0 or more is truncated to a whole number of terminations within the last day.</li>
 * </ul>
 *
 * @param group
 *            the instance group being considered
 * @return true if no (further) termination is allowed for the group, false otherwise
 */
private boolean isMaxTerminationCountExceeded(InstanceGroup group) {
    String propName = "maxTerminationsPerDay";
    String defaultProp = String.format("%s%s.%s", NS, group.type(), propName);
    String prop = String.format("%s%s.%s.%s", NS, group.type(), group.name(), propName);
    double maxTerminationsPerDay = cfg.getNumOrElse(prop, cfg.getNumOrElse(defaultProp, 1.0));

    if (maxTerminationsPerDay <= MIN_MAX_TERMINATION_COUNT_PER_DAY) {
        // Report the effective value: the message says "set as {}", and the group-specific property
        // name would be misleading whenever the value actually came from the type-level default.
        LOGGER.info("ChaosMonkey is configured to not allow any killing from group {} [{}] "
                + "with max daily count set as {}",
                new Object[] {group.name(), group.type(), maxTerminationsPerDay});
        return true;
    }

    // Fractional allowances stretch the look-back window instead of shrinking the count below one.
    int daysBack = 1;
    int maxCount = (int) maxTerminationsPerDay;
    if (maxTerminationsPerDay < 1.0) {
        daysBack = (int) Math.ceil(1 / maxTerminationsPerDay);
        maxCount = 1;
    }

    Calendar after = Calendar.getInstance();
    after.add(Calendar.DATE, -1 * daysBack);

    // Check if the group has reached the maximum terminations for the look-back period
    int terminationCount = getPreviousTerminationCount(group, after.getTime());
    if (terminationCount >= maxCount) {
        LOGGER.info("The count of terminations in the last {} days is {}, equal or greater than"
                + " the max count threshold {}",
                new Object[] {daysBack, terminationCount, maxCount});
        return true;
    }
    return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,19 +25,20 @@
public interface ChaosInstanceSelector {

/**
* Select. Pick a random instance out of the group with provided probabilty. Chaos will draw a random number and if
* Select. Pick a random instance out of the group with provided probability. Chaos will draw a random number and if
* that random number is lower than probability then it will proceed to select an instance (at random) out of the
* group. If the random number is higher than the provide probability then no instance will be selected and
* group. If the random number is higher than the provided probability then no instance will be selected and
* <b>null</b> will be returned.
*
* The probability is the run probability. If Chaos is running hourly between 9am and 3pm with an overall configured
* probabilty of "1.0" then the probabilty provided to this routine would be 1.0/6 (6 hours in 9am-3pm). So the
* probability of "1.0" then the probability provided to this routine would be 1.0/6 (6 hours in 9am-3pm). So the
* typical probability here would be .1666. For Chaos to select an instance it will pick a random number between 0
* and 1. If that random number is less than the .1666 it will proced to select an instance and return it, otherwise
* it will return null. Over 6 runs it is likely that the random number be less than .1666, but it is not certain.
* and 1. If that random number is less than the .1666 it will proceed to select an instance and return it,
* otherwise it will return null. Over 6 runs it is likely that the random number be less than .1666, but it is not
* certain.
*
* To make Chaos select an instance with 100% certainty it would have to be configured to run only once a day and
* the instance group would have to be configured for "1.0" daily probabilty.
* the instance group would have to be configured for "1.0" daily probability.
*
* @param group
* the group
Expand Down
13 changes: 9 additions & 4 deletions src/main/java/com/netflix/simianarmy/chaos/ChaosMonkey.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
*/
package com.netflix.simianarmy.chaos;

import java.util.Date;

import com.netflix.simianarmy.Monkey;
import com.netflix.simianarmy.MonkeyConfiguration;

Expand Down Expand Up @@ -53,7 +55,7 @@ public interface Context extends Monkey.Context {
}

/** The context. */
private Context ctx;
private final Context ctx;

/**
* Instantiates a new chaos monkey.
Expand Down Expand Up @@ -85,27 +87,30 @@ public enum EventTypes {
}

/**
 * {@inheritDoc}
 *
 * Always returns {@code Type.CHAOS}; the method is final, so subclasses cannot change it.
 */
@Override
public final Enum type() {
return Type.CHAOS;
}

/**
 * {@inheritDoc}
 *
 * Returns the value held in this monkey's {@code ctx} field.
 */
@Override
public Context context() {
return ctx;
}

/**
 * {@inheritDoc}
 *
 * Declared abstract here, so each concrete chaos monkey must supply its own implementation.
 */
@Override
public abstract void doMonkeyBusiness();

/**
* Checks for previous terminations. Chaos should probably not continue to beat up an instance group if it has
* already been thrashed today.
* Gets the count of terminations since a specific time. Chaos should probably not continue to beat up an
* instance group if the count exceeds a threshold.
*
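* @param group
* the group
* @param after
* the time after which terminations are counted
* @return the number of terminations recorded for the group after that time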
*/
public abstract boolean hasPreviousTerminations(ChaosCrawler.InstanceGroup group);
public abstract int getPreviousTerminationCount(ChaosCrawler.InstanceGroup group, Date after);

/**
* Record termination. This is used to notify system owners of terminations and to record terminations so that Chaos
Expand Down
6 changes: 3 additions & 3 deletions src/main/resources/simianarmy.properties
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,15 @@ simianarmy.calendar.timezone = America/Los_Angeles
# let chaos run
simianarmy.chaos.enabled = true

# dont allow chaos to kill (ie dryrun mode)
# don't allow chaos to kill (ie dryrun mode)
simianarmy.chaos.leashed = true

# set to "false" for Opt-In behavior, "true" for Opt-Out behavior
simianarmy.chaos.ASG.enabled = false

# default probabily for all ASGs
# default probability for all ASGs
simianarmy.chaos.ASG.probability = 1.0

# enable a specific ASG
# simianarmy.chaos.ASG.<asgname>.enabled = true
# simianarmy.chaos.ASG.<asgName>.enabled = true
# simianarmy.chaos.ASG.<asgName>.probability = 1.0
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,13 @@
package com.netflix.simianarmy.basic.chaos;

import java.util.List;
import com.netflix.simianarmy.chaos.ChaosMonkey;
import com.netflix.simianarmy.chaos.ChaosCrawler.InstanceGroup;

import com.netflix.simianarmy.chaos.TestChaosMonkeyContext;

import org.testng.annotations.Test;
import org.testng.Assert;
import org.testng.annotations.Test;

import com.netflix.simianarmy.chaos.ChaosCrawler.InstanceGroup;
import com.netflix.simianarmy.chaos.ChaosMonkey;
import com.netflix.simianarmy.chaos.TestChaosMonkeyContext;

// CHECKSTYLE IGNORE MagicNumberCheck
public class TestBasicChaosMonkey {
Expand Down Expand Up @@ -202,4 +202,101 @@ public void testNoProbabilityByName() {
Assert.assertEquals(selectedOn.get(3).name(), "name3");
Assert.assertEquals(terminated.size(), 0);
}

@Test
public void testMaxTerminationCountPerDayAsZero() {
    // Fixture file is not shown here; by its name it sets the daily maximum to zero.
    final TestChaosMonkeyContext context = new TestChaosMonkeyContext("terminationPerDayAsZero.properties");
    final ChaosMonkey monkey = new BasicChaosMonkey(context);

    monkey.start();
    monkey.stop();

    // Nothing may be selected, let alone terminated.
    Assert.assertEquals(context.selectedOn().size(), 0);
    Assert.assertEquals(context.terminated().size(), 0);
}

@Test
public void testMaxTerminationCountPerDayAsOne() {
    final TestChaosMonkeyContext context = new TestChaosMonkeyContext("terminationPerDayAsOne.properties");
    final ChaosMonkey monkey = new BasicChaosMonkey(context);

    // First run: one group is selected and one instance is terminated.
    monkey.start();
    monkey.stop();
    Assert.assertEquals(context.selectedOn().size(), 1);
    Assert.assertEquals(context.terminated().size(), 1);

    // Second run: the totals must stay at one, i.e. no further selection or termination.
    monkey.start();
    monkey.stop();
    Assert.assertEquals(context.selectedOn().size(), 1);
    Assert.assertEquals(context.terminated().size(), 1);
}

@Test
public void testMaxTerminationCountPerDayAsBiggerThanOne() {
    final TestChaosMonkeyContext context =
            new TestChaosMonkeyContext("terminationPerDayAsBiggerThanOne.properties");
    final ChaosMonkey monkey = new BasicChaosMonkey(context);

    // First run terminates one instance.
    monkey.start();
    monkey.stop();
    Assert.assertEquals(context.selectedOn().size(), 1);
    Assert.assertEquals(context.terminated().size(), 1);

    // Second run is still allowed to terminate, so both totals grow to two.
    monkey.start();
    monkey.stop();
    Assert.assertEquals(context.selectedOn().size(), 2);
    Assert.assertEquals(context.terminated().size(), 2);
}

@Test
public void testMaxTerminationCountPerDayAsSmallerThanOne() {
    final TestChaosMonkeyContext context =
            new TestChaosMonkeyContext("terminationPerDayAsSmallerThanOne.properties");
    final ChaosMonkey monkey = new BasicChaosMonkey(context);

    // First run terminates one instance.
    monkey.start();
    monkey.stop();
    Assert.assertEquals(context.selectedOn().size(), 1);
    Assert.assertEquals(context.terminated().size(), 1);

    // Second run must not add anything: the totals stay at one.
    monkey.start();
    monkey.stop();
    Assert.assertEquals(context.selectedOn().size(), 1);
    Assert.assertEquals(context.terminated().size(), 1);
}

@Test
public void testMaxTerminationCountPerDayAsNegative() {
    // Fixture file is not shown here; by its name it sets a negative daily maximum.
    final TestChaosMonkeyContext context =
            new TestChaosMonkeyContext("terminationPerDayAsNegative.properties");
    final ChaosMonkey monkey = new BasicChaosMonkey(context);

    monkey.start();
    monkey.stop();

    // Expect the same outcome as a zero maximum: no selection and no termination.
    Assert.assertEquals(context.selectedOn().size(), 0);
    Assert.assertEquals(context.terminated().size(), 0);
}

@Test
public void testMaxTerminationCountPerDayAsVerySmall() {
    // Fixture file is not shown here; by its name it sets a very small positive daily maximum.
    final TestChaosMonkeyContext context =
            new TestChaosMonkeyContext("terminationPerDayAsVerySmall.properties");
    final ChaosMonkey monkey = new BasicChaosMonkey(context);

    monkey.start();
    monkey.stop();

    // Expect no selection and no termination.
    Assert.assertEquals(context.selectedOn().size(), 0);
    Assert.assertEquals(context.terminated().size(), 0);
}

@Test
public void testMaxTerminationCountPerDayGroupLevel() {
    final TestChaosMonkeyContext context =
            new TestChaosMonkeyContext("terminationPerDayGroupLevel.properties");
    final ChaosMonkey monkey = new BasicChaosMonkey(context);

    // Each of the first three runs adds exactly one selection and one termination.
    for (int run = 1; run <= 3; run++) {
        monkey.start();
        monkey.stop();
        Assert.assertEquals(context.selectedOn().size(), run);
        Assert.assertEquals(context.terminated().size(), run);
    }

    // A fourth run must NOT trigger another termination: the totals stay at three.
    monkey.start();
    monkey.stop();
    Assert.assertEquals(context.selectedOn().size(), 3);
    Assert.assertEquals(context.terminated().size(), 3);
}

}
Loading