Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified .gradle/4.8/fileHashes/fileHashes.bin
Binary file not shown.
Binary file modified .gradle/4.8/fileHashes/fileHashes.lock
Binary file not shown.
1 change: 1 addition & 0 deletions .idea/gradle.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

447 changes: 224 additions & 223 deletions .idea/workspace.xml

Large diffs are not rendered by default.

72 changes: 28 additions & 44 deletions src/main/java/com/base64/d/AllStudyGuideLinks.java
Original file line number Diff line number Diff line change
@@ -1,50 +1,34 @@
package com.base64.d;

import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.Arrays;
import java.util.List;

public class AllStudyGuideLinks {

public static final String STUDY_GUIDE_PATH = "https://content.infinitecampus.com/sis/latest/study-guide/";



public static final Set<String> ALL_STUDY_GUIDES = Stream.of(
"academic-planner-system-set-up",
"academic-planner-use-and-management-",
"ad-hoc-filters-letters-and-data-viewer",
"ad-hoc-functions-and-logical-expressions",
"attendance",
"behavior-admin-set-up",
"behavior-data-management-and-reporting",
"behavior-messages-and-letters",
"calendar-rights-user-groups",
"campus-instruction-part-1-the-fundamentals",
"campus-instruction-part-2-grade-book-basics",
"campus-instruction-part-3-advanced-grade-book-and-posting-grades",
"campus-instruction-part-4-campus-learning",
"census---new-personfamily-set-up",
"census---personhousehold-maintenance",
"census-reports",
"flags-and-programs",
"grade-submission-process",
"grading-setup",
"health-module-system-setup",
"health-module-view-and-manage-student-health-information",
"messenger-setup-contacts-and-reports",
"messenger-for-the-end-user",
"schedule-wizard-mass-schedule-students",
"tool-rights-user-groups",
"user-account-creation-maintenance-and-reporting",
"walk-in-scheduler-complete-or-change-a-students-schedule"
//combining first and second part of the url.
).map(studyGuideName -> STUDY_GUIDE_PATH + studyGuideName)
.collect(Collectors.toSet());






public static final List<String> ALL_STUDY_GUIDES = Arrays.asList(
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Two things here:

  1. A List would be the preferred approach here. Although a Set does work, the data lends itself closer to a List, partly because you are actually listing it out here, and the List type will keep the order when you process the results, unlike the Set.
  2. As of Java 8 you can create an inline List by using the Arrays.asList() method, which will take a variable length of parameters, and return them all in a List object.

"https://content.infinitecampus.com/sis/latest/study-guide/academic-planner-system-set-up",
"https://content.infinitecampus.com/sis/latest/study-guide/academic-planner-use-and-management-",
"https://content.infinitecampus.com/sis/latest/study-guide/ad-hoc-filters-letters-and-data-viewer",
"https://content.infinitecampus.com/sis/latest/study-guide/ad-hoc-functions-and-logical-expressions",
"https://content.infinitecampus.com/sis/latest/study-guide/attendance",
"https://content.infinitecampus.com/sis/latest/study-guide/behavior-admin-set-up",
"https://content.infinitecampus.com/sis/latest/study-guide/behavior-data-management-and-reporting",
"https://content.infinitecampus.com/sis/latest/study-guide/behavior-messages-and-letters",
"https://content.infinitecampus.com/sis/latest/study-guide/calendar-rights-user-groups",
"https://content.infinitecampus.com/sis/latest/study-guide/campus-instruction-part-1-the-fundamentals",
"https://content.infinitecampus.com/sis/latest/study-guide/campus-instruction-part-2-grade-book-basics",
"https://content.infinitecampus.com/sis/latest/study-guide/campus-instruction-part-3-advanced-grade-book-and-posting-grades",
"https://content.infinitecampus.com/sis/latest/study-guide/campus-instruction-part-4-campus-learning",
"https://content.infinitecampus.com/sis/latest/study-guide/census---new-personfamily-set-up",
"https://content.infinitecampus.com/sis/latest/study-guide/census---personhousehold-maintenance",
"https://content.infinitecampus.com/sis/latest/study-guide/census-reports",
"https://content.infinitecampus.com/sis/latest/study-guide/flags-and-programs",
"https://content.infinitecampus.com/sis/latest/study-guide/grade-submission-process",
"https://content.infinitecampus.com/sis/latest/study-guide/grading-setup",
"https://content.infinitecampus.com/sis/latest/study-guide/health-module-system-setup",
"https://content.infinitecampus.com/sis/latest/study-guide/health-module-view-and-manage-student-health-information",
"https://content.infinitecampus.com/sis/latest/study-guide/messenger-for-the-end-user",
"https://content.infinitecampus.com/sis/latest/study-guide/tool-rights-user-groups",
"https://content.infinitecampus.com/sis/latest/study-guide/user-account-creation-maintenance-and-reporting",
"https://content.infinitecampus.com/sis/latest/study-guide/walk-in-scheduler-complete-or-change-a-students-schedule");
}
9 changes: 9 additions & 0 deletions src/main/java/com/base64/d/Application.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package com.base64.d;

public class Application {

public static void main(String[] args) {
StudyGuideChecker checker = new StudyGuideChecker();
checker.checkStudyGuides();
}
}
36 changes: 8 additions & 28 deletions src/main/java/com/base64/d/KnownFalsePositives.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,37 +2,17 @@

import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;


public class KnownFalsePositives {

public static final String BLACK_LIST_PATH = "https://content.infinitecampus.com/sis/latest/";

public static final Set<String> BLACK_LIST = Stream.of(
"simulation/send-a-behavior-message-sc-01-28-01",
"simulation/schedule-attendance-message-sc-01-24-02",
"simulation/schedule-a-behavior-message-sc-01-27-02",
"simulation/create-a-behavior-message-template-sc-01-27-01",
"simulation/view-individual-student-attendance",
"simulation/cancel-a-user-created-future-dated-message-sc-03-08-04"
).map(blackList -> BLACK_LIST_PATH + blackList)
.collect(Collectors.toSet());




// public KnownFalsePositives(Set<String> knownFalsePositives) {
// this.knownFalsePositives = knownFalsePositives;
// }
//
// public Set<String> getKnownFalsePositives() {
// return knownFalsePositives;
// }
//
// public void setKnownFalsePositives(Set<String> knownFalsePositives) {
// this.knownFalsePositives = knownFalsePositives;
// }
public static final List<String> WHITE_LIST = Arrays.asList(
"https://content.infinitecampus.com/sis/latest/simulation/send-a-behavior-message-sc-01-28-01",
"https://content.infinitecampus.com/sis/latest/simulation/schedule-attendance-message-sc-01-24-02",
"https://content.infinitecampus.com/sis/latest/simulation/schedule-a-behavior-message-sc-01-27-02",
"https://content.infinitecampus.com/sis/latest/simulation/create-a-behavior-message-template-sc-01-27-01",
"https://content.infinitecampus.com/sis/latest/simulation/view-individual-student-attendance",
"simulation/cancel-a-user-created-future-dated-message-sc-03-08-04");
}
140 changes: 63 additions & 77 deletions src/main/java/com/base64/d/StudyGuideChecker.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,127 +8,120 @@

import java.io.IOException;
import java.util.Base64;
import java.util.Date;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class StudyGuideChecker {
int count = 0;
public int totalLinksFound = 0;
String url3 = "https://content.infinitecampus.com/sis/latest/study-guide/ad-hoc-filters-letters-and-data-viewer";
String decodedText = "";
String iframeSrc = "";
String date = new Date().toString();

String titleOfStudyGuide = "";
Set<String> studyGuideLinks = new HashSet<>();
Set<String> badLinks = new HashSet<>();
public void checkStudyGuides() {
int count = 0;
int totalLinksFound = 0;

WriteToFile writeToFile = new WriteToFile();
String decodedText = "";
String iframeSrc = "";
String titleOfStudyGuide = null;

public void checkStudyGuides() {
Set<String> studyGuideLinks = new HashSet<>();
Set<String> badLinks = new HashSet<>();

for (String link2 : AllStudyGuideLinks.ALL_STUDY_GUIDES) {
WriteToFile writeToFile = new WriteToFile();

for (String studyGuideLink : AllStudyGuideLinks.ALL_STUDY_GUIDES) {
// Find all links within a Study Guide
try {
//
// Connect to the study guide url
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's good to have code comments, especially any area where the functionality is not obvious. The double forward slashes for these comments appear to be created with the "comment out code" shortcut in IntelliJ, which starts the slashes at the first character of the line. This is more useful when commenting out a block of code (to really help draw attention to the full block that is commented out, since all of the characters are on the left side of the screen, which is usually blank). What is preferred for code comments is to have the slashes justified with the rest of the code. You can see below how the two forward slashes have been moved to be more inline with the code.

Document doc3 = Jsoup.connect(link2).get();
// Connect to the study guide url
Document wraperDocument = Jsoup.connect(studyGuideLink).get();

// grab the iframe
Element iframe = doc3.select("iframe").first();
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A lot of the variables have more descriptive names, which is helpful. I've renamed doc3 and link2 to be more descriptive of what they will actually hold. Good naming is a best practice in general, and is really helpful when others read your code.

// Grab the iframe
Element iframe = wraperDocument.select("iframe").first();

// get iframe scr url
// Get iframe src url
iframeSrc = iframe.attr("src");

// connect to the iframe source url
Document doc = Jsoup.connect(iframeSrc).get();
// Connect to the iframe source url
Document studyGuideDocument = Jsoup.connect(iframeSrc).get();

titleOfStudyGuide = doc3.title();
titleOfStudyGuide = wraperDocument.title();
System.out.println(titleOfStudyGuide + " " + iframeSrc);

// get the script tag info
Elements scriptTag = doc.getElementsByTag("script");
// Get the script tag info
Elements scriptTag = studyGuideDocument.getElementsByTag("script");
String jsScripts = scriptTag.toString();

// match any string of characters over 100 in length.
String patternString = "(\\w{100,}.+)";
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This regex definitely works, and although it is unlikely to have more than 100 characters together in this class, it would be considered brittle, because some data could be included that would cause problems.
I've changed it to match on the JavaScript variable name, and then reference the double quotes that will be around this object. This would be considered more robust because it is much less likely to fail if the data changes (it's unlikely that a second window.courseData object will be created in this class).

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I couldn't get either version of that regex to work, so have left the 100 character one.


Pattern pattern = Pattern.compile(patternString);
// Retrieve the `window.courseData` JavaScript variable.
// This value will be enclosed in double quotes.
Pattern courseDataPattern = Pattern.compile("window.courseData = \"(.*)\"");

Matcher matcher = pattern.matcher(jsScripts);
// System.out.println(matcher);
String foundBase64String = "";
while (matcher.find()) {
Matcher courseDataMatcher = courseDataPattern.matcher(jsScripts);
String base64String = "";
while (courseDataMatcher.find()) {
System.out.println("--------");

foundBase64String = matcher.group(1);
base64String = courseDataMatcher.group(1);
}
//reduced the giant string by 2 because it included the " and the ;
String trimFoundBase64String = foundBase64String.substring(0, (foundBase64String.length() - 2));

//put in this try block because one study guide had different script tag layout and threw error.
// Put in this try block because one study guide had different script tag layout and threw error.
try {
// decoding a base64 string and putting into a byte array
byte[] decodedArr = Base64.getDecoder().decode(trimFoundBase64String);
// Decoding a base64 string and putting into a byte array
byte[] decodedArr = Base64.getDecoder().decode(base64String);
decodedText = new String(decodedArr, "UTF-8");
//System.out.println("decoded base64: " + decodedText);
} catch
(IllegalArgumentException exc) {
} catch (IllegalArgumentException exc) {
exc.printStackTrace();
totalLinksFound -= studyGuideLinks.size();
System.out.println(titleOfStudyGuide + " not checked because of decoding error");
}

// regex meaning - . means any character. + means 1 or more, * means 0 or more. ? means 0 or 1
// everything in the () is pattern/matcher "group" syntax. then \" is looking for the next quote
String patternString2 = "(https://content.infinitecampus.com/sis.*?)\"";
// Regex meanings:
// . => means any character
// + => means 1 or more
// * => means 0 or more
// ? => means 0 or 1
// Everything in the () is pattern/matcher "group" syntax, then \" is looking for the next quote
Pattern contentLinkPattern = Pattern.compile("(https://content.infinitecampus.com/sis.*?)\"");

Pattern pattern2 = Pattern.compile(patternString2);
Matcher contentLinkMatcher = contentLinkPattern.matcher(decodedText);


Matcher matcher2 = pattern2.matcher(decodedText);
//System.out.println("This is the matcher " + matcher2);


while (matcher2.find()) {
while (contentLinkMatcher.find()) {
count++;
//some study guide links had '\' added for unknown reason. this code checks for it and removes it before adding it to the studyGuide.

String shorterStudyGuideLink = matcher2.group(1).replaceAll("\\\\+$", "");
// .substring(0, matcher2.group(1).length() - 1);
// System.out.println(shorterStudyGuideLink);
// links are good because regex replace all removes "\" if there.
studyGuideLinks.add(shorterStudyGuideLink);

// Some study guide links had '\' added for unknown reason.
// This code checks for it and removes it before adding it to the studyGuide.
String shorterStudyGuideLink = contentLinkMatcher.group(1).substring(0, contentLinkMatcher.group(1).length() - 1);

if (contentLinkMatcher.group(1).substring(contentLinkMatcher.group(1).length() - 1).equals("\\")) {
studyGuideLinks.add(shorterStudyGuideLink);
} else {
studyGuideLinks.add(contentLinkMatcher.group(1));
}
}

System.out.println(studyGuideLinks.size() + " Unique Campus Community links found");
//remove all the false positives that are in JSON of study guide but not visible to front end user.
studyGuideLinks.removeAll(KnownFalsePositives.BLACK_LIST);
// Remove all the false positives that are in JSON of study guide but not visible to front end user.
studyGuideLinks.removeAll(KnownFalsePositives.WHITE_LIST);
System.out.println(studyGuideLinks);
totalLinksFound += studyGuideLinks.size();

} catch (IOException e) {
e.printStackTrace();
}


// Test all links in a Study Guide
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are two big blocks of code in this method. And at first I was confused what they were doing, so I just added a comment before the second block here, and also the block above. This is the type of thing that could be extracted into separate methods, to improve readability. But this would also require restructuring the code, because it currently is gathering all of the links in a study guide (first block), and then processing all links for the study guide (second block). It's possible that restructuring might be to check each link after it is retrieved (and just keep track of links that have already been checked, to prevent checking duplicates).

int response;
for (String link : studyGuideLinks) {
// Document doc2 = (Document) Jsoup.connect(link).response();
try {
Connection connection = Jsoup.connect(link).ignoreHttpErrors(true);

Connection linkConnection = Jsoup.connect(link).ignoreHttpErrors(true);

Document doc2 = connection.get();
response = connection.response()
.statusCode();
// Although the `linkContents` is never utilized, it is a
// required call. This is because Jsoup expects that `get()`
// is called before the `response()` object is available.
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because I've been cleaning deleting all unused variables, I added a comment here to provide an explanation for why this one exists. Since I initially deleted this variable and then found that response() is never populated unless you first call get(). Which I then remembered we talked about over IM a while back 🤦 :haha:

Document linkContents = linkConnection.get();
response = linkConnection.response().statusCode();

if (response >= 400) {
// Report a bad link if anything other than a "200 OK" response is returned.
if (response != 200) {
badLinks.add(link);
writeToFile.createTxtFile(titleOfStudyGuide, link, link2);
writeToFile.createTxtFile(titleOfStudyGuide, link, studyGuideLink);
}
} catch (IOException ie) {
ie.printStackTrace();
Expand All @@ -138,18 +131,11 @@ public void checkStudyGuides() {
System.out.println(badLinks.size() + " Broken links: " + badLinks);
System.out.println(totalLinksFound + " total links checked");


System.out.println("----------------------------------------------------");
System.out.println("----------------------------------------------------");

studyGuideLinks.clear();
}
System.out.println(AllStudyGuideLinks.ALL_STUDY_GUIDES.size() + " Total Study Guides");

String totalLinksFoundAsString = Integer.toString(totalLinksFound);
// totalLinksFoundAsString = totalLinksFound.to;

writeToFile.writeTotalCount(totalLinksFoundAsString);
}
}

Loading