import components.set.Set; import components.set.Set1L; import components.simplereader.SimpleReader; import components.simplereader.SimpleReader1L; import components.simplewriter.SimpleWriter; import components.simplewriter.SimpleWriter1L; /** * Utility class to support string reassembly from fragments. * * @author Put your name here * * @mathdefinitions
* * OVERLAPS ( * s1: string of character, * s2: string of character, * k: integer * ) : boolean is * 0 <= k and k <= |s1| and k <= |s2| and * s1[|s1|-k, |s1|) = s2[0, k) * * SUBSTRINGS ( * strSet: finite set of string of character, * s: string of character * ) : finite set of string of character is * {t: string of character * where (t is in strSet and t is substring of s) * (t)} * * SUPERSTRINGS ( * strSet: finite set of string of character, * s: string of character * ) : finite set of string of character is * {t: string of character * where (t is in strSet and s is substring of t) * (t)} * * CONTAINS_NO_SUBSTRING_PAIRS ( * strSet: finite set of string of character * ) : boolean is * for all t: string of character * where (t is in strSet) * (SUBSTRINGS(strSet \ {t}, t) = {}) * * ALL_SUPERSTRINGS ( * strSet: finite set of string of character * ) : set of string of character is * {t: string of character * where (SUBSTRINGS(strSet, t) = strSet) * (t)} * * CONTAINS_NO_OVERLAPPING_PAIRS ( * strSet: finite set of string of character * ) : boolean is * for all t1, t2: string of character, k: integer * where (t1 /= t2 and t1 is in strSet and t2 is in strSet and * 1 <= k and k <= |s1| and k <= |s2|) * (not OVERLAPS(s1, s2, k)) * **/ public final class StringReassembly { /** * Private no-argument constructor to prevent instantiation of this utility * class. */ private StringReassembly() { } /** * Reports the maximum length of a common suffix of {@code str1} and prefix * of {@code str2}. * * @param str1 * first string * @param str2 * second string * @return maximum overlap between right end of {@code str1} and left end of * {@code str2} * @requires
* str1 is not substring of str2 and * str2 is not substring of str1 ** @ensures
* OVERLAPS(str1, str2, overlap) and * for all k: integer * where (overlap < k and k <= |str1| and k <= |str2|) * (not OVERLAPS(str1, str2, k)) **/ public static int overlap(String str1, String str2) { assert str1 != null : "Violation of: str1 is not null"; assert str2 != null : "Violation of: str2 is not null"; assert str2.indexOf(str1) < 0 : "Violation of: " + "str1 is not substring of str2"; assert str1.indexOf(str2) < 0 : "Violation of: " + "str2 is not substring of str1"; /* * Start with maximum possible overlap and work down until a match is * found; think about it and try it on some examples to see why * iterating in the other direction doesn't work */ int maxOverlap = str2.length() - 1; while (!str1.regionMatches(str1.length() - maxOverlap, str2, 0, maxOverlap)) { maxOverlap--; } return maxOverlap; } /** * Returns concatenation of {@code str1} and {@code str2} from which one of * the two "copies" of the common string of {@code overlap} characters at * the end of {@code str1} and the beginning of {@code str2} has been * removed. * * @param str1 * first string * @param str2 * second string * @param overlap * amount of overlap * @return combination with one "copy" of overlap removed * @requires OVERLAPS(str1, str2, overlap) * @ensures combination = str1[0, |str1|-overlap) * str2 */ public static String combination(String str1, String str2, int overlap) { assert str1 != null : "Violation of: str1 is not null"; assert str2 != null : "Violation of: str2 is not null"; assert 0 <= overlap && overlap <= str1.length() && overlap <= str2.length() && str1.regionMatches(str1.length() - overlap, str2, 0, overlap) : "" + "Violation of: OVERLAPS(str1, str2, overlap)"; /* * Hint: consider using substring (a String method) */ // TODO: fill in body /* * This line added just to make the program compilable. Should be * replaced with appropriate return statement. */ return ""; } /** * Adds {@code str} to {@code strSet} if and only if it is not a substring * of any string already in {@code strSet}; and if it is added, also removes * from {@code strSet} any string already in {@code strSet} that is a * substring of {@code str}. * * @param strSet * set to consider adding to * @param str * string to consider adding * @updates strSet * @requires CONTAINS_NO_SUBSTRING_PAIRS(strSet) * @ensures
* if SUPERSTRINGS(#strSet, str) = {} * then strSet = #strSet union {str} \ SUBSTRINGS(#strSet, str) * else strSet = #strSet **/ public static void addToSetAvoidingSubstrings(Set
* input.is_open and input.content = <> and * linesFromInput = [maximal set of lines from #input.content such that * CONTAINS_NO_SUBSTRING_PAIRS(linesFromInput)] **/ public static Set
* CONTAINS_NO_SUBSTRING_PAIRS(strSet) and * bestTwo.length >= 2 ** @ensures
* bestTwo[0] is in strSet and * bestTwo[1] is in strSet and * OVERLAPS(bestTwo[0], bestTwo[1], bestOverlap) and * for all str1, str2: string of character, overlap: integer * where (str1 is in strSet and str2 is in strSet and * OVERLAPS(str1, str2, overlap)) * (overlap <= bestOverlap) **/ private static int bestOverlap(Set
* ALL_SUPERSTRINGS(strSet) is subset of ALL_SUPERSTRINGS(#strSet) and * |strSet| <= |#strSet| and * CONTAINS_NO_SUBSTRING_PAIRS(strSet) and * CONTAINS_NO_OVERLAPPING_PAIRS(strSet) **/ public static void assemble(Set
* out.is_open and * out.content = #out.content * * [text with each '~' replaced by line separator] **/ public static void printWithLineSeparators(String text, SimpleWriter out) { assert text != null : "Violation of: text is not null"; assert out != null : "Violation of: out is not null"; assert out.isOpen() : "Violation of: out.is_open"; // TODO: fill in body } /** * Given a file name (relative to the path where the application is running) * that contains fragments of a single original source text, one fragment * per line, outputs to stdout the result of trying to reassemble the * original text from those fragments using a "greedy assembler". The * result, if reassembly is complete, might be the original text; but this * might not happen because a greedy assembler can make a mistake and end up * predicting the fragments were from a string other than the true original * source text. It can also end up with two or more fragments that are * mutually non-overlapping, in which case it outputs the remaining * fragments, appropriately labelled. * * @param args * Command-line arguments: not used */ public static void main(String[] args) { SimpleReader in = new SimpleReader1L(); SimpleWriter out = new SimpleWriter1L(); /* * Get input file name */ out.print("Input file (with fragments): "); String inputFileName = in.nextLine(); SimpleReader inFile = new SimpleReader1L(inputFileName); /* * Get initial fragments from input file */ Set