diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..1ff0c42 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,63 @@ +############################################################################### +# Set default behavior to automatically normalize line endings. +############################################################################### +* text=auto + +############################################################################### +# Set default behavior for command prompt diff. +# +# This is needed for earlier builds of msysgit that do not have it on by +# default for csharp files. +# Note: This is only used by command line +############################################################################### +#*.cs diff=csharp + +############################################################################### +# Set the merge driver for project and solution files +# +# Merging from the command prompt will add diff markers to the files if there +# are conflicts (Merging from VS is not affected by the settings below, in VS +# the diff markers are never inserted). Diff markers may cause the following +# file extensions to fail to load in VS. An alternative would be to treat +# these files as binary and thus will always conflict and require user +# intervention with every merge. To do so, just uncomment the entries below +############################################################################### +#*.sln merge=binary +#*.csproj merge=binary +#*.vbproj merge=binary +#*.vcxproj merge=binary +#*.vcproj merge=binary +#*.dbproj merge=binary +#*.fsproj merge=binary +#*.lsproj merge=binary +#*.wixproj merge=binary +#*.modelproj merge=binary +#*.sqlproj merge=binary +#*.wwaproj merge=binary + +############################################################################### +# behavior for image files +# +# image files are treated as binary by default. 
+############################################################################### +#*.jpg binary +#*.png binary +#*.gif binary + +############################################################################### +# diff behavior for common document formats +# +# Convert binary document formats to text before diffing them. This feature +# is only available from the command line. Turn it on by uncommenting the +# entries below. +############################################################################### +#*.doc diff=astextplain +#*.DOC diff=astextplain +#*.docx diff=astextplain +#*.DOCX diff=astextplain +#*.dot diff=astextplain +#*.DOT diff=astextplain +#*.pdf diff=astextplain +#*.PDF diff=astextplain +#*.rtf diff=astextplain +#*.RTF diff=astextplain diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3c4efe2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,261 @@ +## Ignore Visual Studio temporary files, build results, and +## files generated by popular Visual Studio add-ons. 
+ +# User-specific files +*.suo +*.user +*.userosscache +*.sln.docstates + +# User-specific files (MonoDevelop/Xamarin Studio) +*.userprefs + +# Build results +[Dd]ebug/ +[Dd]ebugPublic/ +[Rr]elease/ +[Rr]eleases/ +x64/ +x86/ +bld/ +[Bb]in/ +[Oo]bj/ +[Ll]og/ + +# Visual Studio 2015 cache/options directory +.vs/ +# Uncomment if you have tasks that create the project's static files in wwwroot +#wwwroot/ + +# MSTest test Results +[Tt]est[Rr]esult*/ +[Bb]uild[Ll]og.* + +# NUNIT +*.VisualState.xml +TestResult.xml + +# Build Results of an ATL Project +[Dd]ebugPS/ +[Rr]eleasePS/ +dlldata.c + +# DNX +project.lock.json +project.fragment.lock.json +artifacts/ + +*_i.c +*_p.c +*_i.h +*.ilk +*.meta +*.obj +*.pch +*.pdb +*.pgc +*.pgd +*.rsp +*.sbr +*.tlb +*.tli +*.tlh +*.tmp +*.tmp_proj +*.log +*.vspscc +*.vssscc +.builds +*.pidb +*.svclog +*.scc + +# Chutzpah Test files +_Chutzpah* + +# Visual C++ cache files +ipch/ +*.aps +*.ncb +*.opendb +*.opensdf +*.sdf +*.cachefile +*.VC.db +*.VC.VC.opendb + +# Visual Studio profiler +*.psess +*.vsp +*.vspx +*.sap + +# TFS 2012 Local Workspace +$tf/ + +# Guidance Automation Toolkit +*.gpState + +# ReSharper is a .NET coding add-in +_ReSharper*/ +*.[Rr]e[Ss]harper +*.DotSettings.user + +# JustCode is a .NET coding add-in +.JustCode + +# TeamCity is a build add-in +_TeamCity* + +# DotCover is a Code Coverage Tool +*.dotCover + +# NCrunch +_NCrunch_* +.*crunch*.local.xml +nCrunchTemp_* + +# MightyMoose +*.mm.* +AutoTest.Net/ + +# Web workbench (sass) +.sass-cache/ + +# Installshield output folder +[Ee]xpress/ + +# DocProject is a documentation generator add-in +DocProject/buildhelp/ +DocProject/Help/*.HxT +DocProject/Help/*.HxC +DocProject/Help/*.hhc +DocProject/Help/*.hhk +DocProject/Help/*.hhp +DocProject/Help/Html2 +DocProject/Help/html + +# Click-Once directory +publish/ + +# Publish Web Output +*.[Pp]ublish.xml +*.azurePubxml +# TODO: Comment the next line if you want to checkin your web deploy settings +# but database connection 
strings (with potential passwords) will be unencrypted +#*.pubxml +*.publishproj + +# Microsoft Azure Web App publish settings. Comment the next line if you want to +# check in your Azure Web App publish settings, but sensitive information contained +# in these scripts will be unencrypted +PublishScripts/ + +# NuGet Packages +*.nupkg +# The packages folder can be ignored because of Package Restore +**/packages/* +# except build/, which is used as an MSBuild target. +!**/packages/build/ +# Uncomment if necessary; however, generally it will be regenerated when needed +#!**/packages/repositories.config +# NuGet v3's project.json files produce more ignorable files +*.nuget.props +*.nuget.targets + +# Microsoft Azure Build Output +csx/ +*.build.csdef + +# Microsoft Azure Emulator +ecf/ +rcf/ + +# Windows Store app package directories and files +AppPackages/ +BundleArtifacts/ +Package.StoreAssociation.xml +_pkginfo.txt + +# Visual Studio cache files +# files ending in .cache can be ignored +*.[Cc]ache +# but keep track of directories ending in .cache +!*.[Cc]ache/ + +# Others +ClientBin/ +~$* +*~ +*.dbmdl +*.dbproj.schemaview +*.jfm +*.pfx +*.publishsettings +node_modules/ +orleans.codegen.cs + +# Since there are multiple workflows, uncomment next line to ignore bower_components +# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) +#bower_components/ + +# RIA/Silverlight projects +Generated_Code/ + +# Backup & report files from converting an old project file +# to a newer Visual Studio version. 
Backup files are not needed, +# because we have git ;-) +_UpgradeReport_Files/ +Backup*/ +UpgradeLog*.XML +UpgradeLog*.htm + +# SQL Server files +*.mdf +*.ldf + +# Business Intelligence projects +*.rdl.data +*.bim.layout +*.bim_*.settings + +# Microsoft Fakes +FakesAssemblies/ + +# GhostDoc plugin setting file +*.GhostDoc.xml + +# Node.js Tools for Visual Studio +.ntvs_analysis.dat + +# Visual Studio 6 build log +*.plg + +# Visual Studio 6 workspace options file +*.opt + +# Visual Studio LightSwitch build output +**/*.HTMLClient/GeneratedArtifacts +**/*.DesktopClient/GeneratedArtifacts +**/*.DesktopClient/ModelManifest.xml +**/*.Server/GeneratedArtifacts +**/*.Server/ModelManifest.xml +_Pvt_Extensions + +# Paket dependency manager +.paket/paket.exe +paket-files/ + +# FAKE - F# Make +.fake/ + +# JetBrains Rider +.idea/ +*.sln.iml + +# CodeRush +.cr/ + +# Python Tools for Visual Studio (PTVS) +__pycache__/ +*.pyc \ No newline at end of file diff --git a/201731092120/wordCount/wordCount.sln b/201731092120/wordCount/wordCount.sln new file mode 100644 index 0000000..6334838 --- /dev/null +++ b/201731092120/wordCount/wordCount.sln @@ -0,0 +1,25 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 15 +VisualStudioVersion = 15.0.28010.2016 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "wordCount", "wordCount\wordCount.csproj", "{5B81EACF-ADCD-495F-BB1A-F4D1EE6C2C9B}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {5B81EACF-ADCD-495F-BB1A-F4D1EE6C2C9B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {5B81EACF-ADCD-495F-BB1A-F4D1EE6C2C9B}.Debug|Any CPU.Build.0 = Debug|Any CPU + {5B81EACF-ADCD-495F-BB1A-F4D1EE6C2C9B}.Release|Any CPU.ActiveCfg = Release|Any CPU + {5B81EACF-ADCD-495F-BB1A-F4D1EE6C2C9B}.Release|Any 
CPU.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {8D2572BD-69AC-4F8D-8754-57E1F40675A8} + EndGlobalSection +EndGlobal diff --git a/201731092120/wordCount/wordCount/App.config b/201731092120/wordCount/wordCount/App.config new file mode 100644 index 0000000..731f6de --- /dev/null +++ b/201731092120/wordCount/wordCount/App.config @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/201731092120/wordCount/wordCount/Program.cs b/201731092120/wordCount/wordCount/Program.cs new file mode 100644 index 0000000..7c1a492 --- /dev/null +++ b/201731092120/wordCount/wordCount/Program.cs @@ -0,0 +1,272 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using System.IO;
+using System.Text.RegularExpressions;
+
+namespace wordCount
+{
+    /// <summary>Operations offered by the word-count tool.</summary>
+    interface library
+    {
+        string ReadFile(string path); // reads the whole input file as text
+        int CountChars(string text); // counts the ASCII characters in the text
+        int CountLines(string text); // counts the non-blank lines in the text
+        int CountWords(string Text); // counts the words in the text
+        Dictionary<string, int> Countphrase(string text, int n); // counts how often each n-word phrase occurs
+        Dictionary<string, int> CoutWords(string text, int n); // counts how often each word occurs
+        void WreteFile(string path); // saves the collected report to a file
+    }
+
+    class Program
+    {
+        /// <summary>
+        /// Entry point. Options: -i input file, -m phrase length, -n number of
+        /// top words to list, -o output file for the report.
+        /// </summary>
+        public static void Main(string[] args)
+        {
+            Statistics statistics = new Statistics();
+            string text = "";
+
+            // Pass 1: load the input; lower-casing once makes all counting case-insensitive.
+            for (int i = 0; i < args.Length; i++)
+            {
+                string path;
+                if (args[i] == "-i" && TryGetOptionValue(args, i, out path))
+                {
+                    text = statistics.ReadFile(path).ToLowerInvariant();
+                }
+            }
+            if (text == "")
+            {
+                Console.WriteLine("未输入文本信息!");
+                return;
+            }
+
+            // Pass 2: run the requested statistics in the order they were given.
+            for (int i = 0; i < args.Length; i++)
+            {
+                int count;
+                if (args[i] == "-m" && TryGetOptionNumber(args, i, out count))
+                {
+                    statistics.Countphrase(text, count);
+                }
+                if (args[i] == "-n" && TryGetOptionNumber(args, i, out count))
+                {
+                    statistics.CoutWords(text, count);
+                }
+            }
+
+            // Pass 3: save the report only after every statistic has been collected.
+            for (int i = 0; i < args.Length; i++)
+            {
+                string outputPath;
+                if (args[i] == "-o" && TryGetOptionValue(args, i, out outputPath))
+                {
+                    statistics.WreteFile(outputPath);
+                }
+            }
+        }
+
+        // Fetches the argument that follows the flag at flagIndex; reports a missing value instead of
+        // letting args[flagIndex + 1] throw IndexOutOfRangeException.
+        private static bool TryGetOptionValue(string[] args, int flagIndex, out string value)
+        {
+            if (flagIndex + 1 < args.Length)
+            {
+                value = args[flagIndex + 1];
+                return true;
+            }
+            value = null;
+            Console.Error.WriteLine("Option " + args[flagIndex] + " requires a value.");
+            return false;
+        }
+
+        // Like TryGetOptionValue, but the value must also parse as an integer.
+        private static bool TryGetOptionNumber(string[] args, int flagIndex, out int number)
+        {
+            string raw;
+            number = 0;
+            if (!TryGetOptionValue(args, flagIndex, out raw))
+            {
+                return false;
+            }
+            if (int.TryParse(raw, out number))
+            {
+                return true;
+            }
+            Console.Error.WriteLine("Option " + args[flagIndex] + " expects an integer, got \"" + raw + "\".");
+            return false;
+        }
+    }
+
+    /// <summary>
+    /// Computes the statistics and keeps a copy of every report line for <see cref="WreteFile"/>.
+    /// </summary>
+    class Statistics : library
+    {
+        // Punctuation removed from the text before CountWords splits it into tokens.
+        private const string Punctuation = "[\']#$%&()*+,-./:;<=>?@[\\]^_{|}~";
+
+        // Mirror of the console report; WreteFile writes it to disk.
+        public StringBuilder sb = new StringBuilder();
+
+        /// <summary>Reads the whole file at <paramref name="path"/> as text.</summary>
+        /// <returns>The file content, or an empty string when the file cannot be read.</returns>
+        public string ReadFile(string path)
+        {
+            try
+            {
+                // Opens the file read-only and always closes it, even when reading fails.
+                return File.ReadAllText(path);
+            }
+            catch (Exception ex) when (ex is IOException || ex is UnauthorizedAccessException)
+            {
+                Console.WriteLine("文件操作异常");
+                Console.WriteLine(ex.ToString()); // show the cause of the failure
+                return "";
+            }
+        }
+
+        /// <summary>Counts the ASCII characters (UTF-16 code units below 128) in <paramref name="text"/>.</summary>
+        public int CountChars(string text)
+        {
+            return text.Count(c => c < 128);
+        }
+
+        /// <summary>Counts the lines that contain at least one non-whitespace character.</summary>
+        public int CountLines(string text)
+        {
+            int lines = 0;
+            bool hasContent = false;
+            foreach (char c in text)
+            {
+                if (c == '\n')
+                {
+                    if (hasContent)
+                    {
+                        lines++;
+                    }
+                    hasContent = false;
+                }
+                else if (!char.IsWhiteSpace(c))
+                {
+                    // '\r' and blanks do not make a line count, so CRLF blank lines are skipped.
+                    hasContent = true;
+                }
+            }
+            // The last line counts even when the text does not end with a newline.
+            return hasContent ? lines + 1 : lines;
+        }
+
+        /// <summary>
+        /// Counts the tokens of at least four characters that remain after punctuation is removed
+        /// and the text is split on whitespace.
+        /// </summary>
+        public int CountWords(string Text)
+        {
+            StringBuilder stripped = new StringBuilder(Text.Length);
+            foreach (char c in Text)
+            {
+                if (Punctuation.IndexOf(c) < 0)
+                {
+                    stripped.Append(c);
+                }
+            }
+
+            // A null separator list splits on every whitespace character, so words on
+            // different lines are no longer glued together.
+            string[] tokens = stripped.ToString().Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
+            return tokens.Count(token => token.Length >= 4);
+        }
+
+        /// <summary>
+        /// Reports the character, word and line totals, then counts every phrase of
+        /// <paramref name="n"/> consecutive words.
+        /// </summary>
+        /// <returns>Each phrase (words joined by single spaces) mapped to its number of occurrences.</returns>
+        public Dictionary<string, int> Countphrase(string text, int n)
+        {
+            Report(string.Format("characters:{0}", CountChars(text)));
+            Report(string.Format("words: {0}", CountWords(text)));
+            Report(string.Format("lines:{0}", CountLines(text)));
+
+            string[] words = SplitWords(text);
+            Dictionary<string, int> frequencies = new Dictionary<string, int>();
+
+            // Slide a window of n words over the text; "start + n <= words.Length" keeps the final
+            // phrase and yields nothing when n is larger than the text (or not positive).
+            for (int start = 0; n > 0 && start + n <= words.Length; start++)
+            {
+                string phrase = string.Join(" ", words, start, n);
+                int seen;
+                frequencies.TryGetValue(phrase, out seen);
+                frequencies[phrase] = seen + 1;
+            }
+
+            foreach (KeyValuePair<string, int> entry in frequencies)
+            {
+                Report(entry.Key + ":" + entry.Value);
+            }
+            return frequencies;
+        }
+
+        /// <summary>
+        /// Counts every word and reports the <paramref name="n"/> most frequent ones.
+        /// </summary>
+        /// <returns>Every word mapped to its number of occurrences (not only the reported ones).</returns>
+        public Dictionary<string, int> CoutWords(string text, int n)
+        {
+            Dictionary<string, int> frequencies = new Dictionary<string, int>();
+            foreach (string word in SplitWords(text))
+            {
+                int seen;
+                frequencies.TryGetValue(word, out seen);
+                frequencies[word] = seen + 1;
+            }
+
+            // Sort the entries directly; a Dictionary does not promise any enumeration order.
+            // Ties are broken by ordinal word order so the report is deterministic.
+            IEnumerable<KeyValuePair<string, int>> top = frequencies
+                .OrderByDescending(p => p.Value)
+                .ThenBy(p => p.Key, StringComparer.Ordinal)
+                .Take(n);
+            foreach (KeyValuePair<string, int> entry in top)
+            {
+                Report(entry.Key + ":" + entry.Value);
+            }
+            return frequencies;
+        }
+
+        /// <summary>Writes the collected report to <paramref name="path"/>, replacing any existing file.</summary>
+        public void WreteFile(string path)
+        {
+            try
+            {
+                File.WriteAllText(path, sb.ToString());
+            }
+            catch (Exception ex) when (ex is IOException || ex is UnauthorizedAccessException)
+            {
+                Console.WriteLine("文件操作异常");
+                Console.WriteLine(ex.ToString());
+            }
+        }
+
+        // Splits text into lower-case words on runs of non-word characters, dropping the empty
+        // tokens Regex.Split produces when the text starts or ends with a separator.
+        private static string[] SplitWords(string text)
+        {
+            return Regex.Split(text.ToLowerInvariant(), @"\W+").Where(word => word.Length > 0).ToArray();
+        }
+
+        // Prints one report line and records it so the saved file matches the console output.
+        private void Report(string line)
+        {
+            Console.WriteLine(line);
+            sb.Append(line).Append("\n");
+        }
+    }
+}
diff --git a/201731092120/wordCount/wordCount/Properties/AssemblyInfo.cs b/201731092120/wordCount/wordCount/Properties/AssemblyInfo.cs new file mode 100644 index 0000000..87e084b --- /dev/null +++ 
b/201731092120/wordCount/wordCount/Properties/AssemblyInfo.cs @@ -0,0 +1,36 @@ +using System.Reflection; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +// 有关程序集的一般信息由以下 +// 控制。更改这些特性值可修改 +// 与程序集关联的信息。 +[assembly: AssemblyTitle("wordCount")] +[assembly: AssemblyDescription("")] +[assembly: AssemblyConfiguration("")] +[assembly: AssemblyCompany("")] +[assembly: AssemblyProduct("wordCount")] +[assembly: AssemblyCopyright("Copyright © 2019")] +[assembly: AssemblyTrademark("")] +[assembly: AssemblyCulture("")] + +// 将 ComVisible 设置为 false 会使此程序集中的类型 +//对 COM 组件不可见。如果需要从 COM 访问此程序集中的类型 +//请将此类型的 ComVisible 特性设置为 true。 +[assembly: ComVisible(false)] + +// 如果此项目向 COM 公开,则下列 GUID 用于类型库的 ID +[assembly: Guid("5b81eacf-adcd-495f-bb1a-f4d1ee6c2c9b")] + +// 程序集的版本信息由下列四个值组成: +// +// 主版本 +// 次版本 +// 生成号 +// 修订号 +// +// 可以指定所有值,也可以使用以下所示的 "*" 预置版本号和修订号 +// 方法是按如下所示使用“*”: : +// [assembly: AssemblyVersion("1.0.*")] +[assembly: AssemblyVersion("1.0.0.0")] +[assembly: AssemblyFileVersion("1.0.0.0")] diff --git a/201731092120/wordCount/wordCount/wordCount.csproj b/201731092120/wordCount/wordCount/wordCount.csproj new file mode 100644 index 0000000..c8b0f2e --- /dev/null +++ b/201731092120/wordCount/wordCount/wordCount.csproj @@ -0,0 +1,53 @@ + + + + + Debug + AnyCPU + {5B81EACF-ADCD-495F-BB1A-F4D1EE6C2C9B} + Exe + wordCount + wordCount + v4.6.1 + 512 + true + true + + + AnyCPU + true + full + false + bin\Debug\ + DEBUG;TRACE + prompt + 4 + + + AnyCPU + pdbonly + true + bin\Release\ + TRACE + prompt + 4 + + + + + + + + + + + + + + + + + + + + \ No newline at end of file